@@ -839,8 +839,6 @@ func (con *converter) callMatches(target *exprpb.Expr, args []*exprpb.Expr) erro
839839 return err
840840 }
841841
842- con .str .WriteString (" ~ " )
843-
844842 // Visit the pattern expression and convert from RE2 to POSIX if it's a string literal
845843 if constExpr := patternExpr .GetConstExpr (); constExpr != nil && constExpr .GetStringValue () != "" {
846844 // Convert RE2 pattern to POSIX
@@ -851,29 +849,34 @@ func (con *converter) callMatches(target *exprpb.Expr, args []*exprpb.Expr) erro
851849 }
852850
853851 // Convert RE2 to POSIX with security validation
854- posixPattern , err := convertRE2ToPOSIX (re2Pattern )
852+ posixPattern , caseInsensitive , err := convertRE2ToPOSIX (re2Pattern )
855853 if err != nil {
856854 return fmt .Errorf ("invalid regex pattern: %w" , err )
857855 }
858856
859- // Determine case sensitivity
860- caseInsensitive := strings .HasPrefix (re2Pattern , "(?i)" )
861-
862857 con .logger .LogAttrs (context .Background (), slog .LevelDebug ,
863858 "regex pattern conversion" ,
864859 slog .String ("original_pattern" , re2Pattern ),
865860 slog .String ("converted_pattern" , posixPattern ),
866861 slog .Bool ("case_insensitive" , caseInsensitive ),
867862 )
868863
864+ // Use ~* for case-insensitive matching, ~ for case-sensitive
865+ if caseInsensitive {
866+ con .str .WriteString (" ~* " )
867+ } else {
868+ con .str .WriteString (" ~ " )
869+ }
870+
869871 // Write the converted pattern as a string literal
870872 escaped := strings .ReplaceAll (posixPattern , "'" , "''" )
871873 con .str .WriteString ("'" )
872874 con .str .WriteString (escaped )
873875 con .str .WriteString ("'" )
874876 } else {
875877 // For non-literal patterns, we can't convert at compile time
876- // Just use the pattern as-is and hope it's POSIX compatible
878+ // Just use the pattern as-is with case-sensitive operator
879+ con .str .WriteString (" ~ " )
877880 if err := con .visit (patternExpr ); err != nil {
878881 return err
879882 }
@@ -1947,19 +1950,45 @@ func isBinaryOrTernaryOperator(expr *exprpb.Expr) bool {
19471950
19481951// convertRE2ToPOSIX converts an RE2 regex pattern to POSIX ERE format for PostgreSQL.
19491952// It performs security validation to prevent ReDoS attacks (CWE-1333).
1953+ // Returns: (posixPattern, caseInsensitive, error)
19501954// Note: This is a basic conversion for common patterns. Full RE2 to POSIX conversion is complex.
1951- func convertRE2ToPOSIX (re2Pattern string ) (string , error ) {
1955+ func convertRE2ToPOSIX (re2Pattern string ) (string , bool , error ) {
19521956 // 1. Check pattern length to prevent processing extremely long patterns
19531957 if len (re2Pattern ) > maxRegexPatternLength {
1954- return "" , fmt .Errorf ("regex pattern exceeds maximum length of %d characters" , maxRegexPatternLength )
1958+ return "" , false , fmt .Errorf ("regex pattern exceeds maximum length of %d characters" , maxRegexPatternLength )
1959+ }
1960+
1961+ // 2. Extract case-insensitive flag if present
1962+ caseInsensitive := false
1963+ if strings .HasPrefix (re2Pattern , "(?i)" ) {
1964+ caseInsensitive = true
1965+ re2Pattern = strings .TrimPrefix (re2Pattern , "(?i)" )
19551966 }
19561967
1957- // 2. Detect catastrophic nested quantifiers that cause exponential backtracking
1968+ // 3. Detect unsupported RE2 features and return errors
1969+ // Lookahead assertions
1970+ if strings .Contains (re2Pattern , "(?=" ) || strings .Contains (re2Pattern , "(?!" ) {
1971+ return "" , false , errors .New ("lookahead assertions (?=...), (?!...) are not supported in PostgreSQL POSIX regex" )
1972+ }
1973+ // Lookbehind assertions
1974+ if strings .Contains (re2Pattern , "(?<=" ) || strings .Contains (re2Pattern , "(?<!" ) {
1975+ return "" , false , errors .New ("lookbehind assertions (?<=...), (?<!...) are not supported in PostgreSQL POSIX regex" )
1976+ }
1977+ // Named capture groups
1978+ if strings .Contains (re2Pattern , "(?P<" ) {
1979+ return "" , false , errors .New ("named capture groups (?P<name>...) are not supported in PostgreSQL POSIX regex" )
1980+ }
1981+ // Other inline flags (after we've already handled (?i))
1982+ if strings .Contains (re2Pattern , "(?m" ) || strings .Contains (re2Pattern , "(?s" ) || strings .Contains (re2Pattern , "(?-" ) {
1983+ return "" , false , errors .New ("inline flags other than (?i) are not supported in PostgreSQL POSIX regex" )
1984+ }
1985+
1986+ // 4. Detect catastrophic nested quantifiers that cause exponential backtracking
19581987 // Patterns like (a+)+, (a*)*, (x+x+)+, ((a)+b)+, etc. are extremely dangerous
19591988
19601989 // Check for doubled quantifiers
19611990 if matched , _ := regexp .MatchString (`[*+][*+]` , re2Pattern ); matched {
1962- return "" , errors .New ("regex contains catastrophic nested quantifiers that could cause ReDoS" )
1991+ return "" , false , errors .New ("regex contains catastrophic nested quantifiers that could cause ReDoS" )
19631992 }
19641993
19651994 // Check for groups that contain quantifiers and are themselves quantified
@@ -1990,7 +2019,7 @@ func convertRE2ToPOSIX(re2Pattern string) (string, error) {
19902019 if nextChar == '*' || nextChar == '+' || nextChar == '?' || nextChar == '{' {
19912020 // This group is quantified. Check if it contains quantifiers
19922021 if len (groupHasQuantifier ) > 0 && groupHasQuantifier [len (groupHasQuantifier )- 1 ] {
1993- return "" , errors .New ("regex contains catastrophic nested quantifiers that could cause ReDoS" )
2022+ return "" , false , errors .New ("regex contains catastrophic nested quantifiers that could cause ReDoS" )
19942023 }
19952024 }
19962025 }
@@ -2018,21 +2047,21 @@ func convertRE2ToPOSIX(re2Pattern string) (string, error) {
20182047 }
20192048 }
20202049
2021- // 3 . Count and limit capture groups to prevent memory exhaustion
2050+ // 5 . Count and limit capture groups to prevent memory exhaustion
20222051 groupCount := strings .Count (re2Pattern , "(" ) - strings .Count (re2Pattern , `\(` )
20232052 if groupCount > maxRegexGroups {
2024- return "" , fmt .Errorf ("regex contains %d capture groups, exceeds maximum of %d" , groupCount , maxRegexGroups )
2053+ return "" , false , fmt .Errorf ("regex contains %d capture groups, exceeds maximum of %d" , groupCount , maxRegexGroups )
20252054 }
20262055
2027- // 4 . Detect exponential alternation patterns like (a|a)*b or (a|ab)*
2056+ // 6 . Detect exponential alternation patterns like (a|a)*b or (a|ab)*
20282057 alternationPattern := regexp .MustCompile (`\([^)]*\|[^)]*\)[*+]` )
20292058 if alternationPattern .MatchString (re2Pattern ) {
20302059 // Check if alternation has overlapping branches (more dangerous)
20312060 // This is a simple heuristic - full analysis would be more complex
2032- return "" , errors .New ("regex contains quantified alternation that could cause ReDoS" )
2061+ return "" , false , errors .New ("regex contains quantified alternation that could cause ReDoS" )
20332062 }
20342063
2035- // 5 . Check nesting depth to prevent deeply nested patterns
2064+ // 7 . Check nesting depth to prevent deeply nested patterns
20362065 maxDepth := 0
20372066 currentDepth := 0
20382067 for _ , char := range re2Pattern {
@@ -2046,7 +2075,7 @@ func convertRE2ToPOSIX(re2Pattern string) (string, error) {
20462075 }
20472076 }
20482077 if maxDepth > maxRegexNestingDepth {
2049- return "" , fmt .Errorf ("regex nesting depth %d exceeds maximum of %d" , maxDepth , maxRegexNestingDepth )
2078+ return "" , false , fmt .Errorf ("regex nesting depth %d exceeds maximum of %d" , maxDepth , maxRegexNestingDepth )
20502079 }
20512080
20522081 // Passed all security checks - proceed with conversion
@@ -2080,16 +2109,19 @@ func convertRE2ToPOSIX(re2Pattern string) (string, error) {
20802109 // 8. Non-whitespace shortcuts: \S -> [^[:space:]]
20812110 posixPattern = strings .ReplaceAll (posixPattern , `\S` , `[^[:space:]]` )
20822111
2083- // Note: Many RE2 features are not directly convertible to POSIX ERE:
2084- // - Lookahead/lookbehind assertions (?=...), (?!...), (?<=...), (?<!...)
2085- // - Non-capturing groups (?:...)
2086- // - Named groups (?P<name>...)
2087- // - Case-insensitive flags (?i)
2088- // - Multiline flags (?m)
2089- // - Unicode character classes
2112+ // 9. Non-capturing groups: (?:...) -> (...)
2113+ // POSIX ERE doesn't have non-capturing groups, so convert to regular groups
2114+ posixPattern = strings .ReplaceAll (posixPattern , `(?:` , `(` )
2115+
2116+ // Note: How RE2-specific features are handled (rejected with an error unless marked otherwise):
2117+ // - Lookahead/lookbehind assertions (?=...), (?!...), (?<=...), (?<!...) - ERROR
2118+ // - Named groups (?P<name>...) - ERROR
2119+ // - Case-insensitive flag (?i) - CONVERTED (returned as separate boolean)
2120+ // - Other inline flags (?m), (?s) - ERROR
20902121 //
2091- // For these cases, the pattern is returned as-is, which may cause PostgreSQL errors
2092- // if the pattern uses unsupported RE2 features.
2122+ // Converted features:
2123+ // - Non-capturing groups (?:...) - Converted to regular groups (...)
2124+ // - Character class shortcuts (\d, \w, \s, etc.) - Converted to POSIX equivalents
20932125
2094- return posixPattern , nil
2126+ return posixPattern , caseInsensitive , nil
20952127}
0 commit comments