@@ -218,6 +218,13 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
218218 RexNode fieldRex = rexVisitor .analyze (node .getField (), context );
219219 String patternStr = (String ) node .getPattern ().getValue ();
220220
221+ if (node .getMode () == Rex .RexMode .SED ) {
222+ RexNode sedCall = createOptimizedSedCall (fieldRex , patternStr , context );
223+ String fieldName = node .getField ().toString ();
224+ projectPlusOverriding (List .of (sedCall ), List .of (fieldName ), context );
225+ return context .relBuilder .peek ();
226+ }
227+
221228 List <String > namedGroups = RegexCommonUtils .getNamedGroupCandidates (patternStr );
222229
223230 if (namedGroups .isEmpty ()) {
@@ -252,6 +259,17 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
252259 newFieldNames .add (namedGroups .get (i ));
253260 }
254261
262+ if (node .getOffsetField ().isPresent ()) {
263+ RexNode offsetCall =
264+ PPLFuncImpTable .INSTANCE .resolve (
265+ context .rexBuilder ,
266+ BuiltinFunctionName .REX_OFFSET ,
267+ fieldRex ,
268+ context .rexBuilder .makeLiteral (patternStr ));
269+ newFields .add (offsetCall );
270+ newFieldNames .add (node .getOffsetField ().get ());
271+ }
272+
255273 projectPlusOverriding (newFields , newFieldNames , context );
256274 return context .relBuilder .peek ();
257275 }
@@ -2253,4 +2271,115 @@ private void buildExpandRelNode(
22532271 context .relBuilder .rename (names );
22542272 }
22552273 }
2274+
2275+ /** Creates an optimized sed call using native Calcite functions */
2276+ private RexNode createOptimizedSedCall (
2277+ RexNode fieldRex , String sedExpression , CalcitePlanContext context ) {
2278+ if (sedExpression .startsWith ("s/" )) {
2279+ return createOptimizedSubstitution (fieldRex , sedExpression , context );
2280+ } else if (sedExpression .startsWith ("y/" )) {
2281+ return createOptimizedTransliteration (fieldRex , sedExpression , context );
2282+ } else {
2283+ throw new RuntimeException ("Unsupported sed pattern: " + sedExpression );
2284+ }
2285+ }
2286+
2287+ /** Creates optimized substitution calls for s/pattern/replacement/flags syntax. */
2288+ private RexNode createOptimizedSubstitution (
2289+ RexNode fieldRex , String sedExpression , CalcitePlanContext context ) {
2290+ try {
2291+ // Parse sed substitution: s/pattern/replacement/flags
2292+ if (!sedExpression .matches ("s/.+/.*/.*" )) {
2293+ throw new IllegalArgumentException ("Invalid sed substitution format" );
2294+ }
2295+
2296+ // Find the delimiters - sed format is s/pattern/replacement/flags
2297+ int firstDelimiter = sedExpression .indexOf ('/' , 2 ); // First '/' after 's/'
2298+ int secondDelimiter = sedExpression .indexOf ('/' , firstDelimiter + 1 ); // Second '/'
2299+ int thirdDelimiter = sedExpression .indexOf ('/' , secondDelimiter + 1 ); // Third '/' (optional)
2300+
2301+ if (firstDelimiter == -1 || secondDelimiter == -1 ) {
2302+ throw new IllegalArgumentException ("Invalid sed substitution format" );
2303+ }
2304+
2305+ String pattern = sedExpression .substring (2 , firstDelimiter );
2306+ String replacement = sedExpression .substring (firstDelimiter + 1 , secondDelimiter );
2307+ String flags =
2308+ secondDelimiter + 1 < sedExpression .length ()
2309+ ? sedExpression .substring (secondDelimiter + 1 )
2310+ : "" ;
2311+
2312+ // Convert sed backreferences (\1, \2) to Java style ($1, $2)
2313+ String javaReplacement = replacement .replaceAll ("\\ \\ (\\ d+)" , "\\ $$1" );
2314+
2315+ if (flags .isEmpty ()) {
2316+ // 3-parameter REGEXP_REPLACE
2317+ return PPLFuncImpTable .INSTANCE .resolve (
2318+ context .rexBuilder ,
2319+ BuiltinFunctionName .INTERNAL_REGEXP_REPLACE_3 ,
2320+ fieldRex ,
2321+ context .rexBuilder .makeLiteral (pattern ),
2322+ context .rexBuilder .makeLiteral (javaReplacement ));
2323+ } else if (flags .matches ("[gi]+" )) {
2324+ // 4-parameter REGEXP_REPLACE with flags
2325+ return PPLFuncImpTable .INSTANCE .resolve (
2326+ context .rexBuilder ,
2327+ BuiltinFunctionName .INTERNAL_REGEXP_REPLACE_PG_4 ,
2328+ fieldRex ,
2329+ context .rexBuilder .makeLiteral (pattern ),
2330+ context .rexBuilder .makeLiteral (javaReplacement ),
2331+ context .rexBuilder .makeLiteral (flags ));
2332+ } else if (flags .matches ("\\ d+" )) {
2333+ // 5-parameter REGEXP_REPLACE with occurrence
2334+ int occurrence = Integer .parseInt (flags );
2335+ return PPLFuncImpTable .INSTANCE .resolve (
2336+ context .rexBuilder ,
2337+ BuiltinFunctionName .INTERNAL_REGEXP_REPLACE_5 ,
2338+ fieldRex ,
2339+ context .rexBuilder .makeLiteral (pattern ),
2340+ context .rexBuilder .makeLiteral (javaReplacement ),
2341+ context .relBuilder .literal (1 ), // start position
2342+ context .relBuilder .literal (occurrence ));
2343+ } else {
2344+ throw new RuntimeException (
2345+ "Unsupported sed flags: " + flags + " in expression: " + sedExpression );
2346+ }
2347+ } catch (Exception e ) {
2348+ throw new RuntimeException ("Failed to optimize sed expression: " + sedExpression , e );
2349+ }
2350+ }
2351+
2352+ /** Creates optimized transliteration calls for y/from/to/ syntax. */
2353+ private RexNode createOptimizedTransliteration (
2354+ RexNode fieldRex , String sedExpression , CalcitePlanContext context ) {
2355+ try {
2356+ // Parse sed transliteration: y/from/to/
2357+ if (!sedExpression .matches ("y/.+/.*/.*" )) {
2358+ throw new IllegalArgumentException ("Invalid sed transliteration format" );
2359+ }
2360+
2361+ int firstSlash = sedExpression .indexOf ('/' , 1 );
2362+ int secondSlash = sedExpression .indexOf ('/' , firstSlash + 1 );
2363+ int thirdSlash = sedExpression .indexOf ('/' , secondSlash + 1 );
2364+
2365+ if (firstSlash == -1 || secondSlash == -1 ) {
2366+ throw new IllegalArgumentException ("Invalid sed transliteration format" );
2367+ }
2368+
2369+ String from = sedExpression .substring (firstSlash + 1 , secondSlash );
2370+ String to =
2371+ sedExpression .substring (
2372+ secondSlash + 1 , thirdSlash != -1 ? thirdSlash : sedExpression .length ());
2373+
2374+ // Use Calcite's native TRANSLATE3 function
2375+ return PPLFuncImpTable .INSTANCE .resolve (
2376+ context .rexBuilder ,
2377+ BuiltinFunctionName .INTERNAL_TRANSLATE3 ,
2378+ fieldRex ,
2379+ context .rexBuilder .makeLiteral (from ),
2380+ context .rexBuilder .makeLiteral (to ));
2381+ } catch (Exception e ) {
2382+ throw new RuntimeException ("Failed to optimize sed expression: " + sedExpression , e );
2383+ }
2384+ }
22562385}
0 commit comments