Skip to content

Commit 84ad3c2

Browse files
Use Index Scan for EXACT and PREFIX pattern matches for LIKE for Const node inside CollateExpr (#3770) (#3776)
With commit e119968, Babelfish now uses Index Scans for the following cases:
-- CASE 1
SELECT COL FROM TAB WHERE COL LIKE 'ab'
-- CASE 2
SELECT COL FROM TAB WHERE COL LIKE 'a%'
However, if there is a COLLATE clause on the right operand:
-- CASE 1
SELECT COL FROM TAB WHERE COL LIKE 'ab' COLLATE DATABASE_DEFAULT
-- CASE 2
SELECT COL FROM TAB WHERE COL LIKE 'a%' COLLATE DATABASE_DEFAULT
then Babelfish does NOT use an Index Scan. This commit enables Index Scans for such cases. We do this by removing the CollateExpr on both operands, as those are redundant and the OpExpr (LIKE) operator is already aware of the collation it needs to use.
(cherry picked from commit 431bf0b)
Task: BABEL-5077
Signed-off-by: Shameem Ahmed <shmeeh@amazon.com>
1 parent 11f1a9f commit 84ad3c2

16 files changed

+16813
-2080
lines changed

contrib/babelfishpg_tsql/src/collation.c

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ static Node *
339339
optimise_likenode(Node *node, OpExpr *op, like_ilike_info_t like_entry, coll_info_t coll_info_of_inputcollid, bool is_constraint)
340340
{
341341
Node *leftop = copyObject(linitial(op->args));
342-
Node *rightop = (Node *) lsecond(op->args);
342+
Node *rightop = copyObject(lsecond(op->args));
343343
Oid ltypeId = exprType(leftop);
344344
Oid rtypeId = exprType(rightop);
345345
char *op_str;
@@ -386,6 +386,20 @@ optimise_likenode(Node *node, OpExpr *op, like_ilike_info_t like_entry, coll_inf
386386
op->opfuncid = like_entry.ilike_opfuncid;
387387
}
388388

389+
op->inputcollid = tsql_get_oid_from_collidx(collidx_of_cs_as);
390+
391+
392+
/* Remove CollateExpr as the op->inputcollid has already been set */
393+
if (IsA(rightop, CollateExpr))
394+
{
395+
lsecond(op->args) = rightop = (Node*)((CollateExpr*) rightop)->arg;
396+
}
397+
398+
if (IsA(leftop, CollateExpr))
399+
{
400+
linitial(op->args) = leftop = (Node*)((CollateExpr*) leftop)->arg;
401+
}
402+
389403
/*
390404
* This is needed to process CI_AI for Const nodes
391405
* Because after we call coerce_to_target_type for type conversion in transform_likenode_for_AI,
@@ -401,7 +415,6 @@ optimise_likenode(Node *node, OpExpr *op, like_ilike_info_t like_entry, coll_inf
401415
rightop = (Node *) lsecond(op->args);
402416
}
403417
}
404-
op->inputcollid = tsql_get_oid_from_collidx(collidx_of_cs_as);
405418

406419
/*
407420
* no constant prefix found in pattern, or pattern is not constant
@@ -871,7 +884,9 @@ convert_node_to_funcexpr_for_like(Node *node, Oid inputcollid)
871884
con = (Const *) new_node;
872885
if (con->constisnull)
873886
return new_node;
874-
con->constvalue = DirectFunctionCall1(remove_accents_internal, con->constvalue);
887+
888+
con->constvalue = OidFunctionCall1(remove_accents_internal_oid, con->constvalue);
889+
con->constcollid = InvalidOid;
875890
return (Node *) con;
876891
}
877892
else
@@ -888,7 +903,6 @@ convert_node_to_funcexpr_for_like(Node *node, Oid inputcollid)
888903
case T_CaseExpr:
889904
case T_RelabelType:
890905
case T_CoerceViaIO:
891-
case T_CollateExpr:
892906
{
893907
new_node = coerce_to_target_type(NULL, (Node *) node, exprType(node),
894908
TEXTOID, -1,
@@ -904,6 +918,60 @@ convert_node_to_funcexpr_for_like(Node *node, Oid inputcollid)
904918
newFuncExpr->args = list_make1(new_node);
905919
break;
906920
}
921+
case T_CollateExpr:
922+
{
923+
CollateExpr *collateexpr = (CollateExpr*) node;
924+
if (IsA(collateexpr->arg, Const))
925+
{
926+
Const *constnode = (Const*) (collateexpr->arg);
927+
constnode->constcollid = collateexpr->collOid;
928+
new_node = coerce_to_target_type(NULL, (Node *) constnode, exprType((Node *)constnode),
929+
TEXTOID, -1,
930+
COERCION_EXPLICIT,
931+
COERCE_EXPLICIT_CAST,
932+
exprLocation(node));
933+
if (unlikely(new_node == NULL))
934+
{
935+
ereport(ERROR,
936+
(errcode(ERRCODE_INTERNAL_ERROR),
937+
errmsg("Could not type cast the input argument of LIKE operator to desired data type")));
938+
}
939+
940+
if (IsA(new_node, Const))
941+
{
942+
constnode = (Const *) new_node;
943+
if (constnode->constisnull)
944+
return new_node;
945+
946+
constnode->constvalue = OidFunctionCall1(remove_accents_internal_oid, constnode->constvalue);
947+
constnode->constcollid = InvalidOid;
948+
return (Node *) constnode;
949+
}
950+
else
951+
{
952+
ereport(ERROR,
953+
(errcode(ERRCODE_INTERNAL_ERROR),
954+
errmsg("Could not convert Const node to desired node type")));
955+
}
956+
}
957+
else
958+
{
959+
new_node = coerce_to_target_type(NULL, (Node *) node, exprType(node),
960+
TEXTOID, -1,
961+
COERCION_EXPLICIT,
962+
COERCE_EXPLICIT_CAST,
963+
exprLocation(node));
964+
if (unlikely(new_node == NULL))
965+
{
966+
ereport(ERROR,
967+
(errcode(ERRCODE_INTERNAL_ERROR),
968+
errmsg("Could not type cast the input argument of LIKE operator to desired data type")));
969+
}
970+
newFuncExpr->args = list_make1(new_node);
971+
break;
972+
}
973+
break;
974+
}
907975
case T_SubLink:
908976
{
909977
new_node = coerce_to_target_type(NULL, (Node *) node, exprType(node),
@@ -1112,9 +1180,14 @@ pltsql_predicate_transformer(Node *expr, bool is_constraint)
11121180
}
11131181
else if (IsA(qual, OpExpr))
11141182
{
1115-
qual = transform_likenode(qual, is_constraint);
1116-
new_predicates = lappend(new_predicates,
1117-
expression_tree_mutator(qual, pgtsql_expression_tree_mutator, NULL));
1183+
Node *ret = transform_likenode(qual, is_constraint);
1184+
if (qual == ret)
1185+
/* If it's not a like Opexpr, then walk through args */
1186+
new_predicates = lappend(new_predicates,
1187+
expression_tree_mutator(qual, pgtsql_expression_tree_mutator, NULL));
1188+
else
1189+
/* Singleton predicate */
1190+
new_predicates = lappend(new_predicates, ret);
11181191
}
11191192
else
11201193
new_predicates = lappend(new_predicates, qual);

test/JDBC/expected/babel_collection.out

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ Bitmap Heap Scan on testing4 (cost=4.17..11.28 rows=3 width=32)
191191

192192
~~START~~
193193
text
194-
Babelfish T-SQL Batch Parsing Time: 6.623 ms
194+
Babelfish T-SQL Batch Parsing Time: 0.154 ms
195195
~~END~~
196196

197197

@@ -209,7 +209,7 @@ Bitmap Heap Scan on testing4 (cost=4.18..11.30 rows=1 width=32)
209209

210210
~~START~~
211211
text
212-
Babelfish T-SQL Batch Parsing Time: 0.143 ms
212+
Babelfish T-SQL Batch Parsing Time: 0.134 ms
213213
~~END~~
214214

215215

@@ -227,7 +227,7 @@ Bitmap Heap Scan on testing4 (cost=4.18..11.30 rows=1 width=32)
227227

228228
~~START~~
229229
text
230-
Babelfish T-SQL Batch Parsing Time: 0.135 ms
230+
Babelfish T-SQL Batch Parsing Time: 0.133 ms
231231
~~END~~
232232

233233

@@ -243,7 +243,7 @@ Bitmap Heap Scan on testing4 (cost=11.40..29.53 rows=5 width=32)
243243

244244
~~START~~
245245
text
246-
Babelfish T-SQL Batch Parsing Time: 0.135 ms
246+
Babelfish T-SQL Batch Parsing Time: 0.132 ms
247247
~~END~~
248248

249249

@@ -259,7 +259,7 @@ Bitmap Heap Scan on testing4 (cost=11.41..29.53 rows=26 width=32)
259259

260260
~~START~~
261261
text
262-
Babelfish T-SQL Batch Parsing Time: 0.140 ms
262+
Babelfish T-SQL Batch Parsing Time: 0.130 ms
263263
~~END~~
264264

265265

@@ -278,7 +278,7 @@ Bitmap Heap Scan on testing4 (cost=4.18..11.30 rows=1 width=32)
278278

279279
~~START~~
280280
text
281-
Babelfish T-SQL Batch Parsing Time: 0.149 ms
281+
Babelfish T-SQL Batch Parsing Time: 0.135 ms
282282
~~END~~
283283

284284
select c1 from testing4 where c1 LIKE 'äb%';
@@ -295,7 +295,7 @@ Bitmap Heap Scan on testing4 (cost=4.18..11.30 rows=1 width=32)
295295

296296
~~START~~
297297
text
298-
Babelfish T-SQL Batch Parsing Time: 0.144 ms
298+
Babelfish T-SQL Batch Parsing Time: 0.140 ms
299299
~~END~~
300300

301301
select c1 from testing4 where c1 LIKE 'äḃĆ_';
@@ -312,7 +312,7 @@ Bitmap Heap Scan on testing4 (cost=4.18..11.30 rows=1 width=32)
312312

313313
~~START~~
314314
text
315-
Babelfish T-SQL Batch Parsing Time: 0.147 ms
315+
Babelfish T-SQL Batch Parsing Time: 0.144 ms
316316
~~END~~
317317

318318

@@ -329,7 +329,7 @@ Bitmap Heap Scan on testing4 (cost=11.56..29.69 rows=647 width=32)
329329

330330
~~START~~
331331
text
332-
Babelfish T-SQL Batch Parsing Time: 0.990 ms
332+
Babelfish T-SQL Batch Parsing Time: 0.139 ms
333333
~~END~~
334334

335335

@@ -345,7 +345,7 @@ Bitmap Heap Scan on testing4 (cost=11.56..32.94 rows=648 width=32)
345345

346346
~~START~~
347347
text
348-
Babelfish T-SQL Batch Parsing Time: 0.201 ms
348+
Babelfish T-SQL Batch Parsing Time: 0.134 ms
349349
~~END~~
350350

351351

@@ -361,7 +361,7 @@ Bitmap Heap Scan on testing4 (cost=11.55..32.92 rows=592 width=32)
361361

362362
~~START~~
363363
text
364-
Babelfish T-SQL Batch Parsing Time: 0.208 ms
364+
Babelfish T-SQL Batch Parsing Time: 0.142 ms
365365
~~END~~
366366

367367

@@ -381,7 +381,7 @@ Bitmap Heap Scan on testing4 (cost=4.18..11.30 rows=1 width=32)
381381

382382
~~START~~
383383
text
384-
Babelfish T-SQL Batch Parsing Time: 0.148 ms
384+
Babelfish T-SQL Batch Parsing Time: 0.137 ms
385385
~~END~~
386386

387387

@@ -399,7 +399,7 @@ Bitmap Heap Scan on testing4 (cost=4.18..11.30 rows=1 width=32)
399399

400400
~~START~~
401401
text
402-
Babelfish T-SQL Batch Parsing Time: 0.135 ms
402+
Babelfish T-SQL Batch Parsing Time: 0.133 ms
403403
~~END~~
404404

405405

@@ -531,7 +531,7 @@ Seq Scan on testing5 (cost=10000000000.00..10000000033.80 rows=1 width=32)
531531

532532
~~START~~
533533
text
534-
Babelfish T-SQL Batch Parsing Time: 0.124 ms
534+
Babelfish T-SQL Batch Parsing Time: 0.122 ms
535535
~~END~~
536536

537537
SET babelfish_showplan_all OFF;
@@ -572,7 +572,7 @@ Seq Scan on testing5 (cost=10000000000.00..10000000027.00 rows=7 width=32)
572572

573573
~~START~~
574574
text
575-
Babelfish T-SQL Batch Parsing Time: 0.120 ms
575+
Babelfish T-SQL Batch Parsing Time: 0.118 ms
576576
~~END~~
577577

578578
SET babelfish_showplan_all OFF;
@@ -596,7 +596,7 @@ Result (cost=0.00..0.00 rows=0 width=0)
596596

597597
~~START~~
598598
text
599-
Babelfish T-SQL Batch Parsing Time: 0.166 ms
599+
Babelfish T-SQL Batch Parsing Time: 0.118 ms
600600
~~END~~
601601

602602
SET babelfish_showplan_all OFF;
@@ -618,12 +618,12 @@ GO
618618
text
619619
Query Text: SELECT * FROM testing5 where c1 COLLATE French_CI_AS like 'jo%'
620620
Seq Scan on testing5 (cost=10000000000.00..10000000033.80 rows=1 width=32)
621-
Filter: (((c1)::text ~~* 'jo%'::text COLLATE french_cs_as) AND ((c1)::text >= 'jo'::text COLLATE french_ci_as) AND ((c1)::text < 'jo?'::text COLLATE french_ci_as))
621+
Filter: (((c1)::text ~~* 'jo%'::text COLLATE french_cs_as) AND (c1 >= 'jo'::"varchar" COLLATE french_ci_as) AND (c1 < 'jo?'::"varchar" COLLATE french_ci_as))
622622
~~END~~
623623

624624
~~START~~
625625
text
626-
Babelfish T-SQL Batch Parsing Time: 0.151 ms
626+
Babelfish T-SQL Batch Parsing Time: 0.135 ms
627627
~~END~~
628628

629629
SET babelfish_showplan_all OFF;
@@ -644,12 +644,12 @@ GO
644644
text
645645
Query Text: SELECT * FROM testing5 where c1 COLLATE Chinese_PRC_CI_AS like 'jo%'
646646
Seq Scan on testing5 (cost=10000000000.00..10000000033.80 rows=1 width=32)
647-
Filter: (((c1)::text ~~* 'jo%'::text COLLATE chinese_prc_cs_as) AND ((c1)::text >= 'jo'::text COLLATE chinese_prc_ci_as) AND ((c1)::text < 'jo?'::text COLLATE chinese_prc_ci_as))
647+
Filter: (((c1)::text ~~* 'jo%'::text COLLATE chinese_prc_cs_as) AND (c1 >= 'jo'::"varchar" COLLATE chinese_prc_ci_as) AND (c1 < 'jo?'::"varchar" COLLATE chinese_prc_ci_as))
648648
~~END~~
649649

650650
~~START~~
651651
text
652-
Babelfish T-SQL Batch Parsing Time: 0.138 ms
652+
Babelfish T-SQL Batch Parsing Time: 0.135 ms
653653
~~END~~
654654

655655
SET babelfish_showplan_all OFF;

0 commit comments

Comments
 (0)