-
Notifications
You must be signed in to change notification settings - Fork 133
Fix to enable index scan with LIKE operator for non-const nodes #4589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: BABEL_6_X_DEV
Are you sure you want to change the base?
Changes from 18 commits
088e87f
21d6b84
868da18
f8cdd51
86997fe
3357524
ffdb176
ac796d5
75140af
cd87af9
69b694c
71e35f2
7917299
7f34556
926c335
2b17d45
2f55c79
fdf6fca
afeb5b3
9822783
5197005
293001e
aec3662
710ceec
4a8fd1d
c6d91e0
2021734
ba425e9
fea7f92
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,12 +12,15 @@ | |
| #include "catalog/pg_collation.h" | ||
| #include "catalog/namespace.h" | ||
| #include "tsearch/ts_locale.h" | ||
| #include "optimizer/clauses.h" | ||
| #include "optimizer/optimizer.h" | ||
| #include "parser/parser.h" | ||
| #include "parser/parse_coerce.h" | ||
| #include "parser/parse_type.h" | ||
| #include "parser/parse_oper.h" | ||
| #include "nodes/makefuncs.h" | ||
| #include "nodes/nodes.h" | ||
| #include "rewrite/rewriteManip.h" | ||
| #ifdef USE_ICU | ||
| #include <unicode/utrans.h> | ||
| #include "utils/removeaccent.map" | ||
|
|
@@ -327,8 +330,9 @@ create_collate_expr(Node *arg, Oid collid) | |
| * function to remove the accents and optimize. | ||
| * If the node is OpExpr and the collation is cs_as, then simply use optimization: | ||
| * | ||
| * Case 1: if the pattern is a constant stirng | ||
| * col LIKE PATTERN -> col = PATTERN | ||
| * Case 1: if the pattern is a constant string | ||
| * col LIKE PATTERN -> col = PATTERN AND col LIKE PATTERN | ||
| * col NOT LIKE PATTERN -> col <> PATTERN OR col NOT LIKE PATTERN | ||
| * Case 2: if the pattern have a constant prefix | ||
| * col LIKE PATTERN -> | ||
| * col LIKE PATTERN BETWEEN prefix AND prefix||E'\uFFFF' | ||
|
|
@@ -351,6 +355,7 @@ optimise_likenode(Node *node, OpExpr *op, like_ilike_info_t like_entry, coll_inf | |
| Pattern_Prefix_Status pstatus; | ||
| int collidx_of_cs_as; | ||
| CollateExpr *prefix_collate; | ||
| Node *check_node; | ||
|
||
|
|
||
| tsql_get_database_or_server_collation_oid_internal(true); | ||
|
|
||
|
|
@@ -386,9 +391,8 @@ optimise_likenode(Node *node, OpExpr *op, like_ilike_info_t like_entry, coll_inf | |
| op->opno = like_entry.ilike_oid; | ||
| op->opfuncid = like_entry.ilike_opfuncid; | ||
| } | ||
|
|
||
| op->inputcollid = tsql_get_oid_from_collidx(collidx_of_cs_as); | ||
|
|
||
| op->inputcollid = tsql_get_oid_from_collidx(collidx_of_cs_as); | ||
|
|
||
| /* Remove CollateExpr as the op->inputcollid has already been set */ | ||
| if (IsA(rightop, CollateExpr)) | ||
|
|
@@ -401,7 +405,44 @@ optimise_likenode(Node *node, OpExpr *op, like_ilike_info_t like_entry, coll_inf | |
| linitial(op->args) = leftop = (Node*)((CollateExpr*) leftop)->arg; | ||
| } | ||
|
|
||
| /* | ||
| /* | ||
| * Try to simplify rightop to a Const for prefix extraction. | ||
| * eval_const_expressions folds immutable subexpressions, evaluate_expr | ||
| * handles stable functions as a fallback. | ||
| * | ||
| * Peek through RelabelType wrappers to check for like_escape (ESCAPE | ||
| * clause) and remove_accents_internal (AI mode). Evaluating these at | ||
| * plan time loses escape and bracket pattern semantics. | ||
| * For AI mode, the RelabelType unwrap below handles simple patterns. | ||
| */ | ||
|
|
||
| check_node = rightop; | ||
| while (IsA(check_node, RelabelType)) | ||
|
||
| check_node = (Node *) ((RelabelType *) check_node)->arg; | ||
|
|
||
| if (!(IsA(check_node, FuncExpr) && | ||
| (strcmp(get_func_name(((FuncExpr *) check_node)->funcid), | ||
| "like_escape") == 0 || | ||
| strcmp(get_func_name(((FuncExpr *) check_node)->funcid), | ||
| "remove_accents_internal") == 0))) | ||
| { | ||
| rightop = eval_const_expressions(NULL, rightop); | ||
|
|
||
| if (!IsA(rightop, Const) && !IsA(rightop, Param) && | ||
|
||
| !checkExprHasSubLink(rightop) && | ||
| !contain_var_clause(rightop) && | ||
| !contain_volatile_functions(rightop) && | ||
| bms_is_empty(pull_paramids((Expr *) rightop))) | ||
| { | ||
| rightop = (Node *) evaluate_expr((Expr *) rightop, | ||
| exprType(rightop), | ||
| exprTypmod(rightop), | ||
| exprCollation(rightop)); | ||
| } | ||
| lsecond(op->args) = rightop; | ||
| } | ||
|
|
||
| /* | ||
| * This is needed to process CI_AI for Const nodes | ||
| * Because after we call coerce_to_target_type for type conversion in transform_likenode_for_AI, | ||
| * we obtain a Relabel node which won't help us to perform optimization | ||
|
|
@@ -466,9 +507,12 @@ optimise_likenode(Node *node, OpExpr *op, like_ilike_info_t like_entry, coll_inf | |
| { | ||
| RelabelType *relabel = (RelabelType *) leftop; | ||
| leftop = copyObject((Node*) relabel->arg); | ||
| prefix->consttype = rtypeId = ltypeId = exprType(leftop); | ||
| ltypeId = exprType(leftop); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the necessity to make this change? Is there any test case which fails?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. Without this, BABEL_SCHEMATA-vu-verify crashes: rtypeId is set once at the top from original rightop. After evaluate_expr changes rightop to a Const, rtypeId becomes stale. Previously this code was never reached because rightop was never a Const for such queries. Now it is, and types mismatch. |
||
| } | ||
|
|
||
| /* Reconcile types — LIKE coerces to TEXT but we need original column type */ | ||
| prefix->consttype = rtypeId = ltypeId; | ||
|
|
||
| /* | ||
| * We need to do this because the dump considers rightop as Const with COLLATE being added | ||
| * whereas during restore, that is considered as CollateExpr while building new expression tree | ||
|
|
@@ -506,7 +550,7 @@ optimise_likenode(Node *node, OpExpr *op, like_ilike_info_t like_entry, coll_inf | |
| InvalidOid, | ||
| coll_info_of_inputcollid.oid, | ||
| oprfuncid(optup))); | ||
| ret = make_and_qual(ret, node); | ||
| ret = like_entry.is_not_match ? make_or_qual(ret, node) : make_and_qual(ret, node); | ||
| ReleaseSysCache(optup); | ||
| } | ||
| else | ||
|
|
@@ -1053,6 +1097,43 @@ transform_likenode(Node *node, bool is_constraint) | |
|
|
||
| get_remove_accents_internal_oid(); | ||
|
|
||
| /* | ||
| * View definitions (after dump/restore) lose original column collation | ||
| * due to ::text casts, causing wrong LIKE/ILIKE operator selection. | ||
| * Unwrap coercions on leftop to find the original Var collation. | ||
| * Skip if an explicit COLLATE clause is present. | ||
| */ | ||
|
|
||
| if (OidIsValid(like_entry.like_oid) && list_length(op->args) >= 2) | ||
| { | ||
| Node *left_arg = linitial(op->args); | ||
|
||
| Node *right_arg = lsecond(op->args); | ||
| if (!IsA(left_arg, CollateExpr) && !IsA(right_arg, CollateExpr) && !contain_var_clause(right_arg)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please explain this if condition
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These conditions check if it's safe to consider the column's collation for optimization:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems like we are putting some hack in our code, maybe in some cases that is unavoidable but in most of the conditions I see that if not done then this crashes. I am not comfortable with such explaination. Is there anyway to have a generic solution?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, agreed. I've implemented a better approach now which removes the need for all the manual guards that solely arose from use of evaluate_expr. Replaced it with PG's built-in eval_const_expressions + estimate_expression_value - these handle safety checks internally. Any unhandled expression simply skips optimization with correct results. The conditions in this block are unrelated to index optimization. They are for collation recovery needed because views lose column collation during storage due to ::text casts. They ensure we only recover when collation wasn't explicitly set and don't unnecessarily override collation. I think the test coverage for views with LIKE pattern was not much earlier and hence we couldn't catch this issue. |
||
| { | ||
| Node *unwrapped = left_arg; | ||
| while (IsA(unwrapped, RelabelType)) | ||
| unwrapped = (Node *) ((RelabelType *) unwrapped)->arg; | ||
|
|
||
| if (IsA(unwrapped, Var)) | ||
| { | ||
| Oid var_collation = ((Var *) unwrapped)->varcollid; | ||
|
|
||
| if (OidIsValid(var_collation) && | ||
| var_collation != op->inputcollid) | ||
| { | ||
| coll_info_t var_coll_info = | ||
| tsql_lookup_collation_table_internal(var_collation); | ||
|
|
||
| if (OidIsValid(var_coll_info.oid)) | ||
| { | ||
| op->inputcollid = var_collation; | ||
| coll_info_of_inputcollid = var_coll_info; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /* | ||
| * We do not allow CREATE TABLE statements with CHECK constraint where | ||
| * the constraint has an ILIKE operator and the collation is ci_as. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With this OR clause, planner may loose an ability to choose index scan. Hence in most of the cases where we have correctness issue will be fixed with this but perf might get degraded. But I think tht should be okay