Skip to content

Commit 169c6f0

Browse files
committed
update
1 parent eac615d commit 169c6f0

File tree

7 files changed

+916
-18
lines changed

7 files changed

+916
-18
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java

Lines changed: 100 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -233,14 +233,19 @@ private static String expandSimplifiedDsl(String dsl, String defaultField, Strin
233233
}
234234

235235
/**
236-
* Check if DSL contains field references (has colon not in quoted strings)
236+
* Check if DSL contains field references (has colon not in quoted strings or escaped)
237237
*/
238238
private static boolean containsFieldReference(String dsl) {
239239
boolean inQuotes = false;
240240
boolean inRegex = false;
241241
for (int i = 0; i < dsl.length(); i++) {
242242
char c = dsl.charAt(i);
243-
if (c == '"' && (i == 0 || dsl.charAt(i - 1) != '\\')) {
243+
// Handle escape sequences - skip the escaped character
244+
if (c == '\\' && i + 1 < dsl.length()) {
245+
i++; // Skip next character (it's escaped)
246+
continue;
247+
}
248+
if (c == '"') {
244249
inQuotes = !inQuotes;
245250
} else if (c == '/' && !inQuotes) {
246251
inRegex = !inRegex;
@@ -276,21 +281,30 @@ private static boolean containsExplicitOperators(String dsl) {
276281
/**
277282
* Add field prefix to expressions with explicit operators
278283
* Example: "foo AND bar" → "field:foo AND field:bar"
284+
* Handles escape sequences properly (e.g., "First\ Value" stays as single term)
279285
*/
280286
private static String addFieldPrefixToOperatorExpression(String dsl, String defaultField) {
281287
StringBuilder result = new StringBuilder();
282288
StringBuilder currentTerm = new StringBuilder();
283289
int i = 0;
284290

285291
while (i < dsl.length()) {
286-
// Skip whitespace
292+
// Skip whitespace (but not escaped whitespace)
287293
while (i < dsl.length() && Character.isWhitespace(dsl.charAt(i))) {
288294
i++;
289295
}
290296
if (i >= dsl.length()) {
291297
break;
292298
}
293299

300+
// Handle escape sequences - include both backslash and next char
301+
if (dsl.charAt(i) == '\\' && i + 1 < dsl.length()) {
302+
currentTerm.append(dsl.charAt(i));
303+
currentTerm.append(dsl.charAt(i + 1));
304+
i += 2;
305+
continue;
306+
}
307+
294308
// Try to match operators
295309
String remaining = dsl.substring(i);
296310
String upperRemaining = remaining.toUpperCase();
@@ -362,7 +376,7 @@ private static String addFieldPrefixToOperatorExpression(String dsl, String defa
362376
}
363377

364378
/**
365-
* Tokenize DSL into terms (split by whitespace, respecting quotes and functions)
379+
* Tokenize DSL into terms (split by whitespace, respecting quotes, escapes, and functions)
366380
*/
367381
private static List<String> tokenizeDsl(String dsl) {
368382
List<String> terms = new ArrayList<>();
@@ -387,8 +401,13 @@ private static List<String> tokenizeDsl(String dsl) {
387401
inParens = false;
388402
}
389403
currentTerm.append(c);
404+
} else if (c == '\\' && i + 1 < dsl.length()) {
405+
// Escape sequence - include both backslash and next char in term
406+
currentTerm.append(c);
407+
currentTerm.append(dsl.charAt(i + 1));
408+
i++; // Skip next character
390409
} else if (Character.isWhitespace(c) && !inQuotes && !inParens) {
391-
// End of term
410+
// End of term (only if not escaped - handled above)
392411
if (currentTerm.length() > 0) {
393412
terms.add(currentTerm.toString());
394413
currentTerm = new StringBuilder();
@@ -408,6 +427,7 @@ private static List<String> tokenizeDsl(String dsl) {
408427

409428
/**
410429
* Check if a term contains wildcard characters (* or ?)
430+
* Escaped wildcards (\* or \?) are not counted.
411431
*/
412432
private static boolean containsWildcard(String term) {
413433
// Ignore wildcards in quoted strings or regex
@@ -417,7 +437,19 @@ private static boolean containsWildcard(String term) {
417437
if (term.startsWith("/") && term.endsWith("/")) {
418438
return false;
419439
}
420-
return term.contains("*") || term.contains("?");
440+
// Check for unescaped wildcards
441+
for (int i = 0; i < term.length(); i++) {
442+
char c = term.charAt(i);
443+
if (c == '\\' && i + 1 < term.length()) {
444+
// Skip escaped character
445+
i++;
446+
continue;
447+
}
448+
if (c == '*' || c == '?') {
449+
return true;
450+
}
451+
}
452+
return false;
421453
}
422454

423455
/**
@@ -640,15 +672,15 @@ public QsNode visitSearchValue(SearchParser.SearchValueContext ctx) {
640672
}
641673

642674
private QsNode createTermNode(String fieldName, String value) {
643-
return new QsNode(QsClauseType.TERM, fieldName, value);
675+
return new QsNode(QsClauseType.TERM, fieldName, unescapeTermValue(value));
644676
}
645677

646678
private QsNode createPrefixNode(String fieldName, String value) {
647-
return new QsNode(QsClauseType.PREFIX, fieldName, value);
679+
return new QsNode(QsClauseType.PREFIX, fieldName, unescapeTermValue(value));
648680
}
649681

650682
private QsNode createWildcardNode(String fieldName, String value) {
651-
return new QsNode(QsClauseType.WILDCARD, fieldName, value);
683+
return new QsNode(QsClauseType.WILDCARD, fieldName, unescapeTermValue(value));
652684
}
653685

654686
private QsNode createRegexpNode(String fieldName, String regexpText) {
@@ -1096,11 +1128,15 @@ private QsNode processLuceneBooleanChain(SearchParser.OrClauseContext ctx) {
10961128
}
10971129

10981130
if (terms.size() == 1) {
1099-
// Single term - set occur if negated, then return without OCCUR_BOOLEAN wrapper
11001131
TermWithOccur singleTerm = terms.get(0);
11011132
if (singleTerm.isNegated) {
1133+
// Single negated term - must wrap in OCCUR_BOOLEAN for BE to handle MUST_NOT
11021134
singleTerm.node.occur = QsOccur.MUST_NOT;
1135+
List<QsNode> children = new ArrayList<>();
1136+
children.add(singleTerm.node);
1137+
return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 0);
11031138
}
1139+
// Single non-negated term - return directly without wrapper
11041140
return singleTerm.node;
11051141
}
11061142

@@ -1132,7 +1168,15 @@ private QsNode processLuceneBooleanChain(SearchParser.OrClauseContext ctx) {
11321168
}
11331169

11341170
if (terms.size() == 1) {
1135-
return terms.get(0).node;
1171+
TermWithOccur remainingTerm = terms.get(0);
1172+
if (remainingTerm.occur == QsOccur.MUST_NOT) {
1173+
// Single MUST_NOT term - must wrap in OCCUR_BOOLEAN for BE to handle
1174+
remainingTerm.node.occur = QsOccur.MUST_NOT;
1175+
List<QsNode> children = new ArrayList<>();
1176+
children.add(remainingTerm.node);
1177+
return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 0);
1178+
}
1179+
return remainingTerm.node;
11361180
}
11371181

11381182
// Build OCCUR_BOOLEAN node
@@ -1329,13 +1373,13 @@ public QsNode visitSearchValue(SearchParser.SearchValueContext ctx) {
13291373
String fieldName = currentFieldName != null ? currentFieldName : "_all";
13301374

13311375
if (ctx.TERM() != null) {
1332-
return new QsNode(QsClauseType.TERM, fieldName, ctx.TERM().getText());
1376+
return new QsNode(QsClauseType.TERM, fieldName, unescapeTermValue(ctx.TERM().getText()));
13331377
}
13341378
if (ctx.PREFIX() != null) {
1335-
return new QsNode(QsClauseType.PREFIX, fieldName, ctx.PREFIX().getText());
1379+
return new QsNode(QsClauseType.PREFIX, fieldName, unescapeTermValue(ctx.PREFIX().getText()));
13361380
}
13371381
if (ctx.WILDCARD() != null) {
1338-
return new QsNode(QsClauseType.WILDCARD, fieldName, ctx.WILDCARD().getText());
1382+
return new QsNode(QsClauseType.WILDCARD, fieldName, unescapeTermValue(ctx.WILDCARD().getText()));
13391383
}
13401384
if (ctx.REGEXP() != null) {
13411385
String regexp = ctx.REGEXP().getText();
@@ -1388,7 +1432,7 @@ public QsNode visitSearchValue(SearchParser.SearchValueContext ctx) {
13881432
return new QsNode(QsClauseType.EXACT, fieldName, innerContent);
13891433
}
13901434

1391-
return new QsNode(QsClauseType.TERM, fieldName, ctx.getText());
1435+
return new QsNode(QsClauseType.TERM, fieldName, unescapeTermValue(ctx.getText()));
13921436
}
13931437

13941438
private String extractParenthesesContent(String text) {
@@ -1428,4 +1472,45 @@ private static class TermWithOccur {
14281472
this.occur = occur;
14291473
}
14301474
}
1475+
1476+
/**
1477+
* Process escape sequences in a term value.
1478+
* Converts escape sequences to their literal characters:
1479+
* - \ (backslash space) -> space
1480+
* - \( -> (
1481+
* - \) -> )
1482+
* - \: -> :
1483+
* - \\ -> \
1484+
* - \* -> *
1485+
* - \? -> ?
1486+
* - etc.
1487+
*
1488+
* @param value The raw term value with escape sequences
1489+
* @return The unescaped value
1490+
*/
1491+
private static String unescapeTermValue(String value) {
1492+
if (value == null || value.isEmpty()) {
1493+
return value;
1494+
}
1495+
1496+
// Quick check: if no backslash, return as-is
1497+
if (value.indexOf('\\') < 0) {
1498+
return value;
1499+
}
1500+
1501+
StringBuilder result = new StringBuilder(value.length());
1502+
int i = 0;
1503+
while (i < value.length()) {
1504+
char c = value.charAt(i);
1505+
if (c == '\\' && i + 1 < value.length()) {
1506+
// Escape sequence - take the next character literally
1507+
result.append(value.charAt(i + 1));
1508+
i += 2;
1509+
} else {
1510+
result.append(c);
1511+
i++;
1512+
}
1513+
}
1514+
return result.toString();
1515+
}
14311516
}

0 commit comments

Comments
 (0)