-
Notifications
You must be signed in to change notification settings - Fork 112
fix(elasticsearch): escape Lucene special chars in values only, not field patterns (APIM-13030) #15831
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 4.9.x
Are you sure you want to change the base?
fix(elasticsearch): escape Lucene special chars in values only, not field patterns (APIM-13030) #15831
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -96,7 +96,7 @@ public TabularResponse query(final QueryContext queryContext, final TabularQuery | |
| .size(MAX_RESULT_WINDOW) | ||
| .query(logQueryString) | ||
| .build(); | ||
| final String sQuery = this.createSafeElasticsearchJsonQuery(logQuery); | ||
| final String sQuery = this.createElasticsearchJsonQuery(logQuery); | ||
|
|
||
| Single<SearchResponse> result = this.client.search( | ||
| this.indexNameGenerator.getIndexName(queryContext.placeholder(), Type.LOG, from, to, clusters), | ||
|
|
@@ -127,7 +127,7 @@ public TabularResponse query(final QueryContext queryContext, final TabularQuery | |
| result = this.client.search( | ||
| this.indexNameGenerator.getIndexName(queryContext.placeholder(), Type.REQUEST, from, to, clusters), | ||
| !info.getVersion().canUseTypeRequests() ? null : Type.REQUEST.getType(), | ||
| this.createSafeElasticsearchJsonQuery(requestQueryBuilder.build()) | ||
| this.createElasticsearchJsonQuery(requestQueryBuilder.build()) | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -154,10 +154,11 @@ private String getQuery(final QueryFilter query, final boolean log) { | |
| }) | ||
| .map(filter -> { | ||
| final String filterKey = filter[0]; | ||
| final String escapedValue = escapeValueForLuceneJson(filter[1]); | ||
| if ("body".equals(filterKey)) { | ||
| return "\\\\*.body" + ":" + filter[1]; | ||
| return "\\\\*.body" + ":" + escapedValue; | ||
| } else { | ||
| return filterKey + ":" + filter[1]; | ||
| return filterKey + ":" + escapedValue; | ||
| } | ||
| }) | ||
| .collect(joining(filterSeparator)); | ||
|
|
@@ -252,9 +253,11 @@ private TabularResponse toTabularResponse(final SearchResponse response, final l | |
| } | ||
|
|
||
| /** | ||
| * Fix APIM-12955: Escapes Lucene special characters in the query string. | ||
| * Quadruple backslashes are required to survive both Java and JSON parsing levels | ||
| * so that Lucene finally receives the mandatory single backslash escape (e.g., \( ). | ||
| * Fix APIM-12955: Escapes Lucene special characters in query filter values. | ||
| * Only escapes the value portion of each field:value clause, preserving field | ||
| * patterns like {@code \\*.body} which use backslashes intentionally. | ||
| * Quadruple backslashes survive both Java and JSON parsing so Lucene receives | ||
| * the single backslash escape (e.g., \( ). | ||
| */ | ||
| private String createSafeElasticsearchJsonQuery(final TabularQuery query) { | ||
| String json = this.createElasticsearchJsonQuery(query); | ||
|
|
@@ -263,12 +266,43 @@ private String createSafeElasticsearchJsonQuery(final TabularQuery query) { | |
| String filter = query.query().filter(); | ||
|
|
||
| if (!filter.isEmpty()) { | ||
| String escaped = filter.replace("\\", "\\\\\\\\").replace("/", "\\\\/").replace("(", "\\\\(").replace(")", "\\\\)"); | ||
| String escaped = escapeFilterValues(filter); | ||
|
|
||
| // Targeted replacement within quotes to protect JSON structure | ||
| json = json.replace("\"" + filter + "\"", "\"" + escaped + "\""); | ||
| } | ||
| } | ||
| return json; | ||
| } | ||
|
|
||
| /** | ||
| * Escapes Lucene special characters only in the value portions of field:value | ||
| * clauses, preserving field names and patterns (e.g. \\*.body wildcard fields). | ||
| */ | ||
| private String escapeFilterValues(String filter) { | ||
| final String andSeparator = " AND "; | ||
| return stream(filter.split(andSeparator)).map(ElasticLogRepository::escapeClauseValues).collect(joining(andSeparator)); | ||
| } | ||
|
|
||
| private static String escapeClauseValues(String clause) { | ||
| // Handle OR-separated sub-clauses (e.g. "_id:x OR _id:y") | ||
| if (clause.contains(" OR ")) { | ||
| return stream(clause.split(" OR ")).map(ElasticLogRepository::escapeSingleClauseValue).collect(joining(" OR ")); | ||
| } | ||
| return escapeSingleClauseValue(clause); | ||
|
Comment on lines
+287
to
+292
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can add a test for this to verify field names survive because of explicit "OR" handling |
||
| } | ||
|
|
||
| private static String escapeSingleClauseValue(String clause) { | ||
| int colonIdx = clause.indexOf(':'); | ||
| if (colonIdx < 0) { | ||
| return escapeValueForLuceneJson(clause); | ||
| } | ||
| String field = clause.substring(0, colonIdx + 1); | ||
| String value = clause.substring(colonIdx + 1); | ||
| return field + escapeValueForLuceneJson(value); | ||
| } | ||
|
|
||
| private static String escapeValueForLuceneJson(String value) { | ||
| return value.replace("\\", "\\\\\\\\").replace("/", "\\\\/").replace("(", "\\\\(").replace(")", "\\\\)"); | ||
| } | ||
|
Comment on lines
+305
to
+307
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method only escapes a small subset of Lucene's special characters ( |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,26 +17,92 @@ | |
|
|
||
| import static org.assertj.core.api.Assertions.assertThat; | ||
|
|
||
| import java.lang.reflect.Method; | ||
| import org.apache.lucene.queryparser.classic.QueryParser; | ||
| import org.junit.jupiter.api.Test; | ||
|
|
||
| public class LuceneEscapeTest { | ||
|
|
||
| @Test | ||
| public void compareImplementationWithStandardParser() { | ||
| // Original APIM-12955 regression test | ||
| String input = "status:200 AND custom.userAgent:RMel/1.0.0 (Ubuntu)"; | ||
|
|
||
| String luceneStandard = QueryParser.escape(input); | ||
|
|
||
| String result = input.replace("\\", "\\\\\\\\").replace("/", "\\\\/").replace("(", "\\\\(").replace(")", "\\\\)"); | ||
| // escapeFilterValues splits field:value and only escapes values | ||
| String result = invokeEscapeFilterValues(input); | ||
|
|
||
| assertThat(result).contains("RMel\\\\/1.0.0"); | ||
| assertThat(result).contains("\\\\(Ubuntu\\\\)"); | ||
|
|
||
| // NO REGRESSION Test | ||
| // NO REGRESSION: field names preserved | ||
| assertThat(result).contains("status:200"); | ||
| assertThat(result).contains("custom.userAgent:"); | ||
|
|
||
| assertThat(luceneStandard).contains("status\\:200"); | ||
| } | ||
|
|
||
| @Test | ||
| public void escapeFilterValues_preservesFieldNames() { | ||
| String result = invokeEscapeFilterValues("status:200 AND api:some-uuid"); | ||
| assertThat(result).isEqualTo("status:200 AND api:some-uuid"); | ||
| } | ||
|
|
||
| @Test | ||
| public void escapeFilterValues_escapesSlashInValue() { | ||
| String result = invokeEscapeFilterValues("custom.userAgent:RMel/1.0.0"); | ||
| assertThat(result).isEqualTo("custom.userAgent:RMel\\\\/1.0.0"); | ||
| } | ||
|
|
||
| @Test | ||
| public void escapeFilterValues_escapesParensInValue() { | ||
| String result = invokeEscapeFilterValues("custom.userAgent:App (v2)"); | ||
| assertThat(result).isEqualTo("custom.userAgent:App \\\\(v2\\\\)"); | ||
| } | ||
|
|
||
| @Test | ||
| public void escapeValueForLuceneJson_noSpecialChars() { | ||
| String result = invokeEscapeValue("searchterm"); | ||
| assertThat(result).isEqualTo("searchterm"); | ||
| } | ||
|
|
||
| @Test | ||
| public void escapeValueForLuceneJson_escapesParens() { | ||
| String result = invokeEscapeValue("error(500)"); | ||
| assertThat(result).isEqualTo("error\\\\(500\\\\)"); | ||
| } | ||
|
|
||
| @Test | ||
| public void escapeValueForLuceneJson_escapesSlash() { | ||
| String result = invokeEscapeValue("path/to/resource"); | ||
| assertThat(result).isEqualTo("path\\\\/to\\\\/resource"); | ||
| } | ||
|
|
||
| @Test | ||
| public void escapeValueForLuceneJson_doesNotEscapeDollarOrAt() { | ||
| // $ and @ are not Lucene special chars | ||
| String result = invokeEscapeValue("$99 user@example.com"); | ||
| assertThat(result).isEqualTo("$99 user@example.com"); | ||
| } | ||
|
|
||
| private String invokeEscapeFilterValues(String filter) { | ||
| try { | ||
| Method method = ElasticLogRepository.class.getDeclaredMethod("escapeFilterValues", String.class); | ||
| method.setAccessible(true); | ||
| return (String) method.invoke(new ElasticLogRepository(), filter); | ||
| } catch (Exception e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
|
Comment on lines
+89
to
+97
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test helper invokes |
||
|
|
||
| private String invokeEscapeValue(String value) { | ||
| try { | ||
| Method method = ElasticLogRepository.class.getDeclaredMethod("escapeValueForLuceneJson", String.class); | ||
| method.setAccessible(true); | ||
| return (String) method.invoke(null, value); | ||
| } catch (Exception e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This block assumes
filteris a two-element array derived fromsplit(":")in the preceding stream operation. This is fragile and will break if a filter value contains a colon (e.g., a URL), causing parts of the value to be lost. A more robust approach is to useindexOf(':')to find the first colon and split the field from the value. A good example of this is in theescapeSingleClauseValuemethod added in this same PR, although it is currently part of an unused code path.