Skip to content

Commit 9062154

Browse files
authored
ESQL: Fix REVERSE with backspace character (#115245)
* ESQL: Fix `REVERSE` with backspace character If the text contains a backspace character aka `0x28` aka ctrl-H then we should use the slow reverse path. This is going to be quite rare but our test data is sure good at making rare, fun stuff. Closes #115228 Closes #115227 Closes #114372
1 parent eae3a42 commit 9062154

File tree

3 files changed

+14
-13
lines changed

3 files changed

+14
-13
lines changed

docs/changelog/115245.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
pr: 115245
2+
summary: "ESQL: Fix `REVERSE` with backspace character"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 114372
7+
- 115227
8+
- 115228

muted-tests.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -346,12 +346,6 @@ tests:
346346
issue: https://github.com/elastic/elasticsearch/issues/115129
347347
- class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT
348348
issue: https://github.com/elastic/elasticsearch/issues/115135
349-
- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.ReverseTests
350-
method: testEvaluateInManyThreads {TestCase=<long unicode TEXT>}
351-
issue: https://github.com/elastic/elasticsearch/issues/115227
352-
- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.ReverseTests
353-
method: testEvaluateInManyThreads {TestCase=<long unicode KEYWORD>}
354-
issue: https://github.com/elastic/elasticsearch/issues/115228
355349
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
356350
method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry)}
357351
issue: https://github.com/elastic/elasticsearch/issues/115231

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Reverse.java

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import org.apache.lucene.util.BytesRef;
1111
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1212
import org.elasticsearch.common.io.stream.StreamInput;
13-
import org.elasticsearch.common.lucene.BytesRefs;
1413
import org.elasticsearch.compute.ann.Evaluator;
1514
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
1615
import org.elasticsearch.xpack.esql.core.expression.Expression;
@@ -79,8 +78,6 @@ protected TypeResolution resolveType() {
7978

8079
/**
8180
* Reverses a unicode string, keeping grapheme clusters together
82-
* @param str
83-
* @return
8481
*/
8582
public static String reverseStringWithUnicodeCharacters(String str) {
8683
BreakIterator boundary = BreakIterator.getCharacterInstance(Locale.ROOT);
@@ -100,10 +97,12 @@ public static String reverseStringWithUnicodeCharacters(String str) {
10097
return reversed.toString();
10198
}
10299

103-
private static boolean isOneByteUTF8(BytesRef ref) {
100+
private static boolean reverseBytesIsReverseUnicode(BytesRef ref) {
104101
int end = ref.offset + ref.length;
105102
for (int i = ref.offset; i < end; i++) {
106-
if (ref.bytes[i] < 0) {
103+
if (ref.bytes[i] < 0 // Anything encoded in multibyte utf-8
104+
|| ref.bytes[i] == 0x28 // Backspace
105+
) {
107106
return false;
108107
}
109108
}
@@ -112,13 +111,13 @@ private static boolean isOneByteUTF8(BytesRef ref) {
112111

113112
@Evaluator
114113
static BytesRef process(BytesRef val) {
115-
if (isOneByteUTF8(val)) {
114+
if (reverseBytesIsReverseUnicode(val)) {
116115
// this is the fast path. we know we can just reverse the bytes.
117116
BytesRef reversed = BytesRef.deepCopyOf(val);
118117
reverseArray(reversed.bytes, reversed.offset, reversed.length);
119118
return reversed;
120119
}
121-
return BytesRefs.toBytesRef(reverseStringWithUnicodeCharacters(val.utf8ToString()));
120+
return new BytesRef(reverseStringWithUnicodeCharacters(val.utf8ToString()));
122121
}
123122

124123
@Override

0 commit comments

Comments
 (0)