Skip to content

Commit cabe15d

Browse files
authored
ESQL: Fix REVERSE with backspace character (#115245) (#115263)
* ESQL: Fix `REVERSE` with backspace character. If the text contains a backspace character (aka ctrl-H; written as `0x28` in this commit, although the ASCII code for backspace is `0x08` and `0x28` is the `(` character) then we should use the slow reverse path. This is going to be quite rare, but our test data is sure good at making rare, fun stuff. Closes #115228. Closes #115227. Closes #114372.
1 parent a7c7004 commit cabe15d

File tree

2 files changed

+14
-7
lines changed
  • docs/changelog
  • x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string

2 files changed

+14
-7
lines changed

docs/changelog/115245.yaml

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
pr: 115245
2+
summary: "ESQL: Fix `REVERSE` with backspace character"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 114372
7+
- 115227
8+
- 115228

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Reverse.java

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import org.apache.lucene.util.BytesRef;
1111
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1212
import org.elasticsearch.common.io.stream.StreamInput;
13-
import org.elasticsearch.common.lucene.BytesRefs;
1413
import org.elasticsearch.compute.ann.Evaluator;
1514
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
1615
import org.elasticsearch.xpack.esql.core.expression.Expression;
@@ -83,8 +82,6 @@ protected TypeResolution resolveType() {
8382

8483
/**
8584
* Reverses a unicode string, keeping grapheme clusters together
86-
* @param str
87-
* @return
8885
*/
8986
public static String reverseStringWithUnicodeCharacters(String str) {
9087
BreakIterator boundary = BreakIterator.getCharacterInstance(Locale.ROOT);
@@ -104,10 +101,12 @@ public static String reverseStringWithUnicodeCharacters(String str) {
104101
return reversed.toString();
105102
}
106103

107-
private static boolean isOneByteUTF8(BytesRef ref) {
104+
private static boolean reverseBytesIsReverseUnicode(BytesRef ref) {
108105
int end = ref.offset + ref.length;
109106
for (int i = ref.offset; i < end; i++) {
110-
if (ref.bytes[i] < 0) {
107+
if (ref.bytes[i] < 0 // Anything encoded in multibyte utf-8
108+
|| ref.bytes[i] == 0x28 // Backspace
109+
) {
111110
return false;
112111
}
113112
}
@@ -116,13 +115,13 @@ private static boolean isOneByteUTF8(BytesRef ref) {
116115

117116
@Evaluator
118117
static BytesRef process(BytesRef val) {
119-
if (isOneByteUTF8(val)) {
118+
if (reverseBytesIsReverseUnicode(val)) {
120119
// this is the fast path. we know we can just reverse the bytes.
121120
BytesRef reversed = BytesRef.deepCopyOf(val);
122121
reverseArray(reversed.bytes, reversed.offset, reversed.length);
123122
return reversed;
124123
}
125-
return BytesRefs.toBytesRef(reverseStringWithUnicodeCharacters(val.utf8ToString()));
124+
return new BytesRef(reverseStringWithUnicodeCharacters(val.utf8ToString()));
126125
}
127126

128127
@Override

0 commit comments

Comments
 (0)