Skip to content

Commit bcb195e

Browse files
committed
Use unsigned comparison in UTF8Bytes#compareTo
1 parent d75f180 commit bcb195e

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentString.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
package org.elasticsearch.xcontent;
1111

1212
import java.nio.ByteBuffer;
13+
import java.util.Arrays;
1314

1415
public interface XContentString {
1516
record UTF8Bytes(byte[] bytes, int offset, int length) implements Comparable<UTF8Bytes> {
@@ -23,7 +24,7 @@ public int compareTo(UTF8Bytes o) {
2324
return 0;
2425
}
2526

26-
return ByteBuffer.wrap(bytes, offset, length).compareTo(ByteBuffer.wrap(o.bytes, o.offset, o.length));
27+
return Arrays.compareUnsigned(bytes, offset, offset + length, o.bytes, o.offset, o.offset + o.length);
2728
}
2829

2930
@Override

libs/x-content/src/test/java/org/elasticsearch/xcontent/TextTests.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.test.ESTestCase;
1313

1414
import java.nio.charset.StandardCharsets;
15+
import java.util.Arrays;
1516

1617
public class TextTests extends ESTestCase {
1718
public void testConvertToBytes() {
@@ -136,7 +137,8 @@ public void testCompareTo() {
136137
byte[] encodedArr2 = value2.getBytes(StandardCharsets.UTF_8);
137138
var encoded2 = new XContentString.UTF8Bytes(encodedArr2);
138139

139-
int compSign = (int) Math.signum(encoded1.compareTo(encoded2));
140+
// String.compareTo() compares UTF-16 code units, so surrogate pairs sort differently than in UTF-8 byte order; compare the full 32-bit code points instead
141+
int compSign = (int) Math.signum(Arrays.compare(value1.codePoints().toArray(), value2.codePoints().toArray()));
140142

141143
{
142144
var text1 = new Text(value1);

0 commit comments

Comments
 (0)