-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Move Text class to libs/xcontent #128780
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move Text class to libs/xcontent #128780
Changes from all commits
f72a133
614ec17
dcbf2cb
f4f6da6
7bb2f17
0af1611
4a0ddcd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| package org.elasticsearch.xcontent; | ||
|
|
||
| import java.nio.ByteBuffer; | ||
|
|
||
/**
 * A string value that can be read either as a {@link String} or as its encoded {@link UTF8Bytes} form.
 */
public interface XContentString {
    /**
     * A window of {@code length} bytes over {@code bytes}, starting at {@code offset}.
     * <p>
     * Equality, ordering and hashing are defined by the content of that window, not by the identity of the
     * backing array: two instances over different arrays (or different offsets) are equal when their windows
     * hold the same bytes. The backing array is stored as given; it is not copied.
     *
     * @param bytes  the backing array
     * @param offset index of the first byte of the window
     * @param length number of bytes in the window
     */
    record UTF8Bytes(byte[] bytes, int offset, int length) implements Comparable<UTF8Bytes> {
        /**
         * Creates a window that spans the whole array.
         *
         * @param bytes the backing array
         */
        public UTF8Bytes(byte[] bytes) {
            this(bytes, 0, bytes.length);
        }

        /**
         * Compares the two windows lexicographically via {@link ByteBuffer#compareTo(ByteBuffer)}, which
         * compares individual bytes as signed values.
         *
         * @param o the instance to compare against
         * @return a negative value, zero, or a positive value as this window sorts before, equal to, or after {@code o}
         */
        @Override
        public int compareTo(UTF8Bytes o) {
            // Same array and same window: the result is known without wrapping either side in a buffer.
            if (this.bytes == o.bytes && this.offset == o.offset && this.length == o.length) {
                return 0;
            }

            return ByteBuffer.wrap(bytes, offset, length).compareTo(ByteBuffer.wrap(o.bytes, o.offset, o.length));
        }

        /**
         * Returns {@code true} when {@code o} is a {@code UTF8Bytes} whose window holds the same bytes as this one.
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }

            UTF8Bytes other = (UTF8Bytes) o;
            // Windows of different lengths can never be equal, so skip the buffer wrapping and the
            // byte-by-byte comparison that compareTo() would otherwise perform.
            if (this.length != other.length) {
                return false;
            }
            return this.compareTo(other) == 0;
        }

        /**
         * Hashes the content of the window (delegating to {@link ByteBuffer#hashCode()}), so instances that are
         * {@link #equals(Object) equal} produce the same hash code.
         */
        @Override
        public int hashCode() {
            return ByteBuffer.wrap(bytes, offset, length).hashCode();
        }
    }

    /**
     * Returns a {@link String} view of the data.
     */
    String string();

    /**
     * Returns an encoded {@link UTF8Bytes} view of the data.
     */
    UTF8Bytes bytes();

    /**
     * Returns the number of characters in the represented string.
     */
    int stringLength();
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,190 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| package org.elasticsearch.xcontent; | ||
|
|
||
| import org.elasticsearch.test.ESTestCase; | ||
|
|
||
| import java.nio.charset.StandardCharsets; | ||
|
|
||
| /** | ||
| * Tests for Text: lazy conversion between its String and encoded-bytes forms, and the | ||
| * string length, equality and ordering results across both forms. | ||
| */ | ||
| public class TextTests extends ESTestCase { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🎉 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a pretty good test suite! Optional: Perhaps consider some tests that use randomization to do a sequence of operations (like This is probably not necessary, since you've already added regression tests for the sequences that bit us before. |
||
| // A Text built from a String starts with only the String form; bytes() adds the encoded form | ||
| // and later calls keep returning the same UTF8Bytes instance. | ||
| public void testConvertToBytes() { | ||
| String value = randomUnicodeOfLength(randomInt(128)); | ||
| byte[] encodedArr = value.getBytes(StandardCharsets.UTF_8); | ||
| var encoded = new XContentString.UTF8Bytes(encodedArr); | ||
|
|
||
| var text = new Text(value); | ||
| assertTrue(text.hasString()); | ||
| assertFalse(text.hasBytes()); | ||
|
|
||
| assertEquals(value, text.string()); | ||
| assertEquals(encoded, text.bytes()); | ||
|
|
||
| // After bytes() has been called once, both forms are reported as present. | ||
| assertTrue(text.hasString()); | ||
| assertTrue(text.hasBytes()); | ||
|
|
||
| // Ensure the conversion didn't mess up subsequent calls | ||
| assertEquals(value, text.string()); | ||
| assertEquals(encoded, text.bytes()); | ||
|
|
||
| // Identity check: repeated bytes() calls must hand back one and the same object. | ||
| assertSame(text.bytes(), text.bytes()); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tricky! |
||
| } | ||
|
|
||
| // A Text built from UTF8Bytes starts with only the bytes form; string() adds the String form | ||
| // and bytes() keeps returning the instance that was passed to the constructor. | ||
| public void testConvertToString() { | ||
| String value = randomUnicodeOfLength(randomInt(128)); | ||
| byte[] encodedArr = value.getBytes(StandardCharsets.UTF_8); | ||
| var encoded = new XContentString.UTF8Bytes(encodedArr); | ||
|
|
||
| var text = new Text(encoded); | ||
| assertFalse(text.hasString()); | ||
| assertTrue(text.hasBytes()); | ||
|
|
||
| assertEquals(value, text.string()); | ||
| assertEquals(encoded, text.bytes()); | ||
|
|
||
| // After string() has been called once, both forms are reported as present. | ||
| assertTrue(text.hasString()); | ||
| assertTrue(text.hasBytes()); | ||
|
|
||
| // Ensure the conversion didn't mess up subsequent calls | ||
| assertEquals(value, text.string()); | ||
| assertEquals(encoded, text.bytes()); | ||
|
|
||
| // Identity check: the original UTF8Bytes object is returned, not a re-encoded copy. | ||
| assertSame(encoded, text.bytes()); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably we could just change all prior There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I want to keep the |
||
| } | ||
|
|
||
| // stringLength() must agree with the requested length for every way of constructing a Text. | ||
| public void testStringLength() { | ||
| int stringLength = randomInt(128); | ||
| String value = randomUnicodeOfLength(stringLength); | ||
| byte[] encodedArr = value.getBytes(StandardCharsets.UTF_8); | ||
| var encoded = new XContentString.UTF8Bytes(encodedArr); | ||
|
|
||
| // Built from the String itself. | ||
| { | ||
| var text = new Text(value); | ||
| assertTrue(text.hasString()); | ||
| assertEquals(stringLength, text.stringLength()); | ||
| } | ||
|
|
||
| // Built from bytes only: after stringLength() the String form is reported as present. | ||
| { | ||
| var text = new Text(encoded); | ||
| assertFalse(text.hasString()); | ||
| assertEquals(stringLength, text.stringLength()); | ||
| assertTrue(text.hasString()); | ||
| } | ||
|
|
||
| // Built from bytes plus a known length: stringLength() leaves the String form absent. | ||
| { | ||
| var text = new Text(encoded, stringLength); | ||
| assertFalse(text.hasString()); | ||
| assertEquals(stringLength, text.stringLength()); | ||
| assertFalse(text.hasString()); | ||
| } | ||
| } | ||
|
|
||
| // Two Texts holding the same value are equal whichever form each was built from. | ||
| public void testEquals() { | ||
| String value = randomUnicodeOfLength(randomInt(128)); | ||
| byte[] encodedArr = value.getBytes(StandardCharsets.UTF_8); | ||
| var encoded = new XContentString.UTF8Bytes(encodedArr); | ||
|
|
||
| // String vs String | ||
| { | ||
| var text1 = new Text(value); | ||
| var text2 = new Text(value); | ||
| assertTrue(text1.equals(text2)); | ||
| } | ||
|
|
||
| // String vs bytes | ||
| { | ||
| var text1 = new Text(value); | ||
| var text2 = new Text(encoded); | ||
| assertTrue(text1.equals(text2)); | ||
| } | ||
|
|
||
| // bytes vs bytes | ||
| { | ||
| var text1 = new Text(encoded); | ||
| var text2 = new Text(encoded); | ||
| assertTrue(text1.equals(text2)); | ||
| } | ||
| } | ||
|
|
||
| // compareTo() returns 0 for the same value and otherwise has the same sign as comparing the | ||
| // encoded UTF8Bytes directly, for every combination of String-built and bytes-built operands. | ||
| public void testCompareTo() { | ||
| String value1 = randomUnicodeOfLength(randomInt(128)); | ||
| byte[] encodedArr1 = value1.getBytes(StandardCharsets.UTF_8); | ||
| var encoded1 = new XContentString.UTF8Bytes(encodedArr1); | ||
|
|
||
| { | ||
| var text1 = new Text(value1); | ||
| var text2 = new Text(value1); | ||
| assertEquals(0, text1.compareTo(text2)); | ||
| } | ||
|
|
||
| { | ||
| var text1 = new Text(value1); | ||
| var text2 = new Text(encoded1); | ||
| assertEquals(0, text1.compareTo(text2)); | ||
| } | ||
|
|
||
| { | ||
| var text1 = new Text(encoded1); | ||
| var text2 = new Text(encoded1); | ||
| assertEquals(0, text1.compareTo(text2)); | ||
| } | ||
|
|
||
| String value2 = randomUnicodeOfLength(randomInt(128)); | ||
| byte[] encodedArr2 = value2.getBytes(StandardCharsets.UTF_8); | ||
| var encoded2 = new XContentString.UTF8Bytes(encodedArr2); | ||
|
|
||
| // Reference result: only the sign is compared below, not the magnitude. | ||
| int compSign = (int) Math.signum(encoded1.compareTo(encoded2)); | ||
|
|
||
| { | ||
| var text1 = new Text(value1); | ||
| var text2 = new Text(value2); | ||
| assertEquals(compSign, (int) Math.signum(text1.compareTo(text2))); | ||
| } | ||
|
|
||
| { | ||
| var text1 = new Text(value1); | ||
| var text2 = new Text(encoded2); | ||
| assertEquals(compSign, (int) Math.signum(text1.compareTo(text2))); | ||
| } | ||
|
|
||
| { | ||
| var text1 = new Text(encoded1); | ||
| var text2 = new Text(value2); | ||
| assertEquals(compSign, (int) Math.signum(text1.compareTo(text2))); | ||
| } | ||
|
|
||
| { | ||
| var text1 = new Text(encoded1); | ||
| var text2 = new Text(encoded2); | ||
| assertEquals(compSign, (int) Math.signum(text1.compareTo(text2))); | ||
| } | ||
| } | ||
|
|
||
| // Runs a random sequence of accessor calls against one Text and checks that every call | ||
| // still returns the expected result, whatever order the calls happen in. | ||
| public void testRandomized() { | ||
| int stringLength = randomInt(128); | ||
| String value = randomUnicodeOfLength(stringLength); | ||
| byte[] encodedArr = value.getBytes(StandardCharsets.UTF_8); | ||
| var encoded = new XContentString.UTF8Bytes(encodedArr); | ||
|
|
||
| // Start from a randomly chosen one of the three construction paths. | ||
| Text text = switch (randomInt(2)) { | ||
| case 0 -> new Text(value); | ||
| case 1 -> new Text(encoded); | ||
| default -> new Text(encoded, stringLength); | ||
| }; | ||
|
|
||
| // 20 randomly chosen checks, all against the same Text instance. | ||
| for (int i = 0; i < 20; i++) { | ||
| switch (randomInt(5)) { | ||
| case 0 -> assertEquals(encoded, text.bytes()); | ||
| case 1 -> assertSame(text.bytes(), text.bytes()); | ||
| case 2 -> assertEquals(value, text.string()); | ||
| case 3 -> assertEquals(value, text.toString()); | ||
| case 4 -> assertEquals(stringLength, text.stringLength()); | ||
| case 5 -> assertEquals(new Text(value), text); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Random thought: Is there any value in peeling off a fast-path case for when all the fields are identical by `==`? Not sure whether this ever actually happens, but it would avoid two object allocations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense to me, especially since `equals()` delegates to `compareTo()`, so this could happen fairly frequently.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, essentially you're making sure you're no slower than the built-in `Record.equals()` for the case that it returns true. Could be a little slower for the `false` case.