Skip to content

Commit adfd67e

Browse files
committed
more
1 parent 1f303cf commit adfd67e

File tree

3 files changed

+74
-71
lines changed

3 files changed

+74
-71
lines changed

server/src/main/java/org/elasticsearch/ingest/ESONIndexed.java

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -779,30 +779,35 @@ private static ESONSource.Value mutationToValue(int newOffset, BytesStreamOutput
779779
}
780780
case BigInteger bigInteger -> {
781781
byte[] numberBytes = bigInteger.toString().getBytes(StandardCharsets.UTF_8);
782+
newValuesOut.writeVInt(numberBytes.length);
782783
newValuesOut.write(numberBytes);
783-
yield new ESONSource.VariableValue(position, numberBytes.length, ESONEntry.BIG_INTEGER);
784+
yield new ESONSource.VariableValue(position, ESONEntry.BIG_INTEGER);
784785
}
785786
case BigDecimal bigDecimal -> {
786787
byte[] numberBytes = bigDecimal.toString().getBytes(StandardCharsets.UTF_8);
788+
newValuesOut.writeVInt(numberBytes.length);
787789
newValuesOut.write(numberBytes);
788-
yield new ESONSource.VariableValue(position, numberBytes.length, ESONEntry.BIG_DECIMAL);
790+
yield new ESONSource.VariableValue(position, ESONEntry.BIG_DECIMAL);
789791
}
790792
default -> {
791-
byte[] numberBytes = num.toString().getBytes(StandardCharsets.UTF_8);
792-
newValuesOut.write(numberBytes);
793-
yield new ESONSource.VariableValue(position, numberBytes.length, ESONEntry.STRING);
793+
byte[] utf8Bytes = num.toString().getBytes(StandardCharsets.UTF_8);
794+
newValuesOut.writeVInt(utf8Bytes.length);
795+
newValuesOut.write(utf8Bytes);
796+
yield new ESONSource.VariableValue(position, ESONEntry.STRING);
794797
}
795798
};
796799
} else if (obj instanceof Boolean bool) {
797800
value = bool ? ESONSource.ConstantValue.TRUE : ESONSource.ConstantValue.FALSE;
798801
} else if (obj instanceof byte[] bytes) {
802+
newValuesOut.writeVInt(bytes.length);
799803
newValuesOut.writeBytes(bytes);
800-
value = new ESONSource.VariableValue(position, bytes.length, ESONEntry.BINARY);
804+
value = new ESONSource.VariableValue(position, ESONEntry.BINARY);
801805
} else {
802806
String str = obj.toString();
803807
byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
808+
newValuesOut.writeVInt(bytes.length);
804809
newValuesOut.writeBytes(bytes);
805-
value = new ESONSource.VariableValue(position, bytes.length, ESONEntry.STRING);
810+
value = new ESONSource.VariableValue(position, ESONEntry.STRING);
806811
}
807812

808813
return value;

server/src/main/java/org/elasticsearch/ingest/ESONSource.java

Lines changed: 57 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,9 @@ private static Value parseSimpleValue(XContentParser parser, BytesStreamOutput b
113113
return switch (token) {
114114
case VALUE_STRING -> {
115115
XContentString.UTF8Bytes stringBytes = parser.optimizedText().bytes();
116+
bytes.writeVInt(stringBytes.length());
116117
bytes.write(stringBytes.bytes(), stringBytes.offset(), stringBytes.length());
117-
yield new VariableValue((int) position, stringBytes.length(), ESONEntry.STRING);
118+
yield new VariableValue((int) position, ESONEntry.STRING);
118119
}
119120
case VALUE_NUMBER -> {
120121
XContentParser.NumberType numberType = parser.numberType();
@@ -138,17 +139,19 @@ yield switch (numberType) {
138139
case BIG_INTEGER, BIG_DECIMAL -> {
139140
byte type = numberType == XContentParser.NumberType.BIG_INTEGER ? ESONEntry.BIG_INTEGER : ESONEntry.BIG_DECIMAL;
140141
byte[] numberBytes = parser.text().getBytes(StandardCharsets.UTF_8);
142+
bytes.writeVInt(numberBytes.length);
141143
bytes.write(numberBytes);
142-
yield new VariableValue((int) position, numberBytes.length, type);
144+
yield new VariableValue((int) position, type);
143145
}
144146
};
145147
}
146148
case VALUE_BOOLEAN -> parser.booleanValue() ? ConstantValue.TRUE : ConstantValue.FALSE;
147149
case VALUE_NULL -> ConstantValue.NULL;
148150
case VALUE_EMBEDDED_OBJECT -> {
149151
byte[] binaryValue = parser.binaryValue();
152+
bytes.writeVInt(binaryValue.length);
150153
bytes.write(binaryValue);
151-
yield new VariableValue((int) position, binaryValue.length, ESONEntry.BINARY);
154+
yield new VariableValue((int) position, ESONEntry.BINARY);
152155
}
153156
default -> throw new IllegalArgumentException("Unexpected token: " + token);
154157
};
@@ -214,42 +217,36 @@ public void writeToXContent(XContentBuilder builder, Values values) throws IOExc
214217
}
215218
}
216219

217-
public record VariableValue(int position, int length, byte type) implements Value {
220+
public record VariableValue(int position, byte type) implements Value {
218221
public Object getValue(Values source) {
219222
return switch (type) {
220-
case ESONEntry.STRING -> source.readString(position, length);
221-
case ESONEntry.BINARY -> source.readByteArray(position, length);
222-
case ESONEntry.BIG_INTEGER -> new BigInteger(source.readString(position, length));
223-
case ESONEntry.BIG_DECIMAL -> new BigDecimal(source.readString(position, length));
223+
case ESONEntry.STRING -> source.readString(position);
224+
case ESONEntry.BINARY -> source.readByteArray(position);
225+
case ESONEntry.BIG_INTEGER -> new BigInteger(source.readString(position));
226+
case ESONEntry.BIG_DECIMAL -> new BigDecimal(source.readString(position));
224227
default -> throw new IllegalArgumentException("Invalid value type: " + type);
225228
};
226229
}
227230

228231
public void writeToXContent(XContentBuilder builder, Values values) throws IOException {
229-
BytesReference slice = values.data.slice(position, length);
230-
231-
byte[] bytes;
232-
int offset;
233-
if (slice.hasArray()) {
234-
bytes = slice.array();
235-
offset = slice.arrayOffset();
236-
} else {
237-
BytesRef bytesRef = slice.toBytesRef();
238-
bytes = bytesRef.bytes;
239-
offset = bytesRef.offset;
240-
}
232+
BytesRef bytesRef = Values.readByteSlice(values.data, position);
241233
switch (type) {
242-
case ESONEntry.STRING -> builder.utf8Value(bytes, offset, length);
243-
case ESONEntry.BINARY -> builder.value(bytes, offset, length);
234+
case ESONEntry.STRING -> builder.utf8Value(bytesRef.bytes, bytesRef.offset, bytesRef.length);
235+
case ESONEntry.BINARY -> builder.value(bytesRef.bytes, bytesRef.offset, bytesRef.length);
244236
// TODO: Improve?
245-
case ESONEntry.BIG_INTEGER -> builder.value(new BigInteger(new String(bytes, offset, length, StandardCharsets.UTF_8)));
246-
case ESONEntry.BIG_DECIMAL -> builder.value(new BigDecimal(new String(bytes, offset, length, StandardCharsets.UTF_8)));
237+
case ESONEntry.BIG_INTEGER -> builder.value(
238+
new BigInteger(new String(bytesRef.bytes, bytesRef.offset, bytesRef.length, StandardCharsets.UTF_8))
239+
);
240+
case ESONEntry.BIG_DECIMAL -> builder.value(
241+
new BigDecimal(new String(bytesRef.bytes, bytesRef.offset, bytesRef.length, StandardCharsets.UTF_8))
242+
);
247243
default -> throw new IllegalArgumentException("Invalid value type: " + type);
248244
}
249245
}
250246
}
251247

252248
public record Values(BytesReference data) {
249+
253250
public int readInt(int position) {
254251
return data.getInt(position);
255252
}
@@ -272,28 +269,45 @@ public boolean readBoolean(int position) {
272269
return data.get(position) != 0;
273270
}
274271

275-
private byte[] readByteArray(int position, int length) {
276-
byte[] result = new byte[length];
277-
for (int i = 0; i < length; i++) {
278-
result[i] = data.get(position + i);
279-
}
280-
return result;
272+
private byte[] readByteArray(int position) {
273+
BytesRef bytesRef = readByteSlice(data, position);
274+
byte[] bytes = new byte[bytesRef.length];
275+
System.arraycopy(bytesRef.bytes, bytesRef.offset, bytes, 0, bytesRef.length);
276+
return bytes;
277+
}
278+
279+
public String readString(int position) {
280+
BytesRef bytesRef = readByteSlice(data, position);
281+
return new String(bytesRef.bytes, bytesRef.offset, bytesRef.length, java.nio.charset.StandardCharsets.UTF_8);
281282
}
282283

283-
public String readString(int position, int length) {
284-
BytesReference slice = data.slice(position, length);
285-
286-
final byte[] bytes;
287-
final int offset;
288-
if (slice.hasArray()) {
289-
bytes = slice.array();
290-
offset = slice.arrayOffset();
291-
} else {
292-
BytesRef bytesRef = slice.toBytesRef();
293-
bytes = bytesRef.bytes;
294-
offset = bytesRef.offset;
284+
public static BytesRef readByteSlice(BytesReference data, int position) {
285+
byte b = data.get(position);
286+
if (b >= 0) {
287+
return data.slice(position + 1, b).toBytesRef();
288+
}
289+
int i = b & 0x7F;
290+
b = data.get(position + 1);
291+
i |= (b & 0x7F) << 7;
292+
if (b >= 0) {
293+
return data.slice(position + 2, i).toBytesRef();
294+
}
295+
b = data.get(position + 2);
296+
i |= (b & 0x7F) << 14;
297+
if (b >= 0) {
298+
return data.slice(position + 3, i).toBytesRef();
299+
}
300+
b = data.get(position + 3);
301+
i |= (b & 0x7F) << 21;
302+
if (b >= 0) {
303+
return data.slice(position + 4, i).toBytesRef();
304+
}
305+
b = data.get(position + 4);
306+
i |= (b & 0x0F) << 28;
307+
if ((b & 0xF0) != 0) {
308+
throw new RuntimeException("Invalid vInt ((" + Integer.toHexString(b) + " & 0x7f) << 28) | " + Integer.toHexString(i));
295309
}
296-
return new String(bytes, offset, length, java.nio.charset.StandardCharsets.UTF_8);
310+
return data.slice(position + 5, i).toBytesRef();
297311
}
298312
}
299313
}

server/src/main/java/org/elasticsearch/ingest/ESONXContentParser.java

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
package org.elasticsearch.ingest;
1111

1212
import org.apache.lucene.util.BytesRef;
13-
import org.elasticsearch.common.bytes.BytesReference;
1413
import org.elasticsearch.xcontent.DeprecationHandler;
1514
import org.elasticsearch.xcontent.NamedXContentRegistry;
1615
import org.elasticsearch.xcontent.Text;
@@ -251,20 +250,9 @@ public String text() throws IOException {
251250
public XContentString optimizedText() throws IOException {
252251
// For strings, try to access raw bytes directly without materializing the string
253252
if (currentType instanceof ESONSource.VariableValue varValue && varValue.type() == ESONEntry.STRING) {
254-
BytesReference slice = values.data().slice(varValue.position(), varValue.length());
255-
256-
final byte[] bytes;
257-
final int offset;
258-
if (slice.hasArray()) {
259-
bytes = slice.array();
260-
offset = slice.arrayOffset();
261-
} else {
262-
BytesRef bytesRef = slice.toBytesRef();
263-
bytes = bytesRef.bytes;
264-
offset = bytesRef.offset;
265-
}
253+
BytesRef bytesRef = ESONSource.Values.readByteSlice(values.data(), varValue.position());
266254
// TODO: Fix Length
267-
return new Text(new XContentString.UTF8Bytes(bytes, offset, varValue.length()), varValue.length());
255+
return new Text(new XContentString.UTF8Bytes(bytesRef.bytes, bytesRef.offset, bytesRef.length), bytesRef.length);
268256
}
269257

270258
// Fallback: materialize value and convert to bytes
@@ -277,14 +265,10 @@ public boolean optimizedTextToStream(OutputStream out) throws IOException {
277265
// For strings, try to write raw bytes directly without materializing the string
278266
if (currentType instanceof ESONSource.VariableValue varValue && varValue.type() == ESONEntry.STRING) {
279267
try {
268+
BytesRef bytesRef = ESONSource.Values.readByteSlice(values.data(), varValue.position());
269+
out.write(bytesRef.bytes, bytesRef.offset, bytesRef.length);
280270
// TODO: Can optimize more. Just not sure if this method needs to stay.
281-
if (values.data().hasArray()) {
282-
// Write directly from the raw bytes
283-
byte[] rawBytes = values.data().array();
284-
int offset = values.data().arrayOffset() + varValue.position();
285-
out.write(rawBytes, offset, varValue.length());
286-
return true;
287-
}
271+
return true;
288272
} catch (Exception e) {
289273
// Fall back to materialized string
290274
}

0 commit comments

Comments
 (0)