Skip to content

Commit 1b2d6ef

Browse files
committed
Use optimizedText in match_only_text fields
1 parent bc11bdf commit 1b2d6ef

File tree

4 files changed

+75
-7
lines changed

4 files changed

+75
-7
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.lucene.util.IOFunction;
3232
import org.elasticsearch.common.CheckedIntFunction;
3333
import org.elasticsearch.common.lucene.Lucene;
34+
import org.elasticsearch.common.text.UTF8DecodingReader;
3435
import org.elasticsearch.common.unit.Fuzziness;
3536
import org.elasticsearch.index.IndexVersion;
3637
import org.elasticsearch.index.IndexVersions;
@@ -384,7 +385,7 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
384385
) {
385386
@Override
386387
protected BytesRef storedToBytesRef(Object stored) {
387-
return new BytesRef((String) stored);
388+
return (BytesRef) stored;
388389
}
389390
};
390391
}
@@ -442,18 +443,20 @@ public FieldMapper.Builder getMergeBuilder() {
442443

443444
@Override
444445
protected void parseCreateField(DocumentParserContext context) throws IOException {
445-
final String value = context.parser().textOrNull();
446+
final var value = context.parser().optimizedTextOrNull();
446447

447448
if (value == null) {
448449
return;
449450
}
450451

451-
Field field = new Field(fieldType().name(), value, fieldType);
452+
final var utfBytes = value.bytes();
453+
Field field = new Field(fieldType().name(), new UTF8DecodingReader(utfBytes), fieldType);
452454
context.doc().add(field);
453455
context.addToFieldNames(fieldType().name());
454456

455457
if (storeSource) {
456-
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value));
458+
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
459+
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
457460
}
458461
}
459462

@@ -473,7 +476,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
473476
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
474477
@Override
475478
protected void write(XContentBuilder b, Object value) throws IOException {
476-
b.value((String) value);
479+
b.value(((BytesRef) value).utf8ToString());
477480
}
478481
}
479482
);

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import org.apache.lucene.search.Weight;
4242
import org.apache.lucene.search.similarities.Similarity;
4343
import org.apache.lucene.search.similarities.Similarity.SimScorer;
44+
import org.apache.lucene.util.BytesRef;
4445
import org.apache.lucene.util.IOFunction;
4546
import org.elasticsearch.common.CheckedIntFunction;
4647
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
@@ -438,7 +439,13 @@ private MemoryIndex getOrCreateMemoryIndex() throws IOException {
438439
if (value == null) {
439440
continue;
440441
}
441-
cacheEntry.memoryIndex.addField(field, value.toString(), indexAnalyzer);
442+
String valueStr;
443+
if (value instanceof BytesRef valueRef) {
444+
valueStr = valueRef.utf8ToString();
445+
} else {
446+
valueStr = value.toString();
447+
}
448+
cacheEntry.memoryIndex.addField(field, valueStr, indexAnalyzer);
442449
}
443450
}
444451
return cacheEntry.memoryIndex;

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,12 @@ public void testDefaults() throws IOException {
123123
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
124124
List<IndexableField> fields = doc.rootDoc().getFields("field");
125125
assertEquals(1, fields.size());
126-
assertEquals("1234", fields.get(0).stringValue());
126+
127+
var reader = fields.get(0).readerValue();
128+
char[] buff = new char[20];
129+
assertEquals(4, reader.read(buff));
130+
assertEquals("1234", new String(buff, 0, 4));
131+
127132
IndexableFieldType fieldType = fields.get(0).fieldType();
128133
assertThat(fieldType.omitNorms(), equalTo(true));
129134
assertTrue(fieldType.tokenized());
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.common.text;
11+
12+
import org.elasticsearch.xcontent.XContentString;
13+
14+
import java.io.Reader;
15+
import java.nio.ByteBuffer;
16+
import java.nio.CharBuffer;
17+
import java.nio.charset.CharsetDecoder;
18+
import java.nio.charset.StandardCharsets;
19+
20+
/**
21+
* Reader that decodes UTF-8 formatted bytes into chars.
22+
*/
23+
public class UTF8DecodingReader extends Reader {
24+
private CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
25+
private ByteBuffer bytes;
26+
27+
public UTF8DecodingReader(ByteBuffer bytes) {
28+
this.bytes = bytes;
29+
}
30+
31+
public UTF8DecodingReader(XContentString.UTF8Bytes utf8bytes) {
32+
this.bytes = ByteBuffer.wrap(utf8bytes.bytes(), utf8bytes.offset(), utf8bytes.length());
33+
}
34+
35+
@Override
36+
public int read(char[] cbuf, int off, int len) {
37+
return read(CharBuffer.wrap(cbuf, off, len));
38+
}
39+
40+
@Override
41+
public int read(CharBuffer cbuf) {
42+
if (bytes.hasRemaining() == false) {
43+
return -1;
44+
}
45+
46+
int startPos = cbuf.position();
47+
decoder.decode(bytes, cbuf, true);
48+
return cbuf.position() - startPos;
49+
}
50+
51+
@Override
52+
public void close() {}
53+
}

0 commit comments

Comments
 (0)