Skip to content

Commit 9c04c4f

Browse files
committed
experiment with XContentParser#XContentParser()
1 parent 6b27e42 commit 9c04c4f

File tree

3 files changed

+47
-15
lines changed

3 files changed

+47
-15
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.apache.lucene.search.Query;
2929
import org.apache.lucene.search.TermQuery;
3030
import org.apache.lucene.util.BytesRef;
31+
import org.apache.lucene.util.CharsRef;
3132
import org.apache.lucene.util.IOFunction;
3233
import org.elasticsearch.common.CheckedIntFunction;
3334
import org.elasticsearch.common.lucene.Lucene;
@@ -70,6 +71,8 @@
7071
import java.util.Objects;
7172
import java.util.Set;
7273

74+
import static org.elasticsearch.index.mapper.KeywordFieldMapper.parseTextOrNull;
75+
7376
/**
7477
* A {@link FieldMapper} for full-text fields that only indexes
7578
* {@link IndexOptions#DOCS} and runs positional queries by looking at the
@@ -438,18 +441,19 @@ public FieldMapper.Builder getMergeBuilder() {
438441

439442
@Override
440443
protected void parseCreateField(DocumentParserContext context) throws IOException {
441-
final String value = context.parser().textOrNull();
444+
final CharsRef value = parseTextOrNull(context.parser());
442445

443446
if (value == null) {
444447
return;
445448
}
446449

447-
Field field = new Field(fieldType().name(), value, fieldType);
450+
BytesRef copy = new BytesRef(value);
451+
Field field = new Field(fieldType().name(), copy, fieldType);
448452
context.doc().add(field);
449453
context.addToFieldNames(fieldType().name());
450454

451455
if (storeSource) {
452-
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value));
456+
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), copy));
453457
}
454458
}
455459

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.apache.lucene.search.MultiTermQuery;
3030
import org.apache.lucene.search.Query;
3131
import org.apache.lucene.util.BytesRef;
32+
import org.apache.lucene.util.CharsRef;
3233
import org.apache.lucene.util.automaton.Automata;
3334
import org.apache.lucene.util.automaton.Automaton;
3435
import org.apache.lucene.util.automaton.CompiledAutomaton;
@@ -69,6 +70,7 @@
6970
import org.elasticsearch.xcontent.XContentBuilder;
7071
import org.elasticsearch.xcontent.XContentParser;
7172

73+
import java.io.CharArrayReader;
7274
import java.io.IOException;
7375
import java.io.UncheckedIOException;
7476
import java.util.ArrayList;
@@ -891,7 +893,7 @@ private String applyIgnoreAboveAndNormalizer(String value) {
891893
return null;
892894
}
893895

894-
return normalizeValue(normalizer(), name(), value);
896+
return normalizeValue(normalizer(), name(), new CharsRef(value)).toString();
895897
}
896898

897899
@Override
@@ -1104,9 +1106,10 @@ public String getOffsetFieldName() {
11041106
}
11051107

11061108
protected void parseCreateField(DocumentParserContext context) throws IOException {
1107-
String value = context.parser().textOrNull();
1109+
CharsRef value = parseTextOrNull(context.parser());
11081110
if (value == null) {
1109-
value = fieldType().nullValue;
1111+
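// TODO: fix conversion. NOTE(review): if nullValue can be null, new CharsRef(String) presumably throws a NullPointerException here instead of skipping the value; confirm.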
1112+
value = new CharsRef(fieldType().nullValue);
11101113
}
11111114

11121115
boolean indexed = indexValue(context, value);
@@ -1119,17 +1122,40 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
11191122
}
11201123
}
11211124

1125+
/**
1126+
* Parses values without making a copy, unlike {@link XContentParser#textOrNull()}, which copies the text when invoked.
1127+
*
1128+
* Typically, two copies are made: the first when {@link XContentParser#textOrNull()} is invoked,
1129+
* and the second when the value is converted to {@link BytesRef}.
1130+
*/
1131+
public static CharsRef parseTextOrNull(XContentParser parser) throws IOException {
1132+
var currentToken = parser.currentToken();
1133+
if (currentToken == XContentParser.Token.VALUE_NULL) {
1134+
return null;
1135+
} else if (currentToken.isValue()) {
1136+
return new CharsRef(parser.textCharacters(), parser.textOffset(), parser.textLength());
1137+
} else {
1138+
assert false : "unexpected token [" + currentToken + "]";
1139+
return null;
1140+
}
1141+
}
1142+
11221143
@Override
11231144
protected void indexScriptValues(
11241145
SearchLookup searchLookup,
11251146
LeafReaderContext readerContext,
11261147
int doc,
11271148
DocumentParserContext documentParserContext
11281149
) {
1129-
this.fieldType().scriptValues.valuesForDoc(searchLookup, readerContext, doc, value -> indexValue(documentParserContext, value));
1150+
this.fieldType().scriptValues.valuesForDoc(
1151+
searchLookup,
1152+
readerContext,
1153+
doc,
1154+
value -> indexValue(documentParserContext, new CharsRef(value))
1155+
);
11301156
}
11311157

1132-
private boolean indexValue(DocumentParserContext context, String value) {
1158+
private boolean indexValue(DocumentParserContext context, CharsRef value) {
11331159
if (value == null) {
11341160
return false;
11351161
}
@@ -1186,11 +1212,11 @@ private boolean indexValue(DocumentParserContext context, String value) {
11861212
return true;
11871213
}
11881214

1189-
private static String normalizeValue(NamedAnalyzer normalizer, String field, String value) {
1215+
private static CharsRef normalizeValue(NamedAnalyzer normalizer, String field, CharsRef value) {
11901216
if (normalizer == Lucene.KEYWORD_ANALYZER) {
11911217
return value;
11921218
}
1193-
try (TokenStream ts = normalizer.tokenStream(field, value)) {
1219+
try (TokenStream ts = normalizer.tokenStream(field, new CharArrayReader(value.chars, value.offset, value.length))) {
11941220
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
11951221
ts.reset();
11961222
if (ts.incrementToken() == false) {
@@ -1199,7 +1225,7 @@ private static String normalizeValue(NamedAnalyzer normalizer, String field, Str
11991225
but got 0 for analyzer %s and input "%s"
12001226
""", normalizer, value));
12011227
}
1202-
final String newValue = termAtt.toString();
1228+
final CharsRef newValue = new CharsRef(termAtt.buffer(), 0, termAtt.length());
12031229
if (ts.incrementToken()) {
12041230
throw new IllegalStateException(String.format(Locale.ROOT, """
12051231
The normalization token stream is expected to produce exactly 1 token, \

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
import java.util.Objects;
8686
import java.util.function.IntPredicate;
8787

88+
import static org.elasticsearch.index.mapper.KeywordFieldMapper.parseTextOrNull;
8889
import static org.elasticsearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES;
8990

9091
/** A {@link FieldMapper} for full-text fields. */
@@ -1296,23 +1297,24 @@ public FieldMapper.Builder getMergeBuilder() {
12961297

12971298
@Override
12981299
protected void parseCreateField(DocumentParserContext context) throws IOException {
1299-
final String value = context.parser().textOrNull();
1300+
final var value = parseTextOrNull(context.parser());
13001301

13011302
if (value == null) {
13021303
return;
13031304
}
13041305

13051306
if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) {
1306-
Field field = new Field(fieldType().name(), value, fieldType);
1307+
BytesRef copy = new BytesRef(value);
1308+
Field field = new Field(fieldType().name(), copy, fieldType);
13071309
context.doc().add(field);
13081310
if (fieldType.omitNorms()) {
13091311
context.addToFieldNames(fieldType().name());
13101312
}
13111313
if (prefixFieldInfo != null) {
1312-
context.doc().add(new Field(prefixFieldInfo.field, value, prefixFieldInfo.fieldType));
1314+
context.doc().add(new Field(prefixFieldInfo.field, copy, prefixFieldInfo.fieldType));
13131315
}
13141316
if (phraseFieldInfo != null) {
1315-
context.doc().add(new Field(phraseFieldInfo.field, value, phraseFieldInfo.fieldType));
1317+
context.doc().add(new Field(phraseFieldInfo.field, copy, phraseFieldInfo.fieldType));
13161318
}
13171319
}
13181320
}

0 commit comments

Comments
 (0)