2929import org .apache .lucene .search .MultiTermQuery ;
3030import org .apache .lucene .search .Query ;
3131import org .apache .lucene .util .BytesRef ;
32+ import org .apache .lucene .util .CharsRef ;
3233import org .apache .lucene .util .automaton .Automata ;
3334import org .apache .lucene .util .automaton .Automaton ;
3435import org .apache .lucene .util .automaton .CompiledAutomaton ;
6970import org .elasticsearch .xcontent .XContentBuilder ;
7071import org .elasticsearch .xcontent .XContentParser ;
7172
73+ import java .io .CharArrayReader ;
7274import java .io .IOException ;
7375import java .io .UncheckedIOException ;
7476import java .util .ArrayList ;
@@ -891,7 +893,7 @@ private String applyIgnoreAboveAndNormalizer(String value) {
891893 return null ;
892894 }
893895
894- return normalizeValue (normalizer (), name (), value );
896+ return normalizeValue (normalizer (), name (), new CharsRef ( value )). toString ( );
895897 }
896898
897899 @ Override
@@ -1104,9 +1106,10 @@ public String getOffsetFieldName() {
11041106 }
11051107
11061108 protected void parseCreateField (DocumentParserContext context ) throws IOException {
1107- String value = context .parser (). textOrNull ( );
1109+ CharsRef value = parseTextOrNull ( context .parser ());
11081110 if (value == null ) {
1109- value = fieldType ().nullValue ;
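1111+ // TODO: fix conversion. NOTE(review): the removed code left value null when nullValue was null, so indexValue returned false; wrapping unconditionally presumably makes new CharsRef(String) throw a NullPointerException in that case -- confirm and guard against a null nullValue.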
1112+ value = new CharsRef (fieldType ().nullValue );
11101113 }
11111114
11121115 boolean indexed = indexValue (context , value );
@@ -1119,17 +1122,40 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
11191122 }
11201123 }
11211124
1125+ /**
1126+ * Parses the parser's current value without making a copy, unlike {@link XContentParser#textOrNull()}, which does make one.
1127+ * <p>Typically, two copies are made: the first when {@link XContentParser#textOrNull()} is invoked, the second when the result is converted to {@link BytesRef}.
1128+ * The returned {@link CharsRef} is built from {@link XContentParser#textCharacters()}, {@link XContentParser#textOffset()} and {@link XContentParser#textLength()}; NOTE(review): presumably that array is owned by the parser, so the result should be consumed before the parser advances -- confirm.
1129+ * Returns {@code null} when the current token is {@code VALUE_NULL}; for a token that is neither null nor a value it trips an assertion and then also returns {@code null}.
1130+ */
1131+ public static CharsRef parseTextOrNull (XContentParser parser ) throws IOException {
1132+ var currentToken = parser .currentToken ();
1133+ if (currentToken == XContentParser .Token .VALUE_NULL ) {
1134+ return null ;
1135+ } else if (currentToken .isValue ()) {
1136+ return new CharsRef (parser .textCharacters (), parser .textOffset (), parser .textLength ());
1137+ } else {
1138+ assert false : "unexpected token [" + currentToken + "]" ; // only fires when assertions are enabled
1139+ return null ; // with assertions disabled, an unexpected token is reported to the caller as null
1140+ }
1141+ }
1142+
11221143 @ Override
11231144 protected void indexScriptValues (
11241145 SearchLookup searchLookup ,
11251146 LeafReaderContext readerContext ,
11261147 int doc ,
11271148 DocumentParserContext documentParserContext
11281149 ) {
1129- this .fieldType ().scriptValues .valuesForDoc (searchLookup , readerContext , doc , value -> indexValue (documentParserContext , value ));
1150+ this .fieldType ().scriptValues .valuesForDoc ( // hands the lambda below to valuesForDoc as the value callback (presumably invoked once per script value -- confirm)
1151+ searchLookup ,
1152+ readerContext ,
1153+ doc ,
1154+ value -> indexValue (documentParserContext , new CharsRef (value )) // wrap the value so it matches indexValue's CharsRef parameter
1155+ );
11301156 }
11311157
1132- private boolean indexValue (DocumentParserContext context , String value ) {
1158+ private boolean indexValue (DocumentParserContext context , CharsRef value ) {
11331159 if (value == null ) {
11341160 return false ;
11351161 }
@@ -1186,11 +1212,11 @@ private boolean indexValue(DocumentParserContext context, String value) {
11861212 return true ;
11871213 }
11881214
1189- private static String normalizeValue (NamedAnalyzer normalizer , String field , String value ) {
1215+ private static CharsRef normalizeValue (NamedAnalyzer normalizer , String field , CharsRef value ) {
11901216 if (normalizer == Lucene .KEYWORD_ANALYZER ) {
11911217 return value ;
11921218 }
1193- try (TokenStream ts = normalizer .tokenStream (field , value )) {
1219+ try (TokenStream ts = normalizer .tokenStream (field , new CharArrayReader ( value . chars , value . offset , value . length ) )) {
11941220 final CharTermAttribute termAtt = ts .addAttribute (CharTermAttribute .class );
11951221 ts .reset ();
11961222 if (ts .incrementToken () == false ) {
@@ -1199,7 +1225,7 @@ private static String normalizeValue(NamedAnalyzer normalizer, String field, Str
11991225 but got 0 for analyzer %s and input "%s"
12001226 """ , normalizer , value ));
12011227 }
1202- final String newValue = termAtt .toString ( );
1228+ final CharsRef newValue = new CharsRef ( termAtt .buffer (), 0 , termAtt . length () );
12031229 if (ts .incrementToken ()) {
12041230 throw new IllegalStateException (String .format (Locale .ROOT , """
12051231 The normalization token stream is expected to produce exactly 1 token, \
0 commit comments