1313import org .apache .lucene .document .Field ;
1414import org .apache .lucene .document .FieldType ;
1515import org .apache .lucene .document .SortedSetDocValuesField ;
16+ import org .apache .lucene .document .StoredField ;
1617import org .apache .lucene .index .IndexOptions ;
1718import org .apache .lucene .index .IndexWriter ;
1819import org .apache .lucene .util .BytesRef ;
3637
3738import java .io .IOException ;
3839import java .util .ArrayList ;
39- import java .util .Arrays ;
4040import java .util .Iterator ;
4141import java .util .List ;
4242import java .util .Map ;
4343import java .util .function .Function ;
4444import java .util .function .Supplier ;
4545
46- import static org .apache .lucene .index .IndexWriter .MAX_TERM_LENGTH ;
47-
4846/**
4947 * A {@link FieldMapper} for full-text log fields that internally splits text into a low cardinality template component
5048 * and high cardinality argument component. Separating these pieces allows the template component to be highly compressed.
@@ -159,7 +157,15 @@ public PatternedTextFieldMapper build(MapperBuilderContext context) {
159157 indexCreatedVersion ,
160158 true
161159 ).indexed (false ).build (context );
162- return new PatternedTextFieldMapper (leafName (), fieldType , patternedTextFieldType , builderParams , this , templateIdMapper );
160+ return new PatternedTextFieldMapper (
161+ leafName (),
162+ fieldType ,
163+ patternedTextFieldType ,
164+ builderParams ,
165+ this ,
166+ templateIdMapper ,
167+ context .isSourceSynthetic ()
168+ );
163169 }
164170 }
165171
@@ -172,14 +178,16 @@ public PatternedTextFieldMapper build(MapperBuilderContext context) {
172178 private final int positionIncrementGap ;
173179 private final FieldType fieldType ;
174180 private final KeywordFieldMapper templateIdMapper ;
181+ private final boolean isSourceSynthetic ;
175182
176183 private PatternedTextFieldMapper (
177184 String simpleName ,
178185 FieldType fieldType ,
179186 PatternedTextFieldType mappedFieldType ,
180187 BuilderParams builderParams ,
181188 Builder builder ,
182- KeywordFieldMapper templateIdMapper
189+ KeywordFieldMapper templateIdMapper ,
190+ boolean isSourceSynthetic
183191 ) {
184192 super (simpleName , mappedFieldType , builderParams );
185193 assert mappedFieldType .getTextSearchInfo ().isTokenized ();
@@ -191,6 +199,7 @@ private PatternedTextFieldMapper(
191199 this .indexOptions = builder .indexOptions .getValue ();
192200 this .positionIncrementGap = builder .analyzers .positionIncrementGap .getValue ();
193201 this .templateIdMapper = templateIdMapper ;
202+ this .isSourceSynthetic = isSourceSynthetic ;
194203 }
195204
196205 @ Override
@@ -232,21 +241,19 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
232241 if (templateBytes .length >= IndexWriter .MAX_TERM_LENGTH ) {
233242 logger .error (
234243 "pattern text template is longer than allowed maximum term length.\n Template={}\n Original value={}" ,
235- templateBytes ,
244+ templateBytes . utf8ToString () ,
236245 value
237246 );
238- byte [] prefix = new byte [30 ];
239- System .arraycopy (templateBytes .bytes , templateBytes .offset , prefix , 0 , 30 );
240- String msg = "pattern text template is longer than allowed maximum term length=\" "
241- + fieldType ().name ()
242- + "\" (whose "
243- + "UTF8 encoding is longer than the max length "
244- + MAX_TERM_LENGTH
245- + "), all of which were "
246- + "skipped. The prefix of the first immense term is: '"
247- + Arrays .toString (prefix )
248- + "...'" ;
249- throw new IllegalArgumentException (msg );
247+ // Maybe adding template id helps with compressing the original stored field:
248+ context .doc ().add (templateIdMapper .buildKeywordField (new BytesRef (parts .templateId ())));
249+ // Even when the template is too large, we can still create an inverted index on the original value:
250+ context .doc ().add (new Field (fieldType ().name (), value , fieldType ));
251+ // Flag the field as ignored, because its template reached the maximum term length:
252+ context .addIgnoredField (fullPath ());
253+ if (isSourceSynthetic ) {
254+ context .doc ().add (new StoredField (fieldType ().name () + ".original" , value ));
255+ }
256+ return ;
250257 }
251258
252259 // Add index on original value
0 commit comments