1111
1212import org .apache .lucene .document .FeatureField ;
1313import org .apache .lucene .index .IndexableField ;
14+ import org .apache .lucene .index .LeafReader ;
15+ import org .apache .lucene .index .LeafReaderContext ;
16+ import org .apache .lucene .index .PostingsEnum ;
17+ import org .apache .lucene .index .TermVectors ;
18+ import org .apache .lucene .index .TermsEnum ;
19+ import org .apache .lucene .search .DocIdSetIterator ;
1420import org .apache .lucene .search .MatchNoDocsQuery ;
1521import org .apache .lucene .search .Query ;
1622import org .apache .lucene .util .BytesRef ;
2531import org .elasticsearch .index .mapper .FieldMapper ;
2632import org .elasticsearch .index .mapper .MappedFieldType ;
2733import org .elasticsearch .index .mapper .MapperBuilderContext ;
34+ import org .elasticsearch .index .mapper .SourceLoader ;
2835import org .elasticsearch .index .mapper .SourceValueFetcher ;
2936import org .elasticsearch .index .mapper .TextSearchInfo ;
3037import org .elasticsearch .index .mapper .ValueFetcher ;
3138import org .elasticsearch .index .query .SearchExecutionContext ;
39+ import org .elasticsearch .search .fetch .StoredFieldsSpec ;
40+ import org .elasticsearch .search .lookup .Source ;
41+ import org .elasticsearch .xcontent .XContentBuilder ;
3242import org .elasticsearch .xcontent .XContentParser .Token ;
3343
3444import java .io .IOException ;
45+ import java .io .UncheckedIOException ;
46+ import java .util .LinkedHashMap ;
47+ import java .util .List ;
3548import java .util .Map ;
49+ import java .util .stream .Stream ;
3650
3751import static org .elasticsearch .index .query .AbstractQueryBuilder .DEFAULT_BOOST ;
3852
@@ -52,8 +66,12 @@ public class SparseVectorFieldMapper extends FieldMapper {
5266 static final IndexVersion NEW_SPARSE_VECTOR_INDEX_VERSION = IndexVersions .NEW_SPARSE_VECTOR ;
5367 static final IndexVersion SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION = IndexVersions .SPARSE_VECTOR_IN_FIELD_NAMES_SUPPORT ;
5468
55- public static class Builder extends FieldMapper .Builder {
69+ private static SparseVectorFieldMapper toType (FieldMapper in ) {
70+ return (SparseVectorFieldMapper ) in ;
71+ }
5672
73+ public static class Builder extends FieldMapper .Builder {
74+ private final Parameter <Boolean > stored = Parameter .storeParam (m -> toType (m ).fieldType ().isStored (), false );
5775 private final Parameter <Map <String , String >> meta = Parameter .metaParam ();
5876
5977 public Builder (String name ) {
@@ -62,14 +80,14 @@ public Builder(String name) {
6280
// Lists the mapping parameters this builder exposes.
// The removed ("-") line returned only `meta`; the added ("+") line returns `stored` first, then `meta`.
6381 @ Override
6482 protected Parameter <?>[] getParameters () {
65- return new Parameter <?>[] { meta };
83+ return new Parameter <?>[] { stored , meta };
6684 }
6785
// Builds the mapper from the builder's current parameter values.
// The field type is created under the full name derived from `context` and the leaf name.
// The added ("+") line additionally passes the `stored` parameter value to the field type;
// the removed ("-") line passed only the `meta` value.
6886 @ Override
6987 public SparseVectorFieldMapper build (MapperBuilderContext context ) {
7088 return new SparseVectorFieldMapper (
7189 leafName (),
72- new SparseVectorFieldType (context .buildFullName (leafName ()), meta .getValue ()),
90+ new SparseVectorFieldType (context .buildFullName (leafName ()), stored . getValue (), meta .getValue ()),
7391 builderParams (this , context )
7492 );
7593 }
@@ -87,8 +105,8 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) {
87105
88106 public static final class SparseVectorFieldType extends MappedFieldType {
89107
// Constructor of the field type.
// The removed ("-") variant hard-coded `false` as the third super-constructor argument;
// the added ("+") variant takes an `isStored` flag and forwards it in that position.
// The other super-constructor arguments are unchanged between the two variants.
90- public SparseVectorFieldType (String name , Map <String , String > meta ) {
91- super (name , true , false , false , TextSearchInfo .SIMPLE_MATCH_ONLY , meta );
108+ public SparseVectorFieldType (String name , boolean isStored , Map <String , String > meta ) {
109+ super (name , true , isStored , false , TextSearchInfo .SIMPLE_MATCH_ONLY , meta );
92110 }
93111
94112 @ Override
@@ -103,6 +121,9 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
103121
104122 @ Override
105123 public ValueFetcher valueFetcher (SearchExecutionContext context , String format ) {
124+ if (isStored ()) {
125+ return new SparseVectorValueFetcher (name ());
126+ }
106127 return SourceValueFetcher .identity (name (), context , format );
107128 }
108129
@@ -135,6 +156,14 @@ private SparseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldTy
135156 super (simpleName , mappedFieldType , builderParams );
136157 }
137158
159+ @ Override
160+ protected SyntheticSourceSupport syntheticSourceSupport () {
161+ if (fieldType ().isStored ()) {
162+ return new SyntheticSourceSupport .Native (new SparseVectorSyntheticFieldLoader (fullPath (), leafName ()));
163+ }
164+ return super .syntheticSourceSupport ();
165+ }
166+
138167 @ Override
139168 public Map <String , NamedAnalyzer > indexAnalyzers () {
140169 return Map .of (mappedFieldType .name (), Lucene .KEYWORD_ANALYZER );
@@ -189,9 +218,9 @@ public void parse(DocumentParserContext context) throws IOException {
189218 // based on recommendations from this paper: https://arxiv.org/pdf/2305.18494.pdf
190219 IndexableField currentField = context .doc ().getByKey (key );
191220 if (currentField == null ) {
192- context .doc ().addWithKey (key , new FeatureField (fullPath (), feature , value ));
193- } else if (currentField instanceof FeatureField && ((FeatureField ) currentField ).getFeatureValue () < value ) {
194- ((FeatureField ) currentField ).setFeatureValue (value );
221+ context .doc ().addWithKey (key , new XFeatureField (fullPath (), feature , value , fieldType (). isStored () ));
222+ } else if (currentField instanceof XFeatureField && ((XFeatureField ) currentField ).getFeatureValue () < value ) {
223+ ((XFeatureField ) currentField ).setFeatureValue (value );
195224 }
196225 } else {
197226 throw new IllegalArgumentException (
@@ -219,4 +248,114 @@ protected String contentType() {
219248 return CONTENT_TYPE ;
220249 }
221250
251+ private static class SparseVectorValueFetcher implements ValueFetcher {
252+ private final String fieldName ;
253+ private TermVectors termVectors ;
254+
255+ private SparseVectorValueFetcher (String fieldName ) {
256+ this .fieldName = fieldName ;
257+ }
258+
259+ @ Override
260+ public void setNextReader (LeafReaderContext context ) {
261+ try {
262+ termVectors = context .reader ().termVectors ();
263+ } catch (IOException exc ) {
264+ throw new UncheckedIOException (exc );
265+ }
266+ }
267+
268+ @ Override
269+ public List <Object > fetchValues (Source source , int doc , List <Object > ignoredValues ) throws IOException {
270+ if (termVectors == null ) {
271+ return List .of ();
272+ }
273+ var terms = termVectors .get (doc , fieldName );
274+ if (terms == null ) {
275+ return List .of ();
276+ }
277+
278+ var termsEnum = terms .iterator ();
279+ PostingsEnum postingsScratch = null ;
280+ Map <String , Float > result = new LinkedHashMap <>();
281+ while (termsEnum .next () != null ) {
282+ postingsScratch = termsEnum .postings (postingsScratch );
283+ postingsScratch .nextDoc ();
284+ result .put (termsEnum .term ().utf8ToString (), XFeatureField .decodeFeatureValue (postingsScratch .freq ()));
285+ assert postingsScratch .nextDoc () == DocIdSetIterator .NO_MORE_DOCS ;
286+ }
287+ return List .of (result );
288+ }
289+
290+ @ Override
291+ public StoredFieldsSpec storedFieldsSpec () {
292+ return StoredFieldsSpec .NO_REQUIREMENTS ;
293+ }
294+ }
295+
296+ private static class SparseVectorSyntheticFieldLoader implements SourceLoader .SyntheticFieldLoader {
297+ private final String fullPath ;
298+ private final String leafName ;
299+
300+ private TermsEnum termsDocEnum ;
301+
302+ private SparseVectorSyntheticFieldLoader (String fullPath , String leafName ) {
303+ this .fullPath = fullPath ;
304+ this .leafName = leafName ;
305+ }
306+
307+ @ Override
308+ public Stream <Map .Entry <String , StoredFieldLoader >> storedFieldLoaders () {
309+ return Stream .of ();
310+ }
311+
312+ @ Override
313+ public DocValuesLoader docValuesLoader (LeafReader leafReader , int [] docIdsInLeaf ) throws IOException {
314+ var fieldInfos = leafReader .getFieldInfos ().fieldInfo (fullPath );
315+ if (fieldInfos == null || fieldInfos .hasTermVectors () == false ) {
316+ return null ;
317+ }
318+ return docId -> {
319+ var terms = leafReader .termVectors ().get (docId , fullPath );
320+ if (terms == null ) {
321+ return false ;
322+ }
323+ termsDocEnum = terms .iterator ();
324+ if (termsDocEnum .next () == null ) {
325+ termsDocEnum = null ;
326+ return false ;
327+ }
328+ return true ;
329+ };
330+ }
331+
332+ @ Override
333+ public boolean hasValue () {
334+ return termsDocEnum != null ;
335+ }
336+
337+ @ Override
338+ public void write (XContentBuilder b ) throws IOException {
339+ assert termsDocEnum != null ;
340+ PostingsEnum reuse = null ;
341+ b .startObject (leafName );
342+ do {
343+ reuse = termsDocEnum .postings (reuse );
344+ reuse .nextDoc ();
345+ b .field (termsDocEnum .term ().utf8ToString (), XFeatureField .decodeFeatureValue (reuse .freq ()));
346+ } while (termsDocEnum .next () != null );
347+ b .endObject ();
348+ }
349+
350+ @ Override
351+ public String fieldName () {
352+ return leafName ;
353+ }
354+
355+ @ Override
356+ public void reset () {
357+ termsDocEnum = null ;
358+ }
359+ }
360+
222361}
0 commit comments