| 
18 | 18 | 
 
  | 
19 | 19 | package org.elasticsearch.index.mapper.vectors;  | 
20 | 20 | 
 
  | 
21 |  | -import org.apache.lucene.analysis.Analyzer;  | 
22 |  | -import org.apache.lucene.analysis.TokenStream;  | 
23 |  | -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;  | 
24 |  | -import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;  | 
25 | 21 | import org.apache.lucene.document.FeatureField;  | 
26 |  | -import org.apache.lucene.document.Field;  | 
27 |  | -import org.apache.lucene.document.FieldType;  | 
28 |  | -import org.apache.lucene.index.IndexOptions;  | 
29 | 22 | 
 
  | 
30 | 23 | /**  | 
31 | 24 |  * This class is forked from the Lucene {@link FeatureField} implementation to enable support for storing term vectors.  | 
32 |  | - * It should be removed once apache/lucene#14034 becomes available.  | 
 | 25 | + * Its purpose is to allow decoding the feature value from the term frequency.  | 
33 | 26 |  */  | 
34 |  | -public final class XFeatureField extends Field {  | 
35 |  | -    private static final FieldType FIELD_TYPE = new FieldType();  | 
36 |  | -    private static final FieldType FIELD_TYPE_STORE_TERM_VECTORS = new FieldType();  | 
37 |  | - | 
38 |  | -    static {  | 
39 |  | -        FIELD_TYPE.setTokenized(false);  | 
40 |  | -        FIELD_TYPE.setOmitNorms(true);  | 
41 |  | -        FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS);  | 
42 |  | - | 
43 |  | -        FIELD_TYPE_STORE_TERM_VECTORS.setTokenized(false);  | 
44 |  | -        FIELD_TYPE_STORE_TERM_VECTORS.setOmitNorms(true);  | 
45 |  | -        FIELD_TYPE_STORE_TERM_VECTORS.setIndexOptions(IndexOptions.DOCS_AND_FREQS);  | 
46 |  | -        FIELD_TYPE_STORE_TERM_VECTORS.setStoreTermVectors(true);  | 
47 |  | -    }  | 
48 |  | - | 
49 |  | -    private float featureValue;  | 
50 |  | - | 
51 |  | -    /**  | 
52 |  | -     * Create a feature.  | 
53 |  | -     *  | 
54 |  | -     * @param fieldName The name of the field to store the information into. All features may be  | 
55 |  | -     *     stored in the same field.  | 
56 |  | -     * @param featureName The name of the feature, eg. 'pagerank`. It will be indexed as a term.  | 
57 |  | -     * @param featureValue The value of the feature, must be a positive, finite, normal float.  | 
58 |  | -     */  | 
59 |  | -    public XFeatureField(String fieldName, String featureName, float featureValue) {  | 
60 |  | -        this(fieldName, featureName, featureValue, false);  | 
61 |  | -    }  | 
62 |  | - | 
63 |  | -    /**  | 
64 |  | -     * Create a feature.  | 
65 |  | -     *  | 
66 |  | -     * @param fieldName    The name of the field to store the information into. All features may be  | 
67 |  | -     *                     stored in the same field.  | 
68 |  | -     * @param featureName  The name of the feature, eg. 'pagerank`. It will be indexed as a term.  | 
69 |  | -     * @param featureValue The value of the feature, must be a positive, finite, normal float.  | 
70 |  | -     */  | 
71 |  | -    public XFeatureField(String fieldName, String featureName, float featureValue, boolean storeTermVectors) {  | 
72 |  | -        super(fieldName, featureName, storeTermVectors ? FIELD_TYPE_STORE_TERM_VECTORS : FIELD_TYPE);  | 
73 |  | -        setFeatureValue(featureValue);  | 
74 |  | -    }  | 
75 |  | - | 
76 |  | -    /**  | 
77 |  | -     * Update the feature value of this field.  | 
78 |  | -     */  | 
79 |  | -    public void setFeatureValue(float featureValue) {  | 
80 |  | -        if (Float.isFinite(featureValue) == false) {  | 
81 |  | -            throw new IllegalArgumentException(  | 
82 |  | -                "featureValue must be finite, got: " + featureValue + " for feature " + fieldsData + " on field " + name  | 
83 |  | -            );  | 
84 |  | -        }  | 
85 |  | -        if (featureValue < Float.MIN_NORMAL) {  | 
86 |  | -            throw new IllegalArgumentException(  | 
87 |  | -                "featureValue must be a positive normal float, got: "  | 
88 |  | -                    + featureValue  | 
89 |  | -                    + " for feature "  | 
90 |  | -                    + fieldsData  | 
91 |  | -                    + " on field "  | 
92 |  | -                    + name  | 
93 |  | -                    + " which is less than the minimum positive normal float: "  | 
94 |  | -                    + Float.MIN_NORMAL  | 
95 |  | -            );  | 
96 |  | -        }  | 
97 |  | -        this.featureValue = featureValue;  | 
98 |  | -    }  | 
99 |  | - | 
100 |  | -    @Override  | 
101 |  | -    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {  | 
102 |  | -        FeatureTokenStream stream;  | 
103 |  | -        if (reuse instanceof FeatureTokenStream) {  | 
104 |  | -            stream = (FeatureTokenStream) reuse;  | 
105 |  | -        } else {  | 
106 |  | -            stream = new FeatureTokenStream();  | 
107 |  | -        }  | 
108 |  | - | 
109 |  | -        int freqBits = Float.floatToIntBits(featureValue);  | 
110 |  | -        stream.setValues((String) fieldsData, freqBits >>> 15);  | 
111 |  | -        return stream;  | 
112 |  | -    }  | 
113 |  | - | 
114 |  | -    /**  | 
115 |  | -     * This is useful if you have multiple features sharing a name and you want to take action to  | 
116 |  | -     * deduplicate them.  | 
117 |  | -     *  | 
118 |  | -     * @return the feature value of this field.  | 
119 |  | -     */  | 
120 |  | -    public float getFeatureValue() {  | 
121 |  | -        return featureValue;  | 
122 |  | -    }  | 
123 |  | - | 
124 |  | -    private static final class FeatureTokenStream extends TokenStream {  | 
125 |  | -        private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);  | 
126 |  | -        private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class);  | 
127 |  | -        private boolean used = true;  | 
128 |  | -        private String value = null;  | 
129 |  | -        private int freq = 0;  | 
130 |  | - | 
131 |  | -        private FeatureTokenStream() {}  | 
132 |  | - | 
133 |  | -        /**  | 
134 |  | -         * Sets the values  | 
135 |  | -         */  | 
136 |  | -        void setValues(String value, int freq) {  | 
137 |  | -            this.value = value;  | 
138 |  | -            this.freq = freq;  | 
139 |  | -        }  | 
140 |  | - | 
141 |  | -        @Override  | 
142 |  | -        public boolean incrementToken() {  | 
143 |  | -            if (used) {  | 
144 |  | -                return false;  | 
145 |  | -            }  | 
146 |  | -            clearAttributes();  | 
147 |  | -            termAttribute.append(value);  | 
148 |  | -            freqAttribute.setTermFrequency(freq);  | 
149 |  | -            used = true;  | 
150 |  | -            return true;  | 
151 |  | -        }  | 
152 |  | - | 
153 |  | -        @Override  | 
154 |  | -        public void reset() {  | 
155 |  | -            used = false;  | 
156 |  | -        }  | 
157 |  | - | 
158 |  | -        @Override  | 
159 |  | -        public void close() {  | 
160 |  | -            value = null;  | 
161 |  | -        }  | 
162 |  | -    }  | 
163 |  | - | 
 | 27 | +public final class XFeatureField {  | 
164 | 28 |     static final int MAX_FREQ = Float.floatToIntBits(Float.MAX_VALUE) >>> 15;  | 
165 | 29 | 
 
  | 
166 | 30 |     static float decodeFeatureValue(float freq) {  | 
 | 
0 commit comments