|
18 | 18 |
|
19 | 19 | package org.elasticsearch.index.mapper.vectors; |
20 | 20 |
|
21 | | -import org.apache.lucene.analysis.Analyzer; |
22 | | -import org.apache.lucene.analysis.TokenStream; |
23 | | -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
24 | | -import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute; |
25 | 21 | import org.apache.lucene.document.FeatureField; |
26 | | -import org.apache.lucene.document.Field; |
27 | | -import org.apache.lucene.document.FieldType; |
28 | | -import org.apache.lucene.index.IndexOptions; |
29 | 22 |
|
30 | 23 | /** |
31 | 24 | * This class is forked from the Lucene {@link FeatureField} implementation to enable support for storing term vectors. |
32 | | - * It should be removed once apache/lucene#14034 becomes available. |
| 25 | + * Its purpose is to allow decoding the feature value from the term frequency. |
33 | 26 | */ |
34 | | -public final class XFeatureField extends Field { |
35 | | - private static final FieldType FIELD_TYPE = new FieldType(); |
36 | | - private static final FieldType FIELD_TYPE_STORE_TERM_VECTORS = new FieldType(); |
37 | | - |
38 | | - static { |
39 | | - FIELD_TYPE.setTokenized(false); |
40 | | - FIELD_TYPE.setOmitNorms(true); |
41 | | - FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS); |
42 | | - |
43 | | - FIELD_TYPE_STORE_TERM_VECTORS.setTokenized(false); |
44 | | - FIELD_TYPE_STORE_TERM_VECTORS.setOmitNorms(true); |
45 | | - FIELD_TYPE_STORE_TERM_VECTORS.setIndexOptions(IndexOptions.DOCS_AND_FREQS); |
46 | | - FIELD_TYPE_STORE_TERM_VECTORS.setStoreTermVectors(true); |
47 | | - } |
48 | | - |
49 | | - private float featureValue; |
50 | | - |
51 | | - /** |
52 | | - * Create a feature. |
53 | | - * |
54 | | - * @param fieldName The name of the field to store the information into. All features may be |
55 | | - * stored in the same field. |
56 | | - * @param featureName The name of the feature, e.g. 'pagerank'. It will be indexed as a term. |
57 | | - * @param featureValue The value of the feature, must be a positive, finite, normal float. |
58 | | - */ |
59 | | - public XFeatureField(String fieldName, String featureName, float featureValue) { |
60 | | - this(fieldName, featureName, featureValue, false); |
61 | | - } |
62 | | - |
63 | | - /** |
64 | | - * Create a feature. |
65 | | - * |
66 | | - * @param fieldName The name of the field to store the information into. All features may be |
67 | | - * stored in the same field. |
68 | | - * @param featureName The name of the feature, e.g. 'pagerank'. It will be indexed as a term. |
69 | | - * @param featureValue The value of the feature, must be a positive, finite, normal float. |
70 | | - */ |
71 | | - public XFeatureField(String fieldName, String featureName, float featureValue, boolean storeTermVectors) { |
72 | | - super(fieldName, featureName, storeTermVectors ? FIELD_TYPE_STORE_TERM_VECTORS : FIELD_TYPE); |
73 | | - setFeatureValue(featureValue); |
74 | | - } |
75 | | - |
76 | | - /** |
77 | | - * Update the feature value of this field. |
78 | | - */ |
79 | | - public void setFeatureValue(float featureValue) { |
80 | | - if (Float.isFinite(featureValue) == false) { |
81 | | - throw new IllegalArgumentException( |
82 | | - "featureValue must be finite, got: " + featureValue + " for feature " + fieldsData + " on field " + name |
83 | | - ); |
84 | | - } |
85 | | - if (featureValue < Float.MIN_NORMAL) { |
86 | | - throw new IllegalArgumentException( |
87 | | - "featureValue must be a positive normal float, got: " |
88 | | - + featureValue |
89 | | - + " for feature " |
90 | | - + fieldsData |
91 | | - + " on field " |
92 | | - + name |
93 | | - + " which is less than the minimum positive normal float: " |
94 | | - + Float.MIN_NORMAL |
95 | | - ); |
96 | | - } |
97 | | - this.featureValue = featureValue; |
98 | | - } |
99 | | - |
100 | | - @Override |
101 | | - public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { |
102 | | - FeatureTokenStream stream; |
103 | | - if (reuse instanceof FeatureTokenStream) { |
104 | | - stream = (FeatureTokenStream) reuse; |
105 | | - } else { |
106 | | - stream = new FeatureTokenStream(); |
107 | | - } |
108 | | - |
109 | | - int freqBits = Float.floatToIntBits(featureValue); |
110 | | - stream.setValues((String) fieldsData, freqBits >>> 15); |
111 | | - return stream; |
112 | | - } |
113 | | - |
114 | | - /** |
115 | | - * This is useful if you have multiple features sharing a name and you want to take action to |
116 | | - * deduplicate them. |
117 | | - * |
118 | | - * @return the feature value of this field. |
119 | | - */ |
120 | | - public float getFeatureValue() { |
121 | | - return featureValue; |
122 | | - } |
123 | | - |
124 | | - private static final class FeatureTokenStream extends TokenStream { |
125 | | - private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); |
126 | | - private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class); |
127 | | - private boolean used = true; |
128 | | - private String value = null; |
129 | | - private int freq = 0; |
130 | | - |
131 | | - private FeatureTokenStream() {} |
132 | | - |
133 | | - /** |
134 | | - * Sets the values |
135 | | - */ |
136 | | - void setValues(String value, int freq) { |
137 | | - this.value = value; |
138 | | - this.freq = freq; |
139 | | - } |
140 | | - |
141 | | - @Override |
142 | | - public boolean incrementToken() { |
143 | | - if (used) { |
144 | | - return false; |
145 | | - } |
146 | | - clearAttributes(); |
147 | | - termAttribute.append(value); |
148 | | - freqAttribute.setTermFrequency(freq); |
149 | | - used = true; |
150 | | - return true; |
151 | | - } |
152 | | - |
153 | | - @Override |
154 | | - public void reset() { |
155 | | - used = false; |
156 | | - } |
157 | | - |
158 | | - @Override |
159 | | - public void close() { |
160 | | - value = null; |
161 | | - } |
162 | | - } |
163 | | - |
| 27 | +public final class XFeatureField { |
164 | 28 | static final int MAX_FREQ = Float.floatToIntBits(Float.MAX_VALUE) >>> 15; |
165 | 29 |
|
166 | 30 | static float decodeFeatureValue(float freq) { |
|
0 commit comments