Skip to content

Commit fca0f83

Browse files
committed
Change to BinaryDocValues
PatternedTextDocValues extended SortedSetDocValues, but this does not work because there is no ordinal for the full patterned text. Instead, use binary doc values, which don't use a term dictionary.
1 parent e4d4830 commit fca0f83

File tree

9 files changed

+131
-157
lines changed

9 files changed

+131
-157
lines changed

x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextBlockLoader.java

Lines changed: 0 additions & 62 deletions
This file was deleted.

x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextDocValues.java

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
package org.elasticsearch.xpack.patternedtext;
99

10+
import org.apache.lucene.index.BinaryDocValues;
1011
import org.apache.lucene.index.DocValues;
1112
import org.apache.lucene.index.LeafReader;
1213
import org.apache.lucene.index.SortedSetDocValues;
@@ -16,7 +17,7 @@
1617
import java.util.ArrayList;
1718
import java.util.List;
1819

19-
public class PatternedTextDocValues extends SortedSetDocValues {
20+
public class PatternedTextDocValues extends BinaryDocValues {
2021
private final SortedSetDocValues templateDocValues;
2122
private final SortedSetDocValues argsDocValues;
2223

@@ -35,40 +36,28 @@ static PatternedTextDocValues from(LeafReader leafReader, String templateFieldNa
3536
return new PatternedTextDocValues(templateDocValues, argsDocValues);
3637
}
3738

38-
@Override
39-
public long nextOrd() throws IOException {
40-
return templateDocValues.nextOrd();
41-
}
42-
43-
@Override
44-
public int docValueCount() {
45-
return templateDocValues.docValueCount();
46-
}
47-
48-
@Override
49-
public BytesRef lookupOrd(long l) throws IOException {
50-
return new BytesRef(lookupOrdAsString(l));
51-
}
52-
53-
String lookupOrdAsString(long l) throws IOException {
54-
String template = templateDocValues.lookupOrd(l).utf8ToString();
39+
private String getNextStringValue() throws IOException {
40+
assert templateDocValues.docValueCount() == 1;
41+
String template = templateDocValues.lookupOrd(templateDocValues.nextOrd()).utf8ToString();
5542
int argsCount = PatternedTextValueProcessor.countArgs(template);
5643
List<String> args = new ArrayList<>(argsCount);
5744
if (argsCount > 0) {
45+
assert argsDocValues.docValueCount() == 1;
5846
var mergedArgs = argsDocValues.lookupOrd(argsDocValues.nextOrd());
5947
PatternedTextValueProcessor.decodeRemainingArgs(args, mergedArgs.utf8ToString());
6048
}
6149
return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, args));
6250
}
6351

6452
@Override
65-
public long getValueCount() {
66-
return templateDocValues.getValueCount();
53+
public BytesRef binaryValue() throws IOException {
54+
return new BytesRef(getNextStringValue());
6755
}
6856

6957
@Override
7058
public boolean advanceExact(int i) throws IOException {
7159
argsDocValues.advanceExact(i);
60+
// If template has a value, then message has a value. We don't have to check args here, since there may not be args for the doc
7261
return templateDocValues.advanceExact(i);
7362
}
7463

@@ -79,12 +68,18 @@ public int docID() {
7968

8069
@Override
8170
public int nextDoc() throws IOException {
82-
return templateDocValues.nextDoc();
71+
int templateNext = templateDocValues.nextDoc();
72+
int argsNext = argsDocValues.nextDoc();
73+
assert templateNext == argsNext;
74+
return templateNext;
8375
}
8476

8577
@Override
8678
public int advance(int i) throws IOException {
87-
return templateDocValues.advance(i);
79+
int templateAdvance = templateDocValues.advance(i);
80+
int argAdvance = argsDocValues.advance(i);
81+
assert templateAdvance == argAdvance;
82+
return templateAdvance;
8883
}
8984

9085
@Override

x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldType.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.elasticsearch.index.fielddata.FieldDataContext;
3232
import org.elasticsearch.index.fielddata.IndexFieldData;
3333
import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
34+
import org.elasticsearch.index.mapper.BlockDocValuesReader;
3435
import org.elasticsearch.index.mapper.BlockLoader;
3536
import org.elasticsearch.index.mapper.DocValueFetcher;
3637
import org.elasticsearch.index.mapper.StringFieldType;
@@ -88,11 +89,6 @@ public String familyTypeName() {
8889
return TextFieldMapper.CONTENT_TYPE;
8990
}
9091

91-
@Override
92-
public boolean isAggregatable() {
93-
return false;
94-
}
95-
9692
@Override
9793
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
9894
return new DocValueFetcher(docValueFormat(format, null), context.getForField(this, FielddataOperation.SEARCH));
@@ -240,7 +236,7 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
240236

241237
@Override
242238
public BlockLoader blockLoader(BlockLoaderContext blContext) {
243-
return new PatternedTextBlockLoader(name(), templateFieldName(), argsFieldName());
239+
return new BlockDocValuesReader.BytesRefsFromBinaryBlockLoader(name());
244240
}
245241

246242
@Override

x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextIndexFieldData.java

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,28 +10,33 @@
1010
import org.apache.lucene.index.LeafReaderContext;
1111
import org.apache.lucene.index.SortedSetDocValues;
1212
import org.apache.lucene.search.SortField;
13+
import org.apache.lucene.util.BytesRef;
1314
import org.elasticsearch.common.util.BigArrays;
14-
import org.elasticsearch.index.fielddata.FieldData;
1515
import org.elasticsearch.index.fielddata.IndexFieldData;
1616
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
17+
import org.elasticsearch.index.fielddata.LeafFieldData;
1718
import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData;
18-
import org.elasticsearch.index.fielddata.plain.AbstractIndexOrdinalsFieldData;
19-
import org.elasticsearch.index.fielddata.plain.AbstractLeafOrdinalsFieldData;
19+
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
2020
import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
2121
import org.elasticsearch.indices.breaker.CircuitBreakerService;
22+
import org.elasticsearch.script.field.DocValuesScriptFieldFactory;
2223
import org.elasticsearch.script.field.KeywordDocValuesField;
2324
import org.elasticsearch.script.field.ToScriptFieldFactory;
2425
import org.elasticsearch.search.DocValueFormat;
2526
import org.elasticsearch.search.MultiValueMode;
27+
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
2628
import org.elasticsearch.search.sort.BucketedSort;
2729
import org.elasticsearch.search.sort.SortOrder;
2830

29-
import static org.elasticsearch.search.aggregations.support.CoreValuesSourceType.KEYWORD;
31+
import java.io.IOException;
3032

31-
public class PatternedTextIndexFieldData extends AbstractIndexOrdinalsFieldData {
3233

33-
final SortedSetOrdinalsIndexFieldData templateFieldData;
34-
final SortedSetOrdinalsIndexFieldData argsFieldData;
34+
public class PatternedTextIndexFieldData implements IndexFieldData<LeafFieldData> {
35+
36+
private final SortedSetOrdinalsIndexFieldData templateFieldData;
37+
private final SortedSetOrdinalsIndexFieldData argsFieldData;
38+
private final ToScriptFieldFactory<SortedBinaryDocValues> toScriptFieldFactory;
39+
private final String name;
3540

3641
static class Builder implements IndexFieldData.Builder {
3742

@@ -52,45 +57,74 @@ static class Builder implements IndexFieldData.Builder {
5257
public PatternedTextIndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) {
5358
SortedSetOrdinalsIndexFieldData templateFieldData = templateFieldDataBuilder.build(cache, breakerService);
5459
SortedSetOrdinalsIndexFieldData argsFieldData = argsFieldDataBuilder.build(cache, breakerService);
55-
ToScriptFieldFactory<SortedSetDocValues> factory = (dv, n) -> new KeywordDocValuesField(FieldData.toString(dv), n);
56-
return new PatternedTextIndexFieldData(name, cache, breakerService, factory, templateFieldData, argsFieldData);
60+
ToScriptFieldFactory<SortedBinaryDocValues> factory = KeywordDocValuesField::new;
61+
return new PatternedTextIndexFieldData(name, factory, templateFieldData, argsFieldData);
5762
}
5863
}
5964

6065
PatternedTextIndexFieldData(
6166
String name,
62-
IndexFieldDataCache cache,
63-
CircuitBreakerService breakerService,
64-
ToScriptFieldFactory<SortedSetDocValues> toScriptFieldFactory,
67+
ToScriptFieldFactory<SortedBinaryDocValues> toScriptFieldFactory,
6568
SortedSetOrdinalsIndexFieldData templateFieldData,
6669
SortedSetOrdinalsIndexFieldData argsFieldData
6770
) {
68-
super(name, KEYWORD, cache, breakerService, toScriptFieldFactory);
71+
this.name = name;
6972
this.templateFieldData = templateFieldData;
7073
this.argsFieldData = argsFieldData;
74+
this.toScriptFieldFactory = toScriptFieldFactory;
7175
}
7276

7377
@Override
74-
public LeafOrdinalsFieldData load(LeafReaderContext context) {
78+
public String getFieldName() {
79+
return name;
80+
}
81+
82+
@Override
83+
public ValuesSourceType getValuesSourceType() {
84+
return null;
85+
}
86+
87+
@Override
88+
public LeafFieldData load(LeafReaderContext context) {
7589
return loadDirect(context);
7690
}
7791

7892
@Override
79-
public LeafOrdinalsFieldData loadDirect(LeafReaderContext context) {
93+
public LeafFieldData loadDirect(LeafReaderContext context) {
8094
LeafOrdinalsFieldData leafTemplateFieldData = templateFieldData.loadDirect(context);
8195
LeafOrdinalsFieldData leafArgsFieldData = argsFieldData.loadDirect(context);
96+
return new LeafFieldData() {
97+
@Override
98+
public DocValuesScriptFieldFactory getScriptFieldFactory(String name) {
99+
return toScriptFieldFactory.getScriptFieldFactory(getBytesValues(), name);
100+
}
82101

83-
return new AbstractLeafOrdinalsFieldData(toScriptFieldFactory) {
84102
@Override
85-
public SortedSetDocValues getOrdinalsValues() {
103+
public SortedBinaryDocValues getBytesValues() {
86104
SortedSetDocValues templateDocValues = leafTemplateFieldData.getOrdinalsValues();
87105
SortedSetDocValues argsDocValues = leafArgsFieldData.getOrdinalsValues();
88-
return new PatternedTextDocValues(templateDocValues, argsDocValues);
106+
var docValues = new PatternedTextDocValues(templateDocValues, argsDocValues);
107+
return new SortedBinaryDocValues() {
108+
@Override
109+
public boolean advanceExact(int doc) throws IOException {
110+
return docValues.advanceExact(doc);
111+
}
112+
113+
@Override
114+
public int docValueCount() {
115+
return 1;
116+
}
117+
118+
@Override
119+
public BytesRef nextValue() throws IOException {
120+
return docValues.binaryValue();
121+
}
122+
};
89123
}
90124

91125
@Override
92126
public long ramBytesUsed() {
93-
return 0; // unknown
127+
return leafTemplateFieldData.ramBytesUsed() + leafArgsFieldData.ramBytesUsed();
94128
}
95129
};
96130
}

x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextSyntheticFieldLoaderLayer.java

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
package org.elasticsearch.xpack.patternedtext;
99

1010
import org.apache.lucene.index.LeafReader;
11+
import org.apache.lucene.search.DocIdSetIterator;
1112
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
1213
import org.elasticsearch.xcontent.XContentBuilder;
1314

@@ -27,13 +28,13 @@ class PatternedTextSyntheticFieldLoaderLayer implements CompositeSyntheticFieldL
2728

2829
@Override
2930
public long valueCount() {
30-
return loader != null ? loader.count() : 0;
31+
return loader != null && loader.hasValue() ? 1 : 0;
3132
}
3233

3334
@Override
3435
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
3536
var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, argsFieldName);
36-
if (docValues == null || docValues.getValueCount() == 0) {
37+
if (docValues == null) {
3738
return null;
3839
}
3940
loader = new PatternedTextSyntheticFieldLoader(docValues);
@@ -42,7 +43,7 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
4243

4344
@Override
4445
public boolean hasValue() {
45-
return loader != null && loader.count() > 0;
46+
return loader != null && loader.hasValue();
4647
}
4748

4849
@Override
@@ -57,23 +58,26 @@ public String fieldName() {
5758
return "";
5859
}
5960

60-
private record PatternedTextSyntheticFieldLoader(PatternedTextDocValues docValues) implements DocValuesLoader {
61+
private static class PatternedTextSyntheticFieldLoader implements DocValuesLoader {
62+
private final PatternedTextDocValues docValues;
63+
private boolean hasValue = false;
64+
PatternedTextSyntheticFieldLoader(PatternedTextDocValues docValues) {
65+
this.docValues = docValues;
66+
}
6167

62-
@Override
63-
public boolean advanceToDoc(int docId) throws IOException {
64-
return docValues.advanceExact(docId);
68+
public boolean hasValue() {
69+
assert docValues.docID() != DocIdSetIterator.NO_MORE_DOCS;
70+
return hasValue;
6571
}
6672

67-
public int count() {
68-
return docValues.docValueCount();
73+
@Override
74+
public boolean advanceToDoc(int docId) throws IOException {
75+
return hasValue = docValues.advanceExact(docId);
6976
}
7077

7178
public void write(XContentBuilder b) throws IOException {
72-
if (docValues.getValueCount() == 0) {
73-
return;
74-
}
75-
for (int i = 0; i < count(); i++) {
76-
b.value(docValues.lookupOrdAsString(docValues.nextOrd()));
79+
if (hasValue) {
80+
b.value(docValues.binaryValue().utf8ToString());
7781
}
7882
}
7983
}

0 commit comments

Comments
 (0)