Skip to content

Commit 8ab4215

Browse files
authored
Added support for doc_values to text fields (elastic#141225)
* Added support for doc_values to text fields
* Addressed feedback
* Added a check to verify doc ids, fixed newline error
* Fixed terms query requests failing when indexing is disabled
* Changed cardinality to HIGH by default, disabled indexing on all new tests
* Unmuted cardinality yaml tests, changed termsQuery to use BooleanQuery
1 parent 8c91446 commit 8ab4215

File tree

25 files changed

+1219
-105
lines changed

25 files changed

+1219
-105
lines changed

modules/aggregations/build.gradle

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ if (buildParams.snapshotBuild == false) {
3333
tasks.named("internalClusterTest").configure {
3434
systemProperty 'es.index_mode_feature_flag_registered', 'true'
3535
}
36+
tasks.named("yamlRestTest").configure {
37+
// text doc_values feature flag not available in non-snapshot builds
38+
systemProperty 'tests.rest.blacklist', [
39+
"aggregations/terms_text_docvalues/*"
40+
].join(',')
41+
}
3642
}
3743

3844
artifacts {
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
---
2+
"Terms aggregation on text field with doc_values":
3+
- requires:
4+
cluster_features: ["mapper.text.doc_values"]
5+
reason: "text doc_values support required"
6+
7+
- do:
8+
indices.create:
9+
index: test_text_docvalues
10+
body:
11+
settings:
12+
number_of_shards: 1
13+
mappings:
14+
properties:
15+
text_field:
16+
type: text
17+
index: false
18+
doc_values: true
19+
20+
- do:
21+
index:
22+
index: test_text_docvalues
23+
id: "1"
24+
body: { "text_field": "foo bar" }
25+
26+
- do:
27+
index:
28+
index: test_text_docvalues
29+
id: "2"
30+
body: { "text_field": "foo bar" }
31+
32+
- do:
33+
index:
34+
index: test_text_docvalues
35+
id: "3"
36+
body: { "text_field": "baz qux" }
37+
38+
- do:
39+
indices.refresh: {}
40+
41+
- do:
42+
search:
43+
rest_total_hits_as_int: true
44+
body:
45+
size: 0
46+
aggs:
47+
text_terms:
48+
terms:
49+
field: text_field
50+
51+
- match: { hits.total: 3 }
52+
- length: { aggregations.text_terms.buckets: 2 }
53+
- match: { aggregations.text_terms.buckets.0.key: "foo bar" }
54+
- match: { aggregations.text_terms.buckets.0.doc_count: 2 }
55+
- match: { aggregations.text_terms.buckets.1.key: "baz qux" }
56+
- match: { aggregations.text_terms.buckets.1.doc_count: 1 }
57+
58+
---
59+
"Terms aggregation on text field with doc_values - multi value":
60+
- requires:
61+
cluster_features: ["mapper.text.doc_values"]
62+
reason: "text doc_values support required"
63+
64+
- do:
65+
indices.create:
66+
index: test_text_docvalues_multi
67+
body:
68+
settings:
69+
number_of_shards: 1
70+
mappings:
71+
properties:
72+
text_field:
73+
type: text
74+
index: false
75+
doc_values: true
76+
77+
- do:
78+
index:
79+
index: test_text_docvalues_multi
80+
id: "1"
81+
body:
82+
text_field: [ "value one", "value two" ]
83+
84+
- do:
85+
index:
86+
index: test_text_docvalues_multi
87+
id: "2"
88+
body:
89+
text_field: [ "value one", "value three", "value four" ]
90+
91+
- do:
92+
indices.refresh: {}
93+
94+
- do:
95+
search:
96+
rest_total_hits_as_int: true
97+
body:
98+
size: 0
99+
aggs:
100+
text_terms:
101+
terms:
102+
field: text_field
103+
104+
- match: { hits.total: 2 }
105+
- length: { aggregations.text_terms.buckets: 4 }
106+
- match: { aggregations.text_terms.buckets.0.key: "value one" }
107+
- match: { aggregations.text_terms.buckets.0.doc_count: 2 }
108+
- match: { aggregations.text_terms.buckets.1.key: "value four" }
109+
- match: { aggregations.text_terms.buckets.1.doc_count: 1 }
110+
- match: { aggregations.text_terms.buckets.2.key: "value three" }
111+
- match: { aggregations.text_terms.buckets.2.doc_count: 1 }
112+
- match: { aggregations.text_terms.buckets.3.key: "value two" }
113+
- match: { aggregations.text_terms.buckets.3.doc_count: 1 }
114+
115+
---
116+
"Indexing disabled":
117+
- requires:
118+
cluster_features: ["mapper.text.doc_values"]
119+
reason: "text doc_values support required"
120+
121+
- do:
122+
indices.create:
123+
index: test_text_no_index_docvalues
124+
body:
125+
settings:
126+
number_of_shards: 1
127+
mappings:
128+
properties:
129+
text_field:
130+
type: text
131+
index: false
132+
doc_values: true
133+
134+
- do:
135+
index:
136+
index: test_text_no_index_docvalues
137+
id: "1"
138+
body: { "text_field": "foo bar" }
139+
140+
- do:
141+
index:
142+
index: test_text_no_index_docvalues
143+
id: "2"
144+
body: { "text_field": "foo bar" }
145+
146+
- do:
147+
index:
148+
index: test_text_no_index_docvalues
149+
id: "3"
150+
body: { "text_field": "baz qux" }
151+
152+
- do:
153+
indices.refresh: {}
154+
155+
# Verify term query
156+
- do:
157+
search:
158+
rest_total_hits_as_int: true
159+
index: test_text_no_index_docvalues
160+
body:
161+
query:
162+
term:
163+
text_field: "foo bar"
164+
165+
- match: { hits.total: 2 }
166+
- match: { hits.hits.0._id: "1" }
167+
- match: { hits.hits.1._id: "2" }
168+
169+
# Verify terms query
170+
- do:
171+
search:
172+
rest_total_hits_as_int: true
173+
index: test_text_no_index_docvalues
174+
body:
175+
query:
176+
terms:
177+
text_field: ["foo bar", "baz qux"]
178+
179+
- match: { hits.total: 3 }
180+
181+
# Verify terms aggregation
182+
- do:
183+
search:
184+
rest_total_hits_as_int: true
185+
index: test_text_no_index_docvalues
186+
body:
187+
size: 0
188+
aggs:
189+
text_terms:
190+
terms:
191+
field: text_field
192+
193+
- match: { hits.total: 3 }
194+
- length: { aggregations.text_terms.buckets: 2 }
195+
- match: { aggregations.text_terms.buckets.0.key: "foo bar" }
196+
- match: { aggregations.text_terms.buckets.0.doc_count: 2 }
197+
- match: { aggregations.text_terms.buckets.1.key: "baz qux" }
198+
- match: { aggregations.text_terms.buckets.1.doc_count: 1 }

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
786786
context.addToFieldNames(fieldType().name());
787787

788788
// match only text isn't stored, so if synthetic source needs to be supported, we must find an alternative way of loading the field
789-
if (fieldType().textFieldType.storeFieldForSyntheticSource(indexCreatedVersion)) {
789+
if (fieldType().textFieldType.needsFallbackStorageForSyntheticSource(indexCreatedVersion)) {
790790
// check if we can use the delegate
791791
if (fieldType().canUseSyntheticSourceDelegateForSyntheticSource(value)) {
792792
return;

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilder
150150

151151
@Override
152152
public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
153-
FieldType fieldType = TextParams.buildFieldType(() -> true, store, indexOptions, norms, termVectors);
153+
FieldType fieldType = TextParams.buildFieldType(() -> true, store, () -> false, indexOptions, norms, termVectors);
154154
if (fieldType.indexOptions() == IndexOptions.NONE) {
155155
throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields must be indexed");
156156
}

qa/ccs-common-rest/build.gradle

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,6 @@ tasks.named("yamlRestTest") {
4141
'search.aggregation/50_filter/Terms lookup gets cached', // terms lookup by "index" doesn't seem to work correctly
4242
'search.aggregation/70_adjacency_matrix/Terms lookup' // terms lookup by "index" doesn't seem to work correctly
4343
]
44-
if (buildParams.snapshotBuild == false) {
45-
blacklist += [
46-
// doc_values.cardinality option not available in non-snapshot builds
47-
"search/395_binary_doc_values_search/*"
48-
]
49-
}
5044
systemProperty 'tests.rest.blacklist', blacklist.join(',')
5145
}
5246

qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ public class CcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
101101
// geohex_grid requires gold license
102102
.setting("xpack.license.self_generated.type", "trial")
103103
.feature(FeatureFlag.TIME_SERIES_MODE)
104-
.feature(FeatureFlag.SYNTHETIC_VECTORS);
104+
.feature(FeatureFlag.SYNTHETIC_VECTORS)
105+
.feature(FeatureFlag.EXTENDED_DOC_VALUES_PARAMS);
105106

106107
private static ElasticsearchCluster remoteCluster = ElasticsearchCluster.local()
107108
.name(REMOTE_CLUSTER_NAME)

qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ public class RcsCcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
101101
.setting("xpack.security.remote_cluster_client.ssl.enabled", "false")
102102
.feature(FeatureFlag.TIME_SERIES_MODE)
103103
.feature(FeatureFlag.SYNTHETIC_VECTORS)
104+
.feature(FeatureFlag.EXTENDED_DOC_VALUES_PARAMS)
104105
.user("test_admin", "x-pack-test-password");
105106

106107
private static ElasticsearchCluster fulfillingCluster = ElasticsearchCluster.local()

qa/smoke-test-multinode/build.gradle

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,5 @@ tasks.named("yamlRestTest").configure {
2929
'cat.templates/10_basic/Sort templates',
3030
'cat.templates/10_basic/Multiple template',
3131
]
32-
if (buildParams.snapshotBuild == false) {
33-
blacklist += [
34-
// doc_values.cardinality option not available in non-snapshot builds
35-
"search/395_binary_doc_values_search/*"
36-
]
37-
}
3832
systemProperty 'tests.rest.blacklist', blacklist.join(',')
3933
}

qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public class SmokeTestMultiNodeClientYamlTestSuiteIT extends ESClientYamlSuiteTe
3737
.feature(FeatureFlag.TIME_SERIES_MODE)
3838
.feature(FeatureFlag.SYNTHETIC_VECTORS)
3939
.feature(FeatureFlag.RANDOM_SAMPLING)
40+
.feature(FeatureFlag.EXTENDED_DOC_VALUES_PARAMS)
4041
.build();
4142

4243
public SmokeTestMultiNodeClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {

rest-api-spec/build.gradle

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,6 @@ tasks.named("precommit").configure {
5454
dependsOn 'enforceYamlTestConvention'
5555
}
5656

57-
tasks.named("yamlRestTest") {
58-
if (buildParams.snapshotBuild == false) {
59-
// doc_values.cardinality option not available in non-snapshot builds
60-
systemProperty 'tests.rest.blacklist', [
61-
"search/395_binary_doc_values_search/*"
62-
].join(',')
63-
}
64-
}
65-
6657
tasks.named("yamlRestCompatTestTransform").configure ({ task ->
6758
task.replaceValueInMatch("profile.shards.0.dfs.knn.0.query.0.description", "DocAndScoreQuery[0,...][0.009673266,...],0.009673266", "dfs knn vector profiling")
6859
task.replaceValueInMatch("profile.shards.0.dfs.knn.0.query.0.description", "DocAndScoreQuery[0,...][0.009673266,...],0.009673266", "dfs knn vector profiling with vector_operations_count")

0 commit comments

Comments
 (0)