Skip to content

Commit 713de2b

Browse files
committed
Add new full text functions data set and modify VerifierTests
1 parent 0dda9d2 commit 713de2b

File tree

5 files changed

+529
-151
lines changed

5 files changed

+529
-151
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ public class CsvTestsDataLoader {
144144
private static final TestDataset LOGS = new TestDataset("logs");
145145
private static final TestDataset MV_TEXT = new TestDataset("mv_text");
146146
private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector");
147+
private static final TestDataset COLORS = new TestDataset("colors");
148+
private static final TestDataset FULL_TEXT_SEARCH = new TestDataset("full_text_search");
147149

148150
public static final Map<String, TestDataset> CSV_DATASET_MAP = Map.ofEntries(
149151
Map.entry(EMPLOYEES.indexName, EMPLOYEES),
@@ -204,7 +206,9 @@ public class CsvTestsDataLoader {
204206
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT),
205207
Map.entry(LOGS.indexName, LOGS),
206208
Map.entry(MV_TEXT.indexName, MV_TEXT),
207-
Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR)
209+
Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR),
210+
Map.entry(COLORS.indexName, COLORS),
211+
Map.entry(FULL_TEXT_SEARCH.indexName, FULL_TEXT_SEARCH)
208212
);
209213

210214
private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
id:integer,title:text,body:text,tags:keyword,category:keyword,published_date:date,vector:dense_vector
2+
1,The Rise of AI,Artificial intelligence is revolutionizing industries\, from healthcare to finance.,[ai,technology,future],technology,2023-01-15,[0.89, 0.61, 0.13]
3+
2,Hiking the Grand Canyon,Exploring the vast landscapes of the Grand Canyon is an unforgettable experience.,[travel,nature,hiking],travel,2022-11-20,[0.31, 0.85, 0.44]
4+
3,Understanding Quantum Computing,Quantum computing leverages the principles of quantum mechanics for computation.,[quantum,computing,research],science,2023-06-05,[0.92, 0.47, 0.22]
5+
4,Healthy Meal Planning,Meal prepping with nutritious ingredients can save time and improve well-being.,[health,food,planning],lifestyle,2024-03-08,[0.41, 0.66, 0.30]
6+
5,Dogs: Loyal Companions,Dogs provide emotional support and are known for their loyalty and affection.,[dogs,pets,companionship],animals,2021-12-30,[0.14, 0.91, 0.19]
7+
6,A Guide to the Solar System,The solar system is home to eight planets\, each with unique characteristics.,[space,planets,astronomy],science,2022-07-14,[0.75, 0.34, 0.56]
8+
7,Meditation for Beginners,Meditation can help reduce stress and improve mental clarity when practiced regularly.,[meditation,wellness,mental health],lifestyle,2023-02-28,[0.36, 0.72, 0.28]
9+
8,Exploring Tokyo,Tokyo blends modern skyscrapers with traditional temples and vibrant street culture.,[japan,tokyo,travel],travel,2024-09-10,[0.45, 0.82, 0.39]
10+
9,Introduction to Neural Networks,Neural networks are foundational to deep learning\, a subfield of machine learning.,[neural networks,deep learning,ai],technology,2023-10-01,[0.88, 0.60, 0.15]
11+
10,Gardening Tips for Spring,Spring is the ideal time to start planting flowers\, herbs\, and vegetables.,[gardening,plants,spring],lifestyle,2022-03-15,[0.33, 0.76, 0.21]
12+
11,Basics of Blockchain Technology,Blockchain provides a decentralized way to store and verify transactions.,[blockchain,cryptocurrency,tech],technology,2023-05-22,[0.91, 0.55, 0.20]
13+
12,Cats vs Dogs,Cats and dogs are both popular pets\, each with unique behaviors and needs.,[cats,dogs,pets],animals,2022-08-18,[0.18, 0.89, 0.23]
14+
13,The Benefits of Yoga,Yoga combines physical postures with breathing exercises and meditation.,[yoga,fitness,health],lifestyle,2024-01-04,[0.40, 0.69, 0.27]
15+
14,Visiting the Louvre Museum,The Louvre houses famous artworks like the Mona Lisa and Venus de Milo.,[art,museum,paris],travel,2021-06-25,[0.50, 0.78, 0.41]
16+
15,Climate Change Explained,Climate change is a global challenge affecting ecosystems and weather patterns.,[climate,environment,science],science,2022-10-11,[0.66, 0.70, 0.45]
17+
16,Intro to Programming with Python,Python is a versatile language for beginners and experts alike.,[python,programming,code],technology,2023-11-30,[0.87, 0.64, 0.18]
18+
17,Exploring the Amazon Rainforest,The Amazon Rainforest is rich in biodiversity and cultural heritage.,[amazon,nature,exploration],travel,2023-07-19,[0.38, 0.84, 0.46]
19+
18,The Basics of Nutrition,Balanced nutrition is essential for energy\, growth\, and health maintenance.,[nutrition,food,health],lifestyle,2022-05-12,[0.43, 0.68, 0.29]
20+
19,Mars Missions Update,NASA and private companies continue exploring Mars with rovers and future plans.,[mars,space,exploration],science,2024-04-26,[0.79, 0.37, 0.52]
21+
20,Bird Watching in Costa Rica,Costa Rica offers a paradise for bird watchers with hundreds of species.,[birds,wildlife,nature],travel,2021-09-09,[0.29, 0.83, 0.38]
Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
knnSearch
2+
required_capability: knn_function
3+
4+
// tag::knn-function[]
5+
from colors metadata _score
6+
| where knn(rgb_vector, [0, 120, 0])
7+
| sort _score desc
8+
// end::knn-function[]
9+
| keep color, rgb_vector
10+
;
11+
12+
// tag::knn-function-result[]
13+
color:text | rgb_vector:dense_vector
14+
green | [0.0, 128.0, 0.0]
15+
dark green | [0.0, 100.0, 0.0]
16+
forest green | [34.0, 139.0, 34.0]
17+
dark olive green | [85.0, 107.0, 47.0]
18+
sea green | [46.0, 139.0, 87.0]
19+
dark slate gray | [47.0, 79.0, 79.0]
20+
olive drab | [107.0, 142.0, 35.0]
21+
lime green | [50.0, 205.0, 50.0]
22+
black | [0.0, 0.0, 0.0]
23+
olive | [128.0, 128.0, 0.0]
24+
// end::knn-function-result[]
25+
;
26+
27+
knnSearchWithKOption
28+
required_capability: knn_function
29+
30+
// tag::knn-function-options[]
31+
from colors metadata _score
32+
| where knn(rgb_vector, [0,255,255], {"k": 4})
33+
| sort _score desc
34+
// end::knn-function-options[]
35+
| keep color, rgb_vector
36+
;
37+
38+
color:text | rgb_vector:dense_vector
39+
cyan | [0.0, 255.0, 255.0]
40+
deep sky blue | [0.0, 191.0, 255.0]
41+
dark turquoise | [0.0, 206.0, 209.0]
42+
turquoise | [64.0, 224.0, 208.0]
43+
;
44+
45+
knnSearchWithSimilarityOption
46+
required_capability: knn_function
47+
48+
from colors metadata _score
49+
| where knn(rgb_vector, [255,192,203], {"similarity": 40})
50+
| sort _score desc
51+
| keep color, rgb_vector
52+
;
53+
54+
color:text | rgb_vector:dense_vector
55+
pink | [255.0, 192.0, 203.0]
56+
light pink | [255.0, 182.0, 193.0]
57+
peach puff | [255.0, 218.0, 185.0]
58+
bisque | [255.0, 228.0, 196.0]
59+
thistle | [216.0, 191.0, 216.0]
60+
wheat | [245.0, 222.0, 179.0]
61+
;
62+
63+
knnHybridSearch
64+
required_capability: knn_function
65+
66+
from colors metadata _score
67+
| where match(color, "violet") or knn(rgb_vector, [238,130,238], {"boost": 10.0, "k": 5})
68+
| sort _score desc
69+
| eval round_score = round(_score, 4)
70+
| keep color, rgb_vector, round_score
71+
;
72+
73+
color:text | rgb_vector:dense_vector | round_score:double
74+
violet | [238.0, 130.0, 238.0] | 13.9457
75+
blue violet | [138.0, 43.0, 226.0] | 3.0871
76+
dark violet | [148.0, 0.0, 211.0] | 3.0871
77+
medium violet red | [199.0, 21.0, 133.0] | 2.5355
78+
pale violet red | [219.0, 112.0, 147.0] | 2.5355
79+
orchid | [218.0, 112.0, 214.0] | 0.0083
80+
plum | [221.0, 160.0, 221.0] | 0.0071
81+
hot pink | [255.0, 105.0, 180.0] | 0.0024
82+
thistle | [216.0, 191.0, 216.0] | 0.0021
83+
;
84+
85+
knnWithMultipleFunctions
86+
required_capability: knn_function
87+
88+
from colors metadata _score
89+
| where knn(rgb_vector, [128,128,0]) and match(color, "olive")
90+
| sort _score desc
91+
| eval round_score = round(_score, 4)
92+
| keep color, rgb_vector, round_score
93+
;
94+
95+
color:text | rgb_vector:dense_vector | round_score:double
96+
olive | [128.0, 128.0, 0.0] | 5.4979
97+
olive drab | [107.0, 142.0, 35.0] | 3.5206
98+
dark olive green | [85.0, 107.0, 47.0] | 2.8906
99+
;
100+
101+
knnAfterKeep
102+
required_capability: knn_function
103+
104+
from colors metadata _score
105+
| keep rgb_vector, _score
106+
| where knn(rgb_vector, [128,128,0])
107+
| eval round_score = round(_score, 4)
108+
| sort round_score desc
109+
| keep rgb_vector, round_score
110+
| limit 5
111+
;
112+
113+
rgb_vector:dense_vector | round_score:double
114+
[128.0, 128.0, 0.0] | 1.0
115+
[107.0, 142.0, 35.0] | 0.0014
116+
[85.0, 107.0, 47.0] | 4.0E-4
117+
[139.0, 69.0, 19.0] | 3.0E-4
118+
[184.0, 134.0, 11.0] | 3.0E-4
119+
;
120+
121+
knnAfterDrop
122+
required_capability: knn_function
123+
124+
from colors metadata _score
125+
| drop color
126+
| where knn(rgb_vector, [128,128,0])
127+
| eval round_score = round(_score, 4)
128+
| keep rgb_vector, round_score
129+
| limit 5
130+
;
131+
132+
rgb_vector:dense_vector | round_score:double
133+
[184.0, 134.0, 11.0] | 3.0E-4
134+
[128.0, 128.0, 0.0] | 1.0
135+
[154.0, 205.0, 50.0] | 1.0E-4
136+
[85.0, 107.0, 47.0] | 4.0E-4
137+
[107.0, 142.0, 35.0] | 0.0014
138+
;
139+
140+
knnAfterEval
141+
required_capability: knn_function
142+
143+
from colors metadata _score
144+
| eval composed_name = locate(color, " ") > 0
145+
| where knn(rgb_vector, [128,128,0])
146+
| sort _score, color desc
147+
| keep color, composed_name
148+
;
149+
150+
color:text | composed_name:boolean
151+
peru | false
152+
yellow green | true
153+
chocolate | false
154+
dim gray | true
155+
saddle brown | true
156+
sienna | false
157+
dark golden rod | true
158+
dark olive green | true
159+
olive drab | true
160+
olive | false
161+
;
162+
163+
knnWithConjunction
164+
required_capability: knn_function
165+
166+
# TODO We need kNN prefiltering here so we get more candidates that pass the filter
167+
from colors metadata _score
168+
| where knn(rgb_vector, [255,255,238]) and hex_code like "#FFF*"
169+
| keep color, hex_code, rgb_vector
170+
;
171+
ignoreOrder:true
172+
173+
color:text | hex_code: keyword | rgb_vector:dense_vector
174+
light yellow | #FFFFE0 | [255.0, 255.0, 224.0]
175+
lavender blush | #FFF0F5 | [255.0, 240.0, 245.0]
176+
sea shell | #FFF5EE | [255.0, 245.0, 238.0]
177+
floral white | #FFFAF0 | [255.0, 250.0, 240.0]
178+
ivory | #FFFFF0 | [255.0, 255.0, 240.0]
179+
snow | #FFFAFA | [255.0, 250.0, 250.0]
180+
white | #FFFFFF | [255.0, 255.0, 255.0]
181+
;
182+
183+
knnWithDisjunctionAndFiltersConjunction
184+
required_capability: knn_function
185+
186+
# TODO We need kNN prefiltering here so we get more candidates that pass the filter
187+
from colors metadata _score
188+
| where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [128, 0, 255])) and primary == true
189+
| keep color, rgb_vector, _score
190+
;
191+
192+
color:text | rgb_vector:dense_vector | _score:double
193+
cyan | [0.0, 255.0, 255.0] | 1.0
194+
blue | [0.0, 0.0, 255.0] | 9.922293975250795E-5
195+
;
196+
197+
knnWithDisjunctionAndConjunction
198+
required_capability: knn_function
199+
required_capability: full_text_functions_disjunctions
200+
201+
# TODO We need kNN prefiltering here so we get more candidates that pass the filter
202+
from colors metadata _score
203+
| where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [0, 0, 255])) and knn(rgb_vector, [0, 255, 0])
204+
| keep color, rgb_vector, _score
205+
;
206+
207+
color:text | rgb_vector:dense_vector | _score:double
208+
medium spring green | [0.0, 250.0, 154.0] | 1.6871128173079342E-4
209+
;
210+
211+
knnWithNonPushableConjunction
212+
required_capability: knn_function
213+
214+
from colors metadata _score
215+
| eval composed_name = locate(color, " ") > 0
216+
| where knn(rgb_vector, [128,128,0]) and composed_name == false
217+
| eval round_score = round(_score, 4)
218+
| keep color, composed_name, round_score
219+
;
220+
221+
color:text | composed_name:boolean | round_score:double
222+
olive | false | 1.0
223+
sienna | false | 3.0E-4
224+
chocolate | false | 1.0E-4
225+
peru | false | 1.0E-4
226+
;
227+
228+
testKnnWithNonPushableDisjunctions
229+
required_capability: knn_function
230+
231+
from colors metadata _score
232+
| where knn(rgb_vector, [128,128,0], {"k": 5}) or length(color) > 17
233+
| sort _score desc, color asc
234+
| eval round_score = round(_score, 4)
235+
| keep color, round_score
236+
;
237+
238+
color:text | round_score: double
239+
olive | 1.0
240+
olive drab | 0.0014
241+
dark olive green | 4.0E-4
242+
dark golden rod | 3.0E-4
243+
sienna | 3.0E-4
244+
medium aqua marine | 0.0
245+
medium spring green | 0.0
246+
light golden rod yellow | 0.0
247+
;
248+
249+
testKnnWithNonPushableDisjunctionsOnComplexExpressions
250+
required_capability: knn_function
251+
252+
from colors metadata _score
253+
| where (knn(rgb_vector, [128,128,0]) and length(color) > 12) or (knn(rgb_vector, [128,0,128]) and primary == false)
254+
| sort _score desc
255+
| eval round_score = round(_score, 4)
256+
| keep color, primary, round_score
257+
;
258+
259+
color: text | primary: boolean | round_score: double
260+
purple | false | 1.0
261+
dark magenta | false | 0.0045
262+
dark olive green | false | 4.0E-4
263+
indigo | false | 4.0E-4
264+
dark golden rod | false | 3.0E-4
265+
dim gray | false | 3.0E-4
266+
dark slate blue | false | 2.0E-4
267+
medium violet red | false | 2.0E-4
268+
dark orchid | false | 1.0E-4
269+
dark violet | false | 1.0E-4
270+
brown | false | 1.0E-4
271+
blue violet | false | 1.0E-4
272+
;
273+
274+
testKnnInStatsNonPushable
275+
required_capability: knn_function
276+
277+
from colors
278+
| where length(color) < 10
279+
| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 140})
280+
;
281+
282+
c: long
283+
59
284+
;
285+
286+
287+
testKnnInStatsPushableAndNonPushable
288+
required_capability: knn_function
289+
required_capability: full_text_functions_in_stats_where
290+
291+
from colors metadata _score
292+
| stats c = count(*) where (knn(rgb_vector, [0,255,255], {"k": 140}) or knn(rgb_vector, [0, 0, 255])) and knn(rgb_vector, [0, 255, 0], {"k": 40})
293+
;
294+
295+
c:long
296+
40
297+
;
298+
299+
testKnnInStatsWithGrouping
required_capability: knn_function
300+
from colors
301+
| where length(color) < 10
302+
| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 140}) by primary
303+
;
304+
305+
c: long | primary: boolean
306+
50 | false
307+
9 | true
308+
;
309+
310+
testKnnInStatsPushable
311+
required_capability: knn_function
312+
required_capability: full_text_functions_in_stats_where
313+
314+
from colors
315+
| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 40})
316+
;
317+
318+
# No surprises, gets the number of top k
319+
c:long
320+
40
321+
;
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"properties": {
3+
"id": {
4+
"type": "integer"
5+
},
6+
"title": {
7+
"type": "text"
8+
},
9+
"body": {
10+
"type": "text"
11+
},
12+
"tags": {
13+
"type": "keyword"
14+
},
15+
"category": {
16+
"type": "keyword"
17+
},
18+
"published_date": {
19+
"type": "date"
20+
},
21+
"vector": {
22+
"type": "dense_vector",
23+
"similarity": "l2_norm"
24+
}
25+
}
26+
}

0 commit comments

Comments
 (0)