@@ -82,21 +82,33 @@ def test_agg(session, gemini_flash_model, max_agg_rows, cluster_column):
82
82
marks = pytest .mark .xfail (raises = ValueError ),
83
83
),
84
84
pytest .param (
85
- "{city } is in the {non_existing_column} " ,
85
+ "{Movies } is good " ,
86
86
id = "non_existing_column" ,
87
87
marks = pytest .mark .xfail (raises = ValueError ),
88
88
),
89
89
pytest .param (
90
- "{city } is in the {country }" ,
90
+ "{Movies } is better than {Movies }" ,
91
91
id = "two_columns" ,
92
92
marks = pytest .mark .xfail (raises = NotImplementedError ),
93
93
),
94
+ pytest .param (
95
+ "{Year}" ,
96
+ id = "invalid_type" ,
97
+ marks = pytest .mark .xfail (raises = TypeError ),
98
+ ),
94
99
],
95
100
)
96
101
def test_agg_invalid_instruction_raise_error (instruction , gemini_flash_model ):
97
102
bigframes .options .experiments .semantic_operators = True
98
103
df = dataframe .DataFrame (
99
- {"country" : ["USA" , "Germany" ], "city" : ["Seattle" , "Berlin" ]}
104
+ data = {
105
+ "Movies" : [
106
+ "Titanic" ,
107
+ "The Wolf of Wall Street" ,
108
+ "Killers of the Flower Moon" ,
109
+ ],
110
+ "Year" : [1997 , 2013 , 2023 ],
111
+ },
100
112
)
101
113
df .semantics .agg (instruction , gemini_flash_model )
102
114
@@ -229,15 +241,26 @@ def test_filter_single_column_reference(session, gemini_flash_model):
229
241
@pytest.mark.parametrize(
    ("instruction", "expected_error"),
    [
        pytest.param("No column reference", ValueError, id="zero_column"),
        pytest.param(
            "{city} is in the {non_existing_column}",
            ValueError,
            id="non_existing_column",
        ),
        pytest.param("{id}", TypeError, id="invalid_type"),
    ],
)
def test_filter_invalid_instruction_raise_error(
    instruction, expected_error, gemini_flash_model
):
    """Check that ``semantics.filter`` rejects malformed instructions.

    Each case pairs an instruction with the exception type the call is
    expected to raise:

    * ``zero_column`` -- the instruction has no ``{column}`` placeholder.
    * ``non_existing_column`` -- a placeholder names a column that is not
      in the DataFrame.
    * ``invalid_type`` -- the only placeholder refers to the integer
      ``id`` column.

    The expected exception is asserted directly with ``pytest.raises``
    rather than via ``xfail(raises=...)`` marks, so a case whose call does
    not raise the expected exception fails outright instead of being
    reported as an unexpected pass.
    """
    bigframes.options.experiments.semantic_operators = True
    df = dataframe.DataFrame({"id": [1, 2], "city": ["Seattle", "Berlin"]})

    with pytest.raises(expected_error):
        df.semantics.filter(instruction, gemini_flash_model)
@@ -249,7 +272,7 @@ def test_filter_invalid_model_raise_error():
249
272
{"country" : ["USA" , "Germany" ], "city" : ["Seattle" , "Berlin" ]}
250
273
)
251
274
252
- with pytest .raises (ValueError ):
275
+ with pytest .raises (TypeError ):
253
276
df .semantics .filter ("{city} is the capital of {country}" , None )
254
277
255
278
@@ -290,14 +313,28 @@ def test_map(session, gemini_flash_model):
290
313
@pytest .mark .parametrize (
291
314
"instruction" ,
292
315
[
293
- "No column reference" ,
294
- "What is the food made from {ingredient_1} and {non_existing_column}?}" ,
316
+ pytest .param (
317
+ "No column reference" ,
318
+ id = "zero_column" ,
319
+ marks = pytest .mark .xfail (raises = ValueError ),
320
+ ),
321
+ pytest .param (
322
+ "What is the food made from {ingredient_1} and {non_existing_column}?}" ,
323
+ id = "non_existing_column" ,
324
+ marks = pytest .mark .xfail (raises = ValueError ),
325
+ ),
326
+ pytest .param (
327
+ "{id}" ,
328
+ id = "invalid_type" ,
329
+ marks = pytest .mark .xfail (raises = TypeError ),
330
+ ),
295
331
],
296
332
)
297
333
def test_map_invalid_instruction_raise_error (instruction , gemini_flash_model ):
298
334
bigframes .options .experiments .semantic_operators = True
299
335
df = dataframe .DataFrame (
300
336
data = {
337
+ "id" : [1 , 2 ],
301
338
"ingredient_1" : ["Burger Bun" , "Soy Bean" ],
302
339
"ingredient_2" : ["Beef Patty" , "Bittern" ],
303
340
}
@@ -316,7 +353,7 @@ def test_map_invalid_model_raise_error():
316
353
},
317
354
)
318
355
319
- with pytest .raises (ValueError ):
356
+ with pytest .raises (TypeError ):
320
357
df .semantics .map (
321
358
"What is the food made from {ingredient_1} and {ingredient_2}? One word only." ,
322
359
"food" ,
@@ -462,7 +499,7 @@ def test_join_invalid_model_raise_error():
462
499
cities = dataframe .DataFrame ({"city" : ["Seattle" , "Berlin" ]})
463
500
countries = dataframe .DataFrame ({"country" : ["USA" , "UK" , "Germany" ]})
464
501
465
- with pytest .raises (ValueError ):
502
+ with pytest .raises (TypeError ):
466
503
cities .semantics .join (countries , "{city} is in {country}" , None )
467
504
468
505
@@ -528,6 +565,19 @@ def test_search_invalid_model_raises_error(session):
528
565
df .semantics .search ("creatures" , "monkey" , top_k = 2 , model = None )
529
566
530
567
568
def test_search_invalid_top_k_raises_error(session, text_embedding_generator):
    """Expect ``semantics.search`` to raise ``ValueError`` for ``top_k=0``."""
    bigframes.options.experiments.semantic_operators = True

    creatures = ["salmon", "sea urchin", "baboons", "frog", "chimpanzee"]
    df = dataframe.DataFrame(data={"creatures": creatures}, session=session)

    with pytest.raises(ValueError):
        df.semantics.search(
            "creatures",
            "monkey",
            top_k=0,
            model=text_embedding_generator,
        )
579
+
580
+
531
581
@pytest .mark .parametrize (
532
582
"score_column" ,
533
583
[
@@ -614,6 +664,27 @@ def test_sim_join_invalid_model_raises_error(session):
614
664
)
615
665
616
666
667
def test_sim_join_invalid_top_k_raises_error(session, text_embedding_generator):
    """Expect ``semantics.sim_join`` to raise ``ValueError`` for ``top_k=0``."""
    bigframes.options.experiments.semantic_operators = True

    # Two small single-column frames that share the join column name.
    left = dataframe.DataFrame(data={"creatures": ["salmon", "cat"]}, session=session)
    right = dataframe.DataFrame(data={"creatures": ["dog", "tuna"]}, session=session)

    with pytest.raises(ValueError):
        left.semantics.sim_join(
            right,
            left_on="creatures",
            right_on="creatures",
            top_k=0,
            model=text_embedding_generator,
        )
686
+
687
+
617
688
def test_sim_join_data_too_large_raises_error (session , text_embedding_generator ):
618
689
bigframes .options .experiments .semantic_operators = True
619
690
df1 = dataframe .DataFrame (
0 commit comments