Skip to content

Commit dd082e1

Browse files
authored
Improved Parenthesis Handling and Spanner Limit Mitigation (#1272)
* Improved Parenthesis Handling and Spanner Limit Mitigation This PR improves search query generation by: - Grammar: Adding parenthesizedCriteria to explicitly capture parenthesized expressions. - Visitor: Updating the visitor to create SearchNode objects with parenthesis information. - Filter Generation: Refactoring traverseAndGenerateFilters to correctly handle parentheses and minimize unnecessary nesting in Spanner queries, mitigating Spanner's complexity limits. Without this change, a user cannot query more than 50 ids at a given time. * fix nits, add nested test case, & address feedback
1 parent dcbe89d commit dd082e1

File tree

6 files changed

+457
-78
lines changed

6 files changed

+457
-78
lines changed

antlr/FeatureSearch.g4

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,18 @@ search_criteria:
5252
generic_search_term
5353
| ANY_VALUE; // Default to ANY_VALUE search without "name:" prefix.
5454

55+
parenthesizedCriteria: '(' combined_search_criteria ')';
56+
5557
// Combined search criteria
5658
combined_search_criteria:
5759
// Single term or grouped expression
58-
(search_criteria | '(' combined_search_criteria ')')
60+
(search_criteria | parenthesizedCriteria)
5961
// Optional chaining with implicit AND or explicit operators
6062
(
6163
(operator)? // Optional explicit operator
6264
(
6365
search_criteria
64-
| '(' combined_search_criteria ')'
66+
| parenthesizedCriteria
6567
) // Next search term or group
6668
)*;
6769

lib/gcpspanner/feature_search_query.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,27 @@ func (b *FeatureSearchFilterBuilder) traverseAndGenerateFilters(node *searchtype
104104
filterString := strings.Join(childFilters, joiner)
105105

106106
if strings.TrimSpace(filterString) != "" {
107-
filters = append(filters, "("+filterString+")")
107+
filters = append(filters, filterString)
108108
}
109109

110110
}
111111

112+
case node.Keyword == searchtypes.KeywordParens:
113+
// Handle parenthesized sub-expressions.
114+
var childFilters []string
115+
for _, child := range node.Children {
116+
childFilters = append(childFilters, b.traverseAndGenerateFilters(child)...)
117+
}
118+
119+
// Join without spaces because if there are multiple terms, they will
120+
// currently fall under a parent keyword node (AND/OR), which
121+
// takes care of adding the necessary spaces.
122+
filter := strings.Join(childFilters, "")
123+
124+
filter = "(" + filter + ")"
125+
126+
filters = append(filters, filter)
127+
112128
case node.Term != nil && (node.Keyword == searchtypes.KeywordNone):
113129
var filter string
114130
switch node.Term.Identifier {
@@ -133,7 +149,7 @@ func (b *FeatureSearchFilterBuilder) traverseAndGenerateFilters(node *searchtype
133149
filter = b.handleIdentifierAvailableBrowserDateTerm(node)
134150
}
135151
if filter != "" {
136-
filters = append(filters, "("+filter+")")
152+
filters = append(filters, filter)
137153
}
138154
}
139155

lib/gcpspanner/feature_search_query_test.go

Lines changed: 197 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -241,36 +241,188 @@ var (
241241
Keyword: searchtypes.KeywordNone,
242242
Children: nil,
243243
Term: &searchtypes.SearchTerm{
244-
Identifier: searchtypes.IdentifierAvailableOn, Value: "chrome", Operator: searchtypes.OperatorEq},
244+
Identifier: searchtypes.IdentifierAvailableOn,
245+
Value: "chrome",
246+
Operator: searchtypes.OperatorEq,
247+
},
245248
},
246249
{
247-
Keyword: searchtypes.KeywordOR,
250+
Keyword: searchtypes.KeywordParens,
248251
Term: nil,
249252
Children: []*searchtypes.SearchNode{
250253
{
251-
Keyword: searchtypes.KeywordNone,
252-
Children: nil,
253-
Term: &searchtypes.SearchTerm{
254-
Identifier: searchtypes.IdentifierBaselineStatus, Value: "widely", Operator: searchtypes.OperatorEq},
254+
Keyword: searchtypes.KeywordOR,
255+
Term: nil,
256+
Children: []*searchtypes.SearchNode{
257+
{
258+
Keyword: searchtypes.KeywordNone,
259+
Children: nil,
260+
Term: &searchtypes.SearchTerm{
261+
Identifier: searchtypes.IdentifierBaselineStatus,
262+
Value: "widely",
263+
Operator: searchtypes.OperatorEq,
264+
},
265+
},
266+
{
267+
Keyword: searchtypes.KeywordNone,
268+
Children: nil,
269+
Term: &searchtypes.SearchTerm{
270+
Identifier: searchtypes.IdentifierName,
271+
Value: "avif",
272+
Operator: searchtypes.OperatorEq,
273+
},
274+
},
275+
},
255276
},
277+
},
278+
},
279+
},
280+
},
281+
{
282+
Keyword: searchtypes.KeywordNone,
283+
Children: nil,
284+
Term: &searchtypes.SearchTerm{
285+
Identifier: searchtypes.IdentifierName,
286+
Value: "grid",
287+
Operator: searchtypes.OperatorEq,
288+
},
289+
},
290+
},
291+
},
292+
},
293+
},
294+
}
295+
296+
complexNestedQuery = TestTree{
297+
Query: "(available_on:chrome AND (baseline_status:widely OR baseline_status:limited)) OR name:grid",
298+
InputTree: &searchtypes.SearchNode{
299+
Keyword: searchtypes.KeywordRoot,
300+
Term: nil,
301+
Children: []*searchtypes.SearchNode{
302+
{
303+
Keyword: searchtypes.KeywordOR,
304+
Term: nil,
305+
Children: []*searchtypes.SearchNode{
306+
{
307+
Keyword: searchtypes.KeywordParens,
308+
Term: nil,
309+
Children: []*searchtypes.SearchNode{
310+
{
311+
Keyword: searchtypes.KeywordAND,
312+
Children: []*searchtypes.SearchNode{
256313
{
257-
Keyword: searchtypes.KeywordNone,
258-
Children: nil,
259314
Term: &searchtypes.SearchTerm{
260-
Identifier: searchtypes.IdentifierName, Value: "avif", Operator: searchtypes.OperatorEq},
315+
Identifier: searchtypes.IdentifierAvailableOn,
316+
Value: "chrome",
317+
Operator: searchtypes.OperatorEq,
318+
},
319+
Keyword: searchtypes.KeywordNone,
320+
},
321+
{
322+
Term: nil,
323+
Keyword: searchtypes.KeywordParens,
324+
Children: []*searchtypes.SearchNode{
325+
{
326+
Keyword: searchtypes.KeywordOR,
327+
Term: nil,
328+
Children: []*searchtypes.SearchNode{
329+
{
330+
Children: nil,
331+
Term: &searchtypes.SearchTerm{
332+
Identifier: searchtypes.IdentifierBaselineStatus,
333+
Value: "widely",
334+
Operator: searchtypes.OperatorEq,
335+
},
336+
Keyword: searchtypes.KeywordNone,
337+
},
338+
{
339+
Children: nil,
340+
Term: &searchtypes.SearchTerm{
341+
Identifier: searchtypes.IdentifierBaselineStatus,
342+
Value: "limited",
343+
Operator: searchtypes.OperatorEq,
344+
},
345+
Keyword: searchtypes.KeywordNone,
346+
},
347+
},
348+
},
349+
},
261350
},
262351
},
263352
},
264353
},
265354
},
266355
{
267-
Keyword: searchtypes.KeywordNone,
268-
Children: nil,
269356
Term: &searchtypes.SearchTerm{
270357
Identifier: searchtypes.IdentifierName,
271358
Value: "grid",
272359
Operator: searchtypes.OperatorEq,
273360
},
361+
Keyword: searchtypes.KeywordNone,
362+
},
363+
},
364+
},
365+
},
366+
},
367+
}
368+
369+
repeatedSimpleTermQuery = TestTree{
370+
Query: "id:html OR id:css OR id:typescript OR id:javascript",
371+
InputTree: &searchtypes.SearchNode{
372+
Keyword: searchtypes.KeywordRoot,
373+
Term: nil,
374+
Children: []*searchtypes.SearchNode{
375+
{
376+
Term: nil,
377+
Keyword: searchtypes.KeywordOR,
378+
Children: []*searchtypes.SearchNode{
379+
{
380+
Term: nil,
381+
Keyword: searchtypes.KeywordOR,
382+
Children: []*searchtypes.SearchNode{
383+
{
384+
Term: nil,
385+
Keyword: searchtypes.KeywordOR,
386+
Children: []*searchtypes.SearchNode{
387+
{
388+
Keyword: searchtypes.KeywordNone,
389+
Term: &searchtypes.SearchTerm{
390+
Identifier: searchtypes.IdentifierID,
391+
Value: "html",
392+
Operator: searchtypes.OperatorEq,
393+
},
394+
Children: nil,
395+
},
396+
{
397+
Keyword: searchtypes.KeywordNone,
398+
Term: &searchtypes.SearchTerm{
399+
Identifier: searchtypes.IdentifierID,
400+
Value: "css",
401+
Operator: searchtypes.OperatorEq,
402+
},
403+
Children: nil,
404+
},
405+
},
406+
},
407+
{
408+
Keyword: searchtypes.KeywordNone,
409+
Term: &searchtypes.SearchTerm{
410+
Identifier: searchtypes.IdentifierID,
411+
Value: "typescript",
412+
Operator: searchtypes.OperatorEq,
413+
},
414+
Children: nil,
415+
},
416+
},
417+
},
418+
{
419+
Keyword: searchtypes.KeywordNone,
420+
Term: &searchtypes.SearchTerm{
421+
Identifier: searchtypes.IdentifierID,
422+
Value: "javascript",
423+
Operator: searchtypes.OperatorEq,
424+
},
425+
Children: nil,
274426
},
275427
},
276428
},
@@ -288,48 +440,48 @@ func TestBuild(t *testing.T) {
288440
}{
289441
{
290442
inputTestTree: simpleAvailableOnQuery,
291-
expectedClauses: []string{`(wf.ID IN (SELECT WebFeatureID FROM BrowserFeatureAvailabilities
292-
WHERE BrowserName = @param0))`},
443+
expectedClauses: []string{`wf.ID IN (SELECT WebFeatureID FROM BrowserFeatureAvailabilities
444+
WHERE BrowserName = @param0)`},
293445
expectedParams: map[string]interface{}{
294446
"param0": "chrome",
295447
},
296448
},
297449
{
298450
inputTestTree: simpleNameQuery,
299-
expectedClauses: []string{`((wf.Name_Lowercase LIKE @param0 OR wf.FeatureKey_Lowercase LIKE @param0))`},
451+
expectedClauses: []string{`(wf.Name_Lowercase LIKE @param0 OR wf.FeatureKey_Lowercase LIKE @param0)`},
300452
expectedParams: map[string]interface{}{
301453
"param0": "%" + "css grid" + "%",
302454
},
303455
},
304456
{
305457
inputTestTree: simpleNameByIDQuery,
306-
expectedClauses: []string{`((wf.Name_Lowercase LIKE @param0 OR wf.FeatureKey_Lowercase LIKE @param0))`},
458+
expectedClauses: []string{`(wf.Name_Lowercase LIKE @param0 OR wf.FeatureKey_Lowercase LIKE @param0)`},
307459
expectedParams: map[string]interface{}{
308460
"param0": "%" + "grid" + "%",
309461
},
310462
},
311463
{
312464
inputTestTree: availableOnBaselineStatus,
313-
expectedClauses: []string{`((wf.ID IN (SELECT WebFeatureID FROM BrowserFeatureAvailabilities
314-
WHERE BrowserName = @param0)) AND (fbs.Status = @param1))`},
465+
expectedClauses: []string{`wf.ID IN (SELECT WebFeatureID FROM BrowserFeatureAvailabilities
466+
WHERE BrowserName = @param0) AND fbs.Status = @param1`},
315467
expectedParams: map[string]interface{}{
316468
"param0": "chrome",
317469
"param1": "high",
318470
},
319471
},
320472
{
321473
inputTestTree: availableOnBaselineStatusWithNegation,
322-
expectedClauses: []string{`((wf.ID NOT IN (SELECT WebFeatureID FROM BrowserFeatureAvailabilities
323-
WHERE BrowserName = @param0)) AND (fbs.Status = @param1))`},
474+
expectedClauses: []string{`wf.ID NOT IN (SELECT WebFeatureID FROM BrowserFeatureAvailabilities
475+
WHERE BrowserName = @param0) AND fbs.Status = @param1`},
324476
expectedParams: map[string]interface{}{
325477
"param0": "chrome",
326478
"param1": "high",
327479
},
328480
},
329481
{
330482
inputTestTree: complexQuery,
331-
expectedClauses: []string{`(((wf.ID IN (SELECT WebFeatureID FROM BrowserFeatureAvailabilities
332-
WHERE BrowserName = @param0)) AND ((fbs.Status = @param1) OR ((wf.Name_Lowercase LIKE @param2 OR wf.FeatureKey_Lowercase LIKE @param2)))) OR ((wf.Name_Lowercase LIKE @param3 OR wf.FeatureKey_Lowercase LIKE @param3)))`},
483+
expectedClauses: []string{`wf.ID IN (SELECT WebFeatureID FROM BrowserFeatureAvailabilities
484+
WHERE BrowserName = @param0) AND (fbs.Status = @param1 OR (wf.Name_Lowercase LIKE @param2 OR wf.FeatureKey_Lowercase LIKE @param2)) OR (wf.Name_Lowercase LIKE @param3 OR wf.FeatureKey_Lowercase LIKE @param3)`},
333485
expectedParams: map[string]interface{}{
334486
"param0": "chrome",
335487
"param1": "high",
@@ -340,7 +492,7 @@ WHERE BrowserName = @param0)) AND ((fbs.Status = @param1) OR ((wf.Name_Lowercase
340492
{
341493
inputTestTree: baselineDateRange,
342494
expectedClauses: []string{
343-
`((LowDate >= @param0) AND (LowDate <= @param1))`,
495+
`LowDate >= @param0 AND LowDate <= @param1`,
344496
},
345497
expectedParams: map[string]interface{}{
346498
"param0": time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC),
@@ -350,13 +502,35 @@ WHERE BrowserName = @param0)) AND ((fbs.Status = @param1) OR ((wf.Name_Lowercase
350502
{
351503
inputTestTree: baselineDateRangeNegation,
352504
expectedClauses: []string{
353-
`((LowDate < @param0) OR (LowDate > @param1))`,
505+
`LowDate < @param0 OR LowDate > @param1`,
354506
},
355507
expectedParams: map[string]interface{}{
356508
"param0": time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC),
357509
"param1": time.Date(2000, 12, 31, 0, 0, 0, 0, time.UTC),
358510
},
359511
},
512+
{
513+
inputTestTree: repeatedSimpleTermQuery,
514+
expectedClauses: []string{`(wf.FeatureKey_Lowercase = @param0) OR (wf.FeatureKey_Lowercase = @param1) OR (wf.FeatureKey_Lowercase = @param2) OR (wf.FeatureKey_Lowercase = @param3)`},
515+
expectedParams: map[string]interface{}{
516+
"param0": "html",
517+
"param1": "css",
518+
"param2": "typescript",
519+
"param3": "javascript",
520+
},
521+
},
522+
{
523+
inputTestTree: complexNestedQuery,
524+
expectedClauses: []string{`(wf.ID IN (SELECT WebFeatureID FROM BrowserFeatureAvailabilities
525+
WHERE BrowserName = @param0) AND (fbs.Status = @param1 OR fbs.Status = @param2)) OR (wf.Name_Lowercase LIKE @param3 OR wf.FeatureKey_Lowercase LIKE @param3)`,
526+
},
527+
expectedParams: map[string]interface{}{
528+
"param0": "chrome",
529+
"param1": "high",
530+
"param2": "none",
531+
"param3": "%" + "grid" + "%",
532+
},
533+
},
360534
}
361535
for _, tc := range testCases {
362536
t.Run(tc.inputTestTree.Query, func(t *testing.T) {

0 commit comments

Comments
 (0)