Skip to content

Commit 74ce072

Browse files
committed
Optimize the optimized vm more
1 parent 3016bfd commit 74ce072

File tree

4 files changed

+188
-34
lines changed

4 files changed

+188
-34
lines changed

filterqlvm/compiler/compiler.go

Lines changed: 164 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@ package compiler
33
import (
44
"fmt"
55
"hash/fnv"
6+
"strconv"
67
"strings"
78
"sync"
89

910
"github.com/mb0/glob"
1011

1112
"github.com/lytics/qlbridge/expr"
1213
"github.com/lytics/qlbridge/lex"
14+
"github.com/lytics/qlbridge/rel"
1315
"github.com/lytics/qlbridge/value"
1416
"github.com/lytics/qlbridge/vm"
1517
)
@@ -35,6 +37,32 @@ func NewDirectCompiler() *DirectCompiler {
3537
}
3638
}
3739

40+
func (c *DirectCompiler) CompileFilter(node *rel.FilterStatement) (*CompiledExpr, error) {
41+
// Generate a hash for the node to use as cache key
42+
hash := hashFilter(node)
43+
44+
// Check cache first
45+
c.cacheLock.RLock()
46+
if compiled, ok := c.cache[hash]; ok {
47+
c.cacheLock.RUnlock()
48+
return compiled, nil
49+
}
50+
c.cacheLock.RUnlock()
51+
52+
// Create the compiled expression
53+
compiled, err := c.compileToFunc(node.Filter)
54+
if err != nil {
55+
return nil, err
56+
}
57+
58+
// Cache the result
59+
c.cacheLock.Lock()
60+
c.cache[hash] = compiled
61+
c.cacheLock.Unlock()
62+
63+
return compiled, nil
64+
}
65+
3866
// Compile compiles an expression node into a direct evaluation function
3967
func (c *DirectCompiler) Compile(node expr.Node) (*CompiledExpr, error) {
4068
// Generate a hash for the node to use as cache key
@@ -116,29 +144,69 @@ func (c *DirectCompiler) compileBinary(node *expr.BinaryNode) (*CompiledExpr, er
116144
evalFunc := func(ctx expr.EvalContext) (value.Value, bool) {
117145
// Get left and right values
118146
left, leftOk := leftExpr.EvalFunc(ctx)
119-
if !leftOk {
120-
return nil, false
121-
}
122147

123148
// Short-circuit for logical operators
124149
switch node.Operator.T {
125150
case lex.TokenLogicAnd, lex.TokenAnd:
151+
if !leftOk {
152+
return nil, false
153+
}
126154
// If left is false, no need to evaluate right
127155
if leftBool, ok := left.(value.BoolValue); ok && !leftBool.Val() {
128156
return value.NewBoolValue(false), true
129157
}
130158
case lex.TokenLogicOr, lex.TokenOr:
131-
// If left is true, no need to evaluate right
132-
if leftBool, ok := left.(value.BoolValue); ok && leftBool.Val() {
133-
return value.NewBoolValue(true), true
159+
if leftOk {
160+
// If left is true, no need to evaluate right
161+
if leftBool, ok := left.(value.BoolValue); ok && leftBool.Val() {
162+
return value.NewBoolValue(true), true
163+
}
134164
}
135165
}
136166

137167
right, rightOk := rightExpr.EvalFunc(ctx)
138-
if !rightOk {
168+
// If we could not evaluate either we can shortcut
169+
if !leftOk && !rightOk {
170+
switch node.Operator.T {
171+
case lex.TokenLogicOr, lex.TokenOr:
172+
return value.NewBoolValue(false), true
173+
case lex.TokenEqualEqual, lex.TokenEqual:
174+
// We don't allow nil == nil here because we have a NilValue type
175+
// that we would use for that
176+
return value.NewBoolValue(false), true
177+
case lex.TokenNE:
178+
return value.NewBoolValue(false), true
179+
case lex.TokenGT, lex.TokenGE, lex.TokenLT, lex.TokenLE, lex.TokenLike:
180+
return value.NewBoolValue(false), true
181+
}
139182
return nil, false
140183
}
141184

185+
// Else if we can only evaluate right
186+
if !leftOk {
187+
switch node.Operator.T {
188+
case lex.TokenIntersects, lex.TokenIN, lex.TokenContains, lex.TokenLike:
189+
return value.NewBoolValue(false), true
190+
}
191+
}
192+
193+
// Else if we can only evaluate one, we can short circuit as well
194+
if !leftOk || !rightOk {
195+
switch node.Operator.T {
196+
case lex.TokenAnd, lex.TokenLogicAnd:
197+
return value.NewBoolValue(false), true
198+
case lex.TokenEqualEqual, lex.TokenEqual:
199+
return value.NewBoolValue(false), true
200+
case lex.TokenNE:
201+
// they are technically not equal?
202+
return value.NewBoolValue(true), true
203+
case lex.TokenIN, lex.TokenIntersects:
204+
return value.NewBoolValue(false), true
205+
case lex.TokenGT, lex.TokenGE, lex.TokenLT, lex.TokenLE, lex.TokenLike:
206+
return value.NewBoolValue(false), true
207+
}
208+
}
209+
142210
// Handle different operators
143211
switch node.Operator.T {
144212
case lex.TokenEqual, lex.TokenEqualEqual:
@@ -163,6 +231,10 @@ func (c *DirectCompiler) compileBinary(node *expr.BinaryNode) (*CompiledExpr, er
163231
return value.NewBoolValue(lv.Val() > rv.Val()), true
164232
} else if rv, ok := right.(value.IntValue); ok {
165233
return value.NewBoolValue(lv.Val() > float64(rv.Val())), true
234+
} else if rv, ok := right.(value.StringValue); ok {
235+
if rf, err := strconv.ParseFloat(rv.Val(), 64); err == nil {
236+
return value.NewBoolValue(lv.Val() > rf), true
237+
}
166238
}
167239
case value.IntValue:
168240
if rv, ok := right.(value.NumberValue); ok {
@@ -171,13 +243,22 @@ func (c *DirectCompiler) compileBinary(node *expr.BinaryNode) (*CompiledExpr, er
171243
return value.NewBoolValue(lv.Val() > rv.Val()), true
172244
}
173245
case value.StringValue:
246+
if rv, ok := right.(value.TimeValue); ok {
247+
leftTime, ok := value.ValueToTime(left)
248+
if !ok {
249+
return value.BoolValueFalse, false
250+
}
251+
return value.NewBoolValue(rv.Val().Unix() > leftTime.Unix()), true
252+
}
174253
if rv, ok := right.(value.StringValue); ok {
175254
return value.NewBoolValue(lv.Val() > rv.Val()), true
176255
}
177256
case value.TimeValue:
178-
if rv, ok := right.(value.TimeValue); ok {
179-
return value.NewBoolValue(lv.Val().Unix() > rv.Val().Unix()), true
257+
rightTime, ok := value.ValueToTime(right)
258+
if !ok {
259+
return value.BoolValueFalse, false
180260
}
261+
return value.NewBoolValue(lv.Val().Unix() > rightTime.Unix()), true
181262
}
182263
// Try converting to strings
183264
return value.NewBoolValue(left.ToString() > right.ToString()), true
@@ -190,6 +271,10 @@ func (c *DirectCompiler) compileBinary(node *expr.BinaryNode) (*CompiledExpr, er
190271
return value.NewBoolValue(lv.Val() >= rv.Val()), true
191272
} else if rv, ok := right.(value.IntValue); ok {
192273
return value.NewBoolValue(lv.Val() >= float64(rv.Val())), true
274+
} else if rv, ok := right.(value.StringValue); ok {
275+
if rf, err := strconv.ParseFloat(rv.Val(), 64); err == nil {
276+
return value.NewBoolValue(lv.Val() >= rf), true
277+
}
193278
}
194279
case value.IntValue:
195280
if rv, ok := right.(value.NumberValue); ok {
@@ -198,13 +283,23 @@ func (c *DirectCompiler) compileBinary(node *expr.BinaryNode) (*CompiledExpr, er
198283
return value.NewBoolValue(lv.Val() >= rv.Val()), true
199284
}
200285
case value.StringValue:
286+
// TODO (need this to work for all the operators)
287+
if rv, ok := right.(value.TimeValue); ok {
288+
leftTime, ok := value.ValueToTime(left)
289+
if !ok {
290+
return value.BoolValueFalse, false
291+
}
292+
return value.NewBoolValue(rv.Val().Unix() >= leftTime.Unix()), true
293+
}
201294
if rv, ok := right.(value.StringValue); ok {
202295
return value.NewBoolValue(lv.Val() >= rv.Val()), true
203296
}
204297
case value.TimeValue:
205-
if rv, ok := right.(value.TimeValue); ok {
206-
return value.NewBoolValue(lv.Val().Unix() >= rv.Val().Unix()), true
298+
rightTime, ok := value.ValueToTime(right)
299+
if !ok {
300+
return value.BoolValueFalse, false
207301
}
302+
return value.NewBoolValue(lv.Val().Unix() >= rightTime.Unix()), true
208303
}
209304
return value.NewBoolValue(left.ToString() >= right.ToString()), true
210305

@@ -216,6 +311,10 @@ func (c *DirectCompiler) compileBinary(node *expr.BinaryNode) (*CompiledExpr, er
216311
return value.NewBoolValue(lv.Val() < rv.Val()), true
217312
} else if rv, ok := right.(value.IntValue); ok {
218313
return value.NewBoolValue(lv.Val() < float64(rv.Val())), true
314+
} else if rv, ok := right.(value.StringValue); ok {
315+
if rf, err := strconv.ParseFloat(rv.Val(), 64); err == nil {
316+
return value.NewBoolValue(lv.Val() < rf), true
317+
}
219318
}
220319
case value.IntValue:
221320
if rv, ok := right.(value.NumberValue); ok {
@@ -224,13 +323,22 @@ func (c *DirectCompiler) compileBinary(node *expr.BinaryNode) (*CompiledExpr, er
224323
return value.NewBoolValue(lv.Val() < rv.Val()), true
225324
}
226325
case value.StringValue:
326+
if rv, ok := right.(value.TimeValue); ok {
327+
leftTime, ok := value.ValueToTime(left)
328+
if !ok {
329+
return value.BoolValueFalse, false
330+
}
331+
return value.NewBoolValue(rv.Val().Unix() < leftTime.Unix()), true
332+
}
227333
if rv, ok := right.(value.StringValue); ok {
228334
return value.NewBoolValue(lv.Val() < rv.Val()), true
229335
}
230336
case value.TimeValue:
231-
if rv, ok := right.(value.TimeValue); ok {
232-
return value.NewBoolValue(lv.Val().Unix() < rv.Val().Unix()), true
337+
rightTime, ok := value.ValueToTime(right)
338+
if !ok {
339+
return value.BoolValueFalse, false
233340
}
341+
return value.NewBoolValue(lv.Val().Unix() < rightTime.Unix()), true
234342
}
235343
return value.NewBoolValue(left.ToString() < right.ToString()), true
236344

@@ -242,21 +350,36 @@ func (c *DirectCompiler) compileBinary(node *expr.BinaryNode) (*CompiledExpr, er
242350
return value.NewBoolValue(lv.Val() <= rv.Val()), true
243351
} else if rv, ok := right.(value.IntValue); ok {
244352
return value.NewBoolValue(lv.Val() <= float64(rv.Val())), true
353+
} else if rv, ok := right.(value.StringValue); ok {
354+
if rf, err := strconv.ParseFloat(rv.Val(), 64); err == nil {
355+
return value.NewBoolValue(lv.Val() <= rf), true
356+
}
245357
}
246358
case value.IntValue:
359+
// TODO (Add parsing of strings to ints)
360+
// TODO (How to handle string floats?)
247361
if rv, ok := right.(value.NumberValue); ok {
248362
return value.NewBoolValue(float64(lv.Val()) <= rv.Val()), true
249363
} else if rv, ok := right.(value.IntValue); ok {
250364
return value.NewBoolValue(lv.Val() <= rv.Val()), true
251365
}
252366
case value.StringValue:
367+
if rv, ok := right.(value.TimeValue); ok {
368+
leftTime, ok := value.ValueToTime(left)
369+
if !ok {
370+
return value.BoolValueFalse, false
371+
}
372+
return value.NewBoolValue(rv.Val().Unix() <= leftTime.Unix()), true
373+
}
253374
if rv, ok := right.(value.StringValue); ok {
254375
return value.NewBoolValue(lv.Val() <= rv.Val()), true
255376
}
256377
case value.TimeValue:
257-
if rv, ok := right.(value.TimeValue); ok {
258-
return value.NewBoolValue(lv.Val().Unix() <= rv.Val().Unix()), true
378+
rightTime, ok := value.ValueToTime(right)
379+
if !ok {
380+
return value.BoolValueFalse, false
259381
}
382+
return value.NewBoolValue(lv.Val().Unix() <= rightTime.Unix()), true
260383
}
261384
return value.NewBoolValue(left.ToString() <= right.ToString()), true
262385

@@ -445,6 +568,26 @@ func (c *DirectCompiler) compileBinary(node *expr.BinaryNode) (*CompiledExpr, er
445568
// IN or INTERSECTS operation
446569
switch rv := right.(type) {
447570
case value.Slice:
571+
if lv, ok := left.(value.Slice); ok {
572+
// Check if any left item is in right slice
573+
for _, item := range lv.SliceValue() {
574+
for _, rightItem := range rv.SliceValue() {
575+
if eq, err := value.Equal(item, rightItem); err == nil && eq {
576+
return value.NewBoolValue(true), true
577+
}
578+
}
579+
}
580+
return value.NewBoolValue(false), true
581+
}
582+
if lv, ok := left.(value.Map); ok {
583+
// Check if any left item is in right slice
584+
for _, item := range rv.SliceValue() {
585+
if _, exists := lv.Get(item.ToString()); exists {
586+
return value.NewBoolValue(exists), true
587+
}
588+
}
589+
return value.NewBoolValue(false), true
590+
}
448591
// Check if left side is in right slice
449592
for _, item := range rv.SliceValue() {
450593
if eq, err := value.Equal(left, item); err == nil && eq {
@@ -963,3 +1106,9 @@ func hashNode(node expr.Node) uint64 {
9631106
h.Write([]byte(node.String()))
9641107
return h.Sum64()
9651108
}
1109+
1110+
func hashFilter(filter *rel.FilterStatement) uint64 {
1111+
h := fnv.New64()
1112+
h.Write([]byte(filter.String()))
1113+
return h.Sum64()
1114+
}

filterqlvm/filterqlvm.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ func NewOptimizedVM() *OptimizedVM {
2323

2424
// CompileFilter compiles a FilterQL statement
2525
func (vm *OptimizedVM) CompileFilter(filter *rel.FilterStatement) (*compiler.CompiledExpr, error) {
26-
return vm.compiler.Compile(filter.Filter)
26+
return vm.compiler.CompileFilter(filter)
2727
}
2828

2929
// CompileNode compiles any expression node

filterqlvm/filterqlvm_test.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ func (tc *testContext) Get(key string) (value.Value, bool) {
2020
v, ok := tc.data[key]
2121
left, right, hasNamespacing := expr.LeftRight(key)
2222

23-
//u.Debugf("left:%q right:%q key=%v", left, right, key)
2423
if hasNamespacing {
2524
f, ok := tc.data[left]
2625
if !ok {
@@ -175,12 +174,11 @@ func BenchmarkOptimizedVM(b *testing.B) {
175174
optimizedVM := NewOptimizedVM()
176175

177176
for _, pattern := range benchmarkPatterns {
177+
filter, err := rel.ParseFilterQL("FILTER " + pattern.filter)
178+
if err != nil {
179+
b.Fatalf("Failed to parse filter: %v", err)
180+
}
178181
b.Run(pattern.name, func(b *testing.B) {
179-
filter, err := rel.ParseFilterQL("FILTER " + pattern.filter)
180-
if err != nil {
181-
b.Fatalf("Failed to parse filter: %v", err)
182-
}
183-
184182
var ctx *testContext
185183
if pattern.complex {
186184
ctx = newComplexContext()
@@ -193,7 +191,6 @@ func BenchmarkOptimizedVM(b *testing.B) {
193191
if err != nil {
194192
b.Fatalf("Failed to compile filter: %v", err)
195193
}
196-
197194
b.ResetTimer()
198195
for i := 0; i < b.N; i++ {
199196
optimizedVM.Matches(ctx, filter)

0 commit comments

Comments
 (0)