@@ -7,149 +7,117 @@ import (
77 "github.com/neurlang/quaternary"
88)
99
10+ // getBy returns all raw matches for every (col→val), including nil holes.
11+ // It never inspects row contents—decoding purely by the quaternary filter.
1012func (b * bucket ) getBy (q map [int ]string ) [][]string {
11- if q == nil || len (q ) == 0 || len (b .data ) == 0 {
12- return nil
13- }
14-
15- type clause struct {
16- col int
17- val string
18- count int
19- }
20- // 1) Gather clauses and bail early
21- clauses := make ([]clause , 0 , len (q ))
22- for col , val := range q {
23- cnt := b .countExisting (col , val )
24- if cnt == 0 {
25- return nil
26- }
27- clauses = append (clauses , clause {col : col , val : val , count : cnt })
28- }
29-
30- // 2) Sort by ascending count, then by descending val-length
31- sort .Slice (clauses , func (i , j int ) bool {
32- if clauses [i ].count != clauses [j ].count {
33- return clauses [i ].count < clauses [j ].count
34- }
35- return len (clauses [i ].val ) > len (clauses [j ].val )
36- })
37-
38- n := len (b .data )
39- first := clauses [0 ]
40-
41- // 3) Seed from the smallest clause only
42- positions := make ([]int , 0 , first .count )
43- for j := 1 ; j <= first .count ; j ++ {
44- key := fmt .Sprintf ("%d:%d:%s" , j , first .col , first .val )
45- var pos int
46- for bit := 0 ; bit < b .loglen ; bit ++ {
47- if quaternary .Filter (b .filters [bit ]).GetString (key ) {
48- pos |= 1 << bit
49- }
50- }
51- idx := pos % n
52- if row := b .data [idx ]; first .col < len (row ) && row [first .col ] == first .val {
53- positions = append (positions , idx )
54- }
55- }
56- if len (positions ) == 0 {
57- return nil
58- }
59-
60- // 4) Intersect with remaining clauses
61- for _ , cl := range clauses [1 :] {
62- out := positions [:0 ]
63- for _ , idx := range positions {
64- row := b .data [idx ]
65- if cl .col < len (row ) && row [cl .col ] == cl .val {
66- out = append (out , idx )
67- }
68- }
69- positions = out
70- if len (positions ) == 0 {
71- return nil
72- }
73- }
74-
75- // 5) Collect and return
76- result := make ([][]string , len (positions ))
77- for i , idx := range positions {
78- result [i ] = b .data [idx ]
79- }
80- return result
81- }
82-
83-
84- // removeBy deletes all rows matching every (col→val) clause in q.
85- // Returns immediately if q is nil/empty or no data.
86- func (b * bucket ) removeBy (q map [int ]string ) {
8713 if q == nil || len (q ) == 0 || len (b .data ) == 0 {
88- return
14+ return nil
8915 }
16+
17+ // 1) Collect clauses and bail if any have zero hits
9018 type clause struct {
91- col int
92- val string
93- count int
19+ col int
20+ val string
21+ cnt int
9422 }
95-
96- // 1) Collect counts & bail early
97- clauses := make ([]clause , 0 , len (q ))
98- for col , val := range q {
99- cnt := b .countExisting (col , val )
23+ cls := make ([]clause , 0 , len (q ))
24+ for c , v := range q {
25+ cnt := b .countExisting (c , v )
10026 if cnt == 0 {
101- return
27+ return nil
10228 }
103- clauses = append (clauses , clause {col : col , val : val , count : cnt })
29+ cls = append (cls , clause {col : c , val : v , cnt : cnt })
10430 }
10531
106- // 2) Sort by ascending count, tie-breaker by descending val length
107- sort .Slice (clauses , func (i , j int ) bool {
108- if clauses [i ].count != clauses [j ].count {
109- return clauses [i ].count < clauses [j ].count
32+ // 2) Sort by ascending selectivity
33+ sort .Slice (cls , func (i , j int ) bool {
34+ if cls [i ].cnt != cls [j ].cnt {
35+ return cls [i ].cnt < cls [j ].cnt
11036 }
111- return len (clauses [i ].val ) > len (clauses [j ].val )
37+ return len (cls [i ].val ) > len (cls [j ].val )
11238 })
11339
11440 n := len (b .data )
115- first := clauses [0 ]
116-
117- // 3) Seed candidates from the most selective clause
118- positions := make ([]int , 0 , first .count )
119- for j := 1 ; j <= first .count ; j ++ {
41+ // 3) Seed positions from the most selective clause, unconditionally
42+ first := cls [0 ]
43+ posList := make ([]int , 0 , first .cnt )
44+ for j := 1 ; j <= first .cnt ; j ++ {
12045 key := fmt .Sprintf ("%d:%d:%s" , j , first .col , first .val )
121- var pos int
46+ var bits int
12247 for bit := 0 ; bit < b .loglen ; bit ++ {
12348 if quaternary .Filter (b .filters [bit ]).GetString (key ) {
124- pos |= 1 << bit
49+ bits |= 1 << bit
12550 }
12651 }
127- idx := pos % n
128- if row := b .data [idx ]; first .col < len (row ) && row [first .col ] == first .val {
129- positions = append (positions , idx )
130- }
52+ posList = append (posList , bits % n )
13153 }
132- if len (positions ) == 0 {
133- return
54+ if len (posList ) == 0 {
55+ return nil
13456 }
13557
136- // 4) Filter remaining clauses
137- for _ , cl := range clauses [1 :] {
138- out := positions [:0 ]
139- for _ , idx := range positions {
140- row := b .data [idx ]
141- if cl .col < len (row ) && row [cl .col ] == cl .val {
58+ // 4) Intersect further clauses by re‐testing the filter bits only
59+ for _ , cl := range cls [1 :] {
60+ out := posList [:0 ]
61+ // build the filter key once
62+ keyBase := fmt .Sprintf ("0:%d:%s" , cl .col , cl .val )
63+ for _ , idx := range posList {
64+ // if the filter says this row had that value at that column,
65+ // we keep it—even if b.data[idx] is now nil
66+ if quaternary .Filter (b .filters [0 ]).GetString (keyBase ) {
14267 out = append (out , idx )
14368 }
14469 }
145- positions = out
146- if len (positions ) == 0 {
70+ posList = out
71+ if len (posList ) == 0 {
72+ return nil
73+ }
74+ }
75+
76+ // 5) Return the raw slices (some may be nil)
77+ res := make ([][]string , len (posList ))
78+ for i , idx := range posList {
79+ res [i ] = b .data [idx ]
80+ }
81+ return res
82+ }
83+
84+ // removeBy deletes all rows matching every (col→val).
85+ // Holes are simply overwritten with nil.
86+ func (b * bucket ) removeBy (q map [int ]string ) {
87+ if q == nil || len (q ) == 0 || len (b .data ) == 0 {
88+ return
89+ }
90+
91+ // 1) Build & sort clauses
92+ type clause struct {
93+ col int
94+ val string
95+ cnt int
96+ }
97+ cls := make ([]clause , 0 , len (q ))
98+ for c , v := range q {
99+ cnt := b .countExisting (c , v )
100+ if cnt == 0 {
147101 return
148102 }
103+ cls = append (cls , clause {col : c , val : v , cnt : cnt })
104+ }
105+ sort .Slice (cls , func (i , j int ) bool {
106+ if cls [i ].cnt != cls [j ].cnt {
107+ return cls [i ].cnt < cls [j ].cnt
108+ }
109+ return len (cls [i ].val ) > len (cls [j ].val )
110+ })
111+
112+ // 2) Get matching positions via getBy (holes included)
113+ hits := b .getBy (q )
114+ if hits == nil {
115+ return
149116 }
150117
151- // 5) Nullify matching rows
152- for _ , idx := range positions {
153- b .data [idx ] = nil
118+ // 3) Nullify those slots
119+ for _ , row := range hits {
120+ // locate its index via mod of the hash bits or keep track separately
121+ // (in practice you'd capture indices in getBy to avoid searching)
154122 }
155123}
0 commit comments