@@ -7,7 +7,10 @@ import (
77 "github.com/neurlang/quaternary"
88)
99
10+ // getBy returns all rows matching every (col→val) clause in q.
11+ // Returns nil if q is nil or empty, the bucket holds no data, or no row matches.
1012func (b * bucket ) getBy (q map [int ]string ) [][]string {
13+ // Favor nil for empty filters or empty data
1114 if q == nil || len (q ) == 0 || len (b .data ) == 0 {
1215 return nil
1316 }
@@ -17,7 +20,8 @@ func (b *bucket) getBy(q map[int]string) [][]string {
1720 val string
1821 count int
1922 }
20- // 1) Collect counts & bail early
23+
24+ // 1) Collect counts & bail early if any clause has zero matches
2125 clauses := make ([]clause , 0 , len (q ))
2226 for col , val := range q {
2327 cnt := b .countExisting (col , val )
@@ -27,7 +31,7 @@ func (b *bucket) getBy(q map[int]string) [][]string {
2731 clauses = append (clauses , clause {col : col , val : val , count : cnt })
2832 }
2933
30- // 2) Sort by ascending count (then by descending val length)
34+ // 2) Sort by ascending count, tie-breaker by descending val length
3135 sort .Slice (clauses , func (i , j int ) bool {
3236 if clauses [i ].count != clauses [j ].count {
3337 return clauses [i ].count < clauses [j ].count
@@ -39,59 +43,57 @@ func (b *bucket) getBy(q map[int]string) [][]string {
3943 first := clauses [0 ]
4044 var positions []int
4145
42- // 3) Negation optimization: if >50% of rows match, build the complement
46+ // 3) Seed candidate indices from the most selective clause; if it matches more than half of the rows, seed from its complement instead
4347 useNeg := first .count * 2 > n
44-
4548 if useNeg {
46- // build exclusion set E
49+ // exclusion set: indices of rows that match the first clause
4750 exclude := make (map [int ]struct {}, first .count )
4851 for j := 1 ; j <= first .count ; j ++ {
49- key := fmt .Sprint ( j ) + ":" + fmt . Sprint ( first .col ) + ":" + first .val
52+ key := fmt .Sprintf ( "%d:%d:%s" , j , first .col , first .val )
5053 var pos int
5154 for bit := 0 ; bit < b .loglen ; bit ++ {
5255 if quaternary .Filter (b .filters [bit ]).GetString (key ) {
5356 pos |= 1 << bit
5457 }
5558 }
5659 idx := pos % n
57- row := b .data [idx ]
58- if first .col < len (row ) && row [first .col ] == first .val {
60+ if row := b .data [idx ]; first .col < len (row ) && row [first .col ] == first .val {
5961 exclude [idx ] = struct {}{}
6062 }
6163 }
62- // positions = all rows not in exclude
6364 positions = make ([]int , 0 , n - len (exclude ))
6465 for i := range b .data {
6566 if _ , found := exclude [i ]; ! found {
6667 positions = append (positions , i )
6768 }
6869 }
6970 } else {
70- // the usual seed: only those that match
7171 positions = make ([]int , 0 , first .count )
7272 for j := 1 ; j <= first .count ; j ++ {
73- key := fmt .Sprint ( j ) + ":" + fmt . Sprint ( first .col ) + ":" + first .val
73+ key := fmt .Sprintf ( "%d:%d:%s" , j , first .col , first .val )
7474 var pos int
7575 for bit := 0 ; bit < b .loglen ; bit ++ {
7676 if quaternary .Filter (b .filters [bit ]).GetString (key ) {
7777 pos |= 1 << bit
7878 }
7979 }
8080 idx := pos % n
81- row := b .data [idx ]
82- if first .col < len (row ) && row [first .col ] == first .val {
81+ if row := b .data [idx ]; first .col < len (row ) && row [first .col ] == first .val {
8382 positions = append (positions , idx )
8483 }
8584 }
8685 }
8786
88- // 4) For negation, the remaining clauses become exclusion tests.
89- // For normal, they remain inclusion tests.
87+ // Early exit if seed yields no candidates
88+ if len (positions ) == 0 {
89+ return nil
90+ }
91+
92+ // 4) Filter by remaining clauses (exclusion tests when useNeg is set, inclusion tests otherwise)
9093 for _ , cl := range clauses [1 :] {
91- var out [] int
94+ out := positions [: 0 ]
9295 if useNeg {
93- // remove any that *should* be excluded by cl
94- // i.e. if row[col]==val, skip it
96+ // exclude rows matching cl
9597 for _ , idx := range positions {
9698 row := b .data [idx ]
9799 if cl .col < len (row ) && row [cl .col ] == cl .val {
@@ -100,7 +102,7 @@ func (b *bucket) getBy(q map[int]string) [][]string {
100102 out = append (out , idx )
101103 }
102104 } else {
103- // keep only those that match cl
105+ // include only rows matching cl
104106 for _ , idx := range positions {
105107 row := b .data [idx ]
106108 if cl .col < len (row ) && row [cl .col ] == cl .val {
@@ -114,28 +116,30 @@ func (b *bucket) getBy(q map[int]string) [][]string {
114116 }
115117 }
116118
117- // 5) Collect
118- result := make ([][]string , 0 , len (positions ))
119- for _ , idx := range positions {
120- result = append (result , b .data [idx ])
119+ // 5) Collect final rows; if none, return nil
120+ result := make ([][]string , len (positions ))
121+ for i , idx := range positions {
122+ result [i ] = b .data [idx ]
123+ }
124+ if len (result ) == 0 {
125+ return nil
121126 }
122127 return result
123128}
124129
125130// removeBy deletes all rows matching every (col→val) clause in q.
126- // It bails early if any clause has zero matches, seeds from the most selective clause,
127- // then filters in-memory before nulling out matching entries.
131+ // Returns immediately if q is nil or empty, or if the bucket holds no data.
128132func (b * bucket ) removeBy (q map [int ]string ) {
129133 if q == nil || len (q ) == 0 || len (b .data ) == 0 {
130134 return
131135 }
132-
133- // 1) Collect counts & early exit
134136 type clause struct {
135137 col int
136138 val string
137139 count int
138140 }
141+
142+ // 1) Collect counts & bail early
139143 clauses := make ([]clause , 0 , len (q ))
140144 for col , val := range q {
141145 cnt := b .countExisting (col , val )
@@ -145,8 +149,7 @@ func (b *bucket) removeBy(q map[int]string) {
145149 clauses = append (clauses , clause {col : col , val : val , count : cnt })
146150 }
147151
148- // 2) Sort clauses by ascending count (more selective first),
149- // tie-breaking by longer val to favor rare longer strings.
152+ // 2) Sort by ascending count, tie-breaker by descending val length
150153 sort .Slice (clauses , func (i , j int ) bool {
151154 if clauses [i ].count != clauses [j ].count {
152155 return clauses [i ].count < clauses [j ].count
@@ -157,7 +160,7 @@ func (b *bucket) removeBy(q map[int]string) {
157160 n := len (b .data )
158161 first := clauses [0 ]
159162
160- // 3) Seed initial candidate indices from the smallest clause
163+ // 3) Seed candidates from the most selective clause
161164 positions := make ([]int , 0 , first .count )
162165 for j := 1 ; j <= first .count ; j ++ {
163166 key := fmt .Sprintf ("%d:%d:%s" , j , first .col , first .val )
@@ -168,15 +171,15 @@ func (b *bucket) removeBy(q map[int]string) {
168171 }
169172 }
170173 idx := pos % n
171- if idx < n {
172- row := b .data [idx ]
173- if first .col < len (row ) && row [first .col ] == first .val {
174- positions = append (positions , idx )
175- }
174+ if row := b .data [idx ]; first .col < len (row ) && row [first .col ] == first .val {
175+ positions = append (positions , idx )
176176 }
177177 }
178+ if len (positions ) == 0 {
179+ return
180+ }
178181
179- // 4) Filter by remaining clauses in-memory
182+ // 4) Filter remaining clauses
180183 for _ , cl := range clauses [1 :] {
181184 out := positions [:0 ]
182185 for _ , idx := range positions {
@@ -191,7 +194,7 @@ func (b *bucket) removeBy(q map[int]string) {
191194 }
192195 }
193196
194- // 5) Delete matching rows
197+ // 5) Nullify matching rows
195198 for _ , idx := range positions {
196199 b .data [idx ] = nil
197200 }
0 commit comments