Skip to content

Commit f9ba483

Browse files
authored
Update bucketby.go
Implement consistent nil semantics and early exits in getBy. Enforce a nil return when the query map is nil/empty or the dataset is empty. Introduce an early exit immediately after seeding candidate positions if none were found. Add a final check to return nil instead of an empty slice when no rows match. Preserve the low-level API's guarantee of nil for the "no filters" and "no matches" cases. Update the method documentation to reflect the corrected behavior.
1 parent 8f40ec9 commit f9ba483

File tree

1 file changed

+40
-37
lines changed

1 file changed

+40
-37
lines changed

bucketby.go

Lines changed: 40 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ import (
77
"github.com/neurlang/quaternary"
88
)
99

10+
// getBy returns all rows matching every (col→val) clause in q.
11+
// It returns nil if q is nil or empty, if the bucket holds no data, or if no rows match.
1012
func (b *bucket) getBy(q map[int]string) [][]string {
13+
// Favor nil for empty filters or empty data
1114
if q == nil || len(q) == 0 || len(b.data) == 0 {
1215
return nil
1316
}
@@ -17,7 +20,8 @@ func (b *bucket) getBy(q map[int]string) [][]string {
1720
val string
1821
count int
1922
}
20-
// 1) Collect counts & bail early
23+
24+
// 1) Collect counts & bail early if any clause has zero matches
2125
clauses := make([]clause, 0, len(q))
2226
for col, val := range q {
2327
cnt := b.countExisting(col, val)
@@ -27,7 +31,7 @@ func (b *bucket) getBy(q map[int]string) [][]string {
2731
clauses = append(clauses, clause{col: col, val: val, count: cnt})
2832
}
2933

30-
// 2) Sort by ascending count (then by descending val length)
34+
// 2) Sort by ascending count, tie-breaker by descending val length
3135
sort.Slice(clauses, func(i, j int) bool {
3236
if clauses[i].count != clauses[j].count {
3337
return clauses[i].count < clauses[j].count
@@ -39,59 +43,57 @@ func (b *bucket) getBy(q map[int]string) [][]string {
3943
first := clauses[0]
4044
var positions []int
4145

42-
// 3) Negation optimization: if >50% of rows match, build the complement
46+
// 3) Seed candidate indices from the most selective clause
4347
useNeg := first.count*2 > n
44-
4548
if useNeg {
46-
// build exclusion set E
49+
// exclusion set
4750
exclude := make(map[int]struct{}, first.count)
4851
for j := 1; j <= first.count; j++ {
49-
key := fmt.Sprint(j) + ":" + fmt.Sprint(first.col) + ":" + first.val
52+
key := fmt.Sprintf("%d:%d:%s", j, first.col, first.val)
5053
var pos int
5154
for bit := 0; bit < b.loglen; bit++ {
5255
if quaternary.Filter(b.filters[bit]).GetString(key) {
5356
pos |= 1 << bit
5457
}
5558
}
5659
idx := pos % n
57-
row := b.data[idx]
58-
if first.col < len(row) && row[first.col] == first.val {
60+
if row := b.data[idx]; first.col < len(row) && row[first.col] == first.val {
5961
exclude[idx] = struct{}{}
6062
}
6163
}
62-
// positions = all rows not in exclude
6364
positions = make([]int, 0, n-len(exclude))
6465
for i := range b.data {
6566
if _, found := exclude[i]; !found {
6667
positions = append(positions, i)
6768
}
6869
}
6970
} else {
70-
// the usual seed: only those that match
7171
positions = make([]int, 0, first.count)
7272
for j := 1; j <= first.count; j++ {
73-
key := fmt.Sprint(j) + ":" + fmt.Sprint(first.col) + ":" + first.val
73+
key := fmt.Sprintf("%d:%d:%s", j, first.col, first.val)
7474
var pos int
7575
for bit := 0; bit < b.loglen; bit++ {
7676
if quaternary.Filter(b.filters[bit]).GetString(key) {
7777
pos |= 1 << bit
7878
}
7979
}
8080
idx := pos % n
81-
row := b.data[idx]
82-
if first.col < len(row) && row[first.col] == first.val {
81+
if row := b.data[idx]; first.col < len(row) && row[first.col] == first.val {
8382
positions = append(positions, idx)
8483
}
8584
}
8685
}
8786

88-
// 4) For negation, the remaining clauses become exclusion tests.
89-
// For normal, they remain inclusion tests.
87+
// Early exit if seed yields no candidates
88+
if len(positions) == 0 {
89+
return nil
90+
}
91+
92+
// 4) Filter by remaining clauses
9093
for _, cl := range clauses[1:] {
91-
var out []int
94+
out := positions[:0]
9295
if useNeg {
93-
// remove any that *should* be excluded by cl
94-
// i.e. if row[col]==val, skip it
96+
// exclude rows matching cl
9597
for _, idx := range positions {
9698
row := b.data[idx]
9799
if cl.col < len(row) && row[cl.col] == cl.val {
@@ -100,7 +102,7 @@ func (b *bucket) getBy(q map[int]string) [][]string {
100102
out = append(out, idx)
101103
}
102104
} else {
103-
// keep only those that match cl
105+
// include only rows matching cl
104106
for _, idx := range positions {
105107
row := b.data[idx]
106108
if cl.col < len(row) && row[cl.col] == cl.val {
@@ -114,28 +116,30 @@ func (b *bucket) getBy(q map[int]string) [][]string {
114116
}
115117
}
116118

117-
// 5) Collect
118-
result := make([][]string, 0, len(positions))
119-
for _, idx := range positions {
120-
result = append(result, b.data[idx])
119+
// 5) Collect final rows; if none, return nil
120+
result := make([][]string, len(positions))
121+
for i, idx := range positions {
122+
result[i] = b.data[idx]
123+
}
124+
if len(result) == 0 {
125+
return nil
121126
}
122127
return result
123128
}
124129

125130
// removeBy deletes all rows matching every (col→val) clause in q.
126-
// It bails early if any clause has zero matches, seeds from the most selective clause,
127-
// then filters in-memory before nulling out matching entries.
131+
// It returns immediately if q is nil or empty, or if the bucket holds no data.
128132
func (b *bucket) removeBy(q map[int]string) {
129133
if q == nil || len(q) == 0 || len(b.data) == 0 {
130134
return
131135
}
132-
133-
// 1) Collect counts & early exit
134136
type clause struct {
135137
col int
136138
val string
137139
count int
138140
}
141+
142+
// 1) Collect counts & bail early
139143
clauses := make([]clause, 0, len(q))
140144
for col, val := range q {
141145
cnt := b.countExisting(col, val)
@@ -145,8 +149,7 @@ func (b *bucket) removeBy(q map[int]string) {
145149
clauses = append(clauses, clause{col: col, val: val, count: cnt})
146150
}
147151

148-
// 2) Sort clauses by ascending count (more selective first),
149-
// tie-breaking by longer val to favor rare longer strings.
152+
// 2) Sort by ascending count, tie-breaker by descending val length
150153
sort.Slice(clauses, func(i, j int) bool {
151154
if clauses[i].count != clauses[j].count {
152155
return clauses[i].count < clauses[j].count
@@ -157,7 +160,7 @@ func (b *bucket) removeBy(q map[int]string) {
157160
n := len(b.data)
158161
first := clauses[0]
159162

160-
// 3) Seed initial candidate indices from the smallest clause
163+
// 3) Seed candidates from the most selective clause
161164
positions := make([]int, 0, first.count)
162165
for j := 1; j <= first.count; j++ {
163166
key := fmt.Sprintf("%d:%d:%s", j, first.col, first.val)
@@ -168,15 +171,15 @@ func (b *bucket) removeBy(q map[int]string) {
168171
}
169172
}
170173
idx := pos % n
171-
if idx < n {
172-
row := b.data[idx]
173-
if first.col < len(row) && row[first.col] == first.val {
174-
positions = append(positions, idx)
175-
}
174+
if row := b.data[idx]; first.col < len(row) && row[first.col] == first.val {
175+
positions = append(positions, idx)
176176
}
177177
}
178+
if len(positions) == 0 {
179+
return
180+
}
178181

179-
// 4) Filter by remaining clauses in-memory
182+
// 4) Filter remaining clauses
180183
for _, cl := range clauses[1:] {
181184
out := positions[:0]
182185
for _, idx := range positions {
@@ -191,7 +194,7 @@ func (b *bucket) removeBy(q map[int]string) {
191194
}
192195
}
193196

194-
// 5) Delete matching rows
197+
// 5) Nullify matching rows
195198
for _, idx := range positions {
196199
b.data[idx] = nil
197200
}

0 commit comments

Comments
 (0)