Skip to content

Commit a727a16

Browse files
authored
refactor(array_hash_set): streamline set operations and fix containsAll (#15)
- Reimplement managed set operations using unmanaged counterparts - Fix incorrect element check in unmanaged.containsAll - Add benchmark tests for common operations - Simplify cloneWithAllocator implementation The containsAll bugfix was needed because the unmanaged version's iterator returns Entry pointers (key/value pairs), but was checking for the Entry itself rather than the entry's key. The corrected line properly accesses the key via el.key_ptr.*. Signed-off-by: Ville Vesilehto <[email protected]>
1 parent c22dcf5 commit a727a16

File tree

2 files changed

+89
-89
lines changed

2 files changed

+89
-89
lines changed

src/array_hash_set/managed.zig

Lines changed: 88 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -72,18 +72,20 @@ pub fn ArraySetManaged(comptime E: type) type {
7272
/// A bool is returned indicating if the element was actually added
7373
/// if not already known.
7474
pub fn addAssumeCapacity(self: *Self, element: E) bool {
75-
const prevCount = self.unmanaged.cardinality();
76-
self.unmanaged.putAssumeCapacity(self.allocator, element, {});
77-
return prevCount != self.unmanaged.cardinality();
75+
return self.unmanaged.add(self.allocator, element) catch unreachable;
7876
}
7977

8078
/// Appends all elements from the provided set, and may allocate.
8179
/// append returns an Allocator.Error or Size which represents how
8280
/// many elements added and not previously in the Set.
8381
pub fn append(self: *Self, other: Self) Allocator.Error!Size {
8482
const prevCount = self.unmanaged.cardinality();
85-
86-
try self.unionUpdate(self.allocator, other);
83+
// Directly access the underlying map instead of using unionUpdate
84+
// We avoid double existence/capacity checks by accessing map directly
85+
var iter = other.unmanaged.iterator();
86+
while (iter.next()) |entry| {
87+
_ = try self.unmanaged.put(self.allocator, entry.key_ptr.*, {});
88+
}
8789
return self.unmanaged.cardinality() - prevCount;
8890
}
8991

@@ -133,16 +135,12 @@ pub fn ArraySetManaged(comptime E: type) type {
133135
/// Creates a copy of this set, using a specified allocator.
134136
/// cloneWithAllocator may be return an Allocator.Error or the cloned Set.
135137
pub fn cloneWithAllocator(self: *Self, allocator: Allocator) Allocator.Error!Self {
136-
// Since we're borrowing the internal map allocator, temporarily back it up.
137-
const prevAllocator = self.allocator;
138-
// Restore it at the end of the func, because the self.unmanaged should use the
139-
// original allocator.
140-
defer self.allocator = prevAllocator;
141-
142-
// The cloned map must use and refer to the new allocator only.
143-
self.allocator = allocator;
144-
const cloneSelf = try self.clone();
145-
return cloneSelf;
138+
// Directly clone the unmanaged structure with the new allocator
139+
const clonedUnmanaged = try self.unmanaged.cloneWithAllocator(allocator);
140+
return Self{
141+
.allocator = allocator,
142+
.unmanaged = clonedUnmanaged,
143+
};
146144
}
147145

148146
/// Returns true when the provided element exists within the Set otherwise false.
@@ -153,7 +151,7 @@ pub fn ArraySetManaged(comptime E: type) type {
153151
/// Returns true when all elements in the other Set are present in this Set
154152
/// otherwise false.
155153
pub fn containsAll(self: Self, other: Self) bool {
156-
return self.unmanaged.containsAll(other);
154+
return self.unmanaged.containsAll(other.unmanaged);
157155
}
158156

159157
/// Returns true when all elements in the provided slice are present otherwise false.
@@ -164,13 +162,8 @@ pub fn ArraySetManaged(comptime E: type) type {
164162
/// Returns true when at least one or more elements from the other Set exist within
165163
/// this Set otherwise false.
166164
pub fn containsAny(self: Self, other: Self) bool {
167-
var iter = other.iterator();
168-
while (iter.next()) |el| {
169-
if (self.unmanaged.contains(el.*)) {
170-
return true;
171-
}
172-
}
173-
return false;
165+
// Delegate to the unmanaged implementation which might have optimizations
166+
return self.unmanaged.containsAny(other.unmanaged);
174167
}
175168

176169
/// Returns true when at least one or more elements from the slice exist within
@@ -191,15 +184,12 @@ pub fn ArraySetManaged(comptime E: type) type {
191184
///
192185
/// Caller owns the newly allocated/returned set.
193186
pub fn differenceOf(self: Self, other: Self) Allocator.Error!Self {
194-
var diffSet = Self.init(self.allocator);
195-
196-
var iter = self.unmanaged.iterator();
197-
while (iter.next()) |pVal| {
198-
if (!other.unmanaged.contains(pVal.key_ptr.*)) {
199-
_ = try diffSet.add(pVal.key_ptr.*);
200-
}
201-
}
202-
return diffSet;
187+
// Delegate to unmanaged implementation to avoid double iteration
188+
const diffUnmanaged = try self.unmanaged.differenceOf(self.allocator, other.unmanaged);
189+
return Self{
190+
.allocator = self.allocator,
191+
.unmanaged = diffUnmanaged,
192+
};
203193
}
204194

205195
/// differenceUpdate does an in-place mutation of this set
@@ -272,25 +262,11 @@ pub fn ArraySetManaged(comptime E: type) type {
272262
///
273263
/// Caller owns the newly allocated/returned set.
274264
pub fn intersectionOf(self: Self, other: Self) Allocator.Error!Self {
275-
var interSet = Self.init(self.allocator);
276-
277-
// Optimization: iterate over whichever set is smaller.
278-
// Matters when disparity in cardinality is large.
279-
var s = other;
280-
var o = self;
281-
if (self.unmanaged.cardinality() < other.unmanaged.cardinality()) {
282-
s = self;
283-
o = other;
284-
}
285-
286-
var iter = s.unmanaged.iterator();
287-
while (iter.next()) |pVal| {
288-
if (o.unmanaged.contains(pVal.key_ptr.*)) {
289-
_ = try interSet.add(pVal.key_ptr.*);
290-
}
291-
}
292-
293-
return interSet;
265+
const interUnmanaged = try self.unmanaged.intersectionOf(self.allocator, other.unmanaged);
266+
return Self{
267+
.allocator = self.allocator,
268+
.unmanaged = interUnmanaged,
269+
};
294270
}
295271

296272
/// intersectionUpdate does an in-place intersecting update
@@ -412,23 +388,12 @@ pub fn ArraySetManaged(comptime E: type) type {
412388
///
413389
/// The caller owns the newly allocated/returned Set.
414390
pub fn symmetricDifferenceOf(self: Self, other: Self) Allocator.Error!Self {
415-
var sdSet = Self.init(self.allocator);
416-
417-
var iter = self.unmanaged.iterator();
418-
while (iter.next()) |pVal| {
419-
if (!other.unmanaged.contains(pVal.key_ptr.*)) {
420-
_ = try sdSet.add(pVal.key_ptr.*);
421-
}
422-
}
423-
424-
iter = other.unmanaged.iterator();
425-
while (iter.next()) |pVal| {
426-
if (!self.unmanaged.contains(pVal.key_ptr.*)) {
427-
_ = try sdSet.add(pVal.key_ptr.*);
428-
}
429-
}
430-
431-
return sdSet;
391+
// Use optimized unmanaged implementation
392+
const sdUnmanaged = try self.unmanaged.symmetricDifferenceOf(self.allocator, other.unmanaged);
393+
return Self{
394+
.allocator = self.allocator,
395+
.unmanaged = sdUnmanaged,
396+
};
432397
}
433398

434399
/// symmetricDifferenceUpdate does an in-place mutation with all elements
@@ -455,26 +420,11 @@ pub fn ArraySetManaged(comptime E: type) type {
455420
///
456421
/// The caller owns the newly allocated/returned Set.
457422
pub fn unionOf(self: Self, other: Self) Allocator.Error!Self {
458-
// Sniff out larger set for capacity hint.
459-
var n = self.unmanaged.cardinality();
460-
if (other.unmanaged.cardinality() > n) n = other.unmanaged.cardinality();
461-
462-
var uSet = try Self.initCapacity(
463-
self.allocator,
464-
@intCast(n),
465-
);
466-
467-
var iter = self.unmanaged.iterator();
468-
while (iter.next()) |pVal| {
469-
_ = try uSet.add(pVal.key_ptr.*);
470-
}
471-
472-
iter = other.unmanaged.iterator();
473-
while (iter.next()) |pVal| {
474-
_ = try uSet.add(pVal.key_ptr.*);
475-
}
476-
477-
return uSet;
423+
const unionUnmanaged = try self.unmanaged.unionOf(self.allocator, other.unmanaged);
424+
return Self{
425+
.allocator = self.allocator,
426+
.unmanaged = unionUnmanaged,
427+
};
478428
}
479429

480430
/// unionUpdate does an in-place union of the current Set and other Set.
@@ -834,3 +784,53 @@ test "sizeOf" {
834784
const expectedDiff = 16;
835785
try expectEqual(expectedDiff, managedSize - unmanagedSize);
836786
}
787+
788+
test "benchmark" {
789+
const allocator = std.testing.allocator;
790+
const Iterations = 10_000;
791+
const SetSize = 1000;
792+
793+
// Setup
794+
var base = try ArraySetManaged(u32).initCapacity(allocator, SetSize);
795+
defer base.deinit();
796+
for (0..SetSize) |i| _ = base.addAssumeCapacity(@intCast(i));
797+
798+
var other = try ArraySetManaged(u32).initCapacity(allocator, SetSize);
799+
defer other.deinit();
800+
for (0..SetSize) |i| _ = other.addAssumeCapacity(@intCast(i + SetSize / 2));
801+
802+
// Benchmark unionOf
803+
var union_timer = try std.time.Timer.start();
804+
for (0..Iterations) |_| {
805+
var result = try base.unionOf(other);
806+
defer result.deinit();
807+
}
808+
const union_elapsed = union_timer.read();
809+
std.debug.print("\nunionOf: {d} ops/sec ({d:.2} ns/op)\n", .{
810+
Iterations * std.time.ns_per_s / union_elapsed,
811+
@as(f64, @floatFromInt(union_elapsed)) / @as(f64, @floatFromInt(Iterations)),
812+
});
813+
814+
// Benchmark intersectionOf
815+
var inter_timer = try std.time.Timer.start();
816+
for (0..Iterations) |_| {
817+
var result = try base.intersectionOf(other);
818+
defer result.deinit();
819+
}
820+
const inter_elapsed = inter_timer.read();
821+
std.debug.print("intersectionOf: {d} ops/sec ({d:.2} ns/op)\n", .{
822+
Iterations * std.time.ns_per_s / inter_elapsed,
823+
@as(f64, @floatFromInt(inter_elapsed)) / @as(f64, @floatFromInt(Iterations)),
824+
});
825+
826+
// Benchmark containsAll
827+
var contains_timer = try std.time.Timer.start();
828+
for (0..Iterations) |_| {
829+
_ = base.containsAll(other);
830+
}
831+
const contains_elapsed = contains_timer.read();
832+
std.debug.print("containsAll: {d} ops/sec ({d:.2} ns/op)\n", .{
833+
Iterations * std.time.ns_per_s / contains_elapsed,
834+
@as(f64, @floatFromInt(contains_elapsed)) / @as(f64, @floatFromInt(Iterations)),
835+
});
836+
}

src/array_hash_set/unmanaged.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ pub fn ArraySetUnmanaged(comptime E: type) type {
146146
pub fn containsAll(self: Self, other: Self) bool {
147147
var iter = other.iterator();
148148
while (iter.next()) |el| {
149-
if (!self.unmanaged.contains(el.*)) {
149+
if (!self.unmanaged.contains(el.key_ptr.*)) {
150150
return false;
151151
}
152152
}

0 commit comments

Comments
 (0)