Skip to content

Commit ef1b896

Browse files
committed
Matt comments
1 parent 26fb1aa commit ef1b896

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

libc/src/__support/GPU/allocator.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,14 @@ static inline constexpr T round_up(const T x) {
129129
return (x + N) & ~(N - 1);
130130
}
131131

132+
// Perform a lane parallel memset on a uint32_t pointer.
//
// Stores the value `c` into elements of `s` with index below `n` (`n` is a
// count of uint32_t elements, not bytes). Each calling lane starts at the
// index returned by impl::lane_count(mask & uniform) and then advances by the
// number of bits set in `uniform`, so the stores are split between the lanes
// named in `uniform`.
// NOTE(review): presumably impl::lane_count yields this lane's rank among the
// lanes in its argument, which would make the lanes cover [0, n) without
// overlap - confirm against its definition.
// NOTE(review): if `uniform` has no bits set, `workers` is presumably zero and
// the loop index would never advance; callers look to be expected to pass a
// non-empty mask - confirm.
133+
void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) {
134+
uint64_t mask = gpu::get_lane_mask(); // lane mask reported by the gpu helper
135+
uint32_t workers = cpp::popcount(uniform); // stride: bits set in `uniform`
136+
for (uint32_t i = impl::lane_count(mask & uniform); i < n; i += workers)
137+
s[i] = c; // one uint32_t store per loop iteration
138+
}
139+
132140
} // namespace impl
133141

134142
/// A slab allocator used to hand out identically sized slabs of memory.
@@ -163,13 +171,9 @@ struct Slab {
163171
// must be called before the bitfield can be accessed safely, memory is not
164172
// guaranteed to be zero initialized in the current implementation.
165173
void initialize(uint64_t uniform) { // `uniform` is forwarded unchanged to impl::uniform_memset
166-
uint64_t mask = gpu::get_lane_mask();
167-
uint32_t *bitfield = get_bitfield();
168-
uint32_t workers = cpp::popcount(uniform);
169-
uint32_t words = (bitfield_bytes(get_chunk_size()) + sizeof(uint32_t) - 1) /
170-
sizeof(uint32_t);
171-
for (uint32_t i = impl::lane_count(mask & uniform); i < words; i += workers)
172-
bitfield[i] = 0;
174+
uint32_t size = (bitfield_bytes(get_chunk_size()) + sizeof(uint32_t) - 1) /
175+
sizeof(uint32_t); // bitfield_bytes(...) divided by sizeof(uint32_t), rounded up
176+
impl::uniform_memset(get_bitfield(), 0, size, uniform); // store 0 to `size` words of the bitfield
173177
}
174178

175179
// Get the number of chunks that can theoretically fit inside this slab.

0 commit comments

Comments
 (0)