
Commit ae1b444

mlugg and achan1989 committed
std.Progress: fix many bugs
There were several bugs with the synchronization here, most notably an ABA problem which was causing ziglang#21663. I fixed that and some other issues, and took the opportunity to get rid of the `.seq_cst` orderings from this file. I'm at least relatively sure my new orderings are correct.

Co-authored-by: achan1989 <[email protected]>
Resolves: ziglang#21663
1 parent bf9b15e commit ae1b444
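
For illustration, here is a minimal, self-contained sketch of the generation-counter technique the commit applies. This is not the actual `std.Progress` code; `max_nodes`, `pop`, and `push` are invented names, and the real implementation keeps its `next` links in `node_freelist_next`. The idea is that the head index and a wrapping generation counter share one `u32`, so a single `@cmpxchgWeak` validates and replaces both at once:

```zig
const std = @import("std");

// Sketch only: a tiny lock-free freelist of indices 0..max_nodes-1.
const max_nodes = 8;

const OptionalIndex = enum(u8) {
    none = std.math.maxInt(u8),
    _,

    fn unwrap(oi: OptionalIndex) ?u8 {
        return if (oi == .none) null else @intFromEnum(oi);
    }
};

/// Freelist head and a generation counter packed into a single u32, so that
/// one `@cmpxchgWeak` atomically checks and swaps both.
const Freelist = packed struct(u32) {
    head: OptionalIndex,
    /// Bumped (wrapping) on every push. A pop that stalled between reading
    /// `head` and its cmpxchg then fails the exchange instead of installing
    /// a stale `next` link (the ABA failure mode).
    generation: u24,
};

var freelist: Freelist = .{ .head = .none, .generation = 0 };
var next: [max_nodes]OptionalIndex = [_]OptionalIndex{.none} ** max_nodes;

fn pop() ?u8 {
    var old = @atomicLoad(Freelist, &freelist, .acquire);
    while (old.head.unwrap()) |index| {
        const new: Freelist = .{
            .head = @atomicLoad(OptionalIndex, &next[index], .monotonic),
            .generation = old.generation, // pops need not bump the generation
        };
        old = @cmpxchgWeak(Freelist, &freelist, old, new, .acquire, .acquire) orelse
            return index; // won the race; `index` is now ours
    }
    return null; // freelist empty
}

fn push(index: u8) void {
    std.debug.assert(index < max_nodes);
    var old = @atomicLoad(Freelist, &freelist, .monotonic);
    while (true) {
        @atomicStore(OptionalIndex, &next[index], old.head, .monotonic);
        const new: Freelist = .{
            .head = @enumFromInt(index),
            .generation = old.generation +% 1, // every push bumps the generation
        };
        // Release so a later pop's acquire sees the `next` store above.
        old = @cmpxchgWeak(Freelist, &freelist, old, new, .release, .monotonic) orelse return;
    }
}
```

Without the `generation` field, a `pop` that stalls between loading `head` and its `@cmpxchgWeak` can succeed against a head that was popped and pushed back in the meantime, installing a stale `next` link. Bumping the counter on every `push` makes that exchange fail and retry instead, which is the ABA fix described above.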

File tree

1 file changed: +89 −53 lines


lib/std/Progress.zig

Lines changed: 89 additions & 53 deletions
```diff
@@ -39,10 +39,20 @@ draw_buffer: []u8,
 /// CPU cache.
 node_parents: []Node.Parent,
 node_storage: []Node.Storage,
-node_freelist: []Node.OptionalIndex,
-node_freelist_first: Node.OptionalIndex,
+node_freelist_next: []Node.OptionalIndex,
+node_freelist: Freelist,
+/// This is the number of elements in node arrays which have been used so far. Nodes before this
+/// index are either active, or on the freelist. The remaining nodes are implicitly free. This
+/// value may at times temporarily exceed the node count.
 node_end_index: u32,
 
+const Freelist = packed struct(u32) {
+    head: Node.OptionalIndex,
+    /// Whenever `node_freelist` is added to, this generation is incremented
+    /// to avoid ABA bugs when acquiring nodes. Wrapping arithmetic is used.
+    generation: u24,
+};
+
 pub const TerminalMode = union(enum) {
     off,
     ansi_escape_codes,
@@ -112,7 +122,7 @@ pub const Node = struct {
             // causes `completed_count` to be treated as a file descriptor, so
             // the order here matters.
             @atomicStore(u32, &s.completed_count, integer, .monotonic);
-            @atomicStore(u32, &s.estimated_total_count, std.math.maxInt(u32), .release);
+            @atomicStore(u32, &s.estimated_total_count, std.math.maxInt(u32), .release); // synchronizes with acquire in `serialize`
         }
 
         /// Not thread-safe.
@@ -184,12 +194,24 @@ pub const Node = struct {
         const node_index = node.index.unwrap() orelse return Node.none;
         const parent = node_index.toParent();
 
-        const freelist_head = &global_progress.node_freelist_first;
-        var opt_free_index = @atomicLoad(Node.OptionalIndex, freelist_head, .seq_cst);
-        while (opt_free_index.unwrap()) |free_index| {
-            const freelist_ptr = freelistByIndex(free_index);
-            const next = @atomicLoad(Node.OptionalIndex, freelist_ptr, .seq_cst);
-            opt_free_index = @cmpxchgWeak(Node.OptionalIndex, freelist_head, opt_free_index, next, .seq_cst, .seq_cst) orelse {
+        const freelist = &global_progress.node_freelist;
+        var old_freelist = @atomicLoad(Freelist, freelist, .acquire); // acquire to ensure we have the correct "next" entry
+        while (old_freelist.head.unwrap()) |free_index| {
+            const next_ptr = freelistNextByIndex(free_index);
+            const new_freelist: Freelist = .{
+                .head = @atomicLoad(Node.OptionalIndex, next_ptr, .monotonic),
+                // We don't need to increment the generation when removing nodes from the free list,
+                // only when adding them. (This choice is arbitrary; the opposite would also work.)
+                .generation = old_freelist.generation,
+            };
+            old_freelist = @cmpxchgWeak(
+                Freelist,
+                freelist,
+                old_freelist,
+                new_freelist,
+                .acquire, // not theoretically necessary, but not allowed to be weaker than the failure order
+                .acquire, // ensure we have the correct `node_freelist_next` entry on the next iteration
+            ) orelse {
                 // We won the allocation race.
                 return init(free_index, parent, name, estimated_total_items);
             };
@@ -243,18 +265,28 @@ pub const Node = struct {
         }
         const index = n.index.unwrap() orelse return;
         const parent_ptr = parentByIndex(index);
-        if (parent_ptr.unwrap()) |parent_index| {
+        if (@atomicLoad(Node.Parent, parent_ptr, .monotonic).unwrap()) |parent_index| {
             _ = @atomicRmw(u32, &storageByIndex(parent_index).completed_count, .Add, 1, .monotonic);
-            @atomicStore(Node.Parent, parent_ptr, .unused, .seq_cst);
+            @atomicStore(Node.Parent, parent_ptr, .unused, .monotonic);
 
-            const freelist_head = &global_progress.node_freelist_first;
-            var first = @atomicLoad(Node.OptionalIndex, freelist_head, .seq_cst);
+            const freelist = &global_progress.node_freelist;
+            var old_freelist = @atomicLoad(Freelist, freelist, .monotonic);
             while (true) {
-                @atomicStore(Node.OptionalIndex, freelistByIndex(index), first, .seq_cst);
-                first = @cmpxchgWeak(Node.OptionalIndex, freelist_head, first, index.toOptional(), .seq_cst, .seq_cst) orelse break;
+                @atomicStore(Node.OptionalIndex, freelistNextByIndex(index), old_freelist.head, .monotonic);
+                old_freelist = @cmpxchgWeak(
+                    Freelist,
+                    freelist,
+                    old_freelist,
+                    .{ .head = index.toOptional(), .generation = old_freelist.generation +% 1 },
+                    .release, // ensure a matching `start` sees the freelist link written above
+                    .monotonic, // our write above is irrelevant if we need to retry
+                ) orelse {
+                    // We won the race.
+                    return;
+                };
             }
         } else {
-            @atomicStore(bool, &global_progress.done, true, .seq_cst);
+            @atomicStore(bool, &global_progress.done, true, .monotonic);
             global_progress.redraw_event.set();
             if (global_progress.update_thread) |thread| thread.join();
         }
@@ -291,8 +323,8 @@ pub const Node = struct {
         return &global_progress.node_parents[@intFromEnum(index)];
     }
 
-    fn freelistByIndex(index: Node.Index) *Node.OptionalIndex {
-        return &global_progress.node_freelist[@intFromEnum(index)];
+    fn freelistNextByIndex(index: Node.Index) *Node.OptionalIndex {
+        return &global_progress.node_freelist_next[@intFromEnum(index)];
     }
 
     fn init(free_index: Index, parent: Parent, name: []const u8, estimated_total_items: usize) Node {
@@ -307,8 +339,10 @@ pub const Node = struct {
         @atomicStore(u8, &storage.name[name_len], 0, .monotonic);
 
         const parent_ptr = parentByIndex(free_index);
-        assert(parent_ptr.* == .unused);
-        @atomicStore(Node.Parent, parent_ptr, parent, .release);
+        if (std.debug.runtime_safety) {
+            assert(@atomicLoad(Node.Parent, parent_ptr, .monotonic) == .unused);
+        }
+        @atomicStore(Node.Parent, parent_ptr, parent, .monotonic);
 
         return .{ .index = free_index.toOptional() };
     }
@@ -329,15 +363,15 @@ var global_progress: Progress = .{
 
     .node_parents = &node_parents_buffer,
     .node_storage = &node_storage_buffer,
-    .node_freelist = &node_freelist_buffer,
-    .node_freelist_first = .none,
+    .node_freelist_next = &node_freelist_next_buffer,
+    .node_freelist = .{ .head = .none, .generation = 0 },
    .node_end_index = 0,
 };
 
 const node_storage_buffer_len = 83;
 var node_parents_buffer: [node_storage_buffer_len]Node.Parent = undefined;
 var node_storage_buffer: [node_storage_buffer_len]Node.Storage = undefined;
-var node_freelist_buffer: [node_storage_buffer_len]Node.OptionalIndex = undefined;
+var node_freelist_next_buffer: [node_storage_buffer_len]Node.OptionalIndex = undefined;
 
 var default_draw_buffer: [4096]u8 = undefined;
 
@@ -456,7 +490,7 @@ fn updateThreadRun() void {
 
     {
         const resize_flag = wait(global_progress.initial_delay_ns);
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst)) return;
+        if (@atomicLoad(bool, &global_progress.done, .monotonic)) return;
         maybeUpdateSize(resize_flag);
 
         const buffer, _ = computeRedraw(&serialized_buffer);
@@ -470,7 +504,7 @@ fn updateThreadRun() void {
     while (true) {
         const resize_flag = wait(global_progress.refresh_rate_ns);
 
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst)) {
+        if (@atomicLoad(bool, &global_progress.done, .monotonic)) {
             stderr_mutex.lock();
             defer stderr_mutex.unlock();
             return clearWrittenWithEscapeCodes() catch {};
@@ -500,7 +534,7 @@ fn windowsApiUpdateThreadRun() void {
 
     {
         const resize_flag = wait(global_progress.initial_delay_ns);
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst)) return;
+        if (@atomicLoad(bool, &global_progress.done, .monotonic)) return;
         maybeUpdateSize(resize_flag);
 
         const buffer, const nl_n = computeRedraw(&serialized_buffer);
@@ -516,7 +550,7 @@ fn windowsApiUpdateThreadRun() void {
     while (true) {
         const resize_flag = wait(global_progress.refresh_rate_ns);
 
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst)) {
+        if (@atomicLoad(bool, &global_progress.done, .monotonic)) {
             stderr_mutex.lock();
             defer stderr_mutex.unlock();
             return clearWrittenWindowsApi() catch {};
@@ -558,7 +592,7 @@ fn ipcThreadRun(fd: posix.fd_t) anyerror!void {
     {
         _ = wait(global_progress.initial_delay_ns);
 
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst))
+        if (@atomicLoad(bool, &global_progress.done, .monotonic))
             return;
 
         const serialized = serialize(&serialized_buffer);
@@ -570,7 +604,7 @@ fn ipcThreadRun(fd: posix.fd_t) anyerror!void {
     while (true) {
        _ = wait(global_progress.refresh_rate_ns);
 
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst))
+        if (@atomicLoad(bool, &global_progress.done, .monotonic))
            return;
 
         const serialized = serialize(&serialized_buffer);
@@ -765,37 +799,39 @@ fn serialize(serialized_buffer: *Serialized.Buffer) Serialized {
     var any_ipc = false;
 
     // Iterate all of the nodes and construct a serializable copy of the state that can be examined
-    // without atomics.
-    const end_index = @atomicLoad(u32, &global_progress.node_end_index, .monotonic);
+    // without atomics. The `@min` call is here because `node_end_index` might briefly exceed the
+    // node count sometimes.
+    const end_index = @min(@atomicLoad(u32, &global_progress.node_end_index, .monotonic), global_progress.node_storage.len);
     for (
         global_progress.node_parents[0..end_index],
         global_progress.node_storage[0..end_index],
         serialized_buffer.map[0..end_index],
     ) |*parent_ptr, *storage_ptr, *map| {
-        var begin_parent = @atomicLoad(Node.Parent, parent_ptr, .acquire);
-        while (begin_parent != .unused) {
-            const dest_storage = &serialized_buffer.storage[serialized_len];
-            copyAtomicLoad(&dest_storage.name, &storage_ptr.name);
-            dest_storage.estimated_total_count = @atomicLoad(u32, &storage_ptr.estimated_total_count, .acquire);
-            dest_storage.completed_count = @atomicLoad(u32, &storage_ptr.completed_count, .monotonic);
-            const end_parent = @atomicLoad(Node.Parent, parent_ptr, .acquire);
-            if (begin_parent == end_parent) {
-                any_ipc = any_ipc or (dest_storage.getIpcFd() != null);
-                serialized_buffer.parents[serialized_len] = begin_parent;
-                map.* = @enumFromInt(serialized_len);
-                serialized_len += 1;
-                break;
-            }
-
-            begin_parent = end_parent;
-        } else {
-            // A node may be freed during the execution of this loop, causing
-            // there to be a parent reference to a nonexistent node. Without
-            // this assignment, this would lead to the map entry containing
-            // stale data. By assigning none, the child node with the bad
-            // parent pointer will be harmlessly omitted from the tree.
+        const parent = @atomicLoad(Node.Parent, parent_ptr, .monotonic);
+        if (parent == .unused) {
+            // We might read "mixed" node data in this loop, due to weird atomic things
+            // or just a node actually being freed while this loop runs. That could cause
+            // there to be a parent reference to a nonexistent node. Without this assignment,
+            // this would lead to the map entry containing stale data. By assigning none, the
+            // child node with the bad parent pointer will be harmlessly omitted from the tree.
+            //
+            // Note that there's no concern of potentially creating "looping" data if we read
+            // "mixed" node data like this, because if a node is (directly or indirectly) its own
+            // parent, it will just not be printed at all. The general idea here is that performance
+            // is more important than 100% correct output every frame, given that this API is likely
+            // to be used in hot paths!
             map.* = .none;
+            continue;
         }
+        const dest_storage = &serialized_buffer.storage[serialized_len];
+        copyAtomicLoad(&dest_storage.name, &storage_ptr.name);
+        dest_storage.estimated_total_count = @atomicLoad(u32, &storage_ptr.estimated_total_count, .acquire); // synchronizes with release in `setIpcFd`
+        dest_storage.completed_count = @atomicLoad(u32, &storage_ptr.completed_count, .monotonic);
 
+        any_ipc = any_ipc or (dest_storage.getIpcFd() != null);
+        serialized_buffer.parents[serialized_len] = parent;
+        map.* = @enumFromInt(serialized_len);
+        serialized_len += 1;
     }
 
     // Remap parents to point inside serialized arrays.
```
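
For context, here is a sketch of how the freelist paths patched above get exercised in ordinary use, assuming the current `std.Progress` API (`Progress.start`, `Node.start`, `Node.end`); the names, counts, and sleep are made up:

```zig
const std = @import("std");

pub fn main() void {
    // Hypothetical driver program; `root_name` and the item count are illustrative.
    const root = std.Progress.start(.{
        .root_name = "building",
        .estimated_total_items = 4,
    });
    // Ending the root node is what sets `done` and joins the update thread
    // (the `else` branch of `Node.end` in the diff above).
    defer root.end();

    for (0..4) |_| {
        // `Node.start` pops a node index off the freelist (the lock-free acquire path above)...
        const step = root.start("step", 0);
        std.Thread.sleep(50 * std.time.ns_per_ms); // stand-in for real work
        // ...and `Node.end` pushes it back, bumping the generation (the release path above).
        step.end();
    }
}
```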
