@@ -39,10 +39,20 @@ draw_buffer: []u8,
 /// CPU cache.
 node_parents: []Node.Parent,
 node_storage: []Node.Storage,
-node_freelist: []Node.OptionalIndex,
-node_freelist_first: Node.OptionalIndex,
+node_freelist_next: []Node.OptionalIndex,
+node_freelist: Freelist,
+/// This is the number of elements in node arrays which have been used so far. Nodes before this
+/// index are either active, or on the freelist. The remaining nodes are implicitly free. This
+/// value may at times temporarily exceed the node count.
 node_end_index: u32,
 
+const Freelist = packed struct(u32) {
+    head: Node.OptionalIndex,
+    /// Whenever `node_freelist` is added to, this generation is incremented
+    /// to avoid ABA bugs when acquiring nodes. Wrapping arithmetic is used.
+    generation: u24,
+};
+
 pub const TerminalMode = union(enum) {
     off,
     ansi_escape_codes,
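
The generation tag closes the classic ABA window: a thread in `start` reads head = A and next(A) = B; other threads then pop A, pop B, and push A back; without the tag, the first thread's CAS from A to B would succeed and hand out B while B is still in use. Because every push bumps `generation`, that stale CAS now fails. The `packed struct(u32)` layout is what makes this cheap: head and generation share one 32-bit word, so a single CAS covers both. A minimal sketch of that property (illustrative values; assumes the file's usual `assert` import):

const a: Freelist = .{ .head = @enumFromInt(5), .generation = 7 };
const b: Freelist = .{ .head = @enumFromInt(5), .generation = 8 };
comptime {
    // Same head, different generation: two distinct u32 words, so a CAS that
    // expected `a` fails once the generation has moved on to `b`.
    assert(@as(u32, @bitCast(a)) != @as(u32, @bitCast(b)));
}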
@@ -112,7 +122,7 @@ pub const Node = struct {
             // causes `completed_count` to be treated as a file descriptor, so
             // the order here matters.
             @atomicStore(u32, &s.completed_count, integer, .monotonic);
-            @atomicStore(u32, &s.estimated_total_count, std.math.maxInt(u32), .release);
+            @atomicStore(u32, &s.estimated_total_count, std.math.maxInt(u32), .release); // synchronizes with acquire in `serialize`
         }
 
         /// Not thread-safe.
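
The new comment on the `.release` store names one half of a standard publish pattern: `completed_count` (holding the fd) is written first with `.monotonic`, then `estimated_total_count` is set to the max-int sentinel with `.release`, and `serialize` reads the sentinel with `.acquire` before trusting the fd. The same pattern in isolation, with hypothetical `payload`/`flag` names rather than the real fields:

var payload: u32 = 0;
var flag: bool = false;

fn publish(value: u32) void {
    @atomicStore(u32, &payload, value, .monotonic);
    @atomicStore(bool, &flag, true, .release); // pairs with the acquire below
}

fn consume() ?u32 {
    if (!@atomicLoad(bool, &flag, .acquire)) return null;
    // The acquire load above guarantees this sees the published value.
    return @atomicLoad(u32, &payload, .monotonic);
}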
@@ -184,12 +194,24 @@ pub const Node = struct {
         const node_index = node.index.unwrap() orelse return Node.none;
         const parent = node_index.toParent();
 
-        const freelist_head = &global_progress.node_freelist_first;
-        var opt_free_index = @atomicLoad(Node.OptionalIndex, freelist_head, .seq_cst);
-        while (opt_free_index.unwrap()) |free_index| {
-            const freelist_ptr = freelistByIndex(free_index);
-            const next = @atomicLoad(Node.OptionalIndex, freelist_ptr, .seq_cst);
-            opt_free_index = @cmpxchgWeak(Node.OptionalIndex, freelist_head, opt_free_index, next, .seq_cst, .seq_cst) orelse {
+        const freelist = &global_progress.node_freelist;
+        var old_freelist = @atomicLoad(Freelist, freelist, .acquire); // acquire to ensure we have the correct "next" entry
+        while (old_freelist.head.unwrap()) |free_index| {
+            const next_ptr = freelistNextByIndex(free_index);
+            const new_freelist: Freelist = .{
+                .head = @atomicLoad(Node.OptionalIndex, next_ptr, .monotonic),
+                // We don't need to increment the generation when removing nodes from the free list,
+                // only when adding them. (This choice is arbitrary; the opposite would also work.)
+                .generation = old_freelist.generation,
+            };
+            old_freelist = @cmpxchgWeak(
+                Freelist,
+                freelist,
+                old_freelist,
+                new_freelist,
+                .acquire, // not theoretically necessary, but not allowed to be weaker than the failure order
+                .acquire, // ensure we have the correct `node_freelist_next` entry on the next iteration
+            ) orelse {
                 // We won the allocation race.
                 return init(free_index, parent, name, estimated_total_items);
             };
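
Both the success and failure orders of this CAS are `.acquire` because the loop dereferences whatever head it observed: the `node_freelist_next` entry for that head was written by the freeing thread just before its `.release` CAS in `end`, so every load of the freelist word that can feed another iteration has to acquire it. The same loop in self-contained form, over a hypothetical fixed pool of 64 slots (a sketch, not the std.Progress code):

const Pool = struct {
    const Head = packed struct(u32) { index: u8, generation: u24 };
    const nil: u8 = 0xff; // plays the role of Node.OptionalIndex.none

    head: Head = .{ .index = nil, .generation = 0 },
    next: [64]u8 = undefined,

    fn pop(pool: *Pool) ?u8 {
        var old = @atomicLoad(Head, &pool.head, .acquire);
        while (old.index != nil) {
            const new: Head = .{
                // Safe to read: the acquire loads of `head` guarantee this is
                // the link the pushing thread wrote before publishing.
                .index = @atomicLoad(u8, &pool.next[old.index], .monotonic),
                .generation = old.generation, // pops leave the generation alone
            };
            old = @cmpxchgWeak(Head, &pool.head, old, new, .acquire, .acquire) orelse
                return old.index; // CAS succeeded: `old.index` is ours
        }
        return null; // freelist empty
    }
};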
@@ -243,18 +265,28 @@ pub const Node = struct {
         }
         const index = n.index.unwrap() orelse return;
         const parent_ptr = parentByIndex(index);
-        if (parent_ptr.unwrap()) |parent_index| {
+        if (@atomicLoad(Node.Parent, parent_ptr, .monotonic).unwrap()) |parent_index| {
             _ = @atomicRmw(u32, &storageByIndex(parent_index).completed_count, .Add, 1, .monotonic);
-            @atomicStore(Node.Parent, parent_ptr, .unused, .seq_cst);
+            @atomicStore(Node.Parent, parent_ptr, .unused, .monotonic);
 
-            const freelist_head = &global_progress.node_freelist_first;
-            var first = @atomicLoad(Node.OptionalIndex, freelist_head, .seq_cst);
+            const freelist = &global_progress.node_freelist;
+            var old_freelist = @atomicLoad(Freelist, freelist, .monotonic);
             while (true) {
-                @atomicStore(Node.OptionalIndex, freelistByIndex(index), first, .seq_cst);
-                first = @cmpxchgWeak(Node.OptionalIndex, freelist_head, first, index.toOptional(), .seq_cst, .seq_cst) orelse break;
+                @atomicStore(Node.OptionalIndex, freelistNextByIndex(index), old_freelist.head, .monotonic);
+                old_freelist = @cmpxchgWeak(
+                    Freelist,
+                    freelist,
+                    old_freelist,
+                    .{ .head = index.toOptional(), .generation = old_freelist.generation +% 1 },
+                    .release, // ensure a matching `start` sees the freelist link written above
+                    .monotonic, // our write above is irrelevant if we need to retry
+                ) orelse {
+                    // We won the race.
+                    return;
+                };
             }
         } else {
-            @atomicStore(bool, &global_progress.done, true, .seq_cst);
+            @atomicStore(bool, &global_progress.done, true, .monotonic);
             global_progress.redraw_event.set();
             if (global_progress.update_thread) |thread| thread.join();
         }
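
The matching push, continuing the toy pool sketch from above: the link store must precede the CAS, and the `.release` success order is what publishes it. The `+%` wrap on the u24 generation is deliberate; after 2^24 pushes the tag repeats, so in principle a thread stalled across an exact multiple of 2^24 pushes could still be fooled, a residual risk this design accepts as negligible.

fn push(pool: *Pool, index: u8) void {
    var old = @atomicLoad(Pool.Head, &pool.head, .monotonic);
    while (true) {
        // Link first, publish second; the release CAS below is what makes this
        // store visible to a pop that acquires the new head.
        @atomicStore(u8, &pool.next[index], old.index, .monotonic);
        old = @cmpxchgWeak(Pool.Head, &pool.head, old, .{
            .index = index,
            .generation = old.generation +% 1, // every push changes the word: the ABA defense
        }, .release, .monotonic) orelse return;
    }
}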
@@ -291,8 +323,8 @@ pub const Node = struct {
         return &global_progress.node_parents[@intFromEnum(index)];
     }
 
-    fn freelistByIndex(index: Node.Index) *Node.OptionalIndex {
-        return &global_progress.node_freelist[@intFromEnum(index)];
+    fn freelistNextByIndex(index: Node.Index) *Node.OptionalIndex {
+        return &global_progress.node_freelist_next[@intFromEnum(index)];
     }
 
     fn init(free_index: Index, parent: Parent, name: []const u8, estimated_total_items: usize) Node {
@@ -307,8 +339,10 @@ pub const Node = struct {
         @atomicStore(u8, &storage.name[name_len], 0, .monotonic);
 
         const parent_ptr = parentByIndex(free_index);
-        assert(parent_ptr.* == .unused);
-        @atomicStore(Node.Parent, parent_ptr, parent, .release);
+        if (std.debug.runtime_safety) {
+            assert(@atomicLoad(Node.Parent, parent_ptr, .monotonic) == .unused);
+        }
+        @atomicStore(Node.Parent, parent_ptr, parent, .monotonic);
 
         return .{ .index = free_index.toOptional() };
     }
@@ -329,15 +363,15 @@ var global_progress: Progress = .{
 
     .node_parents = &node_parents_buffer,
     .node_storage = &node_storage_buffer,
-    .node_freelist = &node_freelist_buffer,
-    .node_freelist_first = .none,
+    .node_freelist_next = &node_freelist_next_buffer,
+    .node_freelist = .{ .head = .none, .generation = 0 },
     .node_end_index = 0,
 };
 
 const node_storage_buffer_len = 83;
 var node_parents_buffer: [node_storage_buffer_len]Node.Parent = undefined;
 var node_storage_buffer: [node_storage_buffer_len]Node.Storage = undefined;
-var node_freelist_buffer: [node_storage_buffer_len]Node.OptionalIndex = undefined;
+var node_freelist_next_buffer: [node_storage_buffer_len]Node.OptionalIndex = undefined;
 
 var default_draw_buffer: [4096]u8 = undefined;
 
@@ -456,7 +490,7 @@ fn updateThreadRun() void {
 
     {
         const resize_flag = wait(global_progress.initial_delay_ns);
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst)) return;
+        if (@atomicLoad(bool, &global_progress.done, .monotonic)) return;
         maybeUpdateSize(resize_flag);
 
         const buffer, _ = computeRedraw(&serialized_buffer);
@@ -470,7 +504,7 @@ fn updateThreadRun() void {
     while (true) {
         const resize_flag = wait(global_progress.refresh_rate_ns);
 
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst)) {
+        if (@atomicLoad(bool, &global_progress.done, .monotonic)) {
             stderr_mutex.lock();
             defer stderr_mutex.unlock();
             return clearWrittenWithEscapeCodes() catch {};
@@ -500,7 +534,7 @@ fn windowsApiUpdateThreadRun() void {
 
     {
         const resize_flag = wait(global_progress.initial_delay_ns);
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst)) return;
+        if (@atomicLoad(bool, &global_progress.done, .monotonic)) return;
         maybeUpdateSize(resize_flag);
 
         const buffer, const nl_n = computeRedraw(&serialized_buffer);
@@ -516,7 +550,7 @@ fn windowsApiUpdateThreadRun() void {
     while (true) {
         const resize_flag = wait(global_progress.refresh_rate_ns);
 
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst)) {
+        if (@atomicLoad(bool, &global_progress.done, .monotonic)) {
             stderr_mutex.lock();
             defer stderr_mutex.unlock();
             return clearWrittenWindowsApi() catch {};
@@ -558,7 +592,7 @@ fn ipcThreadRun(fd: posix.fd_t) anyerror!void {
     {
         _ = wait(global_progress.initial_delay_ns);
 
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst))
+        if (@atomicLoad(bool, &global_progress.done, .monotonic))
             return;
 
         const serialized = serialize(&serialized_buffer);
@@ -570,7 +604,7 @@ fn ipcThreadRun(fd: posix.fd_t) anyerror!void {
     while (true) {
         _ = wait(global_progress.refresh_rate_ns);
 
-        if (@atomicLoad(bool, &global_progress.done, .seq_cst))
+        if (@atomicLoad(bool, &global_progress.done, .monotonic))
             return;
 
         const serialized = serialize(&serialized_buffer);
@@ -765,37 +799,39 @@ fn serialize(serialized_buffer: *Serialized.Buffer) Serialized {
     var any_ipc = false;
 
     // Iterate all of the nodes and construct a serializable copy of the state that can be examined
-    // without atomics.
-    const end_index = @atomicLoad(u32, &global_progress.node_end_index, .monotonic);
+    // without atomics. The `@min` call is here because `node_end_index` might briefly exceed the
+    // node count sometimes.
+    const end_index = @min(@atomicLoad(u32, &global_progress.node_end_index, .monotonic), global_progress.node_storage.len);
     for (
         global_progress.node_parents[0..end_index],
         global_progress.node_storage[0..end_index],
         serialized_buffer.map[0..end_index],
     ) |*parent_ptr, *storage_ptr, *map| {
-        var begin_parent = @atomicLoad(Node.Parent, parent_ptr, .acquire);
-        while (begin_parent != .unused) {
-            const dest_storage = &serialized_buffer.storage[serialized_len];
-            copyAtomicLoad(&dest_storage.name, &storage_ptr.name);
-            dest_storage.estimated_total_count = @atomicLoad(u32, &storage_ptr.estimated_total_count, .acquire);
-            dest_storage.completed_count = @atomicLoad(u32, &storage_ptr.completed_count, .monotonic);
-            const end_parent = @atomicLoad(Node.Parent, parent_ptr, .acquire);
-            if (begin_parent == end_parent) {
-                any_ipc = any_ipc or (dest_storage.getIpcFd() != null);
-                serialized_buffer.parents[serialized_len] = begin_parent;
-                map.* = @enumFromInt(serialized_len);
-                serialized_len += 1;
-                break;
-            }
-
-            begin_parent = end_parent;
-        } else {
-            // A node may be freed during the execution of this loop, causing
-            // there to be a parent reference to a nonexistent node. Without
-            // this assignment, this would lead to the map entry containing
-            // stale data. By assigning none, the child node with the bad
-            // parent pointer will be harmlessly omitted from the tree.
+        const parent = @atomicLoad(Node.Parent, parent_ptr, .monotonic);
+        if (parent == .unused) {
+            // We might read "mixed" node data in this loop, due to weird atomic things
+            // or just a node actually being freed while this loop runs. That could cause
+            // there to be a parent reference to a nonexistent node. Without this assignment,
+            // this would lead to the map entry containing stale data. By assigning none, the
+            // child node with the bad parent pointer will be harmlessly omitted from the tree.
+            //
+            // Note that there's no concern of potentially creating "looping" data if we read
+            // "mixed" node data like this, because if a node is (directly or indirectly) its own
+            // parent, it will just not be printed at all. The general idea here is that performance
+            // is more important than 100% correct output every frame, given that this API is likely
+            // to be used in hot paths!
             map.* = .none;
+            continue;
         }
+        const dest_storage = &serialized_buffer.storage[serialized_len];
+        copyAtomicLoad(&dest_storage.name, &storage_ptr.name);
+        dest_storage.estimated_total_count = @atomicLoad(u32, &storage_ptr.estimated_total_count, .acquire); // synchronizes with release in `setIpcFd`
+        dest_storage.completed_count = @atomicLoad(u32, &storage_ptr.completed_count, .monotonic);
+
+        any_ipc = any_ipc or (dest_storage.getIpcFd() != null);
+        serialized_buffer.parents[serialized_len] = parent;
+        map.* = @enumFromInt(serialized_len);
+        serialized_len += 1;
     }
 
     // Remap parents to point inside serialized arrays.
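
The `@min` clamp exists because of how slots are reserved when the freelist is empty: the counter is bumped before the bounds check, so it can briefly point past the end of the arrays. A sketch of that slow path's shape (hypothetical fragment of `Node.start`; the rollback detail may differ in the real code):

// Reserve a slot first, check capacity second.
const free_index = @atomicRmw(u32, &global_progress.node_end_index, .Add, 1, .monotonic);
if (free_index >= global_progress.node_storage.len) {
    // Out of preallocated nodes: until this rollback lands, `node_end_index`
    // exceeds the node count, which is exactly what `serialize` clamps against.
    _ = @atomicRmw(u32, &global_progress.node_end_index, .Sub, 1, .monotonic);
    return Node.none;
}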