@@ -62,8 +62,11 @@
//! You can refer to test cases in weak_memory/extra_cpp.rs and weak_memory/extra_cpp_unsafe.rs for examples of these operations.

// Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3:
-// 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri),
-// but this is not used anywhere so it's omitted here.
+// 1. In the operational semantics, loads acquire the vector clock of the atomic location
+// irrespective of which store buffer element is loaded. That's incorrect; the synchronization clock
+// needs to be tracked per-store-buffer-element. (The paper has a field "clocks" for that purpose,
+// but it is not actually used.) tsan11 does this correctly
+// (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L305).
//
// 2. In the operational semantics, each store element keeps the timestamp of a thread when it loads from the store.
// If the same thread loads from the same store element multiple times, then the timestamps at all loads are saved in a list of load elements.
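The first deviation is easiest to see on the classic message-passing litmus test, using only plain `std::sync::atomic` (nothing below is Miri-specific). An acquire load that reads the stale buffered `0` from `FLAG` must acquire only that element's (empty) synchronization clock; acquiring a location-wide clock would wrongly synchronize with the release store even when its value was not read:

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

static DATA: AtomicUsize = AtomicUsize::new(0);
static FLAG: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let writer = thread::spawn(|| {
        DATA.store(42, Ordering::Relaxed);
        FLAG.store(1, Ordering::Release); // buffered together with the writer's clock
    });
    let reader = thread::spawn(|| {
        // This load may read either buffered element of FLAG: the initial 0
        // (whose sync clock is empty, so nothing is acquired and DATA may
        // still appear as 0) or the release store's 1 (whose sync clock makes
        // the write to DATA visible).
        if FLAG.load(Ordering::Acquire) == 1 {
            assert_eq!(DATA.load(Ordering::Relaxed), 42);
        }
    });
    writer.join().unwrap();
    reader.join().unwrap();
}
```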
@@ -138,16 +141,17 @@ enum LoadRecency {

#[derive(Debug, Clone, PartialEq, Eq)]
struct StoreElement {
-    /// The identifier of the vector index, corresponding to a thread
-    /// that performed the store.
-    store_index: VectorIdx,
+    /// The thread that performed the store.
+    store_thread: VectorIdx,
+    /// The timestamp of the storing thread when it performed the store
+    store_timestamp: VTimestamp,
+
+    /// The vector clock that can be acquired by loading this store.
+    sync_clock: VClock,

    /// Whether this store is SC.
    is_seqcst: bool,

-    /// The timestamp of the storing thread when it performed the store
-    timestamp: VTimestamp,
-
    /// The value of this store. `None` means uninitialized.
    // FIXME: Currently, we cannot represent partial initialization.
    val: Option<Scalar>,
@@ -159,9 +163,12 @@ struct StoreElement {

#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct LoadInfo {
-    /// Timestamp of first loads from this store element by each thread
+    /// Timestamp of first loads from this store element by each thread.
    timestamps: FxHashMap<VectorIdx, VTimestamp>,
-    /// Whether this store element has been read by an SC load
+    /// Whether this store element has been read by an SC load.
+    /// This is crucial to ensure we respect coherence-ordered-before. Concretely we use
+    /// this to ensure that if a store element is seen by an SC load, then all later SC loads
+    /// cannot see `mo`-earlier store elements.
    sc_loaded: bool,
}

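A minimal sketch of what `sc_loaded` buys us, with hypothetical simplified types rather than Miri's actual search code: once any SC load has read some element, a later SC load may only pick that element or something later in modification order.

```rust
#[derive(Default)]
struct Elem {
    val: u64,
    sc_loaded: bool, // set when an SC load reads this element
}

/// First index a new SC load may read from: everything before the last
/// `sc_loaded` element is mo-earlier than something an SC load has seen.
fn first_sc_candidate(buffer: &[Elem]) -> usize {
    buffer.iter().rposition(|e| e.sc_loaded).unwrap_or(0)
}

fn main() {
    let mut buf = vec![
        Elem { val: 1, ..Default::default() },
        Elem { val: 2, ..Default::default() },
        Elem { val: 3, ..Default::default() },
    ];
    // An SC load reads the middle element (val == 2)...
    buf[1].sc_loaded = true;
    // ...so later SC loads may read val == 2 or val == 3, but never val == 1.
    assert_eq!(first_sc_candidate(&buf), 1);
}
```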
@@ -243,8 +250,10 @@ impl<'tcx> StoreBuffer {
        let store_elem = StoreElement {
            // The thread index and timestamp of the initialisation write
            // are never meaningfully used, so it's fine to leave them as 0
-            store_index: VectorIdx::from(0),
-            timestamp: VTimestamp::ZERO,
+            store_thread: VectorIdx::from(0),
+            store_timestamp: VTimestamp::ZERO,
+            // The initialization write is non-atomic so nothing can be acquired.
+            sync_clock: VClock::default(),
            val: init,
            is_seqcst: false,
            load_info: RefCell::new(LoadInfo::default()),
@@ -273,7 +282,7 @@ impl<'tcx> StoreBuffer {
        thread_mgr: &ThreadManager<'_>,
        is_seqcst: bool,
        rng: &mut (impl rand::Rng + ?Sized),
-        validate: impl FnOnce() -> InterpResult<'tcx>,
+        validate: impl FnOnce(Option<&VClock>) -> InterpResult<'tcx>,
    ) -> InterpResult<'tcx, (Option<Scalar>, LoadRecency)> {
        // Having a live borrow to store_buffer while calling validate_atomic_load is fine
        // because the race detector doesn't touch store_buffer
@@ -290,7 +299,7 @@ impl<'tcx> StoreBuffer {
            // after we've picked a store element from the store buffer, as presented
            // in ATOMIC LOAD rule of the paper. This is because fetch_store
            // requires access to ThreadClockSet.clock, which is updated by the race detector
-            validate()?;
+            validate(Some(&store_elem.sync_clock))?;

            let (index, clocks) = global.active_thread_state(thread_mgr);
            let loaded = store_elem.load_impl(index, &clocks, is_seqcst);
@@ -303,10 +312,11 @@ impl<'tcx> StoreBuffer {
        global: &DataRaceState,
        thread_mgr: &ThreadManager<'_>,
        is_seqcst: bool,
+        sync_clock: VClock,
    ) -> InterpResult<'tcx> {
        let (index, clocks) = global.active_thread_state(thread_mgr);

-        self.store_impl(val, index, &clocks.clock, is_seqcst);
+        self.store_impl(val, index, &clocks.clock, is_seqcst, sync_clock);
        interp_ok(())
    }

@@ -333,7 +343,9 @@ impl<'tcx> StoreBuffer {
                return false;
            }

-            keep_searching = if store_elem.timestamp <= clocks.clock[store_elem.store_index] {
+            keep_searching = if store_elem.store_timestamp
+                <= clocks.clock[store_elem.store_thread]
+            {
                // CoWR: if a store happens-before the current load,
                // then we can't read-from anything earlier in modification order.
                // C++20 §6.9.2.2 [intro.races] paragraph 18
@@ -345,15 +357,15 @@ impl<'tcx> StoreBuffer {
                // then we cannot read-from anything earlier in modification order.
                // C++20 §6.9.2.2 [intro.races] paragraph 16
                false
-            } else if store_elem.timestamp <= clocks.write_seqcst[store_elem.store_index]
+            } else if store_elem.store_timestamp <= clocks.write_seqcst[store_elem.store_thread]
                && store_elem.is_seqcst
            {
                // The current non-SC load, which may be sequenced-after an SC fence,
                // cannot read-before the last SC store executed before the fence.
                // C++17 §32.4 [atomics.order] paragraph 4
                false
            } else if is_seqcst
-                && store_elem.timestamp <= clocks.read_seqcst[store_elem.store_index]
+                && store_elem.store_timestamp <= clocks.read_seqcst[store_elem.store_thread]
            {
                // The current SC load cannot read-before the last store sequenced-before
                // the last SC fence.
@@ -391,17 +403,19 @@ impl<'tcx> StoreBuffer {
        }
    }

-    /// ATOMIC STORE IMPL in the paper (except we don't need the location's vector clock)
+    /// ATOMIC STORE IMPL in the paper
    fn store_impl(
        &mut self,
        val: Scalar,
        index: VectorIdx,
        thread_clock: &VClock,
        is_seqcst: bool,
+        sync_clock: VClock,
    ) {
        let store_elem = StoreElement {
-            store_index: index,
-            timestamp: thread_clock[index],
+            store_thread: index,
+            store_timestamp: thread_clock[index],
+            sync_clock,
            // In the language provided in the paper, an atomic store takes the value from a
            // non-atomic memory location.
            // But we already have the immediate value here so we don't need to do the memory
@@ -419,7 +433,7 @@ impl<'tcx> StoreBuffer {
        // so that in a later SC load, only the last SC store (i.e. this one) or stores that
        // aren't ordered by hb with the last SC is picked.
        self.buffer.iter_mut().rev().for_each(|elem| {
-            if elem.timestamp <= thread_clock[elem.store_index] {
+            if elem.store_timestamp <= thread_clock[elem.store_thread] {
                elem.is_seqcst = true;
            }
        })
@@ -462,7 +476,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
        let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?;
        if let (
            crate::AllocExtra {
-                data_race: AllocDataRaceHandler::Vclocks(_, Some(alloc_buffers)),
+                data_race: AllocDataRaceHandler::Vclocks(data_race_clocks, Some(alloc_buffers)),
                ..
            },
            crate::MiriMachine {
@@ -475,19 +489,29 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                global.sc_write(threads);
            }
            let range = alloc_range(base_offset, place.layout.size);
+            let sync_clock = data_race_clocks.sync_clock(range);
            let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Some(init))?;
+            // The RMW always reads from the most recent store.
            buffer.read_from_last_store(global, threads, atomic == AtomicRwOrd::SeqCst);
-            buffer.buffered_write(new_val, global, threads, atomic == AtomicRwOrd::SeqCst)?;
+            buffer.buffered_write(
+                new_val,
+                global,
+                threads,
+                atomic == AtomicRwOrd::SeqCst,
+                sync_clock,
+            )?;
        }
        interp_ok(())
    }

+    /// The argument to `validate` is the synchronization clock of the memory that is being read,
+    /// if we are reading from a store buffer element.
    fn buffered_atomic_read(
        &self,
        place: &MPlaceTy<'tcx>,
        atomic: AtomicReadOrd,
        latest_in_mo: Scalar,
-        validate: impl FnOnce() -> InterpResult<'tcx>,
+        validate: impl FnOnce(Option<&VClock>) -> InterpResult<'tcx>,
    ) -> InterpResult<'tcx, Option<Scalar>> {
        let this = self.eval_context_ref();
        'fallback: {
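The new callback shape can be seen in a small standalone sketch (hypothetical stand-ins; Miri's real `validate` also runs the race detector): the buffer picks an element first, then hands that element's clock to the callback, with `None` meaning no store buffer element was involved.

```rust
type VClock = Vec<u64>;

fn buffered_read(
    chosen_sync_clock: Option<&VClock>,
    validate: impl FnOnce(Option<&VClock>) -> Result<(), String>,
) -> Result<(), String> {
    // The store-buffer machinery invokes `validate` exactly once, passing
    // the chosen element's synchronization clock.
    validate(chosen_sync_clock)
}

fn main() -> Result<(), String> {
    let elem_clock: VClock = vec![1, 2];
    let mut thread_clock: VClock = vec![0, 5];
    buffered_read(Some(&elem_clock), |sync| {
        // An acquire load would join the element's clock into its own here;
        // with `None` (no store buffer) there is nothing to acquire.
        if let Some(sync) = sync {
            for (t, s) in thread_clock.iter_mut().zip(sync) {
                *t = (*t).max(*s);
            }
        }
        Ok(())
    })?;
    assert_eq!(thread_clock, vec![1, 5]);
    Ok(())
}
```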
@@ -525,14 +549,16 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
        }

        // Race detector or weak memory disabled, simply read the latest value
-        validate()?;
+        validate(None)?;
        interp_ok(Some(latest_in_mo))
    }

    /// Add the given write to the store buffer. (Does not change machine memory.)
    ///
    /// `init` says with which value to initialize the store buffer in case there wasn't a store
    /// buffer for this memory range before.
+    ///
+    /// Must be called *after* `validate_atomic_store` to ensure that `sync_clock` is up-to-date.
    fn buffered_atomic_write(
        &mut self,
        val: Scalar,
@@ -544,7 +570,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
        let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr(), 0)?;
        if let (
            crate::AllocExtra {
-                data_race: AllocDataRaceHandler::Vclocks(_, Some(alloc_buffers)),
+                data_race: AllocDataRaceHandler::Vclocks(data_race_clocks, Some(alloc_buffers)),
                ..
            },
            crate::MiriMachine {
@@ -556,9 +582,18 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
            global.sc_write(threads);
        }

-        let buffer = alloc_buffers
-            .get_or_create_store_buffer_mut(alloc_range(base_offset, dest.layout.size), init)?;
-        buffer.buffered_write(val, global, threads, atomic == AtomicWriteOrd::SeqCst)?;
+        let range = alloc_range(base_offset, dest.layout.size);
+        // It's a bit annoying that we have to go back to the data race part to get the clock...
+        // but it does make things a lot simpler.
+        let sync_clock = data_race_clocks.sync_clock(range);
+        let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, init)?;
+        buffer.buffered_write(
+            val,
+            global,
+            threads,
+            atomic == AtomicWriteOrd::SeqCst,
+            sync_clock,
+        )?;
    }

    // Caller should've written to dest with the vanilla scalar write, we do nothing here