62
62
//! You can refer to test cases in weak_memory/extra_cpp.rs and weak_memory/extra_cpp_unsafe.rs for examples of these operations.
63
63
64
64
// Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3:
65
- // 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri),
66
- // but this is not used anywhere so it's omitted here.
65
+ // 1. In the operational semantics, loads acquire the vector clock of the atomic location
66
+ // irrespective of which store buffer element is loaded. That's incorrect; the synchronization clock
67
+ // needs to be tracked per-store-buffer-element. (The paper has a field "clocks" for that purpose,
68
+ // but it is not actually used.) tsan11 does this correctly
69
+ // (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L305).
67
70
//
68
71
// 2. In the operational semantics, each store element keeps the timestamp of a thread when it loads from the store.
69
72
// If the same thread loads from the same store element multiple times, then the timestamps at all loads are saved in a list of load elements.
@@ -138,16 +141,17 @@ enum LoadRecency {
138
141
139
142
#[ derive( Debug , Clone , PartialEq , Eq ) ]
140
143
struct StoreElement {
141
- /// The identifier of the vector index, corresponding to a thread
142
- /// that performed the store.
143
- store_index : VectorIdx ,
144
+ /// The thread that performed the store.
145
+ store_thread : VectorIdx ,
146
+ /// The timestamp of the storing thread when it performed the store
147
+ store_timestamp : VTimestamp ,
148
+
149
+ /// The vector clock that can be acquired by loading this store.
150
+ sync_clock : VClock ,
144
151
145
152
/// Whether this store is SC.
146
153
is_seqcst : bool ,
147
154
148
- /// The timestamp of the storing thread when it performed the store
149
- timestamp : VTimestamp ,
150
-
151
155
/// The value of this store. `None` means uninitialized.
152
156
// FIXME: Currently, we cannot represent partial initialization.
153
157
val : Option < Scalar > ,
@@ -246,8 +250,10 @@ impl<'tcx> StoreBuffer {
246
250
let store_elem = StoreElement {
247
251
// The thread index and timestamp of the initialisation write
248
252
// are never meaningfully used, so it's fine to leave them as 0
249
- store_index : VectorIdx :: from ( 0 ) ,
250
- timestamp : VTimestamp :: ZERO ,
253
+ store_thread : VectorIdx :: from ( 0 ) ,
254
+ store_timestamp : VTimestamp :: ZERO ,
255
+ // The initialization write is non-atomic so nothing can be acquired.
256
+ sync_clock : VClock :: default ( ) ,
251
257
val : init,
252
258
is_seqcst : false ,
253
259
load_info : RefCell :: new ( LoadInfo :: default ( ) ) ,
@@ -276,7 +282,7 @@ impl<'tcx> StoreBuffer {
276
282
thread_mgr : & ThreadManager < ' _ > ,
277
283
is_seqcst : bool ,
278
284
rng : & mut ( impl rand:: Rng + ?Sized ) ,
279
- validate : impl FnOnce ( ) -> InterpResult < ' tcx > ,
285
+ validate : impl FnOnce ( Option < & VClock > ) -> InterpResult < ' tcx > ,
280
286
) -> InterpResult < ' tcx , ( Option < Scalar > , LoadRecency ) > {
281
287
// Having a live borrow to store_buffer while calling validate_atomic_load is fine
282
288
// because the race detector doesn't touch store_buffer
@@ -293,7 +299,7 @@ impl<'tcx> StoreBuffer {
293
299
// after we've picked a store element from the store buffer, as presented
294
300
// in ATOMIC LOAD rule of the paper. This is because fetch_store
295
301
// requires access to ThreadClockSet.clock, which is updated by the race detector
296
- validate ( ) ?;
302
+ validate ( Some ( & store_elem . sync_clock ) ) ?;
297
303
298
304
let ( index, clocks) = global. active_thread_state ( thread_mgr) ;
299
305
let loaded = store_elem. load_impl ( index, & clocks, is_seqcst) ;
@@ -306,10 +312,11 @@ impl<'tcx> StoreBuffer {
306
312
global : & DataRaceState ,
307
313
thread_mgr : & ThreadManager < ' _ > ,
308
314
is_seqcst : bool ,
315
+ sync_clock : VClock ,
309
316
) -> InterpResult < ' tcx > {
310
317
let ( index, clocks) = global. active_thread_state ( thread_mgr) ;
311
318
312
- self . store_impl ( val, index, & clocks. clock , is_seqcst) ;
319
+ self . store_impl ( val, index, & clocks. clock , is_seqcst, sync_clock ) ;
313
320
interp_ok ( ( ) )
314
321
}
315
322
@@ -336,7 +343,9 @@ impl<'tcx> StoreBuffer {
336
343
return false ;
337
344
}
338
345
339
- keep_searching = if store_elem. timestamp <= clocks. clock [ store_elem. store_index ] {
346
+ keep_searching = if store_elem. store_timestamp
347
+ <= clocks. clock [ store_elem. store_thread ]
348
+ {
340
349
// CoWR: if a store happens-before the current load,
341
350
// then we can't read-from anything earlier in modification order.
342
351
// C++20 §6.9.2.2 [intro.races] paragraph 18
@@ -348,15 +357,15 @@ impl<'tcx> StoreBuffer {
348
357
// then we cannot read-from anything earlier in modification order.
349
358
// C++20 §6.9.2.2 [intro.races] paragraph 16
350
359
false
351
- } else if store_elem. timestamp <= clocks. write_seqcst [ store_elem. store_index ]
360
+ } else if store_elem. store_timestamp <= clocks. write_seqcst [ store_elem. store_thread ]
352
361
&& store_elem. is_seqcst
353
362
{
354
363
// The current non-SC load, which may be sequenced-after an SC fence,
355
364
// cannot read-before the last SC store executed before the fence.
356
365
// C++17 §32.4 [atomics.order] paragraph 4
357
366
false
358
367
} else if is_seqcst
359
- && store_elem. timestamp <= clocks. read_seqcst [ store_elem. store_index ]
368
+ && store_elem. store_timestamp <= clocks. read_seqcst [ store_elem. store_thread ]
360
369
{
361
370
// The current SC load cannot read-before the last store sequenced-before
362
371
// the last SC fence.
@@ -394,17 +403,19 @@ impl<'tcx> StoreBuffer {
394
403
}
395
404
}
396
405
397
- /// ATOMIC STORE IMPL in the paper (except we don't need the location's vector clock)
406
+ /// ATOMIC STORE IMPL in the paper
398
407
fn store_impl (
399
408
& mut self ,
400
409
val : Scalar ,
401
410
index : VectorIdx ,
402
411
thread_clock : & VClock ,
403
412
is_seqcst : bool ,
413
+ sync_clock : VClock ,
404
414
) {
405
415
let store_elem = StoreElement {
406
- store_index : index,
407
- timestamp : thread_clock[ index] ,
416
+ store_thread : index,
417
+ store_timestamp : thread_clock[ index] ,
418
+ sync_clock,
408
419
// In the language provided in the paper, an atomic store takes the value from a
409
420
// non-atomic memory location.
410
421
// But we already have the immediate value here so we don't need to do the memory
@@ -422,7 +433,7 @@ impl<'tcx> StoreBuffer {
422
433
// so that in a later SC load, only the last SC store (i.e. this one) or stores that
423
434
// aren't ordered by hb with the last SC is picked.
424
435
self . buffer . iter_mut ( ) . rev ( ) . for_each ( |elem| {
425
- if elem. timestamp <= thread_clock[ elem. store_index ] {
436
+ if elem. store_timestamp <= thread_clock[ elem. store_thread ] {
426
437
elem. is_seqcst = true ;
427
438
}
428
439
} )
@@ -465,7 +476,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
465
476
let ( alloc_id, base_offset, ..) = this. ptr_get_alloc_id ( place. ptr ( ) , 0 ) ?;
466
477
if let (
467
478
crate :: AllocExtra {
468
- data_race : AllocDataRaceHandler :: Vclocks ( _ , Some ( alloc_buffers) ) ,
479
+ data_race : AllocDataRaceHandler :: Vclocks ( data_race_clocks , Some ( alloc_buffers) ) ,
469
480
..
470
481
} ,
471
482
crate :: MiriMachine {
@@ -478,20 +489,29 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
478
489
global. sc_write ( threads) ;
479
490
}
480
491
let range = alloc_range ( base_offset, place. layout . size ) ;
492
+ let sync_clock = data_race_clocks. sync_clock ( range) ;
481
493
let buffer = alloc_buffers. get_or_create_store_buffer_mut ( range, Some ( init) ) ?;
482
494
// The RMW always reads from the most recent store.
483
495
buffer. read_from_last_store ( global, threads, atomic == AtomicRwOrd :: SeqCst ) ;
484
- buffer. buffered_write ( new_val, global, threads, atomic == AtomicRwOrd :: SeqCst ) ?;
496
+ buffer. buffered_write (
497
+ new_val,
498
+ global,
499
+ threads,
500
+ atomic == AtomicRwOrd :: SeqCst ,
501
+ sync_clock,
502
+ ) ?;
485
503
}
486
504
interp_ok ( ( ) )
487
505
}
488
506
507
+ /// The argument to `validate` is the synchronization clock of the memory that is being read,
508
+ /// if we are reading from a store buffer element.
489
509
fn buffered_atomic_read (
490
510
& self ,
491
511
place : & MPlaceTy < ' tcx > ,
492
512
atomic : AtomicReadOrd ,
493
513
latest_in_mo : Scalar ,
494
- validate : impl FnOnce ( ) -> InterpResult < ' tcx > ,
514
+ validate : impl FnOnce ( Option < & VClock > ) -> InterpResult < ' tcx > ,
495
515
) -> InterpResult < ' tcx , Option < Scalar > > {
496
516
let this = self . eval_context_ref ( ) ;
497
517
' fallback: {
@@ -529,14 +549,16 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
529
549
}
530
550
531
551
// Race detector or weak memory disabled, simply read the latest value
532
- validate ( ) ?;
552
+ validate ( None ) ?;
533
553
interp_ok ( Some ( latest_in_mo) )
534
554
}
535
555
536
556
/// Add the given write to the store buffer. (Does not change machine memory.)
537
557
///
538
558
/// `init` says with which value to initialize the store buffer in case there wasn't a store
539
559
/// buffer for this memory range before.
560
+ ///
561
+ /// Must be called *after* `validate_atomic_store` to ensure that `sync_clock` is up-to-date.
540
562
fn buffered_atomic_write (
541
563
& mut self ,
542
564
val : Scalar ,
@@ -548,7 +570,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
548
570
let ( alloc_id, base_offset, ..) = this. ptr_get_alloc_id ( dest. ptr ( ) , 0 ) ?;
549
571
if let (
550
572
crate :: AllocExtra {
551
- data_race : AllocDataRaceHandler :: Vclocks ( _ , Some ( alloc_buffers) ) ,
573
+ data_race : AllocDataRaceHandler :: Vclocks ( data_race_clocks , Some ( alloc_buffers) ) ,
552
574
..
553
575
} ,
554
576
crate :: MiriMachine {
@@ -560,9 +582,18 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
560
582
global. sc_write ( threads) ;
561
583
}
562
584
563
- let buffer = alloc_buffers
564
- . get_or_create_store_buffer_mut ( alloc_range ( base_offset, dest. layout . size ) , init) ?;
565
- buffer. buffered_write ( val, global, threads, atomic == AtomicWriteOrd :: SeqCst ) ?;
585
+ let range = alloc_range ( base_offset, dest. layout . size ) ;
586
+ // It's a bit annoying that we have to go back to the data race part to get the clock...
587
+ // but it does make things a lot simpler.
588
+ let sync_clock = data_race_clocks. sync_clock ( range) ;
589
+ let buffer = alloc_buffers. get_or_create_store_buffer_mut ( range, init) ?;
590
+ buffer. buffered_write (
591
+ val,
592
+ global,
593
+ threads,
594
+ atomic == AtomicWriteOrd :: SeqCst ,
595
+ sync_clock,
596
+ ) ?;
566
597
}
567
598
568
599
// Caller should've written to dest with the vanilla scalar write, we do nothing here
0 commit comments