Commit 724c99e

weak_memory: fix sync clock handling when loading from old store elements

1 parent e063dee

3 files changed: +121 −50 lines
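The bug in one picture: under weak memory emulation, a load may read an *old* element from a location's store buffer, but the data-race side previously always joined the location-wide `sync_vector`, i.e. the clock of the most recent write. The litmus test below (my illustration, not part of the commit) shows why that is wrong: an acquire-load that reads an old relaxed store must not synchronize with a newer release store to the same location.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

static X: AtomicUsize = AtomicUsize::new(0);
static Y: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let t1 = thread::spawn(|| {
        X.store(1, Ordering::Relaxed); // old store buffer element
        Y.store(1, Ordering::Relaxed);
        X.store(2, Ordering::Release); // most recent write; releases Y = 1
    });
    let t2 = thread::spawn(|| {
        // Under weak memory this may read 0, 1, or 2. Reading the old value 1
        // must NOT acquire the clock of the later release store of 2, so the
        // load of Y below is still allowed to return 0. Joining the
        // location-wide sync_vector here would wrongly rule that out.
        if X.load(Ordering::Acquire) == 1 {
            let _y = Y.load(Ordering::Relaxed); // may be 0 or 1
        }
    });
    t1.join().unwrap();
    t2.join().unwrap();
}
```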

src/concurrency/data_race.rs

Lines changed: 29 additions & 11 deletions
@@ -183,6 +183,9 @@ struct AtomicMemoryCellClocks {
     /// contains the vector of timestamps that will
     /// happen-before a thread if an acquire-load is
     /// performed on the data.
+    ///
+    /// With weak memory emulation, this is the clock of the most recent write. It is then only used
+    /// for release sequences, to integrate the most recent clock into the next one for RMWs.
     sync_vector: VClock,

     /// The size of accesses to this atomic location.
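The release-sequence case mentioned in the new doc comment, sketched as a litmus test (example mine, not from the commit): a relaxed RMW continues a release sequence, so an acquire-load of the RMW's result still synchronizes with the original release store. Folding the most recent write's clock into the RMW's clock is what makes this work.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

static DATA: AtomicUsize = AtomicUsize::new(0);
static FLAG: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let t1 = thread::spawn(|| {
        DATA.store(42, Ordering::Relaxed);
        FLAG.store(1, Ordering::Release); // heads a release sequence
    });
    let t2 = thread::spawn(|| {
        // A relaxed RMW continues the release sequence headed by FLAG = 1.
        FLAG.fetch_add(1, Ordering::Relaxed);
    });
    let t3 = thread::spawn(|| {
        // Observing 2 means the RMW read from the release store, so this
        // acquire-load synchronizes with t1 and must see DATA == 42.
        if FLAG.load(Ordering::Acquire) == 2 {
            assert_eq!(DATA.load(Ordering::Relaxed), 42);
        }
    });
    t1.join().unwrap();
    t2.join().unwrap();
    t3.join().unwrap();
}
```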
@@ -504,10 +507,11 @@ impl MemoryCellClocks {
         thread_clocks: &mut ThreadClockSet,
         index: VectorIdx,
         access_size: Size,
+        sync_clock: Option<&VClock>,
     ) -> Result<(), DataRace> {
         self.atomic_read_detect(thread_clocks, index, access_size)?;
-        if let Some(atomic) = self.atomic() {
-            thread_clocks.clock.join(&atomic.sync_vector);
+        if let Some(sync_clock) = sync_clock.or_else(|| self.atomic().map(|a| &a.sync_vector)) {
+            thread_clocks.clock.join(sync_clock);
         }
         Ok(())
     }
@@ -520,10 +524,11 @@ impl MemoryCellClocks {
         thread_clocks: &mut ThreadClockSet,
         index: VectorIdx,
         access_size: Size,
+        sync_clock: Option<&VClock>,
     ) -> Result<(), DataRace> {
         self.atomic_read_detect(thread_clocks, index, access_size)?;
-        if let Some(atomic) = self.atomic() {
-            thread_clocks.fence_acquire.join(&atomic.sync_vector);
+        if let Some(sync_clock) = sync_clock.or_else(|| self.atomic().map(|a| &a.sync_vector)) {
+            thread_clocks.fence_acquire.join(sync_clock);
         }
         Ok(())
     }
@@ -736,8 +741,8 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
         }

         let scalar = this.allow_data_races_ref(move |this| this.read_scalar(place))?;
-        let buffered_scalar = this.buffered_atomic_read(place, atomic, scalar, || {
-            this.validate_atomic_load(place, atomic)
+        let buffered_scalar = this.buffered_atomic_read(place, atomic, scalar, |sync_clock| {
+            this.validate_atomic_load(place, atomic, sync_clock)
         })?;
         interp_ok(buffered_scalar.ok_or_else(|| err_ub!(InvalidUninitBytes(None)))?)
     }
@@ -935,7 +940,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
             this.validate_atomic_rmw(place, success)?;
             this.buffered_atomic_rmw(new, place, success, old.to_scalar())?;
         } else {
-            this.validate_atomic_load(place, fail)?;
+            this.validate_atomic_load(place, fail, /* can use latest sync clock */ None)?;
             // A failed compare exchange is equivalent to a load, reading from the latest store
             // in the modification order.
             // Since `old` is only a value and not the store element, we need to separately
@@ -1177,6 +1182,18 @@ impl VClockAlloc {
         }))?
     }

+    /// Return the release/acquire synchronization clock for the given memory range.
+    pub(super) fn sync_clock(&self, access_range: AllocRange) -> VClock {
+        let alloc_ranges = self.alloc_ranges.borrow();
+        let mut clock = VClock::default();
+        for (_, mem_clocks) in alloc_ranges.iter(access_range.start, access_range.size) {
+            if let Some(atomic) = mem_clocks.atomic() {
+                clock.join(&atomic.sync_vector);
+            }
+        }
+        clock
+    }
+
     /// Detect data-races for an unsynchronized read operation. It will not perform
     /// data-race detection if `race_detecting()` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
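The new helper joins the `sync_vector` of every atomic in the accessed range. For readers unfamiliar with vector clocks, here is a minimal self-contained sketch of the pointwise-max `join` it relies on (an illustrative stand-in; Miri's real `VClock` lives in src/concurrency/vector_clock.rs):

```rust
#[derive(Default, Clone, Debug, PartialEq)]
struct VClock(Vec<u64>); // one timestamp per thread (vector) index

impl VClock {
    /// Pointwise maximum: the lattice join of two vector clocks.
    fn join(&mut self, other: &VClock) {
        if self.0.len() < other.0.len() {
            self.0.resize(other.0.len(), 0);
        }
        for (mine, theirs) in self.0.iter_mut().zip(other.0.iter()) {
            *mine = (*mine).max(*theirs);
        }
    }
}

fn main() {
    // Joining the sync clocks of two atomics that fall into one access range:
    let mut clock = VClock::default();
    clock.join(&VClock(vec![3, 0, 1]));
    clock.join(&VClock(vec![1, 2]));
    assert_eq!(clock, VClock(vec![3, 2, 1]));
}
```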
@@ -1453,6 +1470,7 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
         &self,
         place: &MPlaceTy<'tcx>,
         atomic: AtomicReadOrd,
+        sync_clock: Option<&VClock>,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_ref();
         this.validate_atomic_op(
@@ -1461,9 +1479,9 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
             AccessType::AtomicLoad,
             move |memory, clocks, index, atomic| {
                 if atomic == AtomicReadOrd::Relaxed {
-                    memory.load_relaxed(&mut *clocks, index, place.layout.size)
+                    memory.load_relaxed(&mut *clocks, index, place.layout.size, sync_clock)
                 } else {
-                    memory.load_acquire(&mut *clocks, index, place.layout.size)
+                    memory.load_acquire(&mut *clocks, index, place.layout.size, sync_clock)
                 }
             },
         )
@@ -1508,9 +1526,9 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
             AccessType::AtomicRmw,
             move |memory, clocks, index, _| {
                 if acquire {
-                    memory.load_acquire(clocks, index, place.layout.size)?;
+                    memory.load_acquire(clocks, index, place.layout.size, None)?;
                 } else {
-                    memory.load_relaxed(clocks, index, place.layout.size)?;
+                    memory.load_relaxed(clocks, index, place.layout.size, None)?;
                 }
                 if release {
                     memory.rmw_release(clocks, index, place.layout.size)

src/concurrency/weak_memory.rs

Lines changed: 59 additions & 28 deletions
@@ -62,8 +62,11 @@
 //! You can refer to test cases in weak_memory/extra_cpp.rs and weak_memory/extra_cpp_unsafe.rs for examples of these operations.

 // Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3:
-// 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri),
-// but this is not used anywhere so it's omitted here.
+// 1. In the operational semantics, loads acquire the vector clock of the atomic location
+// irrespective of which store buffer element is loaded. That's incorrect; the synchronization clock
+// needs to be tracked per store buffer element. (The paper has a field "clocks" for that purpose,
+// but it is not actually used.) tsan11 does this correctly
+// (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L305).
 //
 // 2. In the operational semantics, each store element keeps the timestamp of a thread when it loads from the store.
 // If the same thread loads from the same store element multiple times, then the timestamps at all loads are saved in a list of load elements.
@@ -138,16 +141,17 @@ enum LoadRecency {

 #[derive(Debug, Clone, PartialEq, Eq)]
 struct StoreElement {
-    /// The identifier of the vector index, corresponding to a thread
-    /// that performed the store.
-    store_index: VectorIdx,
+    /// The thread that performed the store.
+    store_thread: VectorIdx,
+    /// The timestamp of the storing thread when it performed the store
+    store_timestamp: VTimestamp,
+
+    /// The vector clock that can be acquired by loading this store.
+    sync_clock: VClock,

     /// Whether this store is SC.
     is_seqcst: bool,

-    /// The timestamp of the storing thread when it performed the store
-    timestamp: VTimestamp,
-
     /// The value of this store. `None` means uninitialized.
     // FIXME: Currently, we cannot represent partial initialization.
     val: Option<Scalar>,
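A toy model of the new `StoreElement` layout (field names mirror the diff; the surrounding types are simplified stand-ins, not Miri's): each buffered store now carries the clock an acquire-load of *that particular element* may join.

```rust
use std::collections::VecDeque;

type VTimestamp = u64;
type VectorIdx = usize;

#[derive(Default, Clone, Debug)]
struct VClock(Vec<VTimestamp>);

#[derive(Debug)]
struct StoreElement {
    store_thread: VectorIdx,     // thread that performed the store
    store_timestamp: VTimestamp, // that thread's clock at the store
    sync_clock: VClock,          // what an acquire-load of THIS element acquires
    val: u64,
}

fn main() {
    let mut buffer: VecDeque<StoreElement> = VecDeque::new();
    // An old relaxed store: nothing to acquire.
    buffer.push_back(StoreElement {
        store_thread: 0,
        store_timestamp: 1,
        sync_clock: VClock::default(),
        val: 1,
    });
    // A newer release store: carries the storing thread's clock.
    buffer.push_back(StoreElement {
        store_thread: 0,
        store_timestamp: 2,
        sync_clock: VClock(vec![2]),
        val: 2,
    });
    // A load that picks the OLD element joins only its (empty) sync clock,
    // not the clock of the newest store; that is the commit's fix.
    let picked = &buffer[0];
    println!(
        "loaded {} from thread {} at t={}, acquiring {:?}",
        picked.val, picked.store_thread, picked.store_timestamp, picked.sync_clock
    );
}
```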
@@ -246,8 +250,10 @@ impl<'tcx> StoreBuffer {
         let store_elem = StoreElement {
             // The thread index and timestamp of the initialisation write
             // are never meaningfully used, so it's fine to leave them as 0
-            store_index: VectorIdx::from(0),
-            timestamp: VTimestamp::ZERO,
+            store_thread: VectorIdx::from(0),
+            store_timestamp: VTimestamp::ZERO,
+            // The initialization write is non-atomic so nothing can be acquired.
+            sync_clock: VClock::default(),
             val: init,
             is_seqcst: false,
             load_info: RefCell::new(LoadInfo::default()),
@@ -276,7 +282,7 @@
         thread_mgr: &ThreadManager<'_>,
         is_seqcst: bool,
         rng: &mut (impl rand::Rng + ?Sized),
-        validate: impl FnOnce() -> InterpResult<'tcx>,
+        validate: impl FnOnce(Option<&VClock>) -> InterpResult<'tcx>,
     ) -> InterpResult<'tcx, (Option<Scalar>, LoadRecency)> {
         // Having a live borrow to store_buffer while calling validate_atomic_load is fine
         // because the race detector doesn't touch store_buffer
@@ -293,7 +299,7 @@
             // after we've picked a store element from the store buffer, as presented
             // in ATOMIC LOAD rule of the paper. This is because fetch_store
             // requires access to ThreadClockSet.clock, which is updated by the race detector
-            validate()?;
+            validate(Some(&store_elem.sync_clock))?;

             let (index, clocks) = global.active_thread_state(thread_mgr);
             let loaded = store_elem.load_impl(index, &clocks, is_seqcst);
@@ -306,10 +312,11 @@
         global: &DataRaceState,
         thread_mgr: &ThreadManager<'_>,
         is_seqcst: bool,
+        sync_clock: VClock,
     ) -> InterpResult<'tcx> {
         let (index, clocks) = global.active_thread_state(thread_mgr);

-        self.store_impl(val, index, &clocks.clock, is_seqcst);
+        self.store_impl(val, index, &clocks.clock, is_seqcst, sync_clock);
         interp_ok(())
     }

@@ -336,7 +343,9 @@ impl<'tcx> StoreBuffer {
                 return false;
             }

-            keep_searching = if store_elem.timestamp <= clocks.clock[store_elem.store_index] {
+            keep_searching = if store_elem.store_timestamp
+                <= clocks.clock[store_elem.store_thread]
+            {
                 // CoWR: if a store happens-before the current load,
                 // then we can't read-from anything earlier in modification order.
                 // C++20 §6.9.2.2 [intro.races] paragraph 18
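The CoWR rule in its simplest form (example mine): a load cannot read a store that is older in modification order than a store that happens-before the load. Even single-threaded code exercises it:

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

static X: AtomicUsize = AtomicUsize::new(0);

fn main() {
    X.store(1, Ordering::Relaxed);
    // The store above is sequenced-before (hence happens-before) this load,
    // so CoWR forbids reading the initial value 0, even under weak memory.
    assert_eq!(X.load(Ordering::Relaxed), 1);
}
```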
@@ -348,15 +357,15 @@
                 // then we cannot read-from anything earlier in modification order.
                 // C++20 §6.9.2.2 [intro.races] paragraph 16
                 false
-            } else if store_elem.timestamp <= clocks.write_seqcst[store_elem.store_index]
+            } else if store_elem.store_timestamp <= clocks.write_seqcst[store_elem.store_thread]
                 && store_elem.is_seqcst
             {
                 // The current non-SC load, which may be sequenced-after an SC fence,
                 // cannot read-before the last SC store executed before the fence.
                 // C++17 §32.4 [atomics.order] paragraph 4
                 false
             } else if is_seqcst
-                && store_elem.timestamp <= clocks.read_seqcst[store_elem.store_index]
+                && store_elem.store_timestamp <= clocks.read_seqcst[store_elem.store_thread]
             {
                 // The current SC load cannot read-before the last store sequenced-before
                 // the last SC fence.
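The cited C++17 fence rule, sketched as a litmus test (example mine; the guarantee is conditional on the fence being ordered after the SC store in the total order of SC operations):

```rust
use std::sync::atomic::{fence, AtomicUsize, Ordering};
use std::thread;

static X: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let t1 = thread::spawn(|| {
        X.store(1, Ordering::SeqCst); // SC store
    });
    let t2 = thread::spawn(|| {
        fence(Ordering::SeqCst); // SC fence
        // If the fence follows the SC store in the SC order, this relaxed
        // load must not read-before that store, i.e. it must not return 0.
        let _v = X.load(Ordering::Relaxed);
    });
    t1.join().unwrap();
    t2.join().unwrap();
}
```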
@@ -394,17 +403,19 @@
         }
     }

-    /// ATOMIC STORE IMPL in the paper (except we don't need the location's vector clock)
+    /// ATOMIC STORE IMPL in the paper
     fn store_impl(
         &mut self,
         val: Scalar,
         index: VectorIdx,
         thread_clock: &VClock,
         is_seqcst: bool,
+        sync_clock: VClock,
     ) {
         let store_elem = StoreElement {
-            store_index: index,
-            timestamp: thread_clock[index],
+            store_thread: index,
+            store_timestamp: thread_clock[index],
+            sync_clock,
             // In the language provided in the paper, an atomic store takes the value from a
             // non-atomic memory location.
             // But we already have the immediate value here so we don't need to do the memory
@@ -422,7 +433,7 @@
         // so that in a later SC load, only the last SC store (i.e. this one) or stores that
         // aren't ordered by hb with the last SC is picked.
         self.buffer.iter_mut().rev().for_each(|elem| {
-            if elem.timestamp <= thread_clock[elem.store_index] {
+            if elem.store_timestamp <= thread_clock[elem.store_thread] {
                 elem.is_seqcst = true;
             }
         })
@@ -465,7 +476,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?;
         if let (
             crate::AllocExtra {
-                data_race: AllocDataRaceHandler::Vclocks(_, Some(alloc_buffers)),
+                data_race: AllocDataRaceHandler::Vclocks(data_race_clocks, Some(alloc_buffers)),
                 ..
             },
             crate::MiriMachine {
@@ -478,20 +489,29 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 global.sc_write(threads);
             }
             let range = alloc_range(base_offset, place.layout.size);
+            let sync_clock = data_race_clocks.sync_clock(range);
             let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Some(init))?;
             // The RMW always reads from the most recent store.
             buffer.read_from_last_store(global, threads, atomic == AtomicRwOrd::SeqCst);
-            buffer.buffered_write(new_val, global, threads, atomic == AtomicRwOrd::SeqCst)?;
+            buffer.buffered_write(
+                new_val,
+                global,
+                threads,
+                atomic == AtomicRwOrd::SeqCst,
+                sync_clock,
+            )?;
         }
         interp_ok(())
     }

+    /// The argument to `validate` is the synchronization clock of the memory that is being read,
+    /// if we are reading from a store buffer element.
     fn buffered_atomic_read(
         &self,
         place: &MPlaceTy<'tcx>,
         atomic: AtomicReadOrd,
         latest_in_mo: Scalar,
-        validate: impl FnOnce() -> InterpResult<'tcx>,
+        validate: impl FnOnce(Option<&VClock>) -> InterpResult<'tcx>,
     ) -> InterpResult<'tcx, Option<Scalar>> {
         let this = self.eval_context_ref();
         'fallback: {
@@ -529,14 +549,16 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         }

         // Race detector or weak memory disabled, simply read the latest value
-        validate()?;
+        validate(None)?;
         interp_ok(Some(latest_in_mo))
     }

     /// Add the given write to the store buffer. (Does not change machine memory.)
     ///
     /// `init` says with which value to initialize the store buffer in case there wasn't a store
     /// buffer for this memory range before.
+    ///
+    /// Must be called *after* `validate_atomic_store` to ensure that `sync_clock` is up-to-date.
     fn buffered_atomic_write(
         &mut self,
         val: Scalar,
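The ordering requirement in the new doc line follows from the hunks below: `buffered_atomic_write` now snapshots the range's synchronization clock via `data_race_clocks.sync_clock(range)`, so if it ran before `validate_atomic_store`, the snapshot would miss the release clock of the very store being buffered.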
@@ -548,7 +570,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr(), 0)?;
         if let (
             crate::AllocExtra {
-                data_race: AllocDataRaceHandler::Vclocks(_, Some(alloc_buffers)),
+                data_race: AllocDataRaceHandler::Vclocks(data_race_clocks, Some(alloc_buffers)),
                 ..
             },
             crate::MiriMachine {
@@ -560,9 +582,18 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 global.sc_write(threads);
             }

-            let buffer = alloc_buffers
-                .get_or_create_store_buffer_mut(alloc_range(base_offset, dest.layout.size), init)?;
-            buffer.buffered_write(val, global, threads, atomic == AtomicWriteOrd::SeqCst)?;
+            let range = alloc_range(base_offset, dest.layout.size);
+            // It's a bit annoying that we have to go back to the data race part to get the clock...
+            // but it does make things a lot simpler.
+            let sync_clock = data_race_clocks.sync_clock(range);
+            let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, init)?;
+            buffer.buffered_write(
+                val,
+                global,
+                threads,
+                atomic == AtomicWriteOrd::SeqCst,
+                sync_clock,
+            )?;
         }

         // Caller should've written to dest with the vanilla scalar write, we do nothing here
