Skip to content

Commit f04fc4d

Browse files
committed
Auto merge of #129778 - RalfJung:interp-lossy-typed-copy, r=saethlin
interpret: make typed copies lossy wrt provenance and padding A "typed copy" in Rust can be a lossy process: when copying at type `usize` (or any other non-pointer type), if the original memory had any provenance, that provenance is lost. When copying at pointer type, if the original memory had partial provenance (i.e., not the same provenance for all bytes), that provenance is lost. When copying any type with padding, the contents of padding are lost. This PR equips our validity-checking pass with the ability to reset provenance and padding according to those rules. Can be reviewed commit-by-commit. The first three commits are just preparation without any functional change. Fixes #845 Fixes #2182
2 parents 9a46223 + ed3e1c4 commit f04fc4d

33 files changed

+503
-12
lines changed

src/concurrency/data_race.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
637637
// The program didn't actually do a read, so suppress the memory access hooks.
638638
// This is also a very special exception where we just ignore an error -- if this read
639639
// was UB e.g. because the memory is uninitialized, we don't want to know!
640-
let old_val = this.run_for_validation(|| this.read_scalar(dest)).ok();
640+
let old_val = this.run_for_validation(|this| this.read_scalar(dest)).ok();
641641
this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
642642
this.validate_atomic_store(dest, atomic)?;
643643
this.buffered_atomic_write(val, dest, atomic, old_val)

src/helpers.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -869,7 +869,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
869869
/// Dereference a pointer operand to a place using `layout` instead of the pointer's declared type
870870
fn deref_pointer_as(
871871
&self,
872-
op: &impl Readable<'tcx, Provenance>,
872+
op: &impl Projectable<'tcx, Provenance>,
873873
layout: TyAndLayout<'tcx>,
874874
) -> InterpResult<'tcx, MPlaceTy<'tcx>> {
875875
let this = self.eval_context_ref();
@@ -880,7 +880,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
880880
/// Calculates the MPlaceTy given the offset and layout of an access on an operand
881881
fn deref_pointer_and_offset(
882882
&self,
883-
op: &impl Readable<'tcx, Provenance>,
883+
op: &impl Projectable<'tcx, Provenance>,
884884
offset: u64,
885885
base_layout: TyAndLayout<'tcx>,
886886
value_layout: TyAndLayout<'tcx>,
@@ -897,7 +897,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
897897

898898
fn deref_pointer_and_read(
899899
&self,
900-
op: &impl Readable<'tcx, Provenance>,
900+
op: &impl Projectable<'tcx, Provenance>,
901901
offset: u64,
902902
base_layout: TyAndLayout<'tcx>,
903903
value_layout: TyAndLayout<'tcx>,
@@ -909,7 +909,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
909909

910910
fn deref_pointer_and_write(
911911
&mut self,
912-
op: &impl Readable<'tcx, Provenance>,
912+
op: &impl Projectable<'tcx, Provenance>,
913913
offset: u64,
914914
value: impl Into<Scalar>,
915915
base_layout: TyAndLayout<'tcx>,

src/intrinsics/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
152152
// ```
153153
// Would not be considered UB, or the other way around (`is_val_statically_known(0)`).
154154
"is_val_statically_known" => {
155-
let [arg] = check_arg_count(args)?;
156-
this.validate_operand(arg, /*recursive*/ false)?;
155+
let [_arg] = check_arg_count(args)?;
156+
// FIXME: should we check for validity here? It's tricky because we do not have a
157+
// place. Codegen does not seem to set any attributes like `noundef` for intrinsic
158+
// calls, so we don't *have* to do anything.
157159
let branch: bool = this.machine.rng.get_mut().gen();
158160
this.write_scalar(Scalar::from_bool(branch), dest)?;
159161
}

src/machine.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,9 @@ pub struct MiriMachine<'tcx> {
572572
/// Invariant: the promised alignment will never be less than the native alignment of the
573573
/// allocation.
574574
pub(crate) symbolic_alignment: RefCell<FxHashMap<AllocId, (Size, Align)>>,
575+
576+
/// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes).
577+
union_data_ranges: FxHashMap<Ty<'tcx>, RangeSet>,
575578
}
576579

577580
impl<'tcx> MiriMachine<'tcx> {
@@ -714,6 +717,7 @@ impl<'tcx> MiriMachine<'tcx> {
714717
allocation_spans: RefCell::new(FxHashMap::default()),
715718
const_cache: RefCell::new(FxHashMap::default()),
716719
symbolic_alignment: RefCell::new(FxHashMap::default()),
720+
union_data_ranges: FxHashMap::default(),
717721
}
718722
}
719723

@@ -826,6 +830,7 @@ impl VisitProvenance for MiriMachine<'_> {
826830
allocation_spans: _,
827831
const_cache: _,
828832
symbolic_alignment: _,
833+
union_data_ranges: _,
829834
} = self;
830835

831836
threads.visit_provenance(visit);
@@ -1627,4 +1632,12 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
16271632
ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL
16281633
}
16291634
}
1635+
1636+
fn cached_union_data_range<'e>(
1637+
ecx: &'e mut InterpCx<'tcx, Self>,
1638+
ty: Ty<'tcx>,
1639+
compute_range: impl FnOnce() -> RangeSet,
1640+
) -> Cow<'e, RangeSet> {
1641+
Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range))
1642+
}
16301643
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
use std::mem;
2+
3+
// Doing a copy at integer type should lose provenance.
4+
// This tests the unoptimized base case.
5+
fn main() {
6+
let ptrs = [(&42, true)];
7+
let ints: [(usize, bool); 1] = unsafe { mem::transmute(ptrs) };
8+
let ptr = (&raw const ints[0].0).cast::<&i32>();
9+
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
10+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
2+
--> $DIR/int_copy_looses_provenance0.rs:LL:CC
3+
|
4+
LL | let _val = unsafe { *ptr.read() };
5+
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/int_copy_looses_provenance0.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
use std::mem;
2+
3+
// Doing a copy at integer type should lose provenance.
4+
// This tests the optimized-array case of integer copies.
5+
fn main() {
6+
let ptrs = [&42];
7+
let ints: [usize; 1] = unsafe { mem::transmute(ptrs) };
8+
let ptr = (&raw const ints[0]).cast::<&i32>();
9+
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
10+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
2+
--> $DIR/int_copy_looses_provenance1.rs:LL:CC
3+
|
4+
LL | let _val = unsafe { *ptr.read() };
5+
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/int_copy_looses_provenance1.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
use std::mem;
2+
3+
// Doing a copy at integer type should lose provenance.
4+
// This tests the case where provenacne is hiding in the metadata of a pointer.
5+
fn main() {
6+
let ptrs = [(&42, &42)];
7+
// Typed copy at wide pointer type (with integer-typed metadata).
8+
let ints: [*const [usize]; 1] = unsafe { mem::transmute(ptrs) };
9+
// Get a pointer to the metadata field.
10+
let ptr = (&raw const ints[0]).wrapping_byte_add(mem::size_of::<*const ()>()).cast::<&i32>();
11+
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
12+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
2+
--> $DIR/int_copy_looses_provenance2.rs:LL:CC
3+
|
4+
LL | let _val = unsafe { *ptr.read() };
5+
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `main` at $DIR/int_copy_looses_provenance2.rs:LL:CC
11+
12+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
13+
14+
error: aborting due to 1 previous error
15+

0 commit comments

Comments
 (0)