diff --git a/Cargo.lock b/Cargo.lock index c723495711..b15eae1ccc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -215,6 +215,17 @@ dependencies = [ "serde", ] +[[package]] +name = "c2rust-analyze" +version = "0.1.0" +dependencies = [ + "bitflags", + "polonius-engine", + "print_bytes", + "rustc-hash", + "rustc-private-link", +] + [[package]] name = "c2rust-asm-casts" version = "0.2.0" @@ -705,6 +716,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "datafrog" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0afaad2b26fa326569eb264b1363e8ae3357618c43982b3f285f0774ce76b69" + [[package]] name = "digest" version = "0.8.1" @@ -1513,6 +1530,17 @@ version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" +[[package]] +name = "polonius-engine" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4e8e505342045d397d0b6674dcb82d6faf5cf40484d30eeb88fc82ef14e903f" +dependencies = [ + "datafrog", + "log", + "rustc-hash", +] + [[package]] name = "prettyplease" version = "0.1.11" diff --git a/Cargo.toml b/Cargo.toml index bfaa5a58cf..55f79663fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "c2rust", + "c2rust-analyze", "c2rust-transpile", "c2rust-ast-builder", "c2rust-ast-exporter", diff --git a/c2rust-analyze/.gitignore b/c2rust-analyze/.gitignore new file mode 100644 index 0000000000..c0dc9bcc98 --- /dev/null +++ b/c2rust-analyze/.gitignore @@ -0,0 +1,2 @@ +/inspect/ +*.rlib diff --git a/c2rust-analyze/Cargo.toml b/c2rust-analyze/Cargo.toml new file mode 100644 index 0000000000..767dbe42a4 --- /dev/null +++ b/c2rust-analyze/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "c2rust-analyze" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +polonius-engine = "0.13.0" +rustc-hash = "1.1.0" +bitflags = "1.3.2" + +[build-dependencies] +rustc-private-link = { path = "../rustc-private-link" } +print_bytes = "0.6" + +[package.metadata.rust-analyzer] +rustc_private = true diff --git a/c2rust-analyze/README.md b/c2rust-analyze/README.md new file mode 100644 index 0000000000..b80e6eb058 --- /dev/null +++ b/c2rust-analyze/README.md @@ -0,0 +1,7 @@ +```sh +cargo run --bin c2rust-analyze -- tests/filecheck/insertion_sort.rs -L "$(rustc --print sysroot)/lib/rustlib/x86_64-unknown-linux-gnu/lib" --crate-type rlib +``` + +This should produce a large amount of debug output, including a table at the +end listing the type and expression rewrites the analysis has inferred for the +`insertion_sort` function. diff --git a/c2rust-analyze/build.rs b/c2rust-analyze/build.rs new file mode 100644 index 0000000000..b917afbe87 --- /dev/null +++ b/c2rust-analyze/build.rs @@ -0,0 +1,9 @@ +use rustc_private_link::SysRoot; + +fn main() { + let sysroot = SysRoot::resolve(); + sysroot.link_rustc_private(); + + print!("cargo:rustc-env=C2RUST_TARGET_LIB_DIR="); + print_bytes::println_bytes(&sysroot.rustlib()); +} diff --git a/c2rust-analyze/rename_nll_facts.py b/c2rust-analyze/rename_nll_facts.py new file mode 100644 index 0000000000..3374400087 --- /dev/null +++ b/c2rust-analyze/rename_nll_facts.py @@ -0,0 +1,139 @@ +''' +Usage: `python3 rename_nll_facts.py src ref dest` + +Renames atoms in `src/*.facts` to match the names used in `ref/*.facts`, then +writes the renamed facts to `dest/`. +''' + +import ast +from collections import defaultdict +import os +import sys + +src_dir, ref_dir, dest_dir = sys.argv[1:] + +# Map `src` loan/origin/path names to `ref` loan/origin/path names. We don't +# break this down by type because the names for each type don't collide anyway. +name_map = {} +# Set of `ref` names that appear as values in `name_map`. +ref_names_seen = set() + +def match_name(src_name, ref_name): + if src_name in name_map: + old_ref_name = name_map[src_name] + if ref_name != old_ref_name: + print('error: %r matches both %r and %r' % ( + src_name, old_ref_name, ref_name)) + return + else: + if ref_name in ref_names_seen: + print('error: %r matches %r, but %r is already used' % ( + src_name, ref_name, ref_name)) + return + name_map[src_name] = ref_name + ref_names_seen.add(ref_name) + +def match_loan(src_name, ref_name): + match_name(src_name, ref_name) + +def match_origin(src_name, ref_name): + match_name(src_name, ref_name) + +def match_path(src_name, ref_name): + match_name(src_name, ref_name) + + +def load(name): + with open(os.path.join(src_dir, name + '.facts')) as f: + src_rows = [[ast.literal_eval(s) for s in line.strip().split('\t')] + for line in f] + with open(os.path.join(ref_dir, name + '.facts')) as f: + ref_rows = [[ast.literal_eval(s) for s in line.strip().split('\t')] + for line in f] + return src_rows, ref_rows + + +# Match up paths using `path_is_var` and `path_assigned_at_base`. + +def match_path_is_var(): + src, ref = load('path_is_var') + ref_dct = {var: path for path, var in ref} + for path, var in src: + if var not in ref_dct: + continue + match_path(path, ref_dct[var]) + +match_path_is_var() + +def match_path_assigned_at_base(): + src, ref = load('path_assigned_at_base') + ref_dct = {point: path for path, point in ref} + for path, point in src: + if point not in ref_dct: + continue + match_path(path, ref_dct[point]) + +match_path_assigned_at_base() + +# Match up origins and loans using `loan_issued_at` + +def match_loan_issued_at(): + src, ref = load('loan_issued_at') + ref_dct = {point: (origin, loan) for origin, loan, point in ref} + for origin, loan, point in src: + if point not in ref_dct: + continue + match_origin(origin, ref_dct[point][0]) + match_origin(loan, ref_dct[point][1]) + +match_loan_issued_at() + +# Match up origins using `use_of_var_derefs_origin` + +def match_use_of_var_derefs_origin(): + src, ref = load('use_of_var_derefs_origin') + src_dct = defaultdict(list) + for var, origin in src: + src_dct[var].append(origin) + ref_dct = defaultdict(list) + for var, origin in ref: + ref_dct[var].append(origin) + for var in set(src_dct.keys()) & set(ref_dct.keys()): + src_origins = src_dct[var] + ref_origins = ref_dct[var] + if len(src_origins) != len(ref_origins): + print('error: var %r has %d origins in src but %d in ref' % ( + var, len(src_origins), len(ref_origins))) + continue + for src_origin, ref_origin in zip(src_origins, ref_origins): + match_origin(src_origin, ref_origin) + +match_use_of_var_derefs_origin() + + +# Rewrite `src` using the collected name mappings. + +os.makedirs(dest_dir, exist_ok=True) +for name in os.listdir(src_dir): + if name.startswith('.') or not name.endswith('.facts'): + continue + + with open(os.path.join(src_dir, name)) as src, \ + open(os.path.join(dest_dir, name), 'w') as dest: + for line in src: + src_parts = [ast.literal_eval(s) for s in line.strip().split('\t')] + dest_parts = [] + for part in src_parts: + if part.startswith('_') or part.startswith('Start') or part.startswith('Mid'): + dest_parts.append(part) + continue + + dest_part = name_map.get(part) + if dest_part is None: + print('error: no mapping for %r (used in %s: %r)' % ( + part, name, src_parts)) + dest_part = 'OLD:' + part + dest_parts.append(dest_part) + + dest.write('\t'.join('"%s"' % part for part in dest_parts) + '\n') + diff --git a/c2rust-analyze/rust-toolchain b/c2rust-analyze/rust-toolchain new file mode 120000 index 0000000000..9327ba4034 --- /dev/null +++ b/c2rust-analyze/rust-toolchain @@ -0,0 +1 @@ +../rust-toolchain \ No newline at end of file diff --git a/c2rust-analyze/src/borrowck/atoms.rs b/c2rust-analyze/src/borrowck/atoms.rs new file mode 100644 index 0000000000..971d56e751 --- /dev/null +++ b/c2rust-analyze/src/borrowck/atoms.rs @@ -0,0 +1,193 @@ +use polonius_engine::{self, Atom, FactTypes}; +use rustc_middle::mir::{BasicBlock, Local, Location, Place, PlaceElem}; +use rustc_middle::ty::TyCtxt; +use std::collections::hash_map::{Entry, HashMap}; +use std::hash::Hash; + +macro_rules! define_atom_type { + ($Atom:ident) => { + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] + pub struct $Atom(usize); + + impl From for $Atom { + fn from(x: usize) -> $Atom { + $Atom(x) + } + } + + impl From<$Atom> for usize { + fn from(x: $Atom) -> usize { + x.0 + } + } + + impl Atom for $Atom { + fn index(self) -> usize { + self.0 + } + } + }; +} + +define_atom_type!(Origin); +define_atom_type!(Loan); +define_atom_type!(Point); +define_atom_type!(Variable); +define_atom_type!(Path); + +#[derive(Clone, Copy, Debug, Default)] +pub struct AnalysisFactTypes; +impl FactTypes for AnalysisFactTypes { + type Origin = Origin; + type Loan = Loan; + type Point = Point; + type Variable = Variable; + type Path = Path; +} + +pub type AllFacts = polonius_engine::AllFacts; +pub type Output = polonius_engine::Output; + +#[derive(Clone, Debug)] +struct AtomMap { + atom_to_thing: Vec, + thing_to_atom: HashMap, +} + +impl Default for AtomMap { + fn default() -> AtomMap { + AtomMap { + atom_to_thing: Vec::new(), + thing_to_atom: HashMap::new(), + } + } +} + +impl AtomMap { + #[allow(dead_code)] + pub fn new() -> AtomMap { + AtomMap { + atom_to_thing: Vec::new(), + thing_to_atom: HashMap::new(), + } + } + + pub fn add(&mut self, x: T) -> A { + match self.thing_to_atom.entry(x.clone()) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + let atom = A::from(self.atom_to_thing.len()); + self.atom_to_thing.push(x); + e.insert(atom); + atom + } + } + } + + pub fn add_new(&mut self, x: T) -> (A, bool) { + match self.thing_to_atom.entry(x.clone()) { + Entry::Occupied(e) => (*e.get(), false), + Entry::Vacant(e) => { + let atom = A::from(self.atom_to_thing.len()); + self.atom_to_thing.push(x); + e.insert(atom); + (atom, true) + } + } + } + + pub fn get(&self, x: A) -> T { + self.atom_to_thing[x.into()].clone() + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum SubPoint { + Start, + Mid, +} + +#[derive(Clone, Debug, Default)] +pub struct AtomMaps<'tcx> { + next_origin: usize, + next_loan: usize, + point: AtomMap<(BasicBlock, usize, SubPoint), Point>, + path: AtomMap<(Local, &'tcx [PlaceElem<'tcx>]), Path>, +} + +impl<'tcx> AtomMaps<'tcx> { + pub fn origin(&mut self) -> Origin { + let idx = self.next_origin; + self.next_origin += 1; + Origin(idx) + } + + pub fn loan(&mut self) -> Loan { + let idx = self.next_loan; + self.next_loan += 1; + Loan(idx) + } + + pub fn point(&mut self, bb: BasicBlock, idx: usize, sub: SubPoint) -> Point { + self.point.add((bb, idx, sub)) + } + + pub fn point_mid_location(&mut self, loc: Location) -> Point { + self.point(loc.block, loc.statement_index, SubPoint::Mid) + } + + pub fn get_point(&self, x: Point) -> (BasicBlock, usize, SubPoint) { + self.point.get(x) + } + + pub fn get_point_location(&self, x: Point) -> Location { + let (block, statement_index, _) = self.get_point(x); + Location { + block, + statement_index, + } + } + + pub fn variable(&mut self, l: Local) -> Variable { + Variable(l.as_usize()) + } + + pub fn _get_variable(&self, x: Variable) -> Local { + Local::from_usize(x.0) + } + + pub fn path(&mut self, facts: &mut AllFacts, place: Place<'tcx>) -> Path { + self.path_slice(facts, place.local, place.projection) + } + + fn path_slice( + &mut self, + facts: &mut AllFacts, + local: Local, + projection: &'tcx [PlaceElem<'tcx>], + ) -> Path { + let (path, new) = self.path.add_new((local, projection)); + if new { + if projection.len() == 0 { + let var = self.variable(local); + facts.path_is_var.push((path, var)); + } else { + let parent = self.path_slice(facts, local, &projection[..projection.len() - 1]); + // TODO: check ordering of arguments here + facts.child_path.push((parent, path)); + } + } + path + } + + pub fn _get_path(&self, tcx: TyCtxt<'tcx>, x: Path) -> Place<'tcx> { + let (local, projection) = self.path.get(x); + let projection = tcx.intern_place_elems(projection); + Place { local, projection } + } + + pub fn get_path_projection(&self, _tcx: TyCtxt<'tcx>, x: Path) -> &'tcx [PlaceElem<'tcx>] { + let (_local, projection) = self.path.get(x); + projection + } +} diff --git a/c2rust-analyze/src/borrowck/def_use.rs b/c2rust-analyze/src/borrowck/def_use.rs new file mode 100644 index 0000000000..6494ba0ded --- /dev/null +++ b/c2rust-analyze/src/borrowck/def_use.rs @@ -0,0 +1,287 @@ +use crate::borrowck::atoms::{AllFacts, AtomMaps, Loan, Path, SubPoint}; +use rustc_middle::mir::visit::{ + MutatingUseContext, NonMutatingUseContext, NonUseContext, PlaceContext, Visitor, +}; +use rustc_middle::mir::{ + Body, BorrowKind, Local, Location, Place, ProjectionElem, Statement, StatementKind, +}; +use rustc_middle::ty::{List, TyCtxt}; +use std::cmp; +use std::collections::HashMap; + +// From `rustc_borrowck/src/def_use.rs`, licensed MIT/Apache2 +#[derive(Eq, PartialEq, Clone, Debug)] +pub enum DefUse { + Def, + Use, + Drop, +} + +// From `rustc_borrowck/src/def_use.rs`, licensed MIT/Apache2 +pub fn categorize(context: PlaceContext) -> Option { + match context { + /////////////////////////////////////////////////////////////////////////// + // DEFS + + PlaceContext::MutatingUse(MutatingUseContext::Store) | + + // We let Call define the result in both the success and + // unwind cases. This is not really correct, however it + // does not seem to be observable due to the way that we + // generate MIR. To do things properly, we would apply + // the def in call only to the input from the success + // path and not the unwind path. -nmatsakis + PlaceContext::MutatingUse(MutatingUseContext::Call) | + PlaceContext::MutatingUse(MutatingUseContext::AsmOutput) | + PlaceContext::MutatingUse(MutatingUseContext::Yield) | + + // Storage live and storage dead aren't proper defines, but we can ignore + // values that come before them. + PlaceContext::NonUse(NonUseContext::StorageLive) | + PlaceContext::NonUse(NonUseContext::StorageDead) => Some(DefUse::Def), + + /////////////////////////////////////////////////////////////////////////// + // REGULAR USES + // + // These are uses that occur *outside* of a drop. For the + // purposes of NLL, these are special in that **all** the + // lifetimes appearing in the variable must be live for each regular use. + + PlaceContext::NonMutatingUse(NonMutatingUseContext::Projection) | + PlaceContext::MutatingUse(MutatingUseContext::Projection) | + + // Borrows only consider their local used at the point of the borrow. + // This won't affect the results since we use this analysis for generators + // and we only care about the result at suspension points. Borrows cannot + // cross suspension points so this behavior is unproblematic. + PlaceContext::MutatingUse(MutatingUseContext::Borrow) | + PlaceContext::NonMutatingUse(NonMutatingUseContext::SharedBorrow) | + PlaceContext::NonMutatingUse(NonMutatingUseContext::ShallowBorrow) | + PlaceContext::NonMutatingUse(NonMutatingUseContext::UniqueBorrow) | + + PlaceContext::MutatingUse(MutatingUseContext::AddressOf) | + PlaceContext::NonMutatingUse(NonMutatingUseContext::AddressOf) | + PlaceContext::NonMutatingUse(NonMutatingUseContext::Inspect) | + PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy) | + PlaceContext::NonMutatingUse(NonMutatingUseContext::Move) | + PlaceContext::NonUse(NonUseContext::AscribeUserTy) | + PlaceContext::MutatingUse(MutatingUseContext::Retag) => + Some(DefUse::Use), + + /////////////////////////////////////////////////////////////////////////// + // DROP USES + // + // These are uses that occur in a DROP (a MIR drop, not a + // call to `std::mem::drop()`). For the purposes of NLL, + // uses in drop are special because `#[may_dangle]` + // attributes can affect whether lifetimes must be live. + + PlaceContext::MutatingUse(MutatingUseContext::Drop) => + Some(DefUse::Drop), + + // Debug info is neither def nor use. + PlaceContext::NonUse(NonUseContext::VarDebugInfo) => None, + } +} + +struct DefUseVisitor<'tcx, 'a> { + facts: &'a mut AllFacts, + maps: &'a mut AtomMaps<'tcx>, +} + +impl<'tcx> Visitor<'tcx> for DefUseVisitor<'tcx, '_> { + fn visit_place(&mut self, place: &Place<'tcx>, context: PlaceContext, location: Location) { + self.super_place(place, context, location); + eprintln!( + "visit place {:?} with context {:?} = {:?} at {:?}", + place, + context, + categorize(context), + location + ); + + if place.is_indirect() { + // TODO + //return; + } + + let path = self.maps.path(self.facts, *place); + let point = self.maps.point_mid_location(location); + + // TODO: figure out when exactly paths should be recorded as assigned/accessed/moved + if let PlaceContext::NonMutatingUse(NonMutatingUseContext::Move) = context { + self.facts.path_accessed_at_base.push((path, point)); + self.facts.path_moved_at_base.push((path, point)); + return; + } + + match categorize(context) { + Some(DefUse::Def) => { + self.facts.path_assigned_at_base.push((path, point)); + } + Some(DefUse::Use) => { + self.facts.path_accessed_at_base.push((path, point)); + } + Some(DefUse::Drop) => {} + None => {} + } + } + + fn visit_local(&mut self, local: &Local, context: PlaceContext, location: Location) { + eprintln!( + "visit local {:?} with context {:?} = {:?} at {:?}", + local, + context, + categorize(context), + location + ); + let var = self.maps.variable(*local); + let point = self.maps.point_mid_location(location); + match categorize(context) { + Some(DefUse::Def) => { + self.facts.var_defined_at.push((var, point)); + } + Some(DefUse::Use) => { + self.facts.var_used_at.push((var, point)); + } + Some(DefUse::Drop) => { + self.facts.var_dropped_at.push((var, point)); + } + None => {} + } + } + + fn visit_statement(&mut self, stmt: &Statement<'tcx>, location: Location) { + self.super_statement(stmt, location); + eprintln!("visit stmt {:?} at {:?}", stmt, location); + + if let StatementKind::StorageDead(local) = stmt.kind { + // Observed: `StorageDead` emits `path_moved_at_base` at the `Mid` point. + let place = Place { + local, + projection: List::empty(), + }; + let path = self.maps.path(self.facts, place); + let point = self.maps.point_mid_location(location); + self.facts.path_moved_at_base.push((path, point)); + } + } +} + +pub fn visit<'tcx>(facts: &mut AllFacts, maps: &mut AtomMaps<'tcx>, mir: &Body<'tcx>) { + let mut v = DefUseVisitor { facts, maps }; + v.visit_body(mir); +} + +struct LoanInvalidatedAtVisitor<'tcx, 'a> { + tcx: TyCtxt<'tcx>, + facts: &'a mut AllFacts, + maps: &'a mut AtomMaps<'tcx>, + loans: &'a HashMap>, +} + +impl<'tcx> LoanInvalidatedAtVisitor<'tcx, '_> { + /// Handle an access of a path overlapping `loan` in `context` at `location`. `borrow_kind` is + /// the original kind of the loan. + fn access_loan_at_location( + &mut self, + loan: Loan, + borrow_kind: BorrowKind, + context: PlaceContext, + location: Location, + ) { + eprintln!( + "access loan {:?} (kind {:?}) at location {:?} (context {:?} = {:?})", + loan, + borrow_kind, + location, + context, + categorize(context) + ); + let invalidate = match (borrow_kind, categorize(context)) { + (BorrowKind::Shared, Some(DefUse::Use)) => false, + (_, None) => false, + _ => true, + }; + if !invalidate { + return; + } + + let point = self + .maps + .point(location.block, location.statement_index, SubPoint::Start); + self.facts.loan_invalidated_at.push((point, loan)); + } +} + +impl<'tcx> Visitor<'tcx> for LoanInvalidatedAtVisitor<'tcx, '_> { + fn visit_place(&mut self, place: &Place<'tcx>, context: PlaceContext, location: Location) { + //self.super_place(place, context, location); + eprintln!( + "loan_invalidated_at: visit place {:?} with context {:?} = {:?} at {:?}", + place, + context, + categorize(context), + location + ); + + if place.is_indirect() { + // TODO + //return; + } + + let local_loans = self.loans.get(&place.local).map_or(&[] as &[_], |x| x); + for &(path, loan, borrow_kind) in local_loans { + let proj = self.maps.get_path_projection(self.tcx, path); + + // If `proj` is a prefix of `place.projection` or vice versa, then the paths overlap. + let common_len = cmp::min(proj.len(), place.projection.len()); + let overlap = proj[..common_len] + .iter() + .zip(place.projection[..common_len].iter()) + .all(|(&elem1, &elem2)| match (elem1, elem2) { + (ProjectionElem::Field(f1, _), ProjectionElem::Field(f2, _)) => f1 == f2, + (ProjectionElem::Index(_), ProjectionElem::Index(_)) => true, + // Conservatively assume that any unsupported variants overlap. + _ => true, + }); + if !overlap { + continue; + } + + self.access_loan_at_location(loan, borrow_kind, context, location); + } + } + + fn visit_local(&mut self, local: &Local, context: PlaceContext, location: Location) { + eprintln!( + "loan_invalidated_at: visit local {:?} with context {:?} = {:?} at {:?}", + local, + context, + categorize(context), + location + ); + + let local_loans = self.loans.get(&local).map_or(&[] as &[_], |x| x); + for &(_path, loan, borrow_kind) in local_loans { + // All paths rooted in this local overlap the local. + self.access_loan_at_location(loan, borrow_kind, context, location); + } + } +} + +pub fn visit_loan_invalidated_at<'tcx>( + tcx: TyCtxt<'tcx>, + facts: &mut AllFacts, + maps: &mut AtomMaps<'tcx>, + loans: &HashMap>, + mir: &Body<'tcx>, +) { + let mut v = LoanInvalidatedAtVisitor { + tcx, + facts, + maps, + loans, + }; + v.visit_body(mir) +} diff --git a/c2rust-analyze/src/borrowck/dump.rs b/c2rust-analyze/src/borrowck/dump.rs new file mode 100644 index 0000000000..be9fa03485 --- /dev/null +++ b/c2rust-analyze/src/borrowck/dump.rs @@ -0,0 +1,349 @@ +use crate::borrowck::atoms::{AllFacts, AtomMaps, Loan, Origin, Output, Path, Point, Variable}; +use rustc_hash::{FxHashMap, FxHashSet}; +/// Copied partly from rustc `compiler/rustc_borrowck/src/facts.rs`, which is dual-licensed MIT and +/// Apache 2.0. +use std::collections::{BTreeMap, BTreeSet}; +use std::error::Error; +use std::fmt::Write as _; +use std::fs::{self, File}; +use std::hash::Hash; +use std::io::{BufWriter, Write}; +use std::path; + +pub fn dump_facts_to_dir( + facts: &AllFacts, + maps: &AtomMaps, + dir: impl AsRef, +) -> Result<(), Box> { + let dir: &path::Path = dir.as_ref(); + fs::create_dir_all(dir)?; + let wr = FactWriter { maps, dir }; + macro_rules! write_facts_to_path { + ($wr:ident . write_facts_to_path($this:ident . [ + $($field:ident,)* + ])) => { + $( + $wr.write_facts_to_path( + &$this.$field, + &format!("{}.facts", stringify!($field)) + )?; + )* + } + } + write_facts_to_path! { + wr.write_facts_to_path(facts.[ + loan_issued_at, + universal_region, + cfg_edge, + loan_killed_at, + subset_base, + loan_invalidated_at, + var_used_at, + var_defined_at, + var_dropped_at, + use_of_var_derefs_origin, + drop_of_var_derefs_origin, + child_path, + path_is_var, + path_assigned_at_base, + path_moved_at_base, + path_accessed_at_base, + known_placeholder_subset, + placeholder, + ]) + } + Ok(()) +} + +pub fn dump_output_to_dir( + output: &Output, + maps: &AtomMaps, + dir: impl AsRef, +) -> Result<(), Box> { + let dir: &path::Path = dir.as_ref(); + fs::create_dir_all(dir)?; + let wr = FactWriter { maps, dir }; + macro_rules! write_output_to_path { + ($wr:ident . write_output_to_path($this:ident . [ + $($field:ident,)* + ])) => { + let Output { $(ref $field,)* } = $this; + $( + $wr.write_output_to_path( + $field, + &format!("{}.output", stringify!($field)) + )?; + )* + } + } + write_output_to_path! { + wr.write_output_to_path(output.[ + errors, + subset_errors, + move_errors, + dump_enabled, + loan_live_at, + origin_contains_loan_at, + origin_contains_loan_anywhere, + origin_live_on_entry, + loan_invalidated_at, + subset, + subset_anywhere, + var_live_on_entry, + var_drop_live_on_entry, + path_maybe_initialized_on_exit, + path_maybe_uninitialized_on_exit, + known_contains, + var_maybe_partly_initialized_on_exit, + ]) + } + Ok(()) +} + +struct FactWriter<'tcx, 'w> { + maps: &'w AtomMaps<'tcx>, + dir: &'w path::Path, +} + +impl FactWriter<'_, '_> { + fn write_facts_to_path(&self, rows: &[T], file_name: &str) -> Result<(), Box> + where + T: FactRow, + { + let file = &self.dir.join(file_name); + let mut file = BufWriter::new(File::create(file)?); + for row in rows { + row.write(&mut file, self.maps)?; + } + Ok(()) + } + + fn write_output_to_path(&self, rows: &T, file_name: &str) -> Result<(), Box> + where + T: OutputTable, + { + let file = &self.dir.join(file_name); + let mut file = BufWriter::new(File::create(file)?); + rows.write(&mut file, self.maps)?; + Ok(()) + } +} + +trait FactRow { + fn write(&self, out: &mut dyn Write, maps: &AtomMaps) -> Result<(), Box>; +} + +impl FactRow for Origin { + fn write(&self, out: &mut dyn Write, maps: &AtomMaps) -> Result<(), Box> { + write_row(out, maps, &[self]) + } +} + +impl FactRow for (A, B) +where + A: Render, + B: Render, +{ + fn write(&self, out: &mut dyn Write, maps: &AtomMaps) -> Result<(), Box> { + write_row(out, maps, &[&self.0, &self.1]) + } +} + +impl FactRow for (A, B, C) +where + A: Render, + B: Render, + C: Render, +{ + fn write(&self, out: &mut dyn Write, maps: &AtomMaps) -> Result<(), Box> { + write_row(out, maps, &[&self.0, &self.1, &self.2]) + } +} + +impl FactRow for (A, B, C, D) +where + A: Render, + B: Render, + C: Render, + D: Render, +{ + fn write(&self, out: &mut dyn Write, maps: &AtomMaps) -> Result<(), Box> { + write_row(out, maps, &[&self.0, &self.1, &self.2, &self.3]) + } +} + +fn write_row( + out: &mut dyn Write, + maps: &AtomMaps, + columns: &[&dyn Render], +) -> Result<(), Box> { + for (index, c) in columns.iter().enumerate() { + let tail = if index == columns.len() - 1 { + "\n" + } else { + "\t" + }; + write!(out, "{:?}{}", c.to_string(maps), tail)?; + } + Ok(()) +} + +trait OutputTable { + fn write(&self, out: &mut dyn Write, maps: &AtomMaps) -> Result<(), Box>; +} + +impl OutputTable for FxHashMap { + fn write(&self, out: &mut dyn Write, maps: &AtomMaps) -> Result<(), Box> { + let mut entries = self.iter().collect::>(); + entries.sort_by_key(|&(k, _)| k); + for (k, v) in entries { + writeln!(out, "{}: {}", k.to_string(maps), v.to_string(maps))?; + } + Ok(()) + } +} + +impl OutputTable for bool { + fn write(&self, out: &mut dyn Write, _maps: &AtomMaps) -> Result<(), Box> { + writeln!(out, "{}", self)?; + Ok(()) + } +} + +trait Render { + fn to_string(&self, maps: &AtomMaps) -> String; +} + +impl Render for FxHashMap { + fn to_string(&self, maps: &AtomMaps) -> String { + let mut s = String::new(); + write!(s, "{{").unwrap(); + let mut first = true; + for (k, v) in self { + if !first { + write!(s, ",").unwrap(); + } + first = false; + write!(s, " {}: {}", k.to_string(maps), v.to_string(maps)).unwrap(); + } + if !first { + write!(s, " ").unwrap(); + } + write!(s, "}}").unwrap(); + s + } +} + +impl Render for FxHashSet { + fn to_string(&self, maps: &AtomMaps) -> String { + let mut s = String::new(); + write!(s, "{{").unwrap(); + let mut first = true; + for x in self { + if !first { + write!(s, ",").unwrap(); + } + first = false; + write!(s, " {}", x.to_string(maps)).unwrap(); + } + if !first { + write!(s, " ").unwrap(); + } + write!(s, "}}").unwrap(); + s + } +} + +impl Render for BTreeMap { + fn to_string(&self, maps: &AtomMaps) -> String { + let mut s = String::new(); + write!(s, "{{").unwrap(); + let mut first = true; + for (k, v) in self { + if !first { + write!(s, ",").unwrap(); + } + first = false; + write!(s, " {}: {}", k.to_string(maps), v.to_string(maps)).unwrap(); + } + if !first { + write!(s, " ").unwrap(); + } + write!(s, "}}").unwrap(); + s + } +} + +impl Render for BTreeSet { + fn to_string(&self, maps: &AtomMaps) -> String { + let mut s = String::new(); + write!(s, "{{").unwrap(); + let mut first = true; + for x in self { + if !first { + write!(s, ",").unwrap(); + } + first = false; + write!(s, " {}", x.to_string(maps)).unwrap(); + } + if !first { + write!(s, " ").unwrap(); + } + write!(s, "}}").unwrap(); + s + } +} + +impl Render for Vec { + fn to_string(&self, maps: &AtomMaps) -> String { + let mut s = String::new(); + write!(s, "[").unwrap(); + let mut first = true; + for x in self { + if !first { + write!(s, ", ").unwrap(); + } + first = false; + write!(s, "{}", x.to_string(maps)).unwrap(); + } + write!(s, "]").unwrap(); + s + } +} + +impl Render for (A, B) { + fn to_string(&self, maps: &AtomMaps) -> String { + format!("({}, {})", self.0.to_string(maps), self.1.to_string(maps)) + } +} + +impl Render for Origin { + fn to_string(&self, _maps: &AtomMaps) -> String { + format!("'_#{}r", usize::from(*self)) + } +} + +impl Render for Loan { + fn to_string(&self, _maps: &AtomMaps) -> String { + format!("bw{}", usize::from(*self)) + } +} + +impl Render for Point { + fn to_string(&self, maps: &AtomMaps) -> String { + let (bb, idx, sub) = maps.get_point(*self); + format!("{:?}({:?}[{}])", sub, bb, idx) + } +} + +impl Render for Variable { + fn to_string(&self, _maps: &AtomMaps) -> String { + format!("_{}", usize::from(*self)) + } +} + +impl Render for Path { + fn to_string(&self, _maps: &AtomMaps) -> String { + format!("mp{}", usize::from(*self)) + } +} diff --git a/c2rust-analyze/src/borrowck/mod.rs b/c2rust-analyze/src/borrowck/mod.rs new file mode 100644 index 0000000000..b8433060b2 --- /dev/null +++ b/c2rust-analyze/src/borrowck/mod.rs @@ -0,0 +1,240 @@ +use self::atoms::{AllFacts, AtomMaps, Loan, Origin, Output, Path, SubPoint}; +use crate::context::{AnalysisCtxt, PermissionSet}; +use crate::dataflow::DataflowConstraints; +use crate::labeled_ty::{LabeledTy, LabeledTyCtxt}; +use crate::util::{describe_rvalue, RvalueDesc}; +use polonius_engine; +use rustc_middle::mir::{Body, BorrowKind, Local, LocalKind, Place, StatementKind, START_BLOCK}; +use rustc_middle::ty::{List, TyKind}; +use std::collections::HashMap; +use std::hash::Hash; + +mod atoms; +mod def_use; +mod dump; +mod type_check; + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash, Default)] +pub struct Label { + pub origin: Option, + pub perm: PermissionSet, +} + +pub type LTy<'tcx> = LabeledTy<'tcx, Label>; +pub type LTyCtxt<'tcx> = LabeledTyCtxt<'tcx, Label>; + +pub fn borrowck_mir<'tcx>( + acx: &AnalysisCtxt<'tcx>, + dataflow: &DataflowConstraints, + hypothesis: &mut [PermissionSet], + name: &str, + mir: &Body<'tcx>, +) { + let mut i = 0; + loop { + eprintln!("run polonius"); + let (facts, maps, output) = run_polonius(acx, hypothesis, name, mir); + eprintln!( + "polonius: iteration {}: {} errors, {} move_errors", + i, + output.errors.len(), + output.move_errors.len(), + ); + i += 1; + + if output.errors.len() == 0 { + break; + } + if i >= 20 { + panic!() + } + + let mut changed = false; + for (_, loans) in &output.errors { + for &loan in loans { + let issued_point = facts + .loan_issued_at + .iter() + .find(|&&(_, l, _)| l == loan) + .map(|&(_, _, point)| point) + .unwrap_or_else(|| panic!("loan {:?} was never issued?", loan)); + let issued_loc = maps.get_point_location(issued_point); + let stmt = mir.stmt_at(issued_loc).left().unwrap_or_else(|| { + panic!( + "loan {:?} was issued by a terminator (at {:?})?", + loan, issued_loc + ); + }); + let ptr = match stmt.kind { + StatementKind::Assign(ref x) => match describe_rvalue(&x.1) { + Some(RvalueDesc::Project { base, proj: _ }) => acx + .ptr_of(base) + .unwrap_or_else(|| panic!("missing pointer ID for {:?}", base)), + Some(RvalueDesc::AddrOfLocal { local, proj: _ }) => { + acx.addr_of_local[local] + } + None => panic!("loan {:?} was issued by unknown rvalue {:?}?", loan, x.1), + }, + _ => panic!("loan {:?} was issued by non-assign stmt {:?}?", loan, stmt), + }; + eprintln!("want to drop UNIQUE from pointer {:?}", ptr); + + if hypothesis[ptr.index()].contains(PermissionSet::UNIQUE) { + hypothesis[ptr.index()].remove(PermissionSet::UNIQUE); + changed = true; + } + } + } + + eprintln!("propagate"); + changed |= dataflow.propagate(hypothesis); + eprintln!("done propagating"); + + if !changed { + eprintln!( + "{} unresolved borrowck errors in function {:?} (after {} iterations)", + output.errors.len(), + name, + i, + ); + break; + } + } +} + +fn run_polonius<'tcx>( + acx: &AnalysisCtxt<'tcx>, + hypothesis: &[PermissionSet], + name: &str, + mir: &Body<'tcx>, +) -> (AllFacts, AtomMaps<'tcx>, Output) { + let tcx = acx.tcx; + let mut facts = AllFacts::default(); + let mut maps = AtomMaps::default(); + + // Start the origin counter at 3. This has no effect on the semantics, but makes for easier + // diffs between our facts and the facts generated by rustc. + for _ in 0..3 { + let _ = maps.origin(); + } + + //pretty::write_mir_fn(tcx, mir, &mut |_, _| Ok(()), &mut std::io::stdout()).unwrap(); + + // Populate `cfg_edge` + for (bb, bb_data) in mir.basic_blocks().iter_enumerated() { + eprintln!("{:?}:", bb); + + for idx in 0..bb_data.statements.len() { + eprintln!(" {}: {:?}", idx, bb_data.statements[idx]); + let start = maps.point(bb, idx, SubPoint::Start); + let mid = maps.point(bb, idx, SubPoint::Mid); + let next_start = maps.point(bb, idx + 1, SubPoint::Start); + facts.cfg_edge.push((start, mid)); + facts.cfg_edge.push((mid, next_start)); + } + + let term_idx = bb_data.statements.len(); + eprintln!(" {}: {:?}", term_idx, bb_data.terminator()); + let term_start = maps.point(bb, term_idx, SubPoint::Start); + let term_mid = maps.point(bb, term_idx, SubPoint::Mid); + facts.cfg_edge.push((term_start, term_mid)); + for &succ in bb_data.terminator().successors() { + let succ_start = maps.point(succ, 0, SubPoint::Start); + facts.cfg_edge.push((term_mid, succ_start)); + } + } + + // From rustc_borrowck::nll::populate_polonius_move_facts: "Non-arguments start out + // deinitialised; we simulate this with an initial move". On the other hand, arguments are + // considered assigned at the entry point. + let entry_point = maps.point(START_BLOCK, 0, SubPoint::Start); + for local in mir.local_decls.indices() { + if mir.local_kind(local) == LocalKind::Arg { + let path = maps.path( + &mut facts, + Place { + local, + projection: List::empty(), + }, + ); + facts.path_assigned_at_base.push((path, entry_point)); + } else { + let path = maps.path( + &mut facts, + Place { + local, + projection: List::empty(), + }, + ); + facts.path_moved_at_base.push((path, entry_point)); + } + } + + // Populate `use_of_var_derefs_origin`, and generate `LTy`s for all locals. + let ltcx = LabeledTyCtxt::new(tcx); + let mut local_ltys = Vec::with_capacity(mir.local_decls.len()); + for local in mir.local_decls.indices() { + let lty = assign_origins( + ltcx, + hypothesis, + &mut facts, + &mut maps, + acx.local_tys[local], + ); + let var = maps.variable(local); + lty.for_each_label(&mut |label| { + if let Some(origin) = label.origin { + facts.use_of_var_derefs_origin.push((var, origin)); + } + }); + local_ltys.push(lty); + } + + let mut loans = HashMap::>::new(); + // Populate `loan_issued_at` and `loans`. + type_check::visit( + tcx, + ltcx, + &mut facts, + &mut maps, + &mut loans, + &local_ltys, + mir, + ); + + // Populate `loan_invalidated_at` + def_use::visit_loan_invalidated_at(acx.tcx, &mut facts, &mut maps, &loans, mir); + + // Populate `var_defined/used/dropped_at` and `path_assigned/accessed_at_base`. + def_use::visit(&mut facts, &mut maps, mir); + + dump::dump_facts_to_dir(&facts, &maps, format!("inspect/{}", name)).unwrap(); + + let output = polonius_engine::Output::compute(&facts, polonius_engine::Algorithm::Naive, true); + dump::dump_output_to_dir(&output, &maps, format!("inspect/{}", name)).unwrap(); + + (facts, maps, output) +} + +fn assign_origins<'tcx>( + ltcx: LTyCtxt<'tcx>, + hypothesis: &[PermissionSet], + _facts: &mut AllFacts, + maps: &mut AtomMaps<'tcx>, + lty: crate::LTy<'tcx>, +) -> LTy<'tcx> { + ltcx.relabel(lty, &mut |lty| { + let perm = if lty.label.is_none() { + PermissionSet::empty() + } else { + hypothesis[lty.label.index()] + }; + match lty.ty.kind() { + TyKind::Ref(_, _, _) | TyKind::RawPtr(_) => { + let origin = Some(maps.origin()); + Label { origin, perm } + } + _ => Label { origin: None, perm }, + } + }) +} diff --git a/c2rust-analyze/src/borrowck/type_check.rs b/c2rust-analyze/src/borrowck/type_check.rs new file mode 100644 index 0000000000..4620ed6776 --- /dev/null +++ b/c2rust-analyze/src/borrowck/type_check.rs @@ -0,0 +1,266 @@ +use crate::borrowck::atoms::{AllFacts, AtomMaps, Loan, Origin, Path, Point, SubPoint}; +use crate::borrowck::{LTy, LTyCtxt, Label}; +use crate::context::PermissionSet; +use crate::util::{self, Callee}; +use rustc_index::vec::IndexVec; +use rustc_middle::mir::{ + BinOp, Body, BorrowKind, Local, LocalDecl, Location, Operand, Place, ProjectionElem, Rvalue, + Statement, StatementKind, Terminator, TerminatorKind, +}; +use rustc_middle::ty::{TyCtxt, TyKind}; +use std::collections::HashMap; + +struct TypeChecker<'tcx, 'a> { + tcx: TyCtxt<'tcx>, + ltcx: LTyCtxt<'tcx>, + facts: &'a mut AllFacts, + maps: &'a mut AtomMaps<'tcx>, + loans: &'a mut HashMap>, + local_ltys: &'a [LTy<'tcx>], + local_decls: &'a IndexVec>, + + current_location: Location, +} + +impl<'tcx> TypeChecker<'tcx, '_> { + fn current_point(&mut self, sub: SubPoint) -> Point { + self.maps.point( + self.current_location.block, + self.current_location.statement_index, + sub, + ) + } + + pub fn visit_place(&mut self, pl: Place<'tcx>) -> LTy<'tcx> { + let mut lty = self.local_ltys[pl.local.index()]; + for proj in pl.projection { + match proj { + ProjectionElem::Deref => { + assert_eq!(lty.args.len(), 1); + lty = lty.args[0]; + } + + ProjectionElem::Field(f, _field_ty) => match lty.ty.kind() { + TyKind::Tuple(..) => { + lty = lty.args[f.as_usize()]; + } + _ => todo!("field of {:?}", lty), + }, + + ref proj => panic!("unsupported projection {:?} in {:?}", proj, pl), + } + } + lty + } + + pub fn visit_operand(&mut self, op: &Operand<'tcx>) -> LTy<'tcx> { + match *op { + Operand::Copy(pl) | Operand::Move(pl) => self.visit_place(pl), + Operand::Constant(ref c) => { + let ty = c.ty(); + self.ltcx.label(ty, &mut |_| Label::default()) + } + } + } + + /// Create a new origin and issue an associated loan. The loan is issued at + /// `self.current_location`. + fn issue_loan(&mut self, pl: Place<'tcx>, borrow_kind: BorrowKind) -> Origin { + // Create a new origin and issue an associated loan. + let origin = self.maps.origin(); + let path = self.maps.path(self.facts, pl); + let loan = self.maps.loan(); + self.loans + .entry(pl.local) + .or_default() + .push((path, loan, borrow_kind)); + let point = self.current_point(SubPoint::Mid); + self.facts.loan_issued_at.push((origin, loan, point)); + eprintln!("issued loan {:?} = {:?} ({:?})", loan, pl, borrow_kind); + origin + } + + pub fn visit_rvalue(&mut self, rv: &Rvalue<'tcx>, expect_ty: LTy<'tcx>) -> LTy<'tcx> { + match *rv { + Rvalue::Use(Operand::Move(pl)) | Rvalue::Use(Operand::Copy(pl)) + if matches!(expect_ty.ty.kind(), TyKind::RawPtr(_)) => + { + // Copy of a raw pointer. We treat this as a reborrow. + let perm = expect_ty.label.perm; + let borrow_kind = if perm.contains(PermissionSet::UNIQUE) { + BorrowKind::Mut { + allow_two_phase_borrow: false, + } + } else { + BorrowKind::Shared + }; + + let pl_deref = self.tcx.mk_place_deref(pl); + let origin = self.issue_loan(pl_deref, borrow_kind); + + // Return a type with the new loan on the outermost `ref`. + let ty = rv.ty(self.local_decls, *self.ltcx); + let pl_lty = self.visit_place(pl_deref); + let label = Label { + origin: Some(origin), + perm, + }; + let lty = self.ltcx.mk(ty, self.ltcx.mk_slice(&[pl_lty]), label); + lty + } + + Rvalue::Use(ref op) => self.visit_operand(op), + + Rvalue::Ref(_, borrow_kind, pl) => { + let perm = expect_ty.label.perm; + let origin = self.issue_loan(pl, borrow_kind); + + // Return a type with the new loan on the outermost `ref`. + let ty = rv.ty(self.local_decls, *self.ltcx); + let pl_lty = self.visit_place(pl); + let label = Label { + origin: Some(origin), + perm, + }; + let lty = self.ltcx.mk(ty, self.ltcx.mk_slice(&[pl_lty]), label); + lty + } + + Rvalue::AddressOf(_, pl) => { + let perm = expect_ty.label.perm; + let borrow_kind = if perm.contains(PermissionSet::UNIQUE) { + BorrowKind::Mut { + allow_two_phase_borrow: false, + } + } else { + BorrowKind::Shared + }; + + let origin = self.issue_loan(pl, borrow_kind); + + // Return a type with the new loan on the outermost `ref`. + let ty = rv.ty(self.local_decls, *self.ltcx); + let pl_lty = self.visit_place(pl); + let label = Label { + origin: Some(origin), + perm, + }; + let lty = self.ltcx.mk(ty, self.ltcx.mk_slice(&[pl_lty]), label); + lty + } + + Rvalue::BinaryOp(BinOp::Offset, _) | Rvalue::CheckedBinaryOp(BinOp::Offset, _) => { + todo!("visit_rvalue BinOp::Offset") + } + Rvalue::BinaryOp(_, ref _ab) | Rvalue::CheckedBinaryOp(_, ref _ab) => { + let ty = rv.ty(self.local_decls, *self.ltcx); + self.ltcx.label(ty, &mut |ty| { + assert!( + !matches!(ty.kind(), TyKind::RawPtr(..) | TyKind::Ref(..)), + "pointer BinaryOp NYI" + ); + Label::default() + }) + } + + Rvalue::Cast(_, _, ty) => self.ltcx.label(ty, &mut |ty| { + assert!( + !matches!(ty.kind(), TyKind::RawPtr(..) | TyKind::Ref(..)), + "pointer Cast NYI" + ); + Label::default() + }), + + ref rv => panic!("unsupported rvalue {:?}", rv), + } + } + + fn do_assign(&mut self, pl_lty: LTy<'tcx>, rv_lty: LTy<'tcx>) { + eprintln!("assign {:?} = {:?}", pl_lty, rv_lty); + + let pl_origin = pl_lty.label.origin; + let rv_origin = rv_lty.label.origin; + if let (Some(pl_origin), Some(rv_origin)) = (pl_origin, rv_origin) { + let point = self.current_point(SubPoint::Mid); + self.facts.subset_base.push((rv_origin, pl_origin, point)); + } + } + + pub fn visit_statement(&mut self, stmt: &Statement<'tcx>) { + match stmt.kind { + StatementKind::Assign(ref x) => { + let (pl, ref rv) = **x; + let pl_lty = self.visit_place(pl); + let rv_lty = self.visit_rvalue(rv, pl_lty); + self.do_assign(pl_lty, rv_lty); + } + _ => {} + } + } + + pub fn visit_terminator(&mut self, term: &Terminator<'tcx>) { + eprintln!("borrowck: visit_terminator({:?})", term.kind); + match term.kind { + TerminatorKind::Call { + ref func, + ref args, + destination, + .. + } => { + let func_ty = func.ty(self.local_decls, *self.ltcx); + eprintln!("callee = {:?}", util::ty_callee(*self.ltcx, func_ty)); + match util::ty_callee(*self.ltcx, func_ty) { + Some(Callee::PtrOffset { .. }) => { + // We handle this like a pointer assignment. + + // `destination` must be `Some` because the function doesn't diverge. + let destination = destination.unwrap(); + let pl_lty = self.visit_place(destination.0); + assert!(args.len() == 2); + let rv_lty = self.visit_operand(&args[0]); + self.do_assign(pl_lty, rv_lty); + } + None => {} + } + } + _ => {} + } + } +} + +pub fn visit<'tcx>( + tcx: TyCtxt<'tcx>, + ltcx: LTyCtxt<'tcx>, + facts: &mut AllFacts, + maps: &mut AtomMaps<'tcx>, + loans: &mut HashMap>, + local_ltys: &[LTy<'tcx>], + mir: &Body<'tcx>, +) { + let mut tc = TypeChecker { + tcx, + ltcx, + facts, + maps, + loans, + local_ltys, + local_decls: &mir.local_decls, + current_location: Location::START, + }; + + for (bb, bb_data) in mir.basic_blocks().iter_enumerated() { + for (idx, stmt) in bb_data.statements.iter().enumerate() { + tc.current_location = Location { + block: bb, + statement_index: idx, + }; + tc.visit_statement(stmt); + } + + tc.current_location = Location { + block: bb, + statement_index: bb_data.statements.len(), + }; + tc.visit_terminator(bb_data.terminator()); + } +} diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs new file mode 100644 index 0000000000..665eea31e6 --- /dev/null +++ b/c2rust-analyze/src/context.rs @@ -0,0 +1,157 @@ +use crate::labeled_ty::{LabeledTy, LabeledTyCtxt}; +use bitflags::bitflags; +use rustc_index::vec::IndexVec; +use rustc_middle::mir::{Local, Place, PlaceRef, ProjectionElem}; +use rustc_middle::ty::{TyCtxt, TyKind}; +use std::cell::Cell; + +bitflags! { + #[derive(Default)] + pub struct PermissionSet: u16 { + /// The value(s) accessible through this pointer can be read. + const READ = 0x0001; + /// The value(s) accessible through this pointer can be written. + const WRITE = 0x0002; + /// This pointer is unique: using an alias not derived from this + /// pointer invalidates this pointer, after which it is not valid to use. + const UNIQUE = 0x0004; + /// This pointer is linear-typed. Copying a `LINEAR` pointer to another `LINEAR` location + /// moves the pointer and invalidates the source of the copy. (However, a + /// copy-and-downcast to a non-`LINEAR` location is a borrow, which does not invalidate the + /// source pointer.) + const LINEAR = 0x0008; + /// This pointer can be offset in the positive direction. + /// + /// Offsetting the pointer in an unknown direction requires both `OFFSET_ADD` and + /// `OFFSET_SUB`. Offsetting by zero requires neither `OFFSET_ADD` nor `OFFSET_SUB`. + const OFFSET_ADD = 0x0010; + /// This pointer can be offset in the negative direction. + const OFFSET_SUB = 0x0020; + } +} + +bitflags! { + /// Additional flags describing a given pointer type. These are mainly derived from + /// `PermissionSet`, but don't follow the normal subtyping rules and propagation algorithm. + #[derive(Default)] + pub struct FlagSet: u16 { + /// The pointee type is wrapped in `Cell`. This is tracked separately from the + /// `PermissionSet` since it depends on the past/future uses of the pointer in an unusual + /// way, and it can't be freely discarded (or its inverse freely added) as is the case for + /// everything in `PermissionSet`. + const CELL = 0x0001; + } +} + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] +pub struct PointerId(u32); + +impl PointerId { + pub const NONE: PointerId = PointerId(u32::MAX); + + pub fn index(self) -> usize { + self.0 as usize + } + + pub fn is_none(self) -> bool { + self == Self::NONE + } +} + +pub type LTy<'tcx> = LabeledTy<'tcx, PointerId>; +pub type LTyCtxt<'tcx> = LabeledTyCtxt<'tcx, PointerId>; + +pub struct AnalysisCtxt<'tcx> { + pub tcx: TyCtxt<'tcx>, + pub lcx: LTyCtxt<'tcx>, + + pub local_tys: IndexVec>, + pub addr_of_local: IndexVec, + + next_ptr_id: Cell, +} + +impl<'tcx> AnalysisCtxt<'tcx> { + pub fn new(tcx: TyCtxt<'tcx>) -> AnalysisCtxt<'tcx> { + AnalysisCtxt { + tcx, + lcx: LabeledTyCtxt::new(tcx), + local_tys: IndexVec::new(), + addr_of_local: IndexVec::new(), + next_ptr_id: Cell::new(0), + } + } + + pub fn new_pointer(&self) -> PointerId { + let next = self.next_ptr_id.get(); + self.next_ptr_id.set(next + 1); + PointerId(next) + } + + pub fn num_pointers(&self) -> usize { + self.next_ptr_id.get() as usize + } + + pub fn type_of>(&self, x: T) -> LTy<'tcx> { + x.type_of(self) + } + + pub fn ptr_of>(&self, x: T) -> Option { + let ptr = self.type_of(x).label; + if ptr == PointerId::NONE { + None + } else { + Some(ptr) + } + } +} + +pub trait TypeOf<'tcx> { + fn type_of(&self, acx: &AnalysisCtxt<'tcx>) -> LTy<'tcx>; +} + +impl<'tcx, T: TypeOf<'tcx>> TypeOf<'tcx> for &T { + fn type_of(&self, acx: &AnalysisCtxt<'tcx>) -> LTy<'tcx> { + (**self).type_of(acx) + } +} + +impl<'tcx> TypeOf<'tcx> for Local { + fn type_of(&self, acx: &AnalysisCtxt<'tcx>) -> LTy<'tcx> { + acx.local_tys[*self] + } +} + +impl<'tcx> TypeOf<'tcx> for Place<'tcx> { + fn type_of(&self, acx: &AnalysisCtxt<'tcx>) -> LTy<'tcx> { + acx.type_of(self.as_ref()) + } +} + +impl<'tcx> TypeOf<'tcx> for PlaceRef<'tcx> { + fn type_of(&self, acx: &AnalysisCtxt<'tcx>) -> LTy<'tcx> { + let mut ty = acx.type_of(self.local); + for proj in self.projection { + match *proj { + ProjectionElem::Deref => { + assert!(matches!(ty.kind(), TyKind::Ref(..) | TyKind::RawPtr(..))); + assert_eq!(ty.args.len(), 1); + ty = ty.args[0]; + } + ProjectionElem::Field(f, _) => match ty.kind() { + TyKind::Tuple(_) => { + ty = ty.args[f.index()]; + } + TyKind::Adt(..) => todo!("type_of Field(Adt)"), + _ => panic!("Field projection is unsupported on type {:?}", ty), + }, + ProjectionElem::Index(..) | ProjectionElem::ConstantIndex { .. } => { + todo!("type_of Index") + } + ProjectionElem::Subslice { .. } => todo!("type_of Subslice"), + ProjectionElem::Downcast(..) => todo!("type_of Downcast"), + } + } + ty + } +} diff --git a/c2rust-analyze/src/dataflow/mod.rs b/c2rust-analyze/src/dataflow/mod.rs new file mode 100644 index 0000000000..2555c79aa8 --- /dev/null +++ b/c2rust-analyze/src/dataflow/mod.rs @@ -0,0 +1,298 @@ +use std::mem; + +use crate::context::{AnalysisCtxt, FlagSet, PermissionSet, PointerId}; +use rustc_middle::mir::Body; + +mod type_check; + +#[derive(Clone, Debug)] +enum Constraint { + /// Pointer `.0` must have a subset of the permissions of pointer `.1`. + Subset(PointerId, PointerId), + /// Pointer `.0` must have all the permissions in `.1`. + AllPerms(PointerId, PermissionSet), + /// Pointer `.0` must not have any of the permissions in `.1`. + NoPerms(PointerId, PermissionSet), +} + +#[derive(Clone, Debug, Default)] +pub struct DataflowConstraints { + constraints: Vec, +} + +impl DataflowConstraints { + fn add_subset(&mut self, a: PointerId, b: PointerId) { + self.constraints.push(Constraint::Subset(a, b)); + } + + fn add_all_perms(&mut self, ptr: PointerId, perms: PermissionSet) { + self.constraints.push(Constraint::AllPerms(ptr, perms)); + } + + #[allow(dead_code)] + fn _add_no_perms(&mut self, ptr: PointerId, perms: PermissionSet) { + self.constraints.push(Constraint::NoPerms(ptr, perms)); + } + + /// Update the pointer permissions in `hypothesis` to satisfy these constraints. + pub fn propagate(&self, hypothesis: &mut [PermissionSet]) -> bool { + eprintln!("=== propagating ==="); + eprintln!("constraints:"); + for c in &self.constraints { + eprintln!(" {:?}", c); + } + eprintln!("hypothesis:"); + for (i, p) in hypothesis.iter().enumerate() { + eprintln!(" {}: {:?}", i, p); + } + + struct PropagatePerms; + impl PropagateRules for PropagatePerms { + fn subset( + &mut self, + _a_ptr: PointerId, + a_val: &PermissionSet, + _b_ptr: PointerId, + b_val: &PermissionSet, + ) -> (PermissionSet, PermissionSet) { + let old_a = *a_val; + let old_b = *b_val; + + // These should be `const`s, but that produces `error[E0015]: cannot call + // non-const operator in constants`. + + // Permissions that should be propagated "down": if the superset (`b`) + // doesn't have it, then the subset (`a`) should have it removed. + #[allow(bad_style)] + let PROPAGATE_DOWN = PermissionSet::UNIQUE; + // Permissions that should be propagated "up": if the subset (`a`) has it, + // then the superset (`b`) should be given it. + #[allow(bad_style)] + let PROPAGATE_UP = PermissionSet::READ + | PermissionSet::WRITE + | PermissionSet::OFFSET_ADD + | PermissionSet::OFFSET_SUB; + + ( + old_a & !(!old_b & PROPAGATE_DOWN), + old_b | (old_a & PROPAGATE_UP), + ) + } + + fn all_perms( + &mut self, + _ptr: PointerId, + perms: PermissionSet, + val: &PermissionSet, + ) -> PermissionSet { + *val | perms + } + + fn no_perms( + &mut self, + _ptr: PointerId, + perms: PermissionSet, + val: &PermissionSet, + ) -> PermissionSet { + *val & !perms + } + } + + match self.propagate_inner(hypothesis, &mut PropagatePerms) { + Ok(changed) => changed, + Err(msg) => { + panic!("{}", msg); + } + } + } + + fn propagate_inner(&self, xs: &mut [T], rules: &mut R) -> Result + where + T: PartialEq, + R: PropagateRules, + { + let mut xs = TrackedSlice::new(xs); + + let mut changed = false; + let mut i = 0; + loop { + if i > xs.len() + self.constraints.len() { + return Err(format!("infinite loop in dataflow edges")); + } + i += 1; + + for c in &self.constraints { + match *c { + Constraint::Subset(a, b) => { + if !xs.dirty(a.index()) && !xs.dirty(b.index()) { + continue; + } + + let old_a = xs.get(a.index()); + let old_b = xs.get(b.index()); + let (new_a, new_b) = rules.subset(a, old_a, b, old_b); + xs.set(a.index(), new_a); + xs.set(b.index(), new_b); + } + + Constraint::AllPerms(ptr, perms) => { + if !xs.dirty(ptr.index()) { + continue; + } + + let old = xs.get(ptr.index()); + let new = rules.all_perms(ptr, perms, old); + xs.set(ptr.index(), new); + } + + Constraint::NoPerms(ptr, perms) => { + if !xs.dirty(ptr.index()) { + continue; + } + + let old = xs.get(ptr.index()); + let new = rules.no_perms(ptr, perms, old); + xs.set(ptr.index(), new); + } + } + } + + if !xs.any_new_dirty() { + break; + } + xs.swap_dirty(); + changed = true; + } + + Ok(changed) + } + + /// Update the pointer permissions in `hypothesis` to satisfy these constraints. + pub fn propagate_cell(&self, perms: &[PermissionSet], flags: &mut [FlagSet]) { + // All pointers that are WRITE and not UNIQUE must have a type like `&Cell<_>`. + for (p, f) in perms.iter().zip(flags.iter_mut()) { + if p.contains(PermissionSet::WRITE) && !p.contains(PermissionSet::UNIQUE) { + f.insert(FlagSet::CELL); + } + } + + struct Rules<'a> { + perms: &'a [PermissionSet], + } + impl PropagateRules for Rules<'_> { + fn subset( + &mut self, + _a_ptr: PointerId, + a_val: &FlagSet, + b_ptr: PointerId, + b_val: &FlagSet, + ) -> (FlagSet, FlagSet) { + // Propagate `CELL` both forward and backward. On the backward side, if `b` has + // both `WRITE` and `UNIQUE`, then we remove `CELL`, since `&mut T` can be + // converted to `&Cell`. + let mut a_flags = *a_val; + let mut b_flags = *b_val; + if a_flags.contains(FlagSet::CELL) { + b_flags.insert(FlagSet::CELL); + } + if b_flags.contains(FlagSet::CELL) { + a_flags.insert(FlagSet::CELL); + } + + let b_perms = self.perms[b_ptr.index()]; + if b_perms.contains(PermissionSet::WRITE | PermissionSet::UNIQUE) { + b_flags.remove(FlagSet::CELL); + } + + (a_flags, b_flags) + } + + fn all_perms( + &mut self, + _ptr: PointerId, + _perms: PermissionSet, + val: &FlagSet, + ) -> FlagSet { + *val + } + + fn no_perms( + &mut self, + _ptr: PointerId, + _perms: PermissionSet, + val: &FlagSet, + ) -> FlagSet { + *val + } + } + + match self.propagate_inner(flags, &mut Rules { perms }) { + Ok(_changed) => {} + Err(msg) => { + panic!("{}", msg); + } + } + } +} + +struct TrackedSlice<'a, T> { + xs: &'a mut [T], + dirty: Vec, + new_dirty: Vec, + any_new_dirty: bool, +} + +impl<'a, T: PartialEq> TrackedSlice<'a, T> { + pub fn new(xs: &'a mut [T]) -> TrackedSlice<'a, T> { + let n = xs.len(); + TrackedSlice { + xs, + dirty: vec![true; n], + new_dirty: vec![false; n], + any_new_dirty: false, + } + } + + pub fn len(&self) -> usize { + self.xs.len() + } + + pub fn get(&self, i: usize) -> &T { + &self.xs[i] + } + + pub fn dirty(&self, i: usize) -> bool { + self.dirty[i] + } + + pub fn any_new_dirty(&self) -> bool { + self.any_new_dirty + } + + pub fn set(&mut self, i: usize, x: T) { + if x != self.xs[i] { + self.xs[i] = x; + self.new_dirty[i] = true; + self.any_new_dirty = true; + } + } + + pub fn swap_dirty(&mut self) { + mem::swap(&mut self.dirty, &mut self.new_dirty); + self.new_dirty.fill(false); + self.any_new_dirty = false; + } +} + +trait PropagateRules { + fn subset(&mut self, a_ptr: PointerId, a_val: &T, b_ptr: PointerId, b_val: &T) -> (T, T); + fn all_perms(&mut self, ptr: PointerId, perms: PermissionSet, val: &T) -> T; + fn no_perms(&mut self, ptr: PointerId, perms: PermissionSet, val: &T) -> T; +} + +pub fn generate_constraints<'tcx>( + acx: &AnalysisCtxt<'tcx>, + mir: &Body<'tcx>, +) -> DataflowConstraints { + self::type_check::visit(acx, mir) +} diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs new file mode 100644 index 0000000000..f2de458150 --- /dev/null +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -0,0 +1,205 @@ +use super::DataflowConstraints; +use crate::context::{AnalysisCtxt, LTy, PermissionSet, PointerId}; +use crate::util::{self, describe_rvalue, Callee, RvalueDesc}; +use rustc_middle::mir::visit::{MutatingUseContext, NonMutatingUseContext, PlaceContext}; +use rustc_middle::mir::{ + BinOp, Body, Mutability, Operand, Place, PlaceRef, ProjectionElem, Rvalue, Statement, + StatementKind, Terminator, TerminatorKind, +}; +use rustc_middle::ty::TyKind; + +/// Visitor that walks over the MIR, computing types of rvalues/operands/places and generating +/// constraints as a side effect. +struct TypeChecker<'tcx, 'a> { + acx: &'a AnalysisCtxt<'tcx>, + mir: &'a Body<'tcx>, + constraints: DataflowConstraints, +} + +impl<'tcx> TypeChecker<'tcx, '_> { + fn add_edge(&mut self, src: PointerId, dest: PointerId) { + // Copying `src` to `dest` can discard permissions, but can't add new ones. + self.constraints.add_subset(dest, src); + } + + fn record_access(&mut self, ptr: PointerId, mutbl: Mutability) { + eprintln!("record_access({:?}, {:?})", ptr, mutbl); + if ptr == PointerId::NONE { + return; + } + match mutbl { + Mutability::Mut => { + self.constraints + .add_all_perms(ptr, PermissionSet::READ | PermissionSet::WRITE); + } + Mutability::Not => { + self.constraints.add_all_perms(ptr, PermissionSet::READ); + } + } + } + + pub fn visit_place(&mut self, pl: Place<'tcx>, ctx: PlaceContext) -> LTy<'tcx> { + self.visit_place_ref(pl.as_ref(), ctx) + } + + pub fn visit_place_ref(&mut self, pl: PlaceRef<'tcx>, ctx: PlaceContext) -> LTy<'tcx> { + let mut lty = self.acx.local_tys[pl.local]; + let mut prev_deref_ptr = None; + + for proj in pl.projection { + match proj { + ProjectionElem::Deref => { + // All derefs except the last are loads, to retrieve the pointer for the next + // deref. The last deref is either a load or a store, depending on `ctx`. + if let Some(ptr) = prev_deref_ptr.take() { + self.record_access(ptr, Mutability::Not); + } + prev_deref_ptr = Some(lty.label); + assert_eq!(lty.args.len(), 1); + lty = lty.args[0]; + } + + ProjectionElem::Field(f, _field_ty) => match lty.ty.kind() { + TyKind::Tuple(..) => { + lty = lty.args[f.as_usize()]; + } + _ => todo!("field of {:?}", lty), + }, + + ref proj => panic!("unsupported projection {:?} in {:?}", proj, pl), + } + } + + if let Some(ptr) = prev_deref_ptr.take() { + match ctx { + PlaceContext::NonMutatingUse(..) => { + self.record_access(ptr, Mutability::Not); + } + PlaceContext::MutatingUse(..) => { + self.record_access(ptr, Mutability::Mut); + } + PlaceContext::NonUse(..) => {} + } + } + + lty + } + + pub fn visit_rvalue(&mut self, rv: &Rvalue<'tcx>) -> PointerId { + eprintln!("visit_rvalue({:?}), desc = {:?}", rv, describe_rvalue(rv)); + + match describe_rvalue(rv) { + Some(RvalueDesc::Project { base, proj: _ }) => { + let ctx = PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy); + let base_ty = self.visit_place_ref(base, ctx); + base_ty.label + } + Some(RvalueDesc::AddrOfLocal { local, proj: _ }) => self.acx.addr_of_local[local], + None => match *rv { + Rvalue::Use(ref op) => self.visit_operand(op).label, + Rvalue::BinaryOp(BinOp::Offset, _) => todo!("visit_rvalue BinOp::Offset"), + Rvalue::BinaryOp(..) => PointerId::NONE, + Rvalue::CheckedBinaryOp(BinOp::Offset, _) => todo!("visit_rvalue BinOp::Offset"), + Rvalue::CheckedBinaryOp(..) => PointerId::NONE, + Rvalue::Cast(_, _, ty) => { + assert!(!matches!(ty.kind(), TyKind::RawPtr(..) | TyKind::Ref(..))); + PointerId::NONE + } + _ => panic!("TODO: handle assignment of {:?}", rv), + }, + } + } + + pub fn visit_operand(&mut self, op: &Operand<'tcx>) -> LTy<'tcx> { + match *op { + Operand::Copy(pl) => { + let ctx = PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy); + self.visit_place(pl, ctx) + } + Operand::Move(pl) => { + let ctx = PlaceContext::NonMutatingUse(NonMutatingUseContext::Move); + self.visit_place(pl, ctx) + } + Operand::Constant(ref c) => { + let ty = c.ty(); + // TODO + self.acx.lcx.label(ty, &mut |_| PointerId::NONE) + } + } + } + + fn do_assign(&mut self, pl_ptr: PointerId, rv_ptr: PointerId) { + if pl_ptr != PointerId::NONE || rv_ptr != PointerId::NONE { + assert!(pl_ptr != PointerId::NONE); + assert!(rv_ptr != PointerId::NONE); + self.add_edge(rv_ptr, pl_ptr); + } + } + + pub fn visit_statement(&mut self, stmt: &Statement<'tcx>) { + eprintln!("visit_statement({:?})", stmt); + match stmt.kind { + StatementKind::Assign(ref x) => { + let (pl, ref rv) = **x; + let ctx = PlaceContext::MutatingUse(MutatingUseContext::Store); + let pl_lty = self.visit_place(pl, ctx); + let pl_ptr = pl_lty.label; + + let rv_ptr = self.visit_rvalue(rv); + + self.do_assign(pl_ptr, rv_ptr); + } + _ => {} + } + } + + pub fn visit_terminator(&mut self, term: &Terminator<'tcx>) { + eprintln!("visit_terminator({:?})", term.kind); + let tcx = self.acx.tcx; + match term.kind { + TerminatorKind::Call { + ref func, + ref args, + destination, + .. + } => { + let func_ty = func.ty(self.mir, tcx); + eprintln!("callee = {:?}", util::ty_callee(tcx, func_ty)); + match util::ty_callee(tcx, func_ty) { + Some(Callee::PtrOffset { .. }) => { + // We handle this like a pointer assignment. + + // `destination` must be `Some` because the function doesn't diverge. + let destination = destination.unwrap(); + let ctx = PlaceContext::MutatingUse(MutatingUseContext::Store); + let pl_lty = self.visit_place(destination.0, ctx); + assert!(args.len() == 2); + let rv_lty = self.visit_operand(&args[0]); + self.do_assign(pl_lty.label, rv_lty.label); + let perms = PermissionSet::OFFSET_ADD | PermissionSet::OFFSET_SUB; + self.constraints.add_all_perms(rv_lty.label, perms); + } + None => {} + } + } + _ => {} + } + } +} + +pub fn visit<'tcx>(acx: &AnalysisCtxt<'tcx>, mir: &Body<'tcx>) -> DataflowConstraints { + let mut tc = TypeChecker { + acx, + mir, + constraints: DataflowConstraints::default(), + }; + + for bb_data in mir.basic_blocks().iter() { + for stmt in bb_data.statements.iter() { + tc.visit_statement(stmt); + } + tc.visit_terminator(bb_data.terminator()); + } + + tc.constraints +} diff --git a/c2rust-analyze/src/expr_rewrite.rs b/c2rust-analyze/src/expr_rewrite.rs new file mode 100644 index 0000000000..e3900d09da --- /dev/null +++ b/c2rust-analyze/src/expr_rewrite.rs @@ -0,0 +1,402 @@ +use crate::context::{AnalysisCtxt, FlagSet, LTy, PermissionSet, PointerId}; +use crate::type_desc::{self, Ownership, Quantity}; +use crate::util::{self, Callee}; +use rustc_middle::mir::{ + BasicBlock, Body, Location, Operand, Rvalue, Statement, StatementKind, Terminator, + TerminatorKind, +}; +use rustc_span::{Span, DUMMY_SP}; + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct ExprLoc { + pub stmt: Location, + pub span: Span, + pub sub: Vec, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum SubLoc { + /// The LHS of an assignment or call. `StatementKind::Assign/TerminatorKind::Call -> Place` + Dest, + /// The RHS of an assignment. `StatementKind::Assign -> Rvalue` + AssignRvalue, + /// The Nth argument of a call. `TerminatorKind::Call -> Operand` + CallArg(usize), + /// The Nth operand of an rvalue. `Rvalue -> Operand` + RvalueOperand(usize), + /// The place referenced by an operand. `Operand::Move/Operand::Copy -> Place` + OperandPlace, + /// The pointer used in the Nth innermost deref within a place. `Place -> Place` + PlacePointer(usize), +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum RewriteKind { + /// Replace `ptr.offset(i)` with something like `&ptr[i..]`. + OffsetSlice { mutbl: bool }, + /// Replace `slice` with `&slice[0]`. + SliceFirst { mutbl: bool }, + /// Replace `ptr` with `&*ptr`, converting `&mut T` to `&T`. + MutToImm, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct ExprRewrite { + pub loc: ExprLoc, + pub kinds: Vec, +} + +struct ExprRewriteVisitor<'a, 'tcx> { + acx: &'a AnalysisCtxt<'tcx>, + perms: &'a [PermissionSet], + flags: &'a [FlagSet], + rewrites: &'a mut Vec, + mir: &'a Body<'tcx>, + loc: ExprLoc, +} + +impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { + pub fn new( + acx: &'a AnalysisCtxt<'tcx>, + perms: &'a [PermissionSet], + flags: &'a [FlagSet], + rewrites: &'a mut Vec, + mir: &'a Body<'tcx>, + ) -> ExprRewriteVisitor<'a, 'tcx> { + ExprRewriteVisitor { + acx, + perms, + flags, + rewrites, + mir, + loc: ExprLoc { + stmt: Location { + block: BasicBlock::from_usize(0), + statement_index: 0, + }, + span: DUMMY_SP, + sub: Vec::new(), + }, + } + } + + fn enter R, R>(&mut self, sub: SubLoc, f: F) -> R { + self.loc.sub.push(sub); + let r = f(self); + self.loc.sub.pop(); + r + } + + #[allow(dead_code)] + fn _enter_dest R, R>(&mut self, f: F) -> R { + self.enter(SubLoc::Dest, f) + } + + fn enter_assign_rvalue R, R>(&mut self, f: F) -> R { + self.enter(SubLoc::AssignRvalue, f) + } + + fn enter_call_arg R, R>(&mut self, i: usize, f: F) -> R { + self.enter(SubLoc::CallArg(i), f) + } + + fn enter_rvalue_operand R, R>(&mut self, i: usize, f: F) -> R { + self.enter(SubLoc::RvalueOperand(i), f) + } + + #[allow(dead_code)] + fn _enter_operand_place R, R>(&mut self, f: F) -> R { + self.enter(SubLoc::OperandPlace, f) + } + + #[allow(dead_code)] + fn _enter_place_pointer R, R>(&mut self, i: usize, f: F) -> R { + self.enter(SubLoc::PlacePointer(i), f) + } + + fn visit_statement(&mut self, stmt: &Statement<'tcx>, loc: Location) { + self.loc = ExprLoc { + stmt: loc, + span: stmt.source_info.span, + sub: Vec::new(), + }; + + match stmt.kind { + StatementKind::Assign(ref x) => { + let (pl, ref rv) = **x; + let pl_ty = self.acx.type_of(pl); + self.enter_assign_rvalue(|v| v.visit_rvalue(rv, pl_ty)); + // TODO: visit place + } + StatementKind::FakeRead(..) => {} + StatementKind::SetDiscriminant { .. } => todo!("statement {:?}", stmt), + StatementKind::StorageLive(..) => {} + StatementKind::StorageDead(..) => {} + StatementKind::Retag(..) => {} + StatementKind::AscribeUserType(..) => {} + StatementKind::Coverage(..) => {} + StatementKind::CopyNonOverlapping(..) => todo!("statement {:?}", stmt), + StatementKind::Nop => {} + } + } + + fn visit_terminator(&mut self, term: &Terminator<'tcx>, loc: Location) { + let tcx = self.acx.tcx; + self.loc = ExprLoc { + stmt: loc, + span: term.source_info.span, + sub: Vec::new(), + }; + + match term.kind { + TerminatorKind::Goto { .. } => {} + TerminatorKind::SwitchInt { .. } => {} + TerminatorKind::Resume => {} + TerminatorKind::Abort => {} + TerminatorKind::Return => {} + TerminatorKind::Unreachable => {} + TerminatorKind::Drop { .. } => {} + TerminatorKind::DropAndReplace { .. } => {} + TerminatorKind::Call { + ref func, + ref args, + destination, + .. + } => { + let func_ty = func.ty(self.mir, tcx); + let pl_ty = destination.map(|(pl, _)| self.acx.type_of(pl)); + + if let Some(callee) = util::ty_callee(tcx, func_ty) { + // Special cases for particular functions. + let pl_ty = pl_ty.unwrap(); + match callee { + Callee::PtrOffset { .. } => { + self.visit_ptr_offset(&args[0], pl_ty); + return; + } + } + } + + // General case: cast `args` to match the signature of `func`. + let poly_sig = func_ty.fn_sig(tcx); + let sig = tcx.erase_late_bound_regions(poly_sig); + + for (i, _op) in args.iter().enumerate() { + if i >= sig.inputs().len() { + // This is a call to a variadic function, and we've gone past the end of + // the declared arguments. + // TODO: insert a cast to turn `op` back into its original declared type + // (i.e. upcast the chosen reference type back to a raw pointer) + continue; + } + + // TODO: get the `LTy` to use for the callee's argument + // let expect_ty = ...; + // self.enter_call_arg(i, |v| v.visit_operand(op, expect_ty)); + } + } + TerminatorKind::Assert { .. } => {} + TerminatorKind::Yield { .. } => {} + TerminatorKind::GeneratorDrop => {} + TerminatorKind::FalseEdge { .. } => {} + TerminatorKind::FalseUnwind { .. } => {} + TerminatorKind::InlineAsm { .. } => todo!("terminator {:?}", term), + } + } + + fn visit_rvalue(&mut self, rv: &Rvalue<'tcx>, expect_ty: LTy<'tcx>) { + // TODO: most of these cases need to recurse into operands/places to find derefs + match *rv { + Rvalue::Use(ref op) => { + self.enter_rvalue_operand(0, |v| v.visit_operand(op, expect_ty)); + } + Rvalue::Repeat(ref _op, _) => { + // TODO + } + Rvalue::Ref(_rg, _kind, _pl) => { + // TODO + } + Rvalue::ThreadLocalRef(_def_id) => { + // TODO + } + Rvalue::AddressOf(_mutbl, _pl) => { + // TODO + } + Rvalue::Len(_pl) => { + // TODO + } + Rvalue::Cast(_kind, ref _op, _ty) => { + // TODO + } + Rvalue::BinaryOp(_bop, ref _ops) => { + // TODO + } + Rvalue::CheckedBinaryOp(_bop, ref _ops) => { + // TODO + } + Rvalue::NullaryOp(..) => {} + Rvalue::UnaryOp(_uop, ref _op) => { + // TODO + } + Rvalue::Discriminant(_pl) => { + // TODO + } + Rvalue::Aggregate(ref _kind, ref _ops) => { + // TODO + } + Rvalue::ShallowInitBox(ref _op, _ty) => { + // TODO + } + } + } + + fn visit_operand(&mut self, op: &Operand<'tcx>, expect_ty: LTy<'tcx>) { + match *op { + Operand::Copy(pl) | Operand::Move(pl) => { + if let Some(ptr) = self.acx.ptr_of(pl) { + let expect_ptr = expect_ty.label; + self.emit_ptr_cast(ptr, expect_ptr); + } + + // TODO: walk over `pl` to handle all derefs (casts, `*x` -> `(*x).get()`) + } + Operand::Constant(..) => {} + } + } + + fn visit_operand_desc( + &mut self, + op: &Operand<'tcx>, + expect_own: Ownership, + expect_qty: Quantity, + ) { + match *op { + Operand::Copy(pl) | Operand::Move(pl) => { + if let Some(ptr) = self.acx.ptr_of(pl) { + self.emit_cast(ptr, expect_own, expect_qty); + } + + // TODO: walk over `pl` to handle all derefs (casts, `*x` -> `(*x).get()`) + } + Operand::Constant(..) => {} + } + } + + fn visit_ptr_offset(&mut self, op: &Operand<'tcx>, result_ty: LTy<'tcx>) { + // Compute the expected type for the argument, and emit a cast if needed. + let result_ptr = result_ty.label; + let (result_own, result_qty) = type_desc::perms_to_desc( + self.perms[result_ptr.index()], + self.flags[result_ptr.index()], + ); + + let arg_expect_own = result_own; + // TODO: infer `arg_expect_qty` based on the type of offset this is (positive / unknown) + let arg_expect_qty = match result_qty { + Quantity::Single => Quantity::Slice, + Quantity::Slice => Quantity::Slice, + Quantity::OffsetPtr => todo!("OffsetPtr"), + }; + + self.enter_call_arg(0, |v| { + v.visit_operand_desc(op, arg_expect_own, arg_expect_qty) + }); + + // Emit `OffsetSlice` for the offset itself. + let mutbl = match result_own { + Ownership::Mut => true, + _ => false, + }; + + self.emit(RewriteKind::OffsetSlice { mutbl }); + + // If the result is `Single`, also insert an upcast. + if result_qty == Quantity::Single { + self.emit(RewriteKind::SliceFirst { mutbl }); + } + } + + fn emit(&mut self, rw: RewriteKind) { + if let Some(er) = self.rewrites.last_mut() { + if er.loc == self.loc { + er.kinds.push(rw); + return; + } + } + + self.rewrites.push(ExprRewrite { + loc: self.loc.clone(), + kinds: vec![rw], + }); + } + + fn emit_ptr_cast(&mut self, ptr: PointerId, expect_ptr: PointerId) { + assert!(expect_ptr != PointerId::NONE); + + let (own2, qty2) = type_desc::perms_to_desc( + self.perms[expect_ptr.index()], + self.flags[expect_ptr.index()], + ); + + self.emit_cast(ptr, own2, qty2); + } + + fn emit_cast(&mut self, ptr: PointerId, expect_own: Ownership, expect_qty: Quantity) { + assert!(ptr != PointerId::NONE); + + let (own1, qty1) = + type_desc::perms_to_desc(self.perms[ptr.index()], self.flags[ptr.index()]); + let (own2, qty2) = (expect_own, expect_qty); + + if (own1, qty1) == (own2, qty2) { + return; + } + + if qty1 == qty2 && (own1, own2) == (Ownership::Mut, Ownership::Imm) { + self.emit(RewriteKind::MutToImm); + return; + } + + eprintln!( + "unsupported cast kind: {:?} {:?} -> {:?}", + self.perms[ptr.index()], + (own1, qty1), + (own2, qty2) + ); + } +} + +pub fn gen_expr_rewrites<'tcx>( + acx: &AnalysisCtxt<'tcx>, + perms: &[PermissionSet], + flags: &[FlagSet], + mir: &Body<'tcx>, +) -> Vec { + // - walk over statements/terminators + // - Assign: find RHS operands that need casting to match LHS + // - Call: special case for `ptr.offset(i)` + + let mut out = Vec::new(); + + let mut v = ExprRewriteVisitor::new(acx, perms, flags, &mut out, mir); + + for (bb_id, bb) in mir.basic_blocks().iter_enumerated() { + for (i, stmt) in bb.statements.iter().enumerate() { + let loc = Location { + block: bb_id, + statement_index: i, + }; + v.visit_statement(stmt, loc); + } + + if let Some(ref term) = bb.terminator { + let loc = Location { + block: bb_id, + statement_index: bb.statements.len(), + }; + v.visit_terminator(term, loc); + } + } + + out +} diff --git a/c2rust-analyze/src/labeled_ty.rs b/c2rust-analyze/src/labeled_ty.rs new file mode 100644 index 0000000000..5d96bc92ee --- /dev/null +++ b/c2rust-analyze/src/labeled_ty.rs @@ -0,0 +1,327 @@ +//! Provides a wrapper around `rustc::ty::Ty` with a label attached to each type constructor. We +//! use this, for example, to attach a Polonius `Origin` to every reference type. Labeled type +//! data is manipulated by reference, the same as with `Ty`s, and the data is stored in the same +//! arena as the underlying `Ty`s. +use rustc_arena::DroplessArena; +use rustc_middle::ty::subst::{GenericArg, GenericArgKind}; +use rustc_middle::ty::{Ty, TyCtxt, TyKind, TypeAndMut}; +use std::convert::TryInto; +use std::fmt; +use std::marker::PhantomData; +use std::ops::Deref; + +/// The actual data for a labeled type. +/// +/// This struct shouldn't be constructed directly - instead, use `LabeledTyCtxt` methods to build +/// instances inside the tcx arena and return `LabeledTy` references. +/// +/// Labeled types have to mimic the tree structure of the underlying `Ty`, so that each type +/// constructor in the tree can have its own label. But maintaining a custom copy of +/// `TyKind` would be annoying, so instead, we let labeled types form arbitrary trees, and +/// make the `LabeledTyCtxt` responsible for making those trees match the `Ty`'s structure. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct LabeledTyS<'tcx, L: 'tcx> { + /// The underlying type. + pub ty: Ty<'tcx>, + /// The arguments of this type constructor. The number and meaning of these arguments depends + /// on which type constructor this is (specifically, which `TyKind` variant is used for + /// `self.ty.sty`). + pub args: &'tcx [LabeledTy<'tcx, L>], + /// The label for the current type constructor. + pub label: L, +} + +impl<'tcx, L> LabeledTyS<'tcx, L> { + pub fn kind(&self) -> &'tcx TyKind<'tcx> { + self.ty.kind() + } +} + +/// A labeled type. Like `rustc::ty::Ty`, this is a reference to some arena-allocated data. +pub type LabeledTy<'tcx, L> = &'tcx LabeledTyS<'tcx, L>; + +impl<'tcx, L: fmt::Debug> fmt::Debug for LabeledTyS<'tcx, L> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}#{:?}{:?}", self.label, self.ty, self.args) + } +} + +impl<'tcx, L: Copy> LabeledTyS<'tcx, L> { + pub fn for_each_label(&'tcx self, callback: &mut F) { + callback(self.label); + for &arg in self.args { + arg.for_each_label(callback); + } + } +} + +/// Context for constructing `LabeledTy`s. +pub struct LabeledTyCtxt<'tcx, L: 'tcx> { + tcx: TyCtxt<'tcx>, + _marker: PhantomData, +} + +impl<'tcx, L> Clone for LabeledTyCtxt<'tcx, L> { + fn clone(&self) -> LabeledTyCtxt<'tcx, L> { + *self + } +} + +impl<'tcx, L> Copy for LabeledTyCtxt<'tcx, L> {} + +impl<'tcx, L: Copy> LabeledTyCtxt<'tcx, L> { + /// Build a new `LabeledTyCtxt`. The `arena` must be the same one used by the `TyCtxt` that + /// built the underlying `Ty`s to be labeled. + pub fn new(tcx: TyCtxt<'tcx>) -> LabeledTyCtxt<'tcx, L> { + LabeledTyCtxt { + tcx, + _marker: PhantomData, + } + } + + fn arena(&self) -> &'tcx DroplessArena { + &self.tcx.arena.dropless + } + + /// Manually construct a slice in the context's arena. + pub fn mk_slice(&self, ltys: &[LabeledTy<'tcx, L>]) -> &'tcx [LabeledTy<'tcx, L>] { + if ltys.is_empty() { + return &[]; + } + self.arena().alloc_slice(ltys) + } + + /// Manually construct a labeled type. Note that this does not do any checks on `args`! The + /// caller is responsible for making sure the number of arguments matches `ty.sty`. + pub fn mk( + &self, + ty: Ty<'tcx>, + args: &'tcx [LabeledTy<'tcx, L>], + label: L, + ) -> LabeledTy<'tcx, L> { + self.arena().alloc(LabeledTyS { ty, args, label }) + } + + /// Label a `Ty` using a callback. The callback runs at every type constructor to produce a + /// label for that node in the tree. + pub fn label) -> L>(&self, ty: Ty<'tcx>, f: &mut F) -> LabeledTy<'tcx, L> { + use rustc_middle::ty::TyKind::*; + let label = f(ty); + match ty.kind() { + // Types with no arguments + Bool | Char | Int(_) | Uint(_) | Float(_) | Str | Foreign(_) | Never => { + self.mk(ty, &[], label) + } + + // Types with arguments + Adt(_, substs) => { + let args = substs.types().map(|t| self.label(t, f)).collect::>(); + self.mk(ty, self.mk_slice(&args), label) + } + Array(elem, _) => { + let args = [self.label(elem, f)]; + self.mk(ty, self.mk_slice(&args), label) + } + Slice(elem) => { + let args = [self.label(elem, f)]; + self.mk(ty, self.mk_slice(&args), label) + } + RawPtr(mty) => { + let args = [self.label(mty.ty, f)]; + self.mk(ty, self.mk_slice(&args), label) + } + Ref(_, mty, _) => { + let args = [self.label(mty, f)]; + self.mk(ty, self.mk_slice(&args), label) + } + FnDef(_, substs) => { + let args = substs + .types() + .map(|ty| self.label(ty, f)) + .collect::>(); + self.mk(ty, self.mk_slice(&args), label) + } + FnPtr(ref sig) => { + let args = sig + .skip_binder() + .inputs_and_output + .iter() + .map(|ty| self.label(ty, f)) + .collect::>(); + self.mk(ty, self.mk_slice(&args), label) + } + Tuple(ref elems) => { + let args = elems + .types() + .map(|ty| self.label(ty, f)) + .collect::>(); + self.mk(ty, self.mk_slice(&args), label) + } + + // Types that aren't actually supported by this code yet + Dynamic(..) | Closure(..) | Generator(..) | GeneratorWitness(..) | Projection(..) + | Opaque(..) | Param(..) | Bound(..) | Placeholder(..) | Infer(..) | Error(..) => { + self.mk(ty, &[], label) + } + } + } + + /// Label multiple `Ty`s using a callback. + #[allow(dead_code)] + pub fn _label_slice(&self, tys: &[Ty<'tcx>], f: &mut F) -> &'tcx [LabeledTy<'tcx, L>] + where + F: FnMut(Ty<'tcx>) -> L, + { + self.mk_slice(&tys.iter().map(|ty| self.label(ty, f)).collect::>()) + } + + /// Substitute in arguments for any type parameter references (`Param`) in a labeled type. + /// Panics if `lty` contains a reference to a type parameter that is past the end of `substs` + /// (usually this means the caller is providing the wrong list of type arguments as `substs`). + /// + /// TODO: This produces a `LabeledTy` with the right structure, but doesn't actually do + /// substitution on the underlying `Ty`s! This means if you substitute `u32` for `T`, you can + /// end up with a `LabeledTy` whose `ty` is `S`, but whose args are `[u32]`. By some + /// miracle, this hasn't broken anything yet, but we may need to fix it eventually. + #[allow(dead_code)] + pub fn subst( + &self, + lty: LabeledTy<'tcx, L>, + substs: &[LabeledTy<'tcx, L>], + ) -> LabeledTy<'tcx, L> { + if let TyKind::Param(ref ty) = lty.ty.kind() { + if let Some(p) = substs.get(ty.index as usize) { + return p; + } + } + + self.mk( + lty.ty, + self.subst_slice(lty.args, substs), + lty.label.clone(), + ) + } + + /// Substitute arguments in multiple labeled types. + pub fn subst_slice( + &self, + ltys: &[LabeledTy<'tcx, L>], + substs: &[LabeledTy<'tcx, L>], + ) -> &'tcx [LabeledTy<'tcx, L>] { + self.mk_slice( + <ys + .iter() + .map(|lty| self.subst(lty, substs)) + .collect::>(), + ) + } + + /// Run a callback to replace the labels on a type. + pub fn relabel(&self, lty: LabeledTy<'tcx, L2>, func: &mut F) -> LabeledTy<'tcx, L> + where + F: FnMut(LabeledTy<'tcx, L2>) -> L, + { + let args = self.relabel_slice(lty.args, func); + self.mk(lty.ty, args, func(lty)) + } + + /// Replace the labels on several labeled types. + pub fn relabel_slice( + &self, + ltys: &[LabeledTy<'tcx, L2>], + func: &mut F, + ) -> &'tcx [LabeledTy<'tcx, L>] + where + F: FnMut(LabeledTy<'tcx, L2>) -> L, + { + let ltys = ltys + .iter() + .cloned() + .map(|lty| self.relabel(lty, func)) + .collect::>(); + self.mk_slice(<ys) + } + + /// Perform a bottom-up rewrite on a type and convert it to unlabeled form. + pub fn rewrite_unlabeled(&self, lty: LabeledTy<'tcx, L>, func: &mut F) -> Ty<'tcx> + where + F: FnMut(Ty<'tcx>, &[Ty<'tcx>], L) -> Ty<'tcx>, + { + use rustc_middle::ty::TyKind::*; + let args = lty + .args + .iter() + .map(|<y| self.rewrite_unlabeled(lty, func)) + .collect::>(); + + let ty = match *lty.ty.kind() { + Bool | Char | Int(_) | Uint(_) | Float(_) | Str | Foreign(_) | Never => lty.ty, + + Adt(adt, substs) => { + // Copy `substs`, but replace all types with those from `args`. + let mut it = args.iter().cloned(); + let substs = self + .tcx + .mk_substs(substs.iter().map(|arg| match arg.unpack() { + GenericArgKind::Type(_) => it.next().unwrap().into(), + GenericArgKind::Lifetime(rg) => GenericArg::from(rg), + GenericArgKind::Const(cn) => GenericArg::from(cn), + })); + assert!(it.next().is_none()); + self.tcx.mk_adt(adt, substs) + } + Array(_, len) => { + let &[elem]: &[_; 1] = args[..].try_into().unwrap(); + self.tcx.mk_ty(Array(elem, len)) + } + Slice(_) => { + let &[elem]: &[_; 1] = args[..].try_into().unwrap(); + self.tcx.mk_slice(elem) + } + RawPtr(mty) => { + let &[target]: &[_; 1] = args[..].try_into().unwrap(); + self.tcx.mk_ptr(TypeAndMut { + ty: target, + mutbl: mty.mutbl, + }) + } + Ref(rg, _, mutbl) => { + let &[target]: &[_; 1] = args[..].try_into().unwrap(); + self.tcx.mk_ref(rg, TypeAndMut { ty: target, mutbl }) + } + FnDef(def_id, substs) => { + // Copy `substs`, but replace all types with those from `args`. + let mut it = args.iter().cloned(); + let substs = self + .tcx + .mk_substs(substs.iter().map(|arg| match arg.unpack() { + GenericArgKind::Type(_) => it.next().unwrap().into(), + GenericArgKind::Lifetime(rg) => GenericArg::from(rg), + GenericArgKind::Const(cn) => GenericArg::from(cn), + })); + assert!(it.next().is_none()); + self.tcx.mk_fn_def(def_id, substs) + } + FnPtr(ref _sig) => { + // TODO: replace all the types under the binder + todo!() + } + Tuple(_) => self.tcx.mk_tup(args.iter().cloned()), + + // Types that aren't actually supported by this code yet + Dynamic(..) | Closure(..) | Generator(..) | GeneratorWitness(..) | Projection(..) + | Opaque(..) | Param(..) | Bound(..) | Placeholder(..) | Infer(..) | Error(..) => { + lty.ty + } + }; + + func(ty, &args, lty.label) + } +} + +impl<'tcx, L> Deref for LabeledTyCtxt<'tcx, L> { + type Target = TyCtxt<'tcx>; + fn deref(&self) -> &TyCtxt<'tcx> { + &self.tcx + } +} diff --git a/c2rust-analyze/src/main.rs b/c2rust-analyze/src/main.rs new file mode 100644 index 0000000000..761fcb0191 --- /dev/null +++ b/c2rust-analyze/src/main.rs @@ -0,0 +1,195 @@ +#![feature(rustc_private)] +extern crate either; +extern crate rustc_arena; +extern crate rustc_ast; +extern crate rustc_data_structures; +extern crate rustc_driver; +extern crate rustc_hir; +extern crate rustc_index; +extern crate rustc_interface; +extern crate rustc_middle; +extern crate rustc_mir_build; +extern crate rustc_session; +extern crate rustc_span; +extern crate rustc_target; + +use crate::context::{AnalysisCtxt, FlagSet, LTy, PermissionSet, PointerId}; +use rustc_index::vec::IndexVec; +use rustc_middle::mir::{BindingForm, Body, LocalDecl, LocalInfo}; +use rustc_middle::ty::query::{ExternProviders, Providers}; +use rustc_middle::ty::{Ty, TyCtxt, TyKind, WithOptConstParam}; +use rustc_session::Session; +use rustc_span::def_id::LocalDefId; +use rustc_span::Span; +use std::env; + +mod borrowck; +mod context; +mod dataflow; +mod expr_rewrite; +mod labeled_ty; +mod type_desc; +mod util; + +fn inspect_mir<'tcx>(tcx: TyCtxt<'tcx>, def: WithOptConstParam, mir: &Body<'tcx>) { + let name = tcx.item_name(def.to_global().did); + eprintln!("\nprocessing function {:?}", name); + + let mut acx = AnalysisCtxt::new(tcx); + + // Label all pointers in local variables. + // TODO: also label pointers in Rvalue::Cast (and ShallowInitBox?) + assert!(acx.local_tys.len() == 0); + acx.local_tys = IndexVec::with_capacity(mir.local_decls.len()); + for (local, decl) in mir.local_decls.iter_enumerated() { + let lty = assign_pointer_ids(&acx, decl.ty); + let l = acx.local_tys.push(lty); + assert_eq!(local, l); + + let ptr = acx.new_pointer(); + let l = acx.addr_of_local.push(ptr); + assert_eq!(local, l); + } + + let dataflow = self::dataflow::generate_constraints(&acx, mir); + + let mut hypothesis = Vec::with_capacity(acx.num_pointers()); + for _ in 0..acx.num_pointers() { + hypothesis.push(PermissionSet::UNIQUE); + } + dataflow.propagate(&mut hypothesis); + + borrowck::borrowck_mir(&acx, &dataflow, &mut hypothesis, name.as_str(), mir); + + let mut flags = vec![FlagSet::empty(); acx.num_pointers()]; + dataflow.propagate_cell(&hypothesis, &mut flags); + + eprintln!("final labeling for {:?}:", name); + let lcx1 = crate::labeled_ty::LabeledTyCtxt::new(acx.tcx); + let lcx2 = crate::labeled_ty::LabeledTyCtxt::new(acx.tcx); + for (local, decl) in mir.local_decls.iter_enumerated() { + let addr_of1 = hypothesis[acx.addr_of_local[local].index()]; + let ty1 = lcx1.relabel(acx.local_tys[local], &mut |lty| { + if lty.label == PointerId::NONE { + PermissionSet::empty() + } else { + hypothesis[lty.label.index()] + } + }); + eprintln!( + "{:?} ({}): addr_of = {:?}, type = {:?}", + local, + describe_local(acx.tcx, decl), + addr_of1, + ty1, + ); + + let addr_of2 = flags[acx.addr_of_local[local].index()]; + let ty2 = lcx2.relabel(acx.local_tys[local], &mut |lty| { + if lty.label == PointerId::NONE { + FlagSet::empty() + } else { + flags[lty.label.index()] + } + }); + eprintln!( + "{:?} ({}): addr_of flags = {:?}, type flags = {:?}", + local, + describe_local(acx.tcx, decl), + addr_of2, + ty2, + ); + } + + eprintln!("\ntype assignment for {:?}:", name); + for (local, decl) in mir.local_decls.iter_enumerated() { + // TODO: apply `Cell` if `addr_of_local` indicates it's needed + let ty = type_desc::convert_type(&acx, acx.local_tys[local], &hypothesis, &flags); + eprintln!("{:?} ({}): {:?}", local, describe_local(acx.tcx, decl), ty,); + } + + eprintln!(""); + let rewrites = expr_rewrite::gen_expr_rewrites(&acx, &hypothesis, &flags, mir); + for rw in &rewrites { + eprintln!( + "at {:?} ({}, {:?}):", + rw.loc.stmt, + describe_span(tcx, rw.loc.span), + rw.loc.sub, + ); + for kind in &rw.kinds { + eprintln!(" {:?}", kind); + } + } +} + +fn assign_pointer_ids<'tcx>(acx: &AnalysisCtxt<'tcx>, ty: Ty<'tcx>) -> LTy<'tcx> { + acx.lcx.label(ty, &mut |ty| match ty.kind() { + TyKind::Ref(_, _, _) | TyKind::RawPtr(_) => acx.new_pointer(), + _ => PointerId::NONE, + }) +} + +fn describe_local(tcx: TyCtxt, decl: &LocalDecl) -> String { + let mut span = decl.source_info.span; + if let Some(ref info) = decl.local_info { + if let LocalInfo::User(ref binding_form) = **info { + let binding_form = binding_form.as_ref().assert_crate_local(); + if let BindingForm::Var(ref v) = *binding_form { + span = v.pat_span; + } + } + } + describe_span(tcx, span) +} + +fn describe_span(tcx: TyCtxt, span: Span) -> String { + let s = tcx.sess.source_map().span_to_snippet(span).unwrap(); + let s = { + let mut s2 = String::new(); + for word in s.split_ascii_whitespace() { + if s2.len() > 0 { + s2.push(' '); + } + s2.push_str(word); + } + s2 + }; + + let (src1, src2, src3) = if s.len() > 20 { + (&s[..15], " ... ", &s[s.len() - 5..]) + } else { + (&s[..], "", "") + }; + let line = tcx.sess.source_map().lookup_char_pos(span.lo()).line; + format!("{}: {}{}{}", line, src1, src2, src3) +} + +struct AnalysisCallbacks; + +impl rustc_driver::Callbacks for AnalysisCallbacks { + fn config(&mut self, config: &mut rustc_interface::Config) { + config.override_queries = Some(override_queries); + } +} + +fn override_queries( + _sess: &Session, + providers: &mut Providers, + _extern_providers: &mut ExternProviders, +) { + providers.mir_built = |tcx, def: WithOptConstParam| { + let mut providers = Providers::default(); + rustc_mir_build::provide(&mut providers); + let steal_mir = (providers.mir_built)(tcx, def); + + inspect_mir(tcx, def, &steal_mir.borrow()); + + steal_mir + }; +} + +fn main() -> rustc_interface::interface::Result<()> { + let args = env::args().collect::>(); + rustc_driver::RunCompiler::new(&args, &mut AnalysisCallbacks).run() +} diff --git a/c2rust-analyze/src/type_desc.rs b/c2rust-analyze/src/type_desc.rs new file mode 100644 index 0000000000..daf12963c0 --- /dev/null +++ b/c2rust-analyze/src/type_desc.rs @@ -0,0 +1,132 @@ +use crate::context::{AnalysisCtxt, FlagSet, LTy, PermissionSet, PointerId}; +use rustc_hir::def::{DefKind, Res}; +use rustc_middle::ty::subst::GenericArg; +use rustc_middle::ty::{ReErased, Ty, TyCtxt}; + +#[allow(dead_code)] +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum Ownership { + /// E.g. `*const T` + Raw, + /// E.g. `*mut T` + RawMut, + /// E.g. `&T` + Imm, + /// E.g. `&Cell` + Cell, + /// E.g. `&mut T` + Mut, + /// E.g. `Rc` + Rc, + /// E.g. `Box` + Box, +} + +#[allow(dead_code)] +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum Quantity { + /// E.g. `&T` + Single, + /// E.g. `&[T]` + Slice, + /// E.g. `OffsetPtr` + OffsetPtr, +} + +pub fn perms_to_desc(perms: PermissionSet, flags: FlagSet) -> (Ownership, Quantity) { + let own = if perms.contains(PermissionSet::UNIQUE | PermissionSet::WRITE) { + Ownership::Mut + } else if flags.contains(FlagSet::CELL) { + Ownership::Cell + } else { + // Anything with WRITE and not UNIQUE should have CELL set, and use the previous case. + assert!(!perms.contains(PermissionSet::WRITE)); + Ownership::Imm + }; + + let qty = if perms.contains(PermissionSet::OFFSET_SUB) { + // TODO: should be Quantity::OffsetPtr, but that's not implemented yet + Quantity::Slice + } else if perms.contains(PermissionSet::OFFSET_ADD) { + Quantity::Slice + } else { + Quantity::Single + }; + + (own, qty) +} + +fn mk_cell<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Ty<'tcx> { + let core_crate = tcx + .crates(()) + .iter() + .cloned() + .find(|&krate| tcx.crate_name(krate).as_str() == "core") + .expect("failed to find crate `core`"); + + let cell_mod_child = tcx + .module_children(core_crate.as_def_id()) + .iter() + .find(|child| child.ident.as_str() == "cell") + .expect("failed to find module `core::cell`"); + let cell_mod = match cell_mod_child.res { + Res::Def(DefKind::Mod, did) => did, + ref r => panic!("unexpected resolution {:?} for `core::cell`", r), + }; + + let cell_struct_child = tcx + .module_children(cell_mod) + .iter() + .find(|child| child.ident.as_str() == "Cell") + .expect("failed to find struct `core::cell::Cell`"); + let cell_struct = match cell_struct_child.res { + Res::Def(DefKind::Struct, did) => did, + ref r => panic!("unexpected resolution {:?} for `core::cell::Cell`", r), + }; + + let cell_adt = tcx.adt_def(cell_struct); + let substs = tcx.mk_substs([GenericArg::from(ty)].into_iter()); + tcx.mk_adt(cell_adt, substs) +} + +pub fn convert_type<'tcx>( + acx: &AnalysisCtxt<'tcx>, + lty: LTy<'tcx>, + perms: &[PermissionSet], + flags: &[FlagSet], +) -> Ty<'tcx> { + let tcx = acx.tcx; + acx.lcx.rewrite_unlabeled(lty, &mut |ty, args, label| { + if label == PointerId::NONE { + return ty; + } + let ptr = label; + + let (own, qty) = perms_to_desc(perms[ptr.index()], flags[ptr.index()]); + + assert_eq!(args.len(), 1); + let mut ty = args[0]; + + if own == Ownership::Cell { + ty = mk_cell(tcx, ty); + } + + ty = match qty { + Quantity::Single => ty, + Quantity::Slice => tcx.mk_slice(ty), + Quantity::OffsetPtr => todo!(), + }; + + ty = match own { + Ownership::Raw => tcx.mk_imm_ptr(ty), + Ownership::RawMut => tcx.mk_mut_ptr(ty), + Ownership::Imm => tcx.mk_imm_ref(tcx.mk_region(ReErased), ty), + Ownership::Cell => tcx.mk_imm_ref(tcx.mk_region(ReErased), ty), + Ownership::Mut => tcx.mk_mut_ref(tcx.mk_region(ReErased), ty), + Ownership::Rc => todo!(), + Ownership::Box => todo!(), + }; + + ty + }) +} diff --git a/c2rust-analyze/src/util.rs b/c2rust-analyze/src/util.rs new file mode 100644 index 0000000000..0780520905 --- /dev/null +++ b/c2rust-analyze/src/util.rs @@ -0,0 +1,89 @@ +use rustc_hir::def::DefKind; +use rustc_middle::mir::{Local, Mutability, Operand, PlaceElem, PlaceRef, Rvalue}; +use rustc_middle::ty::{DefIdTree, Ty, TyCtxt, TyKind}; + +#[derive(Debug)] +pub enum RvalueDesc<'tcx> { + Project { + base: PlaceRef<'tcx>, + proj: &'tcx [PlaceElem<'tcx>], + }, + AddrOfLocal { + local: Local, + proj: &'tcx [PlaceElem<'tcx>], + }, +} + +pub fn describe_rvalue<'tcx>(rv: &Rvalue<'tcx>) -> Option> { + Some(match *rv { + Rvalue::Use(ref op) => match *op { + Operand::Move(pl) | Operand::Copy(pl) => RvalueDesc::Project { + base: pl.as_ref(), + proj: &[], + }, + Operand::Constant(_) => return None, + }, + Rvalue::Ref(_, _, pl) | Rvalue::AddressOf(_, pl) => { + let projection = &pl.projection[..]; + match projection + .iter() + .rposition(|p| matches!(p, PlaceElem::Deref)) + { + Some(i) => { + // `i` is the index of the last `ProjectionElem::Deref` in `pl`. + RvalueDesc::Project { + base: PlaceRef { + local: pl.local, + projection: &projection[..i], + }, + proj: &projection[i + 1..], + } + } + None => { + // `pl` refers to a field/element of a local. + RvalueDesc::AddrOfLocal { + local: pl.local, + proj: projection, + } + } + } + } + _ => return None, + }) +} + +#[derive(Debug)] +pub enum Callee<'tcx> { + PtrOffset { + pointee_ty: Ty<'tcx>, + mutbl: Mutability, + }, +} + +pub fn ty_callee<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option> { + let (did, _substs) = match *ty.kind() { + TyKind::FnDef(did, substs) => (did, substs), + _ => return None, + }; + let name = tcx.item_name(did); + + match name.as_str() { + "offset" => { + // The `offset` inherent method of `*const T` and `*mut T`. + let parent_did = tcx.parent(did)?; + if tcx.def_kind(parent_did) != DefKind::Impl { + return None; + } + if tcx.impl_trait_ref(parent_did).is_some() { + return None; + } + let parent_impl_ty = tcx.type_of(parent_did); + let (pointee_ty, mutbl) = match parent_impl_ty.kind() { + TyKind::RawPtr(tm) => (tm.ty, tm.mutbl), + _ => return None, + }; + Some(Callee::PtrOffset { pointee_ty, mutbl }) + } + _ => None, + } +} diff --git a/c2rust-analyze/tests/filecheck.rs b/c2rust-analyze/tests/filecheck.rs new file mode 100644 index 0000000000..a8b0631414 --- /dev/null +++ b/c2rust-analyze/tests/filecheck.rs @@ -0,0 +1,103 @@ +use std::env; +use std::fs; +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::process::{Command, Stdio}; + +fn detect_filecheck() -> Option<&'static str> { + let candidates = [ + "FileCheck", + "/usr/local/opt/llvm/bin/FileCheck", + "FileCheck-14", + "/usr/local/opt/llvm@14/bin/FileCheck", + "FileCheck-13", + "/usr/local/opt/llvm@13/bin/FileCheck", + "FileCheck-12", + "/usr/local/opt/llvm@12/bin/FileCheck", + "FileCheck-11", + "/usr/local/opt/llvm@11/bin/FileCheck", + "FileCheck-10", + "/usr/local/opt/llvm@10/bin/FileCheck", + "FileCheck-9", + "/usr/local/opt/llvm@9/bin/FileCheck", + "FileCheck-8", + "/usr/local/opt/llvm@8/bin/FileCheck", + "FileCheck-7", + "FileCheck-7.0", + "/usr/local/opt/llvm@7/bin/FileCheck", + ]; + + for filecheck in candidates { + let result = Command::new(filecheck) + .arg("--version") + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + if result.is_ok() { + return Some(filecheck); + } + } + None +} + +#[test] +fn filecheck() { + let lib_dir = env::var("C2RUST_TARGET_LIB_DIR").unwrap(); + let lib_dir = &lib_dir; + + let filecheck_bin = env::var("FILECHECK") + .ok() + .or_else(|| detect_filecheck().map(|s| s.to_owned())) + .unwrap_or_else(|| panic!("FileCheck not found - set FILECHECK=/path/to/FileCheck")); + eprintln!("detected FILECHECK={}", filecheck_bin); + + for entry in fs::read_dir("tests/filecheck").unwrap() { + let entry = entry.unwrap(); + + if !entry.file_type().unwrap().is_file() { + continue; + } + + let name = entry.file_name(); + let name = name.to_str().unwrap(); + if name.starts_with(".") || !name.ends_with(".rs") { + continue; + } + + eprintln!("{:?}", entry.path()); + + let mut filecheck_cmd = Command::new(&filecheck_bin); + filecheck_cmd.arg(entry.path()).stdin(Stdio::piped()); + let mut filecheck = filecheck_cmd.spawn().unwrap(); + let pipe_fd = filecheck.stdin.as_ref().unwrap().as_raw_fd(); + let mut analyze_cmd = Command::new("cargo"); + analyze_cmd + .arg("run") + .arg("--manifest-path") + .arg(format!("{}/Cargo.toml", env!("CARGO_MANIFEST_DIR"))) + .arg("--") + .arg(entry.path()) + .arg("-L") + .arg(lib_dir) + .arg("--crate-type") + .arg("rlib") + .stdout(unsafe { Stdio::from_raw_fd(pipe_fd) }) + .stderr(unsafe { Stdio::from_raw_fd(pipe_fd) }); + let mut analyze = analyze_cmd.spawn().unwrap(); + + let filecheck_status = filecheck.wait().unwrap(); + assert!( + filecheck_status.success(), + "{:?}: FileCheck failed with status {:?}", + entry.path(), + filecheck_status, + ); + + let analyze_status = analyze.wait().unwrap(); + assert!( + analyze_status.success(), + "{:?}: c2rust-analyze failed with status {:?}", + entry.path(), + analyze_status, + ); + } +} diff --git a/c2rust-analyze/tests/filecheck/alias1.rs b/c2rust-analyze/tests/filecheck/alias1.rs new file mode 100644 index 0000000000..4eac063c91 --- /dev/null +++ b/c2rust-analyze/tests/filecheck/alias1.rs @@ -0,0 +1,44 @@ +use std::ptr; + +// CHECK-LABEL: final labeling for "alias1_good" +pub unsafe fn alias1_good() { + // CHECK-DAG: ([[#@LINE+1]]: mut x): addr_of = READ | WRITE | UNIQUE, + let mut x = 0; + // CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type = UNIQUE# + let p = ptr::addr_of_mut!(x); + // CHECK-DAG: ([[#@LINE+1]]: q): {{.*}}type = READ | WRITE | UNIQUE# + let q = ptr::addr_of_mut!(x); + *q = 1; +} + +// CHECK-LABEL: final labeling for "alias1_bad" +pub unsafe fn alias1_bad() { + // CHECK-DAG: ([[#@LINE+2]]: mut x): addr_of = READ | WRITE, + // CHECK-DAG: ([[#@LINE+1]]: mut x): addr_of flags = CELL, + let mut x = 0; + // CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type = READ | WRITE# + let p = ptr::addr_of_mut!(x); + // CHECK-DAG: ([[#@LINE+2]]: q): {{.*}}type = (empty)# + // CHECK-DAG: ([[#@LINE+1]]: q): {{.*}}type flags = CELL# + let q = ptr::addr_of_mut!(x); + *p = 1; +} + + +// The safe versions of these functions are useful for debugging Polonius fact generation, but +// aren't checked when running tests. +#[cfg(debug_polonius_facts)] +pub fn safe_alias1_good() { + let mut x = 0; + let p = &mut x; + let q = &mut x; + *q = 1; +} + +#[cfg(debug_polonius_facts)] +pub fn safe_alias1_bad() { + let mut x = 0; + let p = &mut x; + let q = &mut x; + *p = 1; +} diff --git a/c2rust-analyze/tests/filecheck/alias2.rs b/c2rust-analyze/tests/filecheck/alias2.rs new file mode 100644 index 0000000000..b95cc129c6 --- /dev/null +++ b/c2rust-analyze/tests/filecheck/alias2.rs @@ -0,0 +1,72 @@ +use std::ptr; + +// CHECK-LABEL: final labeling for "alias2_copy_good" +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type = READ | WRITE | UNIQUE# +pub unsafe fn alias2_copy_good(x: *mut i32) { + // CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type = UNIQUE# + let p = x; + // CHECK-DAG: ([[#@LINE+1]]: q): {{.*}}type = READ | WRITE | UNIQUE# + let q = x; + *q = 1; +} + +// CHECK-LABEL: final labeling for "alias2_addr_of_good" +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type = READ | WRITE | UNIQUE# +pub unsafe fn alias2_addr_of_good(x: *mut i32) { + // CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type = UNIQUE# + let p = ptr::addr_of_mut!(*x); + // CHECK-DAG: ([[#@LINE+1]]: q): {{.*}}type = READ | WRITE | UNIQUE# + let q = ptr::addr_of_mut!(*x); + *q = 1; +} + +// CHECK-LABEL: final labeling for "alias2_copy_bad" +// CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = READ | WRITE# +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type flags = CELL# +pub unsafe fn alias2_copy_bad(x: *mut i32) { + // CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type = READ | WRITE# + let p = x; + // CHECK-DAG: ([[#@LINE+1]]: q): {{.*}}type = (empty)# + let q = x; + *p = 1; +} + +// CHECK-LABEL: final labeling for "alias2_addr_of_bad" +// CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = READ | WRITE# +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type flags = CELL# +pub unsafe fn alias2_addr_of_bad(x: *mut i32) { + // CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type = READ | WRITE# + let p = ptr::addr_of_mut!(*x); + // CHECK-DAG: ([[#@LINE+1]]: q): {{.*}}type = (empty)# + let q = ptr::addr_of_mut!(*x); + *p = 1; +} + + +#[cfg(debug_polonius_facts)] +pub unsafe fn safe_alias2_copy_good(x: &mut i32) { + let p = x; + let q = x; + *q = 1; +} + +#[cfg(debug_polonius_facts)] +pub unsafe fn safe_alias2_addr_of_good(x: &mut i32) { + let p = &mut *x; + let q = &mut *x; + *q = 1; +} + +#[cfg(debug_polonius_facts)] +pub unsafe fn safe_alias2_copy_bad(x: &mut i32) { + let p = x; + let q = x; + *p = 1; +} + +#[cfg(debug_polonius_facts)] +pub unsafe fn safe_alias2_addr_of_bad(x: &mut i32) { + let p = &mut *x; + let q = &mut *x; + *p = 1; +} diff --git a/c2rust-analyze/tests/filecheck/alias3.rs b/c2rust-analyze/tests/filecheck/alias3.rs new file mode 100644 index 0000000000..a56d36aa39 --- /dev/null +++ b/c2rust-analyze/tests/filecheck/alias3.rs @@ -0,0 +1,41 @@ +use std::ptr; + +// CHECK-LABEL: final labeling for "alias3_copy_bad1" +// CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = READ | WRITE# +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type flags = CELL# +pub unsafe fn alias3_copy_bad1(x: *mut i32) { + // CHECK-DAG: ([[#@LINE+2]]: p): {{.*}}type = READ# + // CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type flags = CELL# + let p = x; + // CHECK-DAG: ([[#@LINE+2]]: q): {{.*}}type = READ | WRITE# + // CHECK-DAG: ([[#@LINE+1]]: q): {{.*}}type flags = CELL# + let q = x; + *q = *p; +} + +// CHECK-LABEL: final labeling for "alias3_copy_bad2" +// CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = READ | WRITE# +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type flags = CELL# +pub unsafe fn alias3_copy_bad2(x: *mut i32) { + // CHECK-DAG: ([[#@LINE+2]]: p): {{.*}}type = READ | WRITE# + // CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type flags = CELL# + let p = x; + // CHECK-DAG: ([[#@LINE+2]]: q): {{.*}}type = READ# + // CHECK-DAG: ([[#@LINE+1]]: q): {{.*}}type flags = CELL# + let q = x; + *p = *q; +} + +#[cfg(debug_polonius_facts)] +pub unsafe fn alias3_addr_of_bad1(x: *mut i32) { + let p = ptr::addr_of_mut!(*x); + let q = ptr::addr_of_mut!(*x); + *q = *p; +} + +#[cfg(debug_polonius_facts)] +pub unsafe fn alias3_addr_of_bad2(x: *mut i32) { + let p = ptr::addr_of_mut!(*x); + let q = ptr::addr_of_mut!(*x); + *p = *q; +} diff --git a/c2rust-analyze/tests/filecheck/insertion_sort.rs b/c2rust-analyze/tests/filecheck/insertion_sort.rs new file mode 100644 index 0000000000..a7c1d93cfe --- /dev/null +++ b/c2rust-analyze/tests/filecheck/insertion_sort.rs @@ -0,0 +1,32 @@ +#![allow(dead_code, mutable_transmutes, non_camel_case_types, non_snake_case, + non_upper_case_globals, unused_assignments, unused_mut)] +#![feature(rustc_private)] + +extern crate libc; + +#[no_mangle] +// CHECK-LABEL: final labeling for "insertion_sort" +// CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type = READ | WRITE | UNIQUE | OFFSET_ADD | OFFSET_SUB# +pub unsafe extern "C" fn insertion_sort(n: libc::c_int, p: *mut libc::c_int) { + let mut i: libc::c_int = 1 as libc::c_int; + while i < n { + // CHECK-DAG: ([[#@LINE+2]]: p): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: p.offset(i as isize)): {{.*}}type = READ | UNIQUE# + let tmp: libc::c_int = *p.offset(i as isize); + let mut j: libc::c_int = i; + // CHECK-DAG: ([[#@LINE+2]]: p): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: p.offset{{.*}}): {{.*}}type = READ | UNIQUE# + while j > 0 as libc::c_int && *p.offset((j - 1 as libc::c_int) as isize) > tmp { + // CHECK-DAG: ([[#@LINE+4]]: p): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+3]]: p): {{.*}}type = READ | WRITE | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+2]]: p.offset((j {{.*}}): {{.*}}type = READ | UNIQUE# + // CHECK-DAG: ([[#@LINE+1]]: p.offset(j {{.*}}): {{.*}}type = READ | WRITE | UNIQUE# + *p.offset(j as isize) = *p.offset((j - 1 as libc::c_int) as isize); + j -= 1 + } + // CHECK-DAG: ([[#@LINE+2]]: p): {{.*}}type = READ | WRITE | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: p.offset(j {{.*}}): {{.*}}type = READ | WRITE | UNIQUE# + *p.offset(j as isize) = tmp; + i += 1 + } +} diff --git a/c2rust-analyze/tests/filecheck/offset1.rs b/c2rust-analyze/tests/filecheck/offset1.rs new file mode 100644 index 0000000000..4ee614e9b1 --- /dev/null +++ b/c2rust-analyze/tests/filecheck/offset1.rs @@ -0,0 +1,40 @@ +use std::ptr; + +// CHECK-LABEL: final labeling for "offset1_const" +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# +pub unsafe fn offset1_const(x: *mut i32) -> i32 { + // CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: x.offset(1)): {{.*}}type = READ | UNIQUE# + *x.offset(1) +} + +// CHECK-LABEL: final labeling for "offset1_unknown" +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# +pub unsafe fn offset1_unknown(x: *mut i32, off: isize) -> i32 { + // CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: x.offset(off)): {{.*}}type = READ | UNIQUE# + *x.offset(off) +} + +/* +pub unsafe fn offset1_usize(x: *mut i32, off: usize) -> i32 { + *x.offset(off as isize) +} +*/ + +// CHECK-LABEL: final labeling for "offset1_immut" +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# +pub unsafe fn offset1_immut(x: *const i32, off: isize) -> i32 { + // CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: x.offset(off)): {{.*}}type = READ | UNIQUE# + *x.offset(off) +} + +// CHECK-LABEL: final labeling for "offset1_double" +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# +pub unsafe fn offset1_double(x: *mut i32, off: isize) -> i32 { + // CHECK-DAG: ([[#@LINE+3]]: x): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+2]]: x.offset(off)): {{.*}}type = READ | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: x.offset{{.*}}...{{.*}}): {{.*}}type = READ | UNIQUE# + *x.offset(off).offset(off) +} diff --git a/c2rust-analyze/tests/filecheck/offset2.rs b/c2rust-analyze/tests/filecheck/offset2.rs new file mode 100644 index 0000000000..df2d1c93c1 --- /dev/null +++ b/c2rust-analyze/tests/filecheck/offset2.rs @@ -0,0 +1,25 @@ +use std::ptr; + +// CHECK-LABEL: final labeling for "offset2_good" +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type = READ | WRITE | UNIQUE | OFFSET_ADD | OFFSET_SUB# +pub unsafe fn offset2_good(x: *mut i32, off: isize) { + // CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type = UNIQUE# + let p = x.offset(off); + // CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = READ | WRITE | UNIQUE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: q): {{.*}}type = READ | WRITE | UNIQUE# + let q = x.offset(off); + *q = 1; +} + +// CHECK-LABEL: final labeling for "offset2_bad" +// CHECK-DAG: ([[#@LINE+1]]: x): {{.*}}type = READ | WRITE | OFFSET_ADD | OFFSET_SUB# +pub unsafe fn offset2_bad(x: *mut i32, off: isize) { + // CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = READ | WRITE | OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: p): {{.*}}type = READ | WRITE# + let p = x.offset(off); + // CHECK-DAG: ([[#@LINE+2]]: x): {{.*}}type = OFFSET_ADD | OFFSET_SUB# + // CHECK-DAG: ([[#@LINE+1]]: q): {{.*}}type = (empty)# + let q = x.offset(off); + *p = 1; +}