Skip to content

Commit 61269de

Browse files
celinvalCarolyn Zech
authored andcommitted
Analyze unsafe code reachability
Add call-graph analysis to the scanner in order to find the distance between the functions in a crate and unsafe functions. For that, we build the crate's call graph and collect the unsafe functions. After that, we do a reverse BFS traversal from the unsafe functions and store the distance to the other functions. The result is stored in a new CSV file.
1 parent a13041b commit 61269de

File tree

6 files changed

+194
-19
lines changed

6 files changed

+194
-19
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
5 test_scan_fn_loops.csv
2-
19 test_scan_functions.csv
2+
20 test_scan_functions.csv
33
5 test_scan_input_tys.csv
44
16 test_scan_overall.csv
55
3 test_scan_recursion.csv
6-
5 test_scan_unsafe_ops.csv
6+
6 test_scan_unsafe_ops.csv

tests/script-based-pre/tool-scanner/scanner-test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@ cargo run -p scanner test.rs --crate-type lib
1717
wc -l *csv
1818

1919
popd
20-
rm -rf ${OUT_DIR}
20+
#rm -rf ${OUT_DIR}

tests/script-based-pre/tool-scanner/test.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ pub fn generic<T: Default>() -> T {
1414
T::default()
1515
}
1616

17+
pub fn blah() {
18+
ok();
19+
assert_eq!(u8::default(), 0);
20+
}
21+
1722
pub struct RecursiveType {
1823
pub inner: Option<*const RecursiveType>,
1924
}
@@ -102,3 +107,11 @@ pub fn start_recursion() {
102107
pub fn not_recursive() {
103108
let _ = ok();
104109
}
110+
111+
extern "C" {
112+
fn external_function();
113+
}
114+
115+
pub fn call_external() {
116+
unsafe { external_function() };
117+
}

tools/scanner/src/analysis.rs

Lines changed: 69 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ use serde::{Serialize, Serializer, ser::SerializeStruct};
1111
use stable_mir::mir::mono::Instance;
1212
use stable_mir::mir::visit::{Location, PlaceContext, PlaceRef};
1313
use stable_mir::mir::{
14-
BasicBlock, Body, MirVisitor, Mutability, ProjectionElem, Safety, Terminator, TerminatorKind,
14+
BasicBlock, Body, CastKind, MirVisitor, Mutability, NonDivergingIntrinsic, ProjectionElem,
15+
Rvalue, Safety, Statement, StatementKind, Terminator, TerminatorKind,
1516
};
16-
use stable_mir::ty::{AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind};
17+
use stable_mir::ty::{Abi, AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind};
1718
use stable_mir::visitor::{Visitable, Visitor};
1819
use stable_mir::{CrateDef, CrateItem};
1920
use std::collections::{HashMap, HashSet};
@@ -23,7 +24,7 @@ use std::path::{Path, PathBuf};
2324
#[derive(Clone, Debug)]
2425
pub struct OverallStats {
2526
/// The key and value of each counter.
26-
counters: Vec<(&'static str, usize)>,
27+
pub counters: Vec<(&'static str, usize)>,
2728
/// TODO: Group stats per function.
2829
fn_stats: HashMap<CrateItem, FnStats>,
2930
}
@@ -35,6 +36,12 @@ struct FnStats {
3536
has_unsafe_ops: Option<bool>,
3637
has_unsupported_input: Option<bool>,
3738
has_loop_or_iterator: Option<bool>,
39+
/// How many degrees of separation to unsafe code if any?
40+
/// - `None` if this function is indeed safe.
41+
/// - 0 if this function contains unsafe code (including invoking unsafe fns).
42+
/// - 1 if this function calls a safe abstraction.
43+
/// - 2+ if this function calls other functions that call safe abstractions.
44+
unsafe_distance: Option<usize>,
3845
}
3946

4047
impl FnStats {
@@ -45,6 +52,7 @@ impl FnStats {
4552
has_unsafe_ops: None,
4653
has_unsupported_input: None,
4754
has_loop_or_iterator: None,
55+
unsafe_distance: None,
4856
}
4957
}
5058
}
@@ -232,24 +240,24 @@ impl OverallStats {
232240

233241
macro_rules! fn_props {
234242
($(#[$attr:meta])*
235-
struct $name:ident {
243+
$vis:vis struct $name:ident {
236244
$(
237245
$(#[$prop_attr:meta])*
238246
$prop:ident,
239247
)+
240248
}) => {
241249
#[derive(Debug)]
242-
struct $name {
250+
$vis struct $name {
243251
fn_name: String,
244252
$($(#[$prop_attr])* $prop: usize,)+
245253
}
246254

247255
impl $name {
248-
const fn num_props() -> usize {
256+
pub const fn num_props() -> usize {
249257
[$(stringify!($prop),)+].len()
250258
}
251259

252-
fn new(fn_name: String) -> Self {
260+
pub fn new(fn_name: String) -> Self {
253261
Self { fn_name, $($prop: 0,)+}
254262
}
255263
}
@@ -369,7 +377,7 @@ impl Visitor for TypeVisitor<'_> {
369377
}
370378
}
371379

372-
fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
380+
pub(crate) fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
373381
out_path.set_extension("csv");
374382
info(format!("Write file: {out_path:?}"));
375383
let mut writer = WriterBuilder::new().delimiter(b';').from_path(&out_path).unwrap();
@@ -379,17 +387,23 @@ fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
379387
}
380388

381389
fn_props! {
382-
struct FnUnsafeOperations {
390+
pub struct FnUnsafeOperations {
383391
inline_assembly,
384392
/// Dereference a raw pointer.
385393
/// This is also counted when we access a static variable since it gets translated to a raw pointer.
386394
unsafe_dereference,
387-
/// Call an unsafe function or method.
395+
/// Call an unsafe function or method including C-FFI.
388396
unsafe_call,
389397
/// Access or modify a mutable static variable.
390398
unsafe_static_access,
391399
/// Access fields of unions.
392400
unsafe_union_access,
401+
/// Invoke external functions (this is a subset of `unsafe_call`).
402+
extern_call,
403+
/// Transmute operations.
404+
transmute,
405+
/// Cast raw pointer to reference.
406+
unsafe_cast,
393407
}
394408
}
395409

@@ -419,9 +433,21 @@ impl MirVisitor for BodyVisitor<'_> {
419433
fn visit_terminator(&mut self, term: &Terminator, location: Location) {
420434
match &term.kind {
421435
TerminatorKind::Call { func, .. } => {
422-
let fn_sig = func.ty(self.body.locals()).unwrap().kind().fn_sig().unwrap();
423-
if fn_sig.value.safety == Safety::Unsafe {
436+
let TyKind::RigidTy(RigidTy::FnDef(fn_def, _)) =
437+
func.ty(self.body.locals()).unwrap().kind()
438+
else {
439+
return self.super_terminator(term, location);
440+
};
441+
let fn_sig = fn_def.fn_sig().skip_binder();
442+
if fn_sig.safety == Safety::Unsafe {
424443
self.props.unsafe_call += 1;
444+
if !matches!(
445+
fn_sig.abi,
446+
Abi::Rust | Abi::RustCold | Abi::RustCall | Abi::RustIntrinsic
447+
) && !fn_def.has_body()
448+
{
449+
self.props.extern_call += 1;
450+
}
425451
}
426452
}
427453
TerminatorKind::InlineAsm { .. } => self.props.inline_assembly += 1,
@@ -430,6 +456,34 @@ impl MirVisitor for BodyVisitor<'_> {
430456
self.super_terminator(term, location)
431457
}
432458

459+
fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) {
460+
if let Rvalue::Cast(cast_kind, operand, ty) = rvalue {
461+
match cast_kind {
462+
CastKind::Transmute => {
463+
self.props.transmute += 1;
464+
}
465+
_ => {
466+
let operand_ty = operand.ty(self.body.locals()).unwrap();
467+
if ty.kind().is_ref() && operand_ty.kind().is_raw_ptr() {
468+
self.props.unsafe_cast += 1;
469+
}
470+
}
471+
}
472+
};
473+
self.super_rvalue(rvalue, location);
474+
}
475+
476+
fn visit_statement(&mut self, stmt: &Statement, location: Location) {
477+
if matches!(
478+
&stmt.kind,
479+
StatementKind::Intrinsic(NonDivergingIntrinsic::CopyNonOverlapping(_))
480+
) {
481+
// Treat this as invoking the copy intrinsic.
482+
self.props.unsafe_call += 1;
483+
}
484+
self.super_statement(stmt, location)
485+
}
486+
433487
fn visit_projection_elem(
434488
&mut self,
435489
place: PlaceRef,
@@ -674,9 +728,9 @@ impl Recursion {
674728
}
675729
}
676730

677-
struct FnCallVisitor<'a> {
678-
body: &'a Body,
679-
fns: Vec<FnDef>,
731+
pub struct FnCallVisitor<'a> {
732+
pub body: &'a Body,
733+
pub fns: Vec<FnDef>,
680734
}
681735

682736
impl MirVisitor for FnCallVisitor<'_> {

tools/scanner/src/call_graph.rs

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright Kani Contributors
2+
// SPDX-License-Identifier: Apache-2.0 OR MIT
3+
4+
//! Provides static analyses that are performed on the crate call graph.
5+
6+
use crate::analysis::{FnCallVisitor, FnUnsafeOperations, OverallStats};
7+
use stable_mir::mir::{MirVisitor, Safety};
8+
use stable_mir::ty::{FnDef, RigidTy, Ty, TyKind};
9+
use stable_mir::{CrateDef, CrateDefType};
10+
use std::collections::hash_map::Entry;
11+
use std::collections::{HashMap, VecDeque};
12+
use std::hash::{Hash, Hasher};
13+
use std::path::PathBuf;
14+
15+
impl OverallStats {
16+
/// Build the call graph reachable from the local items, compute each function's call distance to code with unsafe operations, and dump the result for local functions to a CSV file.
17+
pub fn unsafe_distance(&mut self, filename: PathBuf) {
18+
let all_items = stable_mir::all_local_items();
19+
let mut queue =
20+
all_items.into_iter().filter_map(|item| Node::try_new(item.ty())).collect::<Vec<_>>();
21+
// Build call graph
22+
let mut call_graph = CallGraph::default();
23+
while let Some(node) = queue.pop() {
24+
if let Entry::Vacant(e) = call_graph.nodes.entry(node.def) {
25+
e.insert(node);
26+
let Some(body) = node.def.body() else {
27+
continue;
28+
};
29+
let mut visitor = FnCallVisitor { body: &body, fns: vec![] };
30+
visitor.visit_body(&body);
31+
queue.extend(visitor.fns.iter().map(|def| Node::try_new(def.ty()).unwrap()));
32+
for callee in &visitor.fns {
33+
call_graph.rev_edges.entry(*callee).or_default().push(node.def)
34+
}
35+
call_graph.edges.insert(node.def, visitor.fns);
36+
}
37+
}
38+
39+
// Calculate, via reverse BFS over callers, the distance from each function to the nearest function with unsafe operations.
40+
let mut queue = call_graph
41+
.nodes
42+
.values()
43+
.filter_map(|node| node.has_unsafe.then_some((node.def, 0)))
44+
.collect::<VecDeque<_>>();
45+
let mut visited: HashMap<FnDef, u16> = HashMap::from_iter(queue.iter().cloned());
46+
while let Some(current) = queue.pop_front() {
47+
for caller in call_graph.rev_edges.entry(current.0).or_default() {
48+
if !visited.contains_key(caller) {
49+
let distance = current.1 + 1;
50+
visited.insert(*caller, distance);
51+
queue.push_back((*caller, distance))
52+
}
53+
}
54+
}
55+
let krate = stable_mir::local_crate();
56+
let transitive_unsafe = visited
57+
.into_iter()
58+
.filter_map(|(def, distance)| (def.krate() == krate).then_some((def.name(), distance)))
59+
.collect::<Vec<_>>();
60+
self.counters.push(("transitive_unsafe", transitive_unsafe.len()));
61+
crate::analysis::dump_csv(filename, &transitive_unsafe);
62+
}
63+
}
64+
65+
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
66+
struct Node {
67+
def: FnDef,
68+
is_unsafe: bool,
69+
has_unsafe: bool,
70+
}
71+
72+
impl Node {
73+
fn try_new(ty: Ty) -> Option<Node> {
74+
let kind = ty.kind();
75+
let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind else {
76+
return None;
77+
};
78+
let has_unsafe = if let Some(body) = def.body() {
79+
let unsafe_ops = FnUnsafeOperations::new(def.name()).collect(&body);
80+
unsafe_ops.has_unsafe()
81+
} else {
82+
true
83+
};
84+
let fn_sig = kind.fn_sig().unwrap();
85+
let is_unsafe = fn_sig.skip_binder().safety == Safety::Unsafe;
86+
Some(Node { def, is_unsafe, has_unsafe })
87+
}
88+
}
89+
90+
impl Hash for Node {
91+
fn hash<H: Hasher>(&self, state: &mut H) {
92+
self.def.hash(state)
93+
}
94+
}
95+
96+
#[derive(Default, Debug)]
97+
struct CallGraph {
98+
nodes: HashMap<FnDef, Node>,
99+
edges: HashMap<FnDef, Vec<FnDef>>,
100+
rev_edges: HashMap<FnDef, Vec<FnDef>>,
101+
}

tools/scanner/src/lib.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
1111
#![feature(rustc_private)]
1212

13-
mod analysis;
13+
pub mod analysis;
14+
pub mod call_graph;
1415

1516
extern crate rustc_driver;
1617
extern crate rustc_interface;
@@ -65,6 +66,8 @@ pub enum Analysis {
6566
FnLoops,
6667
/// Collect information about recursion via direct calls.
6768
Recursion,
69+
/// Collect information about transitive usage of unsafe.
70+
UnsafeDistance,
6871
}
6972

7073
fn info(msg: String) {
@@ -75,6 +78,9 @@ fn info(msg: String) {
7578

7679
/// This function invokes the required analyses in the given order.
7780
fn analyze_crate(tcx: TyCtxt, analyses: &[Analysis]) -> ControlFlow<()> {
81+
if stable_mir::local_crate().name == "build_script_build" {
82+
return ControlFlow::Continue(());
83+
}
7884
let object_file = tcx.output_filenames(()).path(OutputType::Object);
7985
let base_path = object_file.as_path().to_path_buf();
8086
// Use name for now to make it more friendly. Change to base_path.file_stem() to avoid conflict.
@@ -96,6 +102,7 @@ fn analyze_crate(tcx: TyCtxt, analyses: &[Analysis]) -> ControlFlow<()> {
96102
Analysis::UnsafeOps => crate_stats.unsafe_operations(out_path),
97103
Analysis::FnLoops => crate_stats.loops(out_path),
98104
Analysis::Recursion => crate_stats.recursion(out_path),
105+
Analysis::UnsafeDistance => crate_stats.unsafe_distance(out_path),
99106
}
100107
}
101108
crate_stats.store_csv(base_path, &file_stem);

0 commit comments

Comments
 (0)