Skip to content

Commit 830be81

Browse files
committed
use output from std-analysis.sh to generate lists of unsafe functions instead of doing analysis from first principles
1 parent b9ec578 commit 830be81

File tree

2 files changed

+216
-121
lines changed

2 files changed

+216
-121
lines changed

tools/unsafe-finder/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,7 @@ edition = "2024"
66
[dependencies]
77
prettyplease = "0.2.32"
88
syn = {version = "2.0.101", features = ["full", "extra-traits", "visit"]}
9+
csv = "1.1"
10+
serde = { version = "1.0.55", features = ["derive"] }
11+
regex = "1.11.2"
12+
itertools = "0.14.0"

tools/unsafe-finder/src/main.rs

Lines changed: 212 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -1,142 +1,220 @@
1-
use syn::ImplItem;
2-
use syn::Item::Impl;
3-
use syn::ItemImpl;
4-
5-
use syn::Item::Trait;
6-
use syn::ItemTrait;
7-
use syn::TraitItem;
8-
9-
use syn::visit;
10-
use syn::visit::Visit;
11-
121
use std::env;
132
use std::fs;
3+
use std::error::Error;
144
use std::io;
155
use std::process;
166
use std::path::Path;
177

18-
struct StmtVisitor {
19-
found_unsafe: bool,
8+
use std::collections::HashMap;
9+
10+
use itertools::Itertools;
11+
12+
use serde::Serialize;
13+
use serde::Deserialize;
14+
15+
use regex::Regex;
16+
17+
// from kani repo's tools/scanner/src/analysis.rs:
18+
#[derive(Clone, Debug, Serialize, Deserialize)]
19+
struct FnStats {
20+
name: String,
21+
is_unsafe: Option<bool>,
22+
has_unsafe_ops: Option<bool>,
23+
has_unsupported_input: Option<bool>,
24+
has_loop_or_iterator: Option<bool>,
25+
is_public: Option<bool>,
26+
}
27+
28+
/// A `::`-separated function path broken into its components by `parse_fn_name`.
///
/// Derives `Debug`, `PartialEq` and `Eq` in addition to `Clone` so values can be
/// logged and compared directly.
#[derive(Clone, Debug, PartialEq, Eq)]
struct StructuredFnName {
    /// First segment of the path; empty when no leading segment is left for it.
    krate: String,
    /// Segments between the crate and the item (or its `<...>` segment), in source order.
    module_path: Vec<String>,
    /// Top-level comma-separated entries of the `<...>` segment directly before the item.
    type_parameters: Vec<String>,
    /// Last segment of the path.
    item: String,
}
2135

22-
impl<'ast> Visit<'ast> for StmtVisitor {
23-
fn visit_expr_unsafe(&mut self, i: &'ast syn::ExprUnsafe) {
24-
self.found_unsafe = true;
25-
visit::visit_expr_unsafe(self, i);
36+
/// Grouping key for the report: a crate name plus the module path inside it.
///
/// The derived ordering compares `krate` first and `module_path` second, which is
/// the order `handle_file` prints groups in. `Clone` and `Debug` are derived so
/// keys can be copied and logged.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct CrateAndModules {
    /// Crate segment of the function path.
    krate: String,
    /// Module segments following the crate, in source order.
    module_path: Vec<String>,
}
41+
42+
/// Splits `s` on every `::` separator that is not nested inside angle brackets.
///
/// A generic-argument group such as `<two::four>` is returned as a single segment.
/// The result always contains at least one element: input without a top-level `::`
/// comes back unchanged as the only segment.
///
/// Only two *adjacent* colons form a separator, and the `>` of a `->` arrow is not
/// counted as a closing bracket.
fn split_by_double_colons(s: &str) -> Vec<String> {
    let mut bracket_level = 0i32;
    let mut segments = Vec::new();
    let mut current = String::new();
    // True only when the character just before this one was a top-level ':'.
    let mut pending_colon = false;
    let mut previous: Option<char> = None;

    for c in s.chars() {
        current.push(c);
        let mut colon_seen = false;
        match c {
            '<' => bracket_level += 1,
            // The '>' in `fn(A) -> B` is an arrow, not the end of a `<...>` group.
            '>' if previous != Some('-') => bracket_level -= 1,
            ':' if bracket_level <= 0 => {
                if pending_colon {
                    // `current` ends with "::" here; drop the separator itself.
                    current.truncate(current.len() - 2);
                    segments.push(std::mem::take(&mut current));
                } else {
                    colon_seen = true;
                }
            }
            _ => {}
        }
        // Any character other than a first top-level ':' breaks a would-be pair.
        pending_colon = colon_seen;
        previous = Some(c);
    }

    segments.push(current);
    segments
}
2866

29-
fn print_pub_unsafe_and_unsafe_containing_fns(ii: ItemImpl) {
30-
let mut interesting = false;
31-
let mut pub_unsafe_fns = Vec::new();
32-
let mut unsafe_containing_fns = Vec::new();
33-
for item in &ii.items {
34-
match item {
35-
ImplItem::Fn(f) =>
36-
{
37-
// record all pub unsafe functions
38-
if matches!(f.vis, syn::Visibility::Public(_)) && matches!(f.sig.unsafety, Some(_))
39-
{
40-
interesting = true;
41-
pub_unsafe_fns.push(format!("--- pub unsafe fn {}", f.sig.ident));
42-
}
43-
// record functions that contain unsafe code in their bodies but that are not marked unsafe
44-
else if matches!(f.sig.unsafety, None) {
45-
let mut sv = StmtVisitor {
46-
found_unsafe: false,
47-
};
48-
sv.visit_block(&f.block);
49-
if sv.found_unsafe {
50-
interesting = true;
51-
unsafe_containing_fns
52-
.push(format!("--- unsafe-containing fn {}", f.sig.ident));
53-
}
54-
}
55-
}
56-
_ => (),
57-
}
58-
}
59-
if interesting {
60-
// create an empty impl with the same name as ii
61-
let mut i_copy = ii.clone();
62-
i_copy.items = Vec::new();
63-
let file = syn::File {
64-
attrs: vec![],
65-
items: vec![Impl(i_copy)],
66-
shebang: None,
67-
};
68-
print!("{}", prettyplease::unparse(&file));
69-
pub_unsafe_fns.iter().for_each(|s| {
70-
println!("{}", s);
71-
});
72-
unsafe_containing_fns.iter().for_each(|s| {
73-
println!("{}", s);
74-
});
75-
println!();
76-
} else {
77-
// println!("--- nothing interesting here");
67+
/// Splits `s` on commas that are not nested inside `<...>` or `(...)`.
///
/// Each returned piece is trimmed of surrounding whitespace. The result always
/// contains at least one element. The `>` of a `->` arrow is not counted as a
/// closing bracket, so `fn(a) -> b, Vec<c, d>` yields two pieces.
fn split_by_commas(s: &str) -> Vec<String> {
    let mut bracket_level = 0i32;
    let mut parens_level = 0i32;
    let mut pieces = Vec::new();
    let mut current = String::new();
    let mut previous: Option<char> = None;

    for c in s.chars() {
        match c {
            '<' => bracket_level += 1,
            // The '>' in `fn(A) -> B` is an arrow, not the end of a `<...>` group.
            '>' if previous != Some('-') => bracket_level -= 1,
            '(' => parens_level += 1,
            ')' => parens_level -= 1,
            ',' if bracket_level <= 0 && parens_level <= 0 => {
                // Top-level separator: finish the piece without keeping the comma.
                pieces.push(current.trim().to_string());
                current.clear();
                previous = Some(c);
                continue;
            }
            _ => {}
        }
        current.push(c);
        previous = Some(c);
    }

    pieces.push(current.trim().to_string());
    pieces
}
8090

81-
fn print_trait_unsafe_containing_fns(it: ItemTrait) {
82-
let mut interesting = false;
83-
let mut unsafe_containing_fns = Vec::new();
84-
for item in &it.items {
85-
match item {
86-
TraitItem::Fn(f) =>
87-
// record functions that contain unsafe code in their bodies but that are not marked unsafe
88-
{
89-
if matches!(f.sig.unsafety, None) {
90-
let mut sv = StmtVisitor {
91-
found_unsafe: false,
92-
};
93-
if let Some(d) = &f.default {
94-
sv.visit_block(&d);
95-
}
96-
if sv.found_unsafe {
97-
interesting = true;
98-
unsafe_containing_fns
99-
.push(format!("--- unsafe-containing fn {}", f.sig.ident));
100-
}
101-
}
102-
}
103-
_ => (),
104-
}
105-
}
106-
if interesting {
107-
let mut i_copy = it.clone();
108-
i_copy.items = Vec::new();
109-
let file = syn::File {
110-
attrs: vec![],
111-
items: vec![Trait(i_copy)],
112-
shebang: None,
113-
};
114-
print!("{}", prettyplease::unparse(&file));
115-
unsafe_containing_fns.iter().for_each(|s| {
116-
println!("{}", s);
117-
});
118-
println!();
119-
} else {
120-
// println!("--- nothing interesting here");
91+
#[cfg(test)]
mod tests {
    use super::*;

    // --- split_by_double_colons ---

    /// Input without any separator is returned as the only segment.
    #[test]
    fn colons_singleton() {
        assert_eq!(split_by_double_colons("a"), ["a"]);
    }

    /// A plain `::` separates two segments.
    #[test]
    fn colons_no_brackets() {
        assert_eq!(split_by_double_colons("one::two"), ["one", "two"]);
    }

    /// A bracketed segment without inner colons is kept together with its brackets.
    #[test]
    fn colons_brackets_no_colons() {
        assert_eq!(
            split_by_double_colons("one::<two>::three"),
            ["one", "<two>", "three"]
        );
    }

    /// A `::` nested inside angle brackets is not a separator.
    #[test]
    fn colons_brackets_with_colons() {
        assert_eq!(
            split_by_double_colons("one::<two::four>::three"),
            ["one", "<two::four>", "three"]
        );
    }

    // --- split_by_commas ---

    /// Input without any comma is returned as the only piece.
    #[test]
    fn commas_singleton() {
        assert_eq!(split_by_commas("a"), ["a"]);
    }

    /// A comma inside angle brackets is not a separator.
    #[test]
    fn commas_brackets() {
        assert_eq!(split_by_commas("<a,b>"), ["<a,b>"]);
    }

    /// Top-level commas split the input and the pieces are trimmed.
    #[test]
    fn commas_no_brackets() {
        assert_eq!(split_by_commas("a, b"), ["a", "b"]);
    }

    /// A comma inside parentheses is not a separator.
    #[test]
    fn commas_parens() {
        assert_eq!(split_by_commas("(a,b)"), ["(a,b)"]);
    }

    /// An unclosed `<` keeps every following comma nested.
    #[test]
    fn commas_unmatched() {
        assert_eq!(split_by_commas("<a,b),c"), ["<a,b),c"]);
    }
}
123149

124-
fn handle_file(path:&Path) {
125-
if !path.to_str().unwrap().ends_with(".rs") {
126-
return;
150+
fn parse_fn_name(raw_name:String) -> StructuredFnName {
151+
let brackets_re = Regex::new(r"<(.+)>").unwrap();
152+
153+
let parts:Vec<String> = split_by_double_colons(&raw_name).into_iter().rev().collect();
154+
let mut parts_index = 0;
155+
let item = &parts[parts_index]; parts_index += 1;
156+
let tp = &parts[parts_index].as_str();
157+
let type_parameters = if brackets_re.is_match(tp) {
158+
let tp_commas = &brackets_re.captures(tp).unwrap();
159+
parts_index += 1;
160+
split_by_commas(&tp_commas[1]).into_iter().map(|x| x.to_string()).collect()
161+
} else {
162+
vec![]
163+
};
164+
let mut mp = vec![];
165+
while parts_index < parts.len() {
166+
mp.push(parts[parts_index].to_string());
167+
parts_index += 1;
168+
}
169+
let kr = match mp.pop() {
170+
Some(k) => k,
171+
None => "".to_string()
172+
};
173+
174+
StructuredFnName {
175+
krate: kr,
176+
module_path: mp.into_iter().rev().collect(),
177+
type_parameters: type_parameters.into_iter().map(|x| x.to_string()).collect(),
178+
item: item.to_string()
127179
}
180+
}
181+
182+
fn handle_file(path:&Path) -> Result<(), Box<dyn Error>> {
183+
let path_contents = fs::read_to_string(&path).expect("unable to read file");
184+
let mut rdr = csv::ReaderBuilder::new().delimiter(b';').from_reader(path_contents.as_bytes());
128185

129186
println!("# Unsafe usages in file {}", path.display());
130-
let src = fs::read_to_string(&path).expect("unable to read file");
131-
let syntax = syn::parse_file(&src).expect("unable to parse file");
132187

133-
for item in syntax.items {
134-
match item {
135-
Impl(im) => print_pub_unsafe_and_unsafe_containing_fns(im),
136-
Trait(t) => print_trait_unsafe_containing_fns(t),
137-
_ => (),
138-
}
188+
let mut fns_by_crate_and_modules: HashMap<CrateAndModules, Vec<StructuredFnName>> = HashMap::new();
189+
190+
for result in rdr.deserialize() {
191+
let fn_stats: FnStats = result?;
192+
if matches!(fn_stats.is_unsafe, Some(true)) {
193+
let structured_fn_name = parse_fn_name(fn_stats.name);
194+
let krate_and_module_path = CrateAndModules {
195+
krate: structured_fn_name.krate.clone(),
196+
module_path: structured_fn_name.module_path.clone()
197+
};
198+
match fns_by_crate_and_modules.get_mut(&krate_and_module_path) {
199+
Some(fns) => fns.push(structured_fn_name.clone()),
200+
None => { fns_by_crate_and_modules.insert(krate_and_module_path, vec![structured_fn_name.clone()]); }
201+
}
202+
}
139203
}
204+
205+
for krm in fns_by_crate_and_modules.keys().sorted() {
206+
println!("crate {}, modules {:?}", krm.krate, krm.module_path);
207+
if let Some(fns) = fns_by_crate_and_modules.get(krm) {
208+
for structured_fn_name in fns {
209+
println!("--- unsafe-containing fn {}", structured_fn_name.item);
210+
if !structured_fn_name.type_parameters.is_empty() {
211+
println!(" type parameters {:?}", structured_fn_name.type_parameters);
212+
}
213+
}
214+
}
215+
}
216+
217+
Ok(())
140218
}
141219

142220
fn handle_dir(path:&Path) -> io::Result<()> {
@@ -156,13 +234,20 @@ fn handle_dir(path:&Path) -> io::Result<()> {
156234
}
157235

158236
if cur_path.is_file() {
159-
handle_file(&cur_path);
237+
if let Err(err) = handle_file(&cur_path) {
238+
println!("error processing {}: {}", cur_path.display(), err);
239+
process::exit(1);
240+
}
160241
continue;
161242
}
162243
}
163244
_ => {
164245
if !had_files && !dirs.is_empty() {
165-
handle_file(&dirs[(dir_index - 1).max(0)].to_owned());
246+
let target = dirs[(dir_index - 1).max(0)].to_owned();
247+
if let Err(err) = handle_file(&target) {
248+
println!("error processing {}: {}", target.display(), err);
249+
process::exit(1);
250+
}
166251
}
167252
if dir_index == dirs.len() {
168253
break;
@@ -182,16 +267,22 @@ fn main() {
182267
let _ = args.next(); // executable name
183268

184269
if args.len() == 0 {
185-
eprintln!("Usage: unsafe-finder [directory | filename.rs]*");
270+
// should we only handle files named "_scan_functions.csv"?
271+
eprintln!("Usage: unsafe-finder [[prefix]_scan_functions.csv]*");
186272
process::exit(1);
187273
}
188274

189275
for arg in args {
190276
let path = Path::new(&arg);
191277
if path.is_file() {
192-
handle_file(&path);
278+
if let Err(err) = handle_file(&path) {
279+
eprintln!("error processing {}: {}", arg, err);
280+
process::exit(1);
281+
}
193282
} else if path.is_dir() {
194283
handle_dir(&path).unwrap();
284+
} else {
285+
eprintln!("could not open {}", arg);
195286
}
196287
}
197288
}

0 commit comments

Comments
 (0)