Skip to content

Commit b9e2b98

Browse files
authored
Autoharness: change pattern options to accept regexes (#4144)
Change `--include-pattern` and `--exclude-pattern` to accept regular expressions. Resolves #4046 By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 and MIT licenses.
1 parent 18524cc commit b9e2b98

File tree

9 files changed

+308
-99
lines changed

9 files changed

+308
-99
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,7 @@ dependencies = [
991991
"lazy_static",
992992
"num",
993993
"quote",
994+
"regex",
994995
"serde",
995996
"serde_json",
996997
"strum",

docs/src/reference/experimental/autoharness.md

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -43,45 +43,37 @@ autoharness feature means that you are also opting into the function contracts a
4343
Kani generates and runs these harnesses internally—the user only sees the verification results.
4444

4545
### Options
46-
The `autoharness` subcommand has options `--include-pattern` and `--exclude-pattern` to include and exclude particular functions.
47-
These flags look for partial matches against the fully qualified name of a function.
48-
49-
For example, if a module `my_module` has many functions, but we are only interested in `my_module::foo` and `my_module::bar`, we can run:
50-
```
51-
cargo run autoharness -Z autoharness --include-pattern my_module::foo --include-pattern my_module::bar
52-
```
53-
To exclude `my_module` entirely, run:
54-
```
55-
cargo run autoharness -Z autoharness --exclude-pattern my_module
56-
```
46+
The `autoharness` subcommand has options `--include-pattern [REGEX]` and `--exclude-pattern [REGEX]` to include and exclude particular functions using regular expressions.
47+
When matching, Kani prefixes the function's path with the crate name. For example, a function `foo` in the `my_crate` crate will be matched as `my_crate::foo`.
5748

5849
The selection algorithm is as follows:
5950
- If only `--include-pattern`s are provided, include a function if it matches any of the provided patterns.
6051
- If only `--exclude-pattern`s are provided, include a function if it does not match any of the provided patterns.
6152
- If both are provided, include a function if it matches an include pattern *and* does not match any of the exclude patterns. Note that this implies that the exclude pattern takes precedence, i.e., if a function matches both an include pattern and an exclude pattern, it will be excluded.
6253

63-
Here are some more examples:
54+
Here are some examples:
6455

65-
```
66-
# Include functions whose paths contain the substring foo or baz, but not foo::bar
67-
kani autoharness -Z autoharness --include-pattern foo --include-pattern baz --exclude-pattern foo::bar
56+
```bash
57+
# Include functions containing foo but not bar
58+
kani autoharness -Z autoharness --include-pattern 'foo' --exclude-pattern 'bar'
59+
60+
# Include my_crate::foo exactly
61+
kani autoharness -Z autoharness --include-pattern '^my_crate::foo$'
6862

69-
# Include functions whose paths contain the substring foo, but not bar.
70-
kani autoharness -Z autoharness --include-pattern foo --exclude-pattern bar
63+
# Include functions in the foo module, but not in foo::bar
64+
kani autoharness -Z autoharness --include-pattern 'foo::.*' --exclude-pattern 'foo::bar::.*'
7165

72-
# Include functions whose paths contain the substring foo::bar, but not bar.
73-
# This ends up including nothing, since all foo::bar matches will also contain bar.
74-
# Kani will emit a warning that these flags conflict.
75-
kani autoharness -Z autoharness --include-pattern foo::bar --exclude-pattern bar
66+
# Include functions starting with test_, but not if they're in a private module
67+
kani autoharness -Z autoharness --include-pattern 'test_.*' --exclude-pattern '.*::private::.*'
7668

77-
# Include functions whose paths contain the substring foo, but not foo.
78-
# This ends up including nothing, and Kani will emit a warning that these flags conflict.
79-
kani autoharness -Z autoharness --include-pattern foo --exclude--pattern foo
69+
# This ends up including nothing since all foo::bar matches will also contain bar.
70+
# Kani will emit a warning that these options conflict.
71+
kani autoharness -Z autoharness --include-pattern 'foo::bar' --exclude-pattern 'bar'
8072
```
8173

82-
Currently, the only supported "patterns" are substrings of the fully qualified path of the function.
83-
However, if more sophisticated patterns (e.g., regular expressions) would be useful for you,
84-
please let us know in a comment on [this GitHub issue](https://github.com/model-checking/kani/issues/3832).
74+
Note that because Kani prefixes function paths with the crate name, some patterns might match more than you expect.
75+
For example, given a function `foo_top_level` inside crate `my_crate`, the regex `.*::foo_.*` will match `foo_top_level`, since Kani interprets it as `my_crate::foo_top_level`.
76+
To match only `foo_` functions inside modules, use a more specific pattern, e.g. `.*::[^:]+::foo_.*`.
8577

8678
## Example
8779
Using the `estimate_size` example from [First Steps](../../tutorial-first-steps.md) again:

kani-compiler/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ kani_metadata = { path = "../kani_metadata" }
1717
lazy_static = "1.5.0"
1818
num = { version = "0.4.0", optional = true }
1919
quote = "1.0.36"
20+
regex = "1.11.1"
2021
serde = { version = "1", optional = true }
2122
serde_json = "1"
2223
strum = "0.27.1"

kani-compiler/src/kani_middle/codegen_units.rs

Lines changed: 209 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use kani_metadata::{
2121
ArtifactType, AssignsContract, AutoHarnessMetadata, AutoHarnessSkipReason, HarnessKind,
2222
HarnessMetadata, KaniMetadata,
2323
};
24+
use regex::RegexSet;
2425
use rustc_hir::def_id::DefId;
2526
use rustc_middle::ty::TyCtxt;
2627
use rustc_session::config::OutputType;
@@ -356,6 +357,29 @@ fn get_all_automatic_harnesses(
356357
.collect::<HashMap<_, _>>()
357358
}
358359

360+
fn make_regex_set(patterns: Vec<String>) -> Option<RegexSet> {
361+
if patterns.is_empty() {
362+
None
363+
} else {
364+
Some(RegexSet::new(patterns).unwrap_or_else(|e| {
365+
panic!("Invalid regexes should have been caught during argument validation: {e}")
366+
}))
367+
}
368+
}
369+
370+
/// A function is filtered out if 1) none of the include patterns match it or 2) one of the exclude patterns matches it.
371+
fn autoharness_filtered_out(
372+
name: &str,
373+
included_set: &Option<RegexSet>,
374+
excluded_set: &Option<RegexSet>,
375+
) -> bool {
376+
// A function is included if `--include-pattern` is not provided or if at least one of its regexes matches `name`
377+
let included = included_set.as_ref().is_none_or(|set| set.is_match(name));
378+
// A function is excluded if `--exclude-pattern` is provided and at least one of its regexes matches `name`
379+
let excluded = excluded_set.as_ref().is_some_and(|set| set.is_match(name));
380+
!included || excluded
381+
}
382+
359383
/// Partition every function in the crate into (chosen, skipped), where `chosen` is a vector of the Instances for which we'll generate automatic harnesses,
360384
/// and `skipped` is a map of function names to the reason why we skipped them.
361385
fn automatic_harness_partition(
@@ -364,12 +388,15 @@ fn automatic_harness_partition(
364388
crate_name: &str,
365389
kani_any_def: FnDef,
366390
) -> (Vec<Instance>, BTreeMap<String, AutoHarnessSkipReason>) {
367-
// If `filter_list` contains `name`, either as an exact match or a substring.
368-
let filter_contains = |name: &str, filter_list: &[String]| -> bool {
369-
filter_list.iter().any(|filter_name| name.contains(filter_name))
370-
};
391+
let crate_fns =
392+
stable_mir::all_local_items().into_iter().filter(|item| item.ty().kind().is_fn());
393+
394+
let included_set = make_regex_set(args.autoharness_included_patterns.clone());
395+
let excluded_set = make_regex_set(args.autoharness_excluded_patterns.clone());
371396

372397
// If `func` is not eligible for an automatic harness, return the reason why; if it is eligible, return None.
398+
// Note that we only return one reason for ineligiblity, when there could be multiple;
399+
// we can revisit this implementation choice in the future if users request more verbose output.
373400
let skip_reason = |fn_item: CrateItem| -> Option<AutoHarnessSkipReason> {
374401
if KaniAttributes::for_def_id(tcx, fn_item.def_id()).is_kani_instrumentation() {
375402
return Some(AutoHarnessSkipReason::KaniImpl);
@@ -397,34 +424,8 @@ fn automatic_harness_partition(
397424
return Some(AutoHarnessSkipReason::KaniImpl);
398425
}
399426

400-
match (
401-
args.autoharness_included_patterns.is_empty(),
402-
args.autoharness_excluded_patterns.is_empty(),
403-
) {
404-
// If no filters were specified, then continue.
405-
(true, true) => {}
406-
// If only --exclude-pattern was provided, filter out the function if excluded_patterns contains its name.
407-
(true, false) => {
408-
if filter_contains(&name, &args.autoharness_excluded_patterns) {
409-
return Some(AutoHarnessSkipReason::UserFilter);
410-
}
411-
}
412-
// If only --include-pattern was provided, filter out the function if included_patterns does not contain its name.
413-
(false, true) => {
414-
if !filter_contains(&name, &args.autoharness_included_patterns) {
415-
return Some(AutoHarnessSkipReason::UserFilter);
416-
}
417-
}
418-
// If both are specified, filter out the function if included_patterns does not contain its name.
419-
// Then, filter out any functions that excluded_patterns does match.
420-
// This order is important, since it preserves the semantics described in kani_driver::autoharness_args where exclude takes precedence over include.
421-
(false, false) => {
422-
if !filter_contains(&name, &args.autoharness_included_patterns)
423-
|| filter_contains(&name, &args.autoharness_excluded_patterns)
424-
{
425-
return Some(AutoHarnessSkipReason::UserFilter);
426-
}
427-
}
427+
if autoharness_filtered_out(&name, &included_set, &excluded_set) {
428+
return Some(AutoHarnessSkipReason::UserFilter);
428429
}
429430

430431
// Each argument of `instance` must implement Arbitrary.
@@ -472,14 +473,6 @@ fn automatic_harness_partition(
472473
let mut chosen = vec![];
473474
let mut skipped = BTreeMap::new();
474475

475-
// FIXME: ideally, this filter would be matches!(item.kind(), ItemKind::Fn), since that would allow us to generate harnesses for top-level closures,
476-
// c.f. https://github.com/model-checking/kani/issues/3832#issuecomment-2701671798.
477-
// Note that filtering closures out here is a UX choice: we could instead call skip_reason() on closures,
478-
// but the limitations in the linked issue would cause the user to be flooded with reports of "skipping" Kani instrumentation functions.
479-
// Instead, we just pretend closures don't exist in our reporting of what we did and did not verify, which has the downside of also ignoring the user's top-level closures, but those are rare.
480-
let crate_fns =
481-
stable_mir::all_local_items().into_iter().filter(|item| item.ty().kind().is_fn());
482-
483476
for func in crate_fns {
484477
if let Some(reason) = skip_reason(func) {
485478
skipped.insert(func.name(), reason);
@@ -490,3 +483,179 @@ fn automatic_harness_partition(
490483

491484
(chosen, skipped)
492485
}
486+
487+
#[cfg(test)]
488+
mod autoharness_filter_tests {
489+
use super::*;
490+
491+
#[test]
492+
fn both_none() {
493+
let included = None;
494+
let excluded = None;
495+
assert!(!autoharness_filtered_out("test_fn", &included, &excluded));
496+
}
497+
498+
#[test]
499+
fn only_included() {
500+
let included = make_regex_set(vec!["test.*".to_string()]);
501+
let excluded = None;
502+
503+
assert!(!autoharness_filtered_out("test_fn", &included, &excluded));
504+
assert!(autoharness_filtered_out("other_fn", &included, &excluded));
505+
}
506+
507+
#[test]
508+
fn only_excluded() {
509+
let included = None;
510+
let excluded = make_regex_set(vec!["test.*".to_string()]);
511+
512+
assert!(autoharness_filtered_out("test_fn", &included, &excluded));
513+
assert!(!autoharness_filtered_out("other_fn", &included, &excluded));
514+
}
515+
516+
#[test]
517+
fn both_matching() {
518+
let included = make_regex_set(vec![".*_fn".to_string()]);
519+
let excluded = make_regex_set(vec!["test.*".to_string()]);
520+
521+
assert!(autoharness_filtered_out("test_fn", &included, &excluded));
522+
assert!(!autoharness_filtered_out("other_fn", &included, &excluded));
523+
}
524+
525+
#[test]
526+
fn multiple_include_patterns() {
527+
let included = make_regex_set(vec!["test.*".to_string(), "other.*".to_string()]);
528+
let excluded = None;
529+
530+
assert!(!autoharness_filtered_out("test_fn", &included, &excluded));
531+
assert!(!autoharness_filtered_out("other_fn", &included, &excluded));
532+
assert!(autoharness_filtered_out("different_fn", &included, &excluded));
533+
}
534+
535+
#[test]
536+
fn multiple_exclude_patterns() {
537+
let included = None;
538+
let excluded = make_regex_set(vec!["test.*".to_string(), "other.*".to_string()]);
539+
540+
assert!(autoharness_filtered_out("test_fn", &included, &excluded));
541+
assert!(autoharness_filtered_out("other_fn", &included, &excluded));
542+
assert!(!autoharness_filtered_out("different_fn", &included, &excluded));
543+
}
544+
545+
#[test]
546+
fn exclude_precedence_identical_patterns() {
547+
let pattern = "test.*".to_string();
548+
let included = make_regex_set(vec![pattern.clone()]);
549+
let excluded = make_regex_set(vec![pattern]);
550+
551+
assert!(autoharness_filtered_out("test_fn", &included, &excluded));
552+
assert!(autoharness_filtered_out("other_fn", &included, &excluded));
553+
}
554+
555+
#[test]
556+
fn exclude_precedence_overlapping_patterns() {
557+
let included = make_regex_set(vec![".*_fn".to_string()]);
558+
let excluded = make_regex_set(vec!["test_.*".to_string(), "other_.*".to_string()]);
559+
560+
assert!(autoharness_filtered_out("test_fn", &included, &excluded));
561+
assert!(autoharness_filtered_out("other_fn", &included, &excluded));
562+
assert!(!autoharness_filtered_out("different_fn", &included, &excluded));
563+
}
564+
565+
#[test]
566+
fn exact_match() {
567+
let included = make_regex_set(vec!["^test_fn$".to_string()]);
568+
let excluded = None;
569+
570+
assert!(!autoharness_filtered_out("test_fn", &included, &excluded));
571+
assert!(autoharness_filtered_out("test_fn_extra", &included, &excluded));
572+
}
573+
574+
#[test]
575+
fn include_specific_module() {
576+
let included = make_regex_set(vec!["module1::.*".to_string()]);
577+
let excluded = None;
578+
579+
assert!(!autoharness_filtered_out("module1::test_fn", &included, &excluded));
580+
assert!(!autoharness_filtered_out("crate::module1::test_fn", &included, &excluded));
581+
assert!(autoharness_filtered_out("module2::test_fn", &included, &excluded));
582+
assert!(autoharness_filtered_out("crate::module2::test_fn", &included, &excluded));
583+
}
584+
585+
#[test]
586+
fn exclude_specific_module() {
587+
let included = None;
588+
let excluded = make_regex_set(vec![".*::internal::.*".to_string()]);
589+
590+
assert!(autoharness_filtered_out("crate::internal::helper_fn", &included, &excluded));
591+
assert!(autoharness_filtered_out("my_crate::internal::test_fn", &included, &excluded));
592+
assert!(!autoharness_filtered_out("crate::public::test_fn", &included, &excluded));
593+
}
594+
595+
#[test]
596+
fn test_exact_match_with_crate() {
597+
let included = make_regex_set(vec!["^lib::foo_function$".to_string()]);
598+
let excluded = None;
599+
600+
assert!(!autoharness_filtered_out("lib::foo_function", &included, &excluded));
601+
assert!(autoharness_filtered_out("lib::foo_function_extra", &included, &excluded));
602+
assert!(autoharness_filtered_out("lib::other::foo_function", &included, &excluded));
603+
assert!(autoharness_filtered_out("other::foo_function", &included, &excluded));
604+
assert!(autoharness_filtered_out("foo_function", &included, &excluded));
605+
}
606+
607+
#[test]
608+
fn complex_path_patterns() {
609+
let included = make_regex_set(vec![
610+
"crate::module1::.*".to_string(),
611+
"other_crate::tests::.*".to_string(),
612+
]);
613+
let excluded =
614+
make_regex_set(vec![".*::internal::.*".to_string(), ".*::private::.*".to_string()]);
615+
616+
assert!(!autoharness_filtered_out("crate::module1::test_fn", &included, &excluded));
617+
assert!(!autoharness_filtered_out("other_crate::tests::test_fn", &included, &excluded));
618+
assert!(autoharness_filtered_out(
619+
"crate::module1::internal::test_fn",
620+
&included,
621+
&excluded
622+
));
623+
assert!(autoharness_filtered_out(
624+
"other_crate::tests::private::test_fn",
625+
&included,
626+
&excluded
627+
));
628+
assert!(autoharness_filtered_out("crate::module2::test_fn", &included, &excluded));
629+
}
630+
631+
#[test]
632+
fn crate_specific_filtering() {
633+
let included = make_regex_set(vec!["my_crate::.*".to_string()]);
634+
let excluded = make_regex_set(vec!["other_crate::.*".to_string()]);
635+
636+
assert!(!autoharness_filtered_out("my_crate::test_fn", &included, &excluded));
637+
assert!(!autoharness_filtered_out("my_crate::module::test_fn", &included, &excluded));
638+
assert!(autoharness_filtered_out("other_crate::test_fn", &included, &excluded));
639+
assert!(autoharness_filtered_out("third_crate::test_fn", &included, &excluded));
640+
}
641+
642+
#[test]
643+
fn root_crate_paths() {
644+
let included = make_regex_set(vec!["^crate::.*".to_string()]);
645+
let excluded = None;
646+
647+
assert!(!autoharness_filtered_out("crate::test_fn", &included, &excluded));
648+
assert!(autoharness_filtered_out("other_crate::test_fn", &included, &excluded));
649+
assert!(autoharness_filtered_out("test_fn", &included, &excluded));
650+
}
651+
652+
#[test]
653+
fn impl_paths_with_spaces() {
654+
let included = make_regex_set(vec!["num::<impl.i8>::wrapping_.*".to_string()]);
655+
let excluded = None;
656+
657+
assert!(!autoharness_filtered_out("num::<impl i8>::wrapping_sh", &included, &excluded));
658+
assert!(!autoharness_filtered_out("num::<impl i8>::wrapping_add", &included, &excluded));
659+
assert!(autoharness_filtered_out("num::<impl i16>::wrapping_sh", &included, &excluded));
660+
}
661+
}

kani-driver/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ serde = { version = "1", features = ["derive"] }
2222
serde_json = { version = "1", features = ["preserve_order"] }
2323
clap = { version = "4.4.11", features = ["derive"] }
2424
toml = "0.8"
25-
regex = "1.6"
25+
regex = "1.11.1"
2626
rustc-demangle = "0.1.21"
2727
pathdiff = "0.2.1"
2828
rayon = "1.5.3"

0 commit comments

Comments
 (0)