Skip codegen for unneeded harnesses (#4213)

AlexanderPortland · web-flow · commit 210b6c3ba42d · 2025-07-22T00:34:07.000Z
This PR moves the logic for filtering out specific harnesses (as specified by the `--harness` & `--exact` Kani arguments) to the compiler, allowing us to skip codegen for harnesses that will end up being blocked by the filter. I’ve also added [a dev-build-only assertion](https://github.com/AlexanderPortland/kani/blob/c6283ac0ec53f70bf6ab78f367b6443c25b5290c/kani-driver/src/metadata.rs#L103-L110) to double check that we’re not doing codegen for any unused harnesses. Based on testing, something like running `cargo kani --harness estimator` on `s2n-quic-codec` used to take ~11s locally to compile 66 harnesses, but only 5 would end up being analyzed. Now, the same command compiles only the 5 needed harnesses, taking just ~5s. The downside of this is that the target crate now has to be recompiled if the user runs Kani with differing harness filters but, if the idea in #4212 is possible, that too could be avoided. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 and MIT licenses.
diff --git a/Cargo.lock b/Cargo.lock
@@ -1199,6 +1199,7 @@ dependencies = [
  "serde",
  "strum",
  "strum_macros",
+ "tracing",
 ]
 
 [[package]]
diff --git a/kani-compiler/src/args.rs b/kani-compiler/src/args.rs
@@ -52,6 +52,16 @@ pub struct Arguments {
     /// Option name used to use json pretty-print for output files.
     #[clap(long = "pretty-json-files")]
     pub output_pretty_json: bool,
+    /// When specified, the harness filter will only match the exact fully qualified name of a harness.
+    // (Passed here directly from [CargoKaniArgs] in `args_toml.rs`)
+    #[arg(long, requires("harnesses"))]
+    pub exact: bool,
+    /// If specified, only run harnesses that match this filter. This option can be provided
+    /// multiple times, which will run all tests matching any of the filters.
+    /// If used with --exact, the harness filter will only match the exact fully qualified name of a harness.
+    // (Passed here directly from [CargoKaniArgs] in `args_toml.rs`)
+    #[arg(long = "harness", num_args(1), value_name = "HARNESS_FILTER")]
+    pub harnesses: Vec<String>,
     /// Option used for suppressing global ASM error.
     #[clap(long)]
     pub ignore_global_asm: bool,
diff --git a/kani-compiler/src/kani_middle/codegen_units.rs b/kani-compiler/src/kani_middle/codegen_units.rs
@@ -21,7 +21,7 @@ use crate::kani_queries::QueryDb;
 use fxhash::{FxHashMap, FxHashSet};
 use kani_metadata::{
     ArtifactType, AssignsContract, AutoHarnessMetadata, AutoHarnessSkipReason, HarnessKind,
-    HarnessMetadata, KaniMetadata,
+    HarnessMetadata, KaniMetadata, find_proof_harnesses,
 };
 use regex::RegexSet;
 use rustc_hir::def_id::DefId;
@@ -75,15 +75,23 @@ impl CodegenUnits {
         let args = queries.args();
         match args.reachability_analysis {
             ReachabilityType::Harnesses => {
-                let all_harnesses = get_all_manual_harnesses(tcx, base_filename);
+                let all_harnesses = determine_targets(
+                    get_all_manual_harnesses(tcx, base_filename),
+                    &args.harnesses,
+                    args.exact,
+                );
                 // Even if no_stubs is empty we still need to store rustc metadata.
                 let units = group_by_stubs(tcx, &all_harnesses);
                 validate_units(tcx, &units);
                 debug!(?units, "CodegenUnits::new");
                 CodegenUnits { units, harness_info: all_harnesses, crate_info }
             }
             ReachabilityType::AllFns => {
-                let mut all_harnesses = get_all_manual_harnesses(tcx, base_filename);
+                let mut all_harnesses = determine_targets(
+                    get_all_manual_harnesses(tcx, base_filename),
+                    &args.harnesses,
+                    args.exact,
+                );
                 let mut units = group_by_stubs(tcx, &all_harnesses);
                 validate_units(tcx, &units);
 
@@ -328,6 +336,30 @@ fn get_all_manual_harnesses(
         .collect::<HashMap<_, _>>()
 }
 
+/// Filter which harnesses to codegen based on user filters. Shares use of `find_proof_harnesses` with the `determine_targets` function
+/// in `kani-driver/src/metadata.rs` to ensure the filter is consistent and thus codegen is always done for the subset of harnesses we want
+/// to analyze.
+fn determine_targets(
+    all_harnesses: HashMap<Harness, HarnessMetadata>,
+    harness_filters: &[String],
+    exact_filter: bool,
+) -> HashMap<Harness, HarnessMetadata> {
+    if harness_filters.is_empty() {
+        return all_harnesses;
+    }
+
+    // If there are filters, only keep around harnesses that satisfy them.
+    let mut new_harnesses = all_harnesses.clone();
+    let valid_harnesses = find_proof_harnesses(
+        &BTreeSet::from_iter(harness_filters.iter()),
+        all_harnesses.values(),
+        exact_filter,
+    );
+
+    new_harnesses.retain(|_, metadata| valid_harnesses.contains(&&*metadata));
+    new_harnesses
+}
+
 /// For each function eligible for automatic verification,
 /// generate a harness Instance for it, then generate its metadata.
 /// Note that the body of each harness instance is still the dummy body of `kani_harness_intrinsic`;
diff --git a/kani-driver/src/call_single_file.rs b/kani-driver/src/call_single_file.rs
@@ -162,6 +162,14 @@ impl KaniSession {
             flags.push("--no-assert-contracts".into());
         }
 
+        for harness in &self.args.harnesses {
+            flags.push(format!("--harness {harness}"));
+        }
+
+        if self.args.exact {
+            flags.push("--exact".into());
+        }
+
         if let Some(args) = self.autoharness_compiler_flags.clone() {
             flags.extend(args);
         }
diff --git a/kani-driver/src/main.rs b/kani-driver/src/main.rs
@@ -128,7 +128,7 @@ fn standalone_main() -> Result<()> {
 /// Run verification on the given project.
 fn verify_project(project: Project, session: KaniSession) -> Result<()> {
     debug!(?project, "verify_project");
-    let harnesses = session.determine_targets(&project.get_all_harnesses())?;
+    let harnesses = session.determine_targets(project.get_all_harnesses())?;
     debug!(n = harnesses.len(), ?harnesses, "verify_project");
 
     // Verification
diff --git a/kani-driver/src/metadata.rs b/kani-driver/src/metadata.rs
@@ -3,9 +3,10 @@
 
 use anyhow::{Result, bail};
 use std::path::Path;
-use tracing::{debug, trace};
 
-use kani_metadata::{HarnessMetadata, InternedString, TraitDefinedMethod, VtableCtxResults};
+use kani_metadata::{
+    HarnessMetadata, InternedString, TraitDefinedMethod, VtableCtxResults, find_proof_harnesses,
+};
 use std::collections::{BTreeSet, HashMap};
 use std::fs::File;
 use std::io::{BufReader, BufWriter};
@@ -91,39 +92,42 @@ impl KaniSession {
     /// Determine which function to use as entry point, based on command-line arguments and kani-metadata.
     pub fn determine_targets<'a>(
         &self,
-        all_harnesses: &[&'a HarnessMetadata],
+        compiler_filtered_harnesses: Vec<&'a HarnessMetadata>,
     ) -> Result<Vec<&'a HarnessMetadata>> {
-        let harnesses = BTreeSet::from_iter(self.args.harnesses.iter());
-        let total_harnesses = harnesses.len();
-        let all_targets = &harnesses;
+        let harness_filters = BTreeSet::from_iter(self.args.harnesses.iter());
 
-        if harnesses.is_empty() {
-            Ok(Vec::from(all_harnesses))
-        } else {
-            let harnesses_found: Vec<&HarnessMetadata> =
-                find_proof_harnesses(&harnesses, all_harnesses, self.args.exact);
-
-            // If even one harness was not found with --exact, return an error to user
-            if self.args.exact && harnesses_found.len() < total_harnesses {
-                let harness_found_names: BTreeSet<&String> =
-                    harnesses_found.iter().map(|&h| &h.pretty_name).collect();
+        // For dev builds, re-filter the harnesses to double check filtering in the compiler
+        // and ensure we're doing the minimal harness codegen possible. That filtering happens in
+        // the `kani-compiler/src/kani_middle/codegen_units.rs` file's `determine_targets` function.
+        if cfg!(debug_assertions) && !harness_filters.is_empty() {
+            let filtered_harnesses: Vec<&HarnessMetadata> = find_proof_harnesses(
+                &harness_filters,
+                compiler_filtered_harnesses.clone(),
+                self.args.exact,
+            );
+            assert_eq!(compiler_filtered_harnesses, filtered_harnesses);
+        }
 
-                // Check which harnesses are missing from the difference of targets and harnesses_found
-                let harnesses_missing: Vec<&String> =
-                    all_targets.difference(&harness_found_names).cloned().collect();
-                let joined_string = harnesses_missing
-                    .iter()
-                    .map(|&s| (*s).clone())
-                    .collect::<Vec<String>>()
-                    .join("`, `");
+        // If any of the `--harness` filters failed to find a harness (and thus the # of harnesses is less than the # of filters), report that to the user.
+        if self.args.exact && (compiler_filtered_harnesses.len() < self.args.harnesses.len()) {
+            let harness_found_names: BTreeSet<&String> =
+                compiler_filtered_harnesses.iter().map(|&h| &h.pretty_name).collect();
 
-                bail!(
-                    "Failed to match the following harness(es):\n{joined_string}\nPlease specify the fully-qualified name of a harness.",
-                );
-            }
+            // Check which harnesses are missing from the difference of targets and all_harnesses
+            let harnesses_missing: Vec<&String> =
+                harness_filters.difference(&harness_found_names).cloned().collect();
+            let joined_string = harnesses_missing
+                .iter()
+                .map(|&s| (*s).clone())
+                .collect::<Vec<String>>()
+                .join("`, `");
 
-            Ok(harnesses_found)
+            bail!(
+                "Failed to match the following harness(es):\n{joined_string}\nPlease specify the fully-qualified name of a harness.",
+            );
         }
+
+        Ok(compiler_filtered_harnesses)
     }
 }
 
@@ -142,44 +146,6 @@ pub fn sort_harnesses_by_loc<'a>(harnesses: &[&'a HarnessMetadata]) -> Vec<&'a H
     harnesses_clone
 }
 
-/// Search for a proof harness with a particular name.
-/// At the present time, we use `no_mangle` so collisions shouldn't happen,
-/// but this function is written to be robust against that changing in the future.
-fn find_proof_harnesses<'a>(
-    targets: &BTreeSet<&String>,
-    all_harnesses: &[&'a HarnessMetadata],
-    exact_filter: bool,
-) -> Vec<&'a HarnessMetadata> {
-    debug!(?targets, "find_proof_harness");
-    let mut result = vec![];
-    for md in all_harnesses.iter() {
-        // --harnesses should not select automatic harnesses
-        if md.is_automatically_generated {
-            continue;
-        }
-        if exact_filter {
-            // Check for exact match only
-            if targets.contains(&md.pretty_name) {
-                // if exact match found, stop searching
-                result.push(*md);
-            } else {
-                trace!(skip = md.pretty_name, "find_proof_harnesses");
-            }
-        } else {
-            // Either an exact match, or a substring match. We check the exact first since it's cheaper.
-            if targets.contains(&md.pretty_name)
-                || targets.contains(&md.get_harness_name_unqualified().to_string())
-                || targets.iter().any(|target| md.pretty_name.contains(*target))
-            {
-                result.push(*md);
-            } else {
-                trace!(skip = md.pretty_name, "find_proof_harnesses");
-            }
-        }
-    }
-    result
-}
-
 #[cfg(test)]
 pub mod tests {
     use super::*;
diff --git a/kani_metadata/Cargo.toml b/kani_metadata/Cargo.toml
@@ -16,6 +16,7 @@ cbmc = { path = "../cprover_bindings", package = "cprover_bindings" }
 strum = "0.27.1"
 strum_macros = "0.27.1"
 clap = { version = "4.4.11", features = ["derive"] }
+tracing = "0.1.41"
 
 [lints]
 workspace = true
diff --git a/kani_metadata/src/harness.rs b/kani_metadata/src/harness.rs
@@ -3,8 +3,9 @@
 
 use crate::CbmcSolver;
 use serde::{Deserialize, Serialize};
-use std::path::PathBuf;
+use std::{borrow::Borrow, collections::BTreeSet, path::PathBuf};
 use strum_macros::Display;
+use tracing::{debug, trace};
 
 /// A CBMC-level `assigns` contract that needs to be enforced on a function.
 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
@@ -112,3 +113,47 @@ impl HarnessMetadata {
         }
     }
 }
+
+/// Search for a proof harness with a particular name.
+/// At the present time, we use `no_mangle` so collisions shouldn't happen,
+/// but this function is written to be robust against that changing in the future.
+pub fn find_proof_harnesses<'a, I>(
+    targets: &BTreeSet<&String>,
+    all_harnesses: I,
+    exact_filter: bool,
+) -> Vec<&'a HarnessMetadata>
+where
+    I: IntoIterator,
+    I::Item: Borrow<&'a HarnessMetadata>,
+{
+    debug!(?targets, "find_proof_harness");
+    let mut result = vec![];
+    for md in all_harnesses.into_iter() {
+        let md: &'a HarnessMetadata = md.borrow();
+
+        // --harnesses should not select automatic harnesses
+        if md.is_automatically_generated {
+            continue;
+        }
+        if exact_filter {
+            // Check for exact match only
+            if targets.contains(&md.pretty_name) {
+                // if exact match found, stop searching
+                result.push(md);
+            } else {
+                trace!(skip = md.pretty_name, "find_proof_harnesses");
+            }
+        } else {
+            // Either an exact match, or a substring match. We check the exact first since it's cheaper.
+            if targets.contains(&md.pretty_name)
+                || targets.contains(&md.get_harness_name_unqualified().to_string())
+                || targets.iter().any(|target| md.pretty_name.contains(*target))
+            {
+                result.push(md);
+            } else {
+                trace!(skip = md.pretty_name, "find_proof_harnesses");
+            }
+        }
+    }
+    result
+}

Original file line number	Diff line number	Diff line change
`@@ -1199,6 +1199,7 @@ dependencies = [`
`1199`	`1199`	`"serde",`
`1200`	`1200`	`"strum",`
`1201`	`1201`	`"strum_macros",`
	`1202`	`+ "tracing",`
`1202`	`1203`	`]`
`1203`	`1204`
`1204`	`1205`	`[[package]]`
Original file line number	Diff line number	Diff line change
`@@ -162,6 +162,14 @@ impl KaniSession {`
`162`	`162`	`flags.push("--no-assert-contracts".into());`
`163`	`163`	`}`
`164`	`164`
	`165`	`+ for harness in &self.args.harnesses {`
	`166`	`+ flags.push(format!("--harness {harness}"));`
	`167`	`+ }`
	`168`	`+`
	`169`	`+ if self.args.exact {`
	`170`	`+ flags.push("--exact".into());`
	`171`	`+ }`
	`172`	`+`
`165`	`173`	`if let Some(args) = self.autoharness_compiler_flags.clone() {`
`166`	`174`	`flags.extend(args);`
`167`	`175`	`}`