1+ use scanner_core:: * ;
2+ use std:: fs;
3+ use std:: path:: PathBuf ;
4+
/// Test that compares AST-based detection results against generated ground truth JSONL files.
///
/// Every directory under `<workspace>/fixtures` that contains a `ground_truth.jsonl` file is
/// scanned on its own. The findings are serialized to JSONL and compared line by line with that
/// file. Mismatching directories are reported on stdout but tolerated; the test only fails when
/// no directory matches at all.
///
/// # Panics
///
/// Panics if a detector cannot be constructed, if the fixture tree or a ground truth file cannot
/// be read, if a scan or the JSONL serialization fails, or if no directory matches.
#[test]
fn compare_ast_ground_truth() {
    // Number of differing lines printed per mismatching directory.
    const MAX_DIFF_LINES: usize = 3;

    let workspace = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");

    // Use AST-based detectors
    let dets: Vec<Box<dyn Detector>> = vec![
        Box::new(AstBasedDetector::new("ast-detector-c", &[Language::C]).unwrap()),
        Box::new(AstBasedDetector::new("ast-detector-cpp", &[Language::Cpp]).unwrap()),
        Box::new(AstBasedDetector::new("ast-detector-rust", &[Language::Rust]).unwrap()),
        Box::new(AstBasedDetector::new("ast-detector-python", &[Language::Python]).unwrap()),
        Box::new(AstBasedDetector::new("ast-detector-java", &[Language::Java]).unwrap()),
        Box::new(AstBasedDetector::new("ast-detector-go", &[Language::Go]).unwrap()),
    ];

    let reg = PatternRegistry::empty();
    let mut config = Config::default();
    config.deterministic = true; // Ensure reproducible results
    let scanner = Scanner::new(&reg, dets, config);

    let fixtures_root = workspace.join("fixtures");

    // Find all directories that have ground truth files
    let mut ground_truth_dirs = Vec::new();
    collect_ground_truth_dirs(&fixtures_root, &mut ground_truth_dirs).unwrap();

    println!("Found {} directories with ground truth files", ground_truth_dirs.len());

    let mut total_matches = 0;
    let mut total_mismatches = 0;

    // Test each directory with ground truth
    for dir in ground_truth_dirs {
        let ground_truth_file = dir.join("ground_truth.jsonl");

        // Run scanner on this directory
        let findings = scanner.run(&[dir.clone()]).unwrap();

        // Convert findings to JSONL format and normalize paths
        let mut crypto_findings = CryptoFindings::from_scanner_findings(findings);

        // Normalize file paths: keep everything from the first "fixtures/" onwards so the
        // serialized paths do not depend on where the workspace is checked out.
        for finding in &mut crypto_findings.findings {
            let file_str = finding.file.to_string_lossy();
            if let Some(idx) = file_str.find("fixtures/") {
                finding.file = std::path::PathBuf::from(&file_str[idx..]);
            }
        }

        let actual_jsonl = crypto_findings.to_jsonl().unwrap();

        // Read expected ground truth
        let expected_jsonl = fs::read_to_string(&ground_truth_file).unwrap();

        // Compare line by line (order matters due to deterministic flag)
        let actual_lines: Vec<&str> = actual_jsonl.lines().collect();
        let expected_lines: Vec<&str> = expected_jsonl.lines().collect();

        if actual_lines == expected_lines {
            total_matches += 1;
            println!("✓ {}", dir.strip_prefix(&workspace).unwrap().display());
        } else {
            total_mismatches += 1;
            println!("✗ {}", dir.strip_prefix(&workspace).unwrap().display());
            println!(
                " Expected {} lines, got {} lines",
                expected_lines.len(),
                actual_lines.len()
            );

            // Show first few differences for debugging
            report_line_differences(&expected_lines, &actual_lines, MAX_DIFF_LINES);
        }
    }

    println!("\n Ground truth comparison summary:");
    println!(" Matches: {}", total_matches);
    println!(" Mismatches: {}", total_mismatches);
    println!(" Total: {}", total_matches + total_mismatches);

    // Allow some mismatches during development, but ensure we have some matches
    assert!(total_matches > 0, "No ground truth matches found - AST detection may be broken");

    // For now, we'll be lenient during development. In production, this should be:
    // assert_eq!(total_mismatches, 0, "Ground truth mismatches found");
}

/// Prints up to `max_shown` differing lines between `expected` and `actual`.
///
/// Lines are compared by position over the longer of the two inputs, so lines that exist on only
/// one side are reported too; the absent side is printed as `<missing>`. A trailing notice is
/// printed only when further differences exist beyond the ones shown.
fn report_line_differences(expected: &[&str], actual: &[&str], max_shown: usize) {
    let longest = expected.len().max(actual.len());
    let mut differing = (0..longest).filter(|&i| expected.get(i) != actual.get(i));

    // `by_ref` keeps the iterator usable afterwards so we can tell whether anything was cut off.
    for i in differing.by_ref().take(max_shown) {
        let expected_line = expected.get(i).copied().unwrap_or("<missing>");
        let actual_line = actual.get(i).copied().unwrap_or("<missing>");
        println!(" Line {}: Expected: {}", i + 1, expected_line);
        println!(" Line {}: Actual: {}", i + 1, actual_line);
    }

    if differing.next().is_some() {
        println!(" ... (showing only first {} differences)", max_shown);
    }
}
116+
/// Recursively collects every directory at or below `root` that contains a
/// `ground_truth.jsonl` file, appending each match to `dirs`.
///
/// A directory is pushed before its descendants, and subdirectories are visited in sorted path
/// order so the result does not depend on the platform's directory iteration order.
/// Subdirectories whose name starts with `.` are skipped. If `root` is not a directory the
/// function returns `Ok(())` without touching `dirs`.
///
/// # Errors
///
/// Returns an error if a directory cannot be listed or one of its entries cannot be read.
fn collect_ground_truth_dirs(
    root: &std::path::Path,
    dirs: &mut Vec<PathBuf>,
) -> Result<(), Box<dyn std::error::Error>> {
    if !root.is_dir() {
        return Ok(());
    }

    // Check if this directory has a ground truth file
    if root.join("ground_truth.jsonl").exists() {
        dirs.push(root.to_path_buf());
    }

    // Gather the visible subdirectories first so they can be visited in a stable order.
    let mut subdirs = Vec::new();
    for entry in fs::read_dir(root)? {
        let entry = entry?;
        // Lossy conversion: a name that is not valid UTF-8 must not abort the traversal.
        let is_hidden = entry.file_name().to_string_lossy().starts_with('.');
        let path = entry.path();
        if path.is_dir() && !is_hidden {
            subdirs.push(path);
        }
    }
    subdirs.sort();

    // Recursively check subdirectories
    for subdir in &subdirs {
        collect_ground_truth_dirs(subdir, dirs)?;
    }

    Ok(())
}
0 commit comments