11use anyhow:: { Context , Result } ;
2+ use globset:: { Glob , GlobSet , GlobSetBuilder } ;
23use include_dir:: { Dir , include_dir} ;
4+ use once_cell:: sync:: Lazy ;
35use std:: fs;
4- use std:: path:: Path ;
6+ use std:: path:: { Path , PathBuf } ;
57
68#[ cfg( feature = "native" ) ]
79use once_cell:: sync:: OnceCell ;
10+ #[ cfg( feature = "native" ) ]
11+ use walkdir:: WalkDir ;
812
913/// Embedded stdlib tree sourced directly from repository stdlib/.
1014static EMBEDDED_STDLIB : Dir = include_dir ! ( "$CARGO_MANIFEST_DIR/../../stdlib" ) ;
15+ static EXCLUDED_STDLIB_PATHS : Lazy < GlobSet > = Lazy :: new ( || {
16+ let mut builder = GlobSetBuilder :: new ( ) ;
17+ for pattern in [
18+ ".gitignore" ,
19+ "**/.gitignore" ,
20+ "**/*.log" ,
21+ "**/*layout.json" ,
22+ "**/test" ,
23+ "**/test/**" ,
24+ ] {
25+ builder. add ( Glob :: new ( pattern) . expect ( "valid stdlib exclude glob" ) ) ;
26+ }
27+ builder
28+ . build ( )
29+ . expect ( "valid stdlib exclude globset configuration" )
30+ } ) ;
1131
1232pub fn embedded_stdlib_dir ( ) -> & ' static Dir < ' static > {
1333 & EMBEDDED_STDLIB
1434}
1535
36+ /// Filter out tool-generated files and local ignore metadata so embedded and
37+ /// materialized stdlib hashing stays stable across workspaces.
38+ fn include_stdlib_path ( path : & Path ) -> bool {
39+ !EXCLUDED_STDLIB_PATHS . is_match ( path)
40+ }
41+
1642#[ cfg( feature = "native" ) ]
1743pub fn embedded_stdlib_hash ( ) -> & ' static str {
1844 static EMBEDDED_STDLIB_HASH : OnceCell < String > = OnceCell :: new ( ) ;
@@ -26,6 +52,17 @@ pub fn embedded_stdlib_hash() -> &'static str {
2652 . as_str ( )
2753}
2854
/// Computes the content hash of a stdlib tree stored on disk under `root`.
///
/// Files are gathered with `collect_stdlib_disk_files`, which skips excluded
/// paths and records each file under its path relative to `root`. The
/// resulting `(path, contents)` pairs are then handed to
/// `pcb_canonical::compute_content_hash_from_memory_files`.
///
/// # Errors
///
/// Fails if the directory walk or a file read fails, or if the hashing
/// routine itself reports an error.
#[cfg(feature = "native")]
pub fn compute_stdlib_dir_hash(root: &Path) -> Result<String> {
    let mut disk_files: Vec<(PathBuf, Vec<u8>)> = Vec::new();
    collect_stdlib_disk_files(root, &mut disk_files)?;

    // The hasher takes borrowed views, so re-borrow the owned buffers.
    let mut borrowed: Vec<(&Path, &[u8])> = Vec::with_capacity(disk_files.len());
    for (rel_path, bytes) in &disk_files {
        borrowed.push((rel_path.as_path(), bytes.as_slice()));
    }

    pcb_canonical::compute_content_hash_from_memory_files(borrowed)
}
65+
2966/// Extract the embedded stdlib tree into `target_dir`.
3067pub fn extract_embedded_stdlib ( target_dir : & Path ) -> Result < ( ) > {
3168 fs:: create_dir_all ( target_dir) . with_context ( || {
@@ -39,19 +76,74 @@ pub fn extract_embedded_stdlib(target_dir: &Path) -> Result<()> {
3976 "Failed to extract embedded stdlib into {}" ,
4077 target_dir. display( )
4178 )
42- } )
79+ } ) ?;
80+ #[ cfg( feature = "native" ) ]
81+ prune_excluded_paths ( target_dir) ?;
82+ Ok ( ( ) )
4383}
4484
/// Recursively appends every non-excluded file of the embedded directory
/// `dir` to `out` as a `(path, contents)` pair.
///
/// Files directly inside `dir` are appended first, then each subdirectory is
/// visited in turn. Paths rejected by `include_stdlib_path` are skipped.
#[cfg(feature = "native")]
fn collect_embedded_files(dir: &Dir<'static>, out: &mut Vec<(&'static Path, &'static [u8])>) {
    for file in dir.files() {
        let file_path = file.path();
        if include_stdlib_path(file_path) {
            out.push((file_path, file.contents()));
        }
    }

    for child in dir.dirs() {
        collect_embedded_files(child, out);
    }
}
5296
/// Walks the directory tree under `root` and appends every non-excluded
/// regular file to `out` as `(path relative to root, file contents)`.
///
/// Symbolic links are not followed, and only entries whose type is a regular
/// file are considered. Paths rejected by `include_stdlib_path` are skipped
/// before their contents are read.
///
/// # Errors
///
/// Fails if the walk reports an error, if an entry's path is not under `root`,
/// or if a file cannot be read.
#[cfg(feature = "native")]
fn collect_stdlib_disk_files(root: &Path, out: &mut Vec<(PathBuf, Vec<u8>)>) -> Result<()> {
    let walker = WalkDir::new(root).follow_links(false);
    for walked in walker {
        let walked = walked.with_context(|| format!("Failed to walk {}", root.display()))?;
        if walked.file_type().is_file() {
            let full_path = walked.path();
            let relative = full_path.strip_prefix(root).with_context(|| {
                format!("{} is not under {}", full_path.display(), root.display())
            })?;

            if include_stdlib_path(relative) {
                let bytes = fs::read(full_path)
                    .with_context(|| format!("Failed to read {}", full_path.display()))?;
                out.push((relative.to_path_buf(), bytes));
            }
        }
    }
    Ok(())
}
119+
/// Deletes every entry below `root` whose root-relative path is rejected by
/// `include_stdlib_path`.
///
/// The walk yields a directory's contents before the directory itself and
/// never yields `root`. Excluded directories are removed recursively; any
/// other excluded entry is removed as a file.
///
/// # Errors
///
/// Fails if the walk reports an error, if an entry's path is not under `root`,
/// or if a removal fails.
#[cfg(feature = "native")]
fn prune_excluded_paths(root: &Path) -> Result<()> {
    let walker = WalkDir::new(root).contents_first(true).min_depth(1);
    for walked in walker {
        let walked = walked.with_context(|| format!("Failed to walk {}", root.display()))?;
        let full_path = walked.path();
        let relative = full_path.strip_prefix(root).with_context(|| {
            format!("{} is not under {}", full_path.display(), root.display())
        })?;

        if !include_stdlib_path(relative) {
            if walked.file_type().is_dir() {
                fs::remove_dir_all(full_path).with_context(|| {
                    format!("Failed to remove directory {}", full_path.display())
                })?;
            } else {
                fs::remove_file(full_path)
                    .with_context(|| format!("Failed to remove file {}", full_path.display()))?;
            }
        }
    }
    Ok(())
}
142+
53143#[ cfg( test) ]
54144mod tests {
145+ use std:: path:: Path ;
146+
55147 #[ test]
56148 fn embeds_expected_stdlib_files ( ) {
57149 let stdlib = & super :: EMBEDDED_STDLIB ;
@@ -61,4 +153,32 @@ mod tests {
61153 assert ! ( stdlib. get_file( "docs/spec.md" ) . is_some( ) ) ;
62154 assert ! ( stdlib. get_dir( ".pcb" ) . is_none( ) ) ;
63155 }
156+
/// Checks each exclusion pattern on its own, so that removing any single
/// pattern makes this test fail.
#[test]
fn stdlib_filter_excludes_hidden_and_generated_noise() {
    // Ordinary stdlib sources are kept, including names that merely start
    // with "test".
    assert!(super::include_stdlib_path(Path::new("interfaces.zen")));
    assert!(super::include_stdlib_path(Path::new("docs/spec.md")));
    assert!(super::include_stdlib_path(Path::new("testing/helpers.zen")));

    // `test` directories and everything beneath them, at any depth.
    assert!(!super::include_stdlib_path(Path::new("test")));
    assert!(!super::include_stdlib_path(Path::new("test/test_checks.zen")));
    assert!(!super::include_stdlib_path(Path::new(
        "nested/test/test_checks.zen"
    )));

    // Ignore metadata at the root and in subdirectories.
    assert!(!super::include_stdlib_path(Path::new(".gitignore")));
    assert!(!super::include_stdlib_path(Path::new("docs/.gitignore")));

    // Generated files under `test/` (already covered by the directory rule).
    assert!(!super::include_stdlib_path(Path::new("test/layout/layout.log")));
    assert!(!super::include_stdlib_path(Path::new(
        "test/layout/snapshot.layout.json",
    )));

    // Generated files outside `test/`: these are the assertions that actually
    // exercise the `*.log` and `*layout.json` patterns.
    assert!(!super::include_stdlib_path(Path::new("build.log")));
    assert!(!super::include_stdlib_path(Path::new("boards/layout.log")));
    assert!(!super::include_stdlib_path(Path::new("layout.json")));
    assert!(!super::include_stdlib_path(Path::new(
        "boards/snapshot.layout.json",
    )));
}
171+
/// Extracting the embedded stdlib must drop excluded paths, and hashing the
/// extracted tree must reproduce the embedded stdlib hash.
#[cfg(feature = "native")]
#[test]
fn filtered_embedded_hash_matches_filtered_extracted_hash() {
    let scratch = tempfile::tempdir().expect("create temp dir");
    let extracted_root = scratch.path();
    super::extract_embedded_stdlib(extracted_root).expect("extract stdlib");

    // Excluded entries must not survive extraction.
    let test_dir = extracted_root.join("test");
    let gitignore = extracted_root.join(".gitignore");
    assert!(!test_dir.exists());
    assert!(!gitignore.exists());

    let expected = super::embedded_stdlib_hash().to_string();
    let actual = super::compute_stdlib_dir_hash(extracted_root).expect("hash extracted stdlib");
    assert_eq!(expected, actual);
}
64184}
0 commit comments