@@ -31,6 +31,56 @@ fn get_packument_url(registry: &str, package_name: &str) -> anyhow::Result<Url>
3131 . map_err ( |e| anyhow ! ( "failed to construct packument URL: {e}" ) )
3232}
3333
34+ /// Normalize packument data to ensure determinism.
35+ ///
36+ /// Strips volatile fields like `_rev`, `time`, and `modified`.
37+ /// Filters the `versions` map to only include versions requested in the lockfile.
38+ fn normalize_packument (
39+ package_name : & str ,
40+ data : & [ u8 ] ,
41+ requested_versions : & HashSet < String > ,
42+ ) -> anyhow:: Result < Vec < u8 > > {
43+ let mut json: Value = serde_json:: from_slice ( data)
44+ . map_err ( |e| anyhow ! ( "failed to parse packument JSON for {package_name}: {e}" ) ) ?;
45+
46+ let obj = json
47+ . as_object_mut ( )
48+ . ok_or_else ( || anyhow ! ( "packument for {package_name} is not a JSON object" ) ) ?;
49+
50+ // Strip volatile top-level fields
51+ obj. remove ( "_rev" ) ;
52+ obj. remove ( "time" ) ;
53+ obj. remove ( "modified" ) ;
54+
55+ // Filter versions
56+ if let Some ( Value :: Object ( versions) ) = obj. get_mut ( "versions" ) {
57+ versions. retain ( |version, _| requested_versions. contains ( version) ) ;
58+
59+ // Normalize each version object
60+ for version_val in versions. values_mut ( ) {
61+ if let Some ( version_obj) = version_val. as_object_mut ( ) {
62+ // Strip fields starting with underscore (volatile/internal)
63+ version_obj. retain ( |key, _| !key. starts_with ( '_' ) ) ;
64+ // Strip other often-volatile fields
65+ version_obj. remove ( "gitHead" ) ;
66+ }
67+ }
68+ }
69+
70+ // Filter dist-tags to only point to versions we kept
71+ if let Some ( Value :: Object ( tags) ) = obj. get_mut ( "dist-tags" ) {
72+ tags. retain ( |_, version_val| {
73+ if let Some ( version) = version_val. as_str ( ) {
74+ requested_versions. contains ( version)
75+ } else {
76+ false
77+ }
78+ } ) ;
79+ }
80+
81+ serde_json:: to_vec ( & json) . map_err ( |e| anyhow ! ( "failed to re-serialize packument for {package_name}: {e}" ) )
82+ }
83+
3484/// Fetch and cache packuments (package metadata) for all packages.
3585///
3686/// This is needed because npm may query package metadata for optional peer dependencies
@@ -43,60 +93,68 @@ fn get_packument_url(registry: &str, package_name: &str) -> anyhow::Result<Url>
4393///
4494/// We cache both versions to ensure cache hits regardless of which header npm uses.
4595/// See: pacote/lib/registry.js and @npmcli/arborist/lib/arborist/build-ideal-tree.js
46- fn fetch_packuments ( cache : & Cache , package_names : HashSet < String > ) -> anyhow:: Result < ( ) > {
96+ fn fetch_packuments (
97+ cache : & Cache ,
98+ package_versions : HashMap < String , HashSet < String > > ,
99+ ) -> anyhow:: Result < ( ) > {
47100 const CORGI_DOC : & str =
48101 "application/vnd.npm.install-v1+json; q=1.0, application/json; q=0.8, */*" ;
49102 const FULL_DOC : & str = "application/json" ;
50103
51- info ! ( "Fetching {} packuments" , package_names. len( ) ) ;
52-
53- package_names. into_par_iter ( ) . try_for_each ( |package_name| {
54- let packument_url = get_packument_url ( "https://registry.npmjs.org" , & package_name) ?;
55-
56- match util:: get_url_body_with_retry ( & packument_url) {
57- Ok ( packument_data) => {
58- // npm's make-fetch-happen uses the URL-encoded form for cache keys
59- // e.g., "https://registry.npmjs.org/@types%2freact-dom" not "@types/react-dom"
60- // We must use the encoded form in both the cache key string AND the metadata URL
61-
62- // Cache with corgiDoc header (for initial requests)
63- cache
64- . put (
65- format ! ( "make-fetch-happen:request-cache:{packument_url}" ) ,
66- packument_url. clone ( ) ,
67- & packument_data,
68- None , // Packuments don't have integrity hashes
69- Some ( ReqHeaders {
70- accept : String :: from ( CORGI_DOC ) ,
71- } ) ,
72- )
73- . map_err ( |e| {
74- anyhow ! ( "couldn't insert packument cache entry (corgi) for {package_name}: {e:?}" )
75- } ) ?;
76-
77- // Cache with fullDoc header (for workspace/full metadata requests)
78- cache
79- . put (
80- format ! ( "make-fetch-happen:request-cache:{packument_url}" ) ,
81- packument_url. clone ( ) ,
82- & packument_data,
83- None ,
84- Some ( ReqHeaders {
85- accept : String :: from ( FULL_DOC ) ,
86- } ) ,
87- )
88- . map_err ( |e| {
89- anyhow ! ( "couldn't insert packument cache entry (full) for {package_name}: {e:?}" )
90- } ) ?;
91- }
92- Err ( e) => {
93- // Log but don't fail - some packages might not need packuments
94- info ! ( "Warning: couldn't fetch packument for {package_name}: {e}" ) ;
104+ info ! ( "Fetching {} packuments" , package_versions. len( ) ) ;
105+
106+ package_versions
107+ . into_par_iter ( )
108+ . try_for_each ( |( package_name, requested_versions) | {
109+ let packument_url = get_packument_url ( "https://registry.npmjs.org" , & package_name) ?;
110+
111+ match util:: get_url_body_with_retry ( & packument_url) {
112+ Ok ( packument_data) => {
113+ let normalized_data =
114+ normalize_packument ( & package_name, & packument_data, & requested_versions) ?;
115+
116+ // npm's make-fetch-happen uses the URL-encoded form for cache keys
117+ // e.g., "https://registry.npmjs.org/@types%2freact-dom" not "@types/react-dom"
118+ // We must use the encoded form in both the cache key string AND the metadata URL
119+
120+ // Cache with corgiDoc header (for initial requests)
121+ cache
122+ . put (
123+ format ! ( "make-fetch-happen:request-cache:{packument_url}" ) ,
124+ packument_url. clone ( ) ,
125+ & normalized_data,
126+ None , // Packuments don't have integrity hashes
127+ Some ( ReqHeaders {
128+ accept : String :: from ( CORGI_DOC ) ,
129+ } ) ,
130+ )
131+ . map_err ( |e| {
132+ anyhow ! ( "couldn't insert packument cache entry (corgi) for {package_name}: {e:?}" )
133+ } ) ?;
134+
135+ // Cache with fullDoc header (for workspace/full metadata requests)
136+ cache
137+ . put (
138+ format ! ( "make-fetch-happen:request-cache:{packument_url}" ) ,
139+ packument_url. clone ( ) ,
140+ & normalized_data,
141+ None ,
142+ Some ( ReqHeaders {
143+ accept : String :: from ( FULL_DOC ) ,
144+ } ) ,
145+ )
146+ . map_err ( |e| {
147+ anyhow ! ( "couldn't insert packument cache entry (full) for {package_name}: {e:?}" )
148+ } ) ?;
149+ }
150+ Err ( e) => {
151+ // Log but don't fail - some packages might not need packuments
152+ info ! ( "Warning: couldn't fetch packument for {package_name}: {e}" ) ;
153+ }
95154 }
96- }
97155
98- Ok :: < _ , anyhow:: Error > ( ( ) )
99- } )
156+ Ok :: < _ , anyhow:: Error > ( ( ) )
157+ } )
100158}
101159
102160/// `fixup_lockfile` rewrites `integrity` hashes to match cache and removes the `integrity` field from Git dependencies.
@@ -332,12 +390,24 @@ fn main() -> anyhow::Result<()> {
332390 let cache = Cache :: new ( out. join ( "_cacache" ) ) ;
333391 cache. init ( ) ?;
334392
335- // Collect unique package names for packument fetching
336- // Extract from lockfile keys like "node_modules/@scope/name" -> "@scope/name"
337- let package_names: HashSet < String > = packages
338- . iter ( )
339- . filter_map ( |p| p. name . rsplit_once ( "node_modules/" ) . map ( |( _, n) | n. to_string ( ) ) )
340- . collect ( ) ;
393+ // Collect package names and their versions from the lockfile for packument filtering.
394+ // For non-aliased packages: extract from lockfile keys like "node_modules/@scope/name" -> "@scope/name"
395+ // For aliased packages: the name is already the real package name (e.g., "string-width" not "string-width-cjs")
396+ // We only care about packages that have a version string (registry packages).
397+ let mut package_versions: HashMap < String , HashSet < String > > = HashMap :: new ( ) ;
398+ for p in & packages {
399+ if let Some ( version) = & p. version {
400+ let pkg_name = p
401+ . name
402+ . rsplit_once ( "node_modules/" )
403+ . map ( |( _, name) | name. to_string ( ) )
404+ . unwrap_or_else ( || p. name . clone ( ) ) ;
405+ package_versions
406+ . entry ( pkg_name)
407+ . or_default ( )
408+ . insert ( version. to_string ( ) ) ;
409+ }
410+ }
341411
342412 // Fetch and cache tarballs
343413 packages. into_par_iter ( ) . try_for_each ( |package| {
@@ -359,14 +429,15 @@ fn main() -> anyhow::Result<()> {
359429 Ok :: < _ , anyhow:: Error > ( ( ) )
360430 } ) ?;
361431
362- // Fetch and cache packuments (package metadata) - only for cache version 2+
363- let cache_version : u32 = env:: var ( "NPM_CACHE_VERSION " )
432+ // Fetch and cache packuments (package metadata) - only for fetcher version 2+
433+ let fetcher_version : u32 = env:: var ( "NPM_FETCHER_VERSION " )
364434 . ok ( )
365435 . and_then ( |v| v. parse ( ) . ok ( ) )
366436 . unwrap_or ( 1 ) ;
367437
368- if cache_version >= 2 {
369- fetch_packuments ( & cache, package_names) ?;
438+ if fetcher_version >= 2 {
439+ fetch_packuments ( & cache, package_versions) ?;
440+ fs:: write ( out. join ( ".fetcher-version" ) , format ! ( "{fetcher_version}" ) ) ?;
370441 }
371442
372443 fs:: write ( out. join ( "package-lock.json" ) , lock_content) ?;
0 commit comments