1
+ //! Access to a Git index based registry. See [`RemoteRegistry`] for details.
2
+
1
3
use crate :: core:: { GitReference , PackageId , SourceId } ;
2
4
use crate :: sources:: git;
3
5
use crate :: sources:: git:: fetch:: RemoteKind ;
@@ -21,29 +23,73 @@ use std::task::{ready, Poll};
21
23
/// A remote registry is a registry that lives at a remote URL (such as
22
24
/// crates.io). The git index is cloned locally, and `.crate` files are
23
25
/// downloaded as needed and cached locally.
26
+ ///
27
+ /// This type is primarily accessed through the [`RegistryData`] trait.
28
+ ///
29
+ /// See the [module-level documentation](super) for the index format and layout.
30
+ ///
31
+ /// ## History of Git-based index registry
32
+ ///
33
+ /// Using Git to host this index used to be quite efficient. The full index can
34
+ /// be stored efficiently locally on disk, and once it is downloaded, all
35
+ /// queries of a registry can happen locally and needn't touch the network.
36
+ /// Git-based index was a reasonable design choice at the time when HTTP/2
37
+ /// was just introduced.
38
+ ///
39
+ /// However, the full index keeps growing as crates.io grows. It becomes
40
+ /// relatively big and slows down the first use of Cargo. Git (specifically
41
+ /// libgit2) is not efficient at handling huge amounts of small files either.
42
+ /// On the other hand, newer protocols like HTTP/2 are prevalent and capable of
43
+ /// serving a bunch of tiny files. Today, it is encouraged to use [`HttpRegistry`],
44
+ /// which is the default from 1.70.0. That being said, Cargo will continue
45
+ /// supporting Git-based index for a pretty long while.
46
+ ///
47
+ /// [`HttpRegistry`]: super::http_remote::HttpRegistry
24
48
pub struct RemoteRegistry < ' cfg > {
49
+ /// Path to the registry index (`$CARGO_HOME/registry/index/$REG-HASH`).
25
50
index_path : Filesystem ,
26
- /// Path to the cache of `.crate` files (`$CARGO_HOME/registry/path /$REG-HASH`).
51
+ /// Path to the cache of `.crate` files (`$CARGO_HOME/registry/cache/$REG-HASH`).
27
52
cache_path : Filesystem ,
53
+ /// The unique identifier of this registry source.
28
54
source_id : SourceId ,
55
+ /// This reference is stored so that when a registry needs update, it knows
56
+ /// where to fetch from.
29
57
index_git_ref : GitReference ,
30
58
config : & ' cfg Config ,
59
+ /// A Git [tree object] to help this registry find crate metadata from the
60
+ /// underlying Git repository.
61
+ ///
62
+ /// This is stored here to prevent Git from repeatedly creating a tree object
63
+ /// during each call into `load()`.
64
+ ///
65
+ /// [tree object]: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects#_tree_objects
31
66
tree : RefCell < Option < git2:: Tree < ' static > > > ,
67
+ /// A Git repository that contains the actual index we want.
32
68
repo : LazyCell < git2:: Repository > ,
69
+ /// The current HEAD commit of the underlying Git repository.
33
70
head : Cell < Option < git2:: Oid > > ,
71
+ /// This stores the SHA value of the current HEAD commit for convenience.
34
72
current_sha : Cell < Option < InternedString > > ,
35
- needs_update : bool , // Does this registry need to be updated?
73
+ /// Whether this registry needs to update package information.
74
+ ///
75
+ /// See [`RemoteRegistry::mark_updated`] on how to make sure a registry
76
+ /// index is updated only once per session.
77
+ needs_update : bool ,
78
+ /// Disables status messages.
36
79
quiet : bool ,
37
80
}
38
81
39
82
impl < ' cfg > RemoteRegistry < ' cfg > {
83
+ /// Creates a Git-based remote registry for `source_id`.
84
+ ///
85
+ /// * `name` --- Name of a path segment where `.crate` tarballs and the
86
+ /// registry index are stored. Expected to be unique.
40
87
pub fn new ( source_id : SourceId , config : & ' cfg Config , name : & str ) -> RemoteRegistry < ' cfg > {
41
88
RemoteRegistry {
42
89
index_path : config. registry_index_path ( ) . join ( name) ,
43
90
cache_path : config. registry_cache_path ( ) . join ( name) ,
44
91
source_id,
45
92
config,
46
- // TODO: we should probably make this configurable
47
93
index_git_ref : GitReference :: DefaultBranch ,
48
94
tree : RefCell :: new ( None ) ,
49
95
repo : LazyCell :: new ( ) ,
@@ -54,17 +100,16 @@ impl<'cfg> RemoteRegistry<'cfg> {
54
100
}
55
101
}
56
102
103
+ /// Creates intermediate dirs and initializes the repository.
57
104
fn repo ( & self ) -> CargoResult < & git2:: Repository > {
58
105
self . repo . try_borrow_with ( || {
59
106
let path = self . config . assert_package_cache_locked ( & self . index_path ) ;
60
107
61
- // Fast path without a lock
62
108
if let Ok ( repo) = git2:: Repository :: open ( & path) {
63
109
trace ! ( "opened a repo without a lock" ) ;
64
110
return Ok ( repo) ;
65
111
}
66
112
67
- // Ok, now we need to lock and try the whole thing over again.
68
113
trace ! ( "acquiring registry index lock" ) ;
69
114
match git2:: Repository :: open ( & path) {
70
115
Ok ( repo) => Ok ( repo) ,
@@ -97,6 +142,7 @@ impl<'cfg> RemoteRegistry<'cfg> {
97
142
} )
98
143
}
99
144
145
+ /// Get the object ID of the HEAD commit from the underlying Git repository.
100
146
fn head ( & self ) -> CargoResult < git2:: Oid > {
101
147
if self . head . get ( ) . is_none ( ) {
102
148
let repo = self . repo ( ) ?;
@@ -106,6 +152,8 @@ impl<'cfg> RemoteRegistry<'cfg> {
106
152
Ok ( self . head . get ( ) . unwrap ( ) )
107
153
}
108
154
155
+ /// Returns a [`git2::Tree`] object of the current HEAD commit of the
156
+ /// underlying Git repository.
109
157
fn tree ( & self ) -> CargoResult < Ref < ' _ , git2:: Tree < ' _ > > > {
110
158
{
111
159
let tree = self . tree . borrow ( ) ;
@@ -117,6 +165,7 @@ impl<'cfg> RemoteRegistry<'cfg> {
117
165
let commit = repo. find_commit ( self . head ( ) ?) ?;
118
166
let tree = commit. tree ( ) ?;
119
167
168
+ // SAFETY:
120
169
// Unfortunately in libgit2 the tree objects look like they've got a
121
170
// reference to the repository object which means that a tree cannot
122
171
// outlive the repository that it came from. Here we want to cache this
@@ -134,6 +183,9 @@ impl<'cfg> RemoteRegistry<'cfg> {
134
183
Ok ( Ref :: map ( self . tree . borrow ( ) , |s| s. as_ref ( ) . unwrap ( ) ) )
135
184
}
136
185
186
+ /// Gets the current version of the registry index.
187
+ ///
188
+ /// It is usually the SHA of the HEAD commit from the underlying Git repository.
137
189
fn current_version ( & self ) -> Option < InternedString > {
138
190
if let Some ( sha) = self . current_sha . get ( ) {
139
191
return Some ( sha) ;
@@ -143,10 +195,16 @@ impl<'cfg> RemoteRegistry<'cfg> {
143
195
Some ( sha)
144
196
}
145
197
198
+ /// Whether the registry is up-to-date. See [`Self::mark_updated`] for more.
146
199
fn is_updated ( & self ) -> bool {
147
200
self . config . updated_sources ( ) . contains ( & self . source_id )
148
201
}
149
202
203
+ /// Marks this registry as up-to-date.
204
+ ///
205
+ /// This makes sure the index is only updated once per session since it is
206
+ /// an expensive operation. This generally only happens when the resolver
207
+ /// is run multiple times, such as during `cargo publish`.
150
208
fn mark_updated ( & self ) {
151
209
self . config . updated_sources ( ) . insert ( self . source_id ) ;
152
210
}
@@ -156,7 +214,7 @@ const LAST_UPDATED_FILE: &str = ".last-updated";
156
214
157
215
impl < ' cfg > RegistryData for RemoteRegistry < ' cfg > {
158
216
fn prepare ( & self ) -> CargoResult < ( ) > {
159
- self . repo ( ) ?; // create intermediate dirs and initialize the repo
217
+ self . repo ( ) ?;
160
218
Ok ( ( ) )
161
219
}
162
220
@@ -168,13 +226,20 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
168
226
self . config . assert_package_cache_locked ( path)
169
227
}
170
228
171
- // `index_version` Is a string representing the version of the file used to construct the cached copy.
172
- // Older versions of Cargo used the single value of the hash of the HEAD commit as a `index_version`.
173
- // This is technically correct but a little too conservative. If a new commit is fetched all cached
174
- // files need to be regenerated even if a particular file was not changed.
175
- // However if an old cargo has written such a file we still know how to read it, as long as we check for that hash value.
176
- //
177
- // Cargo now uses a hash of the file's contents as provided by git.
229
+ /// Read the general concept for `load()` on [`RegistryData::load`].
230
+ ///
231
+ /// `index_version` is a string representing the version of the file used
232
+ /// to construct the cached copy.
233
+ ///
234
+ /// Older versions of Cargo used the single value of the hash of the HEAD
235
+ /// commit as an `index_version`. This is technically correct but a little
236
+ /// too conservative. If a new commit is fetched all cached files need to
237
+ /// be regenerated even if a particular file was not changed.
238
+ ///
239
+ /// However if an old cargo has written such a file we still know how to
240
+ /// read it, as long as we check for that hash value.
241
+ ///
242
+ /// Cargo now uses a hash of the file's contents as provided by git.
178
243
fn load (
179
244
& mut self ,
180
245
_root : & Path ,
@@ -187,7 +252,8 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
187
252
// Check if the cache is valid.
188
253
let git_commit_hash = self . current_version ( ) ;
189
254
if index_version. is_some ( ) && index_version == git_commit_hash. as_deref ( ) {
190
- // This file was written by an old version of cargo, but it is still up-to-date.
255
+ // This file was written by an old version of cargo, but it is
256
+ // still up-to-date.
191
257
return Poll :: Ready ( Ok ( LoadResponse :: CacheValid ) ) ;
192
258
}
193
259
// Note that the index calls this method and the filesystem is locked
@@ -224,8 +290,8 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
224
290
match load_helper ( & self , path, index_version) {
225
291
Ok ( result) => Poll :: Ready ( Ok ( result) ) ,
226
292
Err ( _) if !self . is_updated ( ) => {
227
- // If git returns an error and we haven't updated the repo, return
228
- // pending to allow an update to try again.
293
+ // If git returns an error and we haven't updated the repo,
294
+ // return pending to allow an update to try again.
229
295
self . needs_update = true ;
230
296
Poll :: Pending
231
297
}
@@ -265,9 +331,6 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
265
331
266
332
self . needs_update = false ;
267
333
268
- // Make sure the index is only updated once per session since it is an
269
- // expensive operation. This generally only happens when the resolver
270
- // is run multiple times, such as during `cargo publish`.
271
334
if self . is_updated ( ) {
272
335
return Ok ( ( ) ) ;
273
336
}
@@ -321,8 +384,11 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
321
384
Ok ( ( ) )
322
385
}
323
386
387
+ /// Read the general concept for `invalidate_cache()` on
388
+ /// [`RegistryData::invalidate_cache`].
389
+ ///
390
+ /// To fully invalidate, undo [`RemoteRegistry::mark_updated`]'s work.
324
391
fn invalidate_cache ( & mut self ) {
325
- // To fully invalidate, undo `mark_updated`s work
326
392
self . needs_update = true ;
327
393
}
328
394
@@ -365,9 +431,10 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
365
431
}
366
432
}
367
433
434
+ /// Implemented just to be sure the `tree` field is dropped before our other fields.
435
+ /// See SAFETY inside [`RemoteRegistry::tree()`] for more.
368
436
impl < ' cfg > Drop for RemoteRegistry < ' cfg > {
369
437
fn drop ( & mut self ) {
370
- // Just be sure to drop this before our other fields
371
438
self . tree . borrow_mut ( ) . take ( ) ;
372
439
}
373
440
}
0 commit comments