Skip to content

Commit 253c3aa

Browse files
committed
Merge branch 'scalar-with-gvfs'
Prepare `scalar` to use the GVFS protocol instead of partial clone (required to support Azure Repos). Signed-off-by: Johannes Schindelin <[email protected]>
2 parents a191446 + 02fe83f commit 253c3aa

File tree

16 files changed

+1267
-54
lines changed

16 files changed

+1267
-54
lines changed

Documentation/scalar.adoc

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,17 @@ SYNOPSIS
99
--------
1010
[verse]
1111
scalar clone [--single-branch] [--branch <main-branch>] [--full-clone]
12-
[--[no-]src] [--[no-]tags] [--[no-]maintenance] <url> [<enlistment>]
12+
[--[no-]src] [--[no-]tags] [--[no-]maintenance]
13+
[--[no-]gvfs-protocol] [--local-cache-path <path>] [--cache-server-url <url>]
14+
<url> [<enlistment>]
1315
scalar list
1416
scalar register [--[no-]maintenance] [<enlistment>]
1517
scalar unregister [<enlistment>]
1618
scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [<enlistment>]
1719
scalar reconfigure [--maintenance=(enable|disable|keep)] [ --all | <enlistment> ]
1820
scalar diagnose [<enlistment>]
1921
scalar delete <enlistment>
22+
scalar cache-server ( --get | --set <url> | --list [<remote>] ) [<enlistment>]
2023

2124
DESCRIPTION
2225
-----------
@@ -102,6 +105,37 @@ cloning. If the HEAD at the remote did not point at any branch when
102105
background maintenance feature. Use the `--no-maintenance` to skip
103106
this configuration.
104107

108+
--local-cache-path <path>::
109+
Override the path to the local cache root directory; pre-fetched objects
110+
are stored into a repository-dependent subdirectory of that path.
111+
+
112+
The default is `<drive>:\.scalarCache` on Windows (on the same drive as the
113+
clone), and `~/.scalarCache` on macOS.
114+
115+
--cache-server-url <url>::
116+
Retrieve missing objects from the specified remote, which is expected to
117+
understand the GVFS protocol.
118+
119+
--[no-]gvfs-protocol::
120+
When cloning from a `<url>` with either `dev.azure.com` or
121+
`visualstudio.com` in the name, `scalar clone` will attempt to use the GVFS
122+
Protocol to access Git objects, specifically from a cache server when
123+
available, and will fail to clone if there is an error over that protocol.
124+
125+
To enable the GVFS Protocol regardless of the origin `<url>`, use
126+
`--gvfs-protocol`. This will cause `scalar clone` to fail when the origin
127+
server fails to provide a valid response to the `gvfs/config` endpoint.
128+
129+
To disable the GVFS Protocol, use `--no-gvfs-protocol` and `scalar clone`
130+
will only use the Git protocol, starting with a partial clone. This can be
131+
helpful if your `<url>` points to Azure Repos but the repository does not
132+
have GVFS cache servers enabled. It is likely more efficient to use its
133+
partial clone functionality through the Git protocol.
134+
135+
Previous versions of `scalar clone` could fall back to a partial clone over
136+
the Git protocol if there was any issue gathering GVFS configuration
137+
information from the origin server.
138+
105139
List
106140
~~~~
107141

@@ -191,6 +225,27 @@ delete <enlistment>::
191225
This subcommand lets you delete an existing Scalar enlistment from your
192226
local file system, unregistering the repository.
193227

228+
Cache-server
229+
~~~~~~~~~~~~
230+
231+
cache-server ( --get | --set <url> | --list [<remote>] ) [<enlistment>]::
232+
This command lets you query or set the GVFS-enabled cache server used
233+
to fetch missing objects.
234+
235+
--get::
236+
This is the default command mode: query the currently-configured cache
237+
server URL, if any.
238+
239+
--list::
240+
Access the `gvfs/config` endpoint of the specified remote (default:
241+
`origin`) to figure out which cache servers are available, if any.
242+
+
243+
In contrast to the `--get` command mode (which only accesses the local
244+
repository), this command mode triggers a request via the network that
245+
potentially requires authentication. If authentication is required, the
246+
configured credential helper is employed (see linkgit:git-credential[1]
247+
for details).
248+
194249
SEE ALSO
195250
--------
196251
linkgit:git-clone[1], linkgit:git-maintenance[1].

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2831,6 +2831,7 @@ GIT_OBJS += git.o
28312831
.PHONY: git-objs
28322832
git-objs: $(GIT_OBJS)
28332833

2834+
SCALAR_OBJS := json-parser.o
28342835
SCALAR_OBJS += scalar.o
28352836
.PHONY: scalar-objs
28362837
scalar-objs: $(SCALAR_OBJS)
@@ -2986,7 +2987,7 @@ $(REMOTE_CURL_PRIMARY): remote-curl.o http.o http-walker.o $(LAZYLOAD_LIBCURL_OB
29862987
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
29872988
$(CURL_LIBCURL) $(EXPAT_LIBEXPAT) $(LIBS)
29882989

2989-
scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS)
2990+
scalar$X: $(SCALAR_OBJS) GIT-LDFLAGS $(GITLIBS)
29902991
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \
29912992
$(filter %.o,$^) $(LIBS)
29922993

contrib/buildsystems/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,7 @@ target_link_libraries(git-sh-i18n--envsubst common-main)
804804
add_executable(git-shell ${CMAKE_SOURCE_DIR}/shell.c)
805805
target_link_libraries(git-shell common-main)
806806

807-
add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c)
807+
add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c ${CMAKE_SOURCE_DIR}/json-parser.c)
808808
target_link_libraries(scalar common-main)
809809

810810
if(CURL_FOUND)

contrib/scalar/docs/getting-started.md

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ Creating a new Scalar clone
1818
---------------------------------------------------
1919

2020
The `clone` verb creates a local enlistment of a remote repository using the
21-
partial clone feature available e.g. on GitHub.
22-
21+
partial clone feature available e.g. on GitHub, or using the
22+
[GVFS protocol](https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md),
23+
available e.g. on Azure Repos.
2324

2425
```
2526
scalar clone [options] <url> [<dir>]
@@ -68,11 +69,26 @@ in `<path>`.
6869
These options allow a user to customize their initial enlistment.
6970

7071
* `--full-clone`: If specified, do not initialize the sparse-checkout feature.
71-
All files will be present in your `src` directory. This uses a Git partial
72-
clone: blobs are downloaded on demand.
72+
All files will be present in your `src` directory. This behaves very similarly
73+
to a Git partial clone in that blobs are downloaded on demand. However, it
74+
will use the GVFS protocol to download all Git objects.
75+
76+
* `--cache-server-url=<url>`: If specified, set the intended cache server to
77+
the specified `<url>`. All object queries will use the GVFS protocol to this
78+
`<url>` instead of the origin remote. If the remote supplies a list of
79+
cache servers via the `<url>/gvfs/config` endpoint, then the `clone` command
80+
will select a nearby cache server from that list.
7381

7482
* `--branch=<ref>`: Specify the branch to checkout after clone.
7583

84+
* `--local-cache-path=<path>`: Use this option to override the path for the
85+
local Scalar cache. If not specified, then Scalar will select a default
86+
path to share objects with your other enlistments. On Windows, this path
87+
is a subdirectory of `<Volume>:\.scalarCache\`. On macOS, this path is a
88+
subdirectory of `~/.scalarCache/`. The default cache path is recommended so
89+
multiple enlistments of the same remote repository share objects on the
90+
same device.
91+
7692
### Advanced Options
7793

7894
The options below are not intended for use by a typical user. These are

contrib/scalar/docs/index.md

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,14 @@ these features for that repo (except partial clone) and start running suggested
2828
maintenance in the background using
2929
[the `git maintenance` feature](https://git-scm.com/docs/git-maintenance).
3030

31-
Repos cloned with the `scalar clone` command use partial clone to significantly
32-
reduce the amount of data required to get started using a repository. By
33-
delaying all blob downloads until they are required, Scalar allows you to work
34-
with very large repositories quickly.
31+
Repos cloned with the `scalar clone` command use partial clone or the
32+
[GVFS protocol](https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md)
33+
to significantly reduce the amount of data required to get started
34+
using a repository. By delaying all blob downloads until they are required,
35+
Scalar allows you to work with very large repositories quickly. The GVFS
36+
protocol allows a network of _cache servers_ to serve objects with lower
37+
latency and higher throughput. The cache servers also reduce load on the
38+
central server.
3539

3640
Documentation
3741
-------------
@@ -42,7 +46,7 @@ Documentation
4246

4347
* [Troubleshooting](troubleshooting.md):
4448
Collect diagnostic information or update custom settings. Includes
45-
`scalar diagnose`.
49+
`scalar diagnose` and `scalar cache-server`.
4650

4751
* [The Philosophy of Scalar](philosophy.md): Why does Scalar work the way
4852
it does, and how do we make decisions about its future?

contrib/scalar/docs/philosophy.md

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,27 @@ Scalar only to configure those new settings. In particular, we ported
1313
features like background maintenance to Git to make Scalar simpler and
1414
make Git more powerful.
1515

16-
Services such as GitHub support partial clone , a standard adopted by the Git
17-
project to download only part of the Git objects when cloning, and fetching
18-
further objects on demand. If your hosting service supports partial clone, then
19-
we absolutely recommend it as a way to greatly speed up your clone and fetch
20-
times and to reduce how much disk space your Git repository requires. Scalar
21-
will help with this!
16+
Scalar ships inside [a custom version of Git][microsoft-git], but we are
17+
working to make it available in other forks of Git. The only feature
18+
that is not intended to ever reach the standard Git client is Scalar's use
19+
of [the GVFS Protocol][gvfs-protocol], which is essentially an older
20+
version of [Git's partial clone feature](https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/)
21+
that was available first in Azure Repos. Services such as GitHub support
22+
only partial clone instead of the GVFS protocol because that is the
23+
standard adopted by the Git project. If your hosting service supports
24+
partial clone, then we absolutely recommend it as a way to greatly speed
25+
up your clone and fetch times and to reduce how much disk space your Git
26+
repository requires. Scalar will help with this!
2227

23-
Most of the value of Scalar can be found in the core Git client. However, most
24-
of the advanced features that really optimize Git's performance are off by
25-
default for compatibility reasons. To really take advantage of Git's latest and
26-
greatest features, you either need to study the [`git config`
27-
documentation](https://git-scm.com/docs/git-config) and regularly read [the Git
28-
release notes](https://github.com/git/git/tree/master/Documentation/RelNotes).
28+
If you don't use the GVFS Protocol, then most of the value of Scalar can
29+
be found in the core Git client. However, most of the advanced features
30+
that really optimize Git's performance are off by default for compatibility
31+
reasons. To really take advantage of Git's latest and greatest features,
32+
you either need to study the [`git config` documentation](https://git-scm.com/docs/git-config)
33+
and regularly read [the Git release notes](https://github.com/git/git/tree/master/Documentation/RelNotes).
2934
Even if you do all that work and customize your Git settings on your machines,
30-
you likely will want to share those settings with other team members. Or, you
31-
can just use Scalar!
35+
you likely will want to share those settings with other team members.
36+
Or, you can just use Scalar!
3237

3338
Using `scalar register` on an existing Git repository will give you these
3439
benefits:

contrib/scalar/docs/troubleshooting.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,23 @@ files for that repository. This includes:
1818

1919
As the `diagnose` command completes, it provides the path of the resulting
2020
zip file. This zip can be attached to bug reports to make the analysis easier.
21+
22+
Modifying Configuration Values
23+
------------------------------
24+
25+
The Scalar-specific configuration is only available for repos using the
26+
GVFS protocol.
27+
28+
### Cache Server URL
29+
30+
When using an enlistment cloned with `scalar clone` and the GVFS protocol,
31+
you will have a value called the cache server URL. Cache servers are a feature
32+
of the GVFS protocol to provide low-latency access to the on-demand object
33+
requests. The cache server URL is stored in the `gvfs.cache-server` setting in your local Git config
34+
file.
35+
36+
Run `scalar cache-server --get` to see the current cache server.
37+
38+
Run `scalar cache-server --list` to see the available cache server URLs.
39+
40+
Run `scalar cache-server --set=<url>` to set your cache server to `<url>`.

diagnose.c

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "parse-options.h"
1313
#include "repository.h"
1414
#include "write-or-die.h"
15+
#include "config.h"
1516

1617
struct archive_dir {
1718
const char *path;
@@ -71,6 +72,39 @@ static int dir_file_stats(struct object_directory *object_dir, void *data)
7172
return 0;
7273
}
7374

75+
static void dir_stats(struct strbuf *buf, const char *path)
76+
{
77+
DIR *dir = opendir(path);
78+
struct dirent *e;
79+
struct stat e_stat;
80+
struct strbuf file_path = STRBUF_INIT;
81+
size_t base_path_len;
82+
83+
if (!dir)
84+
return;
85+
86+
strbuf_addstr(buf, "Contents of ");
87+
strbuf_add_absolute_path(buf, path);
88+
strbuf_addstr(buf, ":\n");
89+
90+
strbuf_add_absolute_path(&file_path, path);
91+
strbuf_addch(&file_path, '/');
92+
base_path_len = file_path.len;
93+
94+
while ((e = readdir(dir)) != NULL)
95+
if (!is_dot_or_dotdot(e->d_name) && e->d_type == DT_REG) {
96+
strbuf_setlen(&file_path, base_path_len);
97+
strbuf_addstr(&file_path, e->d_name);
98+
if (!stat(file_path.buf, &e_stat))
99+
strbuf_addf(buf, "%-70s %16"PRIuMAX"\n",
100+
e->d_name,
101+
(uintmax_t)e_stat.st_size);
102+
}
103+
104+
strbuf_release(&file_path);
105+
closedir(dir);
106+
}
107+
74108
static int count_files(struct strbuf *path)
75109
{
76110
DIR *dir = opendir(path->buf);
@@ -185,7 +219,8 @@ int create_diagnostics_archive(struct repository *r,
185219
struct strvec archiver_args = STRVEC_INIT;
186220
char **argv_copy = NULL;
187221
int stdout_fd = -1, archiver_fd = -1;
188-
struct strbuf buf = STRBUF_INIT;
222+
char *cache_server_url = NULL, *shared_cache = NULL;
223+
struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
189224
int res;
190225
struct archive_dir archive_dirs[] = {
191226
{ ".git", 0 },
@@ -220,6 +255,13 @@ int create_diagnostics_archive(struct repository *r,
220255
get_version_info(&buf, 1);
221256

222257
strbuf_addf(&buf, "Repository root: %s\n", r->worktree);
258+
259+
repo_config_get_string(r, "gvfs.cache-server", &cache_server_url);
260+
repo_config_get_string(r, "gvfs.sharedCache", &shared_cache);
261+
strbuf_addf(&buf, "Cache Server: %s\nLocal Cache: %s\n\n",
262+
cache_server_url ? cache_server_url : "None",
263+
shared_cache ? shared_cache : "None");
264+
223265
get_disk_info(&buf);
224266
write_or_die(stdout_fd, buf.buf, buf.len);
225267
strvec_pushf(&archiver_args,
@@ -250,6 +292,52 @@ int create_diagnostics_archive(struct repository *r,
250292
}
251293
}
252294

295+
if (shared_cache) {
296+
size_t path_len;
297+
298+
strbuf_reset(&buf);
299+
strbuf_addf(&path, "%s/pack", shared_cache);
300+
strbuf_reset(&buf);
301+
strbuf_addstr(&buf, "--add-virtual-file=packs-cached.txt:");
302+
dir_stats(&buf, path.buf);
303+
strvec_push(&archiver_args, buf.buf);
304+
305+
strbuf_reset(&buf);
306+
strbuf_addstr(&buf, "--add-virtual-file=objects-cached.txt:");
307+
loose_objs_stats(&buf, shared_cache);
308+
strvec_push(&archiver_args, buf.buf);
309+
310+
strbuf_reset(&path);
311+
strbuf_addf(&path, "%s/info", shared_cache);
312+
path_len = path.len;
313+
314+
if (is_directory(path.buf)) {
315+
DIR *dir = opendir(path.buf);
316+
struct dirent *e;
317+
318+
while ((e = readdir(dir))) {
319+
if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name))
320+
continue;
321+
if (e->d_type == DT_DIR)
322+
continue;
323+
324+
strbuf_reset(&buf);
325+
strbuf_addf(&buf, "--add-virtual-file=info/%s:", e->d_name);
326+
327+
strbuf_setlen(&path, path_len);
328+
strbuf_addch(&path, '/');
329+
strbuf_addstr(&path, e->d_name);
330+
331+
if (strbuf_read_file(&buf, path.buf, 0) < 0) {
332+
res = error_errno(_("could not read '%s'"), path.buf);
333+
goto diagnose_cleanup;
334+
}
335+
strvec_push(&archiver_args, buf.buf);
336+
}
337+
closedir(dir);
338+
}
339+
}
340+
253341
strvec_pushl(&archiver_args, "--prefix=",
254342
oid_to_hex(r->hash_algo->empty_tree), "--", NULL);
255343

@@ -277,6 +365,8 @@ int create_diagnostics_archive(struct repository *r,
277365
free(argv_copy);
278366
strvec_clear(&archiver_args);
279367
strbuf_release(&buf);
368+
free(cache_server_url);
369+
free(shared_cache);
280370

281371
return res;
282372
}

dir.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3231,6 +3231,8 @@ static int cmp_icase(char a, char b)
32313231
{
32323232
if (a == b)
32333233
return 0;
3234+
if (is_dir_sep(a))
3235+
return is_dir_sep(b) ? 0 : -1;
32343236
if (ignore_case)
32353237
return toupper(a) - toupper(b);
32363238
return a - b;

0 commit comments

Comments
 (0)