diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt index bf24f1813adc3d..a6e13666ea19cc 100644 --- a/Documentation/git-clone.txt +++ b/Documentation/git-clone.txt @@ -15,7 +15,8 @@ SYNOPSIS [--dissociate] [--separate-git-dir <git dir>] [--depth <depth>] [--[no-]single-branch] [--no-tags] [--recurse-submodules[=<pathspec>]] [--[no-]shallow-submodules] - [--[no-]remote-submodules] [--jobs <n>] [--sparse] [--] <repository> + [--[no-]remote-submodules] [--jobs <n>] [--sparse] + [--partial[=<size>]|--filter=<filter-spec>] [--] <repository> [<directory>] DESCRIPTION @@ -162,6 +163,18 @@ objects from the source repository into a pack in the cloned repository. of the repository. The sparse-checkout file can be modified to grow the working directory as needed. +--partial[=<size>]:: +--filter=<filter-spec>:: + Use the partial clone feature and request that the server sends + a subset of reachable objects according to a given object filter. + When using `--filter`, the supplied `<filter-spec>` is used for + the partial clone filter. When using `--partial` with no `<size>`, + the `blob:none` filter is applied to filter all blobs. When using + `--partial=<size>` the `blob:limit=<size>` filter is applied to + filter all blobs with size larger than `<size>`. For more details + on filter specifications, see the `--filter` option in + linkgit:git-rev-list[1]. + --mirror:: Set up a mirror of the source repository. This implies `--bare`. Compared to `--bare`, `--mirror` not only maps local branches of the @@ -297,6 +310,78 @@ or `--mirror` is given) for `host.xz:foo/.git`). Cloning into an existing directory is only allowed if the directory is empty. +Partial Clone +------------- + +By default, `git clone` will download every reachable object, including +every version of every file in the history of the repository. The +**partial clone** feature allows Git to transfer fewer objects and +request them from the remote only when they are needed, so some +reachable objects can be omitted from the initial `git clone` and +subsequent `git fetch` operations. 
+ +To use the partial clone feature, you can run `git clone` with the +`--filter=<filter-spec>` option. If you want to clone a repository +without downloading any blobs, the form `--filter=blob:none` will omit all +the blobs. If the repository has some large blobs and you want to +prevent blobs above a given size threshold from being downloaded, +the form `--filter=blob:limit=<n>[kmg]` omits blobs larger than n bytes +or units (see linkgit:git-rev-list[1]). + +As mentioned before, a partially cloned repository may have to request +the missing objects when they are needed. So some 'local' commands may +fail without a network connection to the remote repository. + +For example, suppose the <repository> contains two branches named 'master' +and 'topic'. Then, we clone the repository by + + $ git clone --filter=blob:none --no-checkout <repository> + +With the `--filter=blob:none` option Git will omit all the blobs and +with the `--no-checkout` option Git will not perform a checkout of HEAD +after the clone is complete. Then, we check out the remote tracking +'topic' branch by + + $ git checkout -b topic origin/topic + +The output looks like + +------------ + remote: Enumerating objects: 1, done. + remote: Counting objects: 100% (1/1), done. + remote: Total 1 (delta 0), reused 0 (delta 0), pack-reused 0 + Receiving objects: 100% (1/1), 43 bytes | 43.00 KiB/s, done. + Branch 'topic' set up to track remote branch 'topic' from 'origin'. + Switched to a new branch 'topic' +------------ + +The output is a bit surprising but it shows how partial clone works. +When we check out the branch 'topic' Git will request the missing blobs +because they are needed. Then, we can switch back to branch 'master' by + + $ git checkout master + +This time the output looks like + +------------ + Switched to branch 'master' + Your branch is up to date with 'origin/master'. 
+------------ + +It shows that when we switch back to the previous location, the checkout +is done without a download because the repository has all the blobs that +were downloaded previously. + +`git log` may also behave surprisingly in a partial clone. `git log +-- <pathspec>` will not cause downloads with the blob filters, because +it's only reading commits and trees. However, options that +require git to look at the contents of blobs, like "-p" and "--stat", +as well as options that cause git to report pathnames, like "--summary" and +"--raw", will trigger lazy/on-demand fetching of blobs, as they are +needed to detect inexact renames. + +linkgit:partial-clone[1] + :git-clone: 1 include::urls.txt[] diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 256bcfbdfe666d..a71716ef75e161 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -270,6 +270,24 @@ int opt_parse_list_objects_filter(const struct option *opt, return 0; } +int opt_set_blob_none_filter(const struct option *opt, + const char *arg, int unset) +{ + struct strbuf filter_arg = STRBUF_INIT; + struct list_objects_filter_options *filter_options = opt->value; + + if (unset || !arg || !strcmp(arg, "0")) { + parse_list_objects_filter(filter_options, "blob:none"); + return 0; + } + + strbuf_addf(&filter_arg, "blob:limit=%s", arg); + parse_list_objects_filter(filter_options, filter_arg.buf); + strbuf_release(&filter_arg); + + return 0; +} + const char *list_objects_filter_spec(struct list_objects_filter_options *filter) { if (!filter->filter_spec.nr) diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 2ffb39222c4974..ac38ffcbe86857 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -62,6 +62,7 @@ struct list_objects_filter_options { /* Normalized command line arguments */ #define CL_ARG__FILTER "filter" +#define CL_ARG__PARTIAL "partial" void list_objects_filter_die_if_populated( struct 
list_objects_filter_options *filter_options); @@ -80,11 +81,16 @@ void parse_list_objects_filter( int opt_parse_list_objects_filter(const struct option *opt, const char *arg, int unset); +int opt_set_blob_none_filter(const struct option *opt, + const char *arg, int unset); #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \ { OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \ N_("object filtering"), 0, \ - opt_parse_list_objects_filter } + opt_parse_list_objects_filter }, \ + { OPTION_CALLBACK, 0, CL_ARG__PARTIAL, fo, N_("size"), \ + N_("partial clone with blob filter"), \ + PARSE_OPT_OPTARG | PARSE_OPT_NONEG , opt_set_blob_none_filter } /* * Translates abbreviated numbers in the filter's filter_spec into their diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 77bb91e9769227..c42cef612963f2 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -33,17 +33,39 @@ test_expect_success 'setup bare clone for server' ' # confirm we are missing all of the known blobs. # confirm partial clone was registered in the local config. 
test_expect_success 'do partial clone 1' ' - git clone --no-checkout --filter=blob:none "file://$(pwd)/srv.bare" pc1 && - - git -C pc1 rev-list --quiet --objects --missing=print HEAD >revs && - awk -f print_1.awk revs | - sed "s/?//" | - sort >observed.oids && + for option in "--filter=blob:none" "--partial" + do + rm -rf pc1 && + git clone --no-checkout "$option" "file://$(pwd)/srv.bare" pc1 && + + git -C pc1 rev-list --quiet --objects --missing=print HEAD >revs && + awk -f print_1.awk revs | + sed "s/?//" | + sort >observed.oids && + + test_cmp expect_1.oids observed.oids && + test "$(git -C pc1 config --local core.repositoryformatversion)" = "1" && + test "$(git -C pc1 config --local remote.origin.promisor)" = "true" && + test "$(git -C pc1 config --local remote.origin.partialclonefilter)" = "blob:none" + done +' - test_cmp expect_1.oids observed.oids && - test "$(git -C pc1 config --local core.repositoryformatversion)" = "1" && - test "$(git -C pc1 config --local remote.origin.promisor)" = "true" && - test "$(git -C pc1 config --local remote.origin.partialclonefilter)" = "blob:none" +test_expect_success 'do partial clone with size limit' ' + for option in "--filter=blob:limit=1" "--partial=1" + do + rm -rf pc-limit && + git clone --no-checkout "$option" "file://$(pwd)/srv.bare" pc-limit && + + git -C pc-limit rev-list --quiet --objects --missing=print HEAD >revs && + awk -f print_1.awk revs | + sed "s/?//" | + sort >observed.oids && + + test_cmp expect_1.oids observed.oids && + test "$(git -C pc-limit config --local core.repositoryformatversion)" = "1" && + test "$(git -C pc-limit config --local remote.origin.promisor)" = "true" && + test "$(git -C pc-limit config --local remote.origin.partialclonefilter)" = "blob:limit=1" + done ' test_expect_success 'verify that .promisor file contains refs fetched' '