Skip to content

Commit 0179ca7

Browse files
elfstromgitster
authored andcommitted
clean: improve performance when removing lots of directories
"git clean" uses resolve_gitlink_ref() to check for the presence of nested git repositories, but it has the drawback of creating a ref_cache entry for every directory that should potentially be cleaned. The linear search through the ref_cache list causes a massive performance hit for large number of directories. Modify clean.c:remove_dirs to use setup.c:is_git_directory and setup.c:read_gitfile_gently instead. Both these functions will open files and parse contents when they find something that looks like a git repository. This is ok from a performance standpoint since finding repository candidates should be comparatively rare. Using is_git_directory and read_gitfile_gently should give a more standardized check for what is and what isn't a git repository but also gives three behavioral changes. The first change is that we will now detect and avoid cleaning empty nested git repositories (only init run). This is desirable. Second, we will no longer die when cleaning a file named ".git" with garbage content (it will be cleaned instead). This is also desirable. The last change is that we will detect and avoid cleaning empty bare repositories that have been placed in a directory named ".git". This is not desirable but should have no real user impact since we already fail to clean non-empty bare repositories in the same scenario. This is thus deemed acceptable. On top of this we add some extra precautions. If read_gitfile_gently fails to open the git file, read the git file or verify the path in the git file we assume that the path with the git file is a valid repository and avoid cleaning. Update t7300 to reflect these changes in behavior. The time to clean an untracked directory containing 100000 sub directories went from 61s to 1.7s after this change. Helped-by: Jeff King <[email protected]> Signed-off-by: Erik Elfström <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent f49a565 commit 0179ca7

File tree

2 files changed

+30
-10
lines changed

2 files changed

+30
-10
lines changed

builtin/clean.c

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
#include "cache.h"
1111
#include "dir.h"
1212
#include "parse-options.h"
13-
#include "refs.h"
1413
#include "string-list.h"
1514
#include "quote.h"
1615
#include "column.h"
@@ -148,20 +147,43 @@ static int exclude_cb(const struct option *opt, const char *arg, int unset)
148147
return 0;
149148
}
150149

150+
/*
151+
* Return 1 if the given path is the root of a git repository or
152+
* submodule else 0. Will not return 1 for bare repositories with the
153+
* exception of creating a bare repository in "foo/.git" and calling
154+
* is_git_repository("foo").
155+
*/
156+
static int is_git_repository(struct strbuf *path)
157+
{
158+
int ret = 0;
159+
int gitfile_error;
160+
size_t orig_path_len = path->len;
161+
assert(orig_path_len != 0);
162+
if (path->buf[orig_path_len - 1] != '/')
163+
strbuf_addch(path, '/');
164+
strbuf_addstr(path, ".git");
165+
if (read_gitfile_gently(path->buf, &gitfile_error) || is_git_directory(path->buf))
166+
ret = 1;
167+
if (gitfile_error == READ_GITFILE_ERR_OPEN_FAILED ||
168+
gitfile_error == READ_GITFILE_ERR_READ_FAILED)
169+
ret = 1; /* This could be a real .git file, take the
170+
* safe option and avoid cleaning */
171+
strbuf_setlen(path, orig_path_len);
172+
return ret;
173+
}
174+
151175
static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag,
152176
int dry_run, int quiet, int *dir_gone)
153177
{
154178
DIR *dir;
155179
struct strbuf quoted = STRBUF_INIT;
156180
struct dirent *e;
157181
int res = 0, ret = 0, gone = 1, original_len = path->len, len;
158-
unsigned char submodule_head[20];
159182
struct string_list dels = STRING_LIST_INIT_DUP;
160183

161184
*dir_gone = 1;
162185

163-
if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
164-
!resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) {
186+
if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && is_git_repository(path)) {
165187
if (!quiet) {
166188
quote_path_relative(path->buf, prefix, &quoted);
167189
printf(dry_run ? _(msg_would_skip_git_dir) : _(msg_skip_git_dir),

t/t7300-clean.sh

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ test_expect_success 'nested git work tree' '
455455
! test -d bar
456456
'
457457

458-
test_expect_failure 'should clean things that almost look like git but are not' '
458+
test_expect_success 'should clean things that almost look like git but are not' '
459459
rm -fr almost_git almost_bare_git almost_submodule &&
460460
mkdir -p almost_git/.git/objects &&
461461
mkdir -p almost_git/.git/refs &&
@@ -468,8 +468,6 @@ test_expect_failure 'should clean things that almost look like git but are not'
468468
garbage
469469
EOF
470470
test_when_finished "rm -rf almost_*" &&
471-
## This will fail due to die("Invalid gitfile format: %s", path); in
472-
## setup.c:read_gitfile.
473471
git clean -f -d &&
474472
test_path_is_missing almost_git &&
475473
test_path_is_missing almost_bare_git &&
@@ -501,7 +499,7 @@ test_expect_success 'should not clean submodules' '
501499
test_path_is_missing to_clean
502500
'
503501

504-
test_expect_failure 'should avoid cleaning possible submodules' '
502+
test_expect_success 'should avoid cleaning possible submodules' '
505503
rm -fr to_clean possible_sub1 &&
506504
mkdir to_clean possible_sub1 &&
507505
test_when_finished "rm -rf possible_sub*" &&
@@ -515,7 +513,7 @@ test_expect_failure 'should avoid cleaning possible submodules' '
515513
test_path_is_missing to_clean
516514
'
517515

518-
test_expect_failure 'nested (empty) git should be kept' '
516+
test_expect_success 'nested (empty) git should be kept' '
519517
rm -fr empty_repo to_clean &&
520518
git init empty_repo &&
521519
mkdir to_clean &&
@@ -537,7 +535,7 @@ test_expect_success 'nested bare repositories should be cleaned' '
537535
test_path_is_missing subdir
538536
'
539537

540-
test_expect_success 'nested (empty) bare repositories should be cleaned even when in .git' '
538+
test_expect_failure 'nested (empty) bare repositories should be cleaned even when in .git' '
541539
rm -fr strange_bare &&
542540
mkdir strange_bare &&
543541
git init --bare strange_bare/.git &&

0 commit comments

Comments
 (0)