Skip to content

Commit 11e5d0a

Browse files
committed
Merge branch 'jt/grep-wo-submodule-odb-as-alternate'
The code to make "git grep" recurse into submodules has been updated to migrate away from the "add submodule's object store as an alternate object store" mechanism (which is suboptimal). * jt/grep-wo-submodule-odb-as-alternate: t7814: show lack of alternate ODB-adding submodule-config: pass repo upon blob config read grep: add repository to OID grep sources grep: allocate subrepos on heap grep: read submodule entry with explicit repo grep: typesafe versions of grep_source_init grep: use submodule-ODB-as-alternate lazy-addition submodule: lazily add submodule ODBs as alternates
2 parents 0649303 + 18a2f66 commit 11e5d0a

File tree

11 files changed

+161
-55
lines changed

11 files changed

+161
-55
lines changed

builtin/grep.c

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ static int todo_done;
6565
/* Has all work items been added? */
6666
static int all_work_added;
6767

68+
static struct repository **repos_to_free;
69+
static size_t repos_to_free_nr, repos_to_free_alloc;
70+
6871
/* This lock protects all the variables above. */
6972
static pthread_mutex_t grep_mutex;
7073

@@ -168,6 +171,19 @@ static void work_done(struct work_item *w)
168171
grep_unlock();
169172
}
170173

174+
static void free_repos(void)
175+
{
176+
int i;
177+
178+
for (i = 0; i < repos_to_free_nr; i++) {
179+
repo_clear(repos_to_free[i]);
180+
free(repos_to_free[i]);
181+
}
182+
FREE_AND_NULL(repos_to_free);
183+
repos_to_free_nr = 0;
184+
repos_to_free_alloc = 0;
185+
}
186+
171187
static void *run(void *arg)
172188
{
173189
int hit = 0;
@@ -333,7 +349,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid,
333349
struct grep_source gs;
334350

335351
grep_source_name(opt, filename, tree_name_len, &pathbuf);
336-
grep_source_init(&gs, GREP_SOURCE_OID, pathbuf.buf, path, oid);
352+
grep_source_init_oid(&gs, pathbuf.buf, path, oid, opt->repo);
337353
strbuf_release(&pathbuf);
338354

339355
if (num_threads > 1) {
@@ -359,7 +375,7 @@ static int grep_file(struct grep_opt *opt, const char *filename)
359375
struct grep_source gs;
360376

361377
grep_source_name(opt, filename, 0, &buf);
362-
grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename, filename);
378+
grep_source_init_file(&gs, buf.buf, filename);
363379
strbuf_release(&buf);
364380

365381
if (num_threads > 1) {
@@ -415,19 +431,24 @@ static int grep_submodule(struct grep_opt *opt,
415431
const struct object_id *oid,
416432
const char *filename, const char *path, int cached)
417433
{
418-
struct repository subrepo;
434+
struct repository *subrepo;
419435
struct repository *superproject = opt->repo;
420436
const struct submodule *sub;
421437
struct grep_opt subopt;
422-
int hit;
438+
int hit = 0;
423439

424440
sub = submodule_from_path(superproject, null_oid(), path);
425441

426442
if (!is_submodule_active(superproject, path))
427443
return 0;
428444

429-
if (repo_submodule_init(&subrepo, superproject, sub))
445+
subrepo = xmalloc(sizeof(*subrepo));
446+
if (repo_submodule_init(subrepo, superproject, sub)) {
447+
free(subrepo);
430448
return 0;
449+
}
450+
ALLOC_GROW(repos_to_free, repos_to_free_nr + 1, repos_to_free_alloc);
451+
repos_to_free[repos_to_free_nr++] = subrepo;
431452

432453
/*
433454
* NEEDSWORK: repo_read_gitmodules() might call
@@ -438,53 +459,49 @@ static int grep_submodule(struct grep_opt *opt,
438459
* subrepo's odbs to the in-memory alternates list.
439460
*/
440461
obj_read_lock();
441-
repo_read_gitmodules(&subrepo, 0);
462+
repo_read_gitmodules(subrepo, 0);
442463

443464
/*
444-
* NEEDSWORK: This adds the submodule's object directory to the list of
445-
* alternates for the single in-memory object store. This has some bad
446-
* consequences for memory (processed objects will never be freed) and
447-
* performance (this increases the number of pack files git has to pay
448-
* attention to, to the sum of the number of pack files in all the
449-
* repositories processed so far). This can be removed once the object
450-
* store is no longer global and instead is a member of the repository
451-
* object.
465+
* All code paths tested by test code no longer need submodule ODBs to
466+
* be added as alternates, but add it to the list just in case.
467+
* Submodule ODBs added through add_submodule_odb_by_path() will be
468+
* lazily registered as alternates when needed (and except in an
469+
* unexpected code interaction, it won't be needed).
452470
*/
453-
add_to_alternates_memory(subrepo.objects->odb->path);
471+
add_submodule_odb_by_path(subrepo->objects->odb->path);
454472
obj_read_unlock();
455473

456474
memcpy(&subopt, opt, sizeof(subopt));
457-
subopt.repo = &subrepo;
475+
subopt.repo = subrepo;
458476

459477
if (oid) {
460-
struct object *object;
478+
enum object_type object_type;
461479
struct tree_desc tree;
462480
void *data;
463481
unsigned long size;
464482
struct strbuf base = STRBUF_INIT;
465483

466484
obj_read_lock();
467-
object = parse_object_or_die(oid, NULL);
485+
object_type = oid_object_info(subrepo, oid, NULL);
468486
obj_read_unlock();
469-
data = read_object_with_reference(&subrepo,
470-
&object->oid, tree_type,
487+
data = read_object_with_reference(subrepo,
488+
oid, tree_type,
471489
&size, NULL);
472490
if (!data)
473-
die(_("unable to read tree (%s)"), oid_to_hex(&object->oid));
491+
die(_("unable to read tree (%s)"), oid_to_hex(oid));
474492

475493
strbuf_addstr(&base, filename);
476494
strbuf_addch(&base, '/');
477495

478496
init_tree_desc(&tree, data, size);
479497
hit = grep_tree(&subopt, pathspec, &tree, &base, base.len,
480-
object->type == OBJ_COMMIT);
498+
object_type == OBJ_COMMIT);
481499
strbuf_release(&base);
482500
free(data);
483501
} else {
484502
hit = grep_cache(&subopt, pathspec, cached);
485503
}
486504

487-
repo_clear(&subrepo);
488505
return hit;
489506
}
490507

@@ -1182,5 +1199,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
11821199
run_pager(&opt, prefix);
11831200
clear_pathspec(&pathspec);
11841201
free_grep_patterns(&opt);
1202+
free_repos();
11851203
return !hit;
11861204
}

config.c

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1796,6 +1796,7 @@ int git_config_from_mem(config_fn_t fn,
17961796

17971797
int git_config_from_blob_oid(config_fn_t fn,
17981798
const char *name,
1799+
struct repository *repo,
17991800
const struct object_id *oid,
18001801
void *data)
18011802
{
@@ -1804,7 +1805,7 @@ int git_config_from_blob_oid(config_fn_t fn,
18041805
unsigned long size;
18051806
int ret;
18061807

1807-
buf = read_object_file(oid, &type, &size);
1808+
buf = repo_read_object_file(repo, oid, &type, &size);
18081809
if (!buf)
18091810
return error(_("unable to load config blob object '%s'"), name);
18101811
if (type != OBJ_BLOB) {
@@ -1820,14 +1821,15 @@ int git_config_from_blob_oid(config_fn_t fn,
18201821
}
18211822

18221823
static int git_config_from_blob_ref(config_fn_t fn,
1824+
struct repository *repo,
18231825
const char *name,
18241826
void *data)
18251827
{
18261828
struct object_id oid;
18271829

1828-
if (get_oid(name, &oid) < 0)
1830+
if (repo_get_oid(repo, name, &oid) < 0)
18291831
return error(_("unable to resolve config blob '%s'"), name);
1830-
return git_config_from_blob_oid(fn, name, &oid, data);
1832+
return git_config_from_blob_oid(fn, name, repo, &oid, data);
18311833
}
18321834

18331835
char *git_system_config(void)
@@ -1958,12 +1960,16 @@ int config_with_options(config_fn_t fn, void *data,
19581960
* If we have a specific filename, use it. Otherwise, follow the
19591961
* regular lookup sequence.
19601962
*/
1961-
if (config_source && config_source->use_stdin)
1963+
if (config_source && config_source->use_stdin) {
19621964
return git_config_from_stdin(fn, data);
1963-
else if (config_source && config_source->file)
1965+
} else if (config_source && config_source->file) {
19641966
return git_config_from_file(fn, config_source->file, data);
1965-
else if (config_source && config_source->blob)
1966-
return git_config_from_blob_ref(fn, config_source->blob, data);
1967+
} else if (config_source && config_source->blob) {
1968+
struct repository *repo = config_source->repo ?
1969+
config_source->repo : the_repository;
1970+
return git_config_from_blob_ref(fn, repo, config_source->blob,
1971+
data);
1972+
}
19671973

19681974
return do_git_config_sequence(opts, fn, data);
19691975
}

config.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ const char *config_scope_name(enum config_scope scope);
4949
struct git_config_source {
5050
unsigned int use_stdin:1;
5151
const char *file;
52+
/* The repository if blob is not NULL; leave blank for the_repository */
53+
struct repository *repo;
5254
const char *blob;
5355
enum config_scope scope;
5456
};
@@ -136,6 +138,7 @@ int git_config_from_mem(config_fn_t fn,
136138
const char *buf, size_t len,
137139
void *data, const struct config_options *opts);
138140
int git_config_from_blob_oid(config_fn_t fn, const char *name,
141+
struct repository *repo,
139142
const struct object_id *oid, void *data);
140143
void git_config_push_parameter(const char *text);
141144
void git_config_push_env(const char *spec);

grep.c

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1825,43 +1825,55 @@ int grep_source(struct grep_opt *opt, struct grep_source *gs)
18251825
return grep_source_1(opt, gs, 0);
18261826
}
18271827

1828+
static void grep_source_init_buf(struct grep_source *gs, char *buf,
1829+
unsigned long size)
1830+
{
1831+
gs->type = GREP_SOURCE_BUF;
1832+
gs->name = NULL;
1833+
gs->path = NULL;
1834+
gs->buf = buf;
1835+
gs->size = size;
1836+
gs->driver = NULL;
1837+
gs->identifier = NULL;
1838+
}
1839+
18281840
int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
18291841
{
18301842
struct grep_source gs;
18311843
int r;
18321844

1833-
grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
1834-
gs.buf = buf;
1835-
gs.size = size;
1845+
grep_source_init_buf(&gs, buf, size);
18361846

18371847
r = grep_source(opt, &gs);
18381848

18391849
grep_source_clear(&gs);
18401850
return r;
18411851
}
18421852

1843-
void grep_source_init(struct grep_source *gs, enum grep_source_type type,
1844-
const char *name, const char *path,
1845-
const void *identifier)
1853+
void grep_source_init_file(struct grep_source *gs, const char *name,
1854+
const char *path)
18461855
{
1847-
gs->type = type;
1856+
gs->type = GREP_SOURCE_FILE;
18481857
gs->name = xstrdup_or_null(name);
18491858
gs->path = xstrdup_or_null(path);
18501859
gs->buf = NULL;
18511860
gs->size = 0;
18521861
gs->driver = NULL;
1862+
gs->identifier = xstrdup(path);
1863+
}
18531864

1854-
switch (type) {
1855-
case GREP_SOURCE_FILE:
1856-
gs->identifier = xstrdup(identifier);
1857-
break;
1858-
case GREP_SOURCE_OID:
1859-
gs->identifier = oiddup(identifier);
1860-
break;
1861-
case GREP_SOURCE_BUF:
1862-
gs->identifier = NULL;
1863-
break;
1864-
}
1865+
void grep_source_init_oid(struct grep_source *gs, const char *name,
1866+
const char *path, const struct object_id *oid,
1867+
struct repository *repo)
1868+
{
1869+
gs->type = GREP_SOURCE_OID;
1870+
gs->name = xstrdup_or_null(name);
1871+
gs->path = xstrdup_or_null(path);
1872+
gs->buf = NULL;
1873+
gs->size = 0;
1874+
gs->driver = NULL;
1875+
gs->identifier = oiddup(oid);
1876+
gs->repo = repo;
18651877
}
18661878

18671879
void grep_source_clear(struct grep_source *gs)
@@ -1890,7 +1902,8 @@ static int grep_source_load_oid(struct grep_source *gs)
18901902
{
18911903
enum object_type type;
18921904

1893-
gs->buf = read_object_file(gs->identifier, &type, &gs->size);
1905+
gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
1906+
&gs->size);
18941907
if (!gs->buf)
18951908
return error(_("'%s': unable to read %s"),
18961909
gs->name,

grep.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,20 @@ struct grep_opt {
120120
struct grep_pat *header_list;
121121
struct grep_pat **header_tail;
122122
struct grep_expr *pattern_expression;
123+
124+
/*
125+
* NEEDSWORK: See if we can remove this field, because the repository
126+
* should probably be per-source. That is, grep.c functions using this
127+
* field should probably start using "repo" in "struct grep_source"
128+
* instead.
129+
*
130+
* This is potentially the cause of at least one bug - "git grep"
131+
* ignoring the textconv attributes from submodules. See [1] for more
132+
* information.
133+
* [1] https://lore.kernel.org/git/CAHd-oW5iEQarYVxEXoTG-ua2zdoybTrSjCBKtO0YT292fm0NQQ@mail.gmail.com/
134+
*/
123135
struct repository *repo;
136+
124137
const char *prefix;
125138
int prefix_length;
126139
regex_t regexp;
@@ -187,6 +200,7 @@ struct grep_source {
187200
GREP_SOURCE_BUF,
188201
} type;
189202
void *identifier;
203+
struct repository *repo; /* if GREP_SOURCE_OID */
190204

191205
char *buf;
192206
unsigned long size;
@@ -195,9 +209,11 @@ struct grep_source {
195209
struct userdiff_driver *driver;
196210
};
197211

198-
void grep_source_init(struct grep_source *gs, enum grep_source_type type,
199-
const char *name, const char *path,
200-
const void *identifier);
212+
void grep_source_init_file(struct grep_source *gs, const char *name,
213+
const char *path);
214+
void grep_source_init_oid(struct grep_source *gs, const char *name,
215+
const char *path, const struct object_id *oid,
216+
struct repository *repo);
201217
void grep_source_clear_data(struct grep_source *gs);
202218
void grep_source_clear(struct grep_source *gs);
203219
void grep_source_load_driver(struct grep_source *gs,

object-file.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "packfile.h"
3333
#include "object-store.h"
3434
#include "promisor-remote.h"
35+
#include "submodule.h"
3536

3637
/* The maximum size for an object header. */
3738
#define MAX_HEADER_LEN 32
@@ -1613,6 +1614,10 @@ static int do_oid_object_info_extended(struct repository *r,
16131614
break;
16141615
}
16151616

1617+
if (register_all_submodule_odb_as_alternates())
1618+
/* We added some alternates; retry */
1619+
continue;
1620+
16161621
/* Check if it is a missing object */
16171622
if (fetch_if_missing && repo_has_promisor_remote(r) &&
16181623
!already_retried &&

submodule-config.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -649,9 +649,10 @@ static void config_from_gitmodules(config_fn_t fn, struct repository *repo, void
649649
config_source.file = file;
650650
} else if (repo_get_oid(repo, GITMODULES_INDEX, &oid) >= 0 ||
651651
repo_get_oid(repo, GITMODULES_HEAD, &oid) >= 0) {
652+
config_source.repo = repo;
652653
config_source.blob = oidstr = xstrdup(oid_to_hex(&oid));
653654
if (repo != the_repository)
654-
add_to_alternates_memory(repo->objects->odb->path);
655+
add_submodule_odb_by_path(repo->objects->odb->path);
655656
} else {
656657
goto out;
657658
}
@@ -702,7 +703,7 @@ void gitmodules_config_oid(const struct object_id *commit_oid)
702703

703704
if (gitmodule_oid_from_commit(commit_oid, &oid, &rev)) {
704705
git_config_from_blob_oid(gitmodules_cb, rev.buf,
705-
&oid, the_repository);
706+
the_repository, &oid, the_repository);
706707
}
707708
strbuf_release(&rev);
708709

0 commit comments

Comments
 (0)