Skip to content

Commit 6d56d4c

Browse files
committed
Merge branch 'ds/blame-on-bloom'
"git blame" learns to take advantage of the "changed-paths" Bloom filter stored in the commit-graph file. * ds/blame-on-bloom: test-bloom: check that we have expected arguments test-bloom: fix some whitespace issues blame: drop unused parameter from maybe_changed_path blame: use changed-path Bloom filters tests: write commit-graph with Bloom filters revision: complicated pathspecs disable filters
2 parents 9b6606f + 1b4c57f commit 6d56d4c

File tree

9 files changed

+211
-23
lines changed

9 files changed

+211
-23
lines changed

blame.c

Lines changed: 128 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include "blame.h"
1010
#include "alloc.h"
1111
#include "commit-slab.h"
12+
#include "bloom.h"
13+
#include "commit-graph.h"
1214

1315
define_commit_slab(blame_suspects, struct blame_origin *);
1416
static struct blame_suspects blame_suspects;
@@ -1246,13 +1248,74 @@ static int fill_blob_sha1_and_mode(struct repository *r,
12461248
return -1;
12471249
}
12481250

1251+
struct blame_bloom_data {
1252+
/*
1253+
* Changed-path Bloom filter keys. These can help prevent
1254+
* computing diffs against first parents, but we need to
1255+
* expand the list as code is moved or files are renamed.
1256+
*/
1257+
struct bloom_filter_settings *settings;
1258+
struct bloom_key **keys;
1259+
int nr;
1260+
int alloc;
1261+
};
1262+
1263+
static int bloom_count_queries = 0;
1264+
static int bloom_count_no = 0;
1265+
static int maybe_changed_path(struct repository *r,
1266+
struct blame_origin *origin,
1267+
struct blame_bloom_data *bd)
1268+
{
1269+
int i;
1270+
struct bloom_filter *filter;
1271+
1272+
if (!bd)
1273+
return 1;
1274+
1275+
if (origin->commit->generation == GENERATION_NUMBER_INFINITY)
1276+
return 1;
1277+
1278+
filter = get_bloom_filter(r, origin->commit, 0);
1279+
1280+
if (!filter)
1281+
return 1;
1282+
1283+
bloom_count_queries++;
1284+
for (i = 0; i < bd->nr; i++) {
1285+
if (bloom_filter_contains(filter,
1286+
bd->keys[i],
1287+
bd->settings))
1288+
return 1;
1289+
}
1290+
1291+
bloom_count_no++;
1292+
return 0;
1293+
}
1294+
1295+
static void add_bloom_key(struct blame_bloom_data *bd,
1296+
const char *path)
1297+
{
1298+
if (!bd)
1299+
return;
1300+
1301+
if (bd->nr >= bd->alloc) {
1302+
bd->alloc *= 2;
1303+
REALLOC_ARRAY(bd->keys, bd->alloc);
1304+
}
1305+
1306+
bd->keys[bd->nr] = xmalloc(sizeof(struct bloom_key));
1307+
fill_bloom_key(path, strlen(path), bd->keys[bd->nr], bd->settings);
1308+
bd->nr++;
1309+
}
1310+
12491311
/*
12501312
* We have an origin -- check if the same path exists in the
12511313
* parent and return an origin structure to represent it.
12521314
*/
12531315
static struct blame_origin *find_origin(struct repository *r,
12541316
struct commit *parent,
1255-
struct blame_origin *origin)
1317+
struct blame_origin *origin,
1318+
struct blame_bloom_data *bd)
12561319
{
12571320
struct blame_origin *porigin;
12581321
struct diff_options diff_opts;
@@ -1286,10 +1349,18 @@ static struct blame_origin *find_origin(struct repository *r,
12861349

12871350
if (is_null_oid(&origin->commit->object.oid))
12881351
do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
1289-
else
1290-
diff_tree_oid(get_commit_tree_oid(parent),
1291-
get_commit_tree_oid(origin->commit),
1292-
"", &diff_opts);
1352+
else {
1353+
int compute_diff = 1;
1354+
if (origin->commit->parents &&
1355+
!oidcmp(&parent->object.oid,
1356+
&origin->commit->parents->item->object.oid))
1357+
compute_diff = maybe_changed_path(r, origin, bd);
1358+
1359+
if (compute_diff)
1360+
diff_tree_oid(get_commit_tree_oid(parent),
1361+
get_commit_tree_oid(origin->commit),
1362+
"", &diff_opts);
1363+
}
12931364
diffcore_std(&diff_opts);
12941365

12951366
if (!diff_queued_diff.nr) {
@@ -1341,7 +1412,8 @@ static struct blame_origin *find_origin(struct repository *r,
13411412
*/
13421413
static struct blame_origin *find_rename(struct repository *r,
13431414
struct commit *parent,
1344-
struct blame_origin *origin)
1415+
struct blame_origin *origin,
1416+
struct blame_bloom_data *bd)
13451417
{
13461418
struct blame_origin *porigin = NULL;
13471419
struct diff_options diff_opts;
@@ -1366,6 +1438,7 @@ static struct blame_origin *find_rename(struct repository *r,
13661438
struct diff_filepair *p = diff_queued_diff.queue[i];
13671439
if ((p->status == 'R' || p->status == 'C') &&
13681440
!strcmp(p->two->path, origin->path)) {
1441+
add_bloom_key(bd, p->one->path);
13691442
porigin = get_origin(parent, p->one->path);
13701443
oidcpy(&porigin->blob_oid, &p->one->oid);
13711444
porigin->mode = p->one->mode;
@@ -2332,6 +2405,11 @@ static void distribute_blame(struct blame_scoreboard *sb, struct blame_entry *bl
23322405

23332406
#define MAXSG 16
23342407

2408+
typedef struct blame_origin *(*blame_find_alg)(struct repository *,
2409+
struct commit *,
2410+
struct blame_origin *,
2411+
struct blame_bloom_data *);
2412+
23352413
static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, int opt)
23362414
{
23372415
struct rev_info *revs = sb->revs;
@@ -2356,8 +2434,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
23562434
* common cases, then we look for renames in the second pass.
23572435
*/
23582436
for (pass = 0; pass < 2 - sb->no_whole_file_rename; pass++) {
2359-
struct blame_origin *(*find)(struct repository *, struct commit *, struct blame_origin *);
2360-
find = pass ? find_rename : find_origin;
2437+
blame_find_alg find = pass ? find_rename : find_origin;
23612438

23622439
for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
23632440
i < num_sg && sg;
@@ -2369,7 +2446,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
23692446
continue;
23702447
if (parse_commit(p))
23712448
continue;
2372-
porigin = find(sb->repo, p, origin);
2449+
porigin = find(sb->repo, p, origin, sb->bloom_data);
23732450
if (!porigin)
23742451
continue;
23752452
if (oideq(&porigin->blob_oid, &origin->blob_oid)) {
@@ -2809,3 +2886,45 @@ struct blame_entry *blame_entry_prepend(struct blame_entry *head,
28092886
blame_origin_incref(o);
28102887
return new_head;
28112888
}
2889+
2890+
void setup_blame_bloom_data(struct blame_scoreboard *sb,
2891+
const char *path)
2892+
{
2893+
struct blame_bloom_data *bd;
2894+
2895+
if (!sb->repo->objects->commit_graph)
2896+
return;
2897+
2898+
if (!sb->repo->objects->commit_graph->bloom_filter_settings)
2899+
return;
2900+
2901+
bd = xmalloc(sizeof(struct blame_bloom_data));
2902+
2903+
bd->settings = sb->repo->objects->commit_graph->bloom_filter_settings;
2904+
2905+
bd->alloc = 4;
2906+
bd->nr = 0;
2907+
ALLOC_ARRAY(bd->keys, bd->alloc);
2908+
2909+
add_bloom_key(bd, path);
2910+
2911+
sb->bloom_data = bd;
2912+
}
2913+
2914+
void cleanup_scoreboard(struct blame_scoreboard *sb)
2915+
{
2916+
if (sb->bloom_data) {
2917+
int i;
2918+
for (i = 0; i < sb->bloom_data->nr; i++) {
2919+
free(sb->bloom_data->keys[i]->hashes);
2920+
free(sb->bloom_data->keys[i]);
2921+
}
2922+
free(sb->bloom_data->keys);
2923+
FREE_AND_NULL(sb->bloom_data);
2924+
2925+
trace2_data_intmax("blame", sb->repo,
2926+
"bloom/queries", bloom_count_queries);
2927+
trace2_data_intmax("blame", sb->repo,
2928+
"bloom/response-no", bloom_count_no);
2929+
}
2930+
}

blame.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ struct blame_entry {
100100
int unblamable;
101101
};
102102

103+
struct blame_bloom_data;
104+
103105
/*
104106
* The current state of the blame assignment.
105107
*/
@@ -156,6 +158,7 @@ struct blame_scoreboard {
156158
void(*found_guilty_entry)(struct blame_entry *, void *);
157159

158160
void *found_guilty_entry_data;
161+
struct blame_bloom_data *bloom_data;
159162
};
160163

161164
/*
@@ -180,6 +183,9 @@ void init_scoreboard(struct blame_scoreboard *sb);
180183
void setup_scoreboard(struct blame_scoreboard *sb,
181184
const char *path,
182185
struct blame_origin **orig);
186+
void setup_blame_bloom_data(struct blame_scoreboard *sb,
187+
const char *path);
188+
void cleanup_scoreboard(struct blame_scoreboard *sb);
183189

184190
struct blame_entry *blame_entry_prepend(struct blame_entry *head,
185191
long start, long end,

builtin/blame.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,14 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
10611061
string_list_clear(&ignore_revs_file_list, 0);
10621062
string_list_clear(&ignore_rev_list, 0);
10631063
setup_scoreboard(&sb, path, &o);
1064+
1065+
/*
1066+
* Changed-path Bloom filters are disabled when looking
1067+
* for copies.
1068+
*/
1069+
if (!(opt & PICKAXE_BLAME_COPY))
1070+
setup_blame_bloom_data(&sb, path);
1071+
10641072
lno = sb.num_lines;
10651073

10661074
if (lno && !range_list.nr)
@@ -1164,5 +1172,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
11641172
printf("num get patch: %d\n", sb.num_get_patch);
11651173
printf("num commits: %d\n", sb.num_commits);
11661174
}
1175+
1176+
cleanup_scoreboard(&sb);
11671177
return 0;
11681178
}

builtin/commit.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,9 +1700,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix)
17001700
"new_index file. Check that disk is not full and quota is\n"
17011701
"not exceeded, and then \"git restore --staged :/\" to recover."));
17021702

1703-
if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
1704-
write_commit_graph_reachable(the_repository->objects->odb, 0, NULL))
1705-
return 1;
1703+
git_test_write_commit_graph_or_die();
17061704

17071705
repo_rerere(the_repository, 0);
17081706
run_command_v_opt(argv_gc_auto, RUN_GIT_CMD);

builtin/merge.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "branch.h"
4141
#include "commit-reach.h"
4242
#include "wt-status.h"
43+
#include "commit-graph.h"
4344

4445
#define DEFAULT_TWOHEAD (1<<0)
4546
#define DEFAULT_OCTOPUS (1<<1)
@@ -1700,9 +1701,11 @@ int cmd_merge(int argc, const char **argv, const char *prefix)
17001701
head_commit);
17011702
}
17021703

1703-
if (squash)
1704+
if (squash) {
17041705
finish(head_commit, remoteheads, NULL, NULL);
1705-
else
1706+
1707+
git_test_write_commit_graph_or_die();
1708+
} else
17061709
write_merge_state(remoteheads);
17071710

17081711
if (merge_was_ok)

commit-graph.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@
1919
#include "bloom.h"
2020
#include "commit-slab.h"
2121

22+
void git_test_write_commit_graph_or_die(void)
23+
{
24+
int flags = 0;
25+
if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0))
26+
return;
27+
28+
if (git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
29+
flags = COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
30+
31+
if (write_commit_graph_reachable(the_repository->objects->odb,
32+
flags, NULL))
33+
die("failed to write commit-graph under GIT_TEST_COMMIT_GRAPH");
34+
}
35+
2236
#define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
2337
#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
2438
#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */

commit-graph.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,15 @@
1212
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD"
1313
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
1414

15+
/*
16+
* This method is only used to enhance coverage of the commit-graph
17+
* feature in the test suite with the GIT_TEST_COMMIT_GRAPH and
18+
* GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS environment variables. Do not
19+
* call this method oustide of a builtin, and only if you know what
20+
* you are doing!
21+
*/
22+
void git_test_write_commit_graph_or_die(void);
23+
1524
struct commit;
1625
struct bloom_filter_settings;
1726

revision.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,20 @@ static void trace2_bloom_filter_statistics_atexit(void)
650650
jw_release(&jw);
651651
}
652652

653+
static int forbid_bloom_filters(struct pathspec *spec)
654+
{
655+
if (spec->has_wildcard)
656+
return 1;
657+
if (spec->nr > 1)
658+
return 1;
659+
if (spec->magic & ~PATHSPEC_LITERAL)
660+
return 1;
661+
if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL))
662+
return 1;
663+
664+
return 0;
665+
}
666+
653667
static void prepare_to_use_bloom_filter(struct rev_info *revs)
654668
{
655669
struct pathspec_item *pi;
@@ -659,7 +673,10 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
659673
int len;
660674

661675
if (!revs->commits)
662-
return;
676+
return;
677+
678+
if (forbid_bloom_filters(&revs->prune_data))
679+
return;
663680

664681
repo_parse_commit(revs->repo, revs->commits->item);
665682

0 commit comments

Comments
 (0)