Skip to content

Commit e9e8adf

Browse files
matheustavares, pclouds, and jeffhostetler
authored and committed
parallel-checkout: make it truly parallel
Use multiple worker processes to distribute the queued entries and call write_pc_item() in parallel for them. The items are distributed uniformly in contiguous chunks. This minimizes the chances of two workers writing to the same directory simultaneously, which could affect performance due to lock contention in the kernel. Work stealing (or any other form of re-distribution) is not implemented yet. The protocol between the main process and the workers is quite simple. They exchange binary messages packed in pkt-line format, and use PKT-FLUSH to mark the end of input (from both sides). The main process starts the communication by sending N pkt-lines, each corresponding to an item that needs to be written. These packets contain all the necessary information to load, smudge, and write the blob associated with each item. Then it waits for the worker to send back N pkt-lines containing the results for each item. The resulting packet must contain: the identification number of the item that it refers to, the status of the operation, and the lstat() data gathered after writing the file (iff the operation was successful). For now, checkout always uses a hardcoded value of 2 workers, only to demonstrate that the parallel checkout framework correctly divides and writes the queued entries. The next patch will add user configurations and define a more reasonable default, based on tests with the said settings. Co-authored-by: Nguyễn Thái Ngọc Duy <[email protected]> Co-authored-by: Jeff Hostetler <[email protected]> Signed-off-by: Matheus Tavares <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 04155bd commit e9e8adf

File tree

7 files changed

+496
-27
lines changed

7 files changed

+496
-27
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
/git-check-mailmap
3434
/git-check-ref-format
3535
/git-checkout
36+
/git-checkout--worker
3637
/git-checkout-index
3738
/git-cherry
3839
/git-cherry-pick

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,7 @@ BUILTIN_OBJS += builtin/check-attr.o
10621062
BUILTIN_OBJS += builtin/check-ignore.o
10631063
BUILTIN_OBJS += builtin/check-mailmap.o
10641064
BUILTIN_OBJS += builtin/check-ref-format.o
1065+
BUILTIN_OBJS += builtin/checkout--worker.o
10651066
BUILTIN_OBJS += builtin/checkout-index.o
10661067
BUILTIN_OBJS += builtin/checkout.o
10671068
BUILTIN_OBJS += builtin/clean.o

builtin.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ int cmd_bugreport(int argc, const char **argv, const char *prefix);
123123
int cmd_bundle(int argc, const char **argv, const char *prefix);
124124
int cmd_cat_file(int argc, const char **argv, const char *prefix);
125125
int cmd_checkout(int argc, const char **argv, const char *prefix);
126+
int cmd_checkout__worker(int argc, const char **argv, const char *prefix);
126127
int cmd_checkout_index(int argc, const char **argv, const char *prefix);
127128
int cmd_check_attr(int argc, const char **argv, const char *prefix);
128129
int cmd_check_ignore(int argc, const char **argv, const char *prefix);

builtin/checkout--worker.c

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
#include "builtin.h"
2+
#include "config.h"
3+
#include "entry.h"
4+
#include "parallel-checkout.h"
5+
#include "parse-options.h"
6+
#include "pkt-line.h"
7+
8+
static void packet_to_pc_item(const char *buffer, int len,
9+
struct parallel_checkout_item *pc_item)
10+
{
11+
const struct pc_item_fixed_portion *fixed_portion;
12+
const char *variant;
13+
char *encoding;
14+
15+
if (len < sizeof(struct pc_item_fixed_portion))
16+
BUG("checkout worker received too short item (got %dB, exp %dB)",
17+
len, (int)sizeof(struct pc_item_fixed_portion));
18+
19+
fixed_portion = (struct pc_item_fixed_portion *)buffer;
20+
21+
if (len - sizeof(struct pc_item_fixed_portion) !=
22+
fixed_portion->name_len + fixed_portion->working_tree_encoding_len)
23+
BUG("checkout worker received corrupted item");
24+
25+
variant = buffer + sizeof(struct pc_item_fixed_portion);
26+
27+
/*
28+
* Note: the main process uses zero length to communicate that the
29+
* encoding is NULL. There is no use case that requires sending an
30+
* actual empty string, since convert_attrs() never sets
31+
* ca.working_tree_enconding to "".
32+
*/
33+
if (fixed_portion->working_tree_encoding_len) {
34+
encoding = xmemdupz(variant,
35+
fixed_portion->working_tree_encoding_len);
36+
variant += fixed_portion->working_tree_encoding_len;
37+
} else {
38+
encoding = NULL;
39+
}
40+
41+
memset(pc_item, 0, sizeof(*pc_item));
42+
pc_item->ce = make_empty_transient_cache_entry(fixed_portion->name_len);
43+
pc_item->ce->ce_namelen = fixed_portion->name_len;
44+
pc_item->ce->ce_mode = fixed_portion->ce_mode;
45+
memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen);
46+
oidcpy(&pc_item->ce->oid, &fixed_portion->oid);
47+
48+
pc_item->id = fixed_portion->id;
49+
pc_item->ca.crlf_action = fixed_portion->crlf_action;
50+
pc_item->ca.ident = fixed_portion->ident;
51+
pc_item->ca.working_tree_encoding = encoding;
52+
}
53+
54+
static void report_result(struct parallel_checkout_item *pc_item)
55+
{
56+
struct pc_item_result res;
57+
size_t size;
58+
59+
res.id = pc_item->id;
60+
res.status = pc_item->status;
61+
62+
if (pc_item->status == PC_ITEM_WRITTEN) {
63+
res.st = pc_item->st;
64+
size = sizeof(res);
65+
} else {
66+
size = PC_ITEM_RESULT_BASE_SIZE;
67+
}
68+
69+
packet_write(1, (const char *)&res, size);
70+
}
71+
72+
/* Free the worker-side malloced data, but not pc_item itself. */
73+
static void release_pc_item_data(struct parallel_checkout_item *pc_item)
74+
{
75+
free((char *)pc_item->ca.working_tree_encoding);
76+
discard_cache_entry(pc_item->ce);
77+
}
78+
79+
static void worker_loop(struct checkout *state)
80+
{
81+
struct parallel_checkout_item *items = NULL;
82+
size_t i, nr = 0, alloc = 0;
83+
84+
while (1) {
85+
int len = packet_read(0, NULL, NULL, packet_buffer,
86+
sizeof(packet_buffer), 0);
87+
88+
if (len < 0)
89+
BUG("packet_read() returned negative value");
90+
else if (!len)
91+
break;
92+
93+
ALLOC_GROW(items, nr + 1, alloc);
94+
packet_to_pc_item(packet_buffer, len, &items[nr++]);
95+
}
96+
97+
for (i = 0; i < nr; i++) {
98+
struct parallel_checkout_item *pc_item = &items[i];
99+
write_pc_item(pc_item, state);
100+
report_result(pc_item);
101+
release_pc_item_data(pc_item);
102+
}
103+
104+
packet_flush(1);
105+
106+
free(items);
107+
}
108+
109+
/* NULL-terminated usage synopsis handed to the option-parsing helpers. */
static const char * const checkout_worker_usage[] = {
	N_("git checkout--worker [<options>]"),
	NULL
};
113+
114+
int cmd_checkout__worker(int argc, const char **argv, const char *prefix)
115+
{
116+
struct checkout state = CHECKOUT_INIT;
117+
struct option checkout_worker_options[] = {
118+
OPT_STRING(0, "prefix", &state.base_dir, N_("string"),
119+
N_("when creating files, prepend <string>")),
120+
OPT_END()
121+
};
122+
123+
if (argc == 2 && !strcmp(argv[1], "-h"))
124+
usage_with_options(checkout_worker_usage,
125+
checkout_worker_options);
126+
127+
git_config(git_default_config, NULL);
128+
argc = parse_options(argc, argv, prefix, checkout_worker_options,
129+
checkout_worker_usage, 0);
130+
if (argc > 0)
131+
usage_with_options(checkout_worker_usage, checkout_worker_options);
132+
133+
if (state.base_dir)
134+
state.base_dir_len = strlen(state.base_dir);
135+
136+
/*
137+
* Setting this on a worker won't actually update the index. We just
138+
* need to tell the checkout machinery to lstat() the written entries,
139+
* so that we can send this data back to the main process.
140+
*/
141+
state.refresh_cache = 1;
142+
143+
worker_loop(&state);
144+
return 0;
145+
}

git.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,8 @@ static struct cmd_struct commands[] = {
490490
{ "check-mailmap", cmd_check_mailmap, RUN_SETUP },
491491
{ "check-ref-format", cmd_check_ref_format, NO_PARSEOPT },
492492
{ "checkout", cmd_checkout, RUN_SETUP | NEED_WORK_TREE },
493+
{ "checkout--worker", cmd_checkout__worker,
494+
RUN_SETUP | NEED_WORK_TREE | SUPPORT_SUPER_PREFIX },
493495
{ "checkout-index", cmd_checkout_index,
494496
RUN_SETUP | NEED_WORK_TREE},
495497
{ "cherry", cmd_cherry, RUN_SETUP },

0 commit comments

Comments
 (0)