Skip to content

Commit fae9627

Browse files
peffgitster
authored andcommitted
upload-pack: drop separate v2 "haves" array
When upload-pack sees a "have" line in the v0 protocol, it immediately calls got_oid() with its argument and potentially produces an ACK response. In the v2 protocol, we simply record the argument in an oid_array, and only later process all of the "have" objects by calling the equivalent of got_oid() on the contents of the array. This makes some sense, as v2 is a pure request/response protocol, as opposed to v0's asynchronous negotiation phase. But there's a downside: a client can send us an infinite number of garbage "have" lines, which we'll happily slurp into the array, consuming memory. Whereas in v0, they are limited by the number of objects in the repository (because got_oid() only records objects we have ourselves, and we avoid duplicates by setting a flag on the object struct). We can make v2 behave more like v0 by also calling got_oid() directly when v2 parses a "have" line. Calling it early like this is OK because got_oid() itself does not interact with the client; it only confirms that we have the object and sets a few flags. Note that unlike v0, v2 does not ever (before or after this patch) check the return code of got_oid(), which lets the caller know whether we have the object. But again, that makes sense; v0 is using it to asynchronously tell the client to stop sending. In v2's synchronous protocol, we just discard those entries (and decide how to ACK at the end of each round). There is one slight tweak we need, though. In v2's state machine, we reach the SEND_ACKS state if the other side sent us any "have" lines, whether they were useful or not. Right now we do that by checking whether the "have" array had any entries, but if we record only the useful ones, that doesn't work. Instead, we can add a simple boolean that tells us whether we saw any have line (even if it was useless). This lets us drop the "haves" array entirely, as we're now placing objects directly into the "have_obj" object array (which is where got_oid() put them in the long run anyway). And as a bonus, we can drop the secondary "common" array used in process_haves_and_send_acks(). It was essentially a copy of "haves" minus the objects we do not have. But now that we are using "have_obj" directly, we know everything in it is useful. So in addition to protecting ourselves against malicious input, we should slightly lower our memory usage for normal inputs. Note that there is one user-visible effect. The trace2 output records the number of "haves". Previously this was the total number of "have" lines we saw, but now is the number of useful ones. We could retain the original meaning by keeping a separate counter, but it doesn't seem worth the effort; this trace info is for debugging and metrics, and arguably the count of common oids is at least as useful as the total count. Reported-by: Benjamin Flesch <[email protected]> Signed-off-by: Jeff King <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 3c2a3fd commit fae9627

File tree

1 file changed

+10
-38
lines changed

1 file changed

+10
-38
lines changed

upload-pack.c

Lines changed: 10 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ struct upload_pack_data {
6161
struct string_list symref; /* v0 only */
6262
struct object_array want_obj;
6363
struct object_array have_obj;
64-
struct oid_array haves; /* v2 only */
6564
struct string_list wanted_refs; /* v2 only */
6665
struct strvec hidden_refs;
6766

@@ -113,6 +112,7 @@ struct upload_pack_data {
113112
unsigned done : 1; /* v2 only */
114113
unsigned allow_ref_in_want : 1; /* v2 only */
115114
unsigned allow_sideband_all : 1; /* v2 only */
115+
unsigned seen_haves : 1; /* v2 only */
116116
unsigned advertise_sid : 1;
117117
unsigned sent_capabilities : 1;
118118
};
@@ -124,7 +124,6 @@ static void upload_pack_data_init(struct upload_pack_data *data)
124124
struct strvec hidden_refs = STRVEC_INIT;
125125
struct object_array want_obj = OBJECT_ARRAY_INIT;
126126
struct object_array have_obj = OBJECT_ARRAY_INIT;
127-
struct oid_array haves = OID_ARRAY_INIT;
128127
struct object_array shallows = OBJECT_ARRAY_INIT;
129128
struct string_list deepen_not = STRING_LIST_INIT_DUP;
130129
struct string_list uri_protocols = STRING_LIST_INIT_DUP;
@@ -137,7 +136,6 @@ static void upload_pack_data_init(struct upload_pack_data *data)
137136
data->hidden_refs = hidden_refs;
138137
data->want_obj = want_obj;
139138
data->have_obj = have_obj;
140-
data->haves = haves;
141139
data->shallows = shallows;
142140
data->deepen_not = deepen_not;
143141
data->uri_protocols = uri_protocols;
@@ -159,7 +157,6 @@ static void upload_pack_data_clear(struct upload_pack_data *data)
159157
strvec_clear(&data->hidden_refs);
160158
object_array_clear(&data->want_obj);
161159
object_array_clear(&data->have_obj);
162-
oid_array_clear(&data->haves);
163160
object_array_clear(&data->shallows);
164161
string_list_clear(&data->deepen_not, 0);
165162
object_array_clear(&data->extra_edge_obj);
@@ -1532,15 +1529,14 @@ static int parse_want_ref(struct packet_writer *writer, const char *line,
15321529
return 0;
15331530
}
15341531

1535-
static int parse_have(const char *line, struct oid_array *haves)
1532+
static int parse_have(const char *line, struct upload_pack_data *data)
15361533
{
15371534
const char *arg;
15381535
if (skip_prefix(line, "have ", &arg)) {
15391536
struct object_id oid;
15401537

1541-
if (get_oid_hex(arg, &oid))
1542-
die("git upload-pack: expected SHA1 object, got '%s'", arg);
1543-
oid_array_append(haves, &oid);
1538+
got_oid(data, arg, &oid);
1539+
data->seen_haves = 1;
15441540
return 1;
15451541
}
15461542

@@ -1552,7 +1548,7 @@ static void trace2_fetch_info(struct upload_pack_data *data)
15521548
struct json_writer jw = JSON_WRITER_INIT;
15531549

15541550
jw_object_begin(&jw, 0);
1555-
jw_object_intmax(&jw, "haves", data->haves.nr);
1551+
jw_object_intmax(&jw, "haves", data->have_obj.nr);
15561552
jw_object_intmax(&jw, "wants", data->want_obj.nr);
15571553
jw_object_intmax(&jw, "want-refs", data->wanted_refs.nr);
15581554
jw_object_intmax(&jw, "depth", data->depth);
@@ -1586,7 +1582,7 @@ static void process_args(struct packet_reader *request,
15861582
&data->hidden_refs, &data->want_obj))
15871583
continue;
15881584
/* process have line */
1589-
if (parse_have(arg, &data->haves))
1585+
if (parse_have(arg, data))
15901586
continue;
15911587

15921588
/* process args like thin-pack */
@@ -1664,27 +1660,7 @@ static void process_args(struct packet_reader *request,
16641660
trace2_fetch_info(data);
16651661
}
16661662

1667-
static int process_haves(struct upload_pack_data *data, struct oid_array *common)
1668-
{
1669-
int i;
1670-
1671-
/* Process haves */
1672-
for (i = 0; i < data->haves.nr; i++) {
1673-
const struct object_id *oid = &data->haves.oid[i];
1674-
1675-
if (!repo_has_object_file_with_flags(the_repository, oid,
1676-
OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT))
1677-
continue;
1678-
1679-
oid_array_append(common, oid);
1680-
1681-
do_got_oid(data, oid);
1682-
}
1683-
1684-
return 0;
1685-
}
1686-
1687-
static int send_acks(struct upload_pack_data *data, struct oid_array *acks)
1663+
static int send_acks(struct upload_pack_data *data, struct object_array *acks)
16881664
{
16891665
int i;
16901666

@@ -1696,7 +1672,7 @@ static int send_acks(struct upload_pack_data *data, struct oid_array *acks)
16961672

16971673
for (i = 0; i < acks->nr; i++) {
16981674
packet_writer_write(&data->writer, "ACK %s\n",
1699-
oid_to_hex(&acks->oid[i]));
1675+
oid_to_hex(&acks->objects[i].item->oid));
17001676
}
17011677

17021678
if (!data->wait_for_done && ok_to_give_up(data)) {
@@ -1710,13 +1686,11 @@ static int send_acks(struct upload_pack_data *data, struct oid_array *acks)
17101686

17111687
static int process_haves_and_send_acks(struct upload_pack_data *data)
17121688
{
1713-
struct oid_array common = OID_ARRAY_INIT;
17141689
int ret = 0;
17151690

1716-
process_haves(data, &common);
17171691
if (data->done) {
17181692
ret = 1;
1719-
} else if (send_acks(data, &common)) {
1693+
} else if (send_acks(data, &data->have_obj)) {
17201694
packet_writer_delim(&data->writer);
17211695
ret = 1;
17221696
} else {
@@ -1725,8 +1699,6 @@ static int process_haves_and_send_acks(struct upload_pack_data *data)
17251699
ret = 0;
17261700
}
17271701

1728-
oid_array_clear(&data->haves);
1729-
oid_array_clear(&common);
17301702
return ret;
17311703
}
17321704

@@ -1796,7 +1768,7 @@ int upload_pack_v2(struct repository *r UNUSED, struct packet_reader *request)
17961768
* they didn't want anything.
17971769
*/
17981770
state = FETCH_DONE;
1799-
} else if (data.haves.nr) {
1771+
} else if (data.seen_haves) {
18001772
/*
18011773
* Request had 'have' lines, so lets ACK them.
18021774
*/

0 commit comments

Comments
 (0)