Skip to content

Commit a31fa1f

Browse files
jeffhostetler authored and dscho committed
survey: add pathname of blob or tree to large_item_vec
Include the pathname of each blob or tree in the large_item_vec to help identify the file or directory associated with the OID and size information. This pathname is computed during the path walk, so it reflects the first pathname observed for that OID during the traversal over all of the refs. Since the file or directory could have moved (without being modified), there may be multiple "correct" pathnames for a particular OID. Because we do not control the ref traversal order, the recorded pathname should be treated as a "suggested pathname" for the OID.

Signed-off-by: Jeff Hostetler <[email protected]>
1 parent fbd0935 commit a31fa1f

File tree

1 file changed

+32
-11
lines changed

1 file changed

+32
-11
lines changed

builtin/survey.c

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ static void incr_obj_hist_bin(struct obj_hist_bin *pbin,
156156
struct large_item {
157157
uint64_t size;
158158
struct object_id oid;
159+
struct strbuf name;
159160
};
160161

161162
struct large_item_vec {
@@ -171,6 +172,7 @@ static struct large_item_vec *alloc_large_item_vec(const char *dimension_label,
171172
{
172173
struct large_item_vec *vec;
173174
size_t flex_len = nr_items * sizeof(struct large_item);
175+
size_t k;
174176

175177
if (!nr_items)
176178
return NULL;
@@ -180,6 +182,9 @@ static struct large_item_vec *alloc_large_item_vec(const char *dimension_label,
180182
vec->item_label = strdup(item_label);
181183
vec->nr_items = nr_items;
182184

185+
for (k = 0; k < nr_items; k++)
186+
strbuf_init(&vec->items[k].name, 0);
187+
183188
return vec;
184189
}
185190

@@ -188,14 +193,18 @@ static void free_large_item_vec(struct large_item_vec *vec)
188193
if (!vec)
189194
return;
190195

196+
for (size_t k = 0; k < vec->nr_items; k++)
197+
strbuf_release(&vec->items[k].name);
198+
191199
free(vec->dimension_label);
192200
free(vec->item_label);
193201
free(vec);
194202
}
195203

196204
static void maybe_insert_large_item(struct large_item_vec *vec,
197205
uint64_t size,
198-
struct object_id *oid)
206+
struct object_id *oid,
207+
const char *name)
199208
{
200209
size_t rest_len;
201210
size_t k;
@@ -215,7 +224,14 @@ static void maybe_insert_large_item(struct large_item_vec *vec,
215224
if (size < vec->items[k].size)
216225
continue;
217226

218-
/* push items[k..] down one and insert it here */
227+
/*
228+
* The last large_item in the vector is about to be
229+
* overwritten by the previous one during the shift.
230+
* Release its strbuf first so that its buffer is not leaked.
231+
*/
232+
strbuf_release(&vec->items[vec->nr_items - 1].name);
233+
234+
/* push items[k..] down one and insert data for this item here */
219235

220236
rest_len = (vec->nr_items - k - 1) * sizeof(struct large_item);
221237
if (rest_len)
@@ -224,6 +240,10 @@ static void maybe_insert_large_item(struct large_item_vec *vec,
224240
memset(&vec->items[k], 0, sizeof(struct large_item));
225241
vec->items[k].size = size;
226242
oidcpy(&vec->items[k].oid, oid);
243+
strbuf_init(&vec->items[k].name, 0);
244+
if (name && *name)
245+
strbuf_addstr(&vec->items[k].name, name);
246+
227247
return;
228248
}
229249
}
@@ -728,15 +748,15 @@ static void survey_report_largest_vec(struct large_item_vec *vec)
728748
return;
729749

730750
table.table_name = vec->dimension_label;
731-
strvec_pushl(&table.header, "Size", "OID", NULL);
751+
strvec_pushl(&table.header, "Size", "OID", "Name", NULL);
732752

733753
for (size_t k = 0; k < vec->nr_items; k++) {
734754
struct large_item *pk = &vec->items[k];
735755
if (!is_null_oid(&pk->oid)) {
736756
strbuf_reset(&size);
737757
strbuf_addf(&size, "%"PRIuMAX, (uintmax_t)pk->size);
738758

739-
insert_table_rowv(&table, size.buf, oid_to_hex(&pk->oid), NULL);
759+
insert_table_rowv(&table, size.buf, oid_to_hex(&pk->oid), pk->name.buf, NULL);
740760
}
741761
}
742762
strbuf_release(&size);
@@ -1198,7 +1218,8 @@ static void increment_object_counts(
11981218

11991219
static void increment_totals(struct survey_context *ctx,
12001220
struct oid_array *oids,
1201-
struct survey_report_object_size_summary *summary)
1221+
struct survey_report_object_size_summary *summary,
1222+
const char *path)
12021223
{
12031224
for (size_t i = 0; i < oids->nr; i++) {
12041225
struct object_info oi = OBJECT_INFO_INIT;
@@ -1234,8 +1255,8 @@ static void increment_totals(struct survey_context *ctx,
12341255
ctx->report.reachable_objects.commits.parent_cnt_pbin[k]++;
12351256
base = &ctx->report.reachable_objects.commits.base;
12361257

1237-
maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_nr_parents, k, &commit->object.oid);
1238-
maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_size_bytes, object_length, &commit->object.oid);
1258+
maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_nr_parents, k, &commit->object.oid, NULL);
1259+
maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_size_bytes, object_length, &commit->object.oid, NULL);
12391260
break;
12401261
}
12411262
case OBJ_TREE: {
@@ -1255,8 +1276,8 @@ static void increment_totals(struct survey_context *ctx,
12551276

12561277
pst->sum_entries += nr_entries;
12571278

1258-
maybe_insert_large_item(pst->vec_largest_by_nr_entries, nr_entries, &tree->object.oid);
1259-
maybe_insert_large_item(pst->vec_largest_by_size_bytes, object_length, &tree->object.oid);
1279+
maybe_insert_large_item(pst->vec_largest_by_nr_entries, nr_entries, &tree->object.oid, path);
1280+
maybe_insert_large_item(pst->vec_largest_by_size_bytes, object_length, &tree->object.oid, path);
12601281

12611282
qb = qbin(nr_entries);
12621283
incr_obj_hist_bin(&pst->entry_qbin[qb], object_length, disk_sizep);
@@ -1267,7 +1288,7 @@ static void increment_totals(struct survey_context *ctx,
12671288
case OBJ_BLOB:
12681289
base = &ctx->report.reachable_objects.blobs.base;
12691290

1270-
maybe_insert_large_item(ctx->report.reachable_objects.blobs.vec_largest_by_size_bytes, object_length, &oids->oid[i]);
1291+
maybe_insert_large_item(ctx->report.reachable_objects.blobs.vec_largest_by_size_bytes, object_length, &oids->oid[i], path);
12711292
break;
12721293
default:
12731294
continue;
@@ -1307,7 +1328,7 @@ static void increment_object_totals(struct survey_context *ctx,
13071328
struct survey_report_object_size_summary *total;
13081329
struct survey_report_object_size_summary summary = { 0 };
13091330

1310-
increment_totals(ctx, oids, &summary);
1331+
increment_totals(ctx, oids, &summary, path);
13111332

13121333
switch (type) {
13131334
case OBJ_COMMIT:

0 commit comments

Comments
 (0)