@@ -41,6 +41,15 @@ static struct survey_refs_wanted default_ref_options = {
41
41
struct survey_opts {
42
42
int verbose ;
43
43
int show_progress ;
44
+
45
+ int show_largest_commits_by_nr_parents ;
46
+ int show_largest_commits_by_size_bytes ;
47
+
48
+ int show_largest_trees_by_nr_entries ;
49
+ int show_largest_trees_by_size_bytes ;
50
+
51
+ int show_largest_blobs_by_size_bytes ;
52
+
44
53
int top_nr ;
45
54
struct survey_refs_wanted refs ;
46
55
};
@@ -138,6 +147,87 @@ static void incr_obj_hist_bin(struct obj_hist_bin *pbin,
138
147
pbin -> cnt_seen ++ ;
139
148
}
140
149
150
+ /*
151
+ * Remember the largest n objects for some scaling dimension. This
152
+ * could be the observed object size or number of entries in a tree.
153
+ * We'll use this to generate a sorted vector in the output for that
154
+ * dimension.
155
+ */
156
+ struct large_item {
157
+ uint64_t size ;
158
+ struct object_id oid ;
159
+ };
160
+
161
+ struct large_item_vec {
162
+ char * dimension_label ;
163
+ char * item_label ;
164
+ uint64_t nr_items ;
165
+ struct large_item items [FLEX_ARRAY ]; /* nr_items */
166
+ };
167
+
168
+ static struct large_item_vec * alloc_large_item_vec (const char * dimension_label ,
169
+ const char * item_label ,
170
+ uint64_t nr_items )
171
+ {
172
+ struct large_item_vec * vec ;
173
+ size_t flex_len = nr_items * sizeof (struct large_item );
174
+
175
+ if (!nr_items )
176
+ return NULL ;
177
+
178
+ vec = xcalloc (1 , (sizeof (struct large_item_vec ) + flex_len ));
179
+ vec -> dimension_label = strdup (dimension_label );
180
+ vec -> item_label = strdup (item_label );
181
+ vec -> nr_items = nr_items ;
182
+
183
+ return vec ;
184
+ }
185
+
186
+ static void free_large_item_vec (struct large_item_vec * vec )
187
+ {
188
+ if (!vec )
189
+ return ;
190
+
191
+ free (vec -> dimension_label );
192
+ free (vec -> item_label );
193
+ free (vec );
194
+ }
195
+
196
+ static void maybe_insert_large_item (struct large_item_vec * vec ,
197
+ uint64_t size ,
198
+ struct object_id * oid )
199
+ {
200
+ size_t rest_len ;
201
+ size_t k ;
202
+
203
+ if (!vec || !vec -> nr_items )
204
+ return ;
205
+
206
+ /*
207
+ * Since the odds an object being among the largest n
208
+ * is small, shortcut and see if it is smaller than
209
+ * the smallest one in our set and quickly reject it.
210
+ */
211
+ if (size < vec -> items [vec -> nr_items - 1 ].size )
212
+ return ;
213
+
214
+ for (k = 0 ; k < vec -> nr_items ; k ++ ) {
215
+ if (size < vec -> items [k ].size )
216
+ continue ;
217
+
218
+ /* push items[k..] down one and insert it here */
219
+
220
+ rest_len = (vec -> nr_items - k - 1 ) * sizeof (struct large_item );
221
+ if (rest_len )
222
+ memmove (& vec -> items [k + 1 ], & vec -> items [k ], rest_len );
223
+
224
+ memset (& vec -> items [k ], 0 , sizeof (struct large_item ));
225
+ vec -> items [k ].size = size ;
226
+ oidcpy (& vec -> items [k ].oid , oid );
227
+ return ;
228
+ }
229
+ }
230
+
141
231
/*
142
232
* Common fields for any type of object.
143
233
*/
@@ -183,6 +273,9 @@ struct survey_stats_commits {
183
273
* Count of commits with k parents.
184
274
*/
185
275
uint32_t parent_cnt_pbin [PBIN_VEC_LEN ];
276
+
277
+ struct large_item_vec * vec_largest_by_nr_parents ;
278
+ struct large_item_vec * vec_largest_by_size_bytes ;
186
279
};
187
280
188
281
/*
@@ -192,11 +285,18 @@ struct survey_stats_trees {
192
285
struct survey_stats_base_object base ;
193
286
194
287
/*
195
- * In the following, nr_entries refers to the number of files or
196
- * subdirectories in a tree. We are interested in how wide the
197
- * tree is and if the repo has gigantic directories.
288
+ * Keep a vector of the trees with the most number of entries.
289
+ * This gives us a feel for the width of a tree when there are
290
+ * gigantic directories.
198
291
*/
199
- uint64_t max_entries ; /* max(nr_entries) -- the width of the largest tree */
292
+ struct large_item_vec * vec_largest_by_nr_entries ;
293
+
294
+ /*
295
+ * Keep a vector of the trees with the largest size in bytes.
296
+ * The contents of this may or may not match items in the other
297
+ * vector, since entryname length can alter the results.
298
+ */
299
+ struct large_item_vec * vec_largest_by_size_bytes ;
200
300
201
301
/*
202
302
* Computing the sum of the number of entries across all trees
@@ -216,6 +316,11 @@ struct survey_stats_trees {
216
316
*/
217
317
struct survey_stats_blobs {
218
318
struct survey_stats_base_object base ;
319
+
320
+ /*
321
+ * Remember the OIDs of the largest n blobs.
322
+ */
323
+ struct large_item_vec * vec_largest_by_size_bytes ;
219
324
};
220
325
221
326
struct survey_report_object_summary {
@@ -396,6 +501,12 @@ struct survey_context {
396
501
397
502
static void clear_survey_context (struct survey_context * ctx )
398
503
{
504
+ free_large_item_vec (ctx -> report .reachable_objects .commits .vec_largest_by_nr_parents );
505
+ free_large_item_vec (ctx -> report .reachable_objects .commits .vec_largest_by_size_bytes );
506
+ free_large_item_vec (ctx -> report .reachable_objects .trees .vec_largest_by_nr_entries );
507
+ free_large_item_vec (ctx -> report .reachable_objects .trees .vec_largest_by_size_bytes );
508
+ free_large_item_vec (ctx -> report .reachable_objects .blobs .vec_largest_by_size_bytes );
509
+
399
510
ref_array_clear (& ctx -> ref_array );
400
511
strvec_clear (& ctx -> refs );
401
512
}
@@ -608,6 +719,32 @@ static void survey_report_commit_parents(struct survey_context *ctx)
608
719
clear_table (& table );
609
720
}
610
721
722
+ static void survey_report_largest_vec (struct large_item_vec * vec )
723
+ {
724
+ struct survey_table table = SURVEY_TABLE_INIT ;
725
+ struct strbuf size = STRBUF_INIT ;
726
+
727
+ if (!vec || !vec -> nr_items )
728
+ return ;
729
+
730
+ table .table_name = vec -> dimension_label ;
731
+ strvec_pushl (& table .header , "Size" , "OID" , NULL );
732
+
733
+ for (size_t k = 0 ; k < vec -> nr_items ; k ++ ) {
734
+ struct large_item * pk = & vec -> items [k ];
735
+ if (!is_null_oid (& pk -> oid )) {
736
+ strbuf_reset (& size );
737
+ strbuf_addf (& size , "%" PRIuMAX , (uintmax_t )pk -> size );
738
+
739
+ insert_table_rowv (& table , size .buf , oid_to_hex (& pk -> oid ), NULL );
740
+ }
741
+ }
742
+ strbuf_release (& size );
743
+
744
+ print_table_plaintext (& table );
745
+ clear_table (& table );
746
+ }
747
+
611
748
static void survey_report_plaintext_refs (struct survey_context * ctx )
612
749
{
613
750
struct survey_report_ref_summary * refs = & ctx -> report .refs ;
@@ -787,6 +924,12 @@ static void survey_report_plaintext(struct survey_context *ctx)
787
924
& ctx -> report .top_paths_by_inflate [REPORT_TYPE_TREE ]);
788
925
survey_report_plaintext_sorted_size (
789
926
& ctx -> report .top_paths_by_inflate [REPORT_TYPE_BLOB ]);
927
+
928
+ survey_report_largest_vec (ctx -> report .reachable_objects .commits .vec_largest_by_nr_parents );
929
+ survey_report_largest_vec (ctx -> report .reachable_objects .commits .vec_largest_by_size_bytes );
930
+ survey_report_largest_vec (ctx -> report .reachable_objects .trees .vec_largest_by_nr_entries );
931
+ survey_report_largest_vec (ctx -> report .reachable_objects .trees .vec_largest_by_size_bytes );
932
+ survey_report_largest_vec (ctx -> report .reachable_objects .blobs .vec_largest_by_size_bytes );
790
933
}
791
934
792
935
/*
@@ -858,6 +1001,27 @@ static int survey_load_config_cb(const char *var, const char *value,
858
1001
ctx -> opts .show_progress = git_config_bool (var , value );
859
1002
return 0 ;
860
1003
}
1004
+ if (!strcmp (var , "survey.showcommitparents" )) {
1005
+ ctx -> opts .show_largest_commits_by_nr_parents = git_config_ulong (var , value , cctx -> kvi );
1006
+ return 0 ;
1007
+ }
1008
+ if (!strcmp (var , "survey.showcommitsizes" )) {
1009
+ ctx -> opts .show_largest_commits_by_size_bytes = git_config_ulong (var , value , cctx -> kvi );
1010
+ return 0 ;
1011
+ }
1012
+
1013
+ if (!strcmp (var , "survey.showtreeentries" )) {
1014
+ ctx -> opts .show_largest_trees_by_nr_entries = git_config_ulong (var , value , cctx -> kvi );
1015
+ return 0 ;
1016
+ }
1017
+ if (!strcmp (var , "survey.showtreesizes" )) {
1018
+ ctx -> opts .show_largest_trees_by_size_bytes = git_config_ulong (var , value , cctx -> kvi );
1019
+ return 0 ;
1020
+ }
1021
+ if (!strcmp (var , "survey.showblobsizes" )) {
1022
+ ctx -> opts .show_largest_blobs_by_size_bytes = git_config_ulong (var , value , cctx -> kvi );
1023
+ return 0 ;
1024
+ }
861
1025
if (!strcmp (var , "survey.top" )) {
862
1026
ctx -> opts .top_nr = git_config_bool (var , value );
863
1027
return 0 ;
@@ -1069,6 +1233,9 @@ static void increment_totals(struct survey_context *ctx,
1069
1233
1070
1234
ctx -> report .reachable_objects .commits .parent_cnt_pbin [k ]++ ;
1071
1235
base = & ctx -> report .reachable_objects .commits .base ;
1236
+
1237
+ maybe_insert_large_item (ctx -> report .reachable_objects .commits .vec_largest_by_nr_parents , k , & commit -> object .oid );
1238
+ maybe_insert_large_item (ctx -> report .reachable_objects .commits .vec_largest_by_size_bytes , object_length , & commit -> object .oid );
1072
1239
break ;
1073
1240
}
1074
1241
case OBJ_TREE : {
@@ -1088,8 +1255,8 @@ static void increment_totals(struct survey_context *ctx,
1088
1255
1089
1256
pst -> sum_entries += nr_entries ;
1090
1257
1091
- if ( nr_entries > pst -> max_entries )
1092
- pst -> max_entries = nr_entries ;
1258
+ maybe_insert_large_item ( pst -> vec_largest_by_nr_entries , nr_entries , & tree -> object . oid );
1259
+ maybe_insert_large_item ( pst -> vec_largest_by_size_bytes , object_length , & tree -> object . oid ) ;
1093
1260
1094
1261
qb = qbin (nr_entries );
1095
1262
incr_obj_hist_bin (& pst -> entry_qbin [qb ], object_length , disk_sizep );
@@ -1099,6 +1266,8 @@ static void increment_totals(struct survey_context *ctx,
1099
1266
}
1100
1267
case OBJ_BLOB :
1101
1268
base = & ctx -> report .reachable_objects .blobs .base ;
1269
+
1270
+ maybe_insert_large_item (ctx -> report .reachable_objects .blobs .vec_largest_by_size_bytes , object_length , & oids -> oid [i ]);
1102
1271
break ;
1103
1272
default :
1104
1273
continue ;
@@ -1307,6 +1476,14 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
1307
1476
OPT_BOOL_F (0 , "detached" , & ctx .opts .refs .want_detached , N_ ("include detached HEAD" ), PARSE_OPT_NONEG ),
1308
1477
OPT_BOOL_F (0 , "other" , & ctx .opts .refs .want_other , N_ ("include notes and stashes" ), PARSE_OPT_NONEG ),
1309
1478
1479
+ OPT_INTEGER_F (0 , "commit-parents" , & ctx .opts .show_largest_commits_by_nr_parents , N_ ("show N largest commits by parent count" ), PARSE_OPT_NONEG ),
1480
+ OPT_INTEGER_F (0 , "commit-sizes" , & ctx .opts .show_largest_commits_by_size_bytes , N_ ("show N largest commits by size in bytes" ), PARSE_OPT_NONEG ),
1481
+
1482
+ OPT_INTEGER_F (0 , "tree-entries" , & ctx .opts .show_largest_trees_by_nr_entries , N_ ("show N largest trees by entry count" ), PARSE_OPT_NONEG ),
1483
+ OPT_INTEGER_F (0 , "tree-sizes" , & ctx .opts .show_largest_trees_by_size_bytes , N_ ("show N largest trees by size in bytes" ), PARSE_OPT_NONEG ),
1484
+
1485
+ OPT_INTEGER_F (0 , "blob-sizes" , & ctx .opts .show_largest_blobs_by_size_bytes , N_ ("show N largest blobs by size in bytes" ), PARSE_OPT_NONEG ),
1486
+
1310
1487
OPT_END (),
1311
1488
};
1312
1489
@@ -1330,6 +1507,39 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
1330
1507
1331
1508
fixup_refs_wanted (& ctx );
1332
1509
1510
+ if (ctx .opts .show_largest_commits_by_nr_parents )
1511
+ ctx .report .reachable_objects .commits .vec_largest_by_nr_parents =
1512
+ alloc_large_item_vec (
1513
+ "largest_commits_by_nr_parents" ,
1514
+ "nr_parents" ,
1515
+ ctx .opts .show_largest_commits_by_nr_parents );
1516
+ if (ctx .opts .show_largest_commits_by_size_bytes )
1517
+ ctx .report .reachable_objects .commits .vec_largest_by_size_bytes =
1518
+ alloc_large_item_vec (
1519
+ "largest_commits_by_size_bytes" ,
1520
+ "size" ,
1521
+ ctx .opts .show_largest_commits_by_size_bytes );
1522
+
1523
+ if (ctx .opts .show_largest_trees_by_nr_entries )
1524
+ ctx .report .reachable_objects .trees .vec_largest_by_nr_entries =
1525
+ alloc_large_item_vec (
1526
+ "largest_trees_by_nr_entries" ,
1527
+ "nr_entries" ,
1528
+ ctx .opts .show_largest_trees_by_nr_entries );
1529
+ if (ctx .opts .show_largest_trees_by_size_bytes )
1530
+ ctx .report .reachable_objects .trees .vec_largest_by_size_bytes =
1531
+ alloc_large_item_vec (
1532
+ "largest_trees_by_size_bytes" ,
1533
+ "size" ,
1534
+ ctx .opts .show_largest_trees_by_size_bytes );
1535
+
1536
+ if (ctx .opts .show_largest_blobs_by_size_bytes )
1537
+ ctx .report .reachable_objects .blobs .vec_largest_by_size_bytes =
1538
+ alloc_large_item_vec (
1539
+ "largest_blobs_by_size_bytes" ,
1540
+ "size" ,
1541
+ ctx .opts .show_largest_blobs_by_size_bytes );
1542
+
1333
1543
survey_phase_refs (& ctx );
1334
1544
1335
1545
survey_phase_objects (& ctx );
0 commit comments