Skip to content

Commit f17b0b9

Browse files
peffgitster
authored andcommitted
shortlog: de-duplicate trailer values
The current documentation is vague about what happens with --group=trailer:signed-off-by when we see a commit with: Signed-off-by: One Signed-off-by: Two Signed-off-by: One We clearly should credit both "One" and "Two", but should "One" get credited twice? The current code does so, but mostly because that was the easiest thing to do. It's probably more useful to count each commit at most once. This will become especially important when we allow values from multiple sources in a future patch. Signed-off-by: Jeff King <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 47beb37 commit f17b0b9

File tree

3 files changed

+88
-1
lines changed

3 files changed

+88
-1
lines changed

Documentation/git-shortlog.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ OPTIONS
6161
+
6262
Note that commits that do not include the trailer will not be counted.
6363
Likewise, commits with multiple trailers (e.g., multiple signoffs) may
64-
be counted more than once.
64+
be counted more than once (but only once per unique trailer value in
65+
that commit).
6566
+
6667
The contents of each trailer value are taken literally and completely.
6768
No mailmap is applied, and the `-e` option has no effect (if the trailer

builtin/shortlog.c

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,13 +166,68 @@ static void read_from_stdin(struct shortlog *log)
166166
strbuf_release(&oneline);
167167
}
168168

169+
struct strset_item {
170+
struct hashmap_entry ent;
171+
char value[FLEX_ARRAY];
172+
};
173+
174+
struct strset {
175+
struct hashmap map;
176+
};
177+
178+
#define STRSET_INIT { { NULL } }
179+
180+
static int strset_item_hashcmp(const void *hash_data,
181+
const struct hashmap_entry *entry,
182+
const struct hashmap_entry *entry_or_key,
183+
const void *keydata)
184+
{
185+
const struct strset_item *a, *b;
186+
187+
a = container_of(entry, const struct strset_item, ent);
188+
if (keydata)
189+
return strcmp(a->value, keydata);
190+
191+
b = container_of(entry_or_key, const struct strset_item, ent);
192+
return strcmp(a->value, b->value);
193+
}
194+
195+
/*
196+
* Adds "str" to the set if it was not already present; returns true if it was
197+
* already there.
198+
*/
199+
static int strset_check_and_add(struct strset *ss, const char *str)
200+
{
201+
unsigned int hash = strhash(str);
202+
struct strset_item *item;
203+
204+
if (!ss->map.table)
205+
hashmap_init(&ss->map, strset_item_hashcmp, NULL, 0);
206+
207+
if (hashmap_get_from_hash(&ss->map, hash, str))
208+
return 1;
209+
210+
FLEX_ALLOC_STR(item, value, str);
211+
hashmap_entry_init(&item->ent, hash);
212+
hashmap_add(&ss->map, &item->ent);
213+
return 0;
214+
}
215+
216+
static void strset_clear(struct strset *ss)
217+
{
218+
if (!ss->map.table)
219+
return;
220+
hashmap_free_entries(&ss->map, struct strset_item, ent);
221+
}
222+
169223
static void insert_records_from_trailers(struct shortlog *log,
170224
struct commit *commit,
171225
struct pretty_print_context *ctx,
172226
const char *oneline)
173227
{
174228
struct trailer_iterator iter;
175229
const char *commit_buffer, *body;
230+
struct strset dups = STRSET_INIT;
176231

177232
/*
178233
* Using format_commit_message("%B") would be simpler here, but
@@ -190,10 +245,13 @@ static void insert_records_from_trailers(struct shortlog *log,
190245
if (strcasecmp(iter.key.buf, log->trailer))
191246
continue;
192247

248+
if (strset_check_and_add(&dups, value))
249+
continue;
193250
insert_one_record(log, value, oneline);
194251
}
195252
trailer_iterator_release(&iter);
196253

254+
strset_clear(&dups);
197255
unuse_commit_buffer(commit, commit_buffer);
198256
}
199257

t/t4201-shortlog.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,4 +234,32 @@ test_expect_success 'shortlog --group=trailer:signed-off-by' '
234234
test_cmp expect actual
235235
'
236236

237+
test_expect_success 'shortlog de-duplicates trailers in a single commit' '
238+
git commit --allow-empty -F - <<-\EOF &&
239+
subject one
240+
241+
this message has two distinct values, plus a repeat
242+
243+
Repeated-trailer: Foo
244+
Repeated-trailer: Bar
245+
Repeated-trailer: Foo
246+
EOF
247+
248+
git commit --allow-empty -F - <<-\EOF &&
249+
subject two
250+
251+
similar to the previous, but without the second distinct value
252+
253+
Repeated-trailer: Foo
254+
Repeated-trailer: Foo
255+
EOF
256+
257+
cat >expect <<-\EOF &&
258+
2 Foo
259+
1 Bar
260+
EOF
261+
git shortlog -ns --group=trailer:repeated-trailer -2 HEAD >actual &&
262+
test_cmp expect actual
263+
'
264+
237265
test_done

0 commit comments

Comments
 (0)