Skip to content

Commit ad24dcc

Browse files
committed
As used for building sheet [pybind11 commit overview (git log -p -m 2020-05-22 14:19 PDT)].
1 parent 8ce7b3f commit ad24dcc

File tree

1 file changed

+95
-0
lines changed

1 file changed

+95
-0
lines changed

mine_git_log.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""Produces CSV lists of Count, Author for 1. merge, 2. regular commits.
2+
3+
Usage:
4+
cd <gitrepo>
5+
git log -p -m > git_log_p_m_output.txt
6+
python3 mine_git_log.py git_log_p_m_output.txt
7+
"""
8+
9+
import collections
10+
import sys
11+
12+
13+
class CommitInfo:
    """Plain record holding the fields known about one ``git log`` entry.

    Attribute storage is limited to the names in ``__slots__``; each of them
    is ``None`` on a freshly constructed instance.
    """

    __slots__ = ('commit', 'merge', 'author', 'date', 'author_name')

    def __init__(self):
        # Give every slot a defined value so that attribute reads (and
        # hasattr checks) succeed before anything has been stored.
        for field_name in self.__slots__:
            setattr(self, field_name, None)
22+
23+
24+
def set_derived_info_in_place(all_commit_info):
    """Set ``author_name`` on every commit and collect e-mails per author name.

    Each ``commit_info.author`` must have the form ``Name <email>``. The name
    is the text before the first ``<`` with surrounding whitespace removed;
    the e-mail is the text between that ``<`` and the final ``>``.

    Args:
        all_commit_info: Iterable of objects with a readable ``author``
            attribute and a writable ``author_name`` attribute.

    Returns:
        A ``collections.defaultdict`` mapping each author name to the set of
        e-mail addresses seen with that name.

    Raises:
        ValueError: If an ``author`` value is missing (``None``) or does not
            match ``Name <email>``. No ``author_name`` is modified in that
            case.
    """
    email_list_by_name = collections.defaultdict(set)
    pending = []
    for commit_info in all_commit_info:
        author_line = commit_info.author
        if author_line is None:
            raise ValueError('commit without an author line')
        name_part, bracket, email_part = author_line.partition('<')
        # Explicit checks rather than assert: asserts vanish under ``-O`` and
        # a malformed line would then be mis-parsed instead of rejected.
        if not bracket or not email_part.endswith('>'):
            raise ValueError('malformed author line: %r' % (author_line,))
        author_name = name_part.strip()
        email_list_by_name[author_name].add(email_part[:-1])
        pending.append((commit_info, author_name))
    # Assign only after every line has been validated, so a bad entry leaves
    # all commits untouched. Keeping the objects in ``pending`` also makes
    # this work when the input can only be iterated once.
    for commit_info, author_name in pending:
        commit_info.author_name = author_name
    return email_list_by_name
40+
41+
42+
def sorted_by_counts(key_count_items, counts_top_down=True):
    """Return ``(count, key)`` pairs ordered by count, then by key.

    ``key_count_items`` yields ``(key, count)`` pairs. With
    ``counts_top_down`` true the largest counts come first, otherwise the
    smallest; pairs with equal counts are ordered by ascending key in both
    cases. The result is a tuple.
    """
    sign = -1 if counts_top_down else 1
    # Flip the sign of each count so that a plain ascending sort yields the
    # requested direction while ties still fall back to ascending key.
    decorated = [(sign * count, key) for key, count in key_count_items]
    decorated.sort()
    return tuple((sign * signed_count, key) for signed_count, key in decorated)
47+
48+
49+
def author_counts(all_commit_info, merge_commit_selector):
    """Count commits per ``author_name`` for merge or for non-merge commits.

    A commit is treated as a merge when its ``merge`` attribute is truthy.
    Only commits whose merge status equals ``merge_commit_selector`` (which
    must be a ``bool``) are counted. The tallies are returned in the form
    produced by ``sorted_by_counts`` with its default ordering.
    """
    assert isinstance(merge_commit_selector, bool)
    tally = collections.Counter(
        info.author_name
        for info in all_commit_info
        if bool(info.merge) is merge_commit_selector)
    return sorted_by_counts(tally.items())
56+
57+
58+
def _parse_commit_headers(log_lines):
    """Return one CommitInfo per ``commit`` header block found in log_lines."""
    all_commit_info = []
    line_iter = iter(log_lines)
    for line in line_iter:
        if not line.startswith('commit '):
            continue
        commit_info = CommitInfo()
        # Keep whatever follows the "commit " keyword on the header line.
        commit_info.commit = line.split(' ', 1)[1]
        # The header lines run up to the first blank line. The inner loop
        # shares the iterator, so the outer loop resumes right after it.
        for block_line in line_iter:
            if not block_line.strip():
                break
            key, separator, value = block_line.partition(' ')
            key = key.lower()
            if key.endswith(':'):
                key = key[:-1]
            if not separator or not hasattr(commit_info, key):
                raise RuntimeError(
                    'Unexpected line after "commit": %s' % block_line)
            setattr(commit_info, key, value)
        all_commit_info.append(commit_info)
    return all_commit_info


def run(args):
    """Print per-author commit counts mined from saved ``git log -p -m`` output.

    Two CSV sections are written to standard output, first for merge commits
    and then for all other commits. Each section has a quoted header line,
    one ``count,author`` row per author name (highest count first) and a
    closing ``,`` line.

    NOTE(review): with ``-m`` git is documented to emit a separate log entry
    per merge parent; each such entry is counted here -- confirm intended.

    Args:
        args: Command-line arguments without the program name; exactly one
            element, the path of the file holding the ``git log`` output.

    Raises:
        ValueError: If ``args`` does not contain exactly one element, or if
            an author line is malformed (raised while deriving author names).
        RuntimeError: If a line inside a commit header block is not a
            recognised ``Key: value`` line.
    """
    import csv  # Local import: only this function needs it.

    if len(args) != 1:
        raise ValueError(
            'expected exactly one argument: git log -p -m output')
    # The log is streamed line by line instead of being read whole, and
    # errors='replace' keeps diff content that is not valid UTF-8 from
    # aborting the run. The with-statement closes the file.
    with open(args[0], encoding='utf-8', errors='replace') as log_file:
        all_commit_info = _parse_commit_headers(
            line.rstrip('\n') for line in log_file)
    set_derived_info_in_place(all_commit_info)
    # csv.writer quotes author names that contain a comma or a double quote,
    # which the plain '%s,%s' formatting would turn into broken rows.
    writer = csv.writer(sys.stdout, lineterminator='\n')
    for commit_kind in ('Merge ', ''):
        counts_by_author_name = author_counts(
            all_commit_info, merge_commit_selector=(commit_kind == 'Merge '))
        print('"Count","%sAuthor"' % commit_kind)
        writer.writerows(counts_by_author_name)
        print(',')
92+
93+
94+
# Script entry point: hand the command-line arguments (without the program
# name) to run().
if __name__ == '__main__':
    run(sys.argv[1:])

0 commit comments

Comments
 (0)