Skip to content

Commit fbaaaaa

Browse files
committed
script: sync gitlog2changelog.py with upstream
Update the gitlog2changelog.py script with the recent changes made to it in its upstream repository (https://github.com/networkupstools/nut/, commit e4739b9).

Keep our local adaptation:
* TextWrapper-specific configuration

Signed-off-by: Xavier Delaruelle <[email protected]>
1 parent 126c83d commit fbaaaaa

File tree

1 file changed

+139
-22
lines changed

1 file changed

+139
-22
lines changed

script/gitlog2changelog.py.in

Lines changed: 139 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!@PYTHON@
22
# Copyright 2008 Marcus D. Hanwell <[email protected]>
33
# Minor changes for NUT by Charles Lepple
4+
# Subsequent maintenance for NUT by Jim Klimov (since 2021)
45
# Distributed under the terms of the GNU General Public License v2 or later
56

67
import re
@@ -9,6 +10,24 @@ from textwrap import TextWrapper
910
import sys
1011
import subprocess
1112

13+
# Python 3 compatibility hack
14+
try:
15+
try:
16+
import unicode
17+
except:
18+
# Maybe built-in?
19+
pass
20+
unicode('')
21+
except NameError as ex:
22+
#DEBUG# sys.stderr.write("Using 'str' as 'unicode': %s\n" % str(ex))
23+
#DEBUG# sys.stderr.flush()
24+
unicode = str
25+
26+
try:
27+
import unicodedata
28+
except:
29+
pass
30+
1231
rev_range = "HEAD"
1332

1433
if len(sys.argv) > 1:
@@ -53,11 +72,47 @@ except TypeError:
5372
fin_mode = 2
5473
fin_chop = 1
5574

56-
# Create a ChangeLog file in the current directory.
57-
if fin_mode == 3:
58-
fout = open("ChangeLog", "w", encoding="UTF-8")
75+
# Create a ChangeLog file in the current directory by default.
76+
CHANGELOG_FILE = "ChangeLog"
77+
try:
78+
# e.g. point from Makefile to a builddir (caller ensures it exists)
79+
if os.environ.get("CHANGELOG_FILE", None) is not None:
80+
CHANGELOG_FILE = os.environ.get("CHANGELOG_FILE")
81+
except Exception as ignored:
82+
pass
83+
84+
if CHANGELOG_FILE == "-":
85+
fout = sys.stdout
5986
else:
60-
fout = open("ChangeLog", "w")
87+
if fin_mode == 3:
88+
fout = open(CHANGELOG_FILE, "w", encoding="UTF-8")
89+
else:
90+
fout = open(CHANGELOG_FILE, "w")
91+
92+
# By default we collect information from a commit and output it as soon as
93+
# we have enough. Part of it is best-effort grouping of a series of commits
94+
# made by the same author on the same day, if they follow each other.
95+
# The alternative is to expend memory to collect all git log entries into a
96+
# dictionary first (key = date+author, value = list of entries) and only
97+
# print the output at the end of processing. This costs more resources, so
98+
# is not default behavior.
99+
requireGroupByDateAuthor = False
100+
try:
101+
tmpEnvVar = os.environ.get("CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR", None)
102+
if str(tmpEnvVar).lower() == "true":
103+
requireGroupByDateAuthor = True
104+
except Exception as ignored:
105+
pass
106+
107+
cachedContent = None
108+
if requireGroupByDateAuthor:
109+
try:
110+
from collections import defaultdict
111+
cachedContent = defaultdict(list)
112+
except Exception as x:
113+
print("Failed to init requireGroupByDateAuthor processing as defaultdict(list), trying simple dict(): " + str(x))
114+
requireGroupByDateAuthor = False
115+
cachedContent = dict()
61116

62117
# Set up the loop variables in order to locate the blocks we want
63118
authorFound = False
@@ -69,12 +124,16 @@ messageNL = False
69124
files = ""
70125
prevAuthorLine = ""
71126

72-
wrapper = TextWrapper(
73-
initial_indent=" ",
74-
subsequent_indent=" ",
75-
width=78,
76-
break_on_hyphens=False
77-
)
127+
# Legacy default: keep as is
128+
authorMustBeASCII = False
129+
authorMustBeASCII_inverse_setting = str(os.environ.get("WITH_PDF_NONASCII_TITLES", "")).upper()
130+
if authorMustBeASCII_inverse_setting in ["YES", "TRUE"]:
131+
authorMustBeASCII = False
132+
elif authorMustBeASCII_inverse_setting in ["NO", "FALSE"]:
133+
authorMustBeASCII = True
134+
135+
# See also: https://github.com/python/cpython/blob/main/Lib/textwrap.py
136+
wrapper = TextWrapper(initial_indent=" ", subsequent_indent=" ", width=78, break_on_hyphens=False)
78137

79138
# The main part of the loop
80139
for line in fin:
@@ -92,17 +151,33 @@ for line in fin:
92151
# Match the author line and extract the part we want
93152
# (Don't use startswith to allow Author override inside commit message.)
94153
elif "Author:" in line:
95-
authorList = re.split(": ", line, 1)
154+
if sys.version_info >= (3, 13, ):
155+
authorList = re.split(": ", line, maxsplit=1)
156+
else:
157+
authorList = re.split(": ", line, 1)
158+
96159
try:
97160
author = authorList[1]
98161
author = author[0 : len(author) - fin_chop]
162+
if authorMustBeASCII:
163+
try:
164+
if isinstance(author, str) and unicode != str:
165+
author = unicodedata.normalize(u'NFKD', unicode(author, "utf-8")).encode('ascii', 'ignore').decode('utf8')
166+
else:
167+
author = unicodedata.normalize(u'NFKD', author).encode('ascii', 'ignore').decode('utf8')
168+
except Exception as e:
169+
print("Could not unicodedata.normalize() author '%s': %s" % (author, str(e)))
99170
authorFound = True
100171
except:
101172
print("Could not parse authorList = '%s'" % (line))
102173

103174
# Match the date line
104175
elif line.startswith("Date:"):
105-
dateList = re.split(": ", line, 1)
176+
if sys.version_info >= (3, 13, ):
177+
dateList = re.split(": ", line, maxsplit=1)
178+
else:
179+
dateList = re.split(": ", line, 1)
180+
106181
try:
107182
date = dateList[1]
108183
date = date[0 : len(date) - fin_chop]
@@ -120,7 +195,14 @@ for line in fin:
120195
continue
121196
# Extract the actual commit message for this commit
122197
elif authorFound and dateFound and messageFound is False:
123-
# Find the commit message if we can
198+
# Find the commit message if we can (including the optional
199+
# details after the title and a blank line)
200+
# FIXME: Detect end of message by /^#/ to allow for longer essays
201+
# in the detailed comments part?
202+
# FIXME: Some such comments include asciidoc-ish markup, notably
203+
# bullet lists - do not concatenate those into one block but do
204+
# actually pass them as sub-lists (indented, and perhaps not
205+
# starting with an asterisk which we use for this document).
124206
if len(line) == fin_chop:
125207
if messageNL:
126208
messageFound = True
@@ -139,23 +221,39 @@ for line in fin:
139221
continue
140222
# Collect the files for this commit. FIXME: Still need to add +/- to files
141223
elif authorFound and dateFound and messageFound:
142-
fileList = re.split(r' \| ', line, 2)
224+
if sys.version_info >= (3, 13, ):
225+
fileList = re.split(r' \| ', line, maxsplit=2)
226+
else:
227+
fileList = re.split(r' \| ', line, 2)
228+
143229
if len(fileList) > 1:
144230
if len(files) > 0:
145231
files = files + ", " + fileList[0].strip()
146232
else:
147233
files = fileList[0].strip()
234+
148235
# All of the parts of the commit have been found - write out the entry
149236
if authorFound and dateFound and messageFound and filesFound:
150237
# First the author line, only outputted if it is the first for that
151-
# author on this day
238+
# author on this day.
239+
# WARNING: In case of git rebase commit shuffling, merges of dormant
240+
# branches, etc. we are not guaranteed to have all dates in the list
241+
# nicely ordered. In fact, the same date+author can be repeated if
242+
# there were commits with other metadata in git history between those
243+
# (e.g. many PRs from a few authors merged during one day). While we
244+
# could cache each section by authorLine and only output at the end,
245+
# it can require a lot of memory - so by default we do not.
152246
authorLine = date + " " + author
153-
if len(prevAuthorLine) == 0:
154-
fout.write(authorLine + "\n\n")
155-
elif authorLine == prevAuthorLine:
156-
pass
247+
if requireGroupByDateAuthor:
248+
if authorLine not in cachedContent:
249+
cachedContent[authorLine] = list()
157250
else:
158-
fout.write("\n" + authorLine + "\n\n")
251+
if len(prevAuthorLine) == 0:
252+
fout.write(authorLine + "\n\n")
253+
elif authorLine == prevAuthorLine:
254+
pass
255+
else:
256+
fout.write("\n" + authorLine + "\n\n")
159257

160258
# Assemble the actual commit message line(s) and limit the line length
161259
# to 80 characters.
@@ -177,8 +275,11 @@ for line in fin:
177275
else:
178276
commitLine = "* " + files + ": " + message
179277

180-
# Write out the commit line
181-
fout.write(wrapper.fill(commitLine) + "\n")
278+
if requireGroupByDateAuthor:
279+
cachedContent[authorLine].append(commitLine)
280+
else:
281+
# Write out the commit line, wrapped for length
282+
fout.write(wrapper.fill(commitLine) + "\n")
182283

183284
# Now reset all the variables ready for a new commit block.
184285
authorFound = False
@@ -190,6 +291,22 @@ for line in fin:
190291
files = ""
191292
prevAuthorLine = authorLine
192293

294+
if requireGroupByDateAuthor:
295+
# We did not print anything before, flush it out now;
296+
# most recent date first (alphanumerically reverse)
297+
counter = 0
298+
for authorLine in sorted(cachedContent, reverse=True):
299+
if counter == 0:
300+
fout.write(authorLine + "\n\n")
301+
else:
302+
fout.write("\n" + authorLine + "\n\n")
303+
304+
# Use original list append order
305+
for commitLine in cachedContent[authorLine]:
306+
fout.write(wrapper.fill(commitLine) + "\n")
307+
308+
counter = counter + 1
309+
193310
# Close the input and output lines now that we are finished.
194311
if fin_mode == 3:
195312
p.stdout.close()

0 commit comments

Comments
 (0)