Skip to content

Commit cdc7e42

Browse files
committed
Add news entry parsing
This automatically parses news entries as part of the update process, to make changelogs easier to find for end users.
1 parent c968dae commit cdc7e42

File tree

1 file changed

+187
-4
lines changed

1 file changed

+187
-4
lines changed

update.py

Lines changed: 187 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import dataclasses
12
import io
3+
import itertools
24
import logging
35
import os
46
import pathlib
@@ -7,10 +9,12 @@
79
import subprocess
810
import tarfile
911
import tempfile
12+
import textwrap
1013
import typing
14+
from datetime import datetime, timezone
1115

1216
import click
13-
import parver
17+
import parver # type: ignore
1418
import requests
1519

1620
IANA_LATEST_LOCATION = "https://www.iana.org/time-zones/repository/tzdata-latest.tar.gz"
@@ -20,6 +24,11 @@
2024
PKG_BASE = REPO_ROOT / "src"
2125
TEMPLATES_DIR = REPO_ROOT / "templates"
2226

27+
SKIP_NEWS_HEADINGS = {
28+
"Changes to code",
29+
"Changes to build procedure",
30+
}
31+
2332

2433
def download_tzdb_tarballs(
2534
version: str, base_url: str = SOURCE, working_dir: pathlib.Path = WORKING_DIR
@@ -202,11 +211,181 @@ def translate_version(iana_version: str) -> str:
202211
return f"{version_year}.{patch_number:d}"
203212

204213

214+
##
# News entry handling
@dataclasses.dataclass
class NewsEntry:
    """A single parsed release entry from the upstream tzdb NEWS file."""

    version: str
    release_date: datetime
    categories: typing.Mapping[str, str]

    def to_file(self) -> None:
        """Write this entry as a Markdown fragment at ``news.d/<version>.md``."""
        fpath = pathlib.Path("news.d") / (self.version + ".md")
        release_date = self.release_date.astimezone(timezone.utc)
        translated_version = translate_version(self.version)

        # Fragment header: translated version, then upstream provenance line.
        header = [
            f"# Version {translated_version}",
            f"Upstream version {self.version} release {release_date.isoformat()}",
            "",
        ]

        # One "## <category>" section per entry, each followed by its text.
        body: typing.List[str] = []
        for category, entry in self.categories.items():
            body.extend((f"## {category}", "", entry, ""))

        with open(fpath, "wt") as f:
            f.write("\n".join(header + body).strip())
241+
242+
243+
# Matches the first character that is not a plain space.
INDENT_RE = re.compile(r"[^ ]")


def get_indent(s: str) -> int:
    """Return the number of leading spaces in ``s`` (0 for blank lines)."""
    if not s.rstrip():
        # Entirely empty/whitespace lines carry no indentation information.
        return 0

    match = INDENT_RE.search(s)
    assert match is not None
    return match.start()
254+
255+
256+
def read_block(
    lines: typing.Iterator[str],
) -> typing.Tuple[typing.Sequence[str], typing.Iterator[str]]:
    """Read one indented block of text from ``lines``.

    Skips leading empty lines, then collects every following line that is
    empty or indented at least as far as the block's first line; a dedent
    ends the block.

    Returns a tuple of (the block's lines, an iterator positioned at the
    first line *after* the block), so the caller can keep reading.
    """
    lines, peek = itertools.tee(lines)
    # Skip leading empty lines; `peek` consumes one line more than `lines`,
    # leaving `lines` pointing at the first non-empty line.
    while not (first_line := next(peek)):
        next(lines)

    block_indent = get_indent(first_line)
    block = []

    # The way this loop works: `peek` is always one line ahead of `lines`. It
    # starts out where `lines` is pointing to the first non-empty line, and
    # peek is the line after that. We know that if the body of the loop is
    # reached, the next value in `lines` is part of the block.
    #
    # It is done this way so that we can return an iterable pointing at the
    # first line *after* the block that we just read.
    for line in peek:
        block.append(next(lines))

        if not line:
            # NOTE(review): `line` is appended here *and* again via
            # `next(lines)` on the next iteration, so empty lines appear
            # twice in `block`. Downstream paragraph grouping collapses
            # consecutive blanks, so this looks harmless — confirm intended.
            block.append(line)
            continue

        line_indent = get_indent(line)
        if line_indent < block_indent:
            # We've dedented, so this is the end of the block.
            break
    else:
        # If we've exhausted `peek` because we're reading the last block in the
        # file, we won't hit the `break` condition, but we'll still have a
        # valid line in the `lines` queue.
        block.append(next(lines))

    return block, lines
291+
292+
293+
def parse_categories(news_block: typing.Sequence[str]) -> typing.Mapping[str, str]:
    """Split a release's NEWS text into a {heading: wrapped contents} mapping.

    Headings listed in SKIP_NEWS_HEADINGS are dropped. Each category body is
    dedented and re-wrapped to 80 columns, one paragraph at a time.
    """
    remaining = iter(news_block)
    output = {}

    while True:
        # Advance to the next non-empty line, which is a category heading.
        # The generator consumes from `remaining`, leaving it positioned
        # just after the heading; None means the input is exhausted.
        heading = next((candidate for candidate in remaining if candidate), None)
        if heading is None:
            break

        body_lines, remaining = read_block(remaining)

        title = heading.strip()
        if title in SKIP_NEWS_HEADINGS:
            continue

        # Group consecutive non-empty lines into paragraphs. Runs of empty
        # lines collapse into "" entries, which become the blank separator
        # lines when everything is joined below.
        paragraphs = [
            "\n".join(group)
            for _, group in itertools.groupby(body_lines, key=bool)
        ]

        # Dedent and wrap each paragraph individually: `textwrap.wrap` does
        # not recognize paragraph breaks on its own.
        wrapped = [
            "\n".join(textwrap.wrap(textwrap.dedent(paragraph), width=80))
            for paragraph in paragraphs
        ]

        output[title] = "\n".join(wrapped).strip()

    return output
334+
335+
336+
def read_news(
    tzdb_loc: pathlib.Path, version: typing.Optional[str] = None
) -> "NewsEntry":
    """Extract one release's entry from the tzdb ``NEWS`` file.

    :param tzdb_loc: Directory containing the unpacked tzdb ``NEWS`` file.
    :param version: IANA version string (e.g. ``"2020a"``) to look for; when
        None, the first release entry in the file is used.
    :return: A ``NewsEntry`` with the release version, date, and categories.
    :raises ValueError: If no (matching) release entry is found.
    """
    # Raw string: "\d" in a non-raw literal is an invalid escape sequence
    # (DeprecationWarning today, an error in future Python versions).
    release_re = re.compile(r"^Release (?P<version>\d{4}[a-z]) - (?P<date>.*$)")
    with open(tzdb_loc / "NEWS", "rt") as f:
        f_lines = map(str.rstrip, f)
        for line in f_lines:
            if ((m := release_re.match(line)) is not None) and (
                version is None or m.group("version") == version
            ):
                break
        else:
            # Bug fix: this branch previously built `message` but never
            # raised it, so a missing release fell through and surfaced as a
            # NameError/AssertionError on `m` instead of a clear error.
            if version is None:
                message = "No releases found!"
            else:
                message = f"No release found with version {version}"
            raise ValueError(message)

        assert m is not None
        version_date = datetime.strptime(m.group("date"), "%Y-%m-%d %H:%M:%S %z")
        release_version = m.group("version")
        # The indented lines following the release header are its contents.
        release_contents, _ = read_block(f_lines)

        # Now we further parse the contents of the news and filter out some
        # irrelevant categories.
        categories = parse_categories(release_contents)

        return NewsEntry(release_version, version_date, categories)
361+
362+
363+
def update_news(news_entry: NewsEntry):
    """Add a fragment for ``news_entry`` and regenerate ``NEWS.md``."""
    # news.d contains one Markdown fragment per tzdata version; NEWS.md is
    # rebuilt from scratch by stitching those fragments together each time.
    # First thing to do is write out the new fragment.
    news_entry.to_file()

    # Newer versions sort later lexically, so reverse to put them first.
    fragment_paths = sorted(
        pathlib.Path("news.d").glob("*.md"),
        key=lambda path: path.name,
        reverse=True,
    )

    joined = "\n\n---\n\n".join(path.read_text() for path in fragment_paths)

    with open("NEWS.md", "wt") as f:
        f.write(joined)
378+
379+
205380
@click.command()
206381
@click.option(
207382
"--version", "-v", default=None, help="The version of the tzdata file to download"
208383
)
209-
def main(version: str):
384+
@click.option(
385+
"--news-only/--no-news-only",
386+
help="Flag to disable data updates and only update the news entry",
387+
)
388+
def main(version: str, news_only: bool):
210389
logging.basicConfig(level=logging.INFO)
211390

212391
if version is None:
@@ -215,9 +394,13 @@ def main(version: str):
215394
download_locations = download_tzdb_tarballs(version)
216395
tzdb_location = unpack_tzdb_tarballs(download_locations)
217396

218-
zonenames, zonefile_path = load_zonefiles(tzdb_location)
397+
# Update the news entry
398+
news_entry = read_news(tzdb_location, version=version)
399+
update_news(news_entry)
219400

220-
create_package(version, zonenames, zonefile_path)
401+
if not news_only:
402+
zonenames, zonefile_path = load_zonefiles(tzdb_location)
403+
create_package(version, zonenames, zonefile_path)
221404

222405

223406
if __name__ == "__main__":

0 commit comments

Comments
 (0)