+import dataclasses
 import io
+import itertools
 import logging
 import os
 import pathlib
 import re
 import subprocess
 import tarfile
 import tempfile
+import textwrap
 import typing
+from datetime import datetime, timezone

 import click
-import parver
+import parver  # type: ignore
 import requests

 IANA_LATEST_LOCATION = "https://www.iana.org/time-zones/repository/tzdata-latest.tar.gz"
 PKG_BASE = REPO_ROOT / "src"
 TEMPLATES_DIR = REPO_ROOT / "templates"

+SKIP_NEWS_HEADINGS = {
+    "Changes to code",
+    "Changes to build procedure",
+}
+

 def download_tzdb_tarballs(
     version: str, base_url: str = SOURCE, working_dir: pathlib.Path = WORKING_DIR
@@ -202,11 +211,181 @@ def translate_version(iana_version: str) -> str:
     return f"{version_year}.{patch_number:d}"


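+# A worked example of the translation above, assuming IANA patch letters
+# count from "a" == 1 (consistent with the format string in context):
+#
+#     >>> translate_version("2022a")
+#     '2022.1'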
+##
+# News entry handling
+@dataclasses.dataclass
+class NewsEntry:
+    version: str
+    release_date: datetime
+    categories: typing.Mapping[str, str]
+
+    def to_file(self) -> None:
+        fpath = pathlib.Path("news.d") / (self.version + ".md")
+        release_date = self.release_date.astimezone(timezone.utc)
+        translated_version = translate_version(self.version)
+
+        contents = [f"# Version {translated_version}"]
+        contents.append(
+            f"Upstream version {self.version} released {release_date.isoformat()}"
+        )
+        contents.append("")
+
+        for category, entry in self.categories.items():
+            contents.append(f"## {category}")
+            contents.append("")
+            contents.append(entry)
+            contents.append("")
+
+        with open(fpath, "wt") as f:
+            f.write(("\n".join(contents)).strip())
+
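+# A sketch of the fragment to_file() writes (version, date, and heading are
+# hypothetical):
+#
+#     # Version 2022.1
+#     Upstream version 2022a released 2022-03-15T23:21:01+00:00
+#
+#     ## Changes to past timestamps
+#     ...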
+
+INDENT_RE = re.compile("[^ ]")
+
+
+def get_indent(s: str) -> int:
+    s = s.rstrip()
+    if not s:
+        return 0
+
+    m = INDENT_RE.search(s)
+    assert m is not None
+    return m.span()[0]
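+# Doctest-style sketch of get_indent's behavior:
+#
+#     >>> get_indent("    Changes to code")
+#     4
+#     >>> get_indent("   ")  # all-whitespace lines count as unindented
+#     0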
+
+
+def read_block(
+    lines: typing.Iterator[str],
+) -> typing.Tuple[typing.Sequence[str], typing.Iterator[str]]:
+    lines, peek = itertools.tee(lines)
+    while not (first_line := next(peek)):
+        next(lines)
+
+    block_indent = get_indent(first_line)
+    block = []
+
+    # The way this loop works: `peek` is always one line ahead of `lines`. It
+    # starts out where `lines` is pointing to the first non-empty line, and
+    # `peek` is the line after that. We know that if the body of the loop is
+    # reached, the next value in `lines` is part of the block.
+    #
+    # It is done this way so that we can return an iterator pointing at the
+    # first line *after* the block that we just read.
+    for line in peek:
+        block.append(next(lines))
+
+        if not line:
+            # Blank lines never terminate a block; they are picked up by the
+            # next iteration's `next(lines)` call.
+            continue
+
+        line_indent = get_indent(line)
+        if line_indent < block_indent:
+            # We've dedented, so this is the end of the block.
+            break
+    else:
+        # If we've exhausted `peek` because we're reading the last block in
+        # the file, we won't hit the `break` condition, but we'll still have
+        # a valid line in the `lines` queue.
+        block.append(next(lines))
+
+    return block, lines
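+# For example, given an iterator over
+#
+#     ["    line one", "    line two", "Next heading"]
+#
+# read_block returns (["    line one", "    line two"], lines), with `lines`
+# positioned at "Next heading" (a sketch of the intended contract).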
+
+
+def parse_categories(news_block: typing.Sequence[str]) -> typing.Mapping[str, str]:
+    blocks = iter(news_block)
+
+    output = {}
+    while True:
+        try:
+            while not (heading := next(blocks)):
+                pass
+        except StopIteration:
+            break
+
+        content_lines, blocks = read_block(blocks)
+
+        heading = heading.strip()
+        if heading in SKIP_NEWS_HEADINGS:
+            continue
+
+        # Merge the contents into paragraphs by grouping into consecutive
+        # blocks of non-empty lines, then joining those lines on a newline.
+        content_paragraphs: typing.Iterable[str] = (
+            "\n".join(paragraph)
+            for _, paragraph in itertools.groupby(content_lines, key=bool)
+        )
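+        # For instance, ["a", "b", "", "c"] groups as "a\nb", "" and "c"; the
+        # empty strings from blank-line runs wrap away to "" further down.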
+
+        # Now dedent each paragraph and wrap it to 80 characters. This needs
+        # to be done at the per-paragraph level, because `textwrap.wrap`
+        # doesn't recognize paragraph breaks.
+        content_paragraphs = map(textwrap.dedent, content_paragraphs)
+        content_paragraphs = map(
+            "\n".join,
+            (textwrap.wrap(paragraph, width=80) for paragraph in content_paragraphs),
+        )
+
+        # Finally we can join the paragraphs into a single string and trim
+        # whitespace from it.
+        contents = "\n".join(content_paragraphs)
+        contents = contents.strip()
+
+        output[heading] = contents
+
+    return output
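+# A sketch of parse_categories' output shape for a typical release block
+# (heading and text hypothetical):
+#
+#     {"Changes to future timestamps": "Paragraphs, dedented and\nwrapped."}
+#
+# Headings listed in SKIP_NEWS_HEADINGS are dropped entirely.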
+
+
+def read_news(tzdb_loc: pathlib.Path, version: typing.Optional[str] = None) -> NewsEntry:
+    release_re = re.compile(r"^Release (?P<version>\d{4}[a-z]) - (?P<date>.*$)")
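+    # The pattern above targets upstream NEWS headers of this general shape
+    # (an illustrative line, not taken verbatim from a real release):
+    #
+    #     Release 2022a - 2022-03-15 23:21:01 -0700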
+    with open(tzdb_loc / "NEWS", "rt") as f:
+        f_lines = map(str.rstrip, f)
+        for line in f_lines:
+            if ((m := release_re.match(line)) is not None) and (
+                version is None or m.group("version") == version
+            ):
+                break
+        else:
+            if version is None:
+                message = "No releases found!"
+            else:
+                message = f"No release found with version {version}"
+            raise ValueError(message)
+
+        assert m is not None
+        version_date = datetime.strptime(m.group("date"), "%Y-%m-%d %H:%M:%S %z")
+        release_version = m.group("version")
+        release_contents, _ = read_block(f_lines)
+
+        # Now we further parse the contents of the news and filter out some
+        # irrelevant categories.
+        categories = parse_categories(release_contents)
+
+        return NewsEntry(release_version, version_date, categories)
+
+
+def update_news(news_entry: NewsEntry):
+    # news.d contains fragments for each tzdata version, and the NEWS file
+    # is assembled by stitching these together each time. First thing we'll
+    # do is add a new fragment.
+    news_entry.to_file()
+
+    # Now go through and join all the files together.
+    news_fragment_files = sorted(
+        pathlib.Path("news.d").glob("*.md"), key=lambda p: p.name, reverse=True
+    )
+
+    news_fragments = [p.read_text() for p in news_fragment_files]
+
+    with open("NEWS.md", "wt") as f:
+        f.write("\n\n---\n\n".join(news_fragments))
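+    # NEWS.md therefore reads newest-first, with fragments separated by a
+    # horizontal rule, e.g. (hypothetical versions):
+    #
+    #     # Version 2022.1
+    #     ...
+    #
+    #     ---
+    #
+    #     # Version 2021.5
+    #     ...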
+
+
 @click.command()
 @click.option(
     "--version", "-v", default=None, help="The version of the tzdata file to download"
 )
-def main(version: str):
+@click.option(
+    "--news-only/--no-news-only",
+    help="Flag to disable data updates and only update the news entry",
+)
+def main(version: str, news_only: bool):
     logging.basicConfig(level=logging.INFO)

     if version is None:
@@ -215,9 +394,13 @@ def main(version: str):
     download_locations = download_tzdb_tarballs(version)
     tzdb_location = unpack_tzdb_tarballs(download_locations)

-    zonenames, zonefile_path = load_zonefiles(tzdb_location)
+    # Update the news entry
+    news_entry = read_news(tzdb_location, version=version)
+    update_news(news_entry)

-    create_package(version, zonenames, zonefile_path)
+    if not news_only:
+        zonenames, zonefile_path = load_zonefiles(tzdb_location)
+        create_package(version, zonenames, zonefile_path)
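+# A hypothetical invocation sketch (the script name is assumed):
+#
+#     python update.py --version 2022a --news-only
+#
+# This regenerates news.d/2022a.md and NEWS.md without rebuilding the data.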


 if __name__ == "__main__":