|
| 1 | +import datetime |
| 2 | +import os |
| 3 | +import re |
| 4 | +from enum import StrEnum |
| 5 | + |
| 6 | +import frontmatter |
| 7 | +from git import Commit, Repo |
| 8 | + |
# Directory name for changelog entry files. Not referenced by the code visible here;
# presumably the default for a changelog-path option -- confirm against callers.
DEFAULT_CHANGELOG_PATH = "changelog/"
# Not referenced by the code visible here; by its name, the version assumed when no
# git tag exists yet -- confirm against callers.
DEFAULT_INITIAL_GIT_TAG_VERSION = "1.0.0"
# Date format of the leading date in changelog file names (e.g. 20240131).
FILENAME_DATE_FORMAT = "%Y%m%d"
# Date format of the "date" field in a changelog file's front matter (e.g. 2024-01-31).
FRONTMATTER_DATE_FORMAT = "%Y-%m-%d"
# Length budget sanitize_title applies when joining title words into a file-name part.
MAX_TITLE_LENGTH = 50
| 14 | + |
| 15 | + |
| 16 | +class ChangeKind(StrEnum): |
| 17 | + PRELUDE = "prelude" |
| 18 | + BREAKING = "breaking" |
| 19 | + FEATURE = "feature" |
| 20 | + FIX = "fix" |
| 21 | + OTHER = "other" |
| 22 | + |
| 23 | + @staticmethod |
| 24 | + def from_str(kind_str: str) -> "ChangeKind": |
| 25 | + kind_str_lower = kind_str.lower() |
| 26 | + if kind_str_lower == str(ChangeKind.PRELUDE): |
| 27 | + return ChangeKind.PRELUDE |
| 28 | + if kind_str_lower == str(ChangeKind.BREAKING): |
| 29 | + return ChangeKind.BREAKING |
| 30 | + elif kind_str_lower == str(ChangeKind.FEATURE): |
| 31 | + return ChangeKind.FEATURE |
| 32 | + elif kind_str_lower == str(ChangeKind.FIX): |
| 33 | + return ChangeKind.FIX |
| 34 | + elif kind_str_lower == str(ChangeKind.OTHER): |
| 35 | + return ChangeKind.OTHER |
| 36 | + raise ValueError(f"unknown change kind: {kind_str}") |
| 37 | + |
| 38 | + |
class ChangeEntry:
    """One changelog entry: its date, kind, title and body text."""

    def __init__(self, date: datetime.date, kind: "ChangeKind", title: str, contents: str):
        """Store the entry's fields unchanged.

        Args:
            date: Calendar date of the change.
            kind: Category of the change.
            title: Title of the entry.
            contents: Body text of the entry.
        """
        self.date = date
        self.kind = kind
        self.title = title
        self.contents = contents

    def __repr__(self) -> str:
        # The body text is left out on purpose: it can span many lines.
        return f"{type(self).__name__}(date={self.date!r}, kind={self.kind!r}, title={self.title!r})"
| 45 | + |
| 46 | + |
def get_changelog_entries(
    base_commit: Commit,
    repo: Repo,
    changelog_sub_path: str,
) -> list[ChangeEntry]:
    """Collect the changelog entries added between *base_commit* and the repository HEAD.

    The diff is taken between *base_commit* and ``repo.head.commit``, restricted to
    *changelog_sub_path*. Only files reported as added ("A") are considered, so a
    file that was added and later deleted in that range is not included, and a file
    that was added and later modified is picked up once. Each file's text is read
    from ``repo.working_dir`` by ``extract_changelog_entry``.

    Args:
        base_commit: Commit to diff from.
        repo: Repository providing the HEAD commit and the working directory.
        changelog_sub_path: Path filter passed to the diff.

    Returns:
        One ``ChangeEntry`` per added file; an empty list when the diff is empty.
    """
    changes = base_commit.diff(other=repo.head.commit, paths=changelog_sub_path)

    # Nothing changed under the changelog path since the base commit.
    if not changes:
        return []

    return [
        extract_changelog_entry(repo.working_dir, added.b_path)
        for added in changes.iter_change_type("A")
    ]
| 73 | + |
| 74 | + |
def extract_changelog_entry(working_dir: str, file_path: str) -> ChangeEntry:
    """Load one changelog file and cross-check it against its own file name.

    The date and kind are derived from the file name, the file is parsed for its
    front matter, and both sources must agree.

    Args:
        working_dir: Directory that *file_path* is relative to.
        file_path: Path of the changelog file, relative to *working_dir*.

    Returns:
        The ``ChangeEntry`` parsed from the file's contents.

    Raises:
        Exception: If the file name is malformed, or if the date or kind in the
            front matter differs from the one encoded in the file name.
    """
    file_name = os.path.basename(file_path)
    date, kind = extract_date_and_kind_from_file_name(file_name)

    abs_file_path = os.path.join(working_dir, file_path)
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # locale encoding.
    with open(abs_file_path, "r", encoding="utf-8") as file:
        file_content = file.read()

    change_entry = extract_changelog_entry_from_contents(file_content)

    if change_entry.date != date:
        raise Exception(
            f"{file_name} - date in front matter '{change_entry.date}' does not match date extracted from file name '{date}'"
        )

    if change_entry.kind != kind:
        raise Exception(
            f"{file_name} - kind in front matter '{change_entry.kind}' does not match kind extracted from file name '{kind}'"
        )

    return change_entry
| 96 | + |
| 97 | + |
def extract_date_and_kind_from_file_name(file_name: str) -> tuple[datetime.date, ChangeKind]:
    """Parse the date and change kind encoded in a changelog file name.

    The expected shape is ``<8 digits>_<letters>_<anything>.md``; the digits are
    parsed with ``FILENAME_DATE_FORMAT`` and the letters with ``ChangeKind.from_str``.

    Args:
        file_name: Base name of the changelog file (no directory part).

    Returns:
        A ``(date, kind)`` tuple.

    Raises:
        Exception: If the name does not have the expected shape, or its date or
            kind part is invalid. The message is prefixed with *file_name*.
    """
    # fullmatch anchors the pattern at both ends, so names that merely start with
    # a valid entry name (e.g. "..._title.md.bak") are rejected.
    match = re.fullmatch(r"(\d{8})_([a-zA-Z]+)_(.+)\.md", file_name)
    if not match:
        raise Exception(f"{file_name} - doesn't match expected pattern")

    date_str, kind_str, _ = match.groups()
    try:
        date = parse_change_date(date_str, FILENAME_DATE_FORMAT)
        kind = ChangeKind.from_str(kind_str)
    except Exception as e:
        # Prefix the file name so the failure can be traced to a concrete file.
        raise Exception(f"{file_name} - {e}") from e

    return date, kind
| 115 | + |
| 116 | + |
def parse_change_date(date_str: str, date_format: str) -> datetime.date:
    """Parse *date_str* with *date_format* and return the calendar date.

    Args:
        date_str: Textual date.
        date_format: ``strptime`` format that *date_str* must follow.

    Returns:
        The parsed date, without any time-of-day component.

    Raises:
        Exception: If *date_str* cannot be parsed with *date_format*; the
            underlying error is attached as the cause.
    """
    try:
        parsed = datetime.datetime.strptime(date_str, date_format)
    except Exception as err:
        raise Exception(f"date '{date_str}' is not in the expected format {date_format}") from err

    # Only the day is relevant for a changelog entry.
    return parsed.date()
| 124 | + |
| 125 | + |
def extract_changelog_entry_from_contents(file_contents: str) -> ChangeEntry:
    """Build a ``ChangeEntry`` from the full text of a changelog file.

    The text is parsed with ``frontmatter.loads``. The ``kind``, ``date`` and
    ``title`` fields are read from the parsed metadata; the parsed content becomes
    the entry's contents with a newline appended.

    Args:
        file_contents: Complete text of the changelog file.

    Returns:
        The entry described by the file.
    """
    post = frontmatter.loads(file_contents)

    entry_kind = ChangeKind.from_str(str(post["kind"]))
    entry_date = parse_change_date(str(post["date"]), FRONTMATTER_DATE_FORMAT)
    # Append a newline so the Markdown body ends with one.
    body = post.content + "\n"
    entry_title = str(post["title"])

    return ChangeEntry(date=entry_date, kind=entry_kind, title=entry_title, contents=body)
| 135 | + |
| 136 | + |
def get_changelog_filename(title: str, kind: ChangeKind, date: datetime.date) -> str:
    """Build the file name for a changelog entry.

    The result has the form ``<date>_<kind>_<sanitized title>.md``, where the date
    is rendered with ``FILENAME_DATE_FORMAT`` and the title is passed through
    ``sanitize_title``.

    Args:
        title: Human-readable title of the entry.
        kind: Category of the change.
        date: Date of the change.

    Returns:
        The file name (no directory part).
    """
    sanitized_title = sanitize_title(title)
    # Call strftime on the value itself; this works for date and datetime alike
    # and does not rely on calling datetime's method with a plain date.
    filename_date = date.strftime(FILENAME_DATE_FORMAT)

    return f"{filename_date}_{kind}_{sanitized_title}.md"
| 142 | + |
| 143 | + |
def sanitize_title(title: str) -> str:
    """Turn a free-form title into a short, lowercase, underscore-separated slug.

    Characters other than ASCII letters, digits, dashes, underscores and spaces
    are dropped. Runs of dashes, underscores and spaces act as word separators.
    Words are lowercased and joined with single underscores, adding whole words
    only while the result stays within ``MAX_TITLE_LENGTH`` characters. A first
    word that alone exceeds the limit is cut to the limit.

    Args:
        title: Title to sanitize.

    Returns:
        The slug, at most ``MAX_TITLE_LENGTH`` characters long; an empty string
        when *title* contains no usable characters.
    """
    # Only keep alphanumeric characters, dashes, underscores and spaces.
    cleaned = re.sub(r"[^a-zA-Z0-9\-_ ]+", "", title)

    # Split on runs of dashes, underscores and spaces; leading or trailing
    # separators produce empty pieces, which are discarded.
    words = [word.lower() for word in re.split(r"[-_ ]+", cleaned) if word]
    if not words:
        return ""

    # The first word is always kept, but never beyond the length limit.
    result = words[0][:MAX_TITLE_LENGTH]

    for word in words[1:]:
        # Stop at the first word that would push the slug past the limit.
        if len(result) + len("_") + len(word) > MAX_TITLE_LENGTH:
            break
        result = f"{result}_{word}"

    return result
0 commit comments