Skip to content

Commit 8344588

Browse files
committed
feat: parse git history
1 parent 670566e commit 8344588

File tree

2 files changed

+282
-0
lines changed

2 files changed

+282
-0
lines changed

pixi.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ merge = 'python ./utils/main.py'
1313
serve = "bundler exec jekyll serve"
1414
profile = "bundler exec jekyll build --profile"
1515
newsletter = "python ./utils/newsletter.py"
16+
git = "python utils/git_parser.py"
1617

1718
[dependencies]
1819
python = ">=3.12.3,<3.13"

utils/git_parser.py

Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
import argparse
2+
import re
3+
import shutil
4+
import subprocess # noqa: S404
5+
from collections import defaultdict
6+
from dataclasses import dataclass
7+
from datetime import datetime
8+
from datetime import timedelta
9+
from datetime import timezone
10+
from urllib.parse import quote
11+
12+
13+
@dataclass
class ConventionalCommit:
    """One git commit whose subject line carries a recognised prefix.

    Parameters
    ----------
    hash : str
        The git commit hash
    prefix : str
        The prefix found before the colon in the subject (e.g., 'cfp', 'conf')
    message : str
        The subject text that follows the prefix
    author : str
        The commit author's name
    date : datetime
        The commit timestamp

    Methods
    -------
    generate_url
        Builds the conference page URL derived from the message
    to_markdown
        Renders the commit as a single markdown bullet
    """

    hash: str
    prefix: str
    message: str
    author: str
    date: datetime

    def generate_url(self) -> str:
        """Build the conference page URL derived from the commit message.

        Returns
        -------
        str
            The site URL ending in a slug made from the message

        Notes
        -----
        The slug is the lower-cased message with every space turned into a
        hyphen, then percent-encoded with ``urllib.parse.quote``.
        """
        words = self.message.lower().split(" ")
        slug = quote("-".join(words))
        return f"https://pythondeadlin.es/conference/{slug}"

    def to_markdown(self) -> str:
        """Render the commit as a single markdown bullet.

        Returns
        -------
        str
            A list item of the form ``- [YYYY-MM-DD] [message](url)``
        """
        day = f"{self.date:%Y-%m-%d}"
        link = f"[{self.message}]({self.generate_url()})"
        return f"- [{day}] {link}"
74+
75+
76+
class GitCommitParser:
    """Analyzes git repository history for conference-related commits.

    Parameters
    ----------
    repo_path : str, optional
        Path to the git repository, by default "."
    prefixes : list[str] | None, optional
        Commit prefixes to search for, by default ["cfp", "conf"]
    days : int | None, optional
        Number of days to look back in history; None (the default) applies
        no time limit

    Raises
    ------
    RuntimeError
        If no git executable can be found on PATH
    """

    # Sub-commands, options and revisions that may be handed to git.  For an
    # argument written as ``--name=value`` only the text before the first "="
    # is checked against this set.
    _ALLOWED_GIT_ARGS = frozenset(
        {
            "log",
            "show",
            "diff",
            "status",
            "rev-parse",
            "--format",
            "--pretty",
            "--no-merges",
            "--name-only",
            "--since",
            "HEAD",
            "origin",
            "--abbrev-ref",
            # Add other allowed commands as needed
        },
    )

    # Explicit separators keep records aligned even when a field is empty
    # (e.g. a commit without a subject), which line counting cannot do.
    _FIELD_SEP = "\x1f"
    _RECORD_SEP = "\x1e"
    _LOG_FORMAT = "--pretty=format:%H%x1f%s%x1f%an%x1f%ai%x1e"

    def __init__(self, repo_path: str = ".", prefixes: list[str] | None = None, days: int | None = None):
        # Fail fast: nothing else is useful without a git executable.
        self.git_path = shutil.which("git")
        if not self.git_path:
            raise RuntimeError("Git executable not found in PATH")

        self.repo_path = repo_path
        self.prefixes = prefixes or ["cfp", "conf"]
        self.days = days

        # Matches "<prefix>: <text>" case-insensitively; prefixes are escaped
        # so they are always treated literally.
        alternatives = "|".join(map(re.escape, self.prefixes))
        self._prefix_pattern = re.compile(rf"^({alternatives}):\s*(.+)$", re.IGNORECASE)

    def _validate_git_command(self, command: list[str]) -> None:
        """Reject any argument whose name is not in the allow-list.

        Parameters
        ----------
        command : list[str]
            Arguments that would follow the git executable

        Raises
        ------
        ValueError
            If any argument (the part before "=", if present) is not allowed
        """
        for argument in command:
            if argument.split("=")[0] not in self._ALLOWED_GIT_ARGS:
                raise ValueError("Invalid or unauthorized git command")

    def _execute_git_command(self, command: list[str]) -> str:
        """Run an allow-listed git command inside the repository.

        Parameters
        ----------
        command : list[str]
            Arguments that follow the git executable

        Returns
        -------
        str
            The command's standard output with surrounding whitespace removed

        Raises
        ------
        ValueError
            If the command contains an argument that is not allow-listed
        subprocess.CalledProcessError
            If git exits with a non-zero status
        """
        self._validate_git_command(command)

        try:
            result = subprocess.run(
                [self.git_path, *command],  # noqa: S603
                cwd=self.repo_path,
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"Error executing git command: {e}")
            raise
        return result.stdout.strip()

    def parse_commit_message(
        self,
        commit_hash: str,
        message: str,
        author: str,
        date_str: str,
    ) -> ConventionalCommit | None:
        """Turn one log record into a ConventionalCommit when its prefix matches.

        Parameters
        ----------
        commit_hash : str
            The commit hash
        message : str
            The commit subject line
        author : str
            The author's name
        date_str : str
            Timestamp formatted as "%Y-%m-%d %H:%M:%S %z"

        Returns
        -------
        ConventionalCommit | None
            The parsed commit with a lower-cased prefix, or None when the
            subject does not start with one of the configured prefixes

        Raises
        ------
        ValueError
            If a matching commit carries a timestamp in another format
        """
        match = self._prefix_pattern.match(message.strip())
        if not match:
            return None

        prefix, content = match.groups()
        date = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S %z")

        return ConventionalCommit(hash=commit_hash, prefix=prefix.lower(), message=content, author=author, date=date)

    def _build_log_command(self) -> list[str]:
        """Assemble the ``git log`` arguments for the configured time window."""
        command = ["log", self._LOG_FORMAT]
        if self.days is not None:
            since_date = datetime.now(timezone.utc) - timedelta(days=self.days)
            # One "--since=<date>" token: a bare date argument would be
            # rejected by the allow-list check.
            command.append(f"--since={since_date:%Y-%m-%d}")
        return command

    def _parse_log_output(self, log_output: str) -> list[ConventionalCommit]:
        """Split separator-delimited log output into matching commits."""
        commits = []
        for record in log_output.split(self._RECORD_SEP):
            # git puts a newline between consecutive records.
            record = record.strip("\n")
            if not record:
                continue
            fields = record.split(self._FIELD_SEP)
            if len(fields) != 4:
                # Malformed record (e.g. a separator inside a field); skip it.
                continue
            commit = self.parse_commit_message(*fields)
            if commit:
                commits.append(commit)
        return commits

    def get_conventional_commits(self) -> list[ConventionalCommit]:
        """Collect the commits whose subject starts with a configured prefix.

        Returns
        -------
        list[ConventionalCommit]
            Matching commits in the order git log reports them

        Raises
        ------
        subprocess.CalledProcessError
            If the underlying git command fails
        """
        log_output = self._execute_git_command(self._build_log_command())
        return self._parse_log_output(log_output)

    def _generate_link_list(self, commits: list[ConventionalCommit]) -> str:
        """Generate a comma-separated list of markdown-formatted links.

        Parameters
        ----------
        commits : list[ConventionalCommit]
            List of commits to format

        Returns
        -------
        str
            Formatted link list, or an empty string when there are no commits

        Notes
        -----
        Two links are joined with "and"; three or more use an Oxford comma.
        """
        if not commits:
            return ""

        links = [f"[{commit.message}]({commit.generate_url()})" for commit in commits]

        if len(links) == 1:
            return links[0]
        if len(links) == 2:
            return f"{links[0]} and {links[1]}"
        return f"{', '.join(links[:-1])}, and {links[-1]}"

    def generate_markdown_report(self) -> str:
        """Generate a markdown-formatted report of commits grouped by type.

        Returns
        -------
        str
            Complete markdown-formatted report; empty when nothing matched

        Notes
        -----
        Commits are grouped by prefix and listed newest first.  Only the
        "cfp" and "conf" groups are rendered, followed by a summary sentence
        that links to every listed entry.
        """
        commits = self.get_conventional_commits()
        grouped_commits: dict[str, list[ConventionalCommit]] = defaultdict(list)

        for commit in commits:
            grouped_commits[commit.prefix].append(commit)

        # Newest entries first within each group
        for commits_list in grouped_commits.values():
            commits_list.sort(key=lambda x: x.date, reverse=True)

        # Generate markdown sections
        sections = []

        if grouped_commits.get("cfp"):
            sections.extend(
                ["## Call for Papers", "", *[commit.to_markdown() for commit in grouped_commits["cfp"]], ""],
            )

        if grouped_commits.get("conf"):
            sections.extend(["## Conferences", "", *[commit.to_markdown() for commit in grouped_commits["conf"]], ""])

        # Generate summary sentence
        conf_links = self._generate_link_list(grouped_commits.get("conf", []))
        cfp_links = self._generate_link_list(grouped_commits.get("cfp", []))

        summary_parts = []
        if conf_links:
            summary_parts.append(f"these conferences {conf_links}")
        if cfp_links:
            summary_parts.append(f"new CFPs for {cfp_links}")

        if summary_parts:
            sections.extend(["## Summary", "", f"I found {' and '.join(summary_parts)}.", ""])

        return "\n".join(sections)
252+
253+
254+
def parse_arguments() -> argparse.Namespace:
    """Read the command-line options for the git history report.

    Returns
    -------
    argparse.Namespace
        Namespace with ``days`` (int, default 15) and ``repo`` (str, default ".")
    """
    cli = argparse.ArgumentParser(description="Parse git history for conference-related commits")

    # Each entry is (flag, keyword arguments for add_argument).
    option_specs = (
        ("--days", {"type": int, "default": 15, "help": "Number of days to look back in history"}),
        ("--repo", {"default": ".", "help": "Path to the git repository (default: current directory)"}),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)

    return cli.parse_args()
260+
261+
262+
def main():
    """Run the command-line tool and print the markdown report.

    Notes
    -----
    Reads the options from the command line, builds the report for the
    selected repository and prints it.  When the git command fails, a short
    error message is printed instead of the report.
    """
    options = parse_arguments()
    history = GitCommitParser(repo_path=options.repo, days=options.days)

    try:
        report = history.generate_markdown_report()
    except subprocess.CalledProcessError:
        print("Error: Failed to analyze git repository. Please ensure you're in a valid git repository.")
    else:
        print(report)


# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)