Skip to content

Commit 141cede

Browse files
Create summarize_applications.py
1 parent ef7980a commit 141cede

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import os
2+
import csv
3+
import re
4+
from github import Github
5+
from openpyxl import Workbook
6+
7+
# Read the GitHub token and repository name from the environment.
# NOTE(review): presumably both are provided by the CI runner — confirm.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")

# Fail fast with a readable message: without a repository name the
# get_repo() call below cannot succeed. A missing token is deliberately
# still passed through unchanged, exactly as before.
if not GITHUB_REPOSITORY:
    raise SystemExit(
        "GITHUB_REPOSITORY environment variable is not set "
        "(expected something like 'owner/name')."
    )

# Authenticate
g = Github(GITHUB_TOKEN)
repo = g.get_repo(GITHUB_REPOSITORY)

print("📥 Fetching GitHub issues...")
# Materialise the result into a list so that len() works below and the
# issues can be iterated again later in the script.
issues = list(repo.get_issues(state='all', labels=['ambassador']))

print(f"🔍 Total issues fetched: {len(issues)}")
19+
20+
# Helper to extract a label's value from issue body
21+
def extract(label, body):
    """Return the first non-blank line that follows *label* in *body*.

    The label is matched literally (regex metacharacters such as ``?``,
    ``(`` or ``/`` in the label are escaped), must be followed only by
    optional whitespace up to a line break, and the rest of the next
    non-blank line is returned with surrounding whitespace stripped.

    Args:
        label: Heading text to look for, e.g. ``"Nominee Name"``.
        body: Full text of the issue body.

    Returns:
        The stripped value, or ``""`` when the label is not found.

    Note:
        Blank lines after the label are skipped, so if the field was left
        empty the next non-blank line (possibly the following heading) is
        what gets returned.
    """
    # re.escape keeps punctuation in the label from being read as regex
    # syntax; without it a trailing "?" turns into a quantifier and the
    # label never matches.
    match = re.search(rf"{re.escape(label)}\s*\n\s*(.+)", body)
    return match.group(1).strip() if match else ""
24+
25+
# Output column -> heading text that introduces the value in the issue body.
# The order here is the column order of every row built below.
FIELD_HEADINGS = (
    ("Nominee Name", "Nominee Name"),
    ("Nominee Email", "Nominee Email"),
    ("Organization", "Organization / Affiliation"),
    ("Location", "City, State/Province, Country"),
    ("Contributions", "Relevant Contributions and Links"),
    ("Ambassador Pitch", "Why do you want to be a PyTorch Ambassador?"),
    ("Extra Notes", "Additional Notes or Comments"),
    ("Nominate Others", "I would like to nominate contributors"),
    ("Additional Info", "Any other information"),
)

# Turn every fetched issue into one flat row; the issue number always
# comes first, followed by the parsed fields.
submissions = []
for ticket in issues:
    # An issue without a description has body None; treat it as empty text.
    text = ticket.body or ""
    row = {"Issue #": ticket.number}
    row.update((column, extract(heading, text)) for column, heading in FIELD_HEADINGS)
    submissions.append(row)
42+
43+
print("🧹 Deduplicating...")

# Deduplication logic: use email if present, fallback to name.
# Walking the submissions from the highest issue number down means the
# first entry seen for a key is the most recent one, and that one is kept.
latest_submissions = {}
for entry in sorted(submissions, key=lambda x: x["Issue #"], reverse=True):
    identity = entry["Nominee Email"] or entry["Nominee Name"]
    if identity:
        key = identity.lower()
    else:
        # Neither email nor name could be parsed. Give the entry a key of
        # its own (a tuple can never equal a string key) so unrelated,
        # unparseable issues are not collapsed into a single "duplicate".
        key = ("issue", entry["Issue #"])
    latest_submissions.setdefault(key, entry)

deduped = list(latest_submissions.values())

# Everything that was not kept is a duplicate. Issue numbers are unique per
# row, so a set lookup replaces comparing whole dicts against a list.
kept_issue_numbers = {entry["Issue #"] for entry in deduped}
duplicates = [s for s in submissions if s["Issue #"] not in kept_issue_numbers]
54+
55+
# Ensure output folder
os.makedirs("ambassador", exist_ok=True)

# Column order shared by all output files. It is taken from the first row
# when there is one; the fallback list lets the script still emit
# header-only CSVs (instead of raising IndexError) when no issue matched.
if submissions:
    fieldnames = list(submissions[0])
else:
    fieldnames = [
        "Issue #",
        "Nominee Name",
        "Nominee Email",
        "Organization",
        "Location",
        "Contributions",
        "Ambassador Pitch",
        "Extra Notes",
        "Nominate Others",
        "Additional Info",
    ]

# Save full submission CSV
with open("ambassador/ambassador_submissions_full.csv", "w", newline='', encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(submissions)

# Save deduplicated CSV
with open("ambassador/ambassador_submissions_deduped.csv", "w", newline='', encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(deduped)

# Save duplicates to Excel (only when there is something to report)
if duplicates:
    wb = Workbook()
    ws = wb.active
    ws.title = "Duplicates Removed"
    ws.append(fieldnames)
    for d in duplicates:
        # Look values up by column name. Iterating the header row of the
        # sheet would yield cell objects, which never match the dict keys
        # and would leave every data row blank.
        ws.append([d.get(k, "") for k in fieldnames])
    wb.save("ambassador/duplicates_removed.xlsx")
    print("🗂️ Duplicates written to ambassador/duplicates_removed.xlsx")

print("✅ Step 1 complete: Extraction + Deduplication done.")

0 commit comments

Comments
 (0)