Skip to content

Commit ac9bb2e

Browse files
Update summarize_applications.py
1 parent 0bf873e commit ac9bb2e

File tree

1 file changed

+71
-56
lines changed

1 file changed

+71
-56
lines changed

.github/scripts/summarize_applications.py

Lines changed: 71 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2,86 +2,101 @@
22
import csv
33
import re
44
from github import Github
5-
from openpyxl import Workbook
65

7-
# Get GitHub token and repository name
6+
# Load secrets
87
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
98
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")
109

1110
# Authenticate with GitHub
1211
g = Github(GITHUB_TOKEN)
1312
repo = g.get_repo(GITHUB_REPOSITORY)
1413

15-
print("📥 Fetching open GitHub issues labeled 'ambassador'...")
16-
issues = list(repo.get_issues(state='open', labels=['ambassador']))
17-
print(f"🔍 Total open issues fetched: {len(issues)}")
14+
print("🔍 Fetching ambassador issues (open only)...")
15+
issues = repo.get_issues(state="open", labels=["ambassador"])
1816

19-
# Helper to extract a field from the issue body
20-
def extract(label, body):
21-
match = re.search(rf"{re.escape(label)}\s*\n\s*(.+)", body)
17+
submissions = []
18+
19+
def extract_value(label, body):
    """Return the first line of text found after ``label`` in ``body``.

    The label is matched literally, so labels containing regex
    metacharacters (``?``, ``(``, ``)``, ``/`` ...) are found as written.
    Any whitespace and blank lines between the label line and the value
    are skipped; only the first line of the value is captured.

    Args:
        label: Exact field heading text to look for.
        body: Issue body text to search.

    Returns:
        The captured line with surrounding whitespace stripped, or an
        empty string when the label is not present.
    """
    # re.escape is essential: without it a trailing "?" in a label makes the
    # preceding character optional and the literal "?" in the body never matches.
    pattern = rf"{re.escape(label)}\s*\n\s*(.+?)(?:\n|$)"
    match = re.search(pattern, body)
    return match.group(1).strip() if match else ""
2322

24-
# Extract structured data from each issue
25-
submissions = []
23+
def extract_checkboxes(body):
    """Return every ticked ``- [x]`` item in ``body`` as a bulleted string.

    Matching is case-insensitive, so ``[X]`` counts as ticked. Each item's
    text is stripped and re-emitted as ``- <text>``; items are joined with
    newlines. An empty string is returned when nothing is ticked.
    """
    ticked = re.compile(r"- \[x\] (.+)", re.IGNORECASE)
    bullets = []
    for hit in ticked.finditer(body):
        bullets.append("- " + hit.group(1).strip())
    return "\n".join(bullets)
26+
2627
# Turn each fetched issue into one flat row appended to `submissions`.
for issue in issues:
    text = issue.body or ""

    # Optional pieces of the free-form summary, in display order.
    # Each pair is (heading prefix, extracted value); empty values are dropped.
    sections = (
        ("GitHub Handle: ", extract_value("Nominee's GitHub or GitLab Handle", text)),
        ("Contributions:\n", extract_checkboxes(text)),
        ("Ambassador Pitch:\n", extract_value("🏆 How Would the Nominee Contribute as an Ambassador?", text)),
        ("Additional Info:\n", extract_value("Any additional details you'd like to share?", text)),
    )

    row = {
        "Issue #": issue.number,
        "Nominee Name": extract_value("Nominee Name", text),
        "Nominee Email": extract_value("Nominee Email", text),
        "Organization": extract_value("Organization / Affiliation", text),
        "Location": extract_value("City, State/Province, Country", text),
        "Nominator Name": extract_value("Your Name", text),
        "Nominator Email": extract_value("Your Email", text),
        # Non-empty sections separated by a blank line.
        "Submission Summary": "\n\n".join(
            prefix + value for prefix, value in sections if value
        ),
    }
    submissions.append(row)
64+
65+
print(f"📄 Total submissions found: {len(submissions)}")
print("🧹 Deduplicating...")

# Deduplicate by nominee email (case-insensitive), falling back to nominee name.
# Submissions are visited from the highest issue number down, so the most
# recent submission for a given nominee is the one that is kept.
deduped = {}
for entry in sorted(submissions, key=lambda x: x["Issue #"], reverse=True):
    key = (entry["Nominee Email"] or entry["Nominee Name"]).lower()
    if not key:
        # Neither email nor name could be extracted: there is nothing to
        # compare on, so treat the submission as unique instead of letting
        # all such submissions collapse onto the shared key "".
        key = ("issue", entry["Issue #"])
    deduped.setdefault(key, entry)

deduped_list = list(deduped.values())

# Anything not kept above is a duplicate; compare by issue number (unique per
# submission) rather than scanning the kept list with dict equality.
kept_issue_numbers = {entry["Issue #"] for entry in deduped_list}
duplicates = [s for s in submissions if s["Issue #"] not in kept_issue_numbers]
5877

59-
# Ensure output folder
78+
# Save results
# Column order shared by every CSV. It must mirror the keys of the dicts
# appended to `submissions`; csv.DictWriter raises ValueError on unknown keys.
# Using a fixed list (instead of submissions[0].keys()) keeps the script from
# crashing with IndexError when no matching issues exist: header-only files
# are written in that case.
CSV_FIELDNAMES = [
    "Issue #",
    "Nominee Name",
    "Nominee Email",
    "Organization",
    "Location",
    "Nominator Name",
    "Nominator Email",
    "Submission Summary",
]


def _write_csv(path, rows):
    """Write ``rows`` (dicts keyed by CSV_FIELDNAMES) to ``path`` with a header row."""
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=CSV_FIELDNAMES)
        writer.writeheader()
        writer.writerows(rows)


os.makedirs("ambassador", exist_ok=True)

_write_csv("ambassador/submissions_all.csv", submissions)
_write_csv("ambassador/submissions_deduped.csv", deduped_list)

# The duplicates file is only produced when something was actually removed.
if duplicates:
    _write_csv("ambassador/submissions_duplicates.csv", duplicates)
7396

74-
# Write duplicates to Excel if any
97+
# Final report: name the files this run wrote under ambassador/.
report = [
    "✅ Extraction and deduplication complete.",
    "📁 Files created in ambassador/:",
    " - submissions_all.csv",
    " - submissions_deduped.csv",
]
# The duplicates file only exists when at least one duplicate was found.
if duplicates:
    report.append(" - submissions_duplicates.csv")
for line in report:
    print(line)

0 commit comments

Comments
 (0)