Skip to content

Commit a847f0b

Browse files
Update summarize_applications.py
1 parent 5a01c77 commit a847f0b

File tree

1 file changed

+66
-53
lines changed

1 file changed

+66
-53
lines changed

.github/scripts/summarize_applications.py

Lines changed: 66 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -4,71 +4,85 @@
44
from github import Github
55
from openpyxl import Workbook
66

7-
# Setup GitHub token and repo from environment
7+
# Load GitHub access credentials
88
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
99
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")
10+
11+
# Authenticate with GitHub
1012
g = Github(GITHUB_TOKEN)
1113
repo = g.get_repo(GITHUB_REPOSITORY)
1214

13-
print("📥 Fetching GitHub issues...")
14-
issues = list(repo.get_issues(state='open', labels=['ambassador']))
15-
print(f"🔍 Total issues fetched: {len(issues)}")
15+
print("📥 Fetching open GitHub issues with 'ambassador' label...")
16+
issues = repo.get_issues(state='open', labels=['ambassador'])
17+
18+
submissions = []
1619

17-
# Markdown extractor
20+
# Helper to extract plain-text responses
1821
def extract(label, body):
    """Return the first non-blank line of text that follows ``label`` in ``body``.

    The label is matched literally. It is passed through ``re.escape`` before
    being embedded in the pattern because the form labels used by this script
    contain regex metacharacters (``?``, ``(``, ``)``); left unescaped they are
    interpreted as regex operators and the lookup fails for those fields.

    Args:
        label: Heading text of the field to look up.
        body: Issue body text to search.

    Returns:
        The stripped remainder of the first line containing text after the
        label's own line, or ``""`` when the label is not found.
    """
    # ``.`` does not cross newlines, so the capture stops at the end of the
    # first line that has content after the label.
    match = re.search(rf"{re.escape(label)}\s*\n\s*(.+)", body)
    return match.group(1).strip() if match else ""
2124

22-
# Extracted field definitions
23-
submissions = []
25+
# Helper to extract checkbox options
26+
def extract_checkboxes(body):
    """Return the checkbox lines listed under the PyTorch contribution question.

    Looks for the heading "How has the nominee contributed to PyTorch?" and
    collects the consecutive ``- [ ] ...`` / ``- [x] ...`` lines that follow it.
    Both ticked and unticked entries are returned, markers included.

    Args:
        body: Issue body text to search.

    Returns:
        A list with one string per checkbox line, or an empty list when the
        heading (followed by at least one checkbox line) is not present.
    """
    found = re.search(
        r"How has the nominee contributed to PyTorch\?\s*\n((?:- \[.\] .+\n?)+)",
        body,
    )
    if found is None:
        return []
    return found.group(1).strip().splitlines()
31+
32+
# Process each issue
2433
for issue in issues:
2534
body = issue.body or ""
26-
entry = {
35+
36+
name = extract("Nominee Name", body)
37+
email = extract("Nominee Email", body)
38+
github_handle = extract("Nominee's GitHub or GitLab Handle", body)
39+
ambassador_plan = extract("🏆 How Would the Nominee Contribute as an Ambassador?", body)
40+
additional_info = extract("Any additional details you'd like to share?", body)
41+
contributions = extract_checkboxes(body)
42+
43+
# Format submission summary
44+
submission_summary = f"""**GitHub Handle:** {github_handle or 'Not Provided'}
45+
46+
**How Has the Nominee Contributed to PyTorch?**
47+
{chr(10).join(contributions) if contributions else 'Not Provided'}
48+
49+
**Ambassador Contribution Plan**
50+
{ambassador_plan or 'Not Provided'}
51+
52+
**Additional Information**
53+
{additional_info or 'Not Provided'}
54+
"""
55+
56+
submissions.append({
2757
"Issue #": issue.number,
28-
"Nominee Name": extract("Nominee Name", body),
29-
"Nominee Email": extract("Nominee Email", body),
30-
"GitHub Handle": extract("Nominee's GitHub or GitLab Handle", body),
31-
"Organization": extract("(Optional) Organization / Affiliation", body),
32-
"Location": extract("City, State/Province, Country", body),
33-
"Nominator Name": extract("Your Name", body),
34-
"Nominator Email": extract("Your Email (Optional)", body),
35-
"Contribution Checkboxes": "; ".join(re.findall(r"- \[x\] (.+)", body, re.IGNORECASE)),
36-
"Ambassador Pitch": extract("🏆 How Would the Nominee Contribute as an Ambassador?", body),
37-
"Additional Info": extract("Any additional details you'd like to share?", body)
38-
}
39-
40-
# Construct clean submission summary
41-
summary = f"""Contributions:\n{entry['Contribution Checkboxes']}
42-
43-
Ambassador Nomination Statement:\n{entry['Ambassador Pitch']}
44-
45-
GitHub Handle:\n{entry['GitHub Handle']}
46-
47-
Additional Info:\n{entry['Additional Info']}"""
48-
entry["Submission Summary"] = summary
49-
submissions.append(entry)
50-
51-
# Deduplicate by nominee email (fallback to name)
52-
print("🧹 Deduplicating...")
53-
latest = {}
54-
for s in sorted(submissions, key=lambda x: x["Issue #"], reverse=True):
55-
key = (s["Nominee Email"] or s["Nominee Name"]).lower()
56-
if key not in latest:
57-
latest[key] = s
58-
deduped = list(latest.values())
59-
duplicates = [s for s in submissions if s not in deduped]
60-
61-
# Output folder
58+
"Nominee Name": name,
59+
"Nominee Email": email,
60+
"Submission Summary": submission_summary.strip()
61+
})
62+
63+
print(f"✅ Total submissions found: {len(submissions)}")
64+
65+
# Deduplicate by email (fallback to name)
66+
latest_by_email = {}
67+
for entry in sorted(submissions, key=lambda x: x["Issue #"], reverse=True):
68+
key = (entry["Nominee Email"] or entry["Nominee Name"]).lower()
69+
if key not in latest_by_email:
70+
latest_by_email[key] = entry
71+
72+
deduped = list(latest_by_email.values())
73+
duplicates = [entry for entry in submissions if entry not in deduped]
74+
75+
# Ensure output directory
6276
os.makedirs("ambassador", exist_ok=True)
6377

64-
# Save raw submissions
65-
with open("ambassador/submissions_all_raw.csv", "w", newline='', encoding='utf-8') as f:
78+
# Save all submissions
79+
# Take the CSV columns from the first submission when there is one; otherwise
# fall back to the known column set so that a run with no matching issues
# still produces header-only files instead of failing with IndexError on
# ``submissions[0]``.
default_columns = ["Issue #", "Nominee Name", "Nominee Email", "Submission Summary"]
csv_columns = list(submissions[0].keys()) if submissions else default_columns

with open("ambassador/submissions_all.csv", "w", newline='', encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=csv_columns)
    writer.writeheader()
    writer.writerows(submissions)

# Save deduplicated submissions (same columns as the full export)
with open("ambassador/submissions_deduped.csv", "w", newline='', encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=csv_columns)
    writer.writeheader()
    writer.writerows(deduped)
@@ -77,11 +91,10 @@ def extract(label, body):
7791
if duplicates:
    # Write the submissions that were dropped during deduplication to a
    # separate workbook.
    wb = Workbook()
    ws = wb.active
    ws.title = "Duplicates"
    # Worksheet.append() accepts a list, tuple, range, generator or dict; a
    # dict_keys view is rejected with TypeError, so materialize it as a list.
    # The same list fixes the column order for every data row.
    columns = list(duplicates[0].keys())
    ws.append(columns)
    for row in duplicates:
        ws.append([row[k] for k in columns])
    wb.save("ambassador/submissions_duplicates.xlsx")
8699

87-
print("✅ Done: All submission data saved.")
100+
print("📁 Files written: submissions_all.csv, submissions_deduped.csv, submissions_duplicates.xlsx")

0 commit comments

Comments
 (0)