Skip to content

Commit 7540fa5

Browse files
Update summarize_applications.py
1 parent 3a6e04f commit 7540fa5

File tree

1 file changed

+61
-76
lines changed

1 file changed

+61
-76
lines changed

.github/scripts/summarize_applications.py

Lines changed: 61 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -2,101 +2,86 @@
22
import csv
33
import re
44
from github import Github
5+
from openpyxl import Workbook
56

6-
# Load secrets
7+
# Setup GitHub token and repo from environment
78
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
89
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")
9-
10-
# Authenticate with GitHub
1110
g = Github(GITHUB_TOKEN)
1211
repo = g.get_repo(GITHUB_REPOSITORY)
1312

14-
print("🔍 Fetching ambassador issues (open only)...")
15-
issues = repo.get_issues(state="open", labels=["ambassador"])
16-
17-
submissions = []
13+
print("📥 Fetching GitHub issues...")
14+
issues = list(repo.get_issues(state='open', labels=['ambassador']))
15+
print(f"🔍 Total issues fetched: {len(issues)}")
1816

19-
def extract_value(label, body):
20-
match = re.search(rf"{label}\s*\n\s*(.+?)(?:\n|$)", body)
17+
# Markdown extractor
18+
def extract(label, body):
    """Return the first non-blank line that follows ``label`` in ``body``.

    Args:
        label: Literal field heading text to look for. It is regex-escaped,
            so characters such as ``(``, ``?`` or ``/`` are matched verbatim.
        body: Issue body text to search (callers pass ``""`` for a missing
            body).

    Returns:
        The stripped single-line value found after the label, or ``""`` when
        the label is absent or the field was left blank.
    """
    match = re.search(rf"{re.escape(label)}\s*\n\s*(.+?)(?:\n|$)", body)
    if not match:
        return ""
    value = match.group(1).strip()
    # The `\s*` after the newline also consumes blank lines, so a field that
    # was left empty makes the capture land on the NEXT section's Markdown
    # heading (e.g. "### Your Name"). Treat that as "no value" instead of
    # leaking a neighbouring label into this column.
    if re.match(r"#{1,6}(\s|$)", value):
        return ""
    return value
2221

23-
def extract_checkboxes(body):
24-
boxes = re.findall(r"- \[x\] (.+)", body, re.IGNORECASE)
25-
return "\n".join(f"- {b.strip()}" for b in boxes)
26-
22+
# Extracted field definitions
23+
submissions = []
2724
for issue in issues:
2825
body = issue.body or ""
29-
30-
nominee_name = extract_value("Nominee Name", body)
31-
nominee_email = extract_value("Nominee Email", body)
32-
github_handle = extract_value("Nominee's GitHub or GitLab Handle", body)
33-
organization = extract_value("Organization / Affiliation", body)
34-
location = extract_value("City, State/Province, Country", body)
35-
nominator_name = extract_value("Your Name", body)
36-
nominator_email = extract_value("Your Email", body)
37-
ambassador_pitch = extract_value("🏆 How Would the Nominee Contribute as an Ambassador?", body)
38-
additional_info = extract_value("Any additional details you'd like to share?", body)
39-
contributions = extract_checkboxes(body)
40-
41-
# Compose the Submission Summary
42-
summary_parts = []
43-
if github_handle:
44-
summary_parts.append(f"GitHub Handle: {github_handle}")
45-
if contributions:
46-
summary_parts.append(f"Contributions:\n{contributions}")
47-
if ambassador_pitch:
48-
summary_parts.append(f"Ambassador Pitch:\n{ambassador_pitch}")
49-
if additional_info:
50-
summary_parts.append(f"Additional Info:\n{additional_info}")
51-
52-
submission_summary = "\n\n".join(summary_parts)
53-
54-
submissions.append({
26+
entry = {
5527
"Issue #": issue.number,
56-
"Nominee Name": nominee_name,
57-
"Nominee Email": nominee_email,
58-
"Organization": organization,
59-
"Location": location,
60-
"Nominator Name": nominator_name,
61-
"Nominator Email": nominator_email,
62-
"Submission Summary": submission_summary
63-
})
64-
65-
print(f"📄 Total submissions found: {len(submissions)}")
28+
"Nominee Name": extract("Nominee Name", body),
29+
"Nominee Email": extract("Nominee Email", body),
30+
"GitHub Handle": extract("Nominee's GitHub or GitLab Handle", body),
31+
"Organization": extract("(Optional) Organization / Affiliation", body),
32+
"Location": extract("City, State/Province, Country", body),
33+
"Nominator Name": extract("Your Name", body),
34+
"Nominator Email": extract("Your Email (Optional)", body),
35+
"Contribution Checkboxes": "; ".join(re.findall(r"- \[x\] (.+)", body, re.IGNORECASE)),
36+
"Ambassador Pitch": extract("🏆 How Would the Nominee Contribute as an Ambassador?", body),
37+
"Additional Info": extract("Any additional details you'd like to share?", body)
38+
}
39+
40+
# Construct clean submission summary
41+
summary = f"""Contributions:\n{entry['Contribution Checkboxes']}
42+
43+
Ambassador Nomination Statement:\n{entry['Ambassador Pitch']}
44+
45+
GitHub Handle:\n{entry['GitHub Handle']}
46+
47+
Additional Info:\n{entry['Additional Info']}"""
48+
entry["Submission Summary"] = summary
49+
submissions.append(entry)
50+
51+
# Deduplicate by nominee email (fallback to name)
6652
print("🧹 Deduplicating...")
67-
68-
# Deduplicate by email, fallback to name
69-
deduped = {}
70-
for entry in sorted(submissions, key=lambda x: x["Issue #"], reverse=True):
71-
key = entry["Nominee Email"].lower() if entry["Nominee Email"] else entry["Nominee Name"].lower()
72-
if key not in deduped:
73-
deduped[key] = entry
74-
75-
deduped_list = list(deduped.values())
76-
duplicates = [s for s in submissions if s not in deduped_list]
77-
78-
# Save results
53+
latest = {}
# Walk newest-first (highest issue number first) so the first entry stored
# for a key is the most recent submission for that nominee.
for s in sorted(submissions, key=lambda x: x["Issue #"], reverse=True):
    # Key on the nominee email, falling back to the name when the email is blank.
    key = (s["Nominee Email"] or s["Nominee Name"]).lower()
    latest.setdefault(key, s)
deduped = list(latest.values())
# Each entry's "Issue #" is taken from a distinct issue, so membership by
# issue number selects the same rows as comparing whole dicts against the
# list, without the quadratic scan.
kept_issue_numbers = {s["Issue #"] for s in deduped}
duplicates = [s for s in submissions if s["Issue #"] not in kept_issue_numbers]
60+
61+
# Output folder
7962
os.makedirs("ambassador", exist_ok=True)
8063

81-
with open("ambassador/submissions_all.csv", "w", newline='', encoding="utf-8") as f:
64+
# Save raw submissions
if not submissions:
    # With no matching issues, `submissions[0]` below (and `deduped[0]` later)
    # would raise IndexError after an empty CSV had already been created.
    # Stop cleanly before touching any output file instead.
    print("ℹ️ No submissions found; nothing to write.")
    raise SystemExit(0)

with open("ambassador/submissions_all_raw.csv", "w", newline='', encoding='utf-8') as f:
    # Column order follows the key order of the first entry; every entry is
    # built with the same keys.
    writer = csv.DictWriter(f, fieldnames=submissions[0].keys())
    writer.writeheader()
    writer.writerows(submissions)
8569

86-
with open("ambassador/submissions_deduped.csv", "w", newline='', encoding="utf-8") as f:
87-
writer = csv.DictWriter(f, fieldnames=deduped_list[0].keys())
70+
# Save deduplicated
71+
with open("ambassador/submissions_deduplicated.csv", "w", newline='', encoding='utf-8') as f:
72+
writer = csv.DictWriter(f, fieldnames=deduped[0].keys())
8873
writer.writeheader()
89-
writer.writerows(deduped_list)
90-
91-
if duplicates:
92-
with open("ambassador/submissions_duplicates.csv", "w", newline='', encoding="utf-8") as f:
93-
writer = csv.DictWriter(f, fieldnames=duplicates[0].keys())
94-
writer.writeheader()
95-
writer.writerows(duplicates)
74+
writer.writerows(deduped)
9675

97-
print("✅ Extraction and deduplication complete.")
98-
print("📁 Files created in ambassador/:")
99-
print(" - submissions_all.csv")
100-
print(" - submissions_deduped.csv")
76+
# Save duplicates to Excel
10177
if duplicates:
102-
print(" - submissions_duplicates.csv")
78+
wb = Workbook()
79+
ws = wb.active
80+
ws.title = "Duplicates Removed"
81+
ws.append(list(duplicates[0].keys()))
82+
for row in duplicates:
83+
ws.append([row.get(k, "") for k in duplicates[0].keys()])
84+
wb.save("ambassador/submissions_duplicates_removed.xlsx")
85+
print("📄 Duplicates saved to ambassador/submissions_duplicates_removed.xlsx")
86+
87+
print("✅ Done: All submission data saved.")

0 commit comments

Comments
 (0)