Skip to content

Commit 7e1451c

Browse files
Update summarize_applications.py
1 parent 9e153a7 commit 7e1451c

File tree

1 file changed

+40
-44
lines changed

1 file changed

+40
-44
lines changed

.github/scripts/summarize_applications.py

Lines changed: 40 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -2,90 +2,86 @@
22
import csv
33
import re
44
from github import Github
5+
from openpyxl import Workbook
56

6-
# Step 0: Setup environment
7+
# Get GitHub token and repository name
78
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
89
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")
910

10-
# Step 1: Authenticate GitHub
11+
# Authenticate with GitHub
1112
g = Github(GITHUB_TOKEN)
1213
repo = g.get_repo(GITHUB_REPOSITORY)
1314

14-
print("📥 Fetching GitHub issues...")
15-
issues = list(repo.get_issues(state='all', labels=['ambassador']))
16-
print(f"🔍 Total issues fetched: {len(issues)}")
15+
print("📥 Fetching open GitHub issues labeled 'ambassador'...")
16+
issues = list(repo.get_issues(state='open', labels=['ambassador']))
17+
print(f"🔍 Total open issues fetched: {len(issues)}")
1718

18-
# Helper: Extract value from GitHub issue template body
19+
# Helper to extract a field from the issue body
1920
def extract(label, body):
20-
match = re.search(rf"{label}\s*\n\s*(.+)", body)
21+
match = re.search(rf"{re.escape(label)}\s*\n\s*(.+)", body)
2122
return match.group(1).strip() if match else ""
2223

23-
# Step 2: Extract submission data
24+
# Extract structured data from each issue
2425
submissions = []
2526
for issue in issues:
2627
body = issue.body or ""
2728
entry = {
2829
"Issue #": issue.number,
2930
"Nominee Name": extract("Nominee Name", body),
3031
"Nominee Email": extract("Nominee Email", body),
31-
"Organization": extract("Organization / Affiliation", body),
32+
"GitHub Handle": extract("Nominee's GitHub or GitLab Handle", body),
33+
"Organization": extract("(Optional) Organization / Affiliation", body),
3234
"Location": extract("City, State/Province, Country", body),
33-
"Contributions": extract("Relevant Contributions and Links", body),
34-
"Ambassador Pitch": extract("Why do you want to be a PyTorch Ambassador?", body),
35-
"Extra Notes": extract("Additional Notes or Comments", body),
36-
"Nominate Others": extract("I would like to nominate contributors", body),
37-
"Additional Info": extract("Any other information", body)
35+
"Your Name": extract("Your Name", body),
36+
"Your Email": extract("Your Email (Optional)", body),
37+
"Submission Summary": "\n\n".join([
38+
f"Nominee Self/Nominated: {extract('Select one:', body)}",
39+
f"Requirements Acknowledged: {extract('Please confirm that the nominee meets the following requirements:', body)}",
40+
f"Contributions: {extract('How has the nominee contributed to PyTorch?', body)}",
41+
f"Ambassador Pitch: {extract('🏆 How Would the Nominee Contribute as an Ambassador?', body)}",
42+
# Plain concatenation: a backslash-escaped quote inside an f-string replacement field is a SyntaxError before Python 3.12.
"Additional Info: " + extract("Any additional details you'd like to share?", body)
43+
])
3844
}
3945
submissions.append(entry)
4046

41-
print("🧹 Deduplicating...")
47+
print("🧹 Deduplicating by email or name...")
4248

43-
# Step 3: Deduplicate — keep latest per email/name
49+
# Deduplication logic: keep latest (by issue #), use email if available
4450
latest_submissions = {}
45-
seen_keys = set()
46-
4751
for entry in sorted(submissions, key=lambda x: x["Issue #"], reverse=True):
4852
key = entry["Nominee Email"].lower() if entry["Nominee Email"] else entry["Nominee Name"].lower()
49-
if key not in latest_submissions:
53+
if key and key not in latest_submissions:
5054
latest_submissions[key] = entry
51-
seen_keys.add(key)
5255

5356
deduped = list(latest_submissions.values())
57+
duplicates = [s for s in submissions if s not in deduped]
5458

55-
# Step 4: Track duplicates
56-
duplicates = []
57-
seen_keys_copy = seen_keys.copy() # prevent modifying original while checking
58-
for entry in submissions:
59-
key = entry["Nominee Email"].lower() if entry["Nominee Email"] else entry["Nominee Name"].lower()
60-
if key in seen_keys_copy:
61-
seen_keys_copy.remove(key) # keep only the first seen (i.e., latest)
62-
else:
63-
duplicates.append(entry)
64-
65-
# Step 5: Ensure output directory exists
66-
output_dir = "ambassador/output_step1"
67-
os.makedirs(output_dir, exist_ok=True)
59+
# Ensure output folder
60+
os.makedirs("ambassador", exist_ok=True)
6861

69-
# Step 6: Write full submissions
70-
with open(os.path.join(output_dir, "ambassador_submissions_full.csv"), "w", newline='', encoding="utf-8") as f:
62+
# Write raw submissions
63+
with open("ambassador/submissions_all_raw.csv", "w", newline='', encoding="utf-8") as f:
7164
writer = csv.DictWriter(f, fieldnames=submissions[0].keys())
7265
writer.writeheader()
7366
writer.writerows(submissions)
7467

75-
# Step 7: Write deduplicated submissions
76-
with open(os.path.join(output_dir, "ambassador_submissions_deduped.csv"), "w", newline='', encoding="utf-8") as f:
68+
# Write deduplicated submissions
69+
with open("ambassador/submissions_deduplicated.csv", "w", newline='', encoding="utf-8") as f:
7770
writer = csv.DictWriter(f, fieldnames=deduped[0].keys())
7871
writer.writeheader()
7972
writer.writerows(deduped)
8073

81-
# Step 8: Write duplicates removed
74+
# Write duplicates to Excel if any
8275
if duplicates:
83-
with open(os.path.join(output_dir, "duplicates_removed.csv"), "w", newline='', encoding="utf-8") as f:
84-
writer = csv.DictWriter(f, fieldnames=duplicates[0].keys())
85-
writer.writeheader()
86-
writer.writerows(duplicates)
87-
print(f"🗂️ Duplicates written to {output_dir}/duplicates_removed.csv")
76+
wb = Workbook()
77+
ws = wb.active
78+
ws.title = "Duplicates Removed"
79+
ws.append(list(duplicates[0].keys()))
80+
for d in duplicates:
81+
# Look values up by the header keys; ws[1] yields Cell objects, which never match the dict's string keys.
ws.append([d.get(k, "") for k in duplicates[0]])
82+
wb.save("ambassador/submissions_duplicates_removed.xlsx")
83+
print("🗂️ Duplicates written to ambassador/submissions_duplicates_removed.xlsx")
8884
else:
8985
print("✅ No duplicates found.")
9086

91-
print("✅ Step 1 complete: Extraction + Deduplication done.")
87+
print("🎉 Done: Data extracted and files saved.")

0 commit comments

Comments
 (0)