Skip to content

Commit b294095

Browse files
Update extract_submissions.py
1 parent 7c827d0 commit b294095

File tree

1 file changed

+65
-100
lines changed

1 file changed

+65
-100
lines changed
Lines changed: 65 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1,91 +1,68 @@
11
import os
2+
import re
23
import csv
34
import random
4-
from datetime import datetime
55
from collections import defaultdict
66
from github import Github
77
from openpyxl import Workbook
88
from openpyxl.styles import Alignment, Font
99
from openpyxl.utils import get_column_letter
1010
from openpyxl.worksheet.datavalidation import DataValidation
1111

12-
# Load GitHub issues
1312
print("📥 Fetching GitHub issues...")
14-
GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
15-
GITHUB_REPO = os.environ["GITHUB_REPOSITORY"]
16-
REPO = Github(GITHUB_TOKEN).get_repo(GITHUB_REPO)
1713

18-
issues = REPO.get_issues(state="all", labels=["closed"])
14+
# Environment setup
15+
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
16+
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")
17+
g = Github(GITHUB_TOKEN)
18+
repo = g.get_repo(GITHUB_REPOSITORY)
19+
20+
issues = list(repo.get_issues(state='all', labels=['ambassador']))
21+
print(f"🔍 Found {len(issues)} total issues")
22+
23+
# Helper to extract field from issue body
24+
def extract_field(label, body):
    """Return the first non-empty line that follows ``label`` in ``body``.

    The issue body is expected to contain the label text on one line and the
    submitted answer on a following line (blank lines in between are skipped).
    Only the first line of the answer is returned.

    Args:
        label: Literal label text to look for. It is matched verbatim, so
            characters such as ``?``, ``(``, ``)`` or ``.`` have no special
            regex meaning.
        body: Issue body text. May be ``None`` or empty when the issue was
            opened without a description.

    Returns:
        The stripped answer line, or ``""`` when the body is empty or the
        label is not present.
    """
    # Issues created without a description have no body; treat as "no answer"
    # instead of letting re.search raise TypeError.
    if not body:
        return ""

    # Escape the label: several labels end in "?" or contain parentheses,
    # which would otherwise be interpreted as regex operators and make the
    # pattern silently fail to match.
    pattern = rf"{re.escape(label)}\s*\n\s*(.+)"
    match = re.search(pattern, body)
    return match.group(1).strip() if match else ""
27+
28+
# Step 1: Parse issues
1929
submissions_raw = []
2030
for issue in issues:
21-
if not issue.body or "[Nomination]" not in issue.title:
22-
continue
31+
body = issue.body
2332
submission = {
2433
"Issue #": issue.number,
25-
"Nominee Name": "",
26-
"Nominee GitHub": "",
27-
"Nominee Email": "",
28-
"Organization": "",
29-
"Location": "",
30-
"Nominator Name": "",
31-
"Nominator Email": "",
32-
"Nominee Contributions": "",
33-
"Ambassador Pitch": "",
34-
"Additional Info": "",
35-
"Created At": issue.created_at.strftime("%Y-%m-%d %H:%M:%S")
34+
"Nominee Name": extract_field("Nominee Name", body),
35+
"Nominee Email": extract_field("Nominee Email", body),
36+
"GitHub Handle": extract_field("GitHub or GitLab Handle", body),
37+
"Organization": extract_field("Organization / Affiliation", body),
38+
"Location": extract_field("City, State/Province, Country", body),
39+
"Contributions": extract_field("How has the nominee contributed to PyTorch?", body),
40+
"Ambassador Pitch": extract_field("How Would the Nominee Contribute as an Ambassador?", body),
41+
"Extra Notes": extract_field("Any additional details you'd like to share?", body),
3642
}
37-
38-
# Extract fields
39-
lines = issue.body.splitlines()
40-
current_key = ""
41-
for line in lines:
42-
if "**Nominee Name**" in line:
43-
current_key = "Nominee Name"
44-
elif "**Nominee Email**" in line:
45-
current_key = "Nominee Email"
46-
elif "**GitHub or GitLab Handle**" in line:
47-
current_key = "Nominee GitHub"
48-
elif "**Organization / Affiliation**" in line:
49-
current_key = "Organization"
50-
elif "**City, State/Province, Country**" in line:
51-
current_key = "Location"
52-
elif "**Your Name**" in line:
53-
current_key = "Nominator Name"
54-
elif "**Your Email (Optional)**" in line:
55-
current_key = "Nominator Email"
56-
elif "**How has the nominee contributed**" in line:
57-
current_key = "Nominee Contributions"
58-
elif "**How Would the Nominee Contribute as an Ambassador?**" in line:
59-
current_key = "Ambassador Pitch"
60-
elif "**Any additional details you'd like to share?**" in line:
61-
current_key = "Additional Info"
62-
elif line.strip() and current_key:
63-
submission[current_key] += line.strip() + "\n"
64-
6543
submissions_raw.append(submission)
6644

67-
# Deduplicate by GitHub handle (latest entry kept)
68-
print("🧹 Deduplicating...")
69-
seen = {}
45+
# Step 2: Deduplicate by GitHub handle (keep latest by Issue #)
46+
submissions_by_handle = {}
7047
duplicates = []
71-
for s in sorted(submissions_raw, key=lambda x: x["Created At"]):
72-
key = s["Nominee GitHub"].strip().lower()
73-
if key in seen:
74-
duplicates.append(s)
48+
for s in sorted(submissions_raw, key=lambda x: x["Issue #"], reverse=True):
49+
handle = s.get("GitHub Handle", "").lower()
50+
if handle and handle not in submissions_by_handle:
51+
submissions_by_handle[handle] = s
7552
else:
76-
seen[key] = s
53+
duplicates.append(s)
7754

78-
submissions = list(seen.values())
55+
submissions = list(submissions_by_handle.values())
56+
print(f"🧹 Deduplicated to {len(submissions)} unique submissions")
7957

80-
# Save deduplicated CSV
58+
# Step 3: Write deduplicated CSV
8159
os.makedirs("ambassador", exist_ok=True)
82-
csv_path = "ambassador/ambassador_submissions_deduped.csv"
83-
with open(csv_path, "w", newline="", encoding="utf-8") as f:
84-
writer = csv.DictWriter(f, fieldnames=list(submissions[0].keys()))
60+
with open("ambassador/ambassador_submissions_deduped.csv", "w", newline='', encoding='utf-8') as f:
61+
writer = csv.DictWriter(f, fieldnames=submissions[0].keys())
8562
writer.writeheader()
8663
writer.writerows(submissions)
8764

88-
# Save duplicates separately
65+
# Step 4: Save duplicates separately
8966
if duplicates:
9067
dup_wb = Workbook()
9168
ws = dup_wb.active
@@ -94,8 +71,13 @@
9471
for d in duplicates:
9572
ws.append([d.get(k, "") for k in ws[1]])
9673
dup_wb.save("ambassador/duplicates_removed.xlsx")
74+
print("⚠️ Duplicates saved to ambassador/duplicates_removed.xlsx")
75+
76+
# Step 5: Generate Reviewer Sheets
77+
print("📊 Generating reviewer sheets...")
78+
79+
reviewers = [f"Reviewer {i}" for i in range(1, 8)]
9780

98-
# Rubric
9981
rubric = [
10082
("Technical Expertise", "Proficiency with the PyTorch Ecosystem", "Demonstrated knowledge and practical experience with PyTorch, including model building, traininga and deployment?"),
10183
("Technical Expertise", "Proficiency with the PyTorch Ecosystem", "Familiarity with foundation-hosted projects, vLLM, DeepSpeed?"),
@@ -121,12 +103,7 @@
121103
("Motivation and Vision", "Vision", "Proposed goals or initiatives that align with the mission of the PyTorch Foundation?")
122104
]
123105

124-
summary_categories = list(dict.fromkeys(cat for cat, _, _ in rubric))
125-
reviewers = [f"Reviewer {i}" for i in range(1, 8)]
126-
output_folder = "ambassador/reviewer_sheets_excel"
127-
os.makedirs(output_folder, exist_ok=True)
128-
129-
# Assign reviewers evenly
106+
summary_categories = list({cat for cat, _, _ in rubric})
130107
assignments = []
131108
reviewer_counts = defaultdict(int)
132109
for submission in submissions:
@@ -135,17 +112,18 @@
135112
reviewer_counts[reviewer] += 1
136113
assignments.append((submission, reviewer))
137114

138-
# Generate reviewer workbooks
115+
output_folder = "ambassador/reviewer_sheets_excel"
116+
os.makedirs(output_folder, exist_ok=True)
117+
139118
for reviewer in reviewers:
140119
wb = Workbook()
141120
ws = wb.active
142121
ws.title = "Review Sheet"
143122
summary_ws = wb.create_sheet("Score Summary")
144123

145-
headers = [
146-
"Submission ID", "First Name", "Last Name", "Submission Summary",
147-
"Reviewer's Comment", "Category", "Subcategory", "Question", "Score"
148-
]
124+
# Headers
125+
headers = ["Submission ID", "First Name", "Last Name", "Submission Summary",
126+
"Reviewer's Comment", "Category", "Subcategory", "Question", "Score"]
149127
ws.append(headers)
150128
for col in range(1, len(headers)+1):
151129
ws.cell(row=1, column=col).font = Font(bold=True)
@@ -164,18 +142,11 @@
164142
name = submission["Nominee Name"].split()
165143
fname = name[0]
166144
lname = name[-1] if len(name) > 1 else ""
145+
summary = f"""Contributions:\n{submission.get("Contributions", "")}
167146
168-
# Submission Summary includes all fields except first 3
169-
summary = f"""GitHub: {submission.get("Nominee GitHub", "")}
170-
Email: {submission.get("Nominee Email", "")}
171-
Organization: {submission.get("Organization", "")}
172-
Location: {submission.get("Location", "")}
173-
Nominator: {submission.get("Nominator Name", "")}
174-
Nominator Email: {submission.get("Nominator Email", "")}
175-
176-
Contributions:\n{submission.get("Nominee Contributions", "")}
177147
Ambassador Pitch:\n{submission.get("Ambassador Pitch", "")}
178-
Additional Info:\n{submission.get("Additional Info", "")}"""
148+
149+
Additional Notes:\n{submission.get("Extra Notes", "")}"""
179150

180151
start = row_idx
181152
for cat, subcat, question in rubric:
@@ -184,41 +155,35 @@
184155
end = row_idx - 1
185156
candidate_ranges.append((sid, fname, lname, start, end))
186157

187-
for col in [1, 2, 3, 4, 5]: # Merge ID, First, Last, Summary, Reviewer Comment
158+
for col in [1, 2, 3, 4]:
188159
ws.merge_cells(start_row=start, end_row=end, start_column=col, end_column=col)
189-
cell = ws.cell(row=start, column=col)
190-
cell.alignment = Alignment(vertical="top", wrap_text=True)
160+
ws.cell(row=start, column=col).alignment = Alignment(vertical="top", wrap_text=True)
191161

192162
for r in range(start, end + 1):
193163
dv.add(ws[f"I{r}"])
194164

195-
for col in ws.columns:
196-
max_len = max((len(str(cell.value)) if cell.value else 0) for cell in col)
197-
ws.column_dimensions[get_column_letter(col[0].column)].width = min(max_len + 5, 50)
198-
165+
# Score summary tab
199166
summary_ws.append(["Submission ID", "First Name", "Last Name"] + summary_categories + ["Final Score"])
200167
for col in range(1, summary_ws.max_column + 1):
201168
summary_ws.cell(row=1, column=col).font = Font(bold=True)
202169

203170
for sid, fname, lname, start, end in candidate_ranges:
204-
category_rows = defaultdict(list)
171+
cat_rows = defaultdict(list)
205172
for r in range(start, end + 1):
206173
cat = ws.cell(row=r, column=6).value
207-
category_rows[cat].append(r)
174+
cat_rows[cat].append(r)
208175

176+
row_num = summary_ws.max_row + 1
209177
formulas = []
210178
for cat in summary_categories:
211-
if cat in category_rows:
212-
rows = category_rows[cat]
179+
if cat in cat_rows:
180+
rows = cat_rows[cat]
213181
formulas.append(f'=SUMPRODUCT(--(\'Review Sheet\'!I{rows[0]}:I{rows[-1]}="Yes"))')
214182
else:
215183
formulas.append("0")
216-
217-
row_number = summary_ws.max_row + 1
218-
total_formula = f"=SUM({','.join([f'{get_column_letter(i+4)}{row_number}' for i in range(len(formulas))])})"
184+
total_formula = f"=SUM({','.join([f'{get_column_letter(i+4)}{row_num}' for i in range(len(formulas))])})"
219185
summary_ws.append([sid, fname, lname] + formulas + [total_formula])
220186

221-
filename = os.path.join(output_folder, f"{reviewer.replace(' ', '_').lower()}_sheet.xlsx")
222-
wb.save(filename)
187+
wb.save(os.path.join(output_folder, f"{reviewer.replace(' ', '_').lower()}_sheet.xlsx"))
223188

224-
print("✅ All reviewer sheets generated successfully.")
189+
print("✅ All reviewer sheets and summaries generated.")

0 commit comments

Comments
 (0)