|
1 | 1 | import os |
| 2 | +import re |
2 | 3 | import csv |
3 | 4 | import random |
4 | | -from datetime import datetime |
5 | 5 | from collections import defaultdict |
6 | 6 | from github import Github |
7 | 7 | from openpyxl import Workbook |
8 | 8 | from openpyxl.styles import Alignment, Font |
9 | 9 | from openpyxl.utils import get_column_letter |
10 | 10 | from openpyxl.worksheet.datavalidation import DataValidation |
11 | 11 |
|
12 | | -# Load GitHub issues |
13 | 12 | print("📥 Fetching GitHub issues...") |
14 | | -GITHUB_TOKEN = os.environ["GITHUB_TOKEN"] |
15 | | -GITHUB_REPO = os.environ["GITHUB_REPOSITORY"] |
16 | | -REPO = Github(GITHUB_TOKEN).get_repo(GITHUB_REPO) |
17 | 13 |
|
18 | | -issues = REPO.get_issues(state="all", labels=["closed"]) |
# Environment setup
# os.getenv returns None for a missing variable instead of raising, so the
# repository name is validated here before it is handed to get_repo().
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")
if not GITHUB_REPOSITORY:
    raise SystemExit("GITHUB_REPOSITORY environment variable is not set")
# NOTE(review): GITHUB_TOKEN is passed through unchanged even when unset;
# confirm whether unauthenticated access is acceptable for this workflow.
g = Github(GITHUB_TOKEN)
repo = g.get_repo(GITHUB_REPOSITORY)
| 19 | + |
| 20 | +issues = list(repo.get_issues(state='all', labels=['ambassador'])) |
| 21 | +print(f"🔍 Found {len(issues)} total issues") |
| 22 | + |
| 23 | +# Helper to extract field from issue body |
def extract_field(label, body):
    """Return the first non-blank line that follows ``label`` in ``body``.

    The label is matched literally (regex metacharacters such as ``?`` or
    ``/`` in the label are escaped). Any whitespace and blank lines between
    the label line and the value are skipped, and the value is captured up
    to the end of its line, so only the first line of a multi-line answer
    is returned.

    Args:
        label: Heading text to search for, e.g. ``"Nominee Name"``.
        body: Issue body text; may be ``None`` or empty.

    Returns:
        The stripped value, or ``""`` when the body is empty or the label
        is not found.
    """
    if not body:
        return ""
    # re.escape keeps labels that end in "?" from being read as a regex
    # quantifier, which previously made those fields never match.
    match = re.search(rf"{re.escape(label)}\s*\n\s*(.+)", body)
    return match.group(1).strip() if match else ""
| 27 | + |
| 28 | +# Step 1: Parse issues |
19 | 29 | submissions_raw = [] |
20 | 30 | for issue in issues: |
21 | | - if not issue.body or "[Nomination]" not in issue.title: |
22 | | - continue |
# issue.body can be empty/None (the earlier version of this loop guarded
# for it); normalise to "" so the field extraction below always gets text.
body = issue.body or ""
23 | 32 | submission = { |
24 | 33 | "Issue #": issue.number, |
25 | | - "Nominee Name": "", |
26 | | - "Nominee GitHub": "", |
27 | | - "Nominee Email": "", |
28 | | - "Organization": "", |
29 | | - "Location": "", |
30 | | - "Nominator Name": "", |
31 | | - "Nominator Email": "", |
32 | | - "Nominee Contributions": "", |
33 | | - "Ambassador Pitch": "", |
34 | | - "Additional Info": "", |
35 | | - "Created At": issue.created_at.strftime("%Y-%m-%d %H:%M:%S") |
| 34 | + "Nominee Name": extract_field("Nominee Name", body), |
| 35 | + "Nominee Email": extract_field("Nominee Email", body), |
| 36 | + "GitHub Handle": extract_field("GitHub or GitLab Handle", body), |
| 37 | + "Organization": extract_field("Organization / Affiliation", body), |
| 38 | + "Location": extract_field("City, State/Province, Country", body), |
| 39 | + "Contributions": extract_field("How has the nominee contributed to PyTorch?", body), |
| 40 | + "Ambassador Pitch": extract_field("How Would the Nominee Contribute as an Ambassador?", body), |
| 41 | + "Extra Notes": extract_field("Any additional details you'd like to share?", body), |
36 | 42 | } |
37 | | - |
38 | | - # Extract fields |
39 | | - lines = issue.body.splitlines() |
40 | | - current_key = "" |
41 | | - for line in lines: |
42 | | - if "**Nominee Name**" in line: |
43 | | - current_key = "Nominee Name" |
44 | | - elif "**Nominee Email**" in line: |
45 | | - current_key = "Nominee Email" |
46 | | - elif "**GitHub or GitLab Handle**" in line: |
47 | | - current_key = "Nominee GitHub" |
48 | | - elif "**Organization / Affiliation**" in line: |
49 | | - current_key = "Organization" |
50 | | - elif "**City, State/Province, Country**" in line: |
51 | | - current_key = "Location" |
52 | | - elif "**Your Name**" in line: |
53 | | - current_key = "Nominator Name" |
54 | | - elif "**Your Email (Optional)**" in line: |
55 | | - current_key = "Nominator Email" |
56 | | - elif "**How has the nominee contributed**" in line: |
57 | | - current_key = "Nominee Contributions" |
58 | | - elif "**How Would the Nominee Contribute as an Ambassador?**" in line: |
59 | | - current_key = "Ambassador Pitch" |
60 | | - elif "**Any additional details you'd like to share?**" in line: |
61 | | - current_key = "Additional Info" |
62 | | - elif line.strip() and current_key: |
63 | | - submission[current_key] += line.strip() + "\n" |
64 | | - |
65 | 43 | submissions_raw.append(submission) |
66 | 44 |
|
67 | | -# Deduplicate by GitHub handle (latest entry kept) |
68 | | -print("🧹 Deduplicating...") |
69 | | -seen = {} |
| 45 | +# Step 2: Deduplicate by GitHub handle (keep latest by Issue #) |
| 46 | +submissions_by_handle = {} |
70 | 47 | duplicates = [] |
71 | | -for s in sorted(submissions_raw, key=lambda x: x["Created At"]): |
72 | | - key = s["Nominee GitHub"].strip().lower() |
73 | | - if key in seen: |
74 | | - duplicates.append(s) |
| 48 | +for s in sorted(submissions_raw, key=lambda x: x["Issue #"], reverse=True): |
| 49 | + handle = s.get("GitHub Handle", "").lower() |
| 50 | + if handle and handle not in submissions_by_handle: |
| 51 | + submissions_by_handle[handle] = s |
75 | 52 | else: |
76 | | - seen[key] = s |
| 53 | + duplicates.append(s) |
77 | 54 |
|
78 | | -submissions = list(seen.values()) |
| 55 | +submissions = list(submissions_by_handle.values()) |
| 56 | +print(f"🧹 Deduplicated to {len(submissions)} unique submissions") |
79 | 57 |
|
80 | | -# Save deduplicated CSV |
| 58 | +# Step 3: Write deduplicated CSV |
81 | 59 | os.makedirs("ambassador", exist_ok=True) |
82 | | -csv_path = "ambassador/ambassador_submissions_deduped.csv" |
83 | | -with open(csv_path, "w", newline="", encoding="utf-8") as f: |
84 | | - writer = csv.DictWriter(f, fieldnames=list(submissions[0].keys())) |
# The CSV header is taken from the first submission, so with no submissions
# there is nothing to index; skip the export instead of raising IndexError.
if submissions:
    with open("ambassador/ambassador_submissions_deduped.csv", "w", newline='', encoding='utf-8') as f:
        # Every submission dict is built from the same literal, so the keys
        # of the first one describe all rows.
        writer = csv.DictWriter(f, fieldnames=list(submissions[0].keys()))
        writer.writeheader()
        writer.writerows(submissions)
else:
    print("⚠️ No submissions found; skipping CSV export")
87 | 64 |
|
88 | | -# Save duplicates separately |
| 65 | +# Step 4: Save duplicates separately |
89 | 66 | if duplicates: |
90 | 67 | dup_wb = Workbook() |
91 | 68 | ws = dup_wb.active |
|
# Iterating ws[1] yields Cell objects, not strings, so they must be unwrapped
# with .value before being used as dictionary keys; looking up the Cell itself
# never matches and wrote blank rows.
# Presumably row 1 holds the header row written just above; the original
# lookup relied on the same assumption.
header_keys = [cell.value for cell in ws[1]]
for d in duplicates:
    ws.append([d.get(key, "") for key in header_keys])
96 | 73 | dup_wb.save("ambassador/duplicates_removed.xlsx") |
| 74 | + print("⚠️ Duplicates saved to ambassador/duplicates_removed.xlsx") |
| 75 | + |
| 76 | +# Step 5: Generate Reviewer Sheets |
| 77 | +print("📊 Generating reviewer sheets...") |
| 78 | + |
| 79 | +reviewers = [f"Reviewer {i}" for i in range(1, 8)] |
97 | 80 |
|
98 | | -# Rubric |
99 | 81 | rubric = [ |
100 | 82 | ("Technical Expertise", "Proficiency with the PyTorch Ecosystem", "Demonstrated knowledge and practical experience with PyTorch, including model building, traininga and deployment?"), |
101 | 83 | ("Technical Expertise", "Proficiency with the PyTorch Ecosystem", "Familiarity with foundation-hosted projects, vLLM, DeepSpeed?"), |
|
121 | 103 | ("Motivation and Vision", "Vision", "Proposed goals or initiatives that align with the mission of the PyTorch Foundation?") |
122 | 104 | ] |
123 | 105 |
|
124 | | -summary_categories = list(dict.fromkeys(cat for cat, _, _ in rubric)) |
125 | | -reviewers = [f"Reviewer {i}" for i in range(1, 8)] |
126 | | -output_folder = "ambassador/reviewer_sheets_excel" |
127 | | -os.makedirs(output_folder, exist_ok=True) |
128 | | - |
129 | | -# Assign reviewers evenly |
# Unique rubric categories in first-seen order. dict.fromkeys de-duplicates
# while preserving order; a set would let the Score Summary column order
# change from one run to the next.
summary_categories = list(dict.fromkeys(cat for cat, _, _ in rubric))
130 | 107 | assignments = [] |
131 | 108 | reviewer_counts = defaultdict(int) |
132 | 109 | for submission in submissions: |
|
135 | 112 | reviewer_counts[reviewer] += 1 |
136 | 113 | assignments.append((submission, reviewer)) |
137 | 114 |
|
138 | | -# Generate reviewer workbooks |
| 115 | +output_folder = "ambassador/reviewer_sheets_excel" |
| 116 | +os.makedirs(output_folder, exist_ok=True) |
| 117 | + |
139 | 118 | for reviewer in reviewers: |
140 | 119 | wb = Workbook() |
141 | 120 | ws = wb.active |
142 | 121 | ws.title = "Review Sheet" |
143 | 122 | summary_ws = wb.create_sheet("Score Summary") |
144 | 123 |
|
145 | | - headers = [ |
146 | | - "Submission ID", "First Name", "Last Name", "Submission Summary", |
147 | | - "Reviewer's Comment", "Category", "Subcategory", "Question", "Score" |
148 | | - ] |
| 124 | + # Headers |
| 125 | + headers = ["Submission ID", "First Name", "Last Name", "Submission Summary", |
| 126 | + "Reviewer's Comment", "Category", "Subcategory", "Question", "Score"] |
149 | 127 | ws.append(headers) |
150 | 128 | for col in range(1, len(headers)+1): |
151 | 129 | ws.cell(row=1, column=col).font = Font(bold=True) |
|
# Split the nominee name into first/last parts. str.split() returns [] for
# an empty or whitespace-only name, so both lookups are guarded rather than
# letting name[0] raise IndexError.
name = submission["Nominee Name"].split()
fname = name[0] if name else ""
lname = name[-1] if len(name) > 1 else ""
| 145 | + summary = f"""Contributions:\n{submission.get("Contributions", "")} |
167 | 146 |
|
168 | | - # Submission Summary includes all fields except first 3 |
169 | | - summary = f"""GitHub: {submission.get("Nominee GitHub", "")} |
170 | | -Email: {submission.get("Nominee Email", "")} |
171 | | -Organization: {submission.get("Organization", "")} |
172 | | -Location: {submission.get("Location", "")} |
173 | | -Nominator: {submission.get("Nominator Name", "")} |
174 | | -Nominator Email: {submission.get("Nominator Email", "")} |
175 | | -
|
176 | | -Contributions:\n{submission.get("Nominee Contributions", "")} |
177 | 147 | Ambassador Pitch:\n{submission.get("Ambassador Pitch", "")} |
178 | | -Additional Info:\n{submission.get("Additional Info", "")}""" |
| 148 | +
|
| 149 | +Additional Notes:\n{submission.get("Extra Notes", "")}""" |
179 | 150 |
|
180 | 151 | start = row_idx |
181 | 152 | for cat, subcat, question in rubric: |
|
184 | 155 | end = row_idx - 1 |
185 | 156 | candidate_ranges.append((sid, fname, lname, start, end)) |
186 | 157 |
|
187 | | - for col in [1, 2, 3, 4, 5]: # Merge ID, First, Last, Summary, Reviewer Comment |
| 158 | + for col in [1, 2, 3, 4]: |
188 | 159 | ws.merge_cells(start_row=start, end_row=end, start_column=col, end_column=col) |
189 | | - cell = ws.cell(row=start, column=col) |
190 | | - cell.alignment = Alignment(vertical="top", wrap_text=True) |
| 160 | + ws.cell(row=start, column=col).alignment = Alignment(vertical="top", wrap_text=True) |
191 | 161 |
|
192 | 162 | for r in range(start, end + 1): |
193 | 163 | dv.add(ws[f"I{r}"]) |
194 | 164 |
|
195 | | - for col in ws.columns: |
196 | | - max_len = max((len(str(cell.value)) if cell.value else 0) for cell in col) |
197 | | - ws.column_dimensions[get_column_letter(col[0].column)].width = min(max_len + 5, 50) |
198 | | - |
| 165 | + # Score summary tab |
199 | 166 | summary_ws.append(["Submission ID", "First Name", "Last Name"] + summary_categories + ["Final Score"]) |
200 | 167 | for col in range(1, summary_ws.max_column + 1): |
201 | 168 | summary_ws.cell(row=1, column=col).font = Font(bold=True) |
202 | 169 |
|
203 | 170 | for sid, fname, lname, start, end in candidate_ranges: |
204 | | - category_rows = defaultdict(list) |
| 171 | + cat_rows = defaultdict(list) |
205 | 172 | for r in range(start, end + 1): |
206 | 173 | cat = ws.cell(row=r, column=6).value |
207 | | - category_rows[cat].append(r) |
| 174 | + cat_rows[cat].append(r) |
208 | 175 |
|
| 176 | + row_num = summary_ws.max_row + 1 |
209 | 177 | formulas = [] |
210 | 178 | for cat in summary_categories: |
211 | | - if cat in category_rows: |
212 | | - rows = category_rows[cat] |
| 179 | + if cat in cat_rows: |
| 180 | + rows = cat_rows[cat] |
213 | 181 | formulas.append(f'=SUMPRODUCT(--(\'Review Sheet\'!I{rows[0]}:I{rows[-1]}="Yes"))') |
214 | 182 | else: |
215 | 183 | formulas.append("0") |
216 | | - |
217 | | - row_number = summary_ws.max_row + 1 |
218 | | - total_formula = f"=SUM({','.join([f'{get_column_letter(i+4)}{row_number}' for i in range(len(formulas))])})" |
| 184 | + total_formula = f"=SUM({','.join([f'{get_column_letter(i+4)}{row_num}' for i in range(len(formulas))])})" |
219 | 185 | summary_ws.append([sid, fname, lname] + formulas + [total_formula]) |
220 | 186 |
|
221 | | - filename = os.path.join(output_folder, f"{reviewer.replace(' ', '_').lower()}_sheet.xlsx") |
222 | | - wb.save(filename) |
| 187 | + wb.save(os.path.join(output_folder, f"{reviewer.replace(' ', '_').lower()}_sheet.xlsx")) |
223 | 188 |
|
224 | | -print("✅ All reviewer sheets generated successfully.") |
| 189 | +print("✅ All reviewer sheets and summaries generated.") |
0 commit comments