|
1 | 1 | import os |
2 | | -import csv |
| 2 | +import re |
3 | 3 | import random |
| 4 | +import requests |
4 | 5 | from collections import defaultdict |
| 6 | +from datetime import datetime |
5 | 7 | from openpyxl import Workbook |
6 | 8 | from openpyxl.styles import Alignment, Font |
7 | 9 | from openpyxl.utils import get_column_letter |
8 | 10 | from openpyxl.worksheet.datavalidation import DataValidation |
9 | 11 |
|
10 | | -# Load deduplicated submissions |
11 | | -with open("ambassador/ambassador_submissions_deduped.csv", newline='', encoding='utf-8') as f: |
12 | | - reader = csv.DictReader(f) |
13 | | - submissions = list(reader) |
14 | | - |
15 | | -# Define reviewers |
# GitHub repository that hosts the nomination issues.
REPO = "pytorch-fdn/ambassador-program"
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
# Only send an Authorization header when a token is actually configured.
# Without this guard an unset GITHUB_TOKEN produces the literal header
# "Bearer None", which turns an anonymous request into a failed
# authenticated one.
HEADERS = {"Authorization": f"Bearer {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
# NOTE(review): `labels=closed` filters on a *label* named "closed"; the issue
# state is controlled by `state=` (already "all" here) — confirm the label
# filter is intended.
API_URL = f"https://api.github.com/repos/{REPO}/issues?state=all&labels=closed&per_page=100"

# Output directories
os.makedirs("ambassador/reviewer_sheets_excel", exist_ok=True)
| 20 | + |
# Helper to extract structured data from the issue body
def extract_submission(issue):
    """Parse one nomination issue into a flat submission record.

    The issue body is expected to contain bold ``**Label**`` lines, each
    followed by the answer on the next line(s).

    Args:
        issue: Issue dict as decoded from the GitHub API JSON. Reads the
            ``number``, ``created_at`` and ``body`` keys.

    Returns:
        dict mapping the sheet column names ("Issue #", "Nominee Name", ...)
        to strings. A field whose label is missing or whose answer is blank
        is returned as "".
    """
    # The API reports an empty description as null; treat it as empty text so
    # the regex search below does not raise TypeError.
    body = issue.get("body") or ""

    def extract(label):
        """Return the text between ``**label**`` and the next bold label."""
        # Consume only the remainder of the label's own line ([ \t]*\r?\n).
        # A greedy \s* here would also eat a blank answer line, making the
        # capture start at the NEXT label and return that field's content.
        pattern = rf"\*\*{re.escape(label)}\*\*[ \t]*\r?\n([\s\S]*?)(?:\n\*\*|$)"
        match = re.search(pattern, body, re.IGNORECASE)
        return match.group(1).strip() if match else ""

    return {
        "Issue #": str(issue["number"]),
        "Nominee Name": extract("Nominee Name"),
        "Nominee Email": extract("Nominee Email"),
        "GitHub Handle": extract("Nominee's GitHub or GitLab Handle"),
        "Organization": extract("Organization / Affiliation"),
        "Location": extract("City, State/Province, Country"),
        "Nominator Name": extract("Your Name"),
        "Nominator Email": extract("Your Email"),
        "Contributions": extract("How has the nominee contributed to PyTorch?"),
        "Ambassador Pitch": extract("How Would the Nominee Contribute as an Ambassador?"),
        "Extra Notes": extract("Any additional details you'd like to share?"),
        "Created At": issue["created_at"],
    }
| 43 | + |
# Step 1: Fetch and parse issues
print("📥 Fetching GitHub issues...")
all_issues = []
page = 1
while True:
    # A timeout keeps an unresponsive connection from hanging the script.
    response = requests.get(f"{API_URL}&page={page}", headers=HEADERS, timeout=30)
    data = response.json()
    if not isinstance(data, list):
        # A successful page is a JSON array; anything else is an error payload
        # such as {"message": "..."}. Report it rather than silently
        # continuing with a partial (or empty) issue list.
        detail = data.get("message", data) if isinstance(data, dict) else data
        print(f"⚠️ GitHub API error on page {page}: {detail}")
        break
    if not data:
        # An empty page means every issue has been read.
        break
    all_issues.extend(data)
    page += 1

# `body` is None for issues without a description, so normalise it before the
# substring test to avoid a TypeError.
submissions_raw = [
    extract_submission(issue)
    for issue in all_issues
    if "Nominee Name" in (issue.get("body") or "")
]
| 57 | + |
# Step 2: Deduplicate by nominee name, keeping latest
print("🧹 Deduplicating...")
_CREATED_AT_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
deduped, duplicates = {}, []
for candidate in submissions_raw:
    # Nominee names are compared case-insensitively, ignoring outer whitespace.
    name_key = candidate["Nominee Name"].strip().lower()
    candidate_time = datetime.strptime(candidate["Created At"], _CREATED_AT_FORMAT)
    kept = deduped.get(name_key)

    if kept is not None:
        kept_time = datetime.strptime(kept["Created At"], _CREATED_AT_FORMAT)
        if candidate_time <= kept_time:
            # Not newer than the one already held: the candidate is the duplicate.
            duplicates.append(candidate)
            continue
        # The candidate is newer, so the previously held entry is displaced.
        duplicates.append(kept)

    deduped[name_key] = candidate

submissions = list(deduped.values())
| 72 | + |
| 73 | +# Step 3: Reviewer logic |
16 | 74 | reviewers = [f"Reviewer {i}" for i in range(1, 8)] |
17 | 75 |
|
18 | 76 | # Updated rubric including all categories from the latest file |
|
46 | 104 | ("Credibility", "Community References", "References from other known community members?") |
47 | 105 | ] |
48 | 106 |
|
# Unique rubric categories, in order of first appearance (dict keys keep
# insertion order, so this de-duplicates without reordering).
summary_categories = list(dict.fromkeys(category for category, _, _ in rubric))
54 | 111 |
|
55 | | -# Output directory |
56 | | -output_folder = "ambassador/reviewer_sheets_excel" |
57 | | -os.makedirs(output_folder, exist_ok=True) |
58 | | - |
# Give every submission two reviewers, drawn at random from the four
# reviewers that currently hold the fewest assignments.
assignments = []
reviewer_counts = defaultdict(int)
for sub in submissions:
    least_loaded = sorted(reviewers, key=lambda name: reviewer_counts[name])[:4]
    for picked in random.sample(least_loaded, 2):
        reviewer_counts[picked] += 1
        assignments.append((sub, picked))
67 | 119 |
|
# Step 4: Generate reviewer sheets
# One workbook per reviewer, with two sheets:
#   "Review Sheet"  - one row per (submission, rubric question) with a
#                     Yes/No/N/A dropdown in the Score column.
#   "Score Summary" - one row per submission with per-category "Yes" counts
#                     and their total, computed by spreadsheet formulas.
for reviewer in reviewers:
    wb = Workbook()
    ws = wb.active
    ws.title = "Review Sheet"
    summary_ws = wb.create_sheet("Score Summary")

    headers = [
        "Submission ID", "First Name", "Last Name", "Submission Summary",
        "Reviewer's Comment", "Category", "Subcategory", "Question", "Score"
    ]
    ws.append(headers)
    for c in range(1, len(headers) + 1):
        ws.cell(row=1, column=c).font = Font(bold=True)

    # Dropdown applied to every Score cell (column I) further down.
    dv = DataValidation(type="list", formula1='"Yes,No,N/A"', allow_blank=True)
    ws.add_data_validation(dv)

    row_idx = 2  # next free row on the review sheet (row 1 is the header)
    ranges = []  # (sid, fname, lname, first_row, last_row) per submission

    for sub, assigned_reviewer in assignments:
        if assigned_reviewer != reviewer:
            continue
        sid = sub["Issue #"]
        name_parts = sub["Nominee Name"].split()
        # The extracted name can be empty when the issue body did not follow
        # the expected layout; fall back to "" instead of raising IndexError.
        fname = name_parts[0] if name_parts else ""
        lname = name_parts[-1] if len(name_parts) > 1 else ""
        summary = f"""
GitHub: {sub.get("GitHub Handle", "")}
Org: {sub.get("Organization", "")}
Location: {sub.get("Location", "")}

Contributions:
{sub.get("Contributions", "")}

Ambassador Pitch:
{sub.get("Ambassador Pitch", "")}

Additional Info:
{sub.get("Extra Notes", "")}
""".strip()

        start = row_idx
        for cat, subcat, question in rubric:
            ws.append([sid, fname, lname, summary, "", cat, subcat, question, ""])
            row_idx += 1
        end = row_idx - 1
        ranges.append((sid, fname, lname, start, end))

        # Merge the per-submission columns (ID, names, summary, comment) so
        # they span all of that submission's rubric rows.
        for col in [1, 2, 3, 4, 5]:
            ws.merge_cells(start_row=start, end_row=end, start_column=col, end_column=col)
            ws.cell(row=start, column=col).alignment = Alignment(vertical="top", wrap_text=True)
        for row in range(start, end + 1):
            dv.add(ws[f"I{row}"])

    # Autofit columns (capped at 60 so the long summary text does not blow up
    # the column width).
    for col in ws.columns:
        max_len = max((len(str(c.value)) if c.value else 0) for c in col)
        ws.column_dimensions[get_column_letter(col[0].column)].width = min(max_len + 5, 60)

    # Score Summary
    summary_ws.append(["Submission ID", "First Name", "Last Name"] + summary_categories + ["Final Score"])
    for col in range(1, summary_ws.max_column + 1):
        summary_ws.cell(row=1, column=col).font = Font(bold=True)

    for sid, fname, lname, start, end in ranges:
        # Group this submission's review-sheet rows by rubric category
        # (the category is stored in column 6 / F).
        cat_rows = defaultdict(list)
        for row in range(start, end + 1):
            cat = ws.cell(row=row, column=6).value
            cat_rows[cat].append(row)

        formulas = []
        for cat in summary_categories:
            if cat in cat_rows:
                rows = cat_rows[cat]
                # NOTE(review): the first:last range assumes each category's
                # rows are contiguous in the rubric — confirm.
                formulas.append(f'=SUMPRODUCT(--(\'Review Sheet\'!I{rows[0]}:I{rows[-1]}="Yes"))')
            else:
                formulas.append("0")
        row_number = summary_ws.max_row + 1
        # Category columns start at D (index 4), after ID / first / last name.
        final_formula = f"=SUM({','.join([f'{get_column_letter(i+4)}{row_number}' for i in range(len(formulas))])})"
        summary_ws.append([sid, fname, lname] + formulas + [final_formula])

    wb.save(f"ambassador/reviewer_sheets_excel/{reviewer.replace(' ', '_').lower()}_sheet.xlsx")
# Step 5: Save duplicates separately
dup_wb = Workbook()
ws = dup_wb.active
ws.title = "Duplicates Removed"
# With no duplicates there is no record to take the column names from, so the
# sheet is simply left empty instead of failing on duplicates[0].
if duplicates:
    dup_columns = list(duplicates[0].keys())
    ws.append(dup_columns)
    for d in duplicates:
        # Look values up by column NAME. Iterating the header row (ws[1])
        # yields Cell objects, which never match the dict's string keys and
        # would leave every row blank.
        ws.append([d.get(k, "") for k in dup_columns])
dup_wb.save("ambassador/duplicates_removed.xlsx")

print("✅ All reviewer sheets and duplicates file generated.")
0 commit comments