11import os
2- import re
2+ import csv
33import random
4- import requests
5- from collections import defaultdict
64from datetime import datetime
5+ from collections import defaultdict
6+ from github import Github
77from openpyxl import Workbook
88from openpyxl .styles import Alignment , Font
99from openpyxl .utils import get_column_letter
1010from openpyxl .worksheet .datavalidation import DataValidation
1111
12- # Set your GitHub repo details
13- REPO = "pytorch-fdn/ambassador-program"
14- GITHUB_TOKEN = os .getenv ("GITHUB_TOKEN" )
15- HEADERS = {"Authorization" : f"Bearer { GITHUB_TOKEN } " }
16- API_URL = f"https://api.github.com/repos/{ REPO } /issues?state=all&labels=closed&per_page=100"
17-
18- # Output directories
19- os .makedirs ("ambassador/reviewer_sheets_excel" , exist_ok = True )
20-
21- # Helper to extract structured data from the issue body
22- def extract_submission (issue ):
23- body = issue ["body" ]
24- def extract (label ): # Flexible line extractor
25- pattern = rf"\*\*{ re .escape (label )} \*\*\s*\n([\s\S]*?)(?:\n\*\*|$)"
26- match = re .search (pattern , body , re .IGNORECASE )
27- return match .group (1 ).strip () if match else ""
28-
29- return {
30- "Issue #" : str (issue ["number" ]),
31- "Nominee Name" : extract ("Nominee Name" ),
32- "Nominee Email" : extract ("Nominee Email" ),
33- "GitHub Handle" : extract ("Nominee's GitHub or GitLab Handle" ),
34- "Organization" : extract ("Organization / Affiliation" ),
35- "Location" : extract ("City, State/Province, Country" ),
36- "Nominator Name" : extract ("Your Name" ),
37- "Nominator Email" : extract ("Your Email" ),
38- "Contributions" : extract ("How has the nominee contributed to PyTorch?" ),
39- "Ambassador Pitch" : extract ("How Would the Nominee Contribute as an Ambassador?" ),
40- "Extra Notes" : extract ("Any additional details you'd like to share?" ),
41- "Created At" : issue ["created_at" ]
12+ # Load GitHub issues
13+ print ("📥 Fetching GitHub issues..." )
14+ GITHUB_TOKEN = os .environ ["GITHUB_TOKEN" ]
15+ GITHUB_REPO = os .environ ["GITHUB_REPOSITORY" ]
16+ REPO = Github (GITHUB_TOKEN ).get_repo (GITHUB_REPO )
17+
18+ issues = REPO .get_issues (state = "all" , labels = ["closed" ])
19+ submissions_raw = []
20+ for issue in issues :
21+ if not issue .body or "[Nomination]" not in issue .title :
22+ continue
23+ submission = {
24+ "Issue #" : issue .number ,
25+ "Nominee Name" : "" ,
26+ "Nominee GitHub" : "" ,
27+ "Nominee Email" : "" ,
28+ "Organization" : "" ,
29+ "Location" : "" ,
30+ "Nominator Name" : "" ,
31+ "Nominator Email" : "" ,
32+ "Nominee Contributions" : "" ,
33+ "Ambassador Pitch" : "" ,
34+ "Additional Info" : "" ,
35+ "Created At" : issue .created_at .strftime ("%Y-%m-%d %H:%M:%S" )
4236 }
4337
44- # Step 1: Fetch and parse issues
45- print ("📥 Fetching GitHub issues..." )
46- all_issues = []
47- page = 1
48- while True :
49- response = requests .get (f"{ API_URL } &page={ page } " , headers = HEADERS )
50- data = response .json ()
51- if not data or "message" in data :
52- break
53- all_issues .extend (data )
54- page += 1
55-
56- submissions_raw = [extract_submission (issue ) for issue in all_issues if "Nominee Name" in issue ["body" ]]
57-
58- # Step 2: Deduplicate by nominee name, keeping latest
38+ # Extract fields
39+ lines = issue .body .splitlines ()
40+ current_key = ""
41+ for line in lines :
42+ if "**Nominee Name**" in line :
43+ current_key = "Nominee Name"
44+ elif "**Nominee Email**" in line :
45+ current_key = "Nominee Email"
46+ elif "**GitHub or GitLab Handle**" in line :
47+ current_key = "Nominee GitHub"
48+ elif "**Organization / Affiliation**" in line :
49+ current_key = "Organization"
50+ elif "**City, State/Province, Country**" in line :
51+ current_key = "Location"
52+ elif "**Your Name**" in line :
53+ current_key = "Nominator Name"
54+ elif "**Your Email (Optional)**" in line :
55+ current_key = "Nominator Email"
56+ elif "**How has the nominee contributed**" in line :
57+ current_key = "Nominee Contributions"
58+ elif "**How Would the Nominee Contribute as an Ambassador?**" in line :
59+ current_key = "Ambassador Pitch"
60+ elif "**Any additional details you'd like to share?**" in line :
61+ current_key = "Additional Info"
62+ elif line .strip () and current_key :
63+ submission [current_key ] += line .strip () + "\n "
64+
65+ submissions_raw .append (submission )
66+
67+ # Deduplicate by GitHub handle (latest entry kept)
5968print ("🧹 Deduplicating..." )
60- deduped , duplicates = {}, []
61- for sub in submissions_raw :
62- key = sub ["Nominee Name" ].strip ().lower ()
63- dt = datetime .strptime (sub ["Created At" ], "%Y-%m-%dT%H:%M:%SZ" )
64- if key not in deduped or dt > datetime .strptime (deduped [key ]["Created At" ], "%Y-%m-%dT%H:%M:%SZ" ):
65- if key in deduped :
66- duplicates .append (deduped [key ])
67- deduped [key ] = sub
69+ seen = {}
70+ duplicates = []
71+ for s in sorted (submissions_raw , key = lambda x : x ["Created At" ]):
72+ key = s ["Nominee GitHub" ].strip ().lower ()
73+ if key in seen :
74+ duplicates .append (s )
6875 else :
69- duplicates . append ( sub )
76+ seen [ key ] = s
7077
71- submissions = list (deduped .values ())
78+ submissions = list (seen .values ())
7279
73- # Step 3: Reviewer logic
74- reviewers = [f"Reviewer { i } " for i in range (1 , 8 )]
80+ # Save deduplicated CSV
81+ os .makedirs ("ambassador" , exist_ok = True )
82+ csv_path = "ambassador/ambassador_submissions_deduped.csv"
83+ with open (csv_path , "w" , newline = "" , encoding = "utf-8" ) as f :
84+ writer = csv .DictWriter (f , fieldnames = list (submissions [0 ].keys ()))
85+ writer .writeheader ()
86+ writer .writerows (submissions )
7587
76- # Updated rubric including all categories from the latest file
88+ # Save duplicates separately
89+ if duplicates :
90+ dup_wb = Workbook ()
91+ ws = dup_wb .active
92+ ws .title = "Duplicates Removed"
93+ ws .append (list (duplicates [0 ].keys ()))
94+ for d in duplicates :
95+ ws .append ([d .get (k , "" ) for k in ws [1 ]])
96+ dup_wb .save ("ambassador/duplicates_removed.xlsx" )
97+
98+ # Rubric
7799rubric = [
78100 ("Technical Expertise" , "Proficiency with the PyTorch Ecosystem" , "Demonstrated knowledge and practical experience with PyTorch, including model building, traininga and deployment?" ),
79101 ("Technical Expertise" , "Proficiency with the PyTorch Ecosystem" , "Familiarity with foundation-hosted projects, vLLM, DeepSpeed?" ),
@@ -96,28 +118,24 @@ def extract(label): # Flexible line extractor
96118 ("Alignment and Values" , "Alignment with PyTorch Foundation Values" , "Commitment to open source principles, community-first development, and inclusive collaboration?" ),
97119 ("Alignment and Values" , "Alignment with PyTorch Foundation Values" , "Advocacy for responsible AI development and ethical machine learning practices?" ),
98120 ("Motivation and Vision" , "Vision" , "Clear articulation of why they want to be an Ambassador and what they hope to accomplish?" ),
99- ("Motivation and Vision" , "Vision" , "Proposed goals or initiatives that align with the mission of the PyTorch Foundation?" ),
100- ("Additional Bonus Criteria" , "Cross-Community Collaboration" , "Contributions or bridges to other relevant ecosystems (e.g., HuggingFace?)" ),
101- ("Additional Bonus Criteria" , "Cross-Community Collaboration" , "Integration work across tools or libraries within the AI/ML infrastructure landscape?" ),
102- ("Additional Bonus Criteria" , "Geographic and Demographic Diversity" , "Representation from underrepresented regions or groups to foster inclusivity and global outreach?" ),
103- ("Additional Bonus Criteria" , "Innovation and Pioneering Work" , "Early adoption or novel application of PyTorch or its ecosystem tools in industry, research, or startups?" ),
104- ("Credibility" , "Community References" , "References from other known community members?" )
121+ ("Motivation and Vision" , "Vision" , "Proposed goals or initiatives that align with the mission of the PyTorch Foundation?" )
105122]
106123
107- summary_categories = []
108- for cat , _ , _ in rubric :
109- if cat not in summary_categories :
110- summary_categories . append ( cat )
124+ summary_categories = list ( dict . fromkeys ( cat for cat , _ , _ in rubric ))
125+ reviewers = [ f"Reviewer { i } " for i in range ( 1 , 8 )]
126+ output_folder = "ambassador/reviewer_sheets_excel"
127+ os . makedirs ( output_folder , exist_ok = True )
111128
129+ # Assign reviewers evenly
112130assignments = []
113131reviewer_counts = defaultdict (int )
114- for sub in submissions :
132+ for submission in submissions :
115133 assigned = random .sample (sorted (reviewers , key = lambda r : reviewer_counts [r ])[:4 ], 2 )
116- for r in assigned :
117- reviewer_counts [r ] += 1
118- assignments .append ((sub , r ))
134+ for reviewer in assigned :
135+ reviewer_counts [reviewer ] += 1
136+ assignments .append ((submission , reviewer ))
119137
120- # Step 4: Generate reviewer sheets
138+ # Generate reviewer workbooks
121139for reviewer in reviewers :
122140 wb = Workbook ()
123141 ws = wb .active
@@ -129,91 +147,78 @@ def extract(label): # Flexible line extractor
129147 "Reviewer's Comment" , "Category" , "Subcategory" , "Question" , "Score"
130148 ]
131149 ws .append (headers )
132- for c in range (1 , len (headers )+ 1 ):
133- ws .cell (row = 1 , column = c ).font = Font (bold = True )
150+ for col in range (1 , len (headers )+ 1 ):
151+ ws .cell (row = 1 , column = col ).font = Font (bold = True )
134152
135153 dv = DataValidation (type = "list" , formula1 = '"Yes,No,N/A"' , allow_blank = True )
136154 ws .add_data_validation (dv )
137155
138156 row_idx = 2
139- ranges = []
157+ candidate_ranges = []
140158
141- for sub , r in assignments :
142- if r != reviewer :
159+ for submission , assigned_reviewer in assignments :
160+ if assigned_reviewer != reviewer :
143161 continue
144- sid = sub ["Issue #" ]
145- name_parts = sub ["Nominee Name" ].split ()
146- fname = name_parts [0 ]
147- lname = name_parts [- 1 ] if len (name_parts ) > 1 else ""
148- summary = f"""
149- GitHub: { sub .get ("GitHub Handle" , "" )}
150- Org: { sub .get ("Organization" , "" )}
151- Location: { sub .get ("Location" , "" )}
152162
153- Contributions:
154- { sub .get ("Contributions" , "" )}
163+ sid = submission ["Issue #" ]
164+ name = submission ["Nominee Name" ].split ()
165+ fname = name [0 ]
166+ lname = name [- 1 ] if len (name ) > 1 else ""
155167
156- Ambassador Pitch:
157- { sub .get ("Ambassador Pitch" , "" )}
168+ # Submission Summary includes all fields except first 3
169+ summary = f"""GitHub: { submission .get ("Nominee GitHub" , "" )}
170+ Email: { submission .get ("Nominee Email" , "" )}
171+ Organization: { submission .get ("Organization" , "" )}
172+ Location: { submission .get ("Location" , "" )}
173+ Nominator: { submission .get ("Nominator Name" , "" )}
174+ Nominator Email: { submission .get ("Nominator Email" , "" )}
158175
159- Additional Info:
160- { sub .get ("Extra Notes " , "" )}
161- """ . strip ()
176+ Contributions: \n { submission . get ( "Nominee Contributions" , "" ) }
177+ Ambassador Pitch: \n { submission .get ("Ambassador Pitch " , "" )}
178+ Additional Info: \n { submission . get ( "Additional Info" , "" ) } """
162179
163180 start = row_idx
164181 for cat , subcat , question in rubric :
165182 ws .append ([sid , fname , lname , summary , "" , cat , subcat , question , "" ])
166183 row_idx += 1
167184 end = row_idx - 1
168- ranges .append ((sid , fname , lname , start , end ))
185+ candidate_ranges .append ((sid , fname , lname , start , end ))
169186
170- for col in [1 , 2 , 3 , 4 , 5 ]: # Merge key fields
187+ for col in [1 , 2 , 3 , 4 , 5 ]: # Merge ID, First, Last, Summary, Reviewer Comment
171188 ws .merge_cells (start_row = start , end_row = end , start_column = col , end_column = col )
172- ws .cell (row = start , column = col ).alignment = Alignment (vertical = "top" , wrap_text = True )
173- for r in range (start , end + 1 ):
189+ cell = ws .cell (row = start , column = col )
190+ cell .alignment = Alignment (vertical = "top" , wrap_text = True )
191+
192+ for r in range (start , end + 1 ):
174193 dv .add (ws [f"I{ r } " ])
175194
176- # Autofit columns
177195 for col in ws .columns :
178- max_len = max ((len (str (c .value )) if c .value else 0 ) for c in col )
179- ws .column_dimensions [get_column_letter (col [0 ].column )].width = min (max_len + 5 , 60 )
196+ max_len = max ((len (str (cell .value )) if cell .value else 0 ) for cell in col )
197+ ws .column_dimensions [get_column_letter (col [0 ].column )].width = min (max_len + 5 , 50 )
180198
181- # Score Summary
182199 summary_ws .append (["Submission ID" , "First Name" , "Last Name" ] + summary_categories + ["Final Score" ])
183200 for col in range (1 , summary_ws .max_column + 1 ):
184201 summary_ws .cell (row = 1 , column = col ).font = Font (bold = True )
185202
186- for sid , fname , lname , start , end in ranges :
187- cat_rows = defaultdict (list )
203+ for sid , fname , lname , start , end in candidate_ranges :
204+ category_rows = defaultdict (list )
188205 for r in range (start , end + 1 ):
189206 cat = ws .cell (row = r , column = 6 ).value
190- cat_rows [cat ].append (r )
207+ category_rows [cat ].append (r )
191208
192209 formulas = []
193210 for cat in summary_categories :
194- if cat in cat_rows :
195- rows = cat_rows [cat ]
211+ if cat in category_rows :
212+ rows = category_rows [cat ]
196213 formulas .append (f'=SUMPRODUCT(--(\' Review Sheet\' !I{ rows [0 ]} :I{ rows [- 1 ]} ="Yes"))' )
197214 else :
198215 formulas .append ("0" )
199- row_number = summary_ws .max_row + 1
200- final_formula = f"=SUM({ ',' .join ([f'{ get_column_letter (i + 4 )} { row_number } ' for i in range (len (formulas ))])} )"
201- summary_ws .append ([sid , fname , lname ] + formulas + [final_formula ])
202-
203- wb .save (f"ambassador/reviewer_sheets_excel/{ reviewer .replace (' ' , '_' ).lower ()} _sheet.xlsx" )
204-
205- # Step 5: Save duplicates separately
206- dup_wb = Workbook ()
207- ws = dup_wb .active
208- ws .title = "Duplicates Removed"
209216
210- if duplicates :
211- ws .append (list (duplicates [0 ].keys ()))
212- for d in duplicates :
213- ws .append ([d .get (k , "" ) for k in ws [1 ]])
214- else :
215- ws .append (["No duplicates found" ])
217+ row_number = summary_ws .max_row + 1
218+ total_formula = f"=SUM({ ',' .join ([f'{ get_column_letter (i + 4 )} { row_number } ' for i in range (len (formulas ))])} )"
219+ summary_ws .append ([sid , fname , lname ] + formulas + [total_formula ])
216220
217- dup_wb .save ("ambassador/duplicates_removed.xlsx" )
221+ filename = os .path .join (output_folder , f"{ reviewer .replace (' ' , '_' ).lower ()} _sheet.xlsx" )
222+ wb .save (filename )
218223
219- print ("✅ All reviewer sheets and duplicates file generated." )
224+ print ("✅ All reviewer sheets generated successfully ." )
0 commit comments