44from github import Github
55from openpyxl import Workbook
66
# Load GitHub access credentials from the environment.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")

# Fail early with a readable message instead of letting a missing repository
# name surface as an opaque error inside the GitHub client.
if not GITHUB_REPOSITORY:
    raise SystemExit("GITHUB_REPOSITORY environment variable is not set")

# Authenticate with GitHub (the token is passed through as-is, even if unset).
g = Github(GITHUB_TOKEN)
repo = g.get_repo(GITHUB_REPOSITORY)

print("📥 Fetching open GitHub issues with 'ambassador' label...")
# Kept as the lazy result of get_issues(); it is only iterated once below.
issues = repo.get_issues(state='open', labels=['ambassador'])

# One dict per processed issue, filled in by the processing loop.
submissions = []
1619
# Helper to extract plain-text responses
def extract(label, body):
    """Return the text on the first non-blank line that follows ``label``.

    The label is matched literally (it is regex-escaped), so labels containing
    characters such as ``?`` or ``(`` are safe to pass. After the label the
    pattern skips optional whitespace, a newline, and any further whitespace
    (including blank lines), then captures the rest of that line.

    Args:
        label: Literal heading text to look for inside ``body``.
        body: The text to search.

    Returns:
        The captured line with surrounding whitespace stripped, or an empty
        string when the label (followed by a non-blank line) is not found.
    """
    # re.escape keeps '?', '(', ')' etc. in the label from being treated as
    # regex operators, which would otherwise match the wrong heading.
    match = re.search(rf"{re.escape(label)}\s*\n\s*(.+)", body)
    return match.group(1).strip() if match else ""
2124
# Helper to extract checkbox options
def extract_checkboxes(body):
    """Return the checkbox lines listed under the PyTorch-contribution question.

    Looks for the heading text "How has the nominee contributed to PyTorch?"
    followed by a run of consecutive ``- [?] text`` lines, where ``?`` is any
    single character (so both ticked and unticked boxes are included).

    Args:
        body: The text to search; ``None`` is treated as empty text.

    Returns:
        The raw checkbox lines (markers included) as a list of strings, or an
        empty list when the section is not present.
    """
    # Only the first occurrence is ever used, so a single search is enough.
    match = re.search(
        r"How has the nominee contributed to PyTorch\?\s*\n((?:- \[.\] .+\n?)+)",
        body or "",
    )
    if match is None:
        return []
    return match.group(1).strip().splitlines()
31+
# Process each issue: pull the nomination fields out of the issue body and
# collect one row per issue in `submissions`.
for issue in issues:
    # The GitHub issues listing also returns pull requests; those are not
    # nominations and would only yield rows with empty fields.
    if issue.pull_request is not None:
        continue

    body = issue.body or ""

    name = extract("Nominee Name", body)
    email = extract("Nominee Email", body)
    github_handle = extract("Nominee's GitHub or GitLab Handle", body)
    ambassador_plan = extract("🏆 How Would the Nominee Contribute as an Ambassador?", body)
    additional_info = extract("Any additional details you'd like to share?", body)
    contributions = extract_checkboxes(body)

    # Format submission summary: markdown sections separated by blank lines,
    # with a 'Not Provided' placeholder for anything that could not be parsed.
    submission_summary = "\n".join([
        f"**GitHub Handle:** {github_handle or 'Not Provided'}",
        "",
        "**How Has the Nominee Contributed to PyTorch?**",
        "\n".join(contributions) if contributions else "Not Provided",
        "",
        "**Ambassador Contribution Plan**",
        ambassador_plan or "Not Provided",
        "",
        "**Additional Information**",
        additional_info or "Not Provided",
    ])

    submissions.append({
        "Issue #": issue.number,
        "Nominee Name": name,
        "Nominee Email": email,
        "Submission Summary": submission_summary.strip(),
    })

print(f"✅ Total submissions found: {len(submissions)}")
64+
# Deduplicate by email (fallback to name), keeping the highest-numbered issue
# for each nominee. `deduped` ends up ordered from newest to oldest issue.
latest_by_email = {}
deduped = []
for entry in sorted(submissions, key=lambda x: x["Issue #"], reverse=True):
    key = (entry["Nominee Email"] or entry["Nominee Name"]).lower()
    if not key:
        # Neither an email nor a name was parsed: there is nothing to compare
        # on, so keep the entry rather than merging unrelated submissions
        # under a shared empty key.
        deduped.append(entry)
        continue
    if key not in latest_by_email:
        latest_by_email[key] = entry
        deduped.append(entry)

# Everything that was not kept is a duplicate; membership is checked by issue
# number so this stays linear in the number of submissions.
kept_issue_numbers = {entry["Issue #"] for entry in deduped}
duplicates = [entry for entry in submissions if entry["Issue #"] not in kept_issue_numbers]
74+
# Ensure output directory
os.makedirs("ambassador", exist_ok=True)

# Column order for both CSV files. Taken from the first row when there is one;
# otherwise fall back to the known columns so that an empty run still produces
# valid files with a header instead of raising IndexError.
csv_fields = (
    list(submissions[0])
    if submissions
    else ["Issue #", "Nominee Name", "Nominee Email", "Submission Summary"]
)

# Save all submissions
with open("ambassador/submissions_all.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=csv_fields)
    writer.writeheader()
    writer.writerows(submissions)

# Save deduplicated submissions
with open("ambassador/submissions_deduped.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=csv_fields)
    writer.writeheader()
    writer.writerows(deduped)
@@ -77,11 +91,10 @@ def extract(label, body):
# Files reported at the end; the workbook is only added when it is written.
files_written = ["submissions_all.csv", "submissions_deduped.csv"]

if duplicates:
    wb = Workbook()
    ws = wb.active
    ws.title = "Duplicates"
    # Worksheet.append() rejects a dict_keys view, so materialise the header
    # as a list and reuse it to keep every row in the same column order.
    columns = list(duplicates[0])
    ws.append(columns)
    for row in duplicates:
        ws.append([row[k] for k in columns])
    wb.save("ambassador/submissions_duplicates.xlsx")
    files_written.append("submissions_duplicates.xlsx")

print(f"📁 Files written: {', '.join(files_written)}")
0 commit comments