| 
4 | 4 | from github import Github  | 
5 | 5 | from openpyxl import Workbook  | 
6 | 6 | 
 
  | 
7 |  | -# Load GitHub access credentials  | 
 | 7 | +# Get GitHub token and repository name  | 
# Access token and "owner/name" repository slug are read from the environment;
# either value is None when the variable is not set.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
GITHUB_REPOSITORY = os.environ.get("GITHUB_REPOSITORY")

# Build the API client and resolve the repository that holds the nominations.
g = Github(GITHUB_TOKEN)
repo = g.get_repo(GITHUB_REPOSITORY)

print("📥 Fetching open GitHub issues with 'ambassador' label...")
# Materialise the query result into a list so it can be counted with len()
# and iterated again further down.
open_ambassador_issues = repo.get_issues(state='open', labels=['ambassador'])
issues = list(open_ambassador_issues)
print(f"✅ Total submissions found: {len(issues)}")
17 | 18 | 
 
  | 
18 |  | -submissions = []  | 
19 |  | - | 
20 |  | -# Helper to extract plain-text responses  | 
 | 19 | +# Helper to extract text fields  | 
def extract(label: str, body: str) -> str:
    """Return the answer written under the form heading *label* in *body*.

    The answer is the text that follows the label and at least one newline,
    captured up to the next newline that is immediately followed by a
    non-whitespace character, or up to the end of the text.

    Args:
        label: Literal heading text to look for. It is matched verbatim, so
            characters such as ``?`` or ``.`` carry no regex meaning.
        body: Full issue body to search.

    Returns:
        The captured answer with surrounding whitespace removed, or ``""``
        when the label is not present.
    """
    # Escape the label: headings such as "...as an Ambassador?" end in regex
    # metacharacters and would otherwise never match their literal text.
    pattern = rf"{re.escape(label)}\s*\n+(.+?)(\n\S|\Z)"
    match = re.search(pattern, body, re.DOTALL)
    return match.group(1).strip() if match else ""
24 | 23 | 
 
  | 
25 |  | -# Helper to extract checkbox options  | 
 | 24 | +# Helper to extract all checkbox lines  | 
def extract_checkboxes(body: str) -> str:
    """Return the labels of every ticked checkbox in *body* as one string.

    A ticked checkbox is a ``- [x] <label>`` item; the ``x`` is matched
    case-insensitively. Unticked ``- [ ]`` items are ignored.

    Args:
        body: Full issue body to scan.

    Returns:
        The ticked labels joined with ``"; "`` in order of appearance, or
        ``""`` when nothing is ticked.
    """
    ticked = re.findall(r"- \[x\] (.+)", body, flags=re.IGNORECASE)
    # "." also matches "\r", so CRLF bodies would leave a carriage return on
    # every label; trim each one and drop labels that were only whitespace.
    labels = (label.strip() for label in ticked)
    return "; ".join(label for label in labels if label)
31 | 28 | 
 
  | 
32 |  | -# Process each issue  | 
 | 29 | +# Build submissions list  | 
 | 30 | +submissions = []  | 
def _build_entry(issue):
    """Turn one nomination issue into a flat row for the CSV/Excel exports.

    Args:
        issue: Issue object exposing ``number`` and ``body``; an empty or
            missing body is treated as ``""``.

    Returns:
        A dict with the keys "Issue #", "Nominee Name", "Nominee Email",
        "GitHub Handle" and "Submission Summary".
    """
    body = issue.body or ""

    # Resolve the answers before formatting: a quoted label containing an
    # apostrophe cannot sit inside an f-string replacement field without a
    # backslash, which is a SyntaxError on Python versions before 3.12.
    contribution_plan = extract("🏆 How Would the Nominee Contribute as an Ambassador?", body)
    additional_info = extract("Any additional details you'd like to share?", body)
    highlights = extract_checkboxes(body)

    return {
        "Issue #": issue.number,
        "Nominee Name": extract("Nominee Name", body),
        "Nominee Email": extract("Nominee Email", body),
        "GitHub Handle": extract("Nominee's GitHub or GitLab Handle", body),
        "Submission Summary": (
            f"🏆 Ambassador Contribution Plan:\n{contribution_plan}\n\n"
            f"🔗 Additional Information:\n{additional_info}\n\n"
            f"✅ Contribution Highlights:\n{highlights}"
        ),
    }


# One row per fetched issue, in the order the issues were returned.
submissions = [_build_entry(issue) for issue in issues]
 | 46 | + | 
 | 47 | +# Deduplication logic: prefer latest submission by email or name  | 
 | 48 | +latest_submissions = {}  | 
# Walk the rows from the highest issue number down so that the first row seen
# for a nominee is their most recent submission.
latest_submissions = {}
for entry in sorted(submissions, key=lambda x: x["Issue #"], reverse=True):
    # Identify a nominee by email, falling back to name (case-insensitive).
    identity = entry["Nominee Email"] or entry["Nominee Name"]
    # When neither field could be parsed, key on the issue number instead of
    # "" so unrelated, unidentified submissions are not merged into one.
    key = identity.lower() if identity else ("issue", entry["Issue #"])
    latest_submissions.setdefault(key, entry)

deduped = list(latest_submissions.values())

# Issue numbers are unique per row, so a set lookup replaces comparing every
# row dict against every kept row.
kept_issue_numbers = {kept["Issue #"] for kept in deduped}
duplicates = [s for s in submissions if s["Issue #"] not in kept_issue_numbers]
74 | 56 | 
 
  | 
75 |  | -# Ensure output directory  | 
 | 57 | +# Ensure output folder  | 
os.makedirs("ambassador", exist_ok=True)

# Columns come from the first row. When no issue was fetched there is no row
# to inspect, so fall back to the known column set and still emit header-only
# files instead of failing with IndexError.
fieldnames = list(submissions[0].keys()) if submissions else [
    "Issue #",
    "Nominee Name",
    "Nominee Email",
    "GitHub Handle",
    "Submission Summary",
]

# Full export first, then the deduplicated view; both share the same layout.
for path, rows in (
    ("ambassador/submissions_all_raw.csv", submissions),
    ("ambassador/submissions_deduplicated.csv", deduped),
):
    with open(path, "w", newline='', encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
89 | 71 | 
 
  | 
90 |  | -# Save duplicates to Excel  | 
 | 72 | +# Write duplicates to Excel  | 
# The workbook is only produced when at least one row was dropped as a duplicate.
if duplicates:
    # Column order is taken from the first dropped row and reused for every row.
    columns = list(duplicates[0].keys())

    wb = Workbook()
    ws = wb.active
    ws.title = "Duplicates Removed"
    ws.append(columns)
    for row in duplicates:
        # Rows lacking a column get an empty cell rather than raising KeyError.
        ws.append([row.get(column, "") for column in columns])
    wb.save("ambassador/submissions_duplicates_removed.xlsx")
    print("🗂️ Duplicates written to ambassador/submissions_duplicates_removed.xlsx")

print("✅ Extraction and deduplication complete.")
0 commit comments