|
2 | 2 | import csv |
3 | 3 | import re |
4 | 4 | from github import Github |
| 5 | +from openpyxl import Workbook |
5 | 6 |
|
# Setup GitHub token and repo from environment.
# GITHUB_TOKEN may be unset (the client is then created without credentials);
# GITHUB_REPOSITORY is mandatory because it names the repository to query.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")
if not GITHUB_REPOSITORY:
    # Fail fast with a readable message instead of an opaque error from
    # get_repo(None) further down.
    raise SystemExit("❌ GITHUB_REPOSITORY is not set (expected 'owner/name').")

g = Github(GITHUB_TOKEN)
repo = g.get_repo(GITHUB_REPOSITORY)

# Materialise the result as a list so the count can be reported up front and
# the issues can be iterated later without re-querying.
print("📥 Fetching GitHub issues...")
issues = list(repo.get_issues(state='open', labels=['ambassador']))
print(f"🔍 Total issues fetched: {len(issues)}")
18 | 16 |
|
# Markdown extractor
def extract(label, body):
    """Return the first non-blank line that follows ``label`` in ``body``.

    The label is matched literally (regex metacharacters are escaped), must be
    followed by a line break, and the next line containing text is returned
    with surrounding whitespace removed.

    Args:
        label: Field heading text to look for, e.g. ``"Nominee Name"``.
        body: Full issue body text to search.

    Returns:
        The stripped field value, or ``""`` when the label is absent or the
        field was left blank.
    """
    match = re.search(rf"{re.escape(label)}\s*\n\s*(.+?)(\n|$)", body)
    if not match:
        return ""
    value = match.group(1).strip()
    # GitHub issue forms render an unanswered optional field as the literal
    # placeholder "_No response_"; report it as empty so it is never mistaken
    # for real data (e.g. used as a deduplication key).
    if value == "_No response_":
        return ""
    return value
22 | 21 |
|
# Extracted field definitions
# Build one dict per issue holding every field pulled out of the issue body,
# plus a combined "Submission Summary" text.
submissions = []
for issue in issues:
    body = issue.body or ""  # body is None for issues with no description

    # Collect every ticked "- [x] ..." item. Each item is stripped because
    # ".+" also captures a trailing "\r" when the body uses CRLF line endings,
    # which would otherwise end up embedded in the joined string.
    checked = [
        item.strip()
        for item in re.findall(r"- \[x\] (.+)", body, re.IGNORECASE)
    ]

    entry = {
        "Issue #": issue.number,
        "Nominee Name": extract("Nominee Name", body),
        "Nominee Email": extract("Nominee Email", body),
        "GitHub Handle": extract("Nominee's GitHub or GitLab Handle", body),
        "Organization": extract("(Optional) Organization / Affiliation", body),
        "Location": extract("City, State/Province, Country", body),
        "Nominator Name": extract("Your Name", body),
        "Nominator Email": extract("Your Email (Optional)", body),
        "Contribution Checkboxes": "; ".join(checked),
        "Ambassador Pitch": extract("🏆 How Would the Nominee Contribute as an Ambassador?", body),
        "Additional Info": extract("Any additional details you'd like to share?", body),
    }

    # Construct clean submission summary: four titled sections separated by
    # blank lines, each title followed by its value on the next line.
    summary = "\n\n".join([
        f"Contributions:\n{entry['Contribution Checkboxes']}",
        f"Ambassador Nomination Statement:\n{entry['Ambassador Pitch']}",
        f"GitHub Handle:\n{entry['GitHub Handle']}",
        f"Additional Info:\n{entry['Additional Info']}",
    ])
    entry["Submission Summary"] = summary
    submissions.append(entry)
| 50 | + |
# Deduplicate by nominee email (fallback to name), keeping the submission
# with the highest issue number for each nominee.
print("🧹 Deduplicating...")
latest = {}
for s in sorted(submissions, key=lambda x: x["Issue #"], reverse=True):
    key = (s["Nominee Email"] or s["Nominee Name"]).lower()
    if not key:
        # Neither email nor name could be extracted: there is nothing to
        # compare on, so keep the entry under a per-issue key instead of
        # collapsing every unidentified submission into a single one.
        key = ("issue", s["Issue #"])
    # Iteration is newest-first, so the first entry seen for a key wins.
    latest.setdefault(key, s)
deduped = list(latest.values())

# Everything that was not kept is a duplicate. Compare by object identity:
# the kept entries are the very same dict objects held in `submissions`, and
# this avoids a quadratic dict-by-dict equality scan.
kept_ids = {id(s) for s in deduped}
duplicates = [s for s in submissions if id(s) not in kept_ids]
| 60 | + |
# Output folder
os.makedirs("ambassador", exist_ok=True)


def _write_csv(path, rows):
    """Write ``rows`` (dicts sharing the same keys) to ``path`` as UTF-8 CSV.

    The header is taken from the keys of the first row. When ``rows`` is empty
    there is no header to derive, so the file is skipped with a notice instead
    of failing with IndexError on ``rows[0]``.
    """
    if not rows:
        print(f"⚠️ No rows to write, skipping {path}")
        return
    with open(path, "w", newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)


# Save raw submissions
_write_csv("ambassador/submissions_all_raw.csv", submissions)

# Save deduplicated
_write_csv("ambassador/submissions_deduplicated.csv", deduped)

# Save duplicates to Excel (only when at least one duplicate was removed)
if duplicates:
    headers = list(duplicates[0].keys())
    wb = Workbook()
    ws = wb.active
    ws.title = "Duplicates Removed"
    ws.append(headers)
    for row in duplicates:
        # Emit cells in header order; a missing key becomes an empty cell.
        ws.append([row.get(k, "") for k in headers])
    wb.save("ambassador/submissions_duplicates_removed.xlsx")
    print("📄 Duplicates saved to ambassador/submissions_duplicates_removed.xlsx")

print("✅ Done: All submission data saved.")
0 commit comments