From 02ad7d64fc240914cef18183406a5964690a4a55 Mon Sep 17 00:00:00 2001 From: myelinated-wackerow <263208946+myelinated-wackerow@users.noreply.github.com> Date: Wed, 11 Mar 2026 18:32:58 +0000 Subject: [PATCH 1/4] feat: add co-author support, filter bots Parse Co-authored-by trailers from commit messages to include co-authors in page contributor lists. Add exclusion lists for AI agents and bots (Claude, Copilot, CodeRabbit, Gemini, etc.) to keep contributor lists human-only. Co-Authored-By: Claude Opus 4.6 Co-Authored-By: wackerow <54227730+wackerow@users.noreply.github.com> --- src/lib/types.ts | 1 + src/lib/utils/gh.ts | 129 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 127 insertions(+), 3 deletions(-) diff --git a/src/lib/types.ts b/src/lib/types.ts index bc35853c6f4..1b968547af0 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -402,6 +402,7 @@ export type Commit = { email: string date: string } + message: string } author: { avatar_url: string diff --git a/src/lib/utils/gh.ts b/src/lib/utils/gh.ts index 50a90d5d5b5..83daef19214 100644 --- a/src/lib/utils/gh.ts +++ b/src/lib/utils/gh.ts @@ -147,6 +147,122 @@ async function fetchWithRateLimit(filepath: string): Promise { return json } +/** Email addresses (or substrings) that identify AI agent co-authors */ +const EXCLUDED_EMAILS = [ + "noreply@anthropic.com", + "copilot@github.com", + "49699333+dependabot[bot]@users.noreply.github.com", + "actions@github.com", + "github-actions[bot]@users.noreply.github.com", + "noreply@github.com", +] + +/** GitHub logins (exact match) that should be excluded */ +const EXCLUDED_LOGINS = [ + "dependabot[bot]", + "github-actions[bot]", + "allcontributors[bot]", + "netlify[bot]", + "crowdin-bot", + "eth-bot", + "ethereumoptimism-bot", + "coderabbitai[bot]", +] + +/** Name patterns (case-insensitive substring match) for AI agent co-authors */ +const EXCLUDED_NAME_PATTERNS = [ + "claude", + "copilot", + "gpt", + "chatgpt", + "openai", + "cursor", + "codeium", + "tabnine", + "amazon q", + "cody", + "gemini", + "coderabbit", +] + +/** + * Extract GitHub login from a noreply email address. + * Handles both formats: + * - "username@users.noreply.github.com" + * - "12345678+username@users.noreply.github.com" + * Returns null for non-noreply emails. + */ +const extractLoginFromNoreplyEmail = (email: string): string | null => { + const match = email.match(/^(?:\d+\+)?([^@]+)@users\.noreply\.github\.com$/) + return match ? match[1] : null +} + +/** + * Parse co-author trailers from a commit message. + * Matches lines like: "Co-authored-by: Name " + * Returns FileContributor entries for any co-authors with resolvable + * GitHub logins (via noreply email addresses). + */ +const parseCoAuthors = ( + message: string, + commitDate: string +): FileContributor[] => { + const coAuthorPattern = /^co-authored-by:\s*(.+?)\s*<([^>]+)>/gim + const coAuthors: FileContributor[] = [] + let match + + while ((match = coAuthorPattern.exec(message)) !== null) { + const name = match[1].trim() + const email = match[2].trim() + + // Skip excluded emails + if ( + EXCLUDED_EMAILS.some((excluded) => + email.toLowerCase().includes(excluded.toLowerCase()) + ) + ) { + continue + } + + // Skip excluded name patterns (catches AI agents) + if ( + EXCLUDED_NAME_PATTERNS.some((pattern) => + name.toLowerCase().includes(pattern.toLowerCase()) + ) + ) { + continue + } + + // Resolve GitHub login from noreply email + const login = extractLoginFromNoreplyEmail(email) + if (!login) continue + + // Skip excluded logins + if ( + EXCLUDED_LOGINS.some( + (excluded) => excluded.toLowerCase() === login.toLowerCase() + ) + ) { + continue + } + + coAuthors.push({ + login, + avatar_url: `https://avatars.githubusercontent.com/${login}`, + html_url: `https://github.com/${login}`, + date: commitDate, + }) + } + + return coAuthors +} + +/** Check if a primary commit author should be excluded */ +const isExcludedContributor = (login: string): boolean => + EXCLUDED_LOGINS.some( + (excluded) => excluded.toLowerCase() === login.toLowerCase() + ) + // Fetch commit history and save it to a JSON file export const fetchAndCacheGitHubContributors = async ( filepath: string, @@ -163,13 +279,20 @@ export const fetchAndCacheGitHubContributors = async ( filepath.replace(CONTENT_DIR, OLD_CONTENT_DIR) )) || [] - // Transform commitHistory + // Transform commitHistory: include both primary authors and co-authors const contributors = [...history, ...legacyHistory] .filter(({ author }) => !!author) - .map((contribution) => { + .flatMap((contribution) => { const { login, avatar_url, html_url } = contribution.author const { date } = contribution.commit.author - return { login, avatar_url, html_url, date } + + const primary: FileContributor[] = isExcludedContributor(login) + ? [] + : [{ login, avatar_url, html_url, date }] + + const coAuthors = parseCoAuthors(contribution.commit.message, date) + + return [...primary, ...coAuthors] }) // Remove duplicates from same login From 616baa2758c78ad0747550baa1ef5c61ef5b07c2 Mon Sep 17 00:00:00 2001 From: myelinated-wackerow <263208946+myelinated-wackerow@users.noreply.github.com> Date: Thu, 12 Mar 2026 21:34:22 +0000 Subject: [PATCH 2/4] feat: port coauthor logic to data-layer fetcher Move co-author parsing, bot/AI filtering, and exclusion lists from the removed gh.ts fetching code into fetchGitHubContributors.ts where the contributor fetching now lives post-#17174. Co-Authored-By: Claude Opus 4.6 Co-Authored-By: wackerow <54227730+wackerow@users.noreply.github.com> --- .../fetchers/fetchGitHubContributors.ts | 145 +++++++++++++++++- 1 file changed, 137 insertions(+), 8 deletions(-) diff --git a/src/data-layer/fetchers/fetchGitHubContributors.ts b/src/data-layer/fetchers/fetchGitHubContributors.ts index 8506212d6cc..ef4b1d6846b 100644 --- a/src/data-layer/fetchers/fetchGitHubContributors.ts +++ b/src/data-layer/fetchers/fetchGitHubContributors.ts @@ -11,6 +11,122 @@ const BATCH_DELAY_MS = 50 // Small delay between batches to avoid rate limiting const APP_PAGES_PREFIX = "app/[locale]/" +/** Email addresses (or substrings) that identify AI agent co-authors */ +const EXCLUDED_EMAILS = [ + "noreply@anthropic.com", + "copilot@github.com", + "49699333+dependabot[bot]@users.noreply.github.com", + "actions@github.com", + "github-actions[bot]@users.noreply.github.com", + "noreply@github.com", +] + +/** GitHub logins (exact match) that should be excluded */ +const EXCLUDED_LOGINS = [ + "dependabot[bot]", + "github-actions[bot]", + "allcontributors[bot]", + "netlify[bot]", + "crowdin-bot", + "eth-bot", + "ethereumoptimism-bot", + "coderabbitai[bot]", +] + +/** Name patterns (case-insensitive substring match) for AI agent co-authors */ +const EXCLUDED_NAME_PATTERNS = [ + "claude", + "copilot", + "gpt", + "chatgpt", + "openai", + "cursor", + "codeium", + "tabnine", + "amazon q", + "cody", + "gemini", + "coderabbit", +] + +/** + * Extract GitHub login from a noreply email address. + * Handles both formats: + * - "username@users.noreply.github.com" + * - "12345678+username@users.noreply.github.com" + * Returns null for non-noreply emails. + */ +const extractLoginFromNoreplyEmail = (email: string): string | null => { + const match = email.match(/^(?:\d+\+)?([^@]+)@users\.noreply\.github\.com$/) + return match ? match[1] : null +} + +/** + * Parse co-author trailers from a commit message. + * Matches lines like: "Co-authored-by: Name " + * Returns FileContributor entries for any co-authors with resolvable + * GitHub logins (via noreply email addresses). + */ +const parseCoAuthors = ( + message: string, + commitDate: string +): FileContributor[] => { + const coAuthorPattern = /^co-authored-by:\s*(.+?)\s*<([^>]+)>/gim + const coAuthors: FileContributor[] = [] + let match + + while ((match = coAuthorPattern.exec(message)) !== null) { + const name = match[1].trim() + const email = match[2].trim() + + // Skip excluded emails + if ( + EXCLUDED_EMAILS.some((excluded) => + email.toLowerCase().includes(excluded.toLowerCase()) + ) + ) { + continue + } + + // Skip excluded name patterns (catches AI agents) + if ( + EXCLUDED_NAME_PATTERNS.some((pattern) => + name.toLowerCase().includes(pattern.toLowerCase()) + ) + ) { + continue + } + + // Resolve GitHub login from noreply email + const login = extractLoginFromNoreplyEmail(email) + if (!login) continue + + // Skip excluded logins + if ( + EXCLUDED_LOGINS.some( + (excluded) => excluded.toLowerCase() === login.toLowerCase() + ) + ) { + continue + } + + coAuthors.push({ + login, + avatar_url: `https://avatars.githubusercontent.com/${login}`, + html_url: `https://github.com/${login}`, + date: commitDate, + }) + } + + return coAuthors +} + +/** Check if a primary commit author should be excluded */ +const isExcludedContributor = (login: string): boolean => + EXCLUDED_LOGINS.some( + (excluded) => excluded.toLowerCase() === login.toLowerCase() + ) + /** * Generate all historical paths for an app page. * Used to aggregate git history across directory structure migrations. @@ -119,16 +235,29 @@ async function fetchCommitsForPath( // When a commit author email isn't linked to a GitHub account, the API // returns `author: null`. We still include these commits so their date // is captured, using the git commit author name as a fallback identity. - const contributors = commits.map( + // Also parses co-author trailers and filters out bots/AI agents. + const contributors = commits.flatMap( (commit: { author?: { login: string; avatar_url: string; html_url: string } | null - commit: { author: { name: string; date: string } } - }) => ({ - login: commit.author?.login ?? commit.commit.author.name, - avatar_url: commit.author?.avatar_url ?? "", - html_url: commit.author?.html_url ?? "", - date: commit.commit.author.date, - }) + commit: { author: { name: string; date: string }; message: string } + }) => { + const login = commit.author?.login ?? commit.commit.author.name + const date = commit.commit.author.date + + const primary: FileContributor[] = + isExcludedContributor(login) + ? [] + : [{ + login, + avatar_url: commit.author?.avatar_url ?? "", + html_url: commit.author?.html_url ?? "", + date, + }] + + const coAuthors = parseCoAuthors(commit.commit.message, date) + + return [...primary, ...coAuthors] + } ) // Remove duplicates by login (keep first = most recent) From ea987b01ebf3bbd4c812874fddf03eba076dab7e Mon Sep 17 00:00:00 2001 From: myelinated-wackerow <263208946+myelinated-wackerow@users.noreply.github.com> Date: Thu, 12 Mar 2026 22:17:44 +0000 Subject: [PATCH 3/4] feat: resolve co-authors via name lookup Add fallback chain for co-author resolution: 1. Extract login from GitHub noreply email 2. Match name against .all-contributorsrc 3. Fall back to trailer name (no avatar/link) Update FileContributors component to handle contributors without GitHub profiles by showing initials and name without link. Co-Authored-By: Claude Opus 4.6 Co-Authored-By: wackerow <54227730+wackerow@users.noreply.github.com> --- src/components/FileContributors.tsx | 84 +++++++++----- .../fetchers/fetchGitHubContributors.ts | 104 ++++++++++++++---- 2 files changed, 138 insertions(+), 50 deletions(-) diff --git a/src/components/FileContributors.tsx b/src/components/FileContributors.tsx index e201602e01b..c2870cb3ec5 100644 --- a/src/components/FileContributors.tsx +++ b/src/components/FileContributors.tsx @@ -13,7 +13,7 @@ import { ScrollArea } from "@/components/ui/scroll-area" import { cn } from "@/lib/utils/cn" import { trackCustomEvent } from "@/lib/utils/matomo" -import { Avatar } from "./ui/avatar" +import { Avatar, AvatarBase, AvatarFallback } from "./ui/avatar" import Modal from "./ui/dialog-modal" import { LinkBox, LinkOverlay } from "./ui/link-box" @@ -29,20 +29,45 @@ const ContributorAvatar = ({ contributor, label, className, -}: ContributorProps & { label?: string; className?: string }) => ( - -) +}: ContributorProps & { label?: string; className?: string }) => { + const hasProfile = !!contributor.html_url + + // Contributors without a profile (no GitHub account) + if (!hasProfile) { + const initials = contributor.login + .split(" ") + .map((n) => n[0]) + .join("") + .slice(0, 2) + .toUpperCase() + + return ( + + + {initials} + + {label && ( + {label} + )} + + ) + } + + return ( + + ) +} const ContributorAvatarGroup = ({ contributors, @@ -75,19 +100,22 @@ const ContributorAvatarGroup = ({ } type ContributorProps = { contributor: FileContributor } -const Contributor = ({ contributor }: ContributorProps) => ( - - - {contributor.html_url.includes("crowdin.com") && ( -

- -

- )} -
-) +const Contributor = ({ contributor }: ContributorProps) => { + const hasProfile = !!contributor.html_url + return ( + + + {contributor.html_url.includes("crowdin.com") && ( +

+ +

+ )} +
+ ) +} type FlexProps = BaseHTMLAttributes & { asChild?: boolean } export type FileContributorsProps = FlexProps & { diff --git a/src/data-layer/fetchers/fetchGitHubContributors.ts b/src/data-layer/fetchers/fetchGitHubContributors.ts index ef4b1d6846b..5a293386bc8 100644 --- a/src/data-layer/fetchers/fetchGitHubContributors.ts +++ b/src/data-layer/fetchers/fetchGitHubContributors.ts @@ -11,6 +11,39 @@ const BATCH_DELAY_MS = 50 // Small delay between batches to avoid rate limiting const APP_PAGES_PREFIX = "app/[locale]/" +interface AllContributorsEntry { + login: string + name: string + avatar_url: string +} + +type NameLookup = Map + +/** + * Fetch .all-contributorsrc from GitHub and build a case-insensitive + * name -> entry lookup map for resolving co-authors by display name. + */ +async function fetchNameLookup(): Promise { + const url = + "https://raw.githubusercontent.com/ethereum/ethereum-org-website/master/.all-contributorsrc" + const response = await fetch(url) + + if (!response.ok) { + console.warn("Failed to fetch .all-contributorsrc:", response.status) + return new Map() + } + + const data = await response.json() + const entries: AllContributorsEntry[] = data.contributors || [] + const lookup: NameLookup = new Map() + + for (const entry of entries) { + lookup.set(entry.name.toLowerCase(), entry) + } + + return lookup +} + /** Email addresses (or substrings) that identify AI agent co-authors */ const EXCLUDED_EMAILS = [ "noreply@anthropic.com", @@ -64,12 +97,17 @@ const extractLoginFromNoreplyEmail = (email: string): string | null => { /** * Parse co-author trailers from a commit message. * Matches lines like: "Co-authored-by: Name " - * Returns FileContributor entries for any co-authors with resolvable - * GitHub logins (via noreply email addresses). + * + * Resolution chain for each co-author: + * 1. Filter out bots/AI agents by email and name patterns + * 2. Extract login from GitHub noreply email if applicable + * 3. Match display name against .all-contributorsrc entries + * 4. Fall back to trailer name (no avatar/profile link) */ const parseCoAuthors = ( message: string, - commitDate: string + commitDate: string, + nameLookup: NameLookup ): FileContributor[] => { const coAuthorPattern = /^co-authored-by:\s*(.+?)\s*<([^>]+)>/gim const coAuthors: FileContributor[] = [] @@ -97,23 +135,39 @@ const parseCoAuthors = ( continue } - // Resolve GitHub login from noreply email - const login = extractLoginFromNoreplyEmail(email) - if (!login) continue + // 1. Try noreply email -> GitHub login + const noreplyLogin = extractLoginFromNoreplyEmail(email) + if (noreplyLogin) { + if (!isExcludedContributor(noreplyLogin)) { + coAuthors.push({ + login: noreplyLogin, + avatar_url: `https://avatars.githubusercontent.com/${noreplyLogin}`, + html_url: `https://github.com/${noreplyLogin}`, + date: commitDate, + }) + } + continue + } - // Skip excluded logins - if ( - EXCLUDED_LOGINS.some( - (excluded) => excluded.toLowerCase() === login.toLowerCase() - ) - ) { + // 2. Try name match in .all-contributorsrc + const entry = nameLookup.get(name.toLowerCase()) + if (entry) { + if (!isExcludedContributor(entry.login)) { + coAuthors.push({ + login: entry.login, + avatar_url: entry.avatar_url, + html_url: `https://github.com/${entry.login}`, + date: commitDate, + }) + } continue } + // 3. Last resort: use trailer name, no avatar or profile link coAuthors.push({ - login, - avatar_url: `https://avatars.githubusercontent.com/${login}`, - html_url: `https://github.com/${login}`, + login: name, + avatar_url: "", + html_url: "", date: commitDate, }) } @@ -189,7 +243,8 @@ async function parallelBatch( */ async function fetchCommitsForPath( filepath: string, - token: string + token: string, + nameLookup: NameLookup ): Promise { const url = new URL(`${GITHUB_API_BASE}/commits`) url.searchParams.set("path", filepath) @@ -212,7 +267,7 @@ async function fetchCommitsForPath( const waitTime = +resetTime - Math.floor(Date.now() / 1000) console.log(`Rate limit exceeded, waiting ${waitTime}s...`) await delay(waitTime * 1000) - return fetchCommitsForPath(filepath, token) // Retry + return fetchCommitsForPath(filepath, token, nameLookup) // Retry } } @@ -254,7 +309,7 @@ async function fetchCommitsForPath( date, }] - const coAuthors = parseCoAuthors(commit.commit.message, date) + const coAuthors = parseCoAuthors(commit.commit.message, date, nameLookup) return [...primary, ...coAuthors] } @@ -274,10 +329,11 @@ async function fetchCommitsForPath( */ async function fetchContributorsForPaths( paths: string[], - token: string + token: string, + nameLookup: NameLookup ): Promise { const results = await parallelBatch(paths, (path) => - fetchCommitsForPath(path, token) + fetchCommitsForPath(path, token, nameLookup) ) const allContributors = results.flat() @@ -386,6 +442,10 @@ export async function fetchGitHubContributors(): Promise console.log("Starting GitHub contributors fetch...") const startTime = Date.now() + // Build name lookup from .all-contributorsrc for co-author resolution + const nameLookup = await fetchNameLookup() + console.log(`Loaded ${nameLookup.size} entries from .all-contributorsrc`) + const result: GitHubContributorsData = { content: {}, appPages: {}, @@ -417,7 +477,7 @@ export async function fetchGitHubContributors(): Promise const contentResults = await parallelBatch( contentPathPairs, async ({ slug, paths }) => { - const contributors = await fetchContributorsForPaths(paths, token) + const contributors = await fetchContributorsForPaths(paths, token, nameLookup) return { slug, contributors } } ) @@ -449,7 +509,7 @@ export async function fetchGitHubContributors(): Promise const appPageResults = await parallelBatch( appPagePathPairs, async ({ pagePath, paths }) => { - const contributors = await fetchContributorsForPaths(paths, token) + const contributors = await fetchContributorsForPaths(paths, token, nameLookup) return { pagePath, contributors } } ) From 23ecab1875fa5bea9c21607c2c43df9f51246abc Mon Sep 17 00:00:00 2001 From: myelinated-wackerow <263208946+myelinated-wackerow@users.noreply.github.com> Date: Thu, 12 Mar 2026 23:43:53 +0000 Subject: [PATCH 4/4] fix: use username-based avatar URLs Use avatars.githubusercontent.com/{login} instead of the API's /u/{id}?v=4 format which causes redirect loops with Next.js image optimization. Co-Authored-By: Claude Opus 4.6 Co-Authored-By: wackerow <54227730+wackerow@users.noreply.github.com> --- src/data-layer/fetchers/fetchGitHubContributors.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/data-layer/fetchers/fetchGitHubContributors.ts b/src/data-layer/fetchers/fetchGitHubContributors.ts index 5a293386bc8..4cf1a6506e7 100644 --- a/src/data-layer/fetchers/fetchGitHubContributors.ts +++ b/src/data-layer/fetchers/fetchGitHubContributors.ts @@ -299,12 +299,16 @@ async function fetchCommitsForPath( const login = commit.author?.login ?? commit.commit.author.name const date = commit.commit.author.date + // Use username-based avatar URL instead of the API's /u/{id}?v=4 + // format which causes redirect loops with Next.js image optimization const primary: FileContributor[] = isExcludedContributor(login) ? [] : [{ login, - avatar_url: commit.author?.avatar_url ?? "", + avatar_url: commit.author + ? `https://avatars.githubusercontent.com/${commit.author.login}` + : "", html_url: commit.author?.html_url ?? "", date, }]