Skip to content

Commit aed857e

Browse files
committed
fix(sitemap): add lastmod tags & remove duplicate API ref URLs
1 parent 3e54d0f commit aed857e

File tree

2 files changed

+204
-1
lines changed

2 files changed

+204
-1
lines changed

astro.config.ts

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,20 @@ import { RehypePlugins } from "@astrojs/markdown-remark"
1111
import yaml from "@rollup/plugin-yaml"
1212
import { ccipRedirects } from "./src/config/redirects/ccip"
1313
import trailingSlashMiddleware from "./src/integrations/trailing-slash-middleware"
14+
import { getLastModifiedDate } from "./src/utils/lastModified"
15+
import redirectsJson from "./src/features/redirects/redirects.json"
16+
17+
// Set of redirect source paths, used to keep redirecting URLs out of the sitemap
// so that only canonical URLs are listed.
// Each source is normalized to have a leading slash and no trailing slash.
// The root path is kept as "/" (not collapsed to ""), which is the same form the
// sitemap `filter` callback uses for the paths it looks up in this set.
const redirectSources = new Set(
  redirectsJson.redirects
    .map((r) => r.source)
    // Drop entries without a usable source (undefined / empty string)
    .filter((source) => source)
    .map((source) => {
      const withLeadingSlash = source.startsWith("/") ? source : `/${source}`
      const isRoot = withLeadingSlash === "/"
      return !isRoot && withLeadingSlash.endsWith("/") ? withLeadingSlash.slice(0, -1) : withLeadingSlash
    })
)
1428

1529
// https://astro.build/config
1630
export default defineConfig({
@@ -30,7 +44,41 @@ export default defineConfig({
3044
react({
3145
include: ["**/react/*"],
3246
}),
33-
sitemap({ changefreq: "daily" }),
47+
sitemap({
  changefreq: "daily",
  // Decide whether a built page URL is listed in the sitemap.
  filter(page) {
    const { pathname } = new URL(page)

    // Compare paths without a trailing slash; the root path stays "/"
    const hasTrailingSlash = pathname !== "/" && pathname.endsWith("/")
    const canonicalPath = hasTrailingSlash ? pathname.slice(0, -1) : pathname

    // Short-format API reference URLs (e.g. /api-reference/v150, /ccip/api-reference/evm/v150)
    // are aliases of versioned content; only the long-format URLs are kept.
    const isShortVersionAlias = /\/api-reference\/(?:.*\/)?v\d{3,4}(?:\/|$)/.test(canonicalPath)

    // Redirect sources are dropped as well so that only canonical URLs remain
    return !isShortVersionAlias && !redirectSources.has(canonicalPath)
  },
  // Adjust each sitemap entry before it is written out.
  serialize(item) {
    const parsed = new URL(item.url)

    // Emit URLs without a trailing slash (the root URL is left untouched)
    if (parsed.pathname !== "/" && parsed.pathname.endsWith("/")) {
      parsed.pathname = parsed.pathname.slice(0, -1)
      item.url = parsed.toString()
    }

    // Attach <lastmod> when a last-modified date can be resolved for this path
    const modifiedAt = getLastModifiedDate(parsed.pathname)
    if (modifiedAt) {
      item.lastmod = modifiedAt.toISOString()
    }

    return item
  },
}),
3482
mdx(),
3583
],
3684
markdown: {

src/utils/lastModified.ts

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
import { execFileSync, execSync } from "child_process"
import fs from "fs"
3+
4+
/**
 * Resolve the last-modified date for a page, given its URL path.
 *
 * Resolution order:
 * 1. The git commit date of the source file the URL maps to.
 * 2. The current date, when a source file was found but no git date is available
 *    (for example new or untracked files).
 *
 * @param urlPath - URL pathname of the page (e.g. "/ccip/getting-started")
 * @returns The resolved date, or `null` when the URL maps to no source file or
 *          the lookup throws (the failure is logged as a warning).
 */
export function getLastModifiedDate(urlPath: string): Date | null {
  try {
    const sourceFile = convertUrlToFilePath(urlPath)

    if (sourceFile) {
      // Prefer the git history; otherwise fall back to "now"
      return getGitLastModified(sourceFile) ?? new Date()
    }

    return null
  } catch (err) {
    console.warn(`Failed to get last modified date for ${urlPath}:`, err)
    return null
  }
}
31+
32+
/**
 * Map a site URL path to the repository-relative source file it is built from.
 *
 * Lookup order:
 * 1. The root path maps to `src/pages/index.astro`.
 * 2. `<product>/api-reference/...` paths map to `.mdx` files under
 *    `src/content/<product>/api-reference/`, translating short version segments
 *    (e.g. `v150`) to their directory names (e.g. `v1.5.0`) wherever they occur.
 * 3. CCIP directory chain/token pages map to the `chains.json` / `tokens.json`
 *    configuration file of their environment.
 * 4. Otherwise the usual `src/content` and `src/pages` file locations are probed.
 *
 * All returned paths are relative to the current working directory, and (except
 * for the root page) are only returned when the file exists on disk.
 *
 * @param urlPath - URL pathname, with or without leading/trailing slashes
 * @returns The matching source file path, or `null` when none is found
 */
function convertUrlToFilePath(urlPath: string): string | null {
  // Normalize so that "/foo/", "/foo" and "foo" all resolve identically
  const cleanPath = urlPath.replace(/^\/+|\/+$/g, "")

  // Handle the index page
  if (cleanPath === "") {
    return "src/pages/index.astro"
  }

  // Handle API reference pages - map URL paths to content file versions
  const apiReferenceMatch = cleanPath.match(/^([^/]+)\/api-reference\/(.+)$/)
  if (apiReferenceMatch) {
    const [, product, apiPath] = apiReferenceMatch

    // Version shorthand -> version directory name.
    // A Map (rather than a plain object) keeps segments such as "constructor"
    // from resolving to inherited Object.prototype members.
    const versionMap = new Map<string, string>([
      ["v150", "v1.5.0"],
      ["v151", "v1.5.1"],
      ["v160", "v1.6.0"],
      ["v023", "v0.2.3"],
      // Add more version mappings as needed
    ])

    // Translate every segment, because the version is not always the first one
    // (e.g. "evm/v150/client" -> "evm/v1.5.0/client").
    const segments = apiPath.split("/").map((segment) => versionMap.get(segment) ?? segment)
    const basePath = `src/content/${product}/api-reference/${segments.join("/")}`

    // A single segment is a version landing page (index only); deeper paths may be
    // either a leaf document or a directory with its own index.
    const candidates = segments.length === 1 ? [`${basePath}/index.mdx`] : [`${basePath}.mdx`, `${basePath}/index.mdx`]

    const apiFile = candidates.find((candidate) => fs.existsSync(candidate))
    if (apiFile) {
      return apiFile
    }
  }

  // Handle CCIP directory chain/token pages - these are generated from the
  // chains.json / tokens.json configuration files of the matching environment
  const ccipDirectoryMatch = cleanPath.match(/^ccip\/directory\/(mainnet|testnet)\/(chain|token)\/.+$/)
  if (ccipDirectoryMatch) {
    const [, environment, kind] = ccipDirectoryMatch
    const configPath = `src/config/data/ccip/v1_2_0/${environment}/${kind}s.json`
    if (fs.existsSync(configPath)) {
      return configPath
    }
  }

  // Try different file extensions and locations
  const possiblePaths = [
    `src/content/${cleanPath}.md`,
    `src/content/${cleanPath}.mdx`,
    `src/content/${cleanPath}/index.md`,
    `src/content/${cleanPath}/index.mdx`,
    `src/pages/${cleanPath}.astro`,
    `src/pages/${cleanPath}.md`,
    `src/pages/${cleanPath}.mdx`,
    `src/pages/${cleanPath}/index.astro`,
    `src/pages/${cleanPath}/index.md`,
    `src/pages/${cleanPath}/index.mdx`,
  ]

  return possiblePaths.find((possiblePath) => fs.existsSync(possiblePath)) ?? null
}
134+
135+
/**
 * Look up the committer date of the most recent git commit that touched a file.
 *
 * Runs `git log` in the current working directory, following renames.
 *
 * @param filePath - Path of the file, relative to the current working directory
 * @returns The commit date, or `null` when git fails (file untracked, git not
 *          available, timeout), prints nothing, or prints an unparseable date
 */
function getGitLastModified(filePath: string): Date | null {
  try {
    // Arguments are passed as an array so no shell is involved: quotes, "$" or
    // backticks in the path cannot alter the command.
    // "%cI" prints the committer date in strict ISO 8601, which Date parses reliably.
    // "--" ends option parsing so the path is never interpreted as a flag.
    const result = execFileSync("git", ["log", "-1", "--format=%cI", "--follow", "--", filePath], {
      encoding: "utf8",
      stdio: "pipe",
      cwd: process.cwd(),
      timeout: 5000, // 5 second timeout to prevent hanging
    })

    const timestamp = result.trim()
    if (timestamp) {
      const parsed = new Date(timestamp)
      // Never hand back an Invalid Date: callers call toISOString(), which would throw
      if (!Number.isNaN(parsed.getTime())) {
        return parsed
      }
    }
  } catch (error) {
    // Git command failed - file might not be tracked or git not available
    console.debug(`Git command failed for ${filePath}:`, error)
  }

  return null
}

0 commit comments

Comments
 (0)