Skip to content

Commit b1e64a3

Browse files
authored
Merge pull request #1395 from CodeForAfrica/context-specific-robots.txt
feat: add dynamic robots.txt
2 parents 9adf804 + d900630 commit b1e64a3

File tree

7 files changed

+240
-7
lines changed

7 files changed

+240
-7
lines changed

apps/trustlab/public/robots.txt

Lines changed: 0 additions & 6 deletions
This file was deleted.

apps/trustlab/src/app/robots.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import { getRobotsTxt } from "@/trustlab/lib/data";
2+
3+
// Default export of the app-level robots file; the handler itself is implemented in the data layer (getRobotsTxt).
export default getRobotsTxt;

apps/trustlab/src/lib/data/common/seo.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { site } from "@/trustlab/utils";
2+
import parseRobotsToMetadata from "@/trustlab/utils/parseRobotsTxt";
23

34
// Lower-case titles that presumably mark a page as the homepage — the code that reads this list is outside the visible diff; confirm there.
const HOMEPAGE_TITLES = ["home", "homepage", "index"];
45

@@ -60,4 +61,8 @@ export function getPageSeoFromMeta(page, settings) {
6061
};
6162
}
6263

64+
/**
 * Converts robots.txt text into the object produced by the shared
 * parseRobotsToMetadata utility.
 *
 * @param {string} [content=""] - robots.txt text; an omitted argument is treated as "".
 * @returns {*} Whatever parseRobotsToMetadata returns for `content`.
 */
export function parseRobotsTxt(content = "") {
  const metadata = parseRobotsToMetadata(content);
  return metadata;
}
67+
6368
export default undefined;
apps/trustlab/src/lib/data/index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
export { getPageStaticPaths, getPageStaticProps } from "./local";
1+
export { getPageStaticPaths, getPageStaticProps, getRobotsTxt } from "./local";
22

33
export default undefined;

apps/trustlab/src/lib/data/local/index.js

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { getPageProps, getPagePaths } from "@/trustlab/lib/data/common";
2+
import { parseRobotsTxt } from "@/trustlab/lib/data/common/seo";
23
import api from "@/trustlab/lib/payload";
34

45
export async function getPageStaticPaths() {
@@ -15,3 +16,10 @@ export async function getPageStaticProps(context) {
1516
revalidate: 60,
1617
};
1718
}
19+
20+
/**
 * Loads the "site-settings" global and hands its `robotsTxt` value to
 * parseRobotsTxt.
 *
 * @returns {Promise<*>} Resolves to the parseRobotsTxt result. When the
 *   global or its `robotsTxt` field is missing, `undefined` is passed on.
 */
export async function getRobotsTxt() {
  const { robotsTxt } = (await api.findGlobal("site-settings")) ?? {};
  return parseRobotsTxt(robotsTxt);
}
24+
25+
export default undefined;

apps/trustlab/src/payload/globals/tabs/SeoTab.js

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,26 @@ import {
77
/* eslint-disable-next-line import/no-unresolved */
88
} from "@payloadcms/plugin-seo/fields";
99

10+
import parseRobotsToMetadata from "@/trustlab/utils/parseRobotsTxt";
11+
12+
/**
 * Field validator for the robots.txt setting.
 *
 * Blank input (missing, empty or whitespace-only) is accepted as-is. Any
 * other value is parsed with diagnostics enabled; if the parser reports
 * problems they are folded into one human-readable message.
 *
 * @param {string | null | undefined} value - Text entered in the field.
 * @returns {true | string} `true` when the value is acceptable, otherwise an
 *   "Invalid robots.txt: …" message listing each problem separated by "; ".
 */
const validateRobotsTxt = (value) => {
  const isBlank = !value?.trim();
  if (isBlank) {
    return true;
  }

  const { errors } = parseRobotsToMetadata(value, {
    collectDiagnostics: true,
  });
  if (!errors?.length) {
    return true;
  }

  // One problem becomes: line <n> [directive "<name>"] [<reason>]
  const describe = ({ line, directive, reason }) => {
    const parts = [`line ${line}`];
    if (directive) {
      parts.push(`directive "${directive}"`);
    }
    if (reason) {
      parts.push(reason);
    }
    return parts.join(" ");
  };

  const summary = errors.map((entry) => describe(entry)).join("; ");
  return `Invalid robots.txt: ${summary}`;
};
29+
1030
const SeoTab = {
1131
label: "SEO",
1232
fields: [
@@ -46,6 +66,18 @@ const SeoTab = {
4666
}),
4767
],
4868
},
69+
{
70+
name: "robotsTxt",
71+
label: "robots.txt",
72+
type: "code",
73+
defaultValue: "User-agent: *\nDisallow: /",
74+
admin: {
75+
language: "plaintext",
76+
rows: 14,
77+
description: "Enter the exact robots.txt text to serve.",
78+
},
79+
validate: validateRobotsTxt,
80+
},
4981
],
5082
};
5183

apps/trustlab/src/utils/parseRobotsTxt.js

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
// Lower-case directive names the parser recognises; any other key is
// reported as "Unknown directive".
const RECOGNISED_DIRECTIVE_NAMES = [
  "user-agent",
  "allow",
  "disallow",
  "crawl-delay",
  "sitemap",
  "host",
  "cache-delay",
];
const KNOWN_DIRECTIVES = new Set(RECOGNISED_DIRECTIVE_NAMES);
10+
11+
/**
 * Returns strings unchanged and stringifies everything else;
 * null and undefined become the empty string.
 */
const normalize = (value) => {
  if (typeof value === "string") {
    return value;
  }
  return `${value ?? ""}`;
};
13+
14+
/**
 * Collapses an array to its most compact form: an empty array becomes
 * undefined, a single-element array becomes that element, and longer arrays
 * are returned untouched. Non-array input is returned as-is.
 */
const unwrap = (list) => {
  if (!Array.isArray(list)) {
    return list;
  }
  switch (list.length) {
    case 0:
      return undefined;
    case 1:
      return list[0];
    default:
      return list;
  }
};
23+
24+
/**
 * Appends a { line, directive, reason } record to `errors`.
 * Does nothing when `errors` is falsy, so callers can pass `undefined`
 * to skip diagnostics collection.
 */
const pushError = (errors, line, directive, reason) => {
  if (errors) {
    errors.push({ line, directive, reason });
  }
};
30+
31+
/**
 * Parses a robots.txt string into a Next.js-compatible robots metadata object.
 * @see https://nextjs.org/docs/app/api-reference/file-conventions/metadata/robots#robots-object
 *
 * Parsing is line based: "#" starts a comment, blank lines are ignored, and
 * each remaining line is split on its first ":" into a case-insensitive
 * directive name and a value. Consecutive User-agent lines share one rule
 * group; a User-agent line that follows a rule directive starts a new group.
 * Problem lines are skipped; they are only reported when
 * `collectDiagnostics` is enabled.
 *
 * @param {string} raw - robots.txt content
 * @param {{ collectDiagnostics?: boolean }} options - set `collectDiagnostics`
 *   to receive an `errors` array describing every skipped/problem line
 * @returns {{ rules: import("next").Metadata["robots"]["rules"], sitemap?: string | string[], host?: string, errors?: Array<{ line: number, directive?: string, reason: string }> }}
 *   `rules` is a single object when exactly one group was found, otherwise an
 *   array (empty when no group was found).
 */
export default function parseRobotsToNextJs(raw = "", options = {}) {
  // The default parameter only covers `undefined`; `?? {}` also tolerates null.
  const { collectDiagnostics = false } = options ?? {};
  const diagnostics = collectDiagnostics ? [] : undefined;
  const lines = normalize(raw).replace(/\r\n?/g, "\n").split("\n");

  // Crawl-delay must be a plain unsigned decimal. Number() alone would also
  // accept signed, hex and exponent forms such as "-5", "0x10" or "1e3".
  const crawlDelayPattern = /^(?:\d+(?:\.\d*)?|\.\d+)$/;

  const groups = []; // { userAgent: string[], allow: string[], disallow: string[], crawlDelay?: number }
  const sitemaps = [];
  let host = null;
  let cur = null;
  // True once the current group has seen a rule line, so that the next
  // User-agent line opens a new group instead of extending this one.
  let curHasDirectives = false;

  // Returns the current group, creating a "*" group (and reporting it) when a
  // rule directive appears before any User-agent line.
  const ensureGroup = (lineNumber, directive) => {
    if (!cur) {
      cur = { userAgent: ["*"], allow: [], disallow: [] };
      groups.push(cur);
      curHasDirectives = false;
      pushError(
        diagnostics,
        lineNumber,
        directive,
        "Directive applied before any User-agent; defaulting to *",
      );
    }
    return cur;
  };

  lines.forEach((rawLine, index) => {
    const lineNumber = index + 1;
    const line = rawLine.replace(/#.*$/, "").trim();
    if (!line) {
      return;
    }

    const sep = line.indexOf(":");
    if (sep === -1) {
      pushError(diagnostics, lineNumber, line, 'Missing ":" separator');
      return;
    }

    const key = line.slice(0, sep).trim().toLowerCase();
    const val = line.slice(sep + 1).trim();

    if (!KNOWN_DIRECTIVES.has(key)) {
      pushError(diagnostics, lineNumber, key || line, "Unknown directive");
      return;
    }

    // Only "Disallow:" may be empty; every other directive (User-agent
    // included) is rejected here when it has no value.
    if (!val && key !== "disallow") {
      pushError(diagnostics, lineNumber, key, "Empty directive value");
      return;
    }

    switch (key) {
      case "sitemap":
        sitemaps.push(val);
        break;

      case "host":
        // The first Host wins; a different later value is reported and ignored.
        if (host && host !== val) {
          pushError(
            diagnostics,
            lineNumber,
            key,
            `Host already set to "${host}"`,
          );
        } else {
          host = val;
        }
        break;

      case "user-agent":
        if (!cur || curHasDirectives) {
          cur = { userAgent: [val], allow: [], disallow: [] };
          groups.push(cur);
          curHasDirectives = false;
        } else {
          cur.userAgent.push(val);
        }
        break;

      case "allow":
        ensureGroup(lineNumber, key).allow.push(val);
        curHasDirectives = true;
        break;

      case "disallow":
        ensureGroup(lineNumber, key).disallow.push(val);
        curHasDirectives = true;
        break;

      case "crawl-delay": {
        const n = crawlDelayPattern.test(val) ? Number(val) : Number.NaN;
        if (Number.isFinite(n)) {
          ensureGroup(lineNumber, key).crawlDelay = n;
        } else {
          pushError(
            diagnostics,
            lineNumber,
            key,
            "Crawl-delay must be numeric",
          );
        }
        // Even an invalid Crawl-delay counts as a rule line for grouping.
        curHasDirectives = true;
        break;
      }

      case "cache-delay":
        pushError(
          diagnostics,
          lineNumber,
          key,
          "Directive not supported in Next.js robots metadata",
        );
        break;

      default:
        break;
    }
  });

  const rules = groups.map((g) => {
    const rule = { userAgent: unwrap(g.userAgent) };
    const allow = unwrap(g.allow);
    const disallow = unwrap(g.disallow);
    // A lone empty value (e.g. a bare "Disallow:") is omitted from the rule.
    if (allow !== undefined && allow !== "") {
      rule.allow = allow;
    }
    if (disallow !== undefined && disallow !== "") {
      rule.disallow = disallow;
    }
    if (g.crawlDelay !== undefined) {
      rule.crawlDelay = g.crawlDelay;
    }
    return rule;
  });

  const result = {
    rules: rules.length === 1 ? rules[0] : rules,
  };
  const sitemap = unwrap(sitemaps);
  if (sitemap !== undefined) {
    result.sitemap = sitemap;
  }
  if (host) {
    result.host = host;
  }
  if (diagnostics) {
    result.errors = diagnostics;
  }
  return result;
}

0 commit comments

Comments
 (0)