From 8461f5c631b21799fc87a1922fa12b87cdbed2b0 Mon Sep 17 00:00:00 2001 From: Chris Green <75560394+chr156r33n@users.noreply.github.com> Date: Sat, 21 Jun 2025 18:57:07 +0100 Subject: [PATCH 01/13] Create llms-txt-validation.js MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fetches /llms.txt and returns a simple validity flag (1 or 0). It checks that the response: - Succeeds (HTTP 2xx) - Has a Content-Type of text/plain - Contains balanced Markdown code fences (…) - Has matching counts of [ vs. ] and ( vs. ) If any check fails, it returns 0; otherwise it returns 1. --- llms-txt-validation.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 llms-txt-validation.js diff --git a/llms-txt-validation.js b/llms-txt-validation.js new file mode 100644 index 0000000..7fe25f8 --- /dev/null +++ b/llms-txt-validation.js @@ -0,0 +1,14 @@ +[llms-txt-valid] +return fetch('/llms.txt') + .then(response => { + if (!response.ok) return 0; + const ct = response.headers.get('Content-Type')||''; + if (!ct.toLowerCase().includes('text/plain')) return 0; + return response.text().then(text => { + const m = s=> (text.match(new RegExp(`\\${s}`,'g'))||[]).length; + if ((text.match(/```/g)||[]).length %2) return 0; + if (m('[')!==m(']')||m('(')!==m(')')) return 0; + return 1; + }); + }) + .catch(()=>0); From 41b063a1cd13cffcae75056918fddb295c02fa90 Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 19:24:45 +0100 Subject: [PATCH 02/13] Update llms-txt-validation.js --- llms-txt-validation.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llms-txt-validation.js b/llms-txt-validation.js index 7fe25f8..7d3b1f4 100644 --- a/llms-txt-validation.js +++ b/llms-txt-validation.js @@ -1,4 +1,4 @@ -[llms-txt-valid] +//[llms-txt-valid] return fetch('/llms.txt') .then(response => { if (!response.ok) return 0; From cae08e6d1479fb709991cb51a806e4ab1ecf6d0b Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 19:45:11 +0100 Subject: [PATCH 03/13] Update llms-txt-validation.js --- llms-txt-validation.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llms-txt-validation.js b/llms-txt-validation.js index 7d3b1f4..794cf0a 100644 --- a/llms-txt-validation.js +++ b/llms-txt-validation.js @@ -11,4 +11,6 @@ return fetch('/llms.txt') return 1; }); }) - .catch(()=>0); + .catch(error => { + return JSON.stringify({message: error.message, error: error}); + }); From c9dd28857956a96b74622bf6c4d6661ccd54945e Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 19:57:24 +0100 Subject: [PATCH 04/13] Return JSON --- llms-txt-validation.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llms-txt-validation.js b/llms-txt-validation.js index 794cf0a..47c3328 100644 --- a/llms-txt-validation.js +++ b/llms-txt-validation.js @@ -1,14 +1,14 @@ //[llms-txt-valid] return fetch('/llms.txt') .then(response => { - if (!response.ok) return 0; + if (!response.ok) return {"exists":0,"valid":0}; const ct = response.headers.get('Content-Type')||''; - if (!ct.toLowerCase().includes('text/plain')) return 0; + if (!ct.toLowerCase().includes('text/plain')) return {"exists":1,"valid":0}; return response.text().then(text => { const m = s=> (text.match(new RegExp(`\\${s}`,'g'))||[]).length; - if ((text.match(/```/g)||[]).length %2) return 0; - if (m('[')!==m(']')||m('(')!==m(')')) return 0; - return 1; + if ((text.match(/```/g)||[]).length %2) return {"exists":1,"valid":0}; + if (m('[')!==m(']')||m('(')!==m(')')) return {"exists":1,"valid":0}; + return {"exists":1,"valid":1}; }); }) .catch(error => { From c9f93aed85cec5c1b7379548a83621924879b07d Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 20:15:36 +0100 Subject: [PATCH 05/13] JSON stringify --- llms-txt-validation.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llms-txt-validation.js b/llms-txt-validation.js index 47c3328..93d11e0 100644 --- a/llms-txt-validation.js +++ b/llms-txt-validation.js @@ -1,16 +1,16 @@ //[llms-txt-valid] return fetch('/llms.txt') .then(response => { - if (!response.ok) return {"exists":0,"valid":0}; + if (!response.ok) return JSON.stringify({valid:0, error: "Non OK status code"}); const ct = response.headers.get('Content-Type')||''; - if (!ct.toLowerCase().includes('text/plain')) return {"exists":1,"valid":0}; + if (!ct.toLowerCase().includes('text/plain')) return JSON.stringify({valid:0,error: "Invalid content type"}); return response.text().then(text => { const m = s=> (text.match(new RegExp(`\\${s}`,'g'))||[]).length; - if ((text.match(/```/g)||[]).length %2) return {"exists":1,"valid":0}; - if (m('[')!==m(']')||m('(')!==m(')')) return {"exists":1,"valid":0}; - return {"exists":1,"valid":1}; + if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid:0,error:""}); + if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid:0,error:""}); + return JSON.stringify({valid":1}); }); }) .catch(error => { - return JSON.stringify({message: error.message, error: error}); + return JSON.stringify({valid:0,message: error.message, error: error}); }); From 4aaada52be0e89615d67b09f12fa1ea7070e301a Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 20:19:44 +0100 Subject: [PATCH 06/13] Update llms-txt-validation.js --- llms-txt-validation.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llms-txt-validation.js b/llms-txt-validation.js index 93d11e0..6e8b458 100644 --- a/llms-txt-validation.js +++ b/llms-txt-validation.js @@ -8,7 +8,7 @@ return fetch('/llms.txt') const m = s=> (text.match(new RegExp(`\\${s}`,'g'))||[]).length; if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid:0,error:""}); if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid:0,error:""}); - return JSON.stringify({valid":1}); + return JSON.stringify({valid:1}); }); }) .catch(error => { From cfd4af36824b102f8aab256571578a1411a20bde Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 20:52:18 +0100 Subject: [PATCH 07/13] Rename llms-txt-validation.js to dist/llms-txt-validation.js --- llms-txt-validation.js => dist/llms-txt-validation.js | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llms-txt-validation.js => dist/llms-txt-validation.js (100%) diff --git a/llms-txt-validation.js b/dist/llms-txt-validation.js similarity index 100% rename from llms-txt-validation.js rename to dist/llms-txt-validation.js From dc9ba7af312f1778deccb1641719342a7a92c302 Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 21:03:51 +0100 Subject: [PATCH 08/13] Update llms-txt-validation.js --- dist/llms-txt-validation.js | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/dist/llms-txt-validation.js b/dist/llms-txt-validation.js index 6e8b458..5e41ba3 100644 --- a/dist/llms-txt-validation.js +++ b/dist/llms-txt-validation.js @@ -1,16 +1,23 @@ //[llms-txt-valid] -return fetch('/llms.txt') + +const fetchWithTimeout = (url) => { + var controller = new AbortController(); + setTimeout(() => {controller.abort()}, 5000); + return fetch(url, {signal: controller.signal}); +} + +return fetchWithTimeout('/llms.txt') .then(response => { - if (!response.ok) return JSON.stringify({valid:0, error: "Non OK status code"}); + if (!response.ok) return JSON.stringify({valid:0, message:response.status, error: "Non OK status code"}); const ct = response.headers.get('Content-Type')||''; if (!ct.toLowerCase().includes('text/plain')) return JSON.stringify({valid:0,error: "Invalid content type"}); return response.text().then(text => { const m = s=> (text.match(new RegExp(`\\${s}`,'g'))||[]).length; - if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid:0,error:""}); - if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid:0,error:""}); + if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid:0, error:"Invalid markdown fences"}); + if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid:0, error:"Unmatch braces"}); return JSON.stringify({valid:1}); }); }) .catch(error => { - return JSON.stringify({valid:0,message: error.message, error: error}); + return JSON.stringify({valid:0, message: error.message, error: error}); }); From 1bb9fb8fd9885ffadd09b9b27bb3ea4a12c7b5a7 Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 21:17:19 +0100 Subject: [PATCH 09/13] Update llms-txt-validation.js --- dist/llms-txt-validation.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dist/llms-txt-validation.js b/dist/llms-txt-validation.js index 5e41ba3..77c3599 100644 --- a/dist/llms-txt-validation.js +++ b/dist/llms-txt-validation.js @@ -8,13 +8,13 @@ const fetchWithTimeout = (url) => { return fetchWithTimeout('/llms.txt') .then(response => { - if (!response.ok) return JSON.stringify({valid:0, message:response.status, error: "Non OK status code"}); + if (!response.ok) return JSON.stringify({valid:0, message: response.status, error: "Non OK status code"}); const ct = response.headers.get('Content-Type')||''; - if (!ct.toLowerCase().includes('text/plain')) return JSON.stringify({valid:0,error: "Invalid content type"}); + if (!ct.toLowerCase().includes('text/plain')) return JSON.stringify({valid:0, message: ct, error: "Invalid content type"}); return response.text().then(text => { const m = s=> (text.match(new RegExp(`\\${s}`,'g'))||[]).length; if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid:0, error:"Invalid markdown fences"}); - if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid:0, error:"Unmatch braces"}); + if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid:0, error:"Unmatched braces"}); return JSON.stringify({valid:1}); }); }) From 4b96095e21663aaa75440f16a2b4b012c6256f05 Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 21:34:18 +0100 Subject: [PATCH 10/13] Rename llms-txt-validation.js to llms_txt_validation.js --- dist/{llms-txt-validation.js => llms_txt_validation.js} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dist/{llms-txt-validation.js => llms_txt_validation.js} (100%) diff --git a/dist/llms-txt-validation.js b/dist/llms_txt_validation.js similarity index 100% rename from dist/llms-txt-validation.js rename to dist/llms_txt_validation.js From f1c6d6fd96f99bb58e441db509f29c1738614842 Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 21:42:57 +0100 Subject: [PATCH 11/13] Update llms_txt_validation.js --- dist/llms_txt_validation.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dist/llms_txt_validation.js b/dist/llms_txt_validation.js index 77c3599..b3af89f 100644 --- a/dist/llms_txt_validation.js +++ b/dist/llms_txt_validation.js @@ -12,9 +12,9 @@ return fetchWithTimeout('/llms.txt') const ct = response.headers.get('Content-Type')||''; if (!ct.toLowerCase().includes('text/plain')) return JSON.stringify({valid:0, message: ct, error: "Invalid content type"}); return response.text().then(text => { - const m = s=> (text.match(new RegExp(`\\${s}`,'g'))||[]).length; - if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid:0, error:"Invalid markdown fences"}); - if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid:0, error:"Unmatched braces"}); + const m = s => (text.match(new RegExp(`\\${s}`,'g'))||[]).length; + if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid:0, error:"Invalid markdown: Unmatched braces"}); + if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid:0, error:"Invalid markdown: Uneven code fences"}); return JSON.stringify({valid:1}); }); }) From 50de227dad94efdef057b55371f0761ec267ce9a Mon Sep 17 00:00:00 2001 From: Barry Pollard Date: Sat, 21 Jun 2025 21:50:44 +0100 Subject: [PATCH 12/13] Change to Boolean --- dist/llms_txt_validation.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dist/llms_txt_validation.js b/dist/llms_txt_validation.js index b3af89f..a7a2e60 100644 --- a/dist/llms_txt_validation.js +++ b/dist/llms_txt_validation.js @@ -8,16 +8,16 @@ const fetchWithTimeout = (url) => { return fetchWithTimeout('/llms.txt') .then(response => { - if (!response.ok) return JSON.stringify({valid:0, message: response.status, error: "Non OK status code"}); + if (!response.ok) return JSON.stringify({valid: false, message: response.status, error: "Non OK status code"}); const ct = response.headers.get('Content-Type')||''; - if (!ct.toLowerCase().includes('text/plain')) return JSON.stringify({valid:0, message: ct, error: "Invalid content type"}); + if (!ct.toLowerCase().includes('text/plain')) return JSON.stringify({valid: false, message: ct, error: "Invalid content type"}); return response.text().then(text => { const m = s => (text.match(new RegExp(`\\${s}`,'g'))||[]).length; - if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid:0, error:"Invalid markdown: Unmatched braces"}); - if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid:0, error:"Invalid markdown: Uneven code fences"}); - return JSON.stringify({valid:1}); + if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid: false, error: "Invalid markdown: Unmatched braces"}); + if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid: false, error: "Invalid markdown: Uneven code fences"}); + return JSON.stringify({valid: true}); }); }) .catch(error => { - return JSON.stringify({valid:0, message: error.message, error: error}); + return JSON.stringify({valid: false, message: error.message, error: error}); }); From ca3538fef809e48cca4697e96e100dac31ff5b5a Mon Sep 17 00:00:00 2001 From: Chris Green <75560394+chr156r33n@users.noreply.github.com> Date: Tue, 24 Jun 2025 12:55:25 +0100 Subject: [PATCH 13/13] Validation Changes Making LLMs.txt scoring less strict, so present - yet malformed llms.txt files - will always be marked as "valid", rather than invalid to make it easier to count total adoption whilst understanding which files have errors or not. --- dist/llms_txt_validation.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dist/llms_txt_validation.js b/dist/llms_txt_validation.js index a7a2e60..48e9ae6 100644 --- a/dist/llms_txt_validation.js +++ b/dist/llms_txt_validation.js @@ -13,8 +13,8 @@ return fetchWithTimeout('/llms.txt') if (!ct.toLowerCase().includes('text/plain')) return JSON.stringify({valid: false, message: ct, error: "Invalid content type"}); return response.text().then(text => { const m = s => (text.match(new RegExp(`\\${s}`,'g'))||[]).length; - if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid: false, error: "Invalid markdown: Unmatched braces"}); - if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid: false, error: "Invalid markdown: Uneven code fences"}); + if (m('[')!==m(']')||m('(')!==m(')')) return JSON.stringify({valid: true, error: "Invalid markdown: Unmatched braces"}); + if ((text.match(/```/g)||[]).length %2) return JSON.stringify({valid: true, error: "Invalid markdown: Uneven code fences"}); return JSON.stringify({valid: true}); }); })