Skip to content

Commit 829fb95

Browse files
authored
Merge pull request #16 from raifdmueller/feature/llms-txt-issue-109
feat: Add llms.txt + all-anchors.adoc generation, fix HTML entities
2 parents 284498f + 8669b53 commit 829fb95

File tree

10 files changed

+2772
-10
lines changed

10 files changed

+2772
-10
lines changed

.github/workflows/test.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ jobs:
5656
working-directory: ./website
5757
run: npx playwright install --with-deps chromium
5858

59+
- name: Generate llms.txt and all-anchors.adoc
60+
run: node scripts/generate-llms-txt.js
61+
5962
- name: Copy documentation files to public directory
6063
run: |
6164
mkdir -p website/public/docs

docs/all-anchors.adoc

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
= Semantic Anchors — Complete Reference
2+
:toc:
3+
:toc-placement: preamble
4+
:toclevels: 2
5+
6+
include::about.adoc[leveloffset=+1]
7+
8+
<<<
9+
10+
== Communication & Presentation
11+
12+
include::anchors/bluf.adoc[leveloffset=+2]
13+
14+
include::anchors/chatham-house-rule.adoc[leveloffset=+2]
15+
16+
include::anchors/mece.adoc[leveloffset=+2]
17+
18+
include::anchors/pyramid-principle.adoc[leveloffset=+2]
19+
20+
<<<
21+
22+
== Design Principles & Patterns
23+
24+
include::anchors/dry-principle.adoc[leveloffset=+2]
25+
26+
include::anchors/fowler-patterns.adoc[leveloffset=+2]
27+
28+
include::anchors/solid-principles.adoc[leveloffset=+2]
29+
30+
include::anchors/spot-principle.adoc[leveloffset=+2]
31+
32+
include::anchors/ssot-principle.adoc[leveloffset=+2]
33+
34+
<<<
35+
36+
== Development Workflow
37+
38+
include::anchors/bem-methodology.adoc[leveloffset=+2]
39+
40+
include::anchors/conventional-commits.adoc[leveloffset=+2]
41+
42+
include::anchors/mental-model-according-to-naur.adoc[leveloffset=+2]
43+
44+
include::anchors/semantic-versioning.adoc[leveloffset=+2]
45+
46+
include::anchors/sota.adoc[leveloffset=+2]
47+
48+
include::anchors/timtowtdi.adoc[leveloffset=+2]
49+
50+
include::anchors/todotxt-flavoured-markdown.adoc[leveloffset=+2]
51+
52+
<<<
53+
54+
== Dialogue Interaction
55+
56+
include::anchors/socratic-method.adoc[leveloffset=+2]
57+
58+
<<<
59+
60+
== Documentation
61+
62+
include::anchors/diataxis-framework.adoc[leveloffset=+2]
63+
64+
include::anchors/docs-as-code.adoc[leveloffset=+2]
65+
66+
<<<
67+
68+
== Meta
69+
70+
include::anchors/what-qualifies-as-a-semantic-anchor.adoc[leveloffset=+2]
71+
72+
<<<
73+
74+
== Problem Solving
75+
76+
include::anchors/chain-of-thought.adoc[leveloffset=+2]
77+
78+
include::anchors/devils-advocate.adoc[leveloffset=+2]
79+
80+
include::anchors/feynman-technique.adoc[leveloffset=+2]
81+
82+
include::anchors/five-whys.adoc[leveloffset=+2]
83+
84+
include::anchors/morphological-box.adoc[leveloffset=+2]
85+
86+
include::anchors/rubber-duck-debugging.adoc[leveloffset=+2]
87+
88+
<<<
89+
90+
== Requirements Engineering
91+
92+
include::anchors/ears-requirements.adoc[leveloffset=+2]
93+
94+
include::anchors/problem-space-nvc.adoc[leveloffset=+2]
95+
96+
include::anchors/user-story-mapping.adoc[leveloffset=+2]
97+
98+
<<<
99+
100+
== Software Architecture
101+
102+
include::anchors/adr-according-to-nygard.adoc[leveloffset=+2]
103+
104+
include::anchors/arc42.adoc[leveloffset=+2]
105+
106+
include::anchors/c4-diagrams.adoc[leveloffset=+2]
107+
108+
include::anchors/clean-architecture.adoc[leveloffset=+2]
109+
110+
include::anchors/domain-driven-design.adoc[leveloffset=+2]
111+
112+
include::anchors/hexagonal-architecture.adoc[leveloffset=+2]
113+
114+
include::anchors/madr.adoc[leveloffset=+2]
115+
116+
<<<
117+
118+
== Statistical Methods & Process Monitoring
119+
120+
include::anchors/control-chart-shewhart.adoc[leveloffset=+2]
121+
122+
include::anchors/nelson-rules.adoc[leveloffset=+2]
123+
124+
include::anchors/spc.adoc[leveloffset=+2]
125+
126+
<<<
127+
128+
== Strategic Planning
129+
130+
include::anchors/cynefin-framework.adoc[leveloffset=+2]
131+
132+
include::anchors/impact-mapping.adoc[leveloffset=+2]
133+
134+
include::anchors/jobs-to-be-done.adoc[leveloffset=+2]
135+
136+
include::anchors/pugh-matrix.adoc[leveloffset=+2]
137+
138+
include::anchors/wardley-mapping.adoc[leveloffset=+2]
139+
140+
<<<
141+
142+
== Testing & Quality Practices
143+
144+
include::anchors/iec-61508-sil-levels.adoc[leveloffset=+2]
145+
146+
include::anchors/mutation-testing.adoc[leveloffset=+2]
147+
148+
include::anchors/property-based-testing.adoc[leveloffset=+2]
149+
150+
include::anchors/tdd-chicago-school.adoc[leveloffset=+2]
151+
152+
include::anchors/tdd-london-school.adoc[leveloffset=+2]
153+
154+
include::anchors/testing-pyramid.adoc[leveloffset=+2]
155+
156+
<<<

scripts/extract-metadata.js

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,18 @@ if (!fs.existsSync(OUTPUT_DIR)) {
2121
fs.mkdirSync(OUTPUT_DIR, { recursive: true });
2222
}
2323

24+
/**
 * Decode HTML character references to plain Unicode characters.
 *
 * Handles decimal (`&#8212;`) and hexadecimal (`&#x2014;`) numeric references
 * plus the five named references amp, lt, gt, quot and apos. Any other named
 * reference, and any numeric reference above U+10FFFF, is left untouched.
 * Decoding is single-pass, so `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param {string} str - Text that may contain character references.
 * @returns {string} The decoded text; falsy input is returned unchanged.
 */
function decodeHtmlEntities(str) {
  if (!str) return str;

  // A Map rather than an object literal: a plain object would also resolve
  // inherited keys such as "constructor" or "toString" to functions.
  const named = new Map([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
  ]);
  const MAX_CODE_POINT = 0x10ffff;

  return str.replace(/&#x([0-9a-f]+);|&#(\d+);|&([a-z]+);/gi, (match, hex, dec, name) => {
    if (name !== undefined) {
      const replacement = named.get(name);
      return replacement !== undefined ? replacement : match;
    }

    const codePoint = hex !== undefined ? parseInt(hex, 16) : parseInt(dec, 10);
    // String.fromCodePoint throws a RangeError above U+10FFFF; keep such
    // references verbatim instead of crashing the build.
    if (codePoint > MAX_CODE_POINT) return match;
    // fromCodePoint (not fromCharCode) so astral characters are not truncated
    // to 16 bits.
    return String.fromCodePoint(codePoint);
  });
}
35+
2436
/**
2537
* Parse a single anchor file
2638
*/
@@ -31,7 +43,7 @@ function parseAnchorFile(filePath) {
3143
// Extract attributes
3244
const attributes = doc.getAttributes();
3345
const id = path.basename(filePath, '.adoc');
34-
const title = doc.getDocumentTitle();
46+
const title = decodeHtmlEntities(doc.getDocumentTitle());
3547

3648
// Parse comma-separated attributes
3749
const parseList = (attr) => {

scripts/generate-llms-txt.js

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
#!/usr/bin/env node
2+
/**
3+
* Generate docs/all-anchors.adoc and website/public/llms.txt
4+
*
5+
* all-anchors.adoc: AsciiDoc include-based full reference document
6+
* llms.txt: Clean Markdown for LLM consumption
7+
*
8+
* Usage: node scripts/generate-llms-txt.js
9+
*/
10+
11+
const fs = require('fs')
12+
const path = require('path')
13+
14+
// Repository root: this script lives one directory below it.
const ROOT = path.join(__dirname, '..')

// Category index, read synchronously at load time. Each entry is used below
// through its `name` and its `anchors` list of anchor ids.
const CATEGORIES_PATH = path.join(ROOT, 'website/public/data/categories.json')
const categories = JSON.parse(fs.readFileSync(CATEGORIES_PATH, 'utf-8'))
19+
20+
// ─── AsciiDoc → Markdown converter ──────────────────────────────────────────
21+
22+
/**
 * Convert a subset of AsciiDoc markup to Markdown with a sequence of regex
 * passes. This is not a full AsciiDoc parser: constructs without a pass below
 * are passed through unchanged.
 *
 * Complete source/listing blocks (`----` ... `----`) are lifted out before the
 * other passes run and restored verbatim at the end, so code is never
 * rewritten as headings, lists, bold text or description-list terms.
 *
 * @param {string} adoc - AsciiDoc source text.
 * @returns {string} Markdown text, trimmed, with runs of blank lines collapsed.
 */
function adocToMarkdown(adoc) {
  let md = adoc

  // Stash complete listing blocks (optionally preceded by [source,lang]) behind
  // NUL-delimited placeholders that none of the passes below can match.
  const codeBlocks = []
  md = md.replace(
    /^(?:\[source(?:,([^\]\n]*))?\]\s*\n)?----[ \t]*\n([\s\S]*?)^----[ \t]*$/gm,
    (_, lang, code) => {
      const fence = '```' + (lang ? lang.trim() : '') + '\n' + code + '```'
      const index = codeBlocks.push(fence) - 1
      return `\u0000CODE${index}\u0000`
    }
  )

  // Remove document attributes (:key: value)
  md = md.replace(/^:[a-z][a-z0-9-]*:.*$/gm, '')

  // Headings: = → #, == → ##, etc.
  md = md.replace(/^(=+) (.+)$/gm, (_, eq, title) => '#'.repeat(eq.length) + ' ' + title)

  // Fallback for delimiters the stash above did not pair up (e.g. an
  // unterminated block): [source,lang] + ---- → ```lang / ```
  md = md.replace(/\[source(?:,([^\]]*))?\]\s*\n----/g, (_, lang) => '```' + (lang ? lang.trim() : ''))
  md = md.replace(/^----\s*$/gm, '```')

  // [quote] block: [quote]\n____\ntext\n____ → > text
  md = md.replace(/\[quote[^\]]*\]\s*\n_{4}\s*\n([\s\S]*?)\n_{4}/g, (_, body) =>
    body.trim().split('\n').map((l) => '> ' + l).join('\n')
  )

  // Sidebar blocks **** → remove delimiters
  md = md.replace(/^\*{4}\s*$/gm, '')

  // Collapsible: [%collapsible] + ==== delimiters → remove markers, keep content
  md = md.replace(/^\[%collapsible\]\s*$/gm, '')
  md = md.replace(/^====\s*$/gm, '')

  // Tables |=== → remove delimiters
  md = md.replace(/^\|===\s*$/gm, '')

  // Table rows: |cell content → keep, clean up leading pipe
  md = md.replace(/^\|(.+)$/gm, (_, row) => {
    const cells = row.split('|').map((c) => c.trim()).filter(Boolean)
    return '| ' + cells.join(' | ') + ' |'
  })

  // Remove block attribute lines
  md = md.replace(/^\[(?:horizontal|sidebar|cols[^\]]*|options[^\]]*|%\w+[^\]]*)\]\s*$/gm, '')

  // Definition lists: term:: description → **term**: description
  // The delimiter must be followed by whitespace or end of line, so block
  // macros such as include::file.adoc[] or image::x.png[] are left alone.
  md = md.replace(/^([^:\n|#`>]+)::(?=\s|$)\s*(.*)$/gm, (_, term, desc) =>
    desc.trim() ? `**${term.trim()}**: ${desc.trim()}` : `**${term.trim()}**`
  )

  // Links: link:url[text] → [text](url)
  md = md.replace(/link:([^\[]+)\[([^\]]*)\]/g, '[$2]($1)')

  // Cross-references: <<id,text>> → text, <<id>> → `id`
  md = md.replace(/<<([^,>]+),([^>]+)>>/g, '$2')
  md = md.replace(/<<([^>]+)>>/g, '`$1`')

  // Bold: **text** stays, *text* → **text**
  md = md.replace(/(?<![*\w])\*([^*\n]+)\*(?![*\w])/g, '**$1**')

  // Ordered list items: ". item" → "1. item"
  md = md.replace(/^\. /gm, '1. ')

  // Trailing whitespace and normalize blank lines
  md = md.replace(/[ \t]+$/gm, '')
  md = md.replace(/\n{3,}/g, '\n\n')

  // Restore stashed code blocks. A function replacer is used so `$` sequences
  // inside the code are not interpreted as replacement patterns.
  md = md.replace(/\u0000CODE(\d+)\u0000/g, (_, index) => codeBlocks[Number(index)])

  return md.trim()
}
83+
84+
// ─── Generate docs/all-anchors.adoc ─────────────────────────────────────────
85+
86+
/**
 * Write docs/all-anchors.adoc: a document header, an include of about.adoc,
 * then one `==` section per category containing an include directive for
 * every anchor id whose .adoc file exists under docs/anchors. Each section is
 * followed by a `<<<` line.
 *
 * Reads the module-level `categories` list; logs a summary via console.warn.
 */
function generateAllAnchorsAdoc() {
  const anchorsDir = path.join(ROOT, 'docs/anchors')

  const header = [
    '= Semantic Anchors — Complete Reference',
    ':toc:',
    ':toc-placement: preamble',
    ':toclevels: 2',
    '',
    'include::about.adoc[leveloffset=+1]',
    '',
    '<<<',
    '',
  ]

  const sections = categories.flatMap((category) => {
    // Anchor ids without a matching file are skipped silently.
    const includes = category.anchors
      .filter((anchorId) => fs.existsSync(path.join(anchorsDir, `${anchorId}.adoc`)))
      .flatMap((anchorId) => [`include::anchors/${anchorId}.adoc[leveloffset=+2]`, ''])

    return [`== ${category.name}`, '', ...includes, '<<<', '']
  })

  const target = path.join(ROOT, 'docs/all-anchors.adoc')
  fs.writeFileSync(target, [...header, ...sections].join('\n'), 'utf-8')
  console.warn(`Generated: docs/all-anchors.adoc (${categories.length} categories)`)
}
117+
118+
// ─── Generate website/public/llms.txt ───────────────────────────────────────
119+
120+
/**
 * Write website/public/llms.txt: a Markdown rendering of docs/about.adoc
 * followed by every anchor file, grouped under one `##` heading per category.
 *
 * For each anchor the first `= Title` line becomes a `###` heading (falling
 * back to the anchor id when the file has no title line) and the rest of the
 * file is converted with adocToMarkdown. Anchor ids without a matching file
 * under docs/anchors are skipped.
 *
 * Reads the module-level `categories` list; logs a summary via console.warn.
 * fs.readFileSync throws if docs/about.adoc cannot be read.
 */
function generateLlmsTxt() {
  // Counts every id listed in categories.json, including ids whose file is
  // missing and therefore skipped below.
  const totalAnchors = categories.reduce((n, c) => n + c.anchors.length, 0)
  const lines = [
    '# Semantic Anchors — Complete Reference',
    '',
    `> ${totalAnchors} well-defined terms, methodologies, and frameworks`,
    '> that serve as precision reference points when communicating with LLMs.',
    '> Source: https://github.com/LLM-Coding/Semantic-Anchors',
    '> Website: https://llm-coding.github.io/Semantic-Anchors/',
    '',
    '---',
    '',
  ]

  // Introductory content from about.adoc
  const aboutAdoc = fs.readFileSync(path.join(ROOT, 'docs/about.adoc'), 'utf-8')
  lines.push(adocToMarkdown(aboutAdoc))
  lines.push('')
  lines.push('---')
  lines.push('')

  // Anchors by category
  for (const category of categories) {
    lines.push(`## ${category.name}`)
    lines.push('')

    for (const anchorId of category.anchors) {
      const filepath = path.join(ROOT, 'docs/anchors', `${anchorId}.adoc`)
      if (!fs.existsSync(filepath)) continue

      const raw = fs.readFileSync(filepath, 'utf-8')
      const titleMatch = raw.match(/^= (.+)$/m)
      const title = titleMatch ? titleMatch[1] : anchorId

      lines.push(`### ${title}`)
      lines.push('')

      // Remove exactly the title line that was matched above, wherever it sits
      // in the file and whatever its line ending. Otherwise a file with a
      // leading comment/blank line or CRLF endings would keep its title and
      // emit it a second time as a `#` heading.
      let body = raw
      if (titleMatch) {
        const afterTitle = raw.slice(titleMatch.index + titleMatch[0].length)
        body = raw.slice(0, titleMatch.index) + afterTitle.replace(/^\r?\n/, '')
      }
      lines.push(adocToMarkdown(body))
      lines.push('')
    }

    lines.push('---')
    lines.push('')
  }

  const output = lines.join('\n')
  fs.writeFileSync(path.join(ROOT, 'website/public/llms.txt'), output, 'utf-8')
  const kb = Math.round(Buffer.byteLength(output, 'utf-8') / 1024)
  console.warn(`Generated: website/public/llms.txt (${totalAnchors} anchors, ~${kb} KB)`)
}
171+
172+
// ─── Main ────────────────────────────────────────────────────────────────────
173+
174+
generateAllAnchorsAdoc()
175+
generateLlmsTxt()

scripts/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"description": "Build scripts for Semantic Anchors project",
55
"scripts": {
66
"extract-metadata": "node extract-metadata.js",
7-
"split-readme": "node split-readme.js"
7+
"split-readme": "node split-readme.js",
8+
"generate-llms-txt": "node generate-llms-txt.js"
89
},
910
"dependencies": {
1011
"@asciidoctor/core": "^3.0.4",

0 commit comments

Comments
 (0)