Skip to content

Commit 11dcdb5

Browse files
raifdmuellerclaude
andcommitted
fix: Improve AsciiDoc→Markdown converter in generate-llms-txt
- Fix table conversion: use block-level parser (convertAdocTable) that correctly handles cells on separate lines → proper Markdown table - Fix line continuation: remove AsciiDoc '+' continuation marker - Fix nested definition lists: term::: now converted to '- **term**:' - Fix relative links: ../file.adoc → absolute GitHub URLs - Fix def-list regex: use [^\S\n]* instead of \s* to prevent matching across newlines (was causing term desc to bleed into next paragraph) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 9a9e843 commit 11dcdb5

File tree

2 files changed

+118
-65
lines changed

2 files changed

+118
-65
lines changed

scripts/generate-llms-txt.js

Lines changed: 67 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,51 @@ const categories = JSON.parse(
1717
fs.readFileSync(path.join(ROOT, 'website/public/data/categories.json'), 'utf-8')
1818
)
1919

20+
// ─── AsciiDoc table converter ────────────────────────────────────────────────
21+
22+
/**
 * Convert the body of an AsciiDoc table (the text between the `|===`
 * delimiters) into a Markdown pipe table.
 *
 * Cells are collected in document order and then grouped into rows. The
 * column count is taken from the first line that starts with `|`, so the
 * header cells must share one line while body cells may each sit on their
 * own line.
 *
 * Handling of special input:
 * - A line without any `|` that follows a cell is treated as wrapped cell
 *   text and is appended to that cell, separated by a single space.
 * - An escaped pipe (`\|`) stays inside its cell instead of starting a new one.
 * - A final row with too few cells is padded with empty cells so every row
 *   has the same number of columns as the header.
 * - Blank lines, and non-`|`-leading lines that contain a pipe (for example
 *   cell specifiers such as `a|`), are skipped.
 *
 * @param {string} body Raw table body, without the `|===` delimiter lines.
 * @returns {string} Markdown table (header row, `---` separator, body rows)
 *   joined by `\n` with no trailing newline, or `''` when no cells are found.
 */
function convertAdocTable(body) {
  const cells = []
  let colCount = 0

  for (const line of body.split('\n')) {
    const trimmed = line.trim()
    if (!trimmed) continue

    if (!trimmed.startsWith('|')) {
      // Wrapped cell text: keep it with the cell it belongs to. Lines that
      // contain a pipe are left out so no unescaped `|` can leak into a cell.
      if (cells.length > 0 && !trimmed.includes('|')) {
        const last = cells.length - 1
        cells[last] = cells[last] ? `${cells[last]} ${trimmed}` : trimmed
      }
      continue
    }

    const lineCells = splitAdocCells(trimmed)
    // The first cell-bearing line defines the table width.
    if (colCount === 0) colCount = lineCells.length
    cells.push(...lineCells)
  }

  if (cells.length === 0) return ''

  // Group the flat cell list into rows of `colCount`, padding a short last row.
  const rows = []
  for (let i = 0; i < cells.length; i += colCount) {
    const row = cells.slice(i, i + colCount)
    while (row.length < colCount) row.push('')
    rows.push(row)
  }

  const toMarkdownRow = (row) => `| ${row.join(' | ')} |`
  const [header, ...bodyRows] = rows

  return [
    toMarkdownRow(header),
    toMarkdownRow(header.map(() => '---')),
    ...bodyRows.map(toMarkdownRow),
  ].join('\n')
}

/**
 * Split one table line that starts with `|` into trimmed cell texts.
 * A pipe preceded by a backslash is kept as part of the current cell.
 *
 * @param {string} line Trimmed line beginning with `|`.
 * @returns {string[]} Cell contents without the leading pipe.
 */
function splitAdocCells(line) {
  const parts = []
  for (const piece of line.split(/(?=\|)/)) {
    const last = parts.length - 1
    if (last >= 0 && parts[last].endsWith('\\')) {
      // Previous piece ended in `\`, so this pipe is escaped content.
      parts[last] += piece
    } else {
      parts.push(piece)
    }
  }
  return parts.map((part) => part.slice(1).trim())
}
64+
2065
// ─── AsciiDoc → Markdown converter ──────────────────────────────────────────
2166

2267
function adocToMarkdown(adoc) {
@@ -44,25 +89,35 @@ function adocToMarkdown(adoc) {
4489
md = md.replace(/^\[%collapsible\]\s*$/gm, '')
4590
md = md.replace(/^====\s*$/gm, '')
4691

47-
// Tables |=== → remove delimiters
48-
md = md.replace(/^\|===\s*$/gm, '')
49-
50-
// Table rows: |cell content → keep, clean up leading pipe
51-
md = md.replace(/^\|(.+)$/gm, (_, row) => {
52-
const cells = row.split('|').map((c) => c.trim()).filter(Boolean)
53-
return '| ' + cells.join(' | ') + ' |'
54-
})
92+
// Tables: convert full blocks including optional attribute line (handles multi-line cells)
93+
md = md.replace(/(?:\[[^\]]*\]\s*\n)?\|===\s*\n([\s\S]*?)\|===\s*/g, (_, body) =>
94+
convertAdocTable(body)
95+
)
5596

56-
// Remove block attribute lines
97+
// Remove remaining block attribute lines
5798
md = md.replace(/^\[(?:horizontal|sidebar|cols[^\]]*|options[^\]]*|%\w+[^\]]*)\]\s*$/gm, '')
5899

100+
// AsciiDoc line continuation (+) → remove
101+
md = md.replace(/^\+\s*$/gm, '')
102+
103+
// Nested definition lists: term::: description → - **term**: description
104+
md = md.replace(/^([^:\n|#`>]+):::[^\S\n]*(.*)$/gm, (_, term, desc) =>
105+
desc.trim() ? `- **${term.trim()}**: ${desc.trim()}` : `- **${term.trim()}**`
106+
)
107+
59108
// Definition lists: term:: description → **term**: description
60-
md = md.replace(/^([^:\n|#`>]+)::\s*(.*)$/gm, (_, term, desc) =>
109+
// Use [^\S\n]* (horizontal whitespace only) to avoid matching across newlines
110+
md = md.replace(/^([^:\n|#`>]+)::[^\S\n]*(.*)$/gm, (_, term, desc) =>
61111
desc.trim() ? `**${term.trim()}**: ${desc.trim()}` : `**${term.trim()}**`
62112
)
63113

64-
// Links: link:url[text] → [text](url)
65-
md = md.replace(/link:([^\[]+)\[([^\]]*)\]/g, '[$2]($1)')
114+
// Links: link:url[text] → [text](url), resolve relative .adoc paths to GitHub URLs
115+
md = md.replace(/link:([^\[]+)\[([^\]]*)\]/g, (_, url, text) => {
116+
if (/^\.\.\/.*\.adoc$/.test(url)) {
117+
url = 'https://github.com/LLM-Coding/Semantic-Anchors/blob/main/' + url.slice(3)
118+
}
119+
return `[${text}](${url})`
120+
})
66121

67122
// Cross-references: <<id,text>> → text, <<id>> → `id`
68123
md = md.replace(/<<([^,>]+),([^>]+)>>/g, '$2')

website/public/llms.txt

Lines changed: 51 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -86,26 +86,13 @@ Not every well-known term qualifies as a semantic anchor. Understanding what **d
8686
These terms are frequently used but **lack the depth and definition** required for semantic anchors:
8787

8888
| ❌ Not a Semantic Anchor | Why Not? |
89-
90-
| **"TLDR"** |
91-
| Underspecified instruction with no defined structure or methodology. Just means "be brief" without any framework for how. |
92-
93-
| **"ELI5"** (Explain Like I'm 5) |
94-
| Vague target level with no pedagogical framework. What does "5-year-old level" mean technically? No consistent interpretation. |
95-
96-
| **"Keep it short"** |
97-
| Pure directive with no conceptual depth. It's an instruction, not a methodology. |
98-
99-
| **"Best practices"** |
100-
| Too vague and not attributable. Whose best practices? Based on what research or authority? |
101-
102-
| **"Modern approach"** |
103-
| Ambiguous and inconsistent. "Modern" means different things to different people and changes over time. |
104-
105-
| **"Make it simple"** |
106-
| No reference to specific simplification frameworks (unlike KISS principle or Occam's Razor which **are** semantic anchors). |
107-
108-
### Comparison: Good vs. Bad
89+
| --- | --- |
90+
| **"TLDR"** | Underspecified instruction with no defined structure or methodology. Just means "be brief" without any framework for how. |
91+
| **"ELI5"** (Explain Like I'm 5) | Vague target level with no pedagogical framework. What does "5-year-old level" mean technically? No consistent interpretation. |
92+
| **"Keep it short"** | Pure directive with no conceptual depth. It's an instruction, not a methodology. |
93+
| **"Best practices"** | Too vague and not attributable. Whose best practices? Based on what research or authority? |
94+
| **"Modern approach"** | Ambiguous and inconsistent. "Modern" means different things to different people and changes over time. |
95+
| **"Make it simple"** | No reference to specific simplification frameworks (unlike KISS principle or Occam's Razor which **are** semantic anchors). |

### Comparison: Good vs. Bad
10996

11097
**Example 1: Testing Instructions**
11198

@@ -275,7 +262,7 @@ If the LLM provides accurate, detailed information, it's safe to use that anchor
275262

276263
## Contributing
277264

278-
We welcome proposals for new semantic anchors! See our [Contributing Guide](../CONTRIBUTING.adoc) for details on:
265+
We welcome proposals for new semantic anchors! See our [Contributing Guide](https://github.com/LLM-Coding/Semantic-Anchors/blob/main/CONTRIBUTING.adoc) for details on:
279266

280267
* How to propose a new anchor
281268
* Quality criteria and testing methodology
@@ -740,7 +727,8 @@ to specific team members.
740727
* Communicating the nature of changes to teammates, the public, and other stakeholders
741728
**Schema**: <type>[!][(optional scope)]: <description> + optional body/footer
742729

743-
**Common Types**: * **feat:** - introduce new feature to the codebase (-> Semver Minor)
730+
**Common Types**
731+
* **feat:** - introduce new feature to the codebase (-> Semver Minor)
744732
* **fix:** - patches a bug in your codebase (-> SemVer Patch)
745733
* **docs:** - documentation improvements to the codebase
746734
* **chore:** - codebase/repository housekeeping changes
@@ -807,11 +795,12 @@ to specific team members.
807795

808796
**Core Concepts**:
809797

810-
**Version format**: +
811-
**MAJOR**: : Incompatible API changes (breaking changes)
812-
**MINOR**: : Backward-compatible functionality additions
813-
**PATCH**: : Backward-compatible bug fixes
814-
**Pre-release versions**: : Append hyphen and identifiers (e.g., 1.0.0-alpha.1)
798+
**Version format**
799+
800+
- **MAJOR**: Incompatible API changes (breaking changes)
801+
- **MINOR**: Backward-compatible functionality additions
802+
- **PATCH**: Backward-compatible bug fixes
803+
- **Pre-release versions**: Append hyphen and identifiers (e.g., 1.0.0-alpha.1)
815804

816805
**Build metadata**: Append plus sign and identifiers (e.g., 1.0.0+20241111)
817806

@@ -1047,13 +1036,15 @@ to specific team members.
10471036

10481037
**Core Concepts**:
10491038

1050-
**Four documentation types**: +
1051-
**Tutorials**: : Learning-oriented, lessons for beginners
1052-
**How-to guides**: : Task-oriented, directions for specific goals
1053-
**Reference**: : Information-oriented, technical descriptions
1054-
**Explanation**: : Understanding-oriented, conceptual discussions
1039+
**Four documentation types**
1040+
1041+
- **Tutorials**: Learning-oriented, lessons for beginners
1042+
- **How-to guides**: Task-oriented, directions for specific goals
1043+
- **Reference**: Information-oriented, technical descriptions
1044+
- **Explanation**: Understanding-oriented, conceptual discussions
10551045

1056-
**Two dimensions**: * Practical vs. Theoretical
1046+
**Two dimensions**
1047+
* Practical vs. Theoretical
10571048
* Acquisition (learning) vs. Application (working)
10581049

10591050
**Separation of concerns**: Each type serves a distinct purpose
@@ -1621,7 +1612,8 @@ Filter by constraints (e.g., "AsciiDoc + Hugo not well-supported")
16211612

16221613
**Lightweight documentation**: Short, focused records
16231614

1624-
**Standard structure**: * Title
1615+
**Standard structure**
1616+
* Title
16251617
* Status (proposed, accepted, deprecated, superseded)
16261618
* Context (forces at play)
16271619
* Decision (what was chosen)
@@ -1694,11 +1686,12 @@ Filter by constraints (e.g., "AsciiDoc + Hugo not well-supported")
16941686

16951687
**Core Concepts**:
16961688

1697-
**Four levels of abstraction**: +
1698-
**Level 1 - Context**: : System in its environment (users, external systems)
1699-
**Level 2 - Container**: : Applications and data stores that make up the system
1700-
**Level 3 - Component**: : Components within containers
1701-
**Level 4 - Code**: : Class diagrams, entity relationships (optional)
1689+
**Four levels of abstraction**
1690+
1691+
- **Level 1 - Context**: System in its environment (users, external systems)
1692+
- **Level 2 - Container**: Applications and data stores that make up the system
1693+
- **Level 3 - Component**: Components within containers
1694+
- **Level 4 - Code**: Class diagrams, entity relationships (optional)
17021695

17031696
**Zoom in/out**: Progressive disclosure of detail
17041697

@@ -1826,7 +1819,8 @@ Filter by constraints (e.g., "AsciiDoc + Hugo not well-supported")
18261819

18271820
**Structured template**: Well-defined format with specific sections
18281821

1829-
**Standard fields**: * Title (short noun phrase)
1822+
**Standard fields**
1823+
* Title (short noun phrase)
18301824
* Status (proposed, accepted, rejected, deprecated, superseded)
18311825
* Context and Problem Statement
18321826
* Decision Drivers (forces influencing the decision)
@@ -1882,7 +1876,8 @@ Filter by constraints (e.g., "AsciiDoc + Hugo not well-supported")
18821876

18831877
**Special Cause Variation**: Assignable, correctable deviation
18841878

1885-
**Chart Types**: * X-bar Chart: Subgroup means
1879+
**Chart Types**
1880+
* X-bar Chart: Subgroup means
18861881
* R-Chart: Subgroup ranges
18871882
* I-MR Chart: Individual values and moving range
18881883
* p-Chart: Defect proportions
@@ -2014,12 +2009,13 @@ SPC:: Nelson Rules are a tool within Statistical Process Control
20142009

20152010
**Core Concepts**:
20162011

2017-
**Five domains**: +
2018-
**Clear (formerly "Simple")**: : Best practices apply, sense-categorize-respond
2019-
**Complicated**: : Good practices exist, sense-analyze-respond
2020-
**Complex**: : Emergent practices, probe-sense-respond
2021-
**Chaotic**: : Novel practices needed, act-sense-respond
2022-
**Confused (center)**: : Don't know which domain you're in
2012+
**Five domains**
2013+
2014+
- **Clear (formerly "Simple")**: Best practices apply, sense-categorize-respond
2015+
- **Complicated**: Good practices exist, sense-analyze-respond
2016+
- **Complex**: Emergent practices, probe-sense-respond
2017+
- **Chaotic**: Novel practices needed, act-sense-respond
2018+
- **Confused (center)**: Don't know which domain you're in
20232019

20242020
**Domain transitions**: How situations move between domains
20252021

@@ -2182,11 +2178,12 @@ SPC:: Nelson Rules are a tool within Statistical Process Control
21822178

21832179
**Core Concepts**:
21842180

2185-
**Four Safety Integrity Levels**: +
2186-
**SIL 1 (lowest)**: : 10^-2^ ≤ PFD < 10^-1^ (tolerable risk reduction)
2187-
**SIL 2**: : 10^-3^ ≤ PFD < 10^-2^ (moderate risk reduction)
2188-
**SIL 3**: : 10^-4^ ≤ PFD < 10^-3^ (substantial risk reduction)
2189-
**SIL 4 (highest)**: : 10^-5^ ≤ PFD < 10^-4^ (maximum risk reduction)
2181+
**Four Safety Integrity Levels**
2182+
2183+
- **SIL 1 (lowest)**: 10^-2^ ≤ PFD < 10^-1^ (tolerable risk reduction)
2184+
- **SIL 2**: 10^-3^ ≤ PFD < 10^-2^ (moderate risk reduction)
2185+
- **SIL 3**: 10^-4^ ≤ PFD < 10^-3^ (substantial risk reduction)
2186+
- **SIL 4 (highest)**: 10^-5^ ≤ PFD < 10^-4^ (maximum risk reduction)
21902187

21912188
**Risk-based classification**: SIL level determined by hazard analysis and risk assessment
21922189

@@ -2387,7 +2384,8 @@ SPC:: Nelson Rules are a tool within Statistical Process Control
23872384

23882385
**Core Concepts**:
23892386

2390-
**Three layers**: * **Unit tests** (base): Many fast, isolated tests
2387+
**Three layers**
2388+
* **Unit tests** (base): Many fast, isolated tests
23912389
* **Integration tests** (middle): Moderate number, test component interaction
23922390
* **End-to-end tests** (top): Few, test complete user journeys
23932391

0 commit comments

Comments
 (0)