Skip to content

Commit 51070bc

Browse files
committed
feat(remark): Improve auto-detection of terms in headings
The `mdastToPlainText` function has been enhanced to reliably extract text from various types of headings in Markdown and MDX.

- Explicitly handles `heading` nodes from standard Markdown.
- Detects heading elements in MDX, including standard `<h2>` tags and custom components like `<Heading>`.
- Uses `toString` to capture the full text content of headings and `SKIP` to prevent double-counting of text nodes within them.
- Improved warning messages to provide more context when auto-detection fails.

This fixes a bug where terms inside headings were ignored by the `auto` parameter, leading to empty tables.
1 parent e13ebd2 commit 51070bc

File tree

1 file changed

+83
-18
lines changed

1 file changed

+83
-18
lines changed

src/remark/index.ts

Lines changed: 83 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { getSharedData, resolveIds, selectDataSource, loadResistanceDataForSourc
44
import { join } from "path";
55
import chalk from 'chalk';
66

7-
// AST → Plaintext (robust über MD/MDX, ohne Code/InlineCode, ohne %%RESIST-Zeilen)
7+
// AST → Plaintext (robust for MD/MDX, excluding code, inlineCode, and %%RESIST paragraphs)
88
export function mdastToPlainText(root: any): string {
99
let out = "";
1010
const push = (s: string) => {
@@ -14,22 +14,53 @@ export function mdastToPlainText(root: any): string {
1414
};
1515

1616
visit(root, (node) => {
17-
if (node.type === "code" || node.type === "inlineCode") {
17+
// --- Skip rules ---------------------------------------------------------
18+
19+
// Skip entire code blocks and inline code.
20+
if (node.type === "code" || node.type === "inlineCode") return SKIP;
21+
22+
// Skip paragraphs that contain a %%RESIST directive.
23+
if (node.type === "paragraph" && /%%RESIST/.test(toString(node))) return SKIP;
24+
25+
// --- Headings -----------------------------------------------------------
26+
27+
// Plain Markdown headings (# Title) are "heading" nodes.
28+
// Extract their text once as a whole and skip visiting children
29+
// to avoid double counting.
30+
if (node.type === "heading") {
31+
push(toString(node));
1832
return SKIP;
1933
}
20-
if (node.type === "paragraph" && /%%RESIST/.test(toString(node))) {
21-
return SKIP;
34+
35+
// MDX JSX elements may represent headings too, e.g. <h2>…</h2> or <Heading>…</Heading>.
36+
// Detect those and extract their text. For other MDX elements,
37+
// keep traversing so text children are collected normally.
38+
if (
39+
node.type === "mdxJsxFlowElement" ||
40+
node.type === "mdxJsxTextElement"
41+
) {
42+
const name = (node as any).name?.toLowerCase?.();
43+
if (name && (/^h[1-6]$/.test(name) || name.includes("heading"))) {
44+
push(toString(node));
45+
return SKIP;
46+
}
2247
}
48+
49+
// --- Plain text ---------------------------------------------------------
50+
51+
// Collect raw text node values.
2352
if (node.type === "text") {
24-
push(node.value as string);
25-
}
26-
if (node.type === "break") {
27-
out += " ";
53+
push((node as any).value as string);
2854
}
55+
56+
// Replace line breaks with spaces to keep words separated.
57+
if (node.type === "break") out += " ";
2958
});
3059

31-
const compact = out.replace(/\s+/g, " ").trim();
32-
return ` ${compact} `;
60+
// Normalize whitespace: collapse multiple spaces and trim.
61+
const compact = out.replace(/\s+/g, " ").trim();
62+
// Surround with spaces so word-boundary regexes (`\b`) work reliably at edges.
63+
return ` ${compact} `;
3364
}
3465

3566
const parseParams = (s: string): Record<string, string> => {
@@ -120,21 +151,55 @@ export default function remarkResistogram(options: { dataDir?: string, files?: a
120151

121152
// --- Build-Time Data Validation ---
122153
const logWarning = (message: string) => {
123-
console.warn(chalk.yellow(`[docusaurus-plugin-resistogram] Warning in ${file.path}:\n${message}\n`));
154+
console.warn(
155+
chalk.yellow(`[docusaurus-plugin-resistogram] Warning in ${file.path}:\n${message}\n`)
156+
);
124157
};
125158

159+
// Show details about which params were "auto" and what failed
160+
const autoInfo: string[] = [];
161+
if (abxParam === "auto") {
162+
autoInfo.push("antibiotics=auto → resolved against page text");
163+
}
164+
if (orgParam === "auto") {
165+
autoInfo.push("organisms=auto → resolved against page text");
166+
}
167+
126168
if (unresolvedAbx.length > 0 || unresolvedOrg.length > 0) {
127169
const unresolved = [...unresolvedAbx, ...unresolvedOrg];
128-
logWarning(`Unrecognized identifiers in "%%RESIST ${paramsStr}%%": ${unresolved.join(', ')}.\nThe table will display an error.`);
170+
logWarning(
171+
`Unrecognized identifiers in "%%RESIST ${paramsStr}%%": ${unresolved.join(
172+
", "
173+
)}.
174+
Resolved antibiotics: ${JSON.stringify(antibioticIds)}.
175+
Resolved organisms: ${JSON.stringify(organismIds)}.
176+
${autoInfo.length ? "Parameter mode: " + autoInfo.join("; ") : ""}`
177+
);
129178
} else if (antibioticIds.length === 0 || organismIds.length === 0) {
130-
logWarning(`The directive "%%RESIST ${paramsStr}%%" did not resolve to any valid antibiotics or organisms.\nThe table will be empty.`);
179+
logWarning(
180+
`The directive "%%RESIST ${paramsStr}%%" did not resolve to any valid antibiotics or organisms.
181+
Resolved antibiotics: ${JSON.stringify(antibioticIds)}.
182+
Resolved organisms: ${JSON.stringify(organismIds)}.
183+
${autoInfo.length ? "Parameter mode: " + autoInfo.join("; ") : ""}`
184+
);
131185
} else {
132-
const resistanceData = await loadResistanceDataForSource(selectedSource, sharedData.sources, dataPath);
133-
const hasData = resistanceData.some(row =>
134-
antibioticIds.includes(row.antibiotic_id) && organismIds.includes(row.organism_id)
186+
const resistanceData = await loadResistanceDataForSource(
187+
selectedSource,
188+
sharedData.sources,
189+
dataPath
190+
);
191+
const hasData = resistanceData.some(
192+
(row) =>
193+
antibioticIds.includes(row.antibiotic_id) &&
194+
organismIds.includes(row.organism_id)
135195
);
136196
if (!hasData) {
137-
logWarning(`No resistance data found for the combination of resolved antibiotics and organisms in "%%RESIST ${paramsStr}%%".\nThe table will be empty.`);
197+
logWarning(
198+
`No resistance data found for the combination in "%%RESIST ${paramsStr}%%".
199+
Resolved antibiotics: ${JSON.stringify(antibioticIds)}.
200+
Resolved organisms: ${JSON.stringify(organismIds)}.
201+
${autoInfo.length ? "Parameter mode: " + autoInfo.join("; ") : ""}`
202+
);
138203
}
139204
}
140205
// --- End Validation ---
@@ -181,4 +246,4 @@ export default function remarkResistogram(options: { dataDir?: string, files?: a
181246

182247
return tree;
183248
};
184-
}
249+
}

0 commit comments

Comments
 (0)