Skip to content

Commit 2af3f2d

Browse files
jeremymanning and claude committed
Fix checklist item extraction using nextSibling iteration
- Use nextSibling instead of parent.childNodes to properly iterate through siblings after the h2 heading
- Build checklist items incrementally: start a new item on MJX-CONTAINER, accumulate text from TEXT nodes and inline elements (SPAN, A)
- Skip the first DIV element, which contains duplicate concatenated text
- Stop iteration on TABLE or the next H2

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent 49231af commit 2af3f2d

File tree

1 file changed

+56
-52
lines changed

1 file changed

+56
-52
lines changed

checklist.js

Lines changed: 56 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -38,71 +38,75 @@ function convertChecklistToInteractive() {
3838
if (!checklistHeading) return;
3939

4040
// tex4ht generates a complex structure where checklist items are spread across
41-
// many nodes: MJX-CONTAINER (for checkbox chars), TEXT nodes, SPAN, A elements
42-
// The actual item text is in TEXT NODES between elements, not inside elements
43-
// We need to iterate over ALL child nodes, not just element siblings
44-
45-
var parent = checklistHeading.parentElement;
46-
if (!parent) return;
47-
48-
// Find the index of the heading in parent's childNodes
49-
var startIndex = -1;
50-
var childNodes = parent.childNodes;
51-
for (var i = 0; i < childNodes.length; i++) {
52-
if (childNodes[i] === checklistHeading) {
53-
startIndex = i;
54-
break;
55-
}
56-
}
57-
if (startIndex === -1) return;
41+
// sibling nodes after the h2 heading:
42+
// - MJX-CONTAINER elements contain the checkbox character (□)
43+
// - TEXT nodes contain the actual item text
44+
// - SPAN and A elements contain inline formatting
45+
// We use nextSibling to iterate through ALL siblings (including text nodes)
5846

59-
// Collect all nodes (elements AND text nodes) after the heading until signature table
47+
// Collect all sibling nodes after the heading until we hit a TABLE or end
6048
var nodesToHide = [];
6149
var signatureTable = null;
62-
var fullText = '';
63-
64-
for (var i = startIndex + 1; i < childNodes.length; i++) {
65-
var node = childNodes[i];
50+
var sibling = checklistHeading.nextSibling;
6651

67-
// Check if this is the signature table (TABLE element or contains Signature/Date labels)
68-
if (node.nodeType === Node.ELEMENT_NODE) {
69-
if (node.tagName === 'TABLE' ||
70-
(node.textContent &&
71-
node.textContent.includes('Signature') &&
72-
node.textContent.includes('Date') &&
73-
!node.textContent.includes('tasks'))) {
74-
signatureTable = node;
52+
while (sibling) {
53+
// Check if this is the signature table
54+
if (sibling.nodeType === Node.ELEMENT_NODE) {
55+
if (sibling.tagName === 'TABLE') {
56+
signatureTable = sibling;
57+
break;
58+
}
59+
// Also check for next section (another h2)
60+
if (sibling.tagName === 'H2') {
7561
break;
7662
}
7763
}
64+
nodesToHide.push(sibling);
65+
sibling = sibling.nextSibling;
66+
}
67+
68+
// Now extract checklist items by walking through nodes and building items
69+
// Each item starts after a MJX-CONTAINER (checkbox) and continues until the next one
70+
var items = [];
71+
var currentItem = '';
72+
var foundFirstCheckbox = false;
7873

79-
nodesToHide.push(node);
74+
nodesToHide.forEach(function(node) {
75+
// Check if this node contains a checkbox character
76+
var isCheckbox = false;
77+
if (node.nodeType === Node.ELEMENT_NODE && node.tagName === 'MJX-CONTAINER') {
78+
isCheckbox = true;
79+
}
8080

81-
// Extract text from both text nodes and elements
82-
if (node.nodeType === Node.TEXT_NODE) {
83-
fullText += node.textContent || '';
84-
} else if (node.nodeType === Node.ELEMENT_NODE) {
85-
fullText += node.textContent || '';
81+
if (isCheckbox) {
82+
// Save the previous item if we have one
83+
if (foundFirstCheckbox && currentItem.trim().length > 15) {
84+
items.push(currentItem.trim());
85+
}
86+
currentItem = '';
87+
foundFirstCheckbox = true;
88+
} else if (foundFirstCheckbox) {
89+
// Add text content to current item
90+
if (node.nodeType === Node.TEXT_NODE) {
91+
currentItem += node.textContent || '';
92+
} else if (node.nodeType === Node.ELEMENT_NODE) {
93+
// For elements like SPAN and A, get their text content
94+
// But skip the first DIV which has duplicate content
95+
if (node.tagName !== 'DIV') {
96+
currentItem += node.textContent || '';
97+
}
98+
}
8699
}
87-
}
100+
});
88101

89-
// Check if we have checkbox characters
90-
if (!fullText.includes('\u25A1') && !fullText.includes('\u2610')) {
91-
return;
102+
// Don't forget the last item
103+
if (currentItem.trim().length > 15) {
104+
items.push(currentItem.trim());
92105
}
93106

94-
// Split by checkbox character (□ or ☐) and extract items
95-
var items = [];
96-
var parts = fullText.split(/[\u25A1\u2610]/);
97-
98-
parts.forEach(function(part, index) {
99-
if (index === 0) return; // Skip text before first checkbox
100-
var cleanText = part.trim();
101-
// Remove leading/trailing punctuation artifacts
102-
cleanText = cleanText.replace(/^[\s,]+/, '').replace(/[\s,]+$/, '');
103-
if (cleanText.length > 15) { // Only include substantial items
104-
items.push(cleanText);
105-
}
107+
// Clean up items - remove extra whitespace
108+
items = items.map(function(item) {
109+
return item.replace(/\s+/g, ' ').trim();
106110
});
107111

108112
if (items.length === 0) return;

0 commit comments

Comments
 (0)