Skip to content

Commit b90c909

Browse files
author
Steve Ramage
committed
Checkpoint
1 parent ea52ccb commit b90c909

File tree

1 file changed

+66
-12
lines changed

1 file changed

+66
-12
lines changed

buildSrc/src/main/groovy/GenerateDataFromManPages.groovy

Lines changed: 66 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import org.gradle.api.tasks.OutputDirectory
55
import org.gradle.api.tasks.TaskAction
66
import org.gradle.api.tasks.Internal
77
import org.w3c.dom.Document
8+
import org.w3c.dom.Element
89
import org.w3c.dom.Node
910
import org.w3c.dom.NodeList
1011
import org.w3c.dom.ls.DOMImplementationLS
@@ -14,6 +15,7 @@ import org.xml.sax.InputSource
1415

1516
import javax.xml.parsers.DocumentBuilder
1617
import javax.xml.parsers.DocumentBuilderFactory
18+
import javax.xml.transform.OutputKeys
1719
import javax.xml.transform.Transformer
1820
import javax.xml.transform.TransformerFactory
1921
import javax.xml.transform.dom.DOMSource
@@ -22,8 +24,8 @@ import javax.xml.transform.stream.StreamSource
2224
import javax.xml.xpath.XPath
2325
import javax.xml.xpath.XPathConstants
2426
import javax.xml.xpath.XPathFactory
27+
import java.util.concurrent.ConcurrentHashMap
2528
import java.util.regex.Matcher
26-
import java.util.regex.Pattern
2729

2830
/**
2931
* This task scans the systemd source code man pages to extract the set of available options as well as (eventually) documentation.
@@ -464,7 +466,6 @@ class GenerateDataFromManPages extends DefaultTask {
464466
// 🔥 Regex to match <xi:include href="some.xml" xpointer="some-id"/> (xpointer is optional)
465467
def includePattern = /<xi:include\s+href="([^"]+)"(?:\s+xpointer="([^"]+)")?\s*\/>/
466468

467-
468469
return xmlContent.replaceAll(includePattern) { match, href, xpointer ->
469470
File includedFile = new File(baseDir, href)
470471

@@ -473,27 +474,80 @@ class GenerateDataFromManPages extends DefaultTask {
473474
return "<!-- Failed to include: $href -->"
474475
}
475476

476-
String includedContent = includedFile.text
477477

478-
if (xpointer) {
479-
// Extract only the element with the specified ID
478+
// ✅ Load XML properly instead of using regex
479+
String xptr = xpointer
480+
String includedContent = GenerateDataFromManPages.extractElementById(includedFile, xptr)
481+
482+
return includedContent ?: "<!-- Failed to find xpointer '$xpointer' in $href -->"
483+
484+
}
485+
}
486+
487+
private static final Map<String, String> cache = new ConcurrentHashMap<>()
488+
489+
/**
 * Extracts a single element from an XML file and returns it as an XML string wrapped in
 * marker comments naming the source file and the requested id.
 *
 * Results are memoized in {@code cache} under "absolutePath::elementId". Failed lookups are
 * memoized as an empty string, because ConcurrentHashMap does not permit null values, and are
 * translated back to {@code null} for the caller.
 *
 * @param xmlFile   the XML file to read
 * @param elementId value of the {@code id} attribute to look for; when null or empty the
 *                  document's root element is used (an include without an xpointer)
 * @return the wrapped, serialized element, or {@code null} when the file cannot be parsed or
 *         no element carries the requested id
 */
private static String extractElementById(File xmlFile, String elementId) {
    String cacheKey = xmlFile.getAbsolutePath() + "::" + elementId

    // One lookup instead of containsKey + get; "" marks a previously failed extraction
    String cached = cache.get(cacheKey)
    if (cached != null) {
        return cached.isEmpty() ? null : cached
    }

    try {
        println("Processing $cacheKey")

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance()
        factory.setNamespaceAware(true) // Needed for XML ID lookup
        DocumentBuilder builder = factory.newDocumentBuilder()

        // withReader closes the reader even when parsing throws
        Document document = xmlFile.withReader { reader ->
            builder.parse(new InputSource(reader))
        }
        document.getDocumentElement().normalize()

        // No id requested: fall back to the root element rather than reporting "not found"
        Node targetNode = elementId ? findElementById(document, elementId) : document.getDocumentElement()
        if (targetNode == null) {
            println "⚠️ WARNING: Element with id='$elementId' not found in ${xmlFile.name}"
            cache.put(cacheKey, "")
            return null
        }

        // Convert the found node back to an XML string
        String extractedXml = nodeToString(targetNode)
        String value = "<!--xi:include='$xmlFile.name' xpointer='$elementId'-->" + extractedXml + "<!-- /xi:include='$xmlFile.name' xpointer='$elementId' -->"
        cache.put(cacheKey, value)

        return value

    } catch (Exception e) {
        println "❌ ERROR: Failed to extract element by id='$elementId' from ${xmlFile.name}: ${e.message}"
        // ConcurrentHashMap.put(key, null) throws NullPointerException, so store the failure marker
        cache.put(cacheKey, "")
        return null
    }
}
495526

527+
/**
 * Scans every element of the document, in the order returned by
 * {@code getElementsByTagName("*")}, for one whose {@code id} attribute equals the given value.
 *
 * @param document  the parsed document to search
 * @param elementId the {@code id} attribute value to match
 * @return the first matching element, or {@code null} when none matches
 */
private static Node findElementById(Document document, String elementId) {
    NodeList candidates = document.getElementsByTagName("*") // "*" selects all elements
    int index = 0
    while (index < candidates.getLength()) {
        Element candidate = (Element) candidates.item(index)
        boolean idMatches = candidate.hasAttribute("id") && candidate.getAttribute("id") == elementId
        if (idMatches) {
            return candidate
        }
        index++
    }
    return null
}
496537

538+
/**
 * Serializes a DOM node to an XML string without the leading XML declaration.
 *
 * @param node the node to serialize
 * @return the node's XML text, or an empty string when serialization fails (the failure is
 *         printed, not rethrown)
 */
private static String nodeToString(Node node) {
    StringWriter buffer = new StringWriter()
    try {
        Transformer serializer = TransformerFactory.newInstance().newTransformer()
        // Output is spliced into another document, so it must not carry its own <?xml ...?> header
        serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes")
        serializer.transform(new DOMSource(node), new StreamResult(buffer))
    } catch (Exception e) {
        println "❌ ERROR: Failed to convert node to string: ${e.message}"
        return ""
    }
    return buffer.toString()
}
497551
/**
498552
* Transforms the supplied document with the supplied transformer
499553
* @param document - XML Document to transform

0 commit comments

Comments
 (0)