|
15 | 15 | import org.elasticsearch.ingest.IngestDocument; |
16 | 16 | import org.elasticsearch.ingest.Processor; |
17 | 17 |
|
| 18 | +import java.nio.charset.StandardCharsets; |
18 | 19 | import java.util.ArrayList; |
19 | 20 | import java.util.HashMap; |
20 | 21 | import java.util.Iterator; |
21 | 22 | import java.util.List; |
22 | 23 | import java.util.Locale; |
23 | 24 | import java.util.Map; |
| 25 | +import java.util.regex.Pattern; |
24 | 26 |
|
25 | 27 | import javax.xml.namespace.NamespaceContext; |
26 | 28 | import javax.xml.parsers.DocumentBuilder; |
@@ -52,6 +54,9 @@ public final class XmlProcessor extends AbstractProcessor { |
52 | 54 |
|
53 | 55 | private static final XPathFactory XPATH_FACTORY = XPathFactory.newInstance(); |
54 | 56 |
|
| 57 | + // Pre-compiled pattern to detect namespace prefixes |
| 58 | + private static final Pattern NAMESPACE_PATTERN = Pattern.compile(".*\\b[a-zA-Z][a-zA-Z0-9_-]*:[a-zA-Z][a-zA-Z0-9_-]*.*"); |
| 59 | + |
55 | 60 | // Pre-configured SAX parser factories for secure XML parsing |
56 | 61 | private static final javax.xml.parsers.SAXParserFactory SAX_PARSER_FACTORY = createSecureSaxParserFactory(); |
57 | 62 | private static final javax.xml.parsers.SAXParserFactory SAX_PARSER_FACTORY_NS = createSecureSaxParserFactoryNamespaceAware(); |
@@ -383,16 +388,14 @@ public Iterator<String> getPrefixes(String namespaceURI) { |
383 | 388 | }); |
384 | 389 | } |
385 | 390 |
|
386 | | - // Pre-compiled pattern to detect namespace prefixes |
387 | | - java.util.regex.Pattern namespacePattern = |
388 | | - java.util.regex.Pattern.compile(".*\\b[a-zA-Z][a-zA-Z0-9_-]*:[a-zA-Z][a-zA-Z0-9_-]*.*"); |
| 391 | + // Namespace prefixes in XPath expressions are detected with the class-level NAMESPACE_PATTERN |
389 | 392 |
|
390 | 393 | for (Map.Entry<String, String> entry : xpathExpressions.entrySet()) { |
391 | 394 | String xpathExpression = entry.getKey(); |
392 | 395 | String targetFieldName = entry.getValue(); |
393 | 396 |
|
394 | 397 | // Validate namespace prefixes if no namespaces are configured |
395 | | - if (!hasNamespaces && namespacePattern.matcher(xpathExpression).matches()) { |
| 398 | + if (!hasNamespaces && NAMESPACE_PATTERN.matcher(xpathExpression).matches()) { |
396 | 399 | throw new IllegalArgumentException( |
397 | 400 | "Invalid XPath expression [" + xpathExpression + "]: contains namespace prefixes but no namespace configuration provided" |
398 | 401 | ); |
@@ -476,7 +479,7 @@ public XmlProcessor create( |
476 | 479 |
|
477 | 480 | // Parse parse_options parameter |
478 | 481 | String parseOptions = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "parse_options", ""); |
479 | | - if (parseOptions != null && parseOptions != "" && !"strict".equals(parseOptions)) { |
| 482 | + if (parseOptions != null && !parseOptions.isEmpty() && !"strict".equals(parseOptions)) { |
480 | 483 | throw new IllegalArgumentException("Invalid parse_options [" + parseOptions + "]. Only 'strict' is supported."); |
481 | 484 | } |
482 | 485 |
|
@@ -528,7 +531,7 @@ public void fatalError(org.xml.sax.SAXParseException exception) throws org.xml.s |
528 | 531 | // Use enhanced handler that can build DOM during streaming when needed |
529 | 532 | XmlStreamingWithDomHandler handler = new XmlStreamingWithDomHandler(needsDom); |
530 | 533 |
|
531 | | - parser.parse(new java.io.ByteArrayInputStream(xmlString.getBytes("UTF-8")), handler); |
| 534 | + parser.parse(new java.io.ByteArrayInputStream(xmlString.getBytes(StandardCharsets.UTF_8)), handler); |
532 | 535 |
|
533 | 536 | // Store structured result if needed |
534 | 537 | if (storeXml) { |
@@ -926,10 +929,11 @@ private static DocumentBuilderFactory createSecureDocumentBuilderFactory() { |
926 | 929 | * @return the appropriate SAX parser factory for the current configuration |
927 | 930 | */ |
928 | 931 | private javax.xml.parsers.SAXParserFactory selectSaxParserFactory() { |
| 932 | + boolean needsNamespaceAware = hasNamespaces() || removeNamespaces; |
929 | 933 | if (isStrict()) { |
930 | | - return hasNamespaces() ? SAX_PARSER_FACTORY_NS_STRICT : SAX_PARSER_FACTORY_STRICT; |
| 934 | + return needsNamespaceAware ? SAX_PARSER_FACTORY_NS_STRICT : SAX_PARSER_FACTORY_STRICT; |
931 | 935 | } else { |
932 | | - return hasNamespaces() ? SAX_PARSER_FACTORY_NS : SAX_PARSER_FACTORY; |
| 936 | + return needsNamespaceAware ? SAX_PARSER_FACTORY_NS : SAX_PARSER_FACTORY; |
933 | 937 | } |
934 | 938 | } |
935 | 939 | } |
0 commit comments