Skip to content

Commit 497d775

Browse files
committed
Change strict parsing option to be boolean
1 parent 8847f4b commit 497d775

File tree

4 files changed

+20
-36
lines changed

4 files changed

+20
-36
lines changed

docs/reference/enrich-processor/xml-processor.md

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ $$$xml-options$$$
2222
| `remove_namespaces` | no | `false` | If `true`, removes namespace prefixes from element and attribute names. |
2323
| `force_content` | no | `false` | If `true`, forces text content and attributes to always parse to a hash value with `#text` key for content. |
2424
| `force_array` | no | `false` | If `true`, forces all parsed values to be arrays. Single elements are wrapped in arrays. |
25-
| `parse_options` | no | - | Controls XML parsing behavior. Set to `"strict"` for strict XML validation that fails fast on invalid content. |
25+
| `strict_parsing` | no | `false` | If `true`, enables strict XML validation that fails fast on invalid content. |
2626
| `xpath` | no | - | Map of XPath expressions to target field names. Extracts values from the XML using XPath and stores them in the specified fields. |
2727
| `namespaces` | no | - | Map of namespace prefixes to URIs for use with XPath expressions. Required when XPath expressions contain namespace prefixes. |
2828
| `description` | no | - | Description of the processor. Useful for describing the purpose of the processor or its configuration. |
@@ -422,7 +422,7 @@ Result:
422422

423423
### Strict parsing mode
424424

425-
Use `parse_options: "strict"` for strict XML validation:
425+
Use `strict_parsing: true` for strict XML validation:
426426

427427
```console
428428
POST _ingest/pipeline/_simulate
@@ -432,7 +432,7 @@ POST _ingest/pipeline/_simulate
432432
{
433433
"xml": {
434434
"field": "xml_content",
435-
"parse_options": "strict",
435+
"strict_parsing": true,
436436
"ignore_failure": true
437437
}
438438
}
@@ -441,7 +441,7 @@ POST _ingest/pipeline/_simulate
441441
"docs": [
442442
{
443443
"_source": {
444-
"xml_content": "<catalog><book><title>Invalid XML with control character</title></book></catalog>"
444+
"xml_content": "<catalog><book><title>Invalid XML with control character \u0000</title></book></catalog>"
445445
}
446446
}
447447
]
@@ -457,7 +457,7 @@ Result (with parsing failure):
457457
"doc": {
458458
...
459459
"_source": {
460-
"xml_content": "<catalog><book><title>Invalid XML with control character</title></book></catalog>",
460+
"xml_content": "<catalog><book><title>Invalid XML with control character \u0000</title></book></catalog>",
461461
"tags": ["_xmlparsefailure"]
462462
}
463463
}

modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/XmlProcessor.java

Lines changed: 9 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ public final class XmlProcessor extends AbstractProcessor {
7979
private final Map<String, String> xpathExpressions;
8080
private final Map<String, String> namespaces;
8181
private final Map<String, XPathExpression> compiledXPathExpressions;
82-
private final String parseOptions;
82+
private final boolean strictParsing;
8383

8484
XmlProcessor(
8585
String tag,
@@ -96,7 +96,7 @@ public final class XmlProcessor extends AbstractProcessor {
9696
boolean forceArray,
9797
Map<String, String> xpathExpressions,
9898
Map<String, String> namespaces,
99-
String parseOptions
99+
boolean strictParsing
100100
) {
101101
super(tag, description);
102102
this.field = field;
@@ -112,7 +112,7 @@ public final class XmlProcessor extends AbstractProcessor {
112112
this.xpathExpressions = xpathExpressions != null ? Map.copyOf(xpathExpressions) : Map.of();
113113
this.namespaces = namespaces != null ? Map.copyOf(namespaces) : Map.of();
114114
this.compiledXPathExpressions = compileXPathExpressions(this.xpathExpressions, this.namespaces);
115-
this.parseOptions = parseOptions != null ? parseOptions : "";
115+
this.strictParsing = strictParsing;
116116
}
117117

118118
public String getField() {
@@ -144,7 +144,7 @@ public boolean isForceContent() {
144144
}
145145

146146
public boolean isStrict() {
147-
return "strict".equals(parseOptions);
147+
return strictParsing;
148148
}
149149

150150
public boolean isForceArray() {
@@ -159,8 +159,8 @@ public Map<String, String> getNamespaces() {
159159
return namespaces;
160160
}
161161

162-
public String getParseOptions() {
163-
return parseOptions;
162+
public boolean getStrictParsing() {
163+
return strictParsing;
164164
}
165165

166166
@Override
@@ -488,11 +488,8 @@ public XmlProcessor create(
488488
}
489489
}
490490

491-
// Parse parse_options parameter
492-
String parseOptions = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "parse_options", "");
493-
if (parseOptions != null && !parseOptions.isEmpty() && !"strict".equals(parseOptions)) {
494-
throw new IllegalArgumentException("Invalid parse_options [" + parseOptions + "]. Only 'strict' is supported.");
495-
}
491+
// Parse strict_parsing parameter
492+
boolean strictParsing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "strict_parsing", false);
496493

497494
return new XmlProcessor(
498495
processorTag,
@@ -509,7 +506,7 @@ public XmlProcessor create(
509506
forceArray,
510507
xpathExpressions,
511508
namespaces,
512-
parseOptions
509+
strictParsing
513510
);
514511
}
515512
}

modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/XmlProcessorFactoryTests.java

Lines changed: 5 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -155,8 +155,8 @@ private Map<String, Object> createConfigWithOptions(String fieldName, String...
155155
case "force_array":
156156
config.put("force_array", true);
157157
break;
158-
case "strict":
159-
config.put("parse_options", "strict");
158+
case "strict_parsing":
159+
config.put("strict_parsing", true);
160160
break;
161161
default:
162162
throw new IllegalArgumentException("Unknown option: " + option);
@@ -329,12 +329,12 @@ public void testCreateWithForceArray() throws Exception {
329329
assertThat(processor.isForceArray(), equalTo(true));
330330
}
331331

332-
public void testCreateWithStrictParseOptions() throws Exception {
333-
Map<String, Object> config = createConfigWithOptions(DEFAULT_FIELD, "strict");
332+
public void testCreateWithStrictParsing() throws Exception {
333+
Map<String, Object> config = createConfigWithOptions(DEFAULT_FIELD, "strict_parsing");
334334
XmlProcessor processor = createProcessor(config);
335335

336336
assertThat(processor.getField(), equalTo(DEFAULT_FIELD));
337-
assertThat(processor.getParseOptions(), equalTo("strict"));
337+
assertThat(processor.getStrictParsing(), equalTo(true));
338338
assertThat(processor.isStrict(), equalTo(true));
339339
}
340340

@@ -355,19 +355,6 @@ public void testCreateWithMultipleOptions() throws Exception {
355355
assertThat(processor.isRemoveNamespaces(), equalTo(true));
356356
}
357357

358-
// Tests for invalid parse options
359-
360-
public void testCreateWithInvalidParseOptions() throws Exception {
361-
Map<String, Object> config = createBaseConfig();
362-
config.put("parse_options", "invalid_option");
363-
364-
expectCreationFailure(
365-
config,
366-
IllegalArgumentException.class,
367-
"Invalid parse_options [invalid_option]. Only 'strict' is supported."
368-
);
369-
}
370-
371358
// Tests for XPath compilation errors (testing precompilation feature)
372359

373360
public void testCreateWithInvalidXPathExpression() throws Exception {

modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/XmlProcessorTests.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -544,7 +544,7 @@ public void testStrictParsing() {
544544
String xml = "<foo><bar>valid</bar></foo>";
545545

546546
Map<String, Object> config = new HashMap<>();
547-
config.put("parse_options", "strict");
547+
config.put("strict_parsing", true);
548548
XmlProcessor processor = createTestProcessor(config);
549549
IngestDocument ingestDocument = createTestIngestDocument(xml);
550550

0 commit comments

Comments
 (0)