Skip to content

Commit e21f297

Browse files
committed
Added support for multiple tag escaping strategies
1 parent 088e6bf commit e21f297

File tree

8 files changed

+301
-81
lines changed

8 files changed

+301
-81
lines changed
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package com.fasterxml.jackson.dataformat.xml;
2+
3+
/**
4+
* Strategy for dealing with tags containing invalid characters. Invalid
5+
* characters in tags can, for instance, easily appear in map keys.
6+
*
7+
* @since 2.14
8+
*/
9+
public enum TagEscapingStrategy {
10+
/**
11+
* Present for backwards compatibility. Using this option may produce
12+
* invalid XML that can no longer be processed. For instance, this
13+
* can easily happen for map keys containing special characters or spaces.
14+
* <p>
15+
* With this strategy, a map with the keys {@code "123"} and
16+
* {@code "$ I am <fancy>! &;"} will be written as:
17+
*
18+
* <pre>{@code
19+
* <DTO>
20+
* <badMap>
21+
* <$ I am <fancy>! &;>xyz</$ I am <fancy>! &;>
22+
* <123>bar</123>
23+
* </badMap>
24+
* </DTO>
25+
* }</pre>
26+
*
27+
* <b>Note:</b> This option is ignored for deserialization, since even
28+
* Jackson can't parse invalid XML.
29+
*
30+
* @since 2.14
31+
*/
32+
NONE,
33+
34+
/**
35+
* Replaces all invalid characters in a tag with a {@code _}. Using this
36+
* option will produce valid XML, but the generated XML will not be
37+
* 100% reversible (as in serializing and deserializing an object may not
38+
* produce the same object again).
39+
* <p>
40+
* With this strategy, a map with the keys {@code "123"} and
41+
* {@code "$ I am <fancy>! &;"} will be written as:
42+
*
43+
* <pre>{@code
44+
* <DTO>
45+
* <badMap>
46+
* <__I_am__fancy_____>xyz</__I_am__fancy_____>
47+
* <_23>bar</_23>
48+
* </badMap>
49+
* </DTO>
50+
* }</pre>
51+
*
52+
* <b>Note:</b> This option is ignored for deserialization, since there is no
53+
* way to reverse this step.
54+
*
55+
* @since 2.14
56+
*/
57+
REPLACE,
58+
59+
/**
60+
* Strategy that indicates that invalid XML tag names should be escaped
61+
* via an attribute ({@code real_name}) in a standard tag.
62+
* <p>
63+
* With this strategy, a map with the keys {@code "123"} and
64+
* {@code "$ I am <fancy>! &;"} will be written as:
65+
*
66+
* <pre>{@code
67+
* <DTO>
68+
* <badMap>
69+
* <escaped_tag real_name="$ I am &lt;fancy>! &amp;;">xyz</escaped_tag>
70+
* <escaped_tag real_name="123">bar</escaped_tag>
71+
* </badMap>
72+
* </DTO>
73+
* }</pre>
74+
*
75+
* @since 2.14
76+
*/
77+
ATTRIBUTE_ESCAPE,
78+
79+
/**
80+
* With this strategy the entire tag is escaped via base64 with the prefix
81+
* {@code base64_tag_}. Here the
82+
* <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-5">base64url</a>
83+
* encoder and decoder are used. The {@code =} padding characters are
84+
* always omitted.
85+
* <p>
86+
* With this strategy, a map with the keys {@code "123"} and
87+
* {@code "$ I am <fancy>! &;"} will be written as:
88+
*
89+
* <pre>{@code
90+
* <DTO>
91+
* <badMap>
92+
* <base64_tag_JCBJIGFtIDxmYW5jeT4hICY7>xyz</base64_tag_JCBJIGFtIDxmYW5jeT4hICY7>
93+
* <base64_tag_MTIz>bar</base64_tag_MTIz>
94+
* </badMap>
95+
* </DTO>
96+
* }</pre>
97+
*
98+
* @since 2.14
99+
*/
100+
BASE64,
101+
102+
;
103+
104+
public static final String ESCAPED_TAG_NAME = "escaped_tag";
105+
public static final String ESCAPED_ATTR_NS = ""; // "jackson";
106+
public static final String ESCAPED_ATTR_NAME = "real_name";
107+
public static final String ESCAPED_BASE64_PREFIX = "base64_tag_";
108+
}

src/main/java/com/fasterxml/jackson/dataformat/xml/XmlFactory.java

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ public class XmlFactory extends JsonFactory
6565
protected transient XMLOutputFactory _xmlOutputFactory;
6666

6767
protected String _cfgNameForTextElement;
68+
69+
protected TagEscapingStrategy _serializationEscapeStrategy = TagEscapingStrategy.NONE;
70+
71+
protected TagEscapingStrategy _deserializationEscapeStrategy = TagEscapingStrategy.NONE;
6872

6973
/*
7074
/**********************************************************
@@ -325,6 +329,17 @@ public int getFormatGeneratorFeatures() {
325329
return _xmlGeneratorFeatures;
326330
}
327331

332+
/**
333+
* Set the serialization strategy. See
334+
* {@link TagEscapingStrategy} for more information.
335+
*
336+
* @since 2.14
337+
*/
338+
public XmlFactory serializationTagEscapingStrategy(TagEscapingStrategy strategy) {
339+
_serializationEscapeStrategy = strategy;
340+
return this;
341+
}
342+
328343
/*
329344
/******************************************************
330345
/* Configuration, XML, generator settings
@@ -366,6 +381,18 @@ public final boolean isEnabled(ToXmlGenerator.Feature f) {
366381
return (_xmlGeneratorFeatures & f.getMask()) != 0;
367382
}
368383

384+
385+
/**
386+
* Set the deserialization strategy. See
387+
* {@link TagEscapingStrategy} for more information.
388+
*
389+
* @since 2.14
390+
*/
391+
public XmlFactory deserializationTagEscapingStrategy(TagEscapingStrategy strategy) {
392+
_deserializationEscapeStrategy = strategy;
393+
return this;
394+
}
395+
369396
/*
370397
/**********************************************************
371398
/* Additional configuration
@@ -497,7 +524,7 @@ public ToXmlGenerator createGenerator(OutputStream out, JsonEncoding enc) throws
497524
final IOContext ctxt = _createContext(_createContentReference(out), false);
498525
ctxt.setEncoding(enc);
499526
return new ToXmlGenerator(ctxt,
500-
_generatorFeatures, _xmlGeneratorFeatures,
527+
_generatorFeatures, _xmlGeneratorFeatures, _serializationEscapeStrategy,
501528
_objectCodec, _createXmlWriter(ctxt, out));
502529
}
503530

@@ -506,7 +533,7 @@ public ToXmlGenerator createGenerator(Writer out) throws IOException
506533
{
507534
final IOContext ctxt = _createContext(_createContentReference(out), false);
508535
return new ToXmlGenerator(ctxt,
509-
_generatorFeatures, _xmlGeneratorFeatures,
536+
_generatorFeatures, _xmlGeneratorFeatures, _serializationEscapeStrategy,
510537
_objectCodec, _createXmlWriter(ctxt, out));
511538
}
512539

@@ -519,7 +546,7 @@ public ToXmlGenerator createGenerator(File f, JsonEncoding enc) throws IOExcepti
519546
final IOContext ctxt = _createContext(_createContentReference(out), true);
520547
ctxt.setEncoding(enc);
521548
return new ToXmlGenerator(ctxt, _generatorFeatures, _xmlGeneratorFeatures,
522-
_objectCodec, _createXmlWriter(ctxt, out));
549+
_serializationEscapeStrategy, _objectCodec, _createXmlWriter(ctxt, out));
523550
}
524551

525552
/*
@@ -543,7 +570,7 @@ public FromXmlParser createParser(XMLStreamReader sr) throws IOException
543570

544571
// false -> not managed
545572
FromXmlParser xp = new FromXmlParser(_createContext(_createContentReference(sr), false),
546-
_parserFeatures, _xmlParserFeatures, _objectCodec, sr);
573+
_parserFeatures, _xmlParserFeatures, _deserializationEscapeStrategy, _objectCodec, sr);
547574
if (_cfgNameForTextElement != null) {
548575
xp.setXMLTextElementName(_cfgNameForTextElement);
549576
}
@@ -562,7 +589,7 @@ public ToXmlGenerator createGenerator(XMLStreamWriter sw) throws IOException
562589
sw = _initializeXmlWriter(sw);
563590
IOContext ctxt = _createContext(_createContentReference(sw), false);
564591
return new ToXmlGenerator(ctxt, _generatorFeatures, _xmlGeneratorFeatures,
565-
_objectCodec, sw);
592+
_serializationEscapeStrategy, _objectCodec, sw);
566593
}
567594

568595
/*
@@ -582,7 +609,7 @@ protected FromXmlParser _createParser(InputStream in, IOContext ctxt) throws IOE
582609
}
583610
sr = _initializeXmlReader(sr);
584611
FromXmlParser xp = new FromXmlParser(ctxt, _parserFeatures, _xmlParserFeatures,
585-
_objectCodec, sr);
612+
_deserializationEscapeStrategy, _objectCodec, sr);
586613
if (_cfgNameForTextElement != null) {
587614
xp.setXMLTextElementName(_cfgNameForTextElement);
588615
}
@@ -600,7 +627,7 @@ protected FromXmlParser _createParser(Reader r, IOContext ctxt) throws IOExcepti
600627
}
601628
sr = _initializeXmlReader(sr);
602629
FromXmlParser xp = new FromXmlParser(ctxt, _parserFeatures, _xmlParserFeatures,
603-
_objectCodec, sr);
630+
_deserializationEscapeStrategy, _objectCodec, sr);
604631
if (_cfgNameForTextElement != null) {
605632
xp.setXMLTextElementName(_cfgNameForTextElement);
606633
}
@@ -627,7 +654,7 @@ protected FromXmlParser _createParser(char[] data, int offset, int len, IOContex
627654
}
628655
sr = _initializeXmlReader(sr);
629656
FromXmlParser xp = new FromXmlParser(ctxt, _parserFeatures, _xmlParserFeatures,
630-
_objectCodec, sr);
657+
_deserializationEscapeStrategy, _objectCodec, sr);
631658
if (_cfgNameForTextElement != null) {
632659
xp.setXMLTextElementName(_cfgNameForTextElement);
633660
}
@@ -651,7 +678,7 @@ protected FromXmlParser _createParser(byte[] data, int offset, int len, IOContex
651678
}
652679
sr = _initializeXmlReader(sr);
653680
FromXmlParser xp = new FromXmlParser(ctxt, _parserFeatures, _xmlParserFeatures,
654-
_objectCodec, sr);
681+
_deserializationEscapeStrategy, _objectCodec, sr);
655682
if (_cfgNameForTextElement != null) {
656683
xp.setXMLTextElementName(_cfgNameForTextElement);
657684
}

src/main/java/com/fasterxml/jackson/dataformat/xml/XmlMapper.java

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,40 @@ public Builder defaultUseWrapper(boolean state) {
108108
_mapper.setDefaultUseWrapper(state);
109109
return this;
110110
}
111+
112+
/**
113+
* Set the serialization and deserialization strategy. See
114+
* {@link TagEscapingStrategy} for more information.
115+
*
116+
* @since 2.14
117+
*/
118+
public Builder tagEscapingStrategy(TagEscapingStrategy strategy) {
119+
serializationTagEscapingStrategy(strategy);
120+
deserializationTagEscapingStrategy(strategy);
121+
return this;
122+
}
123+
124+
/**
125+
* Set the serialization strategy. See
126+
* {@link TagEscapingStrategy} for more information.
127+
*
128+
* @since 2.14
129+
*/
130+
public Builder serializationTagEscapingStrategy(TagEscapingStrategy strategy) {
131+
_mapper.serializationTagEscapingStrategy(strategy);
132+
return this;
133+
}
134+
135+
/**
136+
* Set the deserialization strategy. See
137+
* {@link TagEscapingStrategy} for more information.
138+
*
139+
* @since 2.14
140+
*/
141+
public Builder deserializationTagEscapingStrategy(TagEscapingStrategy strategy) {
142+
_mapper.deserializationTagEscapingStrategy(strategy);
143+
return this;
144+
}
111145
}
112146

113147
protected final static JacksonXmlModule DEFAULT_XML_MODULE = new JacksonXmlModule();
@@ -321,6 +355,40 @@ public ObjectMapper disable(FromXmlParser.Feature f) {
321355
return this;
322356
}
323357

358+
/**
359+
* Set the serialization and deserialization strategy. See
360+
* {@link TagEscapingStrategy} for more information.
361+
*
362+
* @since 2.14
363+
*/
364+
public ObjectMapper tagEscapingStrategy(TagEscapingStrategy strategy) {
365+
serializationTagEscapingStrategy(strategy);
366+
deserializationTagEscapingStrategy(strategy);
367+
return this;
368+
}
369+
370+
/**
371+
* Set the serialization strategy. See
372+
* {@link TagEscapingStrategy} for more information.
373+
*
374+
* @since 2.14
375+
*/
376+
public ObjectMapper serializationTagEscapingStrategy(TagEscapingStrategy strategy) {
377+
((XmlFactory)_jsonFactory).serializationTagEscapingStrategy(strategy);
378+
return this;
379+
}
380+
381+
/**
382+
* Set the deserialization strategy. See
383+
* {@link TagEscapingStrategy} for more information.
384+
*
385+
* @since 2.14
386+
*/
387+
public ObjectMapper deserializationTagEscapingStrategy(TagEscapingStrategy strategy) {
388+
((XmlFactory)_jsonFactory).deserializationTagEscapingStrategy(strategy);
389+
return this;
390+
}
391+
324392
/*
325393
/**********************************************************
326394
/* XML-specific access

src/main/java/com/fasterxml/jackson/dataformat/xml/deser/FromXmlParser.java

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
import com.fasterxml.jackson.core.util.JacksonFeatureSet;
1919

2020
import com.fasterxml.jackson.dataformat.xml.PackageVersion;
21+
import com.fasterxml.jackson.dataformat.xml.TagEscapingStrategy;
2122
import com.fasterxml.jackson.dataformat.xml.XmlMapper;
22-
import com.fasterxml.jackson.dataformat.xml.ser.ToXmlGenerator;
2323
import com.fasterxml.jackson.dataformat.xml.util.CaseInsensitiveNameSet;
2424
import com.fasterxml.jackson.dataformat.xml.util.StaxUtil;
2525

@@ -83,15 +83,6 @@ public enum Feature implements FormatFeature
8383
*/
8484
PROCESS_XSI_NIL(true),
8585

86-
/**
87-
* Feature that controls whether the escaping mechanism from the
88-
* {@code ESCAPE_MALFORMED_TAGS} from {@link ToXmlGenerator.Feature}
89-
* is reversed.
90-
*
91-
* @since 2.14
92-
*/
93-
PROCESS_ESCAPED_MALFORMED_TAGS(true)
94-
9586
// 16-Nov-2020, tatu: would have been nice to add in 2.12 but is not
9687
// trivial to implement... so leaving out for now
9788

@@ -262,7 +253,7 @@ private Feature(boolean defaultState) {
262253
*/
263254

264255
public FromXmlParser(IOContext ctxt, int genericParserFeatures, int xmlFeatures,
265-
ObjectCodec codec, XMLStreamReader xmlReader)
256+
TagEscapingStrategy tagEscapingStrategy, ObjectCodec codec, XMLStreamReader xmlReader)
266257
throws IOException
267258
{
268259
super(genericParserFeatures);
@@ -271,7 +262,7 @@ public FromXmlParser(IOContext ctxt, int genericParserFeatures, int xmlFeatures,
271262
_objectCodec = codec;
272263
_parsingContext = XmlReadContext.createRootContext(-1, -1);
273264
_xmlTokens = new XmlTokenStream(xmlReader, ctxt.contentReference(),
274-
_formatFeatures);
265+
_formatFeatures, tagEscapingStrategy);
275266

276267
final int firstToken;
277268
try {

0 commit comments

Comments
 (0)