Skip to content

Commit efdc7be

Browse files
authored
Fix issue regarding empty tags and refactor code (#8)
* Fix issue regarding empty tags * Set correct encoding after removing empty tags
1 parent e3de098 commit efdc7be

File tree

7 files changed

+434
-57
lines changed

7 files changed

+434
-57
lines changed

src/main/java/io/github/easybill/Exceptions/InvalidXmlException.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,8 @@ public class InvalidXmlException extends ValidatorException {
55
public InvalidXmlException() {
66
super();
77
}
8+
9+
public InvalidXmlException(Throwable cause) {
10+
super("the xml is invalid", cause);
11+
}
812
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
package io.github.easybill.Exceptions;
2+
3+
public class XmlSanitizationException extends ValidatorException {
4+
5+
public XmlSanitizationException(Throwable cause) {
6+
super("could not sanitize the xml accordingly", cause);
7+
}
8+
}

src/main/java/io/github/easybill/Services/ValidationService.java

Lines changed: 7 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import java.util.List;
2020
import java.util.Objects;
2121
import java.util.Optional;
22-
import java.util.regex.Pattern;
22+
import net.sf.saxon.type.ValidationException;
2323
import org.checkerframework.checker.nullness.qual.NonNull;
2424
import org.mozilla.universalchardet.UniversalDetector;
2525

@@ -66,13 +66,14 @@ public final class ValidationService implements IValidationService {
6666

6767
var xml = new String(bytesFromSteam, charset);
6868

69-
if (isXmlInvalid(xml)) {
69+
if (xml.isBlank()) {
7070
throw new InvalidXmlException();
7171
}
7272

73-
xml = removeBOM(xml);
73+
xml = XMLSanitizer.sanitize(xml, charset);
7474

75-
var xmlSyntaxType = determineXmlSyntax(xml)
75+
var xmlSyntaxType = XMLSyntaxGuesser
76+
.tryGuessSyntax(xml)
7677
.orElseThrow(InvalidXmlException::new);
7778

7879
var report = innerValidateSchematron(
@@ -144,56 +145,6 @@ private Charset determineCharsetForXmlPayload(byte[] bytes)
144145
throw new InvalidXmlException();
145146
}
146147

147-
private boolean isXmlInvalid(@NonNull String xml) {
148-
return xml.isBlank() || (!checkIfUblXml(xml) && !checkIfCiiXml(xml));
149-
}
150-
151-
private Optional<XMLSyntaxType> determineXmlSyntax(@NonNull String xml) {
152-
if (checkIfCiiXml(xml)) {
153-
return Optional.of(XMLSyntaxType.CII);
154-
}
155-
156-
if (checkIfUblXml(xml)) {
157-
return Optional.of(XMLSyntaxType.UBL);
158-
}
159-
160-
return Optional.empty();
161-
}
162-
163-
private boolean checkIfCiiXml(@NonNull CharSequence payload) {
164-
return Pattern
165-
.compile("[<:](CrossIndustryInvoice)")
166-
.matcher(payload)
167-
.find();
168-
}
169-
170-
private boolean checkIfUblXml(@NonNull CharSequence payload) {
171-
return Pattern
172-
.compile("[<:](Invoice|CreditNote)")
173-
.matcher(payload)
174-
.find();
175-
}
176-
177-
private @NonNull String removeBOM(@NonNull String $payload) {
178-
String UTF8_BOM = "\uFEFF";
179-
String UTF16LE_BOM = "\uFFFE";
180-
String UTF16BE_BOM = "\uFEFF";
181-
182-
if ($payload.isEmpty()) {
183-
return $payload;
184-
}
185-
186-
if (
187-
$payload.startsWith(UTF8_BOM) ||
188-
$payload.startsWith(UTF16LE_BOM) ||
189-
$payload.startsWith(UTF16BE_BOM)
190-
) {
191-
return $payload.substring(1);
192-
}
193-
194-
return $payload;
195-
}
196-
197148
private Optional<SchematronOutputType> innerValidateSchematron(
198149
@NonNull XMLSyntaxType xmlSyntaxType,
199150
byte[] bytes
@@ -213,6 +164,8 @@ private Optional<SchematronOutputType> innerValidateSchematron(
213164
};
214165
} catch (IllegalArgumentException exception) {
215166
throw new ParsingException(exception);
167+
} catch (ValidationException exception) {
168+
throw new InvalidXmlException(exception);
216169
}
217170
}
218171
}
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
package io.github.easybill.Services;
2+
3+
import io.github.easybill.Exceptions.XmlSanitizationException;
4+
import java.io.*;
5+
import java.nio.charset.Charset;
6+
import javax.xml.XMLConstants;
7+
import javax.xml.parsers.DocumentBuilder;
8+
import javax.xml.parsers.DocumentBuilderFactory;
9+
import javax.xml.parsers.ParserConfigurationException;
10+
import javax.xml.transform.OutputKeys;
11+
import javax.xml.transform.Transformer;
12+
import javax.xml.transform.TransformerException;
13+
import javax.xml.transform.TransformerFactory;
14+
import javax.xml.transform.dom.DOMSource;
15+
import javax.xml.transform.stream.StreamResult;
16+
import org.checkerframework.checker.nullness.qual.NonNull;
17+
import org.w3c.dom.*;
18+
import org.w3c.dom.Document;
19+
import org.xml.sax.InputSource;
20+
import org.xml.sax.SAXException;
21+
22+
public final class XMLSanitizer {
23+
24+
public static @NonNull String sanitize(
25+
@NonNull String xml,
26+
@NonNull Charset charset
27+
) throws XmlSanitizationException {
28+
try {
29+
return removeEmptyTags(
30+
removeInvalidCharsFromProlog(removeBOM(xml)),
31+
charset
32+
);
33+
} catch (Exception exception) {
34+
throw new XmlSanitizationException(exception);
35+
}
36+
}
37+
38+
private static @NonNull String removeInvalidCharsFromProlog(
39+
@NonNull String payload
40+
) {
41+
var indexOfXmlIntro = payload.indexOf("<?xml version");
42+
43+
if (indexOfXmlIntro == 0) {
44+
return payload;
45+
}
46+
47+
return payload.substring(indexOfXmlIntro);
48+
}
49+
50+
private static @NonNull String removeBOM(@NonNull String xml) {
51+
String UTF8_BOM = "\uFEFF";
52+
String UTF16LE_BOM = "\uFFFE";
53+
String UTF16BE_BOM = "\uFEFF";
54+
55+
if (xml.isEmpty()) {
56+
return xml;
57+
}
58+
59+
if (
60+
xml.startsWith(UTF8_BOM) ||
61+
xml.startsWith(UTF16LE_BOM) ||
62+
xml.startsWith(UTF16BE_BOM)
63+
) {
64+
return xml.substring(1);
65+
}
66+
67+
return xml;
68+
}
69+
70+
private static @NonNull String removeEmptyTags(
71+
@NonNull String xml,
72+
@NonNull Charset charset
73+
)
74+
throws ParserConfigurationException, IOException, SAXException, TransformerException {
75+
byte[] xmlBytes = xml.getBytes(charset);
76+
77+
var builderFactory = DocumentBuilderFactory.newInstance();
78+
builderFactory.setNamespaceAware(true);
79+
builderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
80+
81+
DocumentBuilder db = builderFactory.newDocumentBuilder();
82+
83+
try (
84+
InputStream inputStream = new ByteArrayInputStream(xmlBytes);
85+
Reader reader = new InputStreamReader(inputStream, charset)
86+
) {
87+
Document document = db.parse(new InputSource(reader));
88+
89+
removeEmptyElements(document.getDocumentElement());
90+
91+
TransformerFactory transformerFactory =
92+
TransformerFactory.newInstance();
93+
94+
Transformer transformer = transformerFactory.newTransformer();
95+
transformer.setOutputProperty(OutputKeys.ENCODING, charset.name());
96+
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
97+
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
98+
99+
try (
100+
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
101+
Writer writer = new OutputStreamWriter(outputStream, charset)
102+
) {
103+
transformer.transform(
104+
new DOMSource(document),
105+
new StreamResult(writer)
106+
);
107+
writer.flush();
108+
109+
return outputStream.toString(charset);
110+
}
111+
}
112+
}
113+
114+
private static void removeEmptyElements(Element element) {
115+
NodeList children = element.getChildNodes();
116+
117+
for (int i = children.getLength() - 1; i >= 0; i--) {
118+
Node child = children.item(i);
119+
120+
if (child == null) {
121+
continue;
122+
}
123+
124+
if (child.getNodeType() == Node.ELEMENT_NODE) {
125+
removeEmptyElements((Element) child);
126+
}
127+
128+
if (
129+
child.getNodeType() == Node.ELEMENT_NODE &&
130+
isEmptyElement((Element) child)
131+
) {
132+
element.removeChild(child);
133+
}
134+
}
135+
}
136+
137+
private static boolean isEmptyElement(Element element) {
138+
return (
139+
element.getChildNodes().getLength() == 0 ||
140+
(
141+
element.getChildNodes().getLength() == 1 &&
142+
element.getFirstChild() != null &&
143+
element.getFirstChild().getNodeType() == Node.TEXT_NODE &&
144+
element.getFirstChild().getTextContent() != null &&
145+
element.getFirstChild().getTextContent().trim().isEmpty()
146+
)
147+
);
148+
}
149+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package io.github.easybill.Services;
2+
3+
import io.github.easybill.Enums.XMLSyntaxType;
4+
import java.util.Optional;
5+
import java.util.regex.Pattern;
6+
import org.checkerframework.checker.nullness.qual.NonNull;
7+
8+
public final class XMLSyntaxGuesser {
9+
10+
public static Optional<XMLSyntaxType> tryGuessSyntax(@NonNull String xml) {
11+
if (checkIfCiiXml(xml)) {
12+
return Optional.of(XMLSyntaxType.CII);
13+
}
14+
15+
if (checkIfUblXml(xml)) {
16+
return Optional.of(XMLSyntaxType.UBL);
17+
}
18+
19+
return Optional.empty();
20+
}
21+
22+
private static boolean checkIfCiiXml(@NonNull CharSequence payload) {
23+
return Pattern
24+
.compile("[<:](CrossIndustryInvoice)")
25+
.matcher(payload)
26+
.find();
27+
}
28+
29+
private static boolean checkIfUblXml(@NonNull CharSequence payload) {
30+
return Pattern
31+
.compile("[<:](Invoice|CreditNote)")
32+
.matcher(payload)
33+
.find();
34+
}
35+
}

src/test/java/io/github/easybill/ValidationControllerTest.java

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,27 @@ void testValidationEndpointWithPayloadIncludingCharsInProlog()
6464
.when()
6565
.post("/validation")
6666
.then()
67-
.statusCode(422);
67+
.statusCode(200);
68+
}
69+
70+
@ParameterizedTest
71+
@ValueSource(strings = { "CII/CII_empty_tags.xml" })
72+
void testDocumentWithEmptyTags(@NonNull String fixtureFileName)
73+
throws IOException {
74+
given()
75+
.body(loadFixtureFileAsStream(fixtureFileName))
76+
.contentType(ContentType.XML)
77+
.when()
78+
.post("/validation")
79+
.then()
80+
.statusCode(200)
81+
.contentType(ContentType.JSON)
82+
.body("is_valid", equalTo(false))
83+
.body(
84+
"meta.validation_profile",
85+
equalTo(XMLSyntaxType.CII.toString())
86+
)
87+
.body("errors", not(empty()));
6888
}
6989

7090
@ParameterizedTest
@@ -147,7 +167,6 @@ void testValidationEndpointWithValidUblDocuments(
147167
"CII/CII_ZUGFeRD_23_XRECHNUNG_Einfach.xml",
148168
"CII/CII_ZUGFeRD_23_XRECHNUNG_Elektron.xml",
149169
"CII/CII_ZUGFeRD_23_XRECHNUNG_Reisekostenabrechnung.xml",
150-
"CII/XRechnung-O.xml",
151170
"CII/CII_ZUGFeRD_23_EXTENDED_Rechnungskorrektur.xml",
152171
}
153172
)
@@ -188,7 +207,8 @@ static Stream<Arguments> providerValuesValidationEndpointWithInvalidPayload() {
188207
Arguments.of(
189208
"CII/CII_ZUGFeRD_23_EXTENDED_Projektabschlussrechnung.xml"
190209
),
191-
Arguments.of("CII/CII_ZUGFeRD_23_EXTENDED_Warenrechnung.xml")
210+
Arguments.of("CII/CII_ZUGFeRD_23_EXTENDED_Warenrechnung.xml"),
211+
Arguments.of("CII/XRechnung-O.xml")
192212
);
193213
}
194214

0 commit comments

Comments
 (0)