Skip to content

Commit dc8d821

Browse files
authored
Merge pull request #33 from evolvedbinary/6.x.x/hotfix/fn-parse-xml-fragment-xml-decl
[6.x.x] Improve spec compliance of fn:parse-xml-fragment
2 parents f61d933 + cbe2c22 commit dc8d821

File tree

4 files changed

+170
-46
lines changed

4 files changed

+170
-46
lines changed

exist-core/pom.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,7 @@
732732
<include>src/test/resources/standalone-webapp/WEB-INF/web.xml</include>
733733
<include>src/test/xquery/maps/maps.xqm</include>
734734
<include>src/test/xquery/util/util.xml</include>
735+
<include>src/test/xquery/xquery3/parse-xml.xqm</include>
735736
<include>src/test/xquery/xquery3/serialize.xql</include>
736737
<include>src/main/java/org/exist/Indexer.java</include>
737738
<include>src/test/java/org/exist/IndexerTest.java</include>
@@ -999,6 +1000,7 @@
9991000
<exclude>src/test/xquery/maps/maps.xqm</exclude>
10001001
<exclude>src/test/xquery/securitymanager/acl.xqm</exclude>
10011002
<exclude>src/test/xquery/util/util.xml</exclude>
1003+
<exclude>src/test/xquery/xquery3/parse-xml.xqm</exclude>
10021004
<exclude>src/test/xquery/xquery3/postfix-expr.xqm</exclude>
10031005
<exclude>src/test/xquery/xquery3/serialize.xql</exclude>
10041006
<exclude>src/main/java/org/exist/Indexer.java</exclude>

exist-core/src/main/java/org/exist/xquery/functions/fn/ParsingFunctions.java

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@
6464

6565
import java.io.IOException;
6666
import java.io.StringReader;
67-
import java.nio.charset.StandardCharsets;
6867

6968
import static org.exist.util.ByteOrderMark.stripXmlBom;
7069

@@ -104,10 +103,6 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
104103
return Sequence.EMPTY_SEQUENCE;
105104
}
106105
final String xmlContent = args[0].itemAt(0).getStringValue();
107-
if (xmlContent.isEmpty()) {
108-
return Sequence.EMPTY_SEQUENCE;
109-
}
110-
111106
return parse(xmlContent, args);
112107
}
113108

@@ -134,7 +129,27 @@ private ValidationReport validate(String xmlContent, final SAXAdapter saxAdapter
134129
xmlContent = stripXmlBom(xmlContent);
135130
final String xml;
136131
if (isCalledAs("parse-xml-fragment")) {
137-
xml = "<" + FRAGMENT_WRAPPER_NAME + ">" + xmlContent + "</" + FRAGMENT_WRAPPER_NAME + ">";
132+
String declStr = xmlContent.toLowerCase();
133+
final int startIdx = declStr.indexOf("<?xml ");
134+
if (startIdx > -1) {
135+
136+
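// NOTE(AR) when parsing fragments the input must be an external parsed entity, so its declaration has to be a TextDecl (https://www.w3.org/TR/REC-xml/#NT-TextDecl) and not a full XMLDecl (https://www.w3.org/TR/REC-xml/#NT-XMLDecl). Only the presence of a 'standalone' pseudo-attribute is checked here; nothing else in the declaration is validated.
// NOTE(review): if the declaration is never closed with "?>", endIdx stays -1 and substring(0, endIdx) below throws StringIndexOutOfBoundsException rather than raising an XPath error - confirm and guard.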
137+
138+
declStr = declStr.substring(startIdx);
139+
int endIdx = declStr.indexOf("?>");
140+
if (endIdx > -1) {
141+
endIdx += 2;
142+
}
143+
declStr = declStr.substring(0, endIdx);
144+
if (declStr.contains("standalone=")) {
145+
throw new XPathException(this, ErrorCodes.FODC0006, "Input to fn:parse-xml-fragment must be a valid external entity, but 'standalone' attribute was detected in the declaration");
146+
}
147+
148+
xml = xmlContent;
149+
150+
} else {
151+
xml = "<" + FRAGMENT_WRAPPER_NAME + ">" + xmlContent + "</" + FRAGMENT_WRAPPER_NAME + ">";
152+
}
138153
} else {
139154
xml = xmlContent;
140155
}

exist-core/src/test/xquery/xquery3/parse-xml.xq

Lines changed: 0 additions & 40 deletions
This file was deleted.

exist-core/src/test/xquery/xquery3/parse-xml.xqm

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
(:
2+
: Elemental
3+
: Copyright (C) 2024, Evolved Binary Ltd
4+
:
5+
6+
: https://www.evolvedbinary.com | https://www.elemental.xyz
7+
:
8+
: This library is free software; you can redistribute it and/or
9+
: modify it under the terms of the GNU Lesser General Public
10+
: License as published by the Free Software Foundation; version 2.1.
11+
:
12+
: This library is distributed in the hope that it will be useful,
13+
: but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15+
: Lesser General Public License for more details.
16+
:
17+
: You should have received a copy of the GNU Lesser General Public
18+
: License along with this library; if not, write to the Free Software
19+
: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20+
:
21+
: NOTE: Parts of this file contain code from 'The eXist-db Authors'.
22+
: The original license header is included below.
23+
:
24+
: =====================================================================
25+
:
26+
: eXist-db Open Source Native XML Database
27+
: Copyright (C) 2001 The eXist-db Authors
28+
:
29+
30+
: http://www.exist-db.org
31+
:
32+
: This library is free software; you can redistribute it and/or
33+
: modify it under the terms of the GNU Lesser General Public
34+
: License as published by the Free Software Foundation; either
35+
: version 2.1 of the License, or (at your option) any later version.
36+
:
37+
: This library is distributed in the hope that it will be useful,
38+
: but WITHOUT ANY WARRANTY; without even the implied warranty of
39+
: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
40+
: Lesser General Public License for more details.
41+
:
42+
: You should have received a copy of the GNU Lesser General Public
43+
: License along with this library; if not, write to the Free Software
44+
: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
45+
:)
46+
xquery version "3.0";
47+
48+
(:~ Additional tests for the fn:parse-xml and fn:parse-xml-fragment functions :)
49+
module namespace px="http://exist-db.org/xquery/test/parse-xml";
50+
51+
declare namespace test="http://exist-db.org/xquery/xqsuite";
52+
53+
declare
54+
%test:assertEmpty
55+
function px:fragment-type-1() {
56+
fn:parse-xml-fragment(())
57+
};
58+
59+
declare
60+
%test:assertTrue
61+
function px:fragment-type-2() {
62+
fn:parse-xml-fragment("") instance of document-node()
63+
};
64+
65+
declare
66+
%test:assertEmpty
67+
function px:fragment-children-1() {
68+
fn:parse-xml-fragment("")/node()
69+
};
70+
71+
declare
72+
%test:assertTrue
73+
function px:fragment-type-3() {
74+
fn:parse-xml-fragment(" ") instance of document-node()
75+
};
76+
77+
declare
78+
%test:assertTrue(" ")
79+
function px:fragment-children-2() {
80+
fn:parse-xml-fragment(" ")/node()
81+
};
82+
83+
declare
84+
%test:assertTrue
85+
function px:fragment-type-4() {
86+
fn:parse-xml-fragment("<alpha>abcd</alpha><beta>abcd</beta>") instance of document-node()
87+
};
88+
89+
declare
90+
%test:assertEquals("<alpha>abcd</alpha>", "<beta>abcd</beta>")
91+
function px:fragment-children-3() {
92+
fn:parse-xml-fragment("<alpha>abcd</alpha><beta>abcd</beta>")/node()
93+
};
94+
95+
declare
96+
%test:assertTrue
97+
function px:fragment-type-5() {
98+
fn:parse-xml-fragment("He was <i>so</i> kind") instance of document-node()
99+
};
100+
101+
declare
102+
%test:assertEquals(1)
103+
function px:fragment-count() {
104+
count(parse-xml-fragment("He was <i>so</i> kind"))
105+
};
106+
107+
declare
108+
%test:assertEquals(3)
109+
function px:fragment-node-count() {
110+
count(parse-xml-fragment("He was <i>so</i> kind")/node())
111+
};
112+
113+
declare
114+
%test:assertTrue
115+
function px:fragment-xml-decl() {
116+
fn:parse-xml-fragment('<?xml version="1.0"?><a/>') instance of document-node()
117+
};
118+
119+
declare
120+
%test:assertError("FODC0006")
121+
function px:fragment-xml-decl-standalone-yes() {
122+
fn:parse-xml-fragment('<?xml version="1.0" standalone="yes"?><a/>')
123+
};
124+
125+
declare
126+
%test:assertError("FODC0006")
127+
function px:fragment-xml-decl-standalone-no() {
128+
fn:parse-xml-fragment('<?xml version="1.0" standalone="no"?><a/>')
129+
};
130+
131+
declare
132+
%test:assertTrue
133+
function px:fragment-xml-decl-encoding() {
134+
fn:parse-xml-fragment('<?xml version="1.0" encoding="utf8"?><a/>') instance of document-node()
135+
};
136+
137+
declare
138+
%test:assertError("FODC0006")
139+
function px:fragment-xml-decl-encoding-standalone-yes() {
140+
fn:parse-xml-fragment('<?xml version="1.0" encoding="utf8" standalone="yes"?><a/>')
141+
};
142+
143+
declare
144+
%test:assertError("FODC0006")
145+
function px:fragment-xml-decl-encoding-standalone-no() {
146+
fn:parse-xml-fragment('<?xml version="1.0" encoding="utf8" standalone="no"?><a/>')
147+
};

0 commit comments

Comments
 (0)