Skip to content

Commit 6bc46da

Browse files
committed
Fix #167 #186 and #250 : '>' is allowed in attr val
1 parent f0a58dc commit 6bc46da

File tree

3 files changed

+248
-90
lines changed

3 files changed

+248
-90
lines changed

spec/cdata_spec.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ describe("XMLParser", function() {
279279
"@_boolean": true,
280280
"#text": "some dataafter"
281281
},
282-
"before text\n <nestedtag>\n nested cdata 1\n </nestedtag>\n middle\n <nestedtag>\n nested cdata 2\n </nestedtag>\n after\n <nestedtag>\n nested cdata 3\n </nestedtag>\n end"
282+
"before text\n <nestedtag>\n nested cdata 1<!--single line comment-->\n </nestedtag>\n middle\n <nestedtag>\n nested cdata 2<!--multi line\n comment-->\n </nestedtag>\n after\n <nestedtag>\n nested cdata 3\n </nestedtag>\n end"
283283
],
284284
"@_xmlns:soap": "http://schemas.xmlsoap.org/soap/envelope/"
285285
}

spec/data_spec.js

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"use strict";
2+
3+
const parser = require("../src/parser");
4+
const validator = require("../src/validator");
5+
const he = require("he");
6+
7+
// Regression specs for attribute handling: '>' inside quoted attribute values,
// boolean attributes, and comments embedded in element content.
describe("XMLParser", function() {

    it("should parse attributes having '>' in value", function() {
        // The name attribute carries a literal '>' inside its quoted value.
        const sourceXml = `<? xml version = "1.0" encoding = "UTF - 8" ?>
<testStep type="restrequest" name="test step name (bankId -> Error)" id="90e453d3-30cd-4958-a3be-61ecfe7a7cbe">
<settings/>
<encoding>UTF-8</encoding>
</testStep>`;

        const parseOptions = {
            attributeNamePrefix: "",
            ignoreAttributes: false,
        };

        const expectedTree = {
            "testStep": {
                "type": "restrequest",
                "name": "test step name (bankId -> Error)",
                "id": "90e453d3-30cd-4958-a3be-61ecfe7a7cbe",
                "settings": "",
                "encoding": "UTF-8"
            }
        };

        const parsed = parser.parse(sourceXml, parseOptions);
        expect(parsed).toEqual(expectedTree);

        // The same document must also be accepted by the validator.
        const validation = validator.validate(sourceXml);
        expect(validation).toBe(true);
    });

    it("should parse attributes with valid names", function() {
        // Mixes a valued attribute, boolean attributes, self-closing tags and a comment.
        const sourceXml = `
<a>
<bug atr="sasa" boolean>val
<b/>
<br/>
<br b/>
<c>some<!--single line comment--></c>here
</bug>as well
</a>`;

        const parseOptions = {
            ignoreAttributes: false,
            allowBooleanAttributes: true
        };

        const expectedTree = {
            "a": {
                "#text": "as well",
                "bug": {
                    "#text": "valhere",
                    "@_atr": "sasa",
                    "@_boolean": true,
                    "b": "",
                    "br": [
                        "",
                        {
                            "@_b": true
                        }
                    ],
                    "c": "some"
                }
            }
        };

        const parsed = parser.parse(sourceXml, parseOptions);
        expect(parsed).toEqual(expectedTree);
    });
});

src/xmlstr2xmlnode.js

Lines changed: 168 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
const util = require('./util');
44
const buildOptions = require('./util').buildOptions;
55
const xmlNode = require('./xmlNode');
6-
const TagType = {OPENING: 1, CLOSING: 2, SELF: 3, CDATA: 4};
76
const regx =
87
'<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)'
98
.replace(/NAME/g, util.nameRegexp);
@@ -65,82 +64,13 @@ const props = [
6564
];
6665
exports.props = props;
6766

68-
/**
 * Regex-driven tree builder (the implementation this commit removes).
 *
 * Comments are stripped from the input first, then the global `regx` pattern is
 * executed repeatedly; each match is classified with checkForTagType and turned
 * into xmlNode children of the node currently being built.
 *
 * Known limitation visible in `regx`: the attribute text is captured with
 * `([^>]*)`, so the match ends at the first '>' even when that '>' sits inside
 * a quoted attribute value.
 *
 * NOTE(review): the tag[N] indices used below assume util.nameRegexp adds no
 * capturing groups of its own - confirm against util.
 *
 * @param {string} xmlData - XML text to parse.
 * @param {object} options - User options, merged with defaultOptions via buildOptions.
 * @returns {xmlNode} The synthetic root node named '!xml'.
 */
const getTraversalObj = function(xmlData, options) {
69-
options = buildOptions(options, defaultOptions, props);
70-
//xmlData = xmlData.replace(/\r?\n/g, " ");//make it single line
71-
xmlData = xmlData.replace(/<!--[\s\S]*?-->/g, ''); //Remove comments
72-
73-
const xmlObj = new xmlNode('!xml');
74-
let currentNode = xmlObj;
75-
76-
const tagsRegx = new RegExp(regx, 'g');
77-
// `tag` is the match being processed; `nextTag` is fetched one match ahead.
let tag = tagsRegx.exec(xmlData);
78-
let nextTag = tagsRegx.exec(xmlData);
79-
while (tag) {
80-
const tagType = checkForTagType(tag);
81-
82-
if (tagType === TagType.CLOSING) {
83-
//add parsed data to parent node
84-
// tag[12] is the text that follows the closing tag; it belongs to the parent.
if (currentNode.parent && tag[12]) {
85-
currentNode.parent.val = util.getValue(currentNode.parent.val) + '' + processTagValue(tag, options, currentNode.parent.tagname);
86-
}
87-
// For a stop node, discard parsed children and keep the raw text between
// the recorded start index and this closing tag.
if (options.stopNodes.length && options.stopNodes.includes(currentNode.tagname)) {
88-
currentNode.child = []
89-
if (currentNode.attrsMap == undefined) { currentNode.attrsMap = {}}
90-
currentNode.val = xmlData.substr(currentNode.startIndex + 1, tag.index - currentNode.startIndex - 1)
91-
}
92-
currentNode = currentNode.parent;
93-
} else if (tagType === TagType.CDATA) {
94-
if (options.cdataTagName) {
95-
//add cdata node
96-
// tag[3] is the CDATA content.
const childNode = new xmlNode(options.cdataTagName, currentNode, tag[3]);
97-
childNode.attrsMap = buildAttributesMap(tag[8], options);
98-
currentNode.addChild(childNode);
99-
//for backtracking
100-
currentNode.val = util.getValue(currentNode.val) + options.cdataPositionChar;
101-
//add rest value to parent node
102-
if (tag[12]) {
103-
currentNode.val += processTagValue(tag, options);
104-
}
105-
} else {
106-
// No dedicated CDATA node: merge the CDATA content and trailing text into the current value.
currentNode.val = (currentNode.val || '') + (tag[3] || '') + processTagValue(tag, options);
107-
}
108-
} else if (tagType === TagType.SELF) {
109-
if (currentNode && tag[12]) {
110-
currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(tag, options);
111-
}
112-
113-
// tag[7] is used for the node name when ignoreNameSpace is set, tag[5] otherwise.
const childNode = new xmlNode(options.ignoreNameSpace ? tag[7] : tag[5], currentNode, '');
114-
// Drop the last character of the attribute text (the '/' that made checkForTagType report SELF).
if (tag[8] && tag[8].length > 0) {
115-
tag[8] = tag[8].substr(0, tag[8].length - 1);
116-
}
117-
childNode.attrsMap = buildAttributesMap(tag[8], options);
118-
currentNode.addChild(childNode);
119-
} else {
120-
//TagType.OPENING
121-
const childNode = new xmlNode(
122-
options.ignoreNameSpace ? tag[7] : tag[5],
123-
currentNode,
124-
processTagValue(tag, options)
125-
);
126-
// Remember where a stop node's content starts so its raw text can be cut out on close.
if (options.stopNodes.length && options.stopNodes.includes(childNode.tagname)) {
127-
childNode.startIndex=tag.index + tag[1].length
128-
}
129-
childNode.attrsMap = buildAttributesMap(tag[8], options);
130-
currentNode.addChild(childNode);
131-
currentNode = childNode;
132-
}
133-
134-
tag = nextTag;
135-
nextTag = tagsRegx.exec(xmlData);
136-
}
137-
138-
return xmlObj;
139-
};
140-
141-
function processTagValue(parsedTags, options, parentTagName) {
142-
const tagName = parsedTags[7] || parentTagName;
143-
let val = parsedTags[12];
67+
/**
68+
* Trim -> valueProcessor -> parse value
69+
* @param {string} tagName
70+
* @param {string} val
71+
* @param {object} options
72+
*/
73+
function processTagValue(tagName, val, options) {
14474
if (val) {
14575
if (options.trimValues) {
14676
val = val.trim();
@@ -152,18 +82,6 @@ function processTagValue(parsedTags, options, parentTagName) {
15282
return val;
15383
}
15484

155-
/**
 * Classifies a match of the tag regular expression as one of the TagType values
 * (part of the regex-based implementation this commit removes).
 *
 * Checks are made in order: match[4] equal to ']]>' means CDATA, match[10]
 * equal to '/' means a closing tag, a defined match[8] whose last character is
 * '/' means a self-closing tag; anything else is reported as an opening tag.
 *
 * NOTE(review): the group indices assume util.nameRegexp adds no capturing
 * groups to `regx` - confirm against util.
 *
 * @param {Array} match - Result of executing the tag regular expression.
 * @returns {number} One of TagType.CDATA, CLOSING, SELF or OPENING.
 */
function checkForTagType(match) {
156-
if (match[4] === ']]>') {
157-
return TagType.CDATA;
158-
} else if (match[10] === '/') {
159-
return TagType.CLOSING;
160-
} else if (typeof match[8] !== 'undefined' && match[8].substr(match[8].length - 1) === '/') {
161-
return TagType.SELF;
162-
} else {
163-
return TagType.OPENING;
164-
}
165-
}
166-
16785
function resolveNameSpace(tagname, options) {
16886
if (options.ignoreNameSpace) {
16987
const tags = tagname.split(':');
@@ -249,4 +167,165 @@ function buildAttributesMap(attrStr, options) {
249167
}
250168
}
251169

170+
/**
 * Builds the xmlNode tree for an XML string with a single left-to-right
 * character scan.
 *
 * Markup is dispatched on its leading characters: closing tags, processing
 * instructions, comments, DOCTYPE declarations, CDATA sections, and finally
 * opening / self-closing tags. Text between markup is buffered and attached to
 * the current node when the next tag or CDATA section is reached; comments and
 * processing instructions are skipped without flushing that buffer. Text
 * buffered while the current node is the synthetic '!xml' root is dropped when
 * an opening tag follows.
 *
 * @param {string} xmlData - XML text to parse.
 * @param {object} options - User options, merged with defaultOptions via buildOptions.
 * @returns {xmlNode} The synthetic root node named '!xml'.
 * @throws {Error} When a closing tag, processing instruction, comment, DOCTYPE
 *   or CDATA section is never terminated, or when markup that needs a current
 *   node appears after that node has been lost to an unmatched closing tag.
 */
const getTraversalObj = function(xmlData, options) {
  options = buildOptions(options, defaultOptions, props);
  const xmlObj = new xmlNode('!xml');
  let currentNode = xmlObj;
  let textData = '';

  // Locates `token` at or after `fromIndex`. A plain indexOf() result of -1
  // would rewind the scan position to the start of the input and loop forever,
  // so a missing terminator is reported as an error instead.
  const indexOfOrThrow = function(token, fromIndex, errMsg) {
    const foundAt = xmlData.indexOf(token, fromIndex);
    if (foundAt === -1) {
      throw new Error(errMsg);
    }
    return foundAt;
  };

  // With ignoreNameSpace enabled, drops everything up to and including the
  // first ':' of a tag name.
  const stripNameSpace = function(name) {
    if (options.ignoreNameSpace) {
      const colonIndex = name.indexOf(':');
      if (colonIndex !== -1) {
        return name.substring(colonIndex + 1);
      }
    }
    return name;
  };

  // The current node becomes undefined once more tags were closed than opened;
  // fail with a descriptive message rather than a TypeError further down.
  const requireCurrentNode = function(what) {
    if (!currentNode) {
      throw new Error(`Unexpected ${what}: no open element to attach it to.`);
    }
  };

  for (let i = 0; i < xmlData.length; i++) {
    if (xmlData[i] !== '<') {
      textData += xmlData[i];
      continue;
    }

    if (xmlData[i + 1] === '/') {
      // Closing tag
      const closeIndex = indexOfOrThrow('>', i, 'Closing tag is not closed.');
      const tagName = stripNameSpace(xmlData.substring(i + 2, closeIndex).trim());
      requireCurrentNode(`closing tag '${tagName}'`);

      if (currentNode.val) {
        currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(tagName, textData, options);
      } else {
        currentNode.val = processTagValue(tagName, textData, options);
      }

      // A stop node keeps the raw text between its opening '>' and this '<'
      // instead of the parsed children.
      if (options.stopNodes.length && options.stopNodes.includes(currentNode.tagname)) {
        currentNode.child = [];
        if (currentNode.attrsMap == null) {
          currentNode.attrsMap = {};
        }
        currentNode.val = xmlData.substring(currentNode.startIndex + 1, i);
      }
      currentNode = currentNode.parent;
      textData = '';
      i = closeIndex;
    } else if (xmlData[i + 1] === '?') {
      // Processing instruction / XML declaration: skipped.
      i = indexOfOrThrow('?>', i, 'Processing instruction is not closed.') + 1;
    } else if (xmlData.startsWith('<!--', i)) {
      // Comment: skipped. Matching the full '<!--' prefix keeps tags such as
      // <a-b> from being mistaken for comments.
      i = indexOfOrThrow('-->', i, 'Comment is not closed.') + 2;
    } else if (xmlData.startsWith('<!D', i)) {
      // DOCTYPE: skipped. Matching '<!D' keeps tags such as <ID> from being
      // mistaken for a DOCTYPE.
      const closeIndex = indexOfOrThrow('>', i, 'DOCTYPE is not closed.');
      const declaration = xmlData.substring(i, closeIndex);
      if (declaration.includes('[')) {
        // Internal subset present: the declaration ends at ']>'.
        i = indexOfOrThrow(']>', i, 'DOCTYPE internal subset is not closed.') + 1;
      } else {
        i = closeIndex;
      }
    } else if (xmlData.startsWith('<![', i)) {
      // CDATA section; the content starts after the 9 characters of '<![CDATA['.
      const closeIndex = indexOfOrThrow(']]>', i, 'CDATA is not closed.');
      const cdataText = xmlData.substring(i + 9, closeIndex);
      requireCurrentNode('CDATA section');

      // Flush text that preceded the CDATA section into the current node.
      if (textData) {
        currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(currentNode.tagname, textData, options);
        textData = '';
      }

      if (options.cdataTagName) {
        // Dedicated CDATA child node
        const childNode = new xmlNode(options.cdataTagName, currentNode, cdataText);
        currentNode.addChild(childNode);
        // Position marker, for backtracking
        currentNode.val = util.getValue(currentNode.val) + options.cdataPositionChar;
        if (cdataText) {
          childNode.val = cdataText;
        }
      } else {
        currentNode.val = (currentNode.val || '') + (cdataText || '');
      }

      i = closeIndex + 2;
    } else {
      // Opening or self-closing tag
      requireCurrentNode('tag');
      const closeIndex = closingIndexForOpeningTag(xmlData, i);
      let tagExp = xmlData.substring(i + 1, closeIndex);
      let tagName = tagExp;
      // Any whitespace (space, tab, newline) may separate the name from the attributes.
      const separatorIndex = tagExp.search(/\s/);
      let hasAttributes = separatorIndex !== -1;
      if (hasAttributes) {
        tagName = tagExp.substring(0, separatorIndex);
        tagExp = tagExp.substring(separatorIndex + 1);
      }
      tagName = stripNameSpace(tagName);

      // Save buffered text to the current node (not kept for the '!xml' root).
      if (textData && currentNode.tagname !== '!xml') {
        currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(currentNode.tagname, textData, options);
      }

      // endsWith() is false for an empty attribute string, so a tag such as
      // "<a >" is not misread as self-closing.
      if (tagExp.endsWith('/')) {
        // Self-closing tag
        if (tagName[tagName.length - 1] === '/') {
          // "<br/>" form: the '/' is glued to the name and there are no attributes.
          tagName = tagName.substring(0, tagName.length - 1);
          hasAttributes = false;
        } else {
          tagExp = tagExp.substring(0, tagExp.length - 1);
        }

        const childNode = new xmlNode(tagName, currentNode, '');
        if (hasAttributes) {
          childNode.attrsMap = buildAttributesMap(tagExp, options);
        }
        currentNode.addChild(childNode);
      } else {
        // Opening tag
        const childNode = new xmlNode(tagName, currentNode);
        if (options.stopNodes.length && options.stopNodes.includes(childNode.tagname)) {
          // Remember where the raw content of a stop node starts.
          childNode.startIndex = closeIndex;
        }
        if (hasAttributes) {
          childNode.attrsMap = buildAttributesMap(tagExp, options);
        }
        currentNode.addChild(childNode);
        currentNode = childNode;
      }
      textData = '';

      // No terminating '>' was found: the rest of the input was consumed as
      // this tag, so there is nothing left to scan.
      if (closeIndex === undefined) {
        break;
      }
      i = closeIndex;
    }
  }
  return xmlObj;
}
316+
317+
/**
 * Finds the '>' that terminates the tag being scanned.
 *
 * Scanning starts at `i` and tracks whether the position is inside a single-
 * or double-quoted attribute value; a '>' seen inside quotes is part of the
 * value and is skipped.
 *
 * @param {string} data - XML text being scanned.
 * @param {number} i - Index at which to start scanning.
 * @returns {number|undefined} Index of the terminating '>', or undefined when
 *   the input ends before one is found.
 */
function closingIndexForOpeningTag(data, i) {
  let attrBoundary = '';
  for (let index = i; index < data.length; index++) {
    const ch = data[index];
    if (attrBoundary) {
      if (ch === attrBoundary) {
        attrBoundary = ''; // matching closing quote: back outside the value
      }
    } else if (ch === '"' || ch === "'") {
      attrBoundary = ch;
    } else if (ch === '>') {
      return index;
    }
  }
  // Callers rely on undefined (not -1) to mean "no terminator found".
  return undefined;
}
330+
252331
exports.getTraversalObj = getTraversalObj;

0 commit comments

Comments
 (0)