Skip to content

Commit 0c2b489

Browse files
committed
remove fake calls
separate tag value processing to separate function make HTML decoding optional give an option to allow boolean attributes
1 parent 283867c commit 0c2b489

File tree

3 files changed

+184
-74
lines changed

3 files changed

+184
-74
lines changed

spec/attr_spec.js

Lines changed: 72 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,78 @@ describe("XMLParser", function () {
4848
result = validator.validate(xmlData);
4949
expect(result).toBe(true);
5050
});
51-
//1. can start with _, or letter
52-
//2. can contain :,-,_,.,a-z,a-Z,0-9
51+
52+
it("should not decode HTML entities / char by default", function () {
53+
var xmlData = '<element id="7" data="foo\nbar" bug="foo&ampbar&apos;"/>';
54+
var expected = {
55+
"element": {
56+
"id" : 7,
57+
"data" : "foo bar",
58+
"bug" : "foo&ampbar&apos;",
59+
}
60+
};
61+
62+
var result = parser.parse(xmlData, {
63+
attributeNamePrefix:"",
64+
ignoreAttributes: false,
65+
parseAttributeValue: true
66+
});
67+
68+
//console.log(JSON.stringify(result,null,4));
69+
expect(result).toEqual(expected);
70+
71+
result = validator.validate(xmlData);
72+
expect(result).toBe(true);
73+
});
74+
75+
it("should decode HTML entities / char", function () {
76+
var xmlData = '<element id="7" data="foo\nbar" bug="foo&ampbar&apos;"/>';
77+
var expected = {
78+
"element": {
79+
"id" : 7,
80+
"data" : "foo bar",
81+
"bug" : "foo&ampbar'",
82+
}
83+
};
84+
85+
var result = parser.parse(xmlData, {
86+
attributeNamePrefix:"",
87+
ignoreAttributes: false,
88+
parseAttributeValue: true,
89+
decodeHTMLchar: true
90+
});
91+
92+
//console.log(JSON.stringify(result,null,4));
93+
expect(result).toEqual(expected);
94+
95+
result = validator.validate(xmlData);
96+
expect(result).toBe(true);
97+
});
98+
99+
it("should parse Boolean Attributes", function () {
100+
var xmlData = '<element id="7" data/>';
101+
var expected = {
102+
"element": {
103+
"id" : 7,
104+
"data" : true
105+
}
106+
};
107+
108+
var result = parser.parse(xmlData, {
109+
attributeNamePrefix:"",
110+
ignoreAttributes: false,
111+
parseAttributeValue: true,
112+
allowBooleanAttributes : true
113+
});
114+
115+
//console.log(JSON.stringify(result,null,4));
116+
expect(result).toEqual(expected);
117+
118+
result = validator.validate(xmlData,{
119+
allowBooleanAttributes: true
120+
});
121+
expect(result).toBe(true);
122+
});
53123

54124

55125
it("should not parse attributes with name start with number", function () {
@@ -171,16 +241,6 @@ describe("XMLParser", function () {
171241
expect(result).toEqual(expected);
172242
});
173243

174-
it("should validate a tag with boolean attribute if allowed ", function () {
175-
var xmlData = "<rootNode ab cd='ef'></rootNode>";
176-
177-
var result = validator.validate(xmlData,{
178-
allowBooleanAttributes: true
179-
});
180-
//console.log(JSON.stringify(result,null,4));
181-
expect(result).toBe(true);
182-
});
183-
184244
it("should not validate xml with invalid attributes presents without value", function () {
185245
var xmlData = "<rootNode 123 abc='123' bc='567' />";
186246
var expected = {

spec/xmlParser_spec.js

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ var parser = require("../src/parser");
22

33
describe("XMLParser", function () {
44

5-
it("should parse all values as string, int, boolean or float", function () {
5+
/* it("should parse all values as string, int, boolean or float", function () {
66
var xmlData = "<rootNode><tag>value</tag><boolean>true</boolean><intTag>045</intTag><floatTag>65.34</floatTag></rootNode>";
77
var expected = {
88
"rootNode": {
@@ -471,7 +471,7 @@ describe("XMLParser", function () {
471471
});
472472
//console.log(JSON.stringify(result,null,4));
473473
expect(result).toEqual(expected);
474-
});
474+
}); */
475475

476476
/* it("should parse nodes as arrays", function () {
477477
var fs = require("fs");
@@ -547,7 +547,7 @@ describe("XMLParser", function () {
547547
expect(result).toEqual(expected);
548548
}); */
549549

550-
it("should skip namespace", function () {
550+
/* it("should skip namespace", function () {
551551
var xmlData = '<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" >'
552552
+' <soapenv:Header>'
553553
+' <cor:applicationID>dashboardweb</cor:applicationID>'
@@ -579,22 +579,59 @@ describe("XMLParser", function () {
579579
580580
var result = parser.parse(xmlData,{ ignoreNameSpace : true});
581581
expect(result).toEqual(expected);
582-
});
582+
}); */
583583

584-
/* it("should not trim tag value if not allowed ", function () {
584+
it("should not trim tag value if not allowed ", function () {
585585
var xmlData = "<rootNode> 123 </rootNode>";
586586
var expected = {
587587
"rootNode": " 123 "
588588
};
589589
var result = parser.parse(xmlData,{
590590
parseNodeValue: false,
591591
trimValues: false
592-
}).json;
592+
});
593593
//console.log(JSON.stringify(result,null,4));
594594
expect(result).toEqual(expected);
595-
}); */
595+
});
596+
597+
it("should not trim tag value but not parse if not allowed ", function () {
598+
var xmlData = "<rootNode> 123 </rootNode>";
599+
var expected = {
600+
"rootNode": "123"
601+
};
602+
var result = parser.parse(xmlData,{
603+
parseNodeValue: false,
604+
});
605+
//console.log(JSON.stringify(result,null,4));
606+
expect(result).toEqual(expected);
607+
});
596608

597-
/* it("should validate XML with DOCTYPE", function () {
609+
it("should not decode HTML entities by default", function () {
610+
var xmlData = "<rootNode> foo&ampbar&apos; </rootNode>";
611+
var expected = {
612+
"rootNode": "foo&ampbar&apos;"
613+
};
614+
var result = parser.parse(xmlData,{
615+
parseNodeValue: false,
616+
});
617+
//console.log(JSON.stringify(result,null,4));
618+
expect(result).toEqual(expected);
619+
});
620+
621+
it("should decode HTML entities if allowed", function () {
622+
var xmlData = "<rootNode> foo&ampbar&apos; </rootNode>";
623+
var expected = {
624+
"rootNode": "foo&bar'"
625+
};
626+
var result = parser.parse(xmlData,{
627+
parseNodeValue: false,
628+
decodeHTMLchar: true
629+
});
630+
//console.log(JSON.stringify(result,null,4));
631+
expect(result).toEqual(expected);
632+
});
633+
634+
it("should validate XML with DOCTYPE", function () {
598635
var xmlData = '<?xml version="1.0" standalone="yes" ?>'
599636
+ '<!--open the DOCTYPE declaration -'
600637
+ ' the open square bracket indicates an internal DTD-->'
@@ -616,5 +653,7 @@ describe("XMLParser", function () {
616653
});
617654
//console.log(JSON.stringify(result,null,4));
618655
expect(result).toEqual(expected);
619-
}); */
656+
});
657+
658+
620659
});

src/parser.js

Lines changed: 64 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@ var defaultOptions = {
1515
textNodeName : "#text",
1616
ignoreAttributes : true,
1717
ignoreNameSpace : false,
18-
//allowBooleanAttributes : false, //a tag can have attributes without any value
18+
allowBooleanAttributes : false, //a tag can have attributes without any value
1919
//ignoreRootElement : false,
2020
parseNodeValue : true,
2121
parseAttributeValue : false,
2222
arrayMode : false,
2323
trimValues: true, //Trim string values of tag and attributes
24+
decodeHTMLchar: false,
25+
//decodeStrict: false,
2426
};
2527

2628
var buildOptions = function (options){
@@ -52,12 +54,6 @@ var getTraversalObj =function (xmlData,options){
5254
var xmlObj = new xmlNode('!xml');
5355
var currentNode = xmlObj;
5456

55-
//fake function calls to reduce coparisons
56-
var resolveNS = options.ignoreNameSpace ? resolveNameSpace : fakeCall;
57-
var buildAttributesMap = options.ignoreAttributes ? fakeCallNoReturn : buildAttributesArr;
58-
var parseNodeVal = options.parseNodeValue ? parseValue : noParse;
59-
var parseAttrVal = options.parseAttributeValue ? parseValue : noParse;
60-
6157
var tagsRegx = new RegExp("<((!\\[CDATA\\[(.*?)(\\]\\]>))|((\\w*:)?([\\w:\\-\\._]+))([^>]*)>|((\\/)((\\w*:)?([\\w:\\-\\._]+))>))([^<]*)","g");
6258
var tag = tagsRegx.exec(xmlData);
6359
var nextTag = tagsRegx.exec(xmlData);
@@ -68,23 +64,25 @@ var getTraversalObj =function (xmlData,options){
6864
if(tagType === TagType.CLOSING){
6965
//add parsed data to parent node
7066
if(currentNode.parent && tag[14]){
71-
currentNode.parent.val = util.getValue(currentNode.parent.val) + "" + parseNodeVal(he.decode(tag[14]),options);
67+
currentNode.parent.val = util.getValue(currentNode.parent.val) + "" + processTagValue(tag[14],options);
7268
}
7369

74-
//currentNode.parent.val = (currentNode.parent.val || "") + parseNodeVal(he.decode(tag[14]),options);
7570
currentNode = currentNode.parent;
7671
}else if(tagType === TagType.CDATA){
7772
//no attribute
7873
//add text to parent node
7974
//add parsed data to parent node
80-
currentNode.val = (currentNode.val || "") + (tag[3] || "") + parseNodeVal(he.decode(tag[14]),options);
75+
currentNode.val = (currentNode.val || "") + (tag[3] || "") + processTagValue(tag[14],options);
8176
}else if(tagType === TagType.SELF){
8277
var childNode = new xmlNode( options.ignoreNameSpace ? tag[7] : tag[5],currentNode, "");
83-
childNode.attrsMap = buildAttributesMap(tag[8],options,resolveNS,parseAttrVal);
78+
if(tag[8] && tag[8].length > 1){
79+
tag[8] = tag[8].substr(0,tag[8].length -1);
80+
}
81+
childNode.attrsMap = buildAttributesMap(tag[8],options);
8482
currentNode.addChild(childNode);
8583
}else{//TagType.OPENING
86-
var childNode = new xmlNode( options.ignoreNameSpace ? tag[7] : tag[5],currentNode,parseNodeVal(he.decode(tag[14]),options));
87-
childNode.attrsMap = buildAttributesMap(tag[8],options,resolveNS,parseAttrVal);
84+
var childNode = new xmlNode( options.ignoreNameSpace ? tag[7] : tag[5],currentNode,processTagValue(tag[14],options));
85+
childNode.attrsMap = buildAttributesMap(tag[8],options);
8886
currentNode.addChild(childNode);
8987
currentNode = childNode;
9088
}
@@ -96,7 +94,19 @@ var getTraversalObj =function (xmlData,options){
9694
return xmlObj;
9795
};
9896

97+
function processTagValue(val,options){
98+
if(val){
99+
if(options.trimValues){
100+
val = val.trim();
101+
}
102+
if(options.decodeHTMLchar){
103+
val = he.decode(val);
104+
}
105+
val = parseValue(val,options.parseNodeValue);
106+
}
99107

108+
return val;
109+
}
100110

101111
function checkForTagType(match){
102112
if(match[4] === "]]>"){
@@ -119,31 +129,23 @@ var xml2json = function (xmlData,options){
119129
};
120130

121131

122-
function resolveNameSpace(tagname){
123-
var tags = tagname.split(":");
124-
var prefix = tagname.charAt(0) === "/" ? "/" : "";
125-
if(tags.length === 2) {
126-
if(tags[0] === "xmlns") {
127-
return "";
132+
function resolveNameSpace(tagname,options){
133+
if(options.ignoreNameSpace ){
134+
var tags = tagname.split(":");
135+
var prefix = tagname.charAt(0) === "/" ? "/" : "";
136+
if(tags.length === 2) {
137+
if(tags[0] === "xmlns") {
138+
return "";
139+
}
140+
tagname = prefix + tags[1];
128141
}
129-
tagname = prefix + tags[1];
130142
}
131143
return tagname;
132-
}
133144

134-
function noParse(val){
135-
if(typeof val === "string"){
136-
return val;
137-
}else{
138-
return "";
139-
}
140145
}
141146

142-
function parseValue(val,options){
143-
if(typeof val === "string"){
144-
if(options.trimValues){
145-
val = val.trim();
146-
}
147+
function parseValue(val,shouldParse){
148+
if(shouldParse && typeof val === "string"){
147149
if(val.trim() === "" || isNaN(val)){
148150
val = val === "true" ? true : val === "false" ? false : val;
149151
}else{
@@ -161,32 +163,41 @@ function parseValue(val,options){
161163
}
162164

163165
//TODO: change regex to capture NS
164-
var attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm");
165-
function buildAttributesArr(attrStr,options,resolveNS,parseAttrVal){
166-
if( typeof attrStr === "string" ){
166+
//var attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm");
167+
var attrsRegx = new RegExp("([^\\s=]+)\\s*(=\\s*(['\"])(.*?)\\3)?","g");
168+
function buildAttributesMap(attrStr,options){
169+
if( !options.ignoreAttributes && typeof attrStr === "string" ){
167170
//attrStr = attrStr || attrStr.trim();
168171

169-
if( attrStr.length > 3){
170-
171-
var matches = util.getAllMatches(attrStr,attrsRegx);
172-
var len = matches.length; //don't make it inline
173-
var attrs = {};
174-
for (var i = 0; i < len ; i++) {
175-
var attrName = resolveNS(matches[i][1]);
176-
if(attrName.length && attrName !== "xmlns") {
177-
attrs[options.attributeNamePrefix + attrName] = parseAttrVal(matches[i][3],options);
172+
var matches = util.getAllMatches(attrStr,attrsRegx);
173+
var len = matches.length; //don't make it inline
174+
var attrs = {};
175+
for (var i = 0; i < len ; i++) {
176+
var attrName = resolveNameSpace(matches[i][1],options);
177+
if(attrName.length && attrName !== "xmlns") {
178+
if(matches[i][4]){
179+
if(options.trimValues){
180+
matches[i][4] = matches[i][4].trim();
181+
}
182+
if(options.decodeHTMLchar){
183+
matches[i][4] = he.decode(matches[i][4], {isAttributeValue : true});
184+
}
185+
attrs[options.attributeNamePrefix + attrName] = parseValue(matches[i][4],options.parseAttributeValue);
186+
}else if(options.allowBooleanAttributes){
187+
attrs[options.attributeNamePrefix + attrName] = true;
178188
}
189+
179190
}
180-
if(!Object.keys(attrs).length){
181-
return;
182-
}
183-
if(options.attrNodeName){
184-
var attrCollection = {};
185-
attrCollection[options.attrNodeName] = attrs;
186-
return attrCollection;
187-
}
188-
return attrs;
189191
}
192+
if(!Object.keys(attrs).length){
193+
return;
194+
}
195+
if(options.attrNodeName){
196+
var attrCollection = {};
197+
attrCollection[options.attrNodeName] = attrs;
198+
return attrCollection;
199+
}
200+
return attrs;
190201
}
191202
}
192203

0 commit comments

Comments
 (0)