Let the parser handle decoding HTML entities

Sebmaster · domenic · commit 92e5c656381a · 2014-07-09T14:44:04.000-07:00
diff --git a/lib/jsdom/browser/htmltodom.js b/lib/jsdom/browser/htmltodom.js
@@ -63,7 +63,8 @@ function HtmlToDom(parser) {
         var parserInstance = new parser.Parser(handler, {
           xmlMode: isXML,
           lowerCaseTags: !isXML,
-          lowerCaseAttributeNames: !isXML
+          lowerCaseAttributeNames: !isXML,
+          decodeEntities: true
         });
 
         parserInstance.includeLocation = false;
@@ -175,10 +176,8 @@ function setChild(parent, node) {
     break;
 
     case 'text':
-      // Decode HTML entities if we're not inside a <script> or <style> tag:
-      newNode = currentDocument.createTextNode(/^(?:script|style)$/i.test(parent.nodeName) ?
-                                                   node.data :
-                                                   HTMLDecode(node.data));
+      // HTML entities should already be decoded by the parser, so no need to decode them
+      newNode = currentDocument.createTextNode(node.data);
     break;
 
     case 'comment':
@@ -198,7 +197,7 @@ function setChild(parent, node) {
       // catchin errors here helps with improperly escaped attributes
       // but properly fixing parent should (can only?) be done in the htmlparser itself
       try {
-        newNode.setAttribute(c, HTMLDecode(node.attribs[c]));
+        newNode.setAttribute(c, node.attribs[c]);
       } catch(e2) { /* noop */ }
     }
   }