Skip to content
This repository was archived by the owner on Apr 22, 2020. It is now read-only.

Commit f320c7a

Browse files
author
mikesamuel
committed
<br> tags were not being treated as ending a line comment, and HTML comments were not being stripped before lexing.
1 parent 81a8ec2 commit f320c7a

File tree

2 files changed

+54
-18
lines changed

2 files changed

+54
-18
lines changed

src/prettify.js

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ var PR_innerHtmlWorks = null;
321321
function PR_getInnerHtml(node) {
322322
// inner html is hopelessly broken in Safari 2.0.4 when the content is
323323
// an html description of well formed XML and the containing tag is a PRE
324-
// tag, so we detect that case and emulate innerHTML.
324+
// tag, so we detect that case and emulate innerHTML.
325325
if (null == PR_innerHtmlWorks) {
326326
var testNode = document.createElement('PRE');
327327
testNode.appendChild(
@@ -435,18 +435,20 @@ function PR_expandTabs(chunks, tabWidth) {
435435
}
436436

437437
/** split markup into chunks of html tags (style null) and
438-
* plain text (style {@link #PR_PLAIN}).
438+
* plain text (style {@link #PR_PLAIN}), converting tags which are significant
439+
* for tokenization (<br>) into their textual equivalent.
439440
*
440-
* @param {String} s html.
441+
* @param {String} s html where whitespace is considered significant.
441442
* @return {Array} of PR_Tokens of style PR_PLAIN, and null.
442443
* @private
443444
*/
444445
function PR_chunkify(s) {
445446
// The below pattern matches one of the following
446447
// (1) /[^<]+/ : A run of characters other than '<'
447-
// (2) /<\/?[a-zA-Z][^>]*>/ : A probably tag that should not be highlighted
448-
// (3) /</ : A '<' that does not begin a larger chunk. Treated as 1
449-
var chunkPattern = /(?:[^<]+|<\/?[a-zA-Z][^>]*>|<)/g;
448+
// (2) /<!--.*?-->/: an HTML comment
449+
// (3) /<\/?[a-zA-Z][^>]*>/ : A probable tag that should not be highlighted
450+
// (4) /</ : A '<' that does not begin a larger chunk. Treated as 1
451+
var chunkPattern = /(?:[^<]+|<!--.*?-->|<\/?[a-zA-Z][^>]*>|<)/g;
450452
// since the pattern has the 'g' modifier and defines no capturing groups,
451453
// this will return a list of all chunks which we then classify and wrap as
452454
// PR_Tokens
@@ -456,15 +458,24 @@ function PR_chunkify(s) {
456458
var lastChunk = null;
457459
for (var i = 0, n = matches.length; i < n; ++i) {
458460
var chunkText = matches[i];
459-
var style;
460-
if (chunkText.length < 2 || chunkText.charAt(0) !== '<') {
461-
if (lastChunk && lastChunk.style === PR_PLAIN) {
462-
lastChunk.token += chunkText;
463-
continue;
461+
if (!chunkText.length) { continue; }
462+
var style = PR_PLAIN;
463+
if (chunkText.charAt(0) === '<') {
464+
if (/^<!--/.test(chunkText)) { continue; }
465+
if (chunkText.length > 1) { // a tag
466+
if (/^<br\b/i.test(chunkText)) {
467+
// <br> tags are lexically significant so convert them to text.
468+
// This is undone later.
469+
chunkText = '\n';
470+
} else {
471+
style = null;
472+
}
464473
}
465-
style = PR_PLAIN;
466-
} else { // a tag
467-
style = null;
474+
}
475+
if (lastChunk && style == PR_PLAIN && lastChunk.style === PR_PLAIN) {
476+
// combine into last chunk
477+
lastChunk.token += chunkText;
478+
continue;
468479
}
469480
lastChunk = new PR_Token(chunkText, style);
470481
chunks.push(lastChunk);
@@ -1104,7 +1115,7 @@ function PR_splitSourceNodes(tokens) {
11041115
if (ci < nc) {
11051116
tok = tokens[ci];
11061117
if (null == tok.style) {
1107-
tokens.push(tok);
1118+
tokensOut.push(tok);
11081119
continue;
11091120
}
11101121
} else if (!endScriptTag) {
@@ -1251,9 +1262,9 @@ function PR_splitAttributeQuotes(tokens) {
12511262
tokensOut.push(tokens[i]);
12521263
}
12531264
if (lc) {
1254-
tokens.push(new PR_Token(ls.substring(0, lpos), PR_PLAIN));
1265+
tokensOut.push(new PR_Token(ls.substring(0, lpos), PR_PLAIN));
12551266
} else {
1256-
tokens.push(tokens[lastPlain]);
1267+
tokensOut.push(tokens[lastPlain]);
12571268
}
12581269
}
12591270
if (lc) {
@@ -1449,6 +1460,8 @@ function prettyPrintOne(s) {
14491460
// It's necessary for IE though which seems to lose the preformattedness
14501461
// of <pre> tags when their innerHTML is assigned.
14511462
// http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html
1463+
// and it serves to undo the conversion of <br>s to newlines done in
1464+
// chunkify.
14521465
html = html
14531466
.replace(/(\r\n?|\n| ) /g, '$1&nbsp;')
14541467
.replace(/\r\n?|\n/g, '<br>');

tests/prettify_test.html

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,18 @@ <h1>Bug 8 - tabs mangled</h1>
403403
<b>eleven</b>&#9;Twelve&#9;<b>thirteen</b>&#9;Fourteen&#9;fifteen&#9;|
404404
</pre>
405405

406+
<h1>Bug 14a - does not recognize <code>&lt;br&gt;</code> as newline</h1>
407+
<pre class="prettyprint" id="issue14a"
408+
>//comment<br />int main(int argc, char **argv)
409+
{}</pre>
410+
411+
<h1>Bug 14b - comments not ignored</h1>
412+
<pre class="prettyprint" id="issue14b"
413+
>&lt;!-- There's an <!-- BOO!! --><acronym title="tag soup">HTML</acronym> comment in my comment --&gt;
414+
&lt;p&gt;And another one inside the end tag&lt;/p<!-- GOTCHA!! -->&gt;
415+
</pre>
416+
417+
406418
</body>
407419

408420
<script type="text/javascript">
@@ -927,7 +939,18 @@ <h1>Bug 8 - tabs mangled</h1>
927939
'<br>' +
928940
'`END`COM// &amp; not used as prefix operator in javascript but this ' +
929941
'should still work`END`PLN<br>' +
930-
'`END`PUN&amp;`END`STR/foo/`END`PUN;`END')
942+
'`END`PUN&amp;`END`STR/foo/`END`PUN;`END'),
943+
issue14a: (
944+
'`COM//comment`END`PLN<br>' +
945+
'`END`KWDint`END`PLN main`END`PUN(`END`KWDint`END`PLN argc`END`PUN,`END' +
946+
'`PLN `END`KWDchar`END`PLN `END`PUN**`END`PLNargv`END`PUN)`END' +
947+
'`PLN<br>' +
948+
'`END`PUN{}`END'),
949+
issue14b: (
950+
'`COM&lt;!-- There\'s an `END<acronym title="tag soup">`COMHTML`END' +
951+
'</acronym>`COM comment in my comment --&gt;`END`PLN<br>' +
952+
'`END`TAG&lt;p&gt;`END`PLNAnd another one inside the end tag`END' +
953+
'`TAG&lt;/p&gt;`END')
931954
};
932955

933956

0 commit comments

Comments
 (0)