Skip to content
This repository was archived by the owner on Apr 22, 2020. It is now read-only.

Commit ac34dec

Browse files
author
mikesamuel
committed
Fixed decoration of contents of style nodes in HTML. Consolidated code that handles nested languages in the process.
1 parent 325728e commit ac34dec

File tree

5 files changed

+94
-65
lines changed

5 files changed

+94
-65
lines changed

CHANGES.html

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ <h2>6 Jan 2009</h2>
7070
<a
7171
href="http://code.google.com/p/google-code-prettify/issues/detail?id=58"
7272
>issue 58</a>.
73+
<li>Changed HTML lexer to use the same embedded source mechanism as the
74+
wiki language handler, and changed to use the registered
75+
CSS handler for STYLE element content.
7376
</ul>
7477
</body>
7578
</html>

README.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ <h3>How do I put line numbers in my code?</h3>
154154
<div class="footer">
155155
<!-- Created: Tue Oct 3 17:51:56 PDT 2006 -->
156156
<!-- hhmts start -->
157-
Last modified: Fri Jul 4 20:49:30 PDT 2008
157+
Last modified: Wed Jan 7 13:25:42 PST 2009
158158
<!-- hhmts end -->
159159
</div>
160160
</body>

src/lang-wiki.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ PR.registerLangHandler(
3838
[PR.PR_LITERAL, /^(?:[A-Z][a-z][a-z0-9]+[A-Z][a-z][a-zA-Z0-9]+)\b/
3939
],
4040
// A preformatted block in an unknown language
41-
['lang-', /^\{\{\{([\s\S]*?)\}\}\}/],
41+
['lang-', /^\{\{\{([\s\S]+?)\}\}\}/],
4242
// A block of source code in an unknown language
4343
['lang-', /^`([^\r\n`]+)`/],
4444
// An inline URL.

src/prettify.js

Lines changed: 69 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,11 @@ function _pr_isIE6() {
209209
* literal in a syntactically legal javascript program, and I've removed the
210210
* "in" keyword since it's not a keyword in many languages, and might be used
211211
* as a count of inches.
212+
*
213+
* <p>The link above does not accurately describe EcmaScript rules since
214+
* it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
215+
* very well in practice.
216+
*
212217
* @private
213218
*/
214219
var REGEXP_PRECEDER_PATTERN = function () {
@@ -241,8 +246,7 @@ function _pr_isIE6() {
241246
// CAVEAT: this does not properly handle the case where a regular
242247
// expression immediately follows another since a regular expression may
243248
// have flags for case-sensitivity and the like. Having regexp tokens
244-
// adjacent is not
245-
// valid in any language I'm aware of, so I'm punting.
249+
// adjacent is not valid in any language I'm aware of, so I'm punting.
246250
// TODO: maybe style special characters inside a regexp as punctuation.
247251
}();
248252

@@ -467,8 +471,9 @@ function _pr_isIE6() {
467471
// tag.
468472
var name = match.match(pr_tagNameRe)[2];
469473
var depth = 1;
474+
var j;
470475
end_tag_loop:
471-
for (var j = i + 1; j < n; ++j) {
476+
for (j = i + 1; j < n; ++j) {
472477
var name2 = matches[j].match(pr_tagNameRe);
473478
if (name2 && name2[2] === name) {
474479
if (name2[1] === '/') {
@@ -509,6 +514,23 @@ function _pr_isIE6() {
509514
.match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/);
510515
}
511516

517+
/**
518+
* Apply the given language handler to sourceCode and add the resulting
519+
* decorations to out.
520+
* @param {number} offset the index of sourceCode within the chunk of source
521+
* whose decorations are already present on out.
522+
*/
523+
function appendDecorations(offset, sourceCode, langHandler, out) {
524+
if (!sourceCode) { return; }
525+
var decorations = langHandler.call({}, sourceCode);
526+
if (offset) {
527+
for (var i = decorations.length; (i -= 2) >= 0;) {
528+
decorations[i] += offset;
529+
}
530+
}
531+
out.push.apply(out, decorations);
532+
}
533+
512534
/** Given triples of [style, pattern, context] returns a lexing function.
513535
* The lexing function interprets the patterns to find token boundaries and
514536
* returns a decoration list of the form
@@ -526,6 +548,17 @@ function _pr_isIE6() {
526548
* E.g., if style is 'lang-lisp', and group 1 contains the text
527549
* '(hello (world))', then that portion of the token will be passed to the
528550
* registered lisp handler for formatting.
551+
* The text before and after group 1 will be restyled using this decorator
552+
* so decorators should take care that this doesn't result in infinite
553+
* recursion. For example, the HTML lexer rule for SCRIPT elements looks
554+
* something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match
555+
* '<script>foo()<\/script>', which would cause the current decorator to
556+
* be called with '<script>' which would not match the same rule since
557+
* group 1 must not be empty, so it would be instead styled as PR_TAG by
558+
* the generic tag rule. The handler registered for the 'js' extension would
559+
* then be called with 'foo()', and finally, the current decorator would
560+
* be called with '<\/script>' which would not match the original rule and
561+
* so the generic tag rule would identify it as a tag.
529562
*
530563
* Pattern must only match prefixes, and if it matches a prefix and context
531564
* is null or matches the last non-comment token parsed, then that match is
@@ -564,7 +597,7 @@ function _pr_isIE6() {
564597
var nPatterns = fallthroughStylePatterns.length;
565598
var notWs = /\S/;
566599

567-
return function (sourceCode, opt_basePos) {
600+
return function decorate(sourceCode, opt_basePos) {
568601
opt_basePos = opt_basePos || 0;
569602
var decorations = [opt_basePos, PR_PLAIN];
570603
var lastToken = '';
@@ -603,31 +636,36 @@ function _pr_isIE6() {
603636
}
604637
}
605638

606-
if (!match || !match[1] || 'lang-' !== style.substring(0, 5)) {
639+
var isEmbedded = 'lang-' === style.substring(0, 5);
640+
if (isEmbedded && !(match && match[1])) {
641+
isEmbedded = false;
642+
style = PR_SOURCE;
643+
}
644+
if (!isEmbedded) {
607645
decorations.push(opt_basePos + pos, style);
608646
} else { // Treat group 1 as an embedded block of source code.
609-
var lang = style.substring(5);
610647
var embeddedSource = match[1];
611648
var embeddedSourceStart = token.indexOf(embeddedSource);
612649
var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
613-
if (embeddedSourceStart) {
614-
decorations.push(opt_basePos + pos, PR_SOURCE);
615-
}
650+
var lang = style.substring(5);
616651
if (!langHandlerRegistry.hasOwnProperty(lang)) {
617652
lang = /^\s*</.test(embeddedSource)
618653
? 'default-markup'
619-
: 'default-code'
620-
}
621-
var delegate = langHandlerRegistry[lang];
622-
var embeddedOffset = opt_basePos + pos + embeddedSourceStart;
623-
var embeddedDecorations = delegate.call({}, embeddedSource);
624-
for (var i = 0, n = embeddedDecorations.length; i < n; i += 2) {
625-
decorations.push(embeddedOffset + embeddedDecorations[i],
626-
embeddedDecorations[i + 1]);
627-
}
628-
if (embeddedSourceEnd < token.length) {
629-
decorations.push(opt_basePos + pos + embeddedSourceEnd, PR_SOURCE);
654+
: 'default-code';
630655
}
656+
var size = decorations.length - 10;
657+
appendDecorations(
658+
opt_basePos + pos,
659+
token.substring(0, embeddedSourceStart),
660+
decorate, decorations);
661+
appendDecorations(
662+
opt_basePos + pos + embeddedSourceStart,
663+
token.substring(embeddedSourceStart, embeddedSourceEnd),
664+
langHandlerRegistry[lang], decorations);
665+
appendDecorations(
666+
opt_basePos + pos + embeddedSourceEnd,
667+
token.substring(embeddedSourceEnd),
668+
decorate, decorations);
631669
}
632670
pos += token.length;
633671
tail = tail.substring(token.length);
@@ -638,14 +676,18 @@ function _pr_isIE6() {
638676
}
639677

640678
var PR_MARKUP_LEXER = createSimpleLexer([], [
641-
[PR_PLAIN, /^[^<]+/, null],
679+
[PR_PLAIN, /^[^<?]+/, null],
642680
[PR_DECLARATION, /^<!\w[^>]*(?:>|$)/, null],
643681
[PR_COMMENT, /^<!--[\s\S]*?(?:-->|$)/, null],
644-
[PR_SOURCE, /^<\?[\s\S]*?(?:\?>|$)/, null],
645-
[PR_SOURCE, /^<%[\s\S]*?(?:%>|$)/, null],
646-
[PR_SOURCE,
647-
// Tags whose content is not escaped, and which contain source code.
648-
/^<(script|style|xmp)\b[^>]*>[\s\S]*?<\/\1\b[^>]*>/i, null],
682+
// Unescaped content in an unknown language
683+
['lang-', /^<\?([\s\S]+?)(?:\?>|$)/, null],
684+
['lang-', /^<%([\s\S]+?)(?:%>|$)/, null],
685+
[PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/, null],
686+
['lang-', /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i, null],
687+
// Unescaped content in javascript. (Or possibly vbscript).
688+
['lang-js', /^<script\b[^>]*>([\s\S]+?)<\/script\b[^>]*>/i, null],
689+
// Contains unescaped stylesheet content
690+
['lang-css', /^<style\b[^>]*>([\s\S]+?)<\/style\b[^>]*>/i, null],
649691
[PR_TAG, /^<\/?\w[^<>]*>/, null]
650692
]);
651693
// Splits any of the script|style|xmp entries above into a start tag,
@@ -856,29 +898,6 @@ function _pr_isIE6() {
856898
regexLiterals: true
857899
});
858900

859-
/** identify regions of markup that are really source code, and recursivley
860-
* lex them.
861-
* @private
862-
*/
863-
function splitSourceNodes(source, decorations) {
864-
for (var i = 0; i < decorations.length; i += 2) {
865-
var style = decorations[i + 1];
866-
if (style === PR_SOURCE) {
867-
// Recurse using the non-markup lexer
868-
var start, end;
869-
start = decorations[i];
870-
end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
871-
var subDecorations = decorateSource(source.substring(start, end));
872-
for (var j = 0, m = subDecorations.length; j < m; j += 2) {
873-
subDecorations[j] += start;
874-
}
875-
spliceArrayInto(subDecorations, decorations, i, 2);
876-
i += subDecorations.length - 2;
877-
}
878-
}
879-
return decorations;
880-
}
881-
882901
/** identify attribute values that really contain source code and recursively
883902
* lex them.
884903
* @private
@@ -969,7 +988,6 @@ function _pr_isIE6() {
969988
// (PR_TAG, PR_PLAIN, PR_SOURCE, NAME, VALUE, null)
970989
var decorations = tokenizeMarkup(sourceCode);
971990
decorations = splitTagAttributes(sourceCode, decorations);
972-
decorations = splitSourceNodes(sourceCode, decorations);
973991
decorations = splitSourceAttributes(sourceCode, decorations);
974992
return decorations;
975993
}
@@ -1189,7 +1207,7 @@ function _pr_isIE6() {
11891207
document.getElementsByTagName('xmp') ];
11901208
var elements = [];
11911209
for (var i = 0; i < codeSegments.length; ++i) {
1192-
for (var j = 0; j < codeSegments[i].length; ++j) {
1210+
for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
11931211
elements.push(codeSegments[i][j]);
11941212
}
11951213
}

tests/prettify_test.html

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@ <h1>HTML</h1>
497497
&lt;html&gt;
498498
&lt;head>
499499
&lt;title&gt;Fibonacci number&lt;/title&gt;
500+
&lt;style&gt;&lt;!-- BODY { text-decoration: blink } --&gt;&lt;/style&gt;
500501
&lt;/head&gt;
501502
&lt;body>
502503
&lt;noscript&gt;
@@ -1533,14 +1534,20 @@ <h1>CSS w/ language specified</h1>
15331534
html: (
15341535
'`PUN&lt;`END`TAGhtml`END`PUN&gt;`END`PLN<br>' +
15351536
'&nbsp; `END`PUN&lt;`END`TAGhead`END`PUN&gt;`END`PLN<br>' +
1536-
'&nbsp; &nbsp; `END`PUN&lt;`END`TAGtitle`END`PUN&gt;`END`PLNFibonacci number`END' +
1537+
'&nbsp; &nbsp; `END`PUN&lt;`END`TAGtitle`END`PUN&gt;`END' +
1538+
'`PLNFibonacci number`END' +
15371539
'`PUN&lt;/`END`TAGtitle`END`PUN&gt;`END`PLN<br>' +
1540+
'&nbsp; &nbsp; `END`PUN&lt;`END`TAGstyle`END`PUN&gt;`END`COM&lt;!--`END' +
1541+
'`PLN BODY `END`PUN{`END`PLN `END`KWDtext-decoration`END`PUN:`END' +
1542+
'`PLN blink `END`PUN}`END`PLN `END`COM--&gt;`END`PUN&lt;/`END' +
1543+
'`TAGstyle`END`PUN&gt;`END`PLN<br>' +
15381544
'&nbsp; `END`PUN&lt;/`END`TAGhead`END`PUN&gt;`END`PLN<br>' +
15391545
'&nbsp; `END`PUN&lt;`END`TAGbody`END`PUN&gt;`END`PLN<br>' +
15401546
'&nbsp; &nbsp; `END`PUN&lt;`END`TAGnoscript`END`PUN&gt;`END`PLN<br>' +
15411547
'&nbsp; &nbsp; &nbsp; `END`PUN&lt;`END`TAGdl`END`PUN&gt;`END`PLN<br>' +
15421548
'&nbsp; &nbsp; &nbsp; &nbsp; `END`PUN&lt;`END`TAGdt`END`PUN&gt;`END' +
1543-
'`PLNFibonacci numbers`END`PUN&lt;/`END`TAGdt`END`PUN&gt;`END`PLN<br>' +
1549+
'`PLNFibonacci numbers`END`PUN&lt;/`END`TAGdt`END`PUN&gt;`END' +
1550+
'`PLN<br>' +
15441551
'&nbsp; &nbsp; &nbsp; &nbsp; `END`PUN&lt;`END`TAGdd`END`PUN&gt;`END`PLN1`END' +
15451552
'`PUN&lt;/`END`TAGdd`END`PUN&gt;`END`PLN<br>' +
15461553
'&nbsp; &nbsp; &nbsp; &nbsp; `END`PUN&lt;`END`TAGdd`END`PUN&gt;`END`PLN1`END' +
@@ -2307,31 +2314,31 @@ <h1>CSS w/ language specified</h1>
23072314
'`STRhttp://www.google.com/?q=WikiSyntax+site:code.google.com`END' +
23082315
'`PLN `END`LITWikiSyntax`END`PUN]`END`PLN<br>' +
23092316
'<br>' +
2310-
'Lorem Ipsum `END`SRC``END`KWDwhile`END`PLN `END`PUN(`END`LIT1`END' +
2317+
'Lorem Ipsum ``END`KWDwhile`END`PLN `END`PUN(`END`LIT1`END' +
23112318
'`PUN)`END`PLN `END`KWDprint`END`PUN(`END`STR"blah blah"`END' +
2312-
'`PUN);`END`SRC``END`PLN<br>' +
2319+
'`PUN);`END`PLN`<br>' +
23132320
'<br>' +
23142321
'&nbsp; &nbsp;`END`PUN*`END`PLN Bullet<br>' +
23152322
'&nbsp; &nbsp;`END`PUN*`END`PLN Points<br>' +
23162323
'&nbsp; &nbsp; &nbsp; `END`PUN*`END`PLN `END`LITNestedBullet`END' +
23172324
'`PLN<br>' +
23182325
'<br>' +
23192326
'`END`PUN==`END`LITDroningOnAndOn`END`PUN==`END`PLN<br>' +
2320-
'`END`SRC{{{`END`PLN<br>' +
2327+
'{{{<br>' +
23212328
'&nbsp; `END`COM// Some EmbeddedSourceCode`END`PLN<br>' +
23222329
'&nbsp; `END`KWDvoid`END`PLN main`END`PUN()`END`PLN `END`PUN{`END' +
23232330
'`PLN<br>' +
23242331
'&nbsp; &nbsp; `END`TYPPrint`END`PUN(`END`STR\'hello world\'`END' +
23252332
'`PUN);`END`PLN<br>' +
23262333
'&nbsp; `END`PUN}`END`PLN<br>' +
2327-
'`END`SRC}}}`END`PLN<br>' +
2334+
'}}}<br>' +
23282335
'<br>' +
2329-
'`END`SRC{{{`END`PLN<br>' +
2336+
'{{{<br>' +
23302337
'&nbsp; `END`COM&lt;!-- Embedded XML --&gt;`END`PLN<br>' +
23312338
'&nbsp; `END`PUN&lt;`END`TAGfoo`END`PLN `END`ATNbar`END`PUN=`END' +
23322339
'`ATV"baz"`END`PUN&gt;&lt;`END`TAGboo`END`PLN `END' +
23332340
'`PUN/&gt;&lt;`END`TAGfoo`END`PUN&gt;`END`PLN<br>' +
2334-
'`END`SRC}}}`END'
2341+
'}}}`END'
23352342
),
23362343
css: (
23372344
'`COM&lt;!--`END`PLN<br>' +
@@ -2342,8 +2349,8 @@ <h1>CSS w/ language specified</h1>
23422349
'`PUN);`END`PLN<br>' +
23432350
'<br>' +
23442351
'HTML `END`PUN{`END`PLN `END`KWDcontent-before`END`PUN:`END`PLN `END' +
2345-
'`STR\'hello\20\'`END`PUN;`END`PLN `END`KWDcontent-after`END' +
2346-
'`PUN:`END`PLN `END`STR\'w\6f rld\'`END`PUN;`END`PLN<br>' +
2352+
'`STR\'hello\\20\'`END`PUN;`END`PLN `END`KWDcontent-after`END' +
2353+
'`PUN:`END`PLN `END`STR\'w\\6f rld\'`END`PUN;`END`PLN<br>' +
23472354
'&nbsp; &nbsp; &nbsp; &nbsp;`END`KWD-moz-spiff`END`PUN:`END`PLN `END' +
23482355
'`KWDinherit`END`PLN `END`KWD!important`END`PLN `END`PUN}`END' +
23492356
'`PLN<br>' +
@@ -2457,8 +2464,9 @@ <h1>CSS w/ language specified</h1>
24572464
htmlOut.push('<h2>' + html(lang) + ' OK<\/h1>');
24582465
}
24592466
}
2460-
htmlOut.push('<h2>' + (failures ? (failures + ' test(s) failed') : 'Pass') +
2461-
'<\/h2>');
2467+
var summary = (failures ? (failures + ' test(s) failed') : 'Tests Passed');
2468+
htmlOut.push('<h2>' + summary + '<\/h2>');
2469+
document.title += ' \u2014 ' + summary;
24622470
document.getElementById('errorReport').innerHTML =
24632471
htmlOut.join('').replace(/&lt;br&gt;/g, '&lt;br&gt;\n');
24642472
}

0 commit comments

Comments
 (0)