Skip to content
This repository was archived by the owner on Apr 22, 2020. It is now read-only.

Commit df47ec5

Browse files
Added support for the HTML 5 <pre class=prettyprint><code class=language-foo>...</code></pre> convention.
1 parent 9466702 commit df47ec5

File tree

5 files changed

+189
-48
lines changed

5 files changed

+189
-48
lines changed

README.html

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ <h3>How do I specify the language of my code?</h3>
126126
"xhtml", "xml", "xsl".
127127
&lt;/pre&gt;</pre>
128128

129+
<p>You may also use the
130+
<a href="http://dev.w3.org/html5/spec-author-view/the-code-element.html#the-code-element"
131+
>HTML 5</a> convention of embedding a <tt>code</tt> element inside the
132+
<code>PRE</code> and using <code>language-java</code> style classes.
133+
E.g. <xmp class="prettyprint"><pre class="prettyprint"><code class="language-java">...</code></pre></xmp>
134+
129135
<h3>It doesn't work on <tt>&lt;obfuscated code sample&gt;</tt>?</h3>
130136
<p>Yes. Prettifying obfuscated code is like putting lipstick on a pig
131137
&mdash; i.e. outside the scope of this tool.</p>
@@ -221,7 +227,7 @@ <h3>How can I customize the colors and styles of my code?</h3>
221227

222228
<div class="footer">
223229
<!-- Created: Tue Oct 3 17:51:56 PDT 2006 -->
224-
<!-- hhmts start -->Last modified: Fri May 27 16:05:12 PDT 2011 <!-- hhmts end -->
230+
<!-- hhmts start -->Last modified: Fri May 27 20:23:23 PDT 2011 <!-- hhmts end -->
225231
</div>
226232
</body>
227233
</html>

js-modules/prettify.js

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,32 @@ window['PR']
226226
out.push.apply(out, job.decorations);
227227
}
228228

229+
var notWs = /\S/;
230+
231+
/**
232+
* Given an element, if it has exactly one child element and every text node
233+
* it contains holds only space characters, returns that sole child element.
234+
* Otherwise returns undefined.
235+
* <p>
236+
* This is meant to return the CODE element in {@code <pre><code ...>} when
237+
* there is a single child element that contains all the non-space textual
238+
* content, but not to return anything where there are multiple child elements
239+
* as in {@code <pre><code>...</code><code>...</code></pre>} or when there
240+
* is textual content.
241+
*/
242+
function childContentWrapper(element) {
243+
var wrapper = undefined;
244+
for (var c = element.firstChild; c; c = c.nextSibling) {
245+
var type = c.nodeType;
246+
wrapper = (type === 1) // Element Node
247+
? (wrapper ? element : c)
248+
: (type === 3) // Text Node
249+
? (notWs.test(c.nodeValue) ? element : wrapper)
250+
: wrapper;
251+
}
252+
return wrapper === element ? undefined : wrapper;
253+
}
254+
229255
/** Given triples of [style, pattern, context] returns a lexing function,
230256
* The lexing function interprets the patterns to find token boundaries and
231257
* returns a decoration list of the form
@@ -299,7 +325,6 @@ window['PR']
299325
})();
300326

301327
var nPatterns = fallthroughStylePatterns.length;
302-
var notWs = /\S/;
303328

304329
/**
305330
* Lexes job.source and produces an output array job.decorations of style
@@ -744,20 +769,36 @@ window['PR']
744769
var k = 0;
745770
var prettyPrintingJob;
746771

772+
var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
773+
var prettyPrintRe = /\bprettyprint\b/;
774+
747775
function doWork() {
748776
var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
749-
clock.now() + 250 /* ms */ :
777+
clock['now']() + 250 /* ms */ :
750778
Infinity);
751-
for (; k < elements.length && clock.now() < endTime; k++) {
779+
for (; k < elements.length && clock['now']() < endTime; k++) {
752780
var cs = elements[k];
753-
if (cs.className && cs.className.indexOf('prettyprint') >= 0) {
781+
var className = cs.className;
782+
if (className.indexOf('prettyprint') >= 0) {
754783
// If the classes include a language extension, use it.
755784
// Language extensions can be specified like
756785
// <pre class="prettyprint lang-cpp">
757786
// the language extension "cpp" is used to find a language handler as
758787
// passed to PR.registerLangHandler.
759-
var langExtension = cs.className.match(/\blang-([\w.]+)(?!\S)/);
760-
if (langExtension) { langExtension = langExtension[1]; }
788+
// HTML5 recommends that a language be specified using "language-"
789+
// as the prefix instead. Google Code Prettify supports both.
790+
// http://dev.w3.org/html5/spec-author-view/the-code-element.html
791+
var langExtension = className.match(langExtensionRe);
792+
// Support <pre class="prettyprint"><code class="language-c">
793+
var wrapper;
794+
if (!langExtension && (wrapper = childContentWrapper(cs))
795+
&& "CODE" === wrapper.tagName) {
796+
langExtension = wrapper.className.match(langExtensionRe);
797+
}
798+
799+
if (langExtension) {
800+
langExtension = langExtension[1];
801+
}
761802

762803
// make sure this is not nested in an already prettified element
763804
var nested = false;

js-modules/recombineTagsAndDecorations.js

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,33 +27,37 @@ function recombineTagsAndDecorations(job) {
2727

2828
var decorations = job.decorations;
2929
var nDecorations = decorations.length;
30-
// Index into decorations after the last decoration which ends at or before sourceIndex.
30+
// Index into decorations after the last decoration which ends at or before
31+
// sourceIndex.
3132
var decorationIndex = 0;
3233

34+
// Remove all zero-length decorations.
35+
decorations[nDecorations] = sourceLength;
36+
var decPos, i;
37+
for (i = decPos = 0; i < nDecorations;) {
38+
if (decorations[i] !== decorations[i + 2]) {
39+
decorations[decPos++] = decorations[i++];
40+
decorations[decPos++] = decorations[i++];
41+
} else {
42+
i += 2;
43+
}
44+
}
45+
nDecorations = decPos;
46+
3347
// Simplify decorations.
34-
var decPos = 0;
35-
for (var i = 0; i < nDecorations;) {
36-
// Skip over any zero-length decorations.
48+
for (i = decPos = 0; i < nDecorations;) {
3749
var startPos = decorations[i];
38-
var start = i;
39-
while (start + 2 < nDecorations && decorations[start + 2] === startPos) {
40-
start += 2;
41-
}
4250
// Conflate all adjacent decorations that use the same style.
43-
var startDec = decorations[start + 1];
44-
var end = start + 2;
45-
while (end + 2 <= nDecorations
46-
&& (decorations[end + 1] === startDec
47-
|| decorations[end] === decorations[end + 2])) {
51+
var startDec = decorations[i + 1];
52+
var end = i + 2;
53+
while (end + 2 <= nDecorations && decorations[end + 1] === startDec) {
4854
end += 2;
4955
}
5056
decorations[decPos++] = startPos;
5157
decorations[decPos++] = startDec;
5258
i = end;
5359
}
5460

55-
// Strip any zero-length decoration at the end.
56-
if (decPos && decorations[decPos - 2] === sourceLength) { decPos -= 2; }
5761
nDecorations = decorations.length = decPos;
5862

5963
var decoration = null;
@@ -67,8 +71,10 @@ function recombineTagsAndDecorations(job) {
6771
var end = Math.min(spanEnd, decEnd);
6872

6973
var textNode = spans[spanIndex + 1];
70-
if (textNode.nodeType !== 1) { // Don't muck with <BR>s or <LI>s
71-
var styledText = source.substring(sourceIndex, end);
74+
var styledText;
75+
if (textNode.nodeType !== 1 // Don't muck with <BR>s or <LI>s
76+
// Don't introduce spans around empty text nodes.
77+
&& (styledText = source.substring(sourceIndex, end))) {
7278
// This may seem bizarre, and it is. Emitting LF on IE causes the
7379
// code to display with spaces instead of line breaks.
7480
// Emitting Windows standard issue linebreaks (CRLF) causes a blank

src/prettify.js

Lines changed: 70 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,32 @@ window['PR']
554554
out.push.apply(out, job.decorations);
555555
}
556556

557+
var notWs = /\S/;
558+
559+
/**
560+
* Given an element, if it has exactly one child element and every text node
561+
* it contains holds only space characters, returns that sole child element.
562+
* Otherwise returns undefined.
563+
* <p>
564+
* This is meant to return the CODE element in {@code <pre><code ...>} when
565+
* there is a single child element that contains all the non-space textual
566+
* content, but not to return anything where there are multiple child elements
567+
* as in {@code <pre><code>...</code><code>...</code></pre>} or when there
568+
* is textual content.
569+
*/
570+
function childContentWrapper(element) {
571+
var wrapper = undefined;
572+
for (var c = element.firstChild; c; c = c.nextSibling) {
573+
var type = c.nodeType;
574+
wrapper = (type === 1) // Element Node
575+
? (wrapper ? element : c)
576+
: (type === 3) // Text Node
577+
? (notWs.test(c.nodeValue) ? element : wrapper)
578+
: wrapper;
579+
}
580+
return wrapper === element ? undefined : wrapper;
581+
}
582+
557583
/** Given triples of [style, pattern, context] returns a lexing function,
558584
* The lexing function interprets the patterns to find token boundaries and
559585
* returns a decoration list of the form
@@ -627,7 +653,6 @@ window['PR']
627653
})();
628654

629655
var nPatterns = fallthroughStylePatterns.length;
630-
var notWs = /\S/;
631656

632657
/**
633658
* Lexes job.source and produces an output array job.decorations of style
@@ -1035,33 +1060,37 @@ window['PR']
10351060

10361061
var decorations = job.decorations;
10371062
var nDecorations = decorations.length;
1038-
// Index into decorations after the last decoration which ends at or before sourceIndex.
1063+
// Index into decorations after the last decoration which ends at or before
1064+
// sourceIndex.
10391065
var decorationIndex = 0;
10401066

1067+
// Remove all zero-length decorations.
1068+
decorations[nDecorations] = sourceLength;
1069+
var decPos, i;
1070+
for (i = decPos = 0; i < nDecorations;) {
1071+
if (decorations[i] !== decorations[i + 2]) {
1072+
decorations[decPos++] = decorations[i++];
1073+
decorations[decPos++] = decorations[i++];
1074+
} else {
1075+
i += 2;
1076+
}
1077+
}
1078+
nDecorations = decPos;
1079+
10411080
// Simplify decorations.
1042-
var decPos = 0;
1043-
for (var i = 0; i < nDecorations;) {
1044-
// Skip over any zero-length decorations.
1081+
for (i = decPos = 0; i < nDecorations;) {
10451082
var startPos = decorations[i];
1046-
var start = i;
1047-
while (start + 2 < nDecorations && decorations[start + 2] === startPos) {
1048-
start += 2;
1049-
}
10501083
// Conflate all adjacent decorations that use the same style.
1051-
var startDec = decorations[start + 1];
1052-
var end = start + 2;
1053-
while (end + 2 <= nDecorations
1054-
&& (decorations[end + 1] === startDec
1055-
|| decorations[end] === decorations[end + 2])) {
1084+
var startDec = decorations[i + 1];
1085+
var end = i + 2;
1086+
while (end + 2 <= nDecorations && decorations[end + 1] === startDec) {
10561087
end += 2;
10571088
}
10581089
decorations[decPos++] = startPos;
10591090
decorations[decPos++] = startDec;
10601091
i = end;
10611092
}
10621093

1063-
// Strip any zero-length decoration at the end.
1064-
if (decPos && decorations[decPos - 2] === sourceLength) { decPos -= 2; }
10651094
nDecorations = decorations.length = decPos;
10661095

10671096
var decoration = null;
@@ -1075,8 +1104,10 @@ window['PR']
10751104
var end = Math.min(spanEnd, decEnd);
10761105

10771106
var textNode = spans[spanIndex + 1];
1078-
if (textNode.nodeType !== 1) { // Don't muck with <BR>s or <LI>s
1079-
var styledText = source.substring(sourceIndex, end);
1107+
var styledText;
1108+
if (textNode.nodeType !== 1 // Don't muck with <BR>s or <LI>s
1109+
// Don't introduce spans around empty text nodes.
1110+
&& (styledText = source.substring(sourceIndex, end))) {
10801111
// This may seem bizarre, and it is. Emitting LF on IE causes the
10811112
// code to display with spaces instead of line breaks.
10821113
// Emitting Windows standard issue linebreaks (CRLF) causes a blank
@@ -1320,20 +1351,36 @@ window['PR']
13201351
var k = 0;
13211352
var prettyPrintingJob;
13221353

1354+
var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
1355+
var prettyPrintRe = /\bprettyprint\b/;
1356+
13231357
function doWork() {
13241358
var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
1325-
clock.now() + 250 /* ms */ :
1359+
clock['now']() + 250 /* ms */ :
13261360
Infinity);
1327-
for (; k < elements.length && clock.now() < endTime; k++) {
1361+
for (; k < elements.length && clock['now']() < endTime; k++) {
13281362
var cs = elements[k];
1329-
if (cs.className && cs.className.indexOf('prettyprint') >= 0) {
1363+
var className = cs.className;
1364+
if (className.indexOf('prettyprint') >= 0) {
13301365
// If the classes include a language extension, use it.
13311366
// Language extensions can be specified like
13321367
// <pre class="prettyprint lang-cpp">
13331368
// the language extension "cpp" is used to find a language handler as
13341369
// passed to PR.registerLangHandler.
1335-
var langExtension = cs.className.match(/\blang-([\w.]+)\b/);
1336-
if (langExtension) { langExtension = langExtension[1]; }
1370+
// HTML5 recommends that a language be specified using "language-"
1371+
// as the prefix instead. Google Code Prettify supports both.
1372+
// http://dev.w3.org/html5/spec-author-view/the-code-element.html
1373+
var langExtension = className.match(langExtensionRe);
1374+
// Support <pre class="prettyprint"><code class="language-c">
1375+
var wrapper;
1376+
if (!langExtension && (wrapper = childContentWrapper(cs))
1377+
&& "CODE" === wrapper.tagName) {
1378+
langExtension = wrapper.className.match(langExtensionRe);
1379+
}
1380+
1381+
if (langExtension) {
1382+
langExtension = langExtension[1];
1383+
}
13371384

13381385
// make sure this is not nested in an already prettified element
13391386
var nested = false;

tests/prettify_test_2.html

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
<!-- Language extensions tested -->
1111
<script src="../src/lang-clj.js" type="text/javascript"
1212
onerror="alert('Error: failed to load ' + this.src)"></script>
13+
<script src="../src/lang-lisp.js" type="text/javascript"
14+
onerror="alert('Error: failed to load ' + this.src)"></script>
1315
<script src="../src/lang-xq.js" type="text/javascript"
1416
onerror="alert('Error: failed to load ' + this.src)"></script>
1517
<script src="../src/lang-n.js" type="text/javascript"
@@ -438,6 +440,38 @@ <h1>Clojure Syntax Highlighting</h1>
438440

439441
(bar "foo" "bar" "blah" :baz)
440442
</pre>
443+
444+
<h1>HTML 5 language on code</h1>
445+
<p>
446+
The text is specified to be lisp by the class attribute.
447+
Semicolon is normally a valid punctuation character but
448+
in lisp it is a comment so should be colored as a comment
449+
if the className is being properly parsed.</p>
450+
<code class="prettyprint language-lisp" id="html5conv1">; foo</code>
451+
452+
<h1>HTML 5 language on nested code element</h1>
453+
<p>The language is attached to a CODE element inside a PRE.</p>
454+
<pre class="prettyprint" id="html5conv2"
455+
><code class="language-lisp">; foo</code></pre>
456+
457+
<h1>HTML 5 language on nested code element not foiled by space</h1>
458+
<p>The language is attached to a CODE element inside a PRE and there
459+
is space between the PRE element's tags and CODE element's tags.</p>
460+
<pre class="prettyprint" id="html5conv3">
461+
<code class="language-lisp">
462+
; foo
463+
</code>
464+
</pre>
465+
466+
<h1>HTML 5 nested code element language ignored if not only content</h1>
467+
<p>The below is not treated as lisp despite there being a lisp
468+
language specifier on the contained CODE element, because the CODE element
469+
does not wrap all non-space content.</p>
470+
<pre class="prettyprint" id="html5conv4">
471+
before CODE
472+
<code class="language-lisp">; foo</code>
473+
</pre>
474+
441475
</body>
442476

443477
<script type="text/javascript">
@@ -833,7 +867,14 @@ <h1>Clojure Syntax Highlighting</h1>
833867
' `END`OPN(`END`KWDfor`END`PLN `END`OPN[`END`PLNarg args`END`CLO]`END`PLN\n' +
834868
' `END`OPN(`END`KWDprn`END`PLN arg`END`CLO)))`END`PLN\n' +
835869
'\n' +
836-
'`END`OPN(`END`PLNbar `END`STR"foo"`END`PLN `END`STR"bar"`END`PLN `END`STR"blah"`END`PLN `END`TYP:baz`END`CLO)`END'
870+
'`END`OPN(`END`PLNbar `END`STR"foo"`END`PLN `END`STR"bar"`END`PLN `END`STR"blah"`END`PLN `END`TYP:baz`END`CLO)`END',
871+
html5conv1: '`COM; foo`END',
872+
html5conv2: '<code class="language-lisp">`COM; foo`END</code>',
873+
html5conv3: ('<code class="language-lisp">`PLN\n' +
874+
'`END`COM; foo`END`PLN\n' +
875+
'`END</code>\n'),
876+
html5conv4: ('`PLNbefore CODE\n' +
877+
'`END<code class="language-lisp">`PUN;`END`PLN foo`END</code>\n')
837878
};
838879
</script>
839880

0 commit comments

Comments
 (0)