Skip to content
This repository was archived by the owner on Apr 22, 2020. It is now read-only.

Commit e351373

Browse files
author
mikesamuel
committed
implemented language specific formatters, fixed python docstrings, and slashes inside regular expression charsets.
1 parent f5c2c36 commit e351373

File tree

4 files changed

+732
-113
lines changed

4 files changed

+732
-113
lines changed

CHANGES.html

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ <h2>29 March 2007</h2>
3737
>patch</a>
3838
<li>Added a <a href="http://google-code-prettify.googlecode.com/files/prettify-small.zip">distribution</a> that has comments and
3939
whitespace removed to reduce download size from 45.5kB to 12.8kB.
40+
<li>Added <a href="http://code.google.com/p/google-code-prettify/issues/detail?id=17">language specific formatters</a> that are triggered by the presence
41+
of a <code>lang-&lt;language-file-extension&gt;</code> class</li>
42+
<li>Fixed <a href="http://code.google.com/p/google-code-prettify/issues/detail?id=29">bug</a>: python handling of <code>'''string'''</code>
43+
<li>Fixed bug: <code>/</code> in regex <code>[charsets]</code> should not end regex
4044
</ul>
4145
</body>
4246
</html>

README.html

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,17 @@ <h3>Which languages does it work for?</h3>
7171
CAML-like languages.</p>
7272

7373
<h3>How do I specify which language my code is in?</h3>
74-
<p>There's no way to tell it which language because would complicate the
75-
interface. If it doesn't guess the language properly, that's a bug.</p>
74+
<p>You don't need to specify the language since <code>prettyprint()</code>
75+
will guess. You can specify a language by specifying the language extension
76+
along with the <code>prettyprint</code> class like so:</p>
77+
<code class="prettyprint lang-html">
78+
&lt;pre class=&quot;prettyprint <b>lang-html</b>&quot;&gt;<br>
79+
&nbsp; The lang-* class specifies the language file extensions.<br>
80+
&nbsp; Supported file extensions include<br>
81+
&nbsp; &nbsp; "c", "cc", "cpp", "cs", "cxx", "cyc", "java", "bsh", "csh", "sh",<br>
82+
&nbsp; &nbsp; "cv", "py", "perl", "pl", "pm", "rb", "js",<br>
83+
&nbsp; &nbsp; "htm", "html", "xhtml", "xml", "xsl".<br>
84+
&lt;/pre&gt;</code>
7685

7786
<h3>It doesn't work on <tt>&lt;obfuscated code sample&gt;</tt>?</h3>
7887
<p>Yes. Prettifying obfuscated code is like putting lipstick on a pig
@@ -93,7 +102,7 @@ <h3>What's changed?</h3>
93102
<div class="footer">
94103
<!-- Created: Tue Oct 3 17:51:56 PDT 2006 -->
95104
<!-- hhmts start -->
96-
Last modified: Mon Oct 9 16:47:24 PDT 2006
105+
Last modified: Fri Jul 4 20:49:30 PDT 2008
97106
<!-- hhmts end -->
98107
</div>
99108
</body>

src/prettify.js

Lines changed: 162 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ function pr_isIE6() {
140140
"BEGIN END ";
141141
var SH_KEYWORDS = "break case continue do done elif else esac eval fi for " +
142142
"function if in local set then until while ";
143-
var ALL_KEYWORD_SET = wordSet(
143+
var ALL_KEYWORDS = (
144144
CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS +
145145
PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS);
146146

@@ -270,6 +270,7 @@ function pr_isIE6() {
270270
var pr_aposEnt = /&apos;/g;
271271
var pr_quotEnt = /&quot;/g;
272272
var pr_ampEnt = /&amp;/g;
273+
var pr_nbspEnt = /&nbsp;/g;
273274
/** unescapes html to plain text. */
274275
function htmlToText(html) {
275276
var pos = html.indexOf('&');
@@ -298,7 +299,8 @@ function pr_isIE6() {
298299
.replace(pr_gtEnt, '>')
299300
.replace(pr_aposEnt, "'")
300301
.replace(pr_quotEnt, '"')
301-
.replace(pr_ampEnt, '&');
302+
.replace(pr_ampEnt, '&')
303+
.replace(pr_nbspEnt, ' ');
302304
}
303305

304306
/** is the given node's innerHTML normally unescaped? */
@@ -333,7 +335,7 @@ function pr_isIE6() {
333335
break;
334336
}
335337
}
336-
338+
337339
var PR_innerHtmlWorks = null;
338340
function getInnerHtml(node) {
339341
// inner html is hopelessly broken in Safari 2.0.4 when the content is
@@ -497,7 +499,7 @@ function pr_isIE6() {
497499
* function that takes source code and returns a list of decorations.
498500
*/
499501
function createSimpleLexer(shortcutStylePatterns,
500-
fallthroughStylePatterns) {
502+
fallthroughStylePatterns) {
501503
var shortcuts = {};
502504
(function () {
503505
var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
@@ -562,76 +564,6 @@ function pr_isIE6() {
562564
};
563565
}
564566

565-
var PR_C_STYLE_STRING_AND_COMMENT_LEXER = createSimpleLexer([
566-
[PR_STRING, /^\'(?:[^\\\']|\\[\s\S])*(?:\'|$)/, null, "'"],
567-
[PR_STRING, /^\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)/, null, '"'],
568-
[PR_STRING, /^\`(?:[^\\\`]|\\[\s\S])*(?:\`|$)/, null, '`']
569-
], [
570-
[PR_PLAIN, /^(?:[^\'\"\`\/\#]+)/, null, ' \r\n'],
571-
[PR_COMMENT, /^#[^\r\n]*/, null, '#'],
572-
[PR_COMMENT, /^\/\/[^\r\n]*/, null],
573-
[PR_STRING, /^\/(?:[^\\\*\/]|\\[\s\S])+(?:\/|$)/,
574-
REGEXP_PRECEDER_PATTERN],
575-
[PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]
576-
]);
577-
/** splits the given string into comment, string, and "other" tokens.
578-
* @param {string} sourceCode as plain text
579-
* @return {Array.<number|string>} a decoration list.
580-
* @private
581-
*/
582-
function splitStringAndCommentTokens(sourceCode) {
583-
return PR_C_STYLE_STRING_AND_COMMENT_LEXER(sourceCode);
584-
}
585-
586-
var PR_C_STYLE_LITERAL_IDENTIFIER_PUNC_RECOGNIZER = createSimpleLexer([], [
587-
[PR_PLAIN, /^\s+/, null, ' \r\n'],
588-
// TODO(mikesamuel): recognize non-latin letters and numerals in idents
589-
[PR_PLAIN, /^[a-z_$@][a-z_$@0-9]*/i, null],
590-
// A hex number
591-
[PR_LITERAL, /^0x[a-f0-9]+[a-z]/i, null],
592-
// An octal or decimal number, possibly in scientific notation
593-
[PR_LITERAL, /^(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d+)(?:e[+\-]?\d+)?[a-z]*/i,
594-
null, '123456789'],
595-
[PR_PUNCTUATION, /^[^\s\w\.$@]+/, null]
596-
// Fallback will handle decimal points not adjacent to a digit
597-
]);
598-
599-
/** splits plain text tokens into more specific tokens, and then tries to
600-
* recognize keywords, and types.
601-
* @private
602-
*/
603-
function splitNonStringNonCommentTokens(source, decorations) {
604-
for (var i = 0; i < decorations.length; i += 2) {
605-
var style = decorations[i + 1];
606-
if (style === PR_PLAIN) {
607-
var start, end, chunk, subDecs;
608-
start = decorations[i];
609-
end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
610-
chunk = source.substring(start, end);
611-
subDecs = PR_C_STYLE_LITERAL_IDENTIFIER_PUNC_RECOGNIZER(chunk, start);
612-
for (var j = 0, m = subDecs.length; j < m; j += 2) {
613-
var subStyle = subDecs[j + 1];
614-
if (subStyle === PR_PLAIN) {
615-
var subStart = subDecs[j];
616-
var subEnd = j + 2 < m ? subDecs[j + 2] : chunk.length;
617-
var token = source.substring(subStart, subEnd);
618-
if (token === '.') {
619-
subDecs[j + 1] = PR_PUNCTUATION;
620-
} else if (token in ALL_KEYWORD_SET) {
621-
subDecs[j + 1] = PR_KEYWORD;
622-
} else if (/^@?[A-Z][A-Z$]*[a-z][A-Za-z$]*$/.test(token)) {
623-
// classify types and annotations using Java's style conventions
624-
subDecs[j + 1] = token.charAt(0) === '@' ? PR_LITERAL : PR_TYPE;
625-
}
626-
}
627-
}
628-
spliceArrayInto(subDecs, decorations, i, 2);
629-
i += subDecs.length - 2;
630-
}
631-
}
632-
return decorations;
633-
}
634-
635567
var PR_MARKUP_LEXER = createSimpleLexer([], [
636568
[PR_PLAIN, /^[^<]+/, null],
637569
[PR_DECLARATION, /^<!\w[^>]*(?:>|$)/, null],
@@ -704,7 +636,7 @@ function pr_isIE6() {
704636
return decorations;
705637
}
706638

707-
/** returns a list of decorations, where even entries
639+
/** returns a function that produces a list of decorations from source text.
708640
*
709641
* This code treats ", ', and ` as string delimiters, and \ as a string
710642
* escape. It does not recognize perl's qq() style strings.
@@ -715,30 +647,130 @@ function pr_isIE6() {
715647
*
716648
* It recognizes C, C++, and shell style comments.
717649
*
718-
* @param {string} sourceCode as plain text
719-
* @return {Array.<string|number>} a decoration list
650+
* @param {Object} options a set of optional parameters.
651+
* @return {function (sourceCode : string) : Array.<string|number>} a
652+
* decorator that takes sourceCode as plain text and that returns a
653+
* decoration list
720654
*/
721-
function decorateSource(sourceCode) {
722-
// Split into strings, comments, and other.
723-
// We do this because strings and comments are easily recognizable and can
724-
// contain stuff that looks like other tokens, so we want to mark those
725-
// early so we don't recurse into them.
726-
var decorations = splitStringAndCommentTokens(sourceCode);
655+
function sourceDecorator(options) {
656+
var shortcutStylePatterns = [], fallthroughStylePatterns = [];
657+
if (options.tripleQuotedStrings) {
658+
shortcutStylePatterns.push(
659+
[PR_STRING, /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
660+
null, '\'"']);
661+
} else if (options.multiLineStrings) {
662+
shortcutStylePatterns.push(
663+
[PR_STRING, /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
664+
null, '\'"`']);
665+
} else {
666+
shortcutStylePatterns.push(
667+
[PR_STRING,
668+
/^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
669+
null, '"\'']);
670+
}
671+
fallthroughStylePatterns.push(
672+
[PR_PLAIN, /^(?:[^\'\"\`\/\#]+)/, null, ' \r\n']);
673+
if (options.hashComments) {
674+
shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
675+
}
676+
if (options.cStyleComments) {
677+
fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
678+
}
679+
if (options.regexLiterals) {
680+
fallthroughStylePatterns.push(
681+
[PR_STRING,
682+
/^\/(?:[^\\\*\/\[]|\\[\s\S]|\[(?:[^\]\\]|\\.)*(?:\]|$))+(?:\/|$)/,
683+
REGEXP_PRECEDER_PATTERN]);
684+
}
685+
if (options.cStyleComments) {
686+
fallthroughStylePatterns.push(
687+
[PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
688+
}
727689

728-
// Split non comment|string tokens on whitespace and word boundaries
729-
decorations = splitNonStringNonCommentTokens(sourceCode, decorations);
690+
var keywords = wordSet(options.keywords);
691+
692+
options = null;
693+
694+
/** splits the given string into comment, string, and "other" tokens.
695+
* @param {string} sourceCode as plain text
696+
* @return {Array.<number|string>} a decoration list.
697+
* @private
698+
*/
699+
var splitStringAndCommentTokens = createSimpleLexer(
700+
shortcutStylePatterns, fallthroughStylePatterns);
701+
702+
var styleLiteralIdentifierPuncRecognizer = createSimpleLexer([], [
703+
[PR_PLAIN, /^\s+/, null, ' \r\n'],
704+
// TODO(mikesamuel): recognize non-latin letters and numerals in idents
705+
[PR_PLAIN, /^[a-z_$@][a-z_$@0-9]*/i, null],
706+
// A hex number
707+
[PR_LITERAL, /^0x[a-f0-9]+[a-z]/i, null],
708+
// An octal or decimal number, possibly in scientific notation
709+
[PR_LITERAL,
710+
/^(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d+)(?:e[+\-]?\d+)?[a-z]*/i,
711+
null, '123456789'],
712+
[PR_PUNCTUATION, /^[^\s\w\.$@]+/, null]
713+
// Fallback will handle decimal points not adjacent to a digit
714+
]);
730715

731-
return decorations;
732-
}
716+
/** splits plain text tokens into more specific tokens, and then tries to
717+
* recognize keywords, and types.
718+
* @private
719+
*/
720+
function splitNonStringNonCommentTokens(source, decorations) {
721+
for (var i = 0; i < decorations.length; i += 2) {
722+
var style = decorations[i + 1];
723+
if (style === PR_PLAIN) {
724+
var start, end, chunk, subDecs;
725+
start = decorations[i];
726+
end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
727+
chunk = source.substring(start, end);
728+
subDecs = styleLiteralIdentifierPuncRecognizer(chunk, start);
729+
for (var j = 0, m = subDecs.length; j < m; j += 2) {
730+
var subStyle = subDecs[j + 1];
731+
if (subStyle === PR_PLAIN) {
732+
var subStart = subDecs[j];
733+
var subEnd = j + 2 < m ? subDecs[j + 2] : chunk.length;
734+
var token = source.substring(subStart, subEnd);
735+
if (token === '.') {
736+
subDecs[j + 1] = PR_PUNCTUATION;
737+
} else if (token in keywords) {
738+
subDecs[j + 1] = PR_KEYWORD;
739+
} else if (/^@?[A-Z][A-Z$]*[a-z][A-Za-z$]*$/.test(token)) {
740+
// classify types and annotations using Java's style conventions
741+
subDecs[j + 1] = token.charAt(0) === '@' ? PR_LITERAL : PR_TYPE;
742+
}
743+
}
744+
}
745+
spliceArrayInto(subDecs, decorations, i, 2);
746+
i += subDecs.length - 2;
747+
}
748+
}
749+
return decorations;
750+
}
733751

734-
function cSourceDecorator(keywords, opt_options) {
735-
return decorateSource; // TODO: implement me
736-
}
752+
return function (sourceCode) {
753+
// Split into strings, comments, and other.
754+
// We do this because strings and comments are easily recognizable and can
755+
// contain stuff that looks like other tokens, so we want to mark those
756+
// early so we don't recurse into them.
757+
var decorations = splitStringAndCommentTokens(sourceCode);
758+
759+
// Split non comment|string tokens on whitespace and word boundaries
760+
decorations = splitNonStringNonCommentTokens(sourceCode, decorations);
737761

738-
function shellSourceDecorator(keywords, opt_options) {
739-
return decorateSource; // TODO: implement me
762+
return decorations;
763+
};
740764
}
741765

766+
var decorateSource = sourceDecorator({
767+
keywords: ALL_KEYWORDS,
768+
hashComments: true,
769+
cStyleComments: true,
770+
multiLineStrings: true,
771+
regexLiterals: true
772+
});
773+
742774
/** identify regions of markup that are really source code, and recursively
743775
* lex them.
744776
* @private
@@ -958,22 +990,44 @@ function pr_isIE6() {
958990
}
959991
registerLangHandler(decorateSource, ['default-code']);
960992
registerLangHandler(decorateMarkup,
961-
['default-markup', 'html', 'htm', 'xhtml', 'xml']);
962-
registerLangHandler(cSourceDecorator(CPP_KEYWORDS),
963-
['c', 'cc', 'cpp', 'cs', 'cxx', 'cyc']);
964-
registerLangHandler(cSourceDecorator(JAVA_KEYWORDS), ['java']);
965-
registerLangHandler(shellSourceDecorator(SH_KEYWORDS), ['csh', 'sh']);
966-
registerLangHandler(
967-
shellSourceDecorator(PYTHON_KEYWORDS), ['cv', 'py'],
968-
{ tripleQuotedStrings: true });
969-
registerLangHandler(
970-
shellSourceDecorator(PERL_KEYWORDS,
971-
{ regexLiteral: true, multiLineStrings: true }), ['pl']);
972-
registerLangHandler(
973-
shellSourceDecorator(RUBY_KEYWORDS,
974-
{ regexLiteral: true, multiLineStrings: true }), ['rb']);
975-
registerLangHandler(
976-
cSourceDecorator(JSCRIPT_KEYWORDS, { regexLiteral: true }), ['js']);
993+
['default-markup', 'html', 'htm', 'xhtml', 'xml', 'xsl']);
994+
registerLangHandler(sourceDecorator({
995+
keywords: CPP_KEYWORDS,
996+
hashComments: true,
997+
cStyleComments: true
998+
}), ['c', 'cc', 'cpp', 'cs', 'cxx', 'cyc']);
999+
registerLangHandler(sourceDecorator({
1000+
keywords: JAVA_KEYWORDS,
1001+
cStyleComments: true
1002+
}), ['java']);
1003+
registerLangHandler(sourceDecorator({
1004+
keywords: SH_KEYWORDS,
1005+
hashComments: true,
1006+
multiLineStrings: true
1007+
}), ['bsh', 'csh', 'sh']);
1008+
registerLangHandler(sourceDecorator({
1009+
keywords: PYTHON_KEYWORDS,
1010+
hashComments: true,
1011+
multiLineStrings: true,
1012+
tripleQuotedStrings: true
1013+
}), ['cv', 'py']);
1014+
registerLangHandler(sourceDecorator({
1015+
keywords: PERL_KEYWORDS,
1016+
hashComments: true,
1017+
multiLineStrings: true,
1018+
regexLiterals: true
1019+
}), ['perl', 'pl', 'pm']);
1020+
registerLangHandler(sourceDecorator({
1021+
keywords: RUBY_KEYWORDS,
1022+
hashComments: true,
1023+
multiLineStrings: true,
1024+
regexLiterals: true
1025+
}), ['rb']);
1026+
registerLangHandler(sourceDecorator({
1027+
keywords: JSCRIPT_KEYWORDS,
1028+
cStyleComments: true,
1029+
regexLiterals: true
1030+
}), ['js']);
9771031

9781032
function prettyPrintOne(sourceCodeHtml, opt_langExtension) {
9791033
try {

0 commit comments

Comments
 (0)