Skip to content
This repository was archived by the owner on Apr 22, 2020. It is now read-only.

Commit cabfab6

Browse files
author
mikesamuel
committed
consolidated lexing code. sourceDecorator is now implemented using createSimpleLexer
1 parent 4fdd2b5 commit cabfab6

File tree

1 file changed

+27
-93
lines changed

1 file changed

+27
-93
lines changed

src/prettify.js

Lines changed: 27 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -90,19 +90,6 @@ window['_pr_isIE6'] = function () {
9090

9191

9292
(function () {
93-
/**
 * Splits input on whitespace and returns an Object mapping each non-empty
 * part to true.
 *
 * Runs of spaces, tabs or newlines all count as one separator, and leading or
 * trailing whitespace produces no entry.
 *
 * Note that the result is a plain Object, so an {@code in} test against it
 * also sees names inherited from Object.prototype (e.g. "constructor").
 *
 * @param {string} words a whitespace separated list of words.
 * @return {Object} a map whose own keys are the distinct words, each mapped
 *     to true.
 */
function wordSet(words) {
  var parts = words.split(/\s+/);
  var set = {};
  for (var i = parts.length; --i >= 0;) {
    var w = parts[i];
    // split() yields '' for leading/trailing separators; skip those.
    if (w) { set[w] = true; }
  }
  return set;
}
105-
10693
// Keyword lists for various languages.
10794
var FLOW_CONTROL_KEYWORDS =
10895
"break continue do else for if return while ";
@@ -809,8 +796,6 @@ window['_pr_isIE6'] = function () {
809796
/^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
810797
null, '"\'']);
811798
}
812-
fallthroughStylePatterns.push(
813-
[PR_PLAIN, /^(?:[^\'\"\`\/\#]+)/, null, ' \r\n']);
814799
if (options['hashComments']) {
815800
shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
816801
}
@@ -837,85 +822,35 @@ window['_pr_isIE6'] = function () {
837822
[PR_STRING, new RegExp(REGEX_LITERAL), REGEXP_PRECEDER_PATTERN]);
838823
}
839824

840-
var keywords = wordSet(options['keywords']);
841-
842-
options = null;
843-
844-
/** splits the given string into comment, string, and "other" tokens.
845-
* @param {string} sourceCode as plain text
846-
* @return {Array.<number|string>} a decoration list.
847-
* @private
848-
*/
849-
var splitStringAndCommentTokens = createSimpleLexer(
850-
shortcutStylePatterns, fallthroughStylePatterns);
825+
var keywords = options['keywords'].replace(/^\s+|\s+$/g, '');
826+
if (keywords.length) {
827+
fallthroughStylePatterns.push(
828+
[PR_KEYWORD,
829+
new RegExp('^(?:' + keywords.replace(/\s+/g, '|') + ')\\b'), null]);
830+
}
851831

852-
var styleLiteralIdentifierPuncRecognizer = createSimpleLexer([], [
853-
[PR_PLAIN, /^\s+/, null, ' \r\n'],
832+
shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']);
833+
fallthroughStylePatterns.push(
854834
// TODO(mikesamuel): recognize non-latin letters and numerals in idents
855-
[PR_PLAIN, /^[a-z_$@][a-z_$@0-9]*/i, null],
856-
// A hex number
857-
[PR_LITERAL, /^0x[a-f0-9]+[a-z]/i, null],
858-
// An octal or decimal number, possibly in scientific notation
835+
[PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*/i, null, '@'],
836+
[PR_TYPE, /^@?[A-Z]+[a-z][A-Za-z_$@0-9]*/, null],
837+
[PR_PLAIN, /^[a-z_$][a-z_$@0-9]*/i, null],
859838
[PR_LITERAL,
860-
/^(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d+)(?:e[+\-]?\d+)?[a-z]*/i,
861-
null, '123456789'],
862-
[PR_PUNCTUATION, /^[^\s\w\.$@]+/, null]
863-
// Fallback will handle decimal points not adjacent to a digit
864-
]);
865-
866-
/**
 * Splits plain text tokens into more specific tokens, and then tries to
 * recognize keywords and types.
 *
 * Walks {@code job.decorations}, a flat list of (position, style) pairs.
 * Every run styled PR_PLAIN is re-lexed with
 * styleLiteralIdentifierPuncRecognizer, and each resulting PR_PLAIN sub-token
 * is reclassified as:
 *   - PR_PUNCTUATION when it is a lone '.',
 *   - PR_KEYWORD when it is one of the configured keywords,
 *   - PR_LITERAL or PR_TYPE when it looks like a Java style annotation or
 *     type name (optional '@', upper case letters followed by a lower case
 *     letter).
 * The refined pairs then take the place of the original pair.
 *
 * @param {Object} job reads {@code source}, {@code decorations} and
 *     {@code basePos}; {@code decorations} is updated in place.
 * @private
 */
function splitNonStringNonCommentTokens(job) {
  var source = job.source, decorations = job.decorations;
  var basePos = job.basePos;
  for (var i = 0; i < decorations.length; i += 2) {
    var style = decorations[i + 1];
    if (style === PR_PLAIN) {
      var start, end, chunk, subDecs;
      // Decoration positions are offset by basePos; convert to indices into
      // source.  The run extends to the next decoration or to end of input.
      start = decorations[i] - basePos;
      end = i + 2 < decorations.length
          ? decorations[i + 2] - basePos
          : source.length;
      chunk = source.substring(start, end);
      var subJob = { source: chunk, basePos: start + basePos };
      styleLiteralIdentifierPuncRecognizer(subJob);
      subDecs = subJob.decorations;
      for (var j = 0, m = subDecs.length; j < m; j += 2) {
        var subStyle = subDecs[j + 1];
        if (subStyle === PR_PLAIN) {
          var subStart = subDecs[j] - basePos;
          var subEnd = j + 2 < m ? subDecs[j + 2] - basePos : end;
          var token = source.substring(subStart, subEnd);
          if (token === '.') {
            subDecs[j + 1] = PR_PUNCTUATION;
          } else if (Object.prototype.hasOwnProperty.call(keywords, token)) {
            // Own-property check: a bare `in` test would also match names
            // inherited from Object.prototype such as "constructor" or
            // "toString" and style ordinary identifiers as keywords.
            subDecs[j + 1] = PR_KEYWORD;
          } else if (/^@?[A-Z][A-Z$]*[a-z][A-Za-z$]*$/.test(token)) {
            // classify types and annotations using Java's style
            // conventions.  Char code 64 is '@', which marks an annotation.
            subDecs[j + 1] = token.charCodeAt(0) === 64
                ? PR_LITERAL : PR_TYPE;
          }
        }
      }
      // NOTE(review): spliceArrayInto is defined elsewhere; presumably it
      // replaces the 2 entries at index i with the contents of subDecs.
      spliceArrayInto(subDecs, decorations, i, 2);
      // Skip over the entries just inserted; the loop header adds the
      // remaining 2.
      i += subDecs.length - 2;
    }
  }
  job.decorations = decorations;
}
909-
910-
return function (job) {
911-
// Split into strings, comments, and other.
912-
// We do this because strings and comments are easily recognizable and can
913-
// contain stuff that looks like other tokens, so we want to mark those
914-
// early so we don't recurse into them.
915-
splitStringAndCommentTokens(job);
916-
// Split non comment|string tokens on whitespace and word boundaries
917-
splitNonStringNonCommentTokens(job);
918-
};
839+
new RegExp(
840+
'^(?:'
841+
// A hex number
842+
+ '0x[a-f0-9]+'
843+
// or an octal or decimal number,
844+
+ '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d\\+)'
845+
// possibly in scientific notation
846+
+ '(?:e[+\\-]?\\d+)?'
847+
+ ')'
848+
// with an optional modifier like UL for unsigned long
849+
+ '[a-z]*', 'i'),
850+
null, '0123456789'],
851+
[PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#]*/, null]);
852+
853+
return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
919854
}
920855

921856
var decorateSource = sourceDecorator({
@@ -1026,8 +961,7 @@ window['_pr_isIE6'] = function () {
1026961
splitSourceAttributes(source, decorations, basePos);
1027962
}
1028963

1029-
/**
1030-
* Breaks {@code job.source} around style boundaries in
964+
/** Breaks {@code job.source} around style boundaries in
1031965
* {@code job.decorations} while re-interleaving {@code job.extractedTags},
1032966
* and leaves the result in {@code job.prettyPrintedHtml}.
1033967
* @param {Object} job like {

0 commit comments

Comments
 (0)