@@ -209,6 +209,11 @@ function _pr_isIE6() {
209209 * literal in a syntactically legal javascript program, and I've removed the
210210 * "in" keyword since it's not a keyword in many languages, and might be used
211211 * as a count of inches.
212+ *
213+ * <p>The link above does not accurately describe EcmaScript rules since
214+ * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
215+ * very well in practice.
216+ *
212217 * @private
213218 */
214219 var REGEXP_PRECEDER_PATTERN = function ( ) {
@@ -241,8 +246,7 @@ function _pr_isIE6() {
241246 // CAVEAT: this does not properly handle the case where a regular
242247 // expression immediately follows another since a regular expression may
243248 // have flags for case-sensitivity and the like. Having regexp tokens
244- // adjacent is not
245- // valid in any language I'm aware of, so I'm punting.
249+ // adjacent is not valid in any language I'm aware of, so I'm punting.
246250 // TODO: maybe style special characters inside a regexp as punctuation.
247251 } ( ) ;
248252
@@ -467,8 +471,9 @@ function _pr_isIE6() {
467471 // tag.
468472 var name = match . match ( pr_tagNameRe ) [ 2 ] ;
469473 var depth = 1 ;
474+ var j ;
470475 end_tag_loop:
471- for ( var j = i + 1 ; j < n ; ++ j ) {
476+ for ( j = i + 1 ; j < n ; ++ j ) {
472477 var name2 = matches [ j ] . match ( pr_tagNameRe ) ;
473478 if ( name2 && name2 [ 2 ] === name ) {
474479 if ( name2 [ 1 ] === '/' ) {
@@ -509,6 +514,23 @@ function _pr_isIE6() {
509514 . match ( / [ c C ] [ l L ] [ a A ] [ s S ] [ s S ] = \" [ ^ \" ] * \b n o c o d e \b / ) ;
510515 }
511516
/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out.
 *
 * @param {number} offset the index of sourceCode within the chunk of source
 *     whose decorations are already present on out.  It is added to every
 *     position the handler reports.
 * @param {string} sourceCode the text to decorate.  If it is empty (or
 *     otherwise falsy) the handler is not called and out is left untouched.
 * @param {function (string) : Array} langHandler called with a fresh object
 *     as its this-value and sourceCode as its only argument.  It is expected
 *     to return a flat list of alternating positions and styles
 *     (position0, style0, position1, style1, ...).
 * @param {Array} out the decoration list to append to; modified in place.
 */
function appendDecorations(offset, sourceCode, langHandler, out) {
  if (!sourceCode) { return; }
  var decorations = langHandler.call({}, sourceCode);
  // A missing or zero offset leaves the reported positions unchanged.
  var base = offset || 0;
  // Append pair by pair rather than via out.push.apply(out, decorations):
  // apply spreads the array into call arguments, which throws a RangeError
  // on engines that cap the argument count when a large chunk yields a long
  // list.  Copying also leaves the handler's returned array unmodified.
  for (var i = 0, n = decorations.length; i < n; i += 2) {
    out.push(decorations[i] + base, decorations[i + 1]);
  }
}
533+
512534 /** Given triples of [style, pattern, context] returns a lexing function.
513535 * The lexing function interprets the patterns to find token boundaries and
514536 * returns a decoration list of the form
@@ -526,6 +548,17 @@ function _pr_isIE6() {
526548 * E.g., if style is 'lang-lisp', and group 1 contains the text
527549 * '(hello (world))', then that portion of the token will be passed to the
528550 * registered lisp handler for formatting.
551+ * The text before and after group 1 will be restyled using this decorator,
552+ * so decorators should take care that this doesn't result in infinite
553+ * recursion. For example, the HTML lexer rule for SCRIPT elements looks
554+ * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match
555+ * '<script>foo()<\/script>', which would cause the current decorator to
556+ * be called with '<script>'. That would not match the same rule, since
557+ * group 1 must not be empty, so it would instead be styled as PR_TAG by
558+ * the generic tag rule. The handler registered for the 'js' extension would
559+ * then be called with 'foo()', and finally the current decorator would
560+ * be called with '<\/script>', which would not match the original rule, and
561+ * so the generic tag rule would identify it as a tag.
529562 *
530563 * Pattern must only match prefixes, and if it matches a prefix and context
531564 * is null or matches the last non-comment token parsed, then that match is
@@ -564,7 +597,7 @@ function _pr_isIE6() {
564597 var nPatterns = fallthroughStylePatterns . length ;
565598 var notWs = / \S / ;
566599
567- return function ( sourceCode , opt_basePos ) {
600+ return function decorate ( sourceCode , opt_basePos ) {
568601 opt_basePos = opt_basePos || 0 ;
569602 var decorations = [ opt_basePos , PR_PLAIN ] ;
570603 var lastToken = '' ;
@@ -603,31 +636,36 @@ function _pr_isIE6() {
603636 }
604637 }
605638
606- if ( ! match || ! match [ 1 ] || 'lang-' !== style . substring ( 0 , 5 ) ) {
639+ var isEmbedded = 'lang-' === style . substring ( 0 , 5 ) ;
640+ if ( isEmbedded && ! ( match && match [ 1 ] ) ) {
641+ isEmbedded = false ;
642+ style = PR_SOURCE ;
643+ }
644+ if ( ! isEmbedded ) {
607645 decorations . push ( opt_basePos + pos , style ) ;
608646 } else { // Treat group 1 as an embedded block of source code.
609- var lang = style . substring ( 5 ) ;
610647 var embeddedSource = match [ 1 ] ;
611648 var embeddedSourceStart = token . indexOf ( embeddedSource ) ;
612649 var embeddedSourceEnd = embeddedSourceStart + embeddedSource . length ;
613- if ( embeddedSourceStart ) {
614- decorations . push ( opt_basePos + pos , PR_SOURCE ) ;
615- }
650+ var lang = style . substring ( 5 ) ;
616651 if ( ! langHandlerRegistry . hasOwnProperty ( lang ) ) {
617652 lang = / ^ \s * < / . test ( embeddedSource )
618653 ? 'default-markup'
619- : 'default-code'
620- }
621- var delegate = langHandlerRegistry [ lang ] ;
622- var embeddedOffset = opt_basePos + pos + embeddedSourceStart ;
623- var embeddedDecorations = delegate . call ( { } , embeddedSource ) ;
624- for ( var i = 0 , n = embeddedDecorations . length ; i < n ; i += 2 ) {
625- decorations . push ( embeddedOffset + embeddedDecorations [ i ] ,
626- embeddedDecorations [ i + 1 ] ) ;
627- }
628- if ( embeddedSourceEnd < token . length ) {
629- decorations . push ( opt_basePos + pos + embeddedSourceEnd , PR_SOURCE ) ;
654+ : 'default-code' ;
630655 }
656+ var size = decorations . length - 10 ;
657+ appendDecorations (
658+ opt_basePos + pos ,
659+ token . substring ( 0 , embeddedSourceStart ) ,
660+ decorate , decorations ) ;
661+ appendDecorations (
662+ opt_basePos + pos + embeddedSourceStart ,
663+ token . substring ( embeddedSourceStart , embeddedSourceEnd ) ,
664+ langHandlerRegistry [ lang ] , decorations ) ;
665+ appendDecorations (
666+ opt_basePos + pos + embeddedSourceEnd ,
667+ token . substring ( embeddedSourceEnd ) ,
668+ decorate , decorations ) ;
631669 }
632670 pos += token . length ;
633671 tail = tail . substring ( token . length ) ;
@@ -638,14 +676,18 @@ function _pr_isIE6() {
638676 }
639677
640678 var PR_MARKUP_LEXER = createSimpleLexer ( [ ] , [
641- [ PR_PLAIN , / ^ [ ^ < ] + / , null ] ,
679+ [ PR_PLAIN , / ^ [ ^ < ? ] + / , null ] ,
642680 [ PR_DECLARATION , / ^ < ! \w [ ^ > ] * (?: > | $ ) / , null ] ,
643681 [ PR_COMMENT , / ^ < ! - - [ \s \S ] * ?(?: - - > | $ ) / , null ] ,
644- [ PR_SOURCE , / ^ < \? [ \s \S ] * ?(?: \? > | $ ) / , null ] ,
645- [ PR_SOURCE , / ^ < % [ \s \S ] * ?(?: % > | $ ) / , null ] ,
646- [ PR_SOURCE ,
647- // Tags whose content is not escaped, and which contain source code.
648- / ^ < ( s c r i p t | s t y l e | x m p ) \b [ ^ > ] * > [ \s \S ] * ?< \/ \1\b [ ^ > ] * > / i, null ] ,
682+ // Unescaped content in an unknown language
683+ [ 'lang-' , / ^ < \? ( [ \s \S ] + ?) (?: \? > | $ ) / , null ] ,
684+ [ 'lang-' , / ^ < % ( [ \s \S ] + ?) (?: % > | $ ) / , null ] ,
685+ [ PR_PUNCTUATION , / ^ (?: < [ % ? ] | [ % ? ] > ) / , null ] ,
686+ [ 'lang-' , / ^ < x m p \b [ ^ > ] * > ( [ \s \S ] + ?) < \/ x m p \b [ ^ > ] * > / i, null ] ,
687+ // Unescaped content in javascript. (Or possibly vbscript).
688+ [ 'lang-js' , / ^ < s c r i p t \b [ ^ > ] * > ( [ \s \S ] + ?) < \/ s c r i p t \b [ ^ > ] * > / i, null ] ,
689+ // Contains unescaped stylesheet content
690+ [ 'lang-css' , / ^ < s t y l e \b [ ^ > ] * > ( [ \s \S ] + ?) < \/ s t y l e \b [ ^ > ] * > / i, null ] ,
649691 [ PR_TAG , / ^ < \/ ? \w [ ^ < > ] * > / , null ]
650692 ] ) ;
651693 // Splits any of the script|style|xmp entries above into a start tag,
@@ -856,29 +898,6 @@ function _pr_isIE6() {
856898 regexLiterals : true
857899 } ) ;
858900
859- /** identify regions of markup that are really source code, and recursivley
860- * lex them.
861- * @private
862- */
863- function splitSourceNodes ( source , decorations ) {
864- for ( var i = 0 ; i < decorations . length ; i += 2 ) {
865- var style = decorations [ i + 1 ] ;
866- if ( style === PR_SOURCE ) {
867- // Recurse using the non-markup lexer
868- var start , end ;
869- start = decorations [ i ] ;
870- end = i + 2 < decorations . length ? decorations [ i + 2 ] : source . length ;
871- var subDecorations = decorateSource ( source . substring ( start , end ) ) ;
872- for ( var j = 0 , m = subDecorations . length ; j < m ; j += 2 ) {
873- subDecorations [ j ] += start ;
874- }
875- spliceArrayInto ( subDecorations , decorations , i , 2 ) ;
876- i += subDecorations . length - 2 ;
877- }
878- }
879- return decorations ;
880- }
881-
882901 /** identify attribute values that really contain source code and recursively
883902 * lex them.
884903 * @private
@@ -969,7 +988,6 @@ function _pr_isIE6() {
969988 // (PR_TAG, PR_PLAIN, PR_SOURCE, NAME, VALUE, null)
970989 var decorations = tokenizeMarkup ( sourceCode ) ;
971990 decorations = splitTagAttributes ( sourceCode , decorations ) ;
972- decorations = splitSourceNodes ( sourceCode , decorations ) ;
973991 decorations = splitSourceAttributes ( sourceCode , decorations ) ;
974992 return decorations ;
975993 }
@@ -1189,7 +1207,7 @@ function _pr_isIE6() {
11891207 document . getElementsByTagName ( 'xmp' ) ] ;
11901208 var elements = [ ] ;
11911209 for ( var i = 0 ; i < codeSegments . length ; ++ i ) {
1192- for ( var j = 0 ; j < codeSegments [ i ] . length ; ++ j ) {
1210+ for ( var j = 0 , n = codeSegments [ i ] . length ; j < n ; ++ j ) {
11931211 elements . push ( codeSegments [ i ] [ j ] ) ;
11941212 }
11951213 }
0 commit comments