@@ -321,7 +321,7 @@ var PR_innerHtmlWorks = null;
321321function PR_getInnerHtml ( node ) {
322322 // inner html is hopelessly broken in Safari 2.0.4 when the content is
323323 // an html description of well formed XML and the containing tag is a PRE
324- // tag, so we detect that case and emulate innerHTML.
324+ // tag, so we detect that case and emulate innerHTML.
325325 if ( null == PR_innerHtmlWorks ) {
326326 var testNode = document . createElement ( 'PRE' ) ;
327327 testNode . appendChild (
@@ -435,18 +435,20 @@ function PR_expandTabs(chunks, tabWidth) {
435435}
436436
437437/** split markup into chunks of html tags (style null) and
438- * plain text (style {@link #PR_PLAIN}).
438+ * plain text (style {@link #PR_PLAIN}), converting tags which are significant
439+ * for tokenization (<br>) into their textual equivalent.
439440 *
440- * @param {String } s html.
441+ * @param {String } s html where whitespace is considered significant .
441442 * @return {Array } of PR_Tokens of style PR_PLAIN, and null.
442443 * @private
443444 */
444445function PR_chunkify ( s ) {
445446 // The below pattern matches one of the following
446447 // (1) /[^<]+/ : A run of characters other than '<'
447- // (2) /<\/?[a-zA-Z][^>]*>/ : A probably tag that should not be highlighted
448- // (3) /</ : A '<' that does not begin a larger chunk. Treated as 1
449- var chunkPattern = / (?: [ ^ < ] + | < \/ ? [ a - z A - Z ] [ ^ > ] * > | < ) / g;
448+ // (2) /<!--.*?-->/: an HTML comment
449+ // (3) /<\/?[a-zA-Z][^>]*>/ : A probable tag that should not be highlighted
450+ // (4) /</ : A '<' that does not begin a larger chunk. Treated as case (1)
451+ var chunkPattern = / (?: [ ^ < ] + | < ! - - .* ?- - > | < \/ ? [ a - z A - Z ] [ ^ > ] * > | < ) / g;
450452 // since the pattern has the 'g' modifier and defines no capturing groups,
451453 // this will return a list of all chunks which we then classify and wrap as
452454 // PR_Tokens
@@ -456,15 +458,24 @@ function PR_chunkify(s) {
456458 var lastChunk = null ;
457459 for ( var i = 0 , n = matches . length ; i < n ; ++ i ) {
458460 var chunkText = matches [ i ] ;
459- var style ;
460- if ( chunkText . length < 2 || chunkText . charAt ( 0 ) !== '<' ) {
461- if ( lastChunk && lastChunk . style === PR_PLAIN ) {
462- lastChunk . token += chunkText ;
463- continue ;
461+ if ( ! chunkText . length ) { continue ; }
462+ var style = PR_PLAIN ;
463+ if ( chunkText . charAt ( 0 ) === '<' ) {
464+ if ( / ^ < ! - - / . test ( chunkText ) ) { continue ; }
465+ if ( chunkText . length > 1 ) { // a tag
466+ if ( / ^ < b r \b / i. test ( chunkText ) ) {
467+ // <br> tags are lexically significant so convert them to text.
468+ // This is undone later, when prettyPrintOne turns newlines back into <br>s.
469+ chunkText = '\n' ;
470+ } else {
471+ style = null ;
472+ }
464473 }
465- style = PR_PLAIN ;
466- } else { // a tag
467- style = null ;
474+ }
475+ if ( lastChunk && style == PR_PLAIN && lastChunk . style === PR_PLAIN ) {
476+ // combine into last chunk
477+ lastChunk . token += chunkText ;
478+ continue ;
468479 }
469480 lastChunk = new PR_Token ( chunkText , style ) ;
470481 chunks . push ( lastChunk ) ;
@@ -1104,7 +1115,7 @@ function PR_splitSourceNodes(tokens) {
11041115 if ( ci < nc ) {
11051116 tok = tokens [ ci ] ;
11061117 if ( null == tok . style ) {
1107- tokens . push ( tok ) ;
1118+ tokensOut . push ( tok ) ;
11081119 continue ;
11091120 }
11101121 } else if ( ! endScriptTag ) {
@@ -1251,9 +1262,9 @@ function PR_splitAttributeQuotes(tokens) {
12511262 tokensOut . push ( tokens [ i ] ) ;
12521263 }
12531264 if ( lc ) {
1254- tokens . push ( new PR_Token ( ls . substring ( 0 , lpos ) , PR_PLAIN ) ) ;
1265+ tokensOut . push ( new PR_Token ( ls . substring ( 0 , lpos ) , PR_PLAIN ) ) ;
12551266 } else {
1256- tokens . push ( tokens [ lastPlain ] ) ;
1267+ tokensOut . push ( tokens [ lastPlain ] ) ;
12571268 }
12581269 }
12591270 if ( lc ) {
@@ -1449,6 +1460,8 @@ function prettyPrintOne(s) {
14491460 // It's necessary for IE though which seems to lose the preformattedness
14501461 // of <pre> tags when their innerHTML is assigned.
14511462 // http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html
1463+ // It also serves to undo the conversion of <br>s to newlines done in
1464+ // PR_chunkify.
14521465 html = html
14531466 . replace ( / ( \r \n ? | \n | ) / g, '$1 ' )
14541467 . replace ( / \r \n ? | \n / g, '<br>' ) ;
0 commit comments