@@ -127,9 +127,10 @@ var match = function(re) {
127127
128128// Returns the code for the character at the current subject position, or -1
129129// if there are no more characters.
130+ // This function must be non-BMP aware because the Unicode category of its result is used.
130131var peek = function ( ) {
131132 if ( this . pos < this . subject . length ) {
132- return this . subject . charCodeAt ( this . pos ) ;
133+ return this . subject . codePointAt ( this . pos ) ;
133134 } else {
134135 return - 1 ;
135136 }
@@ -270,7 +271,7 @@ var scanDelims = function(cc) {
270271 return null ;
271272 }
272273
273- char_before = startpos === 0 ? "\n" : this . subject . charAt ( startpos - 1 ) ;
274+ char_before = previousChar ( this . subject , startpos ) ;
274275
275276 cc_after = this . peek ( ) ;
276277 if ( cc_after === - 1 ) {
@@ -304,6 +305,25 @@ var scanDelims = function(cc) {
304305 }
305306 this . pos = startpos ;
306307 return { numdelims : numdelims , can_open : can_open , can_close : can_close } ;
308+
// Returns the character that ends immediately before index `pos` in `str`.
//
// - When `pos` is 0 there is no preceding character and "\n" is returned.
// - When the preceding UTF-16 code unit is not a low surrogate, that single
//   unit is returned.
// - When it is a low surrogate preceded by a high surrogate, the whole
//   two-unit surrogate pair is returned.
// - When it is an unpaired low surrogate, that lone unit is returned.
function previousChar(str, pos) {
    if (pos === 0) {
        return "\n";
    }
    var previous_cc = str.charCodeAt(pos - 1);
    // Anything outside 0xDC00-0xDFFF is not a low surrogate, so the single
    // code unit is already a complete character.
    if ((previous_cc & 0xfc00) !== 0xdc00) {
        return str.charAt(pos - 1);
    }
    // charCodeAt returns NaN when pos - 2 is out of range, and
    // NaN & 0xfc00 === 0, so that case takes the unpaired branch below.
    var two_previous_cc = str.charCodeAt(pos - 2);
    if ((two_previous_cc & 0xfc00) !== 0xd800) {
        // Unpaired low surrogate: return the lone code unit.
        return str.charAt(pos - 1);
    }
    // High surrogate followed by low surrogate: return both units together.
    return str.slice(pos - 2, pos);
}
307327} ;
308328
309329// Handle a delimiter marker for emphasis or a quote.
0 commit comments