@@ -24,9 +24,22 @@ interface Symbols {
2424
// Matches a single-line string that contains an opening parenthesis followed,
// somewhere later, by a closing parenthesis.
// NOTE(review): presumably used to detect accounting-style currency formats that
// wrap negative amounts in parentheses — confirm at the usage site.
2525const CURRENCY_SIGN_REGEX = new RegExp ( '^.*\\(.*\\).*$' ) ;
// Numbering-system identifiers known to this module. The names line up with the
// NUMERALS_LATN / NUMERALS_ARAB / NUMERALS_HANIDEC digit sets declared below.
2626const NUMBERING_SYSTEMS = [ 'latn' , 'arab' , 'hanidec' ] ;
27- // eslint-disable-next-line no-irregular-whitespace
28- const GROUPING_SYMBOLS_REGEX = / [ , ٬ . ] / gu;
29- const NUMERALS_REGEX = / [ 0 1 2 3 4 5 6 7 8 9 ] | [ ٠ ١ ٢ ٣ ٤ ٥ ٦ ٧ ٨ ٩ ] | [ 〇 一 二 三 四 五 六 七 八 九 ] / gu;
// Symbol tables and regular expressions used while sanitizing user input.
// NOTE(review): as rendered here, the template and regex literals below contain
// spaces around interpolations and escapes, which would add a literal space to
// each character class. This looks like an extraction artifact — confirm against
// the real file before relying on the exact patterns.

// Accepted minus signs: U+002D HYPHEN-MINUS and U+2212 MINUS SIGN.
27+ const MINUS_SIGN_SYMBOLS = '\u002D\u2212' ;
// Global matcher for any accepted minus sign; the parser replaces every match
// with the current locale's minus sign.
28+ const MINUS_SIGN_REGEX = new RegExp ( `[${ MINUS_SIGN_SYMBOLS } ]` , 'g' ) ;
// Comma and period appear in BOTH the grouping and the decimal symbol sets
// below, so on their own they do not say which role they play.
29+ const AMBIGUOUS_SYMBOLS = ',.' ;
// U+066C ARABIC THOUSANDS SEPARATOR.
30+ const ARABIC_THOUSANDS_SEPARATOR = '\u066C' ;
// U+066B ARABIC DECIMAL SEPARATOR.
31+ const ARABIC_DECIMAL_SEPARATOR = '\u066B' ;
// Matches the directional marks U+200E (LEFT-TO-RIGHT MARK) and U+200F
// (RIGHT-TO-LEFT MARK); they are stripped before the numeric conversion.
32+ const LRM_RLM_REGEX = / [ \u200E \u200F ] / g;
// Characters treated as grouping separators: the ambiguous comma/period,
// U+00A0 NO-BREAK SPACE, U+202F NARROW NO-BREAK SPACE, the Arabic thousands
// separator, and U+2019 RIGHT SINGLE QUOTATION MARK.
33+ const GROUPING_SYMBOLS = `${ AMBIGUOUS_SYMBOLS } \u00A0\u202F${ ARABIC_THOUSANDS_SEPARATOR } \u2019` ;
// Global matcher for any grouping separator (used with match() and replace()).
34+ const GROUPING_SYMBOLS_REGEX = new RegExp ( `[${ GROUPING_SYMBOLS } ]` , 'g' ) ;
// Characters treated as possible decimal separators: the ambiguous
// comma/period plus the Arabic decimal separator.
35+ const DECIMAL_SYMBOLS = `${ AMBIGUOUS_SYMBOLS } ${ ARABIC_DECIMAL_SEPARATOR } ` ;
// Global matcher for any possible decimal separator (used with match()).
36+ const DECIMAL_SYMBOLS_REGEX = new RegExp ( `[${ DECIMAL_SYMBOLS } ]` , 'g' ) ;
// Digit sets, each ordered by digit value 0-9. The order matters: the parser
// converts a digit to its value with indexOf() and back again by index.
37+ const NUMERALS_LATN = '0123456789' ;
38+ const NUMERALS_ARAB = '٠١٢٣٤٥٦٧٨٩' ;
// NOTE(review): this regex and NUMERALS_HANIDEC_REGEX carry the `g` flag and are
// also called with .test() in the parser. test() on a global regex advances
// lastIndex after a successful match, so a later test() can start mid-string
// and report a false negative unless a replace() with the same regex has reset
// it in between. Consider a non-global regex for detection.
39+ const NUMERALS_ARAB_REGEX = new RegExp ( `[${ NUMERALS_ARAB } ]` , 'gu' ) ;
40+ const NUMERALS_HANIDEC = '〇一二三四五六七八九' ;
41+ const NUMERALS_HANIDEC_REGEX = new RegExp ( `[${ NUMERALS_HANIDEC } ]` , 'gu' ) ;
// Global, Unicode-aware matcher for a single digit from any of the three sets.
42+ const NUMERALS_REGEX = new RegExp ( `[${ NUMERALS_LATN } ]|[${ NUMERALS_ARAB } ]|[${ NUMERALS_HANIDEC } ]` , 'gu' ) ;
3043
3144/**
3245 * A NumberParser can be used to perform locale-aware parsing of numbers from Unicode strings,
@@ -158,8 +171,12 @@ class NumberParserImpl {
158171 }
159172 }
160173
174+ // Remove LRM and RLM characters, which are used in some locales to control text direction.
175+ fullySanitizedValue = fullySanitizedValue ?. replace ( LRM_RLM_REGEX , '' ) ;
176+
161177 let newValue = fullySanitizedValue ? + fullySanitizedValue : NaN ;
162178 if ( isNaN ( newValue ) ) {
179+ // console.log('Failed to parse number:', {value, fullySanitizedValue, locale: this.locale, options: this.options, symbols: this.symbols});
163180 return NaN ;
164181 }
165182
@@ -186,24 +203,73 @@ class NumberParserImpl {
186203 let sanitizedValue = value . trim ( ) ;
187204
188205 let numeralMatches = sanitizedValue . match ( NUMERALS_REGEX ) ;
189- if ( numeralMatches ) {
190- let beforeAbs = sanitizedValue . slice ( 0 , sanitizedValue . indexOf ( numeralMatches [ 0 ] ) ) ;
191- let afterAbs = sanitizedValue . slice ( sanitizedValue . lastIndexOf ( numeralMatches [ numeralMatches . length - 1 ] ) + 1 ) ;
192- let abs = sanitizedValue . slice ( sanitizedValue . indexOf ( numeralMatches [ 0 ] ) , sanitizedValue . lastIndexOf ( numeralMatches [ numeralMatches . length - 1 ] ) + 1 ) ;
206+ if ( numeralMatches && this . options . numberingSystem !== 'arab' ) {
207+ let firstNumeralMatch = numeralMatches [ 0 ] ;
208+ let lastNumeralMatch = numeralMatches [ numeralMatches . length - 1 ] ;
209+ let beforeAbs = sanitizedValue . slice ( 0 , sanitizedValue . indexOf ( firstNumeralMatch ) ) ;
210+ let afterAbs = sanitizedValue . slice ( sanitizedValue . lastIndexOf ( lastNumeralMatch ) + 1 ) ;
211+ let abs = sanitizedValue . slice ( sanitizedValue . indexOf ( firstNumeralMatch ) , sanitizedValue . lastIndexOf ( lastNumeralMatch ) + 1 ) ;
212+
193213 // Replace group and decimal symbols with the current locale's symbols
214+ let decimalSymbolMatch = abs . match ( DECIMAL_SYMBOLS_REGEX ) ;
194215 let groupSymbolMatch = abs . match ( GROUPING_SYMBOLS_REGEX ) ;
195216 let integerPart : string ;
196- let parsedIntegerPart : number ;
197217 let decimalPart : string ;
198- if ( groupSymbolMatch && groupSymbolMatch . length > 0 && abs . length - groupSymbolMatch . length > this . options . minimumIntegerDigits ) {
199- integerPart = abs . slice ( 0 , abs . indexOf ( groupSymbolMatch [ groupSymbolMatch . length - 1 ] ) ) ;
200- decimalPart = abs . slice ( abs . indexOf ( groupSymbolMatch [ groupSymbolMatch . length - 1 ] ) + 1 , abs . length ) ;
218+ if ( decimalSymbolMatch ) {
219+ let firstDecimalSymbol = decimalSymbolMatch [ 0 ] ;
220+ let lastDecimalSymbol = decimalSymbolMatch [ decimalSymbolMatch . length - 1 ] ;
221+ integerPart = abs . slice ( 0 , abs . lastIndexOf ( lastDecimalSymbol ) ) ;
222+ decimalPart = abs . slice ( abs . lastIndexOf ( lastDecimalSymbol ) + 1 , abs . length ) ;
201223 integerPart = integerPart . replace ( GROUPING_SYMBOLS_REGEX , '' ) ;
202- parsedIntegerPart = parseInt ( integerPart , 10 ) ;
203- if ( ! isNaN ( parsedIntegerPart ) ) {
204- integerPart = parsedIntegerPart . toString ( ) ;
224+ let isArabic = NUMERALS_ARAB_REGEX . test ( abs ) ;
225+ let isHanidec = NUMERALS_HANIDEC_REGEX . test ( abs ) ;
226+ if ( isArabic ) {
227+ // Replace Arabic numerals with Latin numerals,
228+ // then parse the integer part to remove leading zeros,
229+ // and finally replace Latin numerals with Arabic numerals.
230+ integerPart = (
231+ parseInt (
232+ integerPart
233+ . replace ( NUMERALS_ARAB_REGEX , ( d ) => NUMERALS_ARAB . indexOf ( d ) . toString ( ) ) ,
234+ 10
235+ ) . toString ( )
236+ . replace ( NUMERALS_REGEX , ( d ) => NUMERALS_ARAB . split ( '' ) [ parseInt ( d , 10 ) ] )
237+ ) ;
238+ } else if ( isHanidec ) {
239+ // Replace Hanidec numerals with Latin numerals,
240+ // then parse the integer part to remove leading zeros,
241+ // and finally replace Latin numerals with Hanidec numerals.
242+ integerPart = (
243+ parseInt (
244+ integerPart
245+ . replace ( NUMERALS_HANIDEC_REGEX , ( d ) => NUMERALS_HANIDEC . indexOf ( d ) . toString ( ) ) ,
246+ 10
247+ ) . toString ( )
248+ . replace ( NUMERALS_REGEX , ( d ) => NUMERALS_HANIDEC . split ( '' ) [ parseInt ( d , 10 ) ] )
249+ ) ;
250+ } else {
251+ integerPart = parseInt ( integerPart , 10 ) . toString ( ) ;
205252 }
206- abs = `${ integerPart ?? '' } ${ integerPart === '0' || groupSymbolMatch ?. [ groupSymbolMatch . length - 1 ] !== groupSymbolMatch ?. [ 0 ] ? this . symbols . decimal : groupSymbolMatch [ groupSymbolMatch . length - 1 ] } ${ decimalPart ?? '' } ` ;
253+ let decimalSymbol = decimalSymbolMatch . length > 1 && lastDecimalSymbol === firstDecimalSymbol ? '' : lastDecimalSymbol ;
254+ if ( decimalSymbol !== '' ) {
255+ if ( this . symbols . decimal &&
256+ lastDecimalSymbol !== this . symbols . decimal &&
257+ ! isArabic &&
258+ ! isHanidec &&
259+ (
260+ integerPart . length > 3 ||
261+ integerPart === '0' ||
262+ ( firstDecimalSymbol === this . symbols . decimal && lastDecimalSymbol === this . symbols . group ) ||
263+ ( decimalSymbolMatch . length === 1 && decimalPart . length > 3 )
264+ )
265+ ) {
266+ decimalSymbol = this . symbols . decimal ;
267+ }
268+ }
269+
270+ abs = `${ integerPart ?? '' } ${ decimalSymbol } ${ decimalPart ?? '' } ` ;
271+ } else if ( groupSymbolMatch ) {
272+ abs = parseInt ( abs . replace ( GROUPING_SYMBOLS_REGEX , '' ) , 10 ) . toString ( ) ;
207273 }
208274 sanitizedValue = `${ beforeAbs } ${ abs } ${ afterAbs } ` ;
209275 }
@@ -214,7 +280,7 @@ class NumberParserImpl {
214280 // Replace the ASCII minus sign with the minus sign used in the current locale
215281 // so that both are allowed in case the user's keyboard doesn't have the locale's minus sign.
216282 if ( this . symbols . minusSign ) {
217- sanitizedValue = sanitizedValue . replace ( '-' , this . symbols . minusSign ) ;
283+ sanitizedValue = sanitizedValue . replace ( MINUS_SIGN_REGEX , this . symbols . minusSign ) ;
218284 }
219285
220286 // In arab numeral system, their decimal character is 1643, but most keyboards don't type that
0 commit comments