@@ -255,6 +255,11 @@ export class Tokenizer {
255255 // Assume Jupyter notebook tokenization rules?
256256 private _useNotebookMode = false ;
257257
258+ // Intern identifier strings within a single tokenization pass. This reduces
259+ // per-identifier allocations while still ensuring we don't retain substrings
260+ // that reference the original source text.
261+ private readonly _identifierInternedStrings = new Map < string , string > ( ) ;
262+
258263 tokenize (
259264 text : string ,
260265 start ?: number ,
@@ -284,6 +289,7 @@ export class Tokenizer {
284289 this . _lineRanges = [ ] ;
285290 this . _indentAmounts = [ ] ;
286291 this . _useNotebookMode = useNotebookMode ;
292+ this . _identifierInternedStrings . clear ( ) ;
287293
288294 const end = start + length ;
289295
@@ -905,20 +911,28 @@ export class Tokenizer {
905911
906912 if ( this . _cs . position > start ) {
907913 const value = this . _cs . getText ( ) . slice ( start , this . _cs . position ) ;
908- if ( _keywords . has ( value ) ) {
914+ const keywordType = _keywords . get ( value ) ;
915+ if ( keywordType !== undefined ) {
909916 this . _tokens . push (
910- KeywordToken . create ( start , this . _cs . position - start , _keywords . get ( value ) ! , this . _getComments ( ) )
917+ KeywordToken . create ( start , this . _cs . position - start , keywordType , this . _getComments ( ) )
911918 ) ;
912919 } else {
920+ const internedValue = this . _identifierInternedStrings . get ( value ) ?? this . _internIdentifierString ( value ) ;
913921 this . _tokens . push (
914- IdentifierToken . create ( start , this . _cs . position - start , cloneStr ( value ) , this . _getComments ( ) )
922+ IdentifierToken . create ( start , this . _cs . position - start , internedValue , this . _getComments ( ) )
915923 ) ;
916924 }
917925 return true ;
918926 }
919927 return false ;
920928 }
921929
930+ private _internIdentifierString ( value : string ) { // Registers `value` in the identifier intern map and returns the stored copy; the visible caller reaches this only when the map lookup misses.
931+ const clonedValue = cloneStr ( value ) ; // Copy before storing; presumably cloneStr detaches the string from the source text it was sliced from — confirm against cloneStr.
932+ this . _identifierInternedStrings . set ( clonedValue , clonedValue ) ; // The copy serves as both key and value; the caller's `value` itself is not stored.
933+ return clonedValue ; // Callers should use this returned copy rather than the string they passed in.
934+ }
935+
922936 private _isPossibleNumber ( ) : boolean {
923937 if ( isDecimal ( this . _cs . currentChar ) ) {
924938 return true ;
0 commit comments