improv: shlex.split() function heap allocs & perf for large compile commands (#4458)

borjamunozf · web-flow · commit 65880d8a884e · 2025-05-16T09:09:00.000-04:00
* direct improve shlex heap allocs & perf for large compile commands

* Fix lint issues & update CHANGELOG
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,6 +13,7 @@ Improvements:
 
 - Add name de-mangling for C++ symbols in the Test Explorer view when running tests with coverage. [#4340](https://github.com/microsoft/vscode-cmake-tools/pull/4340) [@rjaegers](https://github.com/rjaegers)
 - No longer convert paths on lowercase on MacOS to enable cpp tools to resolve them. [#4325](https://github.com/microsoft/vscode-cmake-tools/pull/4325) [@tringenbach](https://github.com/tringenbach)
+- Speedup & reduce heap allocations in shlex split module function. Significant gains for mid-large compile_commands.json - CompilationDatabase construction. [#4458](https://github.com/microsoft/vscode-cmake-tools/pull/4458) [@borjamunozf](https://github.com/borjamunozf)
 
 Bug Fixes:
 
diff --git a/src/shlex.ts b/src/shlex.ts
@@ -13,67 +13,66 @@ export function* split(str: string, opt?: ShlexOptions): Iterable<string> {
     opt = opt || {
         mode: process.platform === 'win32' ? 'windows' : 'posix'
     };
+
     const quoteChars = opt.mode === 'posix' ? '\'"' : '"';
     const escapeChars = '\\';
     let escapeChar: string | undefined;
-    let token: string | undefined;
+    let token: string[] = [];
     let isSubQuote: boolean = false;
 
     for (let i = 0; i < str.length; ++i) {
         const char = str.charAt(i);
+
         if (escapeChar) {
             if (char === '\n') {
                 // Do nothing
             } else if (escapeChars.includes(char)) {
-                token = (token || '') + char;
+                token.push(char);
             } else {
-                token = (token || '') + escapeChar + char;
+                token.push(escapeChar, char);  // Append escape sequence
             }
             // We parsed an escape seq. Reset to no escape
             escapeChar = undefined;
             continue;
         }
 
         if (escapeChars.includes(char)) {
-            // We're parsing an escape sequence.
+            // Start escape sequence
             escapeChar = char;
             continue;
         }
 
         if (isSubQuote) {
             if (quoteChars.includes(char)) {
-                // Reached the end of a sub-quoted token.
+                // End of sub-quoted token
                 isSubQuote = false;
-                token = (token || '') + char;
+                token.push(char);
                 continue;
             }
-            // Another quoted char
-            token = (token || '') + char;
+            token.push(char);
             continue;
         }
 
         if (quoteChars.includes(char)) {
-            // Beginning of a sub-quoted token
+            // Beginning of a subquoted token
             isSubQuote = true;
-            // Accumulate
-            token = (token || '') + char;
+            token.push(char);
             continue;
         }
 
         if (!isSubQuote && /[\t \n\r\f]/.test(char)) {
-            if (token !== undefined) {
-                yield token;
+            if (token.length > 0) {
+                yield token.join('');
             }
-            token = undefined;
+            token = [];
             continue;
         }
 
-        // Accumulate
-        token = (token || '') + char;
+        token.push(char);
     }
 
-    if (token !== undefined) {
-        yield token;
+    if (token.length > 0) {
+        yield token.join('');
     }
 }