Skip to content

Commit d55b8e1

Browse files
authored
perf(childprocess): track/monitor health of spawned processes #6304
## Problem Toolkit/Q slow performance is consistently reported by customers. Toolkit/Q features can spawn processes, included many long-lived processes such as LSP servers. The cost of these processes is hidden and hard to troubleshoot. Any misbehaving process will not be noticed except in a coarse "Toolkit/Q causes vscode to be slow" manner. ## Solution - Track all running processes in a map-like data structure. - use a `PollingSet` to periodically "monitor" their system usage (%cpu, memory bytes), logging warning messages when it exceeds a threshold. - Add developer command to instantiate spawned processes via `ChildProcess` wrapper to make this easier to test.
1 parent 062d24a commit d55b8e1

File tree

5 files changed

+313
-7
lines changed

5 files changed

+313
-7
lines changed

packages/core/src/dev/activation.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import { getSessionId } from '../shared/telemetry/util'
2424
import { NotificationsController } from '../notifications/controller'
2525
import { DevNotificationsState } from '../notifications/types'
2626
import { QuickPickItem } from 'vscode'
27+
import { ChildProcess } from '../shared/utilities/processUtils'
2728

2829
interface MenuOption {
2930
readonly label: string
@@ -44,6 +45,7 @@ export type DevFunction =
4445
| 'editAuthConnections'
4546
| 'notificationsSend'
4647
| 'forceIdeCrash'
48+
| 'startChildProcess'
4749

4850
export type DevOptions = {
4951
context: vscode.ExtensionContext
@@ -126,6 +128,11 @@ const menuOptions: () => Record<DevFunction, MenuOption> = () => {
126128
detail: `Will SIGKILL ExtHost, { pid: ${process.pid}, sessionId: '${getSessionId().slice(0, 8)}-...' }, but the IDE itself will not crash.`,
127129
executor: forceQuitIde,
128130
},
131+
startChildProcess: {
132+
label: 'ChildProcess: Start child process',
133+
detail: 'Start ChildProcess from our utility wrapper for testing',
134+
executor: startChildProcess,
135+
},
129136
}
130137
}
131138

@@ -578,3 +585,15 @@ async function editNotifications() {
578585
await targetNotificationsController.pollForEmergencies()
579586
})
580587
}
588+
589+
async function startChildProcess() {
590+
const result = await createInputBox({
591+
title: 'Enter a command',
592+
}).prompt()
593+
if (result) {
594+
const [command, ...args] = result?.toString().split(' ') ?? []
595+
getLogger().info(`Starting child process: '${command}'`)
596+
const processResult = await ChildProcess.run(command, args, { collect: true })
597+
getLogger().info(`Child process exited with code ${processResult.exitCode}`)
598+
}
599+
}

packages/core/src/shared/logger/logger.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import * as vscode from 'vscode'
77

8-
export type LogTopic = 'crashMonitoring' | 'dev/beta' | 'notifications' | 'test' | 'unknown'
8+
export type LogTopic = 'crashMonitoring' | 'dev/beta' | 'notifications' | 'test' | 'childProcess' | 'unknown'
99

1010
class ErrorLog {
1111
constructor(

packages/core/src/shared/utilities/pollingSet.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,14 @@ export class PollingSet<T> extends Set<T> {
4444
this.clearTimer()
4545
}
4646
}
47-
47+
// TODO(hkobew): Overwrite the add method instead of adding seperate method. If we add item to set, timer should always start.
4848
public start(id: T): void {
4949
this.add(id)
5050
this.pollTimer = this.pollTimer ?? globals.clock.setInterval(() => this.poll(), this.interval)
5151
}
52+
53+
public override clear(): void {
54+
this.clearTimer()
55+
super.clear()
56+
}
5257
}

packages/core/src/shared/utilities/processUtils.ts

Lines changed: 136 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import * as proc from 'child_process' // eslint-disable-line no-restricted-impor
77
import * as crossSpawn from 'cross-spawn'
88
import * as logger from '../logger'
99
import { Timeout, CancellationError, waitUntil } from './timeoutUtils'
10+
import { PollingSet } from './pollingSet'
11+
import { getLogger } from '../logger/logger'
1012

1113
export interface RunParameterContext {
1214
/** Reports an error parsed from the stdin/stdout streams. */
@@ -61,14 +63,144 @@ export interface ChildProcessResult {
6163

6264
export const eof = Symbol('EOF')
6365

66+
export interface ProcessStats {
67+
memory: number
68+
cpu: number
69+
}
70+
export class ChildProcessTracker {
71+
static readonly pollingInterval: number = 10000 // Check usage every 10 seconds
72+
static readonly thresholds: ProcessStats = {
73+
memory: 100 * 1024 * 1024, // 100 MB
74+
cpu: 50,
75+
}
76+
static readonly logger = getLogger('childProcess')
77+
#processByPid: Map<number, ChildProcess> = new Map<number, ChildProcess>()
78+
#pids: PollingSet<number>
79+
80+
public constructor() {
81+
this.#pids = new PollingSet(ChildProcessTracker.pollingInterval, () => this.monitor())
82+
}
83+
84+
private cleanUp() {
85+
const terminatedProcesses = Array.from(this.#pids.values()).filter(
86+
(pid: number) => this.#processByPid.get(pid)?.stopped
87+
)
88+
for (const pid of terminatedProcesses) {
89+
this.delete(pid)
90+
}
91+
}
92+
93+
private async monitor() {
94+
this.cleanUp()
95+
ChildProcessTracker.logger.debug(`Active running processes size: ${this.#pids.size}`)
96+
97+
for (const pid of this.#pids.values()) {
98+
await this.checkProcessUsage(pid)
99+
}
100+
}
101+
102+
private async checkProcessUsage(pid: number): Promise<void> {
103+
if (!this.#pids.has(pid)) {
104+
ChildProcessTracker.logger.warn(`Missing process with id ${pid}`)
105+
return
106+
}
107+
const stats = this.getUsage(pid)
108+
if (stats) {
109+
ChildProcessTracker.logger.debug(`Process ${pid} usage: %O`, stats)
110+
if (stats.memory > ChildProcessTracker.thresholds.memory) {
111+
ChildProcessTracker.logger.warn(`Process ${pid} exceeded memory threshold: ${stats.memory}`)
112+
}
113+
if (stats.cpu > ChildProcessTracker.thresholds.cpu) {
114+
ChildProcessTracker.logger.warn(`Process ${pid} exceeded cpu threshold: ${stats.cpu}`)
115+
}
116+
}
117+
}
118+
119+
public add(childProcess: ChildProcess) {
120+
const pid = childProcess.pid()
121+
this.#processByPid.set(pid, childProcess)
122+
this.#pids.start(pid)
123+
}
124+
125+
public delete(childProcessId: number) {
126+
this.#processByPid.delete(childProcessId)
127+
this.#pids.delete(childProcessId)
128+
}
129+
130+
public get size() {
131+
return this.#pids.size
132+
}
133+
134+
public has(childProcess: ChildProcess) {
135+
return this.#pids.has(childProcess.pid())
136+
}
137+
138+
public clear() {
139+
for (const childProcess of this.#processByPid.values()) {
140+
childProcess.stop(true)
141+
}
142+
this.#pids.clear()
143+
this.#processByPid.clear()
144+
}
145+
146+
public getUsage(pid: number): ProcessStats {
147+
try {
148+
// isWin() leads to circular dependency.
149+
return process.platform === 'win32' ? getWindowsUsage() : getUnixUsage()
150+
} catch (e) {
151+
ChildProcessTracker.logger.warn(`Failed to get process stats for ${pid}: ${e}`)
152+
return { cpu: 0, memory: 0 }
153+
}
154+
155+
function getWindowsUsage() {
156+
const cpuOutput = proc
157+
.execFileSync('wmic', [
158+
'path',
159+
'Win32_PerfFormattedData_PerfProc_Process',
160+
'where',
161+
`IDProcess=${pid}`,
162+
'get',
163+
'PercentProcessorTime',
164+
])
165+
.toString()
166+
const memOutput = proc
167+
.execFileSync('wmic', ['process', 'where', `ProcessId=${pid}`, 'get', 'WorkingSetSize'])
168+
.toString()
169+
170+
const cpuPercentage = parseFloat(cpuOutput.split('\n')[1])
171+
const memoryBytes = parseInt(memOutput.split('\n')[1]) * 1024
172+
173+
return {
174+
cpu: isNaN(cpuPercentage) ? 0 : cpuPercentage,
175+
memory: memoryBytes,
176+
}
177+
}
178+
179+
function getUnixUsage() {
180+
const cpuMemOutput = proc.execFileSync('ps', ['-p', pid.toString(), '-o', '%cpu,%mem']).toString()
181+
const rssOutput = proc.execFileSync('ps', ['-p', pid.toString(), '-o', 'rss']).toString()
182+
183+
const cpuMemLines = cpuMemOutput.split('\n')[1].trim().split(/\s+/)
184+
const cpuPercentage = parseFloat(cpuMemLines[0])
185+
const memoryBytes = parseInt(rssOutput.split('\n')[1]) * 1024
186+
187+
return {
188+
cpu: isNaN(cpuPercentage) ? 0 : cpuPercentage,
189+
memory: memoryBytes,
190+
}
191+
}
192+
}
193+
}
194+
64195
/**
65196
* Convenience class to manage a child process
66197
* To use:
67198
* - instantiate
68199
* - call and await run to get the results (pass or fail)
69200
*/
70201
export class ChildProcess {
71-
static #runningProcesses: Map<number, ChildProcess> = new Map()
202+
static #runningProcesses = new ChildProcessTracker()
203+
static stopTimeout = 3000
72204
#childProcess: proc.ChildProcess | undefined
73205
#processErrors: Error[] = []
74206
#processResult: ChildProcessResult | undefined
@@ -285,7 +417,7 @@ export class ChildProcess {
285417
child.kill(signal)
286418

287419
if (force === true) {
288-
waitUntil(async () => this.stopped, { timeout: 3000, interval: 200, truthy: true })
420+
waitUntil(async () => this.stopped, { timeout: ChildProcess.stopTimeout, interval: 200, truthy: true })
289421
.then((stopped) => {
290422
if (!stopped) {
291423
child.kill('SIGKILL')
@@ -309,7 +441,7 @@ export class ChildProcess {
309441
if (pid === undefined) {
310442
return
311443
}
312-
ChildProcess.#runningProcesses.set(pid, this)
444+
ChildProcess.#runningProcesses.add(this)
313445

314446
const timeoutListener = options?.timeout?.token.onCancellationRequested(({ agent }) => {
315447
const message = agent === 'user' ? 'Cancelled: ' : 'Timed out: '
@@ -319,7 +451,7 @@ export class ChildProcess {
319451

320452
const dispose = () => {
321453
timeoutListener?.dispose()
322-
ChildProcess.#runningProcesses.delete(pid)
454+
ChildProcess.#runningProcesses.delete(this.pid())
323455
}
324456

325457
process.on('exit', dispose)

0 commit comments

Comments
 (0)