Skip to content

Commit a19101e

Browse files
authored
Merge pull request #210 from HarperFast/spawn-wisely
Spawn Wisely
2 parents 9a397a3 + 6619571 commit a19101e

File tree

8 files changed

+341
-291
lines changed

8 files changed

+341
-291
lines changed

config/configUtils.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ function createConfigFile(args, skipFsValidation = false) {
140140
if (configDoc.errors?.length > 0) {
141141
throw handleHDBError(
142142
new Error(),
143-
`Error parsing harperdb-config.yaml ${configDoc.errors}`,
143+
`Error parsing ${configFilePath} ${configDoc.errors}`,
144144
HTTP_STATUS_CODES.BAD_REQUEST,
145145
undefined,
146146
undefined,

security/jsLoader.ts

Lines changed: 190 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,20 @@ import { contextStorage, transaction } from '../resources/transaction.ts';
33
import { RequestTarget } from '../resources/RequestTarget.ts';
44
import { tables, databases } from '../resources/databases.ts';
55
import { readFile } from 'node:fs/promises';
6-
import { readFileSync } from 'node:fs';
76
import { dirname, isAbsolute } from 'node:path';
87
import { pathToFileURL, fileURLToPath } from 'node:url';
98
import { SourceTextModule, SyntheticModule, createContext, runInContext } from 'node:vm';
109
import { ApplicationScope } from '../components/ApplicationScope.ts';
1110
import logger from '../utility/logging/harper_logger.js';
1211
import { createRequire } from 'node:module';
1312
import * as env from '../utility/environment/environmentManager';
13+
import * as child_process from 'node:child_process';
1414
import { CONFIG_PARAMS } from '../utility/hdbTerms.ts';
1515
import { contentTypes } from '../server/serverHelpers/contentTypes.ts';
1616
import type { CompartmentOptions } from 'ses';
17+
import { mkdirSync, readFileSync, writeFileSync, unlinkSync, openSync, closeSync } from 'node:fs';
18+
import { join } from 'node:path';
19+
import { EventEmitter } from 'node:events';
1720

1821
type Lockdown = 'none' | 'freeze' | 'ses';
1922
const APPLICATIONS_LOCKDOWN: Lockdown = env.get(CONFIG_PARAMS.APPLICATIONS_LOCKDOWN);
@@ -234,7 +237,7 @@ async function loadModuleWithVM(moduleUrl: string, scope: ApplicationScope) {
234237
let module: SourceTextModule | SyntheticModule;
235238

236239
// Handle special built-in modules
237-
if (url === 'harper') {
240+
if (url === 'harper' || url === 'harperdb') {
238241
let harperExports = getHarperExports(scope);
239242
module = new SyntheticModule(
240243
Object.keys(harperExports),
@@ -286,9 +289,9 @@ async function loadModuleWithVM(moduleUrl: string, scope: ApplicationScope) {
286289
}
287290
}
288291
} else {
289-
checkAllowedModulePath(url, scope.verifyPath);
292+
const replacedModule = checkAllowedModulePath(url, scope.verifyPath);
290293
// For Node.js built-in modules (node:) and npm packages, use dynamic import
291-
const importedModule = await import(url);
294+
const importedModule = replacedModule ?? (await import(url));
292295
const exportNames = Object.keys(importedModule);
293296

294297
module = new SyntheticModule(
@@ -439,40 +442,198 @@ function getHarperExports(scope: ApplicationScope) {
439442
contentTypes,
440443
};
441444
}
442-
const ALLOWED_NODE_BUILTIN_MODULES = new Set([
443-
'assert',
444-
'http',
445-
'https',
446-
'path',
447-
'url',
448-
'util',
449-
'stream',
450-
'crypto',
451-
'buffer',
452-
'string_decoder',
453-
'querystring',
454-
'punycode',
455-
'zlib',
456-
'events',
457-
'timers',
458-
'process',
459-
'async_hooks',
460-
'console',
461-
'perf_hooks',
462-
'diagnostics_channel',
463-
'fs',
464-
]);
445+
const ALLOWED_NODE_BUILTIN_MODULES = env.get(CONFIG_PARAMS.APPLICATIONS_ALLOWEDBUILTINMODULES)
446+
? new Set(env.get(CONFIG_PARAMS.APPLICATIONS_ALLOWEDBUILTINMODULES))
447+
: {
448+
// if we don't have a list of allowed modules, allow everything
449+
has() {
450+
return true;
451+
},
452+
};
453+
const ALLOWED_COMMANDS = new Set(env.get(CONFIG_PARAMS.APPLICATIONS_ALLOWEDSPAWNCOMMANDS) ?? []);
454+
const REPLACED_BUILTIN_MODULES = {
455+
child_process: {
456+
exec: createSpawn(child_process.exec),
457+
execFile: createSpawn(child_process.execFile),
458+
fork: createSpawn(child_process.fork, true), // this is launching node, so deemed safe
459+
spawn: createSpawn(child_process.spawn),
460+
},
461+
};
462+
/**
463+
* Creates a ChildProcess-like object for an existing process
464+
*/
465+
class ExistingProcessWrapper extends EventEmitter {
466+
pid: number;
467+
private checkInterval: NodeJS.Timeout;
468+
469+
constructor(pid: number) {
470+
super();
471+
this.pid = pid;
472+
473+
// Monitor process and emit exit event when it terminates
474+
this.checkInterval = setInterval(() => {
475+
try {
476+
// Signal 0 checks if process exists without actually killing it
477+
process.kill(pid, 0);
478+
} catch {
479+
// Process no longer exists
480+
clearInterval(this.checkInterval);
481+
this.emit('exit', null, null);
482+
}
483+
}, 1000);
484+
}
485+
486+
// Kill the process
487+
kill(signal?: NodeJS.Signals | number) {
488+
try {
489+
process.kill(this.pid, signal);
490+
return true;
491+
} catch {
492+
return false;
493+
}
494+
}
495+
496+
// Clean up interval when wrapper is no longer needed
497+
unref() {
498+
clearInterval(this.checkInterval);
499+
return this;
500+
}
501+
}
502+
503+
/**
504+
* Checks if a process with the given PID is running
505+
*/
506+
function isProcessRunning(pid: number): boolean {
507+
try {
508+
// Signal 0 checks existence without killing
509+
process.kill(pid, 0);
510+
return true;
511+
} catch {
512+
return false;
513+
}
514+
}
515+
516+
/**
517+
* Acquires an exclusive lock using the PID file itself (synchronously with busy-wait)
518+
*/
519+
function acquirePidFileLock(pidFilePath: string, maxRetries = 100, retryDelay = 5): number | null {
520+
for (let attempt = 0; attempt < maxRetries; attempt++) {
521+
try {
522+
// Try to open exclusively - 'wx' fails if file exists
523+
const fd = openSync(pidFilePath, 'wx');
524+
closeSync(fd);
525+
return fd; // Successfully acquired lock (file created)
526+
} catch (err) {
527+
if (err.code === 'EEXIST') {
528+
// File exists - check if it contains a valid running process
529+
try {
530+
const pidContent = readFileSync(pidFilePath, 'utf-8');
531+
const existingPid = parseInt(pidContent.trim(), 10);
532+
533+
if (!isNaN(existingPid) && isProcessRunning(existingPid)) {
534+
// Valid process is running, return its PID
535+
return existingPid;
536+
} else {
537+
// Stale PID file, try to remove it
538+
try {
539+
unlinkSync(pidFilePath);
540+
} catch {
541+
// Another thread may have removed it, retry
542+
}
543+
}
544+
} catch {
545+
// Couldn't read file, another thread might be writing, retry
546+
}
547+
548+
// Wait a bit before retrying
549+
const start = Date.now();
550+
while (Date.now() - start < retryDelay) {
551+
// Busy wait
552+
}
553+
} else {
554+
throw err;
555+
}
556+
}
557+
}
558+
559+
throw new Error(`Failed to acquire PID file lock after ${maxRetries} attempts`);
560+
}
561+
562+
function createSpawn(spawnFunction: (...args: any) => child_process.ChildProcess, alwaysAllow?: boolean) {
563+
const basePath = env.getHdbBasePath();
564+
return function (command: string, args?: any, options?: any, callback?: (...args: any[]) => void) {
565+
if (!ALLOWED_COMMANDS.has(command.split(' ')[0]) && !alwaysAllow) {
566+
throw new Error(`Command ${command} is not allowed`);
567+
}
568+
const processName = options?.name;
569+
if (!processName)
570+
throw new Error(
571+
`Calling ${spawnFunction.name} in Harper must have a process "name" in the options to ensure that a single process is started and reused`
572+
);
573+
574+
// Ensure PID directory exists
575+
const pidDir = join(basePath, 'pids');
576+
mkdirSync(pidDir, { recursive: true });
577+
578+
const pidFilePath = join(pidDir, `${processName}.pid`);
579+
580+
// Try to acquire lock - returns null if we got the lock, or PID if process exists
581+
const existingPid = acquirePidFileLock(pidFilePath);
582+
583+
if (typeof existingPid === 'number' && existingPid > 0) {
584+
// Existing process is running, return wrapper
585+
return new ExistingProcessWrapper(existingPid);
586+
}
587+
588+
// We acquired the lock (file was created), spawn new process
589+
const childProcess = spawnFunction(command, args, options, callback);
590+
591+
// Write PID to the file we just created
592+
try {
593+
writeFileSync(pidFilePath, childProcess.pid.toString(), 'utf-8');
594+
} catch (err) {
595+
// Failed to write PID, clean up
596+
try {
597+
childProcess.kill();
598+
unlinkSync(pidFilePath);
599+
} catch {}
600+
throw err;
601+
}
602+
603+
// Clean up PID file when process exits
604+
childProcess.on('exit', () => {
605+
try {
606+
unlinkSync(pidFilePath);
607+
} catch {
608+
// File may already be removed
609+
}
610+
});
611+
612+
return childProcess;
613+
};
614+
}
615+
616+
/**
617+
* Validates whether a module can be loaded based on security restrictions and returns the module path or replacement.
618+
* For file URLs, ensures the module is within the containing folder.
619+
* For node built-in modules, checks against an allowlist and returns any replacements.
620+
*
621+
* @param {string} moduleUrl - The URL or identifier of the module to be loaded, which may be a file: URL, node: URL, or bare module specifier.
622+
* @param {string} containingFolder - The absolute path of the folder that contains the application, used to validate file: URLs are within bounds.
623+
* @return {any} Returns undefined for allowed file paths, or a replacement module identifier for allowed node built-in modules.
624+
* @throws {Error} Throws an error if the module is outside the application folder or if the module is not in the allowed list.
625+
*/
465626
function checkAllowedModulePath(moduleUrl: string, containingFolder?: string): boolean {
466627
if (moduleUrl.startsWith('file:')) {
467628
const path = moduleUrl.slice(7);
468629
if (!containingFolder || path.startsWith(containingFolder)) {
469-
return true;
630+
return;
470631
}
471632
throw new Error(`Can not load module outside of application folder ${containingFolder}`);
472633
}
473634
let simpleName = moduleUrl.startsWith('node:') ? moduleUrl.slice(5) : moduleUrl;
474635
simpleName = simpleName.split('/')[0];
475-
if (ALLOWED_NODE_BUILTIN_MODULES.has(simpleName)) return true;
636+
if (ALLOWED_NODE_BUILTIN_MODULES.has(simpleName)) return REPLACED_BUILTIN_MODULES[simpleName];
476637
throw new Error(`Module ${moduleUrl} is not allowed to be imported`);
477638
}
478639

static/defaultConfig.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ applications:
2727
lockdown: freeze
2828
containment: vm
2929
dependencyContainment: false
30+
allowedSpawnCommands:
31+
- npm
32+
- node
3033
componentsRoot: null
3134
localStudio:
3235
enabled: true

unitTests/components/fixtures/testJSWithDeps/resources.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import assert from 'node:assert'; // verify we can access safe node built-in mod
22
import { Resource, getContext } from 'harper';
33
import { MyComponent, connect as connectFromChild } from './child-dir/in-child-dir.js';
44
import { connect } from 'mqtt'; // verify we can import from node_modules packages
5+
import { fork, spawn } from 'node:child_process';
56

67
console.log('Verifying we can access console.log in application');
78
// verify we can't access parent global variables
@@ -24,3 +25,42 @@ export const testExport = {
2425
export class TestComponent extends Resource {}
2526

2627
export { MyComponent as 'my-component' };
28+
29+
export const processSpawnTest = {
30+
get() {}, // make it look like a resource
31+
testFork() {
32+
// Fork should work (allowed command)
33+
const child = fork('npm', ['--version'], { name: 'test-npm-process' });
34+
assert(child.pid, 'Fork should return a process with a PID');
35+
return child;
36+
},
37+
testSpawnDisallowed() {
38+
// Spawn with disallowed command should throw
39+
try {
40+
spawn('curl', ['https://example.com'], { name: 'test-curl-process' });
41+
throw new Error('Should have thrown an error for disallowed command');
42+
} catch (err) {
43+
assert(err.message.includes('not allowed'), 'Should throw error about disallowed command');
44+
}
45+
},
46+
testSpawnWithoutName() {
47+
// Spawn without name should throw
48+
try {
49+
spawn('npm', ['build']);
50+
throw new Error('Should have thrown an error for missing name');
51+
} catch (err) {
52+
assert(err.message.includes('name'), 'Should throw error about missing name');
53+
}
54+
},
55+
testProcessReuse(childProcessPath) {
56+
// First call should fork a new process
57+
const child1 = fork(childProcessPath, [], { name: 'test-reuse-process' });
58+
assert(child1.pid, 'First fork should return a process with a PID');
59+
60+
// Second call with same name should return wrapper for existing process
61+
const child2 = fork(childProcessPath, [], { name: 'test-reuse-process' });
62+
assert.equal(child1.pid, child2.pid, 'Second fork should return same PID');
63+
64+
return { child1, child2 };
65+
},
66+
};
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Simple child process that stays alive until killed
2+
// Used for testing process reuse functionality
3+
4+
let isRunning = true;
5+
6+
process.on('SIGTERM', () => {
7+
isRunning = false;
8+
process.exit(0);
9+
});
10+
11+
// Keep process alive
12+
const interval = setInterval(() => {
13+
if (!isRunning) {
14+
clearInterval(interval);
15+
}
16+
}, 100);
17+
18+
console.log('Child process started with PID:', process.pid);

0 commit comments

Comments
 (0)