Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bin/harper.js
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ async function harper() {
const cliApiOp = cliOperations.buildRequest();
logger.trace('calling cli operations with:', cliApiOp);
await cliOperations.cliOperations(cliApiOp);
return
return;
}
}
exports.harper = harper;
Expand Down
191 changes: 184 additions & 7 deletions security/jsLoader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,20 @@ import { contextStorage, transaction } from '../resources/transaction.ts';
import { RequestTarget } from '../resources/RequestTarget.ts';
import { tables, databases } from '../resources/databases.ts';
import { readFile } from 'node:fs/promises';
import { readFileSync } from 'node:fs';
import { dirname, isAbsolute } from 'node:path';
import { pathToFileURL, fileURLToPath } from 'node:url';
import { SourceTextModule, SyntheticModule, createContext, runInContext } from 'node:vm';
import { Scope } from '../components/Scope.ts';
import logger from '../utility/logging/harper_logger.js';
import { createRequire } from 'node:module';
import * as env from '../utility/environment/environmentManager';
import * as child_process from 'node:child_process';
import { CONFIG_PARAMS } from '../utility/hdbTerms.ts';
import { contentTypes } from '../server/serverHelpers/contentTypes.ts';
import type { CompartmentOptions } from 'ses';
import { mkdirSync, readFileSync, writeFileSync, unlinkSync, openSync, closeSync } from 'node:fs';
import { join } from 'node:path';
import { EventEmitter } from 'node:events';

type Lockdown = 'none' | 'freeze' | 'ses';
const APPLICATIONS_LOCKDOWN: Lockdown = env.get(CONFIG_PARAMS.APPLICATIONS_LOCKDOWN);
Expand Down Expand Up @@ -234,7 +237,7 @@ async function loadModuleWithVM(moduleUrl: string, scope: Scope) {
let module: SourceTextModule | SyntheticModule;

// Handle special built-in modules
if (url === 'harper') {
if (url === 'harper' || url === 'harperdb') {
let harperExports = getHarperExports(scope);
module = new SyntheticModule(
Object.keys(harperExports),
Expand Down Expand Up @@ -286,9 +289,9 @@ async function loadModuleWithVM(moduleUrl: string, scope: Scope) {
}
}
} else {
checkAllowedModulePath(url, scope.applicationContainment.verifyPath);
const replacedModule = checkAllowedModulePath(url, scope.applicationContainment.verifyPath);
// For Node.js built-in modules (node:) and npm packages, use dynamic import
const importedModule = await import(url);
const importedModule = replacedModule ?? (await import(url));
const exportNames = Object.keys(importedModule);

module = new SyntheticModule(
Expand Down Expand Up @@ -456,23 +459,197 @@ const ALLOWED_NODE_BUILTIN_MODULES = new Set([
'events',
'timers',
'process',
'child_process',
'async_hooks',
'console',
'perf_hooks',
'diagnostics_channel',
'fs',
]);
function checkAllowedModulePath(moduleUrl: string, containingFolder: string): boolean {
const ALLOWED_COMMANDS = new Set(['next', 'npm', 'node']);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be configurable and expandable somehow. Maybe plugins should have a way to register commands? And the security contract is that if a user trusts a plugin they trust whatever commands that plugin may register as well?

From the perspective of an open core, we need to ensure maximum configurability so that we don't unnecessarily restrict open usage.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I could make this configurable 👍
FWIW, the plugins were running outside the VM, but was changing that with this PR #209
Primarily because it is somewhat confusing that we can provide a consistent behavior of import from 'harper' or 'harperdb' for application (via the VM), but can't do so with the plugin. But yeah, we could also give plugins a way to register commands.

const REPLACED_BUILTIN_MODULES = {
child_process: {
exec: createSpawn(child_process.exec),
execFile: createSpawn(child_process.execFile),
fork: createSpawn(child_process.fork, true), // this is launching node, so deemed safe
spawn: createSpawn(child_process.spawn),
},
};
/**
* Creates a ChildProcess-like object for an existing process
*/
class ExistingProcessWrapper extends EventEmitter {
pid: number;
private checkInterval: NodeJS.Timeout;

constructor(pid: number) {
super();
this.pid = pid;

// Monitor process and emit exit event when it terminates
this.checkInterval = setInterval(() => {
try {
// Signal 0 checks if process exists without actually killing it
process.kill(pid, 0);
} catch {
// Process no longer exists
clearInterval(this.checkInterval);
this.emit('exit', null, null);
}
}, 1000);
}

// Kill the process
kill(signal?: NodeJS.Signals | number) {
try {
process.kill(this.pid, signal);
return true;
} catch {
return false;
}
}

// Clean up interval when wrapper is no longer needed
unref() {
clearInterval(this.checkInterval);
return this;
}
}
Comment on lines +465 to +501
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
class ExistingProcessWrapper extends EventEmitter {
pid: number;
private checkInterval: NodeJS.Timeout;
constructor(pid: number) {
super();
this.pid = pid;
// Monitor process and emit exit event when it terminates
this.checkInterval = setInterval(() => {
try {
// Signal 0 checks if process exists without actually killing it
process.kill(pid, 0);
} catch {
// Process no longer exists
clearInterval(this.checkInterval);
this.emit('exit', null, null);
}
}, 1000);
}
// Kill the process
kill(signal?: NodeJS.Signals | number) {
try {
process.kill(this.pid, signal);
return true;
} catch {
return false;
}
}
// Clean up interval when wrapper is no longer needed
unref() {
clearInterval(this.checkInterval);
return this;
}
}
class ExistingProcessWrapper extends EventEmitter {
#pid: number;
#checkInterval: NodeJS.Timeout;
constructor(pid: number) {
super();
this.#pid = pid;
// Monitor process and emit exit event when it terminates
this.#checkInterval = setInterval(() => {
try {
// Signal 0 checks if process exists without actually killing it
process.kill(pid, 0);
} catch {
// Process no longer exists
clearInterval(this.#checkInterval);
this.emit('exit', null, null);
}
}, 1000);
}
get pid() {
return this.#pid;
}
// Kill the process
kill(signal?: NodeJS.Signals | number) {
try {
process.kill(this.#pid, signal);
return true;
} catch {
return false;
}
}
// Clean up interval when wrapper is no longer needed
unref() {
clearInterval(this.#checkInterval);
return this;
}
}

Replace private with # so it is actually private. The private keyword is compile time only and has no runtime enforcement.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, sure, but pid wasn't really intended to be private. In trying match node, it is just a plain ol' public property. But yeah, #checkInterval sounds good.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume we wouldn't want the user to be able to set the pid though? Even accidentally? Thats what I guarding for by switching to a getter pattern

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds nice. I think you meant to file a bug with node.js, not comment on a PR ;) (I am just following what node has defined).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Haha yes, I assumed Node.js had this property locked down so users cannot set it. But maybe that is an invalid assumption.


/**
* Checks if a process with the given PID is running
*/
function isProcessRunning(pid: number): boolean {
try {
// Signal 0 checks existence without killing
process.kill(pid, 0);
return true;
} catch {
return false;
}
}

/**
* Acquires an exclusive lock using the PID file itself (synchronously with busy-wait)
*/
function acquirePidFileLock(pidFilePath: string, maxRetries = 100, retryDelay = 5): number | null {
for (let attempt = 0; attempt < maxRetries; attempt++) {
try {
// Try to open exclusively - 'wx' fails if file exists
const fd = openSync(pidFilePath, 'wx');
closeSync(fd);
return fd; // Successfully acquired lock (file created)
} catch (err) {
if (err.code === 'EEXIST') {
// File exists - check if it contains a valid running process
try {
const pidContent = readFileSync(pidFilePath, 'utf-8');
const existingPid = parseInt(pidContent.trim(), 10);

if (!isNaN(existingPid) && isProcessRunning(existingPid)) {
// Valid process is running, return its PID
return existingPid;
} else {
// Stale PID file, try to remove it
try {
unlinkSync(pidFilePath);
} catch {
// Another thread may have removed it, retry
}
}
} catch {
// Couldn't read file, another thread might be writing, retry
}

// Wait a bit before retrying
const start = Date.now();
while (Date.now() - start < retryDelay) {
// Busy wait
}
} else {
throw err;
}
}
}

throw new Error(`Failed to acquire PID file lock after ${maxRetries} attempts`);
}

function createSpawn(spawnFunction: (...args: any) => child_process.ChildProcess, alwaysAllow?: boolean) {
const basePath = env.getHdbBasePath();
return function (command: string, args: any, options: any) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's a good idea to change the function signature of exec() or execFile().

Ideally, the REPLACED_BUILTIN_MODULES would have each function's signature in tact.

I have an idea... bear with me...

// pull in some child_process types:
import * as child_process, { type ChildProcess, type ExecException, type ExecOptions } from 'node:child_process';
import { mkdirSync, writeFileSync, rmSync } from 'node:fs';
import { join } from 'node:path';

// cache this in a global since it doesn't change
const basePath = env.getHdbBasePath();

function checkCommand(fnName: string, command: string, name?: string, alwaysAllow: boolean = false): { existingPid: number | null, pidFilePath: string } {
	if (!ALLOWED_COMMANDS.has(command) && !alwaysAllow) {
		throw new Error(`Command ${command} is not allowed`);
	}
	const processName = name;
		if (!processName)
			throw new Error(
				`Calling ${fnName} in Harper must have a process "name" in the options to ensure that a single process is started and reused`
			);

	// Ensure PID directory exists
	const pidDir = join(basePath, 'pids');
	mkdirSync(pidDir, { recursive: true });

	const pidFilePath = join(pidDir, `${processName}.pid`);

	// Try to acquire lock - returns null if we got the lock, or PID if process exists
	const existingPid = acquirePidFileLock(pidFilePath);
	return { existingPid, pidFilePath };
}

function createPidFile(child: ChildProcess, pidFilePath: string): ChildProcess {
	// Write PID to the file we just created
	const { pid } = child; // TIL: `pid` can be undefined!
	if (pid === undefined) {
		// child failed, let the caller handle `.on('error')`
		return child;
	}
	try {
		writeFileSync(pidFilePath, pid.toString(), 'utf-8');
	} catch (err) {
		// Failed to write PID, clean up
		child.kill();
		rmSync(pidFilePath, { force: true });
		throw err;
	}

	// Clean up PID file when process exits
	child.on('exit', () => rmSync(pidFilePath, { force: true }));
	return child;
}

// you could shove this in a types file
type ChildProcessWrapper = {
	exec(command: string): ChildProcess;
	exec(
		command: string,
		options: ({ encoding: 'buffer' | null } & ExecOptions) | null
	): ChildProcess;
	exec(
		command: string,
		callback: (error: ExecException | null, stdout: string, stderr: string) => void
	): ChildProcess;
	exec(
		command: string,
		options: ({ encoding: 'buffer' | null } & ExecOptions) | ExecOptions | null,
		callback: (error: ExecException | null, stdout: string | Buffer, stderr: string | Buffer) => void
	): ChildProcess;
	/* execFile, fork, spawn */
};

const REPLACED_BUILTIN_MODULES = {
	child_process: {
		exec(command: string, options: ExecOptions & { name?: string }, callback?: (error: ExecException | null, stdout: string, stderr: string) => void): ChildProcess {
			const { existingPid, pidFilePath } = checkCommand('exec', command.split(' ')[0], options?.name);
			if (typeof existingPid === 'number') {
				return new ExistingProcessWrapper(existingPid);
			}
			return createPidFile(child_process.exec(command, options, callback), pidFilePath);
		},
		/* execFile, fork, spawn */
	} as ChildProcessWrapper
};

It's a bit more verbose, but it will preserve the function signature types and it will help eliminate a few any types. Whatcha think?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure I am following. We are not directly exposing these functions to users, so I am not sure what it means to "preserve the function signature types". Users will be invoking these functions with import { spawn } from 'node:fs'. And AFAIK, the type system will always resolve those types to node's provided types, nothing in this code effects that at all does it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, TypeScript should pick up the types from @types/node. Ignore 99% my comment. The function signature for execFile is missing the 4th parameter: child_process.execFile(file[, args][, options][, callback]).

if (!ALLOWED_COMMANDS.has(command.split(' ')[0]) && !alwaysAllow) {
throw new Error(`Command ${command} is not allowed`);
}
const processName = options?.name;
if (!processName)
throw new Error(
`Calling ${spawnFunction.name} in Harper must have a process "name" in the options to ensure that a single process is started and reused`
);

// Ensure PID directory exists
const pidDir = join(basePath, 'pids');
mkdirSync(pidDir, { recursive: true });

const pidFilePath = join(pidDir, `${processName}.pid`);

// Try to acquire lock - returns null if we got the lock, or PID if process exists
const existingPid = acquirePidFileLock(pidFilePath);

if (typeof existingPid === 'number' && existingPid > 0) {
// Existing process is running, return wrapper
return new ExistingProcessWrapper(existingPid);
}

// We acquired the lock (file was created), spawn new process
const childProcess = spawnFunction(command, args, options);

// Write PID to the file we just created
try {
writeFileSync(pidFilePath, childProcess.pid.toString(), 'utf-8');
} catch (err) {
// Failed to write PID, clean up
try {
childProcess.kill();
unlinkSync(pidFilePath);
} catch {}
throw err;
}

// Clean up PID file when process exits
childProcess.on('exit', () => {
try {
unlinkSync(pidFilePath);
} catch {
// File may already be removed
}
});

return childProcess;
};
}

/**
* Validates whether a module can be loaded based on security restrictions and returns the module path or replacement.
* For file URLs, ensures the module is within the containing folder.
* For node built-in modules, checks against an allowlist and returns any replacements.
*
* @param {string} moduleUrl - The URL or identifier of the module to be loaded, which may be a file: URL, node: URL, or bare module specifier.
* @param {string} containingFolder - The absolute path of the folder that contains the application, used to validate file: URLs are within bounds.
* @return {any} Returns undefined for allowed file paths, or a replacement module identifier for allowed node built-in modules.
* @throws {Error} Throws an error if the module is outside the application folder or if the module is not in the allowed list.
*/
function checkAllowedModulePath(moduleUrl: string, containingFolder: string): any {
if (moduleUrl.startsWith('file:')) {
const path = moduleUrl.slice(7);
if (path.startsWith(containingFolder)) {
return true;
return;
}
throw new Error(`Can not load module outside of application folder`);
}
let simpleName = moduleUrl.startsWith('node:') ? moduleUrl.slice(5) : moduleUrl;
simpleName = simpleName.split('/')[0];
if (ALLOWED_NODE_BUILTIN_MODULES.has(simpleName)) return true;
if (ALLOWED_NODE_BUILTIN_MODULES.has(simpleName)) return REPLACED_BUILTIN_MODULES[simpleName];
throw new Error(`Module ${moduleUrl} is not allowed to be imported`);
}

Expand Down
40 changes: 40 additions & 0 deletions unitTests/components/fixtures/testJSWithDeps/resources.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import assert from 'node:assert'; // verify we can access safe node built-in mod
import { Resource, getContext } from 'harper';
import { MyComponent, connect as connectFromChild } from './child-dir/in-child-dir.js';
import { connect } from 'mqtt'; // verify we can import from node_modules packages
import { fork, spawn } from 'node:child_process';

console.log('Verifying we can access console.log in application');
// verify we can't access parent global variables
Expand All @@ -24,3 +25,42 @@ export const testExport = {
export class TestComponent extends Resource {}

export { MyComponent as 'my-component' };

export const processSpawnTest = {
get() {}, // make it look like a resource
testFork() {
// Fork should work (allowed command)
const child = fork('next', ['--version'], { name: 'test-next-process' });
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to fail now, no?

assert(child.pid, 'Fork should return a process with a PID');
return child;
},
testSpawnDisallowed() {
// Spawn with disallowed command should throw
try {
spawn('curl', ['https://example.com'], { name: 'test-curl-process' });
throw new Error('Should have thrown an error for disallowed command');
} catch (err) {
assert(err.message.includes('not allowed'), 'Should throw error about disallowed command');
}
},
testSpawnWithoutName() {
// Spawn without name should throw
try {
spawn('npm', ['build']);
throw new Error('Should have thrown an error for missing name');
} catch (err) {
assert(err.message.includes('name'), 'Should throw error about missing name');
}
},
testProcessReuse(childProcessPath) {
// First call should fork a new process
const child1 = fork(childProcessPath, [], { name: 'test-reuse-process' });
assert(child1.pid, 'First fork should return a process with a PID');

// Second call with same name should return wrapper for existing process
const child2 = fork(childProcessPath, [], { name: 'test-reuse-process' });
assert.equal(child1.pid, child2.pid, 'Second fork should return same PID');

return { child1, child2 };
},
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Simple child process that stays alive until killed
// Used for testing process reuse functionality

let isRunning = true;

process.on('SIGTERM', () => {
isRunning = false;
process.exit(0);
});

// Keep process alive
const interval = setInterval(() => {
if (!isRunning) {
clearInterval(interval);
}
}, 100);

console.log('Child process started with PID:', process.pid);
Loading
Loading