refactor: Add kernel-test-local package for local-only E2E tests (#724)

grypez · web-flow · commit 6f56ad6c2f8b · 2026-01-09T13:39:27.000-05:00
Create a new `@ocap/kernel-test-local` package to contain E2E tests that run on dev workstations but not in CI. The primary motivation for this setup is to test agentic integrations using local language models. Adds a new `yarn test:e2e:local` command to the monorepo root. Note that `yarn test:e2e` will not run the local tests. Additional Changes: - Remove `test:e2e` script from kernel-agents - Move E2E tests from `kernel-agents/test/e2e/` to new package - Add capability exports to kernel-agents package: - Export `./capabilities/examples` (getMoonPhase, search) - Export `./capabilities/math` (count, add, multiply) - Export Agent and MakeAgentArgs types from main index Closes #723
diff --git a/package.json b/package.json
@@ -34,6 +34,7 @@
     "test:dev": "yarn test --mode development --reporter dot",
     "test:e2e": "yarn workspaces foreach --all run test:e2e",
     "test:e2e:ci": "yarn workspaces foreach --all run test:e2e:ci",
+    "test:e2e:local": "yarn workspaces foreach --all run test:e2e:local",
     "test:verbose": "yarn test --reporter verbose",
     "test:watch": "vitest",
     "why:batch": "./scripts/why-batch.sh"
diff --git a/packages/kernel-agents/package.json b/packages/kernel-agents/package.json
@@ -43,7 +43,27 @@
         "default": "./dist/strategies/repl-agent.cjs"
       }
     },
-    "./package.json": "./package.json"
+    "./package.json": "./package.json",
+    "./capabilities/examples": {
+      "import": {
+        "types": "./dist/capabilities/examples.d.mts",
+        "default": "./dist/capabilities/examples.mjs"
+      },
+      "require": {
+        "types": "./dist/capabilities/examples.d.cts",
+        "default": "./dist/capabilities/examples.cjs"
+      }
+    },
+    "./capabilities/math": {
+      "import": {
+        "types": "./dist/capabilities/math.d.mts",
+        "default": "./dist/capabilities/math.mjs"
+      },
+      "require": {
+        "types": "./dist/capabilities/math.d.cts",
+        "default": "./dist/capabilities/math.cjs"
+      }
+    }
   },
   "files": [
     "dist/"
@@ -60,7 +80,6 @@
     "lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore --log-level error",
     "publish:preview": "yarn npm publish --tag preview",
     "test": "vitest run --config vitest.config.ts",
-    "test:e2e": "vitest run --config vitest.config.e2e.ts",
     "test:clean": "yarn test --no-cache --coverage.clean",
     "test:dev": "yarn test --mode development --reporter dot",
     "test:verbose": "yarn test --reporter verbose",
diff --git a/packages/kernel-agents/src/index.ts b/packages/kernel-agents/src/index.ts
@@ -1,2 +1,4 @@
+export type { Agent } from './types/agent.ts';
 export type { CapabilityRecord } from './types.ts';
+export type { MakeAgentArgs } from './agent.ts';
 export { discover } from './capabilities/discover.ts';
diff --git a/packages/kernel-test-local/README.md b/packages/kernel-test-local/README.md
@@ -0,0 +1,33 @@
+# `@ocap/kernel-test-local`
+
+Local-only E2E tests that use a locally hosted language model.
+
+## Overview
+
+This package contains E2E tests that require a running Ollama instance with specific models installed. These tests are **not run in CI** and are intended for local development and validation only. Because language model outputs are inherently non-deterministic, these tests may occasionally fail even when the implementation is correct.
+
+## Setup
+
+[SETUP.md](./test/SETUP.md)
+
+## Running Tests
+
+From the repository root:
+
+```bash
+yarn test:e2e:local
+```
+
+From this package directory:
+
+```bash
+yarn test:e2e:local
+```
+
+## Troubleshooting
+
+[TROUBLESHOOTING.md](./test/TROUBLESHOOTING.md)
+
+## Contributing
+
+This package is part of the ocap-kernel monorepo. For contributing guidelines, see the [main repository README](https://github.com/MetaMask/ocap-kernel#readme).
diff --git a/packages/kernel-test-local/package.json b/packages/kernel-test-local/package.json
@@ -0,0 +1,69 @@
+{
+  "name": "@ocap/kernel-test-local",
+  "version": "0.0.0",
+  "private": true,
+  "description": "Local-only E2E tests for kernel agents requiring external dependencies (Ollama)",
+  "homepage": "https://github.com/MetaMask/ocap-kernel/tree/main/packages/kernel-test-local#readme",
+  "bugs": {
+    "url": "https://github.com/MetaMask/ocap-kernel/issues"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/MetaMask/ocap-kernel.git"
+  },
+  "type": "module",
+  "scripts": {
+    "clean": "rimraf --glob './*.tsbuildinfo' ./.eslintcache ./coverage ./.turbo",
+    "lint": "yarn lint:eslint && yarn lint:misc --check && yarn constraints && yarn lint:dependencies",
+    "lint:dependencies": "depcheck --quiet",
+    "lint:eslint": "eslint . --cache",
+    "lint:fix": "yarn lint:eslint --fix && yarn lint:misc --write && yarn constraints --fix && yarn lint:dependencies",
+    "lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore --log-level error",
+    "build:docs": "typedoc",
+    "test": "vitest run --config vitest.config.ts",
+    "test:e2e:local": "vitest run --config vitest.config.e2e.ts",
+    "test:clean": "yarn test --no-cache --coverage.clean",
+    "test:dev": "yarn test --mode development --reporter dot",
+    "test:verbose": "yarn test --reporter verbose",
+    "test:watch": "vitest --config vitest.config.ts"
+  },
+  "dependencies": {
+    "@metamask/logger": "workspace:^",
+    "@ocap/kernel-agents": "workspace:^",
+    "@ocap/kernel-language-model-service": "workspace:^",
+    "@ocap/repo-tools": "workspace:^"
+  },
+  "devDependencies": {
+    "@arethetypeswrong/cli": "^0.17.4",
+    "@metamask/eslint-config": "^14.0.0",
+    "@metamask/eslint-config-nodejs": "^14.0.0",
+    "@metamask/eslint-config-typescript": "^14.0.0",
+    "@types/node": "^22.13.1",
+    "@typescript-eslint/eslint-plugin": "^8.29.0",
+    "@typescript-eslint/parser": "^8.29.0",
+    "@typescript-eslint/utils": "^8.29.0",
+    "@vitest/eslint-plugin": "^1.6.5",
+    "depcheck": "^1.4.7",
+    "eslint": "^9.23.0",
+    "eslint-config-prettier": "^10.1.1",
+    "eslint-import-resolver-typescript": "^4.3.1",
+    "eslint-plugin-import-x": "^4.10.0",
+    "eslint-plugin-jsdoc": "^50.6.9",
+    "eslint-plugin-n": "^17.17.0",
+    "eslint-plugin-prettier": "^5.2.6",
+    "eslint-plugin-promise": "^7.2.1",
+    "prettier": "^3.5.3",
+    "rimraf": "^6.0.1",
+    "turbo": "^2.5.6",
+    "typescript": "~5.8.2",
+    "typescript-eslint": "^8.29.0",
+    "vite": "^7.3.0",
+    "vitest": "^4.0.16"
+  },
+  "engines": {
+    "node": "^20.11 || >=22"
+  },
+  "exports": {
+    "./package.json": "./package.json"
+  }
+}
diff --git a/packages/kernel-test-local/src/constants.ts b/packages/kernel-test-local/src/constants.ts
@@ -9,3 +9,12 @@ export const TEST_MODELS = ['llama3.1:latest', 'gpt-oss:20b'];
  */
 export const OLLAMA_API_BASE = 'http://localhost:11434';
 export const OLLAMA_TAGS_ENDPOINT = `${OLLAMA_API_BASE}/api/tags`;
+
+// extract ignored logger tags from environment variable
+
+/**
+ * Logger tags to ignore in local tests, from the comma-separated LOGGER_IGNORE env var.
+ */
+export const IGNORE_TAGS =
+  // eslint-disable-next-line n/no-process-env
+  process?.env?.LOGGER_IGNORE?.split(',')?.map((tag) => tag.trim()) ?? [];
diff --git a/packages/kernel-test-local/src/utils.test.ts b/packages/kernel-test-local/src/utils.test.ts
@@ -0,0 +1,48 @@
+import type { LogEntry } from '@metamask/logger';
+import { describe, expect, it, vi } from 'vitest';
+
+import { randomLetter } from './utils.ts';
+
+describe('utils', () => {
+  describe('randomLetter', () => {
+    it('returns a letter', () => {
+      const letters = 'abcdefghijklmnopqrstuvwxyz';
+      expect(letters).toContain(randomLetter());
+    });
+  });
+
+  describe('filterTransports', () => {
+    it('filters out only the ignored tags', async () => {
+      vi.resetModules();
+      vi.doMock('./constants.ts', async (importOriginal) => ({
+        ...(await importOriginal()),
+        IGNORE_TAGS: ['foo'],
+      }));
+      const transport = vi.fn();
+      const { filterTransports } = await import('./utils.ts');
+      const filteredTransport = filterTransports(transport);
+
+      const ignoredEntry = { level: 'debug', tags: ['foo'], message: 'test' };
+      filteredTransport(ignoredEntry as LogEntry);
+      expect(transport).not.toHaveBeenCalledWith(ignoredEntry);
+
+      const passedEntry = { level: 'debug', tags: ['bar'], message: 'test' };
+      filteredTransport(passedEntry as LogEntry);
+      expect(transport).toHaveBeenCalledWith(passedEntry);
+    });
+
+    it('filters out all tags', async () => {
+      vi.resetModules();
+      vi.doMock('./constants.ts', async (importOriginal) => ({
+        ...(await importOriginal()),
+        IGNORE_TAGS: ['all'],
+      }));
+      const transport = vi.fn();
+      const { filterTransports } = await import('./utils.ts');
+      const filteredTransport = filterTransports(transport);
+      const ignoredEntry = { level: 'debug', tags: [], message: 'test' };
+      filteredTransport(ignoredEntry as LogEntry);
+      expect(transport).not.toHaveBeenCalledWith(ignoredEntry);
+    });
+  });
+});
diff --git a/packages/kernel-test-local/src/utils.ts b/packages/kernel-test-local/src/utils.ts
@@ -1,10 +1,6 @@
 import type { LogEntry } from '@metamask/logger';
 
-// extract ignored logger tags from environment variable
-
-const ignoreTags =
-  // eslint-disable-next-line n/no-process-env
-  process?.env?.LOGGER_IGNORE?.split(',')?.map((tag) => tag.trim()) ?? [];
+import { IGNORE_TAGS } from './constants.ts';
 
 /**
  * Filter a logger transport to ignore command line specified ignore tags.
@@ -15,10 +11,10 @@ const ignoreTags =
 export const filterTransports = (
   ...transports: ((entry: LogEntry) => void)[]
 ): ((entry: LogEntry) => void) =>
-  ignoreTags.includes('all')
+  IGNORE_TAGS.includes('all')
     ? () => undefined
     : (entry) => {
-        if (ignoreTags.some((tag) => entry.tags.includes(tag))) {
+        if (IGNORE_TAGS.some((tag) => entry.tags.includes(tag))) {
           return;
         }
         transports.forEach((transport) => transport(entry));
diff --git a/packages/kernel-test-local/test/SETUP.md b/packages/kernel-test-local/test/SETUP.md
@@ -0,0 +1,43 @@
+## Setup Local Tests
+
+### Install Ollama
+
+1. Visit [https://ollama.ai](https://ollama.ai) and download Ollama for your platform
+2. Install and start the Ollama service
+3. Verify installation:
+
+   ```bash
+   curl http://localhost:11434
+   ```
+
+   You should see: `Ollama is running`
+
+### Download Required Models
+
+The tests require the following model:
+
+- `llama3.1:latest`
+
+Download the model:
+
+```bash
+ollama pull llama3.1:latest
+```
+
+Verify the model is available:
+
+```bash
+ollama list
+```
+
+You should see `llama3.1:latest` in the output.
+
+### Validate Test Framework Setup
+
+Verify the configured test framework can access the resources above.
+
+```bash
+yarn test:e2e:local -t suite
+```
+
+All suite tests should pass, with all other tests skipped.
diff --git a/packages/kernel-test-local/test/TROUBLESHOOTING.md b/packages/kernel-test-local/test/TROUBLESHOOTING.md
@@ -0,0 +1,41 @@
+## Troubleshooting
+
+### Connection refused errors
+
+- Ensure Ollama is running: `ollama serve`
+- Check port 11434 is accessible: `curl http://localhost:11434`
+- Check for conflicting processes: `lsof -i :11434`
+
+### Model not found errors
+
+- List available models: `ollama list`
+- Pull required model: `ollama pull llama3.1:latest`
+- Verify model name matches exactly (including version tag)
+
+### Timeout errors
+
+Tests may time out if:
+
+- Model is not loaded in memory (first run after Ollama restart may be slow)
+- System resources are constrained (CPU/memory)
+- The LLM is struggling with the specific prompt
+
+Try:
+
+- Restarting Ollama: `killall ollama && ollama serve`
+- Running tests individually to isolate issues
+- Increasing available system resources
+
+### Test failures due to LLM responses
+
+These tests verify that agents can interact with language models, but the quality of responses depends on the model's capabilities. Occasional failures are expected, especially for:
+
+- Complex reasoning tasks
+- Code generation problems
+- Multi-step calculations
+
+If tests consistently fail, check:
+
+- Model is loaded correctly: `ollama ps`
+- Ollama logs for errors: Check console output from `ollama serve`
+- System has adequate resources (8GB+ RAM recommended)
diff --git a/packages/kernel-test-local/test/e2e/agents.test.ts b/packages/kernel-test-local/test/e2e/agents.test.ts
@@ -1,6 +1,11 @@
 import '@ocap/repo-tools/test-utils/mock-endoify';
 
 import { makeConsoleTransport, Logger } from '@metamask/logger';
+import type { MakeAgentArgs, Agent } from '@ocap/kernel-agents';
+import { getMoonPhase } from '@ocap/kernel-agents/capabilities/examples';
+import { count, add, multiply } from '@ocap/kernel-agents/capabilities/math';
+import { makeJsonAgent } from '@ocap/kernel-agents/json';
+import { makeReplAgent } from '@ocap/kernel-agents/repl';
 import { OllamaNodejsService } from '@ocap/kernel-language-model-service/ollama/nodejs';
 import { fetchMock } from '@ocap/repo-tools/test-utils/fetch-mock';
 import {
@@ -14,14 +19,8 @@ import {
   vi,
 } from 'vitest';
 
-import type { MakeAgentArgs } from '../../src/agent.ts';
-import { getMoonPhase } from '../../src/capabilities/examples.ts';
-import { count, add, multiply } from '../../src/capabilities/math.ts';
-import { makeJsonAgent } from '../../src/strategies/json-agent.ts';
-import { makeReplAgent } from '../../src/strategies/repl-agent.ts';
-import type { Agent } from '../../src/types.ts';
-import { DEFAULT_MODEL } from '../constants.ts';
-import { filterTransports, randomLetter } from '../utils.ts';
+import { DEFAULT_MODEL } from '../../src/constants.ts';
+import { filterTransports, randomLetter } from '../../src/utils.ts';
 
 const logger = new Logger({
   tags: ['test'],
diff --git a/packages/kernel-test-local/test/e2e/suite.test.ts b/packages/kernel-test-local/test/e2e/suite.test.ts
@@ -1,11 +1,19 @@
+/**
+ * Pre-test verification suite that checks:
+ *
+ * - Ollama service is running and accessible
+ * - Required models are available
+ *
+ * These tests run sequentially and must pass before the main test suite.
+ */
 import { fetchMock } from '@ocap/repo-tools/test-utils/fetch-mock';
 import { afterAll, beforeAll, describe, expect, it } from 'vitest';
 
 import {
   DEFAULT_MODEL,
   OLLAMA_API_BASE,
   OLLAMA_TAGS_ENDPOINT,
-} from '../constants.ts';
+} from '../../src/constants.ts';
 
 describe.sequential('test suite', () => {
   beforeAll(() => {
diff --git a/packages/kernel-test-local/tsconfig.json b/packages/kernel-test-local/tsconfig.json
@@ -0,0 +1,22 @@
+{
+  "extends": "../../tsconfig.packages.json",
+  "compilerOptions": {
+    "baseUrl": "./",
+    "lib": ["ES2022"],
+    "noEmit": true,
+    "types": ["vitest", "node"]
+  },
+  "references": [
+    { "path": "../kernel-agents" },
+    { "path": "../kernel-language-model-service" },
+    { "path": "../logger" },
+    { "path": "../repo-tools" }
+  ],
+  "include": [
+    "../../vitest.config.ts",
+    "./src",
+    "./vitest.config.ts",
+    "./vitest.config.e2e.ts",
+    "./test/e2e"
+  ]
+}
diff --git a/packages/kernel-test-local/turbo.json b/packages/kernel-test-local/turbo.json
diff --git a/packages/kernel-test-local/vitest.config.e2e.ts b/packages/kernel-test-local/vitest.config.e2e.ts
diff --git a/packages/kernel-test-local/vitest.config.ts b/packages/kernel-test-local/vitest.config.ts
diff --git a/yarn.lock b/yarn.lock