Skip to content

Commit 6f56ad6

Browse files
authored
refactor: Add kernel-test-local package for local-only E2E tests (#724)
Create a new `@ocap/kernel-test-local` package to contain E2E tests that run on dev workstations but not in CI. The primary motivation for this setup is to test agentic integrations using local language models. Adds a new `yarn test:e2e:local` command to the monorepo root. Note that `yarn test:e2e` will not run the local tests. Additional Changes: - Remove `test:e2e` script from kernel-agents - Move E2E tests from `kernel-agents/test/e2e/` to new package - Add capability exports to kernel-agents package: - Export `./capabilities/examples` (getMoonPhase, search) - Export `./capabilities/math` (count, add, multiply) - Export Agent and MakeAgentArgs types from main index Closes #723
1 parent be89d42 commit 6f56ad6

File tree

17 files changed

+370
-25
lines changed

17 files changed

+370
-25
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"test:dev": "yarn test --mode development --reporter dot",
3535
"test:e2e": "yarn workspaces foreach --all run test:e2e",
3636
"test:e2e:ci": "yarn workspaces foreach --all run test:e2e:ci",
37+
"test:e2e:local": "yarn workspaces foreach --all run test:e2e:local",
3738
"test:verbose": "yarn test --reporter verbose",
3839
"test:watch": "vitest",
3940
"why:batch": "./scripts/why-batch.sh"

packages/kernel-agents/package.json

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,27 @@
4343
"default": "./dist/strategies/repl-agent.cjs"
4444
}
4545
},
46-
"./package.json": "./package.json"
46+
"./package.json": "./package.json",
47+
"./capabilities/examples": {
48+
"import": {
49+
"types": "./dist/capabilities/examples.d.mts",
50+
"default": "./dist/capabilities/examples.mjs"
51+
},
52+
"require": {
53+
"types": "./dist/capabilities/examples.d.cts",
54+
"default": "./dist/capabilities/examples.cjs"
55+
}
56+
},
57+
"./capabilities/math": {
58+
"import": {
59+
"types": "./dist/capabilities/math.d.mts",
60+
"default": "./dist/capabilities/math.mjs"
61+
},
62+
"require": {
63+
"types": "./dist/capabilities/math.d.cts",
64+
"default": "./dist/capabilities/math.cjs"
65+
}
66+
}
4767
},
4868
"files": [
4969
"dist/"
@@ -60,7 +80,6 @@
6080
"lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore --log-level error",
6181
"publish:preview": "yarn npm publish --tag preview",
6282
"test": "vitest run --config vitest.config.ts",
63-
"test:e2e": "vitest run --config vitest.config.e2e.ts",
6483
"test:clean": "yarn test --no-cache --coverage.clean",
6584
"test:dev": "yarn test --mode development --reporter dot",
6685
"test:verbose": "yarn test --reporter verbose",
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
1+
export type { Agent } from './types/agent.ts';
12
export type { CapabilityRecord } from './types.ts';
3+
export type { MakeAgentArgs } from './agent.ts';
24
export { discover } from './capabilities/discover.ts';
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# `@ocap/kernel-test-local`
2+
3+
Local-only E2E tests that use a locally hosted language model.
4+
5+
## Overview
6+
7+
This package contains E2E tests that require a running Ollama instance with specific models installed. These tests are **not run in CI** and are intended for local development and validation only. Because language model outputs are inherently non-deterministic, these tests may occasionally fail even when the implementation is correct.
8+
9+
## Setup
10+
11+
See [SETUP.md](./test/SETUP.md) for installing Ollama and downloading the required models.
12+
13+
## Running Tests
14+
15+
From the repository root:
16+
17+
```bash
18+
yarn test:e2e:local
19+
```
20+
21+
From this package directory:
22+
23+
```bash
24+
yarn test:e2e:local
25+
```
26+
27+
## Troubleshooting
28+
29+
See [TROUBLESHOOTING.md](./test/TROUBLESHOOTING.md) for help with connection, missing-model, timeout, and LLM-response failures.
30+
31+
## Contributing
32+
33+
This package is part of the ocap-kernel monorepo. For contributing guidelines, see the [main repository README](https://github.com/MetaMask/ocap-kernel#readme).
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
{
2+
"name": "@ocap/kernel-test-local",
3+
"version": "0.0.0",
4+
"private": true,
5+
"description": "Local-only E2E tests for kernel agents requiring external dependencies (Ollama)",
6+
"homepage": "https://github.com/MetaMask/ocap-kernel/tree/main/packages/kernel-test-local#readme",
7+
"bugs": {
8+
"url": "https://github.com/MetaMask/ocap-kernel/issues"
9+
},
10+
"repository": {
11+
"type": "git",
12+
"url": "https://github.com/MetaMask/ocap-kernel.git"
13+
},
14+
"type": "module",
15+
"scripts": {
16+
"clean": "rimraf --glob './*.tsbuildinfo' ./.eslintcache ./coverage ./.turbo",
17+
"lint": "yarn lint:eslint && yarn lint:misc --check && yarn constraints && yarn lint:dependencies",
18+
"lint:dependencies": "depcheck --quiet",
19+
"lint:eslint": "eslint . --cache",
20+
"lint:fix": "yarn lint:eslint --fix && yarn lint:misc --write && yarn constraints --fix && yarn lint:dependencies",
21+
"lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore --log-level error",
22+
"build:docs": "typedoc",
23+
"test": "vitest run --config vitest.config.ts",
24+
"test:e2e:local": "vitest run --config vitest.config.e2e.ts",
25+
"test:clean": "yarn test --no-cache --coverage.clean",
26+
"test:dev": "yarn test --mode development --reporter dot",
27+
"test:verbose": "yarn test --reporter verbose",
28+
"test:watch": "vitest --config vitest.config.ts"
29+
},
30+
"dependencies": {
31+
"@metamask/logger": "workspace:^",
32+
"@ocap/kernel-agents": "workspace:^",
33+
"@ocap/kernel-language-model-service": "workspace:^",
34+
"@ocap/repo-tools": "workspace:^"
35+
},
36+
"devDependencies": {
37+
"@arethetypeswrong/cli": "^0.17.4",
38+
"@metamask/eslint-config": "^14.0.0",
39+
"@metamask/eslint-config-nodejs": "^14.0.0",
40+
"@metamask/eslint-config-typescript": "^14.0.0",
41+
"@types/node": "^22.13.1",
42+
"@typescript-eslint/eslint-plugin": "^8.29.0",
43+
"@typescript-eslint/parser": "^8.29.0",
44+
"@typescript-eslint/utils": "^8.29.0",
45+
"@vitest/eslint-plugin": "^1.6.5",
46+
"depcheck": "^1.4.7",
47+
"eslint": "^9.23.0",
48+
"eslint-config-prettier": "^10.1.1",
49+
"eslint-import-resolver-typescript": "^4.3.1",
50+
"eslint-plugin-import-x": "^4.10.0",
51+
"eslint-plugin-jsdoc": "^50.6.9",
52+
"eslint-plugin-n": "^17.17.0",
53+
"eslint-plugin-prettier": "^5.2.6",
54+
"eslint-plugin-promise": "^7.2.1",
55+
"prettier": "^3.5.3",
56+
"rimraf": "^6.0.1",
57+
"turbo": "^2.5.6",
58+
"typescript": "~5.8.2",
59+
"typescript-eslint": "^8.29.0",
60+
"vite": "^7.3.0",
61+
"vitest": "^4.0.16"
62+
},
63+
"engines": {
64+
"node": "^20.11 || >=22"
65+
},
66+
"exports": {
67+
"./package.json": "./package.json"
68+
}
69+
}

packages/kernel-agents/test/constants.ts renamed to packages/kernel-test-local/src/constants.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,12 @@ export const TEST_MODELS = ['llama3.1:latest', 'gpt-oss:20b'];
99
*/
1010
export const OLLAMA_API_BASE = 'http://localhost:11434';
1111
export const OLLAMA_TAGS_ENDPOINT = `${OLLAMA_API_BASE}/api/tags`;
12+
13+
// extract ignored logger tags from environment variable
14+
15+
/**
16+
 * Logger tags to ignore in the local tests, parsed from the comma-separated `LOGGER_IGNORE` environment variable (empty when it is unset).
17+
*/
18+
export const IGNORE_TAGS =
19+
// eslint-disable-next-line n/no-process-env
20+
process?.env?.LOGGER_IGNORE?.split(',')?.map((tag) => tag.trim()) ?? [];
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import type { LogEntry } from '@metamask/logger';
2+
import { describe, expect, it, vi } from 'vitest';
3+
4+
import { randomLetter } from './utils.ts';
5+
6+
describe('utils', () => {
7+
describe('randomLetter', () => {
8+
it('returns a letter', () => {
9+
const letters = 'abcdefghijklmnopqrstuvwxyz';
10+
expect(letters).toContain(randomLetter());
11+
});
12+
});
13+
14+
describe('filterTransports', () => {
15+
it('filters out only the ignored tags', async () => {
16+
vi.resetModules();
17+
vi.doMock('./constants.ts', async (importOriginal) => ({
18+
...(await importOriginal()),
19+
IGNORE_TAGS: ['foo'],
20+
}));
21+
const transport = vi.fn();
22+
const { filterTransports } = await import('./utils.ts');
23+
const filteredTransport = filterTransports(transport);
24+
25+
const ignoredEntry = { level: 'debug', tags: ['foo'], message: 'test' };
26+
filteredTransport(ignoredEntry as LogEntry);
27+
expect(transport).not.toHaveBeenCalledWith(ignoredEntry);
28+
29+
const passedEntry = { level: 'debug', tags: ['bar'], message: 'test' };
30+
filteredTransport(passedEntry as LogEntry);
31+
expect(transport).toHaveBeenCalledWith(passedEntry);
32+
});
33+
34+
it('filters out all tags', async () => {
35+
vi.resetModules();
36+
vi.doMock('./constants.ts', async (importOriginal) => ({
37+
...(await importOriginal()),
38+
IGNORE_TAGS: ['all'],
39+
}));
40+
const transport = vi.fn();
41+
const { filterTransports } = await import('./utils.ts');
42+
const filteredTransport = filterTransports(transport);
43+
const ignoredEntry = { level: 'debug', tags: [], message: 'test' };
44+
filteredTransport(ignoredEntry as LogEntry);
45+
expect(transport).not.toHaveBeenCalledWith(ignoredEntry);
46+
});
47+
});
48+
});
Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
import type { LogEntry } from '@metamask/logger';
22

3-
// extract ignored logger tags from environment variable
4-
5-
const ignoreTags =
6-
// eslint-disable-next-line n/no-process-env
7-
process?.env?.LOGGER_IGNORE?.split(',')?.map((tag) => tag.trim()) ?? [];
3+
import { IGNORE_TAGS } from './constants.ts';
84

95
/**
106
 * Wrap logger transports so that entries carrying any tag in `IGNORE_TAGS` are dropped; if `IGNORE_TAGS` includes `all`, every entry is dropped.
@@ -15,10 +11,10 @@ const ignoreTags =
1511
export const filterTransports = (
1612
...transports: ((entry: LogEntry) => void)[]
1713
): ((entry: LogEntry) => void) =>
18-
ignoreTags.includes('all')
14+
IGNORE_TAGS.includes('all')
1915
? () => undefined
2016
: (entry) => {
21-
if (ignoreTags.some((tag) => entry.tags.includes(tag))) {
17+
if (IGNORE_TAGS.some((tag) => entry.tags.includes(tag))) {
2218
return;
2319
}
2420
transports.forEach((transport) => transport(entry));
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
## Setup Local Tests
2+
3+
### Install Ollama
4+
5+
1. Visit [https://ollama.ai](https://ollama.ai) and download Ollama for your platform
6+
2. Install and start the Ollama service
7+
3. Verify installation:
8+
9+
```bash
10+
curl http://localhost:11434
11+
```
12+
13+
You should see: `Ollama is running`
14+
15+
### Download Required Models
16+
17+
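The tests require the following model (`TEST_MODELS` in `src/constants.ts` also lists `gpt-oss:20b`; pull it as well if a test reports it missing):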
18+
19+
- `llama3.1:latest`
20+
21+
Download the model:
22+
23+
```bash
24+
ollama pull llama3.1:latest
25+
```
26+
27+
Verify the model is available:
28+
29+
```bash
30+
ollama list
31+
```
32+
33+
You should see `llama3.1:latest` in the output.
34+
35+
### Validate Test Framework Setup
36+
37+
Verify the configured test framework can access the resources above.
38+
39+
```bash
40+
yarn test:e2e:local -t suite
41+
```
42+
43+
All suite tests should pass, with all other tests skipped.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
## Troubleshooting
2+
3+
### Connection refused errors
4+
5+
- Ensure Ollama is running: `ollama serve`
6+
- Check port 11434 is accessible: `curl http://localhost:11434`
7+
- Check for conflicting processes: `lsof -i :11434`
8+
9+
### Model not found errors
10+
11+
- List available models: `ollama list`
12+
- Pull required model: `ollama pull llama3.1:latest`
13+
- Verify model name matches exactly (including version tag)
14+
15+
### Timeout errors
16+
17+
Tests may time out if:
18+
19+
- Model is not loaded in memory (first run after Ollama restart may be slow)
20+
- System resources are constrained (CPU/memory)
21+
- The LLM is struggling with the specific prompt
22+
23+
Try:
24+
25+
- Restarting Ollama: `killall ollama && ollama serve`
26+
- Running tests individually to isolate issues
27+
- Increasing available system resources
28+
29+
### Test failures due to LLM responses
30+
31+
These tests verify that agents can interact with language models, but the quality of responses depends on the model's capabilities. Occasional failures are expected, especially for:
32+
33+
- Complex reasoning tasks
34+
- Code generation problems
35+
- Multi-step calculations
36+
37+
If tests consistently fail, check:
38+
39+
- Model is loaded correctly: `ollama ps`
40+
- Ollama logs for errors: Check console output from `ollama serve`
41+
- System has adequate resources (8GB+ RAM recommended)

0 commit comments

Comments
 (0)