Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"test:dev": "yarn test --mode development --reporter dot",
"test:e2e": "yarn workspaces foreach --all run test:e2e",
"test:e2e:ci": "yarn workspaces foreach --all run test:e2e:ci",
"test:e2e:local": "yarn workspaces foreach --all run test:e2e:local",
"test:verbose": "yarn test --reporter verbose",
"test:watch": "vitest",
"why:batch": "./scripts/why-batch.sh"
Expand Down
23 changes: 21 additions & 2 deletions packages/kernel-agents/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,27 @@
"default": "./dist/strategies/repl-agent.cjs"
}
},
"./package.json": "./package.json"
"./package.json": "./package.json",
"./capabilities/examples": {
"import": {
"types": "./dist/capabilities/examples.d.mts",
"default": "./dist/capabilities/examples.mjs"
},
"require": {
"types": "./dist/capabilities/examples.d.cts",
"default": "./dist/capabilities/examples.cjs"
}
},
"./capabilities/math": {
"import": {
"types": "./dist/capabilities/math.d.mts",
"default": "./dist/capabilities/math.mjs"
},
"require": {
"types": "./dist/capabilities/math.d.cts",
"default": "./dist/capabilities/math.cjs"
}
}
},
"files": [
"dist/"
Expand All @@ -60,7 +80,6 @@
"lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore --log-level error",
"publish:preview": "yarn npm publish --tag preview",
"test": "vitest run --config vitest.config.ts",
"test:e2e": "vitest run --config vitest.config.e2e.ts",
"test:clean": "yarn test --no-cache --coverage.clean",
"test:dev": "yarn test --mode development --reporter dot",
"test:verbose": "yarn test --reporter verbose",
Expand Down
2 changes: 2 additions & 0 deletions packages/kernel-agents/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
export type { Agent } from './types/agent.ts';
export type { CapabilityRecord } from './types.ts';
export type { MakeAgentArgs } from './agent.ts';
export { discover } from './capabilities/discover.ts';
33 changes: 33 additions & 0 deletions packages/kernel-test-local/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# `@ocap/kernel-test-local`

Local-only E2E tests that use a locally hosted language model.

## Overview

This package contains E2E tests that require a running Ollama instance with specific models installed. These tests are **not run in CI** and are intended for local development and validation only. Because language model outputs are inherently non-deterministic, these tests may occasionally fail even when the implementation is correct.

## Setup

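See [SETUP.md](./test/SETUP.md) for instructions on installing Ollama, downloading the required model, and validating the test framework setup.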

## Running Tests

From the repository root:

```bash
yarn test:e2e:local
```

From this package directory:

```bash
yarn test:e2e:local
```

## Troubleshooting

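See [TROUBLESHOOTING.md](./test/TROUBLESHOOTING.md) for help with connection, missing-model, and timeout errors, and with test failures caused by LLM responses.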

## Contributing

This package is part of the ocap-kernel monorepo. For contributing guidelines, see the [main repository README](https://github.com/MetaMask/ocap-kernel#readme).
69 changes: 69 additions & 0 deletions packages/kernel-test-local/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{
"name": "@ocap/kernel-test-local",
"version": "0.0.0",
"private": true,
"description": "Local-only E2E tests for kernel agents requiring external dependencies (Ollama)",
"homepage": "https://github.com/MetaMask/ocap-kernel/tree/main/packages/kernel-test-local#readme",
"bugs": {
"url": "https://github.com/MetaMask/ocap-kernel/issues"
},
"repository": {
"type": "git",
"url": "https://github.com/MetaMask/ocap-kernel.git"
},
"type": "module",
"scripts": {
"clean": "rimraf --glob './*.tsbuildinfo' ./.eslintcache ./coverage ./.turbo",
"lint": "yarn lint:eslint && yarn lint:misc --check && yarn constraints && yarn lint:dependencies",
"lint:dependencies": "depcheck --quiet",
"lint:eslint": "eslint . --cache",
"lint:fix": "yarn lint:eslint --fix && yarn lint:misc --write && yarn constraints --fix && yarn lint:dependencies",
"lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore --log-level error",
"build:docs": "typedoc",
"test": "vitest run --config vitest.config.ts",
"test:e2e:local": "vitest run --config vitest.config.e2e.ts",
"test:clean": "yarn test --no-cache --coverage.clean",
"test:dev": "yarn test --mode development --reporter dot",
"test:verbose": "yarn test --reporter verbose",
"test:watch": "vitest --config vitest.config.ts"
},
"dependencies": {
"@metamask/logger": "workspace:^",
"@ocap/kernel-agents": "workspace:^",
"@ocap/kernel-language-model-service": "workspace:^",
"@ocap/repo-tools": "workspace:^"
},
"devDependencies": {
"@arethetypeswrong/cli": "^0.17.4",
"@metamask/eslint-config": "^14.0.0",
"@metamask/eslint-config-nodejs": "^14.0.0",
"@metamask/eslint-config-typescript": "^14.0.0",
"@types/node": "^22.13.1",
"@typescript-eslint/eslint-plugin": "^8.29.0",
"@typescript-eslint/parser": "^8.29.0",
"@typescript-eslint/utils": "^8.29.0",
"@vitest/eslint-plugin": "^1.6.5",
"depcheck": "^1.4.7",
"eslint": "^9.23.0",
"eslint-config-prettier": "^10.1.1",
"eslint-import-resolver-typescript": "^4.3.1",
"eslint-plugin-import-x": "^4.10.0",
"eslint-plugin-jsdoc": "^50.6.9",
"eslint-plugin-n": "^17.17.0",
"eslint-plugin-prettier": "^5.2.6",
"eslint-plugin-promise": "^7.2.1",
"prettier": "^3.5.3",
"rimraf": "^6.0.1",
"turbo": "^2.5.6",
"typescript": "~5.8.2",
"typescript-eslint": "^8.29.0",
"vite": "^7.3.0",
"vitest": "^4.0.16"
},
"engines": {
"node": "^20.11 || >=22"
},
"exports": {
"./package.json": "./package.json"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,12 @@ export const TEST_MODELS = ['llama3.1:latest', 'gpt-oss:20b'];
*/
export const OLLAMA_API_BASE = 'http://localhost:11434';
export const OLLAMA_TAGS_ENDPOINT = `${OLLAMA_API_BASE}/api/tags`;

/**
 * The logger tags to ignore for the local tests.
 *
 * Parsed from the comma-separated `LOGGER_IGNORE` environment variable. Each
 * tag is trimmed of surrounding whitespace, and empty segments (from an empty
 * value, a trailing comma, or `a,,b`) are dropped so that the list never
 * contains the empty string. The list is empty when the variable is unset.
 */
export const IGNORE_TAGS: string[] =
  // eslint-disable-next-line n/no-process-env
  (process?.env?.LOGGER_IGNORE ?? '')
    .split(',')
    .map((tag) => tag.trim())
    // `''.split(',')` yields `['']`, so filter out blank segments.
    .filter((tag) => tag !== '');
48 changes: 48 additions & 0 deletions packages/kernel-test-local/src/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import type { LogEntry } from '@metamask/logger';
import { describe, expect, it, vi } from 'vitest';

import { randomLetter } from './utils.ts';

describe('utils', () => {
  describe('randomLetter', () => {
    it('returns a letter', () => {
      const letters = 'abcdefghijklmnopqrstuvwxyz';
      const letter = randomLetter();
      // `toContain` on a string is a substring check, so also pin the length;
      // otherwise '' or 'abc' would pass.
      expect(letter).toHaveLength(1);
      expect(letters).toContain(letter);
    });
  });

  describe('filterTransports', () => {
    /**
     * Import a fresh copy of `utils.ts` with `IGNORE_TAGS` in `constants.ts`
     * replaced by the given list. The module registry is reset first so the
     * mocked constants are picked up by the new import.
     *
     * @param ignoreTags - The tags to expose as `IGNORE_TAGS`.
     * @returns The freshly imported `utils.ts` module.
     */
    const importUtilsWithIgnoreTags = async (
      ignoreTags: string[],
    ): Promise<typeof import('./utils.ts')> => {
      vi.resetModules();
      vi.doMock('./constants.ts', async (importOriginal) => ({
        ...(await importOriginal()),
        IGNORE_TAGS: ignoreTags,
      }));
      return await import('./utils.ts');
    };

    it('filters out only the ignored tags', async () => {
      const { filterTransports } = await importUtilsWithIgnoreTags(['foo']);
      const transport = vi.fn();
      const filteredTransport = filterTransports(transport);

      const ignoredEntry = { level: 'debug', tags: ['foo'], message: 'test' };
      filteredTransport(ignoredEntry as LogEntry);
      // Nothing at all may reach the transport for an ignored entry.
      expect(transport).not.toHaveBeenCalled();

      const passedEntry = { level: 'debug', tags: ['bar'], message: 'test' };
      filteredTransport(passedEntry as LogEntry);
      expect(transport).toHaveBeenCalledTimes(1);
      expect(transport).toHaveBeenCalledWith(passedEntry);
    });

    it('filters out all tags', async () => {
      const { filterTransports } = await importUtilsWithIgnoreTags(['all']);
      const transport = vi.fn();
      const filteredTransport = filterTransports(transport);

      // With 'all' ignored, entries are dropped whether or not they are tagged.
      const untaggedEntry = { level: 'debug', tags: [], message: 'test' };
      const taggedEntry = { level: 'debug', tags: ['bar'], message: 'test' };
      filteredTransport(untaggedEntry as LogEntry);
      filteredTransport(taggedEntry as LogEntry);
      expect(transport).not.toHaveBeenCalled();
    });
  });
});
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
import type { LogEntry } from '@metamask/logger';

// extract ignored logger tags from environment variable

const ignoreTags =
// eslint-disable-next-line n/no-process-env
process?.env?.LOGGER_IGNORE?.split(',')?.map((tag) => tag.trim()) ?? [];
import { IGNORE_TAGS } from './constants.ts';

/**
* Filter a logger transport to ignore command line specified ignore tags.
Expand All @@ -15,10 +11,10 @@ const ignoreTags =
export const filterTransports = (
...transports: ((entry: LogEntry) => void)[]
): ((entry: LogEntry) => void) =>
ignoreTags.includes('all')
IGNORE_TAGS.includes('all')
? () => undefined
: (entry) => {
if (ignoreTags.some((tag) => entry.tags.includes(tag))) {
if (IGNORE_TAGS.some((tag) => entry.tags.includes(tag))) {
return;
}
transports.forEach((transport) => transport(entry));
Expand Down
43 changes: 43 additions & 0 deletions packages/kernel-test-local/test/SETUP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
## Setup Local Tests

### Install Ollama

1. Visit [https://ollama.ai](https://ollama.ai) and download Ollama for your platform
2. Install and start the Ollama service
3. Verify installation:

```bash
curl http://localhost:11434
```

You should see: `Ollama is running`

### Download Required Models

The tests require the following model:

- `llama3.1:latest`

Download the model:

```bash
ollama pull llama3.1:latest
```

Verify the model is available:

```bash
ollama list
```

You should see `llama3.1:latest` in the output.

### Validate Test Framework Setup

Verify the configured test framework can access the resources above.

```bash
yarn test:e2e:local -t suite
```

All suite tests should pass, with all other tests skipped.
41 changes: 41 additions & 0 deletions packages/kernel-test-local/test/TROUBLESHOOTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
## Troubleshooting

### Connection refused errors

- Ensure Ollama is running: `ollama serve`
- Check port 11434 is accessible: `curl http://localhost:11434`
- Check for conflicting processes: `lsof -i :11434`

### Model not found errors

- List available models: `ollama list`
- Pull required model: `ollama pull llama3.1:latest`
- Verify model name matches exactly (including version tag)

### Timeout errors

Tests may time out if:

- Model is not loaded in memory (first run after Ollama restart may be slow)
- System resources are constrained (CPU/memory)
- The LLM is struggling with the specific prompt

Try:

- Restarting Ollama: `killall ollama && ollama serve`
- Running tests individually to isolate issues
- Increasing available system resources

### Test failures due to LLM responses

These tests verify that agents can interact with language models, but the quality of responses depends on the model's capabilities. Occasional failures are expected, especially for:

- Complex reasoning tasks
- Code generation problems
- Multi-step calculations

If tests consistently fail, check:

- Model is loaded correctly: `ollama ps`
- Ollama logs for errors: Check console output from `ollama serve`
- System has adequate resources (8GB+ RAM recommended)
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import '@ocap/repo-tools/test-utils/mock-endoify';

import { makeConsoleTransport, Logger } from '@metamask/logger';
import type { MakeAgentArgs, Agent } from '@ocap/kernel-agents';
import { getMoonPhase } from '@ocap/kernel-agents/capabilities/examples';
import { count, add, multiply } from '@ocap/kernel-agents/capabilities/math';
import { makeJsonAgent } from '@ocap/kernel-agents/json';
import { makeReplAgent } from '@ocap/kernel-agents/repl';
import { OllamaNodejsService } from '@ocap/kernel-language-model-service/ollama/nodejs';
import { fetchMock } from '@ocap/repo-tools/test-utils/fetch-mock';
import {
Expand All @@ -14,14 +19,8 @@ import {
vi,
} from 'vitest';

import type { MakeAgentArgs } from '../../src/agent.ts';
import { getMoonPhase } from '../../src/capabilities/examples.ts';
import { count, add, multiply } from '../../src/capabilities/math.ts';
import { makeJsonAgent } from '../../src/strategies/json-agent.ts';
import { makeReplAgent } from '../../src/strategies/repl-agent.ts';
import type { Agent } from '../../src/types.ts';
import { DEFAULT_MODEL } from '../constants.ts';
import { filterTransports, randomLetter } from '../utils.ts';
import { DEFAULT_MODEL } from '../../src/constants.ts';
import { filterTransports, randomLetter } from '../../src/utils.ts';

const logger = new Logger({
tags: ['test'],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
/**
* Pre-test verification suite that checks:
*
* - Ollama service is running and accessible
* - Required models are available
*
* These tests run sequentially and must pass before the main test suite.
*/
import { fetchMock } from '@ocap/repo-tools/test-utils/fetch-mock';
import { afterAll, beforeAll, describe, expect, it } from 'vitest';

import {
DEFAULT_MODEL,
OLLAMA_API_BASE,
OLLAMA_TAGS_ENDPOINT,
} from '../constants.ts';
} from '../../src/constants.ts';

describe.sequential('test suite', () => {
beforeAll(() => {
Expand Down
22 changes: 22 additions & 0 deletions packages/kernel-test-local/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"extends": "../../tsconfig.packages.json",
"compilerOptions": {
"baseUrl": "./",
"lib": ["ES2022"],
"noEmit": true,
"types": ["vitest", "node"]
},
"references": [
{ "path": "../kernel-agents" },
{ "path": "../kernel-language-model-service" },
{ "path": "../logger" },
{ "path": "../repo-tools" }
],
"include": [
"../../vitest.config.ts",
"./src",
"./vitest.config.ts",
"./vitest.config.e2e.ts",
"./test/e2e"
]
}
Loading
Loading