diff --git a/package.json b/package.json index e7c51cb45..311e4ca2f 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ "test:dev": "yarn test --mode development --reporter dot", "test:e2e": "yarn workspaces foreach --all run test:e2e", "test:e2e:ci": "yarn workspaces foreach --all run test:e2e:ci", + "test:e2e:local": "yarn workspaces foreach --all run test:e2e:local", "test:verbose": "yarn test --reporter verbose", "test:watch": "vitest", "why:batch": "./scripts/why-batch.sh" diff --git a/packages/kernel-agents/package.json b/packages/kernel-agents/package.json index 8af785726..9659c5013 100644 --- a/packages/kernel-agents/package.json +++ b/packages/kernel-agents/package.json @@ -43,7 +43,27 @@ "default": "./dist/strategies/repl-agent.cjs" } }, - "./package.json": "./package.json" + "./package.json": "./package.json", + "./capabilities/examples": { + "import": { + "types": "./dist/capabilities/examples.d.mts", + "default": "./dist/capabilities/examples.mjs" + }, + "require": { + "types": "./dist/capabilities/examples.d.cts", + "default": "./dist/capabilities/examples.cjs" + } + }, + "./capabilities/math": { + "import": { + "types": "./dist/capabilities/math.d.mts", + "default": "./dist/capabilities/math.mjs" + }, + "require": { + "types": "./dist/capabilities/math.d.cts", + "default": "./dist/capabilities/math.cjs" + } + } }, "files": [ "dist/" @@ -60,7 +80,6 @@ "lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore --log-level error", "publish:preview": "yarn npm publish --tag preview", "test": "vitest run --config vitest.config.ts", - "test:e2e": "vitest run --config vitest.config.e2e.ts", "test:clean": "yarn test --no-cache --coverage.clean", "test:dev": "yarn test --mode development --reporter dot", "test:verbose": "yarn test --reporter verbose", diff --git a/packages/kernel-agents/src/index.ts b/packages/kernel-agents/src/index.ts index 5860af6e6..5df9d5ea6 100644 --- a/packages/kernel-agents/src/index.ts +++ b/packages/kernel-agents/src/index.ts @@ -1,2 +1,4 @@ +export type { Agent } from './types/agent.ts'; export type { CapabilityRecord } from './types.ts'; +export type { MakeAgentArgs } from './agent.ts'; export { discover } from './capabilities/discover.ts'; diff --git a/packages/kernel-test-local/README.md b/packages/kernel-test-local/README.md new file mode 100644 index 000000000..40fd33ed7 --- /dev/null +++ b/packages/kernel-test-local/README.md @@ -0,0 +1,33 @@ +# `@ocap/kernel-test-local` + +Local-only E2E tests that use a locally hosted language model. + +## Overview + +This package contains E2E tests that require a running Ollama instance with specific models installed. These tests are **not run in CI** and are intended for local development and validation only. Because language model outputs are inherently non-deterministic, these tests may occasionally fail even when the implementation is correct. + +## Setup + +[SETUP.md](./test/SETUP.md) + +## Running Tests + +From the repository root: + +```bash +yarn test:e2e:local +``` + +From this package directory: + +```bash +yarn test:e2e:local +``` + +## Troubleshooting + +[TROUBLESHOOTING.md](./test/TROUBLESHOOTING.md) + +## Contributing + +This package is part of the ocap-kernel monorepo. For contributing guidelines, see the [main repository README](https://github.com/MetaMask/ocap-kernel#readme). diff --git a/packages/kernel-test-local/package.json b/packages/kernel-test-local/package.json new file mode 100644 index 000000000..a85ae8acd --- /dev/null +++ b/packages/kernel-test-local/package.json @@ -0,0 +1,69 @@ +{ + "name": "@ocap/kernel-test-local", + "version": "0.0.0", + "private": true, + "description": "Local-only E2E tests for kernel agents requiring external dependencies (Ollama)", + "homepage": "https://github.com/MetaMask/ocap-kernel/tree/main/packages/kernel-test-local#readme", + "bugs": { + "url": "https://github.com/MetaMask/ocap-kernel/issues" + }, + "repository": { + "type": "git", + "url": "https://github.com/MetaMask/ocap-kernel.git" + }, + "type": "module", + "scripts": { + "clean": "rimraf --glob './*.tsbuildinfo' ./.eslintcache ./coverage ./.turbo", + "lint": "yarn lint:eslint && yarn lint:misc --check && yarn constraints && yarn lint:dependencies", + "lint:dependencies": "depcheck --quiet", + "lint:eslint": "eslint . --cache", + "lint:fix": "yarn lint:eslint --fix && yarn lint:misc --write && yarn constraints --fix && yarn lint:dependencies", + "lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore --log-level error", + "build:docs": "typedoc", + "test": "vitest run --config vitest.config.ts", + "test:e2e:local": "vitest run --config vitest.config.e2e.ts", + "test:clean": "yarn test --no-cache --coverage.clean", + "test:dev": "yarn test --mode development --reporter dot", + "test:verbose": "yarn test --reporter verbose", + "test:watch": "vitest --config vitest.config.ts" + }, + "dependencies": { + "@metamask/logger": "workspace:^", + "@ocap/kernel-agents": "workspace:^", + "@ocap/kernel-language-model-service": "workspace:^", + "@ocap/repo-tools": "workspace:^" + }, + "devDependencies": { + "@arethetypeswrong/cli": "^0.17.4", + "@metamask/eslint-config": "^14.0.0", + "@metamask/eslint-config-nodejs": "^14.0.0", + "@metamask/eslint-config-typescript": "^14.0.0", + "@types/node": "^22.13.1", + "@typescript-eslint/eslint-plugin": "^8.29.0", + "@typescript-eslint/parser": "^8.29.0", + "@typescript-eslint/utils": "^8.29.0", + "@vitest/eslint-plugin": "^1.6.5", + "depcheck": "^1.4.7", + "eslint": "^9.23.0", + "eslint-config-prettier": "^10.1.1", + "eslint-import-resolver-typescript": "^4.3.1", + "eslint-plugin-import-x": "^4.10.0", + "eslint-plugin-jsdoc": "^50.6.9", + "eslint-plugin-n": "^17.17.0", + "eslint-plugin-prettier": "^5.2.6", + "eslint-plugin-promise": "^7.2.1", + "prettier": "^3.5.3", + "rimraf": "^6.0.1", + "turbo": "^2.5.6", + "typescript": "~5.8.2", + "typescript-eslint": "^8.29.0", + "vite": "^7.3.0", + "vitest": "^4.0.16" + }, + "engines": { + "node": "^20.11 || >=22" + }, + "exports": { + "./package.json": "./package.json" + } +} diff --git a/packages/kernel-agents/test/constants.ts b/packages/kernel-test-local/src/constants.ts similarity index 54% rename from packages/kernel-agents/test/constants.ts rename to packages/kernel-test-local/src/constants.ts index bfb5ac9d5..86b329e2e 100644 --- a/packages/kernel-agents/test/constants.ts +++ b/packages/kernel-test-local/src/constants.ts @@ -9,3 +9,12 @@ export const TEST_MODELS = ['llama3.1:latest', 'gpt-oss:20b']; */ export const OLLAMA_API_BASE = 'http://localhost:11434'; export const OLLAMA_TAGS_ENDPOINT = `${OLLAMA_API_BASE}/api/tags`; + +// extract ignored logger tags from environment variable + +/** + * The tags to ignore for the local tests. + */ +export const IGNORE_TAGS = + // eslint-disable-next-line n/no-process-env + process?.env?.LOGGER_IGNORE?.split(',')?.map((tag) => tag.trim()) ?? []; diff --git a/packages/kernel-test-local/src/utils.test.ts b/packages/kernel-test-local/src/utils.test.ts new file mode 100644 index 000000000..1f4346ef4 --- /dev/null +++ b/packages/kernel-test-local/src/utils.test.ts @@ -0,0 +1,48 @@ +import type { LogEntry } from '@metamask/logger'; +import { describe, expect, it, vi } from 'vitest'; + +import { randomLetter } from './utils.ts'; + +describe('utils', () => { + describe('randomLetter', () => { + it('returns a letter', () => { + const letters = 'abcdefghijklmnopqrstuvwxyz'; + expect(letters).toContain(randomLetter()); + }); + }); + + describe('filterTransports', () => { + it('filters out only the ignored tags', async () => { + vi.resetModules(); + vi.doMock('./constants.ts', async (importOriginal) => ({ + ...(await importOriginal()), + IGNORE_TAGS: ['foo'], + })); + const transport = vi.fn(); + const { filterTransports } = await import('./utils.ts'); + const filteredTransport = filterTransports(transport); + + const ignoredEntry = { level: 'debug', tags: ['foo'], message: 'test' }; + filteredTransport(ignoredEntry as LogEntry); + expect(transport).not.toHaveBeenCalledWith(ignoredEntry); + + const passedEntry = { level: 'debug', tags: ['bar'], message: 'test' }; + filteredTransport(passedEntry as LogEntry); + expect(transport).toHaveBeenCalledWith(passedEntry); + }); + + it('filters out all tags', async () => { + vi.resetModules(); + vi.doMock('./constants.ts', async (importOriginal) => ({ + ...(await importOriginal()), + IGNORE_TAGS: ['all'], + })); + const transport = vi.fn(); + const { filterTransports } = await import('./utils.ts'); + const filteredTransport = filterTransports(transport); + const ignoredEntry = { level: 'debug', tags: [], message: 'test' }; + filteredTransport(ignoredEntry as LogEntry); + expect(transport).not.toHaveBeenCalledWith(ignoredEntry); + }); + }); +}); diff --git a/packages/kernel-agents/test/utils.ts b/packages/kernel-test-local/src/utils.ts similarity index 69% rename from packages/kernel-agents/test/utils.ts rename to packages/kernel-test-local/src/utils.ts index 684077b57..219200830 100644 --- a/packages/kernel-agents/test/utils.ts +++ b/packages/kernel-test-local/src/utils.ts @@ -1,10 +1,6 @@ import type { LogEntry } from '@metamask/logger'; -// extract ignored logger tags from environment variable - -const ignoreTags = - // eslint-disable-next-line n/no-process-env - process?.env?.LOGGER_IGNORE?.split(',')?.map((tag) => tag.trim()) ?? []; +import { IGNORE_TAGS } from './constants.ts'; /** * Filter a logger transport to ignore command line specified ignore tags. @@ -15,10 +11,10 @@ const ignoreTags = export const filterTransports = ( ...transports: ((entry: LogEntry) => void)[] ): ((entry: LogEntry) => void) => - ignoreTags.includes('all') + IGNORE_TAGS.includes('all') ? () => undefined : (entry) => { - if (ignoreTags.some((tag) => entry.tags.includes(tag))) { + if (IGNORE_TAGS.some((tag) => entry.tags.includes(tag))) { return; } transports.forEach((transport) => transport(entry)); diff --git a/packages/kernel-test-local/test/SETUP.md b/packages/kernel-test-local/test/SETUP.md new file mode 100644 index 000000000..8e775ddb7 --- /dev/null +++ b/packages/kernel-test-local/test/SETUP.md @@ -0,0 +1,43 @@ +## Setup Local Tests + +### Install Ollama + +1. Visit [https://ollama.ai](https://ollama.ai) and download Ollama for your platform +2. Install and start the Ollama service +3. Verify installation: + + ```bash + curl http://localhost:11434 + ``` + + You should see: `Ollama is running` + +### Download Required Models + +The tests require the following model: + +- `llama3.1:latest` + +Download the model: + +```bash +ollama pull llama3.1:latest +``` + +Verify the model is available: + +```bash +ollama list +``` + +You should see `llama3.1:latest` in the output. + +### Validate Test Framework Setup + +Verify the configured test framework can access the resources above. + +```bash +yarn test:e2e:local -t suite +``` + +All suite tests should pass, with all other tests skipped. diff --git a/packages/kernel-test-local/test/TROUBLESHOOTING.md b/packages/kernel-test-local/test/TROUBLESHOOTING.md new file mode 100644 index 000000000..a750769bd --- /dev/null +++ b/packages/kernel-test-local/test/TROUBLESHOOTING.md @@ -0,0 +1,41 @@ +## Troubleshooting + +### Connection refused errors + +- Ensure Ollama is running: `ollama serve` +- Check port 11434 is accessible: `curl http://localhost:11434` +- Check for conflicting processes: `lsof -i :11434` + +### Model not found errors + +- List available models: `ollama list` +- Pull required model: `ollama pull llama3.1:latest` +- Verify model name matches exactly (including version tag) + +### Timeout errors + +Tests may timeout if: + +- Model is not loaded in memory (first run after Ollama restart may be slow) +- System resources are constrained (CPU/memory) +- The LLM is struggling with the specific prompt + +Try: + +- Restarting Ollama: `killall ollama && ollama serve` +- Running tests individually to isolate issues +- Increasing available system resources + +### Test failures due to LLM responses + +These tests verify that agents can interact with language models, but the quality of responses depends on the model's capabilities. Occasional failures are expected, especially for: + +- Complex reasoning tasks +- Code generation problems +- Multi-step calculations + +If tests consistently fail, check: + +- Model is loaded correctly: `ollama ps` +- Ollama logs for errors: Check console output from `ollama serve` +- System has adequate resources (8GB+ RAM recommended) diff --git a/packages/kernel-agents/test/e2e/agents.test.ts b/packages/kernel-test-local/test/e2e/agents.test.ts similarity index 93% rename from packages/kernel-agents/test/e2e/agents.test.ts rename to packages/kernel-test-local/test/e2e/agents.test.ts index 0cd1298df..baee6096a 100644 --- a/packages/kernel-agents/test/e2e/agents.test.ts +++ b/packages/kernel-test-local/test/e2e/agents.test.ts @@ -1,6 +1,11 @@ import '@ocap/repo-tools/test-utils/mock-endoify'; import { makeConsoleTransport, Logger } from '@metamask/logger'; +import type { MakeAgentArgs, Agent } from '@ocap/kernel-agents'; +import { getMoonPhase } from '@ocap/kernel-agents/capabilities/examples'; +import { count, add, multiply } from '@ocap/kernel-agents/capabilities/math'; +import { makeJsonAgent } from '@ocap/kernel-agents/json'; +import { makeReplAgent } from '@ocap/kernel-agents/repl'; import { OllamaNodejsService } from '@ocap/kernel-language-model-service/ollama/nodejs'; import { fetchMock } from '@ocap/repo-tools/test-utils/fetch-mock'; import { @@ -14,14 +19,8 @@ import { vi, } from 'vitest'; -import type { MakeAgentArgs } from '../../src/agent.ts'; -import { getMoonPhase } from '../../src/capabilities/examples.ts'; -import { count, add, multiply } from '../../src/capabilities/math.ts'; -import { makeJsonAgent } from '../../src/strategies/json-agent.ts'; -import { makeReplAgent } from '../../src/strategies/repl-agent.ts'; -import type { Agent } from '../../src/types.ts'; -import { DEFAULT_MODEL } from '../constants.ts'; -import { filterTransports, randomLetter } from '../utils.ts'; +import { DEFAULT_MODEL } from '../../src/constants.ts'; +import { filterTransports, randomLetter } from '../../src/utils.ts'; const logger = new Logger({ tags: ['test'], diff --git a/packages/kernel-agents/test/e2e/suite.test.ts b/packages/kernel-test-local/test/e2e/suite.test.ts similarity index 81% rename from packages/kernel-agents/test/e2e/suite.test.ts rename to packages/kernel-test-local/test/e2e/suite.test.ts index c407ef46b..c6c7cee81 100644 --- a/packages/kernel-agents/test/e2e/suite.test.ts +++ b/packages/kernel-test-local/test/e2e/suite.test.ts @@ -1,3 +1,11 @@ +/** + * Pre-test verification suite that checks: + * + * - Ollama service is running and accessible + * - Required models are available + * + * These tests run sequentially and must pass before the main test suite. + */ import { fetchMock } from '@ocap/repo-tools/test-utils/fetch-mock'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; @@ -5,7 +13,7 @@ import { DEFAULT_MODEL, OLLAMA_API_BASE, OLLAMA_TAGS_ENDPOINT, -} from '../constants.ts'; +} from '../../src/constants.ts'; describe.sequential('test suite', () => { beforeAll(() => { diff --git a/packages/kernel-test-local/tsconfig.json b/packages/kernel-test-local/tsconfig.json new file mode 100644 index 000000000..112c2efd3 --- /dev/null +++ b/packages/kernel-test-local/tsconfig.json @@ -0,0 +1,22 @@ +{ + "extends": "../../tsconfig.packages.json", + "compilerOptions": { + "baseUrl": "./", + "lib": ["ES2022"], + "noEmit": true, + "types": ["vitest", "node"] + }, + "references": [ + { "path": "../kernel-agents" }, + { "path": "../kernel-language-model-service" }, + { "path": "../logger" }, + { "path": "../repo-tools" } + ], + "include": [ + "../../vitest.config.ts", + "./src", + "./vitest.config.ts", + "./vitest.config.e2e.ts", + "./test/e2e" + ] +} diff --git a/packages/kernel-test-local/turbo.json b/packages/kernel-test-local/turbo.json new file mode 100644 index 000000000..3adcb89ef --- /dev/null +++ b/packages/kernel-test-local/turbo.json @@ -0,0 +1,3 @@ +{ + "extends": ["//"] +} diff --git a/packages/kernel-agents/vitest.config.e2e.ts b/packages/kernel-test-local/vitest.config.e2e.ts similarity index 70% rename from packages/kernel-agents/vitest.config.e2e.ts rename to packages/kernel-test-local/vitest.config.e2e.ts index 67291d043..97c0ce5ca 100644 --- a/packages/kernel-agents/vitest.config.e2e.ts +++ b/packages/kernel-test-local/vitest.config.e2e.ts @@ -9,12 +9,9 @@ export default defineConfig((args) => { defaultConfig, defineProject({ test: { - name: 'kernel-agents-e2e', - // E2E test configuration - testTimeout: 30000, - hookTimeout: 10000, - - // Include only E2E tests + name: 'kernel-test-local-e2e', + testTimeout: 30_000, + hookTimeout: 10_000, include: ['./test/e2e/**/*.test.ts'], }, }), diff --git a/packages/kernel-test-local/vitest.config.ts b/packages/kernel-test-local/vitest.config.ts new file mode 100644 index 000000000..6eee6669c --- /dev/null +++ b/packages/kernel-test-local/vitest.config.ts @@ -0,0 +1,19 @@ +import { mergeConfig } from '@ocap/repo-tools/vitest-config'; +import { defineConfig, defineProject } from 'vitest/config'; + +import defaultConfig from '../../vitest.config.ts'; + +export default defineConfig((args) => { + return mergeConfig( + args, + defaultConfig, + defineProject({ + test: { + name: 'kernel-test-local', + testTimeout: 30_000, + hookTimeout: 10_000, + include: ['./src/**/*.test.ts'], + }, + }), + ); +}); diff --git a/yarn.lock b/yarn.lock index 6d5b4cd8a..8d55ed4ab 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3494,7 +3494,7 @@ __metadata: languageName: unknown linkType: soft -"@ocap/kernel-agents@workspace:packages/kernel-agents": +"@ocap/kernel-agents@workspace:^, @ocap/kernel-agents@workspace:packages/kernel-agents": version: 0.0.0-use.local resolution: "@ocap/kernel-agents@workspace:packages/kernel-agents" dependencies: @@ -3620,6 +3620,42 @@ __metadata: languageName: unknown linkType: soft +"@ocap/kernel-test-local@workspace:packages/kernel-test-local": + version: 0.0.0-use.local + resolution: "@ocap/kernel-test-local@workspace:packages/kernel-test-local" + dependencies: + "@arethetypeswrong/cli": "npm:^0.17.4" + "@metamask/eslint-config": "npm:^14.0.0" + "@metamask/eslint-config-nodejs": "npm:^14.0.0" + "@metamask/eslint-config-typescript": "npm:^14.0.0" + "@metamask/logger": "workspace:^" + "@ocap/kernel-agents": "workspace:^" + "@ocap/kernel-language-model-service": "workspace:^" + "@ocap/repo-tools": "workspace:^" + "@types/node": "npm:^22.13.1" + "@typescript-eslint/eslint-plugin": "npm:^8.29.0" + "@typescript-eslint/parser": "npm:^8.29.0" + "@typescript-eslint/utils": "npm:^8.29.0" + "@vitest/eslint-plugin": "npm:^1.6.5" + depcheck: "npm:^1.4.7" + eslint: "npm:^9.23.0" + eslint-config-prettier: "npm:^10.1.1" + eslint-import-resolver-typescript: "npm:^4.3.1" + eslint-plugin-import-x: "npm:^4.10.0" + eslint-plugin-jsdoc: "npm:^50.6.9" + eslint-plugin-n: "npm:^17.17.0" + eslint-plugin-prettier: "npm:^5.2.6" + eslint-plugin-promise: "npm:^7.2.1" + prettier: "npm:^3.5.3" + rimraf: "npm:^6.0.1" + turbo: "npm:^2.5.6" + typescript: "npm:~5.8.2" + typescript-eslint: "npm:^8.29.0" + vite: "npm:^7.3.0" + vitest: "npm:^4.0.16" + languageName: unknown + linkType: soft + "@ocap/kernel-test@workspace:^, @ocap/kernel-test@workspace:packages/kernel-test": version: 0.0.0-use.local resolution: "@ocap/kernel-test@workspace:packages/kernel-test"