harden error handling, add smoke tests, and prep for v0.1.0 release

rossarmstrong · rossarmstrong · commit 28c1fe6b0d9d · 2026-04-05T23:50:03.000+10:00
Pre-ship audit fixes:
- Wrap mic.stop() in try-catch in error and safety timeout handlers
- Wrap JSON.parse of the capture result in try-catch in the voice_query pipeline
- Add 60s AbortController timeout on Ollama chat calls
- Improve Ollama error detection with recursive cause chain checking
- Validate whisper transcription result structure before parsing
- Remove .claude/settings.local.json from repo, add .claude/ to .gitignore

Smoke test suite (test/smoke.js):
- Server initialization and version match
- All 3 tools advertised in tools/list
- list_audio_devices returns valid device data
- capture_audio produces WAV with valid RIFF header
- Invalid device and unknown tool return isError
- Skips mic-dependent tests gracefully on CI (no hardware)

Publish workflow improvements:
- Add npm install + npm test before publish
- Add tag/version mismatch check against package.json

README additions:
- Troubleshooting section (Windows mic permissions, Ollama, whisper model)
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -19,6 +19,19 @@ jobs:
           node-version: '20'
           registry-url: 'https://registry.npmjs.org'
 
+      - run: npm install --ignore-scripts
+
+      - run: npm test
+
+      - name: Verify tag matches package.json version
+        run: |
+          PKG_VERSION="v$(node -p "require('./package.json').version")"
+          TAG="${GITHUB_REF_NAME}"
+          if [ "$PKG_VERSION" != "$TAG" ]; then
+            echo "::error::Tag $TAG does not match package.json version $PKG_VERSION"
+            exit 1
+          fi
+
       - run: npm publish --access public
         env:
           NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 node_modules/
+.claude/
 *.wav
 *.bin
 models/
diff --git a/README.md b/README.md
@@ -1,3 +1,5 @@
+<!-- markdownlint-disable MD033 MD041 -->
+
 # mcp-listen
 
 **The first MCP server that can hear.**
@@ -26,7 +28,6 @@ Give your AI agents the ability to listen. Microphone capture and speech-to-text
   <!-- badges: end -->
 </div>
 
-
 ## Tools
 
 | Tool | Description |
@@ -185,6 +186,17 @@ The model is ~150MB and downloads once. You can also set the `WHISPER_MODEL_PATH
 4. **No streaming.** MCP's request/response pattern means the entire recording is captured, then transcribed, then sent to the LLM. No real-time partial results.
 5. **Temp files.** `capture_audio` writes WAV files to the system temp directory. They are not automatically cleaned up. `voice_query` cleans up after itself.
 
+## Troubleshooting
+
+**Windows: "Error opening microphone"**
+Windows may block microphone access by default. Go to **Settings > Privacy & security > Microphone** and ensure microphone access is enabled for desktop apps.
+
+**Ollama: "Ollama is not running"**
+Not every Ollama installation starts as a background service automatically. If you see this error, run `ollama serve` manually or check that the Ollama service is running.
+
+**Whisper: "model not found"**
+The whisper model file must be downloaded before first use. See [Whisper Model Setup](#whisper-model-setup) for instructions.
+
 ## Powered By
 
 - [decibri](https://decibri.dev): Cross-platform microphone capture for Node.js
diff --git a/index.js b/index.js
@@ -114,7 +114,15 @@ async function voiceQuery(args) {
 
   if (captureResult.isError) return captureResult;
 
-  const captureData = JSON.parse(captureResult.content[0].text);
+  let captureData;
+  try {
+    captureData = JSON.parse(captureResult.content[0].text);
+  } catch {
+    return {
+      content: [{ type: 'text', text: 'Error: Failed to parse capture result.' }],
+      isError: true
+    };
+  }
   const wavPath = captureData.path;
 
   try {
diff --git a/lib/audio.js b/lib/audio.js
@@ -89,7 +89,7 @@ function captureAudio({ durationMs = 5000, device, outputPath } = {}) {
     });
 
     mic.on('error', (err) => {
-      if (mic.isOpen) mic.stop();
+      try { if (mic.isOpen) mic.stop(); } catch {}
       finish({
         content: [{ type: 'text', text: `Microphone error during recording: ${err.message}` }],
         isError: true
@@ -131,7 +131,7 @@ function captureAudio({ durationMs = 5000, device, outputPath } = {}) {
 
     // Safety timeout in case 'end' never fires
     const safetyTimer = setTimeout(() => {
-      if (mic.isOpen) mic.stop();
+      try { if (mic.isOpen) mic.stop(); } catch {}
       finish({
         content: [{ type: 'text', text: 'Error: Recording timed out. The microphone did not stop cleanly.' }],
         isError: true
diff --git a/lib/llm.js b/lib/llm.js
@@ -7,6 +7,16 @@ try {
   Ollama = null;
 }
 
+const REQUEST_TIMEOUT_MS = 60000;
+
+function isConnectionError(err) { // true if err, or any error in its `cause` chain, looks like a connection failure
+  const codes = ['ECONNREFUSED', 'ECONNRESET', 'EHOSTUNREACH', 'ETIMEDOUT'];
+  if (err.code && codes.includes(err.code)) return true;
+  if (err.message && /connect|refused|unreachable/i.test(err.message)) return true; // test own message first
+  if (err.cause) return isConnectionError(err.cause); // descend last, so a non-matching cause cannot mask the message
+  return false;
+}
+
 async function chat({ text, model = 'llama3.2', systemPrompt = 'You are a helpful assistant.', host } = {}) {
   if (!Ollama) {
     return {
@@ -17,28 +27,37 @@ async function chat({ text, model = 'llama3.2', systemPrompt = 'You are a helpfu
   const options = {};
   if (host) options.host = host;
 
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
+
   try {
     const ollama = new Ollama(options);
     const result = await ollama.chat({
       model,
       messages: [
         { role: 'system', content: systemPrompt },
         { role: 'user', content: text }
-      ]
+      ],
+      signal: controller.signal
     });
 
     return {
       response: result.message.content,
       model
     };
   } catch (err) {
-    if (err.code === 'ECONNREFUSED' || err.cause?.code === 'ECONNREFUSED') {
+    if (err.name === 'AbortError') {
+      return { error: `Ollama request timed out after ${REQUEST_TIMEOUT_MS / 1000} seconds.` };
+    }
+    if (isConnectionError(err)) {
       return { error: 'Ollama is not running. Start it with: ollama serve' };
     }
     if (err.message && err.message.includes('not found')) {
       return { error: `Model "${model}" not found. Pull it with: ollama pull ${model}` };
     }
     return { error: `LLM error: ${err.message}` };
+  } finally {
+    clearTimeout(timeout);
   }
 }
 
diff --git a/lib/transcribe.js b/lib/transcribe.js
@@ -65,8 +65,16 @@ async function transcribe({ filePath, modelPath, language = 'en' } = {}) {
       language
     });
 
+    if (!result || !Array.isArray(result.transcription)) {
+      return { error: 'Unexpected whisper response format. Expected { transcription: [] }.' };
+    }
+
     const text = result.transcription
-      .map(segment => Array.isArray(segment) ? segment[2] : segment)
+      .map(segment => {
+        if (typeof segment === 'string') return segment;
+        if (Array.isArray(segment) && typeof segment[2] === 'string') return segment[2];
+        return '';
+      })
       .join(' ')
       .trim();
 
diff --git a/package.json b/package.json
@@ -27,6 +27,9 @@
     "stt",
     "transcription"
   ],
+  "scripts": {
+    "test": "node test/smoke.js"
+  },
   "author": "Analytics in Motion",
   "license": "Apache-2.0",
   "repository": {
diff --git a/test/smoke.js b/test/smoke.js

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`node_modules/`
	`2`	`+.claude/`
`2`	`3`	`*.wav`
`3`	`4`	`*.bin`
`4`	`5`	`models/`