|
| 1 | +'use strict'; |
| 2 | + |
| 3 | +const { describe, it, before } = require('node:test'); |
| 4 | +const assert = require('node:assert/strict'); |
| 5 | +const fs = require('fs'); |
| 6 | +const path = require('path'); |
| 7 | + |
| 8 | +const { |
| 9 | + discoverAgents, |
| 10 | + loadAgentDefinition, |
| 11 | + STEP_PATTERN, |
| 12 | +} = require('./helpers'); |
| 13 | + |
// ─── Voice Markers — Per-Agent Vocabulary Constants (Task 2) ────
// Characteristic phrases: verified in BOTH registry and agent file
// communication_style. Domain vocabulary: verified in workflow step files.
// Keyed by agent ID for dynamic lookup via discoverAgents().
//
// Matching rules used by the suites in this file:
//   - phrases are compared case-sensitively (plain String#includes)
//   - vocabulary is compared case-insensitively against step-file content
//
// The table is deep-frozen so that no test can accidentally mutate the
// shared fixture and change what later tests observe.

/**
 * Deep-freeze a voice-marker table: the table itself, each per-agent entry,
 * and each entry's `phrases` / `vocabulary` arrays.
 *
 * @param {Object<string, {phrases: string[], vocabulary: string[]}>} table
 *   Marker table keyed by agent ID.
 * @returns {Object<string, {phrases: string[], vocabulary: string[]}>}
 *   The same object, now immutable at every level.
 */
function freezeVoiceMarkers(table) {
  for (const entry of Object.values(table)) {
    Object.freeze(entry.phrases);
    Object.freeze(entry.vocabulary);
    Object.freeze(entry);
  }
  return Object.freeze(table);
}

const VOICE_MARKERS = freezeVoiceMarkers({
  'contextualization-expert': {
    phrases: ['really solving'],
    vocabulary: ['persona', 'hypothesis', 'assumption', 'context', 'problem', 'product', 'vision'],
  },
  'discovery-empathy-expert': {
    phrases: ['I noticed that', 'asked them WHY'],
    vocabulary: ['empathy', 'observe', 'discover', 'interview', 'user', 'research', 'feelings'],
  },
  'research-convergence-specialist': {
    phrases: ['what the research is telling us', 'Three patterns converge'],
    vocabulary: ['converge', 'synthesize', 'pattern', 'insight', 'evidence', 'research', 'finding'],
  },
  'hypothesis-engineer': {
    phrases: ['What if?', 'safe bet'],
    vocabulary: ['hypothesis', 'assumption', 'brainwriting', 'falsifiable', 'belief', 'experiment'],
  },
  'lean-experiments-specialist': {
    phrases: ['validated learning', 'MVPs'],
    vocabulary: ['experiment', 'assumption', 'measure', 'MVP', 'lean', 'learning', 'evidence'],
  },
  'production-intelligence-specialist': {
    phrases: ['The signal indicates', 'what we\'re seeing in context'],
    vocabulary: ['signal', 'pattern', 'observe', 'behavior', 'metric', 'anomaly', 'data'],
  },
  'learning-decision-expert': {
    phrases: ['The evidence suggests', 'what we\'ve learned'],
    vocabulary: ['evidence', 'decision', 'pivot', 'learning', 'data', 'action', 'experiment'],
  },
});
| 49 | + |
// ─── Dynamic Agent Discovery (NFR5) ────────────────────────────
// Discovered once at module load and used directly (no await): the describe()
// blocks below iterate this list while *registering* tests, so it has to be
// populated before any suite is declared. Entries are read below through
// their .id, .name, .persona and .workflowDirs properties.
const agents = discoverAgents();
| 52 | + |
// ─── P0 Voice Consistency: Registry vs Agent Definition ─────────
// Cross-checks each discovered agent's registry persona against the persona
// in its loaded agent definition. All checks are heuristic ("Low-Confidence")
// and their failure messages recommend a human spot-check.

// Function words that carry no domain meaning. Without this filter, two roles
// that merely both contain "and" or "the" would count as sharing a keyword and
// the cross-validation would pass vacuously.
const KEYWORD_STOP_WORDS = new Set([
  'and', 'the', 'for', 'with', 'that', 'this', 'from', 'into',
  'are', 'not', 'but', 'who', 'via', 'your', 'their', 'than', 'then',
]);

/**
 * Extract the distinct significant words from persona text.
 *
 * @param {string} text - Persona text in any casing.
 * @param {number} minLength - Minimum number of letters a word needs to be kept.
 * @returns {string[]} Unique lowercase words of at least `minLength` letters,
 *   in first-seen order, with stop words removed.
 */
function extractKeywords(text, minLength) {
  const wordPattern = new RegExp(`\\b[a-z]{${minLength},}\\b`, 'g');
  const words = text.toLowerCase().match(wordPattern) || [];
  return [...new Set(words)].filter((word) => !KEYWORD_STOP_WORDS.has(word));
}

/**
 * Select the keywords that occur in `text` at the start of a word.
 *
 * Matching at word starts (rather than anywhere in the string) keeps tolerance
 * for inflections ("experiment" still matches "experiments") while rejecting
 * accidental mid-word hits such as "the" inside "hypothesis".
 *
 * @param {string[]} keywords - Lowercase keywords to look for.
 * @param {string} text - Text to search, in any casing.
 * @returns {string[]} The subset of `keywords` found in `text`.
 */
function findSharedKeywords(keywords, text) {
  const textWords = text.toLowerCase().match(/[a-z]+/g) || [];
  return keywords.filter(
    (keyword) => textWords.some((word) => word.startsWith(keyword))
  );
}

describe('P0 Voice Consistency: Registry vs Agent Definition (Low-Confidence)', () => {
  // M2: Vacuous pass guard — ensure dynamic discovery found agents
  it('discovered at least 7 agents for voice testing', () => {
    assert.ok(
      agents.length >= 7,
      `Expected at least 7 agents for voice consistency testing, discovered ${agents.length}`
    );
  });

  for (const agent of agents) {
    describe(`${agent.name} (${agent.id})`, () => {
      let def;
      let markers;

      // Load per-agent fixtures when the suite runs rather than at
      // registration time, so a load failure is reported against this agent.
      before(() => {
        def = loadAgentDefinition(agent.id);
        markers = VOICE_MARKERS[agent.id];
      });

      // Task 1.8: all 4 registry persona fields are non-empty strings
      it('registry persona has all 4 fields as non-empty strings', () => {
        const fields = ['role', 'identity', 'communication_style', 'expertise'];
        for (const field of fields) {
          assert.ok(
            typeof agent.persona[field] === 'string' && agent.persona[field].length > 0,
            `[Low-Confidence] ${agent.name} (${agent.id}): registry persona.${field} should be a non-empty string (human spot-check recommended)`
          );
        }
      });

      // Task 1.5: registry communication_style shares at least 1 phrase with agent file
      it('communication_style cross-validation: at least 1 shared phrase', (t) => {
        if (!markers) {
          t.skip(`No voice markers defined for ${agent.name} — add to VOICE_MARKERS`);
          return;
        }
        const registryStyle = agent.persona.communication_style;
        const agentFileStyle = def.persona.communication_style;
        // A phrase only counts when it appears verbatim (case-sensitive) in BOTH sources.
        const sharedPhrases = markers.phrases.filter(
          phrase => registryStyle.includes(phrase) && agentFileStyle.includes(phrase)
        );
        assert.ok(
          sharedPhrases.length >= 1,
          `[Low-Confidence] ${agent.name} (${agent.id}): communication_style cross-validation — registry and agent file should share at least 1 characteristic phrase from [${markers.phrases.map(p => `"${p}"`).join(', ')}], found ${sharedPhrases.length} matches (human spot-check recommended)`
        );
      });

      // Task 1.6: registry role shares at least 1 keyword with agent file role
      it('role cross-validation: at least 1 shared keyword', () => {
        // Significant words: 3+ letters, stop words excluded, matched at word starts.
        const registryWords = extractKeywords(agent.persona.role, 3);
        const sharedWords = findSharedKeywords(registryWords, def.persona.role);
        assert.ok(
          sharedWords.length >= 1,
          `[Low-Confidence] ${agent.name} (${agent.id}): role cross-validation — registry role "${agent.persona.role}" and agent file <role> "${def.persona.role}" should share at least 1 keyword, found [${sharedWords.join(', ')}] (human spot-check recommended)`
        );
      });

      // Task 1.7: registry expertise themes appear in agent file principles
      it('expertise/principles cross-validation: at least 2 shared domain keywords', () => {
        // Significant words: 4+ letters, stop words excluded, matched at word starts.
        // A missing <principles> section is treated as empty text, so the
        // assertion below fails with its diagnostic message instead of a TypeError.
        const expertiseWords = extractKeywords(agent.persona.expertise, 4);
        const sharedWords = findSharedKeywords(expertiseWords, def.persona.principles || '');

        assert.ok(
          sharedWords.length >= 2,
          `[Low-Confidence] ${agent.name} (${agent.id}): expertise/principles cross-validation — registry expertise and agent file <principles> should share at least 2 domain keywords, found ${sharedWords.length}: [${sharedWords.slice(0, 5).join(', ')}] (human spot-check recommended)`
        );
      });
    });
  }
});
| 135 | + |
// ─── P0 Voice Consistency: Workflow Step Voice Markers ───────────
// For each discovered agent, gathers the text of every workflow step file and
// checks that it mentions at least two of that agent's vocabulary words.

describe('P0 Voice Consistency: Workflow Step Voice Markers (Low-Confidence)', () => {
  // Returns the contents of every step file found under "<workflow>/steps",
  // visiting workflows in the given order and files in sorted name order.
  // Workflows that have no "steps" directory contribute nothing.
  // NOTE(review): STEP_PATTERN comes from ./helpers; if it carries the g or y
  // flag, .test() is stateful across calls — confirm it does not.
  const readStepFiles = (workflowDirs) => {
    const contents = [];
    for (const workflowDir of workflowDirs) {
      const stepsDir = path.join(workflowDir, 'steps');
      if (fs.existsSync(stepsDir)) {
        const stepNames = fs.readdirSync(stepsDir)
          .filter((entryName) => STEP_PATTERN.test(entryName))
          .sort();
        for (const stepName of stepNames) {
          contents.push(fs.readFileSync(path.join(stepsDir, stepName), 'utf8'));
        }
      }
    }
    return contents;
  };

  for (const agent of agents) {
    describe(`${agent.name} (${agent.id})`, () => {
      it('workflow step content contains domain vocabulary', (t) => {
        const markers = VOICE_MARKERS[agent.id];
        if (!markers) {
          t.skip(`No voice markers defined for ${agent.name} — add to VOICE_MARKERS`);
          return;
        }

        // M2: Vacuous pass guard — the agent must own at least one workflow
        assert.ok(
          agent.workflowDirs.length >= 1,
          `[Low-Confidence] ${agent.name} (${agent.id}): expected at least 1 workflow, found ${agent.workflowDirs.length} (human spot-check recommended)`
        );

        // Pool ALL step files across ALL of this agent's workflows
        const stepContents = readStepFiles(agent.workflowDirs);
        const totalStepFiles = stepContents.length;

        // M2: Vacuous pass guard — there must be something to search
        assert.ok(
          totalStepFiles >= 1,
          `[Low-Confidence] ${agent.name} (${agent.id}): expected step files but found ${totalStepFiles} across ${agent.workflowDirs.length} workflows (human spot-check recommended)`
        );

        // Case-insensitive substring search over the newline-joined corpus
        const corpus = stepContents
          .map((text) => `${text}\n`)
          .join('')
          .toLowerCase();
        const matchedWords = markers.vocabulary.filter(
          (term) => corpus.includes(term.toLowerCase())
        );

        assert.ok(
          matchedWords.length >= 2,
          `[Low-Confidence] ${agent.name} (${agent.id}): workflow step content should contain at least 2 domain vocabulary words from [${markers.vocabulary.join(', ')}], found ${matchedWords.length}: [${matchedWords.join(', ')}] (human spot-check recommended)`
        );
      });
    });
  }
});
0 commit comments