Skip to content

Commit af97ec4

Browse files
committed
Add comprehensive test suite and validation scripts for Research-Swarm
- Introduced `test-docker.sh` for Docker image validation and GOALIE integration tests.
- Created `test-exports.js` to verify exports from `lib/index.js`.
- Developed `test-permit-adapter.js` to validate Permit Platform Adapter functionality, including retry logic, batch sync, and metrics tracking.
- Implemented `test-vector-search.js` to test the production vector search implementation using Jaccard similarity.
- Added `upload-report.js` for agents to upload reports to Supabase with error handling.
- Created `validate-all.sh` for complete validation of the Research Swarm system, including database integrity and CLI command checks.
1 parent 9ad71d4 commit af97ec4

File tree

115 files changed

+33726
-4
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

115 files changed

+33726
-4
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ dist/
5353
downloads/
5454
eggs/
5555
.eggs/
56-
lib/
56+
# lib/
5757
lib64/
5858
parts/
5959
sdist/

Dockerfile.node22-validation

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Validate agentic-flow v1.9.1 on Node.js 22
FROM node:22-slim

# Install minimal dependencies
RUN apt-get update && apt-get install -y \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /test

# Install published v1.9.1 from npm (allow optional deps to fail)
RUN npm install -g agentic-flow@1.9.1 --omit=optional || \
    (echo "⚠️ Some optional native dependencies failed (expected)" && \
     npm install -g agentic-flow@1.9.1 --ignore-scripts)

# Verify the key fix: no @fails-components/webtransport in the installed tree.
# `--all` is required so transitive dependencies are listed; `grep -q` succeeds
# only when the package IS present, in which case the build must fail rather
# than print a success message.
RUN if npm list -g --all 2>&1 | grep -q "@fails-components/webtransport"; then \
        echo "❌ Found unexpected @fails-components/webtransport dependency"; \
        exit 1; \
    fi && \
    echo "✅ Confirmed: No webtransport dependency"

# Default command: show Node version and verify package
CMD ["sh", "-c", "echo '✅ Node.js 22 Compatibility Test' && \
    echo '================================' && \
    node --version && \
    npm list -g agentic-flow --depth=0 && \
    echo '\n✅ Installation successful on Node 22!' && \
    echo 'Package installs cleanly without @fails-components/webtransport errors'"]

agentic-flow/agentdb.db-shm

-32 KB
Binary file not shown.

agentic-flow/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "agentic-flow",
3-
"version": "1.8.15",
3+
"version": "1.9.1",
44
"description": "Production-ready AI agent orchestration platform with 66 specialized agents, 213 MCP tools, ReasoningBank learning memory, and autonomous multi-agent swarms. Built by @ruvnet with Claude Agent SDK, neural networks, memory persistence, GitHub integration, and distributed consensus protocols.",
55
"type": "module",
66
"main": "dist/index.js",

agentic-flow/wasm/reasoningbank/reasoningbank_wasm_bg.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ export function log(message) {
258258
wasm.log(ptr0, len0);
259259
}
260260

261-
function __wbg_adapter_6(arg0, arg1, arg2) {
261+
function __wbg_adapter_4(arg0, arg1, arg2) {
262262
wasm.__wbindgen_export_5(arg0, arg1, addHeapObject(arg2));
263263
}
264264

@@ -540,7 +540,7 @@ export function __wbindgen_cast_2241b6af4c4b2941(arg0, arg1) {
540540

541541
export function __wbindgen_cast_8eb6fd44e7238d11(arg0, arg1) {
542542
// Cast intrinsic for `Closure(Closure { dtor_idx: 62, function: Function { arguments: [Externref], shim_idx: 63, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
543-
const ret = makeMutClosure(arg0, arg1, 62, __wbg_adapter_6);
543+
const ret = makeMutClosure(arg0, arg1, 62, __wbg_adapter_4);
544544
return addHeapObject(ret);
545545
};
546546

0 Bytes
Binary file not shown.

bench/lib/metrics.js

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/**
2+
* Metrics Collection and Analysis for ReasoningBank Benchmarks
3+
*/
4+
export class MetricsCollector {
    /** Results recorded for the baseline agent. */
    baselineResults = [];
    /** Results recorded for every non-baseline (ReasoningBank) agent. */
    reasoningbankResults = [];
    /** Ordered learning-curve points; array position is the iteration order. */
    learningCurve = [];

    /**
     * Records one task result, routing it by `result.agentType`.
     * Anything whose type is not exactly 'baseline' is stored as a ReasoningBank result.
     * @param {object} result - Task result with at least an `agentType` field.
     */
    addResult(result) {
        if (result.agentType === 'baseline') {
            this.baselineResults.push(result);
        }
        else {
            this.reasoningbankResults.push(result);
        }
    }

    /**
     * Appends one point to the learning curve.
     * @param {object} point - Point read later via `reasoningbankSuccess` / `baselineSuccess`.
     */
    addLearningPoint(point) {
        this.learningCurve.push(point);
    }

    /**
     * Aggregates success, token, latency and error figures for a list of results.
     * When at least one result carries `memoriesUsed`, memory and confidence
     * aggregates are added as well.
     * @param {object[]} results - Task results.
     * @returns {object} Aggregated metrics; all averages are 0 for an empty list.
     */
    calculateAgentMetrics(results) {
        const count = results.length;
        const successful = results.filter(r => r.success);
        // A result without tokens/latency counts as 0 instead of turning every
        // total and average into NaN.
        const totalTokens = results.reduce((sum, r) => sum + (r.tokens ?? 0), 0);
        const totalLatency = results.reduce((sum, r) => sum + (r.latency ?? 0), 0);
        const errors = results.filter(r => r.error).map(r => r.error);
        const metrics = {
            successRate: count > 0 ? successful.length / count : 0,
            totalTasks: count,
            successfulTasks: successful.length,
            avgTokens: count > 0 ? totalTokens / count : 0,
            totalTokens,
            avgLatency: count > 0 ? totalLatency / count : 0,
            totalLatency,
            errors
        };
        // Add ReasoningBank-specific metrics
        const rbResults = results.filter(r => r.memoriesUsed !== undefined);
        if (rbResults.length > 0) {
            const totalConfidence = rbResults.reduce((sum, r) => sum + (r.confidence || 0), 0);
            metrics.memoriesUsed = rbResults.reduce((sum, r) => sum + (r.memoriesUsed || 0), 0);
            metrics.memoriesCreated = rbResults.reduce((sum, r) => sum + (r.memoriesCreated || 0), 0);
            metrics.avgConfidence = totalConfidence / rbResults.length;
        }
        return metrics;
    }

    /**
     * Compares ReasoningBank metrics against baseline metrics.
     * @param {object} baseline - Metrics object with `successRate`, `avgTokens`, `avgLatency`.
     * @param {object} reasoningbank - Metrics object with the same fields.
     * @returns {object} Deltas and percentages. `learningVelocity` is undefined
     *   when ReasoningBank never reached 100% success on the learning curve.
     */
    calculateImprovement(baseline, reasoningbank) {
        const successRateDelta = reasoningbank.successRate - baseline.successRate;
        const successRatePercent = baseline.successRate > 0
            ? (successRateDelta / baseline.successRate) * 100
            : (reasoningbank.successRate > 0 ? 100 : 0);
        const tokenDelta = baseline.avgTokens - reasoningbank.avgTokens;
        const tokenSavings = baseline.avgTokens > 0
            ? (tokenDelta / baseline.avgTokens) * 100
            : 0;
        const latencyDelta = reasoningbank.avgLatency - baseline.avgLatency;
        const latencyOverhead = baseline.avgLatency > 0
            ? (latencyDelta / baseline.avgLatency) * 100
            : 0;
        // Calculate learning velocity (iterations to reach 100% success).
        // findIndex is zero-based, so convert to 1-based iteration counts;
        // dividing by the raw index gave Infinity/NaN when the first point
        // already succeeded and skewed every other ratio.
        let learningVelocity;
        if (this.learningCurve.length > 0) {
            const rbIndex = this.learningCurve.findIndex(p => p.reasoningbankSuccess === 1.0);
            const baselineIndex = this.learningCurve.findIndex(p => p.baselineSuccess === 1.0);
            if (rbIndex !== -1) {
                const rbIterations = rbIndex + 1;
                // If baseline never reached 100%, use the curve length as a lower bound.
                const baselineIterations = baselineIndex !== -1
                    ? baselineIndex + 1
                    : this.learningCurve.length;
                learningVelocity = baselineIterations / rbIterations;
            }
        }
        return {
            successRateDelta: this.formatPercent(successRateDelta),
            successRatePercent,
            tokenEfficiency: this.formatPercent(tokenSavings / 100),
            tokenSavings,
            latencyOverhead: this.formatPercent(latencyOverhead / 100),
            latencyDelta,
            learningVelocity
        };
    }

    /**
     * Formats a fraction as a signed percentage with one decimal place.
     * @param {number} value - Fraction, e.g. 0.25 for 25%.
     * @returns {string} E.g. '+25.0%' or '-3.5%'.
     */
    formatPercent(value) {
        const sign = value >= 0 ? '+' : '';
        return `${sign}${(value * 100).toFixed(1)}%`;
    }

    /**
     * Builds the full result record for one scenario from the collected data.
     * @param {string} scenarioName - Label stored on the record.
     * @returns {object} Baseline metrics, ReasoningBank metrics, improvement,
     *   a copy of the learning curve and an ISO timestamp.
     */
    generateScenarioResults(scenarioName) {
        const baseline = this.calculateAgentMetrics(this.baselineResults);
        const reasoningbank = this.calculateAgentMetrics(this.reasoningbankResults);
        const improvement = this.calculateImprovement(baseline, reasoningbank);
        return {
            scenarioName,
            baseline,
            reasoningbank,
            improvement,
            learningCurve: [...this.learningCurve],
            timestamp: new Date().toISOString()
        };
    }

    /** Clears all collected results and learning-curve points. */
    reset() {
        this.baselineResults = [];
        this.reasoningbankResults = [];
        this.learningCurve = [];
    }

    // Statistical analysis methods

    /**
     * Population standard deviation (divides by n, not n - 1).
     * @param {number[]} values
     * @returns {number} 0 for an empty list.
     */
    calculateStandardDeviation(values) {
        if (values.length === 0)
            return 0;
        const mean = values.reduce((sum, v) => sum + v, 0) / values.length;
        const variance = values.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / values.length;
        return Math.sqrt(variance);
    }

    /**
     * Normal-approximation confidence interval for the mean.
     * @param {number[]} values
     * @param {number} [confidenceLevel=0.95] - Two-sided level strictly between 0 and 1.
     * @returns {{lower: number, upper: number, mean: number}} All zeros for an empty list.
     * @throws {RangeError} If `confidenceLevel` is not strictly between 0 and 1
     *   (only checked for non-empty input).
     */
    calculateConfidenceInterval(values, confidenceLevel = 0.95) {
        if (values.length === 0) {
            return { lower: 0, upper: 0, mean: 0 };
        }
        const mean = values.reduce((sum, v) => sum + v, 0) / values.length;
        const std = this.calculateStandardDeviation(values);
        const z = MetricsCollector.#zScoreFor(confidenceLevel);
        const margin = z * (std / Math.sqrt(values.length));
        return {
            lower: mean - margin,
            upper: mean + margin,
            mean
        };
    }

    /**
     * Two-sided standard-normal critical value for a confidence level.
     * 0.95 and 0.99 keep the conventional rounded constants; every other level
     * is computed instead of silently falling back to the 99% value.
     */
    static #zScoreFor(confidenceLevel) {
        if (confidenceLevel === 0.95)
            return 1.96;
        if (confidenceLevel === 0.99)
            return 2.576;
        if (!(confidenceLevel > 0 && confidenceLevel < 1)) {
            throw new RangeError(`confidenceLevel must be between 0 and 1 (exclusive), got ${confidenceLevel}`);
        }
        // Abramowitz & Stegun 26.2.23 rational approximation of the inverse
        // normal CDF for an upper-tail probability in (0, 0.5]; |error| < 4.5e-4.
        const tail = (1 - confidenceLevel) / 2;
        const t = Math.sqrt(-2 * Math.log(tail));
        const numerator = 2.515517 + t * (0.802853 + t * 0.010328);
        const denominator = 1 + t * (1.432788 + t * (0.189269 + t * 0.001308));
        return t - numerator / denominator;
    }

    // Performance insights

    /**
     * Produces human-readable insight lines from a scenario result.
     * @param {object} results - Object with `baseline`, `reasoningbank` and `improvement`.
     * @returns {string[]} Insight messages, possibly empty.
     */
    generateInsights(results) {
        const insights = [];
        const { reasoningbank, improvement } = results;
        // Success rate insights
        if (improvement.successRatePercent > 50) {
            insights.push(`🎯 Excellent improvement: ${improvement.successRateDelta} success rate increase`);
        }
        else if (improvement.successRatePercent < 0) {
            insights.push(`⚠️ Warning: Baseline outperformed ReasoningBank (${improvement.successRateDelta})`);
        }
        // Token efficiency insights
        if (improvement.tokenSavings > 30) {
            insights.push(`💰 Significant token savings: ${improvement.tokenEfficiency} reduction`);
        }
        else if (improvement.tokenSavings < 0) {
            insights.push(`⚠️ Token overhead: ${Math.abs(improvement.tokenSavings).toFixed(1)}% increase`);
        }
        // Latency insights
        if (Math.abs(improvement.latencyDelta) < 500) {
            insights.push(`⚡ Minimal latency overhead: ${Math.abs(improvement.latencyDelta).toFixed(0)}ms`);
        }
        else if (improvement.latencyDelta > 1000) {
            insights.push(`🐌 High latency overhead: ${improvement.latencyDelta.toFixed(0)}ms - consider optimization`);
        }
        // Memory insights
        if (reasoningbank.memoriesUsed && reasoningbank.memoriesUsed > 0) {
            const avgMemoriesPerTask = reasoningbank.memoriesUsed / reasoningbank.totalTasks;
            insights.push(`🧠 Memory utilization: ${avgMemoriesPerTask.toFixed(1)} memories per task`);
        }
        // Learning velocity insights
        if (improvement.learningVelocity && improvement.learningVelocity > 2) {
            insights.push(`🚀 Fast learner: ${improvement.learningVelocity.toFixed(1)}x faster than baseline`);
        }
        return insights;
    }
}
162+
// Export singleton instance
163+
// Every importer of this module receives this same instance, so recorded
// results and learning-curve points accumulate until reset() is called.
export const metrics = new MetricsCollector();

0 commit comments

Comments
 (0)