-
Notifications
You must be signed in to change notification settings - Fork 245
Performance Testing
alexheifetz edited this page Sep 22, 2025
·
1 revision
Comprehensive guide for performance testing, monitoring, and optimization of Embabel agent systems. This page covers CPU profiling, JVM optimization, memory management, garbage collection tuning, and continuous performance testing strategies.
-
Issue #867: Performance Analysis - Framework-wide performance analysis initiative
- CPU Profiling - Identify hot spots and design optimizations
- Thread Profiling - Identify anomalies (locks, frozen threads)
- Memory Profiling - GC Cycles, Heap Size analysis
- Target: 0.4.0 (Curdimurka) milestone
- Issue #867: Performance Analysis - Main performance tracking issue
- Performance optimizations mentioned in parallel processing - Non-blocking IO, parallel processing improvements
graph TD
A[Unit Performance Tests] --> B[Integration Performance Tests]
B --> C[Load Testing]
C --> D[Stress Testing]
D --> E[Continuous Performance Monitoring]
- Micro-benchmarks - Individual action performance
- Agent workflow benchmarks - Complete agent execution flows
- Multi-agent load tests - Concurrent agent execution
- Memory stress tests - Long-running scenarios
- LLM integration performance - API latency and throughput
@Component
@Profile("performance")
public class JProfilerConfiguration {

    /**
     * Adjusts JProfiler agent behaviour once the Spring context has started:
     * keep the agent in online mode and do not block startup waiting for a
     * profiler GUI to attach.
     *
     * NOTE(review): these properties are read by the JProfiler agent; they
     * have no effect unless the JVM was launched with the agent attached —
     * confirm the launch script includes it.
     *
     * @param event startup event published by Spring Boot (used only as a trigger)
     */
    @EventListener
    public void onApplicationStarted(ApplicationStartedEvent event) {
        // Online (non-offline) mode so a remote JProfiler GUI can connect.
        System.setProperty("com.jprofiler.agent.offline", "false");
        // Do not pause the JVM waiting for the profiler frontend.
        System.setProperty("com.jprofiler.agent.nowait", "true");
    }
}
# Start application with JFR enabled
java -XX:StartFlightRecording=duration=300s,filename=embabel-performance.jfr \
-jar embabel-agent-app.jar
# Analyze results
jfr print --events jdk.CPULoad,jdk.GarbageCollection embabel-performance.jfr
@Configuration
@Profile("performance")
// Registers JMX plumbing used for VisualVM monitoring when the "performance"
// profile is active.
public class VisualVMConfiguration {
// Expose the platform MBean server as a bean so other components can
// register management beans against it.
@Bean
public MBeanServer mBeanServer() {
return ManagementFactory.getPlatformMBeanServer();
}
// NOTE(review): the com.sun.management.jmxremote.* properties are normally
// read at JVM launch; setting them after startup may have no effect —
// confirm, or pass them on the command line / start a JMXConnectorServer
// programmatically instead.
// NOTE(review): authentication and SSL are disabled here — acceptable for
// local profiling only, never in production.
@EventListener
public void setupJMXMonitoring(ApplicationStartedEvent event) {
System.setProperty("com.sun.management.jmxremote", "true");
System.setProperty("com.sun.management.jmxremote.port", "9999");
System.setProperty("com.sun.management.jmxremote.authenticate", "false");
System.setProperty("com.sun.management.jmxremote.ssl", "false");
}
}@Component
public class AgentPerformanceMetrics {

    /** Registry used to create and cache all agent-level meters. */
    private final MeterRegistry meterRegistry;

    /**
     * Registers the JVM memory gauge at construction time.
     * Micrometer's {@code Gauge.builder} requires the watched object and the
     * value function up front — the previous
     * {@code builder(name).register(registry, obj, fn)} overload does not exist.
     */
    public AgentPerformanceMetrics(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
        Gauge.builder("jvm.memory.usage", this, AgentPerformanceMetrics::getCurrentMemoryUsage)
            .description("Current JVM memory usage")
            .register(meterRegistry);
    }

    /** Starts a timing sample when an agent execution begins. */
    @EventListener
    public void onAgentExecution(AgentExecutionStartedEvent event) {
        Timer.Sample sample = Timer.start(meterRegistry);
        event.setSample(sample);
    }

    /**
     * Stops the timing sample and records success/error counters.
     * Tags must be applied when a meter is registered ({@code Timer} has no
     * instance {@code tag()} method, and {@code Counter.increment} takes no
     * tags), so meters are resolved per agent name here; the registry caches
     * them by id, making repeated lookups cheap.
     */
    @EventListener
    public void onAgentCompleted(AgentExecutionCompletedEvent event) {
        String agent = event.getAgentName();
        Timer executionTimer = Timer.builder("agent.execution.time")
            .description("Time taken to execute agent actions")
            .tag("framework", "embabel")
            .tag("agent", agent)
            .register(meterRegistry);
        event.getSample().stop(executionTimer);
        if (event.isSuccess()) {
            Counter.builder("agent.execution.success")
                .description("Successful agent executions")
                .tag("agent", agent)
                .register(meterRegistry)
                .increment();
        } else {
            Counter.builder("agent.execution.error")
                .description("Failed agent executions")
                .tag("agent", agent)
                .tag("error", event.getError().getClass().getSimpleName())
                .register(meterRegistry)
                .increment();
        }
    }

    /** Fraction of max heap currently in use (0.0–1.0). */
    private double getCurrentMemoryUsage() {
        Runtime runtime = Runtime.getRuntime();
        return (double) (runtime.totalMemory() - runtime.freeMemory()) / runtime.maxMemory();
    }
}
@Component
@ConditionalOnProperty(name = "embabel.performance.cpu.profiling.enabled", havingValue = "true")
public class CPUProfilerConfiguration {

    private static final Logger logger = LoggerFactory.getLogger(CPUProfilerConfiguration.class);

    /** How long to sample, in seconds (default 300). */
    @Value("${embabel.performance.cpu.profiling.duration:300}")
    private int profilingDuration;

    /** Sampling interval in nanoseconds (default 1,000,000 ns = 1 ms). */
    @Value("${embabel.performance.cpu.profiling.interval:1000000}")
    private int samplingInterval;

    /**
     * Kicks off an async-profiler CPU sampling session as soon as the bean is
     * constructed; the flame graph lands in cpu-profile.html. Failures are
     * logged but never abort application startup.
     * NOTE(review): only the exception message is logged — consider passing
     * the exception itself so the stack trace is retained.
     */
    @PostConstruct
    public void startCPUProfiling() {
        try {
            String profilerArgs = String.format(
                "start,event=cpu,interval=%dns,file=cpu-profile.html,duration=%ds",
                samplingInterval, profilingDuration
            );
            AsyncProfiler.getInstance().execute(profilerArgs);
            logger.info("CPU profiling started for {} seconds", profilingDuration);
        } catch (Exception e) {
            logger.warn("Failed to start CPU profiling: {}", e.getMessage());
        }
    }
}
@SpringBootTest
@TestMethodOrder(OrderAnnotation.class)
// Integration tests measuring wall-clock execution time for single and
// parallel agent runs; thresholds below are upper bounds, not targets.
public class CPUPerformanceTest {
@Autowired
private AgentPlatform agentPlatform;
@Autowired
private MeterRegistry meterRegistry;
// Times one FinancialAnalysisAgent run and asserts the mean stays under 5 s.
// NOTE(review): createTestData() is not defined in this class — presumably
// inherited or provided by a test fixture; confirm.
@Test
@Order(1)
public void testSingleAgentCPUUsage() {
// Measure CPU usage for single agent execution
Timer.Sample sample = Timer.start(meterRegistry);
AnalysisData data = createTestData();
FinancialAnalysis result = agentPlatform.run(FinancialAnalysisAgent.class, data);
sample.stop(Timer.builder("cpu.test.single.agent").register(meterRegistry));
assertThat(result).isNotNull();
// Verify CPU metrics
Timer cpuTimer = meterRegistry.get("cpu.test.single.agent").timer();
assertThat(cpuTimer.mean(TimeUnit.MILLISECONDS)).isLessThan(5000); // 5 second max
}
// Runs 10 agents concurrently and times the whole batch.
// NOTE(review): the boolean result of latch.await(30, SECONDS) is ignored —
// on timeout the test proceeds with incomplete runs; consider asserting it.
@Test
@Order(2)
public void testParallelAgentsCPUUsage() throws InterruptedException {
// Test CPU usage with parallel agent execution
int concurrentAgents = 10;
CountDownLatch latch = new CountDownLatch(concurrentAgents);
List<CompletableFuture<Void>> futures = new ArrayList<>();
AnalysisData data = createTestData();
Timer.Sample sample = Timer.start(meterRegistry);
for (int i = 0; i < concurrentAgents; i++) {
// countDown() in finally so a failing agent cannot hang the latch.
CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
try {
agentPlatform.run(FinancialAnalysisAgent.class, data);
} finally {
latch.countDown();
}
});
futures.add(future);
}
latch.await(30, TimeUnit.SECONDS);
sample.stop(Timer.builder("cpu.test.parallel.agents").register(meterRegistry));
// Verify all agents completed
futures.forEach(future -> assertThat(future).succeedsWithin(Duration.ofSeconds(5)));
// Check CPU metrics
Timer parallelTimer = meterRegistry.get("cpu.test.parallel.agents").timer();
assertThat(parallelTimer.mean(TimeUnit.MILLISECONDS)).isLessThan(15000); // 15 second max
}
}Micro benchmarks are essential for measuring the performance of individual components in isolation. JMH provides accurate, statistically sound measurements of small code units.
<dependencies>
<!-- JMH Core -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>1.37</version>
<scope>test</scope>
</dependency>
<!-- JMH Annotation Processor -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>1.37</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- JMH Maven Plugin -->
<plugin>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-maven-plugin</artifactId>
<version>1.37</version>
<executions>
<execution>
<goals>
<goal>generate-sources</goal>
</goals>
<phase>generate-sources</phase>
</execution>
</executions>
</plugin>
</plugins>
</build>
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(value = 2)
@State(Scope.Benchmark)
public class ActionPerformanceBenchmark {

    private AnalysisAgent analysisAgent;
    private AnalysisData testData;
    private FakeOperationContext context;

    /** Builds the fixed 1000-record, medium-complexity benchmark payload. */
    private AnalysisData buildTestData() {
        return AnalysisData.builder()
            .records(1000)
            .complexity(ComplexityLevel.MEDIUM)
            .build();
    }

    /** One-time fixture creation; runs once per trial (per fork). */
    @Setup(Level.Trial)
    public void setup() {
        analysisAgent = new AnalysisAgent();
        testData = buildTestData();
        context = new FakeOperationContext();
        context.expectResponse(new AnalysisResult("benchmark result"));
    }

    /** Baseline: cheapest analysis path. */
    @Benchmark
    public AnalysisResult benchmarkSimpleAnalysis() {
        return analysisAgent.performSimpleAnalysis(testData, context);
    }

    /** Heavier analysis path over the same payload. */
    @Benchmark
    public AnalysisResult benchmarkComplexAnalysis() {
        return analysisAgent.performComplexAnalysis(testData, context);
    }

    /** Validation cost alone, without the analysis step. */
    @Benchmark
    public DataValidation benchmarkDataValidation() {
        return analysisAgent.validateData(testData);
    }

    /** Same simple analysis executed by 4 threads in a symmetric group. */
    @Benchmark
    @Group("parallel")
    @GroupThreads(4)
    public AnalysisResult benchmarkConcurrentAnalysis() {
        return analysisAgent.performSimpleAnalysis(testData, context);
    }
}
@BenchmarkMode({Mode.AverageTime, Mode.Throughput})
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(value = 1)
@State(Scope.Benchmark)
public class DomainModelBenchmark {

    private FinancialData financialData;
    private MarketData marketData;
    private List<Transaction> transactions;

    // JMH requires @Param on a @State field, not on a benchmark-method
    // parameter (the previous placement is rejected by the harness); the
    // harness injects each value before the trial runs.
    @Param({"100", "1000", "10000"})
    private int transactionCount;

    /** One-time fixture creation: 10k synthetic transactions plus domain objects. */
    @Setup(Level.Trial)
    public void setup() {
        financialData = createFinancialData();
        marketData = createMarketData();
        transactions = createTransactionList(10000);
    }

    /** Pure domain computation: risk score on a single aggregate. */
    @Benchmark
    public RiskScore calculateRiskScore() {
        return financialData.calculateRiskScore();
    }

    /** Trend analysis over the market data fixture. */
    @Benchmark
    public MarketTrend analyzeTrend() {
        return marketData.analyzeTrend();
    }

    /** Stream reduction over all 10k transactions. */
    @Benchmark
    public BigDecimal calculatePortfolioValue() {
        return transactions.stream()
            .map(Transaction::getValue)
            .reduce(BigDecimal.ZERO, BigDecimal::add);
    }

    /** Filter cost scaling with list size; size injected via @Param field. */
    @Benchmark
    public List<Transaction> filterTransactionsByAmount() {
        return transactions.stream()
            .limit(transactionCount)
            .filter(t -> t.getAmount().compareTo(BigDecimal.valueOf(1000)) > 0)
            .collect(Collectors.toList());
    }

    /** Grouping cost over the full fixture. */
    @Benchmark
    public Map<String, List<Transaction>> groupTransactionsByCategory() {
        return transactions.stream()
            .collect(Collectors.groupingBy(Transaction::getCategory));
    }
}
@BenchmarkMode(Mode.SingleShotTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 3)
@Measurement(iterations = 10)
@Fork(value = 1)
@State(Scope.Benchmark)
public class LLMIntegrationBenchmark {

    // NOTE(review): assigned in setup() but never read by any benchmark —
    // all calls go through context.ai(); remove once confirmed unused.
    private LlmOperations llmOperations;
    private OperationContext context;
    private String simplePrompt;
    private String complexPrompt;

    // JMH @Param must annotate a @State field; the previous version
    // annotated a benchmark-method parameter, which JMH rejects.
    @Param({"gpt-3.5-turbo", "gpt-4", "claude-3-opus", "llama3.2"})
    private String model;

    @Setup(Level.Trial)
    public void setup() {
        llmOperations = new ChatClientLlmOperations();
        context = createBenchmarkContext();
        simplePrompt = "Analyze this simple data: test";
        complexPrompt = createComplexPrompt();
    }

    /** Latency of a minimal single-prompt completion. */
    @Benchmark
    public String benchmarkSimpleLLMCall() {
        return context.ai().withDefaultLlm().generateText(simplePrompt);
    }

    /** Latency of a multi-requirement prompt. */
    @Benchmark
    public String benchmarkComplexLLMCall() {
        return context.ai().withDefaultLlm().generateText(complexPrompt);
    }

    /** Cost of structured (typed) output versus plain text. */
    @Benchmark
    public AnalysisResult benchmarkStructuredOutput() {
        return context.ai().withDefaultLlm()
            .createObject("Create analysis result", AnalysisResult.class);
    }

    /** Same simple prompt across models; the model id is injected by JMH. */
    @Benchmark
    public String benchmarkModelPerformance() {
        return context.ai()
            .withLlm(LlmOptions.withModel(model))
            .generateText(simplePrompt);
    }

    private String createComplexPrompt() {
        return """
Analyze the following complex financial scenario:
- Multiple data sources with varying reliability
- Historical trends spanning 10 years
- Market volatility factors
- Regulatory compliance requirements
Provide comprehensive analysis with:
1. Risk assessment
2. Trend analysis
3. Recommendations
4. Compliance notes
""";
    }
}
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(value = 2)
@State(Scope.Benchmark)
public class ConditionEvaluationBenchmark {

    private FinancialAnalysisAgent agent;
    private AnalysisData data;
    private Budget budget;
    private RiskProfile riskProfile;

    // JMH @Param must be a @State field; the original annotated a
    // benchmark-method parameter, which the harness rejects.
    @Param({"LOW", "MEDIUM", "HIGH", "CRITICAL"})
    private String riskLevel;

    @Setup(Level.Trial)
    public void setup() {
        agent = new FinancialAnalysisAgent();
        data = createAnalysisData();
        budget = createBudget();
        riskProfile = createRiskProfile();
    }

    /** Cheap predicate over a single object. */
    @Benchmark
    public boolean benchmarkSimpleCondition() {
        return agent.hasValidBudget(budget);
    }

    /** Predicate combining three inputs. */
    @Benchmark
    public boolean benchmarkComplexCondition() {
        return agent.isHighRiskAnalysisRequired(data, riskProfile, budget);
    }

    /** Predicate whose cost depends on the data payload. */
    @Benchmark
    public boolean benchmarkDataDependentCondition() {
        return agent.requiresExternalValidation(data);
    }

    /** Runs once per @Param value injected by the harness. */
    @Benchmark
    public boolean benchmarkParameterizedCondition() {
        RiskLevel level = RiskLevel.valueOf(riskLevel);
        return agent.requiresSpecialHandling(data, level);
    }
}
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 3, time = 2, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
@Fork(value = 1)
@State(Scope.Benchmark)
public class GOAPPlanningBenchmark {

    private AgentPlatform agentPlatform;
    private WorldState initialState;
    private Goal targetGoal;

    // JMH @Param belongs on a @State field, not on a benchmark-method
    // parameter (which JMH does not support).
    @Param({"5", "10", "20", "50"})
    private int actionCount;

    @Setup(Level.Trial)
    public void setup() {
        agentPlatform = createBenchmarkPlatform();
        initialState = createInitialWorldState();
        targetGoal = createTargetGoal();
    }

    /** Baseline planning cost from the default initial state. */
    @Benchmark
    public Plan benchmarkSimplePlanning() {
        return agentPlatform.createPlan(initialState, targetGoal);
    }

    /** Planning cost as the available-action count grows (via @Param). */
    @Benchmark
    public Plan benchmarkPlanningWithVariableActions() {
        WorldState state = createStateWithActions(actionCount);
        return agentPlatform.createPlan(state, targetGoal);
    }

    /** Worst case: complex state with a composite goal. */
    @Benchmark
    public Plan benchmarkComplexPlanning() {
        WorldState complexState = createComplexWorldState();
        Goal complexGoal = createComplexGoal();
        return agentPlatform.createPlan(complexState, complexGoal);
    }

    /** Cost of discovering candidate actions alone, without plan search. */
    @Benchmark
    public List<Action> benchmarkActionDiscovery() {
        return agentPlatform.discoverAvailableActions(initialState);
    }
}
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(value = 1)
@State(Scope.Benchmark)
public class SerializationBenchmark {

    private ObjectMapper objectMapper;
    private AnalysisResult analysisResult;
    private ComplexDomainObject complexObject;
    private String jsonString;

    // JMH @Param must annotate a @State field; annotating a benchmark-method
    // parameter (as before) is rejected by the harness.
    @Param({"100", "1000", "10000"})
    private int size;

    /** Builds fixtures once per trial, including a pre-serialized JSON string. */
    @Setup(Level.Trial)
    public void setup() throws Exception {
        objectMapper = new ObjectMapper();
        analysisResult = createAnalysisResult();
        complexObject = createComplexDomainObject();
        jsonString = objectMapper.writeValueAsString(analysisResult);
    }

    /** Object -> JSON for a typical result payload. */
    @Benchmark
    public String benchmarkSerialization() throws Exception {
        return objectMapper.writeValueAsString(analysisResult);
    }

    /** JSON -> object round trip of the same payload. */
    @Benchmark
    public AnalysisResult benchmarkDeserialization() throws Exception {
        return objectMapper.readValue(jsonString, AnalysisResult.class);
    }

    /** Serialization cost of a deep/nested domain object. */
    @Benchmark
    public String benchmarkComplexObjectSerialization() throws Exception {
        return objectMapper.writeValueAsString(complexObject);
    }

    /** Collection serialization scaling; list size injected via @Param. */
    @Benchmark
    public String benchmarkCollectionSerialization() throws Exception {
        List<AnalysisResult> results = createResultList(size);
        return objectMapper.writeValueAsString(results);
    }
}
# Run all benchmarks
mvn clean compile exec:java -Dexec.mainClass="org.openjdk.jmh.Main" \
-Dexec.args="-rf json -rff benchmark-results.json"
# Run specific benchmark class
mvn clean compile exec:java -Dexec.mainClass="org.openjdk.jmh.Main" \
-Dexec.args="ActionPerformanceBenchmark -rf json"
# Run benchmarks with specific parameters
mvn clean compile exec:java -Dexec.mainClass="org.openjdk.jmh.Main" \
-Dexec.args="-wi 3 -i 5 -f 1 -t 1"
# Profile with async-profiler
mvn clean compile exec:java -Dexec.mainClass="org.openjdk.jmh.Main" \
-Dexec.args="-prof async:output=flamegraph"
public class BenchmarkRunner {
// Programmatic JMH launcher — convenient for running from an IDE instead of
// the Maven exec invocations shown above.
public static void main(String[] args) throws Exception {
// Average-time mode in microseconds, light warmup, single fork/thread,
// JSON output consumed later by BenchmarkAnalyzer.
Options opt = new OptionsBuilder()
.include(ActionPerformanceBenchmark.class.getSimpleName())
.mode(Mode.AverageTime)
.timeUnit(TimeUnit.MICROSECONDS)
.warmupIterations(3)
.measurementIterations(5)
.forks(1)
.threads(1)
.resultFormat(ResultFormatType.JSON)
.result("benchmark-results.json")
.build();
new Runner(opt).run();
}
}@Component
public class BenchmarkAnalyzer {

    /** One benchmark's primary score and its unit (e.g. "us/op"). */
    private record BenchmarkStats(double score, String unit) {}

    /**
     * Parses a JMH JSON results file and prints a full report followed by a
     * hotspot summary.
     *
     * @param resultsFile path to the file produced with {@code -rf json}
     * @throws Exception if the file cannot be read or parsed
     */
    public void analyzeBenchmarkResults(String resultsFile) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        JsonNode root = mapper.readTree(new File(resultsFile));
        Map<String, BenchmarkStats> statsByName = new HashMap<>();
        for (JsonNode node : root) {
            JsonNode metric = node.get("primaryMetric");
            statsByName.put(
                node.get("benchmark").asText(),
                new BenchmarkStats(metric.get("score").asDouble(),
                    metric.get("scoreUnit").asText()));
        }
        generatePerformanceReport(statsByName);
    }

    /** Prints every benchmark sorted by name, then delegates to hotspot detection. */
    private void generatePerformanceReport(Map<String, BenchmarkStats> stats) {
        System.out.println("\n=== Embabel Agent Performance Report ===");
        stats.entrySet().stream()
            .sorted(Map.Entry.comparingByKey())
            .forEach(e -> System.out.printf("%-50s: %.2f %s%n",
                e.getKey(), e.getValue().score(), e.getValue().unit()));
        identifyPerformanceHotspots(stats);
    }

    /** Flags benchmarks whose score exceeds the per-unit threshold, slowest first. */
    private void identifyPerformanceHotspots(Map<String, BenchmarkStats> stats) {
        System.out.println("\n=== Performance Hotspots ===");
        stats.entrySet().stream()
            .filter(e -> e.getValue().score() > getThreshold(e.getValue().unit()))
            .sorted((e1, e2) -> Double.compare(e2.getValue().score(), e1.getValue().score()))
            .forEach(e -> System.out.printf("HOTSPOT: %s - %.2f %s%n",
                e.getKey(), e.getValue().score(), e.getValue().unit()));
    }

    /** Unknown units never trip the hotspot filter (threshold = MAX_VALUE). */
    private double getThreshold(String unit) {
        return switch (unit) {
            case "us/op" -> 1000.0; // 1ms threshold
            case "ms/op" -> 100.0;  // 100ms threshold
            case "s/op" -> 1.0;     // 1s threshold
            default -> Double.MAX_VALUE;
        };
    }
}
# Add to .github/workflows/performance-tests.yml
- name: Run Microbenchmarks
run: |
./mvnw clean compile exec:java \
-Dexec.mainClass="org.openjdk.jmh.Main" \
-Dexec.args="-rf json -rff microbenchmark-results.json -wi 2 -i 3 -f 1"
- name: Analyze Benchmark Results
run: |
java -cp target/classes BenchmarkAnalyzer microbenchmark-results.json
- name: Upload Benchmark Results
uses: actions/upload-artifact@v4
with:
name: microbenchmark-results
path: microbenchmark-results.json
- name: Performance Regression Check
run: |
# Compare with baseline results
java -cp target/classes PerformanceRegressionChecker \
microbenchmark-results.json \
baseline-microbenchmarks.json
// ❌ BAD: Dead Code Elimination
@Benchmark
public void badBenchmark() {
    String result = expensiveOperation(); // Result not used - may be eliminated
}

// ✅ GOOD: Blackhole consumption
@Benchmark
public void goodBenchmark(Blackhole bh) {
    String result = expensiveOperation();
    bh.consume(result); // Prevents elimination
}

// ❌ BAD: Loop unrolling issues
@Benchmark
public int badLoopBenchmark() {
    int sum = 0;
    for (int i = 0; i < 1000; i++) {
        sum += simpleCalculation(i); // May be unrolled
    }
    return sum;
}

// ✅ GOOD: iteration count supplied as an @Param field on a @State class.
// (The original "good" example annotated a method parameter with @Param —
// which JMH rejects — and declared an int return type without ever
// returning a value, so it did not even compile.)
@State(Scope.Benchmark)
public class GoodLoopState {
    @Param({"100", "1000", "10000"})
    public int iterations;
}

@Benchmark
public void goodLoopBenchmark(GoodLoopState state, Blackhole bh) {
    for (int i = 0; i < state.iterations; i++) {
        bh.consume(simpleCalculation(i)); // Blackhole keeps each result live
    }
}

// ❌ BAD: Constant folding
@Benchmark
public double badConstantBenchmark() {
    return Math.sqrt(16.0); // Constant - computed at compile time
}

// ✅ GOOD: Using state variables
@State(Scope.Benchmark)
public class GoodBenchmarkState {
    @Param({"4.0", "16.0", "64.0"})
    public double value;

    @Benchmark
    public double goodConstantBenchmark() {
        return Math.sqrt(value); // Variable - computed at runtime
    }
}
## 🧠 JVM Memory & Garbage Collection Testing
### **Memory Profiling Configuration**
```java
@Component
@ConditionalOnProperty(name = "embabel.performance.memory.profiling.enabled", havingValue = "true")
public class MemoryProfilerConfiguration {

    private final MeterRegistry meterRegistry;
    private final MemoryMXBean memoryBean;
    private final List<GarbageCollectorMXBean> gcBeans;

    public MemoryProfilerConfiguration(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
        this.memoryBean = ManagementFactory.getMemoryMXBean();
        this.gcBeans = ManagementFactory.getGarbageCollectorMXBeans();
        setupMemoryMetrics();
        setupGCMetrics();
    }

    /**
     * Registers heap / non-heap gauges. Micrometer's {@code Gauge.builder}
     * requires the watched object and extractor function up front —
     * {@code builder(name).register(registry, obj, fn)} is not part of the
     * API, which is why each builder below takes three arguments.
     * (Micrometer also ships JvmMemoryMetrics/JvmGcMetrics binders that
     * cover most of this out of the box — consider using those instead.)
     */
    private void setupMemoryMetrics() {
        Gauge.builder("jvm.memory.heap.used", memoryBean,
                b -> b.getHeapMemoryUsage().getUsed())
            .register(meterRegistry);
        Gauge.builder("jvm.memory.heap.committed", memoryBean,
                b -> b.getHeapMemoryUsage().getCommitted())
            .register(meterRegistry);
        Gauge.builder("jvm.memory.heap.max", memoryBean,
                b -> b.getHeapMemoryUsage().getMax())
            .register(meterRegistry);
        Gauge.builder("jvm.memory.nonheap.used", memoryBean,
                b -> b.getNonHeapMemoryUsage().getUsed())
            .register(meterRegistry);
    }

    /** One collection-count and one cumulative-time gauge per collector. */
    private void setupGCMetrics() {
        gcBeans.forEach(gcBean -> {
            String gcName = gcBean.getName().replace(" ", "_").toLowerCase();
            Gauge.builder("jvm.gc.collections", gcBean, GarbageCollectorMXBean::getCollectionCount)
                .tag("gc", gcName)
                .register(meterRegistry);
            Gauge.builder("jvm.gc.time", gcBean, GarbageCollectorMXBean::getCollectionTime)
                .tag("gc", gcName)
                .register(meterRegistry);
        });
    }
}
# GraalVM Native Image performance optimization
# NOTE: HotSpot-style -XX: GC flags are not valid -H: build options for
# native-image. Select the collector at build time (G1 is Linux-only) and
# pass GC tuning options at run time instead:
native-image --gc=G1 -O2 ...
# then at run time: ./app -XX:MaxGCPauseMillis=200
# Production JVM settings for Embabel agents
-XX:+UseG1GC
-XX:MaxGCPauseMillis=200
-XX:G1HeapRegionSize=16m
-XX:G1NewSizePercent=20
-XX:G1MaxNewSizePercent=40
-XX:G1MixedGCCountTarget=8
-XX:InitiatingHeapOccupancyPercent=45
-XX:+UseStringDeduplication
-XX:+OptimizeStringConcat
-Xms2g
-Xmx8g
# .github/workflows/performance-tests.yml
name: Performance Tests
on:
push:
branches: [ main, performance/** ]
schedule:
# Run performance tests nightly
- cron: '0 2 * * *'
jobs:
performance-tests:
runs-on: ubuntu-latest
services:
performance-db:
image: influxdb:2.0
env:
INFLUXDB_DB: performance
INFLUXDB_USER: embabel
INFLUXDB_USER_PASSWORD: performance
ports:
- 8086:8086
steps:
- uses: actions/checkout@v4
- name: Set up JDK 21
uses: actions/setup-java@v4
with:
java-version: '21'
distribution: 'temurin'
- name: Setup performance testing tools
run: |
# Install async-profiler
wget https://github.com/jvm-profiling-tools/async-profiler/releases/download/v2.9/async-profiler-2.9-linux-x64.tar.gz
tar -xzf async-profiler-2.9-linux-x64.tar.gz
- name: Run performance tests
run: |
./mvnw clean test -Pperformance-test \
-Dembabel.performance.cpu.profiling.enabled=true \
-Dembabel.performance.memory.profiling.enabled=true \
-Djava.library.path=./async-profiler-2.9-linux-x64/lib
- name: Upload performance results
uses: actions/upload-artifact@v4
with:
name: performance-results
path: |
target/performance-reports/
*.jfr
cpu-profile.html
@Component
public class PerformanceTestDataFactory {

    /** Small, low-complexity payload for fast smoke benchmarks. */
    public AnalysisData createTestData() {
        return AnalysisData.builder()
            .size(DataSize.SMALL)
            .complexity(ComplexityLevel.LOW)
            .build();
    }

    /** 10k-record, high-complexity payload for heavier scenarios. */
    public AnalysisData createLargeTestData() {
        return AnalysisData.builder()
            .size(DataSize.LARGE)
            .complexity(ComplexityLevel.HIGH)
            .records(10000)
            .build();
    }

    /**
     * Builds 50k synthetic records intended to occupy significant heap,
     * for memory-stress and long-running test scenarios.
     */
    public LargeDataSet createLargeDataSet() {
        List<ComplexRecord> syntheticRecords = IntStream.range(0, 50000)
            .mapToObj(i -> new ComplexRecord(generateRandomData(1000)))
            .collect(Collectors.toList());
        return new LargeDataSet(syntheticRecords);
    }
}
@Component
public class PerformanceRegressionDetector {

    /** Alert when execution time regresses by more than this percentage. */
    private static final double REGRESSION_THRESHOLD_PERCENT = 10.0;

    private final PerformanceHistoryRepository historyRepository;
    // Was referenced in the alert path but never declared or injected in the
    // original, so the class could not compile; constructor injection added.
    private final NotificationService notificationService;

    public PerformanceRegressionDetector(PerformanceHistoryRepository historyRepository,
                                         NotificationService notificationService) {
        this.historyRepository = historyRepository;
        this.notificationService = notificationService;
    }

    /**
     * Compares a finished test's metrics against the stored baseline and
     * sends an alert when average execution time regressed beyond the
     * threshold. Current metrics are always persisted afterwards as a
     * potential future baseline.
     */
    @EventListener
    public void onPerformanceTestCompleted(PerformanceTestCompletedEvent event) {
        PerformanceMetrics current = event.getMetrics();
        PerformanceMetrics baseline = historyRepository.getBaseline(event.getTestName());
        if (baseline != null) {
            double executionTimeRegression = calculateRegression(
                baseline.getAverageExecutionTime(),
                current.getAverageExecutionTime()
            );
            if (executionTimeRegression > REGRESSION_THRESHOLD_PERCENT) {
                notificationService.sendAlert(
                    "Performance Regression Detected",
                    String.format("Execution time regressed by %.2f%% for test %s",
                        executionTimeRegression, event.getTestName())
                );
            }
        }
        // Store current metrics as potential future baseline
        historyRepository.save(current);
    }

    /** Percentage change from baseline to current (positive = slower). */
    private double calculateRegression(double baseline, double current) {
        return ((current - baseline) / baseline) * 100;
    }
}
- Single Agent Execution: < 5 seconds average
- Parallel Agent Execution: < 15 seconds for 10 concurrent agents
- LLM Integration: < 30 seconds average including network latency
- Memory Usage: < 90% of available heap under normal load
- CPU Usage: < 80% under normal load, < 95% under peak load
- GC Performance: < 5% of total execution time spent in GC
- Concurrent Agents: Support 100+ concurrent agent executions
- Memory Leak Prevention: < 5% memory growth over 24-hour period
- Thread Efficiency: 95%+ success rate under stress test conditions
- Testing Guide - General testing strategies and frameworks
- Advanced Agent Patterns - Parallel execution and optimization patterns
- Configuration Reference - Performance-related configuration options
- Troubleshooting - Performance troubleshooting guides
This performance testing guide is continuously updated based on findings from Issue #867 and community feedback.
(c) Embabel Software Inc 2024-2025.