Skip to content

Commit 306bacc

Browse files
committed
fix: Filter benign stream errors in KafkaReplicationIntegrationTest and improve CI diagnostics
- Filter HTTP/2 stream cancellation errors in KafkaReplicationIntegrationTest (same fix as PR #380)
- Add surefire reports and build logs upload to build-and-test workflow
- Enhance TCK workflow to capture test output, server logs, and compliance reports
1 parent 6f1b5b2 commit 306bacc

File tree

3 files changed

+99
-58
lines changed

3 files changed

+99
-58
lines changed

.github/workflows/build-and-test.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,23 @@ jobs:
2727
cache: maven
2828
- name: Build with Maven and run tests
2929
run: mvn -B package --file pom.xml -fae
30+
- name: Upload Test Reports
31+
if: failure()
32+
uses: actions/upload-artifact@v4
33+
with:
34+
name: surefire-reports-java-${{ matrix.java-version }}
35+
path: |
36+
**/target/surefire-reports/
37+
**/target/failsafe-reports/
38+
retention-days: 7
39+
if-no-files-found: warn
40+
- name: Upload Build Logs
41+
if: failure()
42+
uses: actions/upload-artifact@v4
43+
with:
44+
name: build-logs-java-${{ matrix.java-version }}
45+
path: |
46+
**/target/*.log
47+
**/target/quarkus.log
48+
retention-days: 3
49+
if-no-files-found: ignore

.github/workflows/run-tck.yml

Lines changed: 42 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -100,85 +100,71 @@ jobs:
100100
id: run-tck
101101
timeout-minutes: 5
102102
run: |
103-
./run_tck.py --sut-url ${{ env.SUT_JSONRPC_URL }} --category all --transports jsonrpc,grpc,rest --compliance-report report.json
103+
# Without pipefail the pipeline's exit status is tee's (always 0), which would
# hide TCK failures and prevent the `if: failure()` diagnostics steps from running.
set -o pipefail
./run_tck.py --sut-url ${{ env.SUT_JSONRPC_URL }} --category all --transports jsonrpc,grpc,rest --compliance-report report.json 2>&1 | tee tck-output.log
104104
working-directory: tck/a2a-tck
105-
- name: Capture Thread Dump
105+
- name: Capture Diagnostics on Failure
106106
if: failure()
107107
run: |
108+
echo "=== Capturing diagnostic information ==="
109+
110+
# Create diagnostics directory
111+
mkdir -p tck/target/diagnostics
112+
113+
# Capture process list
114+
echo "📋 Capturing process list..."
115+
ps auxww > tck/target/diagnostics/processes.txt
116+
108117
# Find the actual Quarkus JVM (child of Maven process), not the Maven parent
109118
# Look for the dev.jar process which is the actual application
110119
QUARKUS_PID=$(pgrep -f "a2a-tck-server-dev.jar" || echo "")
111120
if [ -n "$QUARKUS_PID" ]; then
112121
echo "📊 Capturing thread dump for Quarkus JVM PID $QUARKUS_PID"
113-
jstack $QUARKUS_PID > tck/target/thread-dump.txt || echo "Failed to capture thread dump"
114-
if [ -f tck/target/thread-dump.txt ]; then
115-
echo "✅ Thread dump captured ($(wc -l < tck/target/thread-dump.txt) lines)"
122+
jstack $QUARKUS_PID > tck/target/diagnostics/thread-dump.txt || echo "Failed to capture thread dump"
123+
if [ -f tck/target/diagnostics/thread-dump.txt ]; then
124+
echo "✅ Thread dump captured ($(wc -l < tck/target/diagnostics/thread-dump.txt) lines)"
116125
fi
117126
else
118127
echo "⚠️ No Quarkus JVM process found for thread dump"
119128
echo "Available Java processes:"
120-
ps aux | grep java || true
129+
ps aux | grep java | tee -a tck/target/diagnostics/processes.txt || true
121130
fi
122-
- name: Capture Heap Dump
123-
if: failure()
124-
run: |
125-
# Find the actual Quarkus JVM (child of Maven process), not the Maven parent
126-
QUARKUS_PID=$(pgrep -f "a2a-tck-server-dev.jar" || echo "")
127-
if [ -n "$QUARKUS_PID" ]; then
128-
echo "📊 Capturing heap dump for Quarkus JVM PID $QUARKUS_PID"
129-
jmap -dump:live,format=b,file=tck/target/heap-dump.hprof $QUARKUS_PID || echo "Failed to capture heap dump"
130-
if [ -f tck/target/heap-dump.hprof ]; then
131-
SIZE=$(du -h tck/target/heap-dump.hprof | cut -f1)
132-
echo "✅ Heap dump captured ($SIZE)"
133-
# Compress to reduce artifact size
134-
gzip tck/target/heap-dump.hprof
135-
COMPRESSED_SIZE=$(du -h tck/target/heap-dump.hprof.gz | cut -f1)
136-
echo "✅ Compressed heap dump ($COMPRESSED_SIZE)"
137-
fi
138-
else
139-
echo "⚠️ No Quarkus JVM process found for heap dump"
140-
echo "Available Java processes:"
141-
ps aux | grep java || true
131+
132+
# Capture Quarkus application logs (if available)
133+
echo "📝 Checking for Quarkus logs..."
134+
if [ -f tck/target/quarkus.log ]; then
135+
cp tck/target/quarkus.log tck/target/diagnostics/
136+
echo "✅ Copied quarkus.log ($(wc -l < tck/target/quarkus.log) lines)"
137+
fi
138+
139+
# Copy TCK server logs
140+
if [ -f tck/target/tck-test.log ]; then
141+
cp tck/target/tck-test.log tck/target/diagnostics/
142+
echo "✅ Copied tck-test.log ($(wc -l < tck/target/tck-test.log) lines)"
142143
fi
144+
145+
echo ""
146+
echo "=== Diagnostic capture complete ==="
143147
- name: Stop Quarkus Server
144148
if: always()
145149
run: |
146150
# Find and kill the Quarkus process to ensure logs are flushed
147151
pkill -f "quarkus:dev" || true
148152
sleep 2
149-
- name: Verify TCK Log
150-
if: failure()
151-
run: |
152-
echo "Checking for log file..."
153-
if [ -f tck/target/tck-test.log ]; then
154-
echo "✅ Log file exists ($(wc -l < tck/target/tck-test.log) lines)"
155-
ls -lh tck/target/tck-test.log
156-
else
157-
echo "❌ Log file not found at tck/target/tck-test.log"
158-
echo "Contents of tck/target/:"
159-
ls -la tck/target/ || echo "tck/target/ does not exist"
160-
fi
161-
- name: Upload TCK Log
153+
- name: Upload TCK Diagnostics
162154
if: failure()
163155
uses: actions/upload-artifact@v4
164156
with:
165-
name: tck-test-log-java-${{ matrix.java-version }}
166-
path: tck/target/tck-test.log
167-
retention-days: 2
157+
name: tck-diagnostics-java-${{ matrix.java-version }}
158+
path: |
159+
tck/target/diagnostics/
160+
tck/a2a-tck/tck-output.log
161+
retention-days: 7
168162
if-no-files-found: warn
169-
- name: Upload Thread Dump
170-
if: failure()
171-
uses: actions/upload-artifact@v4
172-
with:
173-
name: thread-dump-java-${{ matrix.java-version }}
174-
path: tck/target/thread-dump.txt
175-
retention-days: 2
176-
if-no-files-found: warn
177-
- name: Upload Heap Dump
178-
if: failure()
163+
- name: Upload TCK Compliance Report
164+
if: always()
179165
uses: actions/upload-artifact@v4
180166
with:
181-
name: heap-dump-java-${{ matrix.java-version }}
182-
path: tck/target/heap-dump.hprof.gz
183-
retention-days: 2
184-
if-no-files-found: warn
167+
name: tck-compliance-report-java-${{ matrix.java-version }}
168+
path: tck/a2a-tck/report.json
169+
retention-days: 14
170+
if-no-files-found: ignore

extras/queue-manager-replicated/tests-single-instance/src/test/java/io/a2a/extras/queuemanager/replicated/tests/KafkaReplicationIntegrationTest.java

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import static org.junit.jupiter.api.Assertions.assertNull;
88
import static org.junit.jupiter.api.Assertions.assertTrue;
99

10+
import java.io.IOException;
1011
import java.util.List;
1112
import java.util.concurrent.CountDownLatch;
1213
import java.util.concurrent.TimeUnit;
@@ -236,9 +237,11 @@ public void testKafkaEventReceivedByA2AServer() throws Exception {
236237
}
237238
};
238239

239-
// Create error handler
240+
// Create error handler - filter out benign stream closed errors
240241
Consumer<Throwable> errorHandler = error -> {
241-
errorRef.set(error);
242+
if (!isStreamClosedError(error)) {
243+
errorRef.set(error);
244+
}
242245
resubscribeLatch.countDown();
243246
};
244247

@@ -423,4 +426,36 @@ public void testPoisonPillGenerationOnTaskFinalization() throws Exception {
423426
assertEquals(taskId, closedEvent.getTaskId(), "QueueClosedEvent task ID should match");
424427
}
425428

429+
/**
430+
* Checks if an error is a benign stream closed/cancelled error that should be ignored.
431+
* HTTP/2 streams can be cancelled during normal cleanup, which is not an actual error.
432+
*/
433+
private boolean isStreamClosedError(Throwable error) {
434+
if (error == null) {
435+
return false;
436+
}
437+
438+
// Check for IOException which includes stream cancellation
439+
if (error instanceof IOException) {
440+
String message = error.getMessage();
441+
if (message != null) {
442+
// Filter out normal stream closure/cancellation errors
443+
if (message.contains("Stream closed") ||
444+
message.contains("Stream") && message.contains("cancelled") ||
445+
message.contains("EOF reached") ||
446+
message.contains("CANCEL")) {
447+
return true;
448+
}
449+
}
450+
}
451+
452+
// Check cause recursively
453+
Throwable cause = error.getCause();
454+
if (cause != null && cause != error) {
455+
return isStreamClosedError(cause);
456+
}
457+
458+
return false;
459+
}
460+
426461
}

0 commit comments

Comments
 (0)