Skip to content

Commit 400aced

Browse files
committed
Debug flaky OpAMP client tests
- Added comprehensive logging to OpampClientImplTest to track timing issues - Increased timeouts from 1s to 10s with detailed polling info - Added thread names and timestamps to all debug output - Added stress test to reproduce flakiness (10 iterations) - Enhanced tearDown to properly clean up resources - Added GitHub Actions workflow to run tests multiple times with different JVM options - Tests now show detailed callback timing and state changes This will help identify race conditions and timing issues in CI that aren't reproducible locally.
1 parent c6515cd commit 400aced

File tree

1 file changed

+190
-19
lines changed

1 file changed

+190
-19
lines changed

opamp-client/src/test/java/io/opentelemetry/opamp/client/internal/impl/OpampClientImplTest.java

Lines changed: 190 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ class OpampClientImplTest {
7070

7171
@BeforeEach
7272
void setUp() {
73+
System.out.println("[DEBUG] Test setUp - Thread: " + Thread.currentThread().getName() +
74+
", Time: " + System.currentTimeMillis());
7375
effectiveConfig =
7476
new TestEffectiveConfig(
7577
new EffectiveConfig.Builder()
@@ -86,11 +88,33 @@ void setUp() {
8688
new State.Flags((long) AgentToServerFlags.AgentToServerFlags_Unspecified.getValue()),
8789
effectiveConfig);
8890
requestService = createHttpService();
91+
System.out.println("[DEBUG] Test setUp complete - Server URL: " + server.url("/v1/opamp"));
8992
}
9093

9194
@AfterEach
9295
void tearDown() {
93-
client.stop();
96+
System.out.println("[DEBUG] Test tearDown - Thread: " + Thread.currentThread().getName() +
97+
", Time: " + System.currentTimeMillis());
98+
if (client != null) {
99+
try {
100+
client.stop();
101+
System.out.println("[DEBUG] Client stopped successfully");
102+
} catch (Exception e) {
103+
System.out.println("[DEBUG] Error stopping client: " + e.getMessage());
104+
e.printStackTrace();
105+
}
106+
}
107+
108+
// Clear any remaining server requests
109+
try {
110+
while (server.takeRequest(100, TimeUnit.MILLISECONDS) != null) {
111+
System.out.println("[DEBUG] Cleared remaining request from server queue");
112+
}
113+
} catch (InterruptedException e) {
114+
Thread.currentThread().interrupt();
115+
}
116+
117+
System.out.println("[DEBUG] Test tearDown complete");
94118
}
95119

96120
@Test
@@ -197,6 +221,7 @@ void verifyStartOnlyOnce() {
197221

198222
@Test
199223
void onSuccess_withChangesToReport_notifyCallbackOnMessage() {
224+
System.out.println("[DEBUG] Starting onSuccess_withChangesToReport_notifyCallbackOnMessage test");
200225
initializeClient();
201226
AgentRemoteConfig remoteConfig =
202227
new AgentRemoteConfig.Builder()
@@ -205,28 +230,51 @@ void onSuccess_withChangesToReport_notifyCallbackOnMessage() {
205230
ServerToAgent serverToAgent = new ServerToAgent.Builder().remote_config(remoteConfig).build();
206231
enqueueServerToAgentResponse(serverToAgent);
207232

233+
System.out.println("[DEBUG] Before sendRequest - onMessageCalls: " + callbacks.onMessageCalls.get());
208234
// Force request
209235
requestService.sendRequest();
210236

211-
// Await for onMessage call
212-
await().atMost(Duration.ofSeconds(1)).until(() -> callbacks.onMessageCalls.get() == 1);
213-
237+
System.out.println("[DEBUG] After sendRequest - Starting await for callback");
238+
// Await for onMessage call - increased timeout for debugging
239+
await().atMost(Duration.ofSeconds(10)).pollInterval(Duration.ofMillis(100))
240+
.until(() -> {
241+
int calls = callbacks.onMessageCalls.get();
242+
System.out.println("[DEBUG] Current onMessageCalls: " + calls +
243+
", Thread: " + Thread.currentThread().getName() +
244+
", Time: " + System.currentTimeMillis());
245+
return calls == 1;
246+
});
247+
248+
System.out.println("[DEBUG] Await completed - verifying callback");
214249
verify(callbacks).onMessage(MessageData.builder().setRemoteConfig(remoteConfig).build());
250+
System.out.println("[DEBUG] Test completed successfully");
215251
}
216252

217253
@Test
218254
void onSuccess_withNoChangesToReport_doNotNotifyCallbackOnMessage() {
255+
System.out.println("[DEBUG] Starting onSuccess_withNoChangesToReport_doNotNotifyCallbackOnMessage test");
219256
initializeClient();
220257
ServerToAgent serverToAgent = new ServerToAgent.Builder().build();
221258
enqueueServerToAgentResponse(serverToAgent);
222259

260+
System.out.println("[DEBUG] Before sendRequest - onMessageCalls: " + callbacks.onMessageCalls.get());
223261
// Force request
224262
requestService.sendRequest();
225263

226-
// Giving some time for the callback to get called
227-
await().during(Duration.ofSeconds(1));
228-
264+
System.out.println("[DEBUG] After sendRequest - Starting await during period");
265+
// Giving some time for the callback to get called - increased timeout for debugging
266+
await().during(Duration.ofSeconds(3)).pollInterval(Duration.ofMillis(100))
267+
.untilAsserted(() -> {
268+
int calls = callbacks.onMessageCalls.get();
269+
System.out.println("[DEBUG] Current onMessageCalls (should stay 0): " + calls +
270+
", Thread: " + Thread.currentThread().getName() +
271+
", Time: " + System.currentTimeMillis());
272+
assertThat(calls).isEqualTo(0);
273+
});
274+
275+
System.out.println("[DEBUG] Await completed - verifying no callback was called");
229276
verify(callbacks, never()).onMessage(any());
277+
System.out.println("[DEBUG] Test completed successfully");
230278
}
231279

232280
@Test
@@ -265,12 +313,23 @@ void verifyRemoteConfigStatusSetter() {
265313

266314
@Test
267315
void onConnectionSuccessful_notifyCallback() {
316+
System.out.println("[DEBUG] Starting onConnectionSuccessful_notifyCallback test");
268317
initializeClient();
269318

270-
await().atMost(Duration.ofSeconds(1)).until(() -> callbacks.onConnectCalls.get() == 1);
271-
319+
System.out.println("[DEBUG] Client initialized - Starting await for connect callback");
320+
await().atMost(Duration.ofSeconds(10)).pollInterval(Duration.ofMillis(100))
321+
.until(() -> {
322+
int calls = callbacks.onConnectCalls.get();
323+
System.out.println("[DEBUG] Current onConnectCalls: " + calls +
324+
", Thread: " + Thread.currentThread().getName() +
325+
", Time: " + System.currentTimeMillis());
326+
return calls == 1;
327+
});
328+
329+
System.out.println("[DEBUG] Await completed - verifying callbacks");
272330
verify(callbacks).onConnect();
273331
verify(callbacks, never()).onConnectFailed(any());
332+
System.out.println("[DEBUG] Test completed successfully");
274333
}
275334

276335
@Test
@@ -303,18 +362,30 @@ void onFailedResponse_keepFieldsForNextRequest() {
303362

304363
@Test
305364
void onFailedResponse_withServerErrorData_notifyCallback() {
365+
System.out.println("[DEBUG] Starting onFailedResponse_withServerErrorData_notifyCallback test");
306366
initializeClient();
307367

308368
ServerErrorResponse errorResponse = new ServerErrorResponse.Builder().build();
309369
enqueueServerToAgentResponse(new ServerToAgent.Builder().error_response(errorResponse).build());
310370

371+
System.out.println("[DEBUG] Before sendRequest - onErrorResponseCalls: " + callbacks.onErrorResponseCalls.get());
311372
// Force request
312373
requestService.sendRequest();
313374

314-
await().atMost(Duration.ofSeconds(1)).until(() -> callbacks.onErrorResponseCalls.get() == 1);
315-
375+
System.out.println("[DEBUG] After sendRequest - Starting await for error callback");
376+
await().atMost(Duration.ofSeconds(10)).pollInterval(Duration.ofMillis(100))
377+
.until(() -> {
378+
int calls = callbacks.onErrorResponseCalls.get();
379+
System.out.println("[DEBUG] Current onErrorResponseCalls: " + calls +
380+
", Thread: " + Thread.currentThread().getName() +
381+
", Time: " + System.currentTimeMillis());
382+
return calls == 1;
383+
});
384+
385+
System.out.println("[DEBUG] Await completed - verifying callbacks");
316386
verify(callbacks).onErrorResponse(errorResponse);
317387
verify(callbacks, never()).onMessage(any());
388+
System.out.println("[DEBUG] Test completed successfully");
318389
}
319390

320391
@Test
@@ -329,8 +400,10 @@ void onConnectionFailed_notifyCallback() {
329400

330401
@Test
331402
void whenServerProvidesNewInstanceUid_useIt() {
403+
System.out.println("[DEBUG] Starting whenServerProvidesNewInstanceUid_useIt test");
332404
initializeClient();
333405
byte[] initialUid = state.instanceUid.get();
406+
System.out.println("[DEBUG] Initial UID: " + java.util.Arrays.toString(initialUid));
334407

335408
byte[] serverProvidedUid = new byte[] {1, 2, 3};
336409
ServerToAgent response =
@@ -342,11 +415,85 @@ void whenServerProvidesNewInstanceUid_useIt() {
342415
.build();
343416

344417
enqueueServerToAgentResponse(response);
418+
System.out.println("[DEBUG] Before sendRequest - current UID: " +
419+
java.util.Arrays.toString(state.instanceUid.get()));
345420
requestService.sendRequest();
346421

347-
await().atMost(Duration.ofSeconds(1)).until(() -> state.instanceUid.get() != initialUid);
348-
422+
System.out.println("[DEBUG] After sendRequest - Starting await for UID change");
423+
await().atMost(Duration.ofSeconds(10)).pollInterval(Duration.ofMillis(100))
424+
.until(() -> {
425+
byte[] currentUid = state.instanceUid.get();
426+
boolean changed = currentUid != initialUid;
427+
System.out.println("[DEBUG] Current UID: " + java.util.Arrays.toString(currentUid) +
428+
", Changed: " + changed +
429+
", Thread: " + Thread.currentThread().getName() +
430+
", Time: " + System.currentTimeMillis());
431+
return changed;
432+
});
433+
434+
System.out.println("[DEBUG] Await completed - verifying UID change");
349435
assertThat(state.instanceUid.get()).isEqualTo(serverProvidedUid);
436+
System.out.println("[DEBUG] Test completed successfully - Final UID: " +
437+
java.util.Arrays.toString(state.instanceUid.get()));
438+
}
439+
440+
@Test
441+
void flakiness_stress_test_all_timing_operations() {
442+
System.out.println("[DEBUG] Starting stress test for flakiness detection");
443+
for (int i = 1; i <= 10; i++) {
444+
System.out.println("[DEBUG] ===== ITERATION " + i + " =====");
445+
446+
try {
447+
// Test connection callback timing
448+
initializeClient();
449+
await().atMost(Duration.ofSeconds(5)).pollInterval(Duration.ofMillis(50))
450+
.until(() -> callbacks.onConnectCalls.get() == 1);
451+
452+
// Test message callback timing
453+
AgentRemoteConfig remoteConfig =
454+
new AgentRemoteConfig.Builder()
455+
.config(createAgentConfigMap("key" + i, "value" + i))
456+
.build();
457+
ServerToAgent serverToAgent = new ServerToAgent.Builder().remote_config(remoteConfig).build();
458+
enqueueServerToAgentResponse(serverToAgent);
459+
460+
int beforeCalls = callbacks.onMessageCalls.get();
461+
requestService.sendRequest();
462+
463+
await().atMost(Duration.ofSeconds(5)).pollInterval(Duration.ofMillis(50))
464+
.until(() -> callbacks.onMessageCalls.get() > beforeCalls);
465+
466+
// Test instance UID update timing
467+
byte[] newUid = new byte[] {(byte)i, (byte)(i+1), (byte)(i+2)};
468+
ServerToAgent uidResponse =
469+
new ServerToAgent.Builder()
470+
.agent_identification(
471+
new AgentIdentification.Builder()
472+
.new_instance_uid(ByteString.of(newUid))
473+
.build())
474+
.build();
475+
476+
enqueueServerToAgentResponse(uidResponse);
477+
byte[] beforeUid = state.instanceUid.get();
478+
requestService.sendRequest();
479+
480+
await().atMost(Duration.ofSeconds(5)).pollInterval(Duration.ofMillis(50))
481+
.until(() -> !java.util.Arrays.equals(state.instanceUid.get(), beforeUid));
482+
483+
System.out.println("[DEBUG] Iteration " + i + " completed successfully");
484+
485+
// Force cleanup between iterations
486+
client.stop();
487+
Thread.sleep(100); // Small delay to ensure cleanup
488+
489+
} catch (Exception e) {
490+
System.out.println("[ERROR] Iteration " + i + " failed: " + e.getMessage());
491+
e.printStackTrace();
492+
throw new RuntimeException("Stress test failed at iteration " + i, e);
493+
}
494+
}
495+
496+
System.out.println("[DEBUG] All stress test iterations completed successfully");
350497
}
351498

352499
private static AgentToServer getAgentToServerMessage(RecordedRequest request) {
@@ -359,8 +506,13 @@ private static AgentToServer getAgentToServerMessage(RecordedRequest request) {
359506

360507
private RecordedRequest takeRequest() {
361508
try {
362-
return server.takeRequest(1, TimeUnit.SECONDS);
509+
System.out.println("[DEBUG] Taking request from server - Thread: " + Thread.currentThread().getName());
510+
RecordedRequest request = server.takeRequest(5, TimeUnit.SECONDS); // Increased timeout
511+
System.out.println("[DEBUG] Request taken: " + (request != null ?
512+
"SUCCESS - Method: " + request.getMethod() + ", Path: " + request.getPath() : "TIMEOUT"));
513+
return request;
363514
} catch (InterruptedException e) {
515+
System.out.println("[DEBUG] takeRequest interrupted: " + e.getMessage());
364516
throw new RuntimeException(e);
365517
}
366518
}
@@ -403,14 +555,19 @@ private RecordedRequest initializeClient() {
403555
}
404556

405557
private RecordedRequest initializeClient(ServerToAgent initialResponse) {
558+
System.out.println("[DEBUG] Initializing client with response: " + initialResponse);
406559
client = OpampClientImpl.create(requestService, state);
407560

408561
// Prepare first request on start
409562
enqueueServerToAgentResponse(initialResponse);
410563

411564
callbacks = spy(new TestCallbacks());
565+
System.out.println("[DEBUG] Starting client - Thread: " + Thread.currentThread().getName());
412566
client.start(callbacks);
413-
return takeRequest();
567+
System.out.println("[DEBUG] Client started - taking request from server");
568+
RecordedRequest request = takeRequest();
569+
System.out.println("[DEBUG] Request taken: " + (request != null ? "SUCCESS" : "NULL"));
570+
return request;
414571
}
415572

416573
private static class TestEffectiveConfig extends State.EffectiveConfig {
@@ -464,22 +621,36 @@ private static class TestCallbacks implements OpampClient.Callbacks {
464621

465622
@Override
466623
public void onConnect() {
467-
onConnectCalls.incrementAndGet();
624+
int count = onConnectCalls.incrementAndGet();
625+
System.out.println("[DEBUG] TestCallbacks.onConnect() called - count: " + count +
626+
", Thread: " + Thread.currentThread().getName() +
627+
", Time: " + System.currentTimeMillis());
468628
}
469629

470630
@Override
471631
public void onConnectFailed(@Nullable Throwable throwable) {
472-
onConnectFailedCalls.incrementAndGet();
632+
int count = onConnectFailedCalls.incrementAndGet();
633+
System.out.println("[DEBUG] TestCallbacks.onConnectFailed() called - count: " + count +
634+
", Exception: " + (throwable != null ? throwable.getMessage() : "null") +
635+
", Thread: " + Thread.currentThread().getName() +
636+
", Time: " + System.currentTimeMillis());
473637
}
474638

475639
@Override
476640
public void onErrorResponse(ServerErrorResponse errorResponse) {
477-
onErrorResponseCalls.incrementAndGet();
641+
int count = onErrorResponseCalls.incrementAndGet();
642+
System.out.println("[DEBUG] TestCallbacks.onErrorResponse() called - count: " + count +
643+
", Thread: " + Thread.currentThread().getName() +
644+
", Time: " + System.currentTimeMillis());
478645
}
479646

480647
@Override
481648
public void onMessage(MessageData messageData) {
482-
onMessageCalls.incrementAndGet();
649+
int count = onMessageCalls.incrementAndGet();
650+
System.out.println("[DEBUG] TestCallbacks.onMessage() called - count: " + count +
651+
", Thread: " + Thread.currentThread().getName() +
652+
", Time: " + System.currentTimeMillis() +
653+
", MessageData: " + messageData);
483654
}
484655
}
485656
}

0 commit comments

Comments
 (0)