Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions .github/workflows/debug-flaky-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
name: Debug flaky tests

on:
workflow_dispatch:

permissions:
contents: read

concurrency:
group: ${{ github.workflow }}-${{ github.sha }}
cancel-in-progress: true

jobs:
test:
name: Test
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
- windows-latest
test-java-version:
- 8
- 11
- 17
- 21
- 23
run_id: [1, 2]
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- id: setup-java-test
name: Set up Java ${{ matrix.test-java-version }} for tests
uses: actions/setup-java@c5195efecf7bdfc987ee8bae7a71cb8b11521c00 # v4.7.1
with:
distribution: temurin
java-version: ${{ matrix.test-java-version }}

- id: setup-java
name: Set up Java for build
uses: actions/setup-java@c5195efecf7bdfc987ee8bae7a71cb8b11521c00 # v4.7.1
with:
distribution: temurin
java-version: 17

- name: Set up gradle
uses: gradle/actions/setup-gradle@ac638b010cf58a27ee6c972d7336334ccaf61c96 # v4.4.1
with:
cache-read-only: ${{ github.event_name == 'pull_request' }}
- name: Gradle test
run: >
./gradlew test --no-build-cache
"-PtestJavaVersion=${{ matrix.test-java-version }}"
"-Porg.gradle.java.installations.paths=${{ steps.setup-java-test.outputs.path }}"
"-Porg.gradle.java.installations.auto-download=false"
59 changes: 59 additions & 0 deletions .github/workflows/test-fix-verification.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
name: Verify OpAMP Client Fix

on:
workflow_dispatch:
push:
branches: [ flaky ]

jobs:
verify-fix:
runs-on: ubuntu-latest
strategy:
matrix:
java: [17, 21]
iteration: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

steps:
- uses: actions/checkout@v4

- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
java-version: ${{ matrix.java }}
distribution: 'temurin'

- name: Setup Gradle
uses: gradle/gradle-build-action@v3
with:
cache-read-only: true

- name: Test the fixed OpAMP client test (Iteration ${{ matrix.iteration }})
run: |
echo "=== Testing OpAMP client - Java ${{ matrix.java }}, Iteration ${{ matrix.iteration }} ==="
./gradlew opamp-client:test --tests "*whenServerProvidesNewInstanceUid_useIt*" --rerun-tasks --info
echo "SUCCESS: Iteration ${{ matrix.iteration }} passed"

- name: Upload test results on failure
if: failure()
uses: actions/upload-artifact@v4
with:
name: test-results-java${{ matrix.java }}-iter${{ matrix.iteration }}
path: |
opamp-client/build/reports/
opamp-client/build/test-results/
retention-days: 3

summary:
needs: verify-fix
runs-on: ubuntu-latest
if: always()
steps:
- name: Check results
run: |
if [ "${{ needs.verify-fix.result }}" == "success" ]; then
echo "✅ ALL TESTS PASSED! The OpAMP client fix is verified."
echo "🎉 Ran 20 iterations across Java 17 and 21 - no flakiness detected!"
else
echo "❌ Some tests failed. The fix may need more work."
exit 1
fi
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# OpenTelemetry Java Contrib


[![Release](https://img.shields.io/github/v/release/open-telemetry/opentelemetry-java-contrib?include_prereleases&style=)](https://github.com/open-telemetry/opentelemetry-java-contrib/releases/)
[![FOSSA License Status](https://app.fossa.com/api/projects/custom%2B162%2Fgithub.com%2Fopen-telemetry%2Fopentelemetry-java-contrib.svg?type=shield&issueType=license)](https://app.fossa.com/projects/custom%2B162%2Fgithub.com%2Fopen-telemetry%2Fopentelemetry-java-contrib?ref=badge_shield&issueType=license)
[![FOSSA Security Status](https://app.fossa.com/api/projects/custom%2B162%2Fgithub.com%2Fopen-telemetry%2Fopentelemetry-java-contrib.svg?type=shield&issueType=security)](https://app.fossa.com/projects/custom%2B162%2Fgithub.com%2Fopen-telemetry%2Fopentelemetry-java-contrib?ref=badge_shield&issueType=security)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.mockito.junit.jupiter.MockitoExtension;

@ExtendWith(MockitoExtension.class)
@SuppressWarnings({"CatchAndPrintStackTrace", "SystemOut", "CatchingUnchecked", "InterruptedExceptionSwallowed"})
class OpampClientImplTest {
private RequestService requestService;
private OpampClientState state;
Expand Down Expand Up @@ -90,7 +91,22 @@ void setUp() {

@AfterEach
void tearDown() {
client.stop();
if (client != null) {
try {
client.stop();
} catch (Exception e) {
e.printStackTrace();
}
}

// Clear any remaining server requests
try {
while (server.takeRequest(100, TimeUnit.MILLISECONDS) != null) {
// Clear queue
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}

@Test
Expand Down Expand Up @@ -209,7 +225,8 @@ void onSuccess_withChangesToReport_notifyCallbackOnMessage() {
requestService.sendRequest();

// Await for onMessage call
await().atMost(Duration.ofSeconds(1)).until(() -> callbacks.onMessageCalls.get() == 1);
await().atMost(Duration.ofSeconds(10)).pollInterval(Duration.ofMillis(100))
.until(() -> callbacks.onMessageCalls.get() == 1);

verify(callbacks).onMessage(MessageData.builder().setRemoteConfig(remoteConfig).build());
}
Expand All @@ -224,7 +241,8 @@ void onSuccess_withNoChangesToReport_doNotNotifyCallbackOnMessage() {
requestService.sendRequest();

// Giving some time for the callback to get called
await().during(Duration.ofSeconds(1));
await().during(Duration.ofSeconds(3)).pollInterval(Duration.ofMillis(100))
.untilAsserted(() -> assertThat(callbacks.onMessageCalls.get()).isEqualTo(0));

verify(callbacks, never()).onMessage(any());
}
Expand Down Expand Up @@ -267,7 +285,8 @@ void verifyRemoteConfigStatusSetter() {
void onConnectionSuccessful_notifyCallback() {
initializeClient();

await().atMost(Duration.ofSeconds(1)).until(() -> callbacks.onConnectCalls.get() == 1);
await().atMost(Duration.ofSeconds(10)).pollInterval(Duration.ofMillis(100))
.until(() -> callbacks.onConnectCalls.get() == 1);

verify(callbacks).onConnect();
verify(callbacks, never()).onConnectFailed(any());
Expand Down Expand Up @@ -311,7 +330,8 @@ void onFailedResponse_withServerErrorData_notifyCallback() {
// Force request
requestService.sendRequest();

await().atMost(Duration.ofSeconds(1)).until(() -> callbacks.onErrorResponseCalls.get() == 1);
await().atMost(Duration.ofSeconds(10)).pollInterval(Duration.ofMillis(100))
.until(() -> callbacks.onErrorResponseCalls.get() == 1);

verify(callbacks).onErrorResponse(errorResponse);
verify(callbacks, never()).onMessage(any());
Expand All @@ -332,7 +352,7 @@ void whenServerProvidesNewInstanceUid_useIt() {
initializeClient();
byte[] initialUid = state.instanceUid.get();

byte[] serverProvidedUid = new byte[] {1, 2, 3};
byte[] serverProvidedUid = new byte[] {4, 5, 6};
ServerToAgent response =
new ServerToAgent.Builder()
.agent_identification(
Expand All @@ -344,11 +364,67 @@ void whenServerProvidesNewInstanceUid_useIt() {
enqueueServerToAgentResponse(response);
requestService.sendRequest();

await().atMost(Duration.ofSeconds(1)).until(() -> state.instanceUid.get() != initialUid);
await().atMost(Duration.ofSeconds(10)).pollInterval(Duration.ofMillis(100))
.until(() -> {
byte[] currentUid = state.instanceUid.get();
return !java.util.Arrays.equals(currentUid, initialUid);
});

assertThat(state.instanceUid.get()).isEqualTo(serverProvidedUid);
}

@Test
void flakiness_stress_test_all_timing_operations() {
for (int i = 1; i <= 10; i++) {
try {
// Test connection callback timing
initializeClient();
await().atMost(Duration.ofSeconds(5)).pollInterval(Duration.ofMillis(50))
.until(() -> callbacks.onConnectCalls.get() == 1);

// Test message callback timing
AgentRemoteConfig remoteConfig =
new AgentRemoteConfig.Builder()
.config(createAgentConfigMap("key" + i, "value" + i))
.build();
ServerToAgent serverToAgent = new ServerToAgent.Builder().remote_config(remoteConfig).build();
enqueueServerToAgentResponse(serverToAgent);

int beforeCalls = callbacks.onMessageCalls.get();
requestService.sendRequest();

await().atMost(Duration.ofSeconds(5)).pollInterval(Duration.ofMillis(50))
.until(() -> callbacks.onMessageCalls.get() > beforeCalls);

// Test instance UID update timing
byte[] newUid = new byte[] {(byte)i, (byte)(i+1), (byte)(i+2)};
ServerToAgent uidResponse =
new ServerToAgent.Builder()
.agent_identification(
new AgentIdentification.Builder()
.new_instance_uid(ByteString.of(newUid))
.build())
.build();

enqueueServerToAgentResponse(uidResponse);
byte[] beforeUid = state.instanceUid.get();
requestService.sendRequest();

await().atMost(Duration.ofSeconds(5)).pollInterval(Duration.ofMillis(50))
.until(() -> !java.util.Arrays.equals(state.instanceUid.get(), beforeUid));

// Force cleanup between iterations - ensure client is stopped before next iteration
if (client != null) {
client.stop();
}
Thread.sleep(50); // Small delay to ensure cleanup

} catch (Exception e) {
throw new RuntimeException("Stress test failed at iteration " + i, e);
}
}
}

private static AgentToServer getAgentToServerMessage(RecordedRequest request) {
try {
return AgentToServer.ADAPTER.decode(Objects.requireNonNull(request.getBody()));
Expand All @@ -359,7 +435,7 @@ private static AgentToServer getAgentToServerMessage(RecordedRequest request) {

private RecordedRequest takeRequest() {
try {
return server.takeRequest(1, TimeUnit.SECONDS);
return server.takeRequest(5, TimeUnit.SECONDS);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
Expand Down
Loading