Skip to content

Commit 109598f

Browse files
authored
ci: address flaky tests (#1876)
Caches the Spring Boot version from Maven Central. Improves reliability of barrage test by increasing timeouts and moving it out of the way of other parallel tests. Addresses Spring Boot context errors when running in parallel by splitting the uberclass into smaller, self-contained parts.
1 parent 79fae37 commit 109598f

File tree

11 files changed

+1323
-1167
lines changed

11 files changed

+1323
-1167
lines changed

.github/workflows/pull_request.yml

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,30 @@ jobs:
9191
java-version: 25 # Latest LTS
9292
distribution: 'temurin'
9393
cache: 'maven'
94-
- name: Set Spring Boot version
94+
95+
# Reading the latest Spring Boot version from Maven Central often fails.
96+
# Since this information rarely changes, we can cache it, preventing CI failures.
97+
- name: Cache Spring Boot version
98+
id: cache-spring-boot-version
99+
uses: actions/cache@v4
100+
with:
101+
path: spring-boot-version
102+
key: spring-boot-version-${{ matrix.spring-version }}
103+
- name: Get Spring Boot version if not cached
104+
if: steps.cache-spring-boot-version.outputs.cache-hit != 'true'
105+
run: |
106+
echo "$(curl -s 'https://search.maven.org/solrsearch/select?q=g:org.springframework.boot+AND+a:spring-boot-starter+AND+v:${{ matrix.spring-version }}.*' | jq -r '.response.docs[0].v')" >> spring-boot-version
107+
if [ "$(head -n 1 spring-boot-version | cut -c1-3)" = "${{ matrix.spring-version }}" ]; then
108+
exit 0
109+
else
110+
exit 1
111+
fi
112+
- name: Set Spring Boot version in Maven
95113
run: |
96-
SPRING_VERSION="$(curl -s 'https://search.maven.org/solrsearch/select?q=g:org.springframework.boot+AND+a:spring-boot-starter+AND+v:${{ matrix.spring-version }}.*' | jq -r '.response.docs[0].v')"
114+
SPRING_VERSION=$(cat spring-boot-version)
115+
echo "Using Spring Boot version $SPRING_VERSION"
97116
./mvnw versions:set-property -Dproperty=version.org.springframework.boot -DnewVersion=$SPRING_VERSION
117+
98118
- name: Quickly build timefold-solver
99119
run: ./mvnw -B -Dquickly clean install
100120
- name: Test Spring Boot
Lines changed: 1 addition & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,17 @@
11
package ai.timefold.solver.core.impl.solver;
22

33
import static org.assertj.core.api.Assertions.assertThat;
4-
import static org.awaitility.Awaitility.await;
54
import static org.mockito.Mockito.mock;
65
import static org.mockito.Mockito.times;
76
import static org.mockito.Mockito.verify;
87

9-
import java.time.Duration;
10-
import java.util.ArrayList;
118
import java.util.List;
12-
import java.util.Random;
13-
import java.util.UUID;
149
import java.util.concurrent.CompletableFuture;
15-
import java.util.concurrent.CountDownLatch;
16-
import java.util.concurrent.Executors;
1710

1811
import ai.timefold.solver.core.api.solver.Solver;
19-
import ai.timefold.solver.core.api.solver.SolverManager;
2012
import ai.timefold.solver.core.api.solver.change.ProblemChange;
21-
import ai.timefold.solver.core.api.solver.change.ProblemChangeDirector;
22-
import ai.timefold.solver.core.config.solver.SolverConfig;
23-
import ai.timefold.solver.core.config.solver.SolverManagerConfig;
24-
import ai.timefold.solver.core.testdomain.TestdataEasyScoreCalculator;
25-
import ai.timefold.solver.core.testdomain.TestdataEntity;
2613
import ai.timefold.solver.core.testdomain.TestdataSolution;
2714

28-
import org.jspecify.annotations.NullMarked;
29-
import org.junit.jupiter.api.RepeatedTest;
3015
import org.junit.jupiter.api.Test;
3116
import org.mockito.Mockito;
3217

@@ -92,7 +77,7 @@ void cancelPendingChanges_noChangesRetrieved() {
9277
assertThat(problemChange).isCancelled();
9378
}
9479

95-
private CompletableFuture<Void> addProblemChange(BestSolutionHolder<TestdataSolution> bestSolutionHolder) {
80+
private static CompletableFuture<Void> addProblemChange(BestSolutionHolder<TestdataSolution> bestSolutionHolder) {
9681
Solver<TestdataSolution> solver = mock(Solver.class);
9782
ProblemChange<TestdataSolution> problemChange = mock(ProblemChange.class);
9883
CompletableFuture<Void> futureChange = bestSolutionHolder.addProblemChange(solver, List.of(problemChange));
@@ -101,129 +86,4 @@ private CompletableFuture<Void> addProblemChange(BestSolutionHolder<TestdataSolu
10186
return futureChange;
10287
}
10388

104-
@RepeatedTest(value = 10, failureThreshold = 1) // Run it multiple times to increase the chance of catching a concurrency issue.
105-
void problemChangeBarrageIntermediateBestSolutionConsumer() throws InterruptedException {
106-
var solverConfig = new SolverConfig()
107-
.withSolutionClass(TestdataSolution.class)
108-
.withEntityClasses(TestdataEntity.class)
109-
.withEasyScoreCalculatorClass(TestdataEasyScoreCalculator.class);
110-
111-
var futureList = new ArrayList<RecordedFuture>();
112-
var executorService = Executors.newFixedThreadPool(2);
113-
try (var solverManager = SolverManager.<TestdataSolution, UUID> create(solverConfig, new SolverManagerConfig())) {
114-
var solverStartedLatch = new CountDownLatch(1);
115-
var solution = TestdataSolution.generateSolution();
116-
var solverJob = solverManager.solveBuilder()
117-
.withProblemId(UUID.randomUUID())
118-
.withProblem(solution)
119-
.withFirstInitializedSolutionConsumer((testdataSolution, isTerminatedEarly) -> {
120-
solverStartedLatch.countDown();
121-
})
122-
.withBestSolutionConsumer(testdataSolution -> {
123-
// No need to do anything.
124-
})
125-
.run();
126-
solverStartedLatch.await(); // Only start adding problem changes after CH finished.
127-
128-
var random = new Random(0);
129-
var problemChangeCount = 200; // Arbitrary, for a reasonable test duration.
130-
var problemChangesAddedLatch = new CountDownLatch(problemChangeCount);
131-
for (int i = 0; i < problemChangeCount; i++) {
132-
// Emulate a random delay between problem changes, as it would happen in real world.
133-
var randomDelayNanos = random.nextInt(1_000_000);
134-
var start = System.nanoTime();
135-
while ((System.nanoTime() - randomDelayNanos) < start) {
136-
Thread.onSpinWait();
137-
}
138-
// Submit the problem change and store the future.
139-
var problemChange = random.nextBoolean()
140-
? new EntityAddingProblemChange(problemChangesAddedLatch)
141-
: new EntityRemovingProblemChange(problemChangesAddedLatch);
142-
futureList.add(new RecordedFuture(i, solverJob.addProblemChange(problemChange)));
143-
}
144-
// All problem changes have been added.
145-
// Does not guarantee all have been processed though.
146-
problemChangesAddedLatch.await();
147-
148-
// A best solution should have been produced for all the processed changes.
149-
// Any incomplete futures here means some problem change was "lost".
150-
var lostFutureList = futureList.stream()
151-
.filter(future -> {
152-
await().atMost(Duration.ofSeconds(1))
153-
.pollInterval(Duration.ofMillis(1))
154-
.until(future::isDone);
155-
return !future.isDone();
156-
})
157-
.toList();
158-
var lostFutureCount = lostFutureList.size();
159-
if (lostFutureCount == 0) {
160-
return;
161-
}
162-
// The only exception to the rule:
163-
// the very last problem changes, which might not have been processed yet
164-
// by the time the solver was forced to terminate.
165-
var minIncompleteFutureId = lostFutureList.stream()
166-
.mapToInt(f -> f.id)
167-
.min()
168-
.orElseThrow(() -> new AssertionError("Impossible state: no incomplete future found."));
169-
assertThat(minIncompleteFutureId).isEqualTo(problemChangeCount - lostFutureCount);
170-
} finally {
171-
executorService.shutdownNow();
172-
// The solver is terminated.
173-
// All incomplete futures should have been canceled.
174-
var incompleteFutureList = futureList.stream()
175-
.filter(future -> {
176-
await().atMost(Duration.ofSeconds(1))
177-
.pollInterval(Duration.ofMillis(1))
178-
.until(future::isDone);
179-
return !future.isDone();
180-
})
181-
.toList();
182-
assertThat(incompleteFutureList)
183-
.as("All futures should have been completed by now.")
184-
.isEmpty();
185-
}
186-
187-
}
188-
189-
private record RecordedFuture(int id, CompletableFuture<Void> future) {
190-
191-
boolean isDone() {
192-
return future.isDone();
193-
}
194-
195-
}
196-
197-
@NullMarked
198-
private record EntityAddingProblemChange(CountDownLatch latch) implements ProblemChange<TestdataSolution> {
199-
200-
@Override
201-
public void doChange(TestdataSolution workingSolution, ProblemChangeDirector problemChangeDirector) {
202-
var entity = new TestdataEntity(UUID.randomUUID().toString());
203-
problemChangeDirector.addEntity(entity,
204-
e -> workingSolution.getEntityList().add(e));
205-
problemChangeDirector.updateShadowVariables();
206-
latch.countDown();
207-
}
208-
209-
}
210-
211-
@NullMarked
212-
private record EntityRemovingProblemChange(CountDownLatch latch) implements ProblemChange<TestdataSolution> {
213-
214-
@Override
215-
public void doChange(TestdataSolution workingSolution, ProblemChangeDirector problemChangeDirector) {
216-
if (workingSolution.getEntityList().size() < 2) {
217-
latch.countDown();
218-
return;
219-
}
220-
var entity = workingSolution.getEntityList().get(0);
221-
problemChangeDirector.removeEntity(entity,
222-
e -> workingSolution.getEntityList().remove(e));
223-
problemChangeDirector.updateShadowVariables();
224-
latch.countDown();
225-
}
226-
227-
}
228-
22989
}
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
package ai.timefold.solver.core.impl.solver;
2+
3+
import static org.assertj.core.api.Assertions.assertThat;
4+
import static org.awaitility.Awaitility.await;
5+
6+
import java.time.Duration;
7+
import java.util.ArrayList;
8+
import java.util.List;
9+
import java.util.Random;
10+
import java.util.UUID;
11+
import java.util.concurrent.CompletableFuture;
12+
import java.util.concurrent.CountDownLatch;
13+
import java.util.concurrent.TimeUnit;
14+
15+
import ai.timefold.solver.core.api.solver.SolverManager;
16+
import ai.timefold.solver.core.api.solver.change.ProblemChange;
17+
import ai.timefold.solver.core.api.solver.change.ProblemChangeDirector;
18+
import ai.timefold.solver.core.config.solver.SolverConfig;
19+
import ai.timefold.solver.core.config.solver.SolverManagerConfig;
20+
import ai.timefold.solver.core.testdomain.TestdataEasyScoreCalculator;
21+
import ai.timefold.solver.core.testdomain.TestdataEntity;
22+
import ai.timefold.solver.core.testdomain.TestdataSolution;
23+
24+
import org.awaitility.pollinterval.FibonacciPollInterval;
25+
import org.jspecify.annotations.NullMarked;
26+
import org.junit.jupiter.api.RepeatedTest;
27+
28+
class ProblemChangeBarrageIT {
29+
30+
// Run it multiple times to increase the chance of catching a concurrency issue.
31+
// Also run it as integration test, away from the other solver tests which may be executing in parallel,
32+
// possibly causing contention on shared resources.
33+
@RepeatedTest(value = 10, failureThreshold = 1)
34+
void problemChangeBarrageIntermediateBestSolutionConsumer() throws InterruptedException {
35+
var solverConfig = new SolverConfig()
36+
.withSolutionClass(TestdataSolution.class)
37+
.withEntityClasses(TestdataEntity.class)
38+
.withEasyScoreCalculatorClass(TestdataEasyScoreCalculator.class);
39+
40+
var futureList = new ArrayList<RecordedFuture>();
41+
try (var solverManager = SolverManager.<TestdataSolution, UUID> create(solverConfig, new SolverManagerConfig())) {
42+
var solverStartedLatch = new CountDownLatch(1);
43+
var solution = TestdataSolution.generateSolution();
44+
var solverJob = solverManager.solveBuilder()
45+
.withProblemId(UUID.randomUUID())
46+
.withProblem(solution)
47+
.withFirstInitializedSolutionConsumer((testdataSolution, isTerminatedEarly) -> {
48+
solverStartedLatch.countDown();
49+
})
50+
.withBestSolutionConsumer(testdataSolution -> {
51+
// No need to do anything.
52+
})
53+
.run();
54+
solverStartedLatch.await(); // Only start adding problem changes after CH finished.
55+
56+
var random = new Random(0);
57+
var problemChangeCount = 200; // Arbitrary, for a reasonable test duration.
58+
var problemChangesAddedLatch = new CountDownLatch(problemChangeCount);
59+
for (int i = 0; i < problemChangeCount; i++) {
60+
// Emulate a random delay between problem changes, as it would happen in real world.
61+
var randomDelayNanos = random.nextInt(1_000_000);
62+
var start = System.nanoTime();
63+
while ((System.nanoTime() - randomDelayNanos) < start) {
64+
Thread.onSpinWait();
65+
}
66+
// Submit the problem change and store the future.
67+
var problemChange = random.nextBoolean()
68+
? new EntityAddingProblemChange(problemChangesAddedLatch)
69+
: new EntityRemovingProblemChange(problemChangesAddedLatch);
70+
futureList.add(new RecordedFuture(i, solverJob.addProblemChange(problemChange)));
71+
}
72+
// All problem changes have been added.
73+
// Does not guarantee all have been processed though.
74+
problemChangesAddedLatch.await();
75+
76+
// A best solution should have been produced for all the processed changes.
77+
// Any incomplete futures here means some problem change was "lost".
78+
var lostFutureList = getOutstandingFutures(futureList);
79+
var lostFutureCount = lostFutureList.size();
80+
if (lostFutureCount == 0) {
81+
return;
82+
}
83+
// The only exception to the rule:
84+
// the very last problem changes, which might not have been processed yet
85+
// by the time the solver was forced to terminate.
86+
var minIncompleteFutureId = lostFutureList.stream()
87+
.mapToInt(f -> f.id)
88+
.min()
89+
.orElseThrow(() -> new AssertionError("Impossible state: no incomplete future found."));
90+
assertThat(minIncompleteFutureId).isEqualTo(problemChangeCount - lostFutureCount);
91+
} finally {
92+
// The solver is terminated.
93+
// All incomplete futures should have been canceled.
94+
var incompleteFutureList = getOutstandingFutures(futureList);
95+
assertThat(incompleteFutureList)
96+
.as("All futures should have been completed by now.")
97+
.isEmpty();
98+
}
99+
100+
}
101+
102+
private static List<RecordedFuture> getOutstandingFutures(List<RecordedFuture> futureList) {
103+
// We wait for at most 10 seconds for each future to complete,
104+
// but we expect the completion to be done almost immediately.
105+
// The large timeout is just to avoid test flakes on congested environments;
106+
// after such a long time, something is clearly wrong.
107+
return futureList.stream()
108+
.filter(future -> {
109+
await().atMost(Duration.ofSeconds(10))
110+
.pollInterval(FibonacciPollInterval.fibonacci(1, TimeUnit.MILLISECONDS))
111+
.until(future::isDone);
112+
return !future.isDone();
113+
})
114+
.toList();
115+
}
116+
117+
@NullMarked
118+
private record RecordedFuture(int id, CompletableFuture<Void> future) {
119+
120+
boolean isDone() {
121+
return future.isDone();
122+
}
123+
124+
}
125+
126+
@NullMarked
127+
private record EntityAddingProblemChange(CountDownLatch latch) implements ProblemChange<TestdataSolution> {
128+
129+
@Override
130+
public void doChange(TestdataSolution workingSolution, ProblemChangeDirector problemChangeDirector) {
131+
var entity = new TestdataEntity(UUID.randomUUID().toString());
132+
problemChangeDirector.addEntity(entity,
133+
e -> workingSolution.getEntityList().add(e));
134+
problemChangeDirector.updateShadowVariables();
135+
latch.countDown();
136+
}
137+
138+
}
139+
140+
@NullMarked
141+
private record EntityRemovingProblemChange(CountDownLatch latch) implements ProblemChange<TestdataSolution> {
142+
143+
@Override
144+
public void doChange(TestdataSolution workingSolution, ProblemChangeDirector problemChangeDirector) {
145+
if (workingSolution.getEntityList().size() < 2) {
146+
latch.countDown();
147+
return;
148+
}
149+
var entity = workingSolution.getEntityList().get(0);
150+
problemChangeDirector.removeEntity(entity,
151+
e -> workingSolution.getEntityList().remove(e));
152+
problemChangeDirector.updateShadowVariables();
153+
latch.countDown();
154+
}
155+
156+
}
157+
158+
}

core/src/test/resources/logback-test.xml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,21 @@
33

44
<appender name="consoleAppender" class="ch.qos.logback.core.ConsoleAppender">
55
<encoder>
6-
<!-- %L lowers performance, %C and %c break indentation and therefore reduce readability, normal %t is verbose -->
76
<pattern>%d{HH:mm:ss.SSS} [%-12.12t] %-5p %m%n</pattern>
87
</encoder>
98
</appender>
109

10+
<appender name="asyncAppender" class="ch.qos.logback.classic.AsyncAppender">
11+
<appender-ref ref="consoleAppender" />
12+
</appender>
13+
1114
<!-- To override the info log level from the command line, use the VM option "-Dlogback.level.ai.timefold.solver=trace" -->
1215
<logger name="ai.timefold.solver" level="${logback.level.ai.timefold.solver:-debug}"/>
1316
<!-- Don't pollute the test log with a stacktrace -->
1417
<logger name="ai.timefold.solver.benchmark.impl.DefaultPlannerBenchmark.singleBenchmarkRunnerException" level="error"/>
1518

16-
<root level="debug">
17-
<appender-ref ref="consoleAppender" />
19+
<root level="info">
20+
<appender-ref ref="asyncAppender" />
1821
</root>
1922

2023
</configuration>

0 commit comments

Comments
 (0)