Skip to content

Commit d2df7c6

Browse files
committed
add paired log assertion verb and pass env overrides through restart to fix quiescence
Signed-off-by: Alex Kehayov <aleks.kehayov@limechain.tech>
1 parent 433c9aa commit d2df7c6

File tree

4 files changed

+262
-18
lines changed

4 files changed

+262
-18
lines changed

hedera-node/test-clients/src/main/java/com/hedera/services/bdd/spec/utilops/UtilVerbs.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@
163163
import com.hedera.services.bdd.spec.utilops.pauses.HapiSpecWaitUntil;
164164
import com.hedera.services.bdd.spec.utilops.pauses.HapiSpecWaitUntilNextBlock;
165165
import com.hedera.services.bdd.spec.utilops.streams.LogContainmentOp;
166+
import com.hedera.services.bdd.spec.utilops.streams.LogContainmentPairTimeframeOp;
166167
import com.hedera.services.bdd.spec.utilops.streams.LogContainmentTimeframeOp;
167168
import com.hedera.services.bdd.spec.utilops.streams.LogValidationOp;
168169
import com.hedera.services.bdd.spec.utilops.streams.StreamValidationOp;
@@ -3512,6 +3513,42 @@ public static LogContainmentTimeframeOp assertHgcaaLogContainsTimeframe(
35123513
selector, APPLICATION_LOG, Arrays.asList(patterns), startTimeSupplier, timeframe, waitTimeout);
35133514
}
35143515

3516+
/**
3517+
* Asserts that two log patterns appear in order in the specified node's HGCAA log within a timeframe,
3518+
* with a time gap between them that falls within {@code [minGap, maxGap]}. This is useful for
3519+
* distinguishing real state transitions (e.g. sustained quiescence) from transient flickers.
3520+
*
3521+
* @param selector the node selector
3522+
* @param startTimeSupplier supplier for the start time of the timeframe
3523+
* @param timeframe the duration of the timeframe window to search
3524+
* @param waitTimeout the duration to wait (polling) for the pair to appear
3525+
* @param firstPattern the first pattern to match
3526+
* @param secondPattern the second pattern to match (must appear after the first)
3527+
* @param minGap minimum required time gap between the two pattern matches
3528+
* @param maxGap maximum allowed time gap between the two pattern matches
3529+
* @return a new LogContainmentPairTimeframeOp
3530+
*/
3531+
public static LogContainmentPairTimeframeOp assertHgcaaLogContainsPairTimeframe(
3532+
@NonNull final NodeSelector selector,
3533+
@NonNull final Supplier<Instant> startTimeSupplier,
3534+
@NonNull final Duration timeframe,
3535+
@NonNull final Duration waitTimeout,
3536+
@NonNull final String firstPattern,
3537+
@NonNull final String secondPattern,
3538+
@NonNull final Duration minGap,
3539+
@NonNull final Duration maxGap) {
3540+
return new LogContainmentPairTimeframeOp(
3541+
selector,
3542+
APPLICATION_LOG,
3543+
startTimeSupplier,
3544+
timeframe,
3545+
waitTimeout,
3546+
firstPattern,
3547+
secondPattern,
3548+
minGap,
3549+
maxGap);
3550+
}
3551+
35153552
public static CustomSpecAssert valueIsInRange(
35163553
final double value, final double lowerBoundInclusive, final double upperBoundExclusive) {
35173554
return assertionsHold((spec, opLog) -> {
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
package com.hedera.services.bdd.spec.utilops.streams;
3+
4+
import static com.hedera.services.bdd.spec.transactions.TxnUtils.doIfNotInterrupted;
5+
import static java.util.Objects.requireNonNull;
6+
import static java.util.concurrent.TimeUnit.MILLISECONDS;
7+
8+
import com.hedera.services.bdd.junit.hedera.ExternalPath;
9+
import com.hedera.services.bdd.junit.hedera.NodeSelector;
10+
import com.hedera.services.bdd.spec.HapiSpec;
11+
import com.hedera.services.bdd.spec.utilops.UtilOp;
12+
import edu.umd.cs.findbugs.annotations.NonNull;
13+
import java.io.BufferedReader;
14+
import java.nio.file.Files;
15+
import java.nio.file.NoSuchFileException;
16+
import java.time.Duration;
17+
import java.time.Instant;
18+
import java.time.LocalDateTime;
19+
import java.time.ZoneId;
20+
import java.time.format.DateTimeFormatter;
21+
import java.util.concurrent.atomic.AtomicLong;
22+
import java.util.function.Supplier;
23+
import org.apache.logging.log4j.LogManager;
24+
import org.apache.logging.log4j.Logger;
25+
import org.junit.jupiter.api.Assertions;
26+
27+
/**
28+
* A {@link UtilOp} that validates that a node's log contains two patterns appearing in order
29+
* within a specified timeframe, with a time gap between them that falls within {@code [minGap, maxGap]}.
30+
*
31+
* <p>This is useful for asserting that a state transition (e.g. quiescence) lasted a meaningful
32+
* duration, distinguishing real transitions from transient flickers that resolve in milliseconds.
33+
*/
34+
public class LogContainmentPairTimeframeOp extends UtilOp {
35+
private static final Logger log = LogManager.getLogger(LogContainmentPairTimeframeOp.class);
36+
private static final DateTimeFormatter LOG_TIMESTAMP_FORMAT =
37+
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS");
38+
39+
private final NodeSelector selector;
40+
private final ExternalPath path;
41+
private final String firstPattern;
42+
private final String secondPattern;
43+
private final Supplier<Instant> startTimeSupplier;
44+
private final Duration timeframe;
45+
private final Duration waitTimeout;
46+
private final Duration minGap;
47+
private final Duration maxGap;
48+
49+
// State for incremental reading
50+
private final AtomicLong linesProcessed = new AtomicLong(0L);
51+
// Persists across polls so a firstPattern match on one poll can pair with a secondPattern on the next
52+
private Instant candidateFirstTime = null;
53+
54+
public LogContainmentPairTimeframeOp(
55+
@NonNull final NodeSelector selector,
56+
@NonNull final ExternalPath path,
57+
@NonNull final Supplier<Instant> startTimeSupplier,
58+
@NonNull final Duration timeframe,
59+
@NonNull final Duration waitTimeout,
60+
@NonNull final String firstPattern,
61+
@NonNull final String secondPattern,
62+
@NonNull final Duration minGap,
63+
@NonNull final Duration maxGap) {
64+
if (path != ExternalPath.APPLICATION_LOG
65+
&& path != ExternalPath.BLOCK_NODE_COMMS_LOG
66+
&& path != ExternalPath.SWIRLDS_LOG) {
67+
throw new IllegalArgumentException(path + " is not a log");
68+
}
69+
this.path = requireNonNull(path);
70+
this.selector = requireNonNull(selector);
71+
this.startTimeSupplier = requireNonNull(startTimeSupplier);
72+
this.timeframe = requireNonNull(timeframe);
73+
this.waitTimeout = requireNonNull(waitTimeout);
74+
this.firstPattern = requireNonNull(firstPattern);
75+
this.secondPattern = requireNonNull(secondPattern);
76+
this.minGap = requireNonNull(minGap);
77+
this.maxGap = requireNonNull(maxGap);
78+
if (minGap.compareTo(maxGap) > 0) {
79+
throw new IllegalArgumentException("minGap (" + minGap + ") must be <= maxGap (" + maxGap + ")");
80+
}
81+
}
82+
83+
@Override
84+
protected boolean submitOp(@NonNull final HapiSpec spec) throws Throwable {
85+
final Instant startTime = startTimeSupplier.get();
86+
if (startTime == null) {
87+
throw new IllegalStateException("Start time supplier returned null");
88+
}
89+
final Instant endTime = startTime.plus(timeframe);
90+
final Instant timeoutDeadline = Instant.now().plus(waitTimeout);
91+
92+
log.info(
93+
"Starting paired log check: StartTime={}, Timeframe={}, Timeout={}, "
94+
+ "FirstPattern='{}', SecondPattern='{}', MinGap={}, MaxGap={}",
95+
startTime,
96+
timeframe,
97+
waitTimeout,
98+
firstPattern,
99+
secondPattern,
100+
minGap,
101+
maxGap);
102+
103+
while (Instant.now().isBefore(timeoutDeadline)) {
104+
final var result = new SearchResult();
105+
spec.targetNetworkOrThrow().nodesFor(selector).forEach(node -> {
106+
searchNodeLog(node.getExternalPath(path), startTime, endTime, result);
107+
});
108+
109+
if (result.matched) {
110+
log.info(
111+
"Found matching pair: '{}' at {} and '{}' at {} (gap={})",
112+
firstPattern,
113+
result.firstTime,
114+
secondPattern,
115+
result.secondTime,
116+
Duration.between(result.firstTime, result.secondTime));
117+
return false; // Success
118+
}
119+
120+
if (Instant.now().isBefore(timeoutDeadline)) {
121+
doIfNotInterrupted(() -> MILLISECONDS.sleep(1000));
122+
}
123+
}
124+
125+
Assertions.fail(String.format(
126+
"Did not find a matching pair of log patterns within the timeframe. "
127+
+ "StartTime=%s, Timeframe=%s, Timeout=%s, FirstPattern='%s', SecondPattern='%s', "
128+
+ "MinGap=%s, MaxGap=%s",
129+
startTime, timeframe, waitTimeout, firstPattern, secondPattern, minGap, maxGap));
130+
131+
return false;
132+
}
133+
134+
private void searchNodeLog(
135+
@NonNull final java.nio.file.Path logPath,
136+
@NonNull final Instant startTime,
137+
@NonNull final Instant endTime,
138+
@NonNull final SearchResult result) {
139+
if (result.matched) {
140+
return;
141+
}
142+
long newLinesRead = 0;
143+
try (BufferedReader reader = Files.newBufferedReader(logPath)) {
144+
try (var linesStream = reader.lines().skip(linesProcessed.get())) {
145+
final var iterator = linesStream.iterator();
146+
while (iterator.hasNext()) {
147+
final String line = iterator.next();
148+
newLinesRead++;
149+
150+
final Instant logInstant;
151+
try {
152+
if (line.length() < 23) continue;
153+
final String timestamp = line.substring(0, 23);
154+
final LocalDateTime logTime = LocalDateTime.parse(timestamp, LOG_TIMESTAMP_FORMAT);
155+
logInstant = logTime.atZone(ZoneId.systemDefault()).toInstant();
156+
} catch (Exception e) {
157+
continue;
158+
}
159+
160+
if (!logInstant.isAfter(startTime) || !logInstant.isBefore(endTime)) {
161+
continue;
162+
}
163+
164+
// Check for firstPattern — always update candidate to the latest match
165+
if (line.contains(firstPattern)) {
166+
candidateFirstTime = logInstant;
167+
}
168+
// Check for secondPattern only if we have a candidate firstPattern
169+
if (candidateFirstTime != null && line.contains(secondPattern)) {
170+
final Duration gap = Duration.between(candidateFirstTime, logInstant);
171+
if (gap.compareTo(minGap) >= 0 && gap.compareTo(maxGap) <= 0) {
172+
result.matched = true;
173+
result.firstTime = candidateFirstTime;
174+
result.secondTime = logInstant;
175+
break;
176+
}
177+
// Gap didn't qualify — reset candidate so we look for the next pair
178+
candidateFirstTime = null;
179+
}
180+
}
181+
}
182+
} catch (NoSuchFileException nsfe) {
183+
log.warn("Log file not found: {}. Will retry.", logPath);
184+
} catch (Exception e) {
185+
log.error("Error reading log file {}", logPath, e);
186+
throw new RuntimeException("Error during log processing for " + logPath, e);
187+
}
188+
linesProcessed.addAndGet(newLinesRead);
189+
}
190+
191+
/** Mutable holder for the search result across nodes. */
192+
private static class SearchResult {
193+
boolean matched;
194+
Instant firstTime;
195+
Instant secondTime;
196+
}
197+
}

hedera-node/test-clients/src/main/java/com/hedera/services/bdd/suites/regression/system/LifecycleTest.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,25 @@ default SpecOperation upgradeToNextConfigVersion() {
176176
* @return the operation
177177
*/
178178
static SpecOperation restartAtNextConfigVersion() {
179+
return restartAtNextConfigVersion(Map.of());
180+
}
181+
182+
/**
183+
* Returns an operation that restarts the network at the next configuration version with the given
184+
* environment overrides. The overrides are set as process environment variables on the subprocess
185+
* nodes, so they survive the restart and take precedence over {@code application.properties}.
186+
*
187+
* @param envOverrides the environment overrides to pass to the restarted nodes
188+
* @return the operation
189+
*/
190+
static SpecOperation restartAtNextConfigVersion(@NonNull final Map<String, String> envOverrides) {
179191
return blockingOrder(
180192
freezeOnly().startingIn(5).seconds().payingWith(GENESIS).deferStatusResolution(),
181193
// Immediately submit a transaction in the same round to ensure freeze time is only
182194
// reset when last frozen time matches it (i.e., in a post-upgrade transaction)
183195
cryptoTransfer(tinyBarsFromTo(GENESIS, FUNDING, 1)),
184196
confirmFreezeAndShutdown(),
185-
sourcing(() -> FakeNmt.restartNetwork(CURRENT_CONFIG_VERSION.incrementAndGet(), Map.of())),
197+
sourcing(() -> FakeNmt.restartNetwork(CURRENT_CONFIG_VERSION.incrementAndGet(), envOverrides)),
186198
waitForActiveNetworkWithReassignedPorts(RESTART_TIMEOUT));
187199
}
188200

hedera-node/test-clients/src/main/java/com/hedera/services/bdd/suites/regression/system/QuiesceThenMixedOpsRestartTest.java

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,10 @@
1010
import static com.hedera.services.bdd.spec.transactions.TxnVerbs.cryptoTransfer;
1111
import static com.hedera.services.bdd.spec.transactions.TxnVerbs.scheduleCreate;
1212
import static com.hedera.services.bdd.spec.transactions.crypto.HapiCryptoTransfer.tinyBarsFromTo;
13-
import static com.hedera.services.bdd.spec.utilops.UtilVerbs.assertHgcaaLogContainsTimeframe;
13+
import static com.hedera.services.bdd.spec.utilops.UtilVerbs.assertHgcaaLogContainsPairTimeframe;
1414
import static com.hedera.services.bdd.spec.utilops.UtilVerbs.doWithStartupDuration;
15-
import static com.hedera.services.bdd.spec.utilops.UtilVerbs.overridingAllOf;
15+
import static com.hedera.services.bdd.spec.utilops.UtilVerbs.doingContextual;
1616
import static com.hedera.services.bdd.spec.utilops.UtilVerbs.sleepForSeconds;
17-
import static com.hedera.services.bdd.spec.utilops.UtilVerbs.withOpContext;
1817
import static com.hedera.services.bdd.suites.HapiSuite.FUNDING;
1918
import static com.hedera.services.bdd.suites.HapiSuite.GENESIS;
2019
import static com.hedera.services.bdd.suites.HapiSuite.ONE_HBAR;
@@ -30,7 +29,6 @@
3029
import java.util.Map;
3130
import java.util.concurrent.atomic.AtomicReference;
3231
import java.util.stream.Stream;
33-
import org.junit.jupiter.api.Disabled;
3432
import org.junit.jupiter.api.DynamicTest;
3533
import org.junit.jupiter.api.Tag;
3634

@@ -41,26 +39,24 @@
4139
* burst of mixed operations again.
4240
*/
4341
@Tag(RESTART)
44-
@Disabled
4542
public class QuiesceThenMixedOpsRestartTest implements LifecycleTest {
4643
private static final int MIXED_OPS_BURST_TPS = 50;
4744

4845
@LeakyHapiTest(overrides = {"staking.periodMins", "nodes.nodeRewardsEnabled"})
4946
final Stream<DynamicTest> quiesceAndThenRestartMixedOps() {
5047
final AtomicReference<Instant> scheduleExpiry = new AtomicReference<>();
51-
final AtomicReference<Instant> sleepStart = new AtomicReference<>(Instant.now());
48+
final AtomicReference<Instant> logAssertionStart = new AtomicReference<>();
5249
return hapiTest(
53-
// Override properties that interfere with the idle->QUIESCE
54-
// transition and restart so they take effect with a fresh
55-
// lastQuiescenceCommand in BlockStreamManagerImpl
56-
overridingAllOf(Map.of(
50+
// Restart with env overrides that suppress staking period transactions
51+
// which would otherwise prevent the network from reaching sustained quiescence
52+
LifecycleTest.restartAtNextConfigVersion(Map.of(
5753
"staking.periodMins", "1440",
5854
"nodes.nodeRewardsEnabled", "false")),
59-
LifecycleTest.restartAtNextConfigVersion(),
6055
// Ensure the network is out of quiescence before the test logic
6156
cryptoTransfer(tinyBarsFromTo(GENESIS, FUNDING, 1)),
6257
// --- actual test workflow ---
6358
cryptoCreate("scheduledReceiver").via("txn").balance(41 * ONE_HBAR),
59+
doingContextual((ignored) -> logAssertionStart.set(Instant.now())),
6460
doWithStartupDuration("quiescence.tctDuration", duration -> scheduleCreate(
6561
"schedule", cryptoTransfer(tinyBarsFromTo(GENESIS, "scheduledReceiver", ONE_HBAR)))
6662
.payingWith(GENESIS)
@@ -71,14 +67,16 @@ final Stream<DynamicTest> quiesceAndThenRestartMixedOps() {
7167
getScheduleInfo("schedule")
7268
.exposingInfoTo(info -> scheduleExpiry.set(asInstant(info.getExpirationTime())))
7369
.logged(),
74-
withOpContext((spec, opLog) -> sleepStart.set(Instant.now())),
7570
doWithStartupDuration("quiescence.tctDuration", duration -> sleepForSeconds(2 * duration.toSeconds())),
76-
assertHgcaaLogContainsTimeframe(
71+
assertHgcaaLogContainsPairTimeframe(
7772
NodeSelector.byNodeId(0),
78-
sleepStart::get,
79-
Duration.ofSeconds(15),
80-
Duration.ofSeconds(15),
81-
"to QUIESCE"),
73+
logAssertionStart::get,
74+
Duration.ofSeconds(60), // for both lines
75+
Duration.ofSeconds(60),
76+
"Started quiesced heartbeat",
77+
"Stopping quiescence heartbeat",
78+
Duration.ofSeconds(5),
79+
Duration.ofSeconds(40)),
8280
doWithStartupDuration("quiescence.tctDuration", duration -> sleepForSeconds(4 * duration.toSeconds())),
8381
getAccountBalance("scheduledReceiver").hasTinyBars(42 * ONE_HBAR),
8482
getTxnRecord("creation").scheduled().exposingTo(r -> {

0 commit comments

Comments
 (0)