From 5fc736af839639d08ec214d9f9cfc75b4ce19bdf Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 11:08:41 -0700 Subject: [PATCH 01/20] draft of refresh failure shutdown --- src/main/java/com/uid2/operator/Main.java | 36 +++-- .../vertx/OperatorShutdownHandler.java | 95 +++++++----- .../operator/OperatorShutdownHandlerTest.java | 144 +++++++++++++----- .../operator/benchmark/BenchmarkCommon.java | 2 +- 4 files changed, 189 insertions(+), 88 deletions(-) diff --git a/src/main/java/com/uid2/operator/Main.java b/src/main/java/com/uid2/operator/Main.java index c46a080a0..1b66ef88e 100644 --- a/src/main/java/com/uid2/operator/Main.java +++ b/src/main/java/com/uid2/operator/Main.java @@ -117,7 +117,7 @@ public Main(Vertx vertx, JsonObject config) throws Exception { this.encryptedCloudFilesEnabled = config.getBoolean(Const.Config.EncryptedFiles, false); this.shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(config.getInteger(Const.Config.SaltsExpiredShutdownHours, 12)), - Duration.ofHours(config.getInteger(Const.Config.KeysetKeysFailedShutdownHours, 168)), + Duration.ofHours(config.getInteger(Const.Config.StoreRefreshStaleShutdownHours, 12)), Clock.systemUTC(), new ShutdownService()); this.uidInstanceIdProvider = new UidInstanceIdProvider(config); @@ -406,31 +406,43 @@ private Future createStoreVerticles() throws Exception { List fs = new ArrayList<>(); if (clientSideTokenGenerate) { - fs.add(createAndDeployRotatingStoreVerticle("site", siteProvider, "site_refresh_ms")); - fs.add(createAndDeployRotatingStoreVerticle("client_side_keypairs", clientSideKeypairProvider, "client_side_keypairs_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("site", siteProvider, "site_refresh_ms", + refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("site", refreshSucceeded))); + fs.add(createAndDeployRotatingStoreVerticle("client_side_keypairs", clientSideKeypairProvider, "client_side_keypairs_refresh_ms", + refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("client_side_keypairs", refreshSucceeded))); } if (validateServiceLinks) { - fs.add(createAndDeployRotatingStoreVerticle("service", serviceProvider, "service_refresh_ms")); - fs.add(createAndDeployRotatingStoreVerticle("service_link", serviceLinkProvider, "service_link_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("service", serviceProvider, "service_refresh_ms", + refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("service", refreshSucceeded))); + fs.add(createAndDeployRotatingStoreVerticle("service_link", serviceLinkProvider, "service_link_refresh_ms", + refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("service_link", refreshSucceeded))); } if (encryptedCloudFilesEnabled) { - fs.add(createAndDeployRotatingStoreVerticle("cloud_encryption_keys", cloudEncryptionKeyProvider, "cloud_encryption_keys_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("cloud_encryption_keys", cloudEncryptionKeyProvider, "cloud_encryption_keys_refresh_ms", + refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("cloud_encryption_keys", refreshSucceeded))); } if (useRemoteConfig) { - fs.add(createAndDeployRotatingStoreVerticle("runtime_config", (RuntimeConfigStore) configStore, Const.Config.ConfigScanPeriodMsProp)); + fs.add(createAndDeployRotatingStoreVerticle("runtime_config", (RuntimeConfigStore) configStore, Const.Config.ConfigScanPeriodMsProp, + refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("runtime_config", refreshSucceeded))); } - fs.add(createAndDeployRotatingStoreVerticle("auth", clientKeyProvider, "auth_refresh_ms")); - fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("auth", clientKeyProvider, "auth_refresh_ms", + refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("auth", refreshSucceeded))); + fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms", + refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("keyset", refreshSucceeded))); fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms", - this.shutdownHandler::handleKeysetKeyRefreshResponse)); - fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms")); + refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("keysetkey", refreshSucceeded))); + fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms", + refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("salt", refreshSucceeded))); fs.add(createAndDeployCloudSyncStoreVerticle("optout", fsOptOut, optOutCloudSync)); CompositeFuture.all(fs).onComplete(ar -> { if (ar.failed()) promise.fail(new Exception(ar.cause())); - else promise.complete(); + else { + promise.complete(); + this.shutdownHandler.startPeriodicStaleCheck(this.vertx); + } }); diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index bba8f8540..642dd8a07 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -2,7 +2,7 @@ import com.uid2.operator.service.ShutdownService; import com.uid2.shared.attest.AttestationResponseCode; -import lombok.extern.java.Log; +import io.vertx.core.Vertx; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.utils.Pair; @@ -11,28 +11,30 @@ import java.time.Duration; import java.time.Instant; import java.time.temporal.ChronoUnit; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; public class OperatorShutdownHandler { private static final Logger LOGGER = LoggerFactory.getLogger(OperatorShutdownHandler.class); private static final int SALT_FAILURE_LOG_INTERVAL_MINUTES = 10; - private static final int KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES = 10; + private static final int STORE_REFRESH_STALENESS_CHECK_INTERVAL_MINUTES = 60; private final Duration attestShutdownWaitTime; private final Duration saltShutdownWaitTime; - private final Duration keysetKeyShutdownWaitTime; + private final Duration storeRefreshStaleTimeout; private final AtomicReference attestFailureStartTime = new AtomicReference<>(null); private final AtomicReference saltFailureStartTime = new AtomicReference<>(null); - private final AtomicReference keysetKeyFailureStartTime = new AtomicReference<>(null); private final AtomicReference lastSaltFailureLogTime = new AtomicReference<>(null); - private final AtomicReference lastKeysetKeyFailureLogTime = new AtomicReference<>(null); + private final Map> lastSuccessfulRefreshTimes = new ConcurrentHashMap<>(); private final Clock clock; private final ShutdownService shutdownService; + private long periodicCheckTimerId = -1; public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime, - Duration keysetKeyShutdownWaitTime, Clock clock, ShutdownService shutdownService) { + Duration storeRefreshStaleTimeout, Clock clock, ShutdownService shutdownService) { this.attestShutdownWaitTime = attestShutdownWaitTime; this.saltShutdownWaitTime = saltShutdownWaitTime; - this.keysetKeyShutdownWaitTime = keysetKeyShutdownWaitTime; + this.storeRefreshStaleTimeout = storeRefreshStaleTimeout; this.clock = clock; this.shutdownService = shutdownService; } @@ -60,37 +62,6 @@ public void logSaltFailureAtInterval() { } } - public void handleKeysetKeyRefreshResponse(Boolean success) { - if (success) { - keysetKeyFailureStartTime.set(null); - lastKeysetKeyFailureLogTime.set(null); - LOGGER.debug("keyset keys sync successful"); - } else { - Instant t = keysetKeyFailureStartTime.get(); - if (t == null) { - keysetKeyFailureStartTime.set(clock.instant()); - lastKeysetKeyFailureLogTime.set(clock.instant()); - LOGGER.warn("keyset keys sync started failing. shutdown timer started"); - } else { - Duration elapsed = Duration.between(t, clock.instant()); - if (elapsed.compareTo(this.keysetKeyShutdownWaitTime) > 0) { - LOGGER.error("keyset keys have been failing to sync for too long. shutting down operator"); - this.shutdownService.Shutdown(1); - } else { - logKeysetKeyFailureProgressAtInterval(t, elapsed); - } - } - } - } - - private void logKeysetKeyFailureProgressAtInterval(Instant failureStartTime, Duration elapsed) { - Instant lastLogTime = lastKeysetKeyFailureLogTime.get(); - if (lastLogTime == null || clock.instant().isAfter(lastLogTime.plus(KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES, ChronoUnit.MINUTES))) { - LOGGER.warn("keyset keys sync still failing - elapsed time: {}d {}h {}m", elapsed.toDays(), elapsed.toHoursPart(), elapsed.toMinutesPart()); - lastKeysetKeyFailureLogTime.set(clock.instant()); - } - } - public void handleAttestResponse(Pair response) { if (response.left() == AttestationResponseCode.AttestationFailure) { LOGGER.error("core attestation failed with AttestationFailure, shutting down operator, core response: {}", response.right()); @@ -108,4 +79,52 @@ public void handleAttestResponse(Pair response) } } } + + public void handleStoreRefresh(String storeName, Boolean success) { + if (success) { + lastSuccessfulRefreshTimes.computeIfAbsent(storeName, k -> new AtomicReference<>()) + .set(clock.instant()); + LOGGER.trace("Store {} refresh successful at {}", storeName, clock.instant()); + } else { + LOGGER.debug("Store {} refresh failed, timestamp not updated", storeName); + } + } + + public void checkStoreRefreshStaleness() { + Instant now = clock.instant(); + for (Map.Entry> entry : lastSuccessfulRefreshTimes.entrySet()) { + String storeName = entry.getKey(); + Instant lastSuccess = entry.getValue().get(); + + if (lastSuccess == null) { + // Store hasn't had a successful refresh yet - might be during startup + // Don't trigger shutdown for stores that haven't initialized + continue; + } + + Duration timeSinceLastRefresh = Duration.between(lastSuccess, now); + if (timeSinceLastRefresh.compareTo(storeRefreshStaleTimeout) > 0) { + LOGGER.error("Store '{}' has not refreshed successfully for {} hours ({}). Shutting down operator", + storeName, timeSinceLastRefresh.toHours(), timeSinceLastRefresh); + shutdownService.Shutdown(1); + return; // Exit after triggering shutdown for first stale store + } + } + } + + public void startPeriodicStaleCheck(Vertx vertx) { + if (periodicCheckTimerId != -1) { + LOGGER.warn("Periodic store staleness check already started"); + return; + } + + long intervalMs = STORE_REFRESH_STALENESS_CHECK_INTERVAL_MINUTES * 60 * 1000L; + periodicCheckTimerId = vertx.setPeriodic(intervalMs, id -> { + LOGGER.debug("Running periodic store staleness check"); + checkStoreRefreshStaleness(); + }); + LOGGER.info("Started periodic store staleness check (interval: {} minutes, timeout: {} hours)", + STORE_REFRESH_STALENESS_CHECK_INTERVAL_MINUTES, + storeRefreshStaleTimeout.toHours()); + } } diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index e719fc8e1..26b40da91 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -19,7 +19,6 @@ import org.slf4j.LoggerFactory; import software.amazon.awssdk.utils.Pair; -import java.security.Permission; import java.time.Clock; import java.time.Duration; import java.time.Instant; @@ -43,7 +42,7 @@ void beforeEach() { mocks = MockitoAnnotations.openMocks(this); when(clock.instant()).thenAnswer(i -> Instant.now()); doThrow(new RuntimeException()).when(shutdownService).Shutdown(1); - this.operatorShutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(12), Duration.ofHours(168), clock, shutdownService); + this.operatorShutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(12), Duration.ofHours(12), clock, shutdownService); } @AfterEach @@ -62,7 +61,6 @@ void shutdownOnAttestFailure(VertxTestContext testContext) { this.operatorShutdownHandler.handleAttestResponse(Pair.of(AttestationResponseCode.AttestationFailure, "Unauthorized")); } catch (RuntimeException e) { verify(shutdownService).Shutdown(1); - String message = logWatcher.list.get(0).getFormattedMessage(); Assertions.assertEquals("core attestation failed with AttestationFailure, shutting down operator, core response: Unauthorized", logWatcher.list.get(0).getFormattedMessage()); testContext.completeNow(); } @@ -168,72 +166,144 @@ void saltsLogErrorAtInterval(VertxTestContext testContext) { } @Test - void shutdownOnKeysetKeyFailedTooLong(VertxTestContext testContext) { + void storeRefreshRecordsSuccessTimestamp(VertxTestContext testContext) { + // Simulate successful store refresh + this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + + // Verify no shutdown is triggered + verify(shutdownService, never()).Shutdown(anyInt()); + testContext.completeNow(); + } + + @Test + void storeRefreshFailureDoesNotResetTimestamp(VertxTestContext testContext) { + // First successful refresh + this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + + // Then failures - shouldn't reset the timestamp + this.operatorShutdownHandler.handleStoreRefresh("test_store", false); + this.operatorShutdownHandler.handleStoreRefresh("test_store", false); + + // Verify no shutdown is triggered yet + verify(shutdownService, never()).Shutdown(anyInt()); + testContext.completeNow(); + } + + @Test + void storeRefreshStaleShutdown(VertxTestContext testContext) { ListAppender logWatcher = new ListAppender<>(); logWatcher.start(); ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); - Assertions.assertTrue(logWatcher.list.get(0).getFormattedMessage().contains("keyset keys sync started failing")); - - when(clock.instant()).thenAnswer(i -> Instant.now().plus(7, ChronoUnit.DAYS).plusSeconds(60)); + // Initial successful refresh + this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + + // Move time forward by 12 hours + 1 second + when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS).plusSeconds(1)); + + // Check staleness - should trigger shutdown try { - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + this.operatorShutdownHandler.checkStoreRefreshStaleness(); } catch (RuntimeException e) { verify(shutdownService).Shutdown(1); Assertions.assertTrue(logWatcher.list.stream().anyMatch(log -> - log.getFormattedMessage().contains("keyset keys have been failing to sync for too long"))); + log.getFormattedMessage().contains("has not refreshed successfully") && + log.getFormattedMessage().contains("test_store"))); testContext.completeNow(); } } @Test - void keysetKeyRecoverOnSuccess(VertxTestContext testContext) { - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); - when(clock.instant()).thenAnswer(i -> Instant.now().plus(3, ChronoUnit.DAYS)); + void storeRefreshRecoverBeforeStale(VertxTestContext testContext) { + // Initial successful refresh + this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); - - when(clock.instant()).thenAnswer(i -> Instant.now().plus(7, ChronoUnit.DAYS)); + // Move time forward by 11 hours + when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.HOURS)); + + // Another successful refresh before timeout + this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + + // Move time forward another 12 hours from original time (but only 1 hour from last refresh) + when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS)); + + // Check staleness - should NOT trigger shutdown assertDoesNotThrow(() -> { - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + this.operatorShutdownHandler.checkStoreRefreshStaleness(); }); verify(shutdownService, never()).Shutdown(anyInt()); testContext.completeNow(); } @Test - void keysetKeyNoShutdownWhenAlwaysSuccessful(VertxTestContext testContext) { - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); + void multipleStoresOneStaleTriggers(VertxTestContext testContext) { + ListAppender logWatcher = new ListAppender<>(); + logWatcher.start(); + ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); + // Multiple stores succeed + this.operatorShutdownHandler.handleStoreRefresh("store1", true); + this.operatorShutdownHandler.handleStoreRefresh("store2", true); + this.operatorShutdownHandler.handleStoreRefresh("store3", true); + + // Move time forward + when(clock.instant()).thenAnswer(i -> Instant.now().plus(6, ChronoUnit.HOURS)); + + // Store1 and Store2 refresh successfully, but Store3 doesn't + this.operatorShutdownHandler.handleStoreRefresh("store1", true); + this.operatorShutdownHandler.handleStoreRefresh("store2", true); + + // Move time forward 12 hours from start (store3 hasn't refreshed for 12 hours) + when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS).plusSeconds(1)); + + // Check staleness - should trigger shutdown for store3 + try { + this.operatorShutdownHandler.checkStoreRefreshStaleness(); + } catch (RuntimeException e) { + verify(shutdownService).Shutdown(1); + Assertions.assertTrue(logWatcher.list.stream().anyMatch(log -> + log.getFormattedMessage().contains("store3") && + log.getFormattedMessage().contains("has not refreshed successfully"))); + testContext.completeNow(); + } + } + + @Test + void noShutdownWhenStoreNeverInitialized(VertxTestContext testContext) { + // Don't register any successful refresh for a store + // Just check staleness immediately + assertDoesNotThrow(() -> { + this.operatorShutdownHandler.checkStoreRefreshStaleness(); + }); + verify(shutdownService, never()).Shutdown(anyInt()); + testContext.completeNow(); + } + + @Test + void periodicCheckStartsSuccessfully(Vertx vertx, VertxTestContext testContext) { + // Start the periodic check + assertDoesNotThrow(() -> { + this.operatorShutdownHandler.startPeriodicStaleCheck(vertx); + }); + + // Verify it doesn't crash and doesn't trigger shutdown immediately verify(shutdownService, never()).Shutdown(anyInt()); testContext.completeNow(); } @Test - void keysetKeyLogProgressAtInterval(VertxTestContext testContext) { + void periodicCheckOnlyStartsOnce(Vertx vertx, VertxTestContext testContext) { ListAppender logWatcher = new ListAppender<>(); logWatcher.start(); ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); - long warnLogCount1 = logWatcher.list.stream().filter(log -> - log.getFormattedMessage().contains("keyset keys sync still failing")).count(); + // Start the periodic check twice + this.operatorShutdownHandler.startPeriodicStaleCheck(vertx); + this.operatorShutdownHandler.startPeriodicStaleCheck(vertx); - when(clock.instant()).thenAnswer(i -> Instant.now().plus(5, ChronoUnit.MINUTES)); - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); - long warnLogCount2 = logWatcher.list.stream().filter(log -> - log.getFormattedMessage().contains("keyset keys sync still failing")).count(); - Assertions.assertEquals(warnLogCount1, warnLogCount2); - - when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.MINUTES)); - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); - long warnLogCount3 = logWatcher.list.stream().filter(log -> - log.getFormattedMessage().contains("keyset keys sync still failing")).count(); - Assertions.assertTrue(warnLogCount3 > warnLogCount2); - + // Should log a warning + Assertions.assertTrue(logWatcher.list.stream().anyMatch(log -> + log.getFormattedMessage().contains("already started"))); testContext.completeNow(); } } diff --git a/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java b/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java index 54ee29192..a2c2ed442 100644 --- a/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java +++ b/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java @@ -75,7 +75,7 @@ public static IUIDOperatorService createUidOperatorService() throws Exception { saltProvider.getSnapshot(Instant.now()).getFirstLevelSalt(), /* out */ optOutPartitionFiles); final IOptOutStore optOutStore = new StaticOptOutStore(optOutLocalStorage, make1mOptOutEntryConfig(), optOutPartitionFiles); - final OperatorShutdownHandler shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(1), Duration.ofHours(1), Duration.ofHours(1), Clock.systemUTC(), new ShutdownService()); + final OperatorShutdownHandler shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(1), Duration.ofHours(1), Duration.ofHours(12), Clock.systemUTC(), new ShutdownService()); return new UIDOperatorService( optOutStore, saltProvider, From fbeb671be242513499de6adf7faab30d4b21f8b7 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 11:09:56 -0700 Subject: [PATCH 02/20] update shared version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 85f1dec90..506f57334 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ 2.1.0 2.1.13 2.1.0 - 11.1.80 + 11.1.81-alpha-317-SNAPSHOT ${project.version} 21 21 From d7233f9f756dcdbc59b2c4e3615408f7029d21fd Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 11:13:38 -0700 Subject: [PATCH 03/20] revert benchmarkcommons --- src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java b/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java index a2c2ed442..54ee29192 100644 --- a/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java +++ b/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java @@ -75,7 +75,7 @@ public static IUIDOperatorService createUidOperatorService() throws Exception { saltProvider.getSnapshot(Instant.now()).getFirstLevelSalt(), /* out */ optOutPartitionFiles); final IOptOutStore optOutStore = new StaticOptOutStore(optOutLocalStorage, make1mOptOutEntryConfig(), optOutPartitionFiles); - final OperatorShutdownHandler shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(1), Duration.ofHours(1), Duration.ofHours(12), Clock.systemUTC(), new ShutdownService()); + final OperatorShutdownHandler shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(1), Duration.ofHours(1), Duration.ofHours(1), Clock.systemUTC(), new ShutdownService()); return new UIDOperatorService( optOutStore, saltProvider, From f8dab5956d6f211e3816b8b0038ff35c758949da Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 11:15:18 -0700 Subject: [PATCH 04/20] improve test --- .../java/com/uid2/operator/OperatorShutdownHandlerTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 26b40da91..0b3bdf7cf 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory; import software.amazon.awssdk.utils.Pair; +import java.security.Permission; import java.time.Clock; import java.time.Duration; import java.time.Instant; @@ -61,6 +62,7 @@ void shutdownOnAttestFailure(VertxTestContext testContext) { this.operatorShutdownHandler.handleAttestResponse(Pair.of(AttestationResponseCode.AttestationFailure, "Unauthorized")); } catch (RuntimeException e) { verify(shutdownService).Shutdown(1); + String message = logWatcher.list.get(0).getFormattedMessage(); Assertions.assertEquals("core attestation failed with AttestationFailure, shutting down operator, core response: Unauthorized", logWatcher.list.get(0).getFormattedMessage()); testContext.completeNow(); } From 1a3002c07df47893dceff4ee4c695bf081d691c9 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 11:26:17 -0700 Subject: [PATCH 05/20] update isStalenessCheckScheduled guard name --- .../com/uid2/operator/vertx/OperatorShutdownHandler.java | 7 ++++--- .../com/uid2/operator/OperatorShutdownHandlerTest.java | 5 ----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 642dd8a07..b1e172201 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -28,7 +28,7 @@ public class OperatorShutdownHandler { private final Map> lastSuccessfulRefreshTimes = new ConcurrentHashMap<>(); private final Clock clock; private final ShutdownService shutdownService; - private long periodicCheckTimerId = -1; + private boolean isStalenessCheckScheduled = false; public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime, Duration storeRefreshStaleTimeout, Clock clock, ShutdownService shutdownService) { @@ -113,16 +113,17 @@ public void checkStoreRefreshStaleness() { } public void startPeriodicStaleCheck(Vertx vertx) { - if (periodicCheckTimerId != -1) { + if (isStalenessCheckScheduled) { LOGGER.warn("Periodic store staleness check already started"); return; } long intervalMs = STORE_REFRESH_STALENESS_CHECK_INTERVAL_MINUTES * 60 * 1000L; - periodicCheckTimerId = vertx.setPeriodic(intervalMs, id -> { + vertx.setPeriodic(intervalMs, id -> { LOGGER.debug("Running periodic store staleness check"); checkStoreRefreshStaleness(); }); + isStalenessCheckScheduled = true; LOGGER.info("Started periodic store staleness check (interval: {} minutes, timeout: {} hours)", STORE_REFRESH_STALENESS_CHECK_INTERVAL_MINUTES, storeRefreshStaleTimeout.toHours()); diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 0b3bdf7cf..e7e3e33b4 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -169,24 +169,19 @@ void saltsLogErrorAtInterval(VertxTestContext testContext) { @Test void storeRefreshRecordsSuccessTimestamp(VertxTestContext testContext) { - // Simulate successful store refresh this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - // Verify no shutdown is triggered verify(shutdownService, never()).Shutdown(anyInt()); testContext.completeNow(); } @Test void storeRefreshFailureDoesNotResetTimestamp(VertxTestContext testContext) { - // First successful refresh this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - // Then failures - shouldn't reset the timestamp this.operatorShutdownHandler.handleStoreRefresh("test_store", false); this.operatorShutdownHandler.handleStoreRefresh("test_store", false); - // Verify no shutdown is triggered yet verify(shutdownService, never()).Shutdown(anyInt()); testContext.completeNow(); } From b8d030387bead354f1977ef244692bf92d7c7ce6 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 11:32:10 -0700 Subject: [PATCH 06/20] simplify logging --- .../vertx/OperatorShutdownHandler.java | 3 - .../operator/OperatorShutdownHandlerTest.java | 75 +++++++++++++------ 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index b1e172201..cd46737e6 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -84,9 +84,6 @@ public void handleStoreRefresh(String storeName, Boolean success) { if (success) { lastSuccessfulRefreshTimes.computeIfAbsent(storeName, k -> new AtomicReference<>()) .set(clock.instant()); - LOGGER.trace("Store {} refresh successful at {}", storeName, clock.instant()); - } else { - LOGGER.debug("Store {} refresh failed, timestamp not updated", storeName); } } diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index e7e3e33b4..553a151f0 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -19,7 +19,6 @@ import org.slf4j.LoggerFactory; import software.amazon.awssdk.utils.Pair; -import java.security.Permission; import java.time.Clock; import java.time.Duration; import java.time.Instant; @@ -35,13 +34,17 @@ public class OperatorShutdownHandlerTest { @Mock private Clock clock; @Mock private ShutdownService shutdownService; private OperatorShutdownHandler operatorShutdownHandler; + + // Fixed base time for predictable testing + private Instant baseTime; @BeforeEach void beforeEach() { mocks = MockitoAnnotations.openMocks(this); - when(clock.instant()).thenAnswer(i -> Instant.now()); + baseTime = Instant.now(); // Capture a fixed base time + when(clock.instant()).thenReturn(baseTime); // Use thenReturn for fixed value doThrow(new RuntimeException()).when(shutdownService).Shutdown(1); this.operatorShutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(12), Duration.ofHours(12), clock, shutdownService); } @@ -62,7 +65,6 @@ void shutdownOnAttestFailure(VertxTestContext testContext) { this.operatorShutdownHandler.handleAttestResponse(Pair.of(AttestationResponseCode.AttestationFailure, "Unauthorized")); } catch (RuntimeException e) { verify(shutdownService).Shutdown(1); - String message = logWatcher.list.get(0).getFormattedMessage(); Assertions.assertEquals("core attestation failed with AttestationFailure, shutting down operator, core response: Unauthorized", logWatcher.list.get(0).getFormattedMessage()); testContext.completeNow(); } @@ -169,21 +171,50 @@ void saltsLogErrorAtInterval(VertxTestContext testContext) { @Test void storeRefreshRecordsSuccessTimestamp(VertxTestContext testContext) { + // Record successful refresh at baseTime this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + // Advance time by 11 hours (still under threshold) - should NOT trigger shutdown + when(clock.instant()).thenReturn(baseTime.plus(11, ChronoUnit.HOURS)); + assertDoesNotThrow(() -> { + this.operatorShutdownHandler.checkStoreRefreshStaleness(); + }); verify(shutdownService, never()).Shutdown(anyInt()); - testContext.completeNow(); + + // Now advance time to 13 hours from original success - SHOULD trigger shutdown + when(clock.instant()).thenReturn(baseTime.plus(13, ChronoUnit.HOURS)); + try { + this.operatorShutdownHandler.checkStoreRefreshStaleness(); + } catch (RuntimeException e) { + verify(shutdownService).Shutdown(1); + testContext.completeNow(); + } } @Test void storeRefreshFailureDoesNotResetTimestamp(VertxTestContext testContext) { + // Record successful refresh at baseTime this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + // Advance time by 2 hours + when(clock.instant()).thenReturn(baseTime.plus(2, ChronoUnit.HOURS)); + + // Record multiple failures - these should NOT reset the timestamp + this.operatorShutdownHandler.handleStoreRefresh("test_store", false); this.operatorShutdownHandler.handleStoreRefresh("test_store", false); this.operatorShutdownHandler.handleStoreRefresh("test_store", false); - verify(shutdownService, never()).Shutdown(anyInt()); - testContext.completeNow(); + // Advance time to 13 hours from ORIGINAL success (not from failures) + // This proves failures didn't reset the timestamp + when(clock.instant()).thenReturn(baseTime.plus(13, ChronoUnit.HOURS)); + + // Should trigger shutdown based on original success timestamp + try { + this.operatorShutdownHandler.checkStoreRefreshStaleness(); + } catch (RuntimeException e) { + verify(shutdownService).Shutdown(1); + testContext.completeNow(); + } } @Test @@ -192,11 +223,11 @@ void storeRefreshStaleShutdown(VertxTestContext testContext) { logWatcher.start(); ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - // Initial successful refresh + // Initial successful refresh at baseTime this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - // Move time forward by 12 hours + 1 second - when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS).plusSeconds(1)); + // Move time forward by 12 hours + 1 second from baseTime + when(clock.instant()).thenReturn(baseTime.plus(12, ChronoUnit.HOURS).plusSeconds(1)); // Check staleness - should trigger shutdown try { @@ -212,19 +243,19 @@ void storeRefreshStaleShutdown(VertxTestContext testContext) { @Test void storeRefreshRecoverBeforeStale(VertxTestContext testContext) { - // Initial successful refresh + // Initial successful refresh at baseTime this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - // Move time forward by 11 hours - when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.HOURS)); + // Move time forward by 11 hours from baseTime + when(clock.instant()).thenReturn(baseTime.plus(11, ChronoUnit.HOURS)); - // Another successful refresh before timeout + // Another successful refresh before timeout (at baseTime + 11 hours) this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - // Move time forward another 12 hours from original time (but only 1 hour from last refresh) - when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS)); + // Move time forward another 12 hours from original time (but only 1 hour from last refresh at baseTime + 11h) + when(clock.instant()).thenReturn(baseTime.plus(12, ChronoUnit.HOURS)); - // Check staleness - should NOT trigger shutdown + // Check staleness - should NOT trigger shutdown (only 1 hour since last refresh) assertDoesNotThrow(() -> { this.operatorShutdownHandler.checkStoreRefreshStaleness(); }); @@ -238,20 +269,20 @@ void multipleStoresOneStaleTriggers(VertxTestContext testContext) { logWatcher.start(); ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - // Multiple stores succeed + // Multiple stores succeed at baseTime this.operatorShutdownHandler.handleStoreRefresh("store1", true); this.operatorShutdownHandler.handleStoreRefresh("store2", true); this.operatorShutdownHandler.handleStoreRefresh("store3", true); - // Move time forward - when(clock.instant()).thenAnswer(i -> Instant.now().plus(6, ChronoUnit.HOURS)); + // Move time forward by 6 hours from baseTime + when(clock.instant()).thenReturn(baseTime.plus(6, ChronoUnit.HOURS)); - // Store1 and Store2 refresh successfully, but Store3 doesn't + // Store1 and Store2 refresh successfully at baseTime+6h, but Store3 doesn't this.operatorShutdownHandler.handleStoreRefresh("store1", true); this.operatorShutdownHandler.handleStoreRefresh("store2", true); - // Move time forward 12 hours from start (store3 hasn't refreshed for 12 hours) - when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS).plusSeconds(1)); + // Move time forward 12 hours from baseTime (store3 hasn't refreshed for 12 hours) + when(clock.instant()).thenReturn(baseTime.plus(12, ChronoUnit.HOURS).plusSeconds(1)); // Check staleness - should trigger shutdown for store3 try { From da2d877507d03f6a906b8f831e23999610236fcb Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 11:33:27 -0700 Subject: [PATCH 07/20] refine testing --- .../operator/OperatorShutdownHandlerTest.java | 29 ++----------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 553a151f0..2f5f9f3f6 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -171,17 +171,14 @@ void saltsLogErrorAtInterval(VertxTestContext testContext) { @Test void storeRefreshRecordsSuccessTimestamp(VertxTestContext testContext) { - // Record successful refresh at baseTime this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - // Advance time by 11 hours (still under threshold) - should NOT trigger shutdown when(clock.instant()).thenReturn(baseTime.plus(11, ChronoUnit.HOURS)); assertDoesNotThrow(() -> { this.operatorShutdownHandler.checkStoreRefreshStaleness(); }); verify(shutdownService, never()).Shutdown(anyInt()); - // Now advance time to 13 hours from original success - SHOULD trigger shutdown when(clock.instant()).thenReturn(baseTime.plus(13, ChronoUnit.HOURS)); try { this.operatorShutdownHandler.checkStoreRefreshStaleness(); @@ -193,22 +190,17 @@ void storeRefreshRecordsSuccessTimestamp(VertxTestContext testContext) { @Test void storeRefreshFailureDoesNotResetTimestamp(VertxTestContext testContext) { - // Record successful refresh at baseTime this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - // Advance time by 2 hours when(clock.instant()).thenReturn(baseTime.plus(2, ChronoUnit.HOURS)); - // Record multiple failures - these should NOT reset the timestamp this.operatorShutdownHandler.handleStoreRefresh("test_store", false); this.operatorShutdownHandler.handleStoreRefresh("test_store", false); this.operatorShutdownHandler.handleStoreRefresh("test_store", false); - // Advance time to 13 hours from ORIGINAL success (not from failures) - // This proves failures didn't reset the timestamp + when(clock.instant()).thenReturn(baseTime.plus(13, ChronoUnit.HOURS)); - // Should trigger shutdown based on original success timestamp try { this.operatorShutdownHandler.checkStoreRefreshStaleness(); } catch (RuntimeException e) { @@ -223,13 +215,10 @@ void storeRefreshStaleShutdown(VertxTestContext testContext) { logWatcher.start(); ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - // Initial successful refresh at baseTime this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - // Move time forward by 12 hours + 1 second from baseTime when(clock.instant()).thenReturn(baseTime.plus(12, ChronoUnit.HOURS).plusSeconds(1)); - // Check staleness - should trigger shutdown try { this.operatorShutdownHandler.checkStoreRefreshStaleness(); } catch (RuntimeException e) { @@ -243,19 +232,14 @@ void storeRefreshStaleShutdown(VertxTestContext testContext) { @Test void storeRefreshRecoverBeforeStale(VertxTestContext testContext) { - // Initial successful refresh at baseTime this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - // Move time forward by 11 hours from baseTime when(clock.instant()).thenReturn(baseTime.plus(11, ChronoUnit.HOURS)); - // Another successful refresh before timeout (at baseTime + 11 hours) this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - // Move time forward another 12 hours from original time (but only 1 hour from last refresh at baseTime + 11h) when(clock.instant()).thenReturn(baseTime.plus(12, ChronoUnit.HOURS)); - // Check staleness - should NOT trigger shutdown (only 1 hour since last refresh) assertDoesNotThrow(() -> { this.operatorShutdownHandler.checkStoreRefreshStaleness(); }); @@ -269,22 +253,18 @@ void multipleStoresOneStaleTriggers(VertxTestContext testContext) { logWatcher.start(); ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - // Multiple stores succeed at baseTime + this.operatorShutdownHandler.handleStoreRefresh("store1", true); this.operatorShutdownHandler.handleStoreRefresh("store2", true); this.operatorShutdownHandler.handleStoreRefresh("store3", true); - // Move time forward by 6 hours from baseTime when(clock.instant()).thenReturn(baseTime.plus(6, ChronoUnit.HOURS)); - // Store1 and Store2 refresh successfully at baseTime+6h, but Store3 doesn't this.operatorShutdownHandler.handleStoreRefresh("store1", true); this.operatorShutdownHandler.handleStoreRefresh("store2", true); - // Move time forward 12 hours from baseTime (store3 hasn't refreshed for 12 hours) when(clock.instant()).thenReturn(baseTime.plus(12, ChronoUnit.HOURS).plusSeconds(1)); - // Check staleness - should trigger shutdown for store3 try { this.operatorShutdownHandler.checkStoreRefreshStaleness(); } catch (RuntimeException e) { @@ -298,8 +278,7 @@ void multipleStoresOneStaleTriggers(VertxTestContext testContext) { @Test void noShutdownWhenStoreNeverInitialized(VertxTestContext testContext) { - // Don't register any successful refresh for a store - // Just check staleness immediately + assertDoesNotThrow(() -> { this.operatorShutdownHandler.checkStoreRefreshStaleness(); }); @@ -325,11 +304,9 @@ void periodicCheckOnlyStartsOnce(Vertx vertx, VertxTestContext testContext) { logWatcher.start(); ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - // Start the periodic check twice this.operatorShutdownHandler.startPeriodicStaleCheck(vertx); this.operatorShutdownHandler.startPeriodicStaleCheck(vertx); - // Should log a warning Assertions.assertTrue(logWatcher.list.stream().anyMatch(log -> log.getFormattedMessage().contains("already started"))); testContext.completeNow(); From b95cafe734331540936b9d9201b2ef1577201de8 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 11:39:48 -0700 Subject: [PATCH 08/20] refine tests --- .../operator/OperatorShutdownHandlerTest.java | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 2f5f9f3f6..5564353f1 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -34,17 +34,13 @@ public class OperatorShutdownHandlerTest { @Mock private Clock clock; @Mock private ShutdownService shutdownService; private OperatorShutdownHandler operatorShutdownHandler; - - // Fixed base time for predictable testing - private Instant baseTime; @BeforeEach void beforeEach() { mocks = MockitoAnnotations.openMocks(this); - baseTime = Instant.now(); // Capture a fixed base time - when(clock.instant()).thenReturn(baseTime); // Use thenReturn for fixed value + when(clock.instant()).thenAnswer(i -> Instant.now()); doThrow(new RuntimeException()).when(shutdownService).Shutdown(1); this.operatorShutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(12), Duration.ofHours(12), clock, shutdownService); } @@ -173,13 +169,13 @@ void saltsLogErrorAtInterval(VertxTestContext testContext) { void storeRefreshRecordsSuccessTimestamp(VertxTestContext testContext) { this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - when(clock.instant()).thenReturn(baseTime.plus(11, ChronoUnit.HOURS)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.HOURS)); assertDoesNotThrow(() -> { this.operatorShutdownHandler.checkStoreRefreshStaleness(); }); verify(shutdownService, never()).Shutdown(anyInt()); - when(clock.instant()).thenReturn(baseTime.plus(13, ChronoUnit.HOURS)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(13, ChronoUnit.HOURS)); try { this.operatorShutdownHandler.checkStoreRefreshStaleness(); } catch (RuntimeException e) { @@ -192,14 +188,14 @@ void storeRefreshRecordsSuccessTimestamp(VertxTestContext testContext) { void storeRefreshFailureDoesNotResetTimestamp(VertxTestContext testContext) { this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - when(clock.instant()).thenReturn(baseTime.plus(2, ChronoUnit.HOURS)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(2, ChronoUnit.HOURS)); this.operatorShutdownHandler.handleStoreRefresh("test_store", false); this.operatorShutdownHandler.handleStoreRefresh("test_store", false); this.operatorShutdownHandler.handleStoreRefresh("test_store", false); - when(clock.instant()).thenReturn(baseTime.plus(13, ChronoUnit.HOURS)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(13, ChronoUnit.HOURS)); try { this.operatorShutdownHandler.checkStoreRefreshStaleness(); @@ -217,7 +213,7 @@ void storeRefreshStaleShutdown(VertxTestContext testContext) { this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - when(clock.instant()).thenReturn(baseTime.plus(12, ChronoUnit.HOURS).plusSeconds(1)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS).plusSeconds(1)); try { this.operatorShutdownHandler.checkStoreRefreshStaleness(); @@ -234,11 +230,11 @@ void storeRefreshStaleShutdown(VertxTestContext testContext) { void storeRefreshRecoverBeforeStale(VertxTestContext testContext) { this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - when(clock.instant()).thenReturn(baseTime.plus(11, ChronoUnit.HOURS)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.HOURS)); this.operatorShutdownHandler.handleStoreRefresh("test_store", true); - when(clock.instant()).thenReturn(baseTime.plus(12, ChronoUnit.HOURS)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS)); assertDoesNotThrow(() -> { this.operatorShutdownHandler.checkStoreRefreshStaleness(); @@ -258,12 +254,12 @@ void multipleStoresOneStaleTriggers(VertxTestContext testContext) { this.operatorShutdownHandler.handleStoreRefresh("store2", true); this.operatorShutdownHandler.handleStoreRefresh("store3", true); - when(clock.instant()).thenReturn(baseTime.plus(6, ChronoUnit.HOURS)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(6, ChronoUnit.HOURS)); this.operatorShutdownHandler.handleStoreRefresh("store1", true); this.operatorShutdownHandler.handleStoreRefresh("store2", true); - when(clock.instant()).thenReturn(baseTime.plus(12, ChronoUnit.HOURS).plusSeconds(1)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS).plusSeconds(1)); try { this.operatorShutdownHandler.checkStoreRefreshStaleness(); From f59ce4d7ffce9438c0c579729cc061be4a6a1419 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 11:40:57 -0700 Subject: [PATCH 09/20] refine tests --- .../java/com/uid2/operator/OperatorShutdownHandlerTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 5564353f1..c72bc12ac 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory; import software.amazon.awssdk.utils.Pair; +import java.security.Permission; import java.time.Clock; import java.time.Duration; import java.time.Instant; @@ -61,6 +62,7 @@ void shutdownOnAttestFailure(VertxTestContext testContext) { this.operatorShutdownHandler.handleAttestResponse(Pair.of(AttestationResponseCode.AttestationFailure, "Unauthorized")); } catch (RuntimeException e) { verify(shutdownService).Shutdown(1); + String message = logWatcher.list.get(0).getFormattedMessage(); Assertions.assertEquals("core attestation failed with AttestationFailure, shutting down operator, core response: Unauthorized", logWatcher.list.get(0).getFormattedMessage()); testContext.completeNow(); } From d41cff1570e577ddcf93c844d8db46279253b794 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 13:00:16 -0700 Subject: [PATCH 10/20] from using boolean to runnerable --- src/main/java/com/uid2/operator/Main.java | 30 +++++++++---------- .../vertx/OperatorShutdownHandler.java | 8 ++--- .../operator/OperatorShutdownHandlerTest.java | 28 ++++++++--------- 3 files changed, 30 insertions(+), 36 deletions(-) diff --git a/src/main/java/com/uid2/operator/Main.java b/src/main/java/com/uid2/operator/Main.java index 1b66ef88e..01cb56ee3 100644 --- a/src/main/java/com/uid2/operator/Main.java +++ b/src/main/java/com/uid2/operator/Main.java @@ -407,35 +407,35 @@ private Future createStoreVerticles() throws Exception { if (clientSideTokenGenerate) { fs.add(createAndDeployRotatingStoreVerticle("site", siteProvider, "site_refresh_ms", - refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("site", refreshSucceeded))); + () -> this.shutdownHandler.handleStoreRefresh("site"))); fs.add(createAndDeployRotatingStoreVerticle("client_side_keypairs", clientSideKeypairProvider, "client_side_keypairs_refresh_ms", - refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("client_side_keypairs", refreshSucceeded))); + () -> this.shutdownHandler.handleStoreRefresh("client_side_keypairs"))); } if (validateServiceLinks) { fs.add(createAndDeployRotatingStoreVerticle("service", serviceProvider, "service_refresh_ms", - refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("service", refreshSucceeded))); + () -> this.shutdownHandler.handleStoreRefresh("service"))); fs.add(createAndDeployRotatingStoreVerticle("service_link", serviceLinkProvider, "service_link_refresh_ms", - refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("service_link", refreshSucceeded))); + () -> this.shutdownHandler.handleStoreRefresh("service_link"))); } if (encryptedCloudFilesEnabled) { fs.add(createAndDeployRotatingStoreVerticle("cloud_encryption_keys", cloudEncryptionKeyProvider, "cloud_encryption_keys_refresh_ms", - refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("cloud_encryption_keys", refreshSucceeded))); + () -> this.shutdownHandler.handleStoreRefresh("cloud_encryption_keys"))); } if (useRemoteConfig) { fs.add(createAndDeployRotatingStoreVerticle("runtime_config", (RuntimeConfigStore) configStore, Const.Config.ConfigScanPeriodMsProp, - refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("runtime_config", refreshSucceeded))); + () -> this.shutdownHandler.handleStoreRefresh("runtime_config"))); } fs.add(createAndDeployRotatingStoreVerticle("auth", clientKeyProvider, "auth_refresh_ms", - refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("auth", refreshSucceeded))); - fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms", - refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("keyset", refreshSucceeded))); - fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms", - refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("keysetkey", refreshSucceeded))); - fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms", - refreshSucceeded -> this.shutdownHandler.handleStoreRefresh("salt", refreshSucceeded))); + () -> this.shutdownHandler.handleStoreRefresh("auth"))); + fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms", + () -> this.shutdownHandler.handleStoreRefresh("keyset"))); + fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms", + () -> this.shutdownHandler.handleStoreRefresh("keysetkey"))); + fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms", + () -> this.shutdownHandler.handleStoreRefresh("salt"))); fs.add(createAndDeployCloudSyncStoreVerticle("optout", fsOptOut, optOutCloudSync)); CompositeFuture.all(fs).onComplete(ar -> { if (ar.failed()) promise.fail(new Exception(ar.cause())); @@ -453,8 +453,8 @@ private Future createAndDeployRotatingStoreVerticle(String name, IMetada return createAndDeployRotatingStoreVerticle(name, store, storeRefreshConfigMs, null); } - private Future createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs, Consumer refreshCallback) { - final int intervalMs = config.getInteger(storeRefreshConfigMs, 10000); + private Future createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs, Runnable refreshCallback) { + final long intervalMs = config.getInteger(storeRefreshConfigMs, 10000); RotatingStoreVerticle rotatingStoreVerticle = new RotatingStoreVerticle(name, intervalMs, store, refreshCallback); return vertx.deployVerticle(rotatingStoreVerticle); diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index cd46737e6..b7a420311 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -80,11 +80,9 @@ public void handleAttestResponse(Pair response) } } - public void handleStoreRefresh(String storeName, Boolean success) { - if (success) { - lastSuccessfulRefreshTimes.computeIfAbsent(storeName, k -> new AtomicReference<>()) - .set(clock.instant()); - } + public void handleStoreRefresh(String storeName) { + lastSuccessfulRefreshTimes.computeIfAbsent(storeName, k -> new AtomicReference<>()) + .set(clock.instant()); } public void checkStoreRefreshStaleness() { diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index c72bc12ac..b68cfe879 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -19,7 +19,6 @@ import org.slf4j.LoggerFactory; import software.amazon.awssdk.utils.Pair; -import java.security.Permission; import java.time.Clock; import java.time.Duration; import java.time.Instant; @@ -62,7 +61,6 @@ void shutdownOnAttestFailure(VertxTestContext testContext) { this.operatorShutdownHandler.handleAttestResponse(Pair.of(AttestationResponseCode.AttestationFailure, "Unauthorized")); } catch (RuntimeException e) { verify(shutdownService).Shutdown(1); - String message = logWatcher.list.get(0).getFormattedMessage(); Assertions.assertEquals("core attestation failed with AttestationFailure, shutting down operator, core response: Unauthorized", logWatcher.list.get(0).getFormattedMessage()); testContext.completeNow(); } @@ -169,7 +167,7 @@ void saltsLogErrorAtInterval(VertxTestContext testContext) { @Test void storeRefreshRecordsSuccessTimestamp(VertxTestContext testContext) { - this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + this.operatorShutdownHandler.handleStoreRefresh("test_store"); when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.HOURS)); assertDoesNotThrow(() -> { @@ -188,15 +186,13 @@ void storeRefreshRecordsSuccessTimestamp(VertxTestContext testContext) { @Test void storeRefreshFailureDoesNotResetTimestamp(VertxTestContext testContext) { - this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + this.operatorShutdownHandler.handleStoreRefresh("test_store"); when(clock.instant()).thenAnswer(i -> Instant.now().plus(2, ChronoUnit.HOURS)); - this.operatorShutdownHandler.handleStoreRefresh("test_store", false); - this.operatorShutdownHandler.handleStoreRefresh("test_store", false); - this.operatorShutdownHandler.handleStoreRefresh("test_store", false); + // Simulate multiple refresh failures by NOT calling handleStoreRefresh + // (failures don't invoke the callback anymore) - when(clock.instant()).thenAnswer(i -> Instant.now().plus(13, ChronoUnit.HOURS)); try { @@ -213,7 +209,7 @@ void storeRefreshStaleShutdown(VertxTestContext testContext) { logWatcher.start(); ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + this.operatorShutdownHandler.handleStoreRefresh("test_store"); when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS).plusSeconds(1)); @@ -230,11 +226,11 @@ void storeRefreshStaleShutdown(VertxTestContext testContext) { @Test void storeRefreshRecoverBeforeStale(VertxTestContext testContext) { - this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + this.operatorShutdownHandler.handleStoreRefresh("test_store"); when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.HOURS)); - this.operatorShutdownHandler.handleStoreRefresh("test_store", true); + this.operatorShutdownHandler.handleStoreRefresh("test_store"); when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS)); @@ -252,14 +248,14 @@ void multipleStoresOneStaleTriggers(VertxTestContext testContext) { ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - this.operatorShutdownHandler.handleStoreRefresh("store1", true); - this.operatorShutdownHandler.handleStoreRefresh("store2", true); - this.operatorShutdownHandler.handleStoreRefresh("store3", true); + this.operatorShutdownHandler.handleStoreRefresh("store1"); + this.operatorShutdownHandler.handleStoreRefresh("store2"); + this.operatorShutdownHandler.handleStoreRefresh("store3"); when(clock.instant()).thenAnswer(i -> Instant.now().plus(6, ChronoUnit.HOURS)); - this.operatorShutdownHandler.handleStoreRefresh("store1", true); - this.operatorShutdownHandler.handleStoreRefresh("store2", true); + this.operatorShutdownHandler.handleStoreRefresh("store1"); + this.operatorShutdownHandler.handleStoreRefresh("store2"); when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS).plusSeconds(1)); From 892dd537fe33aa063eb9f66f1959bc51d7d290e8 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 13:01:24 -0700 Subject: [PATCH 11/20] update pom.xl --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 506f57334..b87bc8b0c 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ 2.1.0 2.1.13 2.1.0 - 11.1.81-alpha-317-SNAPSHOT + 11.1.82-alpha-318-SNAPSHOT ${project.version} 21 21 From 940a562eb2abec834464a6e5f66fae39b03fcb20 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 13:07:26 -0700 Subject: [PATCH 12/20] update tests --- .../com/uid2/operator/vertx/OperatorShutdownHandler.java | 2 +- .../java/com/uid2/operator/OperatorShutdownHandlerTest.java | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index b7a420311..5e45fe4dd 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -101,7 +101,7 @@ public void checkStoreRefreshStaleness() { if (timeSinceLastRefresh.compareTo(storeRefreshStaleTimeout) > 0) { LOGGER.error("Store '{}' has not refreshed successfully for {} hours ({}). Shutting down operator", storeName, timeSinceLastRefresh.toHours(), timeSinceLastRefresh); - shutdownService.Shutdown(1); + this.shutdownService.Shutdown(1); return; // Exit after triggering shutdown for first stale store } } diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index b68cfe879..1054d4db2 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory; import software.amazon.awssdk.utils.Pair; +import java.security.Permission; import java.time.Clock; import java.time.Duration; import java.time.Instant; @@ -61,6 +62,7 @@ void shutdownOnAttestFailure(VertxTestContext testContext) { this.operatorShutdownHandler.handleAttestResponse(Pair.of(AttestationResponseCode.AttestationFailure, "Unauthorized")); } catch (RuntimeException e) { verify(shutdownService).Shutdown(1); + String message = logWatcher.list.get(0).getFormattedMessage(); Assertions.assertEquals("core attestation failed with AttestationFailure, shutting down operator, core response: Unauthorized", logWatcher.list.get(0).getFormattedMessage()); testContext.completeNow(); } @@ -191,8 +193,7 @@ void storeRefreshFailureDoesNotResetTimestamp(VertxTestContext testContext) { when(clock.instant()).thenAnswer(i -> Instant.now().plus(2, ChronoUnit.HOURS)); // Simulate multiple refresh failures by NOT calling handleStoreRefresh - // (failures don't invoke the callback anymore) - + when(clock.instant()).thenAnswer(i -> Instant.now().plus(13, ChronoUnit.HOURS)); try { From df02f0a9d278775cebba710d5f09e74dba99ff62 Mon Sep 17 00:00:00 2001 From: Release Workflow Date: Tue, 21 Oct 2025 20:18:58 +0000 Subject: [PATCH 13/20] [CI Pipeline] Released Snapshot version: 5.60.8-alpha-248-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b87bc8b0c..47eeccf90 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.uid2 uid2-operator - 5.60.7 + 5.60.8-alpha-248-SNAPSHOT UTF-8 From 1a239f2f2618971e40b63a515b0f226ab09574c1 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 13:23:15 -0700 Subject: [PATCH 14/20] add test for multiple stores failling at same time --- .../operator/OperatorShutdownHandlerTest.java | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 1054d4db2..6cfa22a49 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -271,6 +271,29 @@ void multipleStoresOneStaleTriggers(VertxTestContext testContext) { } } + @Test + void multipleStoresFailSimultaneouslyTriggersShutdown(VertxTestContext testContext) { + ListAppender logWatcher = new ListAppender<>(); + logWatcher.start(); + ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); + + this.operatorShutdownHandler.handleStoreRefresh("keyset"); + this.operatorShutdownHandler.handleStoreRefresh("keysetkey"); + this.operatorShutdownHandler.handleStoreRefresh("salt"); + this.operatorShutdownHandler.handleStoreRefresh("auth"); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(12, ChronoUnit.HOURS).plusSeconds(1)); + + try { + this.operatorShutdownHandler.checkStoreRefreshStaleness(); + } catch (RuntimeException e) { + verify(shutdownService).Shutdown(1); + Assertions.assertTrue(logWatcher.list.stream().anyMatch(log -> + log.getFormattedMessage().contains("has not refreshed successfully"))); + testContext.completeNow(); + } + } + @Test void noShutdownWhenStoreNeverInitialized(VertxTestContext testContext) { From 7fdc45d02fee38f199abdf21455e964c6588dca5 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 21 Oct 2025 13:37:21 -0700 Subject: [PATCH 15/20] start logging last refresh sucessful time since startup --- .../com/uid2/operator/vertx/OperatorShutdownHandler.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 5e45fe4dd..3a92565bf 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -92,8 +92,9 @@ public void checkStoreRefreshStaleness() { Instant lastSuccess = entry.getValue().get(); if (lastSuccess == null) { - // Store hasn't had a successful refresh yet - might be during startup - // Don't trigger shutdown for stores that haven't initialized + // Store hasn't had a successful refresh yet + // This should rarely happen since startup success also records timestamp, but keep as defensive guard for edge cases + LOGGER.warn("Store '{}' has no recorded successful refresh - skipping staleness check", storeName); continue; } From d6c088939bde97430730ab2d87d4ae53c734967d Mon Sep 17 00:00:00 2001 From: way zheng Date: Wed, 22 Oct 2025 11:26:54 -0700 Subject: [PATCH 16/20] add config values --- conf/docker-config.json | 2 +- conf/integ-config.json | 2 +- conf/local-config.json | 2 +- conf/local-e2e-docker-private-config.json | 2 +- conf/local-e2e-docker-public-config.json | 2 +- conf/local-e2e-private-config.json | 2 +- conf/local-e2e-public-config.json | 2 +- pom.xml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/conf/docker-config.json b/conf/docker-config.json index f346045bc..d2927f044 100644 --- a/conf/docker-config.json +++ b/conf/docker-config.json @@ -40,7 +40,7 @@ "enclave_platform": null, "failure_shutdown_wait_hours": 120, "salts_expired_shutdown_hours": 12, - "keysetkeys_failed_shutdown_hours": 168, + "store_refresh_stale_shutdown_hours": 12, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": true, diff --git a/conf/integ-config.json b/conf/integ-config.json index a9b0b9049..f7b16e833 100644 --- a/conf/integ-config.json +++ b/conf/integ-config.json @@ -16,7 +16,7 @@ "cloud_encryption_keys_metadata_path": "http://localhost:8088/cloud_encryption_keys/retrieve", "runtime_config_metadata_path": "http://localhost:8088/operator/config", "salts_expired_shutdown_hours": 12, - "keysetkeys_failed_shutdown_hours": 168, + "store_refresh_stale_shutdown_hours": 12, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": false, diff --git a/conf/local-config.json b/conf/local-config.json index 52697a1c0..0d8720483 100644 --- a/conf/local-config.json +++ b/conf/local-config.json @@ -38,7 +38,7 @@ "key_sharing_endpoint_provide_app_names": true, "client_side_token_generate_log_invalid_http_origins": true, "salts_expired_shutdown_hours": 12, - "keysetkeys_failed_shutdown_hours": 168, + "store_refresh_stale_shutdown_hours": 12, "operator_type": "public", "encrypted_files": false, "disable_optout_token": true, diff --git a/conf/local-e2e-docker-private-config.json b/conf/local-e2e-docker-private-config.json index bdf259c84..629a2c998 100644 --- a/conf/local-e2e-docker-private-config.json +++ b/conf/local-e2e-docker-private-config.json @@ -30,7 +30,7 @@ "optout_delta_rotate_interval": 60, "cloud_refresh_interval": 30, "salts_expired_shutdown_hours": 12, - "keysetkeys_failed_shutdown_hours": 168, + "store_refresh_stale_shutdown_hours": 12, "operator_type": "private", "enable_remote_config": true, "uid_instance_id_prefix": "local-private-operator" diff --git a/conf/local-e2e-docker-public-config.json b/conf/local-e2e-docker-public-config.json index c4c899a63..7610e9bc7 100644 --- a/conf/local-e2e-docker-public-config.json +++ b/conf/local-e2e-docker-public-config.json @@ -36,7 +36,7 @@ "optout_status_api_enabled": true, "cloud_refresh_interval": 30, "salts_expired_shutdown_hours": 12, - "keysetkeys_failed_shutdown_hours": 168, + "store_refresh_stale_shutdown_hours": 12, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": true, diff --git a/conf/local-e2e-private-config.json b/conf/local-e2e-private-config.json index a69994d47..5e3c39a0e 100644 --- a/conf/local-e2e-private-config.json +++ b/conf/local-e2e-private-config.json @@ -41,7 +41,7 @@ "client_side_token_generate_domain_name_check_enabled": false, "client_side_token_generate_log_invalid_http_origins": true, "salts_expired_shutdown_hours": 12, - "keysetkeys_failed_shutdown_hours": 168, + "store_refresh_stale_shutdown_hours": 12, "operator_type": "private", "enable_remote_config": true, "uid_instance_id_prefix": "local-private-operator" diff --git a/conf/local-e2e-public-config.json b/conf/local-e2e-public-config.json index 075481496..80459743b 100644 --- a/conf/local-e2e-public-config.json +++ b/conf/local-e2e-public-config.json @@ -42,7 +42,7 @@ "key_sharing_endpoint_provide_app_names": true, "client_side_token_generate_log_invalid_http_origins": true, "salts_expired_shutdown_hours": 12, - "keysetkeys_failed_shutdown_hours": 168, + "store_refresh_stale_shutdown_hours": 12, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": true, diff --git a/pom.xml b/pom.xml index 47eeccf90..d3c3fc5a9 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ 2.1.0 2.1.13 2.1.0 - 11.1.82-alpha-318-SNAPSHOT + 11.1.83-alpha-319-SNAPSHOT ${project.version} 21 21 From 1e3a62bdf491e37b4700be283b9a41a52def9679 Mon Sep 17 00:00:00 2001 From: way zheng Date: Wed, 22 Oct 2025 11:36:51 -0700 Subject: [PATCH 17/20] put runnable inside createDeployVerticle --- src/main/java/com/uid2/operator/Main.java | 36 ++++++++--------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/src/main/java/com/uid2/operator/Main.java b/src/main/java/com/uid2/operator/Main.java index 01cb56ee3..44f2fbe4a 100644 --- a/src/main/java/com/uid2/operator/Main.java +++ b/src/main/java/com/uid2/operator/Main.java @@ -406,36 +406,26 @@ private Future createStoreVerticles() throws Exception { List fs = new ArrayList<>(); if (clientSideTokenGenerate) { - fs.add(createAndDeployRotatingStoreVerticle("site", siteProvider, "site_refresh_ms", - () -> this.shutdownHandler.handleStoreRefresh("site"))); - fs.add(createAndDeployRotatingStoreVerticle("client_side_keypairs", clientSideKeypairProvider, "client_side_keypairs_refresh_ms", - () -> this.shutdownHandler.handleStoreRefresh("client_side_keypairs"))); + fs.add(createAndDeployRotatingStoreVerticle("site", siteProvider, "site_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("client_side_keypairs", clientSideKeypairProvider, "client_side_keypairs_refresh_ms")); } if (validateServiceLinks) { - fs.add(createAndDeployRotatingStoreVerticle("service", serviceProvider, "service_refresh_ms", - () -> this.shutdownHandler.handleStoreRefresh("service"))); - fs.add(createAndDeployRotatingStoreVerticle("service_link", serviceLinkProvider, "service_link_refresh_ms", - () -> this.shutdownHandler.handleStoreRefresh("service_link"))); + fs.add(createAndDeployRotatingStoreVerticle("service", serviceProvider, "service_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("service_link", serviceLinkProvider, "service_link_refresh_ms")); } if (encryptedCloudFilesEnabled) { - fs.add(createAndDeployRotatingStoreVerticle("cloud_encryption_keys", cloudEncryptionKeyProvider, "cloud_encryption_keys_refresh_ms", - () -> this.shutdownHandler.handleStoreRefresh("cloud_encryption_keys"))); + fs.add(createAndDeployRotatingStoreVerticle("cloud_encryption_keys", cloudEncryptionKeyProvider, "cloud_encryption_keys_refresh_ms")); } if (useRemoteConfig) { - fs.add(createAndDeployRotatingStoreVerticle("runtime_config", (RuntimeConfigStore) configStore, Const.Config.ConfigScanPeriodMsProp, - () -> this.shutdownHandler.handleStoreRefresh("runtime_config"))); + fs.add(createAndDeployRotatingStoreVerticle("runtime_config", (RuntimeConfigStore) configStore, Const.Config.ConfigScanPeriodMsProp)); } - fs.add(createAndDeployRotatingStoreVerticle("auth", clientKeyProvider, "auth_refresh_ms", - () -> this.shutdownHandler.handleStoreRefresh("auth"))); - fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms", - () -> this.shutdownHandler.handleStoreRefresh("keyset"))); - fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms", - () -> this.shutdownHandler.handleStoreRefresh("keysetkey"))); - fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms", - () -> this.shutdownHandler.handleStoreRefresh("salt"))); + fs.add(createAndDeployRotatingStoreVerticle("auth", clientKeyProvider, "auth_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms")); fs.add(createAndDeployCloudSyncStoreVerticle("optout", fsOptOut, optOutCloudSync)); CompositeFuture.all(fs).onComplete(ar -> { if (ar.failed()) promise.fail(new Exception(ar.cause())); @@ -450,11 +440,9 @@ private Future createStoreVerticles() throws Exception { } private Future createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs) { - return createAndDeployRotatingStoreVerticle(name, store, storeRefreshConfigMs, null); - } - - private Future createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs, Runnable refreshCallback) { final long intervalMs = config.getInteger(storeRefreshConfigMs, 10000); + + Runnable refreshCallback = () -> this.shutdownHandler.handleStoreRefresh(name); RotatingStoreVerticle rotatingStoreVerticle = new RotatingStoreVerticle(name, intervalMs, store, refreshCallback); return vertx.deployVerticle(rotatingStoreVerticle); From 8b2aa69a67c77af4f9c9a1fa1db833537c50f1f3 Mon Sep 17 00:00:00 2001 From: way zheng Date: Thu, 23 Oct 2025 13:00:30 -0700 Subject: [PATCH 18/20] update shared version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d3c3fc5a9..c5d878eaf 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ 2.1.0 2.1.13 2.1.0 - 11.1.83-alpha-319-SNAPSHOT + 11.1.91 ${project.version} 21 21 From 8f97839c7c5a05b7f4251bcef159953382e1ea43 Mon Sep 17 00:00:00 2001 From: Weihe Zheng Date: Thu, 23 Oct 2025 13:15:07 -0700 Subject: [PATCH 19/20] Downgrade version from 5.60.8-alpha-248-SNAPSHOT to 5.60.7 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c5d878eaf..3174ee817 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.uid2 uid2-operator - 5.60.8-alpha-248-SNAPSHOT + 5.60.7 UTF-8 From 338ffff5fb9b839b7c7ff9738ecdd7540e254719 Mon Sep 17 00:00:00 2001 From: way zheng Date: Thu, 23 Oct 2025 13:23:32 -0700 Subject: [PATCH 20/20] add one line more for debugging --- .../java/com/uid2/operator/vertx/OperatorShutdownHandler.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 3a92565bf..bd2f5f243 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -99,6 +99,7 @@ public void checkStoreRefreshStaleness() { } Duration timeSinceLastRefresh = Duration.between(lastSuccess, now); + LOGGER.debug("Store '{}' last successful refresh {} ago", storeName, timeSinceLastRefresh); if (timeSinceLastRefresh.compareTo(storeRefreshStaleTimeout) > 0) { LOGGER.error("Store '{}' has not refreshed successfully for {} hours ({}). Shutting down operator", storeName, timeSinceLastRefresh.toHours(), timeSinceLastRefresh);