Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
5389559
Implement keyset key fail-fast feature with 7-day timeout
lizk886 Oct 13, 2025
6729f3c
Add keysetkey fail-fast logic and handleKeysetKeyRefreshResponse
lizk886 Oct 14, 2025
651898f
Remove unnecessary KeyManager callback - RotatingStoreVerticle handle…
lizk886 Oct 14, 2025
4ad136e
Update uid2-shared version reference
lizk886 Oct 14, 2025
ef241d0
Enable DEBUG logging for OperatorShutdownHandler
lizk886 Oct 14, 2025
246f716
Add timer accumulation logging and keyset key unit tests
lizk886 Oct 14, 2025
7609538
[CI Pipeline] Released Snapshot version: 5.58.63-alpha-245-SNAPSHOT
Oct 14, 2025
da09d57
make KeysetKeysFailedShutdownHours configurable, simplify recovery l…
lizk886 Oct 14, 2025
d4ffde0
make KeysetKeysFailedShutdownHours configurable, simplify recovery l…
lizk886 Oct 14, 2025
bb18e29
remove fallback constructor
lizk886 Oct 14, 2025
d35aed9
bump version
lizk886 Oct 14, 2025
51357a5
shut down behavior changes
lizk886 Oct 14, 2025
619bf84
remove redundant logging
lizk886 Oct 14, 2025
112d7c3
log failure at an interval
lizk886 Oct 14, 2025
c4de167
log failure at an interval
lizk886 Oct 14, 2025
a71cbb8
[CI Pipeline] Released Snapshot version: 5.58.64-alpha-246-SNAPSHOT
Oct 14, 2025
fecce79
simplify delayed timer, since exact logging schedule is not that impo…
lizk886 Oct 14, 2025
c980dfa
simplify delayed timer, since exact logging schedule is not that impo…
lizk886 Oct 14, 2025
639ad3a
Remove DEBUG logger for OperatorShutdownHandler
lizk886 Oct 14, 2025
aa6447c
beautify
lizk886 Oct 14, 2025
c428446
simplify calculation logic for delayed logging
lizk886 Oct 14, 2025
ddd10f7
Update uid2-shared.version to 11.1.80
lizk886 Oct 14, 2025
dadcb3e
Update version from 5.58.64-alpha-246-SNAPSHOT to 5.58.62
lizk886 Oct 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conf/docker-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"enclave_platform": null,
"failure_shutdown_wait_hours": 120,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"operator_type": "public",
"disable_optout_token": true,
"enable_remote_config": true,
Expand Down
1 change: 1 addition & 0 deletions conf/integ-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"cloud_encryption_keys_metadata_path": "http://localhost:8088/cloud_encryption_keys/retrieve",
"runtime_config_metadata_path": "http://localhost:8088/operator/config",
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"operator_type": "public",
"disable_optout_token": true,
"enable_remote_config": false,
Expand Down
1 change: 1 addition & 0 deletions conf/local-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"key_sharing_endpoint_provide_app_names": true,
"client_side_token_generate_log_invalid_http_origins": true,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"operator_type": "public",
"encrypted_files": false,
"disable_optout_token": true,
Expand Down
1 change: 1 addition & 0 deletions conf/local-e2e-docker-private-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"optout_delta_rotate_interval": 60,
"cloud_refresh_interval": 30,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"operator_type": "private",
"enable_remote_config": true,
"uid_instance_id_prefix": "local-private-operator"
Expand Down
1 change: 1 addition & 0 deletions conf/local-e2e-docker-public-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"optout_status_api_enabled": true,
"cloud_refresh_interval": 30,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"operator_type": "public",
"disable_optout_token": true,
"enable_remote_config": true,
Expand Down
1 change: 1 addition & 0 deletions conf/local-e2e-private-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"client_side_token_generate_domain_name_check_enabled": false,
"client_side_token_generate_log_invalid_http_origins": true,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"operator_type": "private",
"enable_remote_config": true,
"uid_instance_id_prefix": "local-private-operator"
Expand Down
1 change: 1 addition & 0 deletions conf/local-e2e-public-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"key_sharing_endpoint_provide_app_names": true,
"client_side_token_generate_log_invalid_http_origins": true,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"operator_type": "public",
"disable_optout_token": true,
"enable_remote_config": true,
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<enclave-aws.version>2.1.0</enclave-aws.version>
<enclave-azure.version>2.1.13</enclave-azure.version>
<enclave-gcp.version>2.1.0</enclave-gcp.version>
<uid2-shared.version>11.1.69</uid2-shared.version>
<uid2-shared.version>11.1.80</uid2-shared.version>
<image.version>${project.version}</image.version>
<maven.compiler.source>21</maven.compiler.source>
<maven.compiler.target>21</maven.compiler.target>
Expand Down
15 changes: 12 additions & 3 deletions src/main/java/com/uid2/operator/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
import java.time.Duration;
import java.time.Instant;
import java.util.*;
import java.util.function.Consumer;
import java.util.function.Supplier;

import static com.uid2.operator.Const.Config.EnableRemoteConfigProp;
Expand Down Expand Up @@ -114,7 +115,10 @@ public Main(Vertx vertx, JsonObject config) throws Exception {
this.clientSideTokenGenerate = config.getBoolean(Const.Config.EnableClientSideTokenGenerate, false);
this.validateServiceLinks = config.getBoolean(Const.Config.ValidateServiceLinks, false);
this.encryptedCloudFilesEnabled = config.getBoolean(Const.Config.EncryptedFiles, false);
this.shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(config.getInteger(Const.Config.SaltsExpiredShutdownHours, 12)), Clock.systemUTC(), new ShutdownService());
this.shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12),
Duration.ofHours(config.getInteger(Const.Config.SaltsExpiredShutdownHours, 12)),
Duration.ofHours(config.getInteger(Const.Config.KeysetKeysFailedShutdownHours, 168)),
Clock.systemUTC(), new ShutdownService());
this.uidInstanceIdProvider = new UidInstanceIdProvider(config);

String coreAttestUrl = this.config.getString(Const.Config.CoreAttestUrlProp);
Expand Down Expand Up @@ -420,7 +424,8 @@ private Future<Void> createStoreVerticles() throws Exception {
}
fs.add(createAndDeployRotatingStoreVerticle("auth", clientKeyProvider, "auth_refresh_ms"));
fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms"));
fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms"));
fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms",
this.shutdownHandler::handleKeysetKeyRefreshResponse));
fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms"));
fs.add(createAndDeployCloudSyncStoreVerticle("optout", fsOptOut, optOutCloudSync));
CompositeFuture.all(fs).onComplete(ar -> {
Expand All @@ -433,9 +438,13 @@ private Future<Void> createStoreVerticles() throws Exception {
}

/**
 * Deploys a rotating store verticle without a refresh callback.
 *
 * <p>Convenience overload: forwards to the four-argument variant with a {@code null} callback.
 *
 * @param name                 name handed to the verticle
 * @param store                store handed to the verticle
 * @param storeRefreshConfigMs config key from which the refresh interval is read
 * @return the future produced by the four-argument overload
 */
private Future<String> createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs) {
    // Typed local keeps overload resolution explicit while still passing null.
    final Consumer<Boolean> noRefreshCallback = null;
    return createAndDeployRotatingStoreVerticle(name, store, storeRefreshConfigMs, noRefreshCallback);
}

/**
 * Builds a {@link RotatingStoreVerticle} for the given store and deploys it on {@code vertx}.
 *
 * @param name                 name handed to the verticle
 * @param store                store handed to the verticle
 * @param storeRefreshConfigMs config key holding the refresh interval; 10000 is used when the key is absent
 *                             (presumably milliseconds, judging by the variable name)
 * @param refreshCallback      callback handed to the verticle; the three-argument overload passes {@code null} here.
 *                             NOTE(review): when and with what value the verticle invokes it is not visible in this file.
 * @return the future returned by {@code vertx.deployVerticle}
 */
private Future<String> createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs, Consumer<Boolean> refreshCallback) {
    final int intervalMs = config.getInteger(storeRefreshConfigMs, 10000);

    // Only the callback-aware constructor call is kept; the earlier three-argument call declared
    // the same local a second time and never forwarded refreshCallback.
    final RotatingStoreVerticle rotatingStoreVerticle = new RotatingStoreVerticle(name, intervalMs, store, refreshCallback);
    return vertx.deployVerticle(rotatingStoreVerticle);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,23 @@
public class OperatorShutdownHandler {
private static final Logger LOGGER = LoggerFactory.getLogger(OperatorShutdownHandler.class);
// NOTE(review): presumably the minimum gap between repeated salt-failure log lines; its use is not visible here - confirm.
private static final int SALT_FAILURE_LOG_INTERVAL_MINUTES = 10;
// Minimum number of minutes between two "still failing" progress warnings for keyset key refreshes.
private static final int KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES = 10;
// Wait times supplied through the constructor, one per monitored failure source.
private final Duration attestShutdownWaitTime;
private final Duration saltShutdownWaitTime;
// How long keyset key refreshes may keep failing before the operator is shut down.
private final Duration keysetKeyShutdownWaitTime;
// Failure-streak start instants; all start out as null.
private final AtomicReference<Instant> attestFailureStartTime = new AtomicReference<>(null);
private final AtomicReference<Instant> saltFailureStartTime = new AtomicReference<>(null);
// Set on the first failed keyset key refresh of a streak and reset to null on a successful refresh.
private final AtomicReference<Instant> keysetKeyFailureStartTime = new AtomicReference<>(null);
private final AtomicReference<Instant> lastSaltFailureLogTime = new AtomicReference<>(null);
// Instant of the most recent keyset key failure log line; reset to null on a successful refresh.
private final AtomicReference<Instant> lastKeysetKeyFailureLogTime = new AtomicReference<>(null);
// Time source for all "now" reads, injected through the constructor.
private final Clock clock;
// Used to terminate the process, e.g. Shutdown(1) when keyset keys fail for too long.
private final ShutdownService shutdownService;

/**
 * Creates a shutdown handler with one wait time per monitored failure source.
 *
 * @param attestShutdownWaitTime    wait time stored for attestation failures
 * @param saltShutdownWaitTime      wait time stored for salt failures
 * @param keysetKeyShutdownWaitTime how long keyset key refreshes may keep failing before shutdown
 * @param clock                     time source used for all elapsed-time calculations
 * @param shutdownService           service used to terminate the operator
 */
public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime,
                               Duration keysetKeyShutdownWaitTime, Clock clock, ShutdownService shutdownService) {
    this.attestShutdownWaitTime = attestShutdownWaitTime;
    this.saltShutdownWaitTime = saltShutdownWaitTime;
    this.keysetKeyShutdownWaitTime = keysetKeyShutdownWaitTime;
    this.clock = clock;
    this.shutdownService = shutdownService;
}
Expand Down Expand Up @@ -54,6 +60,37 @@ public void logSaltFailureAtInterval() {
}
}

/**
 * Records the outcome of a keyset key refresh and shuts the operator down once refreshes
 * have been failing for longer than {@code keysetKeyShutdownWaitTime}.
 *
 * <p>A successful refresh clears the failure streak. The first failure of a streak starts the
 * shutdown timer. Later failures either trigger {@code Shutdown(1)} when the limit is exceeded
 * or emit a rate-limited progress warning.
 *
 * @param success {@code true} when the refresh succeeded; {@code false} or {@code null} is treated as a failure
 */
public void handleKeysetKeyRefreshResponse(Boolean success) {
    // Boolean.TRUE.equals avoids the NullPointerException that auto-unboxing a null Boolean would raise.
    if (Boolean.TRUE.equals(success)) {
        keysetKeyFailureStartTime.set(null);
        lastKeysetKeyFailureLogTime.set(null);
        LOGGER.debug("keyset keys sync successful");
        return;
    }

    // Read the clock once so the streak start, the log timestamp and the elapsed time all agree.
    final Instant now = clock.instant();

    // Atomically claim the start of a new failure streak; only one caller can win this.
    if (keysetKeyFailureStartTime.compareAndSet(null, now)) {
        lastKeysetKeyFailureLogTime.set(now);
        LOGGER.warn("keyset keys sync started failing. shutdown timer started");
        return;
    }

    final Instant failureStart = keysetKeyFailureStartTime.get();
    if (failureStart == null) {
        // A successful refresh cleared the streak between the two reads; nothing left to evaluate.
        return;
    }

    final Duration elapsed = Duration.between(failureStart, now);
    if (elapsed.compareTo(this.keysetKeyShutdownWaitTime) > 0) {
        LOGGER.error("keyset keys have been failing to sync for too long. shutting down operator");
        this.shutdownService.Shutdown(1);
    } else {
        logKeysetKeyFailureProgressAtInterval(failureStart, elapsed);
    }
}

/**
 * Emits a "still failing" warning for keyset key refreshes, at most once per
 * {@code KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES}.
 *
 * @param failureStartTime start of the current failure streak (currently not used by this method)
 * @param elapsed          time the refresh has been failing; rendered as days, hours and minutes
 */
private void logKeysetKeyFailureProgressAtInterval(Instant failureStartTime, Duration elapsed) {
    final Instant previousLog = lastKeysetKeyFailureLogTime.get();
    if (previousLog != null) {
        final Instant quietUntil = previousLog.plus(KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES, ChronoUnit.MINUTES);
        // Still inside the quiet window since the last log line: stay silent.
        if (!clock.instant().isAfter(quietUntil)) {
            return;
        }
    }

    LOGGER.warn("keyset keys sync still failing - elapsed time: {}d {}h {}m", elapsed.toDays(), elapsed.toHoursPart(), elapsed.toMinutesPart());
    lastKeysetKeyFailureLogTime.set(clock.instant());
}

public void handleAttestResponse(Pair<AttestationResponseCode, String> response) {
if (response.left() == AttestationResponseCode.AttestationFailure) {
LOGGER.error("core attestation failed with AttestationFailure, shutting down operator, core response: {}", response.right());
Expand Down
72 changes: 71 additions & 1 deletion src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void beforeEach() {
mocks = MockitoAnnotations.openMocks(this);
when(clock.instant()).thenAnswer(i -> Instant.now());
doThrow(new RuntimeException()).when(shutdownService).Shutdown(1);
this.operatorShutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(12), clock, shutdownService);
this.operatorShutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(12), Duration.ofHours(168), clock, shutdownService);
}

@AfterEach
Expand Down Expand Up @@ -166,4 +166,74 @@ void saltsLogErrorAtInterval(VertxTestContext testContext) {

testContext.completeNow();
}

// Verifies that a keyset key failure streak longer than the configured 168 hours triggers Shutdown(1).
@Test
void shutdownOnKeysetKeyFailedTooLong(VertxTestContext testContext) {
    ListAppender<ILoggingEvent> logWatcher = new ListAppender<>();
    logWatcher.start();
    ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher);

    // First failure starts the shutdown timer.
    this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false);
    Assertions.assertTrue(logWatcher.list.get(0).getFormattedMessage().contains("keyset keys sync started failing"));

    // Move the clock just past the 7-day limit.
    when(clock.instant()).thenAnswer(i -> Instant.now().plus(7, ChronoUnit.DAYS).plusSeconds(60));

    // The mocked Shutdown(1) throws, so the call must fail. assertThrows makes a missing shutdown
    // fail the test immediately instead of skipping every assertion and waiting for a timeout.
    Assertions.assertThrows(RuntimeException.class,
            () -> this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false));

    verify(shutdownService).Shutdown(1);
    Assertions.assertTrue(logWatcher.list.stream().anyMatch(log ->
            log.getFormattedMessage().contains("keyset keys have been failing to sync for too long")));
    testContext.completeNow();
}

// Verifies that a successful refresh resets the failure timer, so a later failure starts a new streak.
@Test
void keysetKeyRecoverOnSuccess(VertxTestContext testContext) {
    // Streak starts now.
    this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false);
    when(clock.instant()).thenAnswer(i -> Instant.now().plus(3, ChronoUnit.DAYS));

    // Recovery after three days must clear the streak.
    this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true);

    // Go clearly past the 168-hour limit measured from the original failure. At exactly 7 days the
    // outcome would depend on sub-millisecond drift between two Instant.now() calls, so a broken
    // reset could slip through unnoticed.
    when(clock.instant()).thenAnswer(i -> Instant.now().plus(8, ChronoUnit.DAYS));
    assertDoesNotThrow(() -> {
        this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false);
    });
    verify(shutdownService, never()).Shutdown(anyInt());
    testContext.completeNow();
}

// Repeated successful refreshes must never reach the shutdown service.
@Test
void keysetKeyNoShutdownWhenAlwaysSuccessful(VertxTestContext testContext) {
    for (int attempt = 0; attempt < 3; attempt++) {
        this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true);
    }

    verify(shutdownService, never()).Shutdown(anyInt());
    testContext.completeNow();
}

// Progress warnings are rate limited: nothing new after 5 minutes, a new line after 11 minutes.
@Test
void keysetKeyLogProgressAtInterval(VertxTestContext testContext) {
    ListAppender<ILoggingEvent> logWatcher = new ListAppender<>();
    logWatcher.start();
    ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher);

    // First failure only starts the timer.
    this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false);
    final long countAfterFirstFailure = countKeysetKeyProgressWarnings(logWatcher);

    // Five minutes later: still inside the logging interval.
    when(clock.instant()).thenAnswer(i -> Instant.now().plus(5, ChronoUnit.MINUTES));
    this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false);
    final long countAfterFiveMinutes = countKeysetKeyProgressWarnings(logWatcher);
    Assertions.assertEquals(countAfterFirstFailure, countAfterFiveMinutes);

    // Eleven minutes later: the interval has passed, so a progress warning is expected.
    when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.MINUTES));
    this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false);
    final long countAfterElevenMinutes = countKeysetKeyProgressWarnings(logWatcher);
    Assertions.assertTrue(countAfterElevenMinutes > countAfterFiveMinutes);

    testContext.completeNow();
}

// Counts captured log events whose formatted message is a keyset key "still failing" progress line.
private static long countKeysetKeyProgressWarnings(ListAppender<ILoggingEvent> appender) {
    long matches = 0;
    for (ILoggingEvent event : appender.list) {
        if (event.getFormattedMessage().contains("keyset keys sync still failing")) {
            matches++;
        }
    }
    return matches;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ static IUIDOperatorService createUidOperatorService() throws Exception {
saltProvider.getSnapshot(Instant.now()).getFirstLevelSalt(),
/* out */ optOutPartitionFiles);
final IOptOutStore optOutStore = new StaticOptOutStore(optOutLocalStorage, make1mOptOutEntryConfig(), optOutPartitionFiles);
final OperatorShutdownHandler shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(1), Duration.ofHours(1), Clock.systemUTC(), new ShutdownService());
final OperatorShutdownHandler shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(1), Duration.ofHours(1), Duration.ofHours(1), Clock.systemUTC(), new ShutdownService());
return new UIDOperatorService(
optOutStore,
saltProvider,
Expand Down