Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conf/docker-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"enclave_platform": null,
"failure_shutdown_wait_hours": 120,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"store_refresh_stale_shutdown_hours": 12,
"operator_type": "public",
"disable_optout_token": true,
"enable_remote_config": true,
Expand Down
2 changes: 1 addition & 1 deletion conf/integ-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"cloud_encryption_keys_metadata_path": "http://localhost:8088/cloud_encryption_keys/retrieve",
"runtime_config_metadata_path": "http://localhost:8088/operator/config",
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"store_refresh_stale_shutdown_hours": 12,
"operator_type": "public",
"disable_optout_token": true,
"enable_remote_config": false,
Expand Down
2 changes: 1 addition & 1 deletion conf/local-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"key_sharing_endpoint_provide_app_names": true,
"client_side_token_generate_log_invalid_http_origins": true,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"store_refresh_stale_shutdown_hours": 12,
"operator_type": "public",
"encrypted_files": false,
"disable_optout_token": true,
Expand Down
2 changes: 1 addition & 1 deletion conf/local-e2e-docker-private-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"optout_delta_rotate_interval": 60,
"cloud_refresh_interval": 30,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"store_refresh_stale_shutdown_hours": 12,
"operator_type": "private",
"enable_remote_config": true,
"uid_instance_id_prefix": "local-private-operator"
Expand Down
2 changes: 1 addition & 1 deletion conf/local-e2e-docker-public-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
"optout_status_api_enabled": true,
"cloud_refresh_interval": 30,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"store_refresh_stale_shutdown_hours": 12,
"operator_type": "public",
"disable_optout_token": true,
"enable_remote_config": true,
Expand Down
2 changes: 1 addition & 1 deletion conf/local-e2e-private-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"client_side_token_generate_domain_name_check_enabled": false,
"client_side_token_generate_log_invalid_http_origins": true,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"store_refresh_stale_shutdown_hours": 12,
"operator_type": "private",
"enable_remote_config": true,
"uid_instance_id_prefix": "local-private-operator"
Expand Down
2 changes: 1 addition & 1 deletion conf/local-e2e-public-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"key_sharing_endpoint_provide_app_names": true,
"client_side_token_generate_log_invalid_http_origins": true,
"salts_expired_shutdown_hours": 12,
"keysetkeys_failed_shutdown_hours": 168,
"store_refresh_stale_shutdown_hours": 12,
"operator_type": "public",
"disable_optout_token": true,
"enable_remote_config": true,
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<enclave-aws.version>2.1.0</enclave-aws.version>
<enclave-azure.version>2.1.13</enclave-azure.version>
<enclave-gcp.version>2.1.0</enclave-gcp.version>
<uid2-shared.version>11.1.80</uid2-shared.version>
<uid2-shared.version>11.1.91</uid2-shared.version>
<image.version>${project.version}</image.version>
<maven.compiler.source>21</maven.compiler.source>
<maven.compiler.target>21</maven.compiler.target>
Expand Down
22 changes: 11 additions & 11 deletions src/main/java/com/uid2/operator/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public Main(Vertx vertx, JsonObject config) throws Exception {
this.encryptedCloudFilesEnabled = config.getBoolean(Const.Config.EncryptedFiles, false);
this.shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12),
Duration.ofHours(config.getInteger(Const.Config.SaltsExpiredShutdownHours, 12)),
Duration.ofHours(config.getInteger(Const.Config.KeysetKeysFailedShutdownHours, 168)),
Duration.ofHours(config.getInteger(Const.Config.StoreRefreshStaleShutdownHours, 12)),
Clock.systemUTC(), new ShutdownService());
this.uidInstanceIdProvider = new UidInstanceIdProvider(config);

Expand Down Expand Up @@ -423,26 +423,26 @@ private Future<Void> createStoreVerticles() throws Exception {
fs.add(createAndDeployRotatingStoreVerticle("runtime_config", (RuntimeConfigStore) configStore, Const.Config.ConfigScanPeriodMsProp));
}
fs.add(createAndDeployRotatingStoreVerticle("auth", clientKeyProvider, "auth_refresh_ms"));
fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms"));
fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms",
this.shutdownHandler::handleKeysetKeyRefreshResponse));
fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms"));
fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms"));
fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms"));
fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms"));
fs.add(createAndDeployCloudSyncStoreVerticle("optout", fsOptOut, optOutCloudSync));
CompositeFuture.all(fs).onComplete(ar -> {
if (ar.failed()) promise.fail(new Exception(ar.cause()));
else promise.complete();
else {
promise.complete();
this.shutdownHandler.startPeriodicStaleCheck(this.vertx);
}
});


return promise.future();
}

private Future<String> createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs) {
return createAndDeployRotatingStoreVerticle(name, store, storeRefreshConfigMs, null);
}

private Future<String> createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs, Consumer<Boolean> refreshCallback) {
final int intervalMs = config.getInteger(storeRefreshConfigMs, 10000);
final long intervalMs = config.getInteger(storeRefreshConfigMs, 10000);

Runnable refreshCallback = () -> this.shutdownHandler.handleStoreRefresh(name);

RotatingStoreVerticle rotatingStoreVerticle = new RotatingStoreVerticle(name, intervalMs, store, refreshCallback);
return vertx.deployVerticle(rotatingStoreVerticle);
Expand Down
93 changes: 55 additions & 38 deletions src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import com.uid2.operator.service.ShutdownService;
import com.uid2.shared.attest.AttestationResponseCode;
import lombok.extern.java.Log;
import io.vertx.core.Vertx;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.utils.Pair;
Expand All @@ -11,28 +11,30 @@
import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicReference;

public class OperatorShutdownHandler {
private static final Logger LOGGER = LoggerFactory.getLogger(OperatorShutdownHandler.class);
private static final int SALT_FAILURE_LOG_INTERVAL_MINUTES = 10;
private static final int KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES = 10;
private static final int STORE_REFRESH_STALENESS_CHECK_INTERVAL_MINUTES = 60;
private final Duration attestShutdownWaitTime;
private final Duration saltShutdownWaitTime;
private final Duration keysetKeyShutdownWaitTime;
private final Duration storeRefreshStaleTimeout;
private final AtomicReference<Instant> attestFailureStartTime = new AtomicReference<>(null);
private final AtomicReference<Instant> saltFailureStartTime = new AtomicReference<>(null);
private final AtomicReference<Instant> keysetKeyFailureStartTime = new AtomicReference<>(null);
private final AtomicReference<Instant> lastSaltFailureLogTime = new AtomicReference<>(null);
private final AtomicReference<Instant> lastKeysetKeyFailureLogTime = new AtomicReference<>(null);
private final Map<String, AtomicReference<Instant>> lastSuccessfulRefreshTimes = new ConcurrentHashMap<>();
private final Clock clock;
private final ShutdownService shutdownService;
private boolean isStalenessCheckScheduled = false;

public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime,
Duration keysetKeyShutdownWaitTime, Clock clock, ShutdownService shutdownService) {
Duration storeRefreshStaleTimeout, Clock clock, ShutdownService shutdownService) {
this.attestShutdownWaitTime = attestShutdownWaitTime;
this.saltShutdownWaitTime = saltShutdownWaitTime;
this.keysetKeyShutdownWaitTime = keysetKeyShutdownWaitTime;
this.storeRefreshStaleTimeout = storeRefreshStaleTimeout;
this.clock = clock;
this.shutdownService = shutdownService;
}
Expand Down Expand Up @@ -60,37 +62,6 @@ public void logSaltFailureAtInterval() {
}
}

public void handleKeysetKeyRefreshResponse(Boolean success) {
if (success) {
keysetKeyFailureStartTime.set(null);
lastKeysetKeyFailureLogTime.set(null);
LOGGER.debug("keyset keys sync successful");
} else {
Instant t = keysetKeyFailureStartTime.get();
if (t == null) {
keysetKeyFailureStartTime.set(clock.instant());
lastKeysetKeyFailureLogTime.set(clock.instant());
LOGGER.warn("keyset keys sync started failing. shutdown timer started");
} else {
Duration elapsed = Duration.between(t, clock.instant());
if (elapsed.compareTo(this.keysetKeyShutdownWaitTime) > 0) {
LOGGER.error("keyset keys have been failing to sync for too long. shutting down operator");
this.shutdownService.Shutdown(1);
} else {
logKeysetKeyFailureProgressAtInterval(t, elapsed);
}
}
}
}

private void logKeysetKeyFailureProgressAtInterval(Instant failureStartTime, Duration elapsed) {
Instant lastLogTime = lastKeysetKeyFailureLogTime.get();
if (lastLogTime == null || clock.instant().isAfter(lastLogTime.plus(KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES, ChronoUnit.MINUTES))) {
LOGGER.warn("keyset keys sync still failing - elapsed time: {}d {}h {}m", elapsed.toDays(), elapsed.toHoursPart(), elapsed.toMinutesPart());
lastKeysetKeyFailureLogTime.set(clock.instant());
}
}

public void handleAttestResponse(Pair<AttestationResponseCode, String> response) {
if (response.left() == AttestationResponseCode.AttestationFailure) {
LOGGER.error("core attestation failed with AttestationFailure, shutting down operator, core response: {}", response.right());
Expand All @@ -108,4 +79,50 @@ public void handleAttestResponse(Pair<AttestationResponseCode, String> response)
}
}
}

public void handleStoreRefresh(String storeName) {
lastSuccessfulRefreshTimes.computeIfAbsent(storeName, k -> new AtomicReference<>())
.set(clock.instant());
}

public void checkStoreRefreshStaleness() {
Instant now = clock.instant();
for (Map.Entry<String, AtomicReference<Instant>> entry : lastSuccessfulRefreshTimes.entrySet()) {
String storeName = entry.getKey();
Instant lastSuccess = entry.getValue().get();

if (lastSuccess == null) {
// Store hasn't had a successful refresh yet
// This should rarely happen since startup success also records timestamp, but keep as defensive guard for edge cases
LOGGER.warn("Store '{}' has no recorded successful refresh - skipping staleness check", storeName);
continue;
}

Duration timeSinceLastRefresh = Duration.between(lastSuccess, now);
LOGGER.debug("Store '{}' last successful refresh {} ago", storeName, timeSinceLastRefresh);
if (timeSinceLastRefresh.compareTo(storeRefreshStaleTimeout) > 0) {
LOGGER.error("Store '{}' has not refreshed successfully for {} hours ({}). Shutting down operator",
storeName, timeSinceLastRefresh.toHours(), timeSinceLastRefresh);
this.shutdownService.Shutdown(1);
return; // Exit after triggering shutdown for first stale store
}
}
}

public void startPeriodicStaleCheck(Vertx vertx) {
if (isStalenessCheckScheduled) {
LOGGER.warn("Periodic store staleness check already started");
return;
}

long intervalMs = STORE_REFRESH_STALENESS_CHECK_INTERVAL_MINUTES * 60 * 1000L;
vertx.setPeriodic(intervalMs, id -> {
LOGGER.debug("Running periodic store staleness check");
checkStoreRefreshStaleness();
});
isStalenessCheckScheduled = true;
LOGGER.info("Started periodic store staleness check (interval: {} minutes, timeout: {} hours)",
STORE_REFRESH_STALENESS_CHECK_INTERVAL_MINUTES,
storeRefreshStaleTimeout.toHours());
}
}
Loading
Loading