Skip to content

Commit d44a286

Browse files
rauthsujaws-kevinrickard
authored andcommitted
feat: add ability to configure tes error cache expire time (#1751)
Added the ability to configure the TES error cache expiry time. TES does not restart when an error cache setting changes. If a configured time is less than 10 seconds or greater than 42,900 seconds (just under 12 hours), it falls back to the default time.
1 parent 3b6cf48 commit d44a286

File tree

4 files changed

+154
-32
lines changed

4 files changed

+154
-32
lines changed

src/main/java/com/aws/greengrass/tes/CredentialRequestHandler.java

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,14 @@ public class CredentialRequestHandler implements HttpHandler {
6868
public static final String AUTH_HEADER = "Authorization";
6969
public static final String IOT_CREDENTIALS_HTTP_VERB = "GET";
7070
public static final String SUPPORTED_REQUEST_VERB = "GET";
71-
public static final int TIME_BEFORE_CACHE_EXPIRE_IN_MIN = 5;
72-
public static final int CLOUD_4XX_ERROR_CACHE_IN_MIN = 2;
73-
public static final int CLOUD_5XX_ERROR_CACHE_IN_MIN = 1;
74-
public static final int UNKNOWN_ERROR_CACHE_IN_MIN = 5;
71+
// Margin, in seconds, subtracted from the credential expiry to obtain the cache expiry.
public static final int DEFAULT_TIME_BEFORE_CACHE_EXPIRE_IN_SEC = 300;
72+
// Default cache durations, in seconds, for 4xx, 5xx and unrecognized error responses.
public static final int DEFAULT_CLOUD_4XX_ERROR_CACHE_IN_SEC = 120;
73+
public static final int DEFAULT_CLOUD_5XX_ERROR_CACHE_IN_SEC = 60;
74+
public static final int DEFAULT_UNKNOWN_ERROR_CACHE_IN_SEC = 300;
75+
76+
// Error cache durations currently in effect, in seconds. They start at the defaults above and
// are overwritten by configureCacheSettings.
private volatile int cloud4xxErrorCacheInSec = DEFAULT_CLOUD_4XX_ERROR_CACHE_IN_SEC;
77+
private volatile int cloud5xxErrorCacheInSec = DEFAULT_CLOUD_5XX_ERROR_CACHE_IN_SEC;
78+
private volatile int unknownErrorCacheInSec = DEFAULT_UNKNOWN_ERROR_CACHE_IN_SEC;
7579

7680
private String iotCredentialsPath;
7781

@@ -142,6 +146,20 @@ void setIotCredentialsPath(String iotRoleAlias) {
142146
this.iotCredentialsPath = "/role-aliases/" + iotRoleAlias + "/credentials";
143147
}
144148

149+
/**
150+
* Configure error cache settings for error responses.
151+
*
* <p>The three values are stored exactly as given; this method performs no range validation.
* The stored values are used as the cache duration, in seconds, when the expiry of a cached
* error response is computed.
*
152+
* @param cloud4xxErrorCache error cache duration in seconds for 4xx errors.
153+
* @param cloud5xxErrorCache error cache duration in seconds for 5xx errors.
154+
* @param unknownErrorCache error cache duration in seconds for unknown errors.
155+
*/
156+
public void configureCacheSettings(int cloud4xxErrorCache, int cloud5xxErrorCache, int unknownErrorCache) {
157+
this.cloud4xxErrorCacheInSec = cloud4xxErrorCache;
158+
this.cloud5xxErrorCacheInSec = cloud5xxErrorCache;
159+
this.unknownErrorCacheInSec = unknownErrorCache;
160+
}
161+
162+
145163
@Override
146164
@SuppressWarnings("PMD.AvoidCatchingThrowable")
147165
public void handle(final HttpExchange exchange) throws IOException {
@@ -281,14 +299,14 @@ private byte[] getCredentialsBypassCache() {
281299
LOGGER.atError().kv(IOT_CRED_PATH_KEY, iotCredentialsPath)
282300
.log("Unable to cache expired credentials which expired at {}", expiry);
283301
} else {
284-
newExpiry = expiry.minus(Duration.ofMinutes(TIME_BEFORE_CACHE_EXPIRE_IN_MIN));
302+
newExpiry = expiry.minus(Duration.ofSeconds(DEFAULT_TIME_BEFORE_CACHE_EXPIRE_IN_SEC));
285303
tesCache.get(iotCredentialsPath).responseCode = HttpURLConnection.HTTP_OK;
286304

287305
if (newExpiry.isBefore(Instant.now(clock))) {
288306
LOGGER.atWarn().kv(IOT_CRED_PATH_KEY, iotCredentialsPath)
289307
.log("Can't cache credentials as new credentials {} will "
290-
+ "expire in less than {} minutes", expiry,
291-
TIME_BEFORE_CACHE_EXPIRE_IN_MIN);
308+
+ "expire in less than {} seconds", expiry,
309+
DEFAULT_TIME_BEFORE_CACHE_EXPIRE_IN_SEC);
292310
} else {
293311
LOGGER.atInfo().kv(IOT_CRED_PATH_KEY, iotCredentialsPath)
294312
.log("Received IAM credentials that will be cached until {}", newExpiry);
@@ -318,7 +336,7 @@ private byte[] getCredentialsBypassCache() {
318336
String responseString = "Failed to get connection";
319337
response = responseString.getBytes(StandardCharsets.UTF_8);
320338
// Use unknown error cache policy for SSL/TLS connection errors to prevent excessive retries
321-
newExpiry = Instant.now(clock).plus(Duration.ofMinutes(UNKNOWN_ERROR_CACHE_IN_MIN));
339+
newExpiry = Instant.now(clock).plus(Duration.ofSeconds(unknownErrorCacheInSec));
322340
tesCache.get(iotCredentialsPath).responseCode = HttpURLConnection.HTTP_INTERNAL_ERROR;
323341
tesCache.get(iotCredentialsPath).expiry = newExpiry;
324342
tesCache.get(iotCredentialsPath).credentials = response;
@@ -421,16 +439,16 @@ private String parseExpiryFromResponse(final String credentials) throws AWSIotEx
421439
}
422440

423441
/**
* Computes the instant until which an error response should stay cached.
*
* @param statusCode status code of the error response
* @return the current instant of {@code clock} plus the cache duration selected for
*         {@code statusCode}: the 4xx duration for 400-499, the 5xx duration for 500-599,
*         and the unknown-error duration for anything else
*/
private Instant getExpiryPolicyForErr(int statusCode) {
424-
int expiryTime = UNKNOWN_ERROR_CACHE_IN_MIN; // In case of unrecognized cloud errors, back off
442+
int expiryTime = unknownErrorCacheInSec; // In case of unrecognized cloud errors, back off
425443
// Add caching Time-To-Live (TTL) for TES cloud errors
426444
if (statusCode >= 400 && statusCode < 500) {
427445
// 4xx retries are not meaningful unless a user action has been taken, so the TTL should be longer
428-
expiryTime = CLOUD_4XX_ERROR_CACHE_IN_MIN;
446+
expiryTime = cloud4xxErrorCacheInSec;
429447
} else if (statusCode >= 500 && statusCode < 600) {
430448
// 5xx could be a temporary cloud unavailability, TTL should be shorter
431-
expiryTime = CLOUD_5XX_ERROR_CACHE_IN_MIN;
449+
expiryTime = cloud5xxErrorCacheInSec;
432450
}
433-
return Instant.now(clock).plus(Duration.ofMinutes(expiryTime));
451+
return Instant.now(clock).plus(Duration.ofSeconds(expiryTime));
434452
}
435453

436454
/**

src/main/java/com/aws/greengrass/tes/TokenExchangeService.java

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import com.aws.greengrass.authorization.exceptions.AuthorizationException;
1010
import com.aws.greengrass.config.Topic;
1111
import com.aws.greengrass.config.Topics;
12+
import com.aws.greengrass.config.WhatHappened;
1213
import com.aws.greengrass.dependency.ImplementsService;
1314
import com.aws.greengrass.dependency.State;
1415
import com.aws.greengrass.deployment.DeviceConfiguration;
@@ -41,6 +42,15 @@ public class TokenExchangeService extends GreengrassService implements AwsCreden
4142
private String iotRoleAlias;
4243
private HttpServerImpl server;
4344

45+
// Configuration keys, looked up under CONFIGURATION_CONFIG_KEY, for the error cache durations in seconds.
public static final String CLOUD_4XX_ERROR_CACHE_TOPIC = "error4xxCredentialRetryInSec";
46+
public static final String CLOUD_5XX_ERROR_CACHE_TOPIC = "error5xxCredentialRetryInSec";
47+
public static final String UNKNOWN_ERROR_CACHE_TOPIC = "errorUnknownCredentialRetryInSec";
48+
// Inclusive bounds checked by validateErrorCacheConfig; a value outside them is replaced by the default.
private static final int MINIMUM_ERROR_CACHE_IN_SEC = 10;
49+
// 42_900 s = 11 h 55 min, i.e. slightly under 12 hours.
private static final int MAXIMUM_ERROR_CACHE_IN_SEC = 42_900;
50+
// Most recently validated values passed to credentialRequestHandler; the config subscriber
// compares against them to decide whether the handler needs to be reconfigured.
private int cloud4xxErrorCache;
51+
private int cloud5xxErrorCache;
52+
private int unknownErrorCache;
53+
4454
private final AuthorizationHandler authZHandler;
4555
private final CredentialRequestHandler credentialRequestHandler;
4656

@@ -57,24 +67,77 @@ public TokenExchangeService(Topics topics,
5767
AuthorizationHandler authZHandler, DeviceConfiguration deviceConfiguration) {
5868
super(topics);
5969
port = Coerce.toInt(config.lookup(CONFIGURATION_CONFIG_KEY, PORT_TOPIC).dflt(DEFAULT_PORT));
70+
deviceConfiguration.getIotRoleAlias().subscribe((why, newv) -> {
71+
iotRoleAlias = Coerce.toString(newv);
72+
});
73+
74+
this.authZHandler = authZHandler;
75+
this.credentialRequestHandler = credentialRequestHandler;
76+
77+
cloud4xxErrorCache = validateErrorCacheConfig(Coerce.toInt(config.findOrDefault(
78+
CredentialRequestHandler.DEFAULT_CLOUD_4XX_ERROR_CACHE_IN_SEC, CONFIGURATION_CONFIG_KEY,
79+
CLOUD_4XX_ERROR_CACHE_TOPIC)), CLOUD_4XX_ERROR_CACHE_TOPIC,
80+
CredentialRequestHandler.DEFAULT_CLOUD_4XX_ERROR_CACHE_IN_SEC);
81+
cloud5xxErrorCache = validateErrorCacheConfig(Coerce.toInt(config.findOrDefault(
82+
CredentialRequestHandler.DEFAULT_CLOUD_5XX_ERROR_CACHE_IN_SEC, CONFIGURATION_CONFIG_KEY,
83+
CLOUD_5XX_ERROR_CACHE_TOPIC)), CLOUD_5XX_ERROR_CACHE_TOPIC,
84+
CredentialRequestHandler.DEFAULT_CLOUD_5XX_ERROR_CACHE_IN_SEC);
85+
unknownErrorCache = validateErrorCacheConfig(Coerce.toInt(config.findOrDefault(
86+
CredentialRequestHandler.DEFAULT_UNKNOWN_ERROR_CACHE_IN_SEC, CONFIGURATION_CONFIG_KEY,
87+
UNKNOWN_ERROR_CACHE_TOPIC)), UNKNOWN_ERROR_CACHE_TOPIC,
88+
CredentialRequestHandler.DEFAULT_UNKNOWN_ERROR_CACHE_IN_SEC);
89+
90+
credentialRequestHandler.configureCacheSettings(cloud4xxErrorCache, cloud5xxErrorCache, unknownErrorCache);
91+
6092
config.subscribe((why, node) -> {
93+
logger.atDebug("tes-config-change").kv("node", node).kv("what", why).log();
94+
if (why.equals(WhatHappened.timestampUpdated)) {
95+
return;
96+
}
97+
if (node != null && (node.childOf(CLOUD_4XX_ERROR_CACHE_TOPIC)
98+
|| node.childOf(CLOUD_5XX_ERROR_CACHE_TOPIC)
99+
|| node.childOf(UNKNOWN_ERROR_CACHE_TOPIC))) {
100+
101+
int newCloud4xxErrorCache = validateErrorCacheConfig(Coerce.toInt(config.findOrDefault(
102+
CredentialRequestHandler.DEFAULT_CLOUD_4XX_ERROR_CACHE_IN_SEC, CONFIGURATION_CONFIG_KEY,
103+
CLOUD_4XX_ERROR_CACHE_TOPIC)), CLOUD_4XX_ERROR_CACHE_TOPIC,
104+
CredentialRequestHandler.DEFAULT_CLOUD_4XX_ERROR_CACHE_IN_SEC);
105+
int newCloud5xxErrorCache = validateErrorCacheConfig(Coerce.toInt(config.findOrDefault(
106+
CredentialRequestHandler.DEFAULT_CLOUD_5XX_ERROR_CACHE_IN_SEC, CONFIGURATION_CONFIG_KEY,
107+
CLOUD_5XX_ERROR_CACHE_TOPIC)), CLOUD_5XX_ERROR_CACHE_TOPIC,
108+
CredentialRequestHandler.DEFAULT_CLOUD_5XX_ERROR_CACHE_IN_SEC);
109+
int newUnknownErrorCache = validateErrorCacheConfig(Coerce.toInt(config.findOrDefault(
110+
CredentialRequestHandler.DEFAULT_UNKNOWN_ERROR_CACHE_IN_SEC, CONFIGURATION_CONFIG_KEY,
111+
UNKNOWN_ERROR_CACHE_TOPIC)), UNKNOWN_ERROR_CACHE_TOPIC,
112+
CredentialRequestHandler.DEFAULT_UNKNOWN_ERROR_CACHE_IN_SEC);
113+
114+
if (cloud4xxErrorCache != newCloud4xxErrorCache
115+
|| cloud5xxErrorCache != newCloud5xxErrorCache
116+
|| unknownErrorCache != newUnknownErrorCache) {
117+
118+
cloud4xxErrorCache = newCloud4xxErrorCache;
119+
cloud5xxErrorCache = newCloud5xxErrorCache;
120+
unknownErrorCache = newUnknownErrorCache;
121+
122+
credentialRequestHandler.configureCacheSettings(
123+
newCloud4xxErrorCache, newCloud5xxErrorCache, newUnknownErrorCache);
124+
125+
logger.atInfo("tes-error-cache-config-change")
126+
.kv("node", node).kv("why", why)
127+
.log("TES error cache configuration updated");
128+
}
129+
}
61130
if (node != null && node.childOf(PORT_TOPIC)) {
62-
logger.atDebug("tes-config-change").kv("node", node).kv("why", why).log();
63131
port = Coerce.toInt(node);
64132
Topic activePortTopic = config.lookup(CONFIGURATION_CONFIG_KEY, ACTIVE_PORT_TOPIC);
133+
65134
if (port != Coerce.toInt(activePortTopic)) {
66-
logger.atInfo("tes-config-change").kv(PORT_TOPIC, port).kv("node", node).kv("why", why)
135+
logger.atInfo("tes-port-config-change").kv(PORT_TOPIC, port).kv("node", node).kv("why", why)
67136
.log("Restarting TES server due to port config change");
68137
requestRestart();
69138
}
70139
}
71140
});
72-
deviceConfiguration.getIotRoleAlias().subscribe((why, newv) -> {
73-
iotRoleAlias = Coerce.toString(newv);
74-
});
75-
76-
this.authZHandler = authZHandler;
77-
this.credentialRequestHandler = credentialRequestHandler;
78141
}
79142

80143
@Override
@@ -130,6 +193,16 @@ private void validateConfig() {
130193
}
131194
}
132195

196+
/**
* Returns the given error cache value if it lies within the allowed range, otherwise the default.
*
* <p>The allowed range is {@code MINIMUM_ERROR_CACHE_IN_SEC} to {@code MAXIMUM_ERROR_CACHE_IN_SEC},
* both inclusive. An out-of-range value is reported with an error-level log entry before the
* default is returned.
*
* @param newCacheValue configured cache duration in seconds
* @param topic name of the configuration topic; used only in the log message
* @param defaultCacheValue value returned when {@code newCacheValue} is out of range
* @return {@code newCacheValue} when it is in range, otherwise {@code defaultCacheValue}
*/
private int validateErrorCacheConfig(int newCacheValue, String topic, int defaultCacheValue) {
197+
if (newCacheValue < MINIMUM_ERROR_CACHE_IN_SEC || newCacheValue > MAXIMUM_ERROR_CACHE_IN_SEC) {
198+
logger.atError()
199+
.log("Error cache value must be between {} and {} seconds, setting {} to default value {}",
200+
MINIMUM_ERROR_CACHE_IN_SEC, MAXIMUM_ERROR_CACHE_IN_SEC, topic, defaultCacheValue);
201+
return defaultCacheValue;
202+
}
203+
return newCacheValue;
204+
}
205+
133206
@Override
134207
public AwsCredentials resolveCredentials() {
135208
return credentialRequestHandler.getAwsCredentials();

src/test/java/com/aws/greengrass/tes/CredentialRequestHandlerTest.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@
4747
import java.util.concurrent.Executors;
4848
import java.util.concurrent.Future;
4949

50-
import static com.aws.greengrass.tes.CredentialRequestHandler.CLOUD_4XX_ERROR_CACHE_IN_MIN;
51-
import static com.aws.greengrass.tes.CredentialRequestHandler.CLOUD_5XX_ERROR_CACHE_IN_MIN;
52-
import static com.aws.greengrass.tes.CredentialRequestHandler.TIME_BEFORE_CACHE_EXPIRE_IN_MIN;
53-
import static com.aws.greengrass.tes.CredentialRequestHandler.UNKNOWN_ERROR_CACHE_IN_MIN;
50+
import static com.aws.greengrass.tes.CredentialRequestHandler.DEFAULT_CLOUD_4XX_ERROR_CACHE_IN_SEC;
51+
import static com.aws.greengrass.tes.CredentialRequestHandler.DEFAULT_CLOUD_5XX_ERROR_CACHE_IN_SEC;
52+
import static com.aws.greengrass.tes.CredentialRequestHandler.DEFAULT_UNKNOWN_ERROR_CACHE_IN_SEC;
53+
import static com.aws.greengrass.tes.CredentialRequestHandler.DEFAULT_TIME_BEFORE_CACHE_EXPIRE_IN_SEC;
5454
import static com.aws.greengrass.testcommons.testutilities.ExceptionLogProtector.ignoreExceptionOfType;
5555
import static org.hamcrest.MatcherAssert.assertThat;
5656
import static org.hamcrest.Matchers.containsString;
@@ -320,15 +320,15 @@ void GIVEN_credential_handler_WHEN_called_handle_THEN_caches_creds() throws Exce
320320
verify(mockStream, times(1)).write(expectedResponse);
321321

322322
// Expiry time in the near future won't give an error, but there will be no caching
323-
expirationTime = Instant.now().plus(Duration.ofMinutes(TIME_BEFORE_CACHE_EXPIRE_IN_MIN - 1));
323+
expirationTime = Instant.now().plus(Duration.ofSeconds(DEFAULT_TIME_BEFORE_CACHE_EXPIRE_IN_SEC - 60));
324324
responseStr = String.format(RESPONSE_STR, expirationTime.toString());
325325
mockResponse = new IotCloudResponse(responseStr.getBytes(StandardCharsets.UTF_8), 200);
326326
when(mockCloudHelper.sendHttpRequest(any(), any(), any(), any(), any())).thenReturn(mockResponse);
327327
handler.handle(mockExchange);
328328
verify(mockCloudHelper, times(2)).sendHttpRequest(any(), any(), any(), any(), any());
329329

330330
// Expiry time in future will result in credentials being cached
331-
expirationTime = Instant.now().plus(Duration.ofMinutes(TIME_BEFORE_CACHE_EXPIRE_IN_MIN + 1));
331+
expirationTime = Instant.now().plus(Duration.ofSeconds(DEFAULT_TIME_BEFORE_CACHE_EXPIRE_IN_SEC + 60));
332332
responseStr = String.format(RESPONSE_STR, expirationTime.toString());
333333
mockResponse = new IotCloudResponse(responseStr.getBytes(StandardCharsets.UTF_8), 200);
334334
when(mockCloudHelper.sendHttpRequest(any(), any(), any(), any(), any())).thenReturn(mockResponse);
@@ -401,7 +401,7 @@ void GIVEN_4xx_response_code_WHEN_called_handle_THEN_expire_in_2_minutes() throw
401401
String.format("TES responded with status code: %d. Caching response. ", expectedStatus).getBytes();
402402
// expire in 2 minutes
403403
handler.getAwsCredentials();
404-
Instant expirationTime = Instant.now().plus(Duration.ofMinutes(CLOUD_4XX_ERROR_CACHE_IN_MIN));
404+
Instant expirationTime = Instant.now().plus(Duration.ofSeconds(DEFAULT_CLOUD_4XX_ERROR_CACHE_IN_SEC));
405405
Clock mockClock = Clock.fixed(expirationTime, ZoneId.of("UTC"));
406406
handler.setClock(mockClock);
407407
handler.getAwsCredentials();
@@ -425,7 +425,7 @@ void GIVEN_5xx_response_code_WHEN_called_handle_THEN_expire_in_1_minute() throws
425425
String.format("TES responded with status code: %d. Caching response. ", expectedStatus).getBytes();
426426
// expire in 1 minute
427427
handler.getAwsCredentials();
428-
Instant expirationTime = Instant.now().plus(Duration.ofMinutes(CLOUD_5XX_ERROR_CACHE_IN_MIN));
428+
Instant expirationTime = Instant.now().plus(Duration.ofSeconds(DEFAULT_CLOUD_5XX_ERROR_CACHE_IN_SEC));
429429
Clock mockClock = Clock.fixed(expirationTime, ZoneId.of("UTC"));
430430
handler.setClock(mockClock);
431431
handler.getAwsCredentials();
@@ -449,7 +449,7 @@ void GIVEN_unknown_error_response_code_WHEN_called_handle_THEN_expire_in_5_minut
449449
String.format("TES responded with status code: %d. Caching response. ", expectedStatus).getBytes();
450450
// expire in 5 minutes
451451
handler.getAwsCredentials();
452-
Instant expirationTime = Instant.now().plus(Duration.ofMinutes(UNKNOWN_ERROR_CACHE_IN_MIN));
452+
Instant expirationTime = Instant.now().plus(Duration.ofSeconds(DEFAULT_UNKNOWN_ERROR_CACHE_IN_SEC));
453453
Clock mockClock = Clock.fixed(expirationTime, ZoneId.of("UTC"));
454454
handler.setClock(mockClock);
455455
handler.getAwsCredentials();

0 commit comments

Comments
 (0)