Skip to content

Commit 731ec58

Browse files
Adding delay for model registry call
1 parent 469874c commit 731ec58

File tree

7 files changed

+40
-12
lines changed

7 files changed

+40
-12
lines changed

x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceRevokeDefaultEndpointsIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ private ElasticInferenceService createElasticInferenceService() {
280280
return new ElasticInferenceService(
281281
senderFactory,
282282
createWithEmptySettings(threadPool),
283-
new ElasticInferenceServiceComponents(gatewayUrl),
283+
ElasticInferenceServiceComponents.withNoRevokeDelay(gatewayUrl),
284284
modelRegistry,
285285
new ElasticInferenceServiceAuthorizationHandler(gatewayUrl, threadPool)
286286
);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ public Collection<?> createComponents(PluginServices services) {
276276
ElasticInferenceServiceSettings inferenceServiceSettings = new ElasticInferenceServiceSettings(settings);
277277
String elasticInferenceUrl = inferenceServiceSettings.getElasticInferenceServiceUrl();
278278

279-
var elasticInferenceServiceComponentsInstance = new ElasticInferenceServiceComponents(elasticInferenceUrl);
279+
var elasticInferenceServiceComponentsInstance = ElasticInferenceServiceComponents.withDefaultRevokeDelay(elasticInferenceUrl);
280280
elasticInferenceServiceComponents.set(elasticInferenceServiceComponentsInstance);
281281

282282
var authorizationHandler = new ElasticInferenceServiceAuthorizationHandler(

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,9 +280,19 @@ private void handleRevokedDefaultConfigs(Set<String> authorizedDefaultModelIds)
280280
authorizationCompletedLatch.countDown();
281281
});
282282

283-
getServiceComponents().threadPool()
284-
.executor(UTILITY_THREAD_POOL_NAME)
285-
.execute(() -> modelRegistry.removeDefaultConfigs(unauthorizedDefaultInferenceEndpointIds, deleteInferenceEndpointsListener));
283+
Runnable removeFromRegistry = () -> {
284+
logger.debug("Synchronizing default inference endpoints");
285+
modelRegistry.removeDefaultConfigs(unauthorizedDefaultInferenceEndpointIds, deleteInferenceEndpointsListener);
286+
};
287+
288+
var delay = elasticInferenceServiceComponents.revokeAuthorizationDelay();
289+
if (delay == null) {
290+
getServiceComponents().threadPool().executor(UTILITY_THREAD_POOL_NAME).execute(removeFromRegistry);
291+
} else {
292+
getServiceComponents().threadPool()
293+
.schedule(removeFromRegistry, delay, getServiceComponents().threadPool().executor(UTILITY_THREAD_POOL_NAME));
294+
}
295+
286296
}
287297

288298
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceComponents.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,23 @@
88
package org.elasticsearch.xpack.inference.services.elastic;
99

1010
import org.elasticsearch.core.Nullable;
11+
import org.elasticsearch.core.TimeValue;
1112

12-
public record ElasticInferenceServiceComponents(@Nullable String elasticInferenceServiceUrl) {}
13+
/**
14+
* @param elasticInferenceServiceUrl the upstream Elastic Inference Server's URL
15+
* @param revokeAuthorizationDelay Amount of time to wait before attempting to revoke authorization to certain model ids.
16+
* null indicates that there should be no delay
17+
*/
18+
public record ElasticInferenceServiceComponents(@Nullable String elasticInferenceServiceUrl, @Nullable TimeValue revokeAuthorizationDelay) {
19+
private static final TimeValue DEFAULT_REVOKE_AUTHORIZATION_DELAY = TimeValue.timeValueMinutes(10);
20+
21+
public static final ElasticInferenceServiceComponents EMPTY_INSTANCE = new ElasticInferenceServiceComponents(null, null);
22+
23+
public static ElasticInferenceServiceComponents withNoRevokeDelay(String elasticInferenceServiceUrl) {
24+
return new ElasticInferenceServiceComponents(elasticInferenceServiceUrl, null);
25+
}
26+
27+
public static ElasticInferenceServiceComponents withDefaultRevokeDelay(String elasticInferenceServiceUrl) {
28+
return new ElasticInferenceServiceComponents(elasticInferenceServiceUrl, DEFAULT_REVOKE_AUTHORIZATION_DELAY);
29+
}
30+
}

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsModelTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public static ElasticInferenceServiceSparseEmbeddingsModel createModel(String ur
2626
new ElasticInferenceServiceSparseEmbeddingsServiceSettings(modelId, maxInputTokens, null),
2727
EmptyTaskSettings.INSTANCE,
2828
EmptySecretSettings.INSTANCE,
29-
new ElasticInferenceServiceComponents(url)
29+
ElasticInferenceServiceComponents.withNoRevokeDelay(url)
3030
);
3131
}
3232
}

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,7 +1046,7 @@ private void testUnifiedStreamError(int responseCode, String responseJson, Strin
10461046
new ElasticInferenceServiceCompletionServiceSettings("model_id", new RateLimitSettings(100)),
10471047
EmptyTaskSettings.INSTANCE,
10481048
EmptySecretSettings.INSTANCE,
1049-
new ElasticInferenceServiceComponents(eisGatewayUrl)
1049+
ElasticInferenceServiceComponents.withNoRevokeDelay(eisGatewayUrl)
10501050
);
10511051
PlainActionFuture<InferenceServiceResults> listener = new PlainActionFuture<>();
10521052
service.unifiedCompletionInfer(
@@ -1099,7 +1099,7 @@ private ElasticInferenceService createServiceWithMockSender(ElasticInferenceServ
10991099
return new ElasticInferenceService(
11001100
mock(HttpRequestSender.Factory.class),
11011101
createWithEmptySettings(threadPool),
1102-
new ElasticInferenceServiceComponents(null),
1102+
ElasticInferenceServiceComponents.EMPTY_INSTANCE,
11031103
mockModelRegistry(),
11041104
mockAuthHandler
11051105
);
@@ -1128,7 +1128,7 @@ private ElasticInferenceService createService(
11281128
return new ElasticInferenceService(
11291129
senderFactory,
11301130
createWithEmptySettings(threadPool),
1131-
new ElasticInferenceServiceComponents(gatewayUrl),
1131+
ElasticInferenceServiceComponents.withNoRevokeDelay(gatewayUrl),
11321132
mockModelRegistry(),
11331133
mockAuthHandler
11341134
);
@@ -1138,7 +1138,7 @@ private ElasticInferenceService createServiceWithAuthHandler(HttpRequestSender.F
11381138
return new ElasticInferenceService(
11391139
senderFactory,
11401140
createWithEmptySettings(threadPool),
1141-
new ElasticInferenceServiceComponents(eisGatewayUrl),
1141+
ElasticInferenceServiceComponents.withNoRevokeDelay(eisGatewayUrl),
11421142
mockModelRegistry(),
11431143
new ElasticInferenceServiceAuthorizationHandler(eisGatewayUrl, threadPool)
11441144
);

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/completion/ElasticInferenceServiceCompletionModelTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public void testOverridingModelId() {
2929
new ElasticInferenceServiceCompletionServiceSettings("model_id", new RateLimitSettings(100)),
3030
EmptyTaskSettings.INSTANCE,
3131
EmptySecretSettings.INSTANCE,
32-
new ElasticInferenceServiceComponents("url")
32+
ElasticInferenceServiceComponents.withNoRevokeDelay("url")
3333
);
3434

3535
var request = new UnifiedCompletionRequest(

0 commit comments

Comments
 (0)