Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
006bdf2
Starting refactor
jonathan-buttner Aug 19, 2025
2a7ff64
Merge branch 'main' of github.com:elastic/elasticsearch into ml-remov…
jonathan-buttner Aug 28, 2025
fca2543
Not sending enabled field across nodes
jonathan-buttner Aug 28, 2025
80d4224
Adding transport version change
jonathan-buttner Aug 29, 2025
2ca11e2
Merge branch 'main' of github.com:elastic/elasticsearch into ml-remov…
jonathan-buttner Aug 29, 2025
d8b841f
Removing minimum settings changes
jonathan-buttner Aug 29, 2025
97a3730
Addressing feedback
jonathan-buttner Sep 2, 2025
cee82cf
Merge branch 'main' of github.com:elastic/elasticsearch into ml-remov…
jonathan-buttner Sep 2, 2025
dc7f53b
Rejecting rate limit field
jonathan-buttner Sep 2, 2025
df0a224
Ensure parsing from index does not throw
jonathan-buttner Sep 2, 2025
314b4ba
Merge branch 'main' of github.com:elastic/elasticsearch into ml-remov…
jonathan-buttner Sep 2, 2025
910d317
Adding test to throw when rate limit is in request
jonathan-buttner Sep 2, 2025
6fd0c6d
Merge branch 'main' into ml-remove-eis-rl
jonathan-buttner Sep 2, 2025
48e88b3
Returning validation exception for rate limit field
jonathan-buttner Sep 3, 2025
a1d75fe
Merge branch 'main' of github.com:elastic/elasticsearch into ml-remov…
jonathan-buttner Sep 3, 2025
04d5699
Merge branch 'ml-remove-eis-rl' of github.com:jonathan-buttner/elasti…
jonathan-buttner Sep 3, 2025
dfd9154
Merge branch 'main' of github.com:elastic/elasticsearch into ml-remov…
jonathan-buttner Sep 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ public static ElasticInferenceServiceCompletionServiceSettings fromMap(Map<Strin
ValidationException validationException = new ValidationException();

String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
RateLimitSettings.disabledRateLimiting(map);

if (validationException.validationErrors().isEmpty() == false) {
throw validationException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@ public static ElasticInferenceServiceDenseTextEmbeddingsServiceSettings fromMap(
ValidationException validationException = new ValidationException();

String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
RateLimitSettings.disabledRateLimiting(map);

SimilarityMeasure similarity = extractSimilarity(map, ModelConfigurations.SERVICE_SETTINGS, validationException);
Integer dims = removeAsType(map, DIMENSIONS, Integer.class);
Integer maxInputTokens = removeAsType(map, MAX_INPUT_TOKENS, Integer.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ public static ElasticInferenceServiceRerankServiceSettings fromMap(Map<String, O
ValidationException validationException = new ValidationException();

String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
RateLimitSettings.disabledRateLimiting(map);

return new ElasticInferenceServiceRerankServiceSettings(modelId);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@ public static ElasticInferenceServiceSparseEmbeddingsServiceSettings fromMap(Map
validationException
);

RateLimitSettings.disabledRateLimiting(map);

if (validationException.validationErrors().isEmpty() == false) {
throw validationException;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveLong;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap;

Expand All @@ -53,12 +52,6 @@ public static RateLimitSettings of(
return requestsPerMinute == null ? defaultValue : new RateLimitSettings(requestsPerMinute);
}

/**
 * Strips any rate limit configuration from the supplied settings map and reports rate limiting as disabled.
 *
 * @param map the settings map; {@code removeFromMap} is invoked on it with {@code FIELD_NAME}, and the value it
 *            yields is discarded
 * @return the shared {@code DISABLED_INSTANCE}; a new object is never created here
 */
public static RateLimitSettings disabledRateLimiting(Map<String, Object> map) {
removeFromMap(map, FIELD_NAME);

return DISABLED_INSTANCE;
}

public static Map<String, SettingsConfiguration> toSettingsConfigurationWithDescription(
String description,
EnumSet<TaskType> supportedTaskTypes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import java.util.Map;

import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModelsTests.randomElserModel;
import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;

Expand Down Expand Up @@ -58,7 +57,7 @@ public void testFromMap() {
assertThat(serviceSettings, is(new ElasticInferenceServiceSparseEmbeddingsServiceSettings(modelId, null)));
}

public void testFromMap_RemovesRateLimitSettings() {
public void testFromMap_DoesNotRemoveRateLimitField() {
var modelId = "my-model-id";
var map = new HashMap<String, Object>(
Map.of(
Expand All @@ -70,7 +69,7 @@ public void testFromMap_RemovesRateLimitSettings() {
);
var serviceSettings = ElasticInferenceServiceSparseEmbeddingsServiceSettings.fromMap(map);

assertThat(map, anEmptyMap());
assertThat(map, is(Map.of(RateLimitSettings.FIELD_NAME, Map.of(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 100))));
assertThat(serviceSettings, is(new ElasticInferenceServiceSparseEmbeddingsServiceSettings(modelId, null)));
assertThat(serviceSettings.rateLimitSettings(), sameInstance(RateLimitSettings.DISABLED_INSTANCE));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import org.elasticsearch.xpack.inference.services.elastic.response.ElasticInferenceServiceAuthorizationResponseEntity;
import org.elasticsearch.xpack.inference.services.elastic.sparseembeddings.ElasticInferenceServiceSparseEmbeddingsModel;
import org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels;
import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
import org.hamcrest.MatcherAssert;
import org.hamcrest.Matchers;
import org.junit.After;
Expand Down Expand Up @@ -197,6 +198,27 @@ public void testParseRequestConfig_ThrowsWhenAnExtraKeyExistsInServiceSettingsMa
}
}

public void testParseRequestConfig_ThrowsWhenRateLimitFieldExistsInServiceSettingsMap() throws IOException {
try (var service = createServiceWithMockSender()) {
Map<String, Object> serviceSettings = new HashMap<>(
Map.of(
ServiceFields.MODEL_ID,
ElserModels.ELSER_V2_MODEL,
RateLimitSettings.FIELD_NAME,
new HashMap<>(Map.of(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 100))
)
);

var config = getRequestConfigMap(serviceSettings, Map.of(), Map.of());

var failureListener = getModelListenerForException(
ElasticsearchStatusException.class,
"Configuration contains settings [{rate_limit={requests_per_minute=100}}] unknown to the [elastic] service"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any way to have this message be more specific? It's not really accurate to say that rate_limit or requests_per_minute are unknown; they're just disabled in specific cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea 👍

);
service.parseRequestConfig("id", TaskType.SPARSE_EMBEDDING, config, failureListener);
}
}

public void testParseRequestConfig_ThrowsWhenAnExtraKeyExistsInTaskSettingsMap() throws IOException {
try (var service = createServiceWithMockSender()) {
var taskSettings = Map.of("extra_key", (Object) "value");
Expand Down Expand Up @@ -297,6 +319,39 @@ public void testParsePersistedConfigWithSecrets_DoesNotThrowWhenAnExtraKeyExists
}
}

/**
 * Parsing a persisted sparse embedding configuration must succeed even when the stored service settings still
 * contain a rate limit entry. After parsing, the rate limit entry is the only thing left in the service
 * settings map that was handed in.
 */
public void testParsePersistedConfigWithSecrets_DoesNotThrowWhenRateLimitFieldExistsInServiceSettings() throws IOException {
    try (var service = createServiceWithMockSender()) {
        // Build the persisted service settings step by step: a model id plus a nested rate limit object.
        Map<String, Object> rateLimitEntry = new HashMap<>();
        rateLimitEntry.put(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 100);

        Map<String, Object> serviceSettingsMap = new HashMap<>();
        serviceSettingsMap.put(ServiceFields.MODEL_ID, ElserModels.ELSER_V2_MODEL);
        serviceSettingsMap.put(RateLimitSettings.FIELD_NAME, rateLimitEntry);

        var persisted = getPersistedConfigMap(serviceSettingsMap, Map.of(), Map.of());
        var model = service.parsePersistedConfigWithSecrets("id", TaskType.SPARSE_EMBEDDING, persisted.config(), persisted.secrets());

        assertThat(model, instanceOf(ElasticInferenceServiceSparseEmbeddingsModel.class));

        var sparseModel = (ElasticInferenceServiceSparseEmbeddingsModel) model;
        assertThat(sparseModel.getServiceSettings().modelId(), is(ElserModels.ELSER_V2_MODEL));
        assertThat(sparseModel.getTaskSettings(), is(EmptyTaskSettings.INSTANCE));
        assertThat(sparseModel.getSecretSettings(), is(EmptySecretSettings.INSTANCE));

        // The rate limit field must survive parsing untouched.
        Map<String, Object> expectedRemainder = Map.of(
            RateLimitSettings.FIELD_NAME,
            Map.of(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 100)
        );
        assertThat(serviceSettingsMap, is(expectedRemainder));
    }
}

public void testParsePersistedConfigWithSecrets_DoesNotThrowWhenAnExtraKeyExistsInTaskSettings() throws IOException {
try (var service = createServiceWithMockSender()) {
var taskSettings = Map.of("extra_key", (Object) "value");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import java.util.HashMap;
import java.util.Map;

import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;
Expand Down Expand Up @@ -59,7 +58,7 @@ public void testFromMap() {
assertThat(serviceSettings.rateLimitSettings(), sameInstance(RateLimitSettings.DISABLED_INSTANCE));
}

public void testFromMap_RemovesRateLimitingField() {
public void testFromMap_DoesNotRemoveRateLimitField() {
var modelId = "my-model-id";

var map = new HashMap<String, Object>(
Expand All @@ -72,7 +71,7 @@ public void testFromMap_RemovesRateLimitingField() {
);
var serviceSettings = ElasticInferenceServiceRerankServiceSettings.fromMap(map);

assertThat(map, anEmptyMap());
assertThat(map, is(Map.of(RateLimitSettings.FIELD_NAME, Map.of(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 100))));
assertThat(serviceSettings, is(new ElasticInferenceServiceRerankServiceSettings(modelId)));
assertThat(serviceSettings.rateLimitSettings(), sameInstance(RateLimitSettings.DISABLED_INSTANCE));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import java.util.HashMap;
import java.util.Map;

import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;

Expand Down Expand Up @@ -74,7 +73,7 @@ public void testFromMap_Request_WithAllSettings() {
assertThat(serviceSettings.maxInputTokens(), is(maxInputTokens));
}

public void testFromMap_Request_WithAllSettings_RemovesRateLimitField() {
public void testFromMap_Request_WithAllSettings_DoesNotRemoveRateLimitField() {
var modelId = "my-dense-model-id";
var similarity = SimilarityMeasure.COSINE;
var dimensions = 384;
Expand All @@ -96,7 +95,7 @@ public void testFromMap_Request_WithAllSettings_RemovesRateLimitField() {
);
var serviceSettings = ElasticInferenceServiceDenseTextEmbeddingsServiceSettings.fromMap(map);

assertThat(map, anEmptyMap());
assertThat(map, is(Map.of(RateLimitSettings.FIELD_NAME, Map.of(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 100))));
assertThat(serviceSettings.modelId(), is(modelId));
assertThat(serviceSettings.similarity(), is(similarity));
assertThat(serviceSettings.dimensions(), is(dimensions));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import java.util.HashMap;
import java.util.Map;

import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;

Expand Down Expand Up @@ -54,7 +53,7 @@ public void testFromMap() {
assertThat(serviceSettings.rateLimitSettings(), sameInstance(RateLimitSettings.DISABLED_INSTANCE));
}

public void testFromMap_RemovesRateLimitingField() {
public void testFromMap_DoesNotRemoveRateLimitField() {
var modelId = "my-model-id";

var map = new HashMap<String, Object>(
Expand All @@ -65,10 +64,11 @@ public void testFromMap_RemovesRateLimitingField() {
new HashMap<>(Map.of(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 100))
)
);

var serviceSettings = ElasticInferenceServiceRerankServiceSettings.fromMap(map);

assertThat(serviceSettings, is(new ElasticInferenceServiceRerankServiceSettings(modelId)));
assertThat(map, anEmptyMap());
assertThat(map, is(Map.of(RateLimitSettings.FIELD_NAME, Map.of(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 100))));
assertThat(serviceSettings.rateLimitSettings(), sameInstance(RateLimitSettings.DISABLED_INSTANCE));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@
import java.util.Map;
import java.util.concurrent.TimeUnit;

import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;

public class RateLimitSettingsTests extends AbstractBWCWireSerializationTestCase<RateLimitSettings> {

Expand Down Expand Up @@ -110,18 +108,6 @@ public void testToXContent() throws IOException {
{"rate_limit":{"requests_per_minute":100}}"""));
}

/**
 * {@code RateLimitSettings.disabledRateLimiting} must hand back the shared disabled instance and leave the
 * settings map empty when the map held only a rate limit entry.
 */
public void testDisableRateLimiting() {
    Map<String, Object> rateLimitEntry = new HashMap<>();
    rateLimitEntry.put(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 100);

    Map<String, Object> settings = new HashMap<>();
    settings.put(RateLimitSettings.FIELD_NAME, rateLimitEntry);

    var disabled = RateLimitSettings.disabledRateLimiting(settings);
    var expected = new RateLimitSettings(1, TimeUnit.MINUTES, false);

    assertThat(disabled, is(expected));
    assertThat(disabled, sameInstance(RateLimitSettings.DISABLED_INSTANCE));
    assertFalse(disabled.isEnabled());
    assertThat(settings, anEmptyMap());
}

public void testToXContent_WhenDisabled() throws IOException {
var settings = new RateLimitSettings(1, TimeUnit.MINUTES, false);

Expand Down