Skip to content

Commit e12fa47

Browse files
[9.2] [ML][Inference Endpoints] Anthropic endpoint creation: ensure max tokens parameter is passed as expected (#241212) (#241343)
# Backport This will backport the following commits from `main` to `9.2`: - [[ML][Inference Endpoints] Anthropic endpoint creation: ensure max tokens parameter is passed as expected (#241212)](#241212) <!--- Backport version: 9.6.6 --> ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sorenlouv/backport) <!--BACKPORT [{"author":{"name":"Melissa Alvarez","email":"[email protected]"},"sourceCommit":{"committedDate":"2025-10-30T17:36:58Z","message":"[ML][Inference Endpoints] Anthropic endpoint creation: ensure max tokens parameter is passed as expected (#241212)\n\n## Summary\n\nRelated to this [issue](https://github.com/elastic/kibana/issues/241142)\nand this [fix](https://github.com/elastic/kibana/pull/241188).\n\nThis PR:\n- updates the inference creation endpoint to ensure max_tokens are sent\ncorrectly for Anthropic\n- ensures that max_tokens is added back into the providerConfig when\nviewing the endpoint so that it shows up correctly in the form\n\nThis is a temporary workaround for anthropic max_tokens handling until\nthe services endpoint is updated to reflect the correct structure.\nAnthropic is unique in that it requires max_tokens to be sent as part of\nthe task_settings instead of the usual service_settings.\nUntil the services endpoint is updated to reflect that, there is no way\nfor the form UI to know where to put max_tokens. This can be removed\nonce that update is made.\n\n\n### Checklist\n\nCheck the PR satisfies following conditions. 
\n\nReviewers should verify this PR satisfies this list as well.\n\n- [ ] Any text added follows [EUI's writing\nguidelines](https://elastic.github.io/eui/#/guidelines/writing), uses\nsentence case text and includes [i18n\nsupport](https://github.com/elastic/kibana/blob/main/src/platform/packages/shared/kbn-i18n/README.md)\n- [ ]\n[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)\nwas added for features that require explanation or tutorials\n- [ ] [Unit or functional\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\nwere updated or added to match the most common scenarios\n- [ ] If a plugin configuration key changed, check if it needs to be\nallowlisted in the cloud and added to the [docker\nlist](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)\n- [ ] This was checked for breaking HTTP API changes, and any breaking\nchanges have been approved by the breaking-change committee. 
The\n`release_note:breaking` label should be applied in these situations.\n- [ ] [Flaky Test\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was\nused on any tests changed\n- [ ] The PR description includes the appropriate Release Notes section,\nand the correct `release_note:*` label is applied per the\n[guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)\n- [ ] Review the [backport\nguidelines](https://docs.google.com/document/d/1VyN5k91e5OVumlc0Gb9RPa3h1ewuPE705nRtioPiTvY/edit?usp=sharing)\nand apply applicable `backport:*` labels.","sha":"847f9de184d2918f261148ee62350e22bf7e079b","branchLabelMapping":{"^v9.3.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:fix",":ml","backport:version","Feature:Inference UI","v9.3.0","v8.19.7","v9.1.7","v9.2.1"],"title":"[ML][Inference Endpoints] Anthropic endpoint creation: ensure max tokens parameter is passed as expected","number":241212,"url":"https://github.com/elastic/kibana/pull/241212","mergeCommit":{"message":"[ML][Inference Endpoints] Anthropic endpoint creation: ensure max tokens parameter is passed as expected (#241212)\n\n## Summary\n\nRelated to this [issue](https://github.com/elastic/kibana/issues/241142)\nand this [fix](https://github.com/elastic/kibana/pull/241188).\n\nThis PR:\n- updates the inference creation endpoint to ensure max_tokens are sent\ncorrectly for Anthropic\n- ensures that max_tokens is added back into the providerConfig when\nviewing the endpoint so that it shows up correctly in the form\n\nThis is a temporary workaround for anthropic max_tokens handling until\nthe services endpoint is updated to reflect the correct structure.\nAnthropic is unique in that it requires max_tokens to be sent as part of\nthe task_settings instead of the usual service_settings.\nUntil the services endpoint is updated to reflect that, there is no way\nfor the form UI to know where to put max_tokens. 
This can be removed\nonce that update is made.\n\n\n### Checklist\n\nCheck the PR satisfies following conditions. \n\nReviewers should verify this PR satisfies this list as well.\n\n- [ ] Any text added follows [EUI's writing\nguidelines](https://elastic.github.io/eui/#/guidelines/writing), uses\nsentence case text and includes [i18n\nsupport](https://github.com/elastic/kibana/blob/main/src/platform/packages/shared/kbn-i18n/README.md)\n- [ ]\n[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)\nwas added for features that require explanation or tutorials\n- [ ] [Unit or functional\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\nwere updated or added to match the most common scenarios\n- [ ] If a plugin configuration key changed, check if it needs to be\nallowlisted in the cloud and added to the [docker\nlist](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)\n- [ ] This was checked for breaking HTTP API changes, and any breaking\nchanges have been approved by the breaking-change committee. 
The\n`release_note:breaking` label should be applied in these situations.\n- [ ] [Flaky Test\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was\nused on any tests changed\n- [ ] The PR description includes the appropriate Release Notes section,\nand the correct `release_note:*` label is applied per the\n[guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)\n- [ ] Review the [backport\nguidelines](https://docs.google.com/document/d/1VyN5k91e5OVumlc0Gb9RPa3h1ewuPE705nRtioPiTvY/edit?usp=sharing)\nand apply applicable `backport:*` labels.","sha":"847f9de184d2918f261148ee62350e22bf7e079b"}},"sourceBranch":"main","suggestedTargetBranches":["8.19","9.1","9.2"],"targetPullRequestStates":[{"branch":"main","label":"v9.3.0","branchLabelMappingKey":"^v9.3.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/241212","number":241212,"mergeCommit":{"message":"[ML][Inference Endpoints] Anthropic endpoint creation: ensure max tokens parameter is passed as expected (#241212)\n\n## Summary\n\nRelated to this [issue](https://github.com/elastic/kibana/issues/241142)\nand this [fix](https://github.com/elastic/kibana/pull/241188).\n\nThis PR:\n- updates the inference creation endpoint to ensure max_tokens are sent\ncorrectly for Anthropic\n- ensures that max_tokens is added back into the providerConfig when\nviewing the endpoint so that it shows up correctly in the form\n\nThis is a temporary workaround for anthropic max_tokens handling until\nthe services endpoint is updated to reflect the correct structure.\nAnthropic is unique in that it requires max_tokens to be sent as part of\nthe task_settings instead of the usual service_settings.\nUntil the services endpoint is updated to reflect that, there is no way\nfor the form UI to know where to put max_tokens. This can be removed\nonce that update is made.\n\n\n### Checklist\n\nCheck the PR satisfies following conditions. 
\n\nReviewers should verify this PR satisfies this list as well.\n\n- [ ] Any text added follows [EUI's writing\nguidelines](https://elastic.github.io/eui/#/guidelines/writing), uses\nsentence case text and includes [i18n\nsupport](https://github.com/elastic/kibana/blob/main/src/platform/packages/shared/kbn-i18n/README.md)\n- [ ]\n[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)\nwas added for features that require explanation or tutorials\n- [ ] [Unit or functional\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\nwere updated or added to match the most common scenarios\n- [ ] If a plugin configuration key changed, check if it needs to be\nallowlisted in the cloud and added to the [docker\nlist](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)\n- [ ] This was checked for breaking HTTP API changes, and any breaking\nchanges have been approved by the breaking-change committee. 
The\n`release_note:breaking` label should be applied in these situations.\n- [ ] [Flaky Test\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was\nused on any tests changed\n- [ ] The PR description includes the appropriate Release Notes section,\nand the correct `release_note:*` label is applied per the\n[guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)\n- [ ] Review the [backport\nguidelines](https://docs.google.com/document/d/1VyN5k91e5OVumlc0Gb9RPa3h1ewuPE705nRtioPiTvY/edit?usp=sharing)\nand apply applicable `backport:*` labels.","sha":"847f9de184d2918f261148ee62350e22bf7e079b"}},{"branch":"8.19","label":"v8.19.7","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"9.1","label":"v9.1.7","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"9.2","label":"v9.2.1","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"}]}] BACKPORT--> Co-authored-by: Melissa Alvarez <[email protected]>
1 parent d28d2de commit e12fa47

File tree

2 files changed

+23
-1
lines changed
  • x-pack
    • platform/plugins/shared/inference_endpoint/server/routes
    • solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints

2 files changed

+23
-1
lines changed

x-pack/platform/plugins/shared/inference_endpoint/server/routes/index.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,18 @@ export const getInferenceServicesRoute = (
106106

107107
const { config, secrets } = request.body;
108108

109+
// NOTE: This is a temporary workaround for Anthropic max_tokens handling until the services endpoint is updated to reflect the correct structure.
110+
// Anthropic is unique in that it requires max_tokens to be sent as part of the task_settings instead of the usual service_settings.
111+
// Until the services endpoint is updated to reflect that, there is no way for the form UI to know where to put max_tokens. This workaround can be removed once that update is made.
112+
let taskSettings;
113+
if (config?.provider === 'anthropic' && config?.providerConfig?.max_tokens) {
114+
taskSettings = {
115+
max_tokens: config.providerConfig.max_tokens,
116+
};
117+
// This field is unknown to the anthropic service config, so we remove it
118+
delete config.providerConfig.max_tokens;
119+
}
120+
109121
const serviceSettings = {
110122
...unflattenObject(config?.providerConfig ?? {}),
111123
...unflattenObject(secrets?.providerSecrets ?? {}),
@@ -117,6 +129,7 @@ export const getInferenceServicesRoute = (
117129
inference_config: {
118130
service: config?.provider,
119131
service_settings: serviceSettings,
132+
...(taskSettings ? { task_settings: taskSettings } : {}),
120133
},
121134
});
122135

x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import React, { useCallback } from 'react';
1010
import type { InferenceEndpoint } from '@kbn/inference-endpoint-ui-common';
1111
import { flattenObject } from '@kbn/object-utils';
1212
import type { InferenceInferenceEndpointInfo } from '@elastic/elasticsearch/lib/api/types';
13+
import { ServiceProviderKeys } from '@kbn/inference-endpoint-ui-common';
1314
import { useKibana } from '../../hooks/use_kibana';
1415
import { useQueryInferenceEndpoints } from '../../hooks/use_inference_endpoints';
1516

@@ -38,7 +39,15 @@ export const EditInferenceFlyout: React.FC<EditInterfaceFlyoutProps> = ({
3839
inferenceId: selectedInferenceEndpoint.inference_id,
3940
taskType: selectedInferenceEndpoint.task_type,
4041
provider: selectedInferenceEndpoint.service,
41-
providerConfig: flattenObject(selectedInferenceEndpoint.service_settings),
42+
providerConfig: {
43+
...flattenObject(selectedInferenceEndpoint.service_settings),
44+
// NOTE: The below is a workaround for anthropic max_tokens handling.
45+
// Anthropic is unique in that it requires max_tokens to be stored in task_settings rather than in the usual service_settings, which is what providerConfig is populated from.
46+
...(selectedInferenceEndpoint.task_settings?.max_tokens &&
47+
selectedInferenceEndpoint.service === ServiceProviderKeys.anthropic
48+
? { max_tokens: selectedInferenceEndpoint.task_settings?.max_tokens }
49+
: {}),
50+
},
4251
},
4352
secrets: {
4453
providerSecrets: {},

0 commit comments

Comments
 (0)