From 7ab928a90bc4977094fefe72b96ec5e7b275b57d Mon Sep 17 00:00:00 2001 From: Melissa Alvarez Date: Thu, 30 Oct 2025 11:36:58 -0600 Subject: [PATCH] [ML][Inference Endpoints] Anthropic endpoint creation: ensure max tokens parameter is passed as expected (#241212) ## Summary Related to this [issue](https://github.com/elastic/kibana/issues/241142) and this [fix](https://github.com/elastic/kibana/pull/241188). This PR: - updates the inference creation endpoint to ensure max_tokens is sent correctly for Anthropic - ensures that max_tokens is added back into the providerConfig when viewing the endpoint so that it shows up correctly in the form This is a temporary workaround for Anthropic max_tokens handling until the services endpoint is updated to reflect the correct structure. Anthropic is unique in that it requires max_tokens to be sent as part of the task_settings instead of the usual service_settings. Until the services endpoint is updated to reflect that, there is no way for the form UI to know where to put max_tokens. This workaround can be removed once that update is made. ### Checklist Check that the PR satisfies the following conditions. Reviewers should verify this PR satisfies this list as well. 
- [ ] Any text added follows [EUI's writing guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses sentence case text and includes [i18n support](https://github.com/elastic/kibana/blob/main/src/platform/packages/shared/kbn-i18n/README.md) - [ ] [Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html) was added for features that require explanation or tutorials - [ ] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios - [ ] If a plugin configuration key changed, check if it needs to be allowlisted in the cloud and added to the [docker list](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker) - [ ] This was checked for breaking HTTP API changes, and any breaking changes have been approved by the breaking-change committee. The `release_note:breaking` label should be applied in these situations. - [ ] [Flaky Test Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was used on any tests changed - [ ] The PR description includes the appropriate Release Notes section, and the correct `release_note:*` label is applied per the [guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process) - [ ] Review the [backport guidelines](https://docs.google.com/document/d/1VyN5k91e5OVumlc0Gb9RPa3h1ewuPE705nRtioPiTvY/edit?usp=sharing) and apply applicable `backport:*` labels. 
(cherry picked from commit 847f9de184d2918f261148ee62350e22bf7e079b) # Conflicts: # x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx --- .../inference_endpoint/server/routes/index.ts | 13 +++++++++++++ .../edit_inference_flyout.tsx | 11 ++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/x-pack/platform/plugins/shared/inference_endpoint/server/routes/index.ts b/x-pack/platform/plugins/shared/inference_endpoint/server/routes/index.ts index a4be2c671f4b7..18f5c1cbfb319 100644 --- a/x-pack/platform/plugins/shared/inference_endpoint/server/routes/index.ts +++ b/x-pack/platform/plugins/shared/inference_endpoint/server/routes/index.ts @@ -106,6 +106,18 @@ export const getInferenceServicesRoute = ( const { config, secrets } = request.body; + // NOTE: This is a temporary workaround for anthropic max_tokens handling until the services endpoint is updated to reflect the correct structure. + // Anthropic is unique in that it requires max_tokens to be sent as part of the task_settings instead of the usual service_settings. + // Until the services endpoint is updated to reflect that, there is no way for the form UI to know where to put max_tokens. This can be removed once that update is made. + let taskSettings; + if (config?.provider === 'anthropic' && config?.providerConfig?.max_tokens) { + taskSettings = { + max_tokens: config.providerConfig.max_tokens, + }; + // This field is unknown to the anthropic service config, so we remove it + delete config.providerConfig.max_tokens; + } + const serviceSettings = { ...unflattenObject(config?.providerConfig ?? {}), ...unflattenObject(secrets?.providerSecrets ?? {}), @@ -117,6 +129,7 @@ export const getInferenceServicesRoute = ( inference_config: { service: config?.provider, service_settings: serviceSettings, + ...(taskSettings ? 
{ task_settings: taskSettings } : {}), }, }); diff --git a/x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx b/x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx index bdf879a7a65c4..5f31452046297 100644 --- a/x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx +++ b/x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx @@ -10,6 +10,7 @@ import React, { useCallback } from 'react'; import { InferenceEndpoint } from '@kbn/inference-endpoint-ui-common'; import { flattenObject } from '@kbn/object-utils'; import { InferenceInferenceEndpointInfo } from '@elastic/elasticsearch/lib/api/types'; +import { ServiceProviderKeys } from '@kbn/inference-endpoint-ui-common'; import { useKibana } from '../../hooks/use_kibana'; import { useQueryInferenceEndpoints } from '../../hooks/use_inference_endpoints'; @@ -37,7 +38,15 @@ export const EditInferenceFlyout: React.FC = ({ inferenceId: selectedInferenceEndpoint.inference_id, taskType: selectedInferenceEndpoint.task_type, provider: selectedInferenceEndpoint.service, - providerConfig: flattenObject(selectedInferenceEndpoint.service_settings), + providerConfig: { + ...flattenObject(selectedInferenceEndpoint.service_settings), + // NOTE: The below is a workaround for anthropic max_tokens handling. + // Anthropic is unique in that it requires max_tokens to be stored as part of the task_settings instead of the usual service_settings - which we populate the providerConfig from. + ...(selectedInferenceEndpoint.task_settings?.max_tokens && + selectedInferenceEndpoint.service === ServiceProviderKeys.anthropic + ? { max_tokens: selectedInferenceEndpoint.task_settings?.max_tokens } + : {}), + }, }, secrets: { providerSecrets: {},