From 7ab928a90bc4977094fefe72b96ec5e7b275b57d Mon Sep 17 00:00:00 2001
From: Melissa Alvarez <melissa.alvarez@elastic.co>
Date: Thu, 30 Oct 2025 11:36:58 -0600
Subject: [PATCH] [ML][Inference Endpoints] Anthropic endpoint creation: ensure
 max tokens parameter is passed as expected (#241212)

## Summary

Related to this [issue](https://github.com/elastic/kibana/issues/241142)
and this [fix](https://github.com/elastic/kibana/pull/241188).

This PR:
- updates the inference creation endpoint to ensure max_tokens is sent
correctly for Anthropic
- ensures that max_tokens is added back into the providerConfig when
viewing the endpoint so that it shows up correctly in the form

This is a temporary workaround for Anthropic max_tokens handling until
the services endpoint is updated to reflect the correct structure.
Anthropic is unique in that it requires max_tokens to be sent as part of
the task_settings instead of the usual service_settings.
Until the services endpoint is updated to reflect that, there is no way
for the form UI to know where to put max_tokens. This can be removed
once that update is made.

### Checklist

Check that the PR satisfies the following conditions.

Reviewers should verify this PR satisfies this list as well.

- [ ] Any text added follows [EUI's writing
guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses
sentence case text and includes [i18n
support](https://github.com/elastic/kibana/blob/main/src/platform/packages/shared/kbn-i18n/README.md)
- [ ]
[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)
was added for features that require explanation or tutorials
- [ ] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [ ] If a plugin configuration key changed, check if it needs to be
allowlisted in the cloud and added to the [docker
list](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)
- [ ] This was checked for breaking HTTP API changes, and any breaking
changes have been approved by the breaking-change committee. The
`release_note:breaking` label should be applied in these situations.
- [ ] [Flaky Test
Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was
used on any tests changed
- [ ] The PR description includes the appropriate Release Notes section,
and the correct `release_note:*` label is applied per the
[guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
- [ ] Review the [backport
guidelines](https://docs.google.com/document/d/1VyN5k91e5OVumlc0Gb9RPa3h1ewuPE705nRtioPiTvY/edit?usp=sharing)
and apply applicable `backport:*` labels.

(cherry picked from commit 847f9de184d2918f261148ee62350e22bf7e079b)

# Conflicts:
#	x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx
---
 .../inference_endpoint/server/routes/index.ts       | 13 +++++++++++++
 .../edit_inference_flyout.tsx                       | 11 ++++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/x-pack/platform/plugins/shared/inference_endpoint/server/routes/index.ts b/x-pack/platform/plugins/shared/inference_endpoint/server/routes/index.ts
index a4be2c671f4b7..18f5c1cbfb319 100644
--- a/x-pack/platform/plugins/shared/inference_endpoint/server/routes/index.ts
+++ b/x-pack/platform/plugins/shared/inference_endpoint/server/routes/index.ts
@@ -106,6 +106,18 @@ export const getInferenceServicesRoute = (
 
           const { config, secrets } = request.body;
 
+          // NOTE: This is a temporary workaround for anthropic max_tokens handling until the services endpoint is updated to reflect the correct structure.
+          // Anthropic is unique in that it requires max_tokens to be sent as part of the task_settings instead of the usual service_settings.
+          // Until the services endpoint is updated to reflect that, there is no way for the form UI to know where to put max_tokens. This can be removed once that update is made.
+          let taskSettings;
+          if (config?.provider === 'anthropic' && config?.providerConfig?.max_tokens) {
+            taskSettings = {
+              max_tokens: config.providerConfig.max_tokens,
+            };
+            // This field is unknown to the anthropic service config, so we remove it
+            delete config.providerConfig.max_tokens;
+          }
+
           const serviceSettings = {
             ...unflattenObject(config?.providerConfig ?? {}),
             ...unflattenObject(secrets?.providerSecrets ?? {}),
@@ -117,6 +129,7 @@ export const getInferenceServicesRoute = (
             inference_config: {
               service: config?.provider,
               service_settings: serviceSettings,
+              ...(taskSettings ? { task_settings: taskSettings } : {}),
             },
           });
 
diff --git a/x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx b/x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx
index bdf879a7a65c4..5f31452046297 100644
--- a/x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx
+++ b/x-pack/solutions/search/plugins/search_inference_endpoints/public/components/edit_inference_endpoints/edit_inference_flyout.tsx
@@ -10,6 +10,7 @@ import React, { useCallback } from 'react';
 import { InferenceEndpoint } from '@kbn/inference-endpoint-ui-common';
 import { flattenObject } from '@kbn/object-utils';
 import { InferenceInferenceEndpointInfo } from '@elastic/elasticsearch/lib/api/types';
+import { ServiceProviderKeys } from '@kbn/inference-endpoint-ui-common';
 import { useKibana } from '../../hooks/use_kibana';
 import { useQueryInferenceEndpoints } from '../../hooks/use_inference_endpoints';
 
@@ -37,7 +38,15 @@ export const EditInferenceFlyout: React.FC<EditInterfaceFlyoutProps> = ({
       inferenceId: selectedInferenceEndpoint.inference_id,
       taskType: selectedInferenceEndpoint.task_type,
       provider: selectedInferenceEndpoint.service,
-      providerConfig: flattenObject(selectedInferenceEndpoint.service_settings),
+      providerConfig: {
+        ...flattenObject(selectedInferenceEndpoint.service_settings),
+        // NOTE: The below is a workaround for anthropic max_tokens handling.
+        // Anthropic is unique in that it requires max_tokens to be stored as part of the task_settings instead of the usual service_settings - which we populate the providerConfig from.
+        ...(selectedInferenceEndpoint.task_settings?.max_tokens &&
+        selectedInferenceEndpoint.service === ServiceProviderKeys.anthropic
+          ? { max_tokens: selectedInferenceEndpoint.task_settings?.max_tokens }
+          : {}),
+      },
     },
     secrets: {
       providerSecrets: {},