elastic · alvarezmelissa87 · Oct 30, 2025 · Oct 30, 2025
@@ -106,6 +106,18 @@ export const getInferenceServicesRoute = (
 
           const { config, secrets } = request.body;
 
+          // NOTE: Temporary workaround for Anthropic `max_tokens` handling.
+          // Anthropic is unique in that it requires `max_tokens` to be sent as part of `task_settings` instead of the usual `service_settings`.
+          // Until the services endpoint is updated to reflect that structure, the form UI has no way of knowing where to put `max_tokens`. Remove this workaround once that update is made.
+          let taskSettings;
+          if (config?.provider === 'anthropic' && config?.providerConfig?.max_tokens) {
+            taskSettings = {
+              max_tokens: config.providerConfig.max_tokens,
+            };
+            // `max_tokens` is unknown to the Anthropic service config, so remove it from providerConfig before service_settings is built
+            delete config.providerConfig.max_tokens;
+          }
+
           const serviceSettings = {
             ...unflattenObject(config?.providerConfig ?? {}),
             ...unflattenObject(secrets?.providerSecrets ?? {}),
@@ -117,6 +129,7 @@ export const getInferenceServicesRoute = (
             inference_config: {
               service: config?.provider,
               service_settings: serviceSettings,
+              ...(taskSettings ? { task_settings: taskSettings } : {}),
             },
           });
 

@@ -10,6 +10,7 @@ import React, { useCallback } from 'react';
 import { InferenceEndpoint } from '@kbn/inference-endpoint-ui-common';
 import { flattenObject } from '@kbn/object-utils';
 import { InferenceInferenceEndpointInfo } from '@elastic/elasticsearch/lib/api/types';
+import { ServiceProviderKeys } from '@kbn/inference-endpoint-ui-common';
 import { useKibana } from '../../hooks/use_kibana';
 import { useQueryInferenceEndpoints } from '../../hooks/use_inference_endpoints';
 
@@ -37,7 +38,15 @@ export const EditInferenceFlyout: React.FC<EditInterfaceFlyoutProps> = ({
       inferenceId: selectedInferenceEndpoint.inference_id,
       taskType: selectedInferenceEndpoint.task_type,
       provider: selectedInferenceEndpoint.service,
-      providerConfig: flattenObject(selectedInferenceEndpoint.service_settings),
+      providerConfig: {
+        ...flattenObject(selectedInferenceEndpoint.service_settings),
+        // NOTE: The spread below is a workaround for Anthropic `max_tokens` handling.
+        // Anthropic is unique in that it requires `max_tokens` to be stored as part of `task_settings` instead of the usual `service_settings`, which is what providerConfig is populated from.
+        ...(selectedInferenceEndpoint.task_settings?.max_tokens &&
+        selectedInferenceEndpoint.service === ServiceProviderKeys.anthropic
+          ? { max_tokens: selectedInferenceEndpoint.task_settings?.max_tokens }
+          : {}),
+      },
     },
     secrets: {
       providerSecrets: {},