Commit 1c561c7

feat: optimizing cache usage
1 parent b7a3496 commit 1c561c7

7 files changed: +257 -115 lines changed

components/model/ark/chatmodel.go

Lines changed: 10 additions & 4 deletions

@@ -161,8 +161,14 @@ type CacheConfig struct {
 }
 
 type SessionCacheConfig struct {
-    // EnableCache specifies whether to enable session cache.
-    // If enabled, the model will cache each conversation and reuse it for subsequent requests.
+    // EnableCache controls whether session caching is active.
+    // When enabled, the model caches both input and response for each conversation turn,
+    // making them available for later retrieval via the API.
+    // The ResponseID is stored in the output message and can be retrieved using GetResponseID.
+    // In a multi-turn conversation, the ARK ChatModel implementation automatically scans all input messages
+    // to find the most recent message with a ResponseID, then passes this ID to the model
+    // as the previous ResponseID for context continuity.
+    // That message and all messages before it are automatically trimmed from the request sent to the model.
     EnableCache bool `json:"enable_cache"`
 
     // TTL specifies the survival time of cached data in seconds, with a maximum of 3 * 86400(3 days).
@@ -172,9 +178,9 @@ type SessionCacheConfig struct {
 type APIType string
 
 const (
-    // To learn more about ContextAPI, see https://www.volcengine.com/docs/82379/1528789
+    // ContextAPI is documented at https://www.volcengine.com/docs/82379/1528789
     ContextAPI APIType = "context_api"
-    // To learn more about ResponsesAPI, see https://www.volcengine.com/docs/82379/1569618
+    // ResponsesAPI is documented at https://www.volcengine.com/docs/82379/1569618
     ResponsesAPI APIType = "responses_api"
 )
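The comment above describes an implicit contract: with SessionCache enabled, each output message carries a ResponseID, and on the next call the ChatModel reuses the most recent ResponseID found in the input history and trims the already-cached prefix. Below is a minimal sketch of that flow built only from APIs shown in this commit; the prompts, variable names, and the omission of WithThinking are illustrative, not part of the commit.

// Hypothetical sketch; not part of this commit.
package main

import (
    "context"
    "log"
    "os"

    "github.com/cloudwego/eino/schema"

    "github.com/cloudwego/eino-ext/components/model/ark"
)

func main() {
    ctx := context.Background()

    chatModel, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
        APIKey: os.Getenv("ARK_API_KEY"),
        Model:  os.Getenv("ARK_MODEL_ID"),
    })
    if err != nil {
        log.Fatalf("NewChatModel failed, err=%v", err)
    }

    // Enable the ResponsesAPI session cache for every call.
    cacheOpt := &ark.CacheOption{
        APIType: ark.ResponsesAPI,
        SessionCache: &ark.SessionCacheConfig{
            EnableCache: true,
            TTL:         86400,
        },
    }

    // First turn: the returned message carries the ResponseID of this turn.
    history := []*schema.Message{schema.UserMessage("my name is megumin")}
    first, err := chatModel.Generate(ctx, history, ark.WithCache(cacheOpt))
    if err != nil {
        log.Fatalf("Generate failed, err=%v", err)
    }
    if id, ok := ark.GetResponseID(first); ok {
        log.Printf("cached response id: %s", id)
    }

    // Second turn: append the previous output to the history. The ARK ChatModel
    // finds the most recent ResponseID in the input and trims the cached prefix
    // before sending the request, as described in the doc comment above.
    history = append(history, first, schema.UserMessage("what is my name?"))
    second, err := chatModel.Generate(ctx, history, ark.WithCache(cacheOpt))
    if err != nil {
        log.Fatalf("Generate failed, err=%v", err)
    }
    log.Printf("answer: %s", second.Content)
}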

Lines changed: 84 additions & 0 deletions

@@ -0,0 +1,84 @@
+/*
+ * Copyright 2025 CloudWeGo Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main
+
+import (
+    "context"
+    "encoding/json"
+    "log"
+    "os"
+
+    arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
+
+    "github.com/cloudwego/eino/schema"
+
+    "github.com/cloudwego/eino-ext/components/model/ark"
+)
+
+func main() {
+    ctx := context.Background()
+
+    // Get ARK_API_KEY and ARK_MODEL_ID: https://www.volcengine.com/docs/82379/1399008
+    chatModel, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
+        APIKey: os.Getenv("ARK_API_KEY"),
+        Model:  os.Getenv("ARK_MODEL_ID"),
+    })
+    if err != nil {
+        log.Fatalf("NewChatModel failed, err=%v", err)
+    }
+
+    thinking := &arkModel.Thinking{
+        Type: arkModel.ThinkingTypeDisabled,
+    }
+    cacheOpt := &ark.CacheOption{
+        APIType: ark.ResponsesAPI,
+        SessionCache: &ark.SessionCacheConfig{
+            EnableCache: true,
+            TTL:         86400,
+        },
+    }
+
+    outMsg, err := chatModel.Generate(ctx, []*schema.Message{
+        schema.UserMessage("my name is megumin"),
+    }, ark.WithThinking(thinking),
+        ark.WithCache(cacheOpt))
+    if err != nil {
+        log.Fatalf("Generate failed, err=%v", err)
+    }
+
+    respID, ok := ark.GetResponseID(outMsg)
+    if !ok {
+        log.Fatalf("response ID not found in message")
+    }
+
+    msg, err := chatModel.Generate(ctx, []*schema.Message{
+        schema.UserMessage("what is my name?"),
+    }, ark.WithThinking(thinking),
+        ark.WithCache(&ark.CacheOption{
+            APIType:          ark.ResponsesAPI,
+            PrefixResponseID: &respID,
+        }),
+    )
+    if err != nil {
+        log.Fatalf("Generate failed, err=%v", err)
+    }
+
+    log.Printf("\ngenerate output: \n")
+    log.Printf("  request_id: %s\n", ark.GetArkRequestID(msg))
+    respBody, _ := json.MarshalIndent(msg, "  ", "  ")
+    log.Printf("  body: %s\n", string(respBody))
+}

components/model/ark/examples/sessioncache/responsesapi/session_cache.go

Lines changed: 17 additions & 74 deletions

@@ -20,10 +20,8 @@ import (
     "context"
     "encoding/json"
     "fmt"
-    "io"
     "log"
     "os"
-    "time"
 
     arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
 
@@ -43,13 +41,9 @@ func main() {
         log.Fatalf("NewChatModel failed, err=%v", err)
     }
 
-    instructions := []*schema.Message{
-        schema.SystemMessage("Your name is superman"),
-    }
     thinking := &arkModel.Thinking{
         Type: arkModel.ThinkingTypeDisabled,
     }
-
     cacheOpt := &ark.CacheOption{
         APIType: ark.ResponsesAPI,
         SessionCache: &ark.SessionCacheConfig{
@@ -58,79 +52,28 @@ func main() {
         },
     }
 
-    msg, err := chatModel.Generate(ctx, instructions,
-        ark.WithThinking(thinking),
-        ark.WithCache(cacheOpt))
-    if err != nil {
-        log.Fatalf("Generate failed, err=%v", err)
-    }
-
-    firstContextID, ok := ark.GetContextID(msg)
-    if !ok {
-        log.Fatalf("GetContextID failed, err=%v", err)
-    }
-
-    cacheOpt.ContextID = &firstContextID
-
-    <-time.After(500 * time.Millisecond)
-
-    msg, err = chatModel.Generate(ctx, []*schema.Message{
-        {
-            Role:    schema.User,
-            Content: "What's your name?",
-        },
-    },
-        ark.WithThinking(thinking),
-        ark.WithCache(cacheOpt))
-    if err != nil {
-        log.Fatalf("Generate failed, err=%v", err)
+    useMsgs := []*schema.Message{
+        schema.UserMessage("Your name is superman"),
+        schema.UserMessage("What's your name?"),
+        schema.UserMessage("What did I ask you last time?"),
     }
 
-    fmt.Printf("\ngenerate output: \n")
-    fmt.Printf("  request_id: %s\n", ark.GetArkRequestID(msg))
-    respBody, _ := json.MarshalIndent(msg, "  ", "  ")
-    fmt.Printf("  body: %s\n", string(respBody))
+    var input []*schema.Message
+    for _, msg := range useMsgs {
+        input = append(input, msg)
 
-    secondContextID, ok := ark.GetContextID(msg)
-    if !ok {
-        log.Fatalf("GetContextID failed, err=%v", err)
-    }
-
-    cacheOpt.ContextID = &secondContextID
-
-    outStreamReader, err := chatModel.Stream(ctx, []*schema.Message{
-        {
-            Role:    schema.User,
-            Content: "What do I ask you last time?",
-        },
-    },
-        ark.WithThinking(thinking),
-        ark.WithCache(cacheOpt))
-    if err != nil {
-        log.Fatalf("Stream failed, err=%v", err)
-    }
-
-    fmt.Println("\ntypewriter output:")
-    var msgs []*schema.Message
-    for {
-        item, e := outStreamReader.Recv()
-        if e == io.EOF {
-            break
-        }
-        if e != nil {
-            log.Fatal(e)
+        output, err := chatModel.Generate(ctx, input,
+            ark.WithThinking(thinking),
+            ark.WithCache(cacheOpt))
+        if err != nil {
+            log.Fatalf("Generate failed, err=%v", err)
         }
 
-        fmt.Print(item.Content)
-        msgs = append(msgs, item)
-    }
+        fmt.Printf("generate output: \n")
+        fmt.Printf("  request_id: %s\n", ark.GetArkRequestID(output))
+        respBody, _ := json.MarshalIndent(output, "  ", "  ")
+        fmt.Printf("  body: %s\n\n", string(respBody))
 
-    msg, err = schema.ConcatMessages(msgs)
-    if err != nil {
-        log.Fatalf("ConcatMessages failed, err=%v", err)
+        input = append(input, output)
     }
-    fmt.Print("\n\nstream output: \n")
-    fmt.Printf("  request_id: %s\n", ark.GetArkRequestID(msg))
-    respBody, _ = json.MarshalIndent(msg, "  ", "  ")
-    fmt.Printf("  body: %s\n", string(respBody))
 }

components/model/ark/message_extra.go

Lines changed: 18 additions & 8 deletions

@@ -29,6 +29,7 @@ const (
     keyOfModelName = "ark-model-name"
     videoURLFPS = "ark-model-video-url-fps"
     keyOfContextID = "ark-context-id"
+    keyOfResponseID = "ark-response-id"
     keyOfServiceTier = "ark-service-tier"
 )
 
@@ -97,22 +98,31 @@ func setModelName(msg *schema.Message, name string) {
     setMsgExtra(msg, keyOfModelName, arkModelName(name))
 }
 
-// GetContextID returns the conversation context ID of the given message.
+// Deprecated: Use GetResponseID instead.
+// GetContextID returns the conversation context ID from the message.
 // Note:
-// - Only the first chunk returns the context ID.
-// - It is only available for ResponsesAPI.
+// - Available only for ResponsesAPI responses.
+// - For streaming responses, only the first chunk contains the ResponseID.
 func GetContextID(msg *schema.Message) (string, bool) {
-    if msg == nil {
-        return "", false
-    }
-    contextID, ok := getMsgExtraValue[string](msg, keyOfContextID)
-    return contextID, ok
+    return getMsgExtraValue[string](msg, keyOfContextID)
 }
 
 func setContextID(msg *schema.Message, contextID string) {
     setMsgExtra(msg, keyOfContextID, contextID)
 }
 
+// GetResponseID returns the response ID from the message.
+// Note:
+// - Available only for ResponsesAPI responses.
+// - For streaming responses, only the first chunk contains the ResponseID.
+func GetResponseID(msg *schema.Message) (string, bool) {
+    return getMsgExtraValue[string](msg, keyOfResponseID)
+}
+
+func setResponseID(msg *schema.Message, responseID string) {
+    setMsgExtra(msg, keyOfResponseID, responseID)
+}
+
 func getMsgExtraValue[T any](msg *schema.Message, key string) (T, bool) {
     if msg == nil {
         var t T
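Since only the first streamed chunk carries the ResponseID, a caller that streams has to capture it before concatenating chunks. A hypothetical sketch assembled from APIs that appear elsewhere in this commit (Stream, Recv, ConcatMessages, GetResponseID); prompts and setup are illustrative only.

// Hypothetical sketch; not part of this commit.
package main

import (
    "context"
    "io"
    "log"
    "os"

    "github.com/cloudwego/eino/schema"

    "github.com/cloudwego/eino-ext/components/model/ark"
)

func main() {
    ctx := context.Background()

    chatModel, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
        APIKey: os.Getenv("ARK_API_KEY"),
        Model:  os.Getenv("ARK_MODEL_ID"),
    })
    if err != nil {
        log.Fatalf("NewChatModel failed, err=%v", err)
    }

    cacheOpt := &ark.CacheOption{
        APIType:      ark.ResponsesAPI,
        SessionCache: &ark.SessionCacheConfig{EnableCache: true, TTL: 86400},
    }

    sr, err := chatModel.Stream(ctx, []*schema.Message{
        schema.UserMessage("my name is megumin"),
    }, ark.WithCache(cacheOpt))
    if err != nil {
        log.Fatalf("Stream failed, err=%v", err)
    }

    var chunks []*schema.Message
    for {
        chunk, e := sr.Recv()
        if e == io.EOF {
            break
        }
        if e != nil {
            log.Fatal(e)
        }
        // Only the first chunk carries the ResponseID.
        if len(chunks) == 0 {
            if id, ok := ark.GetResponseID(chunk); ok {
                log.Printf("response id from first chunk: %s", id)
            }
        }
        chunks = append(chunks, chunk)
    }

    full, err := schema.ConcatMessages(chunks)
    if err != nil {
        log.Fatalf("ConcatMessages failed, err=%v", err)
    }
    log.Printf("full content: %s", full.Content)
}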

components/model/ark/option.go

Lines changed: 14 additions & 3 deletions

@@ -66,12 +66,23 @@ type CacheOption struct {
     // Required.
     APIType APIType
 
-    // ContextID specifies the ID of the previous conversation.
-    // If APIType is ResponsesAPI and this is the first time the conversation uses the cache,
-    // ContextID should be nil and get the id of this round through [GetContextID].
+    // ContextID is the ID returned by ContextAPI.
+    // Available only for ContextAPI.
+    // NOTE: ContextID will no longer be compatible with the response ID returned by ResponsesAPI in the future.
+    // To use the prefix cache capability of ResponsesAPI, use PrefixResponseID.
+    // To use the session cache capability of ResponsesAPI, use SessionCache.
+    // Optional.
     ContextID *string
 
+    // PrefixResponseID is the response ID returned by ResponsesAPI.
+    // Available only for ResponsesAPI.
+    // The response corresponding to this ID must already have been cached.
+    // It is useful when a fixed prefix cache is required in a multi-turn conversation.
+    // Optional.
+    PrefixResponseID *string
+
     // SessionCache is the configuration of ResponsesAPI session cache.
+    // Optional.
     SessionCache *SessionCacheConfig
 }
 
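Taken together, CacheOption now distinguishes three modes: ContextID for ContextAPI, and PrefixResponseID or SessionCache for ResponsesAPI. A hypothetical helper package, shown only to contrast the two ResponsesAPI modes; the package and function names below do not exist in this commit.

// Hypothetical helpers; not part of this commit.
package cacheopts

import "github.com/cloudwego/eino-ext/components/model/ark"

// sessionOption enables the session cache: every turn is cached and the
// ResponseID travels with the output message.
func sessionOption() *ark.CacheOption {
    return &ark.CacheOption{
        APIType: ark.ResponsesAPI,
        SessionCache: &ark.SessionCacheConfig{
            EnableCache: true,
            TTL:         86400, // seconds; capped at 3 * 86400 per the SessionCacheConfig docs
        },
    }
}

// prefixOption reuses a fixed, already-cached prefix identified by a response ID
// previously obtained via ark.GetResponseID.
func prefixOption(prevRespID string) *ark.CacheOption {
    return &ark.CacheOption{
        APIType:          ark.ResponsesAPI,
        PrefixResponseID: &prevRespID,
    }
}

The response ID passed to prefixOption comes from ark.GetResponseID on an earlier output message, as shown in the new example earlier in this commit.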
