Commit 1c561c7

feat: optimizing cache usage
1 parent b7a3496 commit 1c561c7

7 files changed: +257 -115 lines changed

components/model/ark/chatmodel.go

Lines changed: 10 additions & 4 deletions

@@ -161,8 +161,14 @@ type CacheConfig struct {
 }
 
 type SessionCacheConfig struct {
-    // EnableCache specifies whether to enable session cache.
-    // If enabled, the model will cache each conversation and reuse it for subsequent requests.
+    // EnableCache controls whether session caching is active.
+    // When enabled, the model caches both input and response for each conversation turn,
+    // making them available for later retrieval via the API.
+    // The ResponseID is stored in the output message and can be retrieved using GetResponseID.
+    // In a multi-turn conversation, the ARK ChatModel implementation automatically scans all input messages
+    // to find the most recent message with a ResponseID, then passes this ID to the model
+    // as the previous ResponseID for context continuity.
+    // That message and all messages before it are automatically trimmed from the request sent to the model.
     EnableCache bool `json:"enable_cache"`
 
     // TTL specifies the survival time of cached data in seconds, with a maximum of 3 * 86400(3 days).
@@ -172,9 +178,9 @@ type SessionCacheConfig struct {
 type APIType string
 
 const (
-    // To learn more about ContextAPI, see https://www.volcengine.com/docs/82379/1528789
+    // ContextAPI is documented at https://www.volcengine.com/docs/82379/1528789
     ContextAPI APIType = "context_api"
-    // To learn more about ResponsesAPI, see https://www.volcengine.com/docs/82379/1569618
+    // ResponsesAPI is documented at https://www.volcengine.com/docs/82379/1569618
     ResponsesAPI APIType = "responses_api"
 )
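The comment above describes an implicit contract: with SessionCache enabled, each output message carries a ResponseID, and on the next call the ChatModel reuses the most recent ResponseID found in the input history and trims the already-cached prefix. Below is a minimal sketch of that flow built only from APIs shown in this commit; the prompts, variable names, and the omission of WithThinking are illustrative, not part of the commit.

// Hypothetical sketch; not part of this commit.
package main

import (
    "context"
    "log"
    "os"

    "github.com/cloudwego/eino/schema"

    "github.com/cloudwego/eino-ext/components/model/ark"
)

func main() {
    ctx := context.Background()

    chatModel, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
        APIKey: os.Getenv("ARK_API_KEY"),
        Model:  os.Getenv("ARK_MODEL_ID"),
    })
    if err != nil {
        log.Fatalf("NewChatModel failed, err=%v", err)
    }

    // Enable the ResponsesAPI session cache for every call.
    cacheOpt := &ark.CacheOption{
        APIType: ark.ResponsesAPI,
        SessionCache: &ark.SessionCacheConfig{
            EnableCache: true,
            TTL:         86400,
        },
    }

    // First turn: the returned message carries the ResponseID of this turn.
    history := []*schema.Message{schema.UserMessage("my name is megumin")}
    first, err := chatModel.Generate(ctx, history, ark.WithCache(cacheOpt))
    if err != nil {
        log.Fatalf("Generate failed, err=%v", err)
    }
    if id, ok := ark.GetResponseID(first); ok {
        log.Printf("cached response id: %s", id)
    }

    // Second turn: append the previous output to the history. The ARK ChatModel
    // finds the most recent ResponseID in the input and trims the cached prefix
    // before sending the request, as described in the doc comment above.
    history = append(history, first, schema.UserMessage("what is my name?"))
    second, err := chatModel.Generate(ctx, history, ark.WithCache(cacheOpt))
    if err != nil {
        log.Fatalf("Generate failed, err=%v", err)
    }
    log.Printf("answer: %s", second.Content)
}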

Lines changed: 84 additions & 0 deletions

@@ -0,0 +1,84 @@
+/*
+ * Copyright 2025 CloudWeGo Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main
+
+import (
+    "context"
+    "encoding/json"
+    "log"
+    "os"
+
+    arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
+
+    "github.com/cloudwego/eino/schema"
+
+    "github.com/cloudwego/eino-ext/components/model/ark"
+)
+
+func main() {
+    ctx := context.Background()
+
+    // Get ARK_API_KEY and ARK_MODEL_ID: https://www.volcengine.com/docs/82379/1399008
+    chatModel, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
+        APIKey: os.Getenv("ARK_API_KEY"),
+        Model:  os.Getenv("ARK_MODEL_ID"),
+    })
+    if err != nil {
+        log.Fatalf("NewChatModel failed, err=%v", err)
+    }
+
+    thinking := &arkModel.Thinking{
+        Type: arkModel.ThinkingTypeDisabled,
+    }
+    cacheOpt := &ark.CacheOption{
+        APIType: ark.ResponsesAPI,
+        SessionCache: &ark.SessionCacheConfig{
+            EnableCache: true,
+            TTL:         86400,
+        },
+    }
+
+    outMsg, err := chatModel.Generate(ctx, []*schema.Message{
+        schema.UserMessage("my name is megumin"),
+    }, ark.WithThinking(thinking),
+        ark.WithCache(cacheOpt))
+    if err != nil {
+        log.Fatalf("Generate failed, err=%v", err)
+    }
+
+    respID, ok := ark.GetResponseID(outMsg)
+    if !ok {
+        log.Fatalf("response ID not found in message")
+    }
+
+    msg, err := chatModel.Generate(ctx, []*schema.Message{
+        schema.UserMessage("what is my name?"),
+    }, ark.WithThinking(thinking),
+        ark.WithCache(&ark.CacheOption{
+            APIType:          ark.ResponsesAPI,
+            PrefixResponseID: &respID,
+        }),
+    )
+    if err != nil {
+        log.Fatalf("Generate failed, err=%v", err)
+    }
+
+    log.Printf("\ngenerate output: \n")
+    log.Printf("  request_id: %s\n", ark.GetArkRequestID(msg))
+    respBody, _ := json.MarshalIndent(msg, "  ", "  ")
+    log.Printf("  body: %s\n", string(respBody))
+}

components/model/ark/examples/sessioncache/responsesapi/session_cache.go

Lines changed: 17 additions & 74 deletions

@@ -20,10 +20,8 @@ import (
     "context"
     "encoding/json"
     "fmt"
-    "io"
     "log"
     "os"
-    "time"
 
     arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
 
@@ -43,13 +41,9 @@ func main() {
         log.Fatalf("NewChatModel failed, err=%v", err)
     }
 
-    instructions := []*schema.Message{
-        schema.SystemMessage("Your name is superman"),
-    }
     thinking := &arkModel.Thinking{
         Type: arkModel.ThinkingTypeDisabled,
     }
-
     cacheOpt := &ark.CacheOption{
         APIType: ark.ResponsesAPI,
         SessionCache: &ark.SessionCacheConfig{
@@ -58,79 +52,28 @@ func main() {
         },
     }
 
-    msg, err := chatModel.Generate(ctx, instructions,
-        ark.WithThinking(thinking),
-        ark.WithCache(cacheOpt))
-    if err != nil {
-        log.Fatalf("Generate failed, err=%v", err)
-    }
-
-    firstContextID, ok := ark.GetContextID(msg)
-    if !ok {
-        log.Fatalf("GetContextID failed, err=%v", err)
-    }
-
-    cacheOpt.ContextID = &firstContextID
-
-    <-time.After(500 * time.Millisecond)
-
-    msg, err = chatModel.Generate(ctx, []*schema.Message{
-        {
-            Role:    schema.User,
-            Content: "What's your name?",
-        },
-    },
-        ark.WithThinking(thinking),
-        ark.WithCache(cacheOpt))
-    if err != nil {
-        log.Fatalf("Generate failed, err=%v", err)
+    useMsgs := []*schema.Message{
+        schema.UserMessage("Your name is superman"),
+        schema.UserMessage("What's your name?"),
+        schema.UserMessage("What did I ask you last time?"),
     }
 
-    fmt.Printf("\ngenerate output: \n")
-    fmt.Printf("  request_id: %s\n", ark.GetArkRequestID(msg))
-    respBody, _ := json.MarshalIndent(msg, "  ", "  ")
-    fmt.Printf("  body: %s\n", string(respBody))
+    var input []*schema.Message
+    for _, msg := range useMsgs {
+        input = append(input, msg)
 
-    secondContextID, ok := ark.GetContextID(msg)
-    if !ok {
-        log.Fatalf("GetContextID failed, err=%v", err)
-    }
-
-    cacheOpt.ContextID = &secondContextID
-
-    outStreamReader, err := chatModel.Stream(ctx, []*schema.Message{
-        {
-            Role:    schema.User,
-            Content: "What do I ask you last time?",
-        },
-    },
-        ark.WithThinking(thinking),
-        ark.WithCache(cacheOpt))
-    if err != nil {
-        log.Fatalf("Stream failed, err=%v", err)
-    }
-
-    fmt.Println("\ntypewriter output:")
-    var msgs []*schema.Message
-    for {
-        item, e := outStreamReader.Recv()
-        if e == io.EOF {
-            break
-        }
-        if e != nil {
-            log.Fatal(e)
+        output, err := chatModel.Generate(ctx, input,
+            ark.WithThinking(thinking),
+            ark.WithCache(cacheOpt))
+        if err != nil {
+            log.Fatalf("Generate failed, err=%v", err)
         }
 
-        fmt.Print(item.Content)
-        msgs = append(msgs, item)
-    }
+        fmt.Printf("generate output: \n")
+        fmt.Printf("  request_id: %s\n", ark.GetArkRequestID(output))
+        respBody, _ := json.MarshalIndent(output, "  ", "  ")
+        fmt.Printf("  body: %s\n\n", string(respBody))
 
-    msg, err = schema.ConcatMessages(msgs)
-    if err != nil {
-        log.Fatalf("ConcatMessages failed, err=%v", err)
+        input = append(input, output)
     }
-    fmt.Print("\n\nstream output: \n")
-    fmt.Printf("  request_id: %s\n", ark.GetArkRequestID(msg))
-    respBody, _ = json.MarshalIndent(msg, "  ", "  ")
-    fmt.Printf("  body: %s\n", string(respBody))
 }

components/model/ark/message_extra.go

Lines changed: 18 additions & 8 deletions

@@ -29,6 +29,7 @@ const (
     keyOfModelName = "ark-model-name"
     videoURLFPS = "ark-model-video-url-fps"
     keyOfContextID = "ark-context-id"
+    keyOfResponseID = "ark-response-id"
     keyOfServiceTier = "ark-service-tier"
 )
 
@@ -97,22 +98,31 @@ func setModelName(msg *schema.Message, name string) {
     setMsgExtra(msg, keyOfModelName, arkModelName(name))
 }
 
-// GetContextID returns the conversation context ID of the given message.
+// Deprecated: Use GetResponseID instead.
+// GetContextID returns the conversation context ID from the message.
 // Note:
-// - Only the first chunk returns the context ID.
-// - It is only available for ResponsesAPI.
+// - Available only for ResponsesAPI responses.
+// - For streaming responses, only the first chunk contains the ResponseID.
 func GetContextID(msg *schema.Message) (string, bool) {
-    if msg == nil {
-        return "", false
-    }
-    contextID, ok := getMsgExtraValue[string](msg, keyOfContextID)
-    return contextID, ok
+    return getMsgExtraValue[string](msg, keyOfContextID)
 }
 
 func setContextID(msg *schema.Message, contextID string) {
     setMsgExtra(msg, keyOfContextID, contextID)
 }
 
+// GetResponseID returns the response ID from the message.
+// Note:
+// - Available only for ResponsesAPI responses.
+// - For streaming responses, only the first chunk contains the ResponseID.
+func GetResponseID(msg *schema.Message) (string, bool) {
+    return getMsgExtraValue[string](msg, keyOfResponseID)
+}
+
+func setResponseID(msg *schema.Message, responseID string) {
+    setMsgExtra(msg, keyOfResponseID, responseID)
+}
+
 func getMsgExtraValue[T any](msg *schema.Message, key string) (T, bool) {
     if msg == nil {
         var t T
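Since only the first streamed chunk carries the ResponseID, a caller that streams has to capture it before concatenating chunks. A hypothetical sketch assembled from APIs that appear elsewhere in this commit (Stream, Recv, ConcatMessages, GetResponseID); prompts and setup are illustrative only.

// Hypothetical sketch; not part of this commit.
package main

import (
    "context"
    "io"
    "log"
    "os"

    "github.com/cloudwego/eino/schema"

    "github.com/cloudwego/eino-ext/components/model/ark"
)

func main() {
    ctx := context.Background()

    chatModel, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
        APIKey: os.Getenv("ARK_API_KEY"),
        Model:  os.Getenv("ARK_MODEL_ID"),
    })
    if err != nil {
        log.Fatalf("NewChatModel failed, err=%v", err)
    }

    cacheOpt := &ark.CacheOption{
        APIType:      ark.ResponsesAPI,
        SessionCache: &ark.SessionCacheConfig{EnableCache: true, TTL: 86400},
    }

    sr, err := chatModel.Stream(ctx, []*schema.Message{
        schema.UserMessage("my name is megumin"),
    }, ark.WithCache(cacheOpt))
    if err != nil {
        log.Fatalf("Stream failed, err=%v", err)
    }

    var chunks []*schema.Message
    for {
        chunk, e := sr.Recv()
        if e == io.EOF {
            break
        }
        if e != nil {
            log.Fatal(e)
        }
        // Only the first chunk carries the ResponseID.
        if len(chunks) == 0 {
            if id, ok := ark.GetResponseID(chunk); ok {
                log.Printf("response id from first chunk: %s", id)
            }
        }
        chunks = append(chunks, chunk)
    }

    full, err := schema.ConcatMessages(chunks)
    if err != nil {
        log.Fatalf("ConcatMessages failed, err=%v", err)
    }
    log.Printf("full content: %s", full.Content)
}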

components/model/ark/option.go

Lines changed: 14 additions & 3 deletions

@@ -66,12 +66,23 @@ type CacheOption struct {
     // Required.
     APIType APIType
 
-    // ContextID specifies the ID of the previous conversation.
-    // If APIType is ResponsesAPI and this is the first time the conversation uses the cache,
-    // ContextID should be nil and get the id of this round through [GetContextID].
+    // ContextID is the ID returned by ContextAPI.
+    // Available only for ContextAPI.
+    // NOTE: ContextID will no longer be compatible with the response ID returned by ResponsesAPI in the future.
+    // To use the prefix cache capability of ResponsesAPI, use PrefixResponseID.
+    // To use the session cache capability of ResponsesAPI, use SessionCache.
+    // Optional.
     ContextID *string
 
+    // PrefixResponseID is the response ID returned by ResponsesAPI.
+    // Available only for ResponsesAPI.
+    // The response corresponding to this ID must already have been cached.
+    // It is useful when a fixed prefix cache is required in a multi-turn conversation.
+    // Optional.
+    PrefixResponseID *string
+
     // SessionCache is the configuration of ResponsesAPI session cache.
+    // Optional.
     SessionCache *SessionCacheConfig
 }
 
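Taken together, CacheOption now distinguishes three modes: ContextID for ContextAPI, and PrefixResponseID or SessionCache for ResponsesAPI. A hypothetical helper package, shown only to contrast the two ResponsesAPI modes; the package and function names below do not exist in this commit.

// Hypothetical helpers; not part of this commit.
package cacheopts

import "github.com/cloudwego/eino-ext/components/model/ark"

// sessionOption enables the session cache: every turn is cached and the
// ResponseID travels with the output message.
func sessionOption() *ark.CacheOption {
    return &ark.CacheOption{
        APIType: ark.ResponsesAPI,
        SessionCache: &ark.SessionCacheConfig{
            EnableCache: true,
            TTL:         86400, // seconds; capped at 3 * 86400 per the SessionCacheConfig docs
        },
    }
}

// prefixOption reuses a fixed, already-cached prefix identified by a response ID
// previously obtained via ark.GetResponseID.
func prefixOption(prevRespID string) *ark.CacheOption {
    return &ark.CacheOption{
        APIType:          ark.ResponsesAPI,
        PrefixResponseID: &prevRespID,
    }
}

The response ID passed to prefixOption comes from ark.GetResponseID on an earlier output message, as shown in the new example earlier in this commit.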
