Skip to content

Commit aacad5e

Browse files
committed
bugfix: fix memory growth caused by brpc arena configuration.
1 parent 14f6b62 commit aacad5e

File tree

4 files changed

+9
-19
lines changed

4 files changed

+9
-19
lines changed

third_party/Mooncake

Submodule Mooncake updated from be89497 to fb26af7

xllm/api_service/api_service.cpp

100755 → 100644
Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ void ChatCompletionsImpl(std::unique_ptr<Service>& service,
148148
return;
149149
}
150150

151-
auto call = std::make_shared<ChatCall>(
152-
ctrl, guard.release(), req_pb, resp_pb, arena != nullptr /*use_arena*/);
151+
auto call =
152+
std::make_shared<ChatCall>(ctrl, guard.release(), req_pb, resp_pb);
153153
service->process_async(call);
154154
}
155155
} // namespace
@@ -167,19 +167,17 @@ void APIService::ChatCompletionsHttp(
167167
LOG(ERROR) << "brpc request | respose | controller is null";
168168
return;
169169
}
170-
170+
auto arena = response->GetArena();
171171
auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
172172

173173
if (FLAGS_backend == "llm") {
174-
auto arena = response->GetArena();
175174
CHECK(chat_service_impl_) << " chat service is invalid.";
176175
ChatCompletionsImpl<ChatCall, ChatServiceImpl>(
177176
chat_service_impl_, done_guard, arena, ctrl);
178177
} else if (FLAGS_backend == "vlm") {
179178
CHECK(mm_chat_service_impl_) << " mm chat service is invalid.";
180-
// TODO: fix me - temporarily using heap allocation instead of arena
181179
ChatCompletionsImpl<MMChatCall, MMChatServiceImpl>(
182-
mm_chat_service_impl_, done_guard, nullptr, ctrl);
180+
mm_chat_service_impl_, done_guard, arena, ctrl);
183181
}
184182
}
185183

xllm/api_service/stream_call.h

100755 → 100644
Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,8 @@ class StreamCall : public Call {
3939
StreamCall(brpc::Controller* controller,
4040
::google::protobuf::Closure* done,
4141
Request* request,
42-
Response* response,
43-
bool use_arena = true)
44-
: Call(controller),
45-
done_(done),
46-
request_(request),
47-
response_(response),
48-
use_arena_(use_arena) {
42+
Response* response)
43+
: Call(controller), done_(done), request_(request), response_(response) {
4944
stream_ = request_->stream();
5045
if (stream_) {
5146
pa_ = controller_->CreateProgressiveAttachment();
@@ -72,10 +67,6 @@ class StreamCall : public Call {
7267
if (!stream_) {
7368
done_->Run();
7469
}
75-
if (!use_arena_) {
76-
delete request_;
77-
delete response_;
78-
}
7970
}
8071

8172
bool write_and_finish(Response& response) {
@@ -151,7 +142,6 @@ class StreamCall : public Call {
151142
Response* response_;
152143

153144
bool stream_ = false;
154-
bool use_arena_ = true;
155145
butil::intrusive_ptr<brpc::ProgressiveAttachment> pa_;
156146
butil::IOBuf io_buf_;
157147

xllm/server/xllm_server.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ bool XllmServer::start(std::unique_ptr<APIService> service) {
5050
}
5151

5252
brpc::ServerOptions options;
53+
options.rpc_pb_message_factory =
54+
brpc::GetArenaRpcPBMessageFactory<1024 * 1024, 1024 * 1024 * 100>();
5355
options.idle_timeout_sec = FLAGS_rpc_idle_timeout_s;
5456
options.num_threads = FLAGS_num_threads;
5557
if (server_->Start(FLAGS_port, &options) != 0) {

0 commit comments

Comments
 (0)