From 21e59adf09adc3923a9493ddbe32d8a915c22614 Mon Sep 17 00:00:00 2001 From: "wangyunlong.115" Date: Wed, 19 Nov 2025 14:33:04 +0800 Subject: [PATCH] bugfix: fix memory growth caused by brpc arena configuration. --- third_party/Mooncake | 2 +- xllm/api_service/api_service.cpp | 10 ++++------ xllm/api_service/stream_call.h | 14 ++------------ xllm/server/xllm_server.cpp | 2 ++ 4 files changed, 9 insertions(+), 19 deletions(-) mode change 100755 => 100644 xllm/api_service/api_service.cpp mode change 100755 => 100644 xllm/api_service/stream_call.h diff --git a/third_party/Mooncake b/third_party/Mooncake index be894977d..fb26af761 160000 --- a/third_party/Mooncake +++ b/third_party/Mooncake @@ -1 +1 @@ -Subproject commit be894977d926c5fff03735e8aa37e93aaaf041bc +Subproject commit fb26af7613d4251c9c006c9ff7eef5ff4e18ed65 diff --git a/xllm/api_service/api_service.cpp b/xllm/api_service/api_service.cpp old mode 100755 new mode 100644 index 4b8d2fd48..85fd5a5e5 --- a/xllm/api_service/api_service.cpp +++ b/xllm/api_service/api_service.cpp @@ -148,8 +148,8 @@ void ChatCompletionsImpl(std::unique_ptr& service, return; } - auto call = std::make_shared( - ctrl, guard.release(), req_pb, resp_pb, arena != nullptr /*use_arena*/); + auto call = + std::make_shared(ctrl, guard.release(), req_pb, resp_pb); service->process_async(call); } } // namespace @@ -167,19 +167,17 @@ void APIService::ChatCompletionsHttp( LOG(ERROR) << "brpc request | respose | controller is null"; return; } - + auto arena = response->GetArena(); auto ctrl = reinterpret_cast(controller); if (FLAGS_backend == "llm") { - auto arena = response->GetArena(); CHECK(chat_service_impl_) << " chat service is invalid."; ChatCompletionsImpl( chat_service_impl_, done_guard, arena, ctrl); } else if (FLAGS_backend == "vlm") { CHECK(mm_chat_service_impl_) << " mm chat service is invalid."; - // TODO: fix me - temporarily using heap allocation instead of arena ChatCompletionsImpl( - mm_chat_service_impl_, done_guard, nullptr, ctrl); + mm_chat_service_impl_, done_guard, arena, ctrl); } } diff --git a/xllm/api_service/stream_call.h b/xllm/api_service/stream_call.h old mode 100755 new mode 100644 index 34f763fd9..15124cc3a --- a/xllm/api_service/stream_call.h +++ b/xllm/api_service/stream_call.h @@ -39,13 +39,8 @@ class StreamCall : public Call { StreamCall(brpc::Controller* controller, ::google::protobuf::Closure* done, Request* request, - Response* response, - bool use_arena = true) - : Call(controller), - done_(done), - request_(request), - response_(response), - use_arena_(use_arena) { + Response* response) + : Call(controller), done_(done), request_(request), response_(response) { stream_ = request_->stream(); if (stream_) { pa_ = controller_->CreateProgressiveAttachment(); @@ -72,10 +67,6 @@ class StreamCall : public Call { if (!stream_) { done_->Run(); } - if (!use_arena_) { - delete request_; - delete response_; - } } bool write_and_finish(Response& response) { @@ -151,7 +142,6 @@ class StreamCall : public Call { Response* response_; bool stream_ = false; - bool use_arena_ = true; butil::intrusive_ptr pa_; butil::IOBuf io_buf_; diff --git a/xllm/server/xllm_server.cpp b/xllm/server/xllm_server.cpp index 9a35a8e1e..48b74fe13 100644 --- a/xllm/server/xllm_server.cpp +++ b/xllm/server/xllm_server.cpp @@ -50,6 +50,8 @@ bool XllmServer::start(std::unique_ptr service) { } brpc::ServerOptions options; + options.rpc_pb_message_factory = + brpc::GetArenaRpcPBMessageFactory<1024 * 1024, 1024 * 1024 * 100>(); options.idle_timeout_sec = FLAGS_rpc_idle_timeout_s; options.num_threads = FLAGS_num_threads; if (server_->Start(FLAGS_port, &options) != 0) {