Skip to content

Commit 349a778

Browse files
committed
feat: support contributing memory to distributed KV cache storage.
1 parent 844f089 commit 349a778

File tree

10 files changed

+46
-0
lines changed

10 files changed

+46
-0
lines changed

xllm/core/common/global_flags.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,10 @@ DEFINE_string(store_local_hostname,
344344
"",
345345
"The local host name of the kv cache store client.");
346346

347+
DEFINE_uint64(store_segment_size,
348+
0,
349+
"The contributed memory to kv cache store.");
350+
347351
// --- computation communication parallel config ---
348352

349353
DEFINE_bool(

xllm/core/common/global_flags.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ DECLARE_string(store_metadata_server);
165165

166166
DECLARE_string(store_local_hostname);
167167

168+
DECLARE_uint64(store_segment_size);
169+
168170
DECLARE_bool(enable_multi_stream_parallel);
169171

170172
DECLARE_int32(micro_batch_num);

xllm/core/common/options.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ std::string Options::to_string() const {
5656
<< ", store_master_server_address: " << store_master_server_address()
5757
<< ", store_metadata_server: " << store_metadata_server()
5858
<< ", store_local_hostname: " << store_local_hostname()
59+
<< ", store_segment_size: " << store_segment_size()
5960
<< ", enable_multi_stream_parallel: " << enable_multi_stream_parallel()
6061
<< ", enable_continuous_kvcache: " << enable_continuous_kvcache()
6162
<< ", disable_ttft_profiling: " << disable_ttft_profiling()

xllm/core/common/options.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ class Options {
149149

150150
PROPERTY(std::string, store_local_hostname) = "";
151151

152+
PROPERTY(uint64_t, store_segment_size) = 0;
153+
152154
PROPERTY(bool, enable_multi_stream_parallel) = false;
153155

154156
PROPERTY(bool, enable_profile_step_time) = false;

xllm/core/framework/kv_cache/kv_cache_store.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,42 @@ bool KVCacheStore::init(const StoreConfig& config,
6969
<< ", and data ptr: " << uint64_t(config_.tensor_data);
7070
}
7171
}
72+
73+
if (config_.segment_size != 0) {
74+
global_segment_ptr_ =
75+
mooncake::allocate_buffer_allocator_memory(config_.segment_size);
76+
if (global_segment_ptr_ == nullptr) {
77+
LOG(FATAL) << "allocate_buffer_allocator_memory " << config_.segment_size
78+
<< " bytes failed!";
79+
}
80+
auto mount_result =
81+
client_ptr_->MountSegment(global_segment_ptr_, config_.segment_size);
82+
if (!mount_result.has_value()) {
83+
LOG(ERROR) << "Failed to mount segment: "
84+
<< toString(mount_result.error());
85+
}
86+
LOG(INFO) << "Segment mounted successfully";
87+
}
88+
7289
is_initialized_ = true;
7390
return true;
7491
}
7592

7693
KVCacheStore::~KVCacheStore() {
94+
if (client_ptr_ && global_segment_ptr_) {
95+
if (!client_ptr_->UnmountSegment(global_segment_ptr_, config_.segment_size)
96+
.has_value()) {
97+
LOG(ERROR) << "Failed to unmount segment";
98+
}
99+
}
100+
77101
if (client_ptr_) {
78102
client_ptr_.reset();
79103
}
104+
105+
if (global_segment_ptr_) {
106+
free(global_segment_ptr_);
107+
}
80108
}
81109

82110
uint32_t KVCacheStore::batch_put(

xllm/core/framework/kv_cache/kv_cache_store.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ struct StoreConfig {
1919
std::string master_server_address = "";
2020
int replica_num = 1;
2121
uint32_t tp_rank = 0;
22+
size_t segment_size = 0;
2223
size_t total_size = 0;
2324
void* tensor_data = nullptr;
2425
};
@@ -75,6 +76,8 @@ class KVCacheStore {
7576
uint64_t v_cache_size_per_block_;
7677

7778
std::shared_ptr<mooncake::Client> client_ptr_;
79+
80+
void* global_segment_ptr_ = nullptr;
7881
};
7982

8083
} // namespace xllm

xllm/core/runtime/master.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ Master::Master(const Options& options, EngineType type) : options_(options) {
201201
.store_master_server_address(options_.store_master_server_address())
202202
.store_metadata_server(options_.store_metadata_server())
203203
.store_local_hostname(options_.store_local_hostname())
204+
.store_segment_size(options_.store_segment_size())
204205
.enable_continuous_kvcache(options_.enable_continuous_kvcache())
205206
.enable_offline_inference(options_.enable_offline_inference())
206207
.spawn_worker_path(options_.spawn_worker_path())

xllm/core/runtime/options.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,9 @@ struct Options {
155155
// value used if port is not included)
156156
PROPERTY(std::string, store_local_hostname) = "";
157157

158+
// the contributed memory to kv cache store
159+
PROPERTY(uint64_t, store_segment_size) = 0;
160+
158161
// dit
159162
// max requests per batch
160163
PROPERTY(int, max_requests_per_batch) = 0;

xllm/core/runtime/worker_impl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ bool WorkerImpl::allocate_host_kv_cache(
178178
config.tp_rank = options_.dp_size() > 1
179179
? options_.node_rank() % options_.dp_size()
180180
: options_.node_rank();
181+
config.segment_size = options_.store_segment_size();
181182
config.total_size = aligned_tensor_creater_->get_total_size();
182183
config.tensor_data = aligned_tensor_creater_->get_base_ptr();
183184

xllm/xllm.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ int run() {
171171
.store_master_server_address(FLAGS_store_master_server_address)
172172
.store_metadata_server(FLAGS_store_metadata_server)
173173
.store_local_hostname(FLAGS_store_local_hostname)
174+
.store_segment_size(FLAGS_store_segment_size)
174175
.enable_multi_stream_parallel(FLAGS_enable_multi_stream_parallel)
175176
.enable_profile_step_time(FLAGS_enable_profile_step_time)
176177
.enable_profile_token_budget(FLAGS_enable_profile_token_budget)

0 commit comments

Comments
 (0)