Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ endif()
# find all dependencies from vcpkg
find_package(Boost REQUIRED)
find_package(Boost REQUIRED COMPONENTS serialization)
find_package(Eigen3 CONFIG REQUIRED)
find_package(glog CONFIG REQUIRED)
find_package(gflags CONFIG REQUIRED)
find_package(leveldb CONFIG REQUIRED)
Expand Down
4 changes: 4 additions & 0 deletions vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@
"name": "boost-serialization",
"version>=": "1.84.0"
},
{
"name": "eigen3",
"version>=": "3.4.0"
},
{
"name": "protobuf",
"version>=": "3.21.12",
Expand Down
2 changes: 2 additions & 0 deletions xllm_service/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cc_library(
macros.h
slice.h
threadpool.h
ttft_predictor.h
types.h
utils.h
hash_util.h
Expand All @@ -22,6 +23,7 @@ cc_library(
global_gflags.cpp
json_reader.cpp
threadpool.cpp
ttft_predictor.cpp
utils.cpp
hash_util.cpp
xllm/uuid.cpp
Expand Down
59 changes: 59 additions & 0 deletions xllm_service/common/ttft_predictor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/* Copyright 2025 The xLLM Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://github.com/jd-opensource/xllm-service/blob/main/LICENSE

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "ttft_predictor.h"

static constexpr int32_t kDegree = 2;

namespace xllm_service {

TtftPredictor::TtftPredictor(
const std::vector<std::pair<int32_t, int64_t>>& ttft_profiling_data) {
if (!ttft_profiling_data.empty()) {
// construct Vandermonde matrix
int32_t m = ttft_profiling_data.size();
int32_t n = kDegree + 1;
Eigen::MatrixXd matrix(m, n);
for (int32_t i = 0; i < m; ++i) {
for (int32_t j = 0; j < n; ++j) {
matrix(i, j) = std::pow(ttft_profiling_data[i].first, j);
}
}

// construct target vector
Eigen::VectorXd target(m);
for (int32_t i = 0; i < m; ++i) {
target(i) = ttft_profiling_data[i].second;
}

// get coefficients
coefficients_ = matrix.colPivHouseholderQr().solve(target);
} else {
coefficients_ = Eigen::VectorXd::Zero(1);
}
}

int64_t TtftPredictor::predict_ttft(int32_t length) {
double result = 0.0;
double power = 1.0;
for (int32_t i = 0; i < coefficients_.size(); ++i) {
result += coefficients_(i) * power;
power *= length;
}

return static_cast<int64_t>(result);
}

} // namespace xllm_service
35 changes: 35 additions & 0 deletions xllm_service/common/ttft_predictor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/* Copyright 2025 The xLLM Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://github.com/jd-opensource/xllm-service/blob/main/LICENSE

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#pragma once

#include <Eigen/Dense>

namespace xllm_service {

// Predictor for predicting TTFT based on input length
class TtftPredictor final {
public:
TtftPredictor(
const std::vector<std::pair<int32_t, int64_t>>& ttft_profiling_data);
~TtftPredictor() = default;

int64_t predict_ttft(int32_t length);

private:
Eigen::VectorXd coefficients_;
};

} // namespace xllm_service
9 changes: 9 additions & 0 deletions xllm_service/common/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ struct InstanceMetaInfo {
std::vector<uint64_t> k_cache_ids;
std::vector<uint64_t> v_cache_ids;
int32_t dp_size;
// ttft profiling data
std::vector<std::pair<int32_t, int64_t>> ttft_profiling_data;

// latest heatbeat timestamp
uint64_t latest_timestamp = 0;
Expand All @@ -155,6 +157,7 @@ struct InstanceMetaInfo {
json_val["k_cache_ids"] = k_cache_ids;
json_val["v_cache_ids"] = v_cache_ids;
json_val["dp_size"] = dp_size;
json_val["ttft_profiling_data"] = ttft_profiling_data;
return json_val;
}

Expand Down Expand Up @@ -189,6 +192,12 @@ struct InstanceMetaInfo {

dp_size = json_value.at("dp_size").get<int32_t>();

for (const auto& item : json_value.at("ttft_profiling_data")) {
if (item.is_array() && item.size() == 2) {
ttft_profiling_data.emplace_back(item[0], item[1]);
}
}

set_init_timestamp();
} catch (const std::exception& e) {
LOG(ERROR) << "json str:" << json_str
Expand Down
11 changes: 11 additions & 0 deletions xllm_service/scheduler/managers/instance_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ void InstanceMgr::init() {
for (auto& it : ETCD_KEYS_PREFIX_MAP) {
etcd_client_->get_prefix(it.second, &instances_);
}
// create ttft predictor for each instance
for (auto& pair : instances_) {
ttft_predictors_.insert_or_assign(
pair.first, TtftPredictor(pair.second.ttft_profiling_data));
}
LOG(INFO) << "Load instance info from etcd:" << instances_.size();
std::vector<std::string> channel_creat_fail_insts;
prefill_index_.reserve(instances_.size());
Expand Down Expand Up @@ -94,6 +99,7 @@ void InstanceMgr::init() {
}
for (auto& name : channel_creat_fail_insts) {
instances_.erase(name);
ttft_predictors_.erase(name);
}
}
{
Expand Down Expand Up @@ -334,6 +340,10 @@ void InstanceMgr::update_instance_metainfo(const etcd::Response& response,
continue;
}

// create ttft predictor for instance
ttft_predictors_.emplace(
iter.first, TtftPredictor(iter.second.ttft_profiling_data));

instances_.insert(std::make_pair(iter.first, std::move(iter.second)));

switch (iter.second.type) {
Expand Down Expand Up @@ -385,6 +395,7 @@ void InstanceMgr::update_instance_metainfo(const etcd::Response& response,
}

instances_.erase(iter);
ttft_predictors_.erase(iter);
cached_channels_.erase(iter);
{
std::lock_guard<std::mutex> lock(update_mutex_);
Expand Down
4 changes: 3 additions & 1 deletion xllm_service/scheduler/managers/instance_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@ limitations under the License.
#include <unordered_map>
#include <unordered_set>

#include "../etcd_client/etcd_client.h"
#include "common/macros.h"
#include "common/options.h"
#include "common/threadpool.h"
#include "common/ttft_predictor.h"
#include "common/types.h"
#include "scheduler/etcd_client/etcd_client.h"
#include "xllm_rpc_service.pb.h"

namespace xllm_service {
Expand Down Expand Up @@ -80,6 +81,7 @@ class InstanceMgr final {

std::shared_mutex inst_mutex_;
std::unordered_map<std::string, InstanceMetaInfo> instances_;
std::unordered_map<std::string, TtftPredictor> ttft_predictors_;
std::vector<std::string> prefill_index_;
std::vector<std::string> decode_index_;
uint64_t next_prefill_index_ = 0;
Expand Down