20 changes: 20 additions & 0 deletions common/arg.cpp
@@ -564,6 +564,26 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.cpuparams.priority = (enum ggml_sched_priority) prio;
}
));
add_opt(common_arg(
{ "-CPnP", "--cpu-pnp-strategy" }, "N",
string_format("set CPU PnP strategy : 0-disabled, 1-efficiency (default: %d)\n", params.cpuparams.cpu_pnp_strategy),
[](common_params& params, int strategy) {
if (strategy < 0 || strategy > 1) {
throw std::invalid_argument("invalid value");
}
params.cpuparams.cpu_pnp_strategy = (enum ggml_cpu_pnp_strategy)strategy;
}
));
add_opt(common_arg(
{ "-CPnPb", "--cpu-pnp-strategy-batch" }, "N",
string_format("set CPU PnP strategy batch : 0-disabled, 1-efficiency (default: %d)\n", params.cpuparams.cpu_pnp_strategy),
[](common_params& params, int strategy) {
if (strategy < 0 || strategy > 1) {
throw std::invalid_argument("invalid value");
}
params.cpuparams_batch.cpu_pnp_strategy = (enum ggml_cpu_pnp_strategy)strategy;
}
));
add_opt(common_arg(
{"--poll"}, "<0...100>",
string_format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll),
126 changes: 102 additions & 24 deletions common/common.cpp
@@ -97,6 +97,77 @@ using json = nlohmann::ordered_json;
// CPU utils
//

#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later

// Print CPU Information
void print_cpu_info(const cpu_info& info) {
LOG_INF("CPU Information:\n");
LOG_INF("----------------\n");
LOG_INF("Is Hybrid Architecture: %s\n", info.is_hybrid ? "Yes" : "No");
LOG_INF("Number of Logical Cores: %d\n", info.num_logical_cores);
LOG_INF("Number of Physical Cores: %d\n", info.num_physical_cores);
LOG_INF("Number of Performance Cores (P-Cores): %d\n", info.num_p_cores);
LOG_INF("Number of Efficient Cores (E-Cores): %d\n", info.num_e_cores);
LOG_INF("\nE-Core Affinity Mask:\n");
LOG_INF("%s\n", info.e_core_affinity_mask.to_string().c_str());
LOG_INF("\nP-Core Affinity Mask:\n");
LOG_INF("%s\n", info.p_core_affinity_mask.to_string().c_str());
}

// Populate CPU Information
int get_cpu_info(cpu_info& c_info) {
DWORD buffer_size = 0;

if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
return 0;
}
}

std::vector<char> buffer(buffer_size);
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
return 0;
}

c_info.num_physical_cores = 0;
c_info.num_logical_cores = 0;
c_info.num_e_cores = 0;
c_info.num_p_cores = 0;

PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
while (buffer_size > 0) {
if (info->Relationship == RelationProcessorCore) {
c_info.num_physical_cores++;
for (int i = 0; i < info->Processor.GroupCount; ++i) {
GROUP_AFFINITY *groupAffinity = &info->Processor.GroupMask[i];
WORD groupNumber = groupAffinity->Group;
KAFFINITY mask = groupAffinity->Mask;
int baseIndex = groupNumber * 64; // each Windows processor group holds up to 64 logical processors
c_info.num_logical_cores += __popcnt64(mask);
// EfficiencyClass 0 is treated as an efficiency (E) core; higher classes as performance (P) cores
if (info->Processor.EfficiencyClass < 1) {
c_info.e_core_affinity_mask |= (std::bitset<GGML_MAX_N_THREADS>(mask) << baseIndex);
c_info.num_e_cores += __popcnt64(mask);
} else {
c_info.p_core_affinity_mask |= (std::bitset<GGML_MAX_N_THREADS>(mask) << baseIndex);
c_info.num_p_cores += __popcnt64(mask);
}
}
}

buffer_size -= info->Size;
info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char*>(info) + info->Size);
}

if (c_info.num_p_cores > 0 && c_info.num_e_cores > 0) {
c_info.is_hybrid = true;
}

return 1;
}

#endif
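
A minimal sketch of how the two helpers above might be exercised together. This is illustrative only and not part of this change; `log_hybrid_topology` is a hypothetical name, and the same Windows-only guard is assumed:

#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__)
static void log_hybrid_topology() {
    cpu_info info;
    if (get_cpu_info(info)) {   // populate core counts and affinity masks
        print_cpu_info(info);   // dump the detected topology to the log
        if (info.is_hybrid) {
            LOG_INF("E-cores available for the efficiency strategy: %d\n", info.num_e_cores);
        }
    }
}
#endif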



int32_t cpu_get_num_physical_cores() {
#ifdef __linux__
// enumerate the set of thread siblings, num entries is num cores
@@ -131,29 +202,12 @@ int32_t cpu_get_num_physical_cores() {
unsigned int n_threads_win = std::thread::hardware_concurrency();
unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4;

DWORD buffer_size = 0;
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
return default_threads;
}
}

std::vector<char> buffer(buffer_size);
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
cpu_info info;
if(!get_cpu_info(info))
return default_threads;
}
else
return info.num_physical_cores > 0 ? info.num_physical_cores : default_threads;

int32_t num_physical_cores = 0;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
while (buffer_size > 0) {
if (info->Relationship == RelationProcessorCore) {
num_physical_cores += info->Processor.GroupCount;
}
buffer_size -= info->Size;
info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char*>(info) + info->Size);
}

return num_physical_cores > 0 ? num_physical_cores : default_threads;
#endif
unsigned int n_threads = std::thread::hardware_concurrency();
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
@@ -291,12 +345,36 @@ bool set_process_priority(enum ggml_sched_priority prio) {
void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model) {
int32_t n_set = 0;

if (cpuparams.n_threads < 0) {
LOG_INF("n_threads: %d, cpu pnp strategy: %d\n", cpuparams.n_threads, cpuparams.cpu_pnp_strategy);

if (cpuparams.n_threads < 0 || cpuparams.cpu_pnp_strategy > 0) {
// Assuming everything about cpuparams is invalid
if (role_model != nullptr) {
if (role_model != nullptr && cpuparams.cpu_pnp_strategy == 0) {
cpuparams = *role_model;
} else {
cpuparams.n_threads = cpu_get_num_math();
if (cpuparams.n_threads < 0) {
cpuparams.n_threads = cpu_get_num_math();
}

#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later

if (cpuparams.cpu_pnp_strategy == GGML_CPU_PNP_STRATEGY_EFFICIENCY) {
cpu_info info;
if (get_cpu_info(info)) {
print_cpu_info(info);
if (info.is_hybrid) {
LOG_INF("hybrid platform detected: applying efficiency strategy\n");
if (cpuparams.n_threads > info.num_e_cores) {
LOG_INF("overriding n_threads (%d) with the number of efficient cores (%d)\n", cpuparams.n_threads, info.num_e_cores);
cpuparams.n_threads = info.num_e_cores;
}
for (int32_t i = 0; i < GGML_MAX_N_THREADS; i++) {
cpuparams.cpumask[i] = info.e_core_affinity_mask[i];
}
cpuparams.mask_valid = true;
}
}
}
#endif
}
}

12 changes: 12 additions & 0 deletions common/common.h
@@ -7,6 +7,7 @@
#include <string>
#include <vector>
#include <sstream>
#include <bitset>

#ifdef _WIN32
#define DIRECTORY_SEPARATOR '\\'
@@ -45,11 +46,22 @@ struct common_control_vector_load_info;
// CPU utils
//

struct cpu_info {
bool is_hybrid = false;
int num_logical_cores = 0;
int num_physical_cores = 0;
int num_p_cores = 0;
int num_e_cores = 0;
std::bitset<GGML_MAX_N_THREADS> e_core_affinity_mask;
std::bitset<GGML_MAX_N_THREADS> p_core_affinity_mask;
};

struct cpu_params {
int n_threads = -1;
bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
bool mask_valid = false; // Default: any CPU
enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
enum ggml_cpu_pnp_strategy cpu_pnp_strategy = GGML_CPU_PNP_STRATEGY_DISABLED; // CPU power and performance strategy
bool strict_cpu = false; // Use strict CPU placement
uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
};
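
For illustration only (not part of this change): the new `cpu_pnp_strategy` field can also be set programmatically. A minimal sketch, assuming the params are then run through `postprocess_cpu_params()` as the CLI path does:

    common_params params;
    // request the efficiency strategy; on a hybrid Windows machine postprocessing
    // clamps n_threads to the E-core count and fills cpumask with the E-core bits
    params.cpuparams.cpu_pnp_strategy = GGML_CPU_PNP_STRATEGY_EFFICIENCY;
    postprocess_cpu_params(params.cpuparams, /*role_model=*/nullptr);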
5 changes: 5 additions & 0 deletions ggml/include/ggml.h
@@ -2169,6 +2169,11 @@ extern "C" {
GGML_SCHED_PRIO_REALTIME
};

enum ggml_cpu_pnp_strategy {
GGML_CPU_PNP_STRATEGY_DISABLED,
GGML_CPU_PNP_STRATEGY_EFFICIENCY
};

// threadpool params
// Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
struct ggml_threadpool_params {
28 changes: 27 additions & 1 deletion ggml/src/ggml-cpu/ggml-cpu.c
@@ -1318,10 +1318,11 @@ struct ggml_threadpool {
struct ggml_compute_state {
#ifndef GGML_USE_OPENMP
ggml_thread_t thrd;
bool cpumask[GGML_MAX_N_THREADS];
int last_graph;
bool pending;
#endif
bool cpumask[GGML_MAX_N_THREADS];
bool mask_valid;
struct ggml_threadpool * threadpool;
int ith;
};
@@ -14044,10 +14045,20 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads;
struct ggml_compute_state * workers = ggml_aligned_malloc(workers_size);

// Check if cpu mask is valid
bool cpumask_valid = false;
for (int i = 0; i < GGML_MAX_N_THREADS; i++) {
if (tpp->cpumask[i]) {
cpumask_valid = true;
break;
}
}

memset(workers, 0, workers_size);
for (int j = 0; j < tpp->n_threads; j++) {
workers[j].threadpool = threadpool;
workers[j].ith = j;
workers[j].mask_valid = cpumask_valid; // mark whether worker threads should apply the requested affinity
}

threadpool->workers = workers;
@@ -14079,6 +14090,12 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
}
#endif // GGML_USE_OPENMP

// distribute the requested cpumask across workers; the main thread (worker 0) gets its placement last
int32_t cpumask_iter = 0;
for (int j = 1; j < tpp->n_threads; j++) {
ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
}
ggml_thread_cpumask_next(tpp->cpumask, workers[0].cpumask, tpp->strict_cpu, &cpumask_iter);

return threadpool;
}
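
The placement loop above hands each worker a slice of the requested mask via `ggml_thread_cpumask_next()`. Below is a simplified sketch of that behavior, written as an assumption for illustration (the real helper lives elsewhere in ggml-cpu.c and is not touched by this diff): non-strict placement copies the full mask to every worker, strict placement assigns one allowed CPU per worker in round-robin order.

static void cpumask_next_sketch(const bool * global_mask, bool * local_mask, bool strict, int32_t * iter) {
    if (!strict) {
        // non-strict: every worker may run on any CPU allowed by the global mask
        memcpy(local_mask, global_mask, GGML_MAX_N_THREADS);
        return;
    }
    // strict: pin this worker to the next allowed CPU after *iter, wrapping around
    for (int32_t i = 0; i < GGML_MAX_N_THREADS; i++) {
        int32_t idx = (*iter + i) % GGML_MAX_N_THREADS;
        if (global_mask[idx]) {
            local_mask[idx] = true;
            *iter = idx + 1;
            return;
        }
    }
}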

@@ -14125,10 +14142,19 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
}

// if this worker has a valid mask, apply its CPU affinity
if (threadpool->workers[omp_get_thread_num()].mask_valid) {
ggml_thread_apply_affinity(threadpool->workers[omp_get_thread_num()].cpumask);
}

ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
}
} else {
atomic_store_explicit(&threadpool->n_threads_cur, 1, memory_order_relaxed);

// if the main thread (worker 0) has a valid mask, apply its CPU affinity
if (threadpool->workers[0].mask_valid) {
ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
}

ggml_graph_compute_thread(&threadpool->workers[0]);
}
#else
Expand Down