Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1348,9 +1348,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
));
add_opt(common_arg(
{"--prio"}, "N",
string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority),
string_format("set process/thread priority : low(-1), normal(0), medium(1), high(2), realtime(3) (default: %d)\n", params.cpuparams.priority),
[](common_params & params, int prio) {
if (prio < 0 || prio > 3) {
if (prio < GGML_SCHED_PRIO_LOW || prio > GGML_SCHED_PRIO_REALTIME) {
throw std::invalid_argument("invalid value");
}
params.cpuparams.priority = (enum ggml_sched_priority) prio;
Expand Down
2 changes: 2 additions & 0 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {

DWORD p = NORMAL_PRIORITY_CLASS;
switch (prio) {
case GGML_SCHED_PRIO_LOW: p = BELOW_NORMAL_PRIORITY_CLASS; break;
case GGML_SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break;
case GGML_SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break;
case GGML_SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break;
Expand All @@ -228,6 +229,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {

int p = 0;
switch (prio) {
case GGML_SCHED_PRIO_LOW: p = 5; break;
case GGML_SCHED_PRIO_NORMAL: p = 0; break;
case GGML_SCHED_PRIO_MEDIUM: p = -5; break;
case GGML_SCHED_PRIO_HIGH: p = -10; break;
Expand Down
1 change: 1 addition & 0 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -2181,6 +2181,7 @@ extern "C" {

// scheduling priorities
enum ggml_sched_priority {
GGML_SCHED_PRIO_LOW = -1,
GGML_SCHED_PRIO_NORMAL,
GGML_SCHED_PRIO_MEDIUM,
GGML_SCHED_PRIO_HIGH,
Expand Down
23 changes: 23 additions & 0 deletions ggml/src/ggml-cpu/ggml-cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2418,12 +2418,32 @@ static bool ggml_thread_apply_priority(int32_t prio) {
// This is up to the applications.
DWORD p = THREAD_PRIORITY_NORMAL;
switch (prio) {
case GGML_SCHED_PRIO_LOW: p = THREAD_PRIORITY_BELOW_NORMAL; break;
case GGML_SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
case GGML_SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
case GGML_SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
}

if (prio != GGML_SCHED_PRIO_LOW) {
// Tell Windows that this thread should not be throttled (needs its own CPU core).
// Newer Windows 11 versions aggresively park (offline) CPU cores and often place
// all our threads onto the first 4 cores which results in terrible performance with
// n_threads > 4
#if _WIN32_WINNT >= 0x0602
THREAD_POWER_THROTTLING_STATE t;
ZeroMemory(&t, sizeof(t));
t.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
t.ControlMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED;
t.StateMask = 0;

if (!SetThreadInformation(GetCurrentThread(), ThreadPowerThrottling, &t, sizeof(t))) {
GGML_LOG_DEBUG("failed to disable thread power throttling %d : (%d)\n", prio, (int) GetLastError());
return false;
}
#endif
}

if (prio == GGML_SCHED_PRIO_NORMAL) {
// Keep inherited policy/priority
return true;
Expand Down Expand Up @@ -2451,6 +2471,8 @@ static bool ggml_thread_apply_priority(int32_t prio) {
struct sched_param p;
int32_t policy = SCHED_OTHER;
switch (prio) {
// TODO: there seems to be no way to set lower prio on Apple platforms
case GGML_SCHED_PRIO_LOW: policy = SCHED_OTHER; p.sched_priority = 0; break;
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
Expand Down Expand Up @@ -2507,6 +2529,7 @@ static bool ggml_thread_apply_priority(int32_t prio) {
struct sched_param p;
int32_t policy = SCHED_OTHER;
switch (prio) {
case GGML_SCHED_PRIO_LOW: policy = SCHED_BATCH; p.sched_priority = 0; break;
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
Expand Down
2 changes: 1 addition & 1 deletion tools/llama-bench/llama-bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ static void print_usage(int /* argc */, char ** argv) {
printf(" --numa <distribute|isolate|numactl> numa mode (default: disabled)\n");
printf(" -r, --repetitions <n> number of times to repeat each test (default: %d)\n",
cmd_params_defaults.reps);
printf(" --prio <0|1|2|3> process/thread priority (default: %d)\n",
printf(" --prio <-1,0|1|2|3> process/thread priority (default: %d)\n",
cmd_params_defaults.prio);
printf(" --delay <0...N> (seconds) delay between each test (default: %d)\n",
cmd_params_defaults.delay);
Expand Down
Loading