Skip to content

Commit 1e16340

Browse files
max-krasnyansky, slaren
authored and committed
threading: support for GGML_SCHED_PRIO_LOW, update thread info on Windows to avoid throttling (llama/12995)
* threading: support for GGML_SCHED_PRIO_LOW, update thread info on Windows to avoid throttling We talked about adding LOW priority for GGML threads in the original threadpool PR. It might be useful for some cases to avoid contention. Latest Windows ARM64 releases started parking (offlining) the CPU cores more aggressively which results in suboptimal performance with n_threads > 4. To deal with that we now disable Power Throttling for our threads for the NORMAL and higher priorities. Co-authored-by: Diego Devesa <[email protected]> * threading: disable SetThreadInfo() calls for older Windows versions * Update tools/llama-bench/llama-bench.cpp Co-authored-by: Diego Devesa <[email protected]> --------- Co-authored-by: Diego Devesa <[email protected]>
1 parent 4a50254 commit 1e16340

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

ggml/include/ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2178,6 +2178,7 @@ extern "C" {
21782178

21792179
// scheduling priorities
21802180
enum ggml_sched_priority {
2181+
GGML_SCHED_PRIO_LOW = -1,
21812182
GGML_SCHED_PRIO_NORMAL,
21822183
GGML_SCHED_PRIO_MEDIUM,
21832184
GGML_SCHED_PRIO_HIGH,

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2418,12 +2418,32 @@ static bool ggml_thread_apply_priority(int32_t prio) {
24182418
// This is up to the applications.
24192419
DWORD p = THREAD_PRIORITY_NORMAL;
24202420
switch (prio) {
2421+
case GGML_SCHED_PRIO_LOW: p = THREAD_PRIORITY_BELOW_NORMAL; break;
24212422
case GGML_SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
24222423
case GGML_SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
24232424
case GGML_SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
24242425
case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
24252426
}
24262427

2428+
if (prio != GGML_SCHED_PRIO_LOW) {
2429+
// Tell Windows that this thread should not be throttled (needs its own CPU core).
2430+
// Newer Windows 11 versions aggressively park (offline) CPU cores and often place
2431+
// all our threads onto the first 4 cores which results in terrible performance with
2432+
// n_threads > 4
2433+
#if _WIN32_WINNT >= 0x0602
2434+
THREAD_POWER_THROTTLING_STATE t;
2435+
ZeroMemory(&t, sizeof(t));
2436+
t.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
2437+
t.ControlMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED;
2438+
t.StateMask = 0;
2439+
2440+
if (!SetThreadInformation(GetCurrentThread(), ThreadPowerThrottling, &t, sizeof(t))) {
2441+
GGML_LOG_DEBUG("failed to disable thread power throttling %d : (%d)\n", prio, (int) GetLastError());
2442+
return false;
2443+
}
2444+
#endif
2445+
}
2446+
24272447
if (prio == GGML_SCHED_PRIO_NORMAL) {
24282448
// Keep inherited policy/priority
24292449
return true;
@@ -2451,6 +2471,8 @@ static bool ggml_thread_apply_priority(int32_t prio) {
24512471
struct sched_param p;
24522472
int32_t policy = SCHED_OTHER;
24532473
switch (prio) {
2474+
// TODO: there seems to be no way to set lower prio on Apple platforms
2475+
case GGML_SCHED_PRIO_LOW: policy = SCHED_OTHER; p.sched_priority = 0; break;
24542476
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
24552477
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
24562478
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
@@ -2507,6 +2529,7 @@ static bool ggml_thread_apply_priority(int32_t prio) {
25072529
struct sched_param p;
25082530
int32_t policy = SCHED_OTHER;
25092531
switch (prio) {
2532+
case GGML_SCHED_PRIO_LOW: policy = SCHED_BATCH; p.sched_priority = 0; break;
25102533
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
25112534
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
25122535
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;

0 commit comments

Comments
 (0)