|
#define GGML_MAX_SRC            10

// Upper bound on the number of threads; sizes the cpumask array in
// struct ggml_threadpool_params.
// This is defined unconditionally on purpose: placing it inside the
// GGML_MAX_NAME guard below would leave it undefined whenever the build
// overrides GGML_MAX_NAME (e.g. -DGGML_MAX_NAME=128).
#define GGML_MAX_N_THREADS      512

// GGML_MAX_NAME can be overridden by the build; 64 is the fallback value
#ifndef GGML_MAX_NAME
#   define GGML_MAX_NAME        64
#endif

#define GGML_MAX_OP_PARAMS      64
#define GGML_DEFAULT_N_THREADS  4
@@ -628,13 +630,37 @@ extern "C" { |
// The computation is aborted when the callback returns true
typedef bool (*ggml_abort_callback)(void *data);
630 | 632 |
|
// Scheduling priority levels (values are sequential, starting at 0)
enum ggml_sched_priority {
    GGML_SCHED_PRIO_NORMAL   = 0,
    GGML_SCHED_PRIO_MEDIUM   = 1,
    GGML_SCHED_PRIO_HIGH     = 2,
    GGML_SCHED_PRIO_REALTIME = 3
};
| 640 | + |
| 641 | + // Threadpool params |
| 642 | + // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults |
| 643 | + struct ggml_threadpool_params { |
| 644 | + bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings) |
| 645 | + int n_threads; // number of threads |
| 646 | + enum ggml_sched_priority prio; // thread priority |
| 647 | + uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling) |
| 648 | + bool strict_cpu; // strict cpu placement |
| 649 | + bool paused; // start in paused state |
| 650 | + }; |
| 651 | + |
// Opaque threadpool type: only forward-declared here, defined in ggml.c
struct ggml_threadpool;

// Pointer handle to the opaque threadpool type
typedef struct ggml_threadpool *ggml_threadpool_t;
| 655 | + |
631 | 656 | // the compute plan that needs to be prepared for ggml_graph_compute() |
632 | 657 | // since https://github.com/ggerganov/ggml/issues/287 |
633 | 658 | struct ggml_cplan { |
634 | 659 | size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()` |
635 | 660 | uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()` |
636 | 661 |
|
637 | 662 | int n_threads; |
| 663 | + struct ggml_threadpool * threadpool; |
638 | 664 |
|
639 | 665 | // abort ggml_graph_compute when true |
640 | 666 | ggml_abort_callback abort_callback; |
@@ -2057,10 +2083,23 @@ extern "C" { |
2057 | 2083 | GGML_API size_t ggml_graph_overhead(void); |
2058 | 2084 | GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads); |
2059 | 2085 |
|
| 2086 | + GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads); |
| 2087 | + GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params *p, int n_threads); |
| 2088 | + GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params *p0, const struct ggml_threadpool_params *p1); |
| 2089 | + GGML_API struct ggml_threadpool* ggml_threadpool_new (struct ggml_threadpool_params * params); |
| 2090 | + GGML_API void ggml_threadpool_free (struct ggml_threadpool * threadpool); |
| 2091 | + GGML_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool); |
| 2092 | + GGML_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool); |
| 2093 | + GGML_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool); |
| 2094 | + |
2060 | 2095 | // ggml_graph_plan() has to be called before ggml_graph_compute() |
2061 | 2096 | // when plan.work_size > 0, caller must allocate memory for plan.work_data |
2062 | | - GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/); |
2063 | | - GGML_API enum ggml_status ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); |
| 2097 | + GGML_API struct ggml_cplan ggml_graph_plan( |
| 2098 | + const struct ggml_cgraph * cgraph, |
| 2099 | + int n_threads, /* = GGML_DEFAULT_N_THREADS */ |
| 2100 | + struct ggml_threadpool * threadpool /* = NULL */ ); |
| 2101 | + GGML_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); |
| 2102 | + |
2064 | 2103 | // same as ggml_graph_compute() but the work data is allocated as a part of the context |
2065 | 2104 | // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data |
2066 | 2105 | GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads); |
|
0 commit comments