Skip to content

Commit 2bf6c55

Browse files
threadpool: consistent use of int type for n_threads params
1 parent 3f8325a commit 2bf6c55

File tree

7 files changed: 27 additions and 27 deletions

common/common.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ enum dimre_method {
6868
};
6969

7070
struct cpu_params {
71-
int32_t n_threads = -1;
71+
int n_threads = -1;
7272
bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
7373
bool mask_valid = false; // Default: any CPU
7474
int32_t priority = 0; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
@@ -214,7 +214,7 @@ struct gpt_params {
214214
int32_t port = 8080; // server listens on this network port
215215
int32_t timeout_read = 600; // http read timeout in seconds
216216
int32_t timeout_write = timeout_read; // http write timeout in seconds
217-
int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
217+
int n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
218218

219219
std::string hostname = "127.0.0.1";
220220
std::string public_path = "";

examples/benchmark/benchmark-matmult.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
5454
#define TENSOR_DUMP(tensor) tensor_dump(tensor, #tensor)
5555

5656
struct benchmark_params_struct {
57-
int32_t n_threads = 1;
57+
int n_threads = 1;
5858
int32_t n_iterations = 10;
5959
};
6060

examples/main/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ int main(int argc, char ** argv) {
223223

224224
LOG("%s: llama threadpool init = n_threads = %d\n",
225225
__func__,
226-
(int32_t) params.cpuparams.n_threads
226+
(int) params.cpuparams.n_threads
227227
);
228228
struct ggml_threadpool_params tpp_batch =
229229
ggml_threadpool_params_from_cpu_params(params.cpuparams_batch);

ggml/include/ggml.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,7 @@ extern "C" {
629629
struct ggml_threadpool_params {
630630
bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores
631631
bool mask_specified; // mask is non-empty
632-
int32_t n_threads; // number of threads
632+
int n_threads; // number of threads
633633
int32_t prio; // thread priority
634634
uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
635635
bool strict_cpu; // strict cpu placement
@@ -2027,7 +2027,7 @@ extern "C" {
20272027
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params *p0, const struct ggml_threadpool_params *p1);
20282028
GGML_API struct ggml_compute_threadpool* ggml_create_threadpool (struct ggml_threadpool_params * params);
20292029
GGML_API void ggml_release_threadpool (struct ggml_compute_threadpool * threadpool);
2030-
GGML_API int32_t ggml_threadpool_get_n_threads(struct ggml_compute_threadpool * threadpool);
2030+
GGML_API int ggml_threadpool_get_n_threads(struct ggml_compute_threadpool * threadpool);
20312031
GGML_API void ggml_pause_threadpool (struct ggml_compute_threadpool * threadpool);
20322032
GGML_API void ggml_resume_threadpool (struct ggml_compute_threadpool * threadpool);
20332033

ggml/src/ggml.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1973,8 +1973,8 @@ struct ggml_compute_threadpool {
19731973
atomic_bool pause; // Used for pausing the threadpool or individual threads
19741974

19751975
struct ggml_compute_state * workers; // per thread state
1976-
int32_t n_threads_max; // number of threads in the pool
1977-
int32_t n_threads_cur; // number of threads used in the current graph
1976+
int n_threads_max; // number of threads in the pool
1977+
int n_threads_cur; // number of threads used in the current graph
19781978

19791979
int32_t prio; // Scheduling priority
19801980
uint32_t poll; // Polling level (0 - no polling)
@@ -18846,7 +18846,7 @@ void ggml_release_threadpool(struct ggml_compute_threadpool* threadpool) {
1884618846

1884718847
#ifndef GGML_USE_OPENMP
1884818848
struct ggml_compute_state* workers = threadpool->workers;
18849-
const int32_t n_threads = threadpool->n_threads_max;
18849+
const int n_threads = threadpool->n_threads_max;
1885018850

1885118851
ggml_mutex_lock(&threadpool->mutex);
1885218852

@@ -18856,7 +18856,7 @@ void ggml_release_threadpool(struct ggml_compute_threadpool* threadpool) {
1885618856
ggml_cond_broadcast(&threadpool->cond);
1885718857
ggml_mutex_unlock(&threadpool->mutex);
1885818858

18859-
for (int32_t j = 1; j < n_threads; j++) {
18859+
for (int j = 1; j < n_threads; j++) {
1886018860
int32_t rc = ggml_thread_join(workers[j].thrd, NULL);
1886118861
GGML_ASSERT(rc == GGML_EXIT_SUCCESS || rc == GGML_EXIT_ABORTED);
1886218862
UNUSED(rc);
@@ -18912,11 +18912,11 @@ void ggml_resume_threadpool(struct ggml_compute_threadpool * threadpool) {
1891218912

1891318913
struct ggml_cplan ggml_graph_plan(
1891418914
const struct ggml_cgraph * cgraph,
18915-
int32_t n_threads,
18915+
int n_threads,
1891618916
struct ggml_compute_threadpool * threadpool) {
1891718917

1891818918
if (threadpool == NULL) {
18919-
GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %u\n", n_threads);
18919+
GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %d\n", n_threads);
1892018920
}
1892118921
if (n_threads <= 0) {
1892218922
n_threads = threadpool ? threadpool->n_threads_max : GGML_DEFAULT_N_THREADS;
@@ -19335,13 +19335,13 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
1933519335
GGML_ASSERT(cplan->n_threads > 0);
1933619336
GGML_ASSERT(cplan->work_size == 0 || cplan->work_data != NULL);
1933719337

19338-
int32_t n_threads = cplan->n_threads;
19338+
int n_threads = cplan->n_threads;
1933919339
struct ggml_compute_threadpool * threadpool = cplan->threadpool;
1934019340

1934119341
bool disposable_threadpool = false;
1934219342

1934319343
if (threadpool == NULL) {
19344-
GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %u\n", n_threads);
19344+
GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %d\n", n_threads);
1934519345
disposable_threadpool = true;
1934619346

1934719347
struct ggml_threadpool_params ttp = {

include/llama.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -304,8 +304,8 @@ extern "C" {
304304
uint32_t n_batch; // logical maximum batch size that can be submitted to llama_decode
305305
uint32_t n_ubatch; // physical maximum batch size
306306
uint32_t n_seq_max; // max number of sequences (i.e. distinct states for recurrent models)
307-
uint32_t n_threads; // number of threads to use for generation
308-
uint32_t n_threads_batch; // number of threads to use for batch processing
307+
int n_threads; // number of threads to use for generation
308+
int n_threads_batch; // number of threads to use for batch processing
309309

310310
enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
311311
enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
@@ -851,13 +851,13 @@ extern "C" {
851851
// Set the number of threads used for decoding
852852
// n_threads is the number of threads used for generation (single token)
853853
// n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
854-
LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch);
854+
LLAMA_API void llama_set_n_threads(struct llama_context * ctx, int n_threads, int n_threads_batch);
855855

856856
// Get the number of threads used for generation of a single token.
857-
LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx);
857+
LLAMA_API int llama_n_threads(struct llama_context * ctx);
858858

859859
// Get the number of threads used for prompt and batch processing (multiple token).
860-
LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
860+
LLAMA_API int llama_n_threads_batch(struct llama_context * ctx);
861861

862862
// Set whether the model is in embeddings mode or not
863863
// If true, embeddings will be returned but logits will not

src/llama.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2373,8 +2373,8 @@ struct llama_cparams {
23732373
uint32_t n_batch;
23742374
uint32_t n_ubatch;
23752375
uint32_t n_seq_max;
2376-
uint32_t n_threads; // number of threads to use for generation
2377-
uint32_t n_threads_batch; // number of threads to use for batch processing
2376+
int n_threads; // number of threads to use for generation
2377+
int n_threads_batch; // number of threads to use for batch processing
23782378

23792379
float rope_freq_base;
23802380
float rope_freq_scale;
@@ -15528,7 +15528,7 @@ static std::pair<int32_t, ggml_compute_threadpool_t> llama_swap_threadpools(
1552815528
int32_t n_tokens) {
1552915529

1553015530
const auto & cparams = lctx.cparams;
15531-
int32_t n_threads = n_tokens == 1 ? cparams.n_threads : cparams.n_threads_batch;
15531+
int n_threads = n_tokens == 1 ? cparams.n_threads : cparams.n_threads_batch;
1553215532

1553315533
ggml_compute_threadpool_t threadpool = nullptr; // nullptr -> disposable threadpool
1553415534

@@ -15663,7 +15663,7 @@ static int llama_decode_internal(
1566315663
std::pair<int32_t, ggml_compute_threadpool_t> threads =
1566415664
llama_swap_threadpools(lctx, n_tokens);
1566515665

15666-
int32_t n_threads = threads.first;
15666+
int n_threads = threads.first;
1566715667
ggml_compute_threadpool_t threadpool = threads.second;
1566815668

1566915669
GGML_ASSERT(n_threads > 0);
@@ -15907,7 +15907,7 @@ static int llama_encode_internal(
1590715907
std::pair<int32_t, ggml_compute_threadpool_t> threads =
1590815908
llama_swap_threadpools(lctx, n_tokens);
1590915909

15910-
int32_t n_threads = threads.first;
15910+
int n_threads = threads.first;
1591115911
ggml_compute_threadpool_t threadpool = threads.second;
1591215912
GGML_ASSERT(n_threads > 0);
1591315913

@@ -19451,16 +19451,16 @@ size_t llama_state_seq_load_file(struct llama_context * ctx, const char * filepa
1945119451
}
1945219452
}
1945319453

19454-
void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch) {
19454+
void llama_set_n_threads(struct llama_context * ctx, int n_threads, int n_threads_batch) {
1945519455
ctx->cparams.n_threads = n_threads;
1945619456
ctx->cparams.n_threads_batch = n_threads_batch;
1945719457
}
1945819458

19459-
uint32_t llama_n_threads(struct llama_context * ctx) {
19459+
int llama_n_threads(struct llama_context * ctx) {
1946019460
return ctx->cparams.n_threads;
1946119461
}
1946219462

19463-
uint32_t llama_n_threads_batch(struct llama_context * ctx) {
19463+
int llama_n_threads_batch(struct llama_context * ctx) {
1946419464
return ctx->cparams.n_threads_batch;
1946519465
}
1946619466

Comments (0)