shadow : cont gcc

ggerganov · ggerganov · commit 10eb87409ec0 · 2025-01-12T16:09:49.000+02:00
ggml-ci
diff --git a/common/arg.cpp b/common/arg.cpp
diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
@@ -579,8 +579,8 @@ class SchemaConverter {
                     seq.back().second = false;
                 } else {
                     std::string literal;
-                    auto is_non_literal = [&](char c) {
-                        return NON_LITERAL_SET.find(c) != NON_LITERAL_SET.end();
+                    auto is_non_literal = [&](char ch) {
+                        return NON_LITERAL_SET.find(ch) != NON_LITERAL_SET.end();
                     };
                     while (i < length) {
                         if (sub_pattern[i] == '\\' && i < length - 1) {
diff --git a/common/log.cpp b/common/log.cpp
@@ -255,8 +255,8 @@ struct common_log {
         thrd = std::thread([this]() {
             while (true) {
                 {
-                    std::unique_lock<std::mutex> lock(mtx);
-                    cv.wait(lock, [this]() { return head != tail; });
+                    std::unique_lock<std::mutex> lock_thrd(mtx);
+                    cv.wait(lock_thrd, [this]() { return head != tail; });
 
                     cur = entries[head];
 
diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
@@ -62,7 +62,7 @@ int main(int argc, char ** argv) {
     llama_batch batch = llama_batch_init(n_kv_max, 0, 1);
 
     // decode in batches of ctx_params.n_batch tokens
-    auto decode_helper = [](llama_context * ctx, llama_batch & batch, int32_t n_batch) {
+    auto decode_helper = [&ctx, &batch](int32_t n_batch) {
         for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) {
             const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
 
@@ -94,7 +94,7 @@ int main(int argc, char ** argv) {
             common_batch_add(batch, 0, i, { 0 }, false);
         }
 
-        if (!decode_helper(ctx, batch, ctx_params.n_batch)) {
+        if (!decode_helper(ctx_params.n_batch)) {
             LOG_ERR("%s: llama_decode() failed\n", __func__);
             return 1;
         }
@@ -134,7 +134,7 @@ int main(int argc, char ** argv) {
 
                 llama_kv_cache_clear(ctx);
 
-                if (!decode_helper(ctx, batch, ctx_params.n_batch)) {
+                if (!decode_helper(ctx_params.n_batch)) {
                     LOG_ERR("%s: llama_decode() failed\n", __func__);
                     return 1;
                 }
@@ -156,7 +156,7 @@ int main(int argc, char ** argv) {
                         common_batch_add(batch, 0, pp + i, { j }, true);
                     }
 
-                    if (!decode_helper(ctx, batch, ctx_params.n_batch)) {
+                    if (!decode_helper(ctx_params.n_batch)) {
                         LOG_ERR("%s: llama_decode() failed\n", __func__);
                         return 1;
                     }
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
@@ -2082,7 +2082,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
     }
     else if (ctx->has_qwen2vl_merger) {
         clip_image_u8 * resized = clip_image_u8_init();
-        auto patch_size = clip_patch_size(ctx) * 2;
+        auto patch_size = clip_get_patch_size(ctx) * 2;
         int nx = ceil((float)img->nx / patch_size) * patch_size;
         int ny = ceil((float)img->ny / patch_size) * patch_size;
         bicubic_resize(*img, *resized, nx, ny);
@@ -2293,15 +2293,15 @@ size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w
     return clip_n_patches_by_img(ctx, &img) * clip_n_mmproj_embd(ctx) * sizeof(float);
 }
 
-int32_t clip_image_size(const struct clip_ctx * ctx) {
+int32_t clip_get_image_size(const struct clip_ctx * ctx) {
     return ctx->vision_model.hparams.image_size;
 }
 
-int32_t clip_patch_size(const struct clip_ctx * ctx) {
+int32_t clip_get_patch_size(const struct clip_ctx * ctx) {
     return ctx->vision_model.hparams.patch_size;
 }
 
-int32_t clip_hidden_size(const struct clip_ctx * ctx) {
+int32_t clip_get_hidden_size(const struct clip_ctx * ctx) {
     return ctx->vision_model.hparams.hidden_size;
 }
 
diff --git a/examples/llava/clip.h b/examples/llava/clip.h
@@ -47,9 +47,9 @@ CLIP_API void clip_free(struct clip_ctx * ctx);
 CLIP_API size_t clip_embd_nbytes(const struct clip_ctx * ctx);
 CLIP_API size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w);
 
-CLIP_API int32_t clip_image_size (const struct clip_ctx * ctx);
-CLIP_API int32_t clip_patch_size (const struct clip_ctx * ctx);
-CLIP_API int32_t clip_hidden_size(const struct clip_ctx * ctx);
+CLIP_API int32_t clip_get_image_size (const struct clip_ctx * ctx);
+CLIP_API int32_t clip_get_patch_size (const struct clip_ctx * ctx);
+CLIP_API int32_t clip_get_hidden_size(const struct clip_ctx * ctx);
 
 // TODO: should be enum, not string
 CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx);
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
@@ -105,8 +105,8 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
         struct ggml_context * ctx;
     } model;
 
-    const int32_t image_size = clip_image_size(ctx_clip);
-    const int32_t patch_size = clip_patch_size(ctx_clip);
+    const int32_t image_size = clip_get_image_size(ctx_clip);
+    const int32_t patch_size = clip_get_patch_size(ctx_clip);
 
     int32_t num_patches_per_side = image_size / patch_size; // 336 / 14 = 24 - used for embedding-patching boxes (24*24 = 576 patches)
 
@@ -353,7 +353,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
         img_res_v.size = 0;
         img_res_v.data = nullptr;
 
-        const int32_t image_size = clip_image_size(ctx_clip);
+        const int32_t image_size = clip_get_image_size(ctx_clip);
 
         struct clip_image_grid_shape grid_shape = get_anyres_image_grid_shape({img->nx,img->ny}, grid_pinpoints, image_size);
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -3702,8 +3702,8 @@ int main(int argc, char ** argv) {
                 ctx_server.receive_cmpl_results_stream(task_ids, [&](server_task_result_ptr & result) -> bool {
                     json res_json = result->to_json();
                     if (res_json.is_array()) {
-                        for (const auto & res : res_json) {
-                            if (!server_sent_event(sink, "data", res)) {
+                        for (const auto & item : res_json) {
+                            if (!server_sent_event(sink, "data", item)) {
                                 return false;
                             }
                         }
@@ -3973,9 +3973,9 @@ int main(int argc, char ** argv) {
             std::unordered_set<int> task_ids = server_task::get_list_id(tasks);
 
             ctx_server.receive_multi_results(task_ids, [&](std::vector<server_task_result_ptr> & results) {
-                for (auto & res : results) {
-                    GGML_ASSERT(dynamic_cast<server_task_result_embd*>(res.get()) != nullptr);
-                    responses.push_back(res->to_json());
+                for (auto & result : results) {
+                    GGML_ASSERT(dynamic_cast<server_task_result_embd*>(result.get()) != nullptr);
+                    responses.push_back(result->to_json());
                 }
             }, [&](const json & error_data) {
                 res_error(res, error_data);
@@ -4063,9 +4063,9 @@ int main(int argc, char ** argv) {
             std::unordered_set<int> task_ids = server_task::get_list_id(tasks);
 
             ctx_server.receive_multi_results(task_ids, [&](std::vector<server_task_result_ptr> & results) {
-                for (auto & res : results) {
-                    GGML_ASSERT(dynamic_cast<server_task_result_rerank*>(res.get()) != nullptr);
-                    responses.push_back(res->to_json());
+                for (auto & result : results) {
+                    GGML_ASSERT(dynamic_cast<server_task_result_rerank*>(result.get()) != nullptr);
+                    responses.push_back(result->to_json());
                 }
             }, [&](const json & error_data) {
                 res_error(res, error_data);
diff --git a/examples/simple-chat/simple-chat.cpp b/examples/simple-chat/simple-chat.cpp
@@ -110,9 +110,8 @@ int main(int argc, char ** argv) {
         llama_token new_token_id;
         while (true) {
             // check if we have enough space in the context to evaluate this batch
-            int n_ctx = llama_n_ctx(ctx);
             int n_ctx_used = llama_get_kv_cache_used_cells(ctx);
-            if (n_ctx_used + batch.n_tokens > n_ctx) {
+            if (n_ctx_used + batch.n_tokens > (int) llama_n_ctx(ctx)) {
                 printf("\033[0m\n");
                 fprintf(stderr, "context size exceeded\n");
                 exit(0);
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -311,9 +311,9 @@ static buft_list_t make_gpu_buft_list(ggml_backend_dev_t dev, enum llama_split_m
             ggml_backend_reg_get_proc_address(reg, "ggml_backend_split_buffer_type");
         if (ggml_backend_split_buffer_type_fn) {
             size_t dev_index = [&]() {
-                auto * reg = ggml_backend_dev_backend_reg(dev);
-                for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); ++i) {
-                    if (ggml_backend_reg_dev_get(reg, i) == dev) {
+                ggml_backend_reg_t reg_dev = ggml_backend_dev_backend_reg(dev);
+                for (size_t i = 0; i < ggml_backend_reg_dev_count(reg_dev); ++i) {
+                    if (ggml_backend_reg_dev_get(reg_dev, i) == dev) {
                         return i;
                     }
                 }
@@ -1304,7 +1304,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, (int)n_layer + 1);
     auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
         if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
-            return {cpu_dev, &pimpl->cpu_buft_list};
+            return { cpu_dev, &pimpl->cpu_buft_list };
         }
         const int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + n_devices(), float(il - i_gpu_start)/act_gpu_layers) - splits.begin();
         auto * dev = devices.at(layer_gpu);
@@ -1453,7 +1453,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             // avoid using a host buffer when using mmap
             auto * buft_dev = ggml_backend_buft_get_device(buft);
             if (ml.use_mmap && buft_dev && buft == ggml_backend_dev_host_buffer_type(buft_dev)) {
-                auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
                 buft = ggml_backend_dev_buffer_type(cpu_dev);
             }
 
@@ -3697,8 +3696,8 @@ ggml_backend_buffer_type_t llama_model::select_buft(int il) const {
 
 const struct ggml_tensor * llama_model::get_tensor(const char * name) const {
     auto it = std::find_if(tensors_by_name.begin(), tensors_by_name.end(),
-            [name](const std::pair<std::string, struct ggml_tensor *> & it) {
-                return it.first == name;
+            [name](const std::pair<std::string, struct ggml_tensor *> & entry) {
+                return entry.first == name;
             });
     if (it == tensors_by_name.end()) {
         return nullptr;
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
@@ -130,17 +130,17 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t
         return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%3 == 2;
     };
     const int n_expert = std::max(1, (int)qs.model.hparams.n_expert);
-    auto layer_info = [n_expert] (int i_layer, int n_layer, const char * name) {
+    auto layer_info = [n_expert] (int i_layer, int n_layer, const char * name_layer) {
         if (n_expert > 1) {
             // Believe it or not, "experts" in the FFN of Mixtral-8x7B are not consecutive, but occasionally randomly
             // sprinkled in the model. Hence, simply dividing i_ffn_down by n_expert does not work
             // for getting the current layer as I initially thought, and we need to resort to parsing the
             // tensor name.
-            if (sscanf(name, "blk.%d.", &i_layer) != 1) {
-                throw std::runtime_error(format("Failed to determine layer for tensor %s", name));
+            if (sscanf(name_layer, "blk.%d.", &i_layer) != 1) {
+                throw std::runtime_error(format("Failed to determine layer for tensor %s", name_layer));
             }
             if (i_layer < 0 || i_layer >= n_layer) {
-                throw std::runtime_error(format("Bad layer %d for tensor %s. Must be in [0, %d)", i_layer, name, n_layer));
+                throw std::runtime_error(format("Bad layer %d for tensor %s. Must be in [0, %d)", i_layer, name_layer, n_layer));
             }
         }
         return std::make_pair(i_layer, n_layer);
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
@@ -2496,15 +2496,15 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t
 
     // copy piece chars to output text buffer
     // skip up to 'lstrip' leading spaces before copying
-    auto _try_copy = [=] (const char * token, size_t size) -> int32_t {
-        for (int32_t i = 0; i < lstrip && size && *token == ' '; ++i) {
-            token++;
+    auto _try_copy = [=] (const char * text, size_t size) -> int32_t {
+        for (int32_t i = 0; i < lstrip && size && *text == ' '; ++i) {
+            text++;
             size--;
         }
         if (length < (int32_t)size) {
             return -(int32_t) size;
         }
-        memcpy(buf, token, size);
+        memcpy(buf, text, size);
         return (int32_t) size;
     };
 

Original file line number	Diff line number	Diff line change
`@@ -255,8 +255,8 @@ struct common_log {`
`255`	`255`	`thrd = std::thread([this]() {`
`256`	`256`	`while (true) {`
`257`	`257`	`{`
`258`		`- std::unique_lock<std::mutex> lock(mtx);`
`259`		`- cv.wait(lock, [this]() { return head != tail; });`
	`258`	`+ std::unique_lock<std::mutex> lock_thrd(mtx);`
	`259`	`+ cv.wait(lock_thrd, [this]() { return head != tail; });`
`260`	`260`
`261`	`261`	`cur = entries[head];`
`262`	`262`
Original file line number	Diff line number	Diff line change
`@@ -62,7 +62,7 @@ int main(int argc, char ** argv) {`
`62`	`62`	`llama_batch batch = llama_batch_init(n_kv_max, 0, 1);`
`63`	`63`
`64`	`64`	`// decode in batches of ctx_params.n_batch tokens`
`65`		`- auto decode_helper = [](llama_context * ctx, llama_batch & batch, int32_t n_batch) {`
	`65`	`+ auto decode_helper = [&ctx, &batch](int32_t n_batch) {`
`66`	`66`	`for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) {`
`67`	`67`	`const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));`
`68`	`68`
`@@ -94,7 +94,7 @@ int main(int argc, char ** argv) {`
`94`	`94`	`common_batch_add(batch, 0, i, { 0 }, false);`
`95`	`95`	`}`
`96`	`96`
`97`		`- if (!decode_helper(ctx, batch, ctx_params.n_batch)) {`
	`97`	`+ if (!decode_helper(ctx_params.n_batch)) {`
`98`	`98`	`LOG_ERR("%s: llama_decode() failed\n", __func__);`
`99`	`99`	`return 1;`
`100`	`100`	`}`
`@@ -134,7 +134,7 @@ int main(int argc, char ** argv) {`
`134`	`134`
`135`	`135`	`llama_kv_cache_clear(ctx);`
`136`	`136`
`137`		`- if (!decode_helper(ctx, batch, ctx_params.n_batch)) {`
	`137`	`+ if (!decode_helper(ctx_params.n_batch)) {`
`138`	`138`	`LOG_ERR("%s: llama_decode() failed\n", __func__);`
`139`	`139`	`return 1;`
`140`	`140`	`}`
`@@ -156,7 +156,7 @@ int main(int argc, char ** argv) {`
`156`	`156`	`common_batch_add(batch, 0, pp + i, { j }, true);`
`157`	`157`	`}`
`158`	`158`
`159`		`- if (!decode_helper(ctx, batch, ctx_params.n_batch)) {`
	`159`	`+ if (!decode_helper(ctx_params.n_batch)) {`
`160`	`160`	`LOG_ERR("%s: llama_decode() failed\n", __func__);`
`161`	`161`	`return 1;`
`162`	`162`	`}`
Original file line number	Diff line number	Diff line change
`@@ -2082,7 +2082,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli`
`2082`	`2082`	`}`
`2083`	`2083`	`else if (ctx->has_qwen2vl_merger) {`
`2084`	`2084`	`clip_image_u8 * resized = clip_image_u8_init();`
`2085`		`- auto patch_size = clip_patch_size(ctx) * 2;`
	`2085`	`+ auto patch_size = clip_get_patch_size(ctx) * 2;`
`2086`	`2086`	`int nx = ceil((float)img->nx / patch_size) * patch_size;`
`2087`	`2087`	`int ny = ceil((float)img->ny / patch_size) * patch_size;`
`2088`	`2088`	`bicubic_resize(*img, *resized, nx, ny);`
`@@ -2293,15 +2293,15 @@ size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w`
`2293`	`2293`	`return clip_n_patches_by_img(ctx, &img) * clip_n_mmproj_embd(ctx) * sizeof(float);`
`2294`	`2294`	`}`
`2295`	`2295`
`2296`		`-int32_t clip_image_size(const struct clip_ctx * ctx) {`
	`2296`	`+int32_t clip_get_image_size(const struct clip_ctx * ctx) {`
`2297`	`2297`	`return ctx->vision_model.hparams.image_size;`
`2298`	`2298`	`}`
`2299`	`2299`
`2300`		`-int32_t clip_patch_size(const struct clip_ctx * ctx) {`
	`2300`	`+int32_t clip_get_patch_size(const struct clip_ctx * ctx) {`
`2301`	`2301`	`return ctx->vision_model.hparams.patch_size;`
`2302`	`2302`	`}`
`2303`	`2303`
`2304`		`-int32_t clip_hidden_size(const struct clip_ctx * ctx) {`
	`2304`	`+int32_t clip_get_hidden_size(const struct clip_ctx * ctx) {`
`2305`	`2305`	`return ctx->vision_model.hparams.hidden_size;`
`2306`	`2306`	`}`
`2307`	`2307`