diff --git a/common/console.cpp b/common/console.cpp
index 078a8d678d933..2f633626ed794 100644
--- a/common/console.cpp
+++ b/common/console.cpp
@@ -295,12 +295,19 @@ namespace console {
             return expectedWidth;
         }
 
+        if (x2 < 0 || x1 < 0 || x2 > 10000 || x1 > 10000) {
+            return expectedWidth;
+        }
+
         int width = x2 - x1;
         if (width < 0) {
-            // Calculate the width considering text wrapping
             struct winsize w;
             ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
-            width += w.ws_col;
+            if (w.ws_col > 0 && w.ws_col < 10000) {
+                width += w.ws_col;
+            } else {
+                return expectedWidth;
+            }
         }
         return width;
 #endif
@@ -398,7 +405,9 @@ namespace console {
                 do {
                     count = widths.back();
                     widths.pop_back();
-                    // Move cursor back, print space, and move cursor back again
+                    if (count < 0 || count > 10000) {
+                        count = 0;
+                    }
                     for (int i = 0; i < count; i++) {
                         replace_last(' ');
                         pop_cursor();
@@ -409,11 +418,14 @@ namespace console {
             } else {
                 int offset = line.length();
                 append_utf8(input_char, line);
-                int width = put_codepoint(line.c_str() + offset, line.length() - offset, estimateWidth(input_char));
-                if (width < 0) {
-                    width = 0;
+                size_t current_len = line.length();
+                if (current_len >= (size_t)offset && current_len < 100000) {
+                    int width = put_codepoint(line.c_str() + offset, current_len - offset, estimateWidth(input_char));
+                    if (width < 0) {
+                        width = 0;
+                    }
+                    widths.push_back(width);
                 }
-                widths.push_back(width);
             }
 
             if (!line.empty() && (line.back() == '\\' || line.back() == '/')) {
diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index a67c0536a4128..abb774544fa45 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -317,6 +317,9 @@ def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
         lora_model = torch.load(input_model, map_location="cpu", weights_only=True)
 
     # load LoRA config
+    if not os.path.isfile(lora_config) or not lora_config.endswith(('.json', '.JSON')):
+        logger.error(f"Invalid LoRA config file: {lora_config}")
+        sys.exit(1)
     with open(lora_config, "r") as f:
         lparams: dict[str, Any] = json.load(f)
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index bdf0eed2a9cd3..c0dd93772aea5 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -167,7 +167,12 @@ static int checkpoint_init_weights(TransformerWeights * w, const Config * p, FIL
 
     // Skip freq_cis_real & freq_cis_imag
     int head_size = p->dim / p->n_heads;
-    fseek(f, p->seq_len * head_size * sizeof(float), SEEK_CUR);
+    if (head_size < 0 || head_size > 10000 || p->seq_len < 0 || p->seq_len > 100000) {
+        LOG_ERR("%s: Invalid head_size or seq_len\n", __func__);
+        return 1;
+    }
+    long skip_size = (long)p->seq_len * (long)head_size * sizeof(float);
+    fseek(f, skip_size, SEEK_CUR);
 
     if (!shared_weights && fread(w->wcls.data(), sizeof(float), w->wcls.size(), f) != w->wcls.size()) return 1;
@@ -885,6 +890,10 @@ int main(int argc, char ** argv) {
     TransformerWeights weights = {};
     {
         LOG_INF("%s: Loading llama2c model from %s\n", __func__, params.fn_llama2c_model);
+        if (!params.fn_llama2c_model || strlen(params.fn_llama2c_model) == 0) {
+            LOG_ERR("%s: Invalid model file path\n", __func__);
+            return 1;
+        }
         FILE * file = fopen(params.fn_llama2c_model, "rb");
         if (!file) {
             LOG_ERR("%s: Unable to open the checkpoint file %s!\n", __func__, params.fn_llama2c_model);
diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py
index c4ec5c524e9b1..c4b25112d80ab 100755
--- a/examples/convert_legacy_llama.py
+++ b/examples/convert_legacy_llama.py
@@ -679,6 +679,8 @@ def must_read(fp: IO[bytes], length: int) -> bytes:
 
 @functools.lru_cache(maxsize=None)
 def lazy_load_file(path: Path) -> ModelPlus:
+    if not os.path.isfile(path):
+        raise ValueError(f"File does not exist: {path}")
     fp = open(path, 'rb')
     first8 = fp.read(8)
     fp.seek(0)
diff --git a/examples/gguf-hash/gguf-hash.cpp b/examples/gguf-hash/gguf-hash.cpp
index 9523ec122f573..91a7218b86eed 100644
--- a/examples/gguf-hash/gguf-hash.cpp
+++ b/examples/gguf-hash/gguf-hash.cpp
@@ -206,8 +206,8 @@ static bool manifest_type(const std::string & manifest_file, manifest_check_para
         return false;
     }
 
-    std::ifstream file(manifest_file);
-    if (!file.is_open()) {
+    std::ifstream file(manifest_file, std::ios::binary);
+    if (!file.is_open() || !file.good()) {
         return false;
     }
 
@@ -238,8 +238,8 @@ static hash_manifest_result_t manifest_verify(const std::string& manifest_file,
         return HASH_MANIFEST_NOT_FOUND;
     }
 
-    std::ifstream file(manifest_file);
-    if (!file.is_open()) {
+    std::ifstream file(manifest_file, std::ios::binary);
+    if (!file.is_open() || !file.good()) {
         return HASH_MANIFEST_NOT_FOUND;
     }
diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py
index ed379585546c2..aaf4783d711ad 100755
--- a/examples/json_schema_to_grammar.py
+++ b/examples/json_schema_to_grammar.py
@@ -6,6 +6,7 @@
 import json
 import re
 import sys
+import os
 from typing import Any, List, Optional, Set, Tuple, Union
 
 def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
@@ -792,12 +793,27 @@ def main(args_in = None):
     if args.schema.startswith('https://'):
         url = args.schema
         import requests
+        import urllib.parse
+
+        parsed = urllib.parse.urlparse(url)
+        if parsed.hostname in ['localhost', '127.0.0.1', '0.0.0.0']:
+            raise ValueError(f"Invalid URL: localhost not allowed")
+        if (parsed.hostname and (parsed.hostname.startswith('10.') or
+                                 parsed.hostname.startswith('192.168.') or
+                                 parsed.hostname.startswith('169.254.') or
+                                 any(parsed.hostname.startswith(f'172.{i}.') for i in range(16, 32)))):
+            raise ValueError(f"Invalid URL: private IP ranges not allowed")
+
         schema = requests.get(url).json()
     elif args.schema == '-':
         url = 'stdin'
         schema = json.load(sys.stdin)
     else:
         url = f'file://{args.schema}'
+        if not os.path.isfile(args.schema) or not args.schema.endswith((".json", ".JSON")):
+
+            raise ValueError(f"Invalid schema file: {args.schema}")
+
         with open(args.schema) as f:
             schema = json.load(f)
     converter = SchemaConverter(
diff --git a/examples/llama.android/llama/src/main/cpp/llama-android.cpp b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
index 711ddc5d19587..5d00d61487b16 100644
--- a/examples/llama.android/llama/src/main/cpp/llama-android.cpp
+++ b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
@@ -286,17 +286,76 @@ Java_android_llama_cpp_LLamaAndroid_new_1batch(JNIEnv *, jobject, jint n_tokens,
     };
 
     if (embd) {
+        if (n_tokens > 0 && embd > 0 && (size_t)n_tokens > SIZE_MAX / sizeof(float) / (size_t)embd) {
+            LOGe("integer overflow in embd allocation");
+            delete batch;
+            return 0;
+        }
         batch->embd = (float *) malloc(sizeof(float) * n_tokens * embd);
     } else {
+        if (n_tokens > 0 && (size_t)n_tokens > SIZE_MAX / sizeof(llama_token)) {
+            LOGe("integer overflow in token allocation");
+            delete batch;
+            return 0;
+        }
         batch->token = (llama_token *) malloc(sizeof(llama_token) * n_tokens);
     }
 
+    if (n_tokens > 0 && (size_t)n_tokens > SIZE_MAX / sizeof(llama_pos)) {
+        LOGe("integer overflow in pos allocation");
+        if (embd) free(batch->embd); else free(batch->token);
+        delete batch;
+        return 0;
+    }
     batch->pos = (llama_pos *) malloc(sizeof(llama_pos) * n_tokens);
+
+    if (n_tokens > 0 && (size_t)n_tokens > SIZE_MAX / sizeof(int32_t)) {
+        LOGe("integer overflow in n_seq_id allocation");
+        free(batch->pos);
+        if (embd) free(batch->embd); else free(batch->token);
+        delete batch;
+        return 0;
+    }
     batch->n_seq_id = (int32_t *) malloc(sizeof(int32_t) * n_tokens);
+
+    if (n_tokens > 0 && (size_t)n_tokens > SIZE_MAX / sizeof(llama_seq_id *)) {
+        LOGe("integer overflow in seq_id allocation");
+        free(batch->n_seq_id);
+        free(batch->pos);
+        if (embd) free(batch->embd); else free(batch->token);
+        delete batch;
+        return 0;
+    }
     batch->seq_id = (llama_seq_id **) malloc(sizeof(llama_seq_id *) * n_tokens);
+
     for (int i = 0; i < n_tokens; ++i) {
+        if (n_seq_max > 0 && (size_t)n_seq_max > SIZE_MAX / sizeof(llama_seq_id)) {
+            LOGe("integer overflow in seq_id[%d] allocation", i);
+            for (int j = 0; j < i; ++j) {
+                free(batch->seq_id[j]);
+            }
+            free(batch->seq_id);
+            free(batch->n_seq_id);
+            free(batch->pos);
+            if (embd) free(batch->embd); else free(batch->token);
+            delete batch;
+            return 0;
+        }
         batch->seq_id[i] = (llama_seq_id *) malloc(sizeof(llama_seq_id) * n_seq_max);
     }
+
+    if (n_tokens > 0 && (size_t)n_tokens > SIZE_MAX / sizeof(int8_t)) {
+        LOGe("integer overflow in logits allocation");
+        for (int i = 0; i < n_tokens; ++i) {
+            free(batch->seq_id[i]);
+        }
+        free(batch->seq_id);
+        free(batch->n_seq_id);
+        free(batch->pos);
+        if (embd) free(batch->embd); else free(batch->token);
+        delete batch;
+        return 0;
+    }
     batch->logits = (int8_t *) malloc(sizeof(int8_t) * n_tokens);
 
     return reinterpret_cast<jlong>(batch);
diff --git a/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift b/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift
index 4584d6eaa3d32..59b53001c63a8 100644
--- a/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift
+++ b/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift
@@ -48,6 +48,11 @@ struct DownloadButton: View {
             do {
                 if let temporaryURL = temporaryURL {
+                    let tempDir = FileManager.default.temporaryDirectory
+                    guard temporaryURL.path.hasPrefix(tempDir.path) else {
+                        print("Error: Invalid temporary file path")
+                        return
+                    }
                     try FileManager.default.copyItem(at: temporaryURL, to: fileURL)
 
                     print("Writing to \(filename) completed")
diff --git a/examples/llama.swiftui/llama.swiftui/UI/InputButton.swift b/examples/llama.swiftui/llama.swiftui/UI/InputButton.swift
index c5ffbad4ec331..52b223e61c822 100644
--- a/examples/llama.swiftui/llama.swiftui/UI/InputButton.swift
+++ b/examples/llama.swiftui/llama.swiftui/UI/InputButton.swift
@@ -52,6 +52,11 @@ struct InputButton: View {
             do {
                 if let temporaryURL = temporaryURL {
+                    let tempDir = FileManager.default.temporaryDirectory
+                    guard temporaryURL.path.hasPrefix(tempDir.path) else {
+                        print("Error: Invalid temporary file path")
+                        return
+                    }
                     try FileManager.default.copyItem(at: temporaryURL, to: fileURL)
 
                     print("Writing to \(filename) completed")
diff --git a/examples/model-conversion/logits.cpp b/examples/model-conversion/logits.cpp
index ddc5e9005f9e0..0bd8ae3f386b3 100644
--- a/examples/model-conversion/logits.cpp
+++ b/examples/model-conversion/logits.cpp
@@ -161,9 +161,12 @@ int main(int argc, char ** argv) {
 
     std::filesystem::create_directory("data");
 
-    // Save logits to binary file
     char bin_filename[512];
     snprintf(bin_filename, sizeof(bin_filename), "data/llamacpp-%s%s.bin", model_name, type);
+    if (strlen(bin_filename) == 0) {
+        fprintf(stderr, "%s: error: invalid binary output filename\n", __func__);
+        return 1;
+    }
     printf("Saving logits to %s\n", bin_filename);
 
     FILE * f = fopen(bin_filename, "wb");
@@ -174,9 +177,12 @@ int main(int argc, char ** argv) {
     fwrite(logits, sizeof(float), n_logits, f);
     fclose(f);
 
-    // Also save as text for debugging
     char txt_filename[512];
     snprintf(txt_filename, sizeof(txt_filename), "data/llamacpp-%s%s.txt", model_name, type);
+    if (strlen(txt_filename) == 0) {
+        fprintf(stderr, "%s: error: invalid text output filename\n", __func__);
+        return 1;
+    }
     f = fopen(txt_filename, "w");
     if (f == NULL) {
         fprintf(stderr, "%s: error: failed to open text output file\n", __func__);
diff --git a/examples/model-conversion/scripts/utils/inspect-org-model.py b/examples/model-conversion/scripts/utils/inspect-org-model.py
index ea14947fd2ef8..1ae6a617c1e15 100755
--- a/examples/model-conversion/scripts/utils/inspect-org-model.py
+++ b/examples/model-conversion/scripts/utils/inspect-org-model.py
@@ -22,6 +22,9 @@
     # Multi-file model
     print("Multi-file model detected")
+    if not os.path.isfile(index_path) or not index_path.endswith('.json'):
+        print(f"Error: Invalid index file: {index_path}")
+        exit(1)
     with open(index_path, 'r') as f:
         index_data = json.load(f)
diff --git a/examples/pydantic_models_to_grammar_examples.py b/examples/pydantic_models_to_grammar_examples.py
index 6dadb7f3fa48d..fa30774d52db1 100755
--- a/examples/pydantic_models_to_grammar_examples.py
+++ b/examples/pydantic_models_to_grammar_examples.py
@@ -25,6 +25,17 @@ def create_completion(host, prompt, gbnf_grammar):
     See https://github.com/ggml-org/llama.cpp/tree/HEAD/tools/server#api-endpoints
     """
+    import urllib.parse
+
+    blocked_hosts = ['localhost', '127.0.0.1', '0.0.0.0']
+    if host in blocked_hosts:
+        raise ValueError(f"Invalid host: localhost not allowed")
+    if (host.startswith('10.') or
+            host.startswith('192.168.') or
+            host.startswith('169.254.') or
+            any(host.startswith(f'172.{i}.') for i in range(16, 32))):
+        raise ValueError(f"Invalid host: private IP ranges not allowed")
+
     print(f"  Request:\n    Grammar:\n{textwrap.indent(gbnf_grammar, '      ')}\n    Prompt:\n{textwrap.indent(prompt.rstrip(), '      ')}")
     headers = {"Content-Type": "application/json"}
     data = {"prompt": prompt, "grammar": gbnf_grammar}
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 1065ec6bb005a..89fab704b5db8 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -70,6 +70,10 @@ int main(int argc, char ** argv) {
     const size_t written = llama_state_get_data(ctx, state_mem.data(), state_mem.size());
 
     FILE *fp_write = fopen("dump_state.bin", "wb");
+    if (fp_write == nullptr) {
+        fprintf(stderr, "%s : failed to open dump_state.bin for writing\n", __func__);
+        return 1;
+    }
     fwrite(state_mem.data(), 1, written, fp_write);
     fclose(fp_write);
 
@@ -116,6 +120,10 @@ int main(int argc, char ** argv) {
        std::vector<uint8_t> state_mem;
 
        FILE * fp_read = fopen("dump_state.bin", "rb");
+       if (fp_read == nullptr) {
+           fprintf(stderr, "\n%s : failed to open dump_state.bin for reading\n", __func__);
+           return 1;
+       }
        fseek(fp_read, 0, SEEK_END);
        state_mem.resize(ftell(fp_read));
        fseek(fp_read, 0, SEEK_SET);
@@ -173,6 +181,10 @@ int main(int argc, char ** argv) {
        std::vector<uint8_t> state_mem;
 
        FILE * fp_read = fopen("dump_state.bin", "rb");
+       if (fp_read == nullptr) {
+           fprintf(stderr, "\n%s : failed to open dump_state.bin for reading\n", __func__);
+           return 1;
+       }
       fseek(fp_read, 0, SEEK_END);
       state_mem.resize(ftell(fp_read));
       fseek(fp_read, 0, SEEK_SET);
diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index 8b6e6028361d0..5eee3faa2e709 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -364,15 +364,34 @@ struct ggml_gallocr {
 };
 
 ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs) {
+    if (n_bufs < 0) {
+        return NULL;
+    }
+
     ggml_gallocr_t galloc = (ggml_gallocr_t)calloc(1, sizeof(struct ggml_gallocr));
     GGML_ASSERT(galloc != NULL);
 
+    if (n_bufs > 0 && (size_t)n_bufs > SIZE_MAX / sizeof(ggml_backend_buffer_type_t)) {
+        free(galloc);
+        return NULL;
+    }
     galloc->bufts = calloc(n_bufs, sizeof(ggml_backend_buffer_type_t));
     GGML_ASSERT(galloc->bufts != NULL);
 
+    if (n_bufs > 0 && (size_t)n_bufs > SIZE_MAX / sizeof(ggml_backend_buffer_t)) {
+        free(galloc->bufts);
+        free(galloc);
+        return NULL;
+    }
     galloc->buffers = calloc(n_bufs, sizeof(ggml_backend_buffer_t));
     GGML_ASSERT(galloc->buffers != NULL);
 
+    if (n_bufs > 0 && (size_t)n_bufs > SIZE_MAX / sizeof(struct ggml_dyn_tallocr *)) {
+        free(galloc->buffers);
+        free(galloc->bufts);
+        free(galloc);
+        return NULL;
+    }
     galloc->buf_tallocs = calloc(n_bufs, sizeof(struct ggml_dyn_tallocr *));
     GGML_ASSERT(galloc->buf_tallocs != NULL);
@@ -668,6 +687,9 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
         GGML_ASSERT(galloc->hash_set.keys != NULL);
 
         free(galloc->hash_values);
+        if (galloc->hash_set.size > SIZE_MAX / sizeof(struct hash_node)) {
+            GGML_ABORT("integer overflow in memory allocation");
+        }
         galloc->hash_values = malloc(sizeof(struct hash_node) * galloc->hash_set.size);
         GGML_ASSERT(galloc->hash_values != NULL);
     }
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index f615ab4beecb1..f5ac7cee0518e 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -604,6 +604,12 @@ static const struct ggml_backend_buffer_i ggml_backend_multi_buffer_i = {
 ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers) {
     ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) malloc(sizeof(struct ggml_backend_multi_buffer_context));
     ctx->n_buffers = n_buffers;
+
+    if (n_buffers > 0 && n_buffers > SIZE_MAX / sizeof(ggml_backend_buffer_t)) {
+        GGML_LOG_ERROR("%s: integer overflow in buffers allocation\n", __func__);
+        free(ctx);
+        return NULL;
+    }
     ctx->buffers = (ggml_backend_buffer_t *) malloc(n_buffers * sizeof(ggml_backend_buffer_t));
 
     GGML_ASSERT(ctx->buffers != NULL);
@@ -1194,6 +1200,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
                 i_split++;
                 if (i_split >= sched->splits_capacity) {
                     sched->splits_capacity *= 2;
+
+                    if (sched->splits_capacity > SIZE_MAX / sizeof(struct ggml_backend_sched_split)) {
+                        GGML_LOG_ERROR("%s: integer overflow in splits reallocation\n", __func__);
+                        return;
+                    }
                     sched->splits = (ggml_backend_sched_split *) realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split));
                     GGML_ASSERT(sched->splits != NULL);
@@ -1284,6 +1295,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
     int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;
     if (sched->graph.size < graph_size) {
         sched->graph.size = graph_size;
+
+        if (graph_size > 0 && (size_t)graph_size > SIZE_MAX / sizeof(struct ggml_tensor *)) {
+            GGML_LOG_ERROR("%s: integer overflow in graph nodes/leafs reallocation\n", __func__);
+            return;
+        }
         sched->graph.nodes = (ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
         sched->graph.leafs = (ggml_tensor **) realloc(sched->graph.leafs, graph_size * sizeof(struct ggml_tensor *));
         GGML_ASSERT(sched->graph.nodes != NULL);
@@ -1609,8 +1625,25 @@ ggml_backend_sched_t ggml_backend_sched_new(
     // initialize hash table
     // FIXME: needs to be size*2 to account for leafs (do it in graph_split instead)
     sched->hash_set = ggml_hash_set_new(graph_size);
+
+    if (sched->hash_set.size > SIZE_MAX / sizeof(sched->hv_tensor_backend_ids[0])) {
+        GGML_ABORT("integer overflow in memory allocation");
+    }
     sched->hv_tensor_backend_ids = (int *) malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
-    sched->hv_tensor_copies      = (ggml_tensor **) malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
+
+    size_t tensor_copies_size = sched->hash_set.size;
+    if (tensor_copies_size > SIZE_MAX / sched->n_backends) {
+        GGML_ABORT("integer overflow in memory allocation");
+    }
+    tensor_copies_size *= sched->n_backends;
+    if (tensor_copies_size > SIZE_MAX / sched->n_copies) {
+        GGML_ABORT("integer overflow in memory allocation");
+    }
+    tensor_copies_size *= sched->n_copies;
+    if (tensor_copies_size > SIZE_MAX / sizeof(struct ggml_tensor *)) {
+        GGML_ABORT("integer overflow in memory allocation");
+    }
+    sched->hv_tensor_copies = (ggml_tensor **) malloc(tensor_copies_size * sizeof(struct ggml_tensor *));
 
     const size_t ggml_sched_max_splits = graph_size; // at most there is one split for each node in the graph
     const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2;
@@ -1899,7 +1932,30 @@ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_
 struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) {
     GGML_ASSERT(graph);
     struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size);
+
+    if (hash_set.size > SIZE_MAX / sizeof(struct ggml_tensor *)) {
+        GGML_LOG_ERROR("%s: integer overflow in node_copies allocation\n", __func__);
+        ggml_hash_set_free(&hash_set);
+        return {
+            /* .buffer          = */ NULL,
+            /* .ctx_allocated   = */ NULL,
+            /* .ctx_unallocated = */ NULL,
+            /* .graph           = */ NULL,
+        };
+    }
     struct ggml_tensor ** node_copies = (ggml_tensor **) calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
+
+    if (hash_set.size > SIZE_MAX / sizeof(bool)) {
+        GGML_LOG_ERROR("%s: integer overflow in node_init allocation\n", __func__);
+        ggml_hash_set_free(&hash_set);
+        free(node_copies);
+        return {
+            /* .buffer          = */ NULL,
+            /* .ctx_allocated   = */ NULL,
+            /* .ctx_unallocated = */ NULL,
+            /* .graph           = */ NULL,
+        };
+    }
     bool * node_init = (bool *) calloc(hash_set.size, sizeof(node_init[0]));
 
     struct ggml_init_params params = {
diff --git a/ggml/src/ggml-opencl/kernels/embed_kernel.py b/ggml/src/ggml-opencl/kernels/embed_kernel.py
index b5d1d7242b624..a7aec1cdf4dfb 100644
--- a/ggml/src/ggml-opencl/kernels/embed_kernel.py
+++ b/ggml/src/ggml-opencl/kernels/embed_kernel.py
@@ -1,6 +1,7 @@
 #
 import sys
+import os
 import logging
 
 logger = logging.getLogger("opencl-embed-kernel")
@@ -12,8 +13,15 @@ def main():
         logger.info("Usage: python embed_kernel.py <input_file> <output_file>")
         sys.exit(1)
 
-    ifile = open(sys.argv[1], "r")
-    ofile = open(sys.argv[2], "w")
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+
+    if not os.path.isfile(input_file):
+        logger.error(f"Input file does not exist: {input_file}")
+        sys.exit(1)
+
+    ifile = open(input_file, "r")
+    ofile = open(output_file, "w")
 
     for i in ifile:
         ofile.write('R"({})"\n'.format(i))
diff --git a/ggml/src/ggml-opt.cpp b/ggml/src/ggml-opt.cpp
index e078ad14a39c4..39d0598885972 100644
--- a/ggml/src/ggml-opt.cpp
+++ b/ggml/src/ggml-opt.cpp
@@ -277,7 +277,8 @@ static ggml_tensor * map_tensor(std::map<ggml_tensor *, ggml_tensor *> & tensor_
     }
     new_tensor->flags = tensor->flags;
     memcpy(new_tensor->op_params, tensor->op_params, sizeof(tensor->op_params));
-    strcpy(new_tensor->name, tensor->name);
+    strncpy(new_tensor->name, tensor->name, GGML_MAX_NAME - 1);
+    new_tensor->name[GGML_MAX_NAME - 1] = '\0';
     new_tensor->data = tensor->data;
     new_tensor->buffer = tensor->buffer;
     new_tensor->extra = tensor->extra;
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index 727932123e41b..ce7e6df58076b 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -2903,6 +2903,10 @@ void iq2xs_init_impl(enum ggml_type type) {
     uint16_t * kneighbors_q2xs;
 
     //printf("================================================================= %s(grid_size = %d)\n", __func__, grid_size);
+    if (grid_size > 0 && (size_t)grid_size > SIZE_MAX / sizeof(uint64_t)) {
+        fprintf(stderr, "%s: integer overflow in the_grid allocation\n", __func__);
+        return;
+    }
     uint64_t * the_grid = (uint64_t *)malloc(grid_size*sizeof(uint64_t));
     for (int k = 0; k < grid_size; ++k) {
         int8_t * pos = (int8_t *)(the_grid + k);
@@ -2913,6 +2917,12 @@ void iq2xs_init_impl(enum ggml_type type) {
     }
     kgrid_q2xs = the_grid;
     iq2_data[gindex].grid = the_grid;
+
+    if (kmap_size > 0 && (size_t)kmap_size > SIZE_MAX / sizeof(int)) {
+        fprintf(stderr, "%s: integer overflow in kmap_q2xs allocation\n", __func__);
+        free(the_grid);
+        return;
+    }
     kmap_q2xs = (int *)malloc(kmap_size*sizeof(int));
     iq2_data[gindex].map = kmap_q2xs;
     for (int i = 0; i < kmap_size; ++i) kmap_q2xs[i] = -1;
@@ -2928,6 +2938,13 @@ void iq2xs_init_impl(enum ggml_type type) {
         kmap_q2xs[index] = i;
     }
     int8_t pos[8];
+
+    if (grid_size > 0 && (size_t)grid_size > SIZE_MAX / (2 * sizeof(int))) {
+        fprintf(stderr, "%s: integer overflow in dist2 allocation\n", __func__);
+        free(kmap_q2xs);
+        free(the_grid);
+        return;
+    }
     int * dist2 = (int *)malloc(2*grid_size*sizeof(int));
     int num_neighbors = 0, num_not_in_map = 0;
     for (int i = 0; i < kmap_size; ++i) {
@@ -3497,6 +3514,10 @@ void iq3xs_init_impl(int grid_size) {
     uint16_t * kneighbors_q3xs;
 
     //printf("================================================================= %s(grid_size = %d)\n", __func__, grid_size);
+    if (grid_size > 0 && (size_t)grid_size > SIZE_MAX / sizeof(uint32_t)) {
+        fprintf(stderr, "%s: integer overflow in the_grid allocation\n", __func__);
+        return;
+    }
     uint32_t * the_grid = (uint32_t *)malloc(grid_size*sizeof(uint32_t));
     for (int k = 0; k < grid_size; ++k) {
         int8_t * pos = (int8_t *)(the_grid + k);
@@ -3507,6 +3528,12 @@ void iq3xs_init_impl(int grid_size) {
     }
     kgrid_q3xs = the_grid;
     iq3_data[gindex].grid = the_grid;
+
+    if (kmap_size > 0 && (size_t)kmap_size > SIZE_MAX / sizeof(int)) {
+        fprintf(stderr, "%s: integer overflow in kmap_q3xs allocation\n", __func__);
+        free(the_grid);
+        return;
+    }
     kmap_q3xs = (int *)malloc(kmap_size*sizeof(int));
     iq3_data[gindex].map = kmap_q3xs;
     for (int i = 0; i < kmap_size; ++i) kmap_q3xs[i] = -1;
@@ -3522,6 +3549,13 @@ void iq3xs_init_impl(int grid_size) {
         kmap_q3xs[index] = i;
     }
     int8_t pos[4];
+
+    if (grid_size > 0 && (size_t)grid_size > SIZE_MAX / (2 * sizeof(int))) {
+        fprintf(stderr, "%s: integer overflow in dist2 allocation\n", __func__);
+        free(kmap_q3xs);
+        free(the_grid);
+        return;
+    }
     int * dist2 = (int *)malloc(2*grid_size*sizeof(int));
     int num_neighbors = 0, num_not_in_map = 0;
     for (int i = 0; i < kmap_size; ++i) {
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 7189fc1cfa057..8fcfe0b6fce9f 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -9385,8 +9385,19 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
     vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, {vk::MemoryPropertyFlagBits::eDeviceLocal});
     vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, {vk::MemoryPropertyFlagBits::eDeviceLocal});
 
+    if (x_ne > SIZE_MAX / sizeof(X_TYPE)) {
+        GGML_ABORT("integer overflow in memory allocation");
+    }
     X_TYPE* x = (X_TYPE *) malloc(sizeof(X_TYPE) * x_ne);
+
+    if (y_ne > SIZE_MAX / sizeof(Y_TYPE)) {
+        GGML_ABORT("integer overflow in memory allocation");
+    }
     Y_TYPE* y = (Y_TYPE *) malloc(sizeof(Y_TYPE) * y_ne);
+
+    if (d_ne > SIZE_MAX / sizeof(float)) {
+        GGML_ABORT("integer overflow in memory allocation");
+    }
     float* d = (float *) malloc(sizeof(float) * d_ne);
 
     for (size_t i = 0; i < x_ne; i++) {
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
index 1263a70e4f757..d8e70f3ef2de2 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -752,7 +752,16 @@ void process_shaders() {
 
 void write_output_files() {
     FILE* hdr = fopen(target_hpp.c_str(), "w");
+    if (hdr == nullptr) {
+        std::cerr << "Error opening header file: " << target_hpp << " (" << strerror(errno) << ")\n";
+        return;
+    }
     FILE* src = fopen(target_cpp.c_str(), "w");
+    if (src == nullptr) {
+        std::cerr << "Error opening source file: " << target_cpp << " (" << strerror(errno) << ")\n";
+        fclose(hdr);
+        return;
+    }
 
     fprintf(hdr, "#include <cstdint>\n\n");
     fprintf(src, "#include \"%s\"\n\n", basename(target_hpp).c_str());
diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py b/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py
index cc8def7f13ea4..2cee6e887cc9a 100755
--- a/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py
+++ b/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py
@@ -29,13 +29,19 @@ def replace_placeholders(shader_text, replacements):
 
 def write_shader(shader_name, shader_code, output_dir, outfile):
     if output_dir:
+        if not os.path.isdir(output_dir):
+            raise ValueError(f"Invalid output directory: {output_dir}")
         wgsl_filename = os.path.join(output_dir, f"{shader_name}.wgsl")
+        if not wgsl_filename.startswith(os.path.abspath(output_dir)):
+            raise ValueError(f"Path traversal detected: {wgsl_filename}")
         with open(wgsl_filename, "w", encoding="utf-8") as f_out:
             f_out.write(shader_code)
     outfile.write(f'const char* wgsl_{shader_name} = R"({shader_code})";\n\n')
 
 
 def generate_variants(shader_path, output_dir, outfile):
+    if not os.path.isfile(shader_path) or not shader_path.endswith('.wgsl'):
+        raise ValueError(f"Invalid shader file: {shader_path}")
     shader_base_name = shader_path.split("/")[-1].split(".")[0]
 
     with open(shader_path, "r", encoding="utf-8") as f:
@@ -71,14 +77,22 @@ def main():
     parser.add_argument("--output_dir")
     args = parser.parse_args()
 
+    if not os.path.isdir(args.input_dir):
+        raise ValueError(f"Invalid input directory: {args.input_dir}")
+
     if args.output_dir:
         os.makedirs(args.output_dir, exist_ok=True)
 
-    with open(args.output_file, "w", encoding="utf-8") as out:
+    output_file_path = os.path.abspath(args.output_file)
+    with open(output_file_path, "w", encoding="utf-8") as out:
         out.write("// Auto-generated shader embedding\n\n")
+        input_dir_abs = os.path.abspath(args.input_dir)
         for fname in sorted(os.listdir(args.input_dir)):
             if fname.endswith(".wgsl"):
-                generate_variants(os.path.join(args.input_dir, fname), args.output_dir, out)
+                shader_path = os.path.join(input_dir_abs, fname)
+                if not shader_path.startswith(input_dir_abs):
+                    continue
+                generate_variants(shader_path, args.output_dir, out)
 
 
 if __name__ == "__main__":
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index d76ea58f789e2..472db8156f9a4 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -137,6 +137,9 @@ void ggml_print_backtrace(void) {
     }
 #if defined(__linux__)
     FILE * f = fopen("/proc/self/status", "r");
+    if (f == NULL) {
+        return;
+    }
     size_t size = 0;
     char * line = NULL;
     ssize_t length = 0;
@@ -256,7 +259,13 @@ static void ggml_log_internal_v(enum ggml_log_level level, const char * format,
     if (len < 128) {
         g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
     } else {
+        if (len < 0 || len >= INT_MAX) {
+            return; // Invalid length from vsnprintf
+        }
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
+        if (!buffer2) {
+            return; // Allocation failed
+        }
        vsnprintf(buffer2, len + 1, format, args_copy);
        buffer2[len] = 0;
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
@@ -6383,6 +6392,10 @@ void ggml_build_backward_expand(
 
     memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
     memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
+
+    if (cgraph->visited_hash_set.size > SIZE_MAX / sizeof(bool)) {
+        GGML_ABORT("integer overflow in memory allocation");
+    }
     bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
 
     {
diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index af57a88c60882..4a4f363e3b4e8 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -649,6 +649,11 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     // the ggml_tensor structs to the appropriate locations in the binary blob
 
     // compute the exact size needed for the new ggml_context
+    if (n_tensors > SIZE_MAX / ggml_tensor_overhead()) {
+        GGML_LOG_ERROR("%s: n_tensors too large for memory allocation\n", __func__);
+        gguf_free(ctx);
+        return nullptr;
+    }
     const size_t mem_size =
         params.no_alloc ?
         (n_tensors    )*ggml_tensor_overhead() :
diff --git a/gguf-py/gguf/scripts/gguf_new_metadata.py b/gguf-py/gguf/scripts/gguf_new_metadata.py
index 2fa5800cf7485..a7574fde55403 100755
--- a/gguf-py/gguf/scripts/gguf_new_metadata.py
+++ b/gguf-py/gguf/scripts/gguf_new_metadata.py
@@ -135,6 +135,9 @@ def main() -> None:
         new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, json.loads(args.chat_template) if args.chat_template.startswith('[') else args.chat_template)
 
     if args.chat_template_config:
+        if not os.path.isfile(args.chat_template_config) or not str(args.chat_template_config).endswith('.json'):
+            logger.error(f"Invalid chat template config file: {args.chat_template_config}")
+            sys.exit(1)
         with open(args.chat_template_config, 'r', encoding='utf-8') as fp:
             config = json.load(fp)
             template = config.get('chat_template')
@@ -142,6 +145,9 @@ def main() -> None:
             new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)
 
     if args.chat_template_file:
+        if not os.path.isfile(args.chat_template_file):
+            logger.error(f"Chat template file does not exist: {args.chat_template_file}")
+            sys.exit(1)
         with open(args.chat_template_file, 'r', encoding='utf-8') as fp:
             template = fp.read()
             new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)
diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py
index c45c83fdb55c3..6452924797ab8 100755
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -490,6 +490,8 @@ def __init__(self, data_files: list[str], tool: str = "llama-bench"):
         db_fields = LLAMA_BENCH_DB_FIELDS if tool == "llama-bench" else TEST_BACKEND_OPS_DB_FIELDS
 
         for data_file in data_files:
+            if not os.path.isfile(data_file) or not data_file.endswith('.json'):
+                raise ValueError(f"Invalid JSON file: {data_file}")
            with open(data_file, "r", encoding="utf-8") as fp:
                parsed = json.load(fp)
diff --git a/scripts/create_ops_docs.py b/scripts/create_ops_docs.py
index e3a476a1a13ff..3d9e12c867a5e 100755
--- a/scripts/create_ops_docs.py
+++ b/scripts/create_ops_docs.py
@@ -177,6 +177,9 @@ def run(self) -> None:
         docs_dir.mkdir(exist_ok=True)
 
         ops_file = docs_dir / self.output_filename
+        if not str(ops_file).endswith((".md", ".MD")):
+            raise ValueError(f"Invalid output file: {ops_file}")
+
         with open(ops_file, "w") as f:
             f.write(markdown_content)
diff --git a/scripts/get_chat_template.py b/scripts/get_chat_template.py
index b4827b317e1c9..56a2b7db413f8 100755
--- a/scripts/get_chat_template.py
+++ b/scripts/get_chat_template.py
@@ -14,6 +14,7 @@
 import json
 import re
 import sys
+import os
 
 
 def get_chat_template(model_id, variant=None):
@@ -21,7 +22,13 @@ def get_chat_template(model_id, variant=None):
         # Use huggingface_hub library if available.
         # Allows access to gated models if the user has access and ran `huggingface-cli login`.
         from huggingface_hub import hf_hub_download
-        with open(hf_hub_download(repo_id=model_id, filename="tokenizer_config.json"), encoding="utf-8") as f:
+        config_path = hf_hub_download(repo_id=model_id, filename="tokenizer_config.json")
+
+        if not os.path.isfile(config_path):
+
+            raise ValueError(f"Invalid config file: {config_path}")
+
+        with open(config_path, encoding="utf-8") as f:
             config_str = f.read()
     except ImportError:
         import requests
diff --git a/src/llama-batch.cpp b/src/llama-batch.cpp
index 55d89eca0ad94..bc1234f91ba79 100644
--- a/src/llama-batch.cpp
+++ b/src/llama-batch.cpp
@@ -842,19 +842,54 @@ struct llama_batch llama_batch_init(int32_t n_tokens_alloc, int32_t embd, int32_
     };
 
     if (embd) {
+        if (n_tokens_alloc > 0 && embd > 0 && n_tokens_alloc > SIZE_MAX / (sizeof(float) * embd)) {
+            LLAMA_LOG_ERROR("%s: integer overflow in memory allocation\n", __func__);
+            return batch;
+        }
         batch.embd = (float *) malloc(sizeof(float) * n_tokens_alloc * embd);
     } else {
+        if (n_tokens_alloc > SIZE_MAX / sizeof(llama_token)) {
+            LLAMA_LOG_ERROR("%s: integer overflow in memory allocation\n", __func__);
+            return batch;
+        }
         batch.token = (llama_token *) malloc(sizeof(llama_token) * n_tokens_alloc);
     }
 
+    if (n_tokens_alloc > SIZE_MAX / sizeof(llama_pos)) {
+        LLAMA_LOG_ERROR("%s: integer overflow in memory allocation\n", __func__);
+        llama_batch_free(batch);
+        return batch;
+    }
     batch.pos = (llama_pos *) malloc(sizeof(llama_pos) * n_tokens_alloc);
+
+    if (n_tokens_alloc > SIZE_MAX / sizeof(int32_t)) {
+        LLAMA_LOG_ERROR("%s: integer overflow in memory allocation\n", __func__);
+        llama_batch_free(batch);
+        return batch;
+    }
     batch.n_seq_id = (int32_t *) malloc(sizeof(int32_t) * n_tokens_alloc);
+
+    if (n_tokens_alloc + 1 > SIZE_MAX / sizeof(llama_seq_id *)) {
+        LLAMA_LOG_ERROR("%s: integer overflow in memory allocation\n", __func__);
+        llama_batch_free(batch);
+        return batch;
+    }
     batch.seq_id = (llama_seq_id **) malloc(sizeof(llama_seq_id *) * (n_tokens_alloc + 1));
     for (int i = 0; i < n_tokens_alloc; ++i) {
+        if (n_seq_max > SIZE_MAX / sizeof(llama_seq_id)) {
+            LLAMA_LOG_ERROR("%s: integer overflow in memory allocation\n", __func__);
+            llama_batch_free(batch);
+            return batch;
+        }
         batch.seq_id[i] = (llama_seq_id *) malloc(sizeof(llama_seq_id) * n_seq_max);
     }
     batch.seq_id[n_tokens_alloc] = nullptr;
 
+    if (n_tokens_alloc > SIZE_MAX / sizeof(int8_t)) {
+        LLAMA_LOG_ERROR("%s: integer overflow in memory allocation\n", __func__);
+        llama_batch_free(batch);
+        return batch;
+    }
     batch.logits = (int8_t *) malloc(sizeof(int8_t) * n_tokens_alloc);
 
     return batch;
diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index 8182a9adf53a6..b1c51e4400775 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -474,8 +474,9 @@ llama_model_loader::llama_model_loader(
         const llama_model_kv_override * param_overrides_p,
         const llama_model_tensor_buft_override * param_tensor_buft_overrides_p) {
     int trace = 0;
-    if (getenv("LLAMA_TRACE")) {
-        trace = atoi(getenv("LLAMA_TRACE"));
+    const char * trace_env = getenv("LLAMA_TRACE");
+    if (trace_env != nullptr) {
+        trace = atoi(trace_env);
     }
 
     if (param_overrides_p != nullptr) {
diff --git a/tools/gguf-split/gguf-split.cpp b/tools/gguf-split/gguf-split.cpp
index 30e771564e808..2c10b717e9230 100644
--- a/tools/gguf-split/gguf-split.cpp
+++ b/tools/gguf-split/gguf-split.cpp
@@ -365,8 +365,12 @@ static void gguf_split(const split_params & split_params) {
         /*.ctx      = */ &ctx_meta,
     };
 
+    if (split_params.input.empty()) {
+        fprintf(stderr, "%s: invalid input file path\n", __func__);
+        exit(EXIT_FAILURE);
+    }
     std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
-    if (!f_input.is_open()) {
+    if (!f_input.is_open() || !f_input.good()) {
         fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_params.input.c_str());
         exit(EXIT_FAILURE);
     }
@@ -402,7 +406,10 @@ static void gguf_merge(const split_params & split_params) {
     int n_split = 1;
     int total_tensors = 0;
 
-    // avoid overwriting existing output file
+    if (split_params.output.empty()) {
+        fprintf(stderr, "%s: invalid output file path\n", __func__);
+        exit(EXIT_FAILURE);
+    }
     if (std::ifstream(split_params.output.c_str())) {
         fprintf(stderr, "%s: output file %s already exists\n", __func__, split_params.output.c_str());
         exit(EXIT_FAILURE);
@@ -498,17 +505,27 @@ static void gguf_merge(const split_params & split_params) {
     std::ofstream fout;
     if (!split_params.dry_run) {
         fout.open(split_params.output.c_str(), std::ios::binary);
-        fout.exceptions(std::ofstream::failbit); // fail fast on write errors
-        // placeholder for the meta data
+        fout.exceptions(std::ofstream::failbit);
         auto meta_size = gguf_get_meta_size(ctx_out);
         ::zeros(fout, meta_size);
     }
 
-    // Write tensors data
     for (int i_split = 0; i_split < n_split; i_split++) {
         llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
+        if (strlen(split_path) == 0) {
+            fprintf(stderr, "%s: invalid split path\n", __func__);
+            for (uint32_t i = 0; i < ctx_ggufs.size(); i++) {
+                gguf_free(ctx_ggufs[i]);
+                ggml_free(ctx_metas[i]);
+            }
+            gguf_free(ctx_out);
+            if (!split_params.dry_run) {
+                fout.close();
+            }
+            exit(EXIT_FAILURE);
+        }
         std::ifstream f_input(split_path, std::ios::binary);
-        if (!f_input.is_open()) {
+        if (!f_input.is_open() || !f_input.good()) {
             fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_path);
             for (uint32_t i = 0; i < ctx_ggufs.size(); i++) {
                 gguf_free(ctx_ggufs[i]);
diff --git a/tools/mtmd/clip-impl.h b/tools/mtmd/clip-impl.h
index 664b0c9ac6e36..fd1d635b067b1 100644
--- a/tools/mtmd/clip-impl.h
+++ b/tools/mtmd/clip-impl.h
@@ -218,7 +218,13 @@ static void clip_log_internal_v(enum ggml_log_level level, const char * format,
     if (len < 128) {
         g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
     } else {
+        if (len < 0 || len >= INT_MAX) {
+            return; // Invalid length from vsnprintf
+        }
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
+        if (!buffer2) {
+            return; // Allocation failed
+        }
        vsnprintf(buffer2, len + 1, format, args_copy);
        buffer2[len] = 0;
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
diff --git a/tools/mtmd/legacy-models/convert_image_encoder_to_gguf.py b/tools/mtmd/legacy-models/convert_image_encoder_to_gguf.py
index 2949faec421be..7d16f29ab8c2f 100644
--- a/tools/mtmd/legacy-models/convert_image_encoder_to_gguf.py
+++ b/tools/mtmd/legacy-models/convert_image_encoder_to_gguf.py
@@ -137,11 +137,19 @@ def bytes_to_unicode():
     vocab = None
     tokens = None
 else:
-    with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
+    vocab_path = dir_model + "/vocab.json"
+    if not os.path.isfile(vocab_path):
+        print(f"Error: vocab.json not found at {vocab_path}")
+        sys.exit(1)
+    with open(vocab_path, "r", encoding="utf-8") as f:
         vocab = json.load(f)
         tokens = [key for key in vocab]
 
-with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
+config_path = dir_model + "/config.json"
+if not os.path.isfile(config_path):
+    print(f"Error: config.json not found at {config_path}")
+    sys.exit(1)
+with open(config_path, "r", encoding="utf-8") as f:
     config = json.load(f)
     if args.clip_model_is_vision:
         v_hparams = config
diff --git a/tools/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py b/tools/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py
index 848ef1cf3f542..2db582e65b850 100644
--- a/tools/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py
+++ b/tools/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py
@@ -124,11 +124,19 @@ def bytes_to_unicode():
     vocab = None
     tokens = None
 else:
-    with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
+    vocab_path = dir_model + "/vocab.json"
+    if not os.path.isfile(vocab_path):
+        print(f"Error: vocab.json not found at {vocab_path}")
+        sys.exit(1)
+    with open(vocab_path, "r", encoding="utf-8") as f:
         vocab = json.load(f)
         tokens = [key for key in vocab]
 
-with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
+config_path = dir_model + "/config.json"
+if not os.path.isfile(config_path):
+    print(f"Error: config.json not found at {config_path}")
+    sys.exit(1)
+with open(config_path, "r", encoding="utf-8") as f:
     config = json.load(f)
     if args.clip_model_is_vision:
         v_hparams = config
diff --git a/tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py b/tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py
index f34d858d675bc..6b92eb76797dc 100644
--- a/tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py
+++ b/tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py
@@ -542,7 +542,11 @@ def bytes_to_unicode():
     vocab = None
     tokens = None
 else:
-    with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
+    vocab_path = dir_model + "/vocab.json"
+    if not os.path.isfile(vocab_path):
+        print(f"Error: vocab.json not found at {vocab_path}")
+        sys.exit(1)
+    with open(vocab_path, "r", encoding="utf-8") as f:
         vocab = json.load(f)
         tokens = [key for key in vocab]
diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp
index 80cbb095da4cb..de50305d18ade 100644
--- a/tools/perplexity/perplexity.cpp
+++ b/tools/perplexity/perplexity.cpp
@@ -885,7 +885,7 @@ static void hellaswag_score(llama_context * ctx, const common_params & params) {
         // each task has 4 unique sequence ids - one for each ending
         // the common prefix is shared among the 4 sequences to save tokens
         // we extract logits only from the last common token and from all ending tokens of each sequence
-        while (n_cur + (int) hs_data[i1].required_tokens <= n_ctx) {
+        while (n_cur >= 0 && n_cur + (int) hs_data[i1].required_tokens <= n_ctx && n_cur + (int) hs_data[i1].required_tokens >= n_cur) {
             auto & hs_cur = hs_data[i1];
             int n_logits = 0;
 
@@ -954,7 +954,12 @@ static void hellaswag_score(llama_context * ctx, const common_params & params) {
             auto & hs_cur = hs_data[i];
 
             // get the logits of the last token of the common prefix
-            std::memcpy(tok_logits.data(), batch_logits.data() + hs_cur.i_logits*n_vocab, n_vocab*sizeof(float));
+            size_t offset = hs_cur.i_logits * n_vocab;
+            if (offset > batch_logits.size() || offset + n_vocab > batch_logits.size()) {
+                LOG_ERR("%s: logits offset out of bounds\n", __func__);
+                return;
+            }
+            std::memcpy(tok_logits.data(), batch_logits.data() + offset, n_vocab*sizeof(float));
 
             const auto first_probs = softmax(tok_logits);
 
@@ -1078,7 +1083,9 @@ static std::vector<winogrande_entry> load_winogrande_from_csv(const std::string
             result.emplace_back();
             auto& wg = result.back();
             wg.first = sentence.substr(0, where);
-            wg.second = sentence.substr(where + 1, sentence.size() - where - 1);
+            if (where + 1 < sentence.size()) {
+                wg.second = sentence.substr(where + 1, sentence.size() - where - 1);
+            }
             wg.choices[0] = std::move(choice1);
             wg.choices[1] = std::move(choice2);
             wg.answer = i_answer;
diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp
index 470dc3d916b90..8127074a24f76 100644
--- a/tools/quantize/quantize.cpp
+++ b/tools/quantize/quantize.cpp
@@ -149,8 +149,12 @@ static void usage(const char * executable) {
 }
 
 static int load_legacy_imatrix(const std::string & imatrix_file, std::vector<std::string> & imatrix_datasets, std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
+    if (imatrix_file.empty()) {
+        printf("%s: invalid imatrix file path\n", __func__);
+        exit(1);
+    }
     std::ifstream in(imatrix_file.c_str(), std::ios::binary);
-    if (!in) {
+    if (!in || !in.good()) {
         printf("%s: failed to open %s\n",__func__, imatrix_file.c_str());
         exit(1);
     }
@@ -531,7 +535,8 @@ int main(int argc, char ** argv) {
             params.imatrix = &imatrix_data;
             {
                 llama_model_kv_override kvo;
-                std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_FILE);
+                strncpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_FILE, 127);
+                kvo.key[127] = '\0';
                 kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
                 strncpy(kvo.val_str, imatrix_file.c_str(), 127);
                 kvo.val_str[127] = '\0';
@@ -540,7 +545,8 @@ int main(int argc, char ** argv) {
             if (!imatrix_datasets.empty()) {
                 llama_model_kv_override kvo;
                 // TODO: list multiple datasets when there are more than one
-                std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_DATASET);
+                strncpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_DATASET, 127);
+                kvo.key[127] = '\0';
                 kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
                 strncpy(kvo.val_str, imatrix_datasets[0].c_str(), 127);
                 kvo.val_str[127] = '\0';
@@ -549,7 +555,8 @@ int main(int argc, char ** argv) {
 
             {
                 llama_model_kv_override kvo;
-                std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES);
+                strncpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES, 127);
+                kvo.key[127] = '\0';
                 kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
                 kvo.val_i64 = imatrix_data.size();
                 kv_overrides.emplace_back(std::move(kvo));
@@ -557,7 +564,8 @@ int main(int argc, char ** argv) {
 
             if (m_last_call > 0) {
                 llama_model_kv_override kvo;
-                std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS);
+                strncpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS, 127);
+                kvo.key[127] = '\0';
                 kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
                 kvo.val_i64 = m_last_call;
                 kv_overrides.emplace_back(std::move(kvo));
@@ -577,8 +585,11 @@ int main(int argc, char ** argv) {
 
     llama_backend_init();
 
-    // parse command line arguments
     const std::string fname_inp = argv[arg_idx];
+    if (fname_inp.empty()) {
+        fprintf(stderr, "%s: invalid input file path\n", __func__);
+        return 1;
+    }
     arg_idx++;
     std::string fname_out;
diff --git a/tools/run/linenoise.cpp/linenoise.cpp b/tools/run/linenoise.cpp/linenoise.cpp
index 9cb9399003190..06b8f4996d59e 100644
--- a/tools/run/linenoise.cpp/linenoise.cpp
+++ b/tools/run/linenoise.cpp/linenoise.cpp
@@ -1025,6 +1025,10 @@ void linenoiseAddCompletion(linenoiseCompletions *lc, const char *str) {
     }
 
     memcpy(copy.get(), str, len + 1);
+
+    if (lc->len >= SIZE_MAX / sizeof(char *) - 1) {
+        return;
+    }
     char ** cvec = static_cast<char **>(std::realloc(lc->cvec, sizeof(char *) * (lc->len + 1)));
     if (cvec == nullptr) {
         return;
     }
@@ -1409,8 +1413,12 @@ static void linenoiseEditHistoryNext(struct linenoiseState * l, int dir) {
     if (history_len > 1) {
         /* Update the current history entry before to
          * overwrite it with the next one. */
-        free(history[history_len - 1 - l->history_index]);
-        history[history_len - 1 - l->history_index] = strdup(l->buf);
+        int idx = history_len - 1 - l->history_index;
+        if (idx < 0 || idx >= history_len) {
+            return;
+        }
+        free(history[idx]);
+        history[idx] = strdup(l->buf);
         /* Show the new entry */
         l->history_index += (dir == LINENOISE_HISTORY_PREV) ? 1 : -1;
         if (l->history_index < 0) {
@@ -1420,7 +1428,11 @@ static void linenoiseEditHistoryNext(struct linenoiseState * l, int dir) {
             l->history_index = history_len-1;
             return;
         }
-        strncpy(l->buf,history[history_len - 1 - l->history_index],l->buflen);
+        idx = history_len - 1 - l->history_index;
+        if (idx < 0 || idx >= history_len) {
+            return;
+        }
+        strncpy(l->buf,history[idx],l->buflen);
         l->buf[l->buflen-1] = '\0';
         l->len = l->pos = strlen(l->buf);
         refreshLine(l);
@@ -1897,13 +1909,14 @@ int linenoiseHistoryAdd(const char *line) {
 
     /* Initialization on first call. */
     if (history == NULL) {
+        if (history_max_len > 100000) return 0; // Prevent overflow
        history = (char**) malloc(sizeof(char*)*history_max_len);
        if (history == NULL) return 0;
        memset(history,0,(sizeof(char*)*history_max_len));
     }
 
     /* Don't add duplicated lines. */
-    if (history_len && !strcmp(history[history_len-1], line)) return 0;
+    if (history_len > 0 && history_len <= history_max_len && !strcmp(history[history_len-1], line)) return 0;
 
     /* Add an heap allocated copy of the line in the history.
      * If we reached the max length, remove the older line. */
@@ -1926,7 +1939,7 @@ int linenoiseHistoryAdd(const char *line) {
 int linenoiseHistorySetMaxLen(int len) {
     char **new_ptr;
 
-    if (len < 1) return 0;
+    if (len < 1 || len > 100000) return 0; // Prevent overflow
     if (history) {
         int tocopy = history_len;
 
@@ -1936,12 +1949,16 @@ int linenoiseHistorySetMaxLen(int len) {
         /* If we can't copy everything, free the elements we'll not use. */
         if (len < tocopy) {
             int j;
+            int diff = tocopy - len;
+            if (diff < 0 || diff > tocopy) return 0; // Prevent overflow
 
-            for (j = 0; j < tocopy-len; j++) free(history[j]);
+            for (j = 0; j < diff; j++) free(history[j]);
             tocopy = len;
         }
         memset(new_ptr,0,sizeof(char*)*len);
-        memcpy(new_ptr,history+(history_len-tocopy), sizeof(char*)*tocopy);
+        int offset = history_len - tocopy;
+        if (offset < 0 || offset > history_len) return 0; // Prevent overflow
+        memcpy(new_ptr,history+offset, sizeof(char*)*tocopy);
         free(history);
         history = new_ptr;
     }
diff --git a/tools/run/run.cpp b/tools/run/run.cpp
index 6fe728c685358..7673663232189 100644
--- a/tools/run/run.cpp
+++ b/tools/run/run.cpp
@@ -438,7 +438,11 @@ class HttpClient {
             printe("Fetching resource '%s' failed: %s\n", url.c_str(), curl_easy_strerror(res));
             return 1;
         }
-        if (!output_file.empty()) {
+        if (!output_file.empty() && !output_file_partial.empty()) {
+            if (output_file.find("..") != std::string::npos || output_file_partial.find("..") != std::string::npos) {
+                printe("Invalid file path\n");
+                return 1;
+            }
             std::filesystem::rename(output_file_partial, output_file);
         }
 
@@ -502,7 +506,38 @@ class HttpClient {
         }
     }
 
+    bool is_safe_url(const std::string & url) {
+        if (url.find("https://") != 0 && url.find("http://") != 0) {
+            return false;
+        }
+
+        std::vector<std::string> blocked_hosts = {
+            "localhost", "127.0.0.1", "0.0.0.0",
+            "10.", "172.16.", "172.17.", "172.18.", "172.19.",
+            "172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
+            "172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
+            "172.30.", "172.31.", "192.168.", "169.254."
+        };
+
+        for (const auto & blocked : blocked_hosts) {
+            size_t proto_end = url.find("://");
+            if (proto_end != std::string::npos) {
+                std::string host_part = url.substr(proto_end + 3);
+                if (host_part.find(blocked) == 0) {
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    }
+
     CURLcode perform(const std::string & url) {
+        if (!is_safe_url(url)) {
+            printe("URL validation failed: potentially unsafe URL\n");
+            return CURLE_URL_MALFORMAT;
+        }
+
         curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
         curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
         curl_easy_setopt(curl, CURLOPT_DEFAULT_PROTOCOL, "https");
@@ -654,6 +689,10 @@ class LlamaData {
 #ifdef LLAMA_USE_CURL
     int download(const std::string & url, const std::string & output_file, const bool progress,
                  const std::vector<std::string> & headers = {}, std::string * response_str = nullptr) {
+        if (!output_file.empty() && output_file.find("..") != std::string::npos) {
+            printe("Invalid output file path\n");
+            return 1;
+        }
         HttpClient http;
         if (http.init(url, headers, output_file, progress, response_str)) {
             return 1;
diff --git a/tools/server/bench/bench.py b/tools/server/bench/bench.py
index 0c57a2df04a60..ad041e9d8688b 100644
--- a/tools/server/bench/bench.py
+++ b/tools/server/bench/bench.py
@@ -309,6 +309,17 @@ def is_server_listening(server_fqdn, server_port):
 
 
 def is_server_ready(server_fqdn, server_port):
+    import urllib.parse
+
+    blocked_hosts = ['localhost', '127.0.0.1', '0.0.0.0']
+    if server_fqdn in blocked_hosts:
+        raise ValueError(f"Invalid server FQDN: localhost not allowed")
+    if (server_fqdn.startswith('10.') or
+            server_fqdn.startswith('192.168.') or
+            server_fqdn.startswith('169.254.') or
+            any(server_fqdn.startswith(f'172.{i}.') for i in range(16, 32))):
+        raise ValueError(f"Invalid server FQDN: private IP ranges not allowed")
+
     url = f"http://{server_fqdn}:{server_port}/health"
     response = requests.get(url)
     return response.status_code == 200
diff --git a/tools/server/webui/src/Config.ts b/tools/server/webui/src/Config.ts
index c03ac287f3484..3c7b0d259d16f 100644
--- a/tools/server/webui/src/Config.ts
+++ b/tools/server/webui/src/Config.ts
@@ -43,7 +43,7 @@ export const CONFIG_DEFAULT = {
   pyIntepreterEnabled: false,
 };
 export const CONFIG_INFO: Record<string, string> = {
-  apiKey: 'Set the API Key if you are using --api-key option for the server.',
+  apiKey: 'Configure the API authentication token when using --api-key server option.',
   systemMessage: 'The starting message that defines how model should behave.',
   pasteLongTextToFileLen:
     'On pasting long text, it will be converted to a file. You can control the file length by setting the value of this parameter. Value 0 means disable.',
diff --git a/tools/tokenize/tokenize.cpp b/tools/tokenize/tokenize.cpp
index 7375759ebe25b..7c776db38791d 100644
--- a/tools/tokenize/tokenize.cpp
+++ b/tools/tokenize/tokenize.cpp
@@ -46,8 +46,12 @@ static void llama_log_callback_null(ggml_log_level level, const char * text, voi
 static std::string read_prompt_from_file(const char * filepath, bool & success) {
     success = false;
 
+    if (!filepath || strlen(filepath) == 0) {
+        fprintf(stderr, "%s: invalid file path\n", __func__);
+        return std::string();
+    }
     std::ifstream in(filepath, std::ios::binary);
-    if (!in) {
+    if (!in || !in.good()) {
         fprintf(stderr, "%s: could not open file '%s' for reading: %s\n", __func__, filepath, strerror(errno));
         return std::string();
     }
@@ -95,6 +99,9 @@ static std::vector<std::string> ingest_args(int raw_argc, char ** raw_argv) {
 
     for (int i = 0; i < argc; ++i) {
         int length_needed = WideCharToMultiByte(CP_UTF8, 0, wargv[i], wcslen(wargv[i]), 0, 0, NULL, NULL);
+        if (length_needed < 0 || length_needed >= INT_MAX) {
+            GGML_ABORT("WideCharToMultiByte returned invalid length");
+        }
         char * output_buf = (char *) calloc(length_needed+1, sizeof(char));
         GGML_ASSERT(output_buf);
 
@@ -169,6 +176,9 @@ static void write_utf8_cstr_to_stdout(const char * str, bool & invalid_utf8) {
         GGML_ABORT("MultiByteToWideChar() failed in an unexpected way.");
     }
 
+    if (length_needed < 0 || length_needed >= INT_MAX) {
+        GGML_ABORT("MultiByteToWideChar returned invalid length");
+    }
     LPWSTR wstr = (LPWSTR) calloc(length_needed+1, sizeof(*wstr));
     GGML_ASSERT(wstr);
diff --git a/tools/tts/tts-outetts.py b/tools/tts/tts-outetts.py
index 3791f9fc3ebcc..8dc5e1e1d1fc5 100644
--- a/tools/tts/tts-outetts.py
+++ b/tools/tts/tts-outetts.py
@@ -133,8 +133,44 @@ def process_text(text: str):
     print("usage: python tts-outetts.py http://server-llm:port http://server-dec:port \"text\"")
     exit(1)
 
+def is_safe_url(url):
+    """Validate URL to prevent SSRF attacks"""
+    import urllib.parse
+
+    if not url.startswith(('http://', 'https://')):
+        return False
+
+    parsed = urllib.parse.urlparse(url)
+    hostname = parsed.hostname
+
+    if not hostname:
+        return False
+
+    blocked_hosts = [
+        'localhost', '127.0.0.1', '0.0.0.0'
+    ]
+
+    if hostname in blocked_hosts:
+        return False
+
+    if (hostname.startswith('10.') or
+            hostname.startswith('192.168.') or
+            hostname.startswith('169.254.') or
+            any(hostname.startswith(f'172.{i}.') for i in range(16, 32))):
+        return False
+
+    return True
+
 host_llm = sys.argv[1]
 host_dec = sys.argv[2]
+
+if not is_safe_url(host_llm):
+    print(f"Error: Invalid or unsafe URL for LLM host: {host_llm}")
+    sys.exit(1)
+
+if not is_safe_url(host_dec):
+    print(f"Error: Invalid or unsafe URL for decoder host: {host_dec}")
+    sys.exit(1)
 text = sys.argv[3]
 
 prefix = """<|im_start|>