From 5144a18e6705c87241bdb169538cc4183984ef81 Mon Sep 17 00:00:00 2001
From: lexasub
Date: Mon, 27 Jan 2025 15:09:34 +0400
Subject: [PATCH 1/3] impl::load: change bpe_ranks from std::map to
 std::unordered_map to reduce impl::load time by 30%

---
 src/llama-vocab.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 0782d3a41a1f5..6a3a9d48e9635 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1245,8 +1245,13 @@ struct llama_vocab::impl {
     std::vector<llama_token> cache_special_tokens;
     std::vector<std::string> cache_token_to_piece; // llama_token_to_piece(special = true);
-
-    std::map<std::pair<std::string, std::string>, int> bpe_ranks;
+    struct PairHash {
+        size_t operator()(const std::pair<std::string, std::string>& p) const {
+            return std::hash<std::string>{}(p.first) ^ // create some hash for pair
+                   (std::hash<std::string>{}(p.second) << 1);
+        }
+    };
+    std::unordered_map<std::pair<std::string, std::string>, int, PairHash> bpe_ranks;
 
     // set of all tokens that cause "end of generation"
     std::set<llama_token> special_eog_ids;
 

From 7c2b92423251f22e4ecbd7eba5b06c7765e68f23 Mon Sep 17 00:00:00 2001
From: lexasub
Date: Mon, 27 Jan 2025 15:10:54 +0400
Subject: [PATCH 2/3] llama_model_loader::init_mappings: replace new llama_mmap
 with std::make_unique for cleaner code and to halve init_mappings runtime

---
 src/llama-model-loader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index 75073bf610ac3..05d58ad90eba9 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -819,7 +819,7 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
     for (const auto & file : files) {
         auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
         auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
-        std::unique_ptr<llama_mmap> mapping(new llama_mmap(file.get(), prefetch ? -1 : 0, is_numa_fn()));
+        std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
         mmaps_used.emplace_back(mapping->size(), 0);
         if (mlock_mmaps) {
             std::unique_ptr<llama_mlock> mlock_mmap(new llama_mlock());

From 723fc665110ff1724459d32c0cac44000b725204 Mon Sep 17 00:00:00 2001
From: Diego Devesa
Date: Mon, 27 Jan 2025 14:24:08 +0100
Subject: [PATCH 3/3] Update src/llama-vocab.cpp

---
 src/llama-vocab.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 6a3a9d48e9635..561f8bdb84010 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1245,13 +1245,13 @@ struct llama_vocab::impl {
     std::vector<llama_token> cache_special_tokens;
     std::vector<std::string> cache_token_to_piece; // llama_token_to_piece(special = true);
-    struct PairHash {
-        size_t operator()(const std::pair<std::string, std::string>& p) const {
+    struct pair_hash {
+        size_t operator()(const std::pair<std::string, std::string> & p) const {
             return std::hash<std::string>{}(p.first) ^ // create some hash for pair
                    (std::hash<std::string>{}(p.second) << 1);
         }
     };
-    std::unordered_map<std::pair<std::string, std::string>, int, PairHash> bpe_ranks;
+    std::unordered_map<std::pair<std::string, std::string>, int, pair_hash> bpe_ranks;
 
     // set of all tokens that cause "end of generation"
     std::set<llama_token> special_eog_ids;
 
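
A note on PATCH 1: the standard library defines no std::hash specialization for std::pair, so an unordered map keyed by a pair of strings must supply its own hasher, which is what the patch adds. Below is a minimal standalone sketch of the same technique; the merge pairs and ranks are invented for illustration and are not part of the patches, only the hasher itself mirrors the diff.

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>

// hash functor for a pair of strings; required because the standard library
// provides no std::hash for std::pair (this mirrors pair_hash in the patch)
struct pair_hash {
    std::size_t operator()(const std::pair<std::string, std::string> & p) const {
        // combine the two string hashes; the shift keeps the combiner
        // asymmetric, so ("a","b") and ("b","a") hash differently
        return std::hash<std::string>{}(p.first) ^
               (std::hash<std::string>{}(p.second) << 1);
    }
};

int main() {
    // hypothetical merge ranks, standing in for bpe_ranks
    std::unordered_map<std::pair<std::string, std::string>, int, pair_hash> ranks;
    ranks[{"t", "h"}]  = 0;
    ranks[{"th", "e"}] = 1;

    // average O(1) lookup, vs. the O(log n) string-pair comparisons of std::map
    const auto it = ranks.find({"t", "h"});
    std::cout << (it != ranks.end() ? it->second : -1) << "\n";
}

PATCH 3 changes no behavior: it renames PairHash to pair_hash and adjusts reference spacing to match the project's snake_case naming style.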
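
A note on PATCH 2: besides reading cleaner, std::make_unique names the stored type only once and keeps the raw new out of sight. A minimal sketch with a stand-in type (file_mapping is hypothetical; llama_mmap's real constructor arguments appear in the diff above):

#include <memory>

// stand-in type; llama_mmap's actual constructor is shown in the diff above
struct file_mapping {
    file_mapping(int /*fd*/, long /*prefetch*/, bool /*numa*/) {}
};

int main() {
    // before: raw new handed to the unique_ptr constructor
    std::unique_ptr<file_mapping> before(new file_mapping(3, -1, false));

    // after: one expression, no raw new, the type is named only once
    auto after = std::make_unique<file_mapping>(3, -1, false);

    (void)before;
    (void)after;
}

The runtime claim in the subject is the author's measurement; functionally, make_unique performs the same single allocation as the new expression it replaces.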