@@ -1,7 +1,5 @@
 #pragma once
 
-#include "llama-impl.h"
-
 #include <map>
 
 //
@@ -375,13 +373,11 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
 };
 
 struct LLM_KV {
-    LLM_KV(llm_arch arch) : arch(arch) {}
+    LLM_KV(llm_arch arch);
 
     llm_arch arch;
 
-    std::string operator()(llm_kv kv) const {
-        return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
-    }
+    std::string operator()(llm_kv kv) const;
 };
 
 enum llm_tensor {
@@ -1589,16 +1585,6 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
 };
 
-static llm_arch llm_arch_from_string(const std::string & name) {
-    for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
-        if (kv.second == name) {
-            return kv.first;
-        }
-    }
-
-    return LLM_ARCH_UNKNOWN;
-}
-
 // helper to handle gguf constants
 // usage:
 //
@@ -1615,20 +1601,7 @@ struct LLM_TN_IMPL {
     const int bid;
     const int xid;
 
-    std::string str() const {
-        if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
-            return "__missing__";
-        }
-
-        std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
-
-        if (suffix != nullptr) {
-            name += ".";
-            name += suffix;
-        }
-
-        return name;
-    }
+    std::string str() const;
 
     operator std::string() const {
         return str();
@@ -1657,58 +1630,6 @@ struct LLM_TN {
     }
 };
 
-//
-// load LLaMA models
-//
-
-static const char * llama_model_arch_name(llm_arch arch) {
-    auto it = LLM_ARCH_NAMES.find(arch);
-    if (it == LLM_ARCH_NAMES.end()) {
-        return "unknown";
-    }
-    return it->second;
-}
-
-static std::string llama_model_ftype_name(llama_ftype ftype) {
-    if (ftype & LLAMA_FTYPE_GUESSED) {
-        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
-    }
-
-    switch (ftype) {
-        case LLAMA_FTYPE_ALL_F32: return "all F32";
-        case LLAMA_FTYPE_MOSTLY_F16: return "F16";
-        case LLAMA_FTYPE_MOSTLY_BF16: return "BF16";
-        case LLAMA_FTYPE_MOSTLY_Q4_0: return "Q4_0";
-        case LLAMA_FTYPE_MOSTLY_Q4_1: return "Q4_1";
-        case LLAMA_FTYPE_MOSTLY_Q5_0: return "Q5_0";
-        case LLAMA_FTYPE_MOSTLY_Q5_1: return "Q5_1";
-        case LLAMA_FTYPE_MOSTLY_Q8_0: return "Q8_0";
-        case LLAMA_FTYPE_MOSTLY_Q2_K: return "Q2_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q2_K_S: return "Q2_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_S: return "Q3_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_M: return "Q3_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_L: return "Q3_K - Large";
-        case LLAMA_FTYPE_MOSTLY_Q4_K_S: return "Q4_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q4_K_M: return "Q4_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q5_K_S: return "Q5_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q5_K_M: return "Q5_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q6_K: return "Q6_K";
-        case LLAMA_FTYPE_MOSTLY_TQ1_0: return "TQ1_0 - 1.69 bpw ternary";
-        case LLAMA_FTYPE_MOSTLY_TQ2_0: return "TQ2_0 - 2.06 bpw ternary";
-        case LLAMA_FTYPE_MOSTLY_IQ2_XXS: return "IQ2_XXS - 2.0625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_XS: return "IQ2_XS - 2.3125 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_S: return "IQ2_S - 2.5 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_M: return "IQ2_M - 2.7 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XS: return "IQ3_XS - 3.3 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XXS: return "IQ3_XXS - 3.0625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ1_S: return "IQ1_S - 1.5625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ1_M: return "IQ1_M - 1.75 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_M: return "IQ3_S mix - 3.66 bpw";
-
-        default: return "unknown, may not work";
-    }
-}
+const char * llm_arch_name(llm_arch arch);
 
+llm_arch llm_arch_from_string(const std::string & name);
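
The function bodies removed above still need to live in some translation unit. Below is a minimal sketch of the corresponding out-of-line definitions, assuming they move into a companion source file; the file name llama-arch.cpp, the include of "llama-impl.h" for ::format, and the reuse of the removed llama_model_arch_name body for the new llm_arch_name declaration are assumptions, not shown in this diff. llama_model_ftype_name gets no replacement declaration here, so it presumably relocates to a different unit.

// llama-arch.cpp -- assumed companion source file (name not shown in this diff)
#include "llama-arch.h"

#include "llama-impl.h" // assumed: provides ::format, no longer pulled in by the header

LLM_KV::LLM_KV(llm_arch arch) : arch(arch) {}

// body moved verbatim from the header
std::string LLM_KV::operator()(llm_kv kv) const {
    return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
}

// body moved verbatim from the header
std::string LLM_TN_IMPL::str() const {
    if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
        return "__missing__";
    }

    std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);

    if (suffix != nullptr) {
        name += ".";
        name += suffix;
    }

    return name;
}

// assumed: the new llm_arch_name reuses the body of the removed llama_model_arch_name
const char * llm_arch_name(llm_arch arch) {
    auto it = LLM_ARCH_NAMES.find(arch);
    if (it == LLM_ARCH_NAMES.end()) {
        return "unknown";
    }
    return it->second;
}

// body moved verbatim from the header, minus the static qualifier
llm_arch llm_arch_from_string(const std::string & name) {
    for (const auto & kv : LLM_ARCH_NAMES) {
        if (kv.second == name) {
            return kv.first;
        }
    }

    return LLM_ARCH_UNKNOWN;
}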