Skip to content

Commit 773d5ac

Browse files
committed
add Jiutian
1 parent 9eb0333 commit 773d5ac

File tree

8 files changed

+85
-1
lines changed

8 files changed

+85
-1
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ set(core_files src/backend.cpp
8888
models/instella.cpp
8989
models/internlm.cpp
9090
models/jina.cpp
91+
models/jiutian.cpp
9192
models/llama.cpp
9293
models/m_a_p.cpp
9394
models/megrez.cpp

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
1313

1414
**What's New:**
1515

16+
* 2025-07-29: Jiutian
1617
* 2025-07-10: SmolLM-3
1718
* 2025-07-05: Pangu-Pro-MoE
1819
* 2025-07-04: ERNIE-MoE

convert.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@ class ModelType(Enum):
205205

206206
Exaone4 = 0x2800
207207

208+
JiuTian = 0x2900
209+
208210
BCE_Embedding = 0x10000100
209211
BCE_ReRanker = 0x10000101
210212
BGE_M3 = 0x10000102
@@ -7171,6 +7173,26 @@ def get_block(prefix: str):
71717173

71727174
return weights + dac_weights
71737175

7176+
7177+
class JiuTianConverter(BaseConverter):
    """Converter for Jiutian (`JiutianForCausalLM`) checkpoints.

    Jiutian is Qwen2-compatible on the weight side, so tensor naming is
    delegated to :class:`QWen2Converter`; only the serialized config differs.
    """
    MODEL_TYPE = ModelType.JiuTian

    @staticmethod
    def dump_config(f, config, ggml_type):
        # The runtime reuses the Qwen2 graph, which expects biased QKV projections.
        assert config.qkv_bias
        dump_llama_like_config(f, config, ggml_type)

        # Extra fields appended after the common llama-like header; the order
        # must match the `Config` struct read by models/jiutian.cpp.
        extra_ints = [
            config.num_key_value_heads,
            1 if config.tie_word_embeddings else 0,
        ]
        f.write(struct.pack("i" * len(extra_ints), *extra_ints))
        f.write(struct.pack("<f", config.rope_theta))

    @staticmethod
    def get_weight_names(config):
        # Identical tensor layout to Qwen2.
        return QWen2Converter.get_weight_names(config)
7195+
71747196
def convert_grok_1_base(args, vocab, ggml_type):
71757197
def ffn_size(emb_size, widening_factor):
71767198
_ffn_size = int(widening_factor * emb_size) * 2 // 3
@@ -7758,6 +7780,8 @@ def main():
77587780
ERNIEMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
77597781
elif arch == 'PanguProMoEForCausalLM':
77607782
PanguMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
7783+
elif arch == 'JiutianForCausalLM':
7784+
JiuTianConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
77617785
elif arch == 'deepseek-r1-distill-qwen3':
77627786
QWen3Converter.MODEL_TYPE = ModelType.DeepSeek_R1_Distill_QWen3
77637787
QWen3Converter.convert(config, model_files, vocab, ggml_type, args.save_path)

docs/models.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,12 @@
9292
* [x] v2.5: [Chat-1.8B](https://huggingface.co/internlm/internlm2_5-1_8b-chat), [Chat-7B](https://huggingface.co/internlm/internlm2_5-7b-chat), [Chat-7B-1M](https://huggingface.co/internlm/internlm2_5-7b-chat-1m), [Chat-20B](https://huggingface.co/internlm/internlm2_5-20b-chat)
9393
* [x] v3: [Instruct-8B](https://huggingface.co/internlm/internlm3-8b-instruct)
9494

95+
* Jiutian (`JiutianForCausalLM`)
96+
* [x] [Math-8B](https://huggingface.co/JT-LM/JT-Math-8B-Instruct/tree/00a347fdae86ddd9e616aa0771492c6aff735697),
97+
[Math-8B-Thinking](https://huggingface.co/JT-LM/JT-Math-8B-Thinking/tree/87d8db3e39c65fa123c59a97266a3ec02ebf6bd6),
98+
[Coder-8B-Instruct](https://huggingface.co/JT-LM/JT-Coder-8B-Instruct/tree/9160d51e9acaae266cfef8493ea25d15e7ed6904),
99+
[DA-8B](https://huggingface.co/JT-LM/JT-DA-8B/tree/8bd5bb1a76305dcc777786b65c239b362cee808e)
100+
95101
* Ling (`BailingMoeForCausalLM`)
96102
* [x] [Lite](https://huggingface.co/inclusionAI/Ling-lite/tree/a80ae6c479251f1ae33dda517ab83cdc6a312f99), [Coder-Lite](https://huggingface.co/inclusionAI/Ling-Coder-lite/tree/4a8647acf9d3855d599adaaaf4bf6ca14239d2ab)
97103

models/jiutian.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include "qwen.h"
2+
#include "../src/models_priv.h"
3+
4+
namespace chatllm::jiutian
5+
{
6+
struct Config : public BaseConfig
7+
{
8+
int num_key_value_heads;
9+
int tie_word_embeddings;
10+
float rope_theta;
11+
};
12+
13+
static qwen::v2::Config convert(const Config &config)
14+
{
15+
qwen::v2::Config r;
16+
*(BaseConfig *)&r = *(BaseConfig *)&config;
17+
r.num_key_value_heads = config.num_key_value_heads;
18+
r.sliding_window = -1;
19+
r.rope_theta = config.rope_theta;
20+
return r;
21+
}
22+
23+
typedef qwen::v2::Tokenizer Tokenizer;
24+
25+
class ConditionalGeneration : public qwen::v2::ConditionalGeneration
26+
{
27+
public:
28+
ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config)
29+
: qwen::v2::ConditionalGeneration(convert(config), runtime_config, MODEL_TYPE_JIUTIAN, config.tie_word_embeddings != 0)
30+
{}
31+
};
32+
33+
REGISTER_MODEL_LOADER(JIUTIAN, jiutian, 1);
34+
}

scripts/models.json

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3055,5 +3055,21 @@
30553055
}
30563056
}
30573057
}
3058+
},
3059+
"jiutian-coder": {
3060+
"brief": "A series of high-performance and energy-efficient code large language models (LLMs) developed by the JiuTian team.",
3061+
"default": "8b",
3062+
"license": "Apache License 2.0",
3063+
"variants": {
3064+
"8b": {
3065+
"default": "q8",
3066+
"quantized": {
3067+
"q8": {
3068+
"size": 8317413728,
3069+
"url": "chatllm_quantized_jiutian/jt-coder-8b-it.bin"
3070+
}
3071+
}
3072+
}
3073+
}
30583074
}
30593075
}

src/chat.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1385,7 +1385,7 @@ namespace chatllm
13851385
n_dims = 2;
13861386

13871387
CHATLLM_CHECK(ndim == n_dims)
1388-
<< "tensor " << name << " ndim mismatch: expect " << n_dims << " but got " << ndim;
1388+
<< "tensor " << name << " ndim mismatch: expect " << n_dims << " but got " << ndim << ". expected shape: " << shape_to_string(tensor);
13891389

13901390
if (partial)
13911391
{

src/models_priv.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ namespace chatllm
165165

166166
MODEL_TYPE_EXAONE4 = 0x2800,
167167

168+
MODEL_TYPE_JIUTIAN = 0x2900,
169+
168170
MODEL_TYPE_BCE_Embedding = 0x10000100,
169171
MODEL_TYPE_BCE_ReRanker = 0x10000101,
170172
MODEL_TYPE_BGE_M3 = 0x10000102,

0 commit comments

Comments
 (0)