Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions examples/demo_qwen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ int main(int argc, char **argv) {
std::iostream::sync_with_stdio(false);

cmdline::parser cmdParser;

cmdParser.add<string>("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/qwen2.5_vocab.mllm");
cmdParser.add<string>("merge", 'e', "specify mllm merge file path", false, "../vocab/qwen2.5_merges.txt");
cmdParser.add<string>("model", 'm', "specify mllm model path", false, "../models/qwen-2.5-3b-instruct-q4_0_4x4.mllm");
cmdParser.add<string>("billion", 'b', "[0.5B | 1.8B | 1.5B | 3B |]", false, "3B");
cmdParser.add<string>("version", 'r', "[Qwen1.5 | Qwen2.5 |]", false, "Qwen2.5");
cmdParser.add<int>("limits", 'l', "max KV cache size", false, 400);
cmdParser.add<int>("thread", 't', "num of threads", false, 4);
cmdParser.parse_check(argc, argv);
Expand All @@ -30,11 +32,12 @@ int main(int argc, char **argv) {
string merge_path = cmdParser.get<string>("merge");
string model_path = cmdParser.get<string>("model");
string model_billion = cmdParser.get<string>("billion");
string model_version = cmdParser.get<string>("version");
int tokens_limit = cmdParser.get<int>("limits");
CPUBackend::cpu_threads = cmdParser.get<int>("thread");

auto tokenizer = QWenTokenizer(vocab_path, merge_path);
QWenConfig config(tokens_limit, model_billion, RoPEType::HFHUBROPE);
QWenConfig config(tokens_limit, model_billion, RoPEType::HFHUBROPE, model_version);
auto model = QWenForCausalLM(config);
model.load(model_path);

Expand All @@ -51,10 +54,10 @@ int main(int argc, char **argv) {

LlmTextGeneratorOpts opt{
.max_new_tokens = 100,
.do_sample = true,
.temperature = 0.3F,
.top_k = 50,
.top_p = 0.F,
.do_sample = false,
//.temperature = 0.7F,
//.top_k = 20,
//.top_p = 0.8F,
};
model.generate(input_tensor, opt, [&](unsigned int out_token) -> bool {
auto out_string = tokenizer.detokenize({out_token});
Expand Down
29 changes: 26 additions & 3 deletions src/models/qwen/configuration_qwen.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,17 @@ class QWenNameConfig : public TransformerNameConfig {
};

struct QWenConfig : public TransformerConfig {
explicit QWenConfig(int token_limit, string billions = "0.5B", RoPEType type = RoPEType::HFHUBROPE) :
explicit QWenConfig(int token_limit, string billions = "0.5B", RoPEType type = RoPEType::HFHUBROPE, string model_version = "Qwen1.5") :
cache_limit(token_limit) {
names_config.init(type);
string billionsType;
std::transform(billions.begin(), billions.end(), std::back_inserter(billionsType),
::tolower);
if (billionsType == "0.5b") {
string modelVersion;
std::transform(model_version.begin(), model_version.end(), std::back_inserter(modelVersion),
::tolower);

if (billionsType == "0.5b" && modelVersion == "qwen1.5") {
attention_dropout = 0.0;
bos_token_id = 151643;
eos_token_id = 151645;
Expand All @@ -102,6 +106,25 @@ struct QWenConfig : public TransformerConfig {
sliding_window = 32768;
vocab_size = 151936;
tie_embedding_words = true;
} else if (billionsType == "0.5b" && modelVersion == "qwen2.5") {
attention_dropout = 0.0;
bos_token_id = 151643;
eos_token_id = 151645;
std::string hidden_act = "silu";
hidden_size = 896;
initializer_range = 0.02;
intermediate_size = 4864;
max_position_embeddings = 32768;
max_window_layers = 21;
model_type = "qwen2";
num_attention_heads = 14;
num_hidden_layers = 24;
num_key_value_heads = 2;
rms_norm_eps = 1e-6;
rope_theta = 1000000.0;
sliding_window = 32768;
vocab_size = 151936;
tie_embedding_words = true;
} else if (billionsType == "1.8b") {
attention_dropout = 0.0;
std::string hidden_act = "silu";
Expand All @@ -116,7 +139,7 @@ struct QWenConfig : public TransformerConfig {
sliding_window = 32768;
vocab_size = 151936;
tie_embedding_words = false;
} else if (billionsType == "1.5b") {
} else if (billionsType == "1.5b" && modelVersion == "qwen2.5") {
attention_dropout = 0.0;
std::string hidden_act = "silu";
hidden_size = 1536;
Expand Down