@@ -70,38 +70,34 @@ add_executable(test-tokenizer-0 test-tokenizer-0.cpp)
7070target_link_libraries(test-tokenizer-0 PRIVATE common)
7171install(TARGETS test-tokenizer-0 RUNTIME)
7272
73- llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
74- llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
75- llama_test(test-tokenizer-0 NAME test-tokenizer-0-phi-3 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-phi-3.gguf)
76- llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
7773llama_test(test-tokenizer-0 NAME test-tokenizer-0-bert-bge ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bert-bge.gguf)
78- # TODO: enable when fixed
79- # https://github.com/ggerganov/llama.cpp/pull/7036
80- #llama_test(test-tokenizer-0 NAME test-tokenizer-0-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
81- #llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
82- #llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
83- llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
84- llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
85- llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
8674llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-command-r.gguf)
75+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
76+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
77+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
78+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
79+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
80+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
81+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
82+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-phi-3 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-phi-3.gguf)
8783llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-qwen2.gguf)
84+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
85+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
8886
8987# build test-tokenizer-1-bpe target once and add many tests
9088add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
9189target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
9290install(TARGETS test-tokenizer-1-bpe RUNTIME)
9391
9492# TODO: disabled due to slowness
95- #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
96- #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
9793#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
98- #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
99- #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-stablelm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-stablelm.gguf)
94+ #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
95+ #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
10096#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
97+ #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
98+ #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
10199#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
102100#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
103- #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
104- #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-bloom ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf)
105101
106102# build test-tokenizer-1-spm target once and add many tests
107103add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
0 commit comments