@@ -113,6 +113,7 @@ class Opt {
     llama_context_params ctx_params;
     llama_model_params model_params;
     std::string model_;
+    std::string chat_template_file;
     std::string user;
     bool use_jinja = false;
     int context_size = -1, ngl = -1;
@@ -148,6 +149,16 @@ class Opt {
         return 0;
     }
 
+    int handle_option_with_value(int argc, const char ** argv, int & i, std::string & option_value) {
+        if (i + 1 >= argc) {
+            return 1;
+        }
+
+        option_value = argv[++i];
+
+        return 0;
+    }
+
     int parse(int argc, const char ** argv) {
         bool options_parsing = true;
         for (int i = 1, positional_args_i = 0; i < argc; ++i) {
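
A minimal, self-contained sketch of the contract the new handle_option_with_value helper establishes: an option that requires a value consumes the next argv entry and advances i past it, or fails when the option is the last argument. The main harness below is hypothetical and not part of the commit:

#include <cstring>
#include <iostream>
#include <string>

// Same contract as the helper in the hunk above: returns 0 and advances i
// past the consumed value, or returns 1 if no value follows the option.
static int handle_option_with_value(int argc, const char ** argv, int & i, std::string & option_value) {
    if (i + 1 >= argc) {
        return 1;
    }
    option_value = argv[++i];
    return 0;
}

int main(int argc, const char ** argv) {
    std::string chat_template_file;
    for (int i = 1; i < argc; ++i) {
        if (strcmp(argv[i], "--chat-template-file") == 0 &&
            handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
            std::cerr << "error: --chat-template-file requires a path\n";
            return 1;
        }
    }
    std::cout << "chat template file: '" << chat_template_file << "'\n";
    return 0;
}
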
@@ -169,6 +180,11 @@ class Opt {
                 verbose = true;
             } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
                 use_jinja = true;
+            } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0) {
+                if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
+                    return 1;
+                }
+                use_jinja = true;
             } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
                 help = true;
                 return 0;
@@ -207,6 +223,11 @@ class Opt {
             "Options:\n"
             "  -c, --context-size <value>\n"
             "      Context size (default: %d)\n"
+            "  --chat-template-file <path>\n"
+            "      Path to the file containing the chat template to use with the model.\n"
+            "      Only supports jinja templates and implicitly sets the --jinja flag.\n"
+            "  --jinja\n"
+            "      Use jinja templating for the chat template of the model\n"
             "  -n, -ngl, --ngl <value>\n"
             "      Number of GPU layers (default: %d)\n"
             "  --temp <value>\n"
@@ -1074,12 +1095,44 @@ static int get_user_input(std::string & user_input, const std::string & user) {
     return 0;
 }
 
+// Reads a chat template file to be used
+static std::string read_chat_template_file(const std::string & chat_template_file) {
+    if (chat_template_file.empty()) {
+        return "";
+    }
+
+    FILE * file = ggml_fopen(chat_template_file.c_str(), "r");
+    if (!file) {
+        std::cerr << "Error opening chat template file '" << chat_template_file << "': " << strerror(errno) << "\n";
+        return "";
+    }
+
+    fseek(file, 0, SEEK_END);
+    size_t size = ftell(file);
+    fseek(file, 0, SEEK_SET);
+
+    std::vector<unsigned char> data(size);
+    size_t read_size = fread(data.data(), 1, size, file);
+    fclose(file);
+    if (read_size != size) {
+        std::cerr << "Error reading chat template file '" << chat_template_file << "': " << strerror(errno) << "\n";
+        return "";
+    }
+    return std::string(data.begin(), data.end());
+}
+
 // Main chat loop function
-static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_jinja) {
+static int chat_loop(LlamaData & llama_data, const std::string & user, const std::string & chat_template_file, bool use_jinja) {
     int prev_len = 0;
     llama_data.fmtted.resize(llama_n_ctx(llama_data.context.get()));
-    auto chat_templates = common_chat_templates_from_model(llama_data.model.get(), "");
+
+    std::string chat_template = "";
+    if (!chat_template_file.empty()) {
+        chat_template = read_chat_template_file(chat_template_file);
+    }
+    auto chat_templates = common_chat_templates_from_model(llama_data.model.get(), chat_template);
     GGML_ASSERT(chat_templates.template_default);
+
     static const bool stdout_a_terminal = is_stdout_a_terminal();
     while (true) {
         // Get user input
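
The new read_chat_template_file helper slurps the file with C stdio through ggml_fopen, llama.cpp's fopen wrapper that keeps path handling portable across platforms. For comparison only, a rough iostream-based equivalent of the same slurp (a sketch, not part of the commit) could look like:

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

// Hypothetical variant: read the whole template file into a string with
// std::ifstream instead of C stdio; returns "" on failure, like the
// helper in the commit.
static std::string read_chat_template_file_ifstream(const std::string & path) {
    std::ifstream file(path, std::ios::binary);
    if (!file) {
        std::cerr << "Error opening chat template file '" << path << "'\n";
        return "";
    }
    std::ostringstream ss;
    ss << file.rdbuf();  // copy the entire stream into the string buffer
    return ss.str();
}

Either way, the caller treats an empty string as "no override", which is why chat_loop only consults the helper when chat_template_file is non-empty.
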
@@ -1165,7 +1218,7 @@ int main(int argc, const char ** argv) {
         return 1;
     }
 
-    if (chat_loop(llama_data, opt.user, opt.use_jinja)) {
+    if (chat_loop(llama_data, opt.user, opt.chat_template_file, opt.use_jinja)) {
        return 1;
     }
 
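
With these pieces in place, an invocation along the lines of llama-run --chat-template-file ./template.jinja <model> (binary name and path shown for illustration) reads the template from disk, hands it to common_chat_templates_from_model as an override for the model's built-in template, and implicitly enables --jinja, matching the help text above.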