diff --git a/tools/tokenize/tokenize.cpp b/tools/tokenize/tokenize.cpp index 7375759ebe25b..ad866277726cb 100644 --- a/tools/tokenize/tokenize.cpp +++ b/tools/tokenize/tokenize.cpp @@ -1,5 +1,5 @@ #include "common.h" -//#include "log.h" // TODO: start using log.h +#include "log.h" #include "llama.h" #include @@ -7,410 +7,458 @@ #include #include #include -#include // TODO: remove me #if defined(_WIN32) #define WIN32_LEAN_AND_MEAN #include -#include // For CommandLineToArgvW +#include #endif +struct tokenizer_config { + const char * model_path; + std::string prompt; + bool print_ids; + bool no_bos; + bool no_escape; + bool no_parse_special; + bool disable_logging; + bool show_token_count; + + enum prompt_source_type { + PROMPT_SOURCE_NONE, + PROMPT_SOURCE_FILE, + PROMPT_SOURCE_ARGUMENT, + PROMPT_SOURCE_STDIN + } prompt_source; + + tokenizer_config() : + model_path(nullptr), + prompt(""), + print_ids(false), + no_bos(false), + no_escape(false), + no_parse_special(false), + disable_logging(false), + show_token_count(false), + prompt_source(PROMPT_SOURCE_NONE) {} +}; + static void print_usage_information(const char * argv0) { - printf("usage: %s [options]\n\n", argv0); - printf("The tokenize program tokenizes a prompt using a given model,\n"); - printf("and prints the resulting tokens to standard output.\n\n"); - printf("It needs a model file, a prompt, and optionally other flags\n"); - printf("to control the behavior of the tokenizer.\n\n"); - printf(" The possible options are:\n"); - printf("\n"); - printf(" -h, --help print this help and exit\n"); - printf(" -m MODEL_PATH, --model MODEL_PATH path to model.\n"); - printf(" --ids if given, only print numerical token IDs, and not token strings.\n"); - printf(" The output format looks like [1, 2, 3], i.e. parseable by Python.\n"); - printf(" -f PROMPT_FNAME, --file PROMPT_FNAME read prompt from a file.\n"); - printf(" -p PROMPT, --prompt PROMPT read prompt from the argument.\n"); - printf(" --stdin read prompt from standard input.\n"); - printf(" --no-bos do not ever add a BOS token to the prompt, even if normally the model uses a BOS token.\n"); - printf(" --no-escape do not escape input (such as \\n, \\t, etc.).\n"); - printf(" --no-parse-special do not parse control tokens.\n"); - printf(" --log-disable disable logs. Makes stderr quiet when loading the model.\n"); - printf(" --show-count print the total number of tokens.\n"); + LOG("Usage: %s [options]\n\n", argv0); + LOG("The tokenize program tokenizes a prompt using a given model,\n"); + LOG("and prints the resulting tokens to standard output.\n\n"); + LOG("Required:\n"); + LOG(" -m, --model MODEL_PATH Path to the model file\n"); + LOG(" One of: --file, --prompt, --stdin\n\n"); + LOG("Prompt sources (exactly one required):\n"); + LOG(" -f, --file FILENAME Read prompt from file\n"); + LOG(" -p, --prompt TEXT Use prompt from command line\n"); + LOG(" --stdin Read prompt from standard input\n\n"); + LOG("Output options:\n"); + LOG(" --ids Print only token IDs as [1, 2, 3]\n"); + LOG(" --show-count Show total token count\n\n"); + LOG("Tokenization options:\n"); + LOG(" --no-bos Don't add BOS token\n"); + LOG(" --no-escape Don't process escape sequences (\\n, \\t)\n"); + LOG(" --no-parse-special Don't parse special/control tokens\n\n"); + LOG("Other options:\n"); + LOG(" --log-disable Disable model loading logs\n"); + LOG(" -h, --help Show this help and exit\n"); + LOG("\nExamples:\n"); + LOG(" %s -m model.gguf -p \"Hello world\"\n", argv0); + LOG(" %s -m model.gguf --file input.txt --ids\n", argv0); + LOG(" echo \"Hello\" | %s -m model.gguf --stdin\n", argv0); } -static void llama_log_callback_null(ggml_log_level level, const char * text, void * user_data) { - (void) level; - (void) text; - (void) user_data; +static bool read_file_to_string(const char * filepath, std::string & result) { + LOG_DBG("Reading prompt from file: %s\n", filepath); + + std::ifstream file(filepath, std::ios::binary); + if (!file) { + LOG_ERR("Cannot open file '%s': %s\n", filepath, strerror(errno)); + return false; + } + + file.seekg(0, std::ios::end); + const size_t file_size = file.tellg(); + file.seekg(0, std::ios::beg); + + result.resize(file_size); + file.read(&result[0], file_size); + + if (file.fail()) { + LOG_ERR("Error reading file '%s': %s\n", filepath, strerror(errno)); + return false; + } + + LOG_DBG("Successfully read %zu bytes from file\n", file_size); + return true; } -static std::string read_prompt_from_file(const char * filepath, bool & success) { - success = false; - - std::ifstream in(filepath, std::ios::binary); - if (!in) { - fprintf(stderr, "%s: could not open file '%s' for reading: %s\n", __func__, filepath, strerror(errno)); - return std::string(); +static bool read_stdin_to_string(std::string & result) { + LOG_DBG("Reading prompt from standard input\n"); + + result.clear(); + char buffer[4096]; + + while (fgets(buffer, sizeof(buffer), stdin)) { + result += buffer; } - // do not assume the file is seekable (e.g. /dev/stdin) - std::stringstream buffer; - buffer << in.rdbuf(); - if (in.fail()) { - fprintf(stderr, "%s: could not read the entire file '%s': %s\n", __func__, filepath, strerror(errno)); - return std::string(); + + if (ferror(stdin)) { + LOG_ERR("Error reading from standard input\n"); + return false; } - - success = true; - return buffer.str(); + + // Remove trailing newline if present + if (!result.empty() && result.back() == '\n') { + result.pop_back(); + } + + LOG_DBG("Successfully read %zu bytes from stdin\n", result.size()); + return true; } -// -// Function: ingest_args(...) -> vector -// -// Takes argc and argv arguments, and converts them to a vector of UTF-8 encoded -// strings, as an STL vector. -// -// In particular, it handles character encoding shenanigans on Windows. -// -// Note: raw_argc and raw_argv are not actually read at all on Windows. -// On Windows we call GetCommandLineW to get the arguments in wchar_t -// format, ignoring the regular argc/argv arguments to main(). -// -// TODO: potential opportunity to roll common stuff into common/console.cpp -// in relation to Windows wchar_t shenanigans. -static std::vector ingest_args(int raw_argc, char ** raw_argv) { +static std::vector process_command_line_args(int raw_argc, char ** raw_argv) { + LOG_DBG("Processing %d command line arguments\n", raw_argc); std::vector argv; - - // Handle Windows, if given non-ASCII arguments. - // We convert wchar_t arguments into UTF-8 char* on this platform. - // Lets you invoke 'tokenize' on Windows cmd.exe with non-ASCII characters - // without throwing tantrums. + #if defined(_WIN32) int argc; - const LPWSTR cmdline_wargv = GetCommandLineW(); - LPWSTR * wargv = CommandLineToArgvW(cmdline_wargv, &argc); - - // silence unused arg warnings - (void) raw_argc; - (void) raw_argv; - + LPWSTR * wargv = CommandLineToArgvW(GetCommandLineW(), &argc); + if (!wargv) { + LOG_ERR("Failed to process command line arguments on Windows\n"); + return argv; + } + for (int i = 0; i < argc; ++i) { - int length_needed = WideCharToMultiByte(CP_UTF8, 0, wargv[i], wcslen(wargv[i]), 0, 0, NULL, NULL); - char * output_buf = (char *) calloc(length_needed+1, sizeof(char)); - GGML_ASSERT(output_buf); - - WideCharToMultiByte(CP_UTF8, 0, wargv[i], wcslen(wargv[i]), output_buf, length_needed, NULL, NULL); - output_buf[length_needed] = '\0'; - - argv.push_back(output_buf); - free(output_buf); + const int length_needed = WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, nullptr, 0, nullptr, nullptr); + if (length_needed <= 0) { + LocalFree(wargv); + LOG_ERR("Failed to convert Windows command line argument to UTF-8\n"); + return argv; + } + + std::vector buffer(length_needed); + WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, &buffer[0], length_needed, nullptr, nullptr); + argv.push_back(std::string(&buffer[0])); } - - LocalFree((HLOCAL) wargv); + + LocalFree(wargv); #else - int argc = raw_argc; - for (int i = 0; i < argc; ++i) { - argv.push_back(raw_argv[i]); + for (int i = 0; i < raw_argc; ++i) { + argv.push_back(std::string(raw_argv[i])); } #endif - - GGML_ASSERT((unsigned int) argc == argv.size()); - + + LOG_DBG("Processed %zu arguments\n", argv.size()); return argv; } -// -// Function: write_utf8_cstr_to_stdout(const char *) -> -// -// writes a string to standard output; taking into account that on Windows -// to display correctly you have to use special handling. Works even if the -// user has not set a unicode code page on a Windows cmd.exe. -// -// In case of invalid UTF-8, invalid_utf8 is set to true on Windows, and something -// a human-readable is written instead. -// -// On non-Windows systems, simply printfs() the string. -static void write_utf8_cstr_to_stdout(const char * str, bool & invalid_utf8) { - invalid_utf8 = false; - +static void write_utf8_to_stdout(const char * str, bool & invalid_utf8) { + invalid_utf8 = false; + #if defined(_WIN32) - // Are we in a console? - HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE); - DWORD dwMode = 0; - - // According to Microsoft docs: - // "WriteConsole fails if it is used with a standard handle that is redirected to a file." - // Also according to the docs, you can use GetConsoleMode to check for that. - if (hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(hConsole, &dwMode)) { - printf("%s", str); - return; - } - - // MultiByteToWideChar reports an error if str is empty, don't report - // them as invalid_utf8. - if (*str == 0) { - return; - } - int length_needed = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str, strlen(str), NULL, 0); - if (length_needed == 0) { - DWORD err = GetLastError(); - if (err == ERROR_NO_UNICODE_TRANSLATION) { - invalid_utf8 = true; - int len = strlen(str); - printf("<"); - for (int i = 0; i < len; ++i) { - if (i > 0) { - printf(" "); - } - printf("%02x", (uint8_t) str[i]); + const HANDLE console = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD mode; + + if (console == INVALID_HANDLE_VALUE || !GetConsoleMode(console, &mode)) { + printf("%s", str); + return; + } + + if (*str == '\0') { + return; + } + + const int str_len = strlen(str); + const int wide_length = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str, str_len, nullptr, 0); + if (wide_length == 0) { + const DWORD error = GetLastError(); + if (error == ERROR_NO_UNICODE_TRANSLATION) { + invalid_utf8 = true; + printf("<"); + for (int i = 0; i < str_len; ++i) { + if (i > 0) { + printf(" "); } - printf(">"); - return; + printf("%02x", (uint8_t) str[i]); } - GGML_ABORT("MultiByteToWideChar() failed in an unexpected way."); + printf(">"); + return; } - - LPWSTR wstr = (LPWSTR) calloc(length_needed+1, sizeof(*wstr)); - GGML_ASSERT(wstr); - - MultiByteToWideChar(CP_UTF8, 0, str, strlen(str), wstr, length_needed); - WriteConsoleW(hConsole, wstr, length_needed, NULL, NULL); - - free(wstr); + LOG_ERR("Unexpected error in UTF-8 to wide char conversion\n"); + return; + } + + std::vector wide_str(wide_length); + MultiByteToWideChar(CP_UTF8, 0, str, str_len, &wide_str[0], wide_length); + + DWORD written; + WriteConsoleW(console, &wide_str[0], wide_length, &written, nullptr); #else - // TODO: reporting invalid_utf8 would be useful on non-Windows too. - // printf will silently just write bad unicode. - printf("%s", str); + printf("%s", str); #endif } -int main(int raw_argc, char ** raw_argv) { - const std::vector argv = ingest_args(raw_argc, raw_argv); - const int argc = argv.size(); - - if (argc <= 1) { - print_usage_information(argv[0].c_str()); - return 1; +static bool parse_command_line_args(const std::vector & argv, tokenizer_config & config) { + LOG_DBG("Parsing %zu command line arguments\n", argv.size()); + + if (argv.size() <= 1) { + LOG_ERR("No arguments provided\n"); + return false; } - - ////// - // Read out all the command line arguments. - ////// - - // variables where to put any arguments we see. - bool printing_ids = false; - bool no_bos = false; - bool no_escape = false; - bool no_parse_special = false; - bool disable_logging = false; - bool show_token_count = false; - const char * model_path = NULL; - const char * prompt_path = NULL; - const char * prompt_arg = NULL; - - // track which arguments were explicitly given - // used for sanity checking down the line + bool model_path_set = false; bool prompt_path_set = false; bool prompt_set = false; bool stdin_set = false; - - int iarg = 1; - for (; iarg < argc; ++iarg) { - std::string arg{argv[iarg]}; + + for (size_t i = 1; i < argv.size(); ++i) { + const std::string & arg = argv[i]; + LOG_DBG("Processing argument: %s\n", arg.c_str()); + if (arg == "-h" || arg == "--help") { print_usage_information(argv[0].c_str()); - return 0; + return false; } else if (arg == "--ids") { - printing_ids = true; - } - else if (arg == "-m" || arg == "--model") { - if (model_path_set) { - fprintf(stderr, "Error: -m or --model specified multiple times.\n"); - return 1; - } - model_path = argv[++iarg].c_str(); - model_path_set = true; + LOG_DBG("Enabling ID-only output\n"); + config.print_ids = true; } else if (arg == "--no-bos") { - no_bos = true; + LOG_DBG("Disabling BOS token\n"); + config.no_bos = true; } else if (arg == "--no-escape") { - no_escape = true; + LOG_DBG("Disabling escape sequence processing\n"); + config.no_escape = true; } else if (arg == "--no-parse-special") { - no_parse_special = true; + LOG_DBG("Disabling special token parsing\n"); + config.no_parse_special = true; } - else if (arg == "-p" || arg == "--prompt") { - if (prompt_set) { - fprintf(stderr, "Error: -p or --prompt specified multiple times.\n"); - return 1; - } - prompt_arg = argv[++iarg].c_str(); - prompt_set = true; + else if (arg == "--log-disable") { + LOG_DBG("Disabling logging\n"); + config.disable_logging = true; } - else if (arg == "-f" || arg == "--file") { - if (prompt_path_set) { - fprintf(stderr, "Error: -f or --file specified multiple times.\n"); - return 1; - } - prompt_path = argv[++iarg].c_str(); - prompt_path_set = true; + else if (arg == "--show-count") { + LOG_DBG("Enabling token count display\n"); + config.show_token_count = true; } else if (arg == "--stdin") { + if (prompt_path_set || prompt_set || stdin_set) { + LOG_ERR("Multiple prompt sources specified (--stdin, --file, --prompt are mutually exclusive)\n"); + return false; + } + LOG_DBG("Using stdin as prompt source\n"); stdin_set = true; + config.prompt_source = tokenizer_config::PROMPT_SOURCE_STDIN; } - else if (arg == "--log-disable") { - disable_logging = true; + else if ((arg == "-m" || arg == "--model") && i + 1 < argv.size()) { + if (model_path_set) { + LOG_ERR("Model path specified multiple times\n"); + return false; + } + config.model_path = argv[++i].c_str(); + model_path_set = true; + LOG_DBG("Model path set to: %s\n", config.model_path); } - else if (arg == "--show-count") { - show_token_count = true; + else if ((arg == "-f" || arg == "--file") && i + 1 < argv.size()) { + if (prompt_path_set || prompt_set || stdin_set) { + LOG_ERR("Multiple prompt sources specified (--stdin, --file, --prompt are mutually exclusive)\n"); + return false; + } + const std::string filename = argv[++i]; + if (!read_file_to_string(filename.c_str(), config.prompt)) { + return false; + } + prompt_path_set = true; + config.prompt_source = tokenizer_config::PROMPT_SOURCE_FILE; + LOG_DBG("Using file as prompt source: %s\n", filename.c_str()); + } + else if ((arg == "-p" || arg == "--prompt") && i + 1 < argv.size()) { + if (prompt_path_set || prompt_set || stdin_set) { + LOG_ERR("Multiple prompt sources specified (--stdin, --file, --prompt are mutually exclusive)\n"); + return false; + } + config.prompt = argv[++i]; + prompt_set = true; + config.prompt_source = tokenizer_config::PROMPT_SOURCE_ARGUMENT; + LOG_DBG("Using command line argument as prompt\n"); + } + else if (arg == "-m" || arg == "--model" || arg == "-f" || arg == "--file" || arg == "-p" || arg == "--prompt") { + LOG_ERR("Option %s requires an argument\n", arg.c_str()); + return false; } else { - fprintf(stderr, "Error: unknown option '%s'\n", argv[iarg].c_str()); - return 1; + LOG_ERR("Unknown option: %s\n", arg.c_str()); + return false; } } - - ////// - // Sanity check the command line arguments. - ////// - - // Check that we have the required stuff set. - if (model_path_set && model_path == NULL) { - fprintf(stderr, "Error: --model requires an argument.\n"); - return 1; - } + + // Validate required arguments if (!model_path_set) { - fprintf(stderr, "Error: must specify --model.\n"); - return 1; - } - if (prompt_path_set && prompt_path == NULL) { - fprintf(stderr, "Error: --file requires an argument.\n"); - return 1; - } - if (prompt_set && prompt_arg == NULL) { - fprintf(stderr, "Error: --prompt requires an argument.\n"); - return 1; + LOG_ERR("Model path is required (use -m or --model)\n"); + return false; } - const int prompts_set = !!(prompt_path_set) + !!(prompt_set) + !!(stdin_set); - if (prompts_set > 1) { - fprintf(stderr, "Error: --stdin, --file and --prompt are mutually exclusive.\n"); - return 1; + + if (config.prompt_source == tokenizer_config::PROMPT_SOURCE_NONE) { + LOG_ERR("Prompt source is required (use --stdin, --file, or --prompt)\n"); + return false; } - // Must have some prompt. - if (prompts_set == 0) { - fprintf(stderr, "Error: must specify one of: --stdin, --file or --prompt.\n"); - return 1; - } - - GGML_ASSERT(model_path); - GGML_ASSERT(prompt_path || prompt_arg || stdin_set); - - ////// - // Figure out where will the prompt come from. - ////// + + LOG_DBG("Command line arguments parsed successfully\n"); + return true; +} - std::string prompt; - if (prompt_path_set) { - bool success = false; - prompt = read_prompt_from_file(prompt_path, success); - if (!success) { - return 1; - } - } else if (prompt_set) { - prompt = prompt_arg; - } else { - GGML_ASSERT(stdin_set); - // we read stdin *after* loading model (early exit if model cannot - // be loaded, which can be a nicer user experience) +static bool load_prompt_from_source(tokenizer_config & config) { + if (config.prompt_source == tokenizer_config::PROMPT_SOURCE_STDIN) { + return read_stdin_to_string(config.prompt); } + return true; // File and argument sources already loaded during parsing +} - ////// - // Start actually doing the tokenizing stuff. - ////// - +static void setup_logging_system(bool disable_logging) { + // Setup common logging with reasonable defaults + common_log_set_colors(common_log_main(), true); + common_log_set_prefix(common_log_main(), true); + common_log_set_timestamps(common_log_main(), false); + common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG); + if (disable_logging) { - llama_log_set(llama_log_callback_null, NULL); - } - - llama_backend_init(); - - llama_model_params model_params = llama_model_default_params(); - model_params.vocab_only = true; - llama_model * model = llama_model_load_from_file(model_path, model_params); - if (!model) { - fprintf(stderr, "Error: could not load model from file '%s'.\n", model_path); - return 1; - } - - const llama_vocab * vocab = llama_model_get_vocab(model); - - llama_context_params ctx_params = llama_context_default_params(); - llama_context * ctx = llama_init_from_model(model, ctx_params); - if (!ctx) { - fprintf(stderr, "Error: could not create context.\n"); - return 1; + LOG_DBG("Disabling LLAMA backend logging\n"); + llama_log_set([](ggml_log_level, const char *, void *){}, nullptr); + // Reduce verbosity to only show errors + common_log_set_verbosity_thold(-1); } +} - // read entire prompt from stdin? - if (stdin_set) { - GGML_ASSERT(!prompt_path_set && !prompt_set); - - std::stringstream stdin_buffer; - stdin_buffer << std::cin.rdbuf(); - if (std::cin.fail()) { - fprintf(stderr, "Error: could not read the entire standard input.\n"); - return 1; - } - - prompt = stdin_buffer.str(); - } - - const bool model_wants_add_bos = llama_vocab_get_add_bos(vocab); - const bool add_bos = model_wants_add_bos && !no_bos; - const bool parse_special = !no_parse_special; - const bool escape = !no_escape; - - if (escape) { +static bool tokenize_and_print_results(const tokenizer_config & config, llama_model * model, llama_context * ctx) { + LOG_INF("Starting tokenization\n"); + std::string prompt = config.prompt; + + if (!config.no_escape) { + LOG_DBG("Processing escape sequences in prompt\n"); string_process_escapes(prompt); } - - std::vector tokens; - tokens = common_tokenize(vocab, prompt, add_bos, parse_special); - - if (printing_ids) { + + const llama_vocab * vocab = llama_model_get_vocab(model); + const bool add_bos = llama_vocab_get_add_bos(vocab) && !config.no_bos; + const bool parse_special = !config.no_parse_special; + + LOG_DBG("Tokenization settings: add_bos=%s, parse_special=%s\n", + add_bos ? "true" : "false", parse_special ? "true" : "false"); + + const std::vector tokens = common_tokenize(vocab, prompt, add_bos, parse_special); + + LOG_INF("Tokenized %zu characters into %zu tokens\n", prompt.size(), tokens.size()); + + if (config.print_ids) { printf("["); - } - - for (int i = 0; i < (int) tokens.size(); i++) { - if (printing_ids) { + for (size_t i = 0; i < tokens.size(); ++i) { if (i > 0) { printf(", "); } printf("%d", tokens[i]); - } else { + } + printf("]\n"); + } else { + for (size_t i = 0; i < tokens.size(); ++i) { bool invalid_utf8 = false; printf("%6d -> '", tokens[i]); - write_utf8_cstr_to_stdout(common_token_to_piece(ctx, tokens[i]).c_str(), invalid_utf8); + + const std::string token_piece = common_token_to_piece(ctx, tokens[i]); + write_utf8_to_stdout(token_piece.c_str(), invalid_utf8); + if (invalid_utf8) { - printf("' (utf-8 decode failure)\n"); + printf("' (UTF-8 decode failure)\n"); } else { printf("'\n"); } } } - - if (printing_ids) { - printf("]\n"); + + if (config.show_token_count) { + printf("Total number of tokens: %zu\n", tokens.size()); } + + LOG_INF("Tokenization completed successfully\n"); + return true; +} - if (show_token_count) { - printf("Total number of tokens: %zu\n", tokens.size()); +int main(int raw_argc, char ** raw_argv) { + // Initialize logging first + setup_logging_system(false); + + LOG_DBG("Starting tokenizer application\n"); + + // Process command line arguments + const std::vector argv = process_command_line_args(raw_argc, raw_argv); + if (argv.empty()) { + LOG_ERR("Failed to process command line arguments\n"); + return 1; } - // silence valgrind + + tokenizer_config config; + if (!parse_command_line_args(argv, config)) { + if (raw_argc > 0) { + LOG_ERR("Use '%s --help' for usage information\n", raw_argv[0]); + } + return 1; + } + + // Reconfigure logging based on user preferences + if (config.disable_logging) { + setup_logging_system(true); + } + + // Initialize backend + LOG_INF("Initializing LLAMA backend\n"); + llama_backend_init(); + + // Load model + LOG_INF("Loading model from: %s\n", config.model_path); + llama_model_params model_params = llama_model_default_params(); + model_params.vocab_only = true; + + llama_model * model = llama_model_load_from_file(config.model_path, model_params); + if (!model) { + LOG_ERR("Failed to load model from: %s\n", config.model_path); + return 1; + } + + LOG_DBG("Model loaded successfully, creating context\n"); + llama_context_params ctx_params = llama_context_default_params(); + llama_context * ctx = llama_init_from_model(model, ctx_params); + if (!ctx) { + LOG_ERR("Failed to create context from model\n"); + llama_model_free(model); + return 1; + } + + LOG_INF("Model and context initialized successfully\n"); + + // Load prompt (after model loading for better UX - fail fast if model is bad) + if (!load_prompt_from_source(config)) { + llama_free(ctx); + llama_model_free(model); + return 1; + } + + // Tokenize and output + const bool success = tokenize_and_print_results(config, model, ctx); + + // Cleanup + LOG_DBG("Cleaning up resources\n"); llama_free(ctx); llama_model_free(model); - - return 0; -} + + if (success) { + LOG_DBG("Application completed successfully\n"); + return 0; + } else { + LOG_ERR("Application failed\n"); + return 1; + } +} \ No newline at end of file