diff --git a/tools/tokenize/tokenize.cpp b/tools/tokenize/tokenize.cpp
index 7375759ebe25b..ad866277726cb 100644
--- a/tools/tokenize/tokenize.cpp
+++ b/tools/tokenize/tokenize.cpp
@@ -1,5 +1,5 @@
 #include "common.h"
-//#include "log.h" // TODO: start using log.h
+#include "log.h"
 #include "llama.h"
 
 #include <cstdio>
@@ -7,410 +7,458 @@
 #include <fstream>
 #include <string>
 #include <vector>
-#include <iostream> // TODO: remove me
 
 #if defined(_WIN32)
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
-#include <shellapi.h>   // For CommandLineToArgvW
+#include <shellapi.h>
 #endif
 
+// Options gathered from the command line for one tokenize run.
+// Every flag starts off and no prompt source is selected until parsing sets one.
+struct tokenizer_config {
+    // Where the prompt text comes from.
+    enum prompt_source_type {
+        PROMPT_SOURCE_NONE,
+        PROMPT_SOURCE_FILE,
+        PROMPT_SOURCE_ARGUMENT,
+        PROMPT_SOURCE_STDIN
+    };
+
+    // Path of the model file; a borrowed pointer that this struct never frees.
+    const char * model_path = nullptr;
+    // Text to tokenize.
+    std::string prompt;
+
+    // Output switches.
+    bool print_ids        = false;
+    bool show_token_count = false;
+
+    // Tokenization and logging switches.
+    bool no_bos           = false;
+    bool no_escape        = false;
+    bool no_parse_special = false;
+    bool disable_logging  = false;
+
+    prompt_source_type prompt_source = PROMPT_SOURCE_NONE;
+};
+
 static void print_usage_information(const char * argv0) {
-    printf("usage: %s [options]\n\n", argv0);
-    printf("The tokenize program tokenizes a prompt using a given model,\n");
-    printf("and prints the resulting tokens to standard output.\n\n");
-    printf("It needs a model file, a prompt, and optionally other flags\n");
-    printf("to control the behavior of the tokenizer.\n\n");
-    printf("    The possible options are:\n");
-    printf("\n");
-    printf("    -h, --help                           print this help and exit\n");
-    printf("    -m MODEL_PATH, --model MODEL_PATH    path to model.\n");
-    printf("    --ids                                if given, only print numerical token IDs, and not token strings.\n");
-    printf("                                         The output format looks like [1, 2, 3], i.e. parseable by Python.\n");
-    printf("    -f PROMPT_FNAME, --file PROMPT_FNAME read prompt from a file.\n");
-    printf("    -p PROMPT, --prompt PROMPT           read prompt from the argument.\n");
-    printf("    --stdin                              read prompt from standard input.\n");
-    printf("    --no-bos                             do not ever add a BOS token to the prompt, even if normally the model uses a BOS token.\n");
-    printf("    --no-escape                          do not escape input (such as \\n, \\t, etc.).\n");
-    printf("    --no-parse-special                   do not parse control tokens.\n");
-    printf("    --log-disable                        disable logs. Makes stderr quiet when loading the model.\n");
-    printf("    --show-count                         print the total number of tokens.\n");
+    LOG("Usage: %s [options]\n\n", argv0); // argv0: program name shown in the usage line and in the examples
+    LOG("The tokenize program tokenizes a prompt using a given model,\n"); // NOTE(review): help now goes through LOG, not printf - confirm where LOG writes
+    LOG("and prints the resulting tokens to standard output.\n\n");
+    LOG("Required:\n"); // section: arguments every run needs
+    LOG("  -m, --model MODEL_PATH           Path to the model file\n");
+    LOG("  One of: --file, --prompt, --stdin\n\n");
+    LOG("Prompt sources (exactly one required):\n"); // section: the three mutually exclusive prompt inputs
+    LOG("  -f, --file FILENAME              Read prompt from file\n");
+    LOG("  -p, --prompt TEXT                Use prompt from command line\n");
+    LOG("  --stdin                          Read prompt from standard input\n\n");
+    LOG("Output options:\n"); // section: what gets printed
+    LOG("  --ids                            Print only token IDs as [1, 2, 3]\n");
+    LOG("  --show-count                     Show total token count\n\n");
+    LOG("Tokenization options:\n"); // section: switches that change how the prompt is tokenized
+    LOG("  --no-bos                         Don't add BOS token\n");
+    LOG("  --no-escape                      Don't process escape sequences (\\n, \\t)\n");
+    LOG("  --no-parse-special               Don't parse special/control tokens\n\n");
+    LOG("Other options:\n"); // section: logging control and help
+    LOG("  --log-disable                    Disable model loading logs\n");
+    LOG("  -h, --help                       Show this help and exit\n");
+    LOG("\nExamples:\n"); // section: sample invocations built from argv0
+    LOG("  %s -m model.gguf -p \"Hello world\"\n", argv0);
+    LOG("  %s -m model.gguf --file input.txt --ids\n", argv0);
+    LOG("  echo \"Hello\" | %s -m model.gguf --stdin\n", argv0);
 }
 
-static void llama_log_callback_null(ggml_log_level level, const char * text, void * user_data) {
-    (void) level;
-    (void) text;
-    (void) user_data;
+// Reads the entire file at `filepath` (binary mode) into `result`.
+// Returns false, after logging why, when the file cannot be opened or read.
+static bool read_file_to_string(const char * filepath, std::string & result) {
+    LOG_DBG("Reading prompt from file: %s\n", filepath);
+    std::ifstream file(filepath, std::ios::binary);
+    if (!file) {
+        LOG_ERR("Cannot open file '%s': %s\n", filepath, strerror(errno));
+        return false;
+    }
+    // Read in chunks rather than seeking for the size: the path may be a pipe
+    // or /dev/stdin, where tellg() returns -1 and would become a huge resize().
+    char chunk[4096];
+    result.clear();
+    while (file.read(chunk, sizeof(chunk)) || file.gcount() > 0) {
+        result.append(chunk, static_cast<size_t>(file.gcount()));
+    }
+    // Ending on EOF sets failbit by design; only badbit means a real I/O error.
+    if (file.bad()) {
+        LOG_ERR("Error reading file '%s': %s\n", filepath, strerror(errno));
+        return false;
+    }
+    LOG_DBG("Successfully read %zu bytes from file\n", result.size());
+    return true;
 }
 
-static std::string read_prompt_from_file(const char * filepath, bool & success) {
-    success = false;
-
-    std::ifstream in(filepath, std::ios::binary);
-    if (!in) {
-        fprintf(stderr, "%s: could not open file '%s' for reading: %s\n", __func__, filepath, strerror(errno));
-        return std::string();
+// Reads all of stdin into `result`, minus one trailing '\n'; returns false on a read error.
+static bool read_stdin_to_string(std::string & result) {
+    LOG_DBG("Reading prompt from standard input\n");
+    result.clear();
+    char   buffer[4096];
+    size_t n_read = 0; // byte count from fread(): keeps embedded NUL bytes that `result += buffer` would cut off
+    while ((n_read = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
+        result.append(buffer, n_read);
     }
-    // do not assume the file is seekable (e.g. /dev/stdin)
-    std::stringstream buffer;
-    buffer << in.rdbuf();
-    if (in.fail()) {
-        fprintf(stderr, "%s: could not read the entire file '%s': %s\n", __func__, filepath, strerror(errno));
-        return std::string();
+    // fread() returns 0 both at end-of-file and on error; ferror() tells them apart.
+    if (ferror(stdin)) {
+        LOG_ERR("Error reading from standard input\n");
+        return false;
     }
-
-    success = true;
-    return buffer.str();
+    
+    // Drop a single trailing newline, if present, so it is not tokenized with the prompt.
+    if (!result.empty() && result.back() == '\n') {
+        result.pop_back();
+    }
+    
+    LOG_DBG("Successfully read %zu bytes from stdin\n", result.size());
+    return true;
 }
 
-//
-// Function: ingest_args(...) -> vector<string>
-//
-//  Takes argc and argv arguments, and converts them to a vector of UTF-8 encoded
-//  strings, as an STL vector<string>.
-//
-//  In particular, it handles character encoding shenanigans on Windows.
-//
-// Note: raw_argc and raw_argv are not actually read at all on Windows.
-//       On Windows we call GetCommandLineW to get the arguments in wchar_t
-//       format, ignoring the regular argc/argv arguments to main().
-//
-// TODO: potential opportunity to roll common stuff into common/console.cpp
-//       in relation to Windows wchar_t shenanigans.
-static std::vector<std::string> ingest_args(int raw_argc, char ** raw_argv) {
+static std::vector<std::string> process_command_line_args(int raw_argc, char ** raw_argv) {
+    LOG_DBG("Processing %d command line arguments\n", raw_argc);
     std::vector<std::string> argv;
-
-    // Handle Windows, if given non-ASCII arguments.
-    // We convert wchar_t arguments into UTF-8 char* on this platform.
-    // Lets you invoke 'tokenize' on Windows cmd.exe with non-ASCII characters
-    // without throwing tantrums.
+    // Returns the arguments as UTF-8 strings; an empty vector always means failure, never a partial list.
 #if defined(_WIN32)
     int argc;
-    const LPWSTR cmdline_wargv = GetCommandLineW();
-    LPWSTR * wargv = CommandLineToArgvW(cmdline_wargv, &argc);
-
-    // silence unused arg warnings
-    (void) raw_argc;
-    (void) raw_argv;
-
+    LPWSTR * wargv = CommandLineToArgvW(GetCommandLineW(), &argc);
+    if (!wargv) {
+        LOG_ERR("Failed to process command line arguments on Windows\n");
+        return argv;
+    }
+    (void) raw_argv; // unused on Windows: the wide command line is converted instead
     for (int i = 0; i < argc; ++i) {
-        int length_needed = WideCharToMultiByte(CP_UTF8, 0, wargv[i], wcslen(wargv[i]), 0, 0, NULL, NULL);
-        char * output_buf = (char *) calloc(length_needed+1, sizeof(char));
-        GGML_ASSERT(output_buf);
-
-        WideCharToMultiByte(CP_UTF8, 0, wargv[i], wcslen(wargv[i]), output_buf, length_needed, NULL, NULL);
-        output_buf[length_needed] = '\0';
-
-        argv.push_back(output_buf);
-        free(output_buf);
+        const int length_needed = WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, nullptr, 0, nullptr, nullptr);
+        if (length_needed <= 0) {
+            LocalFree(wargv);
+            LOG_ERR("Failed to convert Windows command line argument to UTF-8\n");
+            argv.clear(); // the caller detects failure via empty(); do not hand back the arguments converted so far
+            return argv;
+        }
+        std::vector<char> buffer(length_needed); // a source length of -1 makes the size include the terminating NUL
+        WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, &buffer[0], length_needed, nullptr, nullptr);
+        argv.push_back(std::string(&buffer[0]));
     }
-
-    LocalFree((HLOCAL) wargv);
+    
+    LocalFree(wargv);
 #else
-    int argc = raw_argc;
-    for (int i = 0; i < argc; ++i) {
-        argv.push_back(raw_argv[i]);
+    for (int i = 0; i < raw_argc; ++i) {
+        argv.push_back(std::string(raw_argv[i]));
     }
 #endif
-
-    GGML_ASSERT((unsigned int) argc == argv.size());
-
+    
+    LOG_DBG("Processed %zu arguments\n", argv.size());
     return argv;
 }
 
-//
-// Function: write_utf8_cstr_to_stdout(const char *) -> <writes to stdout>
-//
-// writes a string to standard output; taking into account that on Windows
-// to display correctly you have to use special handling. Works even if the
-// user has not set a unicode code page on a Windows cmd.exe.
-//
-// In case of invalid UTF-8, invalid_utf8 is set to true on Windows, and something
-// a human-readable is written instead.
-//
-// On non-Windows systems, simply printfs() the string.
-static void write_utf8_cstr_to_stdout(const char * str, bool & invalid_utf8) {
-        invalid_utf8 = false;
-
+static void write_utf8_to_stdout(const char * str, bool & invalid_utf8) {
+    invalid_utf8 = false;
+    // Writes str to stdout. invalid_utf8 can only become true on the Windows console path; elsewhere printf emits the bytes as-is.
 #if defined(_WIN32)
-        // Are we in a console?
-        HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
-        DWORD dwMode = 0;
-
-        // According to Microsoft docs:
-        // "WriteConsole fails if it is used with a standard handle that is redirected to a file."
-        // Also according to the docs, you can use GetConsoleMode to check for that.
-        if (hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(hConsole, &dwMode)) {
-            printf("%s", str);
-            return;
-        }
-
-        // MultiByteToWideChar reports an error if str is empty, don't report
-        // them as invalid_utf8.
-        if (*str == 0) {
-            return;
-        }
-        int length_needed = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str, strlen(str), NULL, 0);
-        if (length_needed == 0) {
-            DWORD err = GetLastError();
-            if (err == ERROR_NO_UNICODE_TRANSLATION) {
-                invalid_utf8 = true;
-                int len = strlen(str);
-                printf("<");
-                for (int i = 0; i < len; ++i) {
-                    if (i > 0) {
-                        printf(" ");
-                    }
-                    printf("%02x", (uint8_t) str[i]);
+    const HANDLE console = GetStdHandle(STD_OUTPUT_HANDLE);
+    DWORD mode;
+    // A failing GetConsoleMode() is treated as "stdout is not a console" (e.g. redirected): fall back to printf.
+    if (console == INVALID_HANDLE_VALUE || !GetConsoleMode(console, &mode)) {
+        printf("%s", str);
+        return;
+    }
+    // Nothing to write; returning here also keeps an empty string from reaching the error handling below.
+    if (*str == '\0') {
+        return;
+    }
+    // First call only sizes the UTF-16 buffer. A zero result with ERROR_NO_UNICODE_TRANSLATION is reported as invalid UTF-8 and the bytes are printed as hex inside <...>.
+    const int str_len = strlen(str);
+    const int wide_length = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str, str_len, nullptr, 0);
+    if (wide_length == 0) {
+        const DWORD error = GetLastError();
+        if (error == ERROR_NO_UNICODE_TRANSLATION) {
+            invalid_utf8 = true;
+            printf("<");
+            for (int i = 0; i < str_len; ++i) {
+                if (i > 0) {
+                    printf(" ");
                 }
-                printf(">");
-                return;
+                printf("%02x", (uint8_t) str[i]);
             }
-            GGML_ABORT("MultiByteToWideChar() failed in an unexpected way.");
+            printf(">");
+            return;
         }
-
-        LPWSTR wstr = (LPWSTR) calloc(length_needed+1, sizeof(*wstr));
-        GGML_ASSERT(wstr);
-
-        MultiByteToWideChar(CP_UTF8, 0, str, strlen(str), wstr, length_needed);
-        WriteConsoleW(hConsole, wstr, length_needed, NULL, NULL);
-
-        free(wstr);
+        LOG_ERR("Unexpected error in UTF-8 to wide char conversion\n");
+        return;
+    }
+    // Second call performs the conversion into a buffer of exactly the measured size (no terminator is stored).
+    std::vector<wchar_t> wide_str(wide_length);
+    MultiByteToWideChar(CP_UTF8, 0, str, str_len, &wide_str[0], wide_length);
+    // The wide text is written straight to the console handle; `written` is required by the call but not inspected.
+    DWORD written;
+    WriteConsoleW(console, &wide_str[0], wide_length, &written, nullptr);
 #else
-        // TODO: reporting invalid_utf8 would be useful on non-Windows too.
-        // printf will silently just write bad unicode.
-        printf("%s", str);
+    printf("%s", str);
 #endif
 }
 
-int main(int raw_argc, char ** raw_argv) {
-    const std::vector<std::string> argv = ingest_args(raw_argc, raw_argv);
-    const int argc = argv.size();
-
-    if (argc <= 1) {
-        print_usage_information(argv[0].c_str());
-        return 1;
+static bool parse_command_line_args(const std::vector<std::string> & argv, tokenizer_config & config) {
+    LOG_DBG("Parsing %zu command line arguments\n", argv.size());
+    // Fills config from argv (argv[0] is the program name). Returns false on any error and also after printing help.
+    if (argv.size() <= 1) {
+        LOG_ERR("No arguments provided\n");
+        return false;
     }
-
-    //////
-    // Read out all the command line arguments.
-    //////
-
-    // variables where to put any arguments we see.
-    bool printing_ids = false;
-    bool no_bos = false;
-    bool no_escape = false;
-    bool no_parse_special = false;
-    bool disable_logging = false;
-    bool show_token_count = false;
-    const char * model_path = NULL;
-    const char * prompt_path = NULL;
-    const char * prompt_arg = NULL;
-
-    // track which arguments were explicitly given
-    // used for sanity checking down the line
+    // Remember which options were already seen, to reject a repeated model path and conflicting prompt sources.
     bool model_path_set = false;
     bool prompt_path_set = false;
     bool prompt_set = false;
     bool stdin_set = false;
-
-    int iarg = 1;
-    for (; iarg < argc; ++iarg) {
-        std::string arg{argv[iarg]};
+    
+    for (size_t i = 1; i < argv.size(); ++i) {
+        const std::string & arg = argv[i];
+        LOG_DBG("Processing argument: %s\n", arg.c_str());
+        // Options that take a value only match while a value follows; a trailing one falls through to the "requires an argument" branch.
         if (arg == "-h" || arg == "--help") {
             print_usage_information(argv[0].c_str());
-            return 0;
+            return false; // help is reported through the same false return as a parse error
         }
         else if (arg == "--ids") {
-            printing_ids = true;
-        }
-        else if (arg == "-m" || arg == "--model") {
-            if (model_path_set) {
-                fprintf(stderr, "Error: -m or --model specified multiple times.\n");
-                return 1;
-            }
-            model_path = argv[++iarg].c_str();
-            model_path_set = true;
+            LOG_DBG("Enabling ID-only output\n");
+            config.print_ids = true;
         }
         else if (arg == "--no-bos") {
-            no_bos = true;
+            LOG_DBG("Disabling BOS token\n");
+            config.no_bos = true;
         }
         else if (arg == "--no-escape") {
-            no_escape = true;
+            LOG_DBG("Disabling escape sequence processing\n");
+            config.no_escape = true;
         }
         else if (arg == "--no-parse-special") {
-            no_parse_special = true;
+            LOG_DBG("Disabling special token parsing\n");
+            config.no_parse_special = true;
         }
-        else if (arg == "-p" || arg == "--prompt") {
-            if (prompt_set) {
-                fprintf(stderr, "Error: -p or --prompt specified multiple times.\n");
-                return 1;
-            }
-            prompt_arg = argv[++iarg].c_str();
-            prompt_set = true;
+        else if (arg == "--log-disable") {
+            LOG_DBG("Disabling logging\n");
+            config.disable_logging = true;
         }
-        else if (arg == "-f" || arg == "--file") {
-            if (prompt_path_set) {
-                fprintf(stderr, "Error: -f or --file specified multiple times.\n");
-                return 1;
-            }
-            prompt_path = argv[++iarg].c_str();
-            prompt_path_set = true;
+        else if (arg == "--show-count") {
+            LOG_DBG("Enabling token count display\n");
+            config.show_token_count = true;
         }
         else if (arg == "--stdin") {
+            if (prompt_path_set || prompt_set || stdin_set) {
+                LOG_ERR("Multiple prompt sources specified (--stdin, --file, --prompt are mutually exclusive)\n");
+                return false;
+            }
+            LOG_DBG("Using stdin as prompt source\n");
             stdin_set = true;
+            config.prompt_source = tokenizer_config::PROMPT_SOURCE_STDIN; // only recorded here; stdin is not read by this function
         }
-        else if (arg == "--log-disable") {
-            disable_logging = true;
+        else if ((arg == "-m" || arg == "--model") && i + 1 < argv.size()) {
+            if (model_path_set) {
+                LOG_ERR("Model path specified multiple times\n");
+                return false;
+            }
+            config.model_path = argv[++i].c_str(); // points into argv, so argv must outlive config
+            model_path_set = true;
+            LOG_DBG("Model path set to: %s\n", config.model_path);
         }
-        else if (arg == "--show-count") {
-            show_token_count = true;
+        else if ((arg == "-f" || arg == "--file") && i + 1 < argv.size()) {
+            if (prompt_path_set || prompt_set || stdin_set) {
+                LOG_ERR("Multiple prompt sources specified (--stdin, --file, --prompt are mutually exclusive)\n");
+                return false;
+            }
+            const std::string filename = argv[++i];
+            if (!read_file_to_string(filename.c_str(), config.prompt)) { // the file is read right away, while parsing
+                return false;
+            }
+            prompt_path_set = true;
+            config.prompt_source = tokenizer_config::PROMPT_SOURCE_FILE;
+            LOG_DBG("Using file as prompt source: %s\n", filename.c_str());
+        }
+        else if ((arg == "-p" || arg == "--prompt") && i + 1 < argv.size()) {
+            if (prompt_path_set || prompt_set || stdin_set) {
+                LOG_ERR("Multiple prompt sources specified (--stdin, --file, --prompt are mutually exclusive)\n");
+                return false;
+            }
+            config.prompt = argv[++i];
+            prompt_set = true;
+            config.prompt_source = tokenizer_config::PROMPT_SOURCE_ARGUMENT;
+            LOG_DBG("Using command line argument as prompt\n");
+        }
+        else if (arg == "-m" || arg == "--model" || arg == "-f" || arg == "--file" || arg == "-p" || arg == "--prompt") {
+            LOG_ERR("Option %s requires an argument\n", arg.c_str());
+            return false;
         }
         else {
-            fprintf(stderr, "Error: unknown option '%s'\n", argv[iarg].c_str());
-            return 1;
+            LOG_ERR("Unknown option: %s\n", arg.c_str());
+            return false;
         }
     }
-
-    //////
-    // Sanity check the command line arguments.
-    //////
-
-    // Check that we have the required stuff set.
-    if (model_path_set && model_path == NULL) {
-        fprintf(stderr, "Error: --model requires an argument.\n");
-        return 1;
-    }
+    
+    // Validate required arguments
     if (!model_path_set) {
-        fprintf(stderr, "Error: must specify --model.\n");
-        return 1;
-    }
-    if (prompt_path_set && prompt_path == NULL) {
-        fprintf(stderr, "Error: --file requires an argument.\n");
-        return 1;
-    }
-    if (prompt_set && prompt_arg == NULL) {
-        fprintf(stderr, "Error: --prompt requires an argument.\n");
-        return 1;
+        LOG_ERR("Model path is required (use -m or --model)\n");
+        return false;
     }
-    const int prompts_set = !!(prompt_path_set) + !!(prompt_set) + !!(stdin_set);
-    if (prompts_set > 1) {
-        fprintf(stderr, "Error: --stdin, --file and --prompt are mutually exclusive.\n");
-        return 1;
+    // prompt_source is still NONE only when none of --stdin, --file or --prompt was accepted above.
+    if (config.prompt_source == tokenizer_config::PROMPT_SOURCE_NONE) {
+        LOG_ERR("Prompt source is required (use --stdin, --file, or --prompt)\n");
+        return false;
     }
-    // Must have some prompt.
-    if (prompts_set == 0) {
-        fprintf(stderr, "Error: must specify one of: --stdin, --file or --prompt.\n");
-        return 1;
-    }
-
-    GGML_ASSERT(model_path);
-    GGML_ASSERT(prompt_path || prompt_arg || stdin_set);
-
-    //////
-    // Figure out where will the prompt come from.
-    //////
+    
+    LOG_DBG("Command line arguments parsed successfully\n");
+    return true;
+}
 
-    std::string prompt;
-    if (prompt_path_set) {
-        bool success = false;
-        prompt = read_prompt_from_file(prompt_path, success);
-        if (!success) {
-            return 1;
-        }
-    } else if (prompt_set) {
-        prompt = prompt_arg;
-    } else {
-        GGML_ASSERT(stdin_set);
-        // we read stdin *after* loading model (early exit if model cannot
-        // be loaded, which can be a nicer user experience)
+static bool load_prompt_from_source(tokenizer_config & config) {
+    // File and argument prompts were already stored during parsing; only stdin is read here.
+    if (config.prompt_source != tokenizer_config::PROMPT_SOURCE_STDIN) {
+        return true;
     }
+    return read_stdin_to_string(config.prompt);
+}
 
-    //////
-    // Start actually doing the tokenizing stuff.
-    //////
-
+static void setup_logging_system(bool disable_logging) {
+    // Configure the common logger: colors and prefixes on, timestamps off.
+    common_log_set_colors(common_log_main(), true);
+    common_log_set_prefix(common_log_main(), true);
+    common_log_set_timestamps(common_log_main(), false);
+    common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
+    // NOTE(review): the LOG_DEFAULT_DEBUG threshold presumably makes LOG_DBG output visible by default - confirm that is intended.
     if (disable_logging) {
-        llama_log_set(llama_log_callback_null, NULL);
-    }
-
-    llama_backend_init();
-
-    llama_model_params model_params = llama_model_default_params();
-    model_params.vocab_only = true;
-    llama_model * model = llama_model_load_from_file(model_path, model_params);
-    if (!model) {
-        fprintf(stderr, "Error: could not load model from file '%s'.\n", model_path);
-        return 1;
-    }
-
-    const llama_vocab * vocab = llama_model_get_vocab(model);
-
-    llama_context_params ctx_params = llama_context_default_params();
-    llama_context * ctx = llama_init_from_model(model, ctx_params);
-    if (!ctx) {
-        fprintf(stderr, "Error: could not create context.\n");
-        return 1;
+        LOG_DBG("Disabling LLAMA backend logging\n");
+        llama_log_set([](ggml_log_level, const char *, void *){}, nullptr); // install a callback that ignores every message it is given
+        // Lower the threshold to -1. NOTE(review): meant to leave only errors visible - confirm against log.h what still gets through.
+        common_log_set_verbosity_thold(-1);
     }
+}
 
-    // read entire prompt from stdin?
-    if (stdin_set) {
-        GGML_ASSERT(!prompt_path_set && !prompt_set);
-
-        std::stringstream stdin_buffer;
-        stdin_buffer << std::cin.rdbuf();
-        if (std::cin.fail()) {
-            fprintf(stderr, "Error: could not read the entire standard input.\n");
-            return 1;
-        }
-
-        prompt = stdin_buffer.str();
-    }
-
-    const bool model_wants_add_bos = llama_vocab_get_add_bos(vocab);
-    const bool add_bos = model_wants_add_bos && !no_bos;
-    const bool parse_special = !no_parse_special;
-    const bool escape = !no_escape;
-
-    if (escape) {
+static bool tokenize_and_print_results(const tokenizer_config & config, llama_model * model, llama_context * ctx) {
+    LOG_INF("Starting tokenization\n");
+    std::string prompt = config.prompt; // working copy: config is const and the escapes are processed on this string
+    // Tokenizes config.prompt and prints the tokens to stdout; as written it always returns true.
+    if (!config.no_escape) {
+        LOG_DBG("Processing escape sequences in prompt\n");
         string_process_escapes(prompt);
     }
-
-    std::vector<llama_token> tokens;
-    tokens = common_tokenize(vocab, prompt, add_bos, parse_special);
-
-    if (printing_ids) {
+    // BOS is added only when the vocab asks for it and --no-bos was not given.
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+    const bool add_bos = llama_vocab_get_add_bos(vocab) && !config.no_bos;
+    const bool parse_special = !config.no_parse_special;
+    
+    LOG_DBG("Tokenization settings: add_bos=%s, parse_special=%s\n", 
+            add_bos ? "true" : "false", parse_special ? "true" : "false");
+    
+    const std::vector<llama_token> tokens = common_tokenize(vocab, prompt, add_bos, parse_special);
+    // NOTE(review): prompt.size() is a byte count, so "characters" in the message below is inexact for multi-byte UTF-8.
+    LOG_INF("Tokenized %zu characters into %zu tokens\n", prompt.size(), tokens.size());
+    // --ids prints one bracketed, comma-separated list; otherwise one "id -> 'piece'" line is printed per token.
+    if (config.print_ids) {
         printf("[");
-    }
-
-    for (int i = 0; i < (int) tokens.size(); i++) {
-        if (printing_ids) {
+        for (size_t i = 0; i < tokens.size(); ++i) {
             if (i > 0) {
                 printf(", ");
             }
             printf("%d", tokens[i]);
-        } else {
+        }
+        printf("]\n");
+    } else {
+        for (size_t i = 0; i < tokens.size(); ++i) {
             bool invalid_utf8 = false;
             printf("%6d -> '", tokens[i]);
-            write_utf8_cstr_to_stdout(common_token_to_piece(ctx, tokens[i]).c_str(), invalid_utf8);
+            // Hold the piece in a named string so its c_str() stays valid for the call below.
+            const std::string token_piece = common_token_to_piece(ctx, tokens[i]);
+            write_utf8_to_stdout(token_piece.c_str(), invalid_utf8);
+            
             if (invalid_utf8) {
-                printf("' (utf-8 decode failure)\n");
+                printf("' (UTF-8 decode failure)\n");
             } else {
                 printf("'\n");
             }
         }
     }
-
-    if (printing_ids) {
-        printf("]\n");
+    
+    if (config.show_token_count) {
+        printf("Total number of tokens: %zu\n", tokens.size());
     }
+    
+    LOG_INF("Tokenization completed successfully\n");
+    return true;
+}
 
-    if (show_token_count) {
-        printf("Total number of tokens: %zu\n", tokens.size());
+int main(int raw_argc, char ** raw_argv) {
+    // Entry point: parse the arguments, load the model with vocab_only set, tokenize the prompt and print the result.
+    setup_logging_system(false);
+    LOG_DBG("Starting tokenizer application\n");
+
+    // Process command line arguments
+    const std::vector<std::string> argv = process_command_line_args(raw_argc, raw_argv);
+    if (argv.empty()) {
+        LOG_ERR("Failed to process command line arguments\n");
+        return 1;
     }
-    // silence valgrind
+    // -h/--help is a successful run: print usage and exit 0 here, because the
+    // parser reports help through the same false return as a real error.
+    for (size_t i = 1; i < argv.size(); ++i) {
+        const std::string & arg = argv[i];
+        if (arg == "-h" || arg == "--help") {
+            print_usage_information(argv[0].c_str());
+            return 0;
+        }
+        if (arg == "-m" || arg == "--model" || arg == "-f" || arg == "--file" || arg == "-p" || arg == "--prompt") {
+            ++i; // skip the option's value so that a value spelled "--help" is not mistaken for the flag
+        }
+    }
+
+    tokenizer_config config;
+    if (!parse_command_line_args(argv, config)) {
+        if (raw_argc > 0) {
+            LOG_ERR("Use '%s --help' for usage information\n", raw_argv[0]);
+        }
+        return 1;
+    }
+    // Reconfigure logging based on user preferences
+    if (config.disable_logging) {
+        setup_logging_system(true);
+    }
+
+    LOG_INF("Initializing LLAMA backend\n");
+    llama_backend_init();
+    LOG_INF("Loading model from: %s\n", config.model_path);
+    llama_model_params model_params = llama_model_default_params();
+    model_params.vocab_only = true;
+    llama_model * model = llama_model_load_from_file(config.model_path, model_params);
+    if (!model) {
+        LOG_ERR("Failed to load model from: %s\n", config.model_path);
+        return 1;
+    }
+
+    LOG_DBG("Model loaded successfully, creating context\n");
+    llama_context_params ctx_params = llama_context_default_params();
+    llama_context * ctx = llama_init_from_model(model, ctx_params);
+    if (!ctx) {
+        LOG_ERR("Failed to create context from model\n");
+        llama_model_free(model);
+        return 1;
+    }
+    LOG_INF("Model and context initialized successfully\n");
+
+    // Load prompt (after model loading for better UX - fail fast if model is bad)
+    if (!load_prompt_from_source(config)) {
+        llama_free(ctx);
+        llama_model_free(model);
+        return 1;
+    }
+
+    const bool success = tokenize_and_print_results(config, model, ctx);
+    LOG_DBG("Cleaning up resources\n");
     llama_free(ctx);
     llama_model_free(model);
-
-    return 0;
-}
+    if (!success) {
+        LOG_ERR("Application failed\n");
+        return 1;
+    }
+    LOG_DBG("Application completed successfully\n");
+    return 0;
+}
\ No newline at end of file