Improve progress bar

ericcurtin · ericcurtin · commit 47aaa78d7ba5 · 2024-12-14T11:33:43.000Z
Set the progress bar's default width to the width of the terminal. Also fix a
small bug in the default n_gpu_layers value shown in the help text.

Signed-off-by: Eric Curtin <ecurtin@redhat.com>
diff --git a/examples/run/run.cpp b/examples/run/run.cpp
@@ -1,6 +1,7 @@
 #if defined(_WIN32)
 #    include <windows.h>
 #else
+#    include <sys/ioctl.h>
 #    include <unistd.h>
 #endif
 
@@ -70,7 +71,7 @@ class Opt {
             ")\n"
             "  -n, --ngl <value>\n"
             "      Number of GPU layers (default: " +
-            std::to_string(ngl_);
+            std::to_string(llama_model_default_params().n_gpu_layers);
         help_str_ +=
             ")\n"
             "  -h, --help\n"
@@ -96,8 +97,8 @@ class Opt {
             "  llama-run https://example.com/some-file1.gguf\n"
             "  llama-run some-file2.gguf\n"
             "  llama-run file://some-file3.gguf\n"
-            "  llama-run --ngl 99 some-file4.gguf\n"
-            "  llama-run --ngl 99 some-file5.gguf Hello World\n";
+            "  llama-run --ngl 999 some-file4.gguf\n"
+            "  llama-run --ngl 999 some-file5.gguf Hello World\n";
     }
 
     int parse(int argc, const char ** argv) {
@@ -119,6 +120,10 @@ class Opt {
                 help_ = true;
                 return 0;
             } else if (!positional_args_i) {
+                if (!argv[i][0] || argv[i][0] == '-') {  // reject an empty or option-like ("-...") model argument
+                    return 1;
+                }
+
                 ++positional_args_i;
                 model_ = argv[i];
             } else if (positional_args_i == 1) {
@@ -151,6 +156,18 @@ struct FileDeleter {
 
 typedef std::unique_ptr<FILE, FileDeleter> FILE_ptr;
 
+static int get_terminal_width() {  // Width in columns of the console attached to stdout.
+#if defined(_WIN32)
+    CONSOLE_SCREEN_BUFFER_INFO csbi = {};  // zero-init: the query result is unchecked, so avoid reading garbage
+    GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
+    return csbi.srWindow.Right - csbi.srWindow.Left + 1;
+#else
+    struct winsize w = {};  // zero-init: ioctl leaves w untouched on failure (e.g. stdout is not a terminal)
+    ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
+    return w.ws_col;
+#endif
+}
+
 #ifdef LLAMA_USE_CURL
 class CurlWrapper {
   public:
@@ -270,9 +287,9 @@ class CurlWrapper {
 
     static std::string human_readable_size(curl_off_t size) {
         static const char * suffix[] = { "B", "KB", "MB", "GB", "TB" };
-        char         length   = sizeof(suffix) / sizeof(suffix[0]);
-        int          i        = 0;
-        double       dbl_size = size;
+        char                length   = sizeof(suffix) / sizeof(suffix[0]);
+        int                 i        = 0;
+        double              dbl_size = size;
         if (size > 1024) {
             for (i = 0; (size / 1024) > 0 && i < length - 1; i++, size /= 1024) {
                 dbl_size = size / 1024.0;
@@ -293,27 +310,75 @@ class CurlWrapper {
 
         total_to_download += data->file_size;
         const curl_off_t now_downloaded_plus_file_size = now_downloaded + data->file_size;
-        const curl_off_t percentage                    = (now_downloaded_plus_file_size * 100) / total_to_download;
-        const curl_off_t pos                           = (percentage / 5);
+        const curl_off_t percentage      = calculate_percentage(now_downloaded_plus_file_size, total_to_download);
+        std::string      progress_prefix = generate_progress_prefix(percentage);
+
+        const double speed = calculate_speed(now_downloaded, data->start_time);
+        const double time  = (total_to_download - now_downloaded) / speed;
+        std::string  progress_suffix =
+            generate_progress_suffix(now_downloaded_plus_file_size, total_to_download, speed, time);
+
+        int         progress_bar_width = calculate_progress_bar_width(progress_prefix, progress_suffix);
         std::string progress_bar;
-        for (int i = 0; i < 20; ++i) {
-            progress_bar.append((i < pos) ? "█" : " ");
-        }
+        generate_progress_bar(progress_bar_width, percentage, progress_bar);
 
-        // Calculate download speed and estimated time to completion
-        const auto                          now             = std::chrono::steady_clock::now();
-        const std::chrono::duration<double> elapsed_seconds = now - data->start_time;
-        const double                        speed           = now_downloaded / elapsed_seconds.count();
-        const double                        estimated_time  = (total_to_download - now_downloaded) / speed;
-        printe("\r%ld%% |%s| %s/%s  %.2f MB/s  %s      ", percentage, progress_bar.c_str(),
-               human_readable_size(now_downloaded).c_str(), human_readable_size(total_to_download).c_str(),
-               speed / (1024 * 1024), human_readable_time(estimated_time).c_str());
-        fflush(stderr);
+        print_progress(progress_prefix, progress_bar, progress_suffix);
         data->printed = true;
 
         return 0;
     }
 
+    static curl_off_t calculate_percentage(curl_off_t now_downloaded_plus_file_size, curl_off_t total_to_download) {
+        return (now_downloaded_plus_file_size * 100) / total_to_download;
+    }
+
+    static std::string generate_progress_prefix(curl_off_t percentage) {
+        std::ostringstream progress_output;
+        progress_output << percentage << "% |";
+        return progress_output.str();
+    }
+
+    static double calculate_speed(curl_off_t now_downloaded, const std::chrono::steady_clock::time_point & start_time) {
+        const auto                          now             = std::chrono::steady_clock::now();
+        const std::chrono::duration<double> elapsed_seconds = now - start_time;
+        return now_downloaded / elapsed_seconds.count();
+    }
+
+    static std::string generate_progress_suffix(curl_off_t now_downloaded_plus_file_size, curl_off_t total_to_download,
+                                                double speed, double estimated_time) {
+        std::ostringstream progress_output;
+        progress_output << human_readable_size(now_downloaded_plus_file_size).c_str() << "/"
+                        << human_readable_size(total_to_download).c_str() << " " << std::fixed << std::setprecision(2)
+                        << speed / (1024 * 1024) << " MB/s " << human_readable_time(estimated_time).c_str();
+        return progress_output.str();
+    }
+
+    static int calculate_progress_bar_width(const std::string & progress_prefix, const std::string & progress_suffix) {
+        int progress_bar_width = get_terminal_width() - progress_prefix.size() - progress_suffix.size() - 5;
+        if (progress_bar_width < 10) {
+            progress_bar_width = 10;
+        }
+        return progress_bar_width;
+    }
+
+    static std::string generate_progress_bar(int progress_bar_width, curl_off_t percentage,
+                                             std::string & progress_bar) {
+        const curl_off_t pos = (percentage * progress_bar_width) / 100;
+        for (int i = 0; i < progress_bar_width; ++i) {
+            progress_bar.append((i < pos) ? "█" : " ");
+        }
+
+        return progress_bar;
+    }
+
+    static void print_progress(const std::string & progress_prefix, const std::string & progress_bar,
+                               const std::string & progress_suffix) {
+        std::ostringstream progress_output;
+        progress_output << progress_prefix << progress_bar << "| " << progress_suffix;
+        printe("\r%*s\r%s", get_terminal_width(), " ", progress_output.str().c_str());
+        fflush(stderr);
+    }
+
     // Function to write data to a file
     static size_t write_data(void * ptr, size_t size, size_t nmemb, void * stream) {
         FILE * out = static_cast<FILE *>(stream);
@@ -467,6 +532,7 @@ class LlamaData {
         llama_model_params model_params = llama_model_default_params();
         model_params.n_gpu_layers       = opt.ngl_ >= 0 ? opt.ngl_ : model_params.n_gpu_layers;
         resolve_model(opt.model_);
+        printe("Loading model");
         llama_model_ptr model(llama_load_model_from_file(opt.model_.c_str(), model_params));
         if (!model) {
             printe("%s: error: unable to load model from file: %s\n", __func__, opt.model_.c_str());
@@ -642,8 +708,9 @@ static int handle_user_input(std::string & user_input, const std::string & user_
     }
 
     printf(
-        "\r                                                                       "
-        "\r\033[32m> \033[0m");
+        "\r%*s"
+        "\r\033[32m> \033[0m",
+        get_terminal_width(), " ");
     return read_user_input(user_input);  // Returns true if input ends the loop
 }