@@ -4,7 +4,8 @@
 #include <string>
 #include <fstream>
 #include <vector>
-#include <json.hpp>
+
+#include <nlohmann/json.hpp>
 
 using json = nlohmann::json;
 
@@ -109,32 +110,50 @@ int main(void) { |
             }
         }
 
-        if (common_download_file_multiple(files, {}, false)) {
-            std::string dir_sep(1, DIRECTORY_SEPARATOR);
-
-            for (auto const & item : files) {
-                std::string filepath = item.second;
-
-                if (string_ends_with(filepath, ".gguf")) {
-                    std::string vocab_inp = filepath + ".inp";
-                    std::string vocab_out = filepath + ".out";
-                    auto matching_inp = std::find_if(files.begin(), files.end(), [&vocab_inp](const auto & p) {
-                        return p.second == vocab_inp;
-                    });
-                    auto matching_out = std::find_if(files.begin(), files.end(), [&vocab_out](const auto & p) {
-                        return p.second == vocab_out;
-                    });
-
-                    if (matching_inp != files.end() && matching_out != files.end()) {
-                        std::string test_command = "." + dir_sep + "test-tokenizer-0 '" + filepath + "'";
-                        assert(std::system(test_command.c_str()) == 0);
-                    } else {
-                        printf("test-tokenizers-remote: %s found without .inp/out vocab files, skipping...\n", filepath.c_str());
+        if (!files.empty()) {
+            bool downloaded = false;
+            const size_t batch_size = 6;
+            size_t batches = (files.size() + batch_size - 1) / batch_size;
+
+            for (size_t i = 0; i < batches; i++) {
+                size_t batch_pos = (i * batch_size);
+                size_t batch_step = batch_pos + batch_size;
+                auto batch_begin = files.begin() + batch_pos;
+                auto batch_end = batch_step >= files.size() ? files.end() : files.begin() + batch_step;
+                std::vector<std::pair<std::string, std::string>> batch(batch_begin, batch_end);
+
+                if (!(downloaded = common_download_file_multiple(batch, {}, false))) {
+                    break;
+                }
+            }
+
+            if (downloaded) {
+                std::string dir_sep(1, DIRECTORY_SEPARATOR);
+
+                for (auto const & item : files) {
+                    std::string filepath = item.second;
+
+                    if (string_ends_with(filepath, ".gguf")) {
+                        std::string vocab_inp = filepath + ".inp";
+                        std::string vocab_out = filepath + ".out";
+                        auto matching_inp = std::find_if(files.begin(), files.end(), [&vocab_inp](const auto & p) {
+                            return p.second == vocab_inp;
+                        });
+                        auto matching_out = std::find_if(files.begin(), files.end(), [&vocab_out](const auto & p) {
+                            return p.second == vocab_out;
+                        });
+
+                        if (matching_inp != files.end() && matching_out != files.end()) {
+                            std::string test_command = "." + dir_sep + "test-tokenizer-0 '" + filepath + "'";
+                            assert(std::system(test_command.c_str()) == 0);
+                        } else {
+                            printf("test-tokenizers-remote: %s found without .inp/out vocab files, skipping...\n", filepath.c_str());
+                        }
                     }
                 }
+            } else {
+                printf("test-tokenizers-remote: failed to download files, unable to perform tests...\n");
             }
-        } else {
-            printf("test-tokenizers-remote: failed to download files, unable to perform tests...\n");
         }
     } else {
         printf("test-tokenizers-remote: failed to retrieve repository info, unable to perform tests...\n");
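For context, the batching added above is plain ceil-division slicing over the (url, path) pairs: the file list is cut into chunks of at most batch_size entries, each chunk is handed to the downloader, and the loop stops at the first failure. Below is a minimal standalone sketch of the same pattern; download_batch and the sample files contents are hypothetical stand-ins for common_download_file_multiple and the real repository listing:

#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stub standing in for common_download_file_multiple():
// pretends every file in the batch downloads successfully.
static bool download_batch(const std::vector<std::pair<std::string, std::string>> & batch) {
    for (const auto & item : batch) {
        printf("downloading %s -> %s\n", item.first.c_str(), item.second.c_str());
    }
    return true;
}

int main() {
    // Assumed shape: pairs of (url, local path), as in the test above.
    std::vector<std::pair<std::string, std::string>> files = {
        {"url-a", "a.gguf"}, {"url-b", "b.gguf"}, {"url-c", "c.gguf"},
        {"url-d", "d.gguf"}, {"url-e", "e.gguf"}, {"url-f", "f.gguf"},
        {"url-g", "g.gguf"},
    };

    const size_t batch_size = 6;
    // Ceil division: 7 files with a batch size of 6 gives 2 batches (6 + 1).
    const size_t batches = (files.size() + batch_size - 1) / batch_size;

    bool downloaded = false;
    for (size_t i = 0; i < batches; i++) {
        const size_t begin = i * batch_size;
        const size_t end   = std::min(begin + batch_size, files.size());
        const std::vector<std::pair<std::string, std::string>> batch(files.begin() + begin,
                                                                     files.begin() + end);

        // Stop at the first failing batch, mirroring the early break in the diff.
        if (!(downloaded = download_batch(batch))) {
            break;
        }
    }

    printf("downloaded all batches: %s\n", downloaded ? "yes" : "no");
    return 0;
}

Clamping the end index with std::min is equivalent to the diff's `batch_step >= files.size() ? files.end() : files.begin() + batch_step` ternary; both keep the final, possibly short, batch within bounds.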