Skip to content

Commit f9a2717

Browse files
authored
download in batches
1 parent de8ec13 commit f9a2717

File tree

1 file changed

+43
-24
lines changed

1 file changed

+43
-24
lines changed

tests/test-tokenizers-remote.cpp

Lines changed: 43 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44
#include <string>
55
#include <fstream>
66
#include <vector>
7-
#include <json.hpp>
7+
8+
#include <nlohmann/json.hpp>
89

910
using json = nlohmann::json;
1011

@@ -109,32 +110,50 @@ int main(void) {
109110
}
110111
}
111112

112-
if (common_download_file_multiple(files, {}, false)) {
113-
std::string dir_sep(1, DIRECTORY_SEPARATOR);
114-
115-
for (auto const & item : files) {
116-
std::string filepath = item.second;
117-
118-
if (string_ends_with(filepath, ".gguf")) {
119-
std::string vocab_inp = filepath + ".inp";
120-
std::string vocab_out = filepath + ".out";
121-
auto matching_inp = std::find_if(files.begin(), files.end(), [&vocab_inp](const auto & p) {
122-
return p.second == vocab_inp;
123-
});
124-
auto matching_out = std::find_if(files.begin(), files.end(), [&vocab_out](const auto & p) {
125-
return p.second == vocab_out;
126-
});
127-
128-
if (matching_inp != files.end() && matching_out != files.end()) {
129-
std::string test_command = "." + dir_sep + "test-tokenizer-0 '" + filepath + "'";
130-
assert(std::system(test_command.c_str()) == 0);
131-
} else {
132-
printf("test-tokenizers-remote: %s found without .inp/out vocab files, skipping...\n", filepath.c_str());
113+
if (!files.empty()) {
114+
bool downloaded = false;
115+
const size_t batch_size = 6;
116+
size_t batches = (files.size() + batch_size - 1) / batch_size;
117+
118+
for (size_t i = 0; i < batches; i++) {
119+
size_t batch_pos = (i * batch_size);
120+
size_t batch_step = batch_pos + batch_size;
121+
auto batch_begin = files.begin() + batch_pos;
122+
auto batch_end = batch_step >= files.size() ? files.end() : files.begin() + batch_step;
123+
std::vector<std::pair<std::string, std::string>> batch(batch_begin, batch_end);
124+
125+
if (!(downloaded = common_download_file_multiple(batch, {}, false))) {
126+
break;
127+
}
128+
}
129+
130+
if (downloaded) {
131+
std::string dir_sep(1, DIRECTORY_SEPARATOR);
132+
133+
for (auto const & item : files) {
134+
std::string filepath = item.second;
135+
136+
if (string_ends_with(filepath, ".gguf")) {
137+
std::string vocab_inp = filepath + ".inp";
138+
std::string vocab_out = filepath + ".out";
139+
auto matching_inp = std::find_if(files.begin(), files.end(), [&vocab_inp](const auto & p) {
140+
return p.second == vocab_inp;
141+
});
142+
auto matching_out = std::find_if(files.begin(), files.end(), [&vocab_out](const auto & p) {
143+
return p.second == vocab_out;
144+
});
145+
146+
if (matching_inp != files.end() && matching_out != files.end()) {
147+
std::string test_command = "." + dir_sep + "test-tokenizer-0 '" + filepath + "'";
148+
assert(std::system(test_command.c_str()) == 0);
149+
} else {
150+
printf("test-tokenizers-remote: %s found without .inp/out vocab files, skipping...\n", filepath.c_str());
151+
}
133152
}
134153
}
154+
} else {
155+
printf("test-tokenizers-remote: failed to download files, unable to perform tests...\n");
135156
}
136-
} else {
137-
printf("test-tokenizers-remote: failed to download files, unable to perform tests...\n");
138157
}
139158
} else {
140159
printf("test-tokenizers-remote: failed to retrieve repository info, unable to perform tests...\n");

0 commit comments

Comments
 (0)