
Commit d8eaa26

tests : fix test-thread-safety when compiling with multiple backends (#16699)
* run one test per backend/device (even if it's the same device)
1 parent: 9285325

File tree

1 file changed: +6 -4 lines

tests/test-thread-safety.cpp

Lines changed: 6 additions & 4 deletions
@@ -3,6 +3,7 @@
 // - Creates n_parallel (--parallel) contexts per model
 // - Runs inference in parallel on each context
 
+#include <array>
 #include <thread>
 #include <vector>
 #include <atomic>
@@ -38,13 +39,14 @@ int main(int argc, char ** argv) {
     cparams.n_seq_max = 1;
 
     int dev_count = ggml_backend_dev_count();
-    int gpu_dev_count = 0;
+    std::vector<std::array<ggml_backend_dev_t, 2>> gpus;
     for (int i = 0; i < dev_count; ++i) {
         auto * dev = ggml_backend_dev_get(i);
         if (dev && ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
-            gpu_dev_count++;
+            gpus.push_back({dev, nullptr});
         }
     }
+    const int gpu_dev_count = (int)gpus.size();
     const int num_models = gpu_dev_count + 1 + 1; // GPUs + 1 CPU model + 1 layer split
     //const int num_models = std::max(1, gpu_dev_count);
     const int num_contexts = std::max(1, params.n_parallel);
@@ -58,12 +60,12 @@ int main(int argc, char ** argv) {
 
         if (m < gpu_dev_count) {
             mparams.split_mode = LLAMA_SPLIT_MODE_NONE;
-            mparams.main_gpu = m;
+            mparams.devices = gpus[m].data();
         } else if (m == gpu_dev_count) {
            mparams.split_mode = LLAMA_SPLIT_MODE_NONE;
            mparams.main_gpu = -1; // CPU model
        } else {
-            mparams.split_mode = LLAMA_SPLIT_MODE_LAYER;;
+            mparams.split_mode = LLAMA_SPLIT_MODE_LAYER;
         }
 
         llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
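
Note on the {dev, nullptr} pattern above: llama_model_params.devices takes a pointer to a null-terminated list of ggml_backend_dev_t, so storing each GPU next to a trailing nullptr lets gpus[m].data() be passed directly and pins that model to exactly one backend device. Below is a minimal sketch of the same idea, assuming the llama.cpp API used by this test (llama_model_default_params, llama_model_load_from_file); the load_on_device helper is hypothetical, not part of the commit.

// Sketch: pin a model to a single backend device via a null-terminated device list.
// Assumes mparams.devices expects a NULL-terminated array of ggml_backend_dev_t,
// as in the diff above; load_on_device is a hypothetical helper for illustration.
#include <array>

#include "ggml-backend.h"
#include "llama.h"

static llama_model * load_on_device(const char * path, ggml_backend_dev_t dev) {
    std::array<ggml_backend_dev_t, 2> devices = { dev, nullptr }; // device + terminator

    llama_model_params mparams = llama_model_default_params();
    mparams.split_mode = LLAMA_SPLIT_MODE_NONE; // no layer splitting across devices
    mparams.devices    = devices.data();        // restrict loading to this one device

    return llama_model_load_from_file(path, mparams); // devices is read during the call
}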
