diff --git a/taskvine/src/manager/vine_manager.c b/taskvine/src/manager/vine_manager.c
index e4ce14d328..039e058f87 100644
--- a/taskvine/src/manager/vine_manager.c
+++ b/taskvine/src/manager/vine_manager.c
@@ -935,6 +935,49 @@ static void cleanup_worker_files(struct vine_manager *q, struct vine_worker_info
 	hash_table_free_keys_array(cachenames);
 }
 
+/**
+Evict one randomly chosen worker to simulate a failure.
+Only entries of type VINE_WORKER_TYPE_WORKER are eligible.
+@param q The manager to evict from; NULL is tolerated.
+@return 1 if a worker was released, 0 if there was no eligible worker.
+*/
+int vine_manager_evict_a_random_worker(struct vine_manager *q)
+{
+	if (!q || hash_table_size(q->worker_table) == 0) {
+		return 0;
+	}
+
+	/* Collect the candidates first so that release_worker() runs outside the table iteration. */
+	struct list *candidates_list = list_create();
+	char *key;
+	struct vine_worker_info *w;
+	HASH_TABLE_ITERATE(q->worker_table, key, w)
+	{
+		/* Other connection types in the table are never evicted. */
+		if (w->type == VINE_WORKER_TYPE_WORKER) {
+			list_push_tail(candidates_list, w);
+		}
+	}
+
+	int removed = 0;
+	int candidates = list_size(candidates_list);
+	if (candidates > 0) {
+		/* Reduce in unsigned 64-bit arithmetic so the index always lands in [0, candidates);
+		   narrowing the value to int and negating it would be undefined for INT_MIN. */
+		int index = (int)((uint64_t)random_int64() % (uint64_t)candidates);
+		for (int i = 0; i < index; i++) {
+			list_pop_head(candidates_list);
+		}
+		w = list_pop_head(candidates_list);
+		debug(D_VINE | D_NOTICE, "Intentionally evicting worker %s", w->hostname);
+		release_worker(q, w);
+		removed = 1;
+	}
+
+	list_delete(candidates_list);
+	return removed;
+}
+
 /*
 This function enforces a target worker eviction rate (1 every X seconds).
If the observed eviction interval is shorter than the desired one, we randomly evict one worker @@ -973,32 +1016,8 @@ static int enforce_worker_eviction_interval(struct vine_manager *q) return 0; } - /* collect removable workers */ - struct list *candidates_list = list_create(); - char *key; - struct vine_worker_info *w; - HASH_TABLE_ITERATE(q->worker_table, key, w) - { - if (w->type != VINE_WORKER_TYPE_WORKER) { - continue; - } - list_push_tail(candidates_list, w); - } - - /* release a random worker if any */ - int index = (int)(random_int64() % list_size(candidates_list)); - int i = 0; - while ((w = list_pop_head(candidates_list))) { - if (i++ == index) { - /* evict this worker */ - debug(D_VINE | D_NOTICE, "Intentionally evicting worker %s", w->hostname); - release_worker(q, w); - break; - } - } - list_delete(candidates_list); - - return 1; + /* evict a random worker if any, and report whether one was actually released */ + return vine_manager_evict_a_random_worker(q); } /* Remove all tasks and other associated state from a given worker. */ diff --git a/taskvine/src/manager/vine_manager.h b/taskvine/src/manager/vine_manager.h index bcf2405616..91cf80c620 100644 --- a/taskvine/src/manager/vine_manager.h +++ b/taskvine/src/manager/vine_manager.h @@ -291,6 +291,9 @@ void vine_manager_remove_worker(struct vine_manager *q, struct vine_worker_info /* Check if the worker is able to transfer the necessary files for this task. */ int vine_manager_transfer_capacity_available(struct vine_manager *q, struct vine_worker_info *w, struct vine_task *t); +/** Evict one randomly chosen worker to simulate a failure. Returns 1 if a worker was evicted and 0 otherwise (including when q is NULL or the worker table is empty). */ +int vine_manager_evict_a_random_worker(struct vine_manager *q); + /* The expected format of files created by the resource monitor.*/ #define RESOURCE_MONITOR_TASK_LOCAL_NAME "vine-task-%d" #define RESOURCE_MONITOR_REMOTE_NAME "cctools-monitor"