|
7 | 7 | #include <memory> |
8 | 8 | #include <algorithm> |
9 | 9 | #include <queue> |
| 10 | +#include <thread> |
| 11 | +#include <mutex> |
| 12 | +#include <vector> |
| 13 | +#include <functional> |
10 | 14 |
|
11 | 15 | /* |
12 | 16 | ~~ GPU Linked list pointer chase algorithm ~~ |
@@ -103,20 +107,40 @@ uint64_t general_pointer_chase(int local_device, int remote_device, int init_mod |
103 | 107 | return l.timer; |
104 | 108 | } |
105 | 109 |
|
| 110 | +std::mutex mtx; |
| 111 | +template < class L > |
| 112 | +void loc_ptr_ch(int gpu_id, int init_mode, size_t num_nodes, size_t stride, size_t num_jumps, char * nid) |
| 113 | +{ |
| 114 | + /* |
| 115 | + * Low-level thread-safe local pointer chase function. |
| 116 | + */ |
| 117 | + uint64_t total_cycles = general_pointer_chase< L >(gpu_id, gpu_id, init_mode, num_nodes, stride, num_jumps); |
| 118 | + |
| 119 | + // Print the timings of the pointer chase |
| 120 | + { |
| 121 | + std::lock_guard<std::mutex> lg(mtx); |
| 122 | + printf("[%s] On device %d, the chase took on average %d cycles per node jump.\n", nid, gpu_id, total_cycles/num_jumps); |
| 123 | + } |
| 124 | +} |
106 | 125 |
|
107 | 126 | template < class List > |
108 | 127 | void local_pointer_chase(int num_devices, int init_mode, size_t num_nodes, size_t stride, size_t num_jumps, char * nid) |
109 | 128 | { |
110 | 129 | /* |
111 | 130 | * Specialised pointer chase on a single device. |
112 | 131 | */ |
| 132 | + std::vector<std::thread> threads; |
113 | 133 | for (int gpu_id = 0; gpu_id < num_devices; gpu_id++) |
114 | 134 | { |
115 | | - uint64_t total_cycles = general_pointer_chase< List >(gpu_id, gpu_id, init_mode, num_nodes, stride, num_jumps); |
116 | | - |
117 | | - // Print the timings of the pointer chase |
118 | | - printf("[%s] On device %d, the chase took on average %d cycles per node jump.\n", nid, gpu_id, total_cycles/num_jumps); |
| 135 | + threads.push_back(std::thread(loc_ptr_ch<List>, |
| 136 | + gpu_id, init_mode, |
| 137 | + num_nodes, stride, num_jumps, nid |
| 138 | + ) |
| 139 | + ); |
119 | 140 | } |
| 141 | + |
| 142 | + // Join all threads |
| 143 | + std::for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join)); |
120 | 144 | } |
121 | 145 |
|
122 | 146 |
|
|
0 commit comments