@@ -34,28 +34,28 @@ static void test_rapid_context_cycles(
    int iterations
) {
    const int64_t t_start = ggml_time_us();

    std::random_device rd;
    std::mt19937 gen(rd() + thread_id);
    std::uniform_int_distribution<> delay_dist(1, 10);

    for (int i = 0; i < iterations; i++) {
        llama_context * ctx = llama_init_from_model(model, base_params);

        if (!ctx) {
            LOG_ERR("thread %d: failed to create context on iteration %d\n", thread_id, i);
            result.errors++;
            continue;
        }

        result.contexts_created++;

        // random 1-10 ms delay so the threads interleave their create/free cycles
        std::this_thread::sleep_for(std::chrono::milliseconds(delay_dist(gen)));

        llama_free(ctx);
        result.contexts_destroyed++;
    }

    const int64_t t_end = ggml_time_us();
    LOG_INF("thread %d: completed %d context cycles in %.2f ms\n",
            thread_id, iterations, (t_end - t_start) / 1000.0);
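
// NOTE: `test_result` is defined earlier in this file, outside these hunks.
// Its counters must be std::atomic, because every worker thread increments
// them concurrently through the std::ref passed to std::thread. A minimal
// sketch consistent with the usage in this diff (the field names appear in
// the diff; the exact definition is assumed):
//
//     struct test_result {
//         std::atomic<int> contexts_created{0};
//         std::atomic<int> contexts_destroyed{0};
//         std::atomic<int> batches_processed{0};
//         std::atomic<int> errors{0};
//     };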
@@ -72,24 +72,24 @@ static void test_backend_resource_stress(
    std::random_device rd;
    std::mt19937 gen(rd() + thread_id);
    std::uniform_int_distribution<> delay_dist(1, 8);

    for (int i = 0; i < iterations; i++) {
        llama_context_params ctx_params = base_params;

        // vary the context and batch sizes each iteration so the backend has
        // to allocate and free differently sized buffers under contention
        ctx_params.n_ctx   = 128 + (i % 4) * 64;
        ctx_params.n_batch =  32 + (i % 3) * 16;

        llama_context * ctx = llama_init_from_model(model, ctx_params);
        if (!ctx) {
            LOG_ERR("thread %d: failed to create context with varying params on iteration %d\n", thread_id, i);
            result.errors++;
            continue;
        }

        result.contexts_created++;

        std::this_thread::sleep_for(std::chrono::milliseconds(delay_dist(gen)));

        llama_free(ctx);
        result.contexts_destroyed++;
    }
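
// NOTE: the parts of main() elided from this diff are assumed to follow the
// usual llama.cpp example setup before the tests run - a sketch, not the
// actual code (the helper names exist in common.h; the wiring is guessed):
//
//     common_params params;
//     // ... common_params_parse(argc, argv, params, ...) ...
//     llama_backend_init();
//     llama_model_params   mparams = common_model_params_to_llama(params);
//     llama_context_params cparams = common_context_params_to_llama(params);
//     llama_model   * model = llama_model_load_from_file(/* model path */, mparams);
//     llama_context * ctx   = llama_init_from_model(model, cparams);
//
// Only model, ctx, cparams, and params.cpuparams.n_threads are actually
// visible in the hunk below.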
@@ -124,171 +124,171 @@ int main(int argc, char ** argv) {

    const int n_threads = params.cpuparams.n_threads;
    const int iterations_per_thread = 20;

    LOG_INF("Starting concurrent stress tests with %d threads, %d iterations per thread\n",
            n_threads, iterations_per_thread);

    LOG_INF("\n=== Test 1: Rapid Context Creation/Destruction Cycles ===\n");
    {
        test_result result;
        std::vector<std::thread> threads;

        const int64_t t_start = ggml_time_us();

        for (int i = 0; i < n_threads; i++) {
            threads.emplace_back(test_rapid_context_cycles, model, cparams,
                                 std::ref(result), i, iterations_per_thread);
        }

        for (auto & t : threads) {
            t.join();
        }

        const int64_t t_end = ggml_time_us();

        LOG_INF("Test 1 Results:\n");
        LOG_INF("Contexts created: %d\n", result.contexts_created.load());
        LOG_INF("Contexts destroyed: %d\n", result.contexts_destroyed.load());
        LOG_INF("Errors: %d\n", result.errors.load());
        LOG_INF("Total time: %.2f ms\n", (t_end - t_start) / 1000.0);
        LOG_INF("Avg time per context: %.2f ms\n",
                (t_end - t_start) / 1000.0 / result.contexts_created.load());

        if (result.contexts_created != result.contexts_destroyed) {
            LOG_ERR("FAIL: Context leak detected! Created: %d, Destroyed: %d\n",
                    result.contexts_created.load(), result.contexts_destroyed.load());
            llama_free(ctx);
            llama_model_free(model);
            return 1;
        }

        if (result.errors > 0) {
            LOG_ERR("FAIL: %d errors occurred during context cycles\n", result.errors.load());
            llama_free(ctx);
            llama_model_free(model);
            return 1;
        }

        LOG_INF("PASS: No context leaks or errors detected\n");
    }

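    // Test 2 (below) exercises the full per-thread lifecycle: create a
    // context, tokenize and decode a short prompt on it, then free it.
    // Per llama.h, llama_decode() returns 0 on success, a positive value
    // when the batch could not be scheduled (e.g. no KV-cache slot), and
    // a negative value on a hard error - so only exact-0 results are
    // counted as processed batches.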
    LOG_INF("\n=== Test 2: Parallel Context Operations ===\n");
    {
        test_result result;
        std::vector<std::thread> threads;

        const int64_t t_start = ggml_time_us();

        // capturing by reference is safe here: main joins every worker
        // before result, model, or cparams go out of scope
        auto parallel_context_ops = [&](int thread_id) {
            std::random_device rd;
            std::mt19937 gen(rd() + thread_id);
            std::uniform_int_distribution<> delay_dist(1, 5);

            // fewer iterations than Test 1, since each one also runs a decode
            for (int i = 0; i < iterations_per_thread / 4; i++) {
                llama_context * thread_ctx = llama_init_from_model(model, cparams);
                if (!thread_ctx) {
                    LOG_ERR("thread %d: failed to create context on iteration %d\n", thread_id, i);
                    result.errors++;
                    continue;
                }

                result.contexts_created++;

                std::vector<llama_token> tokens = common_tokenize(thread_ctx, "Test prompt", true, true);
                if (!tokens.empty()) {
                    llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
                    for (size_t j = 0; j < tokens.size(); j++) {
                        common_batch_add(batch, tokens[j], j, { 0 }, false);
                    }

                    if (llama_decode(thread_ctx, batch) == 0) {
                        result.batches_processed++;
                    }

                    llama_batch_free(batch);
                }

                std::this_thread::sleep_for(std::chrono::milliseconds(delay_dist(gen)));

                llama_free(thread_ctx);
                result.contexts_destroyed++;
            }
        };

        for (int i = 0; i < n_threads; i++) {
            threads.emplace_back(parallel_context_ops, i);
        }

        for (auto & t : threads) {
            t.join();
        }

        const int64_t t_end = ggml_time_us();

        LOG_INF("Test 2 Results:\n");
        LOG_INF("Contexts created: %d\n", result.contexts_created.load());
        LOG_INF("Contexts destroyed: %d\n", result.contexts_destroyed.load());
        LOG_INF("Batches processed: %d\n", result.batches_processed.load());
        LOG_INF("Errors: %d\n", result.errors.load());
        LOG_INF("Total time: %.2f ms\n", (t_end - t_start) / 1000.0);

        if (result.contexts_created != result.contexts_destroyed) {
            LOG_ERR("FAIL: Context leak detected! Created: %d, Destroyed: %d\n",
                    result.contexts_created.load(), result.contexts_destroyed.load());
            llama_free(ctx);
            llama_model_free(model);
            return 1;
        }

        if (result.errors > 0) {
            LOG_ERR("FAIL: %d errors occurred during parallel operations\n", result.errors.load());
            llama_free(ctx);
            llama_model_free(model);
            return 1;
        }

        LOG_INF("PASS: All parallel context operations completed successfully\n");
    }

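    // Test 3 (below) fans out test_backend_resource_stress() from the second
    // hunk above, so concurrent workers ask the backend for differently
    // sized KV caches and compute buffers at the same time.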
    LOG_INF("\n=== Test 3: Backend Resource Allocation Stress ===\n");
    {
        test_result result;
        std::vector<std::thread> threads;

        const int64_t t_start = ggml_time_us();

        for (int i = 0; i < n_threads; i++) {
            threads.emplace_back(test_backend_resource_stress, model, cparams,
                                 std::ref(result), i, iterations_per_thread / 4);
        }

        for (auto & t : threads) {
            t.join();
        }

        const int64_t t_end = ggml_time_us();

        LOG_INF("Test 3 Results:\n");
        LOG_INF("Contexts created: %d\n", result.contexts_created.load());
        LOG_INF("Contexts destroyed: %d\n", result.contexts_destroyed.load());
        LOG_INF("Errors: %d\n", result.errors.load());
        LOG_INF("Total time: %.2f ms\n", (t_end - t_start) / 1000.0);

        if (result.contexts_created != result.contexts_destroyed) {
            LOG_ERR("FAIL: Resource leak detected! Created: %d, Destroyed: %d\n",
                    result.contexts_created.load(), result.contexts_destroyed.load());
            llama_free(ctx);
            llama_model_free(model);
            return 1;
        }

        if (result.errors > 0) {
            LOG_ERR("FAIL: %d errors occurred during resource stress test\n", result.errors.load());
            llama_free(ctx);
            llama_model_free(model);
            return 1;
        }

        LOG_INF("PASS: No resource leaks detected\n");
    }
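
    // NOTE: the rest of main() is outside this diff; the failure paths above
    // suggest it ends by releasing the long-lived context and model, roughly:
    //
    //     llama_free(ctx);
    //     llama_model_free(model);
    //     llama_backend_free();
    //     return 0;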