2424#include < signal.h>
2525#endif
2626
27- static bool g_is_generating = false ;
27+ // volatile: these flags are written from the SIGINT handler, which interrupts
27+ // normal control flow, so the compiler must not cache their values in registers
28+ static volatile bool g_is_generating = false ;
29+ static volatile bool is_app_running = true ;
2830
2931/* *
3032 * Please note that this is NOT a production-ready stuff.
@@ -49,8 +51,10 @@ static void sigint_handler(int signo) {
4951 g_is_generating = false ;
5052 } else {
5153 console::cleanup ();
52- LOG (" \n Interrupted by user\n " );
53- _exit (130 );
54+ if (!is_app_running) {
55+ _exit (1 );
56+ }
57+ is_app_running = false ;
5458 }
5559 }
5660}
@@ -134,7 +138,7 @@ struct decode_embd_batch {
134138
135139static int generate_response (gemma3_context & ctx, common_sampler * smpl, int n_predict) {
136140 for (int i = 0 ; i < n_predict; i++) {
137- if (i > n_predict || !g_is_generating) {
141+ if (i > n_predict || !g_is_generating || !is_app_running ) {
138142 printf (" \n " );
139143 break ;
140144 }
@@ -150,6 +154,11 @@ static int generate_response(gemma3_context & ctx, common_sampler * smpl, int n_
150154 printf (" %s" , common_token_to_piece (ctx.lctx , token_id).c_str ());
151155 fflush (stdout);
152156
157+ if (!is_app_running) {
158+ printf (" \n " );
159+ break ;
160+ }
161+
153162 // eval the token
154163 common_batch_clear (ctx.batch );
155164 common_batch_add (ctx.batch , token_id, ctx.n_past ++, {0 }, true );
@@ -172,6 +181,7 @@ static int eval_message(gemma3_context & ctx, common_chat_msg & msg, std::vector
172181 LOG_DBG (" formatted_chat.prompt: %s\n " , formatted_chat.prompt .c_str ());
173182
174183 for (auto & fname : images_fname) {
184+ if (!is_app_running) return 0 ;
175185 mtmd_bitmap bitmap;
176186 if (mtmd_helper_bitmap_init_from_file (fname.c_str (), bitmap)) {
177187 LOG_ERR (" Unable to load image %s\n " , fname.c_str ());
@@ -185,6 +195,9 @@ static int eval_message(gemma3_context & ctx, common_chat_msg & msg, std::vector
185195 text.add_special = add_bos;
186196 text.parse_special = true ;
187197 mtmd_input_chunks chunks;
198+
199+ if (!is_app_running) return 0 ;
200+
188201 int32_t res = mtmd_tokenize (ctx.ctx_vision .get (), chunks, text, bitmaps);
189202 if (res != 0 ) {
190203 LOG_ERR (" Unable to tokenize prompt, res = %d\n " , res);
@@ -242,6 +255,8 @@ int main(int argc, char ** argv) {
242255#endif
243256 }
244257
258+ if (!is_app_running) return 130 ;
259+
245260 if (is_single_turn) {
246261 g_is_generating = true ;
247262 if (params.prompt .find (" <__image__>" ) == std::string::npos) {
@@ -253,7 +268,7 @@ int main(int argc, char ** argv) {
253268 if (eval_message (ctx, msg, params.image , true )) {
254269 return 1 ;
255270 }
256- if (generate_response (ctx, smpl, n_predict)) {
271+ if (is_app_running && generate_response (ctx, smpl, n_predict)) {
257272 return 1 ;
258273 }
259274
@@ -268,12 +283,13 @@ int main(int argc, char ** argv) {
268283 std::vector<std::string> images_fname;
269284 std::string content;
270285
271- while (true ) {
286+ while (is_app_running ) {
272287 g_is_generating = false ;
273288 LOG (" \n > " );
274289 console::set_display (console::user_input);
275290 std::string line;
276291 console::readline (line, false );
292+ if (!is_app_running) break ;
277293 console::set_display (console::reset);
278294 line = string_strip (line);
279295 if (line.empty ()) {
@@ -301,6 +317,7 @@ int main(int argc, char ** argv) {
301317 msg.role = " user" ;
302318 msg.content = content;
303319 int ret = eval_message (ctx, msg, images_fname, is_first_msg);
320+ if (!is_app_running) break ;
304321 if (ret == 2 ) {
305322 // non-fatal error
306323 images_fname.clear ();
@@ -317,7 +334,8 @@ int main(int argc, char ** argv) {
317334 content.clear ();
318335 is_first_msg = false ;
319336 }
320- }
337+ }
338+ if (!is_app_running) LOG (" \n Interrupted by user\n " );
321339 llama_perf_context_print (ctx.lctx );
322- return 0 ;
340+ return is_app_running ? 0 : 130 ;
323341}
0 commit comments