@@ -41,6 +41,7 @@ def __init__(self, console: Optional[Console] = None):
         self.frequency_penalty = None  # float
         self.stop = None  # list[str]
         self.num_ctx = None  # int
+        self.num_batch = None  # int

         # Parameter explanations
         self.parameter_explanations = {
@@ -133,6 +134,12 @@ def __init__(self, console: Optional[Console] = None):
                 "range": "1 – model maximum (e.g., 1 – 32768 for qwen3:0.6b); model-dependent",
                 "effect": "Controls how much conversation history and context the model can access when generating responses.",
                 "recommendation": "Use higher values for complex conversations requiring more context; balance with memory usage and performance."
+            },
+            "num_batch": {
+                "description": "Sets the batch size into which the prompt is divided during processing.",
+                "range": "1 – context size (1 = no batching); Ollama's default is 512",
+                "effect": "Directly impacts processing speed and response time. Increasing the batch size raises throughput (tokens/sec) up to a saturation point, after which it deteriorates.",
+                "recommendation": "Vary the batch size while observing performance and memory usage, then choose the best size for your use case."
             }
         }
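The `recommendation` above suggests sweeping batch sizes empirically. A minimal sketch of that sweep, assuming the `ollama` Python package and an illustrative model name; Ollama reports `prompt_eval_count` and `prompt_eval_duration` (in nanoseconds) in its responses, and prompt evaluation is the phase `num_batch` chiefly affects:

```python
# Minimal num_batch sweep, assuming the `ollama` package and a pulled model.
# Model name and prompt are illustrative.
import ollama

prompt = "Summarize the history of operating systems in one paragraph. " * 20

for num_batch in (64, 128, 256, 512, 1024):
    resp = ollama.generate(
        model="qwen3:0.6b",                # illustrative model
        prompt=prompt,
        options={"num_batch": num_batch},  # same key get_ollama_options() emits
    )
    tokens = resp["prompt_eval_count"]
    seconds = resp["prompt_eval_duration"] / 1e9  # nanoseconds -> seconds
    print(f"num_batch={num_batch:4d}: {tokens / seconds:8.1f} prompt tokens/sec")
```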
@@ -157,7 +164,8 @@ def get_config(self) -> Dict[str, Any]:
             "presence_penalty": self.presence_penalty,
             "frequency_penalty": self.frequency_penalty,
             "stop": self.stop,
-            "num_ctx": self.num_ctx
+            "num_ctx": self.num_ctx,
+            "num_batch": self.num_batch
         }

     def get_ollama_options(self) -> Dict[str, Any]:
@@ -198,6 +206,8 @@ def get_ollama_options(self) -> Dict[str, Any]:
             options["stop"] = self.stop
         if self.num_ctx is not None:
             options["num_ctx"] = self.num_ctx
+        if self.num_batch is not None:
+            options["num_batch"] = self.num_batch
         return options

     def get_system_prompt(self) -> str:
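For context, a hedged sketch of how the options assembled by `get_ollama_options()` would typically reach Ollama; `config` stands in for an instance of this class, and the model name is illustrative:

```python
# Usage sketch: pass the assembled options (now including num_batch) to a chat call.
import ollama

options = config.get_ollama_options()  # contains "num_batch" only when it is set
reply = ollama.chat(
    model="qwen3:0.6b",                # illustrative model
    messages=[{"role": "user", "content": "Hello!"}],
    options=options,
)
print(reply["message"]["content"])
```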
@@ -244,6 +254,8 @@ def set_config(self, config: Dict[str, Any]) -> None:
             self.stop = config["stop"]
         if "num_ctx" in config:
             self.num_ctx = config["num_ctx"]
+        if "num_batch" in config:
+            self.num_batch = config["num_batch"]

     def display_current_config(self) -> None:
         """Display the current model configuration."""
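With the `set_config` branch above, `num_batch` now round-trips through saved configurations. A small sketch, with class name and file path assumed for illustration:

```python
# Round-trip sketch: num_batch persists through get_config()/set_config().
import json

config.num_batch = 256                     # `config` is a stand-in instance
with open("model_config.json", "w") as f:  # illustrative path
    json.dump(config.get_config(), f)

restored = ModelConfig()                   # assumed class name
with open("model_config.json") as f:
    restored.set_config(json.load(f))
assert restored.num_batch == 256
```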
@@ -273,7 +285,8 @@ def format_value(value):
             f"[bold][orange3]11.[/orange3] presence_penalty:[/bold] {format_value(self.presence_penalty)}\n"
             f"[bold][orange3]12.[/orange3] frequency_penalty:[/bold] {format_value(self.frequency_penalty)}\n"
             f"[bold][orange3]13.[/orange3] stop:[/bold] {format_value(self.stop)}\n"
-            f"[bold][orange3]14.[/orange3] num_ctx:[/bold] {format_value(self.num_ctx)}",
+            f"[bold][orange3]14.[/orange3] num_ctx:[/bold] {format_value(self.num_ctx)}\n"
+            f"[bold][orange3]15.[/orange3] num_batch:[/bold] {format_value(self.num_batch)}",
             title="[bold blue]🎮 Model Parameters[/bold blue]",
             border_style="blue", expand=False))
         self.console.print("\n[bold yellow]Note:[/bold yellow] Unset values will use Ollama's defaults.")
@@ -325,7 +338,7 @@ def display_parameter_explanations(self) -> None:
         for param in [
             "num_keep", "seed", "num_predict", "top_k", "top_p", "min_p",
             "typical_p", "repeat_last_n", "temperature", "repeat_penalty",
-            "presence_penalty", "frequency_penalty", "stop", "num_ctx"
+            "presence_penalty", "frequency_penalty", "stop", "num_ctx", "num_batch"
         ]:
             info = self.parameter_explanations[param]
             table.add_row(
@@ -447,6 +460,7 @@ def configure_model_interactive(self, clear_console_func: Optional[Callable] = None):
                 self.frequency_penalty = None
                 self.stop = None
                 self.num_ctx = None
+                self.num_batch = None
                 result_message = "[green]All parameters unset (using Ollama defaults).[/green]"
                 result_style = "green"
                 continue
@@ -523,6 +537,10 @@ def configure_model_interactive(self, clear_console_func: Optional[Callable] = None):
                         self.num_ctx = None
                         result_message = "[green]num_ctx unset (using Ollama default).[/green]"
                         result_style = "green"
+                    case 15:
+                        self.num_batch = None
+                        result_message = "[green]num_batch unset (using Ollama default).[/green]"
+                        result_style = "green"
                     case _:
                         result_message = "[red]Invalid parameter number.[/red]"
                         result_style = "red"
@@ -740,6 +758,19 @@ def configure_model_interactive(self, clear_console_func: Optional[Callable] = None):
                         except ValueError:
                             result_message = "[red]Please enter a valid integer.[/red]"
                             result_style = "red"
+                    case "15":
+                        try:
+                            new_value = IntPrompt.ask("Batch size (num_batch, size of the batches into which the prompt is divided)", default=self.num_batch)
+                            if new_value >= 1:
+                                self.num_batch = new_value
+                                result_message = f"[green]num_batch set to {new_value}.[/green]"
+                                result_style = "green"
+                            else:
+                                result_message = "[red]num_batch must be a positive integer.[/red]"
+                                result_style = "red"
+                        except ValueError:
+                            result_message = "[red]Please enter a valid integer.[/red]"
+                            result_style = "red"

                     case _:
                         result_message = "[red]Invalid selection. Please choose a valid option.[/red]"
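The interactive handler above enforces only the lower bound, while the documented range caps `num_batch` at the context size. A hedged helper a caller could apply before building options; the 2048 fallback approximates a common Ollama default context length and is an assumption:

```python
def clamp_num_batch(num_batch: int, num_ctx: int | None, default_ctx: int = 2048) -> int:
    """Clamp num_batch into the documented 1..context-size range.

    default_ctx approximates Ollama's default context length when num_ctx
    is unset; treat it as an assumption, not a guarantee.
    """
    ceiling = num_ctx if num_ctx is not None else default_ctx
    return max(1, min(num_batch, ceiling))
```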