@@ -41,6 +41,7 @@ def __init__(self, console: Optional[Console] = None):
         self.frequency_penalty = None  # float
         self.stop = None  # list[str]
         self.num_ctx = None  # int
+        self.num_batch = None  # int

         # Parameter explanations
         self.parameter_explanations = {
@@ -133,6 +134,12 @@ def __init__(self, console: Optional[Console] = None):
                 "range": "1 – model maximum (e.g., 1 – 32768 for qwen3:0.6b); model-dependent",
                 "effect": "Controls how much conversation history and context the model can access when generating responses.",
                 "recommendation": "Use higher values for complex conversations requiring more context; balance with memory usage and performance."
+            },
+            "num_batch": {
+                "description": "Sets the batch size into which the prompt is divided during processing.",
+                "range": "1 – context size (1 = no batching); Ollama's default is 512",
+                "effect": "Directly impacts processing speed and response time. Increasing the batch size raises throughput (tokens/sec) up to a saturation point, after which it deteriorates.",
+                "recommendation": "Vary the batch size while observing performance and memory usage, then choose the best size for your use case."
             }
         }
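The `recommendation` above suggests sweeping batch sizes empirically. A minimal sketch of that sweep, assuming the `ollama` Python package and an illustrative model name; Ollama reports `prompt_eval_count` and `prompt_eval_duration` (in nanoseconds) in its responses, and prompt evaluation is the phase `num_batch` chiefly affects:

```python
# Minimal num_batch sweep, assuming the `ollama` package and a pulled model.
# Model name and prompt are illustrative.
import ollama

prompt = "Summarize the history of operating systems in one paragraph. " * 20

for num_batch in (64, 128, 256, 512, 1024):
    resp = ollama.generate(
        model="qwen3:0.6b",                # illustrative model
        prompt=prompt,
        options={"num_batch": num_batch},  # same key get_ollama_options() emits
    )
    tokens = resp["prompt_eval_count"]
    seconds = resp["prompt_eval_duration"] / 1e9  # nanoseconds -> seconds
    print(f"num_batch={num_batch:4d}: {tokens / seconds:8.1f} prompt tokens/sec")
```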
@@ -157,7 +164,8 @@ def get_config(self) -> Dict[str, Any]:
             "presence_penalty": self.presence_penalty,
             "frequency_penalty": self.frequency_penalty,
             "stop": self.stop,
-            "num_ctx": self.num_ctx
+            "num_ctx": self.num_ctx,
+            "num_batch": self.num_batch
         }

     def get_ollama_options(self) -> Dict[str, Any]:
@@ -198,6 +206,8 @@ def get_ollama_options(self) -> Dict[str, Any]:
             options["stop"] = self.stop
         if self.num_ctx is not None:
             options["num_ctx"] = self.num_ctx
+        if self.num_batch is not None:
+            options["num_batch"] = self.num_batch
         return options

     def get_system_prompt(self) -> str:
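For context, a hedged sketch of how the options assembled by `get_ollama_options()` would typically reach Ollama; `config` stands in for an instance of this class, and the model name is illustrative:

```python
# Usage sketch: pass the assembled options (now including num_batch) to a chat call.
import ollama

options = config.get_ollama_options()  # contains "num_batch" only when it is set
reply = ollama.chat(
    model="qwen3:0.6b",                # illustrative model
    messages=[{"role": "user", "content": "Hello!"}],
    options=options,
)
print(reply["message"]["content"])
```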
@@ -244,6 +254,8 @@ def set_config(self, config: Dict[str, Any]) -> None:
             self.stop = config["stop"]
         if "num_ctx" in config:
             self.num_ctx = config["num_ctx"]
+        if "num_batch" in config:
+            self.num_batch = config["num_batch"]

     def display_current_config(self) -> None:
         """Display the current model configuration."""
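With the `set_config` branch above, `num_batch` now round-trips through saved configurations. A small sketch, with class name and file path assumed for illustration:

```python
# Round-trip sketch: num_batch persists through get_config()/set_config().
import json

config.num_batch = 256                     # `config` is a stand-in instance
with open("model_config.json", "w") as f:  # illustrative path
    json.dump(config.get_config(), f)

restored = ModelConfig()                   # assumed class name
with open("model_config.json") as f:
    restored.set_config(json.load(f))
assert restored.num_batch == 256
```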
@@ -273,7 +285,8 @@ def format_value(value):
             f"[bold][orange3]11.[/orange3] presence_penalty:[/bold] {format_value(self.presence_penalty)}\n"
             f"[bold][orange3]12.[/orange3] frequency_penalty:[/bold] {format_value(self.frequency_penalty)}\n"
             f"[bold][orange3]13.[/orange3] stop:[/bold] {format_value(self.stop)}\n"
-            f"[bold][orange3]14.[/orange3] num_ctx:[/bold] {format_value(self.num_ctx)}",
+            f"[bold][orange3]14.[/orange3] num_ctx:[/bold] {format_value(self.num_ctx)}\n"
+            f"[bold][orange3]15.[/orange3] num_batch:[/bold] {format_value(self.num_batch)}",
             title="[bold blue]🎮 Model Parameters[/bold blue]",
             border_style="blue", expand=False))
         self.console.print("\n[bold yellow]Note:[/bold yellow] Unset values will use Ollama's defaults.")
@@ -325,7 +338,7 @@ def display_parameter_explanations(self) -> None:
         for param in [
             "num_keep", "seed", "num_predict", "top_k", "top_p", "min_p",
             "typical_p", "repeat_last_n", "temperature", "repeat_penalty",
-            "presence_penalty", "frequency_penalty", "stop", "num_ctx"
+            "presence_penalty", "frequency_penalty", "stop", "num_ctx", "num_batch"
         ]:
             info = self.parameter_explanations[param]
             table.add_row(
@@ -447,6 +460,7 @@ def configure_model_interactive(self, clear_console_func: Optional[Callable] = None):
                 self.frequency_penalty = None
                 self.stop = None
                 self.num_ctx = None
+                self.num_batch = None
                 result_message = "[green]All parameters unset (using Ollama defaults).[/green]"
                 result_style = "green"
                 continue
@@ -523,6 +537,10 @@ def configure_model_interactive(self, clear_console_func: Optional[Callable] = None):
                         self.num_ctx = None
                         result_message = "[green]num_ctx unset (using Ollama default).[/green]"
                         result_style = "green"
+                    case 15:
+                        self.num_batch = None
+                        result_message = "[green]num_batch unset (using Ollama default).[/green]"
+                        result_style = "green"
                     case _:
                         result_message = "[red]Invalid parameter number.[/red]"
                         result_style = "red"
@@ -740,6 +758,19 @@ def configure_model_interactive(self, clear_console_func: Optional[Callable] = None):
                         except ValueError:
                             result_message = "[red]Please enter a valid integer.[/red]"
                             result_style = "red"
+                    case "15":
+                        try:
+                            new_value = IntPrompt.ask("Batch size (num_batch, size of the batches into which the prompt is divided)", default=self.num_batch)
+                            if new_value >= 1:
+                                self.num_batch = new_value
+                                result_message = f"[green]num_batch set to {new_value}.[/green]"
+                                result_style = "green"
+                            else:
+                                result_message = "[red]num_batch must be a positive integer.[/red]"
+                                result_style = "red"
+                        except ValueError:
+                            result_message = "[red]Please enter a valid integer.[/red]"
+                            result_style = "red"

                     case _:
                         result_message = "[red]Invalid selection. Please choose a valid option.[/red]"
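The interactive handler above enforces only the lower bound, while the documented range caps `num_batch` at the context size. A hedged helper a caller could apply before building options; the 2048 fallback approximates a common Ollama default context length and is an assumption:

```python
def clamp_num_batch(num_batch: int, num_ctx: int | None, default_ctx: int = 2048) -> int:
    """Clamp num_batch into the documented 1..context-size range.

    default_ctx approximates Ollama's default context length when num_ctx
    is unset; treat it as an assumption, not a guarantee.
    """
    ceiling = num_ctx if num_ctx is not None else default_ctx
    return max(1, min(num_batch, ceiling))
```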