Skip to content

Commit 38de91a

Browse files
authored
feat: remove debug mode and remove auto parallelism cap calculation (#41)
1 parent 6e430bc commit 38de91a

File tree

4 files changed

+41
-182
lines changed

4 files changed

+41
-182
lines changed

src/cli.py

Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,6 @@ def create_parser(cls) -> argparse.ArgumentParser:
360360

361361
# Diagnostics group
362362
g_diag = parser.add_argument_group("Diagnostics")
363-
g_diag.add_argument("--debug", action="store_true", help="Enable debug logging")
364363
g_diag.add_argument(
365364
"--yes", action="store_true", help="Assume 'yes' for prompts"
366365
)
@@ -1034,8 +1033,7 @@ async def run_list_runs(self, args: argparse.Namespace) -> int:
10341033

10351034
except (OSError, json.JSONDecodeError) as e:
10361035
self.console.print(f"[red]Error listing runs: {e}[/red]")
1037-
if args.debug:
1038-
self.console.print_exception()
1036+
self.console.print_exception()
10391037
return 1
10401038

10411039
async def run_show_run(self, args: argparse.Namespace) -> int:
@@ -1140,8 +1138,7 @@ async def run_show_run(self, args: argparse.Namespace) -> int:
11401138

11411139
except (OSError, json.JSONDecodeError) as e:
11421140
self.console.print(f"[red]Error showing run: {e}[/red]")
1143-
if args.debug:
1144-
self.console.print_exception()
1141+
self.console.print_exception()
11451142
return 1
11461143

11471144
async def run_prune(self, args: argparse.Namespace) -> int:
@@ -1238,8 +1235,7 @@ async def run_prune(self, args: argparse.Namespace) -> int:
12381235
return 0
12391236
except Exception as e:
12401237
self.console.print(f"[red]Prune failed: {e}[/red]")
1241-
if args.debug:
1242-
self.console.print_exception()
1238+
self.console.print_exception()
12431239
return 1
12441240

12451241
def _display_detailed_results(
@@ -1481,7 +1477,7 @@ async def run_orchestrator(self, args: argparse.Namespace) -> int:
14811477
setup_structured_logging(
14821478
logs_dir=args.logs_dir,
14831479
run_id=run_id,
1484-
debug=args.debug,
1480+
debug=True,
14851481
quiet=args.no_tui and args.output == "quiet",
14861482
no_tui=args.no_tui,
14871483
)
@@ -1638,8 +1634,7 @@ async def run_orchestrator(self, args: argparse.Namespace) -> int:
16381634
if args.output:
16391635
cli_config["output"] = args.output
16401636
# Server extension support removed
1641-
if args.debug:
1642-
cli_config.setdefault("logging", {})["level"] = "DEBUG"
1637+
# Always use full logging; no debug toggle
16431638
# Add CLI auth args
16441639
if hasattr(args, "oauth_token") and args.oauth_token:
16451640
cli_config.setdefault("runner", {})["oauth_token"] = args.oauth_token
@@ -1829,35 +1824,30 @@ def _red(k, v):
18291824
"logs_dir"
18301825
) or full_config.get("logs_dir", Path("./logs"))
18311826

1832-
# Log configuration sources (only in debug mode)
1833-
if full_config.get("debug") or args.debug:
1834-
self.console.print("[dim]Configuration loaded from:[/dim]")
1835-
if cli_config:
1836-
self.console.print(" - Command line arguments")
1837-
# No Pitaya-specific environment variables are used
1838-
if dotenv_config:
1839-
self.console.print(" - .env file")
1840-
if config:
1841-
self.console.print(f" - Config file: {args.config or 'pitaya.yaml'}")
1842-
self.console.print(" - Built-in defaults")
1827+
# Log configuration sources unconditionally
1828+
self.console.print("[dim]Configuration loaded from:[/dim]")
1829+
if cli_config:
1830+
self.console.print(" - Command line arguments")
1831+
if dotenv_config:
1832+
self.console.print(" - .env file")
1833+
if config:
1834+
self.console.print(f" - Config file: {args.config or 'pitaya.yaml'}")
1835+
self.console.print(" - Built-in defaults")
18431836

18441837
# Respect global session volume consent by setting env for runner
18451838
allow_global_session = bool(getattr(args, "allow_global_session_volume", False))
18461839

1847-
# Resolve 'auto' for max_parallel per spec
1848-
if isinstance(max_parallel, str) and max_parallel.lower() == "auto":
1849-
try:
1850-
host_cpu = max(1, os.cpu_count() or 1)
1851-
per_container = max(1, int(container_limits.cpu_count))
1852-
computed = max(2, min(20, host_cpu // per_container))
1853-
max_parallel_val = computed
1854-
except Exception:
1855-
max_parallel_val = 20
1856-
else:
1857-
try:
1840+
# Resolve max_parallel without host resource calculations
1841+
try:
1842+
if isinstance(max_parallel, str):
1843+
# Accept numeric strings; treat non-numeric (e.g., 'auto') as default 5
18581844
max_parallel_val = int(max_parallel)
1859-
except Exception:
1860-
max_parallel_val = 20
1845+
elif isinstance(max_parallel, int):
1846+
max_parallel_val = max_parallel
1847+
else:
1848+
max_parallel_val = 5
1849+
except Exception:
1850+
max_parallel_val = 5
18611851

18621852
# Proxy automatic egress defaults removed
18631853

@@ -2221,8 +2211,7 @@ def emit_json(ev: dict):
22212211
return 2
22222212
except (OrchestratorError, DockerError, ValidationError) as e:
22232213
self.console.print(f"[red]Error: {e}[/red]")
2224-
if args.debug:
2225-
self.console.print_exception()
2214+
self.console.print_exception()
22262215
# Shutdown orchestrator
22272216
await self.orchestrator.shutdown()
22282217
return 1
@@ -2653,8 +2642,7 @@ async def monitor_shutdown():
26532642
return 2
26542643
except (OrchestratorError, DockerError, ValidationError) as e:
26552644
self.console.print(f"[red]Error: {e}[/red]")
2656-
if args.debug:
2657-
self.console.print_exception()
2645+
self.console.print_exception()
26582646
# Shutdown orchestrator
26592647
await self.orchestrator.shutdown()
26602648
return 1

src/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def get_default_config() -> Dict[str, Any]:
245245
"output": "tui",
246246
"state_dir": Path("./pitaya_state"),
247247
"logs_dir": Path("./logs"),
248-
"debug": False,
248+
# Debug mode removed; logs are verbose by default
249249
# Normative defaults per spec §6.1.1 (subset applied where supported)
250250
"import_policy": "auto", # auto|never|always
251251
"import_conflict_policy": "fail", # fail|overwrite|suffix
@@ -262,8 +262,8 @@ def get_default_config() -> Dict[str, Any]:
262262
"tmpfs_size_mb": 512,
263263
},
264264
"orchestration": {
265-
# Spec default: auto => max(2, min(20, floor(host_cpu / runner.container_cpu)))
266-
"max_parallel_instances": "auto",
265+
# Default concurrency: 5; use --max-parallel to override
266+
"max_parallel_instances": 5,
267267
# Branch namespace is hierarchical by default
268268
# Format: orc/<strategy>/<run_id>/k<short8>
269269
"branch_namespace": "hierarchical",

src/orchestration/orchestrator.py

Lines changed: 10 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -207,68 +207,17 @@ async def initialize(self) -> None:
207207
# Clean up orphaned containers from previous runs
208208
await self.cleanup_orphaned_containers()
209209

210-
# Resolve max_parallel_instances (simple adaptive default)
211-
try:
212-
import os
213-
import math
214-
215-
host_cpu = max(1, os.cpu_count() or 1)
216-
# cgroup-aware effective CPUs (best-effort)
217-
try:
218-
import platform
219-
from pathlib import Path as _P
220-
221-
if platform.system() == "Linux":
222-
cpu_max = _P("/sys/fs/cgroup/cpu.max")
223-
if cpu_max.exists():
224-
parts = cpu_max.read_text().strip().split()
225-
if len(parts) >= 2 and parts[0] != "max":
226-
quota = float(parts[0])
227-
period = float(parts[1]) or 100000.0
228-
eff = int(math.ceil(quota / period))
229-
if eff > 0:
230-
host_cpu = eff
231-
else:
232-
q = _P("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
233-
p = _P("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
234-
if q.exists() and p.exists():
235-
quota = float(q.read_text().strip() or "0")
236-
period = float(p.read_text().strip() or "100000")
237-
if quota > 0 and period > 0:
238-
eff = int(math.ceil(quota / period))
239-
if eff > 0:
240-
host_cpu = eff
241-
except Exception:
242-
pass
243-
per_container = max(1, int(self.container_limits.cpu_count))
244-
adaptive = max(2, min(20, host_cpu // per_container))
245-
if self.max_parallel_instances is None:
246-
self.max_parallel_instances = adaptive
247-
logger.info(
248-
f"Parallelism(auto): host_cpu={host_cpu}, per_container={per_container} -> max_parallel_instances={adaptive}"
249-
)
250-
# Oversubscription warning only when not explicit
251-
try:
252-
if (
253-
int(self.max_parallel_instances) * per_container
254-
) > host_cpu and not getattr(self, "_explicit_max_parallel", False):
255-
logger.warning(
256-
f"Configured parallelism may oversubscribe CPU: max_parallel_instances={self.max_parallel_instances}, per_container_cpu={per_container}, host_cpu={host_cpu}"
257-
)
258-
except Exception:
259-
pass
260-
except Exception as e:
261-
logger.debug(f"Adaptive parallelism calc failed: {e}")
210+
# Resolve max_parallel_instances without any host CPU/memory calculations.
211+
# Default to 5 if not provided by the caller/CLI.
212+
if self.max_parallel_instances is None:
213+
self.max_parallel_instances = 5
214+
logger.info("Parallelism(default): max_parallel_instances=5")
262215

263216
# Initialize resource pool semaphore now that parallelism is resolved
264217
self._resource_pool = asyncio.Semaphore(int(self.max_parallel_instances or 1))
265218

266-
# Start multiple background executors for true parallel execution
267-
# If explicit max-parallel set, honor fully; otherwise cap at 10
268-
if getattr(self, "_explicit_max_parallel", False):
269-
num_executors = int(self.max_parallel_instances or 1)
270-
else:
271-
num_executors = min(int(self.max_parallel_instances or 1), 10)
219+
# Start multiple background executors equal to max_parallel_instances
220+
num_executors = int(self.max_parallel_instances or 1)
272221
for i in range(num_executors):
273222
task = asyncio.create_task(self._instance_executor())
274223
self._executor_tasks.append(task)
@@ -1321,86 +1270,11 @@ async def _instance_executor(self) -> None:
13211270
break
13221271

13231272
async def _admission_wait(self, cpu_need: int, mem_need_gb: int) -> None:
1324-
"""Wait until CPU+memory tokens available and disk guard healthy."""
1325-
import shutil
1326-
import time as _time
1327-
1328-
repo_path = getattr(self, "repo_path", Path.cwd())
1329-
pack_dir = repo_path / ".git" / "objects" / "pack"
1330-
start = _time.monotonic()
1331-
while not self._shutdown:
1332-
# Disk free space guard
1333-
try:
1334-
stat = shutil.disk_usage(str(repo_path))
1335-
free_gb = stat.free / (1024**3)
1336-
except Exception:
1337-
free_gb = self._disk_min_free_gb # assume OK if unknown
1338-
# Pack growth slope (MiB/min), best-effort
1339-
try:
1340-
size_bytes = 0
1341-
if pack_dir.exists():
1342-
for p in pack_dir.iterdir():
1343-
if p.is_file() and (p.suffix in (".pack", ".idx")):
1344-
size_bytes += p.stat().st_size
1345-
now = _time.time()
1346-
self._pack_series.append((now, size_bytes))
1347-
slope = 0.0
1348-
# compute against oldest point >= 5min ago if available
1349-
oldest = None
1350-
for ts, sz in list(self._pack_series):
1351-
if now - ts >= 300:
1352-
oldest = (ts, sz)
1353-
break
1354-
if oldest:
1355-
dt_min = max(0.001, (now - oldest[0]) / 60.0)
1356-
slope = max(
1357-
0.0, (size_bytes - oldest[1]) / (1024.0 * 1024.0) / dt_min
1358-
)
1359-
except Exception:
1360-
slope = 0.0
1361-
1362-
async with self._admission_lock:
1363-
if getattr(self, "_explicit_max_parallel", False):
1364-
# Honor operator's explicit parallel setting: bypass CPU/memory guard; keep disk guard
1365-
disk_ok = (free_gb >= self._disk_min_free_gb) and (
1366-
slope <= self._pack_max_slope_mib_per_min
1367-
)
1368-
cpu_ok = True
1369-
mem_ok = True
1370-
else:
1371-
cpu_ok = (self._cpu_in_use + cpu_need) <= self._host_cpu
1372-
mem_ok = (self._mem_in_use_gb + mem_need_gb) <= int(
1373-
self._host_mem_gb * self._mem_guard_pct
1374-
)
1375-
disk_ok = (free_gb >= self._disk_min_free_gb) and (
1376-
slope <= self._pack_max_slope_mib_per_min
1377-
)
1378-
1379-
if cpu_ok and mem_ok and disk_ok:
1380-
self._cpu_in_use += cpu_need
1381-
self._mem_in_use_gb += mem_need_gb
1382-
return
1383-
1384-
# Log periodically while waiting
1385-
try:
1386-
if (int((_time.monotonic() - start)) % 10) == 0:
1387-
logger.info(
1388-
f"admission.wait: cpu_ok={cpu_ok} mem_ok={mem_ok} disk_ok={disk_ok} free_gb={free_gb:.1f} slope_mib_per_min={slope:.1f} in_use(cpu={self._cpu_in_use}/{self._host_cpu}, mem={self._mem_in_use_gb}/{int(self._host_mem_gb*self._mem_guard_pct)})"
1389-
)
1390-
except Exception:
1391-
pass
1392-
# Trigger on-run GC/backpressure attempt when disk is the bottleneck
1393-
if not disk_ok:
1394-
try:
1395-
await self._attempt_on_run_gc()
1396-
except Exception as e:
1397-
logger.debug(f"on-run GC attempt failed: {e}")
1398-
await asyncio.sleep(0.5)
1273+
"""No-op admission: do not gate on CPU/memory/disk; honor only max_parallel semaphore."""
1274+
return
13991275

14001276
async def _admission_release(self, cpu: int, mem_gb: int) -> None:
1401-
async with self._admission_lock:
1402-
self._cpu_in_use = max(0, self._cpu_in_use - cpu)
1403-
self._mem_in_use_gb = max(0, self._mem_in_use_gb - mem_gb)
1277+
return
14041278

14051279
async def _attempt_on_run_gc(self) -> None:
14061280
"""Best-effort GC to free disk by removing oldest failed workspaces for this run.

src/tui/cli.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,7 @@ def create_parser(cls) -> argparse.ArgumentParser:
9292
"--event-types", nargs="+", help="Filter by event type(s)"
9393
)
9494

95-
# Diagnostics
96-
g_diag = parser.add_argument_group("Diagnostics")
97-
g_diag.add_argument("--debug", action="store_true", help="Enable debug logging")
95+
# Diagnostics (debug mode removed; verbose by default)
9896

9997
return parser
10098

@@ -137,8 +135,7 @@ async def run(self, args: argparse.Namespace) -> int:
137135
return 2
138136
except (OSError, IOError) as e:
139137
self.console.print(f"[red]Error: {e}[/red]")
140-
if args.debug:
141-
self.console.print_exception()
138+
self.console.print_exception()
142139
return 1
143140

144141
# Connected modes removed

0 commit comments

Comments (0)