Skip to content

Commit f45656d

Browse files
committed
feat: complete validation expansion with guardrail wiring, desktop failure-injection tests, isolation coverage, and opt-in online API lane
1 parent aaa45ce commit f45656d

24 files changed

+725
-25
lines changed

.secrets.baseline

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,14 +133,14 @@
133133
"filename": "README.md",
134134
"hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684",
135135
"is_verified": false,
136-
"line_number": 607
136+
"line_number": 654
137137
},
138138
{
139139
"type": "Secret Keyword",
140140
"filename": "README.md",
141141
"hashed_secret": "11fa7c37d697f30e6aee828b4426a10f83ab2380",
142142
"is_verified": false,
143-
"line_number": 808
143+
"line_number": 855
144144
}
145145
],
146146
"__init__.py": [
@@ -149,7 +149,7 @@
149149
"filename": "__init__.py",
150150
"hashed_secret": "fdee7bc7f680ce9707185015de5ff37b5098748b",
151151
"is_verified": false,
152-
"line_number": 2171
152+
"line_number": 2172
153153
}
154154
],
155155
"docs\\PLUGIN_MIGRATION.md": [
@@ -750,5 +750,5 @@
750750
}
751751
]
752752
},
753-
"generated_at": "2026-02-16T17:49:36Z"
753+
"generated_at": "2026-02-16T19:37:41Z"
754754
}

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@ A continuous, real-time runtime diagnostics suite for ComfyUI featuring **LLM-po
1010

1111
## Latest Updates (Feb 2026) - Click to expand
1212

13+
<details>
14+
<summary><strong>Validation Expansion Remediation: Desktop Hardening + Isolation Test Pack</strong></summary>
15+
16+
- Added targeted desktop failure-injection regression tests for corrupt state recovery, flush-failure non-crash paths, and history migration continuity.
17+
- Added non-ComfyUI isolation coverage for metadata contract validation and PromptComposer/harness payload compatibility.
18+
- Added an opt-in online API test lane scaffold (`RUN_ONLINE_API_TESTS=true`) to separate secret-scoped provider checks from default local runs.
19+
- Validation gate status: detect-secrets + pre-commit + backend full pytest + frontend E2E passed; online provider smoke tests remain opt-in/secret-scoped and skip safely when credentials are absent.
20+
21+
</details>
22+
1323
<details>
1424
<summary><strong>Validation Expansion Foundation: Runtime Guardrail Config (partial rollout)</strong></summary>
1525

@@ -1103,6 +1113,18 @@ python scripts/phase2_gate.py
11031113
python scripts/phase2_gate.py --fast
11041114
```
11051115

1116+
### Validation Expansion Pack (Targeted)
1117+
1118+
```powershell
1119+
# Targeted remediation pack (R17/T13/T9 + scaffolded T5 lane)
1120+
.\.venv\Scripts\python.exe -m pytest tests/test_r17_guardrails.py tests/test_t13_desktop_failures.py tests/test_t13_flush_storm.py tests/test_t13_migration.py tests/test_t9_pipeline_contract.py tests/test_t9_harness.py tests/integration/test_pipeline_isolation.py tests/integration/test_online_api_opt_in.py
1121+
1122+
# Opt-in online lane (secret-scoped)
1123+
$env:RUN_ONLINE_API_TESTS='true'
1124+
.\.venv\Scripts\python.exe -m pytest tests/integration/test_online_api_opt_in.py -q
1125+
Remove-Item Env:RUN_ONLINE_API_TESTS
1126+
```
1127+
11061128
**CI Status**: The gate runs automatically on push/PR to `main` and `dev` branches.
11071129

11081130
---

__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
core_rate_limit=getattr(CONFIG, "llm_core_rate_limit", None),
7575
light_rate_limit=getattr(CONFIG, "llm_light_rate_limit", None),
7676
max_concurrent=getattr(CONFIG, "llm_max_concurrent", None),
77+
rate_window_seconds=getattr(getattr(CONFIG, "guardrails", None), "RATE_LIMIT_WINDOW_SECONDS", None),
7778
)
7879

7980

history_store.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,13 @@ class HistoryStore:
9292
history = store.get_all()
9393
"""
9494

95-
def __init__(self, filepath: str, maxlen: int = 50, max_bytes: int = 0):
95+
def __init__(
96+
self,
97+
filepath: str,
98+
maxlen: int = 50,
99+
max_bytes: int = 0,
100+
aggregate_window_seconds: int = 60,
101+
):
96102
"""
97103
Initialize the history store.
98104
@@ -109,7 +115,11 @@ def __init__(self, filepath: str, maxlen: int = 50, max_bytes: int = 0):
109115
self._history: List[HistoryEntry] = []
110116
self._loaded = False
111117
# Aggregation window: within this window, repeated identical errors are aggregated.
112-
self._aggregate_window_seconds = 60
118+
try:
119+
window = int(aggregate_window_seconds)
120+
except Exception:
121+
window = 60
122+
self._aggregate_window_seconds = window if window > 0 else 60
113123

114124
@property
115125
def filepath(self) -> str:

logger.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,18 @@ def _get_history_store() -> HistoryStore:
128128
global _history_store
129129
if _history_store is None:
130130
_migrate_legacy_data()
131+
aggregate_window_seconds = 60
132+
try:
133+
aggregate_window_seconds = int(
134+
getattr(CONFIG.guardrails, "AGGREGATION_WINDOW_SECONDS", 60)
135+
)
136+
except Exception:
137+
aggregate_window_seconds = 60
131138
_history_store = HistoryStore(
132139
_history_file,
133140
maxlen=CONFIG.history_size,
134-
max_bytes=getattr(CONFIG, 'history_size_bytes', 5*1024*1024)
141+
max_bytes=getattr(CONFIG, 'history_size_bytes', 5*1024*1024),
142+
aggregate_window_seconds=aggregate_window_seconds,
135143
)
136144
return _history_store
137145

@@ -465,7 +473,14 @@ def __init__(self, message_queue):
465473
self.buffer = []
466474
self.in_traceback = False
467475
self.last_buffer_time = 0
468-
self._aggregate_window_seconds = 60
476+
try:
477+
self._aggregate_window_seconds = int(
478+
getattr(CONFIG.guardrails, "AGGREGATION_WINDOW_SECONDS", 60)
479+
)
480+
except Exception:
481+
self._aggregate_window_seconds = 60
482+
if self._aggregate_window_seconds <= 0:
483+
self._aggregate_window_seconds = 60
469484

470485
def _parse_ts(self, ts: str) -> datetime.datetime:
471486
try:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "comfyui-doctor"
33
description = "A real-time debugging assistant for ComfyUI, featuring interactive debugging chat, and 50+ fix patterns. Automatically intercepts terminal output from startup, and delivers prioritized fix suggestions with node-level context extraction. Now supports JSON-based pattern management with hot-reload and full i18n support for 9 languages."
4-
version = "1.6.4"
4+
version = "1.6.5"
55
license = {text = "MIT"}
66
readme = "README.md"
77
requires-python = ">=3.10"

rate_limiter.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,17 @@ class RateLimiter:
2121
Uses time.monotonic() to avoid issues with system clock changes.
2222
"""
2323

24-
def __init__(self, max_per_minute: int = 60):
24+
def __init__(self, max_per_minute: int = 60, window_seconds: int = 60):
2525
"""
2626
Initialize rate limiter.
2727
2828
Args:
2929
max_per_minute: Maximum requests allowed per minute.
30+
window_seconds: Refill window size in seconds.
3031
"""
3132
self.max_tokens = max_per_minute
3233
self.tokens = float(max_per_minute)
34+
self.window_seconds = window_seconds if window_seconds > 0 else 60
3335
self.last_refill = time.monotonic()
3436
self._lock = threading.Lock()
3537

@@ -51,8 +53,8 @@ def _refill(self) -> None:
5153
"""Refill tokens based on elapsed time."""
5254
now = time.monotonic()
5355
elapsed = now - self.last_refill
54-
# Refill at rate of max_tokens per 60 seconds
55-
refill_amount = elapsed * (self.max_tokens / 60.0)
56+
# Refill at rate of max_tokens per configured window.
57+
refill_amount = elapsed * (self.max_tokens / float(self.window_seconds))
5658
self.tokens = min(self.max_tokens, self.tokens + refill_amount)
5759
self.last_refill = now
5860

services/config_guardrails.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
from dataclasses import dataclass
22
import os
3-
from typing import Optional
3+
4+
5+
def _env_int(name: str, default: int) -> int:
6+
raw = os.getenv(name)
7+
if raw is None:
8+
return default
9+
try:
10+
value = int(raw)
11+
except (TypeError, ValueError):
12+
return default
13+
return value if value > 0 else default
414

515
@dataclass
616
class GuardrailConfig:
@@ -24,11 +34,11 @@ class GuardrailConfig:
2434
def load(cls) -> "GuardrailConfig":
2535
"""Load configuration from environment variables with defaults."""
2636
return cls(
27-
MAX_HISTORY_ENTRIES=int(os.getenv("DOCTOR_GUARDRAIL_MAX_HISTORY", "1000")),
28-
MAX_LOG_SIZE_MB=int(os.getenv("DOCTOR_GUARDRAIL_MAX_LOG_SIZE_MB", "10")),
29-
MAX_JOB_RETENTION_SECONDS=int(os.getenv("DOCTOR_GUARDRAIL_JOB_RETENTION", "86400")),
30-
RATE_LIMIT_WINDOW_SECONDS=int(os.getenv("DOCTOR_GUARDRAIL_RATE_LIMIT_WINDOW", "60")),
31-
AGGREGATION_WINDOW_SECONDS=int(os.getenv("DOCTOR_GUARDRAIL_AGGREGATION_WINDOW", "60")),
32-
PROVIDER_TIMEOUT_SECONDS=int(os.getenv("DOCTOR_GUARDRAIL_PROVIDER_TIMEOUT", "30")),
33-
PROVIDER_MAX_RETRIES=int(os.getenv("DOCTOR_GUARDRAIL_PROVIDER_RETRIES", "3"))
37+
MAX_HISTORY_ENTRIES=_env_int("DOCTOR_GUARDRAIL_MAX_HISTORY", 1000),
38+
MAX_LOG_SIZE_MB=_env_int("DOCTOR_GUARDRAIL_MAX_LOG_SIZE_MB", 10),
39+
MAX_JOB_RETENTION_SECONDS=_env_int("DOCTOR_GUARDRAIL_JOB_RETENTION", 86400),
40+
RATE_LIMIT_WINDOW_SECONDS=_env_int("DOCTOR_GUARDRAIL_RATE_LIMIT_WINDOW", 60),
41+
AGGREGATION_WINDOW_SECONDS=_env_int("DOCTOR_GUARDRAIL_AGGREGATION_WINDOW", 60),
42+
PROVIDER_TIMEOUT_SECONDS=_env_int("DOCTOR_GUARDRAIL_PROVIDER_TIMEOUT", 30),
43+
PROVIDER_MAX_RETRIES=_env_int("DOCTOR_GUARDRAIL_PROVIDER_RETRIES", 3),
3444
)

services/node_health.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""
2+
T5 Service: Node Health Scoring.
3+
4+
Calculates heuristic health scores for nodes based on error history.
5+
Currently limited to "Failure Frequency" as we do not track successful executions yet.
6+
"""
7+
8+
from typing import List, Dict, Any
9+
from collections import defaultdict
10+
11+
class NodeHealthService:
    """
    Analyzes error history to determine node health.
    """

    @staticmethod
    def calculate_node_failures(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Calculate failure counts per node type.

        Args:
            history: List of error history entries.

        Returns:
            List of dicts: [{"node_class": str, "count": int, "last_error": str}, ...]
            Sorted by count descending.
        """
        node_stats = defaultdict(lambda: {"count": 0, "last_error": "", "node_class": ""})

        for entry in history:
            node_info = entry.get("node_info", {})
            if not node_info:
                # Entries without node context (e.g. legacy snapshot-only
                # records) cannot be attributed to a node type.
                continue

            node_class = node_info.get("node_class") or node_info.get("node_type")
            if not node_class:
                continue

            # Key by node class (type) rather than specific node_id instance
            # to capture systematic improvements needed for a node type.
            key = node_class

            # Aggregated entries may carry a repeat_count; weight by it so
            # collapsed duplicates still count as multiple failures.
            try:
                weight = int(entry.get("repeat_count", 1) or 1)
            except (TypeError, ValueError):
                weight = 1

            node_stats[key]["node_class"] = node_class
            node_stats[key]["count"] += weight
            node_stats[key]["last_error"] = entry.get("error_type") or "Unknown Error"

        # Convert to list and sort, worst offenders first.
        results = list(node_stats.values())
        results.sort(key=lambda x: x["count"], reverse=True)

        return results

    @staticmethod
    def calculate_health_score(failures: int, total_executions: int = 0) -> float:
        """
        Calculate a 0.0-1.0 health score.
        If total_executions is 0 (unknown), score is based on raw failure count decay.
        """
        if total_executions > 0:
            return max(0.0, 1.0 - (failures / total_executions))

        # Heuristic exponential decay (0.95 ** failures):
        # 1 failure ~= 0.95, 10 failures ~= 0.60, 100 failures ~= 0.006.
        # (Previous comment stated 0.9/0.5/0.1, which did not match the code.)
        return float(max(0.0, 0.95 ** failures))

services/providers/base.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,27 @@ class BaseProviderAdapter(ABC):
2929
Enforces consistent timeout, retry, and response structure.
3030
"""
3131

32-
def __init__(self, provider_id: str, timeout: float = 30.0, max_retries: int = 2):
32+
def __init__(
33+
self,
34+
provider_id: str,
35+
timeout: Optional[float] = None,
36+
max_retries: Optional[int] = None,
37+
):
3338
self.provider_id = provider_id
34-
self.timeout = timeout
35-
self.max_retries = max_retries
39+
default_timeout = 30.0
40+
default_retries = 2
41+
try:
42+
# CRITICAL: keep this import local to avoid module import coupling at startup.
43+
from config import CONFIG # pylint: disable=import-outside-toplevel
44+
default_timeout = float(getattr(CONFIG.guardrails, "PROVIDER_TIMEOUT_SECONDS", 30))
45+
default_retries = int(getattr(CONFIG.guardrails, "PROVIDER_MAX_RETRIES", 2))
46+
except Exception:
47+
pass
48+
49+
resolved_timeout = default_timeout if timeout is None else float(timeout)
50+
resolved_retries = default_retries if max_retries is None else int(max_retries)
51+
self.timeout = resolved_timeout if resolved_timeout > 0 else default_timeout
52+
self.max_retries = resolved_retries if resolved_retries >= 0 else default_retries
3653

3754
async def execute_with_retry(
3855
self,

0 commit comments

Comments
 (0)