Skip to content

Commit e31d9cc

Browse files
authored
NOISSUE - Update to SMQ v0.19.1 (#222)
* feat: update environment variables for local development and enhance proxy request handling Signed-off-by: Sammy Oina <sammyoina@gmail.com> * chore: update release tag for Docker image and fix supermq dependency version Signed-off-by: Sammy Oina <sammyoina@gmail.com> * feat: update environment variables and target URLs for internal services Signed-off-by: Sammy Oina <sammyoina@gmail.com> --------- Signed-off-by: Sammy Oina <sammyoina@gmail.com>
1 parent a9568d1 commit e31d9cc

File tree

9 files changed

+408
-54
lines changed

9 files changed

+408
-54
lines changed

docker/.env

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ SMQ_EMAIL_FROM_ADDRESS=__SMQ_EMAIL_FROM_ADDRESS__
187187
SMQ_EMAIL_FROM_NAME=Cube AI
188188

189189
# Docker image tag
190-
SMQ_RELEASE_TAG=latest
190+
SMQ_RELEASE_TAG=v0.19.1
191191

192192
# Proxy
193193
UV_CUBE_PROXY_LOG_LEVEL=debug
@@ -360,3 +360,7 @@ MG_UI_RELEASE_TAG=latest
360360
MG_BACKEND_RELEASE_TAG=latest
361361
MG_BACKEND_URL=http://magistrala-backend:9097
362362
MG_UI_IMAGE_URL=http://magistrala-backend:9097
363+
364+
# LLM response timeouts
365+
UV_CUBE_PROXY_SERVER_WRITE_TIMEOUT=120s
366+
UV_CUBE_PROXY_SERVER_READ_TIMEOUT=120s

docker/cube-compose.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,8 @@ services:
222222
UV_CUBE_PROXY_DB_PASS: ${UV_CUBE_PROXY_DB_PASS}
223223
UV_CUBE_PROXY_DB_NAME: ${UV_CUBE_PROXY_DB_NAME}
224224
SMQ_ALLOW_UNVERIFIED_USER: ${SMQ_ALLOW_UNVERIFIED_USER}
225+
UV_CUBE_PROXY_SERVER_WRITE_TIMEOUT: ${UV_CUBE_PROXY_SERVER_WRITE_TIMEOUT}
226+
UV_CUBE_PROXY_SERVER_READ_TIMEOUT: ${UV_CUBE_PROXY_SERVER_READ_TIMEOUT}
225227
UV_CUBE_PROXY_DB_SSL_MODE: ${UV_CUBE_PROXY_DB_SSL_MODE}
226228
UV_CUBE_PROXY_DB_SSL_CERT: ${UV_CUBE_PROXY_DB_SSL_CERT}
227229
UV_CUBE_PROXY_DB_SSL_KEY: ${UV_CUBE_PROXY_DB_SSL_KEY}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ go 1.26.0
55
require (
66
github.com/absmach/callhome v0.18.2
77
github.com/absmach/certs v0.18.2
8-
github.com/absmach/supermq v0.19.1-0.20260311095911-28ae84286e16
8+
github.com/absmach/supermq v0.19.1
99
github.com/caarlos0/env/v11 v11.4.0
1010
github.com/go-chi/chi/v5 v5.2.5
1111
github.com/go-kit/kit v0.13.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ github.com/absmach/callhome v0.18.2 h1:dmopRHm2qTheHN1hdUKRRYpKwRrj7X9d8AWCFrb+K
1616
github.com/absmach/callhome v0.18.2/go.mod h1:LEXKhES9JJtj3tBgTZv7VPNjOi5ukJQB0mFic0QP60Q=
1717
github.com/absmach/certs v0.18.2 h1:s6KKL3/KfDZ6z0IxvNCksIOUwRnEgQyCpeAonuR15No=
1818
github.com/absmach/certs v0.18.2/go.mod h1:scqVZsmW2xPScnpMTtE70oN6cn0LLjFcJVPi4JKZ4+E=
19-
github.com/absmach/supermq v0.19.1-0.20260311095911-28ae84286e16 h1:C8ekYx3p/uFlXa6WA6Vg1IRZVloGOOi0OqT3QkBQIMI=
20-
github.com/absmach/supermq v0.19.1-0.20260311095911-28ae84286e16/go.mod h1:UCC6/UIRhO70inBIzlwC1Cm/wCyQj+yuP4dhMZSjXIc=
19+
github.com/absmach/supermq v0.19.1 h1:uLrf1fXpn0W6BkSSaa+d1Kw0KXygSfNn+b2EqFpCiMA=
20+
github.com/absmach/supermq v0.19.1/go.mod h1:UCC6/UIRhO70inBIzlwC1Cm/wCyQj+yuP4dhMZSjXIc=
2121
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
2222
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
2323
github.com/caarlos0/env/v11 v11.4.0 h1:Kcb6t5kIIr4XkoQC9AF2j+8E1Jsrl3Wz/hhm1LtoGAc=

guardrails/rails/general.co

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,19 @@
44
import core
55

66
# ---------------------------------------------------------------------------
7-
# Guard-only NeMo configuration.
7+
# NeMo Guardrails Configuration
88
#
9-
# NeMo is used EXCLUSIVELY for input/output safety guardrails (pattern
10-
# matching). No LLM calls are made inside NeMo.
9+
# Guards run as high-priority background flows. When a guard matches, its
10+
# refusal message is returned immediately.
1111
#
12-
# Response generation is handled by the Python router:
13-
# - Greetings / goodbye / capabilities → Python canned responses
14-
# - All other messages → direct LLM call (bypassing NeMo flow engine)
12+
# For messages that pass all guards, the low-priority `passthrough response`
13+
# flow fires and returns a "<<GUARDRAILS_PASS>>" marker so the Python
14+
# router knows NeMo approved the message. This avoids the ~5 s delay
15+
# previously caused by `wait indefinitely` leaving NeMo's event loop
16+
# with no response to emit.
1517
#
16-
# This avoids NeMo's Colang 2.x state-machine issues with smaller models
17-
# (llama3.2:3b) where intent classification returns names without the
18-
# required "user " prefix, causing `continuation on unhandled user intent`
19-
# to never fire.
18+
# Response generation for approved messages is handled by the Python router
19+
# which calls the LLM directly after receiving the pass marker.
2020
# ---------------------------------------------------------------------------
2121

2222
flow main
@@ -27,5 +27,21 @@ flow main
2727
# Output-side guard — catches unsafe bot output patterns.
2828
activate output guard
2929

30+
# Fast catch-all for approved messages (see the header comment at the top of this file).
31+
activate passthrough response
32+
3033
# Keep main alive so the activated flows persist across turns.
3134
wait indefinitely
35+
36+
37+
# ---------------------------------------------------------------------------
38+
# Low-priority catch-all: fires for any user message that passed all guards.
39+
# Produces a marker so NeMo returns in <1 s instead of ~5 s.
40+
# Priority 0.1 ensures guards (0.9 / 0.8) always take precedence.
41+
# ---------------------------------------------------------------------------
42+
43+
@loop("passthrough_loop")
44+
flow passthrough response
45+
priority 0.1
46+
user said something
47+
bot say "<<GUARDRAILS_PASS>>"

guardrails/src/adapters/llm/cube_llm.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,27 @@ def _get_model_from_context(self) -> Optional[str]:
101101
logger.debug("CubeLLM: generation_options_var not set for model")
102102
return None
103103

104+
def _get_base_url_from_context(self) -> Optional[str]:
    """Return the per-request ``base_url`` override stored in the NeMo context.

    Background (design intent of this integration): the proxy passes the
    caller's domain ID to guardrails in the ``X-Domain-ID`` header, and
    guardrails places it in ``llm_params["base_url"]``. LLM calls are
    then sent back through the proxy under the ``/{domainID}/v1/…`` path
    prefix, which domain-level authentication requires.

    Returns:
        The ``base_url`` value converted to ``str``, or ``None`` when the
        generation options are unset, carry no ``llm_params``, or hold an
        empty/missing ``base_url``.
    """
    try:
        options = generation_options_var.get()
        params = options.llm_params if options else None
        candidate = params.get("base_url") if params else None
        if not candidate:
            # Nothing usable in the context; the caller decides the fallback.
            return None
        logger.debug(f"CubeLLM: found base_url in context: {candidate}")
        return str(candidate)
    except LookupError:
        # No generation options were available for this request.
        logger.debug("CubeLLM: generation_options_var not set for base_url")
        return None
124+
104125
def _merge_headers(self) -> Dict[str, str]:
105126
base_headers = self._config_headers or {}
106127
request_headers = self._get_headers_from_context()
@@ -153,7 +174,14 @@ async def _agenerate(
153174
model = context_model or self.model_name
154175
logger.debug(f"CubeLLM._agenerate: model='{model}', from_context={context_model is not None}")
155176

156-
base_url = self._normalized_base_url
177+
# Support per-request base_url from context (needed for domain-prefixed proxy URLs)
178+
context_base_url = self._get_base_url_from_context()
179+
if context_base_url:
180+
if not context_base_url.endswith("/v1"):
181+
context_base_url = f"{context_base_url.rstrip('/')}/v1"
182+
base_url = context_base_url
183+
else:
184+
base_url = self._normalized_base_url
157185

158186
try:
159187
# Create a temporary client to inject per-request headers
@@ -200,7 +228,14 @@ def _generate(
200228
model = context_model or self.model_name
201229
logger.debug(f"CubeLLM._generate: model='{model}', from_context={context_model is not None}")
202230

203-
base_url = self._normalized_base_url
231+
# Support per-request base_url from context (needed for domain-prefixed proxy URLs)
232+
context_base_url = self._get_base_url_from_context()
233+
if context_base_url:
234+
if not context_base_url.endswith("/v1"):
235+
context_base_url = f"{context_base_url.rstrip('/')}/v1"
236+
base_url = context_base_url
237+
else:
238+
base_url = self._normalized_base_url
204239

205240
try:
206241
temp_client = ChatOpenAI(

guardrails/src/adapters/runtime/nemo_runtime.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,14 @@ async def swap(self, materialized: MaterializedGuardrail) -> None:
114114

115115
new_rails = LLMRails(rails_config)
116116

117+
# Reduce max_events from the default 500.
118+
# Guards and the passthrough flow fire within ~30 events;
119+
# the remaining hundreds are cascading UnhandledEvent noise
120+
# that adds seconds of latency with no functional benefit.
121+
if hasattr(new_rails, "runtime") and hasattr(new_rails.runtime, "max_events"):
122+
new_rails.runtime.max_events = 100
123+
logger.info("Set NeMo runtime max_events to 100 (was 500)")
124+
117125
# Atomic swap
118126
self._rails = new_rails
119127
self._revision = materialized.revision

0 commit comments

Comments
 (0)