@@ -101,6 +101,27 @@ def _get_model_from_context(self) -> Optional[str]:
101101 logger .debug ("CubeLLM: generation_options_var not set for model" )
102102 return None
103103
def _get_base_url_from_context(self) -> Optional[str]:
    """Read the per-request ``base_url`` from the NeMo context.

    The proxy forwards the caller's domain ID to guardrails via the
    ``X-Domain-ID`` header. Guardrails then includes it in
    ``llm_params["base_url"]`` so that LLM calls are routed back
    through the proxy with the correct ``/{domainID}/v1/…`` path
    prefix, which is needed for domain-level authentication.

    Returns:
        The context-supplied base URL as a string, with surrounding
        whitespace and trailing slashes removed, or ``None`` when the
        generation options are unset, carry no ``llm_params``, or hold
        no usable ``base_url`` entry.
    """
    # Keep the try body to the one lookup that is expected to raise
    # LookupError when the variable is unset; a KeyError/IndexError from
    # later steps must not be misreported as "not set".
    try:
        gen_options = generation_options_var.get()
    except LookupError:
        logger.debug("CubeLLM: generation_options_var not set for base_url")
        return None

    if not gen_options or not gen_options.llm_params:
        return None

    raw_base_url = gen_options.llm_params.get("base_url")
    if not raw_base_url:
        return None

    # Callers append "/v1" unless the value already ends with "/v1", so a
    # trailing slash ("…/v1/") would otherwise be turned into "…/v1/v1".
    base_url = str(raw_base_url).strip().rstrip("/")
    if not base_url:
        return None

    logger.debug(f"CubeLLM: found base_url in context: {base_url}")
    return base_url
124+
104125 def _merge_headers (self ) -> Dict [str , str ]:
105126 base_headers = self ._config_headers or {}
106127 request_headers = self ._get_headers_from_context ()
@@ -153,7 +174,14 @@ async def _agenerate(
153174 model = context_model or self .model_name
154175 logger .debug (f"CubeLLM._agenerate: model='{ model } ', from_context={ context_model is not None } " )
155176
156- base_url = self ._normalized_base_url
177+ # Support per-request base_url from context (needed for domain-prefixed proxy URLs)
178+ context_base_url = self ._get_base_url_from_context ()
179+ if context_base_url :
180+ if not context_base_url .endswith ("/v1" ):
181+ context_base_url = f"{ context_base_url .rstrip ('/' )} /v1"
182+ base_url = context_base_url
183+ else :
184+ base_url = self ._normalized_base_url
157185
158186 try :
159187 # Create a temporary client to inject per-request headers
@@ -200,7 +228,14 @@ def _generate(
200228 model = context_model or self .model_name
201229 logger .debug (f"CubeLLM._generate: model='{ model } ', from_context={ context_model is not None } " )
202230
203- base_url = self ._normalized_base_url
231+ # Support per-request base_url from context (needed for domain-prefixed proxy URLs)
232+ context_base_url = self ._get_base_url_from_context ()
233+ if context_base_url :
234+ if not context_base_url .endswith ("/v1" ):
235+ context_base_url = f"{ context_base_url .rstrip ('/' )} /v1"
236+ base_url = context_base_url
237+ else :
238+ base_url = self ._normalized_base_url
204239
205240 try :
206241 temp_client = ChatOpenAI (
0 commit comments