@@ -180,6 +180,8 @@ def generate_response(
180180 return self ._handle_caii_request (prompt )
181181 if self .inference_type == "openai" :
182182 return self ._handle_openai_request (prompt )
183+ if self .inference_type == "openai_compatible" :
184+ return self ._handle_openai_compatible_request (prompt )
183185 if self .inference_type == "gemini" :
184186 return self ._handle_gemini_request (prompt )
185187 raise ModelHandlerError (f"Unsupported inference_type={ self .inference_type } " , 400 )
@@ -342,6 +344,66 @@ def _handle_openai_request(self, prompt: str):
342344 except Exception as e :
343345 raise ModelHandlerError (f"OpenAI request failed: { e } " , 500 )
344346
347+ # ---------- OpenAI Compatible -------------------------------------------------------
def _handle_openai_compatible_request(self, prompt: str):
    """Send ``prompt`` to an OpenAI-compatible chat-completions endpoint.

    The API key is read from the ``OpenAI_Endpoint_Compatible_Key``
    environment variable; the base URL is taken from ``self.caii_endpoint``.

    Args:
        prompt: Text sent as the single ``user`` message.

    Returns:
        The reply text passed through ``self._extract_json_from_text``, or
        the raw reply text when ``self.custom_p`` is truthy.

    Raises:
        ModelHandlerError: If the API key or the endpoint is missing (the
            original message is preserved), or if anything else in the
            request fails (wrapped, status code 500).
    """
    try:
        import httpx
        from openai import OpenAI

        # Get API key from environment variable (only credential needed)
        api_key = os.getenv('OpenAI_Endpoint_Compatible_Key')
        if not api_key:
            raise ModelHandlerError("OpenAI_Endpoint_Compatible_Key environment variable not set", 500)

        # Base URL comes from caii_endpoint parameter (passed during initialization)
        openai_compatible_endpoint = self.caii_endpoint
        if not openai_compatible_endpoint:
            raise ModelHandlerError("OpenAI compatible endpoint not provided", 500)

        # The client appends '/chat/completions' itself, so strip it from the
        # configured URL; a trailing slash is dropped first so that
        # '.../chat/completions/' is normalised as well.
        openai_compatible_endpoint = (
            openai_compatible_endpoint.rstrip('/').removesuffix('/chat/completions')
        )

        timeout_config = httpx.Timeout(
            connect=self.OPENAI_CONNECT_TIMEOUT,
            read=self.OPENAI_READ_TIMEOUT,
            write=10.0,
            pool=5.0,
        )

        # Use the system CA bundle when it exists (private cloud deployments);
        # otherwise fall back to httpx's default verification (verify=True).
        ca_bundle = "/etc/ssl/certs/ca-certificates.crt"
        verify = ca_bundle if os.path.exists(ca_bundle) else True

        # The context manager closes the connection pool once the
        # (non-streaming) response has been fully read, so repeated calls do
        # not leak sockets.
        with httpx.Client(verify=verify, timeout=timeout_config) as http_client:
            client = OpenAI(
                api_key=api_key,
                base_url=openai_compatible_endpoint,
                http_client=http_client,
            )

            completion = client.chat.completions.create(
                model=self.model_id,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=self.model_params.max_tokens,
                temperature=self.model_params.temperature,
                top_p=self.model_params.top_p,
                stream=False,
            )

        print("generated via OpenAI Compatible endpoint")
        response_text = completion.choices[0].message.content

        return self._extract_json_from_text(response_text) if not self.custom_p else response_text

    except ModelHandlerError:
        # Configuration errors raised above already carry the right message
        # and status code; do not re-wrap them as a generic request failure.
        raise
    except Exception as e:
        raise ModelHandlerError(f"OpenAI Compatible request failed: {str(e)}", status_code=500) from e
406+
345407 # ---------- Gemini -------------------------------------------------------
346408 def _handle_gemini_request (self , prompt : str ):
347409 if genai is None :
0 commit comments