File tree — 4 files changed: +17 −2 lines changed
 # Tuning for LLM-as-a-Service
 LLM_DEFAULT_MAX_QUEUE_SIZE = 50
-LLM_DEFAULT_REQUESTS_PER_MINUTE = 60
+LLM_DEFAULT_REQUESTS_PER_MINUTE = 600
+LLM_MAX_CONCURRENT_REQUESTS = 100
+LLM_MAX_KEEP_ALIVE_CONNECTIONS = 20

 # Roles for chat API
 SYSTEM_ROLE_DEFAULT = "developer"
 import os
 import logging
 import yaml
+import httpx
 from openai import AsyncOpenAI

 from logdetective.utils import load_prompts, load_skip_snippet_patterns
@@ -54,9 +55,14 @@ def get_log(config: Config):

def get_openai_api_client(inference_config: InferenceConfig):
    """Set up an AsyncOpenAI client for the configured inference endpoint.

    Args:
        inference_config: Inference settings supplying the endpoint URL,
            API token, per-request timeout, and connection-pool limits.

    Returns:
        AsyncOpenAI: Client bound to the configured base URL and token,
        backed by a custom ``httpx.AsyncClient`` whose connection pool is
        sized from the config.
    """
    # Size the httpx connection pool from the config; the library
    # defaults are too restrictive for our request volume.
    limits = httpx.Limits(
        max_connections=inference_config.max_concurrent_requests,
        max_keepalive_connections=inference_config.max_keep_alive_connections,
    )
    return AsyncOpenAI(
        api_key=inference_config.api_token,
        base_url=inference_config.url,
        timeout=inference_config.llm_api_timeout,
        http_client=httpx.AsyncClient(limits=limits),
    )
6167
6268
     LLM_DEFAULT_REQUESTS_PER_MINUTE,
     SYSTEM_ROLE_DEFAULT,
     USER_ROLE_DEFAULT,
+    LLM_MAX_CONCURRENT_REQUESTS,
+    LLM_MAX_KEEP_ALIVE_CONNECTIONS,
 )
 from logdetective.utils import check_csgrep

@@ -174,6 +176,8 @@ class InferenceConfig(BaseModel):  # pylint: disable=too-many-instance-attributes
     system_role: str = SYSTEM_ROLE_DEFAULT
     llm_api_timeout: float = 15.0
     requests_per_minute: int = LLM_DEFAULT_REQUESTS_PER_MINUTE
+    max_concurrent_requests: int = LLM_MAX_CONCURRENT_REQUESTS
+    max_keep_alive_connections: int = LLM_MAX_KEEP_ALIVE_CONNECTIONS


 class ExtractorConfig(BaseModel):
@@ -16,6 +16,9 @@ inference:
   # If the roles are same, the system prompt and user message are concatenated
   user_role: user
   system_role: system
+  # Limits on connections for AsyncOpenAI client
+  max_concurrent_requests: 100
+  max_keep_alive_connections: 20
 # Separate LLM endpoint for snippet analysis, optional
 # snippet_inference:
 #   max_tokens: -1
You can’t perform that action at this time.
0 commit comments