Skip to content

Commit 8c31a16

Browse files
committed
Configurable number of requests and connections
Signed-off-by: Jiri Podivin <jpodivin@redhat.com>
1 parent bb8c072 commit 8c31a16

File tree

4 files changed

+17
-2
lines changed

4 files changed

+17
-2
lines changed

logdetective/constants.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,9 @@
7575

7676
# Tuning for LLM-as-a-Service
7777
LLM_DEFAULT_MAX_QUEUE_SIZE = 50
78-
LLM_DEFAULT_REQUESTS_PER_MINUTE = 60
78+
LLM_DEFAULT_REQUESTS_PER_MINUTE = 600
79+
LLM_MAX_CONCURRENT_REQUESTS = 100
80+
LLM_MAX_KEEP_ALIVE_CONNECTIONS = 20
7981

8082
# Roles for chat API
8183
SYSTEM_ROLE_DEFAULT = "developer"

logdetective/server/config.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
import logging
33
import yaml
4+
import httpx
45
from openai import AsyncOpenAI
56

67
from logdetective.utils import load_prompts, load_skip_snippet_patterns
@@ -54,9 +55,14 @@ def get_log(config: Config):
5455

5556
def get_openai_api_client(inference_config: InferenceConfig):
5657
"""Set up AsyncOpenAI client with default configuration."""
58+
limits = httpx.Limits(
59+
max_connections=inference_config.max_concurrent_requests,
60+
max_keepalive_connections=inference_config.max_keep_alive_connections,
61+
)
5762
return AsyncOpenAI(
5863
api_key=inference_config.api_token, base_url=inference_config.url,
59-
timeout=inference_config.llm_api_timeout
64+
timeout=inference_config.llm_api_timeout,
65+
http_client=httpx.AsyncClient(limits=limits) # Defaults are too restrictive
6066
)
6167

6268

logdetective/server/models.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
LLM_DEFAULT_REQUESTS_PER_MINUTE,
1717
SYSTEM_ROLE_DEFAULT,
1818
USER_ROLE_DEFAULT,
19+
LLM_MAX_CONCURRENT_REQUESTS,
20+
LLM_MAX_KEEP_ALIVE_CONNECTIONS,
1921
)
2022
from logdetective.utils import check_csgrep
2123

@@ -174,6 +176,8 @@ class InferenceConfig(BaseModel): # pylint: disable=too-many-instance-attribute
174176
system_role: str = SYSTEM_ROLE_DEFAULT
175177
llm_api_timeout: float = 15.0
176178
requests_per_minute: int = LLM_DEFAULT_REQUESTS_PER_MINUTE
179+
max_concurrent_requests: int = LLM_MAX_CONCURRENT_REQUESTS
180+
max_keep_alive_connections: int = LLM_MAX_KEEP_ALIVE_CONNECTIONS
177181

178182

179183
class ExtractorConfig(BaseModel):

server/config.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ inference:
1616
# If the roles are same, the system prompt and user message are concatenated
1717
user_role: user
1818
system_role: system
19+
# Limits on connections for AsyncOpenAI client
20+
max_concurrent_requests: 100
21+
max_keep_alive_connections: 20
1922
# Separate LLM endpoint for snippet analysis, optional
2023
# snippet_inference:
2124
# max_tokens: -1

0 commit comments

Comments
 (0)