Skip to content

Commit bcc1ac6

Browse files
committed
Adding maximum request size (MiB) to config
- adding a CLI option for this - synchronizing the default size treatment across gitlab/koji/server/cli Signed-off-by: Jan Matufka <jmatufka@redhat.com>
1 parent 226b70b commit bcc1ac6

File tree

12 files changed

+107
-51
lines changed

12 files changed

+107
-51
lines changed

logdetective/constants.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,5 +85,6 @@
8585

8686
# Other constants
8787

88-
# Maximum log size is 300 MiB
89-
MAXIMUM_LOG_LENGTH = 300 * 1024 * 1024
88+
# Default maximum log size is 300 MiB,
89+
# for server it can be overwritten in config as max_artifact_size (in MiB)
90+
DEFAULT_MAXIMUM_LOG_MIB = 300

logdetective/logdetective.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66

77
import aiohttp
88

9-
from logdetective.constants import DEFAULT_ADVISOR, DEFAULT_TEMPERATURE
9+
from logdetective.constants import (
10+
DEFAULT_ADVISOR,
11+
DEFAULT_TEMPERATURE,
12+
DEFAULT_MAXIMUM_LOG_MIB,
13+
)
1014
from logdetective.utils import (
1115
process_log,
1216
initialize_model,
@@ -17,6 +21,7 @@
1721
check_csgrep,
1822
mine_logs,
1923
sanitize_log,
24+
mib_to_bytes,
2025
)
2126
from logdetective.remote_log import retrieve_log_content
2227
from logdetective.extractors import DrainExtractor, CSGrepExtractor
@@ -87,6 +92,12 @@ def setup_args():
8792
parser.add_argument(
8893
"--csgrep", action="store_true", help="Use csgrep to process the log."
8994
)
95+
parser.add_argument(
96+
"--mib_limit",
97+
type=int,
98+
default=DEFAULT_MAXIMUM_LOG_MIB,
99+
help="Maximum log size in MiB (default: %(default)s)",
100+
)
90101
return parser.parse_args()
91102

92103

@@ -151,7 +162,7 @@ async def run(): # pylint: disable=too-many-statements,too-many-locals,too-many
151162

152163
async with aiohttp.ClientSession() as http:
153164
try:
154-
log = await retrieve_log_content(http, args.file)
165+
log = await retrieve_log_content(http, args.file, mib_to_bytes(args.mib_limit))
155166
except ValueError as e:
156167
# file does not exist
157168
LOG.error(e)

logdetective/remote_log.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55
import aiohttp
66
from aiohttp.web import HTTPBadRequest
77

8-
from logdetective.constants import MAXIMUM_LOG_LENGTH
8+
from logdetective.constants import DEFAULT_MAXIMUM_LOG_MIB
99
from logdetective.utils import (
1010
ContentSizeCheck,
1111
check_content_size,
12+
mib_to_bytes,
1213
)
1314

1415
LOG = logging.getLogger("logdetective")
@@ -19,16 +20,23 @@ class RemoteLog:
1920
Handles retrieval of remote log files.
2021
"""
2122

22-
def __init__(self, url: str, http_session: aiohttp.ClientSession):
23+
def __init__(
24+
self,
25+
url: str,
26+
http_session: aiohttp.ClientSession,
27+
limit_bytes: int = mib_to_bytes(DEFAULT_MAXIMUM_LOG_MIB)
28+
):
2329
"""
2430
Initialize with a remote log URL and HTTP session.
2531
2632
Args:
2733
url: A remote URL pointing to a log file
2834
http_session: The HTTP session used to retrieve the remote file
35+
limit_bytes: For checking the log size on the accessed URL
2936
"""
3037
self._url = url
3138
self._http_session = http_session
39+
self._limit_bytes = limit_bytes
3240

3341
@property
3442
def url(self) -> str:
@@ -67,7 +75,7 @@ async def get_url_content(self) -> str:
6775
raise RuntimeError(f"We couldn't obtain the logs: {ex}") from ex
6876
size_check: ContentSizeCheck = check_content_size(
6977
head_response.headers,
70-
MAXIMUM_LOG_LENGTH
78+
self._limit_bytes
7179
)
7280
if not size_check.result:
7381
raise RuntimeError(
@@ -86,7 +94,7 @@ async def process_url(self) -> str:
8694
raise HTTPBadRequest(reason=f"We couldn't obtain the logs: {ex}") from ex
8795

8896

89-
async def retrieve_log_content(http: aiohttp.ClientSession, log_path: str) -> str:
97+
async def retrieve_log_content(http: aiohttp.ClientSession, log_path: str, size_limit: int) -> str:
9098
"""Get content of the file on the log_path path.
9199
Path is assumed to be valid URL if it has a scheme.
92100
Otherwise it attempts to pull it from local filesystem."""
@@ -101,7 +109,8 @@ async def retrieve_log_content(http: aiohttp.ClientSession, log_path: str) -> st
101109
log = f.read()
102110

103111
else:
104-
remote_log = RemoteLog(log_path, http)
112+
remote_log = RemoteLog(log_path, http, limit_bytes=size_limit)
113+
# limited to DEFAULT_MAXIMUM_LOG_LENGTH (300 MiB)
105114
log = await remote_log.get_url_content()
106115

107116
return log

logdetective/server/metric.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from starlette.responses import StreamingResponse
1010

1111
from logdetective.remote_log import RemoteLog
12-
from logdetective.server.config import LOG
12+
from logdetective.server.config import LOG, SERVER_CONFIG
1313
from logdetective.server.compressors import (
1414
TextCompressor,
1515
RemoteLogCompressor,
@@ -45,7 +45,11 @@ async def add_new_metrics_url(
4545
raise LogDetectiveMetricsError(
4646
f"""Remote log can not be retrieved without URL and http session.
4747
URL: {url}, http session:{http_session}""")
48-
remote_log = RemoteLog(url, http_session)
48+
remote_log = RemoteLog(
49+
url,
50+
http_session,
51+
limit_bytes=SERVER_CONFIG.general.max_artifact_size
52+
)
4953
compressed_log_content = await RemoteLogCompressor(remote_log).zip_content()
5054

5155
# gitlab and koji always fall through here

logdetective/server/models.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
USER_ROLE_DEFAULT,
2020
LLM_MAX_CONCURRENT_REQUESTS,
2121
LLM_MAX_KEEP_ALIVE_CONNECTIONS,
22-
MAXIMUM_LOG_LENGTH,
22+
DEFAULT_MAXIMUM_LOG_MIB
2323
)
24-
from logdetective.utils import check_csgrep
24+
from logdetective.utils import check_csgrep, mib_to_bytes
2525

2626

2727
class BuildLogFile(BaseModel):
@@ -33,7 +33,10 @@ class BuildLogFile(BaseModel):
3333
max_length=255,
3434
pattern=r"^[a-zA-Z0-9._\-\/ ]+$"
3535
)
36-
content: str = Field(max_length=MAXIMUM_LOG_LENGTH)
36+
content: str = Field(
37+
description="Log file content as a string. By default, request size "
38+
"is limited to 300 MiB. This can affect what kind of logs can be submitted."
39+
)
3740

3841

3942
class BuildLogRequest(BaseModel):
@@ -257,16 +260,15 @@ class GitLabInstanceConfig(BaseModel): # pylint: disable=too-many-instance-attr
257260

258261
timeout: float = 5.0
259262

260-
# Maximum size of artifacts.zip in MiB. (default: 300 MiB)
261-
max_artifact_size: int = 300 * 1024 * 1024
263+
# Maximum size of artifacts.zip (default: 300 MiB)
264+
# In config, the unit is in MiB, but this max_artifact_size attribute will be in bytes
265+
max_artifact_size: int = mib_to_bytes(DEFAULT_MAXIMUM_LOG_MIB)
262266

263267
@field_validator("max_artifact_size", mode="before")
264268
@classmethod
265269
def megabytes_to_bytes(cls, v: Any):
266270
"""Convert max_artifact_size from megabytes to bytes."""
267-
if isinstance(v, int):
268-
return v * 1024 * 1024
269-
return 300 * 1024 * 1024
271+
return mib_to_bytes(v if isinstance(v, int) else DEFAULT_MAXIMUM_LOG_MIB)
270272

271273

272274
class GitLabConfig(BaseModel):
@@ -302,15 +304,15 @@ class KojiConfig(BaseModel):
302304

303305
instances: Dict[str, KojiInstanceConfig] = {}
304306
analysis_timeout: int = 15
305-
max_artifact_size: int = 300 * 1024 * 1024
307+
308+
# in yaml config, this is given in MiB, but we use bytes in code (same as gitlab)
309+
max_artifact_size: int = mib_to_bytes(DEFAULT_MAXIMUM_LOG_MIB)
306310

307311
@field_validator("max_artifact_size", mode="before")
308312
@classmethod
309313
def megabytes_to_bytes(cls, v: Any):
310314
"""Convert max_artifact_size from megabytes to bytes."""
311-
if isinstance(v, int):
312-
return v * 1024 * 1024
313-
return 300 * 1024 * 1024
315+
return mib_to_bytes(v if isinstance(v, int) else DEFAULT_MAXIMUM_LOG_MIB)
314316

315317
@model_validator(mode="before")
316318
@classmethod
@@ -347,6 +349,14 @@ class GeneralConfig(BaseModel):
347349
collect_emojis_interval: int = 60 * 60 # seconds
348350
top_k_snippets: int = 0
349351
report_certainty: bool = False
352+
# max_artifact_size in config.yml is in MiBs, here (GeneralConfig class) is in bytes
353+
max_artifact_size: int = mib_to_bytes(DEFAULT_MAXIMUM_LOG_MIB)
354+
355+
@field_validator("max_artifact_size", mode="before")
356+
@classmethod
357+
def megabytes_to_bytes(cls, v: Any):
358+
"""Convert max_artifact_size from megabytes to bytes."""
359+
return mib_to_bytes(v if isinstance(v, int) else DEFAULT_MAXIMUM_LOG_MIB)
350360

351361

352362
class Config(BaseModel):

logdetective/server/utils.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
import aiohttp
55
from fastapi import Request, HTTPException
66

7-
from logdetective.constants import SNIPPET_DELIMITER, MAXIMUM_LOG_LENGTH
7+
from logdetective.constants import SNIPPET_DELIMITER
88
from logdetective.utils import (
99
ContentSizeCheck,
1010
check_content_size,
1111
)
12-
from logdetective.server.config import LOG
12+
from logdetective.server.config import LOG, SERVER_CONFIG
1313
from logdetective.server.exceptions import LogDetectiveConnectionError
1414
from logdetective.remote_log import RemoteLog
1515
from logdetective.server.models import (
@@ -115,7 +115,11 @@ async def get_log_from_payload(
115115
log_text = ""
116116
if payload.url:
117117
LOG.info("Handling log as URL")
118-
remote_log = RemoteLog(payload.url, http_session)
118+
remote_log = RemoteLog(
119+
payload.url,
120+
http_session,
121+
limit_bytes=SERVER_CONFIG.general.max_artifact_size
122+
)
119123
log_text = await remote_log.process_url()
120124
elif payload.files:
121125
# pydantic field validators make sure at least one element is present,
@@ -156,16 +160,20 @@ def validate_request_size(request: Request) -> None:
156160
HTTPException(411): If Content-Length header is missing or invalid
157161
HTTPException(413): If Content-Length exceeds maximum allowed size
158162
"""
159-
size_check: ContentSizeCheck = check_content_size(dict(request.headers), MAXIMUM_LOG_LENGTH)
163+
size_check: ContentSizeCheck = check_content_size(
164+
request.headers,
165+
SERVER_CONFIG.general.max_artifact_size
166+
)
160167
if not (size_check.value_present and size_check.size_in_bytes is not None):
161168
raise HTTPException(status_code=411, detail="Content-Length is missing or invalid.")
162169
if not size_check.result:
163-
size = size_check.size_in_bytes
164170
raise HTTPException(
165171
status_code=413,
166172
detail=(
167173
f"Content-Length is too large: "
168-
f"{size} B ({size / (1024 * 1024):.2f} MiB) > "
169-
f"{MAXIMUM_LOG_LENGTH} B ({MAXIMUM_LOG_LENGTH / (1024 * 1024):.2f} MiB)"
174+
f"{size_check.size_in_bytes} B "
175+
f"({size_check.size_in_bytes / (1024 * 1024):.2f} MiB) > "
176+
f"{SERVER_CONFIG.general.max_artifact_size} B "
177+
f"({SERVER_CONFIG.general.max_artifact_size / (1024 * 1024):.2f} MiB)"
170178
)
171179
)

logdetective/utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@
6060
]
6161

6262

63+
# pylint: disable=missing-function-docstring
64+
def mib_to_bytes(mib: int) -> int:
65+
return mib * 1024 * 1024
66+
67+
6368
def new_message(text: str) -> bool:
6469
"""Set of heuristics for determining whether or not
6570
does the current chunk of log text continue on next line.

server/config.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ extractor:
5757
# tokens:
5858
# - example_token
5959
general:
60+
max_artifact_size: 300 # in MiB
6061
devmode: False
6162
packages:
6263
- .*

tests/base/test_utils.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from unittest import mock
2-
from contextlib import nullcontext
32

43
import aiohttp
54
import aioresponses
@@ -14,8 +13,9 @@
1413
filter_snippet_patterns,
1514
load_skip_snippet_patterns,
1615
get_chunks,
16+
mib_to_bytes,
1717
)
18-
18+
from logdetective.constants import DEFAULT_MAXIMUM_LOG_MIB
1919
from logdetective.remote_log import RemoteLog
2020
from logdetective.models import PromptConfig, SkipSnippets
2121
from logdetective import constants
@@ -78,26 +78,30 @@ def test_load_prompts_correct_path():
7878

7979
@pytest.mark.asyncio
8080
@pytest.mark.parametrize(
81-
"content_length, exception",
81+
"content_length, does_raise, exception_match",
8282
[
83-
("3", None),
84-
(str(301 * 1024 * 1024), "over the limit"),
85-
("test", "invalid"),
83+
("3", False, None),
84+
(str(mib_to_bytes(DEFAULT_MAXIMUM_LOG_MIB) + 1), True, "over the limit"),
85+
("test", True, "invalid"),
8686
],
8787
indirect=False
8888
)
89-
async def test_get_url_content(content_length, exception):
89+
async def test_get_url_content(content_length, does_raise, exception_match):
90+
url = "http://localhost:8999/"
9091
mock_head_response = {"Content-Length": content_length}
9192
mock_response = "123"
9293
with aioresponses.aioresponses() as mock:
93-
mock.head("http://localhost:8999/", status=200, headers=mock_head_response)
94-
mock.get("http://localhost:8999/", status=200, body=mock_response)
94+
mock.head(url, status=200, headers=mock_head_response)
95+
mock.get(url, status=200, body=mock_response)
9596
async with aiohttp.ClientSession() as http:
96-
with pytest.raises(RuntimeError, match=exception) if exception else nullcontext():
97-
url_output_cr = RemoteLog("http://localhost:8999/", http).get_url_content()
97+
if does_raise:
98+
with pytest.raises(RuntimeError, match=exception_match):
99+
url_output_cr = RemoteLog(url, http).get_url_content()
100+
url_output = await url_output_cr
101+
else:
102+
url_output_cr = RemoteLog(url, http).get_url_content()
98103
url_output = await url_output_cr
99-
if not exception:
100-
assert url_output == "123"
104+
assert url_output == "123"
101105

102106

103107
@pytest.mark.parametrize("user_role", ["user", "something"])

tests/server/test_server.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,9 @@ async def test_get_log_from_payload_url_sanitization():
276276
mock_remote_log = flexmock(RemoteLog)
277277

278278
async with aiohttp.ClientSession() as session:
279-
mock_remote_log.should_receive("__init__").with_args("http://path.to/file.log", session)
279+
mock_remote_log.should_receive("__init__").with_args(
280+
"http://path.to/file.log", session, limit_bytes=SERVER_CONFIG.general.max_artifact_size
281+
)
280282
mock_remote_log.should_receive("process_url").and_return(awaited_dirty_log)
281283
log_text = await get_log_from_payload(payload, session)
282284

0 commit comments

Comments
 (0)