Skip to content

Commit 694ee31

Browse files
committed
FIX: Checksum warnings in batch download tests
1 parent b19aa52 commit 694ee31

File tree

2 files changed

+14
-18
lines changed

2 files changed

+14
-18
lines changed

databento/historical/api/batch.py

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
import asyncio
44
import hashlib
55
import logging
6-
import time
76
import warnings
87
from collections.abc import Iterable
98
from concurrent.futures import ThreadPoolExecutor
@@ -430,16 +429,10 @@ def _download_batch_file(
430429
hash_algo, _, hash_hex = batch_download_file.hash_str.partition(":")
431430

432431
if hash_algo == "sha256":
433-
start = time.perf_counter()
434432
output_hash = hashlib.sha256(output_path.read_bytes())
435-
logger.info(
436-
"%s hash time %f second(s)",
437-
output_path.name,
438-
time.perf_counter() - start,
439-
)
440433
if output_hash.hexdigest() != hash_hex:
441434
warn_msg = f"Downloaded file failed checksum validation: {output_path.name}"
442-
logger.warn(warn_msg)
435+
logger.warning(warn_msg)
443436
warnings.warn(warn_msg, category=BentoWarning)
444437
else:
445438
logger.warning(

tests/test_historical_batch.py

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import hashlib
12
from pathlib import Path
23
from unittest.mock import MagicMock
34

@@ -165,8 +166,9 @@ def test_batch_download_single_file_sends_expected_request(
165166
# Arrange
166167
job_id = "GLBX-20220610-5DEFXVTMSM"
167168
filename = "glbx-mdp3-20220610.mbo.csv.zst"
168-
hash = "sha256:abcdef"
169-
size = 1024
169+
file_content = b""
170+
file_hash = f"sha256:{hashlib.sha256(file_content).hexdigest()}"
171+
file_size = len(file_content)
170172

171173
# Mock the call to list files so it returns a test manifest
172174
monkeypatch.setattr(
@@ -176,8 +178,8 @@ def test_batch_download_single_file_sends_expected_request(
176178
return_value=[
177179
{
178180
"filename": filename,
179-
"hash": hash,
180-
"size": size,
181+
"hash": file_hash,
182+
"size": file_size,
181183
"urls": {
182184
"https": f"localhost:442/v0/batch/download/TESTUSER/{job_id}/{filename}",
183185
"ftp": "",
@@ -223,8 +225,9 @@ def test_batch_download_rate_limit_429(
223225
# Arrange
224226
job_id = "GLBX-20220610-5DEFXVTMSM"
225227
filename = "glbx-mdp3-20220610.mbo.csv.zst"
226-
hash = "sha256:abcdef"
227-
size = 1024
228+
file_content = b"unittest"
229+
file_hash = f"sha256:{hashlib.sha256(file_content).hexdigest()}"
230+
file_size = len(file_content)
228231

229232
# Mock the call to list files so it returns a test manifest
230233
monkeypatch.setattr(
@@ -234,8 +237,8 @@ def test_batch_download_rate_limit_429(
234237
return_value=[
235238
{
236239
"filename": filename,
237-
"hash": hash,
238-
"size": size,
240+
"hash": file_hash,
241+
"size": file_size,
239242
"urls": {
240243
"https": f"localhost:442/v0/batch/download/TESTUSER/{job_id}/{filename}",
241244
"ftp": "",
@@ -252,7 +255,7 @@ def test_batch_download_rate_limit_429(
252255
ok_response = MagicMock()
253256
ok_response.__enter__.return_value = MagicMock(
254257
status_code=200,
255-
iter_content=MagicMock(return_value=iter([b"unittest"])),
258+
iter_content=MagicMock(return_value=iter([file_content])),
256259
)
257260
monkeypatch.setattr(
258261
requests,
@@ -272,4 +275,4 @@ def test_batch_download_rate_limit_429(
272275
# Assert
273276
assert mocked_batch_list_files.call_args.args == (job_id,)
274277
assert len(downloaded_files) == 1
275-
assert downloaded_files[0].read_text() == "unittest"
278+
assert downloaded_files[0].read_bytes() == file_content

0 commit comments

Comments
 (0)