|
| 1 | +import io |
1 | 2 | import random |
2 | 3 | from http.server import BaseHTTPRequestHandler |
3 | 4 | from typing import Iterator, List |
@@ -316,3 +317,131 @@ def mock_iter_content(chunk_size): |
316 | 317 | assert received_data == test_data # all data was received correctly |
317 | 318 | assert len(content_chunks) == expected_chunks # correct number of chunks |
318 | 319 | assert all(len(c) <= chunk_size for c in content_chunks) # chunks don't exceed size |
| 320 | + |
| 321 | + |
def test_is_seekable_stream():
    """Exercise ``_BaseClient._is_seekable_stream`` with non-streams and streams.

    Plain values and a stream whose ``seekable()`` returns False are expected
    to be rejected; in-memory streams, a real file object, and a custom
    ``io.IOBase`` subclass reporting itself seekable are expected to pass.
    """
    base_client = _BaseClient()

    # Values that are not stream objects at all.
    for not_a_stream in (None, "string data", b"binary data", ["list", "data"], 42):
        assert not base_client._is_seekable_stream(not_a_stream)

    # A stream object that reports itself as non-seekable.
    unseekable = io.BytesIO(b"test data")
    unseekable.seekable = lambda: False
    assert not base_client._is_seekable_stream(unseekable)

    # In-memory binary and text streams.
    assert base_client._is_seekable_stream(io.BytesIO(b"test data"))
    assert base_client._is_seekable_stream(io.StringIO("test data"))

    # A real file object opened in binary mode.
    with open(__file__, 'rb') as source_file:
        assert base_client._is_seekable_stream(source_file)

    # A minimal user-defined stream exposing seekable/seek/tell.
    class CustomSeekableStream(io.IOBase):

        def seekable(self):
            return True

        def seek(self, offset, whence=0):
            return 0

        def tell(self):
            return 0

    custom_stream = CustomSeekableStream()
    assert base_client._is_seekable_stream(custom_stream)
| 358 | + |
| 359 | + |
@pytest.mark.parametrize(
    'input_data',
    [
        b"0123456789",  # raw bytes payload
        "0123456789",  # text payload
        io.BytesIO(b"0123456789"),  # binary in-memory stream
        io.StringIO("0123456789"),  # text in-memory stream
    ])
def test_reset_seekable_stream_on_retry(input_data):
    """Verify the full payload is re-sent on every retried request.

    The fixture handler answers 429 until two request bodies have been
    recorded and 200 afterwards, so three requests are expected in total,
    each carrying the complete payload.
    """
    bodies = []

    def respond(handler: BaseHTTPRequestHandler):
        # Only the request arriving after two recorded bodies succeeds.
        status = 200 if len(bodies) == 2 else 429
        handler.send_response(status)
        handler.end_headers()

        # Record the request body, if the request carried one.
        body_size = int(handler.headers.get('Content-Length', 0))
        if body_size > 0:
            bodies.append(handler.rfile.read(body_size))

    with http_fixture_server(respond) as host:
        base_client = _BaseClient()

        # Each retry is expected to start from the beginning of the payload.
        base_client.do('POST', f'{host}/foo', data=input_data)

        assert bodies == [b"0123456789"] * 3
| 391 | + |
| 392 | + |
def test_reset_seekable_stream_to_their_initial_position_on_retry():
    """Verify retries rewind a stream to where it stood before the call.

    The stream is advanced to offset 4 before the request, so every attempt
    is expected to send only the remaining bytes, and the stream is expected
    to end up fully consumed.
    """
    bodies = []

    def respond(handler: BaseHTTPRequestHandler):
        # Only the request arriving after two recorded bodies succeeds.
        status = 200 if len(bodies) == 2 else 429
        handler.send_response(status)
        handler.end_headers()

        # Record the request body, if the request carried one.
        body_size = int(handler.headers.get('Content-Length', 0))
        if body_size > 0:
            bodies.append(handler.rfile.read(body_size))

    # Start part-way through the buffer rather than at offset 0.
    stream = io.BytesIO(b"0123456789")
    stream.seek(4)

    with http_fixture_server(respond) as host:
        base_client = _BaseClient()

        # Retries are expected to rewind to offset 4, not to offset 0.
        base_client.do('POST', f'{host}/foo', data=stream)

        assert bodies == [b"456789"] * 3
        assert stream.tell() == 10  # positioned at end of the 10-byte buffer
| 420 | + |
| 421 | + |
def test_no_retry_or_reset_on_non_seekable_stream():
    """Verify a non-seekable stream is sent once and the error is raised.

    The fixture handler always answers 429 with a ``Retry-After`` header.
    With a stream that reports itself as non-seekable, the call is expected
    to raise ``DatabricksError`` after a single request.
    """
    seen_bodies = []

    def always_throttle(handler: BaseHTTPRequestHandler):
        # Record the request body, if the request carried one.
        body_size = int(handler.headers.get('Content-Length', 0))
        if body_size > 0:
            seen_bodies.append(handler.rfile.read(body_size))

        # Every request gets a response that would normally trigger a retry.
        handler.send_response(429)
        handler.send_header('Retry-After', '1')
        handler.end_headers()

    stream = io.BytesIO(b"0123456789")
    stream.seekable = lambda: False  # make the stream report itself as non-seekable

    with http_fixture_server(always_throttle) as host:
        base_client = _BaseClient()

        # The error is expected to surface instead of a second attempt.
        with pytest.raises(DatabricksError):
            base_client.do('POST', f'{host}/foo', data=stream)

        # Exactly one request body was received, and the stream was consumed.
        assert seen_bodies == [b"0123456789"]
        assert stream.tell() == 10  # positioned at end of the 10-byte buffer
0 commit comments