|
4 | 4 | from pathlib import Path |
5 | 5 | from unittest.mock import MagicMock, Mock, patch |
6 | 6 |
|
| 7 | +import pyarrow as pa |
7 | 8 | import pytest |
8 | 9 |
|
9 | 10 | from iqb.pipeline import ( |
@@ -299,38 +300,53 @@ def test_save_parquet_with_data(self, tmp_path): |
299 | 300 |
|
300 | 301 | info = result.save_parquet() |
301 | 302 |
|
302 | | - # Verify |
303 | | - assert info.no_content is False |
| 303 | + # Verify file path and directory creation |
304 | 304 | expected_path = cache_dir / "data.parquet" |
305 | 305 | assert info.file_path == expected_path |
306 | 306 | assert cache_dir.exists() |
307 | 307 | mock_writer_instance.write_batch.assert_called_once_with(mock_batch) |
308 | 308 |
|
309 | 309 | def test_save_parquet_empty_results(self, tmp_path): |
310 | | - """Test handling of empty query results.""" |
| 310 | + """Test handling of empty query results - writes empty parquet file.""" |
311 | 311 | cache_dir = tmp_path / "cache" |
312 | 312 |
|
313 | 313 | # Mock empty iterator |
314 | 314 | mock_rows = Mock() |
315 | 315 | mock_rows.to_arrow_iterable.return_value = iter([]) |
316 | 316 |
|
317 | | - result = QueryResult( |
318 | | - bq_read_client=Mock(), |
319 | | - job=Mock(), |
320 | | - rows=mock_rows, |
321 | | - cache_dir=cache_dir, |
322 | | - query_start_time="2024-11-27T10:00:00.000000Z", |
323 | | - template_hash="abc123", |
324 | | - ) |
| 317 | + # Mock ParquetWriter to verify it's called with empty schema |
| 318 | + with patch("iqb.pipeline.pq.ParquetWriter") as mock_writer: |
| 319 | + mock_writer_instance = MagicMock() |
| 320 | + mock_writer.return_value.__enter__.return_value = mock_writer_instance |
325 | 321 |
|
326 | | - info = result.save_parquet() |
| 322 | + result = QueryResult( |
| 323 | + bq_read_client=Mock(), |
| 324 | + job=Mock(), |
| 325 | + rows=mock_rows, |
| 326 | + cache_dir=cache_dir, |
| 327 | + query_start_time="2024-11-27T10:00:00.000000Z", |
| 328 | + template_hash="abc123", |
| 329 | + ) |
327 | 330 |
|
328 | | - # Verify no file created, but directory exists |
329 | | - assert info.no_content is True |
330 | | - expected_path = cache_dir / "data.parquet" |
331 | | - assert info.file_path == expected_path |
332 | | - assert cache_dir.exists() |
333 | | - assert not expected_path.exists() |
| 331 | + info = result.save_parquet() |
| 332 | + |
| 333 | + # Verify empty parquet file would be created |
| 334 | + expected_path = cache_dir / "data.parquet" |
| 335 | + assert info.file_path == expected_path |
| 336 | + assert cache_dir.exists() |
| 337 | + |
| 338 | + # Verify ParquetWriter was called with empty schema |
| 339 | + mock_writer.assert_called_once() |
| 340 | + call_args = mock_writer.call_args |
| 341 | + assert call_args[0][0] == expected_path.as_posix() |
| 342 | + |
| 343 | + # Verify schema is empty (no fields) |
| 344 | + schema_arg = call_args[0][1] |
| 345 | + assert isinstance(schema_arg, pa.Schema) |
| 346 | + assert len(schema_arg) == 0 |
| 347 | + |
| 348 | + # Verify no batches were written (first_batch is None, for loop has nothing) |
| 349 | + mock_writer_instance.write_batch.assert_not_called() |
334 | 350 |
|
335 | 351 | def test_save_parquet_multiple_batches(self, tmp_path): |
336 | 352 | """Test saving multiple Arrow batches.""" |
@@ -362,7 +378,8 @@ def test_save_parquet_multiple_batches(self, tmp_path): |
362 | 378 |
|
363 | 379 | # Verify all batches written |
364 | 380 | assert mock_writer_instance.write_batch.call_count == 3 |
365 | | - assert info.no_content is False |
| 381 | + expected_path = cache_dir / "data.parquet" |
| 382 | + assert info.file_path == expected_path |
366 | 383 |
|
367 | 384 | def test_save_parquet_creates_nested_directories(self, tmp_path): |
368 | 385 | """Test that save_parquet creates nested cache directory.""" |
@@ -392,7 +409,8 @@ def test_save_parquet_creates_nested_directories(self, tmp_path): |
392 | 409 |
|
393 | 410 | # Verify cache directory created |
394 | 411 | assert cache_dir.exists() |
395 | | - assert info.no_content is False |
| 412 | + expected_path = cache_dir / "data.parquet" |
| 413 | + assert info.file_path == expected_path |
396 | 414 |
|
397 | 415 |
|
398 | 416 | class TestQueryResultSaveStats: |
@@ -672,18 +690,9 @@ def test_get_cache_entry_validation_before_fs_check(self, mock_storage, mock_cli |
672 | 690 | class TestParquetFileInfo: |
673 | 691 | """Test ParquetFileInfo dataclass.""" |
674 | 692 |
|
675 | | - def test_parquet_file_info_with_content(self, tmp_path): |
676 | | - """Test ParquetFileInfo creation with content.""" |
677 | | - test_file = tmp_path / "test.parquet" |
678 | | - info = ParquetFileInfo(no_content=False, file_path=test_file) |
679 | | - |
680 | | - assert info.no_content is False |
681 | | - assert info.file_path == test_file |
682 | | - |
683 | | - def test_parquet_file_info_no_content(self, tmp_path): |
684 | | - """Test ParquetFileInfo creation without content.""" |
| 693 | + def test_parquet_file_info_creation(self, tmp_path): |
| 694 | + """Test ParquetFileInfo creation.""" |
685 | 695 | test_file = tmp_path / "test.parquet" |
686 | | - info = ParquetFileInfo(no_content=True, file_path=test_file) |
| 696 | + info = ParquetFileInfo(file_path=test_file) |
687 | 697 |
|
688 | | - assert info.no_content is True |
689 | 698 | assert info.file_path == test_file |
0 commit comments