|
3 | 3 | import shutil
|
4 | 4 | import numpy as np
|
5 | 5 | import pytest
|
| 6 | +import warnings |
6 | 7 | from datetime import datetime
|
7 | 8 | import os
|
8 | 9 | from urllib.parse import urlparse
|
@@ -655,3 +656,51 @@ def test_big_download_regression(alma):
|
def test_download_html_file(alma):
    """Download one HTML QA2 report and check that a truthy result is returned."""
    report_url = 'https://almascience.nao.ac.jp/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'
    downloaded = alma.download_files([report_url])
    assert downloaded
|
| 659 | + |
| 660 | + |
@pytest.mark.remote_data
def test_verify_html_file(alma, caplog):
    """Exercise ``download_files(..., verify_only=True)`` on a cached HTML file.

    The cached copy of the report is checked in three states:

    1. intact -- the "expected size" message must appear in the captured log;
    2. too long (extra bytes appended) -- the "likely corrupted" message must
       appear among the warnings recorded during the call;
    3. too short (contents overwritten) -- the "should be continued" message
       must appear in the captured log.

    Parameters
    ----------
    alma
        Fixture providing the object whose ``download_files`` is under test.
    caplog
        pytest fixture used to inspect the emitted log text.
    """
    url = 'https://almascience.nao.ac.jp/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'
    # Size in bytes that this test expects the intact report to have.
    expected_length = 66336
    appended = b"Extra Text"
    truncated = b"Empty Text"

    # first, make sure the file is not cached (in case this test gets called repeatedly)
    # (we are hacking the file later in this test to trigger different failure modes so
    # we need it fresh)
    try:
        result = alma.download_files([url], verify_only=True)
        local_filepath = result[0]
        os.remove(local_filepath)
    except FileNotFoundError:
        pass

    caplog.clear()

    # download the file
    result = alma.download_files([url])
    assert result

    # state 1: the freshly downloaded file has the expected size
    result = alma.download_files([url], verify_only=True)
    assert result
    local_filepath = result[0]
    assert f"Found cached file {local_filepath} with expected size {expected_length}." in caplog.text

    # state 2: manipulate the file so it is longer than expected
    with open(local_filepath, 'ab') as fh:
        fh.write(appended)

    caplog.clear()
    # record=True makes the context manager yield the list of caught warnings
    # (without it the ``as`` target is None and nothing can be inspected).
    with warnings.catch_warnings(record=True) as caught:
        # make sure no inherited filter hides the warning or turns it into an error
        warnings.simplefilter("always")
        result = alma.download_files([url], verify_only=True)
    assert result
    existing_file_length = expected_length + len(appended)
    corrupted_message = (f"Found cached file {local_filepath} with size {existing_file_length} "
                         f"> expected size {expected_length}. The download is likely corrupted.")
    assert any(corrupted_message in str(item.message) for item in caught)

    # state 3: manipulate the file: make it small
    with open(local_filepath, 'wb') as fh:
        fh.write(truncated)

    caplog.clear()
    result = alma.download_files([url], verify_only=True)
    assert result
    existing_file_length = len(truncated)
    assert (f"Found cached file {local_filepath} with size {existing_file_length} "
            f"< expected size {expected_length}. The download should be continued.") in caplog.text
0 commit comments