11import os
22import shutil
33import tarfile
4+ import hashlib
45
56import pytest
67import urllib .request
78
9+ SAMPLE_IMAGES_SHA256 = "0461f57db3806ca47d9063151eec4bc0720c66a83153dda04e230f838b64f063"
10+ SAMPLE_IMAGES_URL = "https://nexus.library.illinois.edu/repository/sample-data/images/sample_images.tar.gz"
811
9- def download_images (url , destination , download_path ):
12+ # To avoid re-downloading sample_images.tar.gz on every test run,
13+ # download it once and set the SAMPLE_IMAGES_ARCHIVE environment variable
14+ # to the path of the downloaded file.
1015
11- print ("Downloading {}" .format (url ))
12- urllib .request .urlretrieve (url ,
13- filename = os .path .join (download_path , "sample_images.tar.gz" ))
14- if not os .path .exists (os .path .join (download_path , "sample_images.tar.gz" )):
def download_images(url, download_path):
    """Download the sample images archive into ``download_path``.

    Args:
        url: Location of the archive to fetch.
        download_path: Existing directory in which the archive is saved as
            ``sample_images.tar.gz``.

    Returns:
        Path of the downloaded ``sample_images.tar.gz`` file.

    Raises:
        FileNotFoundError: If the archive is not on disk after the download.
    """
    print(f"Downloading {url}")
    output = os.path.join(download_path, "sample_images.tar.gz")
    urllib.request.urlretrieve(url, filename=output)
    # Defensive check so a missing file is reported here, with its path,
    # rather than surfacing later as an unrelated error.
    if not os.path.exists(output):
        raise FileNotFoundError(
            f"sample images not downloaded. {output} does not exist."
        )
    return output
def extract_images(path, destination):
    """Extract every member of a gzip-compressed tar archive.

    Args:
        path: Path to the ``.tar.gz`` archive to unpack.
        destination: Directory the archive members are extracted into.
    """
    print("Extracting images")
    # Extraction filters block members that would escape ``destination``
    # (absolute paths, "..", unsafe links). The ``filter`` argument only
    # exists on Python versions that ship ``tarfile.data_filter``, so it is
    # passed conditionally to stay compatible with older interpreters.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
    with tarfile.open(path, "r:gz") as archive_file:
        for item in archive_file.getmembers():
            print(f"Extracting {item.name}")
            archive_file.extract(item, path=destination, **extract_kwargs)
30+
def verify_hash(path, sha256_hash):
    """Check that the file at ``path`` has the expected SHA-256 digest.

    Args:
        path: Path of the file to hash.
        sha256_hash: Expected digest as a hexadecimal string, compared
            exactly against ``hashlib``'s ``hexdigest()`` output.

    Raises:
        AssertionError: If the computed digest differs from ``sha256_hash``.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in fixed-size chunks so the whole archive is never held in
        # memory at once.
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)
    file_hash = digest.hexdigest()
    # Raised explicitly (instead of a bare ``assert``) so the check still
    # runs under ``python -O`` and reports both digests on failure.
    if file_hash != sha256_hash:
        raise AssertionError(
            f"SHA-256 mismatch for {path}: "
            f"expected {sha256_hash}, got {file_hash}"
        )
2235
2336
2437@pytest .fixture (scope = "session" )
@@ -28,16 +41,23 @@ def sample_images_readonly(tmpdir_factory):
2841 sample_images_path = os .path .join (test_path , "sample_images" )
2942 download_path = tmpdir_factory .mktemp ("downloaded_archives" , numbered = False )
3043 if os .path .exists (sample_images_path ):
31- print ("{ } already exits". format ( sample_images_path ) )
44+ print (f" { sample_images_path } already exits" )
3245
3346 else :
34- print ("Downloading sample images" )
35- download_images (url = "https://nexus.library.illinois.edu/repository/sample-data/images/sample_images.tar.gz" ,
36- destination = test_path ,
37- download_path = download_path )
38-
47+ archive = os .getenv ('SAMPLE_IMAGES_ARCHIVE' )
48+ if not archive :
49+ print ("Downloading sample images" )
50+ archive = download_images (
51+ url = SAMPLE_IMAGES_URL ,
52+ download_path = download_path
53+ )
54+ if not os .path .exists (archive ):
55+ raise FileNotFoundError (f"sample image archive not found. { archive } does not exist." )
56+ verify_hash (archive , sha256_hash = SAMPLE_IMAGES_SHA256 )
57+ extract_images (path = archive , destination = test_path )
3958 yield sample_images_path
40- shutil .rmtree (test_path )
59+ if os .path .exists (test_path ):
60+ shutil .rmtree (test_path )
4161
4262
4363@pytest .fixture
0 commit comments