1+ import hashlib
12import shutil
23
34import pytest
1112
1213# TEST_PATH = os.path.join(os.path.dirname(__file__), "henrytestmetadata")
1314SAMPLE_IMAGES = "https://nexus.library.illinois.edu/repository/sample-data/images/metadata_test_images.tar.gz"
15+ SAMPLE_IMAGES_SHA256 = "eee5fa3641628c7365e14de3f58da069cb332dc13a4b739dc168a1617109ebbf"
1416
# To avoid re-downloading metadata_test_images.tar.gz on every test run,
# download it once and set the environment variable SAMPLE_IMAGES_ARCHIVE
# to the path of the downloaded file.
1520
def download_images(url, download_path):
    """Download the sample image archive into a directory.

    Args:
        url: Location of the archive to fetch.
        download_path: Directory in which the archive is saved under the
            name "sample_images.tar.gz".

    Returns:
        The path of the downloaded archive file.

    Raises:
        FileNotFoundError: If the archive is missing after the download.
    """
    print(f"Downloading {url}")
    output = os.path.join(download_path, "sample_images.tar.gz")
    urllib.request.urlretrieve(url, filename=output)
    # Guard against a download that returned without leaving a file behind.
    if not os.path.exists(output):
        raise FileNotFoundError("sample images not download")
    return output
3329
3430@pytest .fixture (scope = "session" )
3531def sample_data ():
@@ -40,14 +36,33 @@ def sample_data():
4036 if os .path .exists (sample_images_path ):
4137 print ("{} already exits" .format (sample_images_path ))
4238 else :
43- print ("Downloading sample images" )
44- if not os .path .exists (sample_images_path ):
45- download_images (
46- url = SAMPLE_IMAGES ,
47- destination = test_path )
39+ archive = os .getenv ('SAMPLE_IMAGES_ARCHIVE' )
40+ if not archive :
41+ print ("Downloading sample images" )
42+ if not os .path .exists (sample_images_path ):
43+ archive = download_images (
44+ url = SAMPLE_IMAGES ,
45+ download_path = test_path
46+ )
47+ if not os .path .exists (archive ):
48+ raise FileNotFoundError (f"sample image archive not found. { archive } does not exist." )
49+ verify_hash (archive , sha256_hash = SAMPLE_IMAGES_SHA256 )
50+ extract_images (path = archive , destination = test_path )
4851 yield sample_images_path
4952 shutil .rmtree (sample_images_path )
5053
def extract_images(path, destination):
    """Extract every member of a gzip-compressed tar archive.

    Args:
        path: Path of the ".tar.gz" archive to unpack.
        destination: Directory the members are extracted into.
    """
    print("Extracting images")
    # Opt in to the "data" extraction filter when this Python provides it, so
    # members with absolute paths or ".." components cannot be written outside
    # of destination. Older interpreters fall back to the unfiltered behavior.
    extract_options = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
    with tarfile.open(path, "r:gz") as archive_file:
        for item in archive_file.getmembers():
            print("Extracting {}".format(item.name))
            archive_file.extract(item, path=destination, **extract_options)
60+
def verify_hash(path, sha256_hash):
    """Check that a file's SHA-256 digest matches the expected value.

    Args:
        path: Path of the file to hash.
        sha256_hash: Expected digest as a hexadecimal string.

    Raises:
        AssertionError: If the computed digest differs from sha256_hash.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in fixed-size chunks so a large archive is never held in
        # memory all at once.
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)
    file_hash = digest.hexdigest()
    # Raise explicitly rather than using an `assert` statement, which is
    # stripped under `python -O` and would silently skip the integrity check.
    if file_hash != sha256_hash:
        raise AssertionError(
            f"SHA-256 mismatch for {path}: "
            f"expected {sha256_hash}, got {file_hash}"
        )
65+
5166
5267@pytest .mark .integration
5368@pytest .mark .parametrize ("test_file,profile_name" , [
0 commit comments