2
2
3
3
Run as:
4
4
5
- python3 scripts/validate_data .py
5
+ python3 scripts/validate_data.py data
6
6
"""
7
7
8
8
from pathlib import Path
9
+ import sys
9
10
import hashlib
10
11
11
12
12
13
def file_hash (filename ):
13
- """ Get byte contents of file `filename`, return SHA1 hash
14
+ """Get byte contents of file `filename`, return SHA1 hash
14
15
15
16
Parameters
16
17
----------
@@ -25,12 +26,19 @@ def file_hash(filename):
25
26
# Open the file, read contents as bytes.
26
27
# Calculate, return SHA1 hash of the bytes from the file.
27
28
# This is a placeholder, replace it to write your solution.
28
- raise NotImplementedError (
29
- 'This is just a template -- you are expected to code this.' )
29
+ fpath = Path (filename )
30
+ con = fpath .read_bytes ()
31
+ hash_v = hashlib .sha1 (con ).hexdigest ()
32
+ # Your code here.
33
+
34
+ # raise NotImplementedError(
35
+ # "This is just a template -- you are expected to code this."
36
+ # )
37
+ return hash_v
30
38
31
39
32
40
def validate_data (data_directory ):
33
- """ Read ``data_hashes.txt`` file in `data_directory`, check hashes
41
+ """Read ``data_hashes.txt`` file in `data_directory`, check hashes
34
42
35
43
Parameters
36
44
----------
@@ -53,24 +61,54 @@ def validate_data(data_directory):
53
61
# If hash for filename is not the same as the one in the file, raise
54
62
# ValueError
55
63
# This is a placeholder, replace it to write your solution.
56
- raise NotImplementedError (
57
- 'This is just a template -- fill out the template with code.' )
64
+ data_pth = Path () / data_directory
65
+ # print(data_pth)
66
+ hash_pth = list (data_pth .glob ("**/*.txt" ))
67
+ hash_pth = str (hash_pth [0 ])
68
+ # hash_pth= "data_pth/**/hash_list.txt"
69
+ # print(hash_pth[0])
70
+
71
+ # hash_pth = Path(data_directory)
72
+ # hash_pth = data_pth/group-0/'data_hashes.txt'
73
+ # data_dir = hash_pth.parent
74
+ with open (hash_pth ) as f :
75
+ lines = f .readlines ()
76
+ # print(lines)
77
+ f .close ()
78
+ # Split into lines.
79
+ # lines.strip()
80
+
81
+ # For each line:
82
+ for line in lines :
83
+ # Split each line into expected_hash and filename
84
+ spl = line .split ()
85
+ # Calculate actual hash for given filename.
86
+ d_pth = list (data_pth .glob ("**/*" ))
87
+ #print(d_pth)
88
+ cal_hash = file_hash (data_pth / spl [1 ])
89
+ # Check actual hash against expected hash
90
+ act_hash = spl [0 ]
91
+ # Return False if any of the hashes do not match.
92
+ if cal_hash != act_hash :
93
+ return False
94
+
95
+ # raise NotImplementedError(
96
+ # "This is just a template -- you are expected to code this."
97
+ # )
98
+ return True
58
99
59
100
60
101
def main ():
61
102
# This function (main) is called when this file is run as a script.
62
- group_directory = (Path (__file__ ).parent .parent / 'data' )
63
- groups = list (group_directory .glob ('group-??' ))
64
- if len (groups ) == 0 :
65
- raise RuntimeError ('No group directory in data directory: '
66
- 'have you downloaded and unpacked the data?' )
67
-
68
- if len (groups ) > 1 :
69
- raise RuntimeError ('Too many group directories in data directory' )
103
+ #
104
+ # Get the data directory from the command line arguments
105
+ if len (sys .argv ) < 2 :
106
+ raise RuntimeError ("Please give data directory on " "command line" )
107
+ data_directory = sys .argv [1 ]
70
108
# Call function to validate data in data directory
71
- validate_data (groups [ 0 ] )
109
+ validate_data (data_directory )
72
110
73
111
74
- if __name__ == ' __main__' :
112
+ if __name__ == " __main__" :
75
113
# Python is running this file as a script, not importing it.
76
114
main ()
0 commit comments