Skip to content

Commit fdaf6d5

Browse files
authored
Merge pull request #14 from Nauman702/data-validate-fixed
validate data function-conflict fixed
2 parents 473f8d6 + 03fc24e commit fdaf6d5

File tree

1 file changed

+55
-17
lines changed

1 file changed

+55
-17
lines changed

scripts/validate_data.py

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@
22
33
Run as:
44
5-
python3 scripts/validate_data.py
5+
python3 scripts/validate_data.py data
66
"""
77

88
from pathlib import Path
9+
import sys
910
import hashlib
1011

1112

1213
def file_hash(filename):
13-
""" Get byte contents of file `filename`, return SHA1 hash
14+
"""Get byte contents of file `filename`, return SHA1 hash
1415
1516
Parameters
1617
----------
@@ -25,12 +26,19 @@ def file_hash(filename):
2526
# Open the file, read contents as bytes.
2627
# Calculate, return SHA1 hash of the bytes from the file.
2728
# This is a placeholder, replace it to write your solution.
28-
raise NotImplementedError(
29-
'This is just a template -- you are expected to code this.')
29+
fpath = Path(filename)
30+
con = fpath.read_bytes()
31+
hash_v = hashlib.sha1(con).hexdigest()
32+
# Your code here.
33+
34+
# raise NotImplementedError(
35+
# "This is just a template -- you are expected to code this."
36+
# )
37+
return hash_v
3038

3139

3240
def validate_data(data_directory):
33-
""" Read ``data_hashes.txt`` file in `data_directory`, check hashes
41+
"""Read ``data_hashes.txt`` file in `data_directory`, check hashes
3442
3543
Parameters
3644
----------
@@ -53,24 +61,54 @@ def validate_data(data_directory):
5361
# If hash for filename is not the same as the one in the file, raise
5462
# ValueError
5563
# This is a placeholder, replace it to write your solution.
56-
raise NotImplementedError(
57-
'This is just a template -- fill out the template with code.')
64+
data_pth = Path() / data_directory
65+
# print(data_pth)
66+
hash_pth = list(data_pth.glob("**/*.txt"))
67+
hash_pth = str(hash_pth[0])
68+
# hash_pth= "data_pth/**/hash_list.txt"
69+
# print(hash_pth[0])
70+
71+
# hash_pth = Path(data_directory)
72+
# hash_pth = data_pth/group-0/'data_hashes.txt'
73+
# data_dir = hash_pth.parent
74+
with open(hash_pth) as f:
75+
lines = f.readlines()
76+
# print(lines)
77+
f.close()
78+
# Split into lines.
79+
# lines.strip()
80+
81+
# For each line:
82+
for line in lines:
83+
# Split each line into expected_hash and filename
84+
spl = line.split()
85+
# Calculate actual hash for given filename.
86+
d_pth = list(data_pth.glob("**/*"))
87+
#print(d_pth)
88+
cal_hash = file_hash(data_pth / spl[1])
89+
# Check actual hash against expected hash
90+
act_hash = spl[0]
91+
# Return False if any of the hashes do not match.
92+
if cal_hash != act_hash:
93+
return False
94+
95+
# raise NotImplementedError(
96+
# "This is just a template -- you are expected to code this."
97+
# )
98+
return True
5899

59100

60101
def main():
61102
# This function (main) called when this file run as a script.
62-
group_directory = (Path(__file__).parent.parent / 'data')
63-
groups = list(group_directory.glob('group-??'))
64-
if len(groups) == 0:
65-
raise RuntimeError('No group directory in data directory: '
66-
'have you downloaded and unpacked the data?')
67-
68-
if len(groups) > 1:
69-
raise RuntimeError('Too many group directories in data directory')
103+
#
104+
# Get the data directory from the command line arguments
105+
if len(sys.argv) < 2:
106+
raise RuntimeError("Please give data directory on " "command line")
107+
data_directory = sys.argv[1]
70108
# Call function to validate data in data directory
71-
validate_data(groups[0])
109+
validate_data(data_directory)
72110

73111

74-
if __name__ == '__main__':
112+
if __name__ == "__main__":
75113
# Python is running this file as a script, not importing it.
76114
main()

0 commit comments

Comments
 (0)