Skip to content

Commit cd5eac8

Browse files
Raise errors for inputs that would cause blank outputs; handle length in CLI for variable length digest algorithms (#16)
* Raise errors for inputs that would cause blank outputs, with custom exceptions for blank-output errors and invalid length parameter use, with testing
* Add length parameter option for certain algorithms
* Refactor hash tests with parametrize

Co-authored-by: Elizabeth Campolongo <[email protected]>
1 parent 4bd0f07 commit cd5eac8

File tree

8 files changed

+181
-146
lines changed

8 files changed

+181
-146
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ options:
3636
Hash algorithm to use (default: md5; available: ripemd160, sha3_224, sha512_224, blake2b, sha384, sha256, sm3, sha3_256, shake_256, sha512, sha1, sha224, md5, md5-sha1, sha3_384, sha3_512, sha512_256, shake_128, blake2s)
3737
```
3838

39+
> Note: The available algorithms are determined by those available to `hashlib` and may vary depending on your system and OpenSSL version, so the set shown on your system with `sum-buddy -h` may be different from above. At a minimum, it should include: `{blake2s, blake2b, md5, sha1, sha224, sha256, sha384, sha512, sha3_224, sha3_256, sha3_384, sha3_512, shake_128, shake_256}`, which is given by `hashlib.algorithms_guaranteed`.
40+
3941
#### CLI Examples
4042

4143
- **Basic Usage:**

src/sumbuddy/__main__.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22
from sumbuddy.hasher import Hasher
33
from sumbuddy.mapper import Mapper
44
from sumbuddy.filter import Filter
5+
from sumbuddy.exceptions import EmptyInputDirectoryError, NoFilesAfterFilteringError, LengthUsedForFixedLengthHashError
56
import csv
67
import hashlib
78
from tqdm import tqdm
89
import sys
910
import os
1011

11-
def get_checksums(input_directory, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='md5'):
12+
def get_checksums(input_directory, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='md5', length=None):
1213
"""
1314
Generate a CSV file with the filepath, filename, and checksum of all files in the input directory according to patterns to ignore. Checksum column is labeled by the selected algorithm (e.g., 'md5' or 'sha256').
1415
@@ -19,10 +20,14 @@ def get_checksums(input_directory, output_filepath=None, ignore_file=None, inclu
1920
ignore_file - String [optional]. Filepath for the ignore patterns file.
2021
include_hidden - Boolean [optional]. Whether to include hidden files. Default is False.
2122
algorithm - String. Algorithm to use for checksums. Default: 'md5', see options with 'hashlib.algorithms_available'.
23+
length - Integer [conditionally optional]. Length of the digest for SHAKE (required) and BLAKE (optional) algorithms in bytes.
2224
"""
2325
mapper = Mapper()
24-
file_paths = mapper.gather_file_paths(input_directory, ignore_file=ignore_file, include_hidden=include_hidden)
25-
26+
try:
27+
file_paths = mapper.gather_file_paths(input_directory, ignore_file=ignore_file, include_hidden=include_hidden)
28+
except (EmptyInputDirectoryError, NoFilesAfterFilteringError) as e:
29+
sys.exit(str(e))
30+
2631
# Exclude the output file from being hashed
2732
if output_filepath:
2833
output_file_abs_path = os.path.abspath(output_filepath)
@@ -37,7 +42,7 @@ def get_checksums(input_directory, output_filepath=None, ignore_file=None, inclu
3742

3843
disable_tqdm = output_filepath is None
3944
for file_path in tqdm(file_paths, desc=f"Calculating {algorithm} checksums on {input_directory}", disable=disable_tqdm):
40-
checksum = hasher.checksum_file(file_path)
45+
checksum = hasher.checksum_file(file_path, algorithm=algorithm, length=length)
4146
writer.writerow([file_path, os.path.basename(file_path), checksum])
4247

4348
finally:
@@ -58,6 +63,7 @@ def main():
5863
group.add_argument("-i", "--ignore-file", help="Filepath for the ignore patterns file")
5964
group.add_argument("-H", "--include-hidden", action="store_true", help="Include hidden files")
6065
parser.add_argument("-a", "--algorithm", default="md5", help=f"Hash algorithm to use (default: md5; available: {available_algorithms})")
66+
parser.add_argument("-l", "--length", type=int, help="Length of the digest for SHAKE (required) or BLAKE (optional) algorithms in bytes")
6167

6268
args = parser.parse_args()
6369

@@ -69,7 +75,11 @@ def main():
6975
if overwrite.lower() != 'y':
7076
sys.exit("Exited without executing")
7177

72-
get_checksums(args.input_dir, args.output_file, args.ignore_file, args.include_hidden, args.algorithm)
78+
try:
79+
get_checksums(args.input_dir, args.output_file, args.ignore_file, args.include_hidden, args.algorithm, args.length)
80+
except (LengthUsedForFixedLengthHashError) as e:
81+
sys.exit(str(e))
82+
7383

7484
if __name__ == "__main__":
7585
main()

src/sumbuddy/exceptions.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
class EmptyInputDirectoryError(Exception):
    """
    Error signalling that a directory and its subdirectories (if any) contain no files.

    Parameters:
    ------------
    input_directory - String. Directory that was searched; interpolated into the error message.
    """

    def __init__(self, input_directory):
        # The text is passed straight to Exception so that str(error) is the
        # complete user-facing message.
        super().__init__(
            f"The directory {input_directory} and subdirectories (if any) contain no files. "
            "\nPlease provide a directory with files."
        )
5+
6+
class NoFilesAfterFilteringError(Exception):
    """
    Error signalling that a directory contains files, but every one of them was filtered out.

    Parameters:
    ------------
    input_directory - String. Directory that was searched; interpolated into the error message.
    ignore_file - String [optional]. Filepath of the ignore patterns file that was applied.
        May be None (or empty) when no ignore file was supplied; the hint then avoids
        naming a non-existent "None" file.
    """

    def __init__(self, input_directory, ignore_file=None):
        if ignore_file:
            hint = f"Check patterns in your {ignore_file} file and/or hidden files settings."
        else:
            # No ignore file was given, so pointing the user at a "None file"
            # would be misleading; keep the hint generic instead.
            hint = "Check your ignore patterns and/or hidden files settings."
        message = f"The directory {input_directory} contains files, but all are filtered out. \n{hint}"
        super().__init__(message)
10+
11+
class LengthUsedForFixedLengthHashError(Exception):
    """
    Error signalling that a digest length was given for a fixed-length hash algorithm.

    Parameters:
    ------------
    algorithm - String. Name of the fixed-length algorithm; interpolated into the error message.
    """

    def __init__(self, algorithm):
        # "parameter" was misspelled "paremeter" in the user-facing message.
        message = f"Length parameter is not applicable for fixed-length algorithm '{algorithm}'."
        super().__init__(message)

src/sumbuddy/hasher.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import hashlib
2+
from sumbuddy.exceptions import LengthUsedForFixedLengthHashError
23

34
class Hasher:
45
def __init__(self, algorithm='md5'):
@@ -51,7 +52,7 @@ def checksum_file(self, file_path, algorithm=None, length=None):
5152
# Other algorithms
5253
else:
5354
if length is not None:
54-
raise ValueError(f"Length parameter is not applicable for fixed-length algorithm '{algorithm}'")
55+
raise LengthUsedForFixedLengthHashError(algorithm)
5556
hash_func = hashlib.new(algorithm)
5657

5758
# Read the file and update the hash function

src/sumbuddy/mapper.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
from sumbuddy.filter import Filter
3+
from sumbuddy.exceptions import EmptyInputDirectoryError, NoFilesAfterFilteringError
34

45
class Mapper:
56
def __init__(self):
@@ -42,11 +43,19 @@ def gather_file_paths(self, input_directory, ignore_file=None, include_hidden=Fa
4243

4344
file_paths = []
4445
root_directory = os.path.abspath(input_directory)
46+
has_files = False
4547

4648
for root, dirs, files in os.walk(input_directory):
49+
if files:
50+
has_files = True
4751
for name in files:
4852
file_path = os.path.join(root, name)
4953
if self.filter_manager.should_include(file_path, root_directory):
5054
file_paths.append(file_path)
5155

56+
if not has_files:
57+
raise EmptyInputDirectoryError(input_directory)
58+
if not file_paths:
59+
raise NoFilesAfterFilteringError(input_directory, ignore_file)
60+
5261
return file_paths

tests/test_getChecksums.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def setUp(self):
2121
@patch('os.path.exists', return_value=True)
2222
@patch('builtins.open', new_callable=mock_open)
2323
@patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt'])
24-
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x: 'dummychecksum')
24+
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum')
2525
def test_get_checksums_to_file(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath):
2626
get_checksums(self.input_directory, self.output_filepath, ignore_file=None, include_hidden=False, algorithm=self.algorithm)
2727

@@ -35,7 +35,7 @@ def test_get_checksums_to_file(self, mock_checksum, mock_gather, mock_open, mock
3535
@patch('os.path.exists', return_value=True)
3636
@patch('builtins.open', new_callable=mock_open)
3737
@patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt'])
38-
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x: 'dummychecksum')
38+
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum')
3939
def test_get_checksums_to_stdout(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath):
4040
output_stream = StringIO()
4141
with patch('sys.stdout', new=output_stream):
@@ -50,7 +50,7 @@ def test_get_checksums_to_stdout(self, mock_checksum, mock_gather, mock_open, mo
5050
@patch('os.path.exists', return_value=True)
5151
@patch('builtins.open', new_callable=mock_open)
5252
@patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt'])
53-
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x: 'dummychecksum')
53+
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum')
5454
def test_get_checksums_with_ignore_file(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath):
5555
get_checksums(self.input_directory, output_filepath=None, ignore_file=self.ignore_file, include_hidden=False, algorithm=self.algorithm)
5656
mock_gather.assert_called_with(self.input_directory, ignore_file=self.ignore_file, include_hidden=False)
@@ -59,7 +59,7 @@ def test_get_checksums_with_ignore_file(self, mock_checksum, mock_gather, mock_o
5959
@patch('os.path.exists', return_value=True)
6060
@patch('builtins.open', new_callable=mock_open)
6161
@patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt', '.hidden_file'])
62-
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x: 'dummychecksum')
62+
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum')
6363
def test_get_checksums_include_hidden(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath):
6464
get_checksums(self.input_directory, output_filepath=None, ignore_file=None, include_hidden=True, algorithm=self.algorithm)
6565
mock_gather.assert_called_with(self.input_directory, ignore_file=None, include_hidden=True)
@@ -68,7 +68,7 @@ def test_get_checksums_include_hidden(self, mock_checksum, mock_gather, mock_ope
6868
@patch('os.path.exists', return_value=True)
6969
@patch('builtins.open', new_callable=mock_open)
7070
@patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt'])
71-
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x: 'dummychecksum')
71+
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum')
7272
def test_get_checksums_different_algorithm(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath):
7373
algorithm = 'sha256'
7474
get_checksums(self.input_directory, output_filepath=None, ignore_file=None, include_hidden=False, algorithm=algorithm)

0 commit comments

Comments
 (0)