Skip to content

Commit dcdd99f

Browse files
author
Brock Wade
committed
fix: security update -> use sha256 instead of md5 for file hashing
1 parent 342fbbc commit dcdd99f

File tree

1 file changed

+27
-27
lines changed

1 file changed

+27
-27
lines changed

src/sagemaker/workflow/utilities.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -268,29 +268,29 @@ def get_config_hash(step: Entity):
268268

269269

270270
def hash_object(obj) -> str:
271-
"""Get the MD5 hash of an object.
271+
"""Get the SHA256 hash of an object.
272272
273273
Args:
274274
obj (dict): The object
275275
Returns:
276-
str: The MD5 hash of the object
276+
str: The SHA256 hash of the object
277277
"""
278-
return hashlib.md5(str(obj).encode()).hexdigest()
278+
return hashlib.sha256(str(obj).encode()).hexdigest()
279279

280280

281281
def hash_file(path: str) -> str:
282-
"""Get the MD5 hash of a file.
282+
"""Get the SHA256 hash of a file.
283283
284284
Args:
285285
path (str): The local path for the file.
286286
Returns:
287-
str: The MD5 hash of the file.
287+
str: The SHA256 hash of the file.
288288
"""
289-
return _hash_file(path, hashlib.md5()).hexdigest()
289+
return _hash_file(path, hashlib.sha256()).hexdigest()
290290

291291

292292
def hash_files_or_dirs(paths: List[str]) -> str:
293-
"""Get the MD5 hash of the contents of a list of files or directories.
293+
"""Get the SHA256 hash of the contents of a list of files or directories.
294294
295295
Hash is changed if:
296296
* input list is changed
@@ -301,58 +301,58 @@ def hash_files_or_dirs(paths: List[str]) -> str:
301301
Args:
302302
paths: List of file or directory paths
303303
Returns:
304-
str: The MD5 hash of the list of files or directories.
304+
str: The SHA256 hash of the list of files or directories.
305305
"""
306-
md5 = hashlib.md5()
306+
sha256 = hashlib.sha256()
307307
for path in sorted(paths):
308-
md5 = _hash_file_or_dir(path, md5)
309-
return md5.hexdigest()
308+
sha256 = _hash_file_or_dir(path, sha256)
309+
return sha256.hexdigest()
310310

311311

312-
def _hash_file_or_dir(path: str, md5: Hash) -> Hash:
312+
def _hash_file_or_dir(path: str, sha256: Hash) -> Hash:
313313
"""Updates the inputted Hash with the contents of the current path.
314314
315315
Args:
316316
path: path of file or directory
317317
Returns:
318-
str: The MD5 hash of the file or directory
318+
Hash: The inputted hash object, updated with the path and the contents of the file or directory
319319
"""
320320
if isinstance(path, str) and path.lower().startswith("file://"):
321321
path = unquote(urlparse(path).path)
322-
md5.update(path.encode())
322+
sha256.update(path.encode())
323323
if Path(path).is_dir():
324-
md5 = _hash_dir(path, md5)
324+
sha256 = _hash_dir(path, sha256)
325325
elif Path(path).is_file():
326-
md5 = _hash_file(path, md5)
327-
return md5
326+
sha256 = _hash_file(path, sha256)
327+
return sha256
328328

329329

330-
def _hash_dir(directory: Union[str, Path], md5: Hash) -> Hash:
330+
def _hash_dir(directory: Union[str, Path], sha256: Hash) -> Hash:
331331
"""Updates the inputted Hash with the contents of the current path.
332332
333333
Args:
334334
directory: path of the directory
335335
Returns:
336-
str: The MD5 hash of the directory
336+
Hash: The inputted hash object, updated with the names and contents of the directory's entries
337337
"""
338338
if not Path(directory).is_dir():
339339
raise ValueError(str(directory) + " is not a valid directory")
340340
for path in sorted(Path(directory).iterdir()):
341-
md5.update(path.name.encode())
341+
sha256.update(path.name.encode())
342342
if path.is_file():
343-
md5 = _hash_file(path, md5)
343+
sha256 = _hash_file(path, sha256)
344344
elif path.is_dir():
345-
md5 = _hash_dir(path, md5)
346-
return md5
345+
sha256 = _hash_dir(path, sha256)
346+
return sha256
347347

348348

349-
def _hash_file(file: Union[str, Path], md5: Hash) -> Hash:
349+
def _hash_file(file: Union[str, Path], sha256: Hash) -> Hash:
350350
"""Updates the inputted Hash with the contents of the current path.
351351
352352
Args:
353353
file: path of the file
354354
Returns:
355-
str: The MD5 hash of the file
355+
Hash: The inputted hash object, updated with the contents of the file
356356
"""
357357
if isinstance(file, str) and file.lower().startswith("file://"):
358358
file = unquote(urlparse(file).path)
@@ -363,8 +363,8 @@ def _hash_file(file: Union[str, Path], md5: Hash) -> Hash:
363363
data = f.read(BUF_SIZE)
364364
if not data:
365365
break
366-
md5.update(data)
367-
return md5
366+
sha256.update(data)
367+
return sha256
368368

369369

370370
def validate_step_args_input(

0 commit comments

Comments
 (0)