This repository was archived by the owner on Jun 23, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathzip.py
More file actions
100 lines (77 loc) · 3.19 KB
/
zip.py
File metadata and controls
100 lines (77 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""
Module with zip-manipulating functions to serve pushing telemetry artifacts
under maven.mozilla.org
"""
import os
import zipfile
import requests
from constants import (
ZIP_MAX_COMPRESSION_RATIO,
ZIP_MAX_SIZE_IN_MB,
)
def download_zip_archive(url, zip_path):
    """Download the archive found at ``url`` and write it to ``zip_path``.

    Args:
        url: location of the zip archive to fetch.
        zip_path: local file path the response body is written to.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # TODO: switch to asyncio
    print(f'Downloading {url} to {zip_path}...')
    ret = requests.get(url)
    # Fail fast on HTTP errors: otherwise the body of an error page would be
    # saved under zip_path as if it were the requested archive.
    ret.raise_for_status()

    # FIXME: switch to NamedTemporaryFile here
    with open(zip_path, 'wb') as f:
        f.write(ret.content)
def check_archive_itself(zip_path):
    """Sanity check the archive file on disk before it gets opened.

    Args:
        zip_path: path of the archive to inspect.

    Raises:
        Exception: if the archive is bigger than ``ZIP_MAX_SIZE_IN_MB`` or if
            it is not a valid zip file.
    """
    zip_size = os.path.getsize(zip_path)
    # Floor division: the size is truncated to whole megabytes before the
    # comparison below.
    zip_size_in_mb = zip_size // (1024 * 1024)

    if zip_size_in_mb > ZIP_MAX_SIZE_IN_MB:
        # Report the configured limit and the actual size separately, in the
        # same shape as the per-file message used elsewhere in this module.
        raise Exception(
            f"Archive {zip_path} is too big. "
            f"Max accepted size (in MB): {ZIP_MAX_SIZE_IN_MB}. "
            f"File size (in MB): {zip_size_in_mb}"
        )

    if not zipfile.is_zipfile(zip_path):
        raise Exception(
            f"Archive {zip_path} is not a valid zip file"
        )
def _fetch_zip_metadata(zip_file):
return {
info.filename: {
'compress_size': info.compress_size,
'file_size': info.file_size,
}
for info in zip_file.infolist() if not info.is_dir()
}
def extract_and_check_output_files(zip_file, relative_paths_in_archive):
    """Extract the archive next to itself and map members to extracted paths.

    The archive is unpacked into ``<archive path>.out``.

    Args:
        zip_file: open ``zipfile.ZipFile`` whose ``filename`` is an absolute
            path.
        relative_paths_in_archive: member names expected to be extracted.

    Returns:
        dict mapping each member name to the full path of its extracted file.

    Raises:
        Exception: if the archive path is not absolute, if a member name
            resolves outside of the extraction directory, or if an expected
            file is missing once the archive has been extracted.
    """
    zip_path = zip_file.filename
    if not os.path.isabs(zip_path):
        raise Exception(f'Archive {zip_path} is not absolute path')

    extract_to = '{}.out'.format(zip_path)
    expected_full_paths_per_relative_path = {
        path_in_archive: os.path.join(extract_to, path_in_archive)
        for path_in_archive in relative_paths_in_archive
    }

    # os.path.join() keeps ".." components and lets an absolute member name
    # replace the extraction directory altogether, so a crafted name would
    # make this function return a path outside of extract_to. Reject such
    # names before anything is written to disk.
    extraction_root = os.path.normpath(extract_to)
    for path_in_archive, full_path in expected_full_paths_per_relative_path.items():
        normalized_path = os.path.normpath(full_path)
        if not normalized_path.startswith(extraction_root + os.sep):
            raise Exception(
                'In archive "{}", file "{}" would be extracted outside of "{}"'.format(
                    zip_path, path_in_archive, extract_to
                )
            )

    zip_file.extractall(extract_to)

    # Only hand back paths that really exist after the extraction.
    for path_in_archive, full_path in expected_full_paths_per_relative_path.items():
        if not os.path.isfile(full_path):
            raise Exception(
                'In archive "{}", file "{}" was not extracted to "{}"'.format(
                    zip_path, path_in_archive, full_path
                )
            )

    return expected_full_paths_per_relative_path
def ensure_files_in_archive_have_decent_sizes(zip_path, zip_metadata):
    """Reject archives holding oversized or suspiciously compressed files.

    Args:
        zip_path: path of the archive; only used in error messages.
        zip_metadata: dict mapping each member name to a dict holding its
            ``compress_size`` and ``file_size``.

    Raises:
        Exception: if a member's compressed size exceeds
            ``ZIP_MAX_SIZE_IN_MB`` or if its compression ratio exceeds
            ``ZIP_MAX_COMPRESSION_RATIO``.
    """
    for file_name, file_metadata in zip_metadata.items():
        compressed_size = file_metadata['compress_size']
        real_size = file_metadata['file_size']
        # Floor division: the size is truncated to whole megabytes.
        compressed_size_size_in_mb = compressed_size // (1024 * 1024)

        if compressed_size_size_in_mb > ZIP_MAX_SIZE_IN_MB:
            raise Exception(
                'In archive "{}", compressed file "{}" is too big. Max accepted size (in MB): {}. File size (in MB): {}'.format(
                    zip_path, file_name, ZIP_MAX_SIZE_IN_MB, compressed_size_size_in_mb
                )
            )

        if compressed_size == 0:
            # Empty members have no compressed bytes, so dividing would raise
            # ZeroDivisionError. An empty file is harmless (ratio 0); data
            # claimed to inflate out of zero bytes is an unbounded ratio.
            compression_ratio = 0 if real_size == 0 else float('inf')
        else:
            compression_ratio = real_size / compressed_size

        if compression_ratio > ZIP_MAX_COMPRESSION_RATIO:
            raise Exception(
                'In archive "{}", file "{}" has a suspicious compression ratio. Max accepted: {}. Found: {}'.format(
                    zip_path, file_name, ZIP_MAX_COMPRESSION_RATIO, compression_ratio
                )
            )
def check_extract_and_delete_zip_archive(zip_path):
    """Validate the archive at ``zip_path``, then extract it.

    The archive file is checked first, then the sizes of its members, and
    only then is it extracted. Despite the function name, the archive is
    currently left on disk: its removal is disabled.

    Args:
        zip_path: path of the archive to process.

    Returns:
        Whatever ``extract_and_check_output_files`` returns for the members
        listed in the archive's metadata.
    """
    # Sanity check the archive itself before opening it.
    check_archive_itself(zip_path)

    with zipfile.ZipFile(zip_path) as archive:
        metadata = _fetch_zip_metadata(archive)
        ensure_files_in_archive_have_decent_sizes(zip_path, metadata)
        member_names = list(metadata)
        extracted_files = extract_and_check_output_files(archive, member_names)

    # os.remove(zip_path)
    return extracted_files