Skip to content

Commit ac137cc

Browse files
committed
Initial code to write aes files.
1 parent 9243f0f commit ac137cc

3 files changed

Lines changed: 272 additions & 11 deletions

File tree

README.md

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
# zipfile-aes
2+
23
Extend stdlib zipfile to support [WinZip AES](https://www.winzip.com/en/support/aes-encryption/) encryption.
34

45
This is an alternative to [pyzipper](https://github.com/danifus/pyzipper),
56
with the difference that *zipfile-aes* uses the [cryptography](https://github.com/pyca/cryptography/) library.
6-
The code from here is influced by pyzipper.
7+
The code from here is based on pyzipper.
78

89
This code can't be included in Python `stdlib` since there is no `crypto` support.
910
The [stdlib ssl](https://docs.python.org/3/library/ssl.html) library only exposes the TLS part.
1011

11-
Supported operations (check the automted tests for details):
12-
* WinZIP AES V2 read without seek
12+
Supported operations (check the automated tests for details):
13+
* WinZIP AES V2 and V1 read without seek.
1314

1415

1516
# Contribution
@@ -21,14 +22,14 @@ python3 -m venv .venv
2122
. .venv/bin/activate
2223
pip install uv
2324
uv sync
24-
uv pip install -e .
25-
python patch_zipfile.py
25+
uv pip install -e .[dev]
26+
uv run patch_zipfile.py
2627
```
2728

2829
Use dev tools
2930

3031
```sh
31-
pytest
32-
ruff format .
33-
ruff check
32+
uv run pytest
33+
uv run ruff format .
34+
uv run ruff check
3435
```

src/zipfile_aes/__init__.py

Lines changed: 231 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
Code for handling ZIP AES archives.
44
"""
55

6+
import io
7+
import os
68
import struct
79
import zipfile_patched as zipfile
810

@@ -56,6 +58,30 @@ def __init__(self, *args, **kwargs):
5658
self.aes_version = None
5759
self.aes_compression = None
5860

61+
def setAESInfo(self, version, strength):
    """
    Set the attributes to prepare writing an AES encrypted file.

    `version` and `strength` must be bytes-like: they are concatenated
    verbatim into the AES extra-field record built below.

    The method may be called more than once on the same instance.
    The original compression method is preserved and the AES extra record
    is replaced instead of being appended a second time.
    """
    self.aes_strength = strength
    self.aes_version = version

    # Only capture the real compression method while `compress_type` still
    # holds it. After a first call `compress_type` is the AES marker, and
    # copying it again would lose the method used for the actual data.
    if (
        self.compress_type != AES_COMPRESSION_TYPE
        or self.aes_compression is None
    ):
        self.aes_compression = self.compress_type

    self.compress_type = AES_COMPRESSION_TYPE

    # We set it here... but when the file is written,
    # the stdlib will reset all the flags.
    self.flag_bits |= zipfile._MASK_ENCRYPTED

    # Header: extra field id + payload size (7 bytes), followed by the
    # version, vendor id, strength and the real compression method.
    aes_record = (
        struct.pack("<HH", EXTRA_AES_HEADER_ID, 7)
        + self.aes_version
        + AES_VENDOR_ID
        + self.aes_strength
        + struct.pack("<H", self.aes_compression)
    )

    # Walk the existing extra field record by record (id + size header,
    # little endian) and keep everything that is not an AES record.
    kept = []
    extra = self.extra
    offset = 0
    while offset + 4 <= len(extra):
        header_id, size = struct.unpack_from("<HH", extra, offset)
        record_end = offset + 4 + size
        if header_id != EXTRA_AES_HEADER_ID:
            kept.append(extra[offset:record_end])
        offset = record_end
    # Preserve trailing bytes that are too short to form a record header.
    kept.append(extra[offset:])

    self.extra = b"".join(kept) + aes_record
84+
5985
def _decodeExtra(self, filename_crc):
6086
"""
6187
Little endian encoding.
@@ -125,6 +151,9 @@ def __init__(self, fileobj, mode, zipinfo, pwd=None, close_fileobj=False):
125151
# This is reverted at the end of init.
126152
zipinfo.compress_type = zipinfo.aes_compression
127153

154+
if not pwd:
155+
raise RuntimeError("AES encrypted file requires a password.")
156+
128157
self._zipinfo = zipinfo
129158

130159
super().__init__(fileobj, mode, zipinfo, pwd, close_fileobj)
@@ -179,9 +208,9 @@ def _init_aes_decrypter(self):
179208

180209
self._decrypter = AESZipDecipher(self._zipinfo, self._pwd, header)
181210

182-
# FIXME:
183-
# This should be removed once the upstream zipfile
184-
# does the password checking as part of decryptor initialization.
211+
# TODO:
212+
# This can be removed once the upstream zipfile
213+
# does the password checking as part of decryption initialization.
185214
# For AES the password is validated in AESZipDecipher.
186215
# This is here to reduce the patch for stdlib.
187216
if self._zipinfo.flag_bits & zipfile._MASK_USE_DATA_DESCRIPTOR:
@@ -271,3 +300,202 @@ class ZipFileWithAES(zipfile.ZipFile):
271300

272301
_ZipInfo = ZipInfoWithAES
273302
_ZipExtFile = ZipExtFileWithAES
303+
_aes_password = None
304+
_aes_strength = AES_256
305+
_aes_version = AES_V2
306+
307+
def setAESEncryption(self, password, version=None):
    """
    Enable AES encryption for members opened for writing afterwards.

    `password` is stored as-is and later handed to the key derivation.
    A falsy password leaves encryption disabled, because the write path
    checks the stored password for truthiness.

    `version`, when not None, replaces the AES version used for new
    members. Passing None keeps the current one (AES_V2 by default).
    """
    self._aes_password = password
    # The argument used to be accepted and silently dropped.
    if version is not None:
        self._aes_version = version
309+
310+
def _open_to_write(self, zinfo, force_zip64=False):
    """
    Open a member for writing, using the AES writer when a password is set.

    Without an AES password this is the unmodified upstream behaviour.
    With a password, the upstream call is still used to write the local
    file header and to compute whether zip64 is needed. The writer it
    returns is then discarded and replaced by `_ZipWithAESWriteFile`.
    """
    if self._aes_password:
        zinfo.setAESInfo(version=self._aes_version, strength=self._aes_strength)

    # Stdlib will reset the flag_bits.
    # We set them again in _writecheck
    stream = super()._open_to_write(zinfo, force_zip64=force_zip64)

    if not self._aes_password:
        return stream

    # TODO:
    # Refactor _open_to_write to allow reusing more code.
    # Here we close the writer created by the upstream _open_to_write
    # against a throw-away file, then revert the bookkeeping that its
    # close() performs.
    #
    # The stdlib close() (mirrored in _ZipWithAESWriteFile.close) updates
    # `start_dir`, appends to `filelist`, registers the member in
    # `NameToInfo` and clears `_writing`. None of that may leak out,
    # since the member is not written yet.
    original_fp = self.fp
    original_start_dir = self.start_dir
    missing = object()
    previous_entry = self.NameToInfo.get(zinfo.filename, missing)
    try:
        self.fp = io.BytesIO()
        stream.close()
        self.filelist.pop()
    finally:
        self.fp = original_fp
        # `start_dir` was moved to an offset inside the throw-away file.
        self.start_dir = original_start_dir
        if previous_entry is missing:
            self.NameToInfo.pop(zinfo.filename, None)
        else:
            self.NameToInfo[zinfo.filename] = previous_entry

    # Closing the placeholder cleared the flag, but a writing handle is
    # about to be handed out. Keep the archive marked as busy until the
    # AES writer is closed.
    self._writing = True

    # The need for zip64 is computed inside _open_to_write and
    # was already set in the file header.
    return _ZipWithAESWriteFile(self, zinfo, stream._zip64)
336+
337+
def _writecheck(self, zinfo):
    """
    Run the upstream write checks as if the member used its real
    compression method.

    For AES members the encrypted flag is also set again here, since the
    upstream _open_to_write resets flag_bits.
    """
    if not zinfo.aes_version:
        # Not an AES member: nothing to disguise.
        super()._writecheck(zinfo)
        return

    try:
        # Show upstream the real compression method during the check.
        zinfo.compress_type = zinfo.aes_compression
        zinfo.flag_bits |= zipfile._MASK_ENCRYPTED
        super()._writecheck(zinfo)
    finally:
        # Always put the AES marker back, even when the check raised.
        zinfo.compress_type = AES_COMPRESSION_TYPE
351+
352+
353+
class _ZipWithAESWriteFile(zipfile._ZipWriteFile):
    """
    File-like writer that compresses and then AES-encrypts a ZIP member.

    On creation it writes the random salt and the password verification
    bytes. `write()` sends compressed, encrypted data to the archive.
    `close()` flushes the remaining data, appends the truncated HMAC and
    updates the member header information.

    Encryption works on 16 byte blocks, each with its own counter value
    (starting at 1, encoded little endian). Data that does not fill a whole
    block is held back until more data arrives or the file is closed, so
    that the counter stays aligned with the stream no matter how the
    caller, or the compressor, splits the data.
    """

    # Size in bytes of one AES block / one counter step.
    _AES_BLOCK_SIZE = 16

    def __init__(self, zf, zinfo, zip64):
        """
        Prepare the compressor and the keys, then write salt + verifier.

        The upstream __init__ is intentionally not called: it would create
        the compressor from `zinfo.compress_type`, which is the AES marker.
        """
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        # Here we update stdlib to pass the compression.
        self._compressor = zipfile._get_compressor(
            zinfo.aes_compression, zinfo._compresslevel
        )
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

        self._salt_length = AES_SALT_LENGTHS[zinfo.aes_strength]
        key_length = AES_KEY_LENGTHS[zinfo.aes_strength]
        salt = os.urandom(self._salt_length)

        # Derived material layout:
        # encryption key | HMAC key | password verification bytes.
        pwd_verify_length = 2
        dkLen = 2 * key_length + pwd_verify_length
        kdf = PBKDF2HMAC(
            algorithm=hashes.SHA1(),
            length=dkLen,
            salt=salt,
            iterations=1000,
        )
        keymaterial = kdf.derive(self._zipfile._aes_password)

        encryption_verify = keymaterial[2 * key_length :]

        self._enckey = keymaterial[:key_length]
        self._counter = 0
        # Bytes waiting for enough data to complete an AES block.
        self._pending = b""
        encmac_key = keymaterial[key_length : 2 * key_length]
        self._hmac = hmac.HMAC(encmac_key, hashes.SHA1())

        # The salt and verifier are part of the stored (compressed) size.
        buf = salt + encryption_verify
        self._compress_size += len(buf)
        self._fileobj.write(buf)

    def _encrypt(self, data, final=False):
        """
        Encrypt `data` and return the bytes that are ready to be written.

        Only whole 16 byte blocks are encrypted. A trailing partial block
        is kept in `self._pending` and prepended to the next call.
        With `final=True` the remaining partial block is encrypted too;
        this must be the last call for the stream.

        The returned ciphertext is also fed into the HMAC.
        """
        pending = self._pending + bytes(data)
        if final:
            usable = len(pending)
        else:
            usable = len(pending) - len(pending) % self._AES_BLOCK_SIZE
        self._pending = pending[usable:]

        encrypted_data = []
        for block in self._getBlocks(pending[:usable]):
            self._counter += 1
            # One cipher per block, seeded with the little endian counter.
            # The library never has to advance the counter itself.
            encryptor = Cipher(
                algorithms.AES(self._enckey),
                modes.CTR((self._counter).to_bytes(16, byteorder="little")),
            ).encryptor()
            encrypted_data.append(encryptor.update(block) + encryptor.finalize())

        result = b"".join(encrypted_data)
        self._hmac.update(result)
        return result

    @staticmethod
    def _getBlocks(original):
        """
        Return AES blocks.

        Yields consecutive 16 byte slices; the last one may be shorter.
        """
        size = _ZipWithAESWriteFile._AES_BLOCK_SIZE
        for i in range(0, len(original), size):
            yield original[i : i + size]

    def write(self, data):
        """
        Compress and encrypt `data`, returning the number of input bytes.

        Raises ValueError when the file is already closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file.")

        # Accept any data that supports the buffer protocol
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        # The CRC is computed over the plain, uncompressed data.
        self._crc = zipfile.crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)

        # TODO:
        # This is updated to add encryption.
        data = self._encrypt(data)

        self._compress_size += len(data)
        # This is stdlib code.
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """
        Flush pending data, write the authentication code and finish the
        member (sizes, CRC, header or data descriptor, archive caches).

        Raises RuntimeError when a size exceeds the non-zip64 limit.
        The archive is always released for further writes on exit.
        """
        if self.closed:
            return

        try:
            io.BufferedIOBase.close(self)

            # Flush any data from the compressor, encrypt it and update header info
            if self._compressor:
                buf = self._compressor.flush()
            else:
                buf = b""

            # `final=True` also encrypts the partial block held back so far.
            buf = self._encrypt(buf, final=True)
            buf += struct.pack(f"<{AES_HMAC_SIZE}s", self._hmac.finalize()[:10])
            self._compress_size += len(buf)
            self._fileobj.write(buf)

            self._zinfo.compress_size = self._compress_size

            # Below is stdlib code
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            if not self._zip64:
                if self._file_size > zipfile.ZIP64_LIMIT:
                    raise RuntimeError("File size too large, try using force_zip64")
                if self._compress_size > zipfile.ZIP64_LIMIT:
                    raise RuntimeError(
                        "Compressed size too large, try using force_zip64"
                    )

            # Write updated header info
            if self._zinfo.flag_bits & zipfile._MASK_USE_DATA_DESCRIPTOR:
                # Write CRC and file sizes after the file data
                fmt = "<LLQQ" if self._zip64 else "<LLLL"
                self._fileobj.write(
                    struct.pack(
                        fmt,
                        zipfile._DD_SIGNATURE,
                        self._zinfo.CRC,
                        self._zinfo.compress_size,
                        self._zinfo.file_size,
                    )
                )
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False

tests/test_zipfile_aes.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
AES_192,
1616
AES_256,
1717
ZipFileWithAES,
18+
ZipInfoWithAES,
1819
zipfile, # Should be moved to canonical name once we no longer need patching.
1920
)
2021

@@ -308,3 +309,34 @@ def test_decrypt_bad_crc_ae2():
308309
with pytest.raises(zipfile.BadZipFile) as context:
309310
sut.read("test.txt", pwd=b"test")
310311
assert "Bad CRC-32 for file 'test.txt'" == context.value.args[0]
312+
313+
314+
def test_encrypt():
    """
    With only a password configured, a small member is written as
    AES v2 / AES-256 using the stored method, and can be read back.
    """
    secret = b"test"
    archive = BytesIO()
    with ZipFileWithAES(archive, "w") as sut:
        sut.setAESEncryption(password=secret)

        with sut.open("test.txt", mode="w") as member:
            member.write(b"content")

    # The stdlib context manager leaves a caller-provided file object
    # open, so the written bytes are still available here.
    written = BytesIO(archive.getvalue())

    with ZipFileWithAES(written, "r") as sut:
        entries = sut.infolist()
        assert 1 == len(entries)

        entry = entries[0]
        assert "test.txt" == entry.filename
        assert 7 == entry.file_size

        assert AES_256 == entry.aes_strength
        assert AES_V2 == entry.aes_version
        assert zipfile.ZIP_STORED == entry.aes_compression

        assert b"content" == sut.read("test.txt", pwd=secret)

0 commit comments

Comments
 (0)