22from pathlib import Path
33from typing import Optional , List , Union , Tuple , DefaultDict
44from collections import defaultdict
5+ from cryptography .hazmat .primitives .ciphers import Cipher , algorithms , modes
56
67import sys
78import struct
@@ -166,29 +167,26 @@ def __init__(self, mmkv_file_data: Union[str, BufferedIOBase], crc_file_data: Un
166167 else :
167168 pass
168169
169-
170170 # Initialize our files
171171 self .mmkv_file : BufferedIOBase = mmkv_file_data
172172 self .crc_file : Optional [BufferedIOBase ] = crc_file_data
173173 self .pos : int = 0
174174 self .decoded_map : DefaultDict [str , List [bytes ]] = defaultdict (list )
175175
176- # Read in first 4 header bytes - [0:4] is total size
177- self .header_bytes : bytes = self .mmkv_file .read (4 )
178- if len (self .header_bytes ) != 4 :
179- raise ValueError ('[+] Error while reading mmkv_file. Header bytes was not 4 bytes.' )
180- self .pos += 4
181-
182- # TODO: find out the purpose of the varint in [4:x] position
183- # [4:X] is garbage bytes basically (0xffffff07) or is another varint
184- x , bytes_read = decode_unsigned_varint (self .mmkv_file )
185- if (x , bytes_read ) == (- 1 , - 1 ):
186- raise ValueError ('[+] Error while decoding the [4:X] bytes of the mmkv_file.' )
176+ # Found IV from .crc file - don't read anything from the stream if encrypted
177+ if self .crc_file :
178+ crc_header_bytes = self .crc_file .read (28 )
179+ if len (crc_header_bytes ) != 28 :
180+ raise ValueError ('[+] Error while reading crc_file. Header bytes was not 28 bytes.' )
181+ self .iv = crc_header_bytes [12 :28 ]
187182
188- self .pos += bytes_read
183+ # Cannot find IV from .crc file - prepare stream for decoding into a map
184+ else :
185+ print ('[+] .CRC file was not passed in - is needed for decryption routines' )
186+ self .iv = b''
189187
190188
191- def get_db_size (self ) -> int :
189+ def _get_db_size (self ) -> int :
192190 """
193191 Returns the actual size known to the MMKV API for querying data. This includes older
194192 logged data that the actual MMKV API does not have the ability to query.
@@ -207,6 +205,59 @@ def get_db_size(self) -> int:
207205 raise TypeError (f'[+] Error while unpacking header bytes. Received { type (size )} ' )
208206
209207
def _prepare_mmkv_stream_for_decoding(self):
    """
    Consumes the leading header of `self.mmkv_file` so the stream is positioned
    for linear decoding, keeping `self.pos` in step with the bytes consumed.

    Stores the first 4 bytes of the stream (the total size field) on
    `self.header_bytes`, then skips the varint that follows it.

    :raises ValueError: if fewer than 4 header bytes are available, or if the
                        varint following the header cannot be decoded
    """
    # Bytes [0:4] hold the total size
    raw_header = self.mmkv_file.read(4)
    self.header_bytes: bytes = raw_header
    if len(raw_header) != 4:
        raise ValueError('[+] Error while reading mmkv_file. Header bytes was not 4 bytes.')
    self.pos += 4

    # TODO: find out the purpose of the varint in [4:x] position
    # [4:X] is garbage bytes basically (0xffffff07) or is another varint
    skipped_value, consumed = decode_unsigned_varint(self.mmkv_file)
    # The decoder signals failure with the (-1, -1) pair
    if skipped_value == -1 and consumed == -1:
        raise ValueError('[+] Error while decoding the [4:X] bytes of the mmkv_file.')

    self.pos += consumed
222+
223+
def decrypt_and_reconstruct(self, key: Union[str, bytes]) -> bytes:
    """
    Attempts to decrypt `self.mmkv_file` data with `key` and `self.iv` using
    AES-128-CFB. Will return decrypted bytes as a fully decrypted MMKV file.

    The first 4 bytes of the stream (the size header) are copied through
    unchanged; everything after them is decrypted. On success `self.mmkv_file`
    is replaced with an in-memory stream over the reconstructed bytes.

    `key` is normalised to exactly 16 bytes: longer keys are truncated to their
    first 16 bytes, shorter keys are right-padded with NULL bytes.

    :param key: 16-byte AES key, or hexstring AES key
    :return: decrypted mmkv file in bytes
    :raises ValueError: if `key` is a string that is not valid hex, if `self.iv`
                        is not 16 bytes (e.g. no .crc file was passed in), or if
                        the 4-byte size header cannot be read
    """
    if isinstance(key, str):
        key = bytes.fromhex(key)

    # Force the key to the AES-128 key size: truncate, then NULL-pad on the right
    key = key[:16].ljust(16, b'\x00')

    # Check the IV before touching the stream, so a missing .crc file fails
    # with a clear message and leaves `self.mmkv_file` unconsumed
    if len(self.iv) != 16:
        raise ValueError('[+] Error while decrypting mmkv_file. IV was not 16 bytes - was a .crc file passed in?')

    # The size header is stored in the clear and is carried over as-is
    size = self.mmkv_file.read(4)
    if len(size) != 4:
        raise ValueError('[+] Error while reading mmkv_file. Header bytes was not 4 bytes.')
    encrypted_data = self.mmkv_file.read()

    cipher = Cipher(algorithms.AES(key), modes.CFB(self.iv))
    decryptor = cipher.decryptor()
    res = size + decryptor.update(encrypted_data) + decryptor.finalize()

    # Later decoding reads from the decrypted copy instead of the original stream
    self.mmkv_file = BytesIO(res)
    return res
255+
256+
257+
258+ '''
259+ Decoding Procedures
260+ '''
210261 def decode_into_map (self ) -> DefaultDict [str , List [bytes ]]:
211262 """
212263 A best-effort approach on linearly parsing the `mmkv_file` stream and building up
@@ -215,8 +266,11 @@ def decode_into_map(self) -> DefaultDict[str, List[bytes]]:
215266 :return: a built up defaultdict, which is also an instance variable
216267 """
217268
269+ # Prepare first
270+ self ._prepare_mmkv_stream_for_decoding ()
271+
218272 # Get size of database
219- db_size = self .get_db_size ()
273+ db_size = self ._get_db_size ()
220274
221275 # Check db_size - max out if needed
222276 if db_size == 0 :
@@ -276,6 +330,7 @@ def decode_into_map(self) -> DefaultDict[str, List[bytes]]:
276330
277331 return self .decoded_map
278332
333+
279334 def decode_as_int32 (self , value : Union [str , bytes ]) -> int :
280335 """
281336 Decodes `value` as a signed 32-bit int.
@@ -287,6 +342,7 @@ def decode_as_int32(self, value: Union[str, bytes]) -> int:
287342 value = bytes .fromhex (value )
288343 return decode_signed_varint (BytesIO (value ), mask = 32 )[0 ]
289344
345+
290346 def decode_as_int64 (self , value : Union [str , bytes ]) -> int :
291347 """
292348 Decodes `value` as a signed 64-bit int.
@@ -309,6 +365,7 @@ def decode_as_uint32(self, value: Union[str, bytes]) -> int:
309365 value = bytes .fromhex (value )
310366 return decode_unsigned_varint (BytesIO (value ), mask = 32 )[0 ]
311367
368+
312369 def decode_as_uint64 (self , value : Union [str , bytes ]) -> int :
313370 """
314371 Decodes `value` as an unsigned 64-bit int.
@@ -320,6 +377,7 @@ def decode_as_uint64(self, value: Union[str, bytes]) -> int:
320377 value = bytes .fromhex (value )
321378 return decode_unsigned_varint (BytesIO (value ), mask = 64 )[0 ]
322379
380+
323381 def decode_as_string (self , value : Union [str , bytes ]) -> Optional [str ]:
324382 """
325383 Attempts to decodes `value` as a UTF-8 string.
@@ -343,6 +401,7 @@ def decode_as_string(self, value: Union[str, bytes]) -> Optional[str]:
343401 print (f'[+] Could not UTF-8 decode { value !r} ' )
344402 return None
345403
404+
346405 def decode_as_bytes (self , value : Union [str , bytes ]) -> Optional [bytes ]:
347406 """
348407 Decodes `value` as bytes.
@@ -366,6 +425,7 @@ def decode_as_bytes(self, value: Union[str, bytes]) -> Optional[bytes]:
366425 print (f'[+] Could not decode bytes' )
367426 return None
368427
428+
369429 def decode_as_float (self , value : Union [str , bytes ]) -> Optional [float ]:
370430 """
371431 Decodes `value` as a double (8-bytes), which is a float type in Python.
@@ -377,11 +437,12 @@ def decode_as_float(self, value: Union[str, bytes]) -> Optional[float]:
377437 value = bytes .fromhex (value )
378438
379439 if len (value ) != 8 :
380- print (f'[+] Could not float decode { value } due to length' )
440+ print (f'[+] Could not float decode { value !r } due to length' )
381441 return None
382442
383443 return struct .unpack ('<d' , value )[0 ]
384444
445+
385446 def decode_as_bool (self , value : Union [str , bytes ]) -> Optional [bool ]:
386447 """
387448 Attempts to decode `value` as a boolean.
0 commit comments