|
| 1 | +""" |
| 2 | +A script that decodes the unprocessed file from the s3 folder input--load/ |
| 3 | +
|
| 4 | +You can provide the path of the downloaded file and the destination path for the decoded JSON output. |
| 5 | +""" |
| 6 | + |
| 7 | +import json |
| 8 | +import pickle |
| 9 | +from collections import deque |
| 10 | + |
| 11 | +import lz4.frame |
| 12 | + |
| 13 | + |
def lz4_pickle_decode_and_save(file_path: str, output_file: str):
    """Decode an LZ4-compressed pickle file and write its contents as JSON.

    The file at ``file_path`` is read as bytes, decompressed with
    ``lz4.frame.decompress`` and unpickled. A ``deque`` payload is converted
    to a ``list``; the result is then written to ``output_file`` as indented
    JSON, with values JSON cannot encode natively rendered through ``str``.

    Args:
        file_path: Path of the LZ4-compressed pickle file to decode.
        output_file: Path of the JSON file to write.

    Returns:
        The decoded object (a ``list`` when the payload was a ``deque``), or
        ``None`` when any step failed. Failures are printed to stdout and are
        not re-raised.
    """
    # NOTE(review): python-lz4's frame bindings appear to report malformed
    # input as a plain RuntimeError -- confirm against the installed release.
    # A dedicated LZ4FrameError is honoured only if the module exposes one, so
    # a missing attribute can never mask the real error while handling it.
    lz4_errors = (
        getattr(lz4.frame, "LZ4FrameError", RuntimeError),
        RuntimeError,
    )

    try:
        # Read the compressed pickle data from the file
        with open(file_path, "rb") as f:
            compressed_data = f.read()

        # Decompress the LZ4 data. The handler is scoped to this call only so
        # unrelated RuntimeErrors from later steps are not mislabelled.
        try:
            decompressed_data = lz4.frame.decompress(compressed_data)
        except lz4_errors as e:
            print(f"LZ4 decompression error: {e}")  # noqa
            return None

        # SECURITY: unpickling can execute arbitrary code embedded in the
        # payload. Only run this on files that come from a trusted source.
        decoded_data = pickle.loads(decompressed_data)

        # If the data is wrapped in a deque, convert it to a list so that it
        # can be serialized as a JSON array
        if isinstance(decoded_data, deque):
            decoded_data = list(decoded_data)

        # Save the decoded data to a file; default=str stringifies any value
        # that json cannot encode natively
        with open(output_file, "w", encoding="utf-8") as json_file:
            json.dump(decoded_data, json_file, indent=2, default=str)

        print(f"Decoded transaction(s) saved to {output_file}")  # noqa

        return decoded_data

    except pickle.UnpicklingError as e:
        print(f"Unpickling error: {e}")  # noqa
    except FileNotFoundError as e:
        # The missing path may be the input file or the output file's
        # directory, so report the path the OS actually complained about.
        missing_path = e.filename or file_path
        print(f"File not found: {missing_path}")  # noqa
    except Exception as e:
        print(f"An unexpected error occurred: {e}")  # noqa

    return None
| 46 | + |
| 47 | + |
# Example usage:
#   python <this_script> [file_path] [output_file]
if __name__ == "__main__":
    import sys

    # Optional positional arguments override the defaults below, so the
    # script can be pointed at any downloaded file without editing it.
    cli_args = sys.argv[1:]
    file_path = cli_args[0] if len(cli_args) > 0 else "unprocessed-11"
    output_file = cli_args[1] if len(cli_args) > 1 else "decoded_transaction.json"

    # Decode the data from the file and save it to JSON. The result is the
    # decoded payload (deques are returned as lists) or None on failure.
    decoded_data = lz4_pickle_decode_and_save(file_path, output_file)
0 commit comments