Skip to content

Commit 6b7df2d

Browse files
committed
check before upload
1 parent 941610b commit 6b7df2d

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

data/src/upload_aggregate_lodes.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import argparse
22
import re
33
import boto3
4+
import botocore
45
import yaml
56
import os
7+
import hashlib
68

79
def create_public_files(
810
dataset: str,
@@ -34,7 +36,20 @@ def create_public_files(
3436
file_path = f"intermediate/od_lodes/year={year}/geography={geography}/origin={origin}/state={state}/{state}.parquet"
3537
filename = f"{dataset}-{year}-{geography}-{origin}-{state}.parquet"
3638
s3_path = f"{dataset}/year={year}/geography={geography}/origin={origin}/state={state}/{filename}"
37-
s3.upload_file(file_path, public_bucket, s3_path)
39+
with open(file_path, 'rb') as file:
40+
data = file.read()
41+
md5 = hashlib.md5(data).hexdigest()
42+
try:
43+
print(s3_path)
44+
s3.head_object(Bucket=public_bucket, Key=s3_path, IfNoneMatch=md5)
45+
except botocore.exceptions.ClientError as e:
46+
if e.response['Error']['Message'] == 'Not Modified':
47+
print('skipping')
48+
elif e.response['Error']['Message'] == 'Not Found':
49+
s3.put_object(Body=data, Bucket=public_bucket, Key=s3_path, IfNoneMatch=md5)
50+
else:
51+
print(e.response)
52+
raise e
3853

3954
def main() -> None:
4055
parser = argparse.ArgumentParser()

0 commit comments

Comments
 (0)