Skip to content

Commit 4941122

Browse files
committed
Add s3 upload storage method
Signed-off-by: Mike Perez <thingee@gmail.com>
1 parent 3ed5cdf commit 4941122

File tree

5 files changed

+63
-24
lines changed

5 files changed

+63
-24
lines changed

README.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,18 @@ the service as follows::
4141
api_key = 'secret'
4242

4343

44+
storage_method
45+
^^^^^^^^^^^^^^
46+
The ``storage_method`` is a required configuration item; it defines where the
binaries should be stored. The two available method values are ``local`` and
``s3``.
49+
50+
bucket
^^^^^^
The ``bucket`` setting is required if the ``storage_method`` configuration is
set to ``s3``. It defines which S3 bucket the binaries should be stored in.
54+
55+
4456
Self-discovery
4557
--------------
4658
The API provides informational JSON at every step of the URL about what is

chacra/controllers/binaries/archs.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import logging
22
import os
3+
import boto3
4+
from botocore.exceptions import ClientError
35
import pecan
46
from pecan import response
57
from pecan.secure import secure
68
from pecan import expose, abort, request
79
from webob.static import FileIter
8-
from chacra.models import Binary
10+
from chacra.models.binaries import Binary, generate_checksum
911
from chacra import models, util
1012
from chacra.controllers import error
1113
from chacra.controllers.util import repository_is_automatic
@@ -26,6 +28,7 @@ def __init__(self, arch):
2628
self.distro_version = request.context['distro_version']
2729
self.ref = request.context['ref']
2830
self.sha1 = request.context['sha1']
31+
self.checksum = None
2932
request.context['arch'] = self.arch
3033

3134
@expose(generic=True, template='json')
@@ -89,7 +92,7 @@ def index_post(self):
8992
if request.POST.get('force', False) is False:
9093
error('/errors/invalid', 'resource already exists and "force" key was not used')
9194

92-
full_path = self.save_file(file_obj)
95+
full_path, size = self.save_file(file_obj)
9396

9497
if self.binary is None:
9598
path = full_path
@@ -102,14 +105,17 @@ def index_post(self):
102105
self.binary = Binary(
103106
self.binary_name, self.project, arch=arch,
104107
distro=distro, distro_version=distro_version,
105-
ref=ref, sha1=sha1, path=path, size=os.path.getsize(path)
108+
ref=ref, sha1=sha1, path=path, size=size,
109+
checksum=self.checksum
106110
)
107111
else:
108112
self.binary.path = full_path
113+
self.binary.checksum = self.checksum
109114

110115
# check if this binary is interesting for other configured projects,
111116
# and if so, then mark those other repos so that they can be re-built
112117
self.mark_related_repos()
118+
113119
return dict()
114120

115121
def mark_related_repos(self):
@@ -175,8 +181,32 @@ def save_file(self, file_obj):
175181
for chunk in file_iterable:
176182
f.write(chunk)
177183

184+
size = os.path.getsize(destination)
185+
self.checksum = generate_checksum(destination)
186+
187+
if pecan.conf.storage_method == 's3':
188+
bucket = pecan.conf.bucket
189+
object_destination = os.path.relpath(destination, pecan.conf.binary_root)
190+
191+
s3_client = boto3.client('s3')
192+
try:
193+
with open(destination, 'rb') as f:
194+
s3_client.put_object(Body=f,
195+
Bucket=bucket,
196+
Key=object_destination,
197+
ChecksumAlgorithm='sha256',
198+
ChecksumSHA256=self.checksum
199+
)
200+
except ClientError as e:
201+
error('/errors/error/', 'file object upload to S3 failed with error %s' % e)
202+
203+
# Remove the local file after S3 upload
204+
os.remove(destination)
205+
206+
destination = 's3://' + object_destination[1:]
207+
178208
# return the full path to the saved object:
179-
return destination
209+
return destination, size
180210

181211
@expose()
182212
def _lookup(self, name, *remainder):

chacra/models/binaries.py

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import hashlib
22
import datetime
3+
import pecan
34
from sqlalchemy import Column, Integer, String, ForeignKey, Boolean, DateTime, BigInteger
45
from sqlalchemy.orm import relationship, backref
56
from sqlalchemy.event import listen
@@ -169,24 +170,18 @@ def __json__(self):
169170
# Listeners
170171

171172

172-
def generate_checksum(mapper, connection, target):
173-
try:
174-
target.path
175-
except AttributeError:
176-
target.checksum = None
177-
return
173+
def generate_checksum(self, binary):
174+
# S3 requires SHA256
175+
chsum = None
176+
if pecan.conf.storage_method == 's3':
177+
chsum = hashlib.sha256()
178+
else:
179+
chsum = hashlib.sha512()
178180

179-
# FIXME
180-
# sometimes we can accept binaries without a path and that is probably something
181-
# that should not happen. The core purpose of this binary is that it works with
182-
# paths and files, this should be required.
183-
if not target.path:
184-
return
185-
chsum = hashlib.sha512()
186-
with open(target.path, 'rb') as f:
181+
with open(binary, 'rb') as f:
187182
for chunk in iter(lambda: f.read(4096), b''):
188183
chsum.update(chunk)
189-
target.checksum = chsum.hexdigest()
184+
return chsum.hexdigest()
190185

191186

192187
def update_repo(mapper, connection, target):
@@ -206,11 +201,6 @@ def update_repo(mapper, connection, target):
206201
# triggered it because there is nothing we need to do
207202
pass
208203

209-
# listen for checksum changes
210-
listen(Binary, 'before_insert', generate_checksum)
211-
listen(Binary, 'before_update', generate_checksum)
212-
213-
214204
def add_timestamp_listeners():
215205
# listen for timestamp modifications
216206
listen(Binary, 'before_insert', update_timestamp)

config/dev.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,17 @@
6363
'encoding': 'utf-8'
6464
}
6565

66+
# Where to store the data. Options are 's3' or 'local'
67+
storage_method = 'local'
68+
6669
# location for storing uploaded binaries
6770
binary_root = '%(confdir)s/public'
6871
repos_root = '%(confdir)s/repos'
6972
distributions_root = '%(confdir)s/distributions'
7073

74+
# If storage method is s3, provide a bucket name
75+
bucket = ''
76+
7177
# When True it will set the headers so that Nginx can serve the download
7278
# instead of Pecan.
7379
delegate_downloads = False

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ alembic
88
ipython
99
python-statsd
1010
requests
11+
boto3
1112
importlib_metadata<=3.6; python_version<'3.8'

0 commit comments

Comments
 (0)