Skip to content

Commit 1c51a6f

Browse files
author
John Kranz
committed
RMS-6798: Add MD5 example to python SDK
1 parent d156ec2 commit 1c51a6f

File tree

1 file changed

+105
-0
lines changed

1 file changed

+105
-0
lines changed

samples/puttingDataWithMD5.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Copyright 2014-2022 Spectra Logic Corporation. All Rights Reserved.
2+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use
3+
# this file except in compliance with the License. A copy of the License is located at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# or in the "license" file accompanying this file.
8+
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
9+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
# specific language governing permissions and limitations under the License.
11+
12+
from ds3 import ds3
13+
import os
14+
import time
15+
import base64
16+
import hashlib
17+
18+
# build a client from the environment (reads the DS3 endpoint/credential
# environment variables — see the SDK docs for the exact variable names)
client = ds3.createClientFromEnv()

bucketName = "books"

# make sure the bucket that we will be sending objects to exists
# (put_bucket is idempotent for an existing bucket on DS3)
client.put_bucket(ds3.PutBucketRequest(bucketName))

# create your list of objects that will be sent to DS3
# this example assumes that these files exist on the file system
# under the "resources" folder next to this script (see pathForResource)
fileList = ["beowulf.txt", "sherlock_holmes.txt", "tale_of_two_cities.txt", "ulysses.txt"]
29+
30+
# this method is used to map a file path to a Ds3PutObject
31+
# map a file path to a Ds3PutObject (object name + size on disk),
# the unit a bulk-put request is built from
def fileNameToDs3PutObject(filePath, prefix=""):
    resourcePath = pathForResource(filePath)
    byteCount = os.stat(resourcePath).st_size
    return ds3.Ds3PutObject(prefix + filePath, byteCount)
34+
35+
# this method is used to get the os specific path for an object located in the resources folder
36+
# build the OS-specific path for an object located in the "resources"
# folder that sits next to this script
def pathForResource(resourceName):
    scriptDir = os.path.dirname(str(__file__))
    resourcesDir = os.path.join(scriptDir, "resources")
    return os.path.join(resourcesDir, resourceName)
39+
40+
# turn each file name into a Ds3PutObject carrying its on-disk size
fileList = [fileNameToDs3PutObject(fileName) for fileName in fileList]

# submit the put bulk request to DS3
bulkResult = client.put_bulk_job_spectra_s3(ds3.PutBulkJobSpectraS3Request(bucketName, fileList))

# the bulk request will split the files over several chunks if it needs to.
# we then need to ask what chunks we can send, and then send them making
# sure we don't resend the same chunks
49+
50+
# create a set of the chunk ids which will be used to track
# what chunks have not been sent
chunkIds = {chunk['ChunkId'] for chunk in bulkResult.result['ObjectsList']}

# while we still have chunks to send
while len(chunkIds) > 0:
    # get a list of the available chunks that we can send
    availableChunks = client.get_job_chunks_ready_for_client_processing_spectra_s3(
        ds3.GetJobChunksReadyForClientProcessingSpectraS3Request(bulkResult.result['JobId']))

    chunks = availableChunks.result['ObjectsList']

    # check to make sure we got some chunks, if we did not
    # sleep and retry. This could mean that the cache is full
    if len(chunks) == 0:
        time.sleep(60)
        continue

    # for each chunk that is available, check to make sure
    # we have not sent it, and if not, send that object
    for chunk in chunks:
        if not chunk['ChunkId'] in chunkIds:
            continue

        chunkIds.remove(chunk['ChunkId'])
        for obj in chunk['ObjectList']:
            # it is possible that if we start resending a chunk, due to the program crashing, that
            # some objects will already be in cache. Check to make sure that they are not, and then
            # send the object to Spectra S3
            if obj['InCache'] == 'false':
                # resolve the file relative to this script (same helper used
                # to stat the sizes above) instead of the current working
                # directory, so the open cannot fail when the stat succeeded
                localFileName = pathForResource(obj['Name'])

                # MD5 exactly the byte range this blob covers, rewind, and
                # stream that range to Spectra S3 with the checksum header.
                # 'with' guarantees the handle is closed even if a call raises
                with open(localFileName, "rb") as objectDataStream:
                    objectDataStream.seek(int(obj['Offset']), 0)
                    objectChunk = objectDataStream.read(int(obj['Length']))
                    checksum = hashlib.md5(objectChunk)
                    encodedChecksum = base64.b64encode(checksum.digest()).decode('utf-8')
                    objectDataStream.seek(int(obj['Offset']), 0)
                    client.put_object(ds3.PutObjectRequest(bucketName,
                                                           obj['Name'],
                                                           obj['Length'],
                                                           objectDataStream,
                                                           offset=int(obj['Offset']),
                                                           job=bulkResult.result['JobId'],
                                                           headers={"Content-MD5": encodedChecksum}))
94+
95+
# we now verify that all our objects have been sent to DS3
bucketResponse = client.get_bucket(ds3.GetBucketRequest(bucketName))
storedObjects = bucketResponse.result['ContentsList']

for storedObject in storedObjects:
    print(storedObject['Key'])

# delete the bucket by first deleting all the objects, and then deleting the bucket
for storedObject in storedObjects:
    client.delete_object(ds3.DeleteObjectRequest(bucketName, storedObject['Key']))

client.delete_bucket(ds3.DeleteBucketRequest(bucketName))

0 commit comments

Comments
 (0)