Skip to content

Commit 4efccfb

Browse files
author
Kerkesni
committed
fix triggering replication when multpile destinations are set
- Issue 1: In Artesca "SITE_NAME" is never passed, so we always trigger objects that are replicated to the first storageClass in the replication rule. - Issue 2: We check the global replication status when verifying wether or not an object should be retriggered. This doesn't necessarily work all the time, especially when replicating to multiple destinations. As if one destination fails the global status becomes failed, which will make it impossible to trigger objects with a completed status for example. - Issue 3: replication info is completely overwritten when it does not contain info about a specific site. This will cause an issue when replicating to multiple destinations as the script can only be launched for one site at a time, so when having a object with non initialized replication info, we won't be able to set the replication info propely for all destinations. Issue: S3UTILS-184
1 parent 2310928 commit 4efccfb

File tree

2 files changed

+298
-16
lines changed

2 files changed

+298
-16
lines changed

CRR/ReplicationStatusUpdater.js

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -114,16 +114,19 @@ class ReplicationStatusUpdater {
114114
* Determines if an object should be updated based on its replication metadata properties.
115115
* @private
116116
* @param {ObjectMD} objMD - The metadata of the object.
117+
* @param {string} storageClass - The storage class for replication.
117118
* @returns {boolean} True if the object should be updated.
118119
*/
119-
_objectShouldBeUpdated(objMD) {
120+
_objectShouldBeUpdated(objMD, storageClass) {
120121
return this.replicationStatusToProcess.some(filter => {
121122
if (filter === 'NEW') {
123+
// Either site specific replication info is missing
124+
// or are initialized with empty fields.
122125
return (!objMD.getReplicationInfo()
123-
|| objMD.getReplicationInfo().status === '');
126+
|| !objMD.getReplicationSiteStatus(storageClass));
124127
}
125128
return (objMD.getReplicationInfo()
126-
&& objMD.getReplicationInfo().status === filter);
129+
&& objMD.getReplicationSiteStatus(storageClass) === filter);
127130
});
128131
}
129132

@@ -172,36 +175,54 @@ class ReplicationStatusUpdater {
172175
// codebase easier to maintain and upgrade, as opposed to having multiple branches or versions of
173176
// the code for different schema versions.
174177
objMD = new ObjectMD(JSON.parse(mdRes.Body));
175-
if (!this._objectShouldBeUpdated(objMD)) {
178+
if (!this._objectShouldBeUpdated(objMD, storageClass)) {
176179
skip = true;
177180
return process.nextTick(next);
178181
}
179182
// Initialize replication info, if missing
180183
// This is particularly important if the object was created before
181184
// enabling replication on the bucket.
182-
if (!objMD.getReplicationInfo()
183-
|| !objMD.getReplicationSiteStatus(storageClass)) {
185+
let replicationInfo = objMD.getReplicationInfo();
186+
if (!replicationInfo || !replicationInfo.status) {
184187
const { Rules, Role } = repConfig;
185188
const destination = Rules[0].Destination.Bucket;
186189
// set replication properties
187190
const ops = objMD.getContentLength() === 0 ? ['METADATA']
188191
: ['METADATA', 'DATA'];
189-
const backends = [{
190-
site: storageClass,
191-
status: 'PENDING',
192-
dataStoreVersionId: '',
193-
}];
194-
const replicationInfo = {
192+
replicationInfo = {
195193
status: 'PENDING',
196-
backends,
197194
content: ops,
195+
backends: [],
198196
destination,
199-
storageClass,
197+
storageClass: '',
200198
role: Role,
201-
storageType: this.storageType,
199+
storageType: '',
202200
};
203201
objMD.setReplicationInfo(replicationInfo);
204202
}
203+
// Update replication info with site specific info
204+
if (objMD.getReplicationSiteStatus(storageClass) === undefined) {
205+
// When replicating to multiple destinations,
206+
// the storageClass and storageType properties
207+
// become comma-separated lists of the storage
208+
// classes and types of the replication destinations.
209+
const storageClasses = objMD.getReplicationStorageClass()
210+
? `${objMD.getReplicationStorageClass()},${storageClass}` : storageClass;
211+
objMD.setReplicationStorageClass(storageClasses);
212+
if (this.storageType) {
213+
const storageTypes = objMD.getReplicationStorageType()
214+
? `${objMD.getReplicationStorageType()},${this.storageType}` : this.storageType;
215+
objMD.setReplicationStorageType(storageTypes);
216+
}
217+
// Add site to the list of replication backends
218+
const backends = objMD.getReplicationBackends();
219+
backends.push({
220+
site: storageClass,
221+
status: 'PENDING',
222+
dataStoreVersionId: '',
223+
});
224+
objMD.setReplicationBackends(backends);
225+
}
205226

206227
objMD.setReplicationSiteStatus(storageClass, 'PENDING');
207228
objMD.setReplicationStatus('PENDING');
@@ -273,13 +294,16 @@ class ReplicationStatusUpdater {
273294
}),
274295
(repConfig, next) => {
275296
const { Rules } = repConfig;
276-
const storageClass = Rules[0].Destination.StorageClass || this.siteName;
297+
const storageClass = this.siteName || Rules[0].Destination.StorageClass;
277298
if (!storageClass) {
278299
const errMsg = 'missing SITE_NAME environment variable, must be set to'
279300
+ ' the value of "site" property in the CRR configuration';
280301
this.log.error(errMsg);
281302
return next(new Error(errMsg));
282303
}
304+
if (!this.siteName) {
305+
this.log.warn(`missing SITE_NAME environment variable, triggering replication to the ${storageClass} storage class`);
306+
}
283307
return eachLimit(versions, this.workers, (i, apply) => {
284308
const { Key, VersionId } = i;
285309
this._markObjectPending(bucket, Key, VersionId, storageClass, repConfig, apply);

tests/unit/CRR/ReplicationStatusUpdater.js

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ describe('ReplicationStatusUpdater', () => {
2626
replicationStatusToProcess: ['NEW'],
2727
targetPrefix: 'toto',
2828
listingLimit: 10,
29+
siteName: 'aws-location',
2930
}, logger);
3031
});
3132

@@ -129,6 +130,263 @@ describe('ReplicationStatusUpdater', () => {
129130
return done();
130131
});
131132
});
133+
134+
[
135+
{
136+
description: 'for an object with a null replication info',
137+
replicationInfo: null,
138+
replicationStatusToProcess: ['NEW'],
139+
expectedReplicationInfo: {
140+
status: 'PENDING',
141+
backends: [
142+
{
143+
site: 'aws-location',
144+
status: 'PENDING',
145+
dataStoreVersionId: '',
146+
},
147+
],
148+
content: ['METADATA', 'DATA'],
149+
destination: 'arn:aws:s3:::sourcebucket',
150+
storageClass: 'aws-location',
151+
role: 'arn:aws:iam::root:role/s3-replication-role',
152+
storageType: 'aws_s3',
153+
dataStoreVersionId: '',
154+
isNFS: null,
155+
},
156+
}, {
157+
description: 'for an object with empty replication info',
158+
replicationInfo: {
159+
status: '',
160+
backends: [],
161+
content: [],
162+
destination: '',
163+
storageClass: '',
164+
role: '',
165+
storageType: '',
166+
dataStoreVersionId: '',
167+
isNFS: null,
168+
},
169+
replicationStatusToProcess: ['NEW'],
170+
expectedReplicationInfo: {
171+
status: 'PENDING',
172+
backends: [
173+
{
174+
site: 'aws-location',
175+
status: 'PENDING',
176+
dataStoreVersionId: '',
177+
},
178+
],
179+
content: ['METADATA', 'DATA'],
180+
destination: 'arn:aws:s3:::sourcebucket',
181+
storageClass: 'aws-location',
182+
role: 'arn:aws:iam::root:role/s3-replication-role',
183+
storageType: 'aws_s3',
184+
dataStoreVersionId: '',
185+
isNFS: null,
186+
},
187+
}, {
188+
description: 'for an object with a failed replication',
189+
replicationInfo: {
190+
status: 'FAILED',
191+
backends: [
192+
{
193+
site: 'aws-location',
194+
status: 'FAILED',
195+
dataStoreVersionId: '',
196+
},
197+
],
198+
content: ['METADATA', 'DATA'],
199+
destination: 'arn:aws:s3:::sourcebucket',
200+
storageClass: 'aws-location',
201+
role: 'arn:aws:iam::root:role/s3-replication-role',
202+
storageType: 'aws_s3',
203+
dataStoreVersionId: '',
204+
isNFS: null,
205+
},
206+
replicationStatusToProcess: ['FAILED'],
207+
expectedReplicationInfo: {
208+
status: 'PENDING',
209+
backends: [
210+
{
211+
site: 'aws-location',
212+
status: 'PENDING',
213+
dataStoreVersionId: '',
214+
},
215+
],
216+
content: ['METADATA', 'DATA'],
217+
destination: 'arn:aws:s3:::sourcebucket',
218+
storageClass: 'aws-location',
219+
role: 'arn:aws:iam::root:role/s3-replication-role',
220+
storageType: 'aws_s3',
221+
dataStoreVersionId: '',
222+
isNFS: null,
223+
},
224+
}, {
225+
description: 'for an object with a completed replication',
226+
replicationInfo: {
227+
status: 'COMPLETED',
228+
backends: [
229+
{
230+
site: 'aws-location',
231+
status: 'COMPLETED',
232+
dataStoreVersionId: '',
233+
},
234+
],
235+
content: ['METADATA', 'DATA'],
236+
destination: 'arn:aws:s3:::sourcebucket',
237+
storageClass: 'aws-location',
238+
role: 'arn:aws:iam::root:role/s3-replication-role',
239+
storageType: 'aws_s3',
240+
dataStoreVersionId: '',
241+
isNFS: null,
242+
},
243+
replicationStatusToProcess: ['COMPLETED'],
244+
expectedReplicationInfo: {
245+
status: 'PENDING',
246+
backends: [
247+
{
248+
site: 'aws-location',
249+
status: 'PENDING',
250+
dataStoreVersionId: '',
251+
},
252+
],
253+
content: ['METADATA', 'DATA'],
254+
destination: 'arn:aws:s3:::sourcebucket',
255+
storageClass: 'aws-location',
256+
role: 'arn:aws:iam::root:role/s3-replication-role',
257+
storageType: 'aws_s3',
258+
dataStoreVersionId: '',
259+
isNFS: null,
260+
},
261+
}, {
262+
description: 'of a single site for an object with multiple replication destinations',
263+
replicationInfo: {
264+
status: 'FAILED',
265+
backends: [
266+
{
267+
site: 'azure-location',
268+
status: 'COMPLETED',
269+
dataStoreVersionId: '',
270+
},
271+
{
272+
site: 'aws-location',
273+
status: 'FAILED',
274+
dataStoreVersionId: '',
275+
},
276+
],
277+
content: ['METADATA', 'DATA'],
278+
destination: 'arn:aws:s3:::sourcebucket',
279+
storageClass: 'azure-location,aws-location',
280+
role: 'arn:aws:iam::root:role/s3-replication-role',
281+
storageType: 'azure,aws_s3',
282+
dataStoreVersionId: '',
283+
isNFS: null,
284+
},
285+
replicationStatusToProcess: ['FAILED'],
286+
expectedReplicationInfo: {
287+
status: 'PENDING',
288+
backends: [
289+
{
290+
site: 'azure-location',
291+
status: 'COMPLETED',
292+
dataStoreVersionId: '',
293+
}, {
294+
site: 'aws-location',
295+
status: 'PENDING',
296+
dataStoreVersionId: '',
297+
},
298+
],
299+
content: ['METADATA', 'DATA'],
300+
destination: 'arn:aws:s3:::sourcebucket',
301+
storageClass: 'azure-location,aws-location',
302+
role: 'arn:aws:iam::root:role/s3-replication-role',
303+
storageType: 'azure,aws_s3',
304+
dataStoreVersionId: '',
305+
isNFS: null,
306+
},
307+
}, {
308+
description: 'of a single non initialized site for an object with multiple replication destinations',
309+
replicationInfo: {
310+
status: 'FAILED',
311+
backends: [
312+
{
313+
site: 'azure-location',
314+
status: 'COMPLETED',
315+
dataStoreVersionId: '',
316+
},
317+
{
318+
site: 'azure-location-2',
319+
status: 'FAILED',
320+
dataStoreVersionId: '',
321+
},
322+
],
323+
content: ['METADATA', 'DATA'],
324+
destination: 'arn:aws:s3:::sourcebucket',
325+
storageClass: 'azure-location,azure-location-2',
326+
role: 'arn:aws:iam::root:role/s3-replication-role',
327+
storageType: 'azure,azure',
328+
dataStoreVersionId: '',
329+
isNFS: null,
330+
},
331+
replicationStatusToProcess: ['NEW'],
332+
expectedReplicationInfo: {
333+
status: 'PENDING',
334+
backends: [
335+
{
336+
site: 'azure-location',
337+
status: 'COMPLETED',
338+
dataStoreVersionId: '',
339+
}, {
340+
site: 'azure-location-2',
341+
status: 'FAILED',
342+
dataStoreVersionId: '',
343+
}, {
344+
site: 'aws-location',
345+
status: 'PENDING',
346+
dataStoreVersionId: '',
347+
},
348+
],
349+
content: ['METADATA', 'DATA'],
350+
destination: 'arn:aws:s3:::sourcebucket',
351+
storageClass: 'azure-location,azure-location-2,aws-location',
352+
role: 'arn:aws:iam::root:role/s3-replication-role',
353+
storageType: 'azure,azure,aws_s3',
354+
dataStoreVersionId: '',
355+
isNFS: null,
356+
},
357+
},
358+
].forEach(params => {
359+
it(`should trigger replication ${params.description}`, done => {
360+
crr.bb.getMetadata = jest.fn((p, cb) => {
361+
const objectMd = JSON.parse(getMetadataRes.Body);
362+
objectMd.replicationInfo = params.replicationInfo;
363+
cb(null, { Body: JSON.stringify(objectMd) });
364+
});
365+
crr.siteName = 'aws-location';
366+
crr.storageType = 'aws_s3';
367+
crr.replicationStatusToProcess = params.replicationStatusToProcess;
368+
crr.run(err => {
369+
assert.ifError(err);
370+
371+
expect(crr.s3.listObjectVersions).toHaveBeenCalledTimes(1);
372+
expect(crr.s3.getBucketReplication).toHaveBeenCalledTimes(1);
373+
expect(crr.bb.getMetadata).toHaveBeenCalledTimes(1);
374+
expect(crr.bb.putMetadata).toHaveBeenCalledTimes(1);
375+
expect(crr.bb.putMetadata).toHaveBeenCalledWith(
376+
expect.objectContaining({
377+
Body: expect.stringContaining(JSON.stringify(params.expectedReplicationInfo)),
378+
}),
379+
expect.any(Function),
380+
);
381+
382+
assert.strictEqual(crr._nProcessed, 1);
383+
assert.strictEqual(crr._nSkipped, 0);
384+
assert.strictEqual(crr._nUpdated, 1);
385+
assert.strictEqual(crr._nErrors, 0);
386+
return done();
387+
});
388+
});
389+
});
132390
});
133391

134392
describe('ReplicationStatusUpdater with specifics', () => {

0 commit comments

Comments
 (0)