Skip to content

Commit 7d22968

Browse files
committed
re-factore sync controller
1 parent 5f0bfd8 commit 7d22968

File tree

6 files changed

+168
-176
lines changed

6 files changed

+168
-176
lines changed

app/src/components/utils.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,17 @@ const utils = {
6161
else return AuthMode.FULLAUTH; // basicAuth && oidcAuth
6262
},
6363

64+
/**
65+
* @function formatS3KeyForCompare
66+
* Format S3 key-prefixes for comparison with bucket.key in COMS db
67+
* @param {string} k S3 key prefix. example: photos/docs/
68+
* @returns {string} provided key prefix without trailing slash
69+
*/
70+
formatS3KeyForCompare(k) {
71+
let key = k.substr(0, k.lastIndexOf('/')); // remove trailing slash and file name
72+
return key || '/'; // set empty key to '/' to match convention in COMS db
73+
},
74+
6475
/**
6576
* @function getBucket
6677
* Acquire core S3 bucket credential information from database or configuration

app/src/controllers/sync.js

Lines changed: 121 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
const { NIL: SYSTEM_USER } = require('uuid');
22

33
const errorToProblem = require('../components/errorToProblem');
4-
const { addDashesToUuid, getCurrentIdentity, isAtPath, isTruthy } = require('../components/utils');
4+
const { addDashesToUuid, getCurrentIdentity, formatS3KeyForCompare, isAtPath } = require('../components/utils');
55
const utils = require('../db/models/utils');
6+
const log = require('../components/log')(module.filename);
7+
68
const {
79
bucketPermissionService,
810
bucketService,
@@ -31,43 +33,101 @@ const controller = {
3133
*/
3234
async syncBucketRecursive(req, res, next) {
3335
try {
34-
const userId = await userService.getCurrentUserId(getCurrentIdentity(req.currentUser, SYSTEM_USER), SYSTEM_USER);
36+
// Wrap all sql operations in a single transaction
37+
const response = await utils.trxWrapper(async (trx) => {
38+
39+
// curren userId
40+
const userId = await userService.getCurrentUserId(
41+
getCurrentIdentity(req.currentUser, SYSTEM_USER),
42+
SYSTEM_USER
43+
);
44+
// parent bucket
45+
const bucketId = addDashesToUuid(req.params.bucketId);
46+
const parentBucket = await bucketService.read(bucketId);
47+
48+
// current user's permissions on parent bucket (folder)
49+
const currentUserParentBucketPerms = userId !== SYSTEM_USER ? (await bucketPermissionService.searchPermissions({
50+
bucketId: parentBucket.bucketId,
51+
userId: userId
52+
})).map(p => p.permCode) : [];
53+
54+
/**
55+
* sync (ie create or delete) bucket records in COMS db to match 'folders' (S3 key prefixes) that exist in S3
56+
*/
57+
// parent + child bucket records already in COMS db
58+
const dbChildBuckets = await bucketService.searchChildBuckets(parentBucket);
59+
let dbBuckets = [parentBucket].concat(dbChildBuckets);
60+
// 'folders' that exist below (and including) the parent 'folder' in S3
61+
const s3Response = await storageService.listAllObjectVersions({ bucketId: bucketId, precisePath: false });
62+
const s3Keys = [...new Set([
63+
...s3Response.DeleteMarkers.map(object => formatS3KeyForCompare(object.Key)),
64+
...s3Response.Versions.map(object => formatS3KeyForCompare(object.Key)),
65+
])];
66+
67+
const syncedBuckets = await this.syncBucketRecords(
68+
dbBuckets,
69+
s3Keys,
70+
parentBucket,
71+
// assign current user's permissions on parent bucket to new sub-folders (buckets)
72+
currentUserParentBucketPerms,
73+
trx
74+
);
75+
76+
/**
77+
* Queue objects in all the folders for syncing
78+
*/
79+
return await this.queueObjectRecords(syncedBuckets, s3Response, userId, trx);
80+
});
81+
82+
// return number of jobs inserted
83+
res.status(202).json(response);
84+
} catch (e) {
85+
next(errorToProblem(SERVICE, e));
86+
}
87+
},
88+
89+
/**
90+
* @function syncBucketSingle
91+
* Synchronizes objects found at the Key of the given bucket, ignoring subfolders and files after the next delimiter
92+
* @param {object} req Express request object
93+
* @param {object} res Express response object
94+
* @param {function} next The next callback function
95+
* @returns {function} Express middleware function
96+
*/
97+
async syncBucketSingle(req, res, next) {
98+
try {
3599
const bucketId = addDashesToUuid(req.params.bucketId);
36-
const parentBucket = await bucketService.read(bucketId);
37-
// current user's permissions on parent folder
38-
const currentUserParentBucketPerms = userId !== SYSTEM_USER ? (await bucketPermissionService.searchPermissions({
39-
bucketId: parentBucket.bucketId,
40-
userId: userId
41-
})).map(p => p.permCode) : [];
100+
const bucket = await bucketService.read(bucketId);
101+
const userId = await userService.getCurrentUserId(getCurrentIdentity(req.currentUser, SYSTEM_USER), SYSTEM_USER);
42102

43-
/**
44-
* get the two following lists for comparison:
45-
*/
46-
// get parent + child bucket records already in COMS db
47-
const dbChildBuckets = await bucketService.searchChildBuckets(parentBucket);
48-
let dbBuckets = [parentBucket].concat(dbChildBuckets);
49-
50-
// get 'folders' that exist below (and including) the parent 'folder'
51-
const s3Response = await storageService.listAllObjectVersions({ bucketId: bucketId, precisePath: false });
52-
const formatS3KeyForCompare = (k => {
53-
let key = k.substr(0, k.lastIndexOf('/')); // remove trailing slash and file name
54-
return key ? key : '/'; // if parent is root set as '/' to match convention in COMS db
103+
const s3Objects = await storageService.listAllObjectVersions({ bucketId: bucketId, filterLatest: true });
104+
105+
const response = await utils.trxWrapper(async (trx) => {
106+
return this.queueObjectRecords([bucket], s3Objects, userId, trx);
55107
});
56-
const s3Keys = [...new Set([
57-
...s3Response.DeleteMarkers.map(object => formatS3KeyForCompare(object.Key)),
58-
...s3Response.Versions.map(object => formatS3KeyForCompare(object.Key)),
59-
])];
60-
// console.log('s3Keys', s3Keys);
61108

109+
res.status(202).json(response);
110+
} catch (e) {
111+
next(errorToProblem(SERVICE, e));
112+
}
113+
},
62114

63-
/**
64-
* compare each list and sync (ie create or delete) bucket records in COMS db to match 'folders' that exist in S3
65-
* note: we assign all permissions to all users that created parent bucket
66-
*/
115+
/**
116+
* @function syncBucketRecords
117+
* Synchronizes (creates / prunes) COMS db bucket records for each 'directry' found in S3
118+
* @param {object[]} Array of Bucket models - bucket records already in COMS db before syncing
119+
* @param {string[]} s3Keys Array of key prefixes from S3 representing 'directories'
120+
* @param {object} Bucket model for the COMS db bucket record of parent bucket
121+
* @param {string[]} currentUserParentBucketPerms Array of PermCodes to add to NEW buckets
122+
* @param {object} [trx] An Objection Transaction object
123+
* @returns {string[]} And array of bucketId's for bucket records in COMS db
124+
*/
125+
async syncBucketRecords(dbBuckets, s3Keys, parentBucket, currentUserParentBucketPerms, trx) {
126+
try {
67127
// delete buckets not found in S3 from COMS db
68128
const oldDbBuckets = dbBuckets.filter(b => !s3Keys.includes(b.key));
69129
for (const dbBucket of oldDbBuckets) {
70-
await bucketService.delete(dbBucket.bucketId);
130+
await bucketService.delete(dbBucket.bucketId, trx);
71131
dbBuckets = dbBuckets.filter(b => b.bucketId != dbBucket.bucketId);
72132
}
73133
// Create buckets only found in S3 in COMS db
@@ -87,24 +147,34 @@ const controller = {
87147
// ..so copy all their perms to NEW subfolders
88148
permCodes: currentUserParentBucketPerms
89149
};
90-
const dbResponse = await bucketService.create(data);
150+
151+
const dbResponse = await bucketService.create(data, trx);
91152
dbBuckets.push(dbResponse);
92153
}
154+
return dbBuckets;
155+
}
156+
catch (err) {
157+
log.error(err.message, { function: 'syncBucketRecords' });
158+
throw err;
159+
}
160+
},
93161

94-
/**
95-
* Sync all the objects found in all the parent and child 'folders'.
96-
* by comparing objects in COMS db with the keys of the object found in S3
97-
*/
162+
/**
163+
* @function queueObjectRecords
164+
* Synchronizes (creates / prunes) COMS db object records with state in S3
165+
* @param {object[]} dbBuckets Array of Bucket models in COMS db
166+
* @param {object} s3Objects The response from storage.listAllObjectVersions - and
167+
* object containg an array of DeleteMarkers and Versions
168+
* @param {string} userId the guid of current user
169+
* @param {object} [trx] An Objection Transaction object
170+
* @returns {string[]} And array of bucketId's for bucket records in COMS db
171+
*/
172+
async queueObjectRecords(dbBuckets, s3Objects, userId, trx) {
173+
try {
98174
// get all objects in existing buckets in all 'buckets' in COMS db
99175
const dbObjects = await objectService.searchObjects({
100176
bucketId: dbBuckets.map(b => b.bucketId)
101-
});
102-
// get all objects below parent 'key' in S3
103-
const s3Objects = s3Response;
104-
105-
console.log('s3Objects', s3Objects);
106-
107-
177+
}, trx);
108178
/**
109179
* merge arrays of objects from COMS db and S3 to form an array of jobs with format:
110180
* [ { path: '/images/img3.jpg', bucketId: '123' }, { path: '/images/album1/img1.jpg', bucketId: '456' } ]
@@ -145,66 +215,19 @@ const controller = {
145215
const jobs = [...new Map(objects.map(o => [o.path, o])).values()];
146216

147217
// create jobs in COMS db object_queue for each object
148-
const response = await utils.trxWrapper(async (trx) => {
149-
// update 'lastSyncRequestedDate' value in COMS db for each bucket
150-
for (const bucket of dbBuckets) {
151-
await bucketService.update({
152-
bucketId: bucket.bucketId,
153-
userId: userId,
154-
lastSyncRequestedDate: new Date().toISOString()
155-
}, trx);
156-
}
157-
return await objectQueueService.enqueue({ jobs: jobs }, trx);
158-
});
159-
// return number of jobs inserted
160-
res.status(202).json(response);
161-
} catch (e) {
162-
next(errorToProblem(SERVICE, e));
163-
}
164-
},
165-
166-
/**
167-
* @function syncBucketSingle
168-
* Synchronizes objects found at the Key of the given bucket, ignoring subfolders and files after the next delimiter
169-
* @param {object} req Express request object
170-
* @param {object} res Express response object
171-
* @param {function} next The next callback function
172-
* @returns {function} Express middleware function
173-
*/
174-
async syncBucketSingle(req, res, next) {
175-
try {
176-
const bucketId = addDashesToUuid(req.params.bucketId);
177-
const userId = await userService.getCurrentUserId(getCurrentIdentity(req.currentUser, SYSTEM_USER), SYSTEM_USER);
178-
179-
const [dbResponse, s3Response] = await Promise.all([
180-
objectService.searchObjects({ bucketId: bucketId }),
181-
storageService.listAllObjectVersions({ bucketId: bucketId, filterLatest: true })
182-
]);
183-
184-
// Aggregate and dedupe all file paths to consider
185-
const jobs = [...new Set([
186-
...dbResponse.data.map(object => object.path),
187-
...s3Response.DeleteMarkers.map(object => object.Key),
188-
...s3Response.Versions.map(object => object.Key)
189-
])].map(path => ({
190-
path: path,
191-
bucketId: bucketId
192-
// adding current userId will give ALL perms on new objects
193-
// and set createdBy on all downstream resources (versions, tags, meta)
194-
// userId: userId
195-
}));
196-
197-
const response = await utils.trxWrapper(async (trx) => {
218+
// update 'lastSyncRequestedDate' value in COMS db for each bucket
219+
for (const bucket of dbBuckets) {
198220
await bucketService.update({
199-
bucketId: bucketId,
221+
bucketId: bucket.bucketId,
200222
userId: userId,
201223
lastSyncRequestedDate: new Date().toISOString()
202224
}, trx);
203-
return await objectQueueService.enqueue({ jobs: jobs }, trx);
204-
});
205-
res.status(202).json(response);
206-
} catch (e) {
207-
next(errorToProblem(SERVICE, e));
225+
}
226+
return await objectQueueService.enqueue({ jobs: jobs }, trx);
227+
}
228+
catch (err) {
229+
log.error(err.message, { function: 'queueObjectRecords' });
230+
throw err;
208231
}
209232
},
210233

app/src/docs/v1.api-spec.yaml

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,44 +22,44 @@ tags:
2222
description: Operations for managing S3 Bucket(s).
2323
externalDocs:
2424
url: >-
25-
https://github.com/bcgov/common-object-management-service/wiki/Endpoint-Notes#bucket
25+
https://developer.gov.bc.ca/docs/default/component/common-object-management-service/Endpoint-Notes/#bucket
2626
- name: Metadata
2727
description: Operations for managing Metadata for S3 Objects.
2828
externalDocs:
2929
url: >-
30-
https://github.com/bcgov/common-object-management-service/wiki/Endpoint-Notes#metadata
30+
https://developer.gov.bc.ca/docs/default/component/common-object-management-service/Endpoint-Notes/#metadata
3131
- name: Object
3232
description: Operations directly influencing an S3 Object.
3333
externalDocs:
3434
url: >-
35-
https://github.com/bcgov/common-object-management-service/wiki/Endpoint-Notes#object
35+
https://developer.gov.bc.ca/docs/default/component/common-object-management-service/Endpoint-Notes/#object
3636
- name: Permission
3737
description: Operations for managing S3 permissions.
3838
externalDocs:
3939
url: >-
40-
https://github.com/bcgov/common-object-management-service/wiki/Endpoint-Notes#permission
40+
https://developer.gov.bc.ca/docs/default/component/common-object-management-service/Endpoint-Notes/#permissions
4141
- name: Sync
4242
description: >-
4343
Operations for syncing existing buckets and objects. Ensures that external
4444
S3 modifications are eventually correctly tracked by COMS.
4545
externalDocs:
4646
url: >-
47-
https://github.com/bcgov/common-object-management-service/wiki/Endpoint-Notes#sync
47+
https://developer.gov.bc.ca/docs/default/component/common-object-management-service/Endpoint-Notes/#sync
4848
- name: Tagging
4949
description: Operations for managing Tags for S3 Objects.
5050
externalDocs:
5151
url: >-
52-
https://github.com/bcgov/common-object-management-service/wiki/Endpoint-Notes#tag
52+
https://developer.gov.bc.ca/docs/default/component/common-object-management-service/Endpoint-Notes/#tag
5353
- name: User
5454
description: Operations to list valid queryable users and identity providers.
5555
externalDocs:
5656
url: >-
57-
https://github.com/bcgov/common-object-management-service/wiki/Endpoint-Notes#user
57+
https://developer.gov.bc.ca/docs/default/component/common-object-management-service/Endpoint-Notes/#user
5858
- name: Version
5959
description: Operations to list object versions.
6060
externalDocs:
6161
url: >-
62-
https://github.com/bcgov/common-object-management-service/wiki/Endpoint-Notes
62+
hhttps://developer.gov.bc.ca/docs/default/component/common-object-management-service/Endpoint-Notes/#versions
6363
paths:
6464
/bucket:
6565
put:
@@ -297,6 +297,7 @@ paths:
297297
- $ref: "#/components/parameters/Header-S3AccessMode-Bucket"
298298
- $ref: "#/components/parameters/Header-S3AccessMode-Endpoint"
299299
- $ref: "#/components/parameters/Path-BucketId"
300+
- $ref: "#/components/parameters/Query-Recursive"
300301
responses:
301302
"202":
302303
$ref: "#/components/responses/AddedQueueLength"
@@ -1386,9 +1387,10 @@ paths:
13861387
get:
13871388
summary: Synchronize with the default bucket
13881389
description: >-
1389-
Synchronize the contents of the default bucket with COMS so that it can
1390-
track and manage objects that are already in the bucket. This avoids the
1391-
need to re-upload preexisting files through the COMS API. This endpoint
1390+
Synchronize the contents of the default bucket (specified in environent
1391+
variables: `OBJECTSTORAGE_*`) with with current state in object storage.
1392+
This is a 'recursive' sync which will sync objects in all sub-folders.
1393+
Syncing avoids the need to re-upload preexisting files through the COMS API. This endpoint
13921394
does not guarantee immediately synchronized results. Synchronization
13931395
latency will be affected by the remaining number of objects awaiting
13941396
synchronization. This endpoint updates the last known successful
@@ -2045,6 +2047,15 @@ components:
20452047
- bucketId
20462048
- name
20472049
example: name
2050+
Query-Recursive:
2051+
in: query
2052+
name: recursive
2053+
description: >-
2054+
Whether or not to also sync sub-folders inside the provided bucket/folder
2055+
schema:
2056+
type: boolean
2057+
default: false
2058+
example: 1
20482059
Query-S3VersionId:
20492060
in: query
20502061
name: s3VersionId

app/src/routes/v1/sync.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ router.use(requireSomeAuth);
1010
/** Synchronizes the default bucket */
1111
router.get('/', requireBasicAuth, (req, res, next) => {
1212
req.params.bucketId = null;
13-
syncController.syncBucket(req, res, next);
13+
syncController.syncBucketRecursive(req, res, next);
1414
});
1515

1616
/** Check sync queue size */

0 commit comments

Comments
 (0)