# dynamo-backup-to-s3

## Stream DynamoDB backups to S3.

dynamo-backup-to-s3 is a utility to stream DynamoDB data to S3. Since the data is streamed directly from DynamoDB to S3, it is suitable for copying large tables. Tables are copied in parallel.
completed. If no error has occurred, the `callback` should be run without
arguments or with an explicit `null` argument.
* `callback(err)` - A callback which is called when the table has finished backing up, or an error occurs

# dynamo-restore-from-s3

## Restore S3 backups back to Dynamo.

`dynamo-restore-from-s3` is a utility that restores backups in S3 back to Dynamo. It streams data down from S3 and throttles the download speed to match the rate of batch writes to Dynamo.

It is suitable for restoring large tables without needing to write to disk or use a large amount of memory. Use it on an AWS EC2 instance for best results and to minimise network latency; this should yield restore speeds of around 15 minutes per GB.
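
The throttling idea can be sketched in a few lines. The following is a simplified illustration, not the module's actual implementation: it pauses the S3 read stream whenever too many `BatchWriteItem` requests are in flight, assuming one DynamoDB-JSON item per line in the backup file. The bucket, key, and table names are hypothetical, and retry/flush handling is omitted.

```
var AWS = require('aws-sdk');
var readline = require('readline');

// Stream the backup straight out of S3 (hypothetical bucket and key).
var s3Stream = new AWS.S3().getObject({
    Bucket: 'my-backups',
    Key: 'acme-customers.json'
}).createReadStream();

var dynamo = new AWS.DynamoDB();
var lines = readline.createInterface({ input: s3Stream });

var CONCURRENCY = 200; // corresponds to the --concurrency option
var inFlight = 0;
var batch = [];

lines.on('line', function(line) {
    if (!line) return;
    batch.push({ PutRequest: { Item: JSON.parse(line) } });
    if (batch.length < 25) return; // BatchWriteItem accepts at most 25 items
    var params = { RequestItems: { 'acme-customers': batch } };
    batch = [];
    inFlight++;
    if (inFlight >= CONCURRENCY) lines.pause(); // throttle the download
    dynamo.batchWriteItem(params, function(err) {
        inFlight--;
        if (err) console.log(err); // retrying UnprocessedItems omitted for brevity
        lines.resume();
    });
});
```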

Use `--overwrite` if the table already exists. Otherwise the utility will attempt to generate the table on the fly.

It can be run as a command line script or as an npm module.

# Command line usage

```
  Usage: dynamo-restore-from-s3 [options] -s "s3://mybucket/path/to/file.json" -t "new-dynamodb-table"

  Options:

    -h, --help                        output usage information
    -V, --version                     output the version number
    -s, --source [path]               Full S3 path to a JSON backup file (Required)
    -t, --table [name]                Name of the Dynamo table to restore to (Required)
    -o, --overwrite                   Table already exists; skip auto-create. Default is false.
    -c, --concurrency <requestcount>  Number of concurrent requests and Dynamo capacity units. Default is 200.
    -pk, --partitionkey [columnname]  Name of the primary partition key. If not provided, will try to determine it from the backup.
    -sk, --sortkey [columnname]       Name of the secondary sort key. Ignored unless --partitionkey is provided.
    -rc, --readcapacity <units>       Read units for the new table (when finished). Default is 5.
    -wc, --writecapacity <units>      Write units for the new table (when finished). Default is the value of --concurrency.
    -sf, --stop-on-failure            Stop the process when the same batch fails to restore multiple times. Default is false.
    --aws-key <key>                   AWS access key. Uses the AWS_ACCESS_KEY_ID env var if --aws-key is not set.
    --aws-secret <secret>             AWS secret key. Uses the AWS_SECRET_ACCESS_KEY env var if --aws-secret is not set.
    --aws-region <region>             AWS region. Uses the AWS_DEFAULT_REGION env var if --aws-region is not set.
```
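
If the credential flags are omitted, the environment variables listed above are used instead. A quick illustration (the key and region values are placeholders):

```
  $ export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
  $ export AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
  $ export AWS_DEFAULT_REGION=us-east-1
  $ ./bin/dynamo-restore-from-s3 -t acme-customers -s s3://my-backups/acme-customers.json --overwrite
```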

## Examples

```
  # Restore over existing table (cmd.exe).
  > node ./bin/dynamo-restore-from-s3 -t acme-customers -s s3://my-backups/acme-customers.json --overwrite

  # Restore over existing table (shell).
  $ ./bin/dynamo-restore-from-s3 -t acme-customers -s s3://my-backups/acme-customers.json --overwrite

  # Restore over existing table, 1000 concurrent requests. Stop if any batch fails 1000 times.
  $ ./bin/dynamo-restore-from-s3 -t acme-customers -c 1000 -s s3://my-backups/acme-customers.json --overwrite -sf

  # Restore over existing table, 1000 concurrent requests. When finished, set read capacity to 50 and write capacity to 10 (both needed).
  $ ./bin/dynamo-restore-from-s3 -t acme-customers -c 1000 -s s3://my-backups/acme-customers.json --overwrite --readcapacity 50 --writecapacity 10

  # Auto-generate table (determine partition key from backup).
  $ ./bin/dynamo-restore-from-s3 -t acme-customers -s s3://my-backups/acme-customers.json

  # Auto-generate table with partition and sort key.
  $ ./bin/dynamo-restore-from-s3 -t acme-orders -s s3://my-backups/acme-orders.json -pk customerId -sk createDate

  # Auto-generate table, defined partition key. Concurrency 2000 (~ 2GB backup).
  $ ./bin/dynamo-restore-from-s3 -t acme-orders -pk orderId -c 2000 -s s3://my-backups/acme-orders.json

  # Auto-generate table. 2000 write units during restore. When finished, set 100 read units and 50 write units (both needed).
  $ ./bin/dynamo-restore-from-s3 -t acme-orders -c 2000 -s s3://my-backups/acme-orders.json --readcapacity 100 --writecapacity 50

  # Auto-generate table. Concurrency 50 (10 MB backup or less).
  $ ./bin/dynamo-restore-from-s3 -t acme-orders -c 50 -s s3://my-backups/acme-orders.json

  # Auto-generate table. Concurrency 50. Stop process if any batch fails 50 times.
  $ ./bin/dynamo-restore-from-s3 -t acme-orders -c 50 -sf -s s3://my-backups/acme-orders.json
```

# npm module usage

## Quick Example

```
var DynamoRestore = require('dynamo-backup-to-s3').Restore;

var restore = new DynamoRestore({
    source: 's3://my-backups/DynamoDB-backup-2016-09-28-15-36-40/acme-customers-prod.json',
    table: 'acme-customers-dev',
    overwrite: true,
    concurrency: 200, // for large restores use 1 unit per MB as a rule of thumb (i.e. 1000 for a 1GB restore)
    awsAccessKey: /* AWS access key */,
    awsSecretKey: /* AWS secret key */,
    awsRegion: /* AWS region */
});

restore.on('error', function(message) {
    console.log(message);
    process.exit(-1);
});

restore.on('warning', function(message) {
    console.log(message);
});

restore.on('send-batch', function(batches, requests, streamMeta) {
    console.log('Batch sent. %d in flight. %d MB remaining to download...', requests, streamMeta.RemainingLength / (1024 * 1024));
});

restore.run(function() {
    console.log('Finished restoring DynamoDB table');
});
```

### Constructor

```
var options = {
    source: /* path to a JSON file in an S3 bucket, should start with s3://bucketname/... */,
    table: /* name of the Dynamo table, will be created on the fly unless overwrite is set */,
    overwrite: /* true/false if the table already exists (defaults to false) */,
    concurrency: /* number of concurrent requests (and Dynamo write capacity units) */,
    partitionkey: /* name of the partition key column */,
    sortkey: /* name of the secondary (sort) key column */,
    readcapacity: /* number of read capacity units (when the restore finishes) */,
    writecapacity: /* number of write capacity units (when the restore finishes) */,
    stopOnFailure: /* true/false should a single failed batch stop the whole restore job? */,
    awsAccessKey: /* AWS access key */,
    awsSecretKey: /* AWS secret key */,
    awsRegion: /* AWS region */
};

var restore = new DynamoRestore(options);
```
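
For context, auto-creating the destination table amounts to an AWS SDK `createTable` call along the lines of the sketch below. This is an illustration, not the module's actual code; the table, key names, and types are hypothetical, and in practice they come from the backup file or from `partitionkey`/`sortkey`.

```
var AWS = require('aws-sdk');
var dynamo = new AWS.DynamoDB();

dynamo.createTable({
    TableName: 'acme-orders',                                // hypothetical table
    AttributeDefinitions: [
        { AttributeName: 'customerId', AttributeType: 'S' }, // partitionkey
        { AttributeName: 'createDate', AttributeType: 'S' }  // sortkey
    ],
    KeySchema: [
        { AttributeName: 'customerId', KeyType: 'HASH' },    // partition key
        { AttributeName: 'createDate', KeyType: 'RANGE' }    // sort key
    ],
    // high write throughput during the restore; afterwards the tool scales
    // down to the --readcapacity / --writecapacity values
    ProvisionedThroughput: { ReadCapacityUnits: 5, WriteCapacityUnits: 2000 }
}, function(err, data) {
    if (err) console.log(err);
});
```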

## Events

### error

Raised when there is a fatal error restoring a table.

__Example__
```
restore.on('error', function(message) {
    console.log('Error!! ' + message);
});
```

### warning

Raised when there is a warning while restoring a table. Normally this will be a failed batch.

__Example__
```
restore.on('warning', function(message) {
    console.log('Warning!! ' + message);
});
```

### send-batch

Raised whenever a batch is sent to Dynamo. Useful for tracking progress.

__Example__
```
restore.on('send-batch', function(batches, requests, streamMeta) {
    console.log('Batch sent');
    console.log('Num cached batches: ', batches);
    console.log('Num requests in flight: ', requests);
    console.log('Stream metadata: ', JSON.stringify(streamMeta));
});
```

### finish

Raised when the restore process is finished.

__Example__
```
restore.on('finish', function() {
    console.log('All done!');
});
```

## Functions

### run

Restores the table with the options defined in the constructor.

__Arguments__

* `callback(err)` - callback to execute when the restore job is complete. The first argument is set only if an error occurred.

__Example__
```
restore.run(function(error) {
    if (error) {
        return console.log(error);
    }
    console.log('All done!');
});
```