Skip to content

Commit c3d110b

Browse files
authored
fix: download zip file to tmp BM-1417 (#1066)
#### Motivation Write the zip to /tmp instead of streaming it, and bump up the Lambda ephemeral storage size. #### Modification Use the yauzl library instead of unzipper.
1 parent 4ce54ef commit c3d110b

File tree

4 files changed

+84
-158
lines changed

4 files changed

+84
-158
lines changed

package-lock.json

Lines changed: 35 additions & 129 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
"@types/aws-lambda": "^8.10.83",
1818
"@types/geojson": "^7946.0.8",
1919
"@types/node": "^20.10.4",
20-
"@types/unzipper": "^0.10.11",
2120
"@types/yazl": "^3.3.0",
21+
"yazl": "^3.3.1",
22+
"@types/yauzl": "^2.10.3",
2223
"aws-cdk": "^2.1004.0",
2324
"constructs": "^10.4.2",
24-
"esbuild": "^0.19.9",
25-
"yazl": "^3.3.1"
25+
"esbuild": "^0.19.9"
2626
},
2727
"scripts": {
2828
"build": "tsc",
@@ -44,6 +44,6 @@
4444
"@linzjs/lambda": "^4.1.0",
4545
"aws-cdk-lib": "^2.184.1",
4646
"stac-ts": "^1.0.3",
47-
"unzipper": "^0.12.3"
47+
"yauzl": "^3.2.0"
4848
}
4949
}

src/infra/lds.export.cache.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import type { StackProps } from 'aws-cdk-lib';
2-
import { CfnOutput, Duration, Stack } from 'aws-cdk-lib';
2+
import { CfnOutput, Duration, Size, Stack } from 'aws-cdk-lib';
33
import { EventBus, Rule, Schedule } from 'aws-cdk-lib/aws-events';
44
import { LambdaFunction } from 'aws-cdk-lib/aws-events-targets';
55
import { Runtime } from 'aws-cdk-lib/aws-lambda';
@@ -31,6 +31,7 @@ export class LdsExportCache extends Stack {
3131
runtime: Runtime.NODEJS_22_X,
3232
timeout: Duration.minutes(10),
3333
memorySize: 4096,
34+
ephemeralStorageSize: Size.gibibytes(10),
3435
environment: {
3536
CACHE_PREFIX: `s3://${cacheBucket.bucketName}`,
3637
KX_API_KEY: kxApiKey.stringValue,

src/storage.ts

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@ import { fsa } from '@chunkd/fs';
55
import { HashTransform } from '@chunkd/fs/build/src/hash.stream.js';
66
import { FsAwsS3 } from '@chunkd/fs-aws';
77
import type { LambdaRequest, LogType } from '@linzjs/lambda';
8+
import { rmSync } from 'fs';
89
import { Readable } from 'stream';
9-
import type { Entry, ParseStream } from 'unzipper';
10-
import { Parse } from 'unzipper';
10+
import type { Entry } from 'yauzl';
11+
import yauzl from 'yauzl';
1112

1213
import { CachePrefix, ExportLayerId, kx } from './config.ts';
1314
import type { KxDatasetExport, KxDatasetVersionDetail } from './kx.ts';
@@ -56,33 +57,51 @@ export async function extractAndWritePackage(
5657
datasetId: number,
5758
log: LogType,
5859
): Promise<void> {
59-
const unzipperParser: ParseStream = Parse();
60+
const tmpZipFile = fsa.toUrl(`/tmp/${datasetId}.zip`);
61+
try {
62+
await fsa.write(tmpZipFile, stream);
6063

61-
let writeProm: Promise<void> | undefined;
62-
let fileName: string | null = null;
64+
await new Promise<void>((resolve, reject) => {
65+
yauzl.open(tmpZipFile.pathname, { lazyEntries: true }, (err, zipFile) => {
66+
if (err) return reject(new Error(`Failed to open zip file ${tmpZipFile.pathname}`));
6367

64-
await stream
65-
.pipe(unzipperParser)
66-
.on('entry', (entry: Entry) => {
67-
log.debug({ datasetId, path: entry.path }, 'Export:Zip:File');
68-
if (entry.path.endsWith(PackageExtension)) {
69-
log.info({ datasetId, path: entry.path, target: targetFileUri.href }, 'Ingest:Read:Start');
70-
71-
if (fileName != null) throw Error(`Duplicate export package: ${fileName} vs ${entry.path}`);
72-
fileName = entry.path;
68+
zipFile.once('end', () => {
69+
reject(new Error('Did not find geopackage entry'));
70+
});
7371

74-
const gzipOut = entry.pipe(ht).pipe(createGzip({ level: 9 }));
75-
writeProm = fsa.write(targetFileUri, gzipOut, {
76-
contentType: 'application/geopackage+vnd.sqlite3',
77-
contentEncoding: 'gzip',
72+
zipFile.once('error', reject);
73+
74+
zipFile.on('entry', (entry: Entry) => {
75+
log.debug({ datasetId, path: entry.fileName }, 'Export:Zip:File');
76+
if (entry.fileName.endsWith(PackageExtension)) {
77+
log.info({ datasetId, path: entry.fileName, target: targetFileUri.href }, 'Ingest:Read:Start');
78+
79+
zipFile.openReadStream(entry, (err, readStream) => {
80+
if (err) return reject(new Error(`Failed to read zip entry ${entry.fileName}`));
81+
82+
const gzipOut = readStream.pipe(ht).pipe(createGzip({ level: 9 }));
83+
fsa
84+
.write(targetFileUri, gzipOut, {
85+
contentType: 'application/geopackage+vnd.sqlite3',
86+
contentEncoding: 'gzip',
87+
})
88+
.then(() => {
89+
zipFile.close();
90+
resolve();
91+
})
92+
.catch(reject);
93+
});
94+
} else {
95+
zipFile.readEntry();
96+
}
7897
});
79-
} else {
80-
entry.autodrain();
81-
}
82-
})
83-
.promise();
8498

85-
if (writeProm != null) await writeProm;
99+
zipFile.readEntry();
100+
});
101+
});
102+
} finally {
103+
rmSync(tmpZipFile);
104+
}
86105
}
87106

88107
/** Ingest the export into our cache */

0 commit comments

Comments (0)