Skip to content

Commit d84662b

Browse files
authored
ETag for entities.csv (getodk#776)
* fix: return the md5 of the latest entity's timestamp
* fix: coalesce updatedAt and createdAt for the lastEntity timestamp
* enhance: return an ETag in the entities.csv response
1 parent 529597c commit d84662b

File tree

4 files changed

+143
-18
lines changed

4 files changed

+143
-18
lines changed

lib/formats/openrosa.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ const formManifestTemplate = template(200, `<?xml version="1.0" encoding="UTF-8"
7676
const formManifest = (data) => formManifestTemplate(mergeRight(data, {
7777
attachments: data.attachments.map((attachment) =>
7878
attachment.with({ hasSource: attachment.blobId || attachment.datasetId,
79-
md5: attachment.blobId ? attachment.aux.openRosa.md5 : md5sum(attachment.aux.openRosa.dsUpdatedAt ?? '1970-01-01'),
79+
md5: attachment.blobId ? attachment.aux.openRosa.md5 : md5sum(attachment.aux.openRosa.dsUpdatedAt?.toISOString() ?? '1970-01-01'),
8080
urlName: encodeURIComponent(attachment.name) }))
8181
}));
8282

lib/model/query/datasets.js

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ const pickFrameFields = (frame, obj) => compose(
2424
)(obj);
2525

2626
const makeHierarchy = reduce((result, item) => {
27-
const dataset = new Dataset(pickFrameFields(Dataset, item));
27+
const dataset = new Dataset(pickFrameFields(Dataset, item)).with({ lastEntity: item.lastEntity });
2828
const property = new Dataset.Property(pickFrameFields(Dataset.Property, item));
2929

3030
return {
@@ -50,7 +50,7 @@ const _getAllByProjectId = (fields, extend, options) => sql`
5050
SELECT ${fields} FROM datasets
5151
${extend|| sql`
5252
LEFT JOIN (
53-
SELECT "datasetId", count(1) "entities", max("createdAt") "lastEntity"
53+
SELECT "datasetId", COUNT(1) "entities", MAX(COALESCE("updatedAt", "createdAt")) "lastEntity"
5454
FROM entities e
5555
GROUP BY "datasetId"
5656
) stats on stats."datasetId" = datasets.id`}
@@ -130,6 +130,11 @@ const _getByNameSql = ((fields, datasetName, projectId, includeForms) => sql`
130130
${fields}
131131
FROM
132132
datasets
133+
LEFT OUTER JOIN (
134+
SELECT "datasetId", COUNT(1) "entities", MAX(COALESCE("updatedAt", "createdAt")) "lastEntity"
135+
FROM entities e
136+
GROUP BY "datasetId"
137+
) stats on stats."datasetId" = datasets.id
133138
LEFT OUTER JOIN ds_properties ON
134139
datasets.id = ds_properties."datasetId" AND ds_properties."publishedAt" IS NOT NULL
135140
${includeForms ? sql`
@@ -198,7 +203,7 @@ const getById = (datasetId) => ({ all }) =>
198203

199204
// Returns only published dataset with its published properties
200205
const getByName = (datasetName, projectId) => ({ all }) =>
201-
all(_getByNameSql(unjoiner(Dataset, Dataset.Property).fields, datasetName, projectId))
206+
all(_getByNameSql(unjoiner(Dataset, Dataset.Property, Dataset.Extended).fields, datasetName, projectId))
202207
.then(makeHierarchy)
203208
.then(asArray)
204209
.then(nth(0))

lib/resources/datasets.js

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ const sanitize = require('sanitize-filename');
1111
const { getOrNotFound } = require('../util/promise');
1212
const { streamEntityCsv } = require('../data/entity');
1313
const { contentDisposition } = require('../util/http');
14+
const { md5sum } = require('../util/crypto');
1415

1516
module.exports = (service, endpoint) => {
1617
service.get('/projects/:id/datasets', endpoint(({ Projects, Datasets }, { auth, params, queryOptions }) =>
@@ -26,18 +27,26 @@ module.exports = (service, endpoint) => {
2627
.then(() => Datasets.getDatasetMetadata(params.name, params.projectId)
2728
.then(getOrNotFound))));
2829

29-
service.get('/projects/:projectId/datasets/:name/entities.csv', endpoint(({ Datasets, Entities, Projects }, { params, auth }, _, response) =>
30-
Projects.getById(params.projectId)
31-
.then(getOrNotFound)
32-
.then((project) => auth.canOrReject('entity.list', project))
33-
.then(() => Datasets.getByName(params.name, params.projectId)
34-
.then(getOrNotFound)
35-
.then((dataset) => Entities.streamForExport(dataset.id)
36-
.then((entities) => {
37-
const filename = sanitize(dataset.name);
38-
const extension = 'csv';
39-
response.append('Content-Disposition', contentDisposition(`${filename}.${extension}`));
40-
response.append('Content-Type', 'text/csv');
41-
return streamEntityCsv(entities, dataset.properties);
42-
})))));
30+
service.get('/projects/:projectId/datasets/:name/entities.csv', endpoint(async ({ Datasets, Entities, Projects }, { params, auth }, request, response) => {
31+
const project = await Projects.getById(params.projectId).then(getOrNotFound);
32+
await auth.canOrReject('entity.list', project);
33+
34+
const dataset = await Datasets.getByName(params.name, params.projectId).then(getOrNotFound);
35+
36+
// ETag logic inspired by https://stackoverflow.com/questions/72334843/custom-computed-etag-for-express-js/72335674#72335674
37+
const serverEtag = `"${md5sum(dataset.lastEntity?.toISOString() ?? '1970-01-01')}"`;
38+
const clientEtag = request.get('If-None-Match');
39+
if (clientEtag?.includes(serverEtag)) { // nginx weakens Etag when gzip is used, so clientEtag is like W/"4e9f0c7e9a8240..."
40+
response.status(304);
41+
return;
42+
}
43+
const entities = await Entities.streamForExport(dataset.id);
44+
const filename = sanitize(dataset.name);
45+
const extension = 'csv';
46+
response.append('Content-Disposition', contentDisposition(`${filename}.${extension}`));
47+
response.append('Content-Type', 'text/csv');
48+
response.set('ETag', serverEtag);
49+
return streamEntityCsv(entities, dataset.properties);
50+
51+
}));
4352
};

test/integration/api/datasets.js

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,71 @@ describe('datasets and entities', () => {
327327
.expect(200)
328328
.then(() => asAlice.get('/v1/projects/1/datasets/people/entities.csv')
329329
.expect(404)))));
330+
331+
it('should return csv file with data', testService(async (service, container) => {
332+
const asAlice = await service.login('alice');
333+
334+
await asAlice.post('/v1/projects/1/forms?publish=true')
335+
.send(testData.forms.simpleEntity)
336+
.set('Content-Type', 'application/xml')
337+
.expect(200);
338+
339+
await asAlice.post('/v1/projects/1/forms/simpleEntity/submissions')
340+
.send(testData.instances.simpleEntity.one)
341+
.set('Content-Type', 'application/xml')
342+
.expect(200);
343+
344+
await asAlice.patch('/v1/projects/1/forms/simpleEntity/submissions/one')
345+
.send({ reviewState: 'approved' })
346+
.expect(200);
347+
348+
await exhaust(container);
349+
350+
const result = await asAlice.get('/v1/projects/1/datasets/people/entities.csv')
351+
.expect(200)
352+
.then(r => r.text);
353+
354+
result.should.be.eql(
355+
'name,label,first_name,age\n' +
356+
'12345678-1234-4123-8234-123456789abc,Alice (88),Alice,88\n'
357+
);
358+
359+
}));
360+
361+
it('should return 304 content not changed if ETag matches', testService(async (service, container) => {
362+
const asAlice = await service.login('alice');
363+
364+
await asAlice.post('/v1/projects/1/forms?publish=true')
365+
.send(testData.forms.simpleEntity)
366+
.set('Content-Type', 'application/xml')
367+
.expect(200);
368+
369+
await asAlice.post('/v1/projects/1/forms/simpleEntity/submissions')
370+
.send(testData.instances.simpleEntity.one)
371+
.set('Content-Type', 'application/xml')
372+
.expect(200);
373+
374+
await asAlice.patch('/v1/projects/1/forms/simpleEntity/submissions/one')
375+
.send({ reviewState: 'approved' })
376+
.expect(200);
377+
378+
await exhaust(container);
379+
380+
const result = await asAlice.get('/v1/projects/1/datasets/people/entities.csv')
381+
.expect(200);
382+
383+
result.text.should.be.eql(
384+
'name,label,first_name,age\n' +
385+
'12345678-1234-4123-8234-123456789abc,Alice (88),Alice,88\n'
386+
);
387+
388+
const etag = result.get('ETag');
389+
390+
await asAlice.get('/v1/projects/1/datasets/people/entities.csv')
391+
.set('If-None-Match', etag)
392+
.expect(304);
393+
}));
394+
330395
});
331396

332397
describe('projects/:id/datasets/:name GET', () => {
@@ -499,6 +564,52 @@ describe('datasets and entities', () => {
499564
</manifest>`);
500565
})))));
501566

567+
it('should return md5 of last Entity timestamp in the manifest', testService(async (service, container) => {
568+
const asAlice = await service.login('alice');
569+
570+
await asAlice.post('/v1/projects/1/forms?publish=true')
571+
.send(testData.forms.simpleEntity)
572+
.set('Content-Type', 'application/xml')
573+
.expect(200);
574+
575+
await asAlice.post('/v1/projects/1/forms/simpleEntity/submissions')
576+
.send(testData.instances.simpleEntity.one)
577+
.set('Content-Type', 'application/xml')
578+
.expect(200);
579+
580+
await asAlice.patch('/v1/projects/1/forms/simpleEntity/submissions/one')
581+
.send({ reviewState: 'approved' })
582+
.expect(200);
583+
584+
await exhaust(container);
585+
586+
const result = await asAlice.get('/v1/projects/1/datasets/people/entities.csv')
587+
.expect(200);
588+
589+
const etag = result.get('ETag');
590+
591+
await asAlice.post('/v1/projects/1/forms?publish=true')
592+
.send(testData.forms.withAttachments.replace(/goodone/g, 'people'))
593+
.set('Content-Type', 'application/xml')
594+
.expect(200);
595+
596+
await asAlice.get('/v1/projects/1/forms/withAttachments/manifest')
597+
.set('X-OpenRosa-Version', '1.0')
598+
.expect(200)
599+
.then(({ text }) => {
600+
const domain = config.get('default.env.domain');
601+
text.should.be.eql(`<?xml version="1.0" encoding="UTF-8"?>
602+
<manifest xmlns="http://openrosa.org/xforms/xformsManifest">
603+
<mediaFile>
604+
<filename>people.csv</filename>
605+
<hash>md5:${etag.replace(/"/g, '')}</hash>
606+
<downloadUrl>${domain}/v1/projects/1/forms/withAttachments/attachments/people.csv</downloadUrl>
607+
</mediaFile>
608+
</manifest>`);
609+
});
610+
611+
}));
612+
502613
it('should override blob and link dataset', testService((service, { Forms, FormAttachments, Audits, Datasets }) =>
503614
service.login('alice', (asAlice) =>
504615
asAlice.post('/v1/projects/1/forms')

0 commit comments

Comments
 (0)