Skip to content

Commit 1bc9ca8

Browse files
authored
Merge pull request #11398 from IQSS/11305-export-drafts
metadata export for drafts via API
2 parents 0d89b8f + 823d311 commit 1bc9ca8

File tree

11 files changed

+411
-48
lines changed

11 files changed

+411
-48
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
### Dataset Metadata Can Be Exported From Draft Datasets (via API)
2+
3+
In previous versions of Dataverse, it was only possible to export metadata from published datasets. It is now possible to export metadata from draft datasets via API, as long as you supply an API token that has access to the draft. As before, when exporting metadata from published datasets, only the latest published version is supported. Internal exporters have been updated to work with drafts, but external exporters might need to be updated (the Croissant exporter definitely does). See "upgrade instructions" below for details. See [the guides](https://dataverse-guide--11398.org.readthedocs.build/en/11398/api/native-api.html#export-metadata-of-a-dataset-in-various-formats), #11305, and #11398.
4+
5+
## Upgrade Instructions
6+
7+
If you are using the Croissant exporter, [update it](https://github.com/gdcc/exporter-croissant) to version 0.1.4 or newer for compatibility with exporting drafts. Other external exporters may need to be updated as well. See https://github.com/gdcc/dataverse-exporters for a list of known external exporters.

doc/sphinx-guides/source/api/native-api.rst

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,25 +1605,29 @@ Usage example:
16051605
Export Metadata of a Dataset in Various Formats
16061606
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16071607

1608-
|CORS| Export the metadata of the current published version of a dataset in various formats.
1608+
|CORS| Export the metadata of either the current published version or the draft version of a dataset in various formats.
16091609

16101610
To get a list of available formats, see :ref:`available-exporters` and :ref:`get-export-formats`.
16111611

1612+
If you don't specify a version (see :ref:`dataset-version-specifiers`), ``:latest-published`` is assumed, and an API token is not necessary. ``:draft`` is supported if you pass an API token that has access to the draft. If you pass a version number (e.g. "1.0"), it will only work if that happens to be the latest published version. That is to say, for published versions, only the latest published version is supported.
1613+
16121614
See also :ref:`batch-exports-through-the-api` and the note below:
16131615

16141616
.. code-block:: bash
16151617
16161618
export SERVER_URL=https://demo.dataverse.org
16171619
export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB
16181620
export METADATA_FORMAT=ddi
1621+
export VERSION=:draft
1622+
export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
16191623
1620-
curl "$SERVER_URL/api/datasets/export?exporter=$METADATA_FORMAT&persistentId=$PERSISTENT_IDENTIFIER"
1624+
curl -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/export?exporter=$METADATA_FORMAT&persistentId=$PERSISTENT_IDENTIFIER&version=$VERSION"
16211625
16221626
The fully expanded example above (without environment variables) looks like this:
16231627

16241628
.. code-block:: bash
16251629
1626-
curl "https://demo.dataverse.org/api/datasets/export?exporter=ddi&persistentId=doi:10.5072/FK2/J8SJZB"
1630+
curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/export?exporter=ddi&persistentId=doi:10.5072/FK2/J8SJZB&version=:draft"
16271631
16281632
.. _available-exporters:
16291633

src/main/java/edu/harvard/iq/dataverse/DatasetPage.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5985,7 +5985,7 @@ public String getCroissant() {
59855985
if (isThisLatestReleasedVersion()) {
59865986
final String CROISSANT_SCHEMA_NAME = "croissant";
59875987
ExportService instance = ExportService.getInstance();
5988-
String croissant = instance.getExportAsString(dataset, CROISSANT_SCHEMA_NAME);
5988+
String croissant = instance.getLatestPublishedAsString(dataset, CROISSANT_SCHEMA_NAME);
59895989
if (croissant != null && !croissant.isEmpty()) {
59905990
logger.fine("Returning cached CROISSANT.");
59915991
return croissant;
@@ -6004,7 +6004,7 @@ public List<License> getAvailableLicenses(){
60046004
public String getJsonLd() {
60056005
if (isThisLatestReleasedVersion()) {
60066006
ExportService instance = ExportService.getInstance();
6007-
String jsonLd = instance.getExportAsString(dataset, SchemaDotOrgExporter.NAME);
6007+
String jsonLd = instance.getLatestPublishedAsString(dataset, SchemaDotOrgExporter.NAME);
60086008
if (jsonLd != null) {
60096009
logger.fine("Returning cached schema.org JSON-LD.");
60106010
return jsonLd;

src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1876,17 +1876,14 @@ public String getPublicationDateAsString() {
18761876
// one metadata export in a given format per dataset (it uses the current
18771877
// released (published) version. This JSON fragment is generated for a
18781878
// specific released version - and we can have multiple released versions.
1879+
// (A JSON fragment is generated for drafts as well. -- P.D.)
18791880
// So something will need to be modified to accommodate this. -- L.A.
18801881
/**
18811882
* We call the export format "Schema.org JSON-LD" and extensive Javadoc can
18821883
* be found in {@link edu.harvard.iq.dataverse.export.SchemaDotOrgExporter}.
18831884
*/
18841885
public String getJsonLd() {
18851886
// We show published datasets only for "datePublished" field below.
1886-
if (!this.isPublished()) {
1887-
return "";
1888-
}
1889-
18901887
if (jsonLd != null) {
18911888
return jsonLd;
18921889
}
@@ -1975,7 +1972,12 @@ public String getJsonLd() {
19751972
* was modified within a DataFeed."
19761973
*/
19771974
job.add("dateModified", this.getPublicationDateAsString());
1978-
job.add("version", this.getVersionNumber().toString());
1975+
if (this.isPublished()) {
1976+
job.add("version", this.getVersionNumber().toString());
1977+
} else {
1978+
// This will show "DRAFT" for drafts.
1979+
job.add("version", this.getFriendlyVersionNumber());
1980+
}
19791981

19801982
String description = this.getDescriptionsPlainTextTruncated();
19811983
job.add("description", description);

src/main/java/edu/harvard/iq/dataverse/api/Datasets.java

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -231,31 +231,57 @@ public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id"
231231
return ok(jsonbuilder.add("latestVersion", (latest != null) ? json(latest, true) : null));
232232
}, getRequestUser(crc));
233233
}
234-
235-
// This API call should, ideally, call findUserOrDie() and the GetDatasetCommand
236-
// to obtain the dataset that we are trying to export - which would handle
237-
// Auth in the process... For now, Auth isn't necessary - since export ONLY
238-
// WORKS on published datasets, which are open to the world. -- L.A. 4.5
234+
239235
@GET
236+
@AuthRequired
240237
@Path("/export")
241238
@Produces({"application/xml", "application/json", "application/html", "application/ld+json", "*/*" })
242-
public Response exportDataset(@QueryParam("persistentId") String persistentId, @QueryParam("exporter") String exporter, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) {
239+
public Response exportDataset(@Context ContainerRequestContext crc, @QueryParam("persistentId") String persistentId,
240+
@QueryParam("version") String versionId, @QueryParam("exporter") String exporter,
241+
@Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) {
243242

244243
try {
245244
Dataset dataset = datasetService.findByGlobalId(persistentId);
246245
if (dataset == null) {
247246
return error(Response.Status.NOT_FOUND, "A dataset with the persistentId " + persistentId + " could not be found.");
248247
}
249-
248+
249+
DataverseRequest req = createDataverseRequest(getRequestUser(crc));
250+
DatasetVersion datasetVersion = null;
251+
try {
252+
String versionToLookUp = DS_VERSION_LATEST_PUBLISHED;
253+
if (versionId != null) {
254+
versionToLookUp = versionId;
255+
}
256+
datasetVersion = getDatasetVersionOrDie(req, versionToLookUp, dataset, uriInfo, headers);
257+
} catch (WrappedResponse wr) {
258+
// wr.getLocalizedMessage() is null so don't bother returning it
259+
return error(BAD_REQUEST, "Unable to look up dataset based on version. Try " + DS_VERSION_LATEST_PUBLISHED + " or " + DS_VERSION_DRAFT + ".");
260+
}
261+
262+
// Trying to get version 1.0 for a dataset that's already at 3.0, for example, is not supported.
263+
if (!datasetVersion.isDraft() && versionId != null) {
264+
Command<DatasetVersion> cmd = new GetLatestPublishedDatasetVersionCommand(dvRequestService.getDataverseRequest(), dataset);
265+
DatasetVersion latestPublishedVersion = commandEngine.submit(cmd);
266+
if (latestPublishedVersion == null) {
267+
return error(BAD_REQUEST, "Non-draft version requested but for published versions only the latest (" + DS_VERSION_LATEST_PUBLISHED + ") is supported.");
268+
}
269+
if (!datasetVersion.equals(latestPublishedVersion)) {
270+
return error(BAD_REQUEST, "Non-draft version requested (" + versionId + ") but for published versions only the latest (" + DS_VERSION_LATEST_PUBLISHED + ") is supported.");
271+
}
272+
}
273+
250274
ExportService instance = ExportService.getInstance();
251-
252-
InputStream is = instance.getExport(dataset, exporter);
253-
275+
276+
InputStream is = instance.getExport(datasetVersion, exporter);
277+
254278
String mediaType = instance.getMediaType(exporter);
255-
//Export is only possible for released (non-draft) dataset versions so we can log without checking to see if this is a request for a draft
256-
MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, dataset);
257-
mdcLogService.logEntry(entry);
258-
279+
280+
if (datasetVersion.isReleased()) {
281+
MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, dataset);
282+
mdcLogService.logEntry(entry);
283+
}
284+
259285
return Response.ok()
260286
.entity(is)
261287
.type(mediaType).

src/main/java/edu/harvard/iq/dataverse/export/ExportService.java

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@
4747
import java.util.logging.Level;
4848
import java.util.logging.Logger;
4949
import jakarta.ws.rs.core.MediaType;
50+
import java.io.ByteArrayInputStream;
51+
import java.io.ByteArrayOutputStream;
52+
import java.io.FileInputStream;
5053

5154
import org.apache.commons.io.IOUtils;
5255

@@ -127,11 +130,36 @@ public List<String[]> getExportersLabels() {
127130
return retList;
128131
}
129132

130-
public InputStream getExport(Dataset dataset, String formatName) throws ExportException, IOException {
131-
// first we will try to locate an already existing, cached export
132-
// for this format:
133-
134-
InputStream exportInputStream = getCachedExportFormat(dataset, formatName);
133+
public InputStream getExport(DatasetVersion datasetVersion, String formatName) throws ExportException, IOException {
134+
135+
Dataset dataset = datasetVersion.getDataset();
136+
InputStream exportInputStream = null;
137+
138+
if (datasetVersion.isDraft()) {
139+
// For drafts we create the export on the fly rather than caching.
140+
Exporter exporter = exporterMap.get(formatName);
141+
if (exporter != null) {
142+
try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
143+
// getPrerequisiteFormatName logic copied from exportFormat()
144+
if (exporter.getPrerequisiteFormatName().isPresent()) {
145+
String prereqFormatName = exporter.getPrerequisiteFormatName().get();
146+
try (InputStream preReqStream = getExport(datasetVersion, prereqFormatName)) {
147+
InternalExportDataProvider dataProvider = new InternalExportDataProvider(datasetVersion, preReqStream);
148+
exporter.exportDataset(dataProvider, outputStream);
149+
} catch (IOException ioe) {
150+
throw new ExportException("Could not get prerequisite " + prereqFormatName + " to create " + formatName + " export for dataset " + dataset.getId(), ioe);
151+
}
152+
} else {
153+
InternalExportDataProvider dataProvider = new InternalExportDataProvider(datasetVersion);
154+
exporter.exportDataset(dataProvider, outputStream);
155+
}
156+
return new ByteArrayInputStream(outputStream.toByteArray());
157+
}
158+
}
159+
} else {
160+
// for non-drafts (published versions) we try to locate an already existing, cached export
161+
exportInputStream = getCachedExportFormat(dataset, formatName);
162+
}
135163

136164
// The DDI export is limited for restricted and actively embargoed files (no
137165
// data/file description sections).and when an embargo ends, we need to refresh
@@ -207,11 +235,18 @@ public InputStream getExport(Dataset dataset, String formatName) throws ExportEx
207235

208236
}
209237

210-
public String getExportAsString(Dataset dataset, String formatName) {
238+
public String getLatestPublishedAsString(Dataset dataset, String formatName) {
239+
if (dataset == null) {
240+
return null;
241+
}
242+
DatasetVersion releasedVersion = dataset.getReleasedVersion();
243+
if (releasedVersion == null) {
244+
return null;
245+
}
211246
InputStream inputStream = null;
212247
InputStreamReader inp = null;
213248
try {
214-
inputStream = getExport(dataset, formatName);
249+
inputStream = getExport(releasedVersion, formatName);
215250
if (inputStream != null) {
216251
inp = new InputStreamReader(inputStream, "UTF8");
217252
BufferedReader br = new BufferedReader(inp);
@@ -238,8 +273,9 @@ public String getExportAsString(Dataset dataset, String formatName) {
238273
}
239274

240275
// This method goes through all the Exporters and calls
241-
// the "chacheExport()" method that will save the produced output
276+
// the "cacheExport()" method that will save the produced output
242277
// in a file in the dataset directory, on each Exporter available.
278+
// This is only for the latest published version.
243279
public void exportAllFormats(Dataset dataset) throws ExportException {
244280
try {
245281
clearAllCachedFormats(dataset);
@@ -258,7 +294,7 @@ public void exportAllFormats(Dataset dataset) throws ExportException {
258294
String formatName = e.getFormatName();
259295
if(e.getPrerequisiteFormatName().isPresent()) {
260296
String prereqFormatName = e.getPrerequisiteFormatName().get();
261-
try (InputStream preReqStream = getExport(dataset, prereqFormatName)) {
297+
try (InputStream preReqStream = getExport(dataset.getReleasedVersion(), prereqFormatName)) {
262298
dataProvider.setPrerequisiteInputStream(preReqStream);
263299
cacheExport(dataset, dataProvider, formatName, e);
264300
dataProvider.setPrerequisiteInputStream(null);
@@ -313,7 +349,7 @@ public void exportFormat(Dataset dataset, String formatName) throws ExportExcept
313349
}
314350
if(e.getPrerequisiteFormatName().isPresent()) {
315351
String prereqFormatName = e.getPrerequisiteFormatName().get();
316-
try (InputStream preReqStream = getExport(dataset, prereqFormatName)) {
352+
try (InputStream preReqStream = getExport(releasedVersion, prereqFormatName)) {
317353
InternalExportDataProvider dataProvider = new InternalExportDataProvider(releasedVersion, preReqStream);
318354
cacheExport(dataset, dataProvider, formatName, e);
319355
} catch (IOException ioe) {

src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -438,9 +438,13 @@ private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDT
438438
xmlw.writeStartElement("verStmt");
439439
xmlw.writeAttribute("source","archive");
440440
xmlw.writeStartElement("version");
441-
XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10));
442-
XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString());
443-
xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString());
441+
if (datasetVersionDTO.getReleaseTime() != null) {
442+
XmlWriterUtil.writeAttribute(xmlw, "date", datasetVersionDTO.getReleaseTime().substring(0, 10));
443+
}
444+
XmlWriterUtil.writeAttribute(xmlw, "type", datasetVersionDTO.getVersionState().toString());
445+
if (datasetVersionDTO.getVersionNumber() != null) {
446+
xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString());
447+
}
444448
xmlw.writeEndElement(); // version
445449
if (!StringUtils.isBlank(datasetVersionDTO.getVersionNote())) {
446450
xmlw.writeStartElement("notes");

src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ private Metadata getDatasetMetadata(Dataset dataset, String metadataPrefix) thro
253253

254254
} else {
255255
InputStream pregeneratedMetadataStream;
256-
pregeneratedMetadataStream = ExportService.getInstance().getExport(dataset, metadataPrefix);
256+
pregeneratedMetadataStream = ExportService.getInstance().getExport(dataset.getReleasedVersion(), metadataPrefix);
257257

258258
metadata = Metadata.copyFromStream(pregeneratedMetadataStream);
259259
}

src/test/java/edu/harvard/iq/dataverse/DatasetVersionTest.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,6 @@ public void testGetJsonLd() throws ParseException {
9999
dataset.setIdentifier("LK0D1H");
100100
DatasetVersion datasetVersion = new DatasetVersion();
101101
datasetVersion.setDataset(dataset);
102-
datasetVersion.setVersionState(DatasetVersion.VersionState.DRAFT);
103-
assertEquals("", datasetVersion.getPublicationDateAsString());
104-
// Only published datasets return any JSON.
105-
assertEquals("", datasetVersion.getJsonLd());
106102
datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED);
107103
datasetVersion.setVersionNumber(1L);
108104
SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd");
@@ -153,8 +149,6 @@ public void testGetJsonLdNonCC0License() throws ParseException {
153149
datasetVersion.setDataset(dataset);
154150
datasetVersion.setVersionState(DatasetVersion.VersionState.DRAFT);
155151
assertEquals("", datasetVersion.getPublicationDateAsString());
156-
// Only published datasets return any JSON.
157-
assertEquals("", datasetVersion.getJsonLd());
158152
datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED);
159153
datasetVersion.setVersionNumber(1L);
160154
datasetVersion.setMinorVersionNumber(0L);
@@ -199,4 +193,24 @@ public void testGetJsonLdNonCC0License() throws ParseException {
199193
assertEquals("LibraScholar", obj.getJsonObject("includedInDataCatalog").getString("name"));
200194
}
201195

196+
@Test
197+
public void testGetJsonLdDraft() throws ParseException {
198+
Dataset dataset = new Dataset();
199+
License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0"), URI.create("/resources/images/cc0.png"), true, 1l);
200+
license.setDefault(true);
201+
dataset.setProtocol("doi");
202+
dataset.setAuthority("10.5072/FK2");
203+
dataset.setIdentifier("LK0D1H");
204+
DatasetVersion datasetVersion = new DatasetVersion();
205+
datasetVersion.setDataset(dataset);
206+
datasetVersion.setVersionState(DatasetVersion.VersionState.DRAFT);
207+
assertEquals("", datasetVersion.getPublicationDateAsString());
208+
String jsonLd = datasetVersion.getJsonLd();
209+
logger.fine("jsonLd: " + JsonUtil.prettyPrint(jsonLd));
210+
JsonReader jsonReader = Json.createReader(new StringReader(jsonLd));
211+
JsonObject obj = jsonReader.readObject();
212+
assertEquals("http://schema.org", obj.getString("@context"));
213+
assertEquals("DRAFT", obj.getString("version"));
214+
}
215+
202216
}

0 commit comments

Comments
 (0)