Skip to content

Commit 14da5bd

Browse files
authored
Merge pull request #11417 from GlobalDataverseCommunityConsortium/Perf1
Memory Improvements in Export/Ingest
2 parents 6c0c3b7 + 5f9abe8 commit 14da5bd

File tree

7 files changed

+366
-311
lines changed

7 files changed

+366
-311
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Export Improvements
2+
3+
Memory usage has been reduced, and potential memory leaks have been closed, in the metadata exporters.

src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java

Lines changed: 150 additions & 164 deletions
Large diffs are not rendered by default.

src/main/java/edu/harvard/iq/dataverse/export/DDIExporter.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
import jakarta.json.JsonObject;
1616
import javax.xml.stream.XMLStreamException;
1717
import javax.xml.stream.XMLStreamWriter;
18+
19+
import org.apache.solr.common.util.IOUtils;
20+
1821
import javax.xml.stream.XMLOutputFactory;
1922

2023
/**
@@ -44,14 +47,25 @@ public String getDisplayName(Locale locale) {
4447

4548
@Override
4649
public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException {
50+
XMLStreamWriter xmlw = null;
51+
//XMLStreamWriter is not auto-closable - can't use try-with-resources here
4752
try {
48-
XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
53+
xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
4954
xmlw.writeStartDocument();
5055
xmlw.flush();
5156
DdiExportUtil.datasetJson2ddi(dataProvider.getDatasetJson(), dataProvider.getDatasetFileDetails(),
5257
outputStream);
5358
} catch (XMLStreamException xse) {
5459
throw new ExportException("Caught XMLStreamException performing DDI export", xse);
60+
} finally {
61+
if (xmlw != null) {
62+
try {
63+
xmlw.close();
64+
} catch (XMLStreamException e) {
65+
// Log this exception, but don't rethrow as it's not the primary issue
66+
e.printStackTrace();
67+
}
68+
}
5569
}
5670
}
5771

src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java

Lines changed: 53 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -117,19 +117,31 @@ private static String dto2ddi(DatasetDTO datasetDto) throws XMLStreamException {
117117
}
118118

119119
private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) throws XMLStreamException {
120-
XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
121-
xmlw.writeStartElement("codeBook");
122-
xmlw.writeDefaultNamespace("ddi:codebook:2_5");
123-
xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
124-
xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION);
125-
xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION);
126-
if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) {
127-
xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage());
128-
}
129-
createStdyDscr(xmlw, datasetDto);
130-
createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles());
131-
xmlw.writeEndElement(); // codeBook
132-
xmlw.flush();
120+
XMLStreamWriter xmlw = null;
121+
try {
122+
xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
123+
xmlw.writeStartElement("codeBook");
124+
xmlw.writeDefaultNamespace("ddi:codebook:2_5");
125+
xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
126+
xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION);
127+
xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION);
128+
if (DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) {
129+
xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage());
130+
}
131+
createStdyDscr(xmlw, datasetDto);
132+
createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles());
133+
xmlw.writeEndElement(); // codeBook
134+
xmlw.flush();
135+
} finally {
136+
if (xmlw != null) {
137+
try {
138+
xmlw.close();
139+
} catch (XMLStreamException e) {
140+
// Log this exception, but don't rethrow as it's in finally block
141+
logger.log(Level.WARNING, "Error closing XMLStreamWriter", e);
142+
}
143+
}
144+
}
133145
}
134146

135147

@@ -139,21 +151,34 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, JsonArray fileDe
139151
Gson gson = new Gson();
140152
DatasetDTO datasetDto = gson.fromJson(datasetDtoAsJson.toString(), DatasetDTO.class);
141153

142-
XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
143-
xmlw.writeStartElement("codeBook");
144-
xmlw.writeDefaultNamespace("ddi:codebook:2_5");
145-
xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
146-
xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION);
147-
xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION);
148-
if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) {
149-
xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage());
150-
}
151-
createStdyDscr(xmlw, datasetDto);
152-
createFileDscr(xmlw, fileDetails);
153-
createDataDscr(xmlw, fileDetails);
154-
createOtherMatsFromFileMetadatas(xmlw, fileDetails);
155-
xmlw.writeEndElement(); // codeBook
156-
xmlw.flush();
154+
XMLStreamWriter xmlw = null;
155+
try {
156+
xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
157+
158+
xmlw.writeStartElement("codeBook");
159+
xmlw.writeDefaultNamespace("ddi:codebook:2_5");
160+
xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
161+
xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION);
162+
xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION);
163+
if (DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) {
164+
xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage());
165+
}
166+
createStdyDscr(xmlw, datasetDto);
167+
createFileDscr(xmlw, fileDetails);
168+
createDataDscr(xmlw, fileDetails);
169+
createOtherMatsFromFileMetadatas(xmlw, fileDetails);
170+
xmlw.writeEndElement(); // codeBook
171+
xmlw.flush();
172+
} finally {
173+
if (xmlw != null) {
174+
try {
175+
xmlw.close();
176+
} catch (XMLStreamException e) {
177+
// Log this exception, but don't rethrow as it's in finally block
178+
logger.log(Level.WARNING, "Error closing XMLStreamWriter", e);
179+
}
180+
}
181+
}
157182
}
158183

159184
/**

src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java

Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.Iterator;
2525
import java.util.List;
2626
import java.util.Map;
27+
import java.util.logging.Level;
2728
import java.util.logging.Logger;
2829
import jakarta.json.JsonObject;
2930
import javax.xml.stream.XMLOutputFactory;
@@ -68,29 +69,40 @@ public static void datasetJson2dublincore(JsonObject datasetDtoAsJson, OutputStr
6869
}
6970

7071
private static void dto2dublincore(DatasetDTO datasetDto, OutputStream outputStream, String dcFlavor) throws XMLStreamException {
71-
XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
72-
if (DC_FLAVOR_DCTERMS.equals(dcFlavor)) {
73-
xmlw.writeStartDocument();
74-
xmlw.writeStartElement("metadata");
75-
xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
76-
xmlw.writeAttribute("xmlns:dc", DC_XML_NAMESPACE);
77-
xmlw.writeAttribute("xmlns:dcterms", DCTERMS_XML_NAMESPACE);
78-
xmlw.writeDefaultNamespace(DCTERMS_DEFAULT_NAMESPACE);
79-
//xmlw.writeAttribute("xsi:schemaLocation", DCTERMS_DEFAULT_NAMESPACE+" "+DCTERMS_XML_SCHEMALOCATION);
80-
createDC(xmlw, datasetDto, dcFlavor);
81-
} else if (DC_FLAVOR_OAI.equals(dcFlavor)) {
82-
xmlw.writeStartElement("oai_dc:dc");
83-
xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
84-
xmlw.writeAttribute("xmlns:oai_dc", OAI_DC_XML_NAMESPACE);
85-
xmlw.writeAttribute("xmlns:dc", DC_XML_NAMESPACE);
86-
xmlw.writeAttribute("xsi:schemaLocation", OAI_DC_XML_NAMESPACE+" "+OAI_DC_XML_SCHEMALOCATION);
87-
//writeAttribute(xmlw, "version", DEFAULT_XML_VERSION);
88-
createOAIDC(xmlw, datasetDto, dcFlavor);
72+
XMLStreamWriter xmlw = null;
73+
try {
74+
xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
75+
if (DC_FLAVOR_DCTERMS.equals(dcFlavor)) {
76+
xmlw.writeStartDocument();
77+
xmlw.writeStartElement("metadata");
78+
xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
79+
xmlw.writeAttribute("xmlns:dc", DC_XML_NAMESPACE);
80+
xmlw.writeAttribute("xmlns:dcterms", DCTERMS_XML_NAMESPACE);
81+
xmlw.writeDefaultNamespace(DCTERMS_DEFAULT_NAMESPACE);
82+
// xmlw.writeAttribute("xsi:schemaLocation", DCTERMS_DEFAULT_NAMESPACE+" "+DCTERMS_XML_SCHEMALOCATION);
83+
createDC(xmlw, datasetDto, dcFlavor);
84+
} else if (DC_FLAVOR_OAI.equals(dcFlavor)) {
85+
xmlw.writeStartElement("oai_dc:dc");
86+
xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
87+
xmlw.writeAttribute("xmlns:oai_dc", OAI_DC_XML_NAMESPACE);
88+
xmlw.writeAttribute("xmlns:dc", DC_XML_NAMESPACE);
89+
xmlw.writeAttribute("xsi:schemaLocation", OAI_DC_XML_NAMESPACE + " " + OAI_DC_XML_SCHEMALOCATION);
90+
// writeAttribute(xmlw, "version", DEFAULT_XML_VERSION);
91+
createOAIDC(xmlw, datasetDto, dcFlavor);
92+
}
93+
94+
xmlw.writeEndElement(); // <metadata> or <oai_dc:dc>
95+
xmlw.flush();
96+
} finally {
97+
if (xmlw != null) {
98+
try {
99+
xmlw.close();
100+
} catch (XMLStreamException e) {
101+
// Log this exception, but don't rethrow as it's in finally block
102+
logger.log(Level.WARNING, "Error closing XMLStreamWriter", e);
103+
}
104+
}
89105
}
90-
91-
92-
xmlw.writeEndElement(); // <metadata> or <oai_dc:dc>
93-
xmlw.flush();
94106
}
95107

96108
//UPDATED by rmo-cdsp:

src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import java.io.OutputStream;
44
import java.util.*;
5+
import java.util.logging.Level;
56
import java.util.logging.Logger;
67

78
import jakarta.json.JsonObject;
@@ -49,19 +50,31 @@ public static void datasetJson2openaire(JsonObject datasetDtoAsJson, OutputStrea
4950
}
5051

5152
private static void dto2openaire(DatasetDTO datasetDto, OutputStream outputStream) throws XMLStreamException {
52-
XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
53+
XMLStreamWriter xmlw = null;
54+
try {
55+
xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
5356

54-
xmlw.writeStartElement("resource"); // <resource>
57+
xmlw.writeStartElement("resource"); // <resource>
5558

56-
xmlw.writeAttribute("xmlns:xsi", XSI_NAMESPACE);
57-
xmlw.writeAttribute("xmlns", RESOURCE_NAMESPACE);
58-
xmlw.writeAttribute("xsi:schemaLocation", RESOURCE_NAMESPACE + " " + RESOURCE_SCHEMA_LOCATION);
59+
xmlw.writeAttribute("xmlns:xsi", XSI_NAMESPACE);
60+
xmlw.writeAttribute("xmlns", RESOURCE_NAMESPACE);
61+
xmlw.writeAttribute("xsi:schemaLocation", RESOURCE_NAMESPACE + " " + RESOURCE_SCHEMA_LOCATION);
5962

60-
createOpenAire(xmlw, datasetDto);
63+
createOpenAire(xmlw, datasetDto);
6164

62-
xmlw.writeEndElement(); // </resource>
65+
xmlw.writeEndElement(); // </resource>
6366

64-
xmlw.flush();
67+
xmlw.flush();
68+
} finally {
69+
if (xmlw != null) {
70+
try {
71+
xmlw.close();
72+
} catch (XMLStreamException e) {
73+
// Log this exception, but don't rethrow as it's in finally block
74+
logger.log(Level.WARNING, "Error closing XMLStreamWriter", e);
75+
}
76+
}
77+
}
6578
}
6679

6780
private static void createOpenAire(XMLStreamWriter xmlw, DatasetDTO datasetDto) throws XMLStreamException {

0 commit comments

Comments
 (0)