Skip to content

Commit 9e63eea

Browse files
authored
Merge pull request #11576 from IQSS/11479-harvester-broken-generic-oai
A fix for a recent bug in harvesting from some generic OAI archives
2 parents e5bea5a + 43873a4 commit 9e63eea

File tree

1 file changed

+22
-25
lines changed

1 file changed

+22
-25
lines changed

src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -189,32 +189,29 @@ public DatasetDTO processOAIDCxml(String DcXmlToParse, String oaiIdentifier, boo
189189

190190
datasetDTO.getDatasetVersion().setVersionState(DatasetVersion.VersionState.RELEASED);
191191

192-
// In some cases, the identifier that we want to use for the dataset is
193-
// already supplied to the method explicitly. For example, in some
194-
// harvesting cases we'll want to use the OAI identifier (the identifier
195-
// from the <header> section of the OAI record) for that purpose, without
196-
// expecting to find a valid persistent id in the body of the DC record:
197-
198-
String globalIdentifier;
199-
200-
if (oaiIdentifier != null) {
201-
logger.fine("Attempting to use " + oaiIdentifier + " as the persistentId of the imported dataset");
202-
203-
globalIdentifier = reassignIdentifierAsGlobalId(oaiIdentifier, datasetDTO);
204-
} else {
205-
// Our DC import handles the contents of the dc:identifier field
206-
// as an "other id". Unless we are using an externally supplied
207-
// global id, we will be using the first such "other id" that we
208-
// can parse and recognize as the global id for the imported dataset
209-
// (note that this is the default behavior during harvesting),
210-
// so we need to reassign it accordingly:
211-
String identifier = selectIdentifier(datasetDTO.getDatasetVersion(), oaiIdentifier, preferSuppliedIdentifier);
212-
logger.fine("Imported identifier: " + identifier);
213-
214-
globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO);
215-
logger.fine("Detected global identifier: " + globalIdentifier);
216-
}
192+
// Note that in some harvesting cases we will want to use the OAI
193+
// identifier (the identifier from the <header> section of the OAI
194+
// record) for the global id of the harvested dataset, without expecting
195+
// to find a valid persistent id in the body of the DC record. This is
196+
// the use case when harvesting from DataCite: we always want to use the
197+
// OAI identifier, disregarding any identifiers that may be found within
198+
// the metadata record.
199+
//
200+
// Otherwise, we will look at the list of identifiers extracted from the
201+
// <dc:identifier> fields in the OAI_DC record. Our DC parser uses these
202+
// to populate the "Other Id" field in the Citation block. The first one
203+
// of these that parses as a valid Persistent Identifier will be
204+
// selected to serve as the global id for the imported dataset. If none
205+
// are found there, we will try to use the OAI identifier as the last
206+
// resort. Note that this is the default behavior.
217207

208+
String candidateGlobalId = selectIdentifier(datasetDTO.getDatasetVersion(), oaiIdentifier, preferSuppliedIdentifier);
209+
logger.fine("Selected global identifier: " + candidateGlobalId);
210+
211+
// Re-assign the selected identifier to serve as the main persistent Id:
212+
String globalIdentifier = reassignIdentifierAsGlobalId(candidateGlobalId, datasetDTO);
213+
logger.fine("Successfully re-assigned the global identifier: " + globalIdentifier);
214+
218215
if (globalIdentifier == null) {
219216
String exceptionMsg = oaiIdentifier == null ?
220217
"Failed to find a global identifier in the OAI_DC XML record." :

0 commit comments

Comments
 (0)