Skip to content

Commit 61b8046

Browse files
authored
Merge pull request #11003 from GlobalDataverseCommunityConsortium/HarvestDatasetUsingPID
Fix lookups of Harvested datasets with lower-case versions of persistent identifiers in the database
2 parents b28812b + b8c0c40 commit 61b8046

File tree

8 files changed

+34
-10
lines changed

8 files changed

+34
-10
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
MDC Citation retrieval with the PID settings has been fixed.
2-
DOI parsing in Dataverse is case insensitive, improving interaction with services that may change the case.
2+
PID parsing in Dataverse is now case insensitive, improving interaction with services that may change the case of PIDs.
33
Warnings related to managed/excluded PID lists for PID providers have been reduced.

doc/sphinx-guides/source/installation/config.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,10 @@ Dataverse automatically manages assigning PIDs and making them findable when dat
236236
allow updating the PID target URLs and metadata of already-published datasets manually if needed <send-metadata-to-pid-provider>`, e.g. if a Dataverse instance is
237237
moved to a new URL or when the software is updated to generate additional metadata or address schema changes at the PID service.
238238

239+
Note that while some forms of PIDs (Handles, PermaLinks) are technically case sensitive, common practice is to avoid creating PIDs that differ only by case.
240+
Dataverse treats PIDs of all types as case-insensitive (as DOIs are by definition). This means that Dataverse will find datasets (in search, to display dataset pages, etc.)
241+
when the PIDs entered do not match the case of the original, but will have a problem if two PIDs that differ only by case exist in one instance.
242+
239243
Testing PID Providers
240244
+++++++++++++++++++++
241245

src/main/java/edu/harvard/iq/dataverse/DvObject.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,17 @@
2727
@NamedQuery(name = "DvObject.ownedObjectsById",
2828
query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id"),
2929
@NamedQuery(name = "DvObject.findByGlobalId",
30-
query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"),
30+
query = "SELECT o FROM DvObject o WHERE UPPER(o.identifier)=UPPER(:identifier) and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"),
3131
@NamedQuery(name = "DvObject.findIdByGlobalId",
32-
query = "SELECT o.id FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"),
32+
query = "SELECT o.id FROM DvObject o WHERE UPPER(o.identifier)=UPPER(:identifier) and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"),
3333

3434
@NamedQuery(name = "DvObject.findByAlternativeGlobalId",
3535
query = "SELECT o FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"),
3636
@NamedQuery(name = "DvObject.findIdByAlternativeGlobalId",
3737
query = "SELECT o.id FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"),
3838

3939
@NamedQuery(name = "DvObject.findByProtocolIdentifierAuthority",
40-
query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol"),
40+
query = "SELECT o FROM DvObject o WHERE UPPER(o.identifier)=UPPER(:identifier) and o.authority=:authority and o.protocol=:protocol"),
4141
@NamedQuery(name = "DvObject.findByOwnerId",
4242
query = "SELECT o FROM DvObject o WHERE o.owner.id=:ownerId order by o.dtype desc, o.id"),
4343
@NamedQuery(name = "DvObject.findByAuthenticatedUserId",
@@ -53,7 +53,8 @@
5353
@Table(indexes = {@Index(columnList="dtype")
5454
, @Index(columnList="owner_id")
5555
, @Index(columnList="creator_id")
56-
, @Index(columnList="releaseuser_id")},
56+
, @Index(columnList="releaseuser_id")
57+
, @Index(columnList="authority,protocol, UPPER(identifier)", name="INDEX_DVOBJECT_authority_protocol_upper_identifier")},
5758
uniqueConstraints = {@UniqueConstraint(columnNames = {"authority,protocol,identifier"}),@UniqueConstraint(columnNames = {"owner_id,storageidentifier"})})
5859
public abstract class DvObject extends DataverseEntity implements java.io.Serializable {
5960

src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
313313
// Creating a new dataset from scratch:
314314

315315
harvestedDataset = parser.parseDataset(obj);
316-
316+
317317
harvestedDataset.setHarvestedFrom(harvestingClient);
318318
harvestedDataset.setHarvestIdentifier(harvestIdentifier);
319319

src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@
5959
* service.
6060
* As of now, it only does the registration updates, to accommodate
6161
* the modifyRegistration datasets API sub-command.
62+
*
63+
* Note that while Handles are nominally case sensitive, handle.net is
64+
* configured to be case-insensitive and Dataverse makes case-insensitive
65+
* database look-ups to find Handles (See #11003). That said, database
66+
* entries are stored in the case matching the configuration of the provider.
6267
*/
6368
public class HandlePidProvider extends AbstractPidProvider {
6469

src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
* overridable by a configurable parameter to support use of an external
2525
* resolver.
2626
*
27+
* Note that while PermaLinks are nominally case sensitive, Dataverse makes
28+
* case-insensitive database look-ups to find them (See #11003). That said, database
29+
* entries are stored in the case matching the configuration of the provider.
2730
*/
2831
public class PermaLinkPidProvider extends AbstractPidProvider {
2932

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
-- Adding a case-insensitive index related to #11003
2+
--
3+
4+
-- Unique index on (authority, protocol, UPPER(identifier)): identifiers are compared
-- in upper case, so two rows whose identifiers differ only by case cannot coexist.
-- NOTE(review): because the index is UNIQUE, creation presumably fails if such rows
-- already exist in dvobject -- confirm against existing data before upgrading.
CREATE UNIQUE INDEX IF NOT EXISTS INDEX_DVOBJECT_authority_protocol_upper_identifier ON dvobject (authority, protocol, UPPER(identifier));

src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@
9999
@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "FAKE 1", varArgs = "fake1")
100100
@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = FakeDOIProvider.TYPE, varArgs = "fake1")
101101
@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "10.5074", varArgs = "fake1")
102-
@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "FK", varArgs = "fake1")
102+
@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "fk", varArgs = "fake1")
103103
@JvmSetting(key = JvmSettings.PID_PROVIDER_MANAGED_LIST, value = "doi:10.5073/FK3ABCDEF", varArgs ="fake1")
104104

105105
//HANDLE 1
@@ -315,6 +315,13 @@ public void testUnmanagedParsing() throws IOException {
315315
GlobalId pid6 = PidUtil.parseAsGlobalID(pid6String);
316316
assertEquals(pid6String, pid6.asString());
317317
assertEquals(UnmanagedPermaLinkPidProvider.ID, pid6.getProviderId());
318+
319+
//Lowercase test for unmanaged DOIs
320+
String pid7String = "doi:10.5281/zenodo.6381129";
321+
GlobalId pid7 = PidUtil.parseAsGlobalID(pid7String);
322+
assertEquals(UnmanagedDOIProvider.ID, pid5.getProviderId());
323+
assertEquals(pid7String.toUpperCase().replace("DOI", "doi"), pid7.asString());
324+
318325

319326
}
320327

@@ -353,15 +360,15 @@ public void testExcludedSetParsing() throws IOException {
353360
@Test
354361
public void testManagedSetParsing() throws IOException {
355362

356-
String pid1String = "doi:10.5073/FK3ABCDEF";
363+
String pid1String = "doi:10.5073/fk3ABCDEF";
357364
GlobalId pid2 = PidUtil.parseAsGlobalID(pid1String);
358-
assertEquals(pid1String, pid2.asString());
365+
assertEquals(pid1String.toUpperCase().replace("DOI", "doi"), pid2.asString());
359366
assertEquals("fake1", pid2.getProviderId());
360367
assertEquals("https://doi.org/" + pid2.getAuthority() + PidUtil.getPidProvider(pid2.getProviderId()).getSeparator() + pid2.getIdentifier(),pid2.asURL());
361368
assertEquals("10.5073", pid2.getAuthority());
362369
assertEquals(AbstractDOIProvider.DOI_PROTOCOL, pid2.getProtocol());
363370
GlobalId pid3 = PidUtil.parseAsGlobalID(pid2.asURL());
364-
assertEquals(pid1String, pid3.asString());
371+
assertEquals(pid1String.toUpperCase().replace("DOI", "doi"), pid3.asString());
365372
assertEquals("fake1", pid3.getProviderId());
366373
assertFalse(PidUtil.getPidProvider(pid3.getProviderId()).canCreatePidsLike(pid3));
367374

0 commit comments

Comments
 (0)