diff --git a/doc/release-notes/11832-DataCite-scaling.md b/doc/release-notes/11832-DataCite-scaling.md
new file mode 100644
index 00000000000..574ef05f94c
--- /dev/null
+++ b/doc/release-notes/11832-DataCite-scaling.md
@@ -0,0 +1,14 @@
+This release adds functionality to retry calls to DataCite when their server is overloaded or Dataverse has hit their rate limit.
+
+It also introduces an option to only update DataCite metadata after checking to see if the current DataCite information is out of date.
+(This adds a request to get information from DataCite before any potential write of new information, which is more efficient when
+most DOIs have not changed but results in an extra call to get info when a DOI has changed.)
+
+Both of these can help when DataCite is being used heavily, e.g. creating and publishing datasets with many datafiles and using file DOIs,
+or doing bulk operations that involve DataCite with many datasets.
+
+### New Settings
+
+- dataverse.feature.only-update-datacite-when-needed
+
+The default is false: unless the flag is enabled, Dataverse does not check whether DataCite's information is out of date before sending an update.
diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index bbf0a0d2449..91dc63c36cb 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -537,6 +537,8 @@ dataverse.pid.*.datacite.username
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
dataverse.pid.*.datacite.password
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+dataverse.feature.only-update-datacite-when-needed
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
PID Providers of type ``datacite`` require four additional parameters that define how the provider connects to DataCite.
DataCite has two APIs that are used in Dataverse:
@@ -552,6 +554,11 @@ for `Fabrica `_ and their APIs. You need to provide
the same credentials (``username``, ``password``) to Dataverse software to mint and manage DOIs for you.
As noted above, you should use one of the more secure options for setting the password.
+The ``only-update-datacite-when-needed`` feature flag is a global option that causes Dataverse to GET the latest metadata from DataCite
+for a DOI, compare it with the current metadata in Dataverse, and only send a subsequent POST request if needed. This potentially
+substitutes a read for an unnecessary write at DataCite, but would result in extra reads when all metadata in Dataverse is new.
+Setting the flag to "true" is recommended when using DataCite file DOIs.
+
CrossRef-specific Settings
^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -3824,6 +3831,9 @@ please find all known feature flags below. Any of these flags can be activated u
* - role-assignment-history
- Turns on tracking/display of role assignments and revocations for collections, datasets, and files
- ``Off``
+ * - only-update-datacite-when-needed
+ - Only contact DataCite to update a DOI after checking to see if DataCite has outdated information (for efficiency, lighter load on DataCite, especially when using file DOIs).
+ - ``Off``
**Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable
``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development `, you can set them in the `docker compose `_ file.
diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java
index a4d788de4df..d9ddfe04393 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java
@@ -95,7 +95,14 @@ public String reRegisterIdentifier(String identifier, Map metada
}
retString = "metadata:\\r" + client.postMetadata(xmlMetadata) + "\\r";
}
- if (!target.equals(client.getUrl(numericIdentifier))) {
+ String currentUrl = null;
+ try {
+ //May get a 204 if the DOI is still draft
+ currentUrl = client.getUrl(numericIdentifier);
+ } catch (RuntimeException ex) {
+ logger.fine("Error getting Url for " + numericIdentifier + ": " + ex.getMessage());
+ }
+ if (!target.equals(currentUrl)) {
logger.info("Updating target URL to " + target);
client.postUrl(numericIdentifier, target);
retString = retString + "url:\\r" + target;
diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java
index dd64a89dfe6..3c21699d45e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java
@@ -15,6 +15,7 @@
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.GlobalId;
import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider;
+import edu.harvard.iq.dataverse.settings.FeatureFlags;
import edu.harvard.iq.dataverse.util.json.JsonUtil;
import jakarta.json.JsonObject;
@@ -217,7 +218,11 @@ public boolean publicizeIdentifier(DvObject dvObject) {
metadata.put("datacite.publicationyear", generateYear(dvObject));
metadata.put("_target", getTargetUrl(dvObject));
try {
- doiDataCiteRegisterService.registerIdentifier(identifier, metadata, dvObject);
+ if (FeatureFlags.ONLY_UPDATE_DATACITE_WHEN_NEEDED.enabled()) {
+ doiDataCiteRegisterService.reRegisterIdentifier(identifier, metadata, dvObject);
+ } else {
+ doiDataCiteRegisterService.registerIdentifier(identifier, metadata, dvObject);
+ }
return true;
} catch (Exception e) {
logger.log(Level.WARNING, "modifyMetadata failed: " + e.getMessage(), e);
diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java
index 465b10ee407..47394f0ad54 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java
@@ -41,6 +41,10 @@ public class DataCiteRESTfullClient implements Closeable {
private static final Logger logger = Logger.getLogger(DataCiteRESTfullClient.class.getCanonicalName());
+ // Constants for retry mechanism
+ private static final int MAX_RETRIES = 5;
+ private static final long RETRY_DELAY_MS = 10000; // 10 seconds
+
private String url;
private CloseableHttpClient httpClient;
private HttpClientContext context;
@@ -59,11 +63,78 @@ public DataCiteRESTfullClient(String url, String username, String password) {
public void close() {
if (this.httpClient != null) {
try {
- httpClient.close();
+ httpClient.close();
} catch (IOException io) {
- logger.warning("IOException closing hhtpClient: " + io.getMessage());
- }
+ logger.warning("IOException closing httpClient: " + io.getMessage());
+ }
+ }
+ }
+
+    /**
+     * Execute an HTTP request, retrying when DataCite answers with a retryable
+     * status code (429, 503 or 504) or the call fails with an IOException. At most
+     * MAX_RETRIES attempts are made, with RETRY_DELAY_MS between attempts.
+     * The same request object is re-sent on every attempt.
+     *
+     * @param request The HTTP request to execute
+     * @param operationName Name of the operation for logging
+     * @return HttpResponse The first response with a non-retryable status, or the
+     *         last retryable response once all attempts are used up. The entity of
+     *         the returned response is left unconsumed so callers can read the body.
+     * @throws IOException If the last attempt fails with an IOException, or the
+     *         thread is interrupted while waiting to retry
+     */
+    private HttpResponse executeWithRetry(org.apache.http.client.methods.HttpRequestBase request, String operationName) throws IOException {
+        int attempts = 0;
+
+        while (attempts < MAX_RETRIES) {
+            // Describes the failed attempt for the retry log message below
+            String failure;
+            try {
+                HttpResponse response = httpClient.execute(request, context);
+                int statusCode = response.getStatusLine().getStatusCode();
+
+                if (statusCode != 429 && statusCode != 503 && statusCode != 504) {
+                    // Success or non-retry error code
+                    return response;
+                }
+                // Retryable status code: count this attempt
+                attempts++;
+                if (attempts >= MAX_RETRIES) {
+                    logger.severe("DataCite API failed with status " + statusCode +
+                            " for " + operationName + " after " + MAX_RETRIES + " attempts");
+                    // Return the last failed response with its entity intact: callers
+                    // read the body to build their error message, so consuming it
+                    // here first would leave them nothing to read.
+                    return response;
+                }
+                // This response is being discarded, so release its connection
+                EntityUtils.consumeQuietly(response.getEntity());
+                failure = "DataCite API returned status " + statusCode + " for " + operationName;
+            } catch (IOException ioe) {
+                attempts++;
+                if (attempts >= MAX_RETRIES) {
+                    logger.severe("DataCite API failed for " + operationName + " after " +
+                            MAX_RETRIES + " attempts due to: " + ioe.getMessage());
+                    throw ioe;
+                }
+                failure = "IOException during " + operationName + ": " + ioe.getMessage();
+            }
+
+            logger.warning(failure + ". Retrying in " + (RETRY_DELAY_MS / 1000) +
+                    " seconds (attempt " + attempts + " of " + MAX_RETRIES + ")");
+            // Sleep outside the try/catch above so that an interruption is reported
+            // to the caller right away instead of being counted as a failed attempt.
+            try {
+                Thread.sleep(RETRY_DELAY_MS);
+            } catch (InterruptedException ie) {
+                Thread.currentThread().interrupt();
+                throw new IOException("Retry interrupted", ie);
+            }
         }
+
+        // Only reachable if MAX_RETRIES is not positive
+        throw new IOException("Failed to execute request after " + MAX_RETRIES + " attempts");
     }
/**
@@ -75,7 +146,7 @@ public void close() {
public String getUrl(String doi) {
HttpGet httpGet = new HttpGet(this.url + "/doi/" + doi);
try {
- HttpResponse response = httpClient.execute(httpGet,context);
+ HttpResponse response = executeWithRetry(httpGet, "getUrl");
HttpEntity entity = response.getEntity();
String data = null;
@@ -104,7 +175,7 @@ public String postUrl(String doi, String url) throws IOException {
httpPost.setHeader("Content-Type", "text/plain;charset=UTF-8");
httpPost.setEntity(new StringEntity("doi=" + doi + "\nurl=" + url, "utf-8"));
- HttpResponse response = httpClient.execute(httpPost, context);
+ HttpResponse response = executeWithRetry(httpPost, "postUrl");
String data = EntityUtils.toString(response.getEntity(), encoding);
if (response.getStatusLine().getStatusCode() != 201) {
String errMsg = "Response from postUrl: " + response.getStatusLine().getStatusCode() + ", " + data;
@@ -124,7 +195,7 @@ public String getMetadata(String doi) {
HttpGet httpGet = new HttpGet(this.url + "/metadata/" + doi);
httpGet.setHeader("Accept", "application/xml");
try {
- HttpResponse response = httpClient.execute(httpGet,context);
+ HttpResponse response = executeWithRetry(httpGet, "getMetadata");
String data = EntityUtils.toString(response.getEntity(), encoding);
if (response.getStatusLine().getStatusCode() != 200) {
String errMsg = "Response from getMetadata: " + response.getStatusLine().getStatusCode() + ", " + data;
@@ -133,7 +204,7 @@ public String getMetadata(String doi) {
}
return data;
} catch (IOException ioe) {
- logger.log(Level.SEVERE, "IOException when get metadata");
+ logger.log(Level.SEVERE, "IOException when get metadata", ioe);
throw new RuntimeException("IOException when get metadata", ioe);
}
}
@@ -147,7 +218,7 @@ public String getMetadata(String doi) {
public boolean testDOIExists(String doi) throws IOException {
HttpGet httpGet = new HttpGet(this.url + "/metadata/" + doi);
httpGet.setHeader("Accept", "application/xml");
- HttpResponse response = httpClient.execute(httpGet, context);
+ HttpResponse response = executeWithRetry(httpGet, "testDOIExists");
if (response.getStatusLine().getStatusCode() != 200) {
EntityUtils.consumeQuietly(response.getEntity());
return false;
@@ -166,7 +237,7 @@ public String postMetadata(String metadata) throws IOException {
HttpPost httpPost = new HttpPost(this.url + "/metadata");
httpPost.setHeader("Content-Type", "application/xml;charset=UTF-8");
httpPost.setEntity(new StringEntity(metadata, "utf-8"));
- HttpResponse response = httpClient.execute(httpPost, context);
+ HttpResponse response = executeWithRetry(httpPost, "postMetadata");
String data = EntityUtils.toString(response.getEntity(), encoding);
if (response.getStatusLine().getStatusCode() != 201) {
String errMsg = "Response from postMetadata: " + response.getStatusLine().getStatusCode() + ", " + data;
@@ -185,7 +256,7 @@ public String postMetadata(String metadata) throws IOException {
public String inactiveDataset(String doi) {
HttpDelete httpDelete = new HttpDelete(this.url + "/metadata/" + doi);
try {
- HttpResponse response = httpClient.execute(httpDelete,context);
+ HttpResponse response = executeWithRetry(httpDelete, "inactiveDataset");
String data = EntityUtils.toString(response.getEntity(), encoding);
if (response.getStatusLine().getStatusCode() != 200) {
String errMsg = "Response code: " + response.getStatusLine().getStatusCode() + ", " + data;
@@ -194,7 +265,7 @@ public String inactiveDataset(String doi) {
}
return data;
} catch (IOException ioe) {
- logger.log(Level.SEVERE, "IOException when inactive dataset");
+ logger.log(Level.SEVERE, "IOException when inactive dataset", ioe);
throw new RuntimeException("IOException when inactive dataset", ioe);
}
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java
index 6f513e30dec..2e86fae610e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java
+++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java
@@ -235,6 +235,21 @@ public enum FeatureFlags {
* or revoked, at what times, and by whom.
*/
ROLE_ASSIGNMENT_HISTORY("role-assignment-history"),
+
+ /**
+ * Only update a DataCite DOI when needed (for efficiency, lighter load on DataCite).
+ * This flag causes Dataverse to GET the latest metadata from DataCite for a DOI,
+ * compare it with the current metadata in Dataverse, and only send a subsequent POST
+ * request if needed. This potentially substitutes a read for an unnecessary write at DataCite,
+ * but would result in extra reads when all metadata in Dataverse is new. Setting the flag
+ * to "true" is recommended when using DataCite file DOIs.
+ *
+ * @apiNote Raise flag by setting
+ * "dataverse.feature.only-update-datacite-when-needed"
+ * @since Dataverse 6.9
+ */
+ ONLY_UPDATE_DATACITE_WHEN_NEEDED("only-update-datacite-when-needed"),
+
;
final String flag;