diff --git a/doc/release-notes/11601-pid-fail-tracking.md b/doc/release-notes/11601-pid-fail-tracking.md new file mode 100644 index 00000000000..8a020c18c8e --- /dev/null +++ b/doc/release-notes/11601-pid-fail-tracking.md @@ -0,0 +1,3 @@ +This version of Dataverse includes a new feature flag - ``dataverse.feature.enable-pid-failure-log``. When set, Dataverse will log failed requests for dataset and file pages via persistentId to monthly log files of the form ``PIDFailures_{yyyy-MM}.log`` (for example, ``PIDFailures_2025-07.log``). These failures potentially indicate that someone has shared a draft PID without publishing, or that a '.' or other character has been added to the PID, which may be of interest to site administrators. + +The new log files can be used in concert with the pidreport.py script at https://github.com/gdcc/dataverse-recipes/tree/main/python/pid_reports to generate and email monthly PID failure reports. \ No newline at end of file diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a6aa44df974..b77b9d45a06 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3800,6 +3800,9 @@ please find all known feature flags below. Any of these flags can be activated u * - add-local-contexts-permission-check - Adds a permission check to ensure that the user calling the /api/localcontexts/datasets/{id} API can edit the dataset with that id. This is currently the only use case - see https://github.com/gdcc/dataverse-external-vocab-support/tree/main/packages/local_contexts. The flag adds additional security to stop other uses, but would currently have to be used in conjunction with the api-session-auth feature flag (the security implications of which have not been fully investigated) to still allow adding Local Contexts metadata to a dataset. 
- ``Off`` + * - enable-pid-failure-log + - Turns on creation of a monthly log file (``logs/PIDFailures_{yyyy-MM}.log``) showing failed requests for dataset/file PIDs. Can be used directly or with scripts at https://github.com/gdcc/dataverse-recipes/tree/main/python/pid_reports to alert admins. + - ``Off`` **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development `, you can set them in the `docker compose `_ file. diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index f52163192f7..cca9be7ce9d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -19,7 +19,10 @@ import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean; +import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean; +import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -37,6 +40,8 @@ import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; import jakarta.ejb.TransactionAttributeType; +import jakarta.faces.context.FacesContext; +import jakarta.inject.Inject; import jakarta.inject.Named; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; @@ -44,6 +49,7 @@ import 
jakarta.persistence.PersistenceContext; import jakarta.persistence.Query; import jakarta.persistence.TypedQuery; +import jakarta.servlet.http.HttpServletRequest; import org.apache.commons.lang3.StringUtils; /** @@ -86,6 +92,9 @@ public class DatasetServiceBean implements java.io.Serializable { @EJB SystemConfig systemConfig; + + @Inject + FailedPIDResolutionLoggingServiceBean fprLogService; @EJB GlobusServiceBean globusServiceBean; @@ -94,6 +103,8 @@ public class DatasetServiceBean implements java.io.Serializable { UserNotificationServiceBean userNotificationService; private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); + + private static final boolean pidFailureLoggingEnabled = FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled(); @PersistenceContext(unitName = "VDCNet-ejbPU") protected EntityManager em; @@ -308,8 +319,19 @@ public Dataset findByGlobalId(String globalId) { if (retVal != null){ return retVal; } else { - //try to find with alternative PID - return (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset); + // try to find with alternative PID + retVal = (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset); + if (retVal == null && pidFailureLoggingEnabled) { + try { + + HttpServletRequest httpRequest = ((HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest()); + FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(globalId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress()); + fprLogService.logEntry(entry); + } catch (NullPointerException npe) { + // Do nothing - this is an API call with no FacesContext + } + } + return retVal; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 76ef91fbd3a..46e8263da15 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; + import static edu.harvard.iq.dataverse.api.Datasets.handleVersion; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; @@ -22,10 +23,13 @@ import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; +import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry; import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean; import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.DateUtil; @@ -37,6 +41,7 @@ import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; +import jakarta.inject.Inject; import jakarta.json.*; import jakarta.json.JsonValue.ValueType; import jakarta.persistence.EntityManager; @@ -232,6 +237,9 @@ String getWrappedMessageWhenJson() { @EJB GuestbookResponseServiceBean gbRespSvc; + @Inject + FailedPIDResolutionLoggingServiceBean fprLogService; + @PersistenceContext(unitName = "VDCNet-ejbPU") protected EntityManager em; @@ -408,8 +416,13 @@ protected Dataset findDatasetOrDie(String id, boolean deep) throws WrappedRespon 
datasetId = dvObjSvc.findIdByAltGlobalId(globalId, DvObject.DType.Dataset); } if (datasetId == null) { + if (FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) { + + FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress()); + fprLogService.logEntry(entry); + } throw new WrappedResponse( - notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1))))); + notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1))))); } } if (deep) { @@ -479,6 +492,11 @@ protected DataFile findDataFileOrDie(String id) throws WrappedResponse { } datafile = fileService.findByGlobalId(persistentId); if (datafile == null) { + if (FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) { + + FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress()); + fprLogService.logEntry(entry); + } throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.datafile.error.dataset.not.found.persistentId", Collections.singletonList(persistentId)))); } return datafile; diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/FailedPIDResolutionLoggingServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/FailedPIDResolutionLoggingServiceBean.java new file mode 100644 index 00000000000..364a424f819 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/FailedPIDResolutionLoggingServiceBean.java @@ -0,0 +1,161 @@ +/* + * To change this license header, choose License Headers in Project Properties. 
+ * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package edu.harvard.iq.dataverse.pidproviders; + +import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; +import edu.harvard.iq.dataverse.batch.util.LoggingUtil; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.sql.Timestamp; +import java.text.SimpleDateFormat; +import java.util.Date; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Named; + +/** + * + * @author qqmyers + */ + +@Named +@RequestScoped +public class FailedPIDResolutionLoggingServiceBean { + + public static final String LOG_HEADER = "#Fields: pid\trequestURI\tHTTP method\tclient_ip\teventTime\n"; + + + public void logEntry(FailedPIDResolutionEntry entry) { + LoggingUtil.saveLogFileAppendWithHeader(entry.toString(), "../logs", getLogFileName(), LOG_HEADER); + } + + public String getLogFileName() { + return "PIDFailures_" + new SimpleDateFormat("yyyy-MM").format(new Timestamp(new Date().getTime())) + ".log"; + } + + public static class FailedPIDResolutionEntry { + + private String eventTime; + private String clientIp; + private String requestUrl; + private String identifier; + private String method; + + public FailedPIDResolutionEntry() { + + } + + public FailedPIDResolutionEntry(String persistentId, String requestURI, String method, IpAddress sourceAddress) { + try { + setIdentifier(URLEncoder.encode(persistentId, StandardCharsets.UTF_8.toString())); + } catch (UnsupportedEncodingException e) { + // Should never happen + e.printStackTrace(); + } + setRequestUrl(requestURI); + setMethod(method); + setClientIp(sourceAddress.toString()); + setEventTime(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ").format(new Timestamp(new Date().getTime()))); + } + + @Override + public String toString() { + return getIdentifier() + "\t" + + getRequestUrl() + "\t" + + getMethod() + "\t" 
+ + getClientIp() + "\t" + + getEventTime() + "\n"; + } + + /** + * @return the eventTime + */ + public String getEventTime() { + if (eventTime == null) { + return "-"; + } + return eventTime; + } + + /** + * @param eventTime + * the eventTime to set + */ + public final void setEventTime(String eventTime) { + this.eventTime = eventTime; + } + + /** + * @return the clientIp + */ + public String getClientIp() { + if (clientIp == null) { + return "-"; + } + return clientIp; + } + + /** + * @param clientIp + * the clientIp to set + */ + public final void setClientIp(String clientIp) { + this.clientIp = clientIp; + } + + /** + * @return the HTTP Method + */ + public String getMethod() { + return method; + } + + /** + * @param method + * - the HTTP Method used + */ + public final void setMethod(String method) { + this.method = method; + } + + /** + * @return the requestUrl + */ + public String getRequestUrl() { + if (requestUrl == null) { + return "-"; + } + return requestUrl; + } + + /** + * @param requestUrl + * the requestUrl to set + */ + public final void setRequestUrl(String requestUrl) { + this.requestUrl = requestUrl; + } + + /** + * @return the identifier + */ + public String getIdentifier() { + if (identifier == null) { + return "-"; + } + return identifier; + } + + /** + * @param identifier + * the identifier to set + */ + public final void setIdentifier(String identifier) { + this.identifier = identifier; + } + + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index e3042b529e9..ae117159b5c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -218,6 +218,18 @@ public enum FeatureFlags { * @since Dataverse 6.5 */ ADD_LOCAL_CONTEXTS_PERMISSION_CHECK("add-local-contexts-permission-check"), + + /** + * This flag turns on creation of a monthly log file that 
tracks when requests for + * datasets/files with PIDs fail due to the PIDs not existing. This helps in catching + * cases where the DOI of a draft dataset has been cited, etc. + * + * @apiNote Raise flag by setting + * "dataverse.feature.enable-pid-failure-log" + * @since Dataverse 6.8 + */ + ENABLE_PID_FAILURE_LOG("enable-pid-failure-log"), + ; final String flag;