Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/release-notes/11601-pid-fail-tracking.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This version of Dataverse includes a new feature flag, ``dataverse.feature.enable-pid-failure-log``. When set, Dataverse logs failed requests for dataset and file pages made via persistentId to monthly log files of the form PIDFailures_<yyyy-MM>.log. Such failures can indicate that someone has shared a draft PID without publishing, or that a '.' or other character has been appended to the PID, which may be of interest to site administrators.

The new log files can be used in concert with the pidreport.py script at https://github.com/gdcc/dataverse-recipes/tree/main/python/pid_reports to generate and email monthly PID failure reports.
3 changes: 3 additions & 0 deletions doc/sphinx-guides/source/installation/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3800,6 +3800,9 @@ please find all known feature flags below. Any of these flags can be activated u
* - add-local-contexts-permission-check
- Adds a permission check to ensure that the user calling the /api/localcontexts/datasets/{id} API can edit the dataset with that id. This is currently the only use case - see https://github.com/gdcc/dataverse-external-vocab-support/tree/main/packages/local_contexts. The flag adds additional security to stop other uses, but would currently have to be used in conjunction with the api-session-auth feature flag (the security implications of which have not been fully investigated) to still allow adding Local Contexts metadata to a dataset.
- ``Off``
* - enable-pid-failure-log
- Turns on creation of a monthly log file (logs/PIDFailures_<yyyy-MM>.log) showing failed requests for dataset/file PIDs. Can be used directly or with scripts at https://github.com/gdcc/dataverse-recipes/tree/main/python/pid_reports to alert admins.
- ``Off``

**Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable
``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development </container/dev-usage>`, you can set them in the `docker compose <https://docs.docker.com/compose/environment-variables/set-environment-variables/>`_ file.
Expand Down
26 changes: 24 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
import edu.harvard.iq.dataverse.export.ExportService;
import edu.harvard.iq.dataverse.globus.GlobusServiceBean;
import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean;
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean;
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.settings.FeatureFlags;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
Expand All @@ -37,13 +40,16 @@
import jakarta.ejb.Stateless;
import jakarta.ejb.TransactionAttribute;
import jakarta.ejb.TransactionAttributeType;
import jakarta.faces.context.FacesContext;
import jakarta.inject.Inject;
import jakarta.inject.Named;
import jakarta.persistence.EntityManager;
import jakarta.persistence.NoResultException;
import jakarta.persistence.NonUniqueResultException;
import jakarta.persistence.PersistenceContext;
import jakarta.persistence.Query;
import jakarta.persistence.TypedQuery;
import jakarta.servlet.http.HttpServletRequest;
import org.apache.commons.lang3.StringUtils;

/**
Expand Down Expand Up @@ -86,6 +92,9 @@ public class DatasetServiceBean implements java.io.Serializable {

@EJB
SystemConfig systemConfig;

@Inject
FailedPIDResolutionLoggingServiceBean fprLogService;

@EJB
GlobusServiceBean globusServiceBean;
Expand All @@ -94,6 +103,8 @@ public class DatasetServiceBean implements java.io.Serializable {
UserNotificationServiceBean userNotificationService;

private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss");

private static final boolean pidFailureLoggingEnabled = FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled();

@PersistenceContext(unitName = "VDCNet-ejbPU")
protected EntityManager em;
Expand Down Expand Up @@ -308,8 +319,19 @@ public Dataset findByGlobalId(String globalId) {
if (retVal != null){
return retVal;
} else {
//try to find with alternative PID
return (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset);
// try to find with alternative PID
retVal = (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset);
if (retVal == null && pidFailureLoggingEnabled) {
try {

HttpServletRequest httpRequest = ((HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest());
FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(globalId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress());
fprLogService.logEntry(entry);
} catch (NullPointerException npe) {
// Do nothing - this is an API call with no FacesContext
}
}
return retVal;
}
}

Expand Down
20 changes: 19 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean;

import static edu.harvard.iq.dataverse.api.Datasets.handleVersion;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.DataverseRole;
Expand All @@ -22,10 +23,13 @@
import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand;
import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean;
import edu.harvard.iq.dataverse.license.LicenseServiceBean;
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry;
import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean;
import edu.harvard.iq.dataverse.metrics.MetricsServiceBean;
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean;
import edu.harvard.iq.dataverse.settings.FeatureFlags;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.DateUtil;
Expand All @@ -37,6 +41,7 @@
import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean;
import jakarta.ejb.EJB;
import jakarta.ejb.EJBException;
import jakarta.inject.Inject;
import jakarta.json.*;
import jakarta.json.JsonValue.ValueType;
import jakarta.persistence.EntityManager;
Expand Down Expand Up @@ -232,6 +237,9 @@ String getWrappedMessageWhenJson() {
@EJB
GuestbookResponseServiceBean gbRespSvc;

@Inject
FailedPIDResolutionLoggingServiceBean fprLogService;

@PersistenceContext(unitName = "VDCNet-ejbPU")
protected EntityManager em;

Expand Down Expand Up @@ -408,8 +416,13 @@ protected Dataset findDatasetOrDie(String id, boolean deep) throws WrappedRespon
datasetId = dvObjSvc.findIdByAltGlobalId(globalId, DvObject.DType.Dataset);
}
if (datasetId == null) {
if (FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) {

FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress());
fprLogService.logEntry(entry);
}
throw new WrappedResponse(
notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1)))));
notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1)))));
}
}
if (deep) {
Expand Down Expand Up @@ -479,6 +492,11 @@ protected DataFile findDataFileOrDie(String id) throws WrappedResponse {
}
datafile = fileService.findByGlobalId(persistentId);
if (datafile == null) {
if (FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) {

FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress());
fprLogService.logEntry(entry);
}
throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.datafile.error.dataset.not.found.persistentId", Collections.singletonList(persistentId))));
}
return datafile;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package edu.harvard.iq.dataverse.pidproviders;

import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress;
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.time.YearMonth;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import jakarta.enterprise.context.RequestScoped;
import jakarta.inject.Named;

/**
*
* @author qqmyers
*/

@Named
@RequestScoped
public class FailedPIDResolutionLoggingServiceBean {

    /** Header line describing the tab-separated columns written by {@link FailedPIDResolutionEntry#toString()}. */
    public static final String LOG_HEADER = "#Fields: pid\trequestURI\tHTTP method\tclient_ip\teventTime\n";

    /** Directory handed to {@code LoggingUtil} for the log files. */
    private static final String LOG_DIRECTORY = "../logs";

    /** Year/month suffix of the log file name; DateTimeFormatter is immutable and thread-safe, so it is shared. */
    private static final DateTimeFormatter LOG_FILE_MONTH_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM");

    /** Timestamp format of the eventTime column, e.g. {@code 2025-07-01T12:34:56+0000}. */
    private static final DateTimeFormatter EVENT_TIME_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssZ");

    /**
     * Hands one entry to {@code LoggingUtil.saveLogFileAppendWithHeader} together
     * with the log directory, the current month's file name and {@link #LOG_HEADER}
     * (presumably the header is only written when the file is new - verify against
     * LoggingUtil).
     *
     * @param entry the failed resolution to record; a {@code null} entry is ignored
     *              because this is best-effort logging and must not break the request
     */
    public void logEntry(FailedPIDResolutionEntry entry) {
        if (entry == null) {
            return;
        }
        LoggingUtil.saveLogFileAppendWithHeader(entry.toString(), LOG_DIRECTORY, getLogFileName(), LOG_HEADER);
    }

    /**
     * @return the name of the log file for the current month, of the form
     *         {@code PIDFailures_<yyyy-MM>.log}
     */
    public String getLogFileName() {
        return "PIDFailures_" + LOG_FILE_MONTH_FORMAT.format(YearMonth.now()) + ".log";
    }

    /**
     * One line of the PID failure log. Every getter returns "-" for a value that
     * was never set, so {@link #toString()} always yields five tab-separated columns.
     */
    public static class FailedPIDResolutionEntry {

        /** Placeholder written to the log for a missing value. */
        private static final String MISSING = "-";

        private String eventTime;
        private String clientIp;
        private String requestUrl;
        private String identifier;
        private String method;

        /** Creates an empty entry; all columns render as "-" until set. */
        public FailedPIDResolutionEntry() {

        }

        /**
         * Creates an entry stamped with the current time.
         *
         * @param persistentId  the PID that could not be resolved; stored URL-encoded
         *                      (UTF-8) so that it cannot contain raw tabs or newlines
         * @param requestURI    the URI of the failing request
         * @param method        the HTTP method of the failing request
         * @param sourceAddress the client address; may be {@code null}
         */
        public FailedPIDResolutionEntry(String persistentId, String requestURI, String method, IpAddress sourceAddress) {
            // The Charset overload cannot throw UnsupportedEncodingException.
            setIdentifier(persistentId == null ? null : URLEncoder.encode(persistentId, StandardCharsets.UTF_8));
            setRequestUrl(requestURI);
            setMethod(method);
            // Null-safe: a missing address is logged as "-" rather than failing the request.
            setClientIp(sourceAddress == null ? null : sourceAddress.toString());
            setEventTime(EVENT_TIME_FORMAT.format(ZonedDateTime.now()));
        }

        /**
         * @return the entry as one tab-separated, newline-terminated log line in the
         *         column order declared by {@code LOG_HEADER}
         */
        @Override
        public String toString() {
            return String.join("\t",
                    getIdentifier(),
                    getRequestUrl(),
                    getMethod(),
                    getClientIp(),
                    getEventTime()) + "\n";
        }

        /**
         * @return the eventTime, or "-" if unset
         */
        public String getEventTime() {
            return orMissing(eventTime);
        }

        /**
         * @param eventTime
         *            the eventTime to set
         */
        public final void setEventTime(String eventTime) {
            this.eventTime = eventTime;
        }

        /**
         * @return the clientIp, or "-" if unset
         */
        public String getClientIp() {
            return orMissing(clientIp);
        }

        /**
         * @param clientIp
         *            the clientIp to set
         */
        public final void setClientIp(String clientIp) {
            this.clientIp = clientIp;
        }

        /**
         * @return the HTTP Method, or "-" if unset (consistent with the other columns,
         *         so the log never contains the literal text "null")
         */
        public String getMethod() {
            return orMissing(method);
        }

        /**
         * @param method
         *            - the HTTP Method used
         */
        public final void setMethod(String method) {
            this.method = method;
        }

        /**
         * @return the requestUrl, or "-" if unset
         */
        public String getRequestUrl() {
            return orMissing(requestUrl);
        }

        /**
         * @param requestUrl
         *            the requestUrl to set
         */
        public final void setRequestUrl(String requestUrl) {
            this.requestUrl = requestUrl;
        }

        /**
         * @return the identifier, or "-" if unset
         */
        public String getIdentifier() {
            return orMissing(identifier);
        }

        /**
         * @param identifier
         *            the identifier to set
         */
        public final void setIdentifier(String identifier) {
            this.identifier = identifier;
        }

        /** Returns {@code value}, or the "-" placeholder when it is {@code null}. */
        private static String orMissing(String value) {
            return value == null ? MISSING : value;
        }

    }
}
12 changes: 12 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,18 @@ public enum FeatureFlags {
* @since Dataverse 6.5
*/
ADD_LOCAL_CONTEXTS_PERMISSION_CHECK("add-local-contexts-permission-check"),

/**
* This flag turns on creation of a monthly log file that tracks when requests for
* datasets/files with PIDs fail due to the PIDs not existing. This helps in catching
* cases where the DOI of a draft dataset has been cited, etc.
*
* @apiNote Raise flag by setting
* "dataverse.feature.enable-pid-failure-log"
* @since Dataverse 6.8
*/
ENABLE_PID_FAILURE_LOG("enable-pid-failure-log"),

;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just adding this at the end...

https://jenkins.dataverse.org/job/IQSS-Dataverse-Develop-PR/job/PR-11601/3/testReport/junit/edu.harvard.iq.dataverse.api/DataverseFeaturedItemsIT/testCreateFeaturedItemWithBadDvOdbjectIds/ is failing with a 500 error. Can you please merge the latest from develop and see if tests pass?

final String flag;
Expand Down