Skip to content

Commit d5f49b6

Browse files
authored
Merge pull request #11601 from QualitativeDataRepository/PIDFailTracking
Add optional logging of requests for non-existent/non-public PIDs
2 parents 195a584 + f4f55ff commit d5f49b6

File tree

6 files changed

+222
-3
lines changed

6 files changed

+222
-3
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
This version of Dataverse includes a new feature flag - ``dataverse.feature.enable-pid-failure-log``. When set, Dataverse logs failed requests for dataset and file pages made via persistentId to monthly log files named PIDFailures_<yyyy-MM>.log. Such failures can indicate that someone has shared a draft PID without publishing the dataset, or that a '.' or other character has been appended to the PID, which may be of interest to site administrators.
2+
3+
The new log files can be used in concert with the pidreport.py script at https://github.com/gdcc/dataverse-recipes/tree/main/python/pid_reports to generate and email monthly PID failure reports.

doc/sphinx-guides/source/installation/config.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3800,6 +3800,9 @@ please find all known feature flags below. Any of these flags can be activated u
38003800
* - add-local-contexts-permission-check
38013801
- Adds a permission check to ensure that the user calling the /api/localcontexts/datasets/{id} API can edit the dataset with that id. This is currently the only use case - see https://github.com/gdcc/dataverse-external-vocab-support/tree/main/packages/local_contexts. The flag adds additional security to stop other uses, but would currently have to be used in conjunction with the api-session-auth feature flag (the security implications of which have not been fully investigated) to still allow adding Local Contexts metadata to a dataset.
38023802
- ``Off``
3803+
* - enable-pid-failure-log
3804+
- Turns on creation of a monthly log file (logs/PIDFailures_<yyyy-MM>.log) recording failed requests for dataset/file PIDs. The log can be used directly or with the scripts at https://github.com/gdcc/dataverse-recipes/tree/main/python/pid_reports to alert admins.
3805+
- ``Off``
38033806

38043807
**Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable
38053808
``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development </container/dev-usage>`, you can set them in the `docker compose <https://docs.docker.com/compose/environment-variables/set-environment-variables/>`_ file.

src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
import edu.harvard.iq.dataverse.export.ExportService;
2020
import edu.harvard.iq.dataverse.globus.GlobusServiceBean;
2121
import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean;
22+
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean;
23+
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry;
2224
import edu.harvard.iq.dataverse.search.IndexServiceBean;
25+
import edu.harvard.iq.dataverse.settings.FeatureFlags;
2326
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
2427
import edu.harvard.iq.dataverse.util.BundleUtil;
2528
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -37,13 +40,16 @@
3740
import jakarta.ejb.Stateless;
3841
import jakarta.ejb.TransactionAttribute;
3942
import jakarta.ejb.TransactionAttributeType;
43+
import jakarta.faces.context.FacesContext;
44+
import jakarta.inject.Inject;
4045
import jakarta.inject.Named;
4146
import jakarta.persistence.EntityManager;
4247
import jakarta.persistence.NoResultException;
4348
import jakarta.persistence.NonUniqueResultException;
4449
import jakarta.persistence.PersistenceContext;
4550
import jakarta.persistence.Query;
4651
import jakarta.persistence.TypedQuery;
52+
import jakarta.servlet.http.HttpServletRequest;
4753
import org.apache.commons.lang3.StringUtils;
4854

4955
/**
@@ -86,6 +92,9 @@ public class DatasetServiceBean implements java.io.Serializable {
8692

8793
@EJB
8894
SystemConfig systemConfig;
95+
96+
@Inject
97+
FailedPIDResolutionLoggingServiceBean fprLogService;
8998

9099
@EJB
91100
GlobusServiceBean globusServiceBean;
@@ -94,6 +103,8 @@ public class DatasetServiceBean implements java.io.Serializable {
94103
UserNotificationServiceBean userNotificationService;
95104

96105
private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss");
106+
107+
private static final boolean pidFailureLoggingEnabled = FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled();
97108

98109
@PersistenceContext(unitName = "VDCNet-ejbPU")
99110
protected EntityManager em;
@@ -308,8 +319,19 @@ public Dataset findByGlobalId(String globalId) {
308319
if (retVal != null){
309320
return retVal;
310321
} else {
311-
//try to find with alternative PID
312-
return (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset);
322+
// try to find with alternative PID
323+
retVal = (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset);
324+
if (retVal == null && pidFailureLoggingEnabled) {
325+
try {
326+
327+
HttpServletRequest httpRequest = ((HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest());
328+
FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(globalId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress());
329+
fprLogService.logEntry(entry);
330+
} catch (NullPointerException npe) {
331+
// Do nothing - this is an API call with no FacesContext
332+
}
333+
}
334+
return retVal;
313335
}
314336
}
315337

src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import edu.harvard.iq.dataverse.*;
44
import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean;
5+
56
import static edu.harvard.iq.dataverse.api.Datasets.handleVersion;
67
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
78
import edu.harvard.iq.dataverse.authorization.DataverseRole;
@@ -22,10 +23,13 @@
2223
import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand;
2324
import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean;
2425
import edu.harvard.iq.dataverse.license.LicenseServiceBean;
26+
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean;
2527
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
28+
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry;
2629
import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean;
2730
import edu.harvard.iq.dataverse.metrics.MetricsServiceBean;
2831
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean;
32+
import edu.harvard.iq.dataverse.settings.FeatureFlags;
2933
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
3034
import edu.harvard.iq.dataverse.util.BundleUtil;
3135
import edu.harvard.iq.dataverse.util.DateUtil;
@@ -37,6 +41,7 @@
3741
import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean;
3842
import jakarta.ejb.EJB;
3943
import jakarta.ejb.EJBException;
44+
import jakarta.inject.Inject;
4045
import jakarta.json.*;
4146
import jakarta.json.JsonValue.ValueType;
4247
import jakarta.persistence.EntityManager;
@@ -232,6 +237,9 @@ String getWrappedMessageWhenJson() {
232237
@EJB
233238
GuestbookResponseServiceBean gbRespSvc;
234239

240+
@Inject
241+
FailedPIDResolutionLoggingServiceBean fprLogService;
242+
235243
@PersistenceContext(unitName = "VDCNet-ejbPU")
236244
protected EntityManager em;
237245

@@ -408,8 +416,13 @@ protected Dataset findDatasetOrDie(String id, boolean deep) throws WrappedRespon
408416
datasetId = dvObjSvc.findIdByAltGlobalId(globalId, DvObject.DType.Dataset);
409417
}
410418
if (datasetId == null) {
419+
if (FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) {
420+
421+
FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress());
422+
fprLogService.logEntry(entry);
423+
}
411424
throw new WrappedResponse(
412-
notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1)))));
425+
notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1)))));
413426
}
414427
}
415428
if (deep) {
@@ -479,6 +492,11 @@ protected DataFile findDataFileOrDie(String id) throws WrappedResponse {
479492
}
480493
datafile = fileService.findByGlobalId(persistentId);
481494
if (datafile == null) {
495+
if (FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) {
496+
497+
FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress());
498+
fprLogService.logEntry(entry);
499+
}
482500
throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.datafile.error.dataset.not.found.persistentId", Collections.singletonList(persistentId))));
483501
}
484502
return datafile;
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
/*
2+
* To change this license header, choose License Headers in Project Properties.
3+
* To change this template file, choose Tools | Templates
4+
* and open the template in the editor.
5+
*/
6+
package edu.harvard.iq.dataverse.pidproviders;
7+
8+
import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress;
9+
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
10+
import java.io.UnsupportedEncodingException;
11+
import java.net.URLEncoder;
12+
import java.nio.charset.StandardCharsets;
13+
import java.sql.Timestamp;
14+
import java.text.SimpleDateFormat;
15+
import java.util.Date;
16+
import jakarta.enterprise.context.RequestScoped;
17+
import jakarta.inject.Named;
18+
19+
/**
20+
*
21+
* @author qqmyers
22+
*/
23+
24+
@Named
25+
@RequestScoped
26+
public class FailedPIDResolutionLoggingServiceBean {
27+
28+
public static final String LOG_HEADER = "#Fields: pid\trequestURI\tHTTP method\tclient_ip\teventTime\n";
29+
30+
31+
public void logEntry(FailedPIDResolutionEntry entry) {
32+
LoggingUtil.saveLogFileAppendWithHeader(entry.toString(), "../logs", getLogFileName(), LOG_HEADER);
33+
}
34+
35+
public String getLogFileName() {
36+
return "PIDFailures_" + new SimpleDateFormat("yyyy-MM").format(new Timestamp(new Date().getTime())) + ".log";
37+
}
38+
39+
public static class FailedPIDResolutionEntry {
40+
41+
private String eventTime;
42+
private String clientIp;
43+
private String requestUrl;
44+
private String identifier;
45+
private String method;
46+
47+
public FailedPIDResolutionEntry() {
48+
49+
}
50+
51+
public FailedPIDResolutionEntry(String persistentId, String requestURI, String method, IpAddress sourceAddress) {
52+
try {
53+
setIdentifier(URLEncoder.encode(persistentId, StandardCharsets.UTF_8.toString()));
54+
} catch (UnsupportedEncodingException e) {
55+
// Should never happen
56+
e.printStackTrace();
57+
}
58+
setRequestUrl(requestURI);
59+
setMethod(method);
60+
setClientIp(sourceAddress.toString());
61+
setEventTime(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ").format(new Timestamp(new Date().getTime())));
62+
}
63+
64+
@Override
65+
public String toString() {
66+
return getIdentifier() + "\t" +
67+
getRequestUrl() + "\t" +
68+
getMethod() + "\t" +
69+
getClientIp() + "\t" +
70+
getEventTime() + "\n";
71+
}
72+
73+
/**
74+
* @return the eventTime
75+
*/
76+
public String getEventTime() {
77+
if (eventTime == null) {
78+
return "-";
79+
}
80+
return eventTime;
81+
}
82+
83+
/**
84+
* @param eventTime
85+
* the eventTime to set
86+
*/
87+
public final void setEventTime(String eventTime) {
88+
this.eventTime = eventTime;
89+
}
90+
91+
/**
92+
* @return the clientIp
93+
*/
94+
public String getClientIp() {
95+
if (clientIp == null) {
96+
return "-";
97+
}
98+
return clientIp;
99+
}
100+
101+
/**
102+
* @param clientIp
103+
* the clientIp to set
104+
*/
105+
public final void setClientIp(String clientIp) {
106+
this.clientIp = clientIp;
107+
}
108+
109+
/**
110+
* @return the HTTP Method
111+
*/
112+
public String getMethod() {
113+
return method;
114+
}
115+
116+
/**
117+
* @param method
118+
* - the HTTP Method used
119+
*/
120+
public final void setMethod(String method) {
121+
this.method = method;
122+
}
123+
124+
/**
125+
* @return the requestUrl
126+
*/
127+
public String getRequestUrl() {
128+
if (requestUrl == null) {
129+
return "-";
130+
}
131+
return requestUrl;
132+
}
133+
134+
/**
135+
* @param requestUrl
136+
* the requestUrl to set
137+
*/
138+
public final void setRequestUrl(String requestUrl) {
139+
this.requestUrl = requestUrl;
140+
}
141+
142+
/**
143+
* @return the identifier
144+
*/
145+
public String getIdentifier() {
146+
if (identifier == null) {
147+
return "-";
148+
}
149+
return identifier;
150+
}
151+
152+
/**
153+
* @param identifier
154+
* the identifier to set
155+
*/
156+
public final void setIdentifier(String identifier) {
157+
this.identifier = identifier;
158+
}
159+
160+
}
161+
}

src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,18 @@ public enum FeatureFlags {
218218
* @since Dataverse 6.5
219219
*/
220220
ADD_LOCAL_CONTEXTS_PERMISSION_CHECK("add-local-contexts-permission-check"),
221+
222+
/**
223+
* This flag turns on creation of a monthly log file that tracks when requests for
224+
* datasets/files with PIDs fail due to the PIDs not existing. This helps in catching
225+
* cases where the DOI of a draft dataset has been cited, etc.
226+
*
227+
* @apiNote Raise flag by setting
228+
* "dataverse.feature.enable-pid-failure-log"
229+
* @since Dataverse 6.8
230+
*/
231+
ENABLE_PID_FAILURE_LOG("enable-pid-failure-log"),
232+
221233
;
222234

223235
final String flag;

0 commit comments

Comments
 (0)