Skip to content

Commit ecebf5a

Browse files
committed
Failed PID logging
1 parent 1c31486 commit ecebf5a

File tree

5 files changed

+216
-3
lines changed

5 files changed

+216
-3
lines changed

doc/sphinx-guides/source/installation/config.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3740,6 +3740,12 @@ please find all known feature flags below. Any of these flags can be activated u
37403740
* - enable-version-note
37413741
- Turns on the ability to add/view/edit/delete per-dataset-version notes intended to provide :ref:`provenance` information about why the dataset/version was created.
37423742
- ``Off``
3743+
* - add-local-contexts-permission-check
3744+
- Adds a permission check to ensure that the user calling the /api/localcontexts/datasets/{id} API can edit the dataset with that id. This is currently the only use case - see https://github.com/gdcc/dataverse-external-vocab-support/tree/main/packages/local_contexts. The flag adds additional security to stop other uses, but would currently have to be used in conjunction with the api-session-auth feature flag (the security implications of which have not been fully investigated) to still allow adding Local Contexts metadata to a dataset.
3745+
- ``Off``
3746+
* - enable-pid-failure-log
3747+
- Turns on creation of a monthly log file (logs/PIDFailures_<yyyy-MM>.log) that records requests for dataset/file PIDs that could not be resolved. The log can be read directly or processed with the scripts at https://github.com/gdcc/dataverse-recipes/python/pid_reports to alert admins.
3748+
- ``Off``
37433749

37443750
**Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable
37453751
``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development </container/dev-usage>`, you can set them in the `docker compose <https://docs.docker.com/compose/environment-variables/set-environment-variables/>`_ file.

src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
import edu.harvard.iq.dataverse.export.ExportService;
2020
import edu.harvard.iq.dataverse.globus.GlobusServiceBean;
2121
import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean;
22+
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean;
23+
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry;
2224
import edu.harvard.iq.dataverse.search.IndexServiceBean;
25+
import edu.harvard.iq.dataverse.settings.FeatureFlags;
2326
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
2427
import edu.harvard.iq.dataverse.util.BundleUtil;
2528
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -37,13 +40,16 @@
3740
import jakarta.ejb.Stateless;
3841
import jakarta.ejb.TransactionAttribute;
3942
import jakarta.ejb.TransactionAttributeType;
43+
import jakarta.faces.context.FacesContext;
44+
import jakarta.inject.Inject;
4045
import jakarta.inject.Named;
4146
import jakarta.persistence.EntityManager;
4247
import jakarta.persistence.NoResultException;
4348
import jakarta.persistence.NonUniqueResultException;
4449
import jakarta.persistence.PersistenceContext;
4550
import jakarta.persistence.Query;
4651
import jakarta.persistence.TypedQuery;
52+
import jakarta.servlet.http.HttpServletRequest;
4753
import org.apache.commons.lang3.StringUtils;
4854

4955
/**
@@ -86,6 +92,9 @@ public class DatasetServiceBean implements java.io.Serializable {
8692

8793
@EJB
8894
SystemConfig systemConfig;
95+
96+
@Inject
97+
FailedPIDResolutionLoggingServiceBean fprLogService;
8998

9099
@EJB
91100
GlobusServiceBean globusServiceBean;
@@ -94,6 +103,8 @@ public class DatasetServiceBean implements java.io.Serializable {
94103
UserNotificationServiceBean userNotificationService;
95104

96105
private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss");
106+
107+
private static final boolean pidFailureLoggingEnabled = FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled();
97108

98109
@PersistenceContext(unitName = "VDCNet-ejbPU")
99110
protected EntityManager em;
@@ -298,8 +309,19 @@ public Dataset findByGlobalId(String globalId) {
298309
if (retVal != null){
299310
return retVal;
300311
} else {
301-
//try to find with alternative PID
302-
return (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset);
312+
// try to find with alternative PID
313+
retVal = (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset);
314+
if (retVal == null && pidFailureLoggingEnabled) {
315+
try {
316+
317+
HttpServletRequest httpRequest = ((HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest());
318+
FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(globalId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress());
319+
fprLogService.logEntry(entry);
320+
} catch (NullPointerException npe) {
321+
// Do nothing - this is an API call with no FacesContext
322+
}
323+
}
324+
return retVal;
303325
}
304326
}
305327

src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import edu.harvard.iq.dataverse.*;
44
import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean;
5+
56
import static edu.harvard.iq.dataverse.api.Datasets.handleVersion;
67
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
78
import edu.harvard.iq.dataverse.authorization.DataverseRole;
@@ -22,10 +23,13 @@
2223
import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand;
2324
import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean;
2425
import edu.harvard.iq.dataverse.license.LicenseServiceBean;
26+
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean;
2527
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
28+
import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry;
2629
import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean;
2730
import edu.harvard.iq.dataverse.metrics.MetricsServiceBean;
2831
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean;
32+
import edu.harvard.iq.dataverse.settings.FeatureFlags;
2933
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
3034
import edu.harvard.iq.dataverse.util.BundleUtil;
3135
import edu.harvard.iq.dataverse.util.FileUtil;
@@ -36,6 +40,7 @@
3640
import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean;
3741
import jakarta.ejb.EJB;
3842
import jakarta.ejb.EJBException;
43+
import jakarta.inject.Inject;
3944
import jakarta.json.*;
4045
import jakarta.json.JsonValue.ValueType;
4146
import jakarta.persistence.EntityManager;
@@ -230,6 +235,9 @@ String getWrappedMessageWhenJson() {
230235
@EJB
231236
GuestbookResponseServiceBean gbRespSvc;
232237

238+
@Inject
239+
FailedPIDResolutionLoggingServiceBean fprLogService;
240+
233241
@PersistenceContext(unitName = "VDCNet-ejbPU")
234242
protected EntityManager em;
235243

@@ -405,7 +413,9 @@ protected Dataset findDatasetOrDie(String id, boolean deep) throws WrappedRespon
405413
if (datasetId == null) {
406414
datasetId = dvObjSvc.findIdByAltGlobalId(globalId, DvObject.DType.Dataset);
407415
}
408-
if (datasetId == null) {
416+
if (datasetId == null && FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) {
417+
FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(),httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress());
418+
fprLogService.logEntry(entry);
409419
throw new WrappedResponse(
410420
notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1)))));
411421
}
@@ -465,6 +475,8 @@ protected DataFile findDataFileOrDie(String id) throws WrappedResponse {
465475
}
466476
datafile = fileService.findByGlobalId(persistentId);
467477
if (datafile == null) {
478+
FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(),httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress());
479+
fprLogService.logEntry(entry);
468480
throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.datafile.error.dataset.not.found.persistentId", Collections.singletonList(persistentId))));
469481
}
470482
return datafile;
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
/*
2+
* To change this license header, choose License Headers in Project Properties.
3+
* To change this template file, choose Tools | Templates
4+
* and open the template in the editor.
5+
*/
6+
package edu.harvard.iq.dataverse.pidproviders;
7+
8+
import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress;
9+
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
10+
import java.io.UnsupportedEncodingException;
11+
import java.net.URLEncoder;
12+
import java.nio.charset.StandardCharsets;
13+
import java.sql.Timestamp;
14+
import java.text.SimpleDateFormat;
15+
import java.util.Date;
16+
import jakarta.enterprise.context.RequestScoped;
17+
import jakarta.inject.Named;
18+
19+
/**
20+
*
21+
* @author qqmyers
22+
*/
23+
24+
@Named
25+
@RequestScoped
26+
public class FailedPIDResolutionLoggingServiceBean {
27+
28+
public static final String LOG_HEADER = "#Fields: pid\trequestURI\tHTTP method\tclient_ip\teventTime\n";
29+
30+
31+
public void logEntry(FailedPIDResolutionEntry entry) {
32+
LoggingUtil.saveLogFileAppendWithHeader(entry.toString(), "../logs", getLogFileName(), LOG_HEADER);
33+
}
34+
35+
public String getLogFileName() {
36+
return "PIDFailures_" + new SimpleDateFormat("yyyy-MM").format(new Timestamp(new Date().getTime())) + ".log";
37+
}
38+
39+
public static class FailedPIDResolutionEntry {
40+
41+
private String eventTime;
42+
private String clientIp;
43+
private String requestUrl;
44+
private String identifier;
45+
private String method;
46+
47+
public FailedPIDResolutionEntry() {
48+
49+
}
50+
51+
public FailedPIDResolutionEntry(String persistentId, String requestURI, String method, IpAddress sourceAddress) {
52+
try {
53+
setIdentifier(URLEncoder.encode(persistentId, StandardCharsets.UTF_8.toString()));
54+
} catch (UnsupportedEncodingException e) {
55+
// Should never happen
56+
e.printStackTrace();
57+
}
58+
setRequestUrl(requestURI);
59+
setMethod(method);
60+
setClientIp(sourceAddress.toString());
61+
setEventTime(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ").format(new Timestamp(new Date().getTime())));
62+
}
63+
64+
@Override
65+
public String toString() {
66+
return getIdentifier() + "\t" +
67+
getRequestUrl() + "\t" +
68+
getMethod() + "\t" +
69+
getClientIp() + "\t" +
70+
getEventTime() + "\n";
71+
}
72+
73+
/**
74+
* @return the eventTime
75+
*/
76+
public String getEventTime() {
77+
if (eventTime == null) {
78+
return "-";
79+
}
80+
return eventTime;
81+
}
82+
83+
/**
84+
* @param eventTime
85+
* the eventTime to set
86+
*/
87+
public final void setEventTime(String eventTime) {
88+
this.eventTime = eventTime;
89+
}
90+
91+
/**
92+
* @return the clientIp
93+
*/
94+
public String getClientIp() {
95+
if (clientIp == null) {
96+
return "-";
97+
}
98+
return clientIp;
99+
}
100+
101+
/**
102+
* @param clientIp
103+
* the clientIp to set
104+
*/
105+
public final void setClientIp(String clientIp) {
106+
this.clientIp = clientIp;
107+
}
108+
109+
/**
110+
* @return the HTTP Method
111+
*/
112+
public String getMethod() {
113+
return method;
114+
}
115+
116+
/**
117+
* @param method
118+
* - the HTTP Method used
119+
*/
120+
public final void setMethod(String method) {
121+
this.method = method;
122+
}
123+
124+
/**
125+
* @return the requestUrl
126+
*/
127+
public String getRequestUrl() {
128+
if (requestUrl == null) {
129+
return "-";
130+
}
131+
return requestUrl;
132+
}
133+
134+
/**
135+
* @param requestUrl
136+
* the requestUrl to set
137+
*/
138+
public final void setRequestUrl(String requestUrl) {
139+
this.requestUrl = requestUrl;
140+
}
141+
142+
/**
143+
* @return the identifier
144+
*/
145+
public String getIdentifier() {
146+
if (identifier == null) {
147+
return "-";
148+
}
149+
return identifier;
150+
}
151+
152+
/**
153+
* @param identifier
154+
* the identifier to set
155+
*/
156+
public final void setIdentifier(String identifier) {
157+
this.identifier = identifier;
158+
}
159+
160+
}
161+
}

src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,18 @@ public enum FeatureFlags {
166166
* @since Dataverse 6.5
167167
*/
168168
ADD_LOCAL_CONTEXTS_PERMISSION_CHECK("add-local-contexts-permission-check"),
169+
170+
/**
171+
* This flag turns on creation of a monthly log file that tracks when requests for
172+
* datasets/files with PIDs fail due to the PIDs not existing. This helps in catching
173+
* cases where the DOI of a draft dataset has been cited, etc.
174+
*
175+
* @apiNote Raise flag by setting
176+
* "dataverse.feature.enable-pid-failure-log"
177+
* @since Dataverse 6.8
178+
*/
179+
ENABLE_PID_FAILURE_LOG("enable-pid-failure-log"),
180+
169181

170182
;
171183

0 commit comments

Comments
 (0)