Skip to content

Commit 1400d57

Browse files
authored
Merge pull request #11189 from GlobalDataverseCommunityConsortium/Metrics-fix_file/monthly
File metrics fixes
2 parents ff8a037 + f7479bf commit 1400d57

File tree

2 files changed

+21
-9
lines changed

2 files changed

+21
-9
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The /api/info/metrics/files/monthly API call had a bug that resulted in files being counted each time they were published in a new version if those publication events occurred in different months. This resulted in an over-count.
2+
The /api/info/metrics/files and /api/info/metrics/files/toMonth API calls had a bug that resulted in files that were published, but were no longer in the latest published version as of the specified date (now, or the date entered in the /toMonth variant), not being counted. This resulted in an under-count.

src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -288,11 +288,18 @@ public JsonArray filesTimeSeries(Dataverse d) {
288288
+ "from (\n"
289289
+ "select min(to_char(COALESCE(releasetime, createtime), 'YYYY-MM')) as date, filemetadata.id as id\n"
290290
+ "from datasetversion, filemetadata\n"
291-
+ "where datasetversion.id=filemetadata.datasetversion_id\n"
292-
+ "and versionstate='RELEASED' \n"
293-
+ "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n"
291+
+ "where datasetversion.id = filemetadata.datasetversion_id\n"
292+
+ "and datasetversion.versionstate = 'RELEASED'\n"
293+
+ "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id = dvobject.id\n"
294294
+ "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n "
295295
+ ((d == null) ? ")" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + "))\n ")
296+
+ "and filemetadata.id = (\n"
297+
+ " select min(fm.id)\n"
298+
+ " from filemetadata fm\n"
299+
+ " join datasetversion dv on dv.id = fm.datasetversion_id\n"
300+
+ " where fm.datafile_id = filemetadata.datafile_id\n"
301+
+ " and dv.versionstate = 'RELEASED'\n"
302+
+ ")\n"
296303
+ "group by filemetadata.id) as subq group by subq.date order by date;");
297304
logger.log(Level.FINE, "Metric query: {0}", query);
298305
List<Object[]> results = query.getResultList();
@@ -314,8 +321,9 @@ public long filesToMonth(String yyyymm, Dataverse d) {
314321
+ "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id \n"
315322
+ "from datasetversion\n"
316323
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
324+
+ "join filemetadata fm on fm.datasetversion_id = datasetversion.id\n"
317325
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
318-
+ "where versionstate='RELEASED'\n"
326+
+ "where datasetversion.versionstate='RELEASED' and filemetadata.datafile_id=fm.datafile_id\n"
319327
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n")
320328
+ "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n"
321329
+ "and dataset.harvestingclient_id is null\n"
@@ -353,12 +361,14 @@ public long filesPastDays(int days, Dataverse d) {
353361

354362
public JsonArray filesByType(Dataverse d) {
355363
// SELECT DISTINCT df.contenttype, sum(df.filesize) FROM datafile df, dvObject ob where ob.id = df.id and dob.owner_id< group by df.contenttype
356-
// ToDo - published only?
357364
Query query = em.createNativeQuery("SELECT DISTINCT df.contenttype, count(df.id), coalesce(sum(df.filesize), 0) "
358-
+ " FROM DataFile df, DvObject ob"
359-
+ " where ob.id = df.id "
360-
+ ((d == null) ? "" : "and ob.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ")\n")
361-
+ "group by df.contenttype;");
365+
+ " FROM DataFile df "
366+
+ " JOIN DvObject ob ON ob.id = df.id "
367+
+ " JOIN FileMetadata fm ON fm.datafile_id = df.id "
368+
+ " JOIN DatasetVersion dv ON dv.id = fm.datasetversion_id "
369+
+ " WHERE dv.versionstate = 'RELEASED' "
370+
+ ((d == null) ? "" : "AND ob.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ") ")
371+
+ "GROUP BY df.contenttype;");
362372
JsonArrayBuilder jab = Json.createArrayBuilder();
363373
try {
364374
List<Object[]> results = query.getResultList();

0 commit comments

Comments
 (0)