1 change: 1 addition & 0 deletions conf/solr/schema.xml
@@ -242,6 +242,7 @@

<field name="license" type="string" stored="true" indexed="true" multiValued="false"/>
<field name="fileCount" type="plong" stored="true" indexed="true" multiValued="false"/>
<field name="datasetCount" type="plong" stored="true" indexed="true" multiValued="false"/>

<!--
METADATA SCHEMA FIELDS
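With the new field in the schema, re-indexed collections can be inspected directly in Solr. A minimal SolrJ sketch; the host, the default core name `collection1`, and the `dvObjectType:dataverses` filter reflect common Dataverse defaults and are assumptions, not part of this PR:

```java
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.Http2SolrClient;
import org.apache.solr.common.SolrDocument;

public class DatasetCountProbe {
    public static void main(String[] args) throws Exception {
        // Assumed default core name and URL; adjust for your installation.
        try (SolrClient solr = new Http2SolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
            SolrQuery q = new SolrQuery("*:*");
            q.addFilterQuery("dvObjectType:dataverses");   // collections only
            q.addFilterQuery("datasetCount:[1 TO *]");     // at least one counted dataset
            q.setFields("name", "datasetCount");
            for (SolrDocument doc : solr.query(q).getResults()) {
                System.out.println(doc.get("name") + " -> " + doc.get("datasetCount"));
            }
        }
    }
}
```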
2 changes: 2 additions & 0 deletions doc/release-notes/10190-dataset-count.md
@@ -0,0 +1,2 @@
The search index now includes a `datasetCount` field for each collection, counting its published, linked, and harvested datasets; datasets in subcollections are counted as well.
Collections can be filtered on `datasetCount` (e.g., `datasetCount:[1000 TO *]`), and the value is returned for collections in Dataverse Search API results.
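A usage sketch against the Search API; the host is hypothetical, while `q`, `type`, and `fq` are the API's standard parameters:

```java
import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;

public class DatasetCountSearchExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical installation; any server running this release should work.
        String base = "https://demo.dataverse.org/api/search";
        // Restrict results to collections with 1000+ datasets.
        String fq = URLEncoder.encode("datasetCount:[1000 TO *]", StandardCharsets.UTF_8);
        URI uri = URI.create(base + "?q=*&type=dataverse&fq=" + fq);

        HttpResponse<String> rsp = HttpClient.newHttpClient()
                .send(HttpRequest.newBuilder(uri).GET().build(),
                      HttpResponse.BodyHandlers.ofString());
        // Each collection item in data.items should include its datasetCount.
        System.out.println(rsp.body());
    }
}
```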
2 changes: 1 addition & 1 deletion modules/dataverse-parent/pom.xml
@@ -132,7 +132,7 @@

<properties>
<!-- This is a special Maven property name, do not change! -->
<revision>6.7</revision>
<revision>6.7.1</revision>

<target.java.version>17</target.java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
20 changes: 15 additions & 5 deletions src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
@@ -165,21 +165,31 @@ private List<Dataset> findByOwnerId(Long ownerId, boolean onlyPublished) {
}

public List<Long> findIdsByOwnerId(Long ownerId) {
return findIdsByOwnerId(ownerId, false);
return findIdsByOwnerId(ownerId, false, false);
}

private List<Long> findIdsByOwnerId(Long ownerId, boolean onlyPublished) {
public List<Long> findIdsByOwnerId(Long ownerId, boolean onlyPublished, boolean includeHarvested) {
List<Long> retList = new ArrayList<>();
if (!onlyPublished) {
if (!onlyPublished && includeHarvested) {
return em.createNamedQuery("Dataset.findIdByOwnerId")
.setParameter("ownerId", ownerId)
.getResultList();
} else {
List<Dataset> results = em.createNamedQuery("Dataset.findByOwnerId")
.setParameter("ownerId", ownerId).getResultList();
for (Dataset ds : results) {
if (ds.isReleased() && !ds.isDeaccessioned()) {
retList.add(ds.getId());
// For harvested datasets, only add them if includeHarvested is true
if (ds.isHarvested()) {
if (includeHarvested) {
retList.add(ds.getId());
}
// For non-harvested datasets, either
// - add them all (if onlyPublished is false) OR
// - only add them if they are released and not deaccessioned (if onlyPublished is true)
} else {
if (!onlyPublished || (ds.isReleased() && !ds.isDeaccessioned())) {
retList.add(ds.getId());
}
}
}
return retList;
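For reference, the four combinations of the two flags behave as follows (summarized from the code above, not from PR documentation):

```java
// findIdsByOwnerId(ownerId, onlyPublished, includeHarvested) returns:
//
//   onlyPublished=false, includeHarvested=true  -> all dataset IDs (fast named-query path)
//   onlyPublished=false, includeHarvested=false -> all non-harvested dataset IDs
//   onlyPublished=true,  includeHarvested=false -> released, non-deaccessioned,
//                                                  non-harvested dataset IDs
//   onlyPublished=true,  includeHarvested=true  -> all harvested IDs, plus released,
//                                                  non-deaccessioned non-harvested IDs
```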
8 changes: 7 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/Dataverse.java
@@ -56,7 +56,13 @@
@NamedQuery(name = "Dataverse.filterByAlias", query="SELECT dv FROM Dataverse dv WHERE LOWER(dv.alias) LIKE :alias order by dv.alias"),
@NamedQuery(name = "Dataverse.filterByAliasNameAffiliation", query="SELECT dv FROM Dataverse dv WHERE (LOWER(dv.alias) LIKE :alias) OR (LOWER(dv.name) LIKE :name) OR (LOWER(dv.affiliation) LIKE :affiliation) order by dv.alias"),
@NamedQuery(name = "Dataverse.filterByName", query="SELECT dv FROM Dataverse dv WHERE LOWER(dv.name) LIKE :name order by dv.alias"),
@NamedQuery(name = "Dataverse.countAll", query = "SELECT COUNT(dv) FROM Dataverse dv")
@NamedQuery(name = "Dataverse.countAll", query = "SELECT COUNT(dv) FROM Dataverse dv"),
@NamedQuery(name = "Dataverse.getDatasetCount",
query = "SELECT " +
"(SELECT COUNT(DISTINCT d) FROM Dataset d JOIN d.versions v WHERE d.owner.id IN :ids AND v.versionState = :datasetState) + " +
"(SELECT COUNT(DISTINCT l.dataset) FROM DatasetLinkingDataverse l JOIN l.dataset.versions v WHERE l.linkingDataverse.id IN :ids AND v.versionState = :datasetState) " +
// The WHERE clause is a hack to ensure the count is returned in a single result row
"FROM Dataverse d WHERE d.id = (SELECT MIN(d2.id) FROM Dataverse d2)")
})
@Entity
@Table(indexes = {@Index(columnList="defaultcontributorrole_id")
70 changes: 60 additions & 10 deletions src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
@@ -14,6 +14,7 @@
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.search.SolrIndexServiceBean;
@@ -26,13 +27,8 @@
import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.logging.Logger;
import java.util.Properties;

import edu.harvard.iq.dataverse.validation.JSONDataValidation;
import jakarta.ejb.EJB;
@@ -468,8 +464,26 @@ public List<Dataverse> findDataversesThatLinkToThisDvId(long dataverseId) {
return dataverseLinkingService.findLinkingDataverses(dataverseId);
}

public List<Dataset> findDatasetsThisIdHasLinkedTo(long dataverseId, boolean onlyPublished) {
List<Dataset> linkedDatasets = datasetLinkingService.findLinkedDatasets(dataverseId);

if (!onlyPublished) {
return linkedDatasets;
}

List<Dataset> retList = new ArrayList<>();

for (Dataset ds : linkedDatasets) {
if (ds.isReleased() && !ds.isDeaccessioned()) {
retList.add(ds);
}
}

return retList;
}

public List<Dataset> findDatasetsThisIdHasLinkedTo(long dataverseId) {
return datasetLinkingService.findLinkedDatasets(dataverseId);
return this.findDatasetsThisIdHasLinkedTo(dataverseId, false);
}

public List<Dataverse> findDataversesThatLinkToThisDatasetId(long datasetId) {
@@ -754,21 +768,25 @@ public List<Long> findAllDataverseDataverseChildren(Long dvId) {

// function to recursively find ids of all children of a dataverse that are
// of type dataset
public List<Long> findAllDataverseDatasetChildren(Long dvId) {
public List<Long> findAllDataverseDatasetChildren(Long dvId, boolean onlyPublished, boolean includeHarvested) {
// get list of Dataverse children
List<Long> dataverseChildren = findIdsByOwnerId(dvId);
// get list of Dataset children
List<Long> datasetChildren = datasetService.findIdsByOwnerId(dvId);
List<Long> datasetChildren = datasetService.findIdsByOwnerId(dvId, onlyPublished, includeHarvested);

if (dataverseChildren == null) {
return datasetChildren;
} else {
for (Long childDvId : dataverseChildren) {
datasetChildren.addAll(findAllDataverseDatasetChildren(childDvId));
datasetChildren.addAll(findAllDataverseDatasetChildren(childDvId, onlyPublished, includeHarvested));
}
return datasetChildren;
}
}

public List<Long> findAllDataverseDatasetChildren(Long dvId) {
return findAllDataverseDatasetChildren(dvId, false, false);
}

public String addRoleAssignmentsToChildren(Dataverse owner, ArrayList<String> rolesToInherit,
boolean inheritAllRoles) {
@@ -1257,4 +1275,36 @@ public void disableStorageQuota(StorageQuota storageQuota) {
public long getDataverseCount() {
return em.createNamedQuery("Dataverse.countAll", Long.class).getSingleResult();
}

/**
* Returns the total number of published datasets within a Dataverse collection. The number includes harvested and
* linked datasets. Datasets in subcollections are also counted.
* @param dvId ID of a Dataverse collection
* @return the total number of published datasets within that Dataverse collection
*/
public long getDatasetCount(Long dvId) {
Set<Long> dvIds = new HashSet<>();
Deque<Long> stack = new ArrayDeque<>();
dvIds.add(dvId);
stack.push(dvId);

// Collect IDs of all subdataverses
while (!stack.isEmpty()) {
Long currentId = stack.pop();
List<Long> children = em.createQuery("SELECT d.id FROM Dataverse d WHERE d.owner.id = :parentId", Long.class)
.setParameter("parentId", currentId)
.getResultList();

for (Long childId : children) {
if (dvIds.add(childId)) {
stack.push(childId);
}
}
}

return em.createNamedQuery("Dataverse.getDatasetCount", Long.class)
.setParameter("ids", dvIds)
.setParameter("datasetState", VersionState.RELEASED)
.getSingleResult();
}
}
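A worked example of what `getDatasetCount` returns, using a hypothetical collection tree (names and numbers are illustrative):

```java
// root            owns 2 published datasets, links to 1 published dataset elsewhere
// └── child       owns 3 published datasets
//
// getDatasetCount(root.getId()):
//   1. The iterative traversal collects the subtree IDs: {root, child}
//   2. The named query "Dataverse.getDatasetCount" sums, over those IDs:
//        owned datasets with a RELEASED version:  2 + 3 = 5
//        linked datasets with a RELEASED version: 1
//   => returns 6
```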
src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java
@@ -7,6 +7,7 @@

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetLinkingDataverse;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
@@ -15,6 +16,8 @@
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.solr.client.solrj.SolrServerException;

@@ -42,12 +45,17 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
DatasetLinkingDataverse doomedAndMerged = ctxt.em().merge(doomed);
ctxt.em().remove(doomedAndMerged);

try {
ctxt.index().indexDataverse(doomed.getLinkingDataverse());
} catch (IOException | SolrServerException e) {
String failureLogText = "Post delete linking dataverse indexing failed for Dataverse. ";
failureLogText += "\r\n" + e.getLocalizedMessage();
LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, doomed.getLinkingDataverse());
List<Dataverse> toReindex = new ArrayList<>();
toReindex.add(doomed.getLinkingDataverse());
toReindex.addAll(doomed.getLinkingDataverse().getOwners());
for (Dataverse dv : toReindex) {
try {
ctxt.index().indexDataverse(dv);
} catch (IOException | SolrServerException e) {
String failureLogText = "Post delete linking dataverse indexing failed for Dataverse. ";
failureLogText += "\r\n" + e.getLocalizedMessage();
LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dv);
}
}

return merged;
src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java
@@ -49,12 +49,13 @@ public class DestroyDatasetCommand extends AbstractVoidCommand {

private List<String> datasetAndFileSolrIdsToDelete;

private Dataverse toReIndex;
private List<Dataverse> toReIndex;

public DestroyDatasetCommand(Dataset doomed, DataverseRequest aRequest) {
super(aRequest, doomed);
this.doomed = doomed;
datasetAndFileSolrIdsToDelete = new ArrayList<>();
toReIndex = new ArrayList<>();
}

@Override
@@ -116,7 +117,12 @@ protected void executeImpl(CommandContext ctxt) throws CommandException {
}
}

toReIndex = managedDoomed.getOwner();
toReIndex.add(managedDoomed.getOwner());
toReIndex.addAll(managedDoomed.getOwner().getOwners());
managedDoomed.getDatasetLinkingDataverses().forEach(dld -> {
toReIndex.add(dld.getLinkingDataverse());
toReIndex.addAll(dld.getLinkingDataverse().getOwners());
});

// add potential Solr IDs of datasets to list for deletion
String solrIdOfPublishedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + managedDoomed.getId();
@@ -145,13 +151,15 @@ public boolean onSuccess(CommandContext ctxt, Object r) {
logger.log(Level.FINE, "Result of attempt to delete dataset and file IDs from the search index: {0}", resultOfSolrDeletionAttempt.getMessage());

// reindex
try {
ctxt.index().indexDataverse(toReIndex);
} catch (IOException | SolrServerException e) {
String failureLogText = "Post-destroy dataset indexing of the owning dataverse failed. You can kickoff a re-index of this dataverse with: \r\n curl http://localhost:8080/api/admin/index/dataverses/" + toReIndex.getId().toString();
failureLogText += "\r\n" + e.getLocalizedMessage();
LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, toReIndex);
retVal = false;
for (Dataverse dv : toReIndex) {
try {
ctxt.index().indexDataverse(dv);
} catch (IOException | SolrServerException e) {
String failureLogText = "Post-destroy dataset indexing of an owning or linking dataverse failed. You can kickoff a re-index of this dataverse with: \r\n curl http://localhost:8080/api/admin/index/dataverses/" + dv.getId().toString();
failureLogText += "\r\n" + e.getLocalizedMessage();
LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dv);
retVal = false;
}
}

return retVal;
src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java
@@ -33,16 +33,15 @@
import java.awt.datatransfer.StringSelection;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.Date;
import java.util.List;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.util.FileUtil;
import java.util.ArrayList;

import java.util.concurrent.Future;

import org.apache.logging.log4j.util.Strings;
@@ -67,7 +66,7 @@ public class FinalizeDatasetPublicationCommand extends AbstractPublishDatasetCommand {
*/
final boolean datasetExternallyReleased;

List<Dataverse> dataversesToIndex = new ArrayList<>();
Set<Dataverse> dataversesToIndex = new HashSet<>();

public static final String FILE_VALIDATION_ERROR = "FILE VALIDATION ERROR";

@@ -209,6 +208,15 @@ public Dataset execute(CommandContext ctxt) throws CommandException {

}

// The owning dataverse plus all dataverses linking to this dataset must be re-indexed to update their
// datasetCount
dataversesToIndex.add(getDataset().getOwner());
dataversesToIndex.addAll(getDataset().getOwner().getOwners());
getDataset().getDatasetLinkingDataverses().forEach(dld -> {
dataversesToIndex.add(dld.getLinkingDataverse());
dataversesToIndex.addAll(dld.getLinkingDataverse().getOwners());
});

List<Command> previouslyCalled = ctxt.getCommandsCalled();

PrivateUrl privateUrl = ctxt.engine().submit(new GetPrivateUrlCommand(getRequest(), theDataset));
src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java
@@ -19,7 +19,10 @@
import edu.harvard.iq.dataverse.util.BundleUtil;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.solr.client.solrj.SolrServerException;

/**
@@ -68,6 +71,20 @@ public boolean onSuccess(CommandContext ctxt, Object r) {

ctxt.index().asyncIndexDataset(dld.getDataset(), true);

List<Dataverse> toReindex = new ArrayList<>();
toReindex.add(dld.getLinkingDataverse());
toReindex.addAll(dld.getLinkingDataverse().getOwners());
for (Dataverse dv : toReindex) {
try {
ctxt.index().indexDataverse(dv);
} catch (IOException | SolrServerException e) {
String failureLogText = "Indexing of linking dataverse failed. You can kickoff a re-index of this dataverse with: \r\n curl http://localhost:8080/api/admin/index/dataverses/" + dv.getId().toString();
failureLogText += "\r\n" + e.getLocalizedMessage();
LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dv);
return false;
}
}

return retVal;
}
}
src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
@@ -31,6 +31,7 @@
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import org.apache.commons.lang3.mutable.MutableBoolean;
import org.apache.solr.client.solrj.SolrServerException;
import org.xml.sax.SAXException;

import io.gdcc.xoai.model.oaipmh.results.Record;
@@ -191,6 +192,17 @@ public void doHarvest(DataverseRequest dataverseRequest, Long harvestingClientId

hdLogger.log(Level.INFO, String.format("Datasets created/updated: %s, datasets deleted: %s, datasets failed: %s", harvestedDatasetIds.size(), deletedIdentifiers.size(), failedIdentifiers.size()));

// Reindex dataverse to update datasetCount
List<Dataverse> toReindex = new ArrayList<>();
toReindex.add(harvestingClientConfig.getDataverse());
toReindex.addAll(harvestingClientConfig.getDataverse().getOwners());
for (Dataverse dv : toReindex) {
try {
indexService.indexDataverse(dv);
} catch (IOException | SolrServerException e) {
hdLogger.log(Level.SEVERE, "Dataverse indexing failed. You can kickoff a re-index of this dataverse with: \r\n curl http://localhost:8080/api/admin/index/dataverses/" + dv.getId().toString());
}
}
}
} catch (StopHarvestException she) {
hdLogger.log(Level.INFO, "HARVEST INTERRUPTED BY EXTERNAL REQUEST");