Skip to content

Commit 9bda7dd

Browse files
authored
Merge pull request #10383 from IQSS/solr-date-sort-optimization
Resolve performance issues with large datasets
2 parents 5d05e6a + d1b73cf commit 9bda7dd

File tree

5 files changed

+82
-61
lines changed

5 files changed

+82
-61
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
The following have been optimized for API calls involving large datasets (datasets with numerous files, for example ~10k):
2+
3+
- The search API endpoint.
4+
- The permission checking logic present in PermissionServiceBean.

src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,16 @@ public enum FileDownloadSizeMode {
4242
All, Original, Archival
4343
}
4444

45+
/**
46+
* Given a DatasetVersion, returns its total file metadata count
47+
*
48+
* @param datasetVersion the DatasetVersion to access
49+
* @return long value of total file metadata count
50+
*/
51+
public long getFileMetadataCount(DatasetVersion datasetVersion) {
52+
return getFileMetadataCount(datasetVersion, new FileSearchCriteria(null, null, null, null, null));
53+
}
54+
4555
/**
4656
* Given a DatasetVersion, returns its total file metadata count
4757
*
@@ -189,6 +199,32 @@ public long getFilesDownloadSize(DatasetVersion datasetVersion, FileSearchCriter
189199
};
190200
}
191201

202+
/**
203+
* Determines whether or not a DataFile is present in a DatasetVersion
204+
*
205+
* @param datasetVersion the DatasetVersion to check
206+
* @param dataFile the DataFile to check
207+
* @return boolean value
208+
*/
209+
public boolean isDataFilePresentInDatasetVersion(DatasetVersion datasetVersion, DataFile dataFile) {
210+
CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder();
211+
CriteriaQuery<Long> criteriaQuery = criteriaBuilder.createQuery(Long.class);
212+
Root<DataFile> dataFileRoot = criteriaQuery.from(DataFile.class);
213+
Root<FileMetadata> fileMetadataRoot = criteriaQuery.from(FileMetadata.class);
214+
Root<DatasetVersion> datasetVersionRoot = criteriaQuery.from(DatasetVersion.class);
215+
criteriaQuery
216+
.select(criteriaBuilder.count(dataFileRoot))
217+
.where(criteriaBuilder.and(
218+
criteriaBuilder.equal(dataFileRoot.get("id"), dataFile.getId()),
219+
criteriaBuilder.equal(datasetVersionRoot.get("id"), datasetVersion.getId()),
220+
fileMetadataRoot.in(dataFileRoot.get("fileMetadatas")),
221+
fileMetadataRoot.in(datasetVersionRoot.get("fileMetadatas"))
222+
)
223+
);
224+
Long count = em.createQuery(criteriaQuery).getSingleResult();
225+
return count != null && count > 0;
226+
}
227+
192228
private void addAccessStatusCountToTotal(DatasetVersion datasetVersion, Map<FileAccessStatus, Long> totalCounts, FileAccessStatus dataFileAccessStatus, FileSearchCriteria searchCriteria) {
193229
long fileMetadataCount = getFileMetadataCountByAccessStatus(datasetVersion, dataFileAccessStatus, searchCriteria);
194230
if (fileMetadataCount > 0) {

src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ public class PermissionServiceBean {
9797
@Inject
9898
DataverseRequestServiceBean dvRequestService;
9999

100+
@Inject
101+
DatasetVersionFilesServiceBean datasetVersionFilesServiceBean;
102+
100103
/**
101104
* A request-level permission query (e.g includes IP ras).
102105
*/
@@ -442,23 +445,14 @@ private Set<Permission> getInferredPermissions(DvObject dvo) {
442445
* download permission for everybody:
443446
*/
444447
private boolean isPublicallyDownloadable(DvObject dvo) {
445-
if (dvo instanceof DataFile) {
448+
if (dvo instanceof DataFile df) {
446449
// unrestricted files that are part of a release dataset
447450
// automatically get download permission for everybody:
448451
// -- L.A. 4.0 beta12
449-
450-
DataFile df = (DataFile) dvo;
451-
452452
if (!df.isRestricted()) {
453-
if (df.getOwner().getReleasedVersion() != null) {
454-
List<FileMetadata> fileMetadatas = df.getOwner().getReleasedVersion().getFileMetadatas();
455-
if (fileMetadatas != null) {
456-
for (FileMetadata fm : fileMetadatas) {
457-
if (df.equals(fm.getDataFile())) {
458-
return true;
459-
}
460-
}
461-
}
453+
DatasetVersion releasedVersion = df.getOwner().getReleasedVersion();
454+
if (releasedVersion != null) {
455+
return datasetVersionFilesServiceBean.isDataFilePresentInDatasetVersion(releasedVersion, df);
462456
}
463457
}
464458
}

src/main/java/edu/harvard/iq/dataverse/api/Search.java

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
package edu.harvard.iq.dataverse.api;
22

3-
import edu.harvard.iq.dataverse.Dataverse;
3+
import edu.harvard.iq.dataverse.*;
44
import edu.harvard.iq.dataverse.api.auth.AuthRequired;
55
import edu.harvard.iq.dataverse.search.SearchFields;
6-
import edu.harvard.iq.dataverse.DataverseServiceBean;
7-
import edu.harvard.iq.dataverse.DvObjectServiceBean;
86
import edu.harvard.iq.dataverse.search.FacetCategory;
97
import edu.harvard.iq.dataverse.search.FacetLabel;
108
import edu.harvard.iq.dataverse.search.SolrSearchResult;
@@ -16,7 +14,6 @@
1614
import edu.harvard.iq.dataverse.search.SearchConstants;
1715
import edu.harvard.iq.dataverse.search.SearchException;
1816
import edu.harvard.iq.dataverse.search.SearchUtil;
19-
import edu.harvard.iq.dataverse.search.SolrIndexServiceBean;
2017
import edu.harvard.iq.dataverse.search.SortBy;
2118
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
2219
import java.io.IOException;
@@ -26,6 +23,7 @@
2623
import java.util.Map;
2724
import java.util.logging.Logger;
2825
import jakarta.ejb.EJB;
26+
import jakarta.inject.Inject;
2927
import jakarta.json.Json;
3028
import jakarta.json.JsonArrayBuilder;
3129
import jakarta.json.JsonObjectBuilder;
@@ -51,10 +49,8 @@ public class Search extends AbstractApiBean {
5149
SearchServiceBean searchService;
5250
@EJB
5351
DataverseServiceBean dataverseService;
54-
@EJB
55-
DvObjectServiceBean dvObjectService;
56-
@EJB
57-
SolrIndexServiceBean SolrIndexService;
52+
@Inject
53+
DatasetVersionFilesServiceBean datasetVersionFilesServiceBean;
5854

5955
@GET
6056
@AuthRequired
@@ -179,39 +175,40 @@ public Response search(
179175
JsonArrayBuilder itemsArrayBuilder = Json.createArrayBuilder();
180176
List<SolrSearchResult> solrSearchResults = solrQueryResponse.getSolrSearchResults();
181177
for (SolrSearchResult solrSearchResult : solrSearchResults) {
182-
itemsArrayBuilder.add(solrSearchResult.toJsonObject(showRelevance, showEntityIds, showApiUrls, metadataFields));
178+
itemsArrayBuilder.add(solrSearchResult.json(showRelevance, showEntityIds, showApiUrls, metadataFields, getDatasetFileCount(solrSearchResult)));
183179
}
184180

185181
JsonObjectBuilder spelling_alternatives = Json.createObjectBuilder();
186182
for (Map.Entry<String, List<String>> entry : solrQueryResponse.getSpellingSuggestionsByToken().entrySet()) {
187183
spelling_alternatives.add(entry.getKey(), entry.getValue().toString());
188184
}
189185

190-
JsonArrayBuilder facets = Json.createArrayBuilder();
191-
JsonObjectBuilder facetCategoryBuilder = Json.createObjectBuilder();
192-
for (FacetCategory facetCategory : solrQueryResponse.getFacetCategoryList()) {
193-
JsonObjectBuilder facetCategoryBuilderFriendlyPlusData = Json.createObjectBuilder();
194-
JsonArrayBuilder facetLabelBuilderData = Json.createArrayBuilder();
195-
for (FacetLabel facetLabel : facetCategory.getFacetLabel()) {
196-
JsonObjectBuilder countBuilder = Json.createObjectBuilder();
197-
countBuilder.add(facetLabel.getName(), facetLabel.getCount());
198-
facetLabelBuilderData.add(countBuilder);
199-
}
200-
facetCategoryBuilderFriendlyPlusData.add("friendly", facetCategory.getFriendlyName());
201-
facetCategoryBuilderFriendlyPlusData.add("labels", facetLabelBuilderData);
202-
facetCategoryBuilder.add(facetCategory.getName(), facetCategoryBuilderFriendlyPlusData);
203-
}
204-
facets.add(facetCategoryBuilder);
205-
206186
JsonObjectBuilder value = Json.createObjectBuilder()
207187
.add("q", query)
208188
.add("total_count", solrQueryResponse.getNumResultsFound())
209189
.add("start", solrQueryResponse.getResultsStart())
210190
.add("spelling_alternatives", spelling_alternatives)
211191
.add("items", itemsArrayBuilder.build());
192+
212193
if (showFacets) {
194+
JsonArrayBuilder facets = Json.createArrayBuilder();
195+
JsonObjectBuilder facetCategoryBuilder = Json.createObjectBuilder();
196+
for (FacetCategory facetCategory : solrQueryResponse.getFacetCategoryList()) {
197+
JsonObjectBuilder facetCategoryBuilderFriendlyPlusData = Json.createObjectBuilder();
198+
JsonArrayBuilder facetLabelBuilderData = Json.createArrayBuilder();
199+
for (FacetLabel facetLabel : facetCategory.getFacetLabel()) {
200+
JsonObjectBuilder countBuilder = Json.createObjectBuilder();
201+
countBuilder.add(facetLabel.getName(), facetLabel.getCount());
202+
facetLabelBuilderData.add(countBuilder);
203+
}
204+
facetCategoryBuilderFriendlyPlusData.add("friendly", facetCategory.getFriendlyName());
205+
facetCategoryBuilderFriendlyPlusData.add("labels", facetLabelBuilderData);
206+
facetCategoryBuilder.add(facetCategory.getName(), facetCategoryBuilderFriendlyPlusData);
207+
}
208+
facets.add(facetCategoryBuilder);
213209
value.add("facets", facets);
214210
}
211+
215212
value.add("count_in_response", solrSearchResults.size());
216213
/**
217214
* @todo Returning the fq might be useful as a troubleshooting aid
@@ -232,6 +229,15 @@ public Response search(
232229
}
233230
}
234231

232+
private Long getDatasetFileCount(SolrSearchResult solrSearchResult) {
233+
DvObject dvObject = solrSearchResult.getEntity();
234+
if (dvObject.isInstanceofDataset()) {
235+
DatasetVersion datasetVersion = ((Dataset) dvObject).getVersionFromId(solrSearchResult.getDatasetVersionId());
236+
return datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion);
237+
}
238+
return null;
239+
}
240+
235241
private User getUser(ContainerRequestContext crc) throws WrappedResponse {
236242
User userToExecuteSearchAs = GuestUser.get();
237243
try {

src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,29 +9,21 @@
99
import java.util.Map;
1010
import java.util.logging.Logger;
1111

12+
import edu.harvard.iq.dataverse.*;
1213
import jakarta.json.Json;
1314
import jakarta.json.JsonArrayBuilder;
1415
import jakarta.json.JsonObject;
1516
import jakarta.json.JsonObjectBuilder;
1617

1718
import org.apache.commons.collections4.CollectionUtils;
1819

19-
import edu.harvard.iq.dataverse.DataFile;
20-
import edu.harvard.iq.dataverse.Dataset;
21-
import edu.harvard.iq.dataverse.DatasetField;
22-
import edu.harvard.iq.dataverse.DatasetRelPublication;
23-
import edu.harvard.iq.dataverse.DatasetVersion;
24-
import edu.harvard.iq.dataverse.DvObject;
25-
import edu.harvard.iq.dataverse.GlobalId;
26-
import edu.harvard.iq.dataverse.MetadataBlock;
2720
import edu.harvard.iq.dataverse.api.Util;
2821
import edu.harvard.iq.dataverse.dataset.DatasetThumbnail;
2922
import edu.harvard.iq.dataverse.util.DateUtil;
3023
import edu.harvard.iq.dataverse.util.json.JsonPrinter;
3124
import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
3225

3326
public class SolrSearchResult {
34-
3527
private static final Logger logger = Logger.getLogger(SolrSearchResult.class.getCanonicalName());
3628

3729
private String id;
@@ -403,15 +395,6 @@ public JsonArrayBuilder getRelevance() {
403395
return matchedFieldsArray;
404396
}
405397

406-
public JsonObject toJsonObject(boolean showRelevance, boolean showEntityIds, boolean showApiUrls) {
407-
return toJsonObject(showRelevance, showEntityIds, showApiUrls, null);
408-
}
409-
410-
public JsonObject toJsonObject(boolean showRelevance, boolean showEntityIds, boolean showApiUrls,
411-
List<String> metadataFields) {
412-
return json(showRelevance, showEntityIds, showApiUrls, metadataFields).build();
413-
}
414-
415398
/**
416399
* Add additional fields for the MyData page
417400
*
@@ -450,12 +433,10 @@ public JsonObjectBuilder getJsonForMyData() {
450433
} // getJsonForMydata
451434

452435
public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls) {
453-
return json(showRelevance, showEntityIds, showApiUrls, null);
436+
return json(showRelevance, showEntityIds, showApiUrls, null, null);
454437
}
455438

456-
public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls,
457-
List<String> metadataFields) {
458-
439+
public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls, List<String> metadataFields, Long datasetFileCount) {
459440
if (this.type == null) {
460441
return jsonObjectBuilder();
461442
}
@@ -571,7 +552,7 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool
571552
subjects.add(subject);
572553
}
573554
nullSafeJsonBuilder.add("subjects", subjects);
574-
nullSafeJsonBuilder.add("fileCount", dv.getFileMetadatas().size());
555+
nullSafeJsonBuilder.add("fileCount", datasetFileCount);
575556
nullSafeJsonBuilder.add("versionId", dv.getId());
576557
nullSafeJsonBuilder.add("versionState", dv.getVersionState().toString());
577558
if (this.isPublishedState()) {

0 commit comments

Comments
 (0)