Skip to content

Commit 9d44877

Browse files
bchen49 and David-YuWei authored
Fix issue with studies facet count not being correct (#158)
* Fix issue with studies facet count not being correct
* Fix issue with studies facet count not being correct_additional

---------

Co-authored-by: David-YuWei <ywwei85@gmail.com>
1 parent ed9df02 commit 9d44877

File tree

2 files changed

+79
-38
lines changed

2 files changed

+79
-38
lines changed

src/main/java/gov/nih/nci/bento_ri/model/PrivateESDataFetcher.java

Lines changed: 24 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -759,15 +759,19 @@ private Map<String, Object> searchParticipants(Map<String, Object> params) throw
759759
AGG_ENDPOINT, PARTICIPANTS_END_POINT
760760
));
761761
PARTICIPANT_TERM_AGGS.add(Map.of(
762+
CARDINALITY_AGG_NAME, "pid",
762763
AGG_NAME, "dbgap_accession",
764+
ADDITIONAL_UPDATE, Map.of("phs001327", 2000),
763765
FILTER_COUNT_QUERY, "filterParticipantCountByDBGAPAccession",
764-
AGG_ENDPOINT, PARTICIPANTS_END_POINT
766+
AGG_ENDPOINT, STUDIES_FACET_END_POINT
765767
));
766768
PARTICIPANT_TERM_AGGS.add(Map.of(
769+
CARDINALITY_AGG_NAME, "pid",
767770
AGG_NAME, "study_acronym",
771+
ADDITIONAL_UPDATE, Map.of("CCSS_SMN", 2000),
768772
WIDGET_QUERY, "participantCountByStudy",
769773
FILTER_COUNT_QUERY, "filterParticipantCountByAcronym",
770-
AGG_ENDPOINT, PARTICIPANTS_END_POINT
774+
AGG_ENDPOINT, STUDIES_FACET_END_POINT
771775
));
772776
PARTICIPANT_TERM_AGGS.add(Map.of(
773777
CARDINALITY_AGG_NAME, "pid",
@@ -922,24 +926,24 @@ private Map<String, Object> searchParticipants(Map<String, Object> params) throw
922926
AGG_ENDPOINT, FILES_END_POINT
923927
));
924928
PARTICIPANT_TERM_AGGS.add(Map.of(
925-
// CARDINALITY_AGG_NAME, "pid",
929+
CARDINALITY_AGG_NAME, "pid",
926930
// AGG_NAME, "study_name",
927931
// FILTER_COUNT_QUERY, "filterParticipantCountByStudyTitle",
928-
// ADDITIONAL_UPDATE, Map.of("Childhood Cancer Survivor Study (CCSS)", 2000, "Molecular Characterization Initiative", 1000),
929-
// AGG_ENDPOINT, STUDIES_FACET_END_POINT
932+
ADDITIONAL_UPDATE, Map.of("Childhood Cancer Survivor Study (CCSS)", 2000),
933+
AGG_ENDPOINT, STUDIES_FACET_END_POINT,
930934
AGG_NAME, "study_name",
931-
FILTER_COUNT_QUERY, "filterParticipantCountByStudyTitle",
932-
AGG_ENDPOINT, PARTICIPANTS_END_POINT
935+
FILTER_COUNT_QUERY, "filterParticipantCountByStudyTitle"
936+
// AGG_ENDPOINT, PARTICIPANTS_END_POINT
933937
));
934938
PARTICIPANT_TERM_AGGS.add(Map.of(
935-
// CARDINALITY_AGG_NAME, "pid",
939+
CARDINALITY_AGG_NAME, "pid",
936940
// AGG_NAME, "study_status",
937941
// FILTER_COUNT_QUERY, "filterParticipantCountByStudyStatus",
938-
// ADDITIONAL_UPDATE, Map.of("Active", 2000,"Completed", 3000),
939-
// AGG_ENDPOINT, STUDIES_FACET_END_POINT
942+
ADDITIONAL_UPDATE, Map.of("Active", 2000,"Completed", 3000),
943+
AGG_ENDPOINT, STUDIES_FACET_END_POINT,
940944
AGG_NAME, "study_status",
941-
FILTER_COUNT_QUERY, "filterParticipantCountByStudyStatus",
942-
AGG_ENDPOINT, PARTICIPANTS_END_POINT
945+
FILTER_COUNT_QUERY, "filterParticipantCountByStudyStatus"
946+
//AGG_ENDPOINT, PARTICIPANTS_END_POINT
943947
));
944948
PARTICIPANT_TERM_AGGS.add(Map.of(
945949
CARDINALITY_AGG_NAME, "pid",
@@ -1093,8 +1097,15 @@ private Map<String, Object> searchParticipants(Map<String, Object> params) throw
10931097
nestedProperty = "treatment_filters";
10941098
} else if (indexType.equals("treatment_responses")) {
10951099
nestedProperty = "treatment_response_filters";
1096-
} else {
1100+
} else if (indexType.equals("samples")) {
1101+
nestedProperty = "sample_diagnosis_file_filters";
1102+
} else if (indexType.equals("diagnosis")) {
1103+
nestedProperty = "sample_diagnosis_file_filters";
1104+
} else if (indexType.equals("files")) {
10971105
nestedProperty = "sample_diagnosis_file_filters";
1106+
} else {
1107+
// study_participants or participants
1108+
nestedProperty = "";
10981109
}
10991110
query_4_update = inventoryESService.addCustomAggregations(query_4_update, "facetAgg", prop, nestedProperty);
11001111
Request request = new Request("GET", PARTICIPANTS_END_POINT);

src/main/java/gov/nih/nci/bento_ri/service/InventoryESService.java

Lines changed: 55 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1102,31 +1102,24 @@ public Map<String, Object> addAggregations(Map<String, Object> query, String[] t
11021102
}
11031103

11041104
public Map<String, Object> addCustomAggregations(Map<String, Object> query, String aggName, String field, String nestedProperty) {
1105-
// "aggs": {
1106-
// "customAgg": {
1107-
// "nested": {
1108-
// "path": "sample_diagnosis_file_filters"
1109-
// },
1110-
// "aggs": {
1111-
// "min_price": {
1112-
// "terms": {
1113-
// "field": "sample_diagnosis_file_filters.diagnosis_classification_system"
1114-
// },
1115-
// "aggs": {
1116-
// "top_reverse_nested": {
1117-
// "reverse_nested": {}
1118-
// }
1119-
// }
1120-
// }
1121-
// }
1122-
// }
1123-
// }
1105+
// When nestedProperty is empty: root-level terms aggregation (no nested path).
1106+
// When nestedProperty is set: nested aggregation with reverse_nested for doc_count at root.
11241107
Map<String, Object> newQuery = new HashMap<>(query);
11251108
newQuery.put("size", 0);
11261109
Map<String, Object> aggSection = new HashMap<String, Object>();
1127-
Map<String, Object> aggSubSection = new HashMap<String, Object>();
1128-
aggSubSection.put("agg_buckets", Map.of("terms", Map.of("field", nestedProperty + "." + field, "size", 1000), "aggs", Map.of("top_reverse_nested", Map.of("reverse_nested", Map.of()))));
1129-
aggSection.put(aggName, Map.of("nested", Map.of("path", nestedProperty), "aggs", aggSubSection));
1110+
1111+
if (nestedProperty == null || nestedProperty.isEmpty()) {
1112+
// Root-level aggregation: terms on field only, no nested/reverse_nested
1113+
Map<String, Object> aggSubSection = new HashMap<String, Object>();
1114+
aggSubSection.put("terms", Map.of("field", field, "size", 1000));
1115+
aggSection.put(aggName, aggSubSection);
1116+
} else {
1117+
// Nested aggregation: nested path + terms on nestedProperty.field + reverse_nested
1118+
Map<String, Object> aggSubSection = new HashMap<String, Object>();
1119+
aggSubSection.put("agg_buckets", Map.of("terms", Map.of("field", nestedProperty + "." + field, "size", 1000), "aggs", Map.of("top_reverse_nested", Map.of("reverse_nested", Map.of()))));
1120+
aggSection.put(aggName, Map.of("nested", Map.of("path", nestedProperty), "aggs", aggSubSection));
1121+
}
1122+
11301123
newQuery.put("aggs", aggSection);
11311124
return newQuery;
11321125
}
@@ -1183,11 +1176,48 @@ public List<String> collectTerms(JsonObject jsonObject, String aggName) {
11831176
}
11841177

11851178
public Map<String, Integer> collectCustomTerms(JsonObject jsonObject, String aggName) {
1179+
// "facetAgg": {
1180+
// "doc_count_error_upper_bound": 0,
1181+
// "sum_other_doc_count": 0,
1182+
// "buckets": [
1183+
// {
1184+
// "key": "Active",
1185+
// "doc_count": 40919
1186+
// },
1187+
// {
1188+
// "key": "Completed",
1189+
// "doc_count": 19703
1190+
// }
1191+
// ]
1192+
// }
1193+
// or in the case of nested aggregation:
1194+
// "facetAgg": {
1195+
// "doc_count": 822822,
1196+
// "agg_buckets": {
1197+
// "doc_count_error_upper_bound": 0,
1198+
// "sum_other_doc_count": 0,
1199+
// "buckets": [
1200+
// {
1201+
// "key": "see diagnosis_comment",
1202+
// "doc_count": 125542,
1203+
// "top_reverse_nested": {
1204+
// "doc_count": 6374
1205+
// }
1206+
// }
1207+
11861208
Map<String, Integer> data = new HashMap<>();
11871209
JsonObject aggs = jsonObject.getAsJsonObject("aggregations").getAsJsonObject(aggName);
1188-
JsonArray buckets = aggs.getAsJsonObject("agg_buckets").getAsJsonArray("buckets");
1189-
for (var bucket: buckets) {
1190-
data.put(bucket.getAsJsonObject().get("key").getAsString(), bucket.getAsJsonObject().getAsJsonObject("top_reverse_nested").get("doc_count").getAsInt());
1210+
JsonArray buckets = aggs.getAsJsonObject("agg_buckets") != null ? aggs.getAsJsonObject("agg_buckets").getAsJsonArray("buckets") : aggs.getAsJsonArray("buckets");
1211+
for (var bucket : buckets) {
1212+
JsonObject bucketObj = bucket.getAsJsonObject();
1213+
String key = bucketObj.get("key").getAsString();
1214+
int docCount;
1215+
if (bucketObj.has("top_reverse_nested")) {
1216+
docCount = bucketObj.getAsJsonObject("top_reverse_nested").get("doc_count").getAsInt();
1217+
} else {
1218+
docCount = bucketObj.get("doc_count").getAsInt();
1219+
}
1220+
data.put(key, docCount);
11911221
}
11921222
return data;
11931223
}

0 commit comments

Comments
 (0)