Skip to content

Commit 69cda0d

Browse files
committed
Finalized detailed projection to include sample ids
1 parent 899a186 commit 69cda0d

File tree

9 files changed

+174
-43
lines changed

9 files changed

+174
-43
lines changed

src/main/java/org/cbioportal/application/rest/vcolumnstore/ColumnarStoreStudyViewController.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,9 @@ public ResponseEntity<List<GenomicDataCountItem>> fetchMutationDataCounts(
496496
@Parameter(required = true, description = "Genomic data count filter")
497497
@Valid
498498
@RequestBody(required = false)
499-
GenomicDataCountFilter genomicDataCountFilter) {
499+
GenomicDataCountFilter genomicDataCountFilter,
500+
@Parameter(description = "Flag to include sample ids") @RequestParam(defaultValue = "false")
501+
boolean includeSampleIds) {
500502
List<GenomicDataFilter> genomicDataFilters = genomicDataCountFilter.getGenomicDataFilters();
501503
StudyViewFilter studyViewFilter = genomicDataCountFilter.getStudyViewFilter();
502504
// when there is only one filter, it means study view is doing a single chart filter operation
@@ -515,7 +517,7 @@ public ResponseEntity<List<GenomicDataCountItem>> fetchMutationDataCounts(
515517
projection == Projection.SUMMARY
516518
? studyViewService.getMutationCountsByGeneSpecific(studyViewFilter, genomicDataFilters)
517519
: studyViewService.getMutationTypeCountsByGeneSpecific(
518-
studyViewFilter, genomicDataFilters);
520+
studyViewFilter, genomicDataFilters, includeSampleIds);
519521

520522
return ResponseEntity.ok(result);
521523
}

src/main/java/org/cbioportal/domain/genomic_data/repository/GenomicDataRepository.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,12 @@ Map<String, Integer> getMutationCounts(
7575
* {@code null}.
7676
* @param genomicDataFilters a list of filters to apply to the genomic data. Must not be {@code
7777
* null}.
78+
* @param includeSampleIds if {@code true}, also return the sample ids behind each count
7879
* @return a list of {@link GenomicDataCountItem} objects representing the mutation counts by
7980
* type.
8081
*/
8182
List<GenomicDataCountItem> getMutationCountsByType(
82-
StudyViewFilterContext studyViewFilterContext, List<GenomicDataFilter> genomicDataFilters);
83+
StudyViewFilterContext studyViewFilterContext,
84+
List<GenomicDataFilter> genomicDataFilters,
85+
boolean includeSampleIds);
8386
}

src/main/java/org/cbioportal/domain/genomic_data/usecase/GetMutationCountsByTypeUseCase.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,14 @@ public GetMutationCountsByTypeUseCase(GenomicDataRepository repository) {
3131
*
3232
* @param studyViewFilterContext the context of the study view filter to apply
3333
* @param genomicDataFilters a list of genomic data filters to apply
34+
* @param includeSampleIds if {@code true}, also return the sample ids behind each count
3435
* @return a list of {@link GenomicDataCountItem} representing the mutation counts by type
3536
*/
3637
public List<GenomicDataCountItem> execute(
37-
StudyViewFilterContext studyViewFilterContext, List<GenomicDataFilter> genomicDataFilters) {
38-
return repository.getMutationCountsByType(studyViewFilterContext, genomicDataFilters);
38+
StudyViewFilterContext studyViewFilterContext,
39+
List<GenomicDataFilter> genomicDataFilters,
40+
boolean includeSampleIds) {
41+
return repository.getMutationCountsByType(
42+
studyViewFilterContext, genomicDataFilters, includeSampleIds);
3943
}
4044
}

src/main/java/org/cbioportal/domain/studyview/StudyViewService.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,10 +304,13 @@ public List<GenomicDataCountItem> getMutationCountsByGeneSpecific(
304304
condition =
305305
"@cacheEnabledConfig.getEnabledClickhouse() && @studyViewFilterUtil.isUnfilteredQuery(#studyViewFilter)")
306306
public List<GenomicDataCountItem> getMutationTypeCountsByGeneSpecific(
307-
StudyViewFilter studyViewFilter, List<GenomicDataFilter> genomicDataFilters) {
307+
StudyViewFilter studyViewFilter,
308+
List<GenomicDataFilter> genomicDataFilters,
309+
boolean includeSampleIds) {
308310
return genomicDataUseCases
309311
.getMutationCountsByTypeUseCase()
310-
.execute(buildStudyViewFilterContext(studyViewFilter), genomicDataFilters);
312+
.execute(
313+
buildStudyViewFilterContext(studyViewFilter), genomicDataFilters, includeSampleIds);
311314
}
312315

313316
@Cacheable(

src/main/java/org/cbioportal/infrastructure/repository/clickhouse/genomic_data/ClickhouseGenomicDataMapper.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,5 +67,7 @@ Map<String, Integer> getMutationCounts(
6767
* @return a list of mutation counts by type
6868
*/
6969
List<GenomicDataCountItem> getMutationCountsByType(
70-
StudyViewFilterContext studyViewFilterContext, List<GenomicDataFilter> genomicDataFilters);
70+
StudyViewFilterContext studyViewFilterContext,
71+
List<GenomicDataFilter> genomicDataFilters,
72+
boolean includeSampleIds);
7173
}

src/main/java/org/cbioportal/infrastructure/repository/clickhouse/genomic_data/ClickhouseGenomicDataRepository.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,10 @@ public Map<String, Integer> getMutationCounts(
4747

4848
@Override
4949
public List<GenomicDataCountItem> getMutationCountsByType(
50-
StudyViewFilterContext studyViewFilterContext, List<GenomicDataFilter> genomicDataFilters) {
51-
return mapper.getMutationCountsByType(studyViewFilterContext, genomicDataFilters);
50+
StudyViewFilterContext studyViewFilterContext,
51+
List<GenomicDataFilter> genomicDataFilters,
52+
boolean includeSampleIds) {
53+
return mapper.getMutationCountsByType(
54+
studyViewFilterContext, genomicDataFilters, includeSampleIds);
5255
}
5356
}

src/main/java/org/cbioportal/legacy/model/GenomicDataCount.java

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,14 @@ public GenomicDataCount(String label, String value, Integer count, Integer uniqu
2828
this.uniqueCount = uniqueCount;
2929
}
3030

31-
public GenomicDataCount(String label, String value, Integer count, Integer uniqueCount, List<String> sampleIds) {
32-
this.label = label;
33-
this.value = value;
34-
this.count = count;
35-
this.uniqueCount = uniqueCount;
36-
this.sampleIds = sampleIds;
37-
}
31+
public GenomicDataCount(
32+
String label, String value, Integer count, Integer uniqueCount, List<String> sampleIds) {
33+
this.label = label;
34+
this.value = value;
35+
this.count = count;
36+
this.uniqueCount = uniqueCount;
37+
this.sampleIds = sampleIds;
38+
}
3839

3940
public String getLabel() {
4041
return label;
@@ -59,18 +60,18 @@ public Integer getCount() {
5960
public void setCount(Integer count) {
6061
this.count = count;
6162
}
62-
63+
6364
public List<String> getSampleIds() {
64-
return sampleIds;
65+
return sampleIds;
6566
}
66-
67+
6768
public void setSampleIds(String sampleIdsStr) {
68-
if (sampleIdsStr != null && !sampleIdsStr.isEmpty()) {
69-
this.sampleIds = Arrays.asList(sampleIdsStr.split(","));
70-
}
69+
if (sampleIdsStr != null && !sampleIdsStr.isEmpty()) {
70+
this.sampleIds = Arrays.asList(sampleIdsStr.split(","));
71+
}
7172
}
7273

73-
public Integer getUniqueCount() {
74+
public Integer getUniqueCount() {
7475
return uniqueCount;
7576
}
7677

@@ -88,4 +89,9 @@ public boolean equals(Object o) {
8889
&& Objects.equals(count, that.count)
8990
&& Objects.equals(uniqueCount, that.uniqueCount);
9091
}
92+
93+
@Override
94+
public int hashCode() {
95+
return Objects.hash(label, value, count, uniqueCount);
96+
}
9197
}

src/main/resources/mappers/clickhouse/genomic_data/ClickhouseGenomicDataMapper.xml

Lines changed: 84 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -127,26 +127,92 @@
127127

128128
<!-- For /mutation-data-counts/fetch: mutation type counts for the table part (returns GenomicDataCountItem objects). -->
129129
<select id="getMutationCountsByType" resultMap="GenomicDataCountItemResultMap">
130-
SELECT
131-
hugo_gene_symbol as hugoGeneSymbol,
132-
'mutations' as profileType,
133-
replace(mutation_type, '_', ' ') as label,
134-
mutation_type as value,
135-
count(*) as count,
136-
count(distinct(sample_unique_id)) as uniqueCount,
137-
arrayStringConcat(groupArray(DISTINCT sample_unique_id), ',') AS sampleIdsStr
138-
FROM genomic_event_derived
139-
<where>
130+
WITH
131+
<if test="includeSampleIds">
132+
all_samples AS (
133+
SELECT sample_unique_id
134+
FROM sample_derived
135+
WHERE cancer_study_identifier IN
136+
<foreach collection="studyViewFilterContext.studyIds" item="studyId" open="(" separator="," close=")">
137+
#{studyId}
138+
</foreach>
139+
),
140+
</if>
141+
mutated_samples AS (
142+
SELECT
143+
sample_unique_id,
144+
hugo_gene_symbol,
145+
mutation_type
146+
FROM genomic_event_derived
147+
<where>
148+
<include
149+
refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.applyStudyViewFilterUsingSampleId"/>
150+
AND variant_type = 'mutation'
151+
<foreach item="genomicDataFilter" collection="genomicDataFilters" open=" AND (" separator=" OR " close=")">
152+
hugo_gene_symbol = #{genomicDataFilter.hugoGeneSymbol}
153+
</foreach>
154+
</where>
155+
)
156+
<if test="includeSampleIds">
157+
,
158+
profiled_samples as (
159+
SELECT DISTINCT sgp.sample_unique_id
160+
FROM sample_to_gene_panel_derived sgp
161+
JOIN gene_panel_to_gene_derived gpg ON sgp.gene_panel_id = gpg.gene_panel_id
162+
WHERE
140163
<include
141-
refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.applyStudyViewFilterUsingSampleId"/>
142-
AND variant_type = 'mutation'
143-
<foreach item="genomicDataFilter" collection="genomicDataFilters" open=" AND (" separator=" OR " close=")">
144-
hugo_gene_symbol = #{genomicDataFilter.hugoGeneSymbol}
145-
</foreach>
146-
</where>
164+
refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.applyStudyViewFilterUsingSampleId"/>
165+
AND gpg.gene = #{genomicDataFilters[0].hugoGeneSymbol}
166+
AND sgp.alteration_type = 'MUTATION_EXTENDED'
167+
)
168+
</if>
169+
170+
SELECT
171+
hugo_gene_symbol AS hugoGeneSymbol,
172+
'mutations' AS profileType,
173+
replace(mutation_type, '_', ' ') AS label,
174+
mutation_type AS value,
175+
COUNT(*) AS count,
176+
COUNT(DISTINCT sample_unique_id) AS uniqueCount
177+
<if test="includeSampleIds">
178+
,
179+
arrayStringConcat(groupArray(DISTINCT sample_unique_id), ',') AS sampleIdsStr
180+
</if>
181+
FROM
182+
mutated_samples
147183
GROUP BY
148-
mutation_type,
149-
hugo_gene_symbol
184+
hugo_gene_symbol,
185+
mutation_type
186+
187+
<if test="includeSampleIds">
188+
UNION ALL
189+
190+
SELECT
191+
#{genomicDataFilters[0].hugoGeneSymbol} as hugoGeneSymbol,
192+
'Not Mutated' AS profileType,
193+
'Not Mutated' AS label,
194+
'NOT_MUTATED' AS value,
195+
COUNT(*) AS count,
196+
COUNT(DISTINCT sample_unique_id) AS uniqueCount,
197+
arrayStringConcat(groupArray(DISTINCT sample_unique_id), ',') AS sampleIdsStr
198+
FROM profiled_samples
199+
where sample_unique_id NOT in (SELECT sample_unique_id from mutated_samples)
200+
201+
UNION ALL
202+
203+
SELECT
204+
#{genomicDataFilters[0].hugoGeneSymbol} AS hugoGeneSymbol,
205+
'Not Profiled' AS profileType,
206+
'Not Profiled' AS label,
207+
'NOT_PROFILED' AS value,
208+
COUNT(*) AS count,
209+
COUNT(DISTINCT sample_unique_id) AS uniqueCount,
210+
arrayStringConcat(groupArray(DISTINCT sample_unique_id), ',') AS sampleIdsStr
211+
FROM all_samples
212+
WHERE
213+
sample_unique_id NOT IN ( SELECT sample_unique_id FROM profiled_samples
214+
)
215+
</if>
150216
</select>
151217

152218
<resultMap id="GenomicDataCountItemResultMap" type="org.cbioportal.legacy.model.GenomicDataCountItem">

src/test/java/org/cbioportal/infrastructure/repository/clickhouse/genomic_data/ClickhouseGenomicDataMapperTest.java

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ public void getMutationCountsByType() {
119119
List<GenomicDataCountItem> actualMutationCountsByType =
120120
mapper.getMutationCountsByType(
121121
StudyViewFilterFactory.make(studyViewFilter, null, studyViewFilter.getStudyIds(), null),
122-
List.of(genomicDataFilterMutation));
122+
List.of(genomicDataFilterMutation),
123+
false);
123124
List<GenomicDataCountItem> expectedMutationCountsByType =
124125
List.of(
125126
new GenomicDataCountItem(
@@ -134,6 +135,47 @@ public void getMutationCountsByType() {
134135
.isEqualTo(expectedMutationCountsByType);
135136
}
136137

138+
@Test
139+
public void getMutationCountsByTypeAddSampleId() {
140+
StudyViewFilter studyViewFilter = new StudyViewFilter();
141+
studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB));
142+
143+
GenomicDataFilter genomicDataFilterMutation = new GenomicDataFilter("AKT1", "mutation");
144+
List<GenomicDataCountItem> mutationCountsByType =
145+
mapper.getMutationCountsByType(
146+
StudyViewFilterFactory.make(studyViewFilter, null, studyViewFilter.getStudyIds(), null),
147+
List.of(genomicDataFilterMutation),
148+
true);
149+
150+
assertThat(mutationCountsByType)
151+
.flatExtracting(GenomicDataCountItem::getCounts)
152+
.extracting(GenomicDataCount::getSampleIds)
153+
.allSatisfy(
154+
sampleIds ->
155+
assertThat(sampleIds)
156+
.as("sampleIds should be populated when includeSampleIds=true")
157+
.isNotNull()
158+
.isNotEmpty());
159+
}
160+
161+
@Test
162+
public void getMutationCountsByTypeNoSampleId() {
163+
StudyViewFilter studyViewFilter = new StudyViewFilter();
164+
studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB));
165+
166+
GenomicDataFilter genomicDataFilterMutation = new GenomicDataFilter("AKT1", "mutation");
167+
List<GenomicDataCountItem> mutationCountsByType =
168+
mapper.getMutationCountsByType(
169+
StudyViewFilterFactory.make(studyViewFilter, null, studyViewFilter.getStudyIds(), null),
170+
List.of(genomicDataFilterMutation),
171+
false);
172+
173+
assertThat(mutationCountsByType)
174+
.flatExtracting(GenomicDataCountItem::getCounts)
175+
.extracting(GenomicDataCount::getSampleIds)
176+
.containsOnlyNulls();
177+
}
178+
137179
@Test
138180
public void getProteinExpressionCounts() {
139181
// Testing combined study missing samples when one lacks a relevant genomic profile

0 commit comments

Comments
 (0)