Skip to content

Commit 0fe329f

Browse files
authored
Make study export clickhouse only (#11893)
* Make study export ClickHouse-only; drop MySQL support
* Fix virtual study download when filtering with `sample id IN (IDS)`
1 parent 6c28892 commit 0fe329f

File tree

10 files changed

+385
-478
lines changed

10 files changed

+385
-478
lines changed

src/main/java/org/cbioportal/application/file/export/ExportConfig.java

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
import org.springframework.context.ApplicationContext;
8585
import org.springframework.context.annotation.Bean;
8686
import org.springframework.context.annotation.Configuration;
87+
import org.springframework.core.io.Resource;
8788
import org.springframework.web.servlet.config.annotation.AsyncSupportConfigurer;
8889
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
8990

@@ -218,31 +219,25 @@ public VirtualStudyExportDecoratorService virtualStudyAwareExportService(
218219
return new VirtualStudyExportDecoratorService(virtualStudyService, exportService);
219220
}
220221

222+
@Bean("exportMapperLocations")
223+
public Resource[] clickhouseExportMapperLocations(ApplicationContext applicationContext)
224+
throws IOException {
225+
return applicationContext.getResources("classpath:mappers/export/*.xml");
226+
}
227+
221228
@Bean("exportSqlSessionFactory")
222229
public SqlSessionFactoryBean exportSqlSessionFactory(
223-
@Qualifier("exportDataSource") DataSource dataSource, ApplicationContext applicationContext)
224-
throws IOException {
230+
@Qualifier("exportDataSource") DataSource dataSource, Resource[] exportMapperLocations) {
225231
SqlSessionFactoryBean sessionFactory = new SqlSessionFactoryBean();
226232
sessionFactory.setDataSource(dataSource);
227-
sessionFactory.setMapperLocations(
228-
applicationContext.getResources("classpath:mappers/export/*.xml"));
233+
sessionFactory.setMapperLocations(exportMapperLocations);
229234
return sessionFactory;
230235
}
231236

232-
@Bean
233-
public DataSource exportDataSource(DataSourceProperties dataSourceProperties) {
234-
HikariConfig hikariConfig = new HikariConfig();
235-
hikariConfig.setJdbcUrl(dataSourceProperties.getUrl());
236-
hikariConfig.setUsername(dataSourceProperties.getUsername());
237-
hikariConfig.setPassword(dataSourceProperties.getPassword());
238-
239-
// Set streaming properties for data export
240-
Properties dsProperties = new Properties();
241-
dsProperties.setProperty("useCursorFetch", "true");
242-
dsProperties.setProperty("defaultFetchSize", "1000");
243-
hikariConfig.setDataSourceProperties(dsProperties);
244-
245-
return new HikariDataSource(hikariConfig);
237+
@Bean("exportDataSource")
238+
public DataSource clickhouseExportDataSource(DataSourceProperties dataSourceProperties) {
239+
// TODO How to use cursor fetch with ClickHouse to minimize memory usage during export?
240+
return createDataSource(dataSourceProperties, null);
246241
}
247242

248243
@Value("${feature.study.export.timeout_ms:600000}") // 10 minutes timeout by default
@@ -253,6 +248,23 @@ public void configureAsyncSupport(AsyncSupportConfigurer configurer) {
253248
configurer.setDefaultTimeout(timeoutMs);
254249
}
255250

251+
private DataSource createDataSource(
252+
DataSourceProperties dataSourceProperties, Properties dataSourcePropertiesOverrides) {
253+
HikariConfig hikariConfig = new HikariConfig();
254+
hikariConfig.setJdbcUrl(dataSourceProperties.getUrl());
255+
hikariConfig.setUsername(dataSourceProperties.getUsername());
256+
hikariConfig.setPassword(dataSourceProperties.getPassword());
257+
258+
if (dataSourceProperties.getDriverClassName() != null) {
259+
hikariConfig.setDriverClassName(dataSourceProperties.getDriverClassName());
260+
}
261+
if (dataSourcePropertiesOverrides != null) {
262+
hikariConfig.setDataSourceProperties(dataSourcePropertiesOverrides);
263+
}
264+
265+
return new HikariDataSource(hikariConfig);
266+
}
267+
256268
@Bean
257269
public List<Exporter> exporters(
258270
CancerStudyMetadataExporter cancerStudyMetadataExporter,

src/main/resources/mappers/export/CancerStudyMetadataMapper.xml

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,44 +4,44 @@
44
<mapper namespace="org.cbioportal.application.file.export.repositories.mybatis.CancerStudyMetadataMapper">
55
<select id="getCancerStudyMetadata" resultType="org.cbioportal.application.file.model.CancerStudyMetadata">
66
SELECT
7-
cs.TYPE_OF_CANCER_ID AS typeOfCancer,
8-
cs.CANCER_STUDY_IDENTIFIER AS cancerStudyIdentifier,
9-
cs.NAME AS name,
10-
cs.DESCRIPTION AS description,
11-
cs.CITATION AS citation,
12-
cs.PMID AS pmid,
13-
cs.`GROUPS` AS `groups`,
14-
rg.NAME AS referenceGenome
7+
cs.type_of_cancer_id AS typeOfCancer,
8+
cs.cancer_study_identifier AS cancerStudyIdentifier,
9+
cs.name AS name,
10+
cs.description AS description,
11+
cs.citation AS citation,
12+
cs.pmid AS pmid,
13+
cs.`groups` AS `groups`,
14+
rg.name AS referenceGenome
1515
FROM cancer_study cs
16-
JOIN reference_genome rg ON rg.REFERENCE_GENOME_ID = cs.REFERENCE_GENOME_ID
17-
WHERE cs.CANCER_STUDY_IDENTIFIER = #{studyId}
16+
JOIN reference_genome rg ON rg.reference_genome_id = cs.reference_genome_id
17+
WHERE cs.cancer_study_identifier = #{studyId}
1818
</select>
1919
<select id="getCancerTypeHierarchy" resultType="org.cbioportal.application.file.model.CancerType">
2020
WITH RECURSIVE cancer_type_hierarchy AS (
2121
-- Anchor member: get the initial cancer type from the study
2222
SELECT
23-
ct.TYPE_OF_CANCER_ID AS typeOfCancerId,
24-
ct.NAME AS name,
25-
ct.DEDICATED_COLOR AS dedicatedColor,
26-
ct.SHORT_NAME AS shortName,
27-
ct.PARENT AS parent
23+
ct.type_of_cancer_id AS typeOfCancerId,
24+
ct.name AS name,
25+
ct.dedicated_color AS dedicatedColor,
26+
ct.short_name AS shortName,
27+
ct.parent AS parent
2828
FROM cancer_study cs
29-
JOIN type_of_cancer ct ON ct.TYPE_OF_CANCER_ID = cs.TYPE_OF_CANCER_ID
30-
WHERE cs.CANCER_STUDY_IDENTIFIER = #{studyId}
29+
JOIN type_of_cancer ct ON ct.type_of_cancer_id = cs.type_of_cancer_id
30+
WHERE cs.cancer_study_identifier = #{studyId}
3131

3232
UNION ALL
3333

3434
-- Recursive member: follow the parent chain
3535
SELECT
36-
parent_ct.TYPE_OF_CANCER_ID AS typeOfCancerId,
37-
parent_ct.NAME AS name,
38-
parent_ct.DEDICATED_COLOR AS dedicatedColor,
39-
parent_ct.SHORT_NAME AS shortName,
40-
parent_ct.PARENT AS parent
36+
parent_ct.type_of_cancer_id AS typeOfCancerId,
37+
parent_ct.name AS name,
38+
parent_ct.dedicated_color AS dedicatedColor,
39+
parent_ct.short_name AS shortName,
40+
parent_ct.parent AS parent
4141
FROM type_of_cancer parent_ct
42-
JOIN cancer_type_hierarchy cth ON cth.parent = parent_ct.TYPE_OF_CANCER_ID
42+
JOIN cancer_type_hierarchy cth ON cth.parent = parent_ct.type_of_cancer_id
4343
)
4444

45-
SELECT * FROM cancer_type_hierarchy;
45+
SELECT * FROM cancer_type_hierarchy
4646
</select>
47-
</mapper>
47+
</mapper>

src/main/resources/mappers/export/CaseListMetadataMapper.xml

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,39 +3,33 @@
33

44
<mapper namespace="org.cbioportal.application.file.export.repositories.mybatis.CaseListMetadataMapper">
55
<resultMap id="CaseListMetadataResultMap" type="org.cbioportal.application.file.model.CaseListMetadata">
6-
<id column="LIST_ID"/>
7-
<result property="cancerStudyIdentifier" column="cancerStudyIdentifier"/>
8-
<result property="stableId" column="stableId"/>
6+
<id column="list_id"/>
7+
<result property="cancerStudyIdentifier" column="cancer_study_identifier"/>
8+
<result property="stableId" column="stable_id"/>
99
<result property="name" column="name"/>
1010
<result property="description" column="description"/>
1111
<collection property="sampleIds" ofType="String" javaType="java.util.TreeSet">
12-
<result column="sampleId"/>
12+
<result column="sample_id"/>
1313
</collection>
1414
</resultMap>
1515
<select
1616
id="getCaseListsMetadata"
1717
resultMap="CaseListMetadataResultMap">
18+
<bind name="sampleIdsArray" value="sampleIds != null ? sampleIds.toArray(new String[sampleIds.size()]) : null" />
1819
SELECT
19-
sl.LIST_ID,
20-
cs.CANCER_STUDY_IDENTIFIER as cancerStudyIdentifier,
21-
sl.STABLE_ID as stableId,
22-
sl.NAME as name,
23-
sl.DESCRIPTION as description,
24-
s.STABLE_ID as sampleId
20+
sl.list_id AS list_id,
21+
cs.cancer_study_identifier AS cancer_study_identifier,
22+
sl.stable_id AS stable_id,
23+
sl.name AS name,
24+
sl.description AS description,
25+
s.stable_id AS sample_id
2526
FROM sample_list sl
26-
JOIN cancer_study cs ON cs.CANCER_STUDY_ID = sl.CANCER_STUDY_ID
27-
JOIN sample_list_list sll ON sll.LIST_ID = sl.LIST_ID
28-
JOIN sample s ON s.INTERNAL_ID = sll.SAMPLE_ID
27+
JOIN cancer_study cs ON cs.cancer_study_id = sl.cancer_study_id
28+
JOIN sample_list_list sll ON sll.list_id = sl.list_id
29+
JOIN sample s ON s.internal_id = sll.sample_id
30+
WHERE cs.cancer_study_identifier = #{studyId}
2931
<if test="sampleIds != null">
30-
JOIN (
31-
SELECT *
32-
FROM (VALUES
33-
<foreach item="sid" collection="sampleIds" separator=",">
34-
ROW(#{sid})
35-
</foreach>
36-
) AS temp(sample_id)
37-
) AS sample_ids_subquery ON sample_ids_subquery.sample_id = s.STABLE_ID
32+
AND s.stable_id IN (#{sampleIdsArray, typeHandler=org.apache.ibatis.type.ArrayTypeHandler})
3833
</if>
39-
WHERE cs.CANCER_STUDY_IDENTIFIER = #{studyId}
4034
</select>
41-
</mapper>
35+
</mapper>

0 commit comments

Comments
 (0)