diff --git a/config/clickhouse/schema/clinical_indication.sql b/config/clickhouse/schema/clinical_indication.sql
new file mode 100644
index 00000000..62f0a126
--- /dev/null
+++ b/config/clickhouse/schema/clinical_indication.sql
@@ -0,0 +1,12 @@
+-- Staging table that receives the raw clinical_indication rows.
+-- The post-load script (scripts/clinical_indication.sql) reads from it to build
+-- clinical_indication_drug / clinical_indication_disease and then drops it.
+create table if not exists clinical_indication_log (
+    id String,
+    drugId Nullable (String),
+    diseaseId Nullable (String),
+    maxClinicalStatus LowCardinality (String),
+    mappingStatus LowCardinality (String),
+    clinicalReportIds Array (String),
+    mappingConfidence LowCardinality (String)
+) engine = Log;
diff --git a/config/clickhouse/scripts/clinical_indication.sql b/config/clickhouse/scripts/clinical_indication.sql
new file mode 100644
index 00000000..4f5831cb
--- /dev/null
+++ b/config/clickhouse/scripts/clinical_indication.sql
@@ -0,0 +1,31 @@
+-- Drug-keyed copy of the staging table; rows without a drugId are skipped.
+-- drugId keeps the Nullable(String) type it has in clinical_indication_log,
+-- and a nullable sorting key is rejected by default, hence allow_nullable_key.
+create table if not exists clinical_indication_drug engine = MergeTree ()
+order by (drugId, id) primary key (drugId)
+settings allow_nullable_key = 1 as
+select id,
+    drugId,
+    diseaseId,
+    maxClinicalStatus,
+    mappingStatus,
+    clinicalReportIds,
+    mappingConfidence
+from clinical_indication_log
+where drugId is not null;
+-- Disease-keyed copy of the staging table; rows without a diseaseId are skipped.
+-- diseaseId is Nullable(String) as well, so the same setting is required.
+create table if not exists clinical_indication_disease engine = MergeTree ()
+order by (diseaseId, id) primary key (diseaseId)
+settings allow_nullable_key = 1 as
+select id,
+    drugId,
+    diseaseId,
+    maxClinicalStatus,
+    mappingStatus,
+    clinicalReportIds,
+    mappingConfidence
+from clinical_indication_log
+where diseaseId is not null;
+-- The staging table is no longer needed once both keyed tables exist.
+drop table clinical_indication_log;
diff --git a/config/config.yaml b/config/config.yaml index d94f3015..d01f1a6d 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -2,18 +2,18 @@ # Platform Output Support Configuration File - PLATFORM work_path: ./work log_level: DEBUG -release_uri: gs://open-targets-pipeline-runs/il/25.12-testrun-1 # e.g gs://open-targets-pipeline-runs/szsz/25.06-testrun-4 +release_uri: gs://open-targets-pipeline-runs/rm/25.12-testrun-2 # e.g gs://open-targets-pipeline-runs/szsz/25.06-testrun-4 scratchpad: # ================================= SET 
THESE VALUES ==================================== release: 25.12 # e.g. 25.12 - data_source: gs://open-targets-pipeline-runs/il/25.12-testrun-1 # same as release_uri + data_source: gs://open-targets-pipeline-runs/rm/25.12-testrun-2 # same as release_uri database_namespace: platform2512 # e.g. platform2512 # Google disk snapshot variables - opensearch_disk_name: platform-2512-os - opensearch_disk_snapshot_name: platform-2512-os - clickhouse_disk_name: platform-2512-ch - clickhouse_disk_snapshot_name: platform-2512-ch + opensearch_disk_name: platform-2512-os-4247-1 + opensearch_disk_snapshot_name: platform-2512-os-4247-1 + clickhouse_disk_name: platform-2512-ch-4247-1 + clickhouse_disk_snapshot_name: platform-2512-ch-4247-1 # ========================= THESE VALUES RARELY NEED CHANGING =========================== disk_snapshot_project_id: open-targets-eu-dev release_gcs_bucket: gs://open-targets-data-releases @@ -116,7 +116,6 @@ steps: - indication - interaction - interaction_evidence - - known_drugs - l2g_predictions - mechanism_of_action - mouse_phenotypes @@ -187,6 +186,7 @@ steps: - credible_sets - studies - disease + - clinical_indication do: - name: clickhouse_load clickhouse load ${each} dataset: ${each} @@ -205,34 +205,36 @@ steps: dataset: targets clickhouse_database: ${database_namespace} data_dir_parent: ${local_data} - - name: explode backup clickhouse tables - requires: - - explode load all datasets - foreach: - - associations_otf_target - - associations_otf_disease - - baseline_expression - - intervals - - literature_index - - literature - - targets - - ml_w2v - do: - - name: clickhouse_backup table ${each} - clickhouse_database: ${database_namespace} - table: ${each} - gcs_base_path: ${clickhouse_backup_base_path} - - name: clickhouse_stop data loading instance - requires: - - explode backup clickhouse tables - clickhouse_database_name: ${database_namespace} - - name: create_gcp_disk_snapshot clickhouse - requires: - - clickhouse_stop data loading 
instance - gcp_project_id: ${disk_snapshot_project_id} - gcp_disk_name: ${clickhouse_disk_name} - gcp_snapshot_name: ${clickhouse_disk_snapshot_name} - gcp_disk_zone: europe-west1-d + # - name: explode backup clickhouse tables + # requires: + # - explode load all datasets + # foreach: + # - associations_otf_target + # - associations_otf_disease + # - clinical_indication_disease + # - clinical_indication_drug + # - baseline_expression + # - intervals + # - literature_index + # - literature + # - targets + # - ml_w2v + # do: + # - name: clickhouse_backup table ${each} + # clickhouse_database: ${database_namespace} + # table: ${each} + # gcs_base_path: ${clickhouse_backup_base_path} + # - name: clickhouse_stop data loading instance + # requires: + # - explode backup clickhouse tables + # clickhouse_database_name: ${database_namespace} + # - name: create_gcp_disk_snapshot clickhouse + # requires: + # - clickhouse_stop data loading instance + # gcp_project_id: ${disk_snapshot_project_id} + # gcp_disk_name: ${clickhouse_disk_name} + # gcp_snapshot_name: ${clickhouse_disk_snapshot_name} + # gcp_disk_zone: europe-west1-d # exports - name: explode_data_prep search_ebi_associations dataset: search_ebi_associations @@ -465,6 +467,7 @@ steps: - credible_sets - studies - disease + - clinical_indication do: - name: clickhouse_load clickhouse load ${each} dataset: ${each} @@ -495,6 +498,8 @@ steps: - literature - targets - ml_w2v + - clinical_indication_disease + - clinical_indication_drug do: - name: clickhouse_backup table ${each} clickhouse_database: ${database_namespace} @@ -549,6 +554,8 @@ steps: - literature - targets - ml_w2v + - clinical_indication_disease + - clinical_indication_drug do: - name: clickhouse_backup table ${each} requires: @@ -570,6 +577,8 @@ steps: - literature - targets - ml_w2v + - clinical_indication_disease + - clinical_indication_drug do: - name: clickhouse_restore table ${each} clickhouse_database: ${database_namespace} @@ -599,6 +608,7 @@ steps: - 
credible_sets - studies - disease + - clinical_indication do: - name: clickhouse_load clickhouse load ${each} requires: diff --git a/config/config_ppp.yaml b/config/config_ppp.yaml index b5c97e70..4ce70c99 100644 --- a/config/config_ppp.yaml +++ b/config/config_ppp.yaml @@ -115,7 +115,6 @@ steps: - indication - interaction - interaction_evidence - - known_drugs - l2g_predictions - mechanism_of_action - mouse_phenotypes diff --git a/config/datasets.yaml b/config/datasets.yaml index d8f1e29c..67d1a098 100644 --- a/config/datasets.yaml +++ b/config/datasets.yaml @@ -225,11 +225,6 @@ opensearch: input_dir: output/interaction_evidence mappings: config/opensearch/default.json output_dir: interaction_evidence - known_drugs: - index: known_drugs - input_dir: output/known_drug - mappings: config/opensearch/search_known_drugs.json - output_dir: known_drug l2g_predictions: index: l2g_predictions input_dir: output/l2g_prediction @@ -372,6 +367,11 @@ clickhouse: input_dir: output/disease preload_script: config/clickhouse/schema/disease.sql postload_script: config/clickhouse/scripts/disease.sql + clinical_indication: + table: clinical_indication_log + input_dir: output/clinical_indication + preload_script: config/clickhouse/schema/clinical_indication.sql + postload_script: config/clickhouse/scripts/clinical_indication.sql bigquery: association_by_datasource_direct: index: association_by_datasource_direct diff --git a/config/opensearch/search_known_drugs.json b/config/opensearch/search_known_drugs.json deleted file mode 100644 index 3e1be75c..00000000 --- a/config/opensearch/search_known_drugs.json +++ /dev/null @@ -1,103 +0,0 @@ -{ - "mappings": { - "properties": { - "multiplier": { - "type": "float" - } - }, - "dynamic_templates": [ - { - "strings": { - "match_mapping_type": "string", - "mapping": { - "type": "text", - "term_vector": "with_positions_offsets", - "fields": { - "raw": { - "type": "keyword", - "ignore_above": 1024, - "normalizer": "custom" - } - } - } - } - } - ] - 
}, - "settings": { - "index": { - "number_of_replicas": 0, - "number_of_shards": 1, - "refresh_interval": -1, - "max_ngram_diff": 20, - "analysis": { - "normalizer": { - "custom": { - "type": "custom", - "char_filter": [], - "filter": [ - "lowercase" - ] - } - }, - "filter": { - "autocomplete_filter": { - "type": "ngram", - "min_gram": 1, - "max_gram": 20, - "token_chars": [ - "letter", - "digit", - "punctuation", - "symbol" - ] - }, - "word_delimiter_filter": { - "type": "word_delimiter", - "catenate_numbers": false, - "catenate_words": false, - "generate_word_parts": false, - "generate_number_parts": true, - "catenate_all": false, - "split_on_case_change": false, - "split_on_numerics": false, - "preserve_original": true, - "stem_english_possesive": true - } - }, - "analyzer": { - "default": { - "type": "custom", - "tokenizer": "whitespace", - "filter": [ - "lowercase", - "autocomplete_filter" - ] - }, - "default_search": { - "type": "custom", - "tokenizer": "keyword", - "filter": [ - "lowercase" - ] - }, - "ngram_analyzer": { - "type": "custom", - "tokenizer": "whitespace", - "filter": [ - "lowercase", - "autocomplete_filter" - ] - }, - "token": { - "type": "custom", - "tokenizer": "keyword", - "filter": [ - "lowercase" - ] - } - } - } - } - } -} \ No newline at end of file diff --git a/deployment/pos_config.tftpl b/deployment/pos_config.tftpl index 73cfc08f..80ff12a2 100644 --- a/deployment/pos_config.tftpl +++ b/deployment/pos_config.tftpl @@ -120,7 +120,6 @@ steps: - indication - interaction - interaction_evidence - - known_drugs - l2g_predictions - mechanism_of_action - mouse_phenotypes