Skip to content

Commit 43ede3a

Browse files
committed
Update project configuration and dependencies
- Add Google credentials to gitignore
- Update Poetry dependencies for all modules
- Modify experimental strategy configurations
- Create backup files for strategy modifications
- Add results and storage directories
1 parent 9943f74 commit 43ede3a

File tree

256 files changed

+12260
-2255
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

256 files changed

+12260
-2255
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ mapping_cache.db-*
5151
metamapper.db-*
5252
*.prof
5353
pytest_output.txt
54-
/qdrant_storage
5554
!mappings_list.csv
5655

5756
# IDE specific files
@@ -279,3 +278,6 @@ qdrant_indexing_log.txt
279278
# Ignore temporary scripts
280279
temp_merge_script.sh
281280

281+
# Google Credentials
282+
google-credentials.json
283+
Lines changed: 105 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -1,118 +1,117 @@
1-
name: Arivale Chemistries to KG2c Phenotypes
2-
description: Maps Arivale clinical chemistry/lab tests to KG2c phenotypic features via LOINC codes and semantic associations
3-
1+
name: chem_arv_to_kg2c_phenotypes_v1_base
2+
description: Maps Arivale clinical chemistry/lab tests to KG2c phenotypic features
3+
via LOINC codes and semantic associations
44
metadata:
5-
id: "chem_arv_to_kg2c_phenotypes_v1_base"
6-
name: "Arivale Chemistries to KG2c Phenotypes"
7-
version: "1.0.0"
8-
created: "2025-01-08"
9-
author: "biomapper-team"
10-
entity_type: "chemistries"
11-
source_dataset: "arivale"
12-
target_dataset: "kg2c_phenotypes"
13-
bridge_type: ["loinc", "semantic", "rule_based"]
14-
15-
quality_tier: "experimental"
16-
validation_status: "pending"
5+
id: chem_arv_to_kg2c_phenotypes_v1_base
6+
name: Arivale Chemistries to KG2c Phenotypes
7+
version: 1.0.0
8+
created: '2025-01-08'
9+
author: biomapper-team
10+
entity_type: chemistries
11+
source_dataset: arivale
12+
target_dataset: kg2c_phenotypes
13+
bridge_type:
14+
- loinc
15+
- semantic
16+
- rule_based
17+
quality_tier: experimental
18+
validation_status: pending
1719
expected_match_rate: 0.65
1820
actual_match_rate: null
19-
2021
source_files:
21-
- path: "/procedure/data/local_data/MAPPING_ONTOLOGIES/arivale/chemistries_metadata.tsv"
22-
last_updated: "2019-06-01"
23-
row_count: 128
22+
- path: /procedure/data/local_data/MAPPING_ONTOLOGIES/arivale/chemistries_metadata.tsv
23+
last_updated: '2019-06-01'
24+
row_count: 128
2425
target_files:
25-
- path: "/procedure/data/local_data/MAPPING_ONTOLOGIES/kg2.10.2c_ontologies/kg2c_phenotypes.csv"
26-
last_updated: "2024-10-01"
27-
row_count: 45000
28-
29-
description: "Maps clinical chemistry tests to phenotypic features for disease association analysis"
30-
tags: ["chemistries", "phenotypes", "loinc", "clinical", "arivale", "kg2c"]
26+
- path: /procedure/data/local_data/MAPPING_ONTOLOGIES/kg2.10.2c_ontologies/kg2c_phenotypes.csv
27+
last_updated: '2024-10-01'
28+
row_count: 45000
29+
description: Maps clinical chemistry tests to phenotypic features for disease association
30+
analysis
31+
tags:
32+
- chemistries
33+
- phenotypes
34+
- loinc
35+
- clinical
36+
- arivale
37+
- kg2c
3138
dependencies: []
3239
supersedes: null
3340
citation: null
34-
3541
parameters:
36-
output_dir: "${OUTPUT_DIR:-/tmp/biomapper/chemistries}"
37-
cache_dir: "${CACHE_DIR:-/tmp/biomapper/cache}"
42+
output_dir: ${OUTPUT_DIR:-/tmp/biomapper/chemistries}
43+
cache_dir: ${CACHE_DIR:-/tmp/biomapper/cache}
3844
use_cache: true
3945
cache_ttl_days: 30
40-
4146
steps:
42-
- name: load_arivale_chemistries
43-
action:
44-
type: LOAD_DATASET_IDENTIFIERS
45-
params:
46-
file_path: "${metadata.source_files[0].path}"
47-
identifier_column: "Name"
48-
additional_columns:
49-
- "Display Name"
50-
- "Labcorp ID"
51-
- "Labcorp Name"
52-
- "Labcorp LOINC ID"
53-
output_key: "arivale_chemistries"
54-
drop_empty: true
55-
56-
- name: load_kg2c_phenotypes
57-
action:
58-
type: LOAD_DATASET_IDENTIFIERS
59-
params:
60-
file_path: "${metadata.target_files[0].path}"
61-
identifier_column: "id"
62-
additional_columns:
63-
- "name"
64-
- "category"
65-
- "description"
66-
- "synonyms"
67-
- "xrefs"
68-
output_key: "kg2c_phenotypes"
69-
drop_empty: true
70-
71-
- name: extract_loinc_codes
72-
action:
73-
type: CHEMISTRY_EXTRACT_LOINC
74-
params:
75-
input_key: "arivale_chemistries"
76-
loinc_column: "Labcorp LOINC ID"
77-
test_name_column: "Name"
78-
display_name_column: "Display Name"
79-
output_key: "arivale_with_loinc"
80-
81-
- name: filter_clinical_phenotypes
82-
action:
83-
type: FILTER_DATASET
84-
params:
85-
dataset_key: "kg2c_phenotypes"
86-
filters:
87-
- field: "category"
88-
operator: "contains"
89-
value: "PhenotypicFeature"
90-
output_key: "clinical_phenotypes"
91-
92-
- name: bridge_chemistry_to_phenotypes
93-
action:
94-
type: CHEMISTRY_TO_PHENOTYPE_BRIDGE
95-
params:
96-
source_key: "arivale_with_loinc"
97-
target_key: "clinical_phenotypes"
98-
loinc_column: "Labcorp LOINC ID"
99-
phenotype_id_column: "id"
100-
phenotype_xrefs_column: "xrefs"
101-
association_threshold: 0.7
102-
use_semantic_matching: true
103-
output_key: "chemistry_phenotype_mappings"
104-
105-
- name: calculate_overlap
106-
action:
107-
type: CALCULATE_SET_OVERLAP
108-
params:
109-
dataset1_key: "arivale_with_loinc"
110-
dataset2_key: "clinical_phenotypes"
111-
output_key: "overlap_statistics"
112-
113-
- name: export_mappings
114-
action:
115-
type: EXPORT_DATASET
116-
params:
117-
dataset_key: "chemistry_phenotype_mappings"
118-
output_path: "${parameters.output_dir}/arivale_kg2c_chemistry_phenotypes.tsv"
47+
- name: load_arivale_chemistries
48+
action:
49+
type: LOAD_DATASET_IDENTIFIERS
50+
params:
51+
file_path: ${metadata.source_files[0].path}
52+
identifier_column: Name
53+
additional_columns:
54+
- Display Name
55+
- Labcorp ID
56+
- Labcorp Name
57+
- Labcorp LOINC ID
58+
output_key: arivale_chemistries
59+
drop_empty: true
60+
- name: load_kg2c_phenotypes
61+
action:
62+
type: LOAD_DATASET_IDENTIFIERS
63+
params:
64+
file_path: ${metadata.target_files[0].path}
65+
identifier_column: id
66+
additional_columns:
67+
- name
68+
- category
69+
- description
70+
- synonyms
71+
- xrefs
72+
output_key: kg2c_phenotypes
73+
drop_empty: true
74+
- name: extract_loinc_codes
75+
action:
76+
type: CHEMISTRY_EXTRACT_LOINC
77+
params:
78+
input_key: arivale_chemistries
79+
loinc_column: Labcorp LOINC ID
80+
test_name_column: Name
81+
display_name_column: Display Name
82+
output_key: arivale_with_loinc
83+
- name: filter_clinical_phenotypes
84+
action:
85+
type: FILTER_DATASET
86+
params:
87+
dataset_key: kg2c_phenotypes
88+
filters:
89+
- field: category
90+
operator: contains
91+
value: PhenotypicFeature
92+
output_key: clinical_phenotypes
93+
- name: bridge_chemistry_to_phenotypes
94+
action:
95+
type: CHEMISTRY_TO_PHENOTYPE_BRIDGE
96+
params:
97+
source_key: arivale_with_loinc
98+
target_key: clinical_phenotypes
99+
loinc_column: Labcorp LOINC ID
100+
phenotype_id_column: id
101+
phenotype_xrefs_column: xrefs
102+
association_threshold: 0.7
103+
use_semantic_matching: true
104+
output_key: chemistry_phenotype_mappings
105+
- name: calculate_overlap
106+
action:
107+
type: CALCULATE_SET_OVERLAP
108+
params:
109+
dataset1_key: arivale_with_loinc
110+
dataset2_key: clinical_phenotypes
111+
output_key: overlap_statistics
112+
- name: export_mappings
113+
action:
114+
type: EXPORT_DATASET
115+
params:
116+
dataset_key: chemistry_phenotype_mappings
117+
output_path: ${parameters.output_dir}/arivale_kg2c_chemistry_phenotypes.tsv
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
name: Arivale Chemistries to KG2c Phenotypes
2+
description: Maps Arivale clinical chemistry/lab tests to KG2c phenotypic features via LOINC codes and semantic associations
3+
4+
metadata:
5+
id: "chem_arv_to_kg2c_phenotypes_v1_base"
6+
name: "Arivale Chemistries to KG2c Phenotypes"
7+
version: "1.0.0"
8+
created: "2025-01-08"
9+
author: "biomapper-team"
10+
entity_type: "chemistries"
11+
source_dataset: "arivale"
12+
target_dataset: "kg2c_phenotypes"
13+
bridge_type: ["loinc", "semantic", "rule_based"]
14+
15+
quality_tier: "experimental"
16+
validation_status: "pending"
17+
expected_match_rate: 0.65
18+
actual_match_rate: null
19+
20+
source_files:
21+
- path: "/procedure/data/local_data/MAPPING_ONTOLOGIES/arivale/chemistries_metadata.tsv"
22+
last_updated: "2019-06-01"
23+
row_count: 128
24+
target_files:
25+
- path: "/procedure/data/local_data/MAPPING_ONTOLOGIES/kg2.10.2c_ontologies/kg2c_phenotypes.csv"
26+
last_updated: "2024-10-01"
27+
row_count: 45000
28+
29+
description: "Maps clinical chemistry tests to phenotypic features for disease association analysis"
30+
tags: ["chemistries", "phenotypes", "loinc", "clinical", "arivale", "kg2c"]
31+
dependencies: []
32+
supersedes: null
33+
citation: null
34+
35+
parameters:
36+
output_dir: "${OUTPUT_DIR:-/tmp/biomapper/chemistries}"
37+
cache_dir: "${CACHE_DIR:-/tmp/biomapper/cache}"
38+
use_cache: true
39+
cache_ttl_days: 30
40+
41+
steps:
42+
- name: load_arivale_chemistries
43+
action:
44+
type: LOAD_DATASET_IDENTIFIERS
45+
params:
46+
file_path: "${metadata.source_files[0].path}"
47+
identifier_column: "Name"
48+
additional_columns:
49+
- "Display Name"
50+
- "Labcorp ID"
51+
- "Labcorp Name"
52+
- "Labcorp LOINC ID"
53+
output_key: "arivale_chemistries"
54+
drop_empty: true
55+
56+
- name: load_kg2c_phenotypes
57+
action:
58+
type: LOAD_DATASET_IDENTIFIERS
59+
params:
60+
file_path: "${metadata.target_files[0].path}"
61+
identifier_column: "id"
62+
additional_columns:
63+
- "name"
64+
- "category"
65+
- "description"
66+
- "synonyms"
67+
- "xrefs"
68+
output_key: "kg2c_phenotypes"
69+
drop_empty: true
70+
71+
- name: extract_loinc_codes
72+
action:
73+
type: CHEMISTRY_EXTRACT_LOINC
74+
params:
75+
input_key: "arivale_chemistries"
76+
loinc_column: "Labcorp LOINC ID"
77+
test_name_column: "Name"
78+
display_name_column: "Display Name"
79+
output_key: "arivale_with_loinc"
80+
81+
- name: filter_clinical_phenotypes
82+
action:
83+
type: FILTER_DATASET
84+
params:
85+
dataset_key: "kg2c_phenotypes"
86+
filters:
87+
- field: "category"
88+
operator: "contains"
89+
value: "PhenotypicFeature"
90+
output_key: "clinical_phenotypes"
91+
92+
- name: bridge_chemistry_to_phenotypes
93+
action:
94+
type: CHEMISTRY_TO_PHENOTYPE_BRIDGE
95+
params:
96+
source_key: "arivale_with_loinc"
97+
target_key: "clinical_phenotypes"
98+
loinc_column: "Labcorp LOINC ID"
99+
phenotype_id_column: "id"
100+
phenotype_xrefs_column: "xrefs"
101+
association_threshold: 0.7
102+
use_semantic_matching: true
103+
output_key: "chemistry_phenotype_mappings"
104+
105+
- name: calculate_overlap
106+
action:
107+
type: CALCULATE_SET_OVERLAP
108+
params:
109+
dataset1_key: "arivale_with_loinc"
110+
dataset2_key: "clinical_phenotypes"
111+
output_key: "overlap_statistics"
112+
113+
- name: export_mappings
114+
action:
115+
type: EXPORT_DATASET
116+
params:
117+
dataset_key: "chemistry_phenotype_mappings"
118+
output_path: "${parameters.output_dir}/arivale_kg2c_chemistry_phenotypes.tsv"

0 commit comments

Comments
 (0)