Skip to content
This repository was archived by the owner on Mar 4, 2026. It is now read-only.

Commit 9255111

Browse files
Merge pull request #37 from everycure-org/feature/medi_website
renorm drug list
2 parents 62e4910 + 422e4b1 commit 9255111

File tree

62 files changed

+22929
-33864
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+22929
-33864
lines changed

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,6 @@ jobs:
3636
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3737
with:
3838
upload_url: ${{ steps.create_release.outputs.upload_url }}
39-
asset_path: drug-list/data/03_primary/drugList.tsv # Replace with the path to your file
39+
asset_path: drug-list/data/03_primary/drugList_renorm_rejoin.tsv # Replace with the path to your file
4040
asset_name: drugList.tsv # Replace with the name you want for the uploaded asset
4141
asset_content_type: application/octet-stream

drug-list/.viz/stats.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"previous_drug_list": {"rows": 3136, "columns": 16, "file_size": 3419592}, "drug_list_final": {"rows": 3255, "columns": 36, "file_size": 5138523}, "drug_list_v2v_log": {"rows": 2636, "columns": 6, "file_size": 121010}}
1+
{"drug_list_renorm": {"rows": 3923, "columns": 36, "file_size": 5486888}, "drug_list_renorm_rejoined": {"rows": 3857, "columns": 36, "file_size": 5264136}}

drug-list/conf/base/catalog.yml

Lines changed: 124 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
1-
# ORANGE BOOK
1+
# ERRORS
2+
3+
error_log:
4+
type: pandas.CSVDataset
5+
filepath: data/08_reporting/errors.tsv
6+
save_args:
7+
sep: "\t"
8+
load_args:
9+
sep: "\t"
210

11+
# ORANGE BOOK
312
fda_exclusions:
413
type: pandas.ExcelDataset
514
filepath: data/02_intermediate/fda_exclusions.xlsx
@@ -181,8 +190,6 @@ orange_book_list_filtered:
181190
##########################################################################################
182191
##########################################################################################
183192

184-
185-
186193
# EMA
187194
ema-products:
188195
type: pandas.ExcelDataset
@@ -483,37 +490,37 @@ drug_list_atc_with_labels:
483490

484491

485492
# Post-enrichment final list
486-
drug_list_with_radioisotope_tags:
487-
type: pandas.CSVDataset
488-
filepath: data/02_intermediate/drug-list-with-radioisotope-tags.tsv
489-
load_args:
490-
sep: "\t"
491-
save_args:
492-
sep: "\t"
493+
# drug_list_with_radioisotope_tags:
494+
# type: pandas.CSVDataset
495+
# filepath: data/02_intermediate/drug-list-with-radioisotope-tags.tsv
496+
# load_args:
497+
# sep: "\t"
498+
# save_args:
499+
# sep: "\t"
493500

494-
drug_list_with_allergen_tags:
495-
type: pandas.CSVDataset
496-
filepath: data/02_intermediate/drug-list-with-allergen-tags.tsv
497-
load_args:
498-
sep: "\t"
499-
save_args:
500-
sep: "\t"
501+
# drug_list_with_allergen_tags:
502+
# type: pandas.CSVDataset
503+
# filepath: data/02_intermediate/drug-list-with-allergen-tags.tsv
504+
# load_args:
505+
# sep: "\t"
506+
# save_args:
507+
# sep: "\t"
501508

502-
drug_list_with_metallic_salt_tags:
503-
type: pandas.CSVDataset
504-
filepath: data/02_intermediate/drug-list-with-metallic-salt-tags.tsv
505-
load_args:
506-
sep: "\t"
507-
save_args:
508-
sep: "\t"
509+
# drug_list_with_metallic_salt_tags:
510+
# type: pandas.CSVDataset
511+
# filepath: data/02_intermediate/drug-list-with-metallic-salt-tags.tsv
512+
# load_args:
513+
# sep: "\t"
514+
# save_args:
515+
# sep: "\t"
509516

510-
drug_list_with_no_therapeutic_value_tags:
511-
type: pandas.CSVDataset
512-
filepath: data/02_intermediate/drug-list-with-no-therapeutic-value-tags.tsv
513-
load_args:
514-
sep: "\t"
515-
save_args:
516-
sep: "\t"
517+
# drug_list_with_no_therapeutic_value_tags:
518+
# type: pandas.CSVDataset
519+
# filepath: data/02_intermediate/drug-list-with-no-therapeutic-value-tags.tsv
520+
# load_args:
521+
# sep: "\t"
522+
# save_args:
523+
# sep: "\t"
517524

518525
# drug_list_with_no_therapeutic_value_tags_test:
519526
# type: pandas.CSVDataset
@@ -524,44 +531,56 @@ drug_list_with_no_therapeutic_value_tags:
524531
# sep: "\t"
525532

526533
drug_list_with_tags:
527-
type: pandas.CSVDataset
528-
filepath: data/02_intermediate/drug-list-with-tags.tsv
529-
load_args:
530-
sep: "\t"
531-
save_args:
532-
sep: "\t"
534+
type: pandas.ExcelDataset
535+
filepath: data/02_intermediate/drug-list-with-tags.xlsx
536+
537+
drug_list_with_cancer_tags:
538+
type: pandas.ExcelDataset
539+
filepath: data/02_intermediate/drug-list-with-cancer-tags.xlsx
533540

534541
drug_list_with_tags_cleaned:
542+
type: pandas.ExcelDataset
543+
filepath: data/02_intermediate/drug-list-with-tags-cleaned.xlsx
544+
545+
# drug_list_with_vaccine_antigen_tags:
546+
# type: pandas.CSVDataset
547+
# filepath: data/02_intermediate/drug-list-vaccine-antigen.tsv
548+
# load_args:
549+
# sep: "\t"
550+
# save_args:
551+
# sep: "\t"
552+
553+
# drug_list_with_ec_tags:
554+
# type: pandas.CSVDataset
555+
# filepath: data/02_intermediate/drug-list-with-ec-tags.tsv
556+
# load_args:
557+
# sep: "\t"
558+
# save_args:
559+
# sep: "\t"
560+
561+
drug_list_corrected_approval_tags:
562+
type: pandas.ExcelDataset
563+
filepath: data/02_intermediate/drug_list_corrected_approval_tags.xlsx
564+
565+
drug_list_final:
535566
type: pandas.CSVDataset
536-
filepath: data/02_intermediate/drug-list-with-tags-cleaned.tsv
537-
load_args:
538-
sep: "\t"
567+
filepath: data/03_primary/drugList.tsv
539568
save_args:
540569
sep: "\t"
541-
542-
drug_list_with_vaccine_antigen_tags:
543-
type: pandas.CSVDataset
544-
filepath: data/02_intermediate/drug-list-vaccine-antigen.tsv
545570
load_args:
546571
sep: "\t"
547-
save_args:
548-
sep: "\t"
549572

550-
drug_list_with_ec_tags:
573+
drug_list_renorm:
551574
type: pandas.CSVDataset
552-
filepath: data/02_intermediate/drug-list-with-ec-tags.tsv
553-
load_args:
554-
sep: "\t"
575+
filepath: data/03_primary/drugList_renorm.tsv
555576
save_args:
556577
sep: "\t"
578+
load_args:
579+
sep: "\t"
557580

558-
drug_list_corrected_approval_tags:
559-
type: pandas.ExcelDataset
560-
filepath: data/02_intermediate/drug_list_corrected_approval_tags.xlsx
561-
562-
drug_list_final:
581+
drug_list_renorm_rejoined:
563582
type: pandas.CSVDataset
564-
filepath: data/03_primary/drugList.tsv
583+
filepath: data/03_primary/drugList_renorm_rejoin.tsv
565584
save_args:
566585
sep: "\t"
567586
load_args:
@@ -575,6 +594,14 @@ drug_list_final_no_smiles:
575594
load_args:
576595
sep: "\t"
577596

597+
drug_list_stringent:
598+
type: pandas.CSVDataset
599+
filepath: data/03_primary/drug_list_stringent.tsv
600+
save_args:
601+
sep: "\t"
602+
load_args:
603+
sep: "\t"
604+
578605
drug_list_with_smiles:
579606
type: pandas.ExcelDataset
580607
filepath: data/03_primary/drug_list_with_smiles.xlsx
@@ -595,12 +622,18 @@ atc_with_ids:
595622

596623
previous_drug_list:
597624
type: pandas.CSVDataset
598-
filepath: data/08_reporting/drugList_v2_0_0.tsv
625+
filepath: data/08_reporting/drugList_v2_3_0.tsv
599626
save_args:
600627
sep: "\t"
601628
load_args:
602629
sep: "\t"
603630

631+
632+
drug_list_renorm_v2v:
633+
type: pandas.ExcelDataset
634+
filepath: data/08_reporting/drug_list_renorm_v2v.xlsx
635+
636+
604637
drug_list_v2v_log:
605638
type: pandas.ExcelDataset
606639
filepath: data/08_reporting/drug_list_v2v_log.xlsx
@@ -690,4 +723,37 @@ russia_comparison:
690723
save_args:
691724
sep: "\t"
692725
load_args:
693-
sep: "\t"
726+
sep: "\t"
727+
728+
729+
# ROBOKOP AND RTX COMPARISON
730+
robokop_drugs:
731+
type: pandas.CSVDataset
732+
filepath: data/08_reporting/robokop_drugs.csv
733+
734+
drugcentral_approved_drugs:
735+
type: pandas.ExcelDataset
736+
filepath: data/08_reporting/all_approved_drugs_drugcentral.xlsx
737+
738+
drugcentral_norm:
739+
type: pandas.ExcelDataset
740+
filepath: data/08_reporting/drugcentral_approved_norm.xlsx
741+
742+
743+
744+
# REPORTING
745+
746+
# indications_list:
747+
# type: pandas.ExcelDataset
748+
# filepath: https://github.com/everycure-org/matrix-indication-list/releases/download/v1.2.0/indicationList.xlsx
749+
# load_args:
750+
# engine: openpyxl
751+
# sheet_name: 0
752+
753+
indications_list:
754+
type: pandas.ExcelDataset
755+
filepath: data/08_reporting/matrix_indication_list.xlsx
756+
757+
missing_drugs_indications:
758+
type: pandas.ExcelDataset
759+
filepath: data/08_reporting/indications_missing_drugs.xlsx

0 commit comments

Comments
 (0)