Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,7 @@ dictionaryutils==2.0.7
dvc>=0.50.1
# for allele normalization with maf2maf
cwltool==1.0.20191022103248
# for wiki transform
python-slugify==4.0.0
# for wiki export
gen3==2.2.3
2 changes: 2 additions & 0 deletions outputs/wiki/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.gz
*.json
4 changes: 2 additions & 2 deletions tests/unit/enrichers/test_drug_enricher.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

EXPECTED = [
{'id': 'CID2244', 'id_source': 'PUBCHEM', 'pubchem_id': 'CID2244', 'chebi_id': 'CHEBI:15365', 'chembl_id': 'CHEMBL25', 'drugbank_id': 'DB00945', 'synonym': 'ASPIRIN', 'inchi': 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)', 'inchi_key': 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N', 'taxonomy': {'class': 'Benzene and substituted derivatives', 'description': 'This compound belongs to the class of organic compounds known as acylsalicylic acids. These are o-acylated derivatives of salicylic acid.', 'direct-parent': 'Acylsalicylic acids', 'kingdom': 'Organic compounds', 'subclass': 'Benzoic acids and derivatives', 'superclass': 'Benzenoids'}, 'approved_countries': ['Canada', 'EU', 'US'], 'usan_stem_definition': None, 'source_url': 'http://mychem.info/v1/query?q=pubchem.cid:"2244"&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
{'id': 'CID3385', 'id_source': 'PUBCHEM', 'pubchem_id': 'CID3385', 'chebi_id': 'CHEBI:46345', 'chembl_id': 'CHEMBL185', 'drugbank_id': 'DB00544', 'synonym': 'FLUOROURACIL', 'inchi': 'InChI=1S/C4H3FN2O2/c5-2-1-6-4(9)7-3(2)8/h1H,(H2,6,7,8,9)', 'inchi_key': 'GHASVSINZRGABV-UHFFFAOYSA-N', 'taxonomy': {'class': 'Diazines', 'description': 'This compound belongs to the class of organic compounds known as halopyrimidines. These are aromatic compounds containing a halogen atom linked to a pyrimidine ring. Pyrimidine is a 6-membered ring consisting of four carbon atoms and two nitrogen centers at the 1- and 3- ring positions.', 'direct-parent': 'Halopyrimidines', 'kingdom': 'Organic compounds', 'subclass': 'Pyrimidines and pyrimidine derivatives', 'superclass': 'Organoheterocyclic compounds'}, 'approved_countries': ['Canada', 'US'], 'usan_stem_definition': 'uracil derivatives used as thyroid antagonists and as antineoplastics', 'source_url': 'http://mychem.info/v1/query?q=pubchem.cid:"3385"&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
{'id': 'CID3385', 'id_source': 'PUBCHEM', 'pubchem_id': 'CID3385', 'chebi_id': 'CHEBI:46345', 'chembl_id': 'CHEMBL185', 'drugbank_id': 'DB00544', 'synonym': 'FLUOROURACIL', 'inchi': 'InChI=1S/C4H3FN2O2/c5-2-1-6-4(9)7-3(2)8/h1H,(H2,6,7,8,9)', 'inchi_key': 'GHASVSINZRGABV-UHFFFAOYSA-N', 'taxonomy': {'class': 'Diazines', 'description': 'This compound belongs to the class of organic compounds known as halopyrimidines. These are aromatic compounds containing a halogen atom linked to a pyrimidine ring. Pyrimidine is a 6-membered ring consisting of four carbon atoms and two nitrogen centers at the 1- and 3- ring positions.', 'direct-parent': 'Halopyrimidines', 'kingdom': 'Organic compounds', 'subclass': 'Pyrimidines and pyrimidine derivatives', 'superclass': 'Organoheterocyclic compounds'}, 'approved_countries': ['Canada', 'US'], 'usan_stem_definition': 'uracil type antineoplastics; uracil derivatives used as thyroid antagonists and as antineoplastics', 'source_url': 'http://mychem.info/v1/query?q=pubchem.cid:"3385"&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
{'id': 'CID9863776', 'id_source': 'PUBCHEM', 'pubchem_id': 'CID9863776', 'chebi_id': 'CHEBI:67559', 'chembl_id': 'CHEMBL1077979', 'drugbank_id': None, 'synonym': '9863776', 'inchi': 'InChI=1S/C19H22O7/c1-11-5-3-7-14(20)18(23)15(21)8-4-6-12-9-13(25-2)10-16(22)17(12)19(24)26-11/h3-4,6-7,9-11,15,18,21-23H,5,8H2,1-2H3/b6-4+,7-3-/t11-,15-,18+/m0/s1', 'inchi_key': 'NEQZWEXWOFPKOT-BYRRXHGESA-N', 'taxonomy': None, 'approved_countries': [], 'usan_stem_definition': None, 'source_url': 'http://mychem.info/v1/query?q=pubchem.cid:"9863776"&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
{'id': 'CID10172943', 'id_source': 'PUBCHEM', 'pubchem_id': 'CID10172943', 'chebi_id': 'CHEBI:91351', 'chembl_id': 'CHEMBL379300', 'drugbank_id': 'DB08073', 'synonym': '10172943', 'inchi': 'InChI=1S/C24H23N5O/c1-15-22-10-16(6-7-24(22)29-28-15)17-9-20(13-26-11-17)30-14-19(25)8-18-12-27-23-5-3-2-4-21(18)23/h2-7,9-13,19,27H,8,14,25H2,1H3,(H,28,29)/t19-/m0/s1', 'inchi_key': 'YWTBGJGMTBHQTM-IBGZPJMESA-N', 'taxonomy': {'class': 'Indoles and derivatives', 'description': 'This compound belongs to the class of organic compounds known as 3-alkylindoles. These are compounds containing an indole moiety that carries an alkyl chain at the 3-position.', 'direct-parent': '3-alkylindoles', 'kingdom': 'Organic compounds', 'subclass': 'Indoles', 'superclass': 'Organoheterocyclic compounds'}, 'approved_countries': [], 'usan_stem_definition': None, 'source_url': 'http://mychem.info/v1/query?q=pubchem.cid:"10172943"&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
{'id': 'CID9549184', 'id_source': 'PUBCHEM', 'pubchem_id': 'CID9549184', 'chebi_id': 'CHEBI:91457', 'chembl_id': 'CHEMBL1970879', 'drugbank_id': None, 'synonym': '9549184', 'inchi': 'InChI=1S/C34H39N9O3/c1-21(44)41-14-16-42(17-15-41)24-9-11-25(12-10-24)43-33-30(32(35)36-20-37-33)31(39-43)23-8-13-26(29(19-23)46-3)38-34(45)28-18-22-6-4-5-7-27(22)40(28)2/h4-8,13,18-20,24-25H,9-12,14-17H2,1-3H3,(H,38,45)(H2,35,36,37)', 'inchi_key': 'ZMNWFTYYYCSSTF-UHFFFAOYSA-N', 'taxonomy': None, 'approved_countries': [], 'usan_stem_definition': None, 'source_url': 'http://mychem.info/v1/query?q=pubchem.cid:"9549184"&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
Expand All @@ -38,7 +38,7 @@
{'id': 'CID2244', 'id_source': 'PUBCHEM', 'pubchem_id': 'CID2244', 'chebi_id': 'CHEBI:15365', 'chembl_id': 'CHEMBL25', 'drugbank_id': 'DB00945', 'synonym': 'ASPIRIN', 'inchi': 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)', 'inchi_key': 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N', 'taxonomy': {'class': 'Benzene and substituted derivatives', 'description': 'This compound belongs to the class of organic compounds known as acylsalicylic acids. These are o-acylated derivatives of salicylic acid.', 'direct-parent': 'Acylsalicylic acids', 'kingdom': 'Organic compounds', 'subclass': 'Benzoic acids and derivatives', 'superclass': 'Benzenoids'}, 'approved_countries': ['Canada', 'EU', 'US'], 'usan_stem_definition': None, 'source_url': 'http://mychem.info/v1/query?q=pubchem.cid:"2244"&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
{'id': 'CID2733526', 'id_source': 'PUBCHEM', 'pubchem_id': 'CID2733526', 'chebi_id': 'CHEBI:41774', 'chembl_id': 'CHEMBL83', 'drugbank_id': 'DB00675', 'synonym': 'Tamoxifen', 'inchi': 'InChI=1S/C26H29NO/c1-4-25(21-11-7-5-8-12-21)26(22-13-9-6-10-14-22)23-15-17-24(18-16-23)28-20-19-27(2)3/h5-18H,4,19-20H2,1-3H3/b26-25-', 'inchi_key': 'NKANXQFJJICGDU-QPLCGJKRSA-N', 'taxonomy': {'class': 'Stilbenes', 'description': 'This compound belongs to the class of organic compounds known as stilbenes. These are organic compounds containing a 1,2-diphenylethylene moiety. Stilbenes (C6-C2-C6 ) are derived from the common phenylpropene (C6-C3) skeleton building block. The introduction of one or more hydroxyl groups to a phenyl ring lead to stilbenoids.', 'direct-parent': 'Stilbenes', 'kingdom': 'Organic compounds', 'superclass': 'Phenylpropanoids and polyketides'}, 'approved_countries': ['Canada', 'US'], 'usan_stem_definition': None, 'source_url': 'http://mychem.info/v1/query?q=pubchem.cid:"2733526"&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
{'id': 'CID2733526', 'id_source': 'PUBCHEM', 'pubchem_id': 'CID2733526', 'chebi_id': 'CHEBI:41774', 'chembl_id': 'CHEMBL83', 'drugbank_id': 'DB00675', 'synonym': 'Tamoxifen', 'inchi': 'InChI=1S/C26H29NO/c1-4-25(21-11-7-5-8-12-21)26(22-13-9-6-10-14-22)23-15-17-24(18-16-23)28-20-19-27(2)3/h5-18H,4,19-20H2,1-3H3/b26-25-', 'inchi_key': 'NKANXQFJJICGDU-QPLCGJKRSA-N', 'taxonomy': {'class': 'Stilbenes', 'description': 'This compound belongs to the class of organic compounds known as stilbenes. These are organic compounds containing a 1,2-diphenylethylene moiety. Stilbenes (C6-C2-C6 ) are derived from the common phenylpropene (C6-C3) skeleton building block. The introduction of one or more hydroxyl groups to a phenyl ring lead to stilbenoids.', 'direct-parent': 'Stilbenes', 'kingdom': 'Organic compounds', 'superclass': 'Phenylpropanoids and polyketides'}, 'approved_countries': ['Canada', 'US'], 'usan_stem_definition': None, 'source_url': 'http://mychem.info/v1/query?q=pubchem.cid:"2733526"&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
{'id': 'CHEMBL1742981', 'id_source': 'CHEMBL', 'pubchem_id': None, 'chebi_id': None, 'chembl_id': 'CHEMBL1742981', 'drugbank_id': None, 'synonym': 'Abagovomab', 'inchi': None, 'inchi_key': None, 'taxonomy': None, 'approved_countries': [], 'usan_stem_definition': 'monoclonal antibodies', 'source_url': 'http://mychem.info/v1/query?q=chembl.pref_name:abagovomab&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
{'id': 'CHEMBL1742981', 'id_source': 'CHEMBL', 'pubchem_id': None, 'chebi_id': None, 'chembl_id': 'CHEMBL1742981', 'drugbank_id': None, 'synonym': 'Abagovomab', 'inchi': None, 'inchi_key': None, 'taxonomy': None, 'approved_countries': [], 'usan_stem_definition': 'monoclonal antibodies: mouse, antiviral indications', 'source_url': 'http://mychem.info/v1/query?q=chembl.pref_name:abagovomab&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'},
{'id': 'CID68740', 'id_source': 'PUBCHEM', 'pubchem_id': 'CID68740', 'chebi_id': 'CHEBI:46557', 'chembl_id': 'CHEMBL924', 'drugbank_id': 'DB00399', 'synonym': 'ZOLEDRONIC ACID', 'inchi': 'InChI=1S/C5H10N2O7P2/c8-5(15(9,10)11,16(12,13)14)3-7-2-1-6-4-7/h1-2,4,8H,3H2,(H2,9,10,11)(H2,12,13,14)', 'inchi_key': 'XRASPMIURGNCCH-UHFFFAOYSA-N', 'taxonomy': {'class': 'Organic phosphonic acids and derivatives', 'description': 'This compound belongs to the class of organic compounds known as bisphosphonates. These are organic compounds containing two phosphonate groups linked together through a carbon atoms.', 'direct-parent': 'Bisphosphonates', 'kingdom': 'Organic compounds', 'subclass': 'Bisphosphonates', 'superclass': 'Organic acids and derivatives'}, 'approved_countries': ['Canada', 'EU', 'US'], 'usan_stem_definition': 'calcium metabolism regulators', 'source_url': 'http://mychem.info/v1/query?q=pubchem.cid:"68740"&fields=chebi.id,chebi.inchi,chebi.inchi_key,chebi.name,chembl.molecule_chembl_id,chembl.pref_name,chembl.inchi,chembl.inchi_key,chembl.molecule_synonyms,chembl.usan_stem_definition,pubchem.cid,pubchem.inchi,pubchem.inchi_key,drugbank.id,drugbank.inchi,drugbank.inchi_key,drugbank.products.approved,drugbank.products.country,drugbank.taxonomy.class,drugbank.taxonomy.direct-parent,drugbank.taxonomy.kingdom,drugbank.taxonomy.subclass,drugbank.taxonomy.superclass,drugbank.taxonomy.description&size=1'}
]

Expand Down
Loading