Skip to content

Commit 019f211

Browse files
committed
First pass at PyOBO ingest.
See #45
1 parent 0178c2d commit 019f211

File tree

6 files changed

+108
-8
lines changed

6 files changed

+108
-8
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ BUILDER_DIR = src/semsql/builder
66
DDL_DIR = $(BUILDER_DIR)/sql_schema
77
YAML_DIR = src/semsql/linkml
88
SQLA_DIR = src/semsql/sqla
9+
ONT_REGISTRY = src/semsql/builder/registry/ontologies.yaml
910

1011
PREFIX_DIR = $(BUILDER_DIR)/prefixes
1112

@@ -141,7 +142,7 @@ download/reactome-biopax.zip:
141142
src/semsql/builder/registry/registry_schema.py: src/semsql/builder/registry/registry_schema.yaml
142143
$(RUN) gen-python $< > $@
143144

144-
ontologies.Makefile: src/semsql/builder/registry/ontologies.yaml
145+
ontologies.Makefile: $(ONT_REGISTRY)
145146
$(RUN) semsql generate-makefile -P src/semsql/builder/prefixes/prefixes_local.csv $< > $@.tmp && mv $@.tmp $@
146147

147148
include ontologies.Makefile

ontologies.Makefile

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ db/mlo.owl: download/mlo.owl
208208

209209

210210
download/ito.owl: STAMP
211-
curl -L -s https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip > $@.zip.tmp && unzip -p $@.zip.tmp ITO.owl > $@.tmp && rm $@.zip.tmp
211+
# Fetch the zipped release, then stream only the ITO.owl member out of the archive into the temp target.
curl -L -s https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip > $@.zip.tmp && unzip -p $@.zip.tmp ITO.owl > $@.tmp && rm $@.zip.tmp
212212
sha256sum -b $@.tmp > $@.sha256
213213
mv $@.tmp $@
214214

@@ -219,7 +219,7 @@ db/ito.owl: download/ito.owl
219219

220220

221221
download/reactome-Homo-sapiens.owl: STAMP
222-
curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp Homo_sapiens.owl > $@.tmp && rm $@.zip.tmp
222+
# Fetch the BioPAX bundle, then stream only the Homo_sapiens.owl member out of the archive into the temp target.
curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp Homo_sapiens.owl > $@.tmp && rm $@.zip.tmp
223223
sha256sum -b $@.tmp > $@.sha256
224224
mv $@.tmp $@
225225

@@ -263,7 +263,7 @@ db/sweetAll.owl: download/sweetAll.owl
263263

264264

265265
download/lov.owl: STAMP
266-
curl -L -s https://lov.linkeddata.es/lov.n3.gz > $@.tmp
266+
curl -L -s https://lov.linkeddata.es/lov.n3.gz | gzip -dc > $@.tmp
267267
sha256sum -b $@.tmp > $@.sha256
268268
mv $@.tmp $@
269269

@@ -306,6 +306,61 @@ db/co_324.owl: download/co_324.owl
306306
cp $< $@
307307

308308

309+
download/hgnc.genegroup.owl: STAMP
310+
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc.genegroup/hgnc.genegroup.owl.gz | gzip -dc > $@.tmp
311+
sha256sum -b $@.tmp > $@.sha256
312+
mv $@.tmp $@
313+
314+
.PRECIOUS: download/hgnc.genegroup.owl
315+
316+
db/hgnc.genegroup.owl: download/hgnc.genegroup.owl
317+
cp $< $@
318+
319+
320+
download/hgnc.owl: STAMP
321+
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc/2022-06-01/hgnc.owl.gz | gzip -dc > $@.tmp
322+
sha256sum -b $@.tmp > $@.sha256
323+
mv $@.tmp $@
324+
325+
.PRECIOUS: download/hgnc.owl
326+
327+
db/hgnc.owl: download/hgnc.owl
328+
cp $< $@
329+
330+
331+
download/dictybase.owl: STAMP
332+
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/dictybase/dictybase.owl.gz | gzip -dc > $@.tmp
333+
sha256sum -b $@.tmp > $@.sha256
334+
mv $@.tmp $@
335+
336+
.PRECIOUS: download/dictybase.owl
337+
338+
db/dictybase.owl: download/dictybase.owl
339+
cp $< $@
340+
341+
342+
download/eccode.owl: STAMP
343+
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/eccode/25-May-2022/eccode.owl.gz | gzip -dc > $@.tmp
344+
sha256sum -b $@.tmp > $@.sha256
345+
mv $@.tmp $@
346+
347+
.PRECIOUS: download/eccode.owl
348+
349+
db/eccode.owl: download/eccode.owl
350+
cp $< $@
351+
352+
353+
download/uniprot.owl: STAMP
354+
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/uniprot/2022_02/uniprot.owl.gz | gzip -dc > $@.tmp
355+
sha256sum -b $@.tmp > $@.sha256
356+
mv $@.tmp $@
357+
358+
.PRECIOUS: download/uniprot.owl
359+
360+
db/uniprot.owl: download/uniprot.owl
361+
cp $< $@
362+
363+
309364
download/%.owl: STAMP
310365
curl -L -s http://purl.obolibrary.org/obo/$*.owl > $@.tmp
311366
sha256sum -b $@.tmp > $@.sha256
@@ -316,4 +371,4 @@ download/%.owl: STAMP
316371
db/%.owl: download/%.owl
317372
robot merge -i $< -o $@
318373

319-
EXTRA_ONTOLOGIES = chiro ncit foodon chebiplus msio phenio comploinc bero aio reacto go go-lego bao orcid cpont biolink biopax enanomapper mlo ito reactome-Homo-sapiens efo edam sweetAll lov schema-dot-org cosmo co_324
374+
EXTRA_ONTOLOGIES = chiro ncit foodon chebiplus msio phenio comploinc bero aio reacto go go-lego bao orcid cpont biolink biopax enanomapper mlo ito reactome-Homo-sapiens efo edam sweetAll lov schema-dot-org cosmo co_324 hgnc.genegroup hgnc dictybase eccode uniprot

src/semsql/builder/builder.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from semsql.builder.registry import registry_schema
1515
from semsql.builder.registry.registry_schema import (Makefile, MakefileRule,
16-
Ontology)
16+
Ontology, CompressionEnum)
1717
from semsql.utils.makefile_utils import makefile_to_string
1818

1919
this_path = Path(__file__).parent
@@ -137,6 +137,11 @@ def compile_registry(registry_path: str, local_prefix_file: TextIO = None) -> st
137137
"unzip -p [email protected] {ont.zip_extract_file} "
138138
139139
)
140+
elif ont.compression:
141+
if str(ont.compression) == str(CompressionEnum.gzip.text):
142+
command = f"curl -L -s {ont.url} | gzip -dc > $@.tmp"
143+
else:
144+
raise ValueError(f"Unknown compression: '{ont.compression}'")
140145
else:
141146
command = f"curl -L -s {ont.url} > $@.tmp"
142147
download_rule = MakefileRule(

src/semsql/builder/prefixes/prefixes.csv

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ evs.ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
5959
old.fix,http://purl.org/obo/owl/FIX#
6060
mlo,http://www.a2rd.net.br/mlo#
6161
co_324,https://cropontology.org/rdf/CO_324:
62+
hgnc.genegroup,http://purl.obolibrary.org/obo/hgnc.genegroup_
63+
hgnc,http://purl.obolibrary.org/obo/hgnc_
64+
dictybase,http://purl.obolibrary.org/obo/dictybase_
65+
EC,http://purl.obolibrary.org/obo/eccode_
66+
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_
67+
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_
6268
RBO,http://purl.obolibrary.org/obo/RBO_
6369
CLYH,http://purl.obolibrary.org/obo/CLYH_
6470
RO,http://purl.obolibrary.org/obo/RO_

src/semsql/builder/prefixes/prefixes_local.csv

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,9 @@ evs.ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
77
old.fix,http://purl.org/obo/owl/FIX#
88
mlo,http://www.a2rd.net.br/mlo#
99
co_324,https://cropontology.org/rdf/CO_324:
10+
hgnc.genegroup,http://purl.obolibrary.org/obo/hgnc.genegroup_
11+
hgnc,http://purl.obolibrary.org/obo/hgnc_
12+
dictybase,http://purl.obolibrary.org/obo/dictybase_
13+
EC,http://purl.obolibrary.org/obo/eccode_
14+
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_
15+
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_

src/semsql/builder/registry/ontologies.yaml

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,5 +110,32 @@ ontologies:
110110
url: https://cropontology.org/ontology/CO_324/rdf
111111
prefixmap:
112112
co_324: "https://cropontology.org/rdf/CO_324:"
113-
114-
113+
114+
## PyOBO products
115+
## See https://github.com/INCATools/semantic-sql/issues/45
116+
hgnc.genegroup:
117+
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc.genegroup/hgnc.genegroup.owl.gz
118+
compression: gzip
119+
prefixmap:
120+
hgnc.genegroup: http://purl.obolibrary.org/obo/hgnc.genegroup_
121+
hgnc:
122+
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc/2022-06-01/hgnc.owl.gz
123+
compression: gzip
124+
prefixmap:
125+
hgnc: http://purl.obolibrary.org/obo/hgnc_
126+
dictybase:
127+
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/dictybase/dictybase.owl.gz
128+
compression: gzip
129+
prefixmap:
130+
dictybase: http://purl.obolibrary.org/obo/dictybase_
131+
eccode:
132+
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/eccode/25-May-2022/eccode.owl.gz
133+
compression: gzip
134+
prefixmap:
135+
EC: http://purl.obolibrary.org/obo/eccode_
136+
uniprot.obo: http://purl.obolibrary.org/obo/uniprot_
137+
uniprot:
138+
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/uniprot/2022_02/uniprot.owl.gz
139+
compression: gzip
140+
prefixmap:
141+
uniprot.obo: http://purl.obolibrary.org/obo/uniprot_

0 commit comments

Comments
 (0)