Skip to content

Commit e30df20

Browse files
authored
Merge pull request #62 from INCATools/issue-45
First pass at PyOBO ingest.
2 parents 0178c2d + 4fb7bac commit e30df20

File tree

6 files changed

+130
-12
lines changed

6 files changed

+130
-12
lines changed

Makefile

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ BUILDER_DIR = src/semsql/builder
66
DDL_DIR = $(BUILDER_DIR)/sql_schema
77
YAML_DIR = src/semsql/linkml
88
SQLA_DIR = src/semsql/sqla
9+
ONT_REGISTRY = src/semsql/builder/registry/ontologies.yaml
910

1011
PREFIX_DIR = $(BUILDER_DIR)/prefixes
1112

@@ -16,21 +17,24 @@ SELECTED_ONTS = obi mondo go envo ro hp mp zfa wbphenotype ecto upheno uberon_cm
1617
# EXTRA_ONTOLOGIES is defined in ontologies.Makefile
1718
ALL_ONTS = $(ALL_OBO_ONTS) $(EXTRA_ONTOLOGIES)
1819

19-
STAGED_ONTOLOGIES = $(patsubst %,stage/%.db.gz,$(ALL_ONTS))
20+
STAGED_ONTOLOGIES = $(patsubst %, stage/%.db.gz, $(ALL_ONTS))
2021

2122
TEST_ONTOLOGIES = go-nucleus robot-example
2223

2324

2425
all: build_all stage_all
2526
build_all: $(patsubst %,all-%,$(ALL_ONTS))
2627
stage_all: $(STAGED_ONTOLOGIES)
28+
echo done $(STAGED_ONTOLOGIES)
2729

2830
selected: $(patsubst %,all-%,$(SELECTED_ONTS))
2931

3032
all-%: db/%.db
3133
sqlite3 $< "SELECT COUNT(*) FROM statements"
32-
stage/%.db.gz: db/%.db
33-
gzip -c $< > $@.tmp && mv $@.tmp $@
34+
#stage/%.db.gz: db/%.db
35+
# gzip -c $< > $@.tmp && mv $@.tmp $@
36+
stage/%.db.gz:
37+
gzip -c db/$*.db > $@.tmp && mv $@.tmp $@
3438
.PRECIOUS: stage/%.db.gz
3539

3640
list-onts:
@@ -141,7 +145,7 @@ download/reactome-biopax.zip:
141145
src/semsql/builder/registry/registry_schema.py: src/semsql/builder/registry/registry_schema.yaml
142146
$(RUN) gen-python $< > $@
143147

144-
ontologies.Makefile: src/semsql/builder/registry/ontologies.yaml
148+
ontologies.Makefile: $(ONT_REGISTRY)
145149
$(RUN) semsql generate-makefile -P src/semsql/builder/prefixes/prefixes_local.csv $< > $@.tmp && mv $@.tmp $@
146150

147151
include ontologies.Makefile

ontologies.Makefile

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,17 @@ db/ncit.owl: download/ncit.owl
2020
robot relax -i $< merge -o $@
2121

2222

23+
download/maxo.owl: STAMP
24+
curl -L -s http://purl.obolibrary.org/obo/maxo.owl > $@.tmp
25+
sha256sum -b $@.tmp > $@.sha256
26+
mv $@.tmp $@
27+
28+
.PRECIOUS: download/maxo.owl
29+
30+
db/maxo.owl: download/maxo.owl
31+
robot relax -i $< merge -o $@
32+
33+
2334
download/foodon.owl: STAMP
2435
curl -L -s http://purl.obolibrary.org/obo/foodon.owl > $@.tmp
2536
sha256sum -b $@.tmp > $@.sha256
@@ -208,7 +219,7 @@ db/mlo.owl: download/mlo.owl
208219

209220

210221
download/ito.owl: STAMP
211-
curl -L -s https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip > $@.zip.tmp && unzip -p $@.zip.tmp ITO.owl > $@.tmp && rm $@.zip.tmp
222+
curl -L -s https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip > $@.zip.tmp && unzip -p $@.zip.tmp {ont.zip_extract_file} > $@.tmp && rm $@.zip.tmp
212223
sha256sum -b $@.tmp > $@.sha256
213224
mv $@.tmp $@
214225

@@ -219,7 +230,7 @@ db/ito.owl: download/ito.owl
219230

220231

221232
download/reactome-Homo-sapiens.owl: STAMP
222-
curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp Homo_sapiens.owl > $@.tmp && rm $@.zip.tmp
233+
curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp {ont.zip_extract_file} > $@.tmp && rm $@.zip.tmp
223234
sha256sum -b $@.tmp > $@.sha256
224235
mv $@.tmp $@
225236

@@ -263,7 +274,7 @@ db/sweetAll.owl: download/sweetAll.owl
263274

264275

265276
download/lov.owl: STAMP
266-
curl -L -s https://lov.linkeddata.es/lov.n3.gz > $@.tmp
277+
curl -L -s https://lov.linkeddata.es/lov.n3.gz | gzip -dc > $@.tmp
267278
sha256sum -b $@.tmp > $@.sha256
268279
mv $@.tmp $@
269280

@@ -306,6 +317,61 @@ db/co_324.owl: download/co_324.owl
306317
cp $< $@
307318

308319

320+
download/hgnc.genegroup.owl: STAMP
321+
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc.genegroup/hgnc.genegroup.owl.gz | gzip -dc > $@.tmp
322+
sha256sum -b $@.tmp > $@.sha256
323+
mv $@.tmp $@
324+
325+
.PRECIOUS: download/hgnc.genegroup.owl
326+
327+
db/hgnc.genegroup.owl: download/hgnc.genegroup.owl
328+
cp $< $@
329+
330+
331+
download/hgnc.owl: STAMP
332+
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc/2022-06-01/hgnc.owl.gz | gzip -dc > $@.tmp
333+
sha256sum -b $@.tmp > $@.sha256
334+
mv $@.tmp $@
335+
336+
.PRECIOUS: download/hgnc.owl
337+
338+
db/hgnc.owl: download/hgnc.owl
339+
cp $< $@
340+
341+
342+
download/dictybase.owl: STAMP
343+
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/dictybase/dictybase.owl.gz | gzip -dc > $@.tmp
344+
sha256sum -b $@.tmp > $@.sha256
345+
mv $@.tmp $@
346+
347+
.PRECIOUS: download/dictybase.owl
348+
349+
db/dictybase.owl: download/dictybase.owl
350+
cp $< $@
351+
352+
353+
download/eccode.owl: STAMP
354+
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/eccode/25-May-2022/eccode.owl.gz | gzip -dc > $@.tmp
355+
sha256sum -b $@.tmp > $@.sha256
356+
mv $@.tmp $@
357+
358+
.PRECIOUS: download/eccode.owl
359+
360+
db/eccode.owl: download/eccode.owl
361+
cp $< $@
362+
363+
364+
download/uniprot.owl: STAMP
365+
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/uniprot/2022_02/uniprot.owl.gz | gzip -dc > $@.tmp
366+
sha256sum -b $@.tmp > $@.sha256
367+
mv $@.tmp $@
368+
369+
.PRECIOUS: download/uniprot.owl
370+
371+
db/uniprot.owl: download/uniprot.owl
372+
cp $< $@
373+
374+
309375
download/%.owl: STAMP
310376
curl -L -s http://purl.obolibrary.org/obo/$*.owl > $@.tmp
311377
sha256sum -b $@.tmp > $@.sha256
@@ -316,4 +382,4 @@ download/%.owl: STAMP
316382
db/%.owl: download/%.owl
317383
robot merge -i $< -o $@
318384

319-
EXTRA_ONTOLOGIES = chiro ncit foodon chebiplus msio phenio comploinc bero aio reacto go go-lego bao orcid cpont biolink biopax enanomapper mlo ito reactome-Homo-sapiens efo edam sweetAll lov schema-dot-org cosmo co_324
385+
EXTRA_ONTOLOGIES = chiro ncit maxo foodon chebiplus msio phenio comploinc bero aio reacto go go-lego bao orcid cpont biolink biopax enanomapper mlo ito reactome-Homo-sapiens efo edam sweetAll lov schema-dot-org cosmo co_324 hgnc.genegroup hgnc dictybase eccode uniprot

src/semsql/builder/builder.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
from sqlalchemy.orm import sessionmaker
1313

1414
from semsql.builder.registry import registry_schema
15-
from semsql.builder.registry.registry_schema import (Makefile, MakefileRule,
16-
Ontology)
15+
from semsql.builder.registry.registry_schema import (CompressionEnum, Makefile,
16+
MakefileRule, Ontology)
1717
from semsql.utils.makefile_utils import makefile_to_string
1818

1919
this_path = Path(__file__).parent
@@ -137,6 +137,11 @@ def compile_registry(registry_path: str, local_prefix_file: TextIO = None) -> st
137137
"unzip -p $@.zip.tmp {ont.zip_extract_file} "
138138
139139
)
140+
elif ont.compression:
141+
if str(ont.compression) == str(CompressionEnum.gzip.text):
142+
command = f"curl -L -s {ont.url} | gzip -dc > $@.tmp"
143+
else:
144+
raise ValueError(f"Unknown compression: '{ont.compression}'")
140145
else:
141146
command = f"curl -L -s {ont.url} > $@.tmp"
142147
download_rule = MakefileRule(

src/semsql/builder/prefixes/prefixes.csv

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ evs.ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
5959
old.fix,http://purl.org/obo/owl/FIX#
6060
mlo,http://www.a2rd.net.br/mlo#
6161
co_324,https://cropontology.org/rdf/CO_324:
62+
hgnc.genegroup,http://purl.obolibrary.org/obo/hgnc.genegroup_
63+
hgnc,http://purl.obolibrary.org/obo/hgnc_
64+
hgnc.genegroup,http://purl.obolibrary.org/obo/dictybase_
65+
EC,http://purl.obolibrary.org/obo/eccode_
66+
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_
67+
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_
6268
RBO,http://purl.obolibrary.org/obo/RBO_
6369
CLYH,http://purl.obolibrary.org/obo/CLYH_
6470
RO,http://purl.obolibrary.org/obo/RO_

src/semsql/builder/prefixes/prefixes_local.csv

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,9 @@ evs.ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
77
old.fix,http://purl.org/obo/owl/FIX#
88
mlo,http://www.a2rd.net.br/mlo#
99
co_324,https://cropontology.org/rdf/CO_324:
10+
hgnc.genegroup,http://purl.obolibrary.org/obo/hgnc.genegroup_
11+
hgnc,http://purl.obolibrary.org/obo/hgnc_
12+
hgnc.genegroup,http://purl.obolibrary.org/obo/dictybase_
13+
EC,http://purl.obolibrary.org/obo/eccode_
14+
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_
15+
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_

src/semsql/builder/registry/ontologies.yaml

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ ontologies:
88
ncit:
99
url: http://purl.obolibrary.org/obo/ncit.owl
1010
build_command: "robot relax -i $< merge -o $@"
11+
maxo:
12+
url: http://purl.obolibrary.org/obo/maxo.owl
13+
# https://github.com/monarch-initiative/MAxO/issues/367
14+
build_command: "robot relax -i $< merge reason -r structural -o $@"
1115
foodon:
1216
url: http://purl.obolibrary.org/obo/foodon.owl
1317
build_command: "robot merge -i $< relax reduce -c true -o $@"
@@ -110,5 +114,32 @@ ontologies:
110114
url: https://cropontology.org/ontology/CO_324/rdf
111115
prefixmap:
112116
co_324: "https://cropontology.org/rdf/CO_324:"
113-
114-
117+
118+
## PyOBO products
119+
## See https://github.com/INCATools/semantic-sql/issues/45
120+
hgnc.genegroup:
121+
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc.genegroup/hgnc.genegroup.owl.gz
122+
compression: gzip
123+
prefixmap:
124+
hgnc.genegroup: http://purl.obolibrary.org/obo/hgnc.genegroup_
125+
hgnc:
126+
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc/2022-06-01/hgnc.owl.gz
127+
compression: gzip
128+
prefixmap:
129+
hgnc: http://purl.obolibrary.org/obo/hgnc_
130+
dictybase:
131+
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/dictybase/dictybase.owl.gz
132+
compression: gzip
133+
prefixmap:
134+
hgnc.genegroup: http://purl.obolibrary.org/obo/dictybase_
135+
eccode:
136+
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/eccode/25-May-2022/eccode.owl.gz
137+
compression: gzip
138+
prefixmap:
139+
EC: http://purl.obolibrary.org/obo/eccode_
140+
uniprot.obo: http://purl.obolibrary.org/obo/uniprot_
141+
uniprot:
142+
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/uniprot/2022_02/uniprot.owl.gz
143+
compression: gzip
144+
prefixmap:
145+
uniprot.obo: http://purl.obolibrary.org/obo/uniprot_

0 commit comments

Comments
 (0)