Skip to content

Commit 7b30441

Browse files
committed
Allow prefixes pass-thru.
This PR allows a path to a prefixes file to be passed on the command line to semsql; e.g. `semsql make foo.db -P /path/to/my/prefixes.csv`. This will be used in place of the default prefix map. Note that it is expected that standard prefixes for owl, rdf, xsd will be passed through.
1 parent 605844c commit 7b30441

File tree

10 files changed

+1184
-25
lines changed

10 files changed

+1184
-25
lines changed

ontologies.Makefile

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ db/ncit.owl: download/ncit.owl
2121

2222

2323
download/fma.owl: STAMP
24-
curl -L -s https://data.bioontology.org/ontologies/FMA/submissions/29/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb > $@.tmp
24+
curl -L -s http://sig.biostr.washington.edu/share/downloads/fma/release/latest/fma.owl > $@.tmp
2525
sha256sum -b $@.tmp > $@.sha256
2626
mv $@.tmp $@
2727

@@ -75,6 +75,17 @@ db/msio.owl: download/msio.owl
7575
cp $< $@
7676

7777

78+
download/modl.owl: STAMP
79+
curl -L -s https://raw.githubusercontent.com/Data-Semantics-Laboratory/modular-ontology-design-library/master/MODL.owl > $@.tmp
80+
sha256sum -b $@.tmp > $@.sha256
81+
mv $@.tmp $@
82+
83+
.PRECIOUS: download/modl.owl
84+
85+
db/modl.owl: download/modl.owl
86+
cp $< $@
87+
88+
7889
download/phenio.owl: STAMP
7990
curl -L -s https://github.com/monarch-initiative/phenio/releases/latest/download/phenio.owl > $@.tmp
8091
sha256sum -b $@.tmp > $@.sha256
@@ -130,6 +141,17 @@ db/reacto.owl: download/reacto.owl
130141
cp $< $@
131142

132143

144+
download/bcio.owl: STAMP
145+
curl -L -s http://humanbehaviourchange.org/ontology/bcio.owl > $@.tmp
146+
sha256sum -b $@.tmp > $@.sha256
147+
mv $@.tmp $@
148+
149+
.PRECIOUS: download/bcio.owl
150+
151+
db/bcio.owl: download/bcio.owl
152+
cp $< $@
153+
154+
133155
download/go.owl: STAMP
134156
curl -L -s http://purl.obolibrary.org/obo/go/extensions/go-plus.owl > $@.tmp
135157
sha256sum -b $@.tmp > $@.sha256
@@ -503,4 +525,4 @@ download/%.owl: STAMP
503525
db/%.owl: download/%.owl
504526
robot merge -i $< -o $@
505527

506-
EXTRA_ONTOLOGIES = chiro ncit fma maxo foodon chebiplus msio phenio comploinc bero aio reacto go go-lego bao orcid cpont biolink biopax enanomapper mlo ito reactome-Homo-sapiens efo hcao edam sweetAll lov schema-dot-org cellosaurus cosmo dbpendiaont co_324 hgnc.genegroup hgnc dictybase eccode uniprot rhea swisslipid drugbank drugcentral complexportal drugmechdb rxnorm
528+
EXTRA_ONTOLOGIES = chiro ncit fma maxo foodon chebiplus msio modl phenio comploinc bero aio reacto bcio go go-lego bao orcid cpont biolink biopax enanomapper mlo ito reactome-Homo-sapiens efo hcao edam sweetAll lov schema-dot-org cellosaurus cosmo dbpendiaont co_324 hgnc.genegroup hgnc dictybase eccode uniprot rhea swisslipid drugbank drugcentral complexportal drugmechdb rxnorm

src/semsql/builder/build.Makefile

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -33,24 +33,29 @@ help:
3333
# All dbs are made from an initial template containing
3434
# (1) prefixes
3535
# (2) SQL Schema (primarily views)
36-
$(TEMPLATE): $(THIS_DIR)/sql_schema/semsql.sql build_prefixes
37-
cat $< | sqlite3 $@.tmp && \
38-
echo .exit | sqlite3 -echo $@.tmp -cmd ".mode csv" -cmd ".import $(THIS_DIR)/prefixes/prefixes.csv prefix" && \
39-
mv $@.tmp $@
40-
.PRECIOUS: $(TEMPLATE)
36+
#$(TEMPLATE): $(THIS_DIR)/sql_schema/semsql.sql build_prefixes
37+
# cat $< | sqlite3 $@.tmp && \
38+
# echo .exit | sqlite3 -echo $@.tmp -cmd ".mode csv" -cmd ".import $(THIS_DIR)/prefixes/prefixes.csv prefix" && \
39+
40+
#.PRECIOUS: $(TEMPLATE)
4141

4242
%-min.owl: %.owl
4343
robot \
4444
remove -i $< --axioms "equivalent disjoint annotation" \
4545
filter --exclude-terms $(THIS_DIR)/exclude-terms.txt \
4646
-o $@
4747

48+
PREFIX_CSV_PATH = $(PREFIX_DIR)/prefixes.csv
49+
PREFIX_YAML_PATH = $(PREFIX_DIR)/prefixes.yaml
50+
4851
# -- MAIN TARGET --
4952
# A db is constructed from
5053
# (1) triples loaded using rdftab
5154
# (2) A relation-graph TSV
52-
%.db: %.owl %-$(RGSUFFIX).tsv $(TEMPLATE)
53-
cp $(TEMPLATE) $@.tmp && \
55+
%.db: %.owl %-$(RGSUFFIX).tsv $(PREFIX_CSV_PATH)
56+
rm -f $@.tmp && \
57+
cat $(THIS_DIR)/sql_schema/semsql.sql | sqlite3 $@.tmp && \
58+
echo .exit | sqlite3 -echo $@.tmp -cmd ".mode csv" -cmd ".import $(PREFIX_CSV_PATH) prefix" && \
5459
rdftab $@.tmp < $< && \
5560
sqlite3 $@.tmp -cmd '.separator "\t"' ".import $*-$(RGSUFFIX).tsv entailed_edge" && \
5661
gzip -f $*-$(RGSUFFIX).tsv && \
@@ -61,22 +66,14 @@ $(TEMPLATE): $(THIS_DIR)/sql_schema/semsql.sql build_prefixes
6166

6267
# -- ENTAILED EDGES --
6368
# relation-graph is used to compute entailed edges.
64-
#
65-
# this currently requires a few different steps, because
66-
# - RG currently outputs TTL
67-
# - We need a TSV using correct prefixes/CURIEs to load into our db
68-
#
69-
# will be simplified in future. See:
70-
# - https://github.com/balhoff/relation-graph/issues/123
71-
# - https://github.com/balhoff/relation-graph/issues/25
72-
%-$(RGSUFFIX).tsv: %-min.owl %-properties.txt $(PREFIX_DIR)/prefixes.yaml
69+
%-$(RGSUFFIX).tsv: %-min.owl %-properties.txt $(PREFIX_YAML_PATH)
7370
$(RG) --disable-owl-nothing true \
7471
--ontology-file $<\
7572
$(RG_PROPERTIES) \
7673
--output-file $@.tmp \
7774
--equivalence-as-subclass true \
7875
--mode TSV \
79-
--prefixes $(PREFIX_DIR)/prefixes.yaml \
76+
--prefixes $(PREFIX_YAML_PATH) \
8077
--output-individuals true \
8178
--output-subclasses true \
8279
--reflexive-subclasses true && \
@@ -105,6 +102,6 @@ $(PREFIX_DIR)/prefixes.csv: $(PREFIX_DIR)/prefixes_curated.csv $(PREFIX_DIR)/pre
105102
cat $^ > $@
106103

107104
# see https://github.com/INCATools/relation-graph/issues/168
108-
$(PREFIX_DIR)/prefixes.yaml: $(PREFIX_DIR)/prefixes.csv
105+
$(PREFIX_YAML_PATH): $(PREFIX_CSV_PATH)
109106
grep -v ^prefix, $< | grep -v ^obo, | perl -npe 's@,(.*)@: "$$1"@' > $@.tmp && mv $@.tmp $@
110107

src/semsql/builder/builder.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@ class DockerConfig:
2828
memory: str = None
2929

3030

31-
def make(target: str, docker_config: Optional[DockerConfig] = None):
31+
def make(target: str, docker_config: Optional[DockerConfig] = None, prefix_csv_path = None):
3232
"""
3333
Builds a target such as a SQLite file using the build.Makefile
3434
3535
:param target: Make target
3636
:param docker_config: if passed, use ODK docker with the specific config
37+
:param prefix_csv_path:
3738
"""
3839
path_to_makefile = str(this_path / "build.Makefile")
3940
if docker_config is not None:
@@ -60,6 +61,8 @@ def make(target: str, docker_config: Optional[DockerConfig] = None):
6061
else:
6162
pre = []
6263
cmd = pre + ["make", target, "-f", path_to_makefile]
64+
if prefix_csv_path:
65+
cmd += [f"PREFIX_CSV_PATH={prefix_csv_path}"]
6366
logging.info(f"CMD={cmd}")
6467
subprocess.run(cmd)
6568

src/semsql/builder/cli.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,12 @@ def main(verbose: int, quiet: bool):
3333
show_default=True,
3434
help="Uses ODK docker image",
3535
)
36-
def make(path, docker):
36+
@click.option(
37+
"--prefix-csv-path",
38+
"-P",
39+
help="path to csv of prefix expansions"
40+
)
41+
def make(path, docker, **kwargs):
3742
"""
3843
Makes a specified target, such as a db file
3944
@@ -47,7 +52,7 @@ def make(path, docker):
4752
docker_config = builder.DockerConfig()
4853
else:
4954
docker_config = None
50-
builder.make(path, docker_config=docker_config)
55+
builder.make(path, docker_config=docker_config, **kwargs)
5156

5257

5358
@main.command()

src/semsql/builder/prefixes/prefixes.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,10 @@ prefix,base
5454
FMA,http://purl.org/sig/ont/fma/fma
5555
MSIO,http://purl.obolibrary.org/obo/MSIO_
5656
nmrCV,http://nmrML.org/nmrCV#NMR:
57+
modl,https://archive.org/services/purl/purl/modular_ontology_design_library#
5758
biolink,https://w3id.org/biolink/vocab/
5859
loinc,https://loinc.org/
60+
BCIO,http://humanbehaviourchange.org/ontology/BCIO_
5961
orcid,https://orcid.org/
6062
evs.ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
6163
old.fix,http://purl.org/obo/owl/FIX#

src/semsql/builder/prefixes/prefixes_local.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ prefix,base
22
FMA,http://purl.org/sig/ont/fma/fma
33
MSIO,http://purl.obolibrary.org/obo/MSIO_
44
nmrCV,http://nmrML.org/nmrCV#NMR:
5+
modl,https://archive.org/services/purl/purl/modular_ontology_design_library#
56
biolink,https://w3id.org/biolink/vocab/
67
loinc,https://loinc.org/
8+
BCIO,http://humanbehaviourchange.org/ontology/BCIO_
79
orcid,https://orcid.org/
810
evs.ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
911
old.fix,http://purl.org/obo/owl/FIX#

src/semsql/builder/registry/ontologies.yaml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@ ontologies:
99
url: http://purl.obolibrary.org/obo/ncit.owl
1010
build_command: "robot relax -i $< merge -o $@"
1111
fma:
12-
# note: this is the public API key on the main bioportal site
13-
url: "https://data.bioontology.org/ontologies/FMA/submissions/29/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb"
12+
url: http://sig.biostr.washington.edu/share/downloads/fma/release/latest/fma.owl
1413
prefixmap:
1514
FMA: http://purl.org/sig/ont/fma/fma
1615
maxo:
@@ -28,6 +27,10 @@ ontologies:
2827
prefixmap:
2928
MSIO: http://purl.obolibrary.org/obo/MSIO_
3029
nmrCV: "http://nmrML.org/nmrCV#NMR:"
30+
modl:
31+
url: https://raw.githubusercontent.com/Data-Semantics-Laboratory/modular-ontology-design-library/master/MODL.owl
32+
prefixmap:
33+
modl: "https://archive.org/services/purl/purl/modular_ontology_design_library#"
3134
phenio:
3235
description: Monarch Phenomics Integrated Ontology
3336
url: https://github.com/monarch-initiative/phenio/releases/latest/download/phenio.owl
@@ -51,6 +54,10 @@ ontologies:
5154
url: https://raw.githubusercontent.com/berkeleybop/artificial-intelligence-ontology/main/aio.owl
5255
reacto:
5356
url: http://purl.obolibrary.org/obo/go/extensions/reacto.owl
57+
bcio:
58+
url: http://humanbehaviourchange.org/ontology/bcio.owl
59+
prefixmap:
60+
BCIO: http://humanbehaviourchange.org/ontology/BCIO_
5461
go:
5562
url: http://purl.obolibrary.org/obo/go/extensions/go-plus.owl
5663
go-lego:

tests/integration/Makefile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
RUN = poetry run
2+
3+
input/%.owl:
4+
robot merge -I http://purl.obolibrary.org/obo/$*.owl -o $@
5+
.PRECIOUS: input/%.owl
6+
7+
fake/%.owl: input/%.owl
8+
cp $< $@
9+
10+
input/%.db: input/%.owl
11+
$(RUN) semsql make $@
12+
13+
fake/%.db: fake/%.owl
14+
$(RUN) semsql make $@ -P conf/bad-prefixes.csv
15+
16+
test-bad-prefix: fake/hsapdv.db
17+
runoak -i $< info i^HsapDvFAKE: | head -5 | grep HsapDvFAKE && echo pass
18+
19+
input/%.tree: input/%.db
20+
runoak -i $< tree -p i,p .all -o $@

tests/integration/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# integration tests
2+
3+
These are not currently executed as part of actions, they require installation of RG and rdftab.
4+
5+
Note that these will be folded into the main unit tests after: https://github.com/INCATools/semantic-sql/issues/41

0 commit comments

Comments
 (0)