-
Notifications
You must be signed in to change notification settings - Fork 4
VLPB: example SPARQL queries
Namespace prefixes
- general-purpose vocabularies (other prefixes can be looked up with the prefix.cc service)
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
- domain-specific
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX sio: <http://semanticscience.org/resource/>
PREFIX pubmed: <http://identifiers.org/pubmed/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX prosite: <http://purl.uniprot.org/prosite/>
PREFIX prints: <http://purl.uniprot.org/prints/>
PREFIX pirsf: <http://purl.uniprot.org/pirsf/>
PREFIX superfamily: <http://purl.uniprot.org/supfam/>
PREFIX tigrfam: <http://purl.uniprot.org/tigrfams/>
PREFIX pfam: <http://purl.uniprot.org/pfam/>
PREFIX smart: <http://purl.uniprot.org/smart/>
PREFIX ensembl: <http://rdf.ebi.ac.uk/resource/ensembl/>
PREFIX transcript: <http://rdf.ebi.ac.uk/resource/ensembl.transcript/>
PREFIX protein: <http://rdf.ebi.ac.uk/resource/ensembl.protein/>
PREFIX exon: <http://rdf.ebi.ac.uk/resource/ensembl.exon/>
PREFIX term: <http://rdf.ebi.ac.uk/terms/ensembl/>
PREFIX taxon: <http://identifiers.org/taxonomy/>
1. Count genomic features, per Sequence Ontology feature type, in the tomato (Solanum lycopersicum) and wild tomato (Solanum pennellii) genomes from the SGN and EnsemblPlants databases.
# Count the features in one genome graph, grouped by their Sequence Ontology (SO) type.
# Output columns:
#   ?feature_name - label of the SO class, taken from the so.owl graph
#   ?feature_id   - markdown link "[SO:nnnnnnn](<class IRI>)"
#   ?n            - number of matching rows for that class
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
SELECT
  str(?feature_name) AS ?feature_name
  ?feature_id
  COUNT(*) AS ?n
WHERE {
  GRAPH <http://solgenomics.net/genome/Solanum_lycopersicum> {
    # To count another genome, replace the graph IRI above with one of:
    #   http://solgenomics.net/genome/Solanum_pennellii
    #   http://plants.ensembl.org/Solanum_lycopersicum
    ?ft a ?feature_type .
    # Keep only types in the SO namespace. The IRI is cast with str() because
    # the comparison is a string operation; strstarts anchors it at the start.
    FILTER strstarts(str(?feature_type), str(obo:SO_)) .
    # Strip everything up to the last '/', turn "SO_0000147" into "SO:0000147",
    # and wrap it as a markdown link pointing at the class IRI.
    BIND(concat('[', replace(replace(str(?feature_type), '.+\\/', ''), '_', ':'), '](', str(?feature_type), ')') AS ?feature_id)
  }
  GRAPH <http://purl.obolibrary.org/obo/so.owl> {
    ?feature_type rdfs:label ?feature_name
  }
}
GROUP BY ?feature_name ?feature_id
ORDER BY DESC(?n)
Solanum lycopersicum (SGN)
| feature_name | feature_id | n |
|---|---|---|
| exon | SO:0000147 | 160001 |
| CDS | SO:0000316 | 157233 |
| intron | SO:0000188 | 125276 |
| protein_coding_gene | SO:0001217 | 34725 |
| protein_coding_primary_transcript | SO:0000120 | 34725 |
| genetic_marker | SO:0001645 | 30718 |
| three_prime_UTR | SO:0000205 | 15343 |
| five_prime_UTR | SO:0000204 | 13548 |
| chromosome | SO:0000340 | 13 |
| genome | SO:0001026 | 1 |
Solanum pennellii (SGN)
| feature_name | feature_id | n |
|---|---|---|
| exon | SO:0000147 | 278874 |
| CDS | SO:0000316 | 252950 |
| intron | SO:0000188 | 204027 |
| protein_coding_primary_transcript | SO:0000120 | 48923 |
| protein_coding_gene | SO:0001217 | 44965 |
| genetic_marker | SO:0001645 | 2225 |
| chromosome | SO:0000340 | 13 |
| genome | SO:0001026 | 1 |
Solanum lycopersicum (EnsemblPlants)
| feature_name | feature_id | n |
|---|---|---|
| exon | SO:0000147 | 162535 |
| protein_coding_primary_transcript | SO:0000120 | 34725 |
| protein_coding_gene | SO:0001217 | 33785 |
| miRNA | SO:0000276 | 3153 |
| miRNA_gene | SO:0001265 | 3153 |
| tRNA_gene | SO:0001272 | 908 |
| snoRNA | SO:0000275 | 390 |
| snoRNA_gene | SO:0001267 | 390 |
| snRNA_gene | SO:0001268 | 255 |
| snRNA | SO:0000274 | 255 |
| rRNA | SO:0000252 | 94 |
| rRNA_gene | SO:0001637 | 94 |
| pseudogenic_tRNA | SO:0000778 | 76 |
| chromosome | SO:0000340 | 13 |
| RNA | SO:0000356 | 2 |
Note: The chromosome counts include chr.00 (pseudomolecule).
2. Count protein accessions in the tomato proteome from the UniProt database.
# Count the resources typed as uniprot:Protein in the tomato proteome graph.
# Output column: ?n.
PREFIX uniprot: <http://purl.uniprot.org/core/>
SELECT COUNT(*) AS ?n
FROM <http://www.uniprot.org/proteomes/Solanum_lycopersicum>
WHERE {
  ?protein a uniprot:Protein .
}
| n |
|---|
| 33952 |
3. Count triples whose object matches a full-text search for i) the phrase "fruit ripening", ii) the bag of words fruit AND ripening, or iii) the wildcard keyword fruit*, across all RDF graphs in SGN-LD.
# Count the triples, in any named graph, whose object matches a full-text pattern.
# Output column: ?n.
SELECT COUNT(*) AS ?n
WHERE {
  GRAPH ?graph {
    ?subject ?predicate ?text .
    # Active pattern: the exact phrase. The two commented-out patterns below
    # are the alternatives; swap one in to run the other searches.
    ?text bif:contains '"fruit ripening"'
    # '( fruit AND ripening )'
    # '"fruit*"'
  }
}
i) phrase "fruit ripening"
| n |
|---|
| 124 |
ii) fruit AND ripening
| n |
|---|
| 155 |
iii) fruit*
| n |
|---|
| 1481 |
4. List genes/proteins annotated with Gene Ontology (GO) terms containing fruit AND ripening bag-of-words.
# List genes/proteins whose GO annotation matches the full-text search
# '( fruit AND ripening )'.
# The identifier columns (?sgn_gene_id, ?uniprot_acc, ?uniprot_id, ?go_id) are
# returned as markdown links "[label](target)".
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX uniprot: <http://purl.uniprot.org/core/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX so: <http://purl.obolibrary.org/obo/so#>
PREFIX go: <http://www.geneontology.org/formats/oboInOwl#>
SELECT
  DISTINCT
  str(?gene_name) AS ?gene_name
  # Link each gene to its own SGN locus page, built from the gene identifier.
  concat('[', ?sgn_gene_id, '](https://solgenomics.net/locus/', ?sgn_gene_id, '/view)') AS ?sgn_gene_id
  concat('[', ?uniprot_acc, '](', ?prot, ')') AS ?uniprot_acc
  concat('[', ?uniprot_id, '](', ?prot, ')') AS ?uniprot_id
  str(?uniprot_des) AS ?uniprot_des
  str(?go_term) AS ?go_term
  concat('[', ?go_id, '](', ?go, ')') AS ?go_id
  str(?go_cat) AS ?go_cat
WHERE {
  # UniProt proteome graph: GO classification and gene name of each protein.
  GRAPH <http://www.uniprot.org/proteomes/Solanum_lycopersicum> {
    ?prot uniprot:classifiedWith ?go ;
      uniprot:encodedBy/skos:prefLabel ?gene_name
  }
  # EnsemblPlants graph: protein identifiers, plus the path
  # protein <- CHECKSUM - Ensembl protein <- translates_to - transcript
  # - transcribed_from -> gene, whose dc:identifier is the SGN gene ID.
  GRAPH <http://plants.ensembl.org/Solanum_lycopersicum> {
    ?prot dc:identifier ?uniprot_acc ;
      rdfs:label ?uniprot_id ;
      dc:description ?uniprot_des ;
      ^<http://rdf.ebi.ac.uk/terms/ensembl/CHECKSUM> ?ensembl_prot_id .
    ?ensembl_transcript_id so:translates_to ?ensembl_prot_id ;
      so:transcribed_from/dc:identifier ?sgn_gene_id .
  }
  # GO graph: keep terms that have any property value matching the search,
  # and fetch their label, ID and namespace.
  GRAPH <http://purl.obolibrary.org/obo/go.owl> {
    ?go ?p ?o ;
      rdfs:label ?go_term ;
      go:id ?go_id ;
      go:hasOBONamespace ?go_cat .
    ?o bif:contains '( fruit AND ripening )' .
    FILTER regex(?go, obo:GO_)
  }
}
ORDER BY ?gene_name
| gene_name | sgn_gene_id | uniprot_acc | uniprot_id | uniprot_des | go_term | go_id | go_cat |
|---|---|---|---|---|---|---|---|
| ACO1 | Solyc07g049530.2 | P05116 | ACCO1_SOLLC | 1-aminocyclopropane-1-carboxylate oxidase 1 | fruit ripening | GO:0009835 | biological_process |
| ACO3 | Solyc09g089580.2 | P10967 | ACCH3_SOLLC | 1-aminocyclopropane-1-carboxylate oxidase homolog | fruit ripening | GO:0009835 | biological_process |
| ACO4 | Solyc07g049550.2 | P24157 | ACCO4_SOLLC | 1-aminocyclopropane-1-carboxylate oxidase 4 | fruit ripening | GO:0009835 | biological_process |
| ACS2 | Solyc01g095080.2 | P18485 | 1A12_SOLLC | 1-aminocyclopropane-1-carboxylate synthase 2 | fruit ripening | GO:0009835 | biological_process |
| ACS3 | Solyc02g091990.2 | Q42881 | 1A13_SOLLC | 1-aminocyclopropane-1-carboxylate synthase 3 | fruit ripening | GO:0009835 | biological_process |
| GP1 | Solyc05g005560.2 | Q40161 | GP1_SOLLC | Polygalacturonase-1 non-catalytic subunit beta | fruit ripening | GO:0009835 | biological_process |
| PG2 | Solyc10g080210.1 | P05117 | PGLR_SOLLC | Polygalacturonase-2 | fruit ripening | GO:0009835 | biological_process |
| PME1.9 | Solyc07g064170.2 | P14280 | PME1_SOLLC | Pectinesterase 1 | fruit ripening | GO:0009835 | biological_process |
| PME2.1 | Solyc07g064180.2 | P09607 | PME21_SOLLC | Pectinesterase 2.1 | fruit ripening | GO:0009835 | biological_process |
5. Summarize tomato QTL data extracted from Europe PMC. Note: Not all QTLs could be mapped to chromosomal locations (via genetic markers) available in the SGN RDF graphs.
# Summarise the QTL resources (type obo:SO_0000771) in the Europe PMC graph.
# Output columns:
#   ?n_articles      - distinct values of dcterms:isReferencedBy
#   ?n_qtls          - result rows with a QTL bound
#   ?n_qtls_with_loc - result rows where the optional faldo:location is bound
# NOTE(review): ?n_qtls and ?n_qtls_with_loc count rows, not distinct QTLs;
# they equal QTL counts only if each QTL has one article and at most one
# location - confirm against the data.
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
SELECT
  COUNT(DISTINCT ?article) AS ?n_articles
  COUNT(?qtl) AS ?n_qtls
  COUNT(?position) AS ?n_qtls_with_loc
FROM <http://europepmc.org/articles>
WHERE {
  ?qtl a obo:SO_0000771 .
  ?qtl dcterms:isReferencedBy ?article .
  OPTIONAL { ?qtl faldo:location ?position }
}
| n_articles | n_qtls | n_qtls_with_loc |
|---|---|---|
| 6 | 512 | 227 |
6. List traits (terms from PO, TO and SPTO ontologies) associated with the extracted QTLs.
# List the traits linked to QTLs, with the number of QTLs per trait.
# Output columns:
#   ?trait_id   - markdown link "[<ontology id>](<trait IRI>)"
#   ?trait_name - trait label as a plain string
#   ?n_qtls     - count of ?qtl bindings
# NOTE(review): there is no GROUP BY clause, so per-trait counts depend on the
# endpoint grouping implicitly by the non-aggregate columns - confirm that this
# is the intended (endpoint-specific) behaviour.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
# The faldo: and dcterms: prefixes below are declared but not used in this query.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX dcterms: <http://purl.org/dc/terms/>
SELECT
DISTINCT concat('[', ?trait_id, '](', ?trait, ')') AS ?trait_id
str(?trait_name) AS ?trait_name
COUNT(?qtl) AS ?n_qtls
FROM <http://europepmc.org/articles>
WHERE {
# QTL resources (type obo:SO_0000771) and the trait each one points to via obo:RO_0003308.
?qtl a obo:SO_0000771 ;
obo:RO_0003308 ?trait .
{
# Sub-select: look up each trait's label and ontology ID in the PO, TO and SPTO graphs.
SELECT
?trait
?trait_name
?trait_id
FROM <http://purl.obolibrary.org/obo/po.owl>
FROM <http://purl.obolibrary.org/obo/to.owl>
FROM <http://purl.bioontology.org/ontology/SPTO> {
?trait rdfs:label ?trait_name ;
<http://www.geneontology.org/formats/oboInOwl#id> ?trait_id .
}
}
}
ORDER BY ?trait_name
| trait_id | trait_name | n_qtls |
|---|---|---|
| PO:0020043 | compound leaf | 12 |
| SP:0000366 | days to fruit ripening | 16 |
| PO:0009001 | fruit | 22 |
| TO:0002626 | fruit length | 16 |
| SP:0000087 | fruit perimeter | 8 |
| TO:0002728 | fruit quality trait | 11 |
| SP:0000038 | fruit shape | 8 |
| TO:0002628 | fruit shape | 7 |
| TO:0002625 | fruit size | 9 |
| TO:0002746 | fruit weight | 16 |
| TO:0002627 | fruit width | 8 |
| TO:0002699 | lycopene content | 2 |
| TO:0000174 | maturity trait | 11 |
| SP:0000170 | pH | 9 |
| TO:0020076 | phenolic compound content | 1 |
| SP:0000236 | plant canopy | 16 |
| TO:0000442 | plant fresh weight | 9 |
| SP:0000003 | plant habit | 16 |
| TO:0000207 | plant height | 16 |
| TO:0000017 | plant morphology trait | 64 |
| SP:0000002 | plant size | 16 |
| TO:0006062 | plant width | 16 |
| TO:0000181 | seed weight | 16 |
| SP:0000345 | titratable acids | 1 |
| SP:0000165 | total soluble solids | 14 |
| SP:0000198 | yield | 19 |
7. List QTLs and the associated gene/transcript IDs for the trait "days to fruit ripening" (SP:0000366); the query is limited to five results.
# For QTLs linked to the trait obo:SP_0000366, list the genes they overlap,
# together with each gene's transcript and the transcript's annotation text.
# Output columns: ?qtl_id, ?sgn_gene_id, ?sgn_trans_id, ?annot (all plain strings).
# LIMIT 5 with no ORDER BY: the five rows returned are whichever the endpoint
# produces first.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX so: <http://purl.obolibrary.org/obo/so#>
SELECT
  str(?qtl_id) AS ?qtl_id
  str(?sgn_gene_id) AS ?sgn_gene_id
  str(?sgn_trans_id) AS ?sgn_trans_id
  str(?annot) AS ?annot
WHERE {
  # Europe PMC graph: QTLs, their trait, and the genes they overlap.
  GRAPH <http://europepmc.org/articles> {
    ?qtl a obo:SO_0000771 .
    ?qtl obo:RO_0003308 ?trait .
    ?qtl so:overlaps ?locus .
    ?qtl dcterms:identifier ?qtl_id .
    FILTER(?trait = obo:SP_0000366)
  }
  # SGN genome graph: gene -> transcript, with identifiers and annotation.
  GRAPH <http://solgenomics.net/genome/Solanum_lycopersicum> {
    ?locus so:transcribed_to ?mrna .
    ?locus dcterms:identifier ?sgn_gene_id .
    ?mrna rdfs:comment ?annot .
    ?mrna dcterms:identifier ?sgn_trans_id
  }
}
LIMIT 5
| qtl_id | sgn_gene_id | sgn_trans_id | annot |
|---|---|---|---|
| PMC4321030_2_36 | Solyc11g008770.1 | Solyc11g008770.1.1 | Name: Solyc11g008770.1.1; Note: LETM1 and EF-hand domain-containing protein 1, mitochondrial (AHRD V1 *--- LETM1_CHICK); contains Interpro domain(s) IPR011685 LETM1-like ; Ontology_term: GO:0005509; interpro2go_term: GO:0005509 |
| PMC4321030_2_54 | Solyc11g008770.1 | Solyc11g008770.1.1 | Name: Solyc11g008770.1.1; Note: LETM1 and EF-hand domain-containing protein 1, mitochondrial (AHRD V1 *--- LETM1_CHICK); contains Interpro domain(s) IPR011685 LETM1-like ; Ontology_term: GO:0005509; interpro2go_term: GO:0005509 |
| PMC4321030_2_54 | Solyc11g008780.1 | Solyc11g008780.1.1 | Name: Solyc11g008780.1.1; Note: Acetolactate synthase small subunit (AHRD V1 ***- Q9SMC2_NICPL); contains Interpro domain(s) IPR004789 Acetolactate synthase, small subunit ; Ontology_term: GO:0009082, GO:0008152; interpro2go_term: GO:0009082, GO:0008152 |
| PMC4321030_2_54 | Solyc11g008790.1 | Solyc11g008790.1.1 | Name: Solyc11g008790.1.1; Note: ARV1 (AHRD V1 ***- Q5MK24_ARATH); contains Interpro domain(s) IPR007290 Arv1-like protein |
| PMC4321030_2_54 | Solyc11g008800.1 | Solyc11g008800.1.1 | Name: Solyc11g008800.1.1; Note: Inositol 1 4 5-trisphosphate 5-phosphatase-like protein (AHRD V1 -- Q6H453_ORYSJ); contains Interpro domain(s) IPR000300 Inositol polyphosphate related phosphatase ; Ontology_term: GO:0004437; interpro2go_term: GO:0004437 |
ODEX4all