Skip to content

Commit 1c15b65

Browse files
committed
Merge branch 'release/0.3.6'
2 parents 0e2124b + 3da2692 commit 1c15b65

14 files changed

+760
-798
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.3.5
2+
current_version = 0.3.6
33
commit = False
44
tag = False
55

rdflib_sqlalchemy/base.py

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
"""Base classes for the store."""
2+
from rdflib import Literal
3+
from rdflib.graph import Graph, QuotedGraph
4+
from rdflib.plugins.stores.regexmatching import REGEXTerm
5+
from sqlalchemy.sql import expression
6+
7+
from rdflib_sqlalchemy.termutils import (
8+
type_to_term_combination,
9+
statement_to_term_combination,
10+
)
11+
12+
13+
class SQLGeneratorMixin(object):
    """
    SQL statement generator mixin for the SQLAlchemy store.

    The host class must provide ``self.tables``, a mapping that is indexed
    here with the keys ``"type_statements"``, ``"literal_statements"``,
    ``"quoted_statements"`` and ``"asserted_statements"``.
    """

    def _build_type_sql_command(self, member, klass, context):
        """
        Build an insert command for a type table.

        Returns a ``(command, values)`` pair: the insert construct for the
        ``type_statements`` table and the dict of column values for it.
        """
        # columns: member, klass, context
        command = self.tables["type_statements"].insert()
        values = {
            "member": member,
            "klass": klass,
            "context": context.identifier,
            "termComb": int(type_to_term_combination(member, klass, context)),
        }
        return command, values

    def _build_literal_triple_sql_command(self, subject, predicate, obj, context):
        """
        Build an insert command for literal triples.

        These triples correspond to RDF statements where the object is a Literal,
        e.g. `rdflib.Literal`.

        Returns a ``(command, values)`` pair for the ``literal_statements``
        table.
        """
        triple_pattern = int(
            statement_to_term_combination(subject, predicate, obj, context)
        )
        command = self.tables["literal_statements"].insert()
        is_literal = isinstance(obj, Literal)
        values = {
            "subject": subject,
            "predicate": predicate,
            "object": obj,
            "context": context.identifier,
            "termComb": triple_pattern,
            # A missing or empty language/datatype is stored as None.
            "objLanguage": (obj.language or None) if is_literal else None,
            "objDatatype": (obj.datatype or None) if is_literal else None,
        }
        return command, values

    def _build_triple_sql_command(self, subject, predicate, obj, context, quoted):
        """
        Build an insert command for regular triple table.

        ``quoted`` selects the ``quoted_statements`` table (and adds the
        ``objLanguage``/``objDatatype`` values); otherwise the
        ``asserted_statements`` table is used.

        Returns a ``(command, params)`` pair.
        """
        # A conditional expression is used instead of ``quoted and a or b``
        # so the choice never depends on the truthiness of the table object.
        if quoted:
            stmt_table = self.tables["quoted_statements"]
        else:
            stmt_table = self.tables["asserted_statements"]

        triple_pattern = statement_to_term_combination(
            subject,
            predicate,
            obj,
            context,
        )
        command = stmt_table.insert()

        params = {
            "subject": subject,
            "predicate": predicate,
            "object": obj,
            "context": context.identifier,
            "termComb": triple_pattern,
        }
        if quoted:
            is_literal = isinstance(obj, Literal)
            # A missing or empty language/datatype is stored as None.
            params["objLanguage"] = (
                (obj.language or None) if is_literal else None)
            params["objDatatype"] = (
                (obj.datatype or None) if is_literal else None)
        return command, params

    def build_clause(self, table, subject, predicate, obj, context=None, typeTable=False):
        """
        Build the WHERE clause for the supplied terms and context.

        With ``typeTable`` set, only the member (``subject``), class (``obj``)
        and context clauses are built and ``predicate`` is ignored. Otherwise
        subject, predicate, object, context, literal datatype and literal
        language clauses are built.

        Returns the non-None sub-clauses joined with AND, or None when every
        sub-clause is None.
        """
        if typeTable:
            candidates = [
                self.buildTypeMemberClause(subject, table),
                self.buildTypeClassClause(obj, table),
                self.buildContextClause(context, table),
            ]
        else:
            candidates = [
                self.buildSubjClause(subject, table),
                self.buildPredClause(predicate, table),
                self.buildObjClause(obj, table),
                self.buildContextClause(context, table),
                self.buildLitDTypeClause(obj, table),
                self.buildLitLanguageClause(obj, table),
            ]

        clauses = [clause for clause in candidates if clause is not None]
        if not clauses:
            return None
        return expression.and_(*clauses)

    def buildLitDTypeClause(self, obj, table):
        """Build Literal datatype clause; None unless obj is a typed Literal."""
        if isinstance(obj, Literal) and obj.datatype is not None:
            return table.c.objDatatype == obj.datatype
        return None

    def buildLitLanguageClause(self, obj, table):
        """Build Literal language clause; None unless obj is a tagged Literal."""
        if isinstance(obj, Literal) and obj.language is not None:
            return table.c.objLanguage == obj.language
        return None

    # Where Clause utility Functions
    # The predicate and object clause builders are modified in order
    # to optimize subjects and objects utility functions which can
    # take lists as their last argument (object, predicate -
    # respectively)
    def buildSubjClause(self, subject, table):
        """
        Build Subject clause.

        Accepts a REGEXTerm, a list of subjects (falsy items are skipped and
        the sub-clauses are joined with 'OR'), a Graph/QuotedGraph (matched by
        its identifier), any other term, or None (no clause).
        """
        if isinstance(subject, REGEXTerm):
            # TODO: this work only in mysql. Must adapt for postgres and sqlite
            return table.c.subject.op("REGEXP")(subject)
        if isinstance(subject, list):
            return expression.or_(
                *[self.buildSubjClause(s, table) for s in subject if s])
        if isinstance(subject, (QuotedGraph, Graph)):
            return table.c.subject == subject.identifier
        if subject is not None:
            return table.c.subject == subject
        return None

    def buildPredClause(self, predicate, table):
        """
        Build Predicate clause.

        Capable of taking a list of predicates as well, in which case
        subclauses are joined with 'OR' (falsy items are skipped).

        """
        if isinstance(predicate, REGEXTerm):
            # TODO: this work only in mysql. Must adapt for postgres and sqlite
            return table.c.predicate.op("REGEXP")(predicate)
        if isinstance(predicate, list):
            return expression.or_(
                *[self.buildPredClause(p, table) for p in predicate if p])
        if predicate is not None:
            return table.c.predicate == predicate
        return None

    def buildObjClause(self, obj, table):
        """
        Build Object clause.

        Capable of taking a list of objects as well, in which case subclauses
        are joined with 'OR' (falsy items are skipped). A Graph/QuotedGraph is
        matched by its identifier.

        """
        if isinstance(obj, REGEXTerm):
            # TODO: this work only in mysql. Must adapt for postgres and sqlite
            return table.c.object.op("REGEXP")(obj)
        if isinstance(obj, list):
            return expression.or_(
                *[self.buildObjClause(o, table) for o in obj if o])
        if isinstance(obj, (QuotedGraph, Graph)):
            return table.c.object == obj.identifier
        if obj is not None:
            return table.c.object == obj
        return None

    def buildContextClause(self, context, table):
        """
        Build Context clause.

        Returns None when ``context`` is None or its identifier is None.
        """
        if isinstance(context, REGEXTerm):
            # TODO: this work only in mysql. Must adapt for postgres and sqlite
            # NOTE(review): the other builders pass the REGEXTerm itself to
            # the operator; this one reads ``.identifier`` from it -- confirm
            # that a REGEXTerm context really carries that attribute.
            return table.c.context.op("regexp")(context.identifier)
        if context is not None and context.identifier is not None:
            return table.c.context == context.identifier
        return None

    def buildTypeMemberClause(self, subject, table):
        """
        Build Type Member clause.

        A list of subjects yields sub-clauses joined with 'OR' (falsy items
        are skipped); None yields no clause.
        """
        if isinstance(subject, REGEXTerm):
            # TODO: this work only in mysql. Must adapt for postgres and sqlite
            return table.c.member.op("regexp")(subject)
        if isinstance(subject, list):
            return expression.or_(
                *[self.buildTypeMemberClause(s, table) for s in subject if s])
        if subject is not None:
            return table.c.member == subject
        return None

    def buildTypeClassClause(self, obj, table):
        """
        Build Type Class clause.

        A list of classes yields sub-clauses joined with 'OR' (falsy items
        are skipped); None yields no clause.
        """
        if isinstance(obj, REGEXTerm):
            # TODO: this work only in mysql. Must adapt for postgres and sqlite
            return table.c.klass.op("regexp")(obj)
        if isinstance(obj, list):
            return expression.or_(
                *[self.buildTypeClassClause(o, table) for o in obj if o])
        if obj is not None:
            # Always return a clause here: ``obj and <clause>`` would hand a
            # falsy, non-None ``obj`` back to the caller instead of a clause.
            return table.c.klass == obj
        return None

rdflib_sqlalchemy/constants.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""Constant definitions"""
2+
from rdflib import BNode, Literal, URIRef, Variable
3+
4+
5+
# Kinds of SELECT statement that can be requested (a ``select_type`` value).
COUNT_SELECT = 0
CONTEXT_SELECT = 1
TRIPLE_SELECT = 2
TRIPLE_SELECT_NO_ORDER = 3

# Identifiers of the statement-table partitions. These are compared against
# table types, never against the select types above, so the shared value 3
# does not clash.
ASSERTED_NON_TYPE_PARTITION = 3
ASSERTED_TYPE_PARTITION = 4
QUOTED_PARTITION = 5
ASSERTED_LITERAL_PARTITION = 6

FULL_TRIPLE_PARTITIONS = [QUOTED_PARTITION, ASSERTED_LITERAL_PARTITION]

INTERNED_PREFIX = "kb_"

# Maps each four-letter term-combination code to its position in the list
# below; the position is the integer stored for that combination, so the
# order of the list must not change.
TERM_COMBINATIONS = {term: index for index, term in enumerate([
    "UUUU", "UUUB", "UUUF", "UUVU", "UUVB", "UUVF", "UUBU", "UUBB", "UUBF",
    "UULU", "UULB", "UULF", "UUFU", "UUFB", "UUFF",
    #
    "UVUU", "UVUB", "UVUF", "UVVU", "UVVB", "UVVF", "UVBU", "UVBB", "UVBF",
    "UVLU", "UVLB", "UVLF", "UVFU", "UVFB", "UVFF",
    #
    "VUUU", "VUUB", "VUUF", "VUVU", "VUVB", "VUVF", "VUBU", "VUBB", "VUBF",
    "VULU", "VULB", "VULF", "VUFU", "VUFB", "VUFF",
    #
    "VVUU", "VVUB", "VVUF", "VVVU", "VVVB", "VVVF", "VVBU", "VVBB", "VVBF",
    "VVLU", "VVLB", "VVLF", "VVFU", "VVFB", "VVFF",
    #
    "BUUU", "BUUB", "BUUF", "BUVU", "BUVB", "BUVF", "BUBU", "BUBB", "BUBF",
    "BULU", "BULB", "BULF", "BUFU", "BUFB", "BUFF",
    #
    "BVUU", "BVUB", "BVUF", "BVVU", "BVVB", "BVVF", "BVBU", "BVBB", "BVBF",
    "BVLU", "BVLB", "BVLF", "BVFU", "BVFB", "BVFF",
    #
    "FUUU", "FUUB", "FUUF", "FUVU", "FUVB", "FUVF", "FUBU", "FUBB", "FUBF",
    "FULU", "FULB", "FULF", "FUFU", "FUFB", "FUFF",
    #
    "FVUU", "FVUB", "FVUF", "FVVU", "FVVB", "FVVF", "FVBU", "FVBB", "FVBF",
    "FVLU", "FVLB", "FVLF", "FVFU", "FVFB", "FVFF",
])}

# Inverse lookup: stored integer -> four-letter term-combination code.
REVERSE_TERM_COMBINATIONS = {
    index: term
    for term, index in TERM_COMBINATIONS.items()
}
49+
50+
# One-letter term code -> rdflib class used to instantiate a term of that
# kind. The "F" code that appears in the term-combination strings has no
# entry here.
TERM_INSTANTIATION_DICT = {
    "U": URIRef,    # URI reference
    "B": BNode,     # blank node
    "V": Variable,  # variable
    "L": Literal,   # literal
}

rdflib_sqlalchemy/sql.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
from rdflib.namespace import RDF
2+
from six import text_type
3+
from sqlalchemy.sql import expression
4+
5+
from rdflib_sqlalchemy.constants import (
6+
ASSERTED_TYPE_PARTITION,
7+
ASSERTED_NON_TYPE_PARTITION,
8+
CONTEXT_SELECT,
9+
COUNT_SELECT,
10+
FULL_TRIPLE_PARTITIONS,
11+
TRIPLE_SELECT,
12+
)
13+
14+
15+
def query_analysis(query, store, connection):
    """
    Helper function.

    For executing EXPLAIN on all dispatched SQL statements -
    for the purpose of analyzing index usage.

    The first row of the EXPLAIN result is unpacked into exactly eight
    columns and tallied in ``store.queryOptMarks``, a dict keyed by
    ``(key, table)`` that is created on the store if it is missing. When the
    row reports no key, a ``("FULL SCAN", table)`` counter is incremented in
    addition to the ``(key, table)`` one.

    :param query: SQL statement text; it is appended to ``"explain "``.
    :param store: object on which the ``queryOptMarks`` dict is kept.
    :param connection: object whose ``execute()`` result offers ``fetchall()``.
    :raises AssertionError: if no key is reported and the join type is not
        ``"ALL"``.
    """
    res = connection.execute("explain " + query)
    first_row = res.fetchall()[0]
    # Only the table, join type and key columns are used below.
    (table, join_type, _possible_keys, key, _key_len,
     _compared_col, _rows_examined, _extra) = first_row

    full_scan = not key
    if full_scan:
        assert join_type == "ALL"

    if not hasattr(store, "queryOptMarks"):
        store.queryOptMarks = {}
    marks = store.queryOptMarks

    if full_scan:
        marks[("FULL SCAN", table)] = marks.get(("FULL SCAN", table), 0) + 1
    marks[(key, table)] = marks.get((key, table), 0) + 1
38+
39+
40+
def union_select(selectComponents, distinct=False, select_type=TRIPLE_SELECT):
    """
    Helper function for building union all select statement.

    Terms: u - uri refs v - variables b - bnodes l - literal f - formula

    :param selectComponents: iterable of ``(table, whereClause, tableType)``
        triples, where ``tableType`` is one of the ``*_PARTITION`` constants.
    :param distinct: build a ``UNION`` when true, a ``UNION ALL`` otherwise.
    :param select_type: ``COUNT_SELECT`` selects a count per table and
        ``CONTEXT_SELECT`` selects only the context column; any other value
        selects full triples, shaped according to ``tableType``. Only
        ``TRIPLE_SELECT`` orders the result by subject, predicate, object.
    :raises ValueError: if a full-triple select is requested for a
        ``tableType`` that is not a known partition.
    """
    selects = []
    for table, whereClause, tableType in selectComponents:

        if select_type == COUNT_SELECT:
            selectClause = table.count(whereClause)
        elif select_type == CONTEXT_SELECT:
            selectClause = expression.select([table.c.context], whereClause)
        elif tableType in FULL_TRIPLE_PARTITIONS:
            selectClause = table.select(whereClause)
        elif tableType == ASSERTED_TYPE_PARTITION:
            # The type table has no predicate or literal columns: present
            # member/klass as subject/object, rdf:type as the predicate and
            # NULL for language/datatype so the columns line up in the union.
            selectClause = expression.select(
                [table.c.id.label("id"),
                 table.c.member.label("subject"),
                 expression.literal(text_type(RDF.type)).label("predicate"),
                 table.c.klass.label("object"),
                 table.c.context.label("context"),
                 table.c.termComb.label("termcomb"),
                 expression.literal_column("NULL").label("objlanguage"),
                 expression.literal_column("NULL").label("objdatatype")],
                whereClause)
        elif tableType == ASSERTED_NON_TYPE_PARTITION:
            # Pad with NULL language/datatype columns to match the other
            # selects in the union.
            selectClause = expression.select(
                list(table.columns) +
                [expression.literal_column("NULL").label("objlanguage"),
                 expression.literal_column("NULL").label("objdatatype")],
                whereClause,
                from_obj=[table])
        else:
            # Without this branch ``selectClause`` would be unbound on the
            # first component, or silently reuse the previous component's
            # select on later ones.
            raise ValueError("Unsupported table type: %r" % (tableType,))

        selects.append(selectClause)

    order_statement = []
    if select_type == TRIPLE_SELECT:
        order_statement = [
            expression.literal_column("subject"),
            expression.literal_column("predicate"),
            expression.literal_column("object"),
        ]

    combine = expression.union if distinct else expression.union_all
    return combine(*selects, order_by=order_statement)

0 commit comments

Comments
 (0)