Skip to content

Commit 947827a

Browse files
committed
statistics mixin
1 parent 00bfa67 commit 947827a

File tree

3 files changed

+64
-20
lines changed

3 files changed

+64
-20
lines changed

rdflib_sqlalchemy/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
)
1111

1212

13-
class SQLGenerator(object):
14-
"""SQL statement generator."""
13+
class SQLGeneratorMixin(object):
14+
"""SQL statement generator mixin for the SQLAlchemy store."""
1515

1616
def _build_type_sql_command(self, member, klass, context):
1717
"""Build an insert command for a type table."""

rdflib_sqlalchemy/statistics.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Statistical summary of store statements mixin"""
2+
from sqlalchemy.orm.session import Session
3+
from sqlalchemy.sql import func
4+
5+
6+
def get_group_by_count(session, group_by_column):
    """
    Count rows per distinct value of a column using GROUP BY.

    :param session: sqlalchemy `Session` instance used to run the query
    :param group_by_column: sqlalchemy `Column` instance to group and count by
    :returns: dictionary mapping each distinct column value to its count
    """
    # SELECT <column>, COUNT(<column>) ... GROUP BY <column>
    counts_query = session.query(group_by_column, func.count(group_by_column))
    value_count_rows = counts_query.group_by(group_by_column).all()
    # Each row is a (value, count) pair, which dict() consumes directly.
    return dict(value_count_rows)
21+
22+
23+
class StatisticsMixin(object):
    """Mixin adding a statement-count summary to a store.

    The host class must provide ``self.engine``, ``self.tables`` and
    ``__len__``; all three are read by :meth:`statistics`.
    """

    def statistics(self, asserted_statements=True, literals=True, types=True):
        """
        Store statistics.

        :param asserted_statements: when true, add an "asserted_statements"
            entry with counts per ``predicate`` of the "asserted_statements" table
        :param literals: when true, add a "literals" entry with counts per
            ``predicate`` of the "literal_statements" table
        :param types: when true, add a "types" entry with counts per
            ``klass`` of the "type_statements" table
        :returns: dictionary holding "total_num_statements" (``len(self)``)
            plus one value-to-count dictionary for every enabled section
        """
        statistics = dict(
            total_num_statements=len(self),
        )

        # (enabled flag, result key, table name, column to group by)
        sections = (
            (asserted_statements, "asserted_statements", "asserted_statements", "predicate"),
            (literals, "literals", "literal_statements", "predicate"),
            (types, "types", "type_statements", "klass"),
        )

        with self.engine.connect() as connection:
            session = Session(bind=connection)
            try:
                for enabled, result_key, table_name, column_name in sections:
                    if not enabled:
                        continue
                    group_by_column = getattr(self.tables[table_name].c, column_name)
                    statistics[result_key] = get_group_by_count(session, group_by_column)
            finally:
                # Release the session explicitly instead of leaving it to
                # garbage collection once the connection has been closed.
                session.close()

        return statistics

rdflib_sqlalchemy/store.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
"""SQLAlchemy-based RDF store."""
2-
from __future__ import with_statement
3-
42
import hashlib
53
import logging
64

@@ -19,37 +17,38 @@
1917
from six.moves import reduce
2018
from sqlalchemy import MetaData
2119
from sqlalchemy.engine import reflection
22-
from sqlalchemy.sql import select, expression
20+
from sqlalchemy.sql import expression, select
2321

2422
from rdflib_sqlalchemy.constants import (
25-
CONTEXT_SELECT,
26-
COUNT_SELECT,
27-
INTERNED_PREFIX,
2823
ASSERTED_LITERAL_PARTITION,
2924
ASSERTED_NON_TYPE_PARTITION,
3025
ASSERTED_TYPE_PARTITION,
26+
CONTEXT_SELECT,
27+
COUNT_SELECT,
28+
INTERNED_PREFIX,
3129
QUOTED_PARTITION,
3230
TRIPLE_SELECT_NO_ORDER,
3331
)
3432
from rdflib_sqlalchemy.tables import (
35-
get_table_names,
3633
create_asserted_statements_table,
3734
create_literal_statements_table,
3835
create_namespace_binds_table,
3936
create_quoted_statements_table,
4037
create_type_statements_table,
38+
get_table_names,
4139
)
42-
from rdflib_sqlalchemy.base import SQLGenerator
43-
from rdflib_sqlalchemy.termutils import extract_triple
40+
from rdflib_sqlalchemy.base import SQLGeneratorMixin
4441
from rdflib_sqlalchemy.sql import union_select
42+
from rdflib_sqlalchemy.statistics import StatisticsMixin
43+
from rdflib_sqlalchemy.termutils import extract_triple
4544

4645

4746
_logger = logging.getLogger(__name__)
4847

4948
Any = None
5049

5150

52-
class SQLAlchemy(Store, SQLGenerator):
51+
class SQLAlchemy(Store, SQLGeneratorMixin, StatisticsMixin):
5352
"""
5453
SQL-92 formula-aware implementation of an rdflib Store.
5554
@@ -124,14 +123,11 @@ def __repr__(self):
124123
literal_table = self.tables["literal_statements"]
125124

126125
selects = [
127-
(expression.alias(asserted_type_table, "typetable"),
128-
None, ASSERTED_TYPE_PARTITION),
129-
(expression.alias(quoted_table, "quoted"),
130-
None, QUOTED_PARTITION),
131-
(expression.alias(asserted_table, "asserted"),
132-
None, ASSERTED_NON_TYPE_PARTITION),
133-
(expression.alias(literal_table, "literal"),
134-
None, ASSERTED_LITERAL_PARTITION), ]
126+
(expression.alias(asserted_type_table, "typetable"), None, ASSERTED_TYPE_PARTITION),
127+
(expression.alias(quoted_table, "quoted"), None, QUOTED_PARTITION),
128+
(expression.alias(asserted_table, "asserted"), None, ASSERTED_NON_TYPE_PARTITION),
129+
(expression.alias(literal_table, "literal"), None, ASSERTED_LITERAL_PARTITION),
130+
]
135131
q = union_select(selects, distinct=False, select_type=COUNT_SELECT)
136132
if hasattr(self, "engine"):
137133
with self.engine.connect() as connection:
@@ -678,6 +674,8 @@ def namespaces(self):
678674
for prefix, uri in res.fetchall():
679675
yield prefix, uri
680676

677+
# Private methods
678+
681679
def _create_table_definitions(self):
682680
self.metadata = MetaData()
683681
self.tables = {

0 commit comments

Comments
 (0)