48
48
Any = None
49
49
50
50
51
def grouper(iterable, n):
    """Collect data into chunks of at most n elements.

    Args:
        iterable: Any iterable. It is consumed lazily, one chunk at a time.
        n (int): Maximum number of elements per chunk; must be positive.

    Yields:
        list: Consecutive runs of up to ``n`` items, in input order. Only
        the last chunk may be shorter than ``n``; an empty chunk is never
        yielded, so an empty input yields nothing.

    Raises:
        AssertionError: If ``n`` is not positive. Because this is a
        generator, the check runs on the first iteration, not at call time.
    """
    # NOTE: asserts are stripped under ``python -O``; kept as an assert so
    # the exception type seen by existing callers does not change.
    assert n > 0, 'Cannot group into chunks of zero elements'
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == n:
            yield chunk
            # Start a fresh list so the chunk already handed out is not mutated.
            chunk = []

    # Flush the trailing partial chunk, if any.
    if chunk:
        yield chunk
68
+
69
+
51
70
def generate_interned_id (identifier ):
52
71
return "{prefix}{identifier_hash}" .format (
53
72
prefix = INTERNED_PREFIX ,
@@ -78,7 +97,8 @@ class SQLAlchemy(Store, SQLGeneratorMixin, StatisticsMixin):
78
97
regex_matching = PYTHON_REGEX
79
98
configuration = Literal ("sqlite://" )
80
99
81
- def __init__ (self , identifier = None , configuration = None , engine = None ):
100
+ def __init__ (self , identifier = None , configuration = None , engine = None ,
101
+ max_terms_per_where = 800 ):
82
102
"""
83
103
Initialisation.
84
104
@@ -89,10 +109,14 @@ def __init__(self, identifier=None, configuration=None, engine=None):
89
109
with the additional "url" key pointing to the connection URL. See `open` documentation
90
110
for more details.
91
111
engine (sqlalchemy.engine.Engine, optional): a pre-existing `SQLAlchemy.engine.Engine` instance.
92
-
112
+ max_terms_per_where (int): The max number of terms (s/p/o) in a call to
113
+ triples_choices to combine in one SQL "where" clause. Important for SQLite
114
+ back-end with SQLITE_MAX_EXPR_DEPTH limit and SQLITE_LIMIT_COMPOUND_SELECT
115
+ -- must find a balance that doesn't hit either of those.
93
116
"""
94
117
self .identifier = identifier and identifier or "hardcoded"
95
118
self .engine = engine
119
+ self .max_terms_per_where = max_terms_per_where
96
120
97
121
# Use only the first 10 bytes of the digest
98
122
self ._interned_id = generate_interned_id (self .identifier )
@@ -383,25 +407,7 @@ def remove(self, triple, context):
383
407
_logger .exception ("Removal failed." )
384
408
trans .rollback ()
385
409
386
- def triples (self , triple , context = None ):
387
- """
388
- A generator over all the triples matching pattern.
389
-
390
- Pattern can be any objects for comparing against nodes in
391
- the store, for example, RegExLiteral, Date? DateRange?
392
-
393
- quoted table: <id>_quoted_statements
394
- asserted rdf:type table: <id>_type_statements
395
- asserted non rdf:type table: <id>_asserted_statements
396
-
397
- triple columns:
398
- subject, predicate, object, context, termComb, objLanguage, objDatatype
399
- class membership columns:
400
- member, klass, context, termComb
401
-
402
- FIXME: These union all selects *may* be further optimized by joins
403
-
404
- """
410
+ def _triples_helper (self , triple , context = None ):
405
411
subject , predicate , obj = triple
406
412
407
413
quoted_table = self .tables ["quoted_statements" ]
@@ -427,10 +433,10 @@ class membership columns:
427
433
# Literal partition if (obj is Literal or None) and asserted
428
434
# non rdf:type partition (if obj is URIRef or None)
429
435
selects = []
430
- if not self .STRONGLY_TYPED_TERMS \
431
- or isinstance (obj , Literal ) \
432
- or not obj \
433
- or (self .STRONGLY_TYPED_TERMS and isinstance (obj , REGEXTerm )):
436
+ if ( not self .STRONGLY_TYPED_TERMS
437
+ or isinstance (obj , Literal )
438
+ or not obj
439
+ or (self .STRONGLY_TYPED_TERMS and isinstance (obj , REGEXTerm ))) :
434
440
literal = expression .alias (literal_table , "literal" )
435
441
clause = self .build_clause (literal , subject , predicate , obj , context )
436
442
selects .append ((literal , clause , ASSERTED_LITERAL_PARTITION ))
@@ -471,6 +477,9 @@ class membership columns:
471
477
clause = self .build_clause (quoted , subject , predicate , obj , context )
472
478
selects .append ((quoted , clause , QUOTED_PARTITION ))
473
479
480
+ return selects
481
+
482
+ def _do_triples_select (self , selects , context ):
474
483
q = union_select (selects , select_type = TRIPLE_SELECT_NO_ORDER )
475
484
with self .engine .connect () as connection :
476
485
res = connection .execute (q )
@@ -490,6 +499,29 @@ class membership columns:
490
499
for (s , p , o ), contexts in tripleCoverage .items ():
491
500
yield (s , p , o ), (c for c in contexts )
492
501
502
def triples(self, triple, context=None):
    """
    Generate every triple in the store that matches the given pattern.

    The pattern terms can be any objects suitable for comparing against
    nodes in the store, for example, RegExLiteral, Date? DateRange?

    Tables consulted:
        quoted table:                <id>_quoted_statements
        asserted rdf:type table:     <id>_type_statements
        asserted non rdf:type table: <id>_asserted_statements

    triple columns:
        subject, predicate, object, context, termComb, objLanguage, objDatatype
    class membership columns:
        member, klass, context, termComb

    The selects are built by ``_triples_helper`` and executed by
    ``_do_triples_select``; this method simply relays what the latter
    yields.

    FIXME: These union all selects *may* be further optimized by joins

    """
    matches = self._do_triples_select(
        self._triples_helper(triple, context), context)
    for match in matches:
        yield match
524
+
493
525
def triples_choices(self, triple, context=None):
    """
    A variant of triples in which one of subject, predicate or object
    may be a list of alternative terms.

    The list is split into chunks of at most ``self.max_terms_per_where``
    terms; the selects for every chunk are collected through
    ``_triples_helper`` and then executed together by
    ``_do_triples_select``. This is unlike the default 'fallback'
    implementation, which will iterate over each term in the list and
    dispatch to triples.

    An empty list is treated as a wildcard (``None``) for that position.

    Args:
        triple: ``(subject, predicate, object)`` pattern. At most one of
            the three terms may be a list.
        context: Passed through unchanged to ``_triples_helper`` and
            ``_do_triples_select``.

    Yields:
        Whatever ``_do_triples_select`` yields for the combined selects.

    Raises:
        AssertionError: If more than one term of the pattern is a list.
    """
    # We already support accepting a list for s/p/o
    subject, predicate, object_ = triple
    limit = self.max_terms_per_where
    patterns = []

    if isinstance(object_, list):
        assert not isinstance(
            subject, list), "object_ / subject are both lists"
        assert not isinstance(
            predicate, list), "object_ / predicate are both lists"
        # An empty list means "any object": query once with None. Passing
        # None to grouper() would raise TypeError, so it is bypassed here.
        chunks = grouper(object_, limit) if object_ else [None]
        patterns = [(subject, predicate, o) for o in chunks]

    elif isinstance(subject, list):
        assert not isinstance(
            predicate, list), "subject / predicate are both lists"
        # Empty list means "any subject" (see the object branch).
        chunks = grouper(subject, limit) if subject else [None]
        patterns = [(s, predicate, object_) for s in chunks]

    elif isinstance(predicate, list):
        assert not isinstance(
            subject, list), "predicate / subject are both lists"
        # Empty list means "any predicate" (see the object branch).
        chunks = grouper(predicate, limit) if predicate else [None]
        patterns = [(subject, p, object_) for p in chunks]

    # NOTE(review): when no term is a list, ``selects`` stays empty and is
    # still handed to _do_triples_select, as before -- confirm that an
    # empty select list is handled there.
    selects = []
    for pattern in patterns:
        selects.extend(self._triples_helper(pattern, context))

    for m in self._do_triples_select(selects, context):
        yield m
532
568
533
569
def contexts (self , triple = None ):
534
570
quoted_table = self .tables ["quoted_statements" ]
@@ -759,9 +795,9 @@ def _get_build_command(self, triple, context=None, quoted=False):
759
795
command_type = "type"
760
796
return command_type , statement , params
761
797
762
- def _remove_context (self , identifier ):
798
+ def _remove_context (self , context ):
763
799
"""Remove context."""
764
- assert identifier
800
+ assert context
765
801
quoted_table = self .tables ["quoted_statements" ]
766
802
asserted_table = self .tables ["asserted_statements" ]
767
803
asserted_type_table = self .tables ["type_statements" ]
@@ -772,7 +808,7 @@ def _remove_context(self, identifier):
772
808
try :
773
809
for table in [quoted_table , asserted_table ,
774
810
asserted_type_table , literal_table ]:
775
- clause = self .build_context_clause (identifier , table )
811
+ clause = self .build_context_clause (context , table )
776
812
connection .execute (table .delete (clause ))
777
813
trans .commit ()
778
814
except Exception :
0 commit comments