12
12
from itertools import product
13
13
import sys
14
14
15
+ import networkx as nx
15
16
from rdflib import Variable
16
17
from scipy .special import binom
17
18
from scipy .misc import comb
21
22
from graph_pattern import TARGET_VAR
22
23
from graph_pattern import GraphPattern
23
24
from graph_pattern import canonicalize
25
+ from graph_pattern import to_nx_graph
24
26
25
27
logger = logging .getLogger (__name__ )
26
28
logger .info ('init' )
@@ -42,7 +44,6 @@ def numerical_patterns(
42
44
length ,
43
45
loops = True ,
44
46
node_edge_joint = True ,
45
- p_connected = True ,
46
47
_partial_pattern = None ,
47
48
_pos = None ,
48
49
_var = 1 ,
@@ -119,7 +120,11 @@ def numerical_patterns(
119
120
s , p , o = _partial_pattern [i ]
120
121
for pt in _partial_pattern [:i ]:
121
122
# loop over previous triples and check if current is connected
122
- if s in pt or o in pt or (p_connected and p in pt ):
123
+ if s in pt or p in pt or o in pt :
124
+ # a pattern that is only p_connected so far can still become
125
+ # n_connected again later:
126
+ # 123 145 627 685
127
+ # ^ ^
123
128
break
124
129
else :
125
130
# we're not connected, early terminate this
@@ -163,7 +168,6 @@ def numerical_patterns(
163
168
length ,
164
169
loops = loops ,
165
170
node_edge_joint = node_edge_joint ,
166
- p_connected = p_connected ,
167
171
_partial_pattern = _partial_pattern ,
168
172
_pos = (i , j ),
169
173
_var = v
@@ -175,7 +179,7 @@ def patterns(
175
179
length ,
176
180
loops = True ,
177
181
node_edge_joint = True ,
178
- p_connected = True ,
182
+ p_only_connected = True ,
179
183
source_target_edges = True ,
180
184
exclude_isomorphic = True ,
181
185
count_candidates_only = False ,
@@ -191,21 +195,28 @@ def patterns(
191
195
length ,
192
196
loops = loops ,
193
197
node_edge_joint = node_edge_joint ,
194
- p_connected = p_connected ,
195
198
)):
196
199
flat_num_pat = [v for t in num_pat for v in t ]
197
200
all_numbers = set (flat_num_pat )
198
201
202
+ if not p_only_connected :
203
+ # Numerical patterns are always connected, but they might be
204
+ # p_only_connected (e.g., 123 425).
205
+ # Check that the pattern isn't p_only_connected, meaning that it's
206
+ # also connected by nodes (e.g., 123 325).
207
+ # Note that in case of node_edge_joint 123 245 is also considered
208
+ # p_only_connected.
209
+ if not nx .is_connected (to_nx_graph (num_pat )):
210
+ logger .debug ('excluded %d: not node connected:\n %s' , c , num_pat )
211
+ continue
212
+
199
213
if source_target_edges :
200
214
all_numbers = sorted (all_numbers )
201
215
numbers = all_numbers
202
216
else :
203
217
numbers = sorted (all_numbers - set (flat_num_pat [1 ::3 ]))
204
218
all_numbers = sorted (all_numbers )
205
219
206
- # var_map = {i: '?v%d' % i for i in numbers}
207
- # pattern = GraphPattern(
208
- # tuple([tuple([var_map[i] for i in t]) for t in numerical_repr]))
209
220
if count_candidates_only :
210
221
l = len (numbers )
211
222
perms = l * (l - 1 )
@@ -256,7 +267,7 @@ def pattern_generator(
256
267
length ,
257
268
loops = True ,
258
269
node_edge_joint = True ,
259
- p_connected = True ,
270
+ p_only_connected = True ,
260
271
source_target_edges = True ,
261
272
exclude_isomorphic = True ,
262
273
):
@@ -307,7 +318,7 @@ def pattern_generator(
307
318
continue
308
319
309
320
# check that the pattern is connected
310
- if not gp .is_connected (via_edges = p_connected ):
321
+ if not gp .is_connected (via_edges = p_only_connected ):
311
322
logger .debug ('excluded %d: not connected:\n %s' , pid , gp )
312
323
continue
313
324
@@ -338,16 +349,28 @@ def pattern_generator(
338
349
339
350
340
351
def main ():
341
- length = 1
352
+ # len | pcon | nej | all | candidates (all) | candidates (all) |
353
+ # | | | (canonical) | (old method) | (numerical) |
354
+ # ----+------+-----+--------------+-------------------+-------------------+
355
+ # 1 | 8 | 12 | 12 | 27 | 12 |
356
+ # 2 | 146 | 469 | 693 | 7750 | 1314 |
357
+ # 3 | | | 47478 | 6666891 | 151534 |
358
+ # 4 | | | | 11671285626 | 20884300 |
359
+ # 5 | | | | 34549552710596 | 3461471628 |
360
+
361
+ # len | typical | candidates | candidates |
362
+ # | (canonical) | (old method) | (numerical) |
363
+ # ----+-------------+----------------+-------------+
364
+ # 1 | 2 | 27 | 2 |
365
+ # 2 | 28 | 7750 | 54 |
366
+ # 3 | 486 | 6666891 | 1614 |
367
+ # 4 | 10374 | 11671285626 | 59654 |
368
+ # 5 | | 34549552710596 | 2707960 |
369
+
370
+ # "typical" above means (loops, nej, pcon, source_target_edges) are all off
371
+
372
+ length = 5
342
373
canonical = True
343
- # len | pcon | nej | pcon, nej | candidates | candidates |
344
- # | | | (canonical) | (old method) | (numerical) |
345
- # ----+------+-----+--------------+----------------+-------------+
346
- # 1 | 8 | 12 | 12 | 27 | 12 |
347
- # 2 | 146 | 469 | 693 | 7750 | 1314 |
348
- # 3 | | | 47478 | 6666891 | 151534 |
349
- # 4 | | | | 11671285626 | 20884300 |
350
- # 5 | | | | 34549552710596 | 3461471628 |
351
374
352
375
gen_patterns = []
353
376
n = 0
@@ -356,7 +379,7 @@ def main():
356
379
length ,
357
380
loops = False ,
358
381
node_edge_joint = False ,
359
- p_connected = False ,
382
+ p_only_connected = False ,
360
383
source_target_edges = False ,
361
384
exclude_isomorphic = canonical ,
362
385
count_candidates_only = False ,
0 commit comments