Skip to content

Commit a7250af

Browse files
committed
fixed p_only_connectedness
1 parent 907f581 commit a7250af

File tree

1 file changed

+43
-20
lines changed

1 file changed

+43
-20
lines changed

eval.py

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from itertools import product
1313
import sys
1414

15+
import networkx as nx
1516
from rdflib import Variable
1617
from scipy.special import binom
1718
from scipy.misc import comb
@@ -21,6 +22,7 @@
2122
from graph_pattern import TARGET_VAR
2223
from graph_pattern import GraphPattern
2324
from graph_pattern import canonicalize
25+
from graph_pattern import to_nx_graph
2426

2527
logger = logging.getLogger(__name__)
2628
logger.info('init')
@@ -42,7 +44,6 @@ def numerical_patterns(
4244
length,
4345
loops=True,
4446
node_edge_joint=True,
45-
p_connected=True,
4647
_partial_pattern=None,
4748
_pos=None,
4849
_var=1,
@@ -119,7 +120,11 @@ def numerical_patterns(
119120
s, p, o = _partial_pattern[i]
120121
for pt in _partial_pattern[:i]:
121122
# loop over previous triples and check if current is connected
122-
if s in pt or o in pt or (p_connected and p in pt):
123+
if s in pt or p in pt or o in pt:
124+
# a pattern that is only p_only_connected so far can
125+
# become node connected (n_connected) again later:
126+
# 123 145 627 685
127+
# ^ ^
123128
break
124129
else:
125130
# we're not connected, early terminate this
@@ -163,7 +168,6 @@ def numerical_patterns(
163168
length,
164169
loops=loops,
165170
node_edge_joint=node_edge_joint,
166-
p_connected=p_connected,
167171
_partial_pattern=_partial_pattern,
168172
_pos=(i, j),
169173
_var=v
@@ -175,7 +179,7 @@ def patterns(
175179
length,
176180
loops=True,
177181
node_edge_joint=True,
178-
p_connected=True,
182+
p_only_connected=True,
179183
source_target_edges=True,
180184
exclude_isomorphic=True,
181185
count_candidates_only=False,
@@ -191,21 +195,28 @@ def patterns(
191195
length,
192196
loops=loops,
193197
node_edge_joint=node_edge_joint,
194-
p_connected=p_connected,
195198
)):
196199
flat_num_pat = [v for t in num_pat for v in t]
197200
all_numbers = set(flat_num_pat)
198201

202+
if not p_only_connected:
203+
# Numerical patterns are always connected, but they might be
204+
# p_only_connected (e.g., 123 425).
205+
# Check that the pattern isn't merely p_only_connected, i.e., that
206+
# it's also connected by nodes (e.g., 123 325).
207+
# Note that in case of node_edge_joint 123 245 is also considered
208+
# p_only_connected.
209+
if not nx.is_connected(to_nx_graph(num_pat)):
210+
logger.debug('excluded %d: not node connected:\n%s', c, num_pat)
211+
continue
212+
199213
if source_target_edges:
200214
all_numbers = sorted(all_numbers)
201215
numbers = all_numbers
202216
else:
203217
numbers = sorted(all_numbers - set(flat_num_pat[1::3]))
204218
all_numbers = sorted(all_numbers)
205219

206-
# var_map = {i: '?v%d' % i for i in numbers}
207-
# pattern = GraphPattern(
208-
# tuple([tuple([var_map[i] for i in t]) for t in numerical_repr]))
209220
if count_candidates_only:
210221
l = len(numbers)
211222
perms = l * (l-1)
@@ -256,7 +267,7 @@ def pattern_generator(
256267
length,
257268
loops=True,
258269
node_edge_joint=True,
259-
p_connected=True,
270+
p_only_connected=True,
260271
source_target_edges=True,
261272
exclude_isomorphic=True,
262273
):
@@ -307,7 +318,7 @@ def pattern_generator(
307318
continue
308319

309320
# check that the pattern is connected
310-
if not gp.is_connected(via_edges=p_connected):
321+
if not gp.is_connected(via_edges=p_only_connected):
311322
logger.debug('excluded %d: not connected:\n%s', pid, gp)
312323
continue
313324

@@ -338,16 +349,28 @@ def pattern_generator(
338349

339350

340351
def main():
341-
length = 1
352+
# len | pcon | nej | all | candidates (all) | candidates (all) |
353+
# | | | (canonical) | (old method) | (numerical) |
354+
# ----+------+-----+--------------+-------------------+-------------------+
355+
# 1 | 8 | 12 | 12 | 27 | 12 |
356+
# 2 | 146 | 469 | 693 | 7750 | 1314 |
357+
# 3 | | | 47478 | 6666891 | 151534 |
358+
# 4 | | | | 11671285626 | 20884300 |
359+
# 5 | | | | 34549552710596 | 3461471628 |
360+
361+
# len | typical | candidates | candidates |
362+
# | (canonical) | (old method) | (numerical) |
363+
# ----+-------------+----------------+-------------+
364+
# 1 | 2 | 27 | 2 |
365+
# 2 | 28 | 7750 | 54 |
366+
# 3 | 486 | 6666891 | 1614 |
367+
# 4 | 10374 | 11671285626 | 59654 |
368+
# 5 | | 34549552710596 | 2707960 |
369+
370+
# "typical" above means that none of (loops, nej, pcon, source_target_edges) is enabled
371+
372+
length = 5
342373
canonical = True
343-
# len | pcon | nej | pcon, nej | candidates | candidates |
344-
# | | | (canonical) | (old method) | (numerical) |
345-
# ----+------+-----+--------------+----------------+-------------+
346-
# 1 | 8 | 12 | 12 | 27 | 12 |
347-
# 2 | 146 | 469 | 693 | 7750 | 1314 |
348-
# 3 | | | 47478 | 6666891 | 151534 |
349-
# 4 | | | | 11671285626 | 20884300 |
350-
# 5 | | | | 34549552710596 | 3461471628 |
351374

352375
gen_patterns = []
353376
n = 0
@@ -356,7 +379,7 @@ def main():
356379
length,
357380
loops=False,
358381
node_edge_joint=False,
359-
p_connected=False,
382+
p_only_connected=False,
360383
source_target_edges=False,
361384
exclude_isomorphic=canonical,
362385
count_candidates_only=False,

0 commit comments

Comments
 (0)