Skip to content

Commit 5870f8f

Browse files
committed
style: fix typo
1 parent 5eeed03 commit 5870f8f

File tree

8 files changed

+86
-44
lines changed

8 files changed

+86
-44
lines changed

pyrdf2vec/walkers/anonymous.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ class AnonymousWalker(RandomWalker):
2727
Defaults to None.
2828
sampler: The sampling strategy.
2929
Defaults to UniformSampler.
30-
with_reverse: True to extracts children's and parents' walks from the
31-
root, creating (max_walks * max_walks) more walks of 2 * depth,
32-
False otherwise.
30+
with_reverse: True to extract parents and children hops from an
31+
entity, creating (max_walks * max_walks) more walks of 2 * depth,
32+
which also centers this entity in the walks. False otherwise.
3333
Defaults to False.
3434
3535
"""

pyrdf2vec/walkers/community.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,10 @@ class CommunityWalker(Walker):
6363
Defaults to The resolution to use.
6464
sampler: The sampling strategy.
6565
Defaults to UniformSampler.
66-
with_reverse: True to extracts children's and parents' walks from the
67-
root, creating (max_walks * max_walks) more walks of 2 * depth,
68-
False otherwise.
66+
with_reverse: True to extract parents and children hops from an
67+
entity, creating (max_walks * max_walks) walks of 2 * depth,
68+
which also centers this entity in the walks. False
69+
otherwise.
6970
Defaults to False.
7071
7172
"""

pyrdf2vec/walkers/ngram.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,10 @@ class NGramWalker(RandomWalker):
3636
wildcards: The wildcards to be used to match sub-sequences with small
3737
differences to be mapped onto the same label.
3838
Defaults to None.
39-
with_reverse: True to extracts children's and parents' walks from the
40-
root, creating (max_walks * max_walks) more walks of 2 * depth,
41-
False otherwise.
39+
with_reverse: True to extract parents and children hops from an
40+
entity, creating (max_walks * max_walks) walks of 2 * depth,
41+
which also centers this entity in the walks. False
42+
otherwise.
4243
Defaults to False.
4344
4445
"""

pyrdf2vec/walkers/random.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@ class RandomWalker(Walker):
3232
Defaults to None.
3333
sampler: The sampling strategy.
3434
Defaults to UniformSampler.
35-
with_reverse: True to extracts children's and parents' walks from the
36-
root, creating (max_walks * max_walks) more walks of 2 * depth,
37-
False otherwise.
35+
with_reverse: True to extract parents and children hops from an
36+
entity, creating (max_walks * max_walks) walks of 2 * depth,
37+
which also centers this entity in the walks. False
38+
otherwise.
3839
Defaults to False.
3940
4041
"""

pyrdf2vec/walkers/split.py

Lines changed: 59 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,33 @@ class SplitWalker(RandomWalker):
1515
node) present in the randomly extracted walks.
1616
1717
Attributes:
18+
_is_support_remote: True if the walking strategy can be used with a
19+
remote Knowledge Graph, False otherwise.
20+
Defaults to True.
21+
kg: The global KG used later on for the worker process.
22+
Defaults to None.
23+
max_depth: The maximum depth of one walk.
24+
max_walks: The maximum number of walks per entity.
25+
Defaults to None.
26+
md5_bytes: The number of bytes to keep after hashing objects in
27+
MD5. Hashing reduces the memory occupied by a long
28+
text. If md5_bytes is None, no hash is applied.
29+
Defaults to 8.
30+
random_state: The random state to use to keep random determinism with
31+
the walking strategy.
32+
Defaults to None.
33+
sampler: The sampling strategy.
34+
Defaults to UniformSampler.
35+
with_reverse: True to extract parents and children hops from an
36+
entity, creating (max_walks * max_walks) walks of 2 * depth,
37+
which also centers this entity in the walks. False
38+
otherwise.
39+
Defaults to False.
1840
func_split: The function to call for the splitting of vertices. In case
1941
of reimplementation, it is important to respect the signature
2042
imposed by `basic_split` function.
43+
Defaults to func_split.
44+
2145
"""
2246

2347
func_split = attr.ib(kw_only=True, default=None, repr=False)
@@ -42,7 +66,7 @@ def basic_split(self, walks: List[Walk]) -> Set[SWalk]:
4266
'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
4367
'http://dl-learner.org/carcinogenesis#Compound')
4468
45-
-> ('http://dl-learner.org/carcinogenesis#d19', 'type', 'compound', 'class')
69+
-> ('http://dl-learner.org/carcinogenesis#d19', 'type', 'compound')
4670
4771
Args:
4872
walks: The random extracted walks.
@@ -53,37 +77,49 @@ def basic_split(self, walks: List[Walk]) -> Set[SWalk]:
5377
"""
5478
canonical_walks: Set[SWalk] = set()
5579
for walk in walks:
56-
canonical_walk = [walk[0].name]
80+
tmp_vertices = []
81+
canonical_walk = [] if self.with_reverse else [walk[0].name]
5782
for i, _ in enumerate(walk[1::], 1):
5883
vertices = []
5984
if "http" in walk[i].name:
6085
vertices = " ".join(re.split("[#]", walk[i].name)).split()
61-
if i % 2 == 1:
62-
name = vertices[1] if vertices else walk[i].name
63-
preds = [
64-
sub_name
65-
for sub_name in re.split(r"([A-Z][a-z]*)", name)
66-
if sub_name
67-
]
68-
for pred in preds:
69-
canonical_walk += [pred.lower()]
70-
else:
71-
name = vertices[-1] if vertices else walk[i].name
72-
objs = []
86+
name = vertices[-1] if vertices else walk[i].name
87+
88+
vertices = [
89+
sub_name
90+
for sub_name in re.split(r"([A-Z][a-z]*)", name)
91+
if sub_name
92+
]
93+
if i % 2 != 1:
7394
try:
74-
objs = [str(float(name))]
95+
vertices = [str(float(name))]
7596
except ValueError:
76-
objs = re.sub("[^A-Za-z0-9]+", " ", name).split()
77-
if len(objs) == 1:
97+
vertices = re.sub("[^A-Za-z0-9]+", " ", name).split()
98+
if len(vertices) == 1:
7899
match = re.match(
79-
r"([a-z]+)([0-9]+)", objs[0], re.I
100+
r"([a-z]+)([0-9]+)", vertices[0], re.I
80101
)
81102
if match:
82-
objs = list(match.groups())
83-
for obj in objs:
84-
canonical_walk += [obj.lower()]
85-
canonical_walk = list(dict(zip(canonical_walk, canonical_walk)))
86-
canonical_walks.add(tuple(canonical_walk))
103+
vertices = list(match.groups())
104+
105+
if self.with_reverse:
106+
if tmp_vertices:
107+
tmp_vertices.append(vertices)
108+
tmp_vertices.reverse()
109+
for v in tmp_vertices:
110+
for vertex in v:
111+
canonical_walk += [vertex.lower()]
112+
tmp_vertices = []
113+
else:
114+
tmp_vertices.append(vertices)
115+
else:
116+
for vertex in vertices:
117+
canonical_walk += [vertex.lower()]
118+
if self.with_reverse:
119+
canonical_walk += [walk[0].name]
120+
canonical_walks.add(
121+
tuple(list(dict(zip(canonical_walk, canonical_walk))))
122+
)
87123
return canonical_walks
88124

89125
def _extract(self, kg: KG, entity: Vertex) -> EntityWalks:

pyrdf2vec/walkers/walker.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,10 @@ class Walker(ABC):
4444
Defaults to None.
4545
sampler: The sampling strategy.
4646
Defaults to UniformSampler.
47-
with_reverse: True to extracts children's and parents' walks from the
48-
root, creating (max_walks * max_walks) more walks of 2 * depth,
49-
False otherwise.
47+
with_reverse: True to extract parents and children hops from an
48+
entity, creating (max_walks * max_walks) walks of 2 * depth,
49+
which also centers this entity in the walks. False
50+
otherwise.
5051
Defaults to False.
5152
5253
"""

pyrdf2vec/walkers/walklet.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,10 @@ class WalkletWalker(RandomWalker):
2727
Defaults to None.
2828
sampler: The sampling strategy.
2929
Defaults to UniformSampler.
30-
with_reverse: True to extracts children's and parents' walks from the
31-
root, creating (max_walks * max_walks) more walks of 2 * depth,
32-
False otherwise.
30+
with_reverse: True to extract parents and children hops from an
31+
entity, creating (max_walks * max_walks) walks of 2 * depth,
32+
which also centers this entity in the walks. False
33+
otherwise.
3334
Defaults to False.
3435
3536
"""

pyrdf2vec/walkers/weisfeiler_lehman.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ class WLWalker(RandomWalker):
3737
Defaults to None.
3838
sampler: The sampling strategy.
3939
Defaults to UniformSampler.
40-
with_reverse: True to extracts children's and parents' walks from the
41-
root, creating (max_walks * max_walks) more walks of 2 * depth,
42-
False otherwise.
40+
with_reverse: True to extract parents and children hops from an
41+
entity, creating (max_walks * max_walks) walks of 2 * depth,
42+
which also centers this entity in the walks. False
43+
otherwise.
4344
Defaults to False.
4445
wl_iterations: The Weisfeiler Lehman's iteration.
4546
Defaults to 4.

0 commit comments

Comments
 (0)