@@ -15,9 +15,33 @@ class SplitWalker(RandomWalker):
1515 node) present in the randomly extracted walks.
1616
1717 Attributes:
18+ _is_support_remote: True if the walking strategy can be used with a
19+ remote Knowledge Graph, False otherwise.
20+ Defaults to True.
21+ kg: The global KG used later on for the worker process.
22+ Defaults to None.
23+ max_depth: The maximum depth of one walk.
24+ max_walks: The maximum number of walks per entity.
25+ Defaults to None.
26+ md5_bytes: The number of bytes to keep after hashing objects in
27+ MD5. Hashing reduces the memory occupied by a long
28+ text. If md5_bytes is None, no hash is applied.
29+ Defaults to 8.
30+ random_state: The random state to use to keep random determinism with
31+ the walking strategy.
32+ Defaults to None.
33+ sampler: The sampling strategy.
34+ Defaults to UniformSampler.
35+ with_reverse: True to extract parent and child hops from an
36+ entity, creating (max_walks * max_walks) walks of 2 * depth,
37+ allowing also to centralize this entity in the walks. False
38+ otherwise.
39+ Defaults to False.
1840 func_split: The function to call for the splitting of vertices. In case
1941 of reimplementation, it is important to respect the signature
2042 imposed by the `basic_split` function.
43+ Defaults to func_split.
44+
2145 """
2246
2347 func_split = attr .ib (kw_only = True , default = None , repr = False )
@@ -42,7 +66,7 @@ def basic_split(self, walks: List[Walk]) -> Set[SWalk]:
4266 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
4367 'http://dl-learner.org/carcinogenesis#Compound')
4468
45- -> ('http://dl-learner.org/carcinogenesis#d19', 'type', 'compound', 'class' )
69+ -> ('http://dl-learner.org/carcinogenesis#d19', 'type', 'compound')
4670
4771 Args:
4872 walks: The randomly extracted walks.
@@ -53,37 +77,49 @@ def basic_split(self, walks: List[Walk]) -> Set[SWalk]:
5377 """
5478 canonical_walks : Set [SWalk ] = set ()
5579 for walk in walks :
56- canonical_walk = [walk [0 ].name ]
80+ tmp_vertices = []
81+ canonical_walk = [] if self .with_reverse else [walk [0 ].name ]
5782 for i , _ in enumerate (walk [1 ::], 1 ):
5883 vertices = []
5984 if "http" in walk [i ].name :
6085 vertices = " " .join (re .split ("[#]" , walk [i ].name )).split ()
61- if i % 2 == 1 :
62- name = vertices [1 ] if vertices else walk [i ].name
63- preds = [
64- sub_name
65- for sub_name in re .split (r"([A-Z][a-z]*)" , name )
66- if sub_name
67- ]
68- for pred in preds :
69- canonical_walk += [pred .lower ()]
70- else :
71- name = vertices [- 1 ] if vertices else walk [i ].name
72- objs = []
86+ name = vertices [- 1 ] if vertices else walk [i ].name
87+
88+ vertices = [
89+ sub_name
90+ for sub_name in re .split (r"([A-Z][a-z]*)" , name )
91+ if sub_name
92+ ]
93+ if i % 2 != 1 :
7394 try :
74- objs = [str (float (name ))]
95+ vertices = [str (float (name ))]
7596 except ValueError :
76- objs = re .sub ("[^A-Za-z0-9]+" , " " , name ).split ()
77- if len (objs ) == 1 :
97+ vertices = re .sub ("[^A-Za-z0-9]+" , " " , name ).split ()
98+ if len (vertices ) == 1 :
7899 match = re .match (
79- r"([a-z]+)([0-9]+)" , objs [0 ], re .I
100+ r"([a-z]+)([0-9]+)" , vertices [0 ], re .I
80101 )
81102 if match :
82- objs = list (match .groups ())
83- for obj in objs :
84- canonical_walk += [obj .lower ()]
85- canonical_walk = list (dict (zip (canonical_walk , canonical_walk )))
86- canonical_walks .add (tuple (canonical_walk ))
103+ vertices = list (match .groups ())
104+
105+ if self .with_reverse :
106+ if tmp_vertices :
107+ tmp_vertices .append (vertices )
108+ tmp_vertices .reverse ()
109+ for v in tmp_vertices :
110+ for vertex in v :
111+ canonical_walk += [vertex .lower ()]
112+ tmp_vertices = []
113+ else :
114+ tmp_vertices .append (vertices )
115+ else :
116+ for vertex in vertices :
117+ canonical_walk += [vertex .lower ()]
118+ if self .with_reverse :
119+ canonical_walk += [walk [0 ].name ]
120+ canonical_walks .add (
121+ tuple (list (dict (zip (canonical_walk , canonical_walk ))))
122+ )
87123 return canonical_walks
88124
89125 def _extract (self , kg : KG , entity : Vertex ) -> EntityWalks :
0 commit comments