Skip to content
This repository was archived by the owner on Jan 19, 2025. It is now read-only.

Commit a48a235

Browse files
feat: creation of a simple implementation of a differ class (#1117)
Closes #1114. ### Summary of Changes Adds `SimpleDiffer`, which extends `AbstractDiffer` and uses the Levenshtein distance (as implemented in `distance_elements`) to compare elements of any data type or class. ### Testing Instructions View and run the tests in `test_differ.py`. Signed-off-by: Aclrian <[email protected]> Co-authored-by: Lars Reimann <[email protected]> Co-authored-by: Aclrian <[email protected]> Co-authored-by: lars-reimann <[email protected]>
1 parent 167d25c commit a48a235

File tree

4 files changed

+460
-0
lines changed

4 files changed

+460
-0
lines changed

package-parser/package_parser/processing/api/model/_types.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ def from_string(cls, string: str) -> NamedType:
4545
def to_json(self) -> dict[str, str]:
    """Serialize this type as a dict holding its class name and its name."""
    kind = self.__class__.__name__
    return {"kind": kind, "name": self.name}
4747

48+
def __eq__(self, other):
    """Return True when ``other`` is an instance of this object's class with the same name.

    Objects that are not instances of ``self.__class__`` never compare equal.
    """
    if not isinstance(other, self.__class__):
        return False
    return self.name == other.name
52+
4853

4954
@dataclass
5055
class EnumType(AbstractType):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from ._differ import AbstractDiffer, SimpleDiffer

package-parser/package_parser/processing/migration/_differ.py

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
from abc import ABC, abstractmethod
2+
from typing import Any, Optional
23

34
from package_parser.processing.api.model import (
5+
AbstractType,
46
Attribute,
57
Class,
68
Function,
79
Parameter,
10+
ParameterAssignment,
811
Result,
12+
UnionType,
913
)
1014

1115

@@ -37,3 +41,242 @@ def compute_parameter_similarity(
3741
@abstractmethod
def compute_result_similarity(self, result_a: Result, result_b: Result) -> float:
    """Compute a similarity score for two results.

    Abstract: concrete differ classes must override this method.
    """
44+
45+
46+
def distance_elements(
    list_a: list[Any], list_b: list[Any], are_similar=lambda x, y: x == y
) -> float:
    """Compute the Levenshtein (edit) distance between two lists.

    The distance is the number of insertions, deletions and substitutions
    needed to turn ``list_a`` into ``list_b``. Two elements count as matching
    when ``are_similar(element_a, element_b)`` is truthy; a matching pair is
    consumed at no cost.

    :param list_a: first sequence of elements
    :param list_b: second sequence of elements
    :param are_similar: predicate deciding whether two elements match;
        defaults to ``==``
    :return: the edit distance (a non-negative integer)
    """
    len_a, len_b = len(list_a), len(list_b)

    # Dynamic programming over suffixes, which mirrors the head-first recursion
    # distance(a, b) = distance(a[1:], b[1:]) if a[0] matches b[0]
    #                  else 1 + min(delete, insert, substitute)
    # without re-solving sub-problems and without dropping ``are_similar``.
    #
    # suffix_row[j] holds distance(list_a[i + 1:], list_b[j:]). For
    # i == len_a the left operand is empty, so the distance is what is left
    # of list_b.
    suffix_row = [len_b - j for j in range(len_b + 1)]
    for i in range(len_a - 1, -1, -1):
        current_row = [0] * (len_b + 1)
        # list_b is exhausted: every remaining element of list_a is deleted.
        current_row[len_b] = len_a - i
        for j in range(len_b - 1, -1, -1):
            if are_similar(list_a[i], list_b[j]):
                current_row[j] = suffix_row[j + 1]
            else:
                current_row[j] = 1 + min(
                    suffix_row[j],  # drop list_a[i]
                    current_row[j + 1],  # drop list_b[j]
                    suffix_row[j + 1],  # substitute one for the other
                )
        suffix_row = current_row
    return suffix_row[0]
60+
61+
62+
class SimpleDiffer(AbstractDiffer):
    """Differ that scores API elements with normalized edit distances.

    Names, code lines, attribute lists, parameter lists and type lists are
    compared with ``distance_elements``; the distance is divided by the length
    of the longer input and subtracted from 1, so identical inputs score 1.
    Parameter assignment kinds are compared through a fixed lookup table.
    """

    # Similarity of every pair of ParameterAssignment kinds, indexed as
    # table[assigned_by_a][assigned_by_b]; filled in __init__.
    assigned_by_look_up_similarity: dict[
        ParameterAssignment, dict[ParameterAssignment, float]
    ]

    def __init__(self):
        """Build the lookup table of similarities between assignment kinds."""
        # Penalties subtracted from a perfect score of 1 for each way in
        # which two assignment kinds differ.
        distance_between_implicit_and_explicit = 0.3
        distance_between_vararg_and_normal = 0.3
        distance_between_position_and_named = 0.3
        distance_between_both_to_one = 0.15
        distance_between_one_to_both = 0.15
        self.assigned_by_look_up_similarity = {
            ParameterAssignment.IMPLICIT: {
                ParameterAssignment.IMPLICIT: 1,
                ParameterAssignment.NAMED_VARARG: 1
                - distance_between_implicit_and_explicit
                - distance_between_vararg_and_normal
                - distance_between_position_and_named,
                ParameterAssignment.POSITIONAL_VARARG: 1
                - distance_between_implicit_and_explicit
                - distance_between_vararg_and_normal,
                ParameterAssignment.POSITION_OR_NAME: 1
                - distance_between_implicit_and_explicit,
                ParameterAssignment.NAME_ONLY: 1
                - distance_between_implicit_and_explicit,
                ParameterAssignment.POSITION_ONLY: 1
                - distance_between_implicit_and_explicit,
            },
            ParameterAssignment.NAMED_VARARG: {
                ParameterAssignment.IMPLICIT: 1
                - distance_between_implicit_and_explicit
                - distance_between_vararg_and_normal
                - distance_between_position_and_named,
                ParameterAssignment.NAMED_VARARG: 1,
                ParameterAssignment.POSITIONAL_VARARG: 1
                - distance_between_position_and_named,
                ParameterAssignment.POSITION_OR_NAME: 1
                - distance_between_vararg_and_normal
                - distance_between_one_to_both,
                ParameterAssignment.NAME_ONLY: 1 - distance_between_vararg_and_normal,
                ParameterAssignment.POSITION_ONLY: 1
                - distance_between_vararg_and_normal
                - distance_between_position_and_named,
            },
            ParameterAssignment.POSITIONAL_VARARG: {
                ParameterAssignment.IMPLICIT: 1
                - distance_between_implicit_and_explicit
                - distance_between_vararg_and_normal,
                ParameterAssignment.NAMED_VARARG: 1
                - distance_between_position_and_named,
                ParameterAssignment.POSITIONAL_VARARG: 1,
                ParameterAssignment.POSITION_OR_NAME: 1
                - distance_between_vararg_and_normal
                - distance_between_one_to_both,
                ParameterAssignment.NAME_ONLY: 1
                - distance_between_vararg_and_normal
                - distance_between_position_and_named,
                ParameterAssignment.POSITION_ONLY: 1
                - distance_between_vararg_and_normal,
            },
            ParameterAssignment.POSITION_OR_NAME: {
                ParameterAssignment.IMPLICIT: 1
                - distance_between_implicit_and_explicit,
                ParameterAssignment.NAMED_VARARG: 1
                - distance_between_vararg_and_normal
                - distance_between_both_to_one,
                ParameterAssignment.POSITIONAL_VARARG: 1
                - distance_between_vararg_and_normal
                - distance_between_both_to_one,
                ParameterAssignment.POSITION_OR_NAME: 1,
                ParameterAssignment.NAME_ONLY: 1 - distance_between_both_to_one,
                ParameterAssignment.POSITION_ONLY: 1 - distance_between_both_to_one,
            },
            ParameterAssignment.NAME_ONLY: {
                ParameterAssignment.IMPLICIT: 1
                - distance_between_implicit_and_explicit,
                ParameterAssignment.NAMED_VARARG: 1
                - distance_between_vararg_and_normal,
                ParameterAssignment.POSITIONAL_VARARG: 1
                - distance_between_vararg_and_normal
                - distance_between_position_and_named,
                ParameterAssignment.POSITION_OR_NAME: 1 - distance_between_one_to_both,
                ParameterAssignment.NAME_ONLY: 1,
                ParameterAssignment.POSITION_ONLY: 1
                - distance_between_position_and_named,
            },
            ParameterAssignment.POSITION_ONLY: {
                ParameterAssignment.IMPLICIT: 1
                - distance_between_implicit_and_explicit,
                ParameterAssignment.NAMED_VARARG: 1
                - distance_between_vararg_and_normal
                - distance_between_position_and_named,
                ParameterAssignment.POSITIONAL_VARARG: 1
                - distance_between_vararg_and_normal,
                ParameterAssignment.POSITION_OR_NAME: 1 - distance_between_one_to_both,
                ParameterAssignment.NAME_ONLY: 1 - distance_between_position_and_named,
                ParameterAssignment.POSITION_ONLY: 1,
            },
        }

    def compute_class_similarity(self, class_a: Class, class_b: Class) -> float:
        """Return the mean of the name, instance-attribute and code similarity.

        :param class_a: first class to compare
        :param class_b: second class to compare
        :return: average of the three partial similarities
        """
        name_similarity = self._compute_name_similarity(class_a.name, class_b.name)
        attributes_similarity = distance_elements(
            class_a.instance_attributes, class_b.instance_attributes
        )
        # Normalize by the longer list; the extra 1 avoids dividing by zero
        # when neither class has instance attributes.
        attributes_similarity = attributes_similarity / (
            max(len(class_a.instance_attributes), len(class_b.instance_attributes), 1)
        )
        attributes_similarity = 1 - attributes_similarity

        code_similarity = self._compute_code_similarity(class_a.code, class_b.code)
        return (name_similarity + attributes_similarity + code_similarity) / 3

    def _compute_name_similarity(self, name_a: str, name_b: str) -> float:
        """Return 1 minus the character edit distance normalized by the longer name."""
        name_similarity = distance_elements([*name_a], [*name_b]) / max(
            len(name_a), len(name_b), 1
        )
        return 1 - name_similarity

    def compute_attribute_similarity(
        self,
        attributes_a: Attribute,
        attributes_b: Attribute,
    ) -> float:
        """Return the mean of the name and type similarity of two attributes.

        A ``UnionType`` is unpacked into its member types so that unions are
        compared member by member; any other value of ``types`` (including
        ``None``) is compared as a single element.

        :param attributes_a: first attribute to compare
        :param attributes_b: second attribute to compare
        :return: average of name similarity and type similarity
        """
        name_similarity = self._compute_name_similarity(
            attributes_a.name, attributes_b.name
        )
        # Each list is built from its own attribute's ``types``; the check
        # for a union is made on ``types``, not on the attribute object.
        type_list_a = self._create_list_from_type(attributes_a.types)
        type_list_b = self._create_list_from_type(attributes_b.types)
        type_similarity = distance_elements(type_list_a, type_list_b) / max(
            len(type_list_a), len(type_list_b), 1
        )
        type_similarity = 1 - type_similarity
        return (name_similarity + type_similarity) / 2

    def compute_function_similarity(
        self, function_a: Function, function_b: Function
    ) -> float:
        """Return the mean of the code, name and parameter-list similarity.

        Two parameters count as matching in the parameter-list comparison only
        when ``compute_parameter_similarity`` returns exactly 1 for them.

        :param function_a: first function to compare
        :param function_b: second function to compare
        :return: average of the three partial similarities
        """
        code_similarity = self._compute_code_similarity(
            function_a.code, function_b.code
        )
        name_similarity = self._compute_name_similarity(
            function_a.name, function_b.name
        )

        def are_parameters_similar(parameter_a: Parameter, parameter_b: Parameter):
            return self.compute_parameter_similarity(parameter_a, parameter_b) == 1

        parameter_similarity = distance_elements(
            function_a.parameters,
            function_b.parameters,
            are_similar=are_parameters_similar,
        ) / max(len(function_a.parameters), len(function_b.parameters), 1)
        parameter_similarity = 1 - parameter_similarity

        return (code_similarity + name_similarity + parameter_similarity) / 3

    def _compute_code_similarity(self, code_a: str, code_b: str) -> float:
        """Return 1 minus the line-wise edit distance normalized by the longer code.

        Both strings are split on ``"\\n"`` and compared line by line.
        """
        split_a = code_a.split("\n")
        split_b = code_b.split("\n")
        diff_code = distance_elements(split_a, split_b) / max(
            len(split_a), len(split_b), 1
        )
        return 1 - diff_code

    def compute_parameter_similarity(
        self, parameter_a: Parameter, parameter_b: Parameter
    ) -> float:
        """Return the mean of the name, type and assignment similarity.

        :param parameter_a: first parameter to compare
        :param parameter_b: second parameter to compare
        :return: average of the three partial similarities
        """
        parameter_name_similarity = self._compute_name_similarity(
            parameter_a.name, parameter_b.name
        )
        parameter_type_similarity = self._compute_type_similarity(
            parameter_a.type, parameter_b.type
        )
        parameter_assignment_similarity = self._compute_assignment_similarity(
            parameter_a.assigned_by, parameter_b.assigned_by
        )
        return (
            parameter_name_similarity
            + parameter_type_similarity
            + parameter_assignment_similarity
        ) / 3

    def _compute_type_similarity(
        self, type_a: Optional[AbstractType], type_b: Optional[AbstractType]
    ) -> float:
        """Return the similarity of two optional types.

        Two missing types are identical (1); exactly one missing type gives 0.
        Otherwise both types are expanded with ``_create_list_from_type`` and
        the lists are compared by edit distance, where two types match when
        their ``to_json()`` output is equal.
        """
        if type_a is None:
            if type_b is None:
                return 1
            return 0
        if type_b is None:
            return 0

        def are_types_similar(
            abstract_type_a: AbstractType, abstract_type_b: AbstractType
        ):
            return abstract_type_a.to_json() == abstract_type_b.to_json()

        type_list_a = self._create_list_from_type(type_a)
        type_list_b = self._create_list_from_type(type_b)
        diff_elements = distance_elements(
            type_list_a, type_list_b, are_similar=are_types_similar
        ) / max(len(type_list_a), len(type_list_b), 1)
        return 1 - diff_elements

    def _create_list_from_type(self, abstract_type: Optional[AbstractType]):
        """Return the member types of a ``UnionType``, else a one-element list."""
        if isinstance(abstract_type, UnionType):
            return abstract_type.types
        return [abstract_type]

    def _compute_assignment_similarity(
        self, assigned_by_a: ParameterAssignment, assigned_by_b: ParameterAssignment
    ) -> float:
        """Look up the similarity of two assignment kinds in the table built by ``__init__``."""
        return self.assigned_by_look_up_similarity[assigned_by_a][assigned_by_b]

    def compute_result_similarity(self, result_a: Result, result_b: Result) -> float:
        """Return the name similarity of two results; only the names are compared."""
        return self._compute_name_similarity(result_a.name, result_b.name)

0 commit comments

Comments
 (0)