Skip to content
This repository was archived by the owner on Jan 19, 2025. It is now read-only.

Commit ab94562

Browse files
authored
feat: Improve calculation of Levenshtein distance (#1180)
Closes #1179. ### Summary of Changes Add the `Levenshtein` dependency and use its `distance` function in `_compute_name_similarity`. ### Testing Instructions Run `package-parser/tests/processing/migration/test_differ.py` on a previous commit from the main branch and again on this commit, then compare the running times.
1 parent 25931c8 commit ab94562

File tree

4 files changed

+235
-14
lines changed

4 files changed

+235
-14
lines changed

package-parser/package_parser/processing/api/model/_types.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
class AbstractType(metaclass=ABCMeta):
1212
@abstractmethod
13-
def to_json(self):
13+
def to_json(self) -> dict[str, Any]:
1414
pass
1515

1616
@classmethod
@@ -45,7 +45,7 @@ def from_string(cls, string: str) -> NamedType:
4545
def to_json(self) -> dict[str, str]:
4646
return {"kind": self.__class__.__name__, "name": self.name}
4747

48-
def __eq__(self, other):
48+
def __eq__(self, other: object) -> bool:
4949
if isinstance(other, self.__class__):
5050
return self.name == other.name
5151
return False
@@ -64,7 +64,7 @@ def from_json(cls, json: Any) -> Optional[EnumType]:
6464

6565
@classmethod
6666
def from_string(cls, string: str) -> Optional[EnumType]:
67-
def remove_backslash(e: str):
67+
def remove_backslash(e: str) -> str:
6868
e = e.replace(r"\"", '"')
6969
e = e.replace(r"\'", "'")
7070
return e
@@ -97,7 +97,7 @@ def remove_backslash(e: str):
9797

9898
return None
9999

100-
def update(self, enum: EnumType):
100+
def update(self, enum: EnumType) -> None:
101101
self.values.update(enum.values)
102102

103103
def to_json(self) -> dict[str, Any]:

package-parser/package_parser/processing/migration/model/_differ.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from abc import ABC, abstractmethod
2-
from typing import Any, Optional
2+
from typing import Any, Callable, Optional
33

4+
from Levenshtein import distance
45
from package_parser.processing.api.model import (
56
AbstractType,
67
Attribute,
@@ -44,7 +45,9 @@ def compute_result_similarity(self, result_a: Result, result_b: Result) -> float
4445

4546

4647
def distance_elements(
47-
list_a: list[Any], list_b: list[Any], are_similar=lambda x, y: x == y
48+
list_a: list[Any],
49+
list_b: list[Any],
50+
are_similar: Callable[[Any, Any], bool] = lambda x, y: x == y,
4851
) -> float:
4952
if len(list_a) == 0:
5053
return len(list_b)
@@ -64,7 +67,7 @@ class SimpleDiffer(AbstractDiffer):
6467
ParameterAssignment, dict[ParameterAssignment, float]
6568
]
6669

67-
def __init__(self):
70+
def __init__(self) -> None:
6871
distance_between_implicit_and_explicit = 0.3
6972
distance_between_vararg_and_normal = 0.3
7073
distance_between_position_and_named = 0.3
@@ -173,9 +176,7 @@ def compute_class_similarity(self, class_a: Class, class_b: Class) -> float:
173176
return (name_similarity + attributes_similarity + code_similarity) / 3
174177

175178
def _compute_name_similarity(self, name_a: str, name_b: str) -> float:
176-
name_similarity = distance_elements([*name_a], [*name_b]) / max(
177-
len(name_a), len(name_b), 1
178-
)
179+
name_similarity = distance(name_a, name_b) / max(len(name_a), len(name_b), 1)
179180
return 1 - name_similarity
180181

181182
def compute_attribute_similarity(
@@ -208,7 +209,9 @@ def compute_function_similarity(
208209
function_a.name, function_b.name
209210
)
210211

211-
def are_parameters_similar(parameter_a: Parameter, parameter_b: Parameter):
212+
def are_parameters_similar(
213+
parameter_a: Parameter, parameter_b: Parameter
214+
) -> bool:
212215
return self.compute_parameter_similarity(parameter_a, parameter_b) == 1
213216

214217
parameter_similarity = distance_elements(
@@ -258,7 +261,7 @@ def _compute_type_similarity(
258261

259262
def are_types_similar(
260263
abstract_type_a: AbstractType, abstract_type_b: AbstractType
261-
):
264+
) -> bool:
262265
return abstract_type_a.to_json() == abstract_type_b.to_json()
263266

264267
type_list_a = self._create_list_from_type(type_a)
@@ -268,7 +271,7 @@ def are_types_similar(
268271
) / max(len(type_list_a), len(type_list_b), 1)
269272
return 1 - diff_elements
270273

271-
def _create_list_from_type(self, abstract_type: AbstractType):
274+
def _create_list_from_type(self, abstract_type: AbstractType) -> list[AbstractType]:
272275
if isinstance(abstract_type, UnionType):
273276
return abstract_type.types
274277
return [abstract_type]

0 commit comments

Comments
 (0)