Skip to content

Commit ce375c1

Browse files
authored
refactor: split compare result into full compare info and fast compare info. (#224)
1 parent af0f06c commit ce375c1

File tree

11 files changed

+65
-65
lines changed

11 files changed

+65
-65
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
UTIL_VERSION := 0.5.18
1+
UTIL_VERSION := 0.5.19
22
UTIL_NAME := codeplag
33
PWD := $(shell pwd)
44

src/codeplag/algorithms/compare.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
from codeplag.consts import DEFAULT_MAX_DEPTH, DEFAULT_NGRAMS_LENGTH, DEFAULT_WEIGHTS
88
from codeplag.types import (
99
ASTFeatures,
10-
CompareInfo,
11-
FastMetrics,
10+
FastCompareInfo,
11+
FullCompareInfo,
1212
MaxDepth,
1313
NgramsLength,
14-
StructuresInfo,
14+
StructureCompareInfo,
1515
Threshold,
1616
)
1717

@@ -21,11 +21,11 @@ def fast_compare(
2121
features_s: ASTFeatures,
2222
ngrams_length: NgramsLength = DEFAULT_NGRAMS_LENGTH,
2323
weights: tuple[float, float, float, float] = DEFAULT_WEIGHTS,
24-
) -> FastMetrics:
24+
) -> FastCompareInfo:
2525
"""Returns comparison result of two works compared by fast algorithms.
2626
2727
Calculates the similarity of features of two programs using four algorithms, calculates their
28-
weighted average, and returns all of this in 'FastMetrics' structure.
28+
weighted average, and returns all of this in 'FastCompareInfo' structure.
2929
3030
Args:
3131
----
@@ -48,7 +48,7 @@ def fast_compare(
4848
np.array([jakkar_coef, ops_res, kw_res, lits_res]), weights=weights
4949
)
5050

51-
fast_metrics = FastMetrics(
51+
fast_metrics = FastCompareInfo(
5252
jakkar=jakkar_coef,
5353
operators=ops_res,
5454
keywords=kw_res,
@@ -65,7 +65,7 @@ def compare_works(
6565
ngrams_length: NgramsLength = DEFAULT_NGRAMS_LENGTH,
6666
max_depth: MaxDepth = DEFAULT_MAX_DEPTH,
6767
threshold: Threshold | None = None,
68-
) -> CompareInfo:
68+
) -> FastCompareInfo | FullCompareInfo:
6969
"""The function returns the complex result of comparing two works.
7070
7171
Args:
@@ -79,18 +79,18 @@ def compare_works(
7979
8080
Returns:
8181
-------
82-
CompareInfo, which is the result of comparing works.
82+
FastCompareInfo or FullCompareInfo, which is the result of comparing works.
8383
This can consist of fast metrics and, if the threshold
8484
value has been crossed, structure metric.
8585
If the threshold value is not set, it returns the structure
86-
metric anywhere.
86+
metric anyway (FullCompareInfo).
8787
8888
"""
89-
fast_metrics = fast_compare(
89+
fast_compare_info = fast_compare(
9090
features_f=features1, features_s=features2, ngrams_length=ngrams_length
9191
)
92-
if threshold and (fast_metrics.weighted_average * 100.0) < threshold:
93-
return CompareInfo(fast=fast_metrics)
92+
if threshold and (fast_compare_info.weighted_average * 100.0) < threshold:
93+
return fast_compare_info
9494

9595
compliance_matrix = np.empty(
9696
(len(features1.head_nodes), len(features2.head_nodes), 2), dtype=np.int64
@@ -102,6 +102,8 @@ def compare_works(
102102
)
103103
struct_res = struct_res[0] / struct_res[1]
104104

105-
structure_info = StructuresInfo(similarity=struct_res, compliance_matrix=compliance_matrix)
105+
structure_info = StructureCompareInfo(
106+
similarity=struct_res, compliance_matrix=compliance_matrix
107+
)
106108

107-
return CompareInfo(fast=fast_metrics, structure=structure_info)
109+
return FullCompareInfo(fast=fast_compare_info, structure=structure_info)

src/codeplag/display.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pandas as pd
99
from typing_extensions import Self
1010

11-
from codeplag.types import ASTFeatures, CompareInfo, NodeCodePlace
11+
from codeplag.types import ASTFeatures, FullCompareInfo, NodeCodePlace
1212

1313
CHARS_CNT: Final[int] = 40
1414
USEFUL_CHARS: Final[int] = 100
@@ -157,7 +157,7 @@ def clear_line() -> None:
157157
def print_compare_result(
158158
features1: ASTFeatures,
159159
features2: ASTFeatures,
160-
compare_info: CompareInfo,
160+
compare_info: FullCompareInfo,
161161
compliance_matrix_df: pd.DataFrame | None = None,
162162
) -> None:
163163
"""Prints the pretty result of comparing two files.
@@ -166,7 +166,7 @@ def print_compare_result(
166166
----
167167
features1 (ASTFeatures): The features of the first source file.
168168
features2 (ASTFeatures): The features of the second source file.
169-
compare_info (CompareInfo): The compare metrics of two works.
169+
compare_info (FullCompareInfo): The compare metrics of two works.
170170
compliance_matrix_df (pd.DataFrame | None, optional): DataFrame consisting
171171
structures similarity information of two works.
172172
@@ -186,15 +186,12 @@ def print_compare_result(
186186
[compare_info.fast],
187187
index=np.array(["Similarity"]),
188188
columns=pd.Index(
189-
(field.upper() for field in compare_info.fast._fields), name="FastMetrics:"
189+
(field.upper() for field in compare_info.fast._fields), name="FastCompareInfo:"
190190
),
191191
)
192192
print(main_metrics_df)
193193
print()
194194

195-
if compare_info.structure is None:
196-
return
197-
198195
additional_metrics_df = pd.DataFrame(
199196
compare_info.structure.similarity,
200197
index=np.array(["Similarity"]),

src/codeplag/handlers/check.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,11 @@
3737
from codeplag.reporters import CSVReporter
3838
from codeplag.types import (
3939
ASTFeatures,
40-
CompareInfo,
4140
ExitCode,
4241
Extension,
42+
FastCompareInfo,
4343
Flag,
44+
FullCompareInfo,
4445
MaxDepth,
4546
Mode,
4647
NgramsLength,
@@ -315,10 +316,10 @@ def _handle_compare_result(
315316
self: Self,
316317
work1: ASTFeatures,
317318
work2: ASTFeatures,
318-
metrics: CompareInfo,
319+
metrics: FullCompareInfo | FastCompareInfo,
319320
save: bool = False,
320321
) -> ExitCode:
321-
if metrics.structure is None:
322+
if isinstance(metrics, FastCompareInfo):
322323
return ExitCode.EXIT_SUCCESS
323324
if self.reporter and save:
324325
self.reporter.save_result(work1, work2, metrics)
@@ -347,7 +348,7 @@ def _handle_completed_futures(
347348
) -> ExitCode:
348349
exit_code = ExitCode.EXIT_SUCCESS
349350
for future in as_completed(futures):
350-
metrics: CompareInfo = future.result()
351+
metrics: FullCompareInfo | FastCompareInfo = future.result()
351352
proc_works_info = processing[future.id] # type: ignore
352353
exit_code = ExitCode(
353354
exit_code

src/codeplag/handlers/report.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
from codeplag.reporters import deserialize_compare_result, read_df
2525
from codeplag.translate import get_translations
2626
from codeplag.types import (
27-
CompareInfo,
2827
ExitCode,
28+
FullCompareInfo,
2929
Language,
3030
ReportType,
3131
SameFuncs,
@@ -222,7 +222,7 @@ def _get_parsed_line(
222222
df: pd.DataFrame,
223223
threshold: int = DEFAULT_THRESHOLD,
224224
include_funcs_less_threshold: bool = True,
225-
) -> Generator[tuple[pd.Series, CompareInfo, SameFuncs, SameFuncs], None, None]:
225+
) -> Generator[tuple[pd.Series, FullCompareInfo, SameFuncs, SameFuncs], None, None]:
226226
for _, line in df.iterrows():
227227
cmp_res = deserialize_compare_result(line)
228228
first_heads = _deserialize_head_nodes(line.first_heads)

src/codeplag/reporters.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
from codeplag.logger import codeplag_logger as logger
1515
from codeplag.types import (
1616
ASTFeatures,
17-
CompareInfo,
18-
FastMetrics,
19-
StructuresInfo,
17+
FastCompareInfo,
18+
FullCompareInfo,
19+
StructureCompareInfo,
2020
)
2121

2222

@@ -29,7 +29,7 @@ def save_result(
2929
self: Self,
3030
first_work: ASTFeatures,
3131
second_work: ASTFeatures,
32-
compare_info: CompareInfo,
32+
compare_info: FullCompareInfo,
3333
) -> None: ...
3434

3535

@@ -48,14 +48,14 @@ def save_result(
4848
self: Self,
4949
first_work: ASTFeatures,
5050
second_work: ASTFeatures,
51-
compare_info: CompareInfo,
51+
compare_info: FullCompareInfo,
5252
) -> None:
5353
"""Updates the cache with new comparisons and writes it to the filesystem periodically.
5454
5555
Args:
5656
first_work (ASTFeatures): Contains the first work metadata.
5757
second_work (ASTFeatures): Contains the second work metadata.
58-
compare_info (CompareInfo): Contains information about comparisons
58+
compare_info (FullCompareInfo): Contains information about comparisons
5959
between the first and second works.
6060
"""
6161
if not self.reports.is_dir():
@@ -91,7 +91,7 @@ def _write_df_to_fs(self: Self) -> None:
9191

9292
def get_compare_result_from_cache(
9393
self: Self, work1: ASTFeatures, work2: ASTFeatures
94-
) -> CompareInfo | None:
94+
) -> FullCompareInfo | None:
9595
cache_val = self.__df_report[
9696
(self.__df_report.first_path == str(work1.filepath))
9797
& (self.__df_report.second_path == str(work2.filepath))
@@ -112,10 +112,8 @@ def read_df(path: Path) -> pd.DataFrame:
112112
def serialize_compare_result(
113113
first_work: ASTFeatures,
114114
second_work: ASTFeatures,
115-
compare_info: CompareInfo,
115+
compare_info: FullCompareInfo,
116116
) -> pd.DataFrame:
117-
assert compare_info.structure is not None
118-
119117
return pd.DataFrame(
120118
{
121119
"date": _get_current_date(),
@@ -139,21 +137,21 @@ def serialize_compare_result(
139137
)
140138

141139

142-
def deserialize_compare_result(compare_result: pd.Series) -> CompareInfo:
140+
def deserialize_compare_result(compare_result: pd.Series) -> FullCompareInfo:
143141
if isinstance(compare_result.compliance_matrix, str):
144142
similarity_matrix = np.array(json.loads(compare_result.compliance_matrix))
145143
else:
146144
similarity_matrix = np.array(compare_result.compliance_matrix)
147145

148-
compare_info = CompareInfo(
149-
fast=FastMetrics(
146+
compare_info = FullCompareInfo(
147+
fast=FastCompareInfo(
150148
jakkar=float(compare_result.jakkar),
151149
operators=float(compare_result.operators),
152150
keywords=float(compare_result.keywords),
153151
literals=float(compare_result.literals),
154152
weighted_average=float(compare_result.weighted_average),
155153
),
156-
structure=StructuresInfo(
154+
structure=StructureCompareInfo(
157155
compliance_matrix=similarity_matrix,
158156
similarity=float(compare_result.struct_similarity),
159157
),

src/codeplag/types.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -113,22 +113,22 @@ def get_sha256(self: Self) -> str:
113113
# Compare information
114114

115115

116-
class FastMetrics(NamedTuple):
116+
class FastCompareInfo(NamedTuple):
117117
jakkar: float
118118
operators: float
119119
keywords: float
120120
literals: float
121121
weighted_average: float
122122

123123

124-
class StructuresInfo(NamedTuple):
124+
class StructureCompareInfo(NamedTuple):
125125
similarity: float
126126
compliance_matrix: npt.NDArray
127127

128128

129-
class CompareInfo(NamedTuple):
130-
fast: FastMetrics
131-
structure: StructuresInfo | None = None
129+
class FullCompareInfo(NamedTuple):
130+
fast: FastCompareInfo
131+
structure: StructureCompareInfo
132132

133133

134134
# Exceptions and errors
@@ -198,6 +198,6 @@ class ProcessingWorks(NamedTuple):
198198
# To fix that, you need to pass the right module name to the namedtuple factory
199199
NodeCodePlace.__module__ = __name__
200200
NodeStructurePlace.__module__ = __name__
201-
FastMetrics.__module__ = __name__
202-
StructuresInfo.__module__ = __name__
203-
CompareInfo.__module__ = __name__
201+
FastCompareInfo.__module__ = __name__
202+
StructureCompareInfo.__module__ = __name__
203+
FullCompareInfo.__module__ = __name__

test/unit/codeplag/algorithms/test_compare.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
from typing_extensions import Self
33

44
from codeplag.algorithms.compare import compare_works, fast_compare
5-
from codeplag.types import ASTFeatures, CompareInfo
5+
from codeplag.types import ASTFeatures, FastCompareInfo, FullCompareInfo
66

77

88
class TestCompareWorks:
9-
def test_compare_works_with_structure(self: Self, first_compare_result: CompareInfo):
9+
def test_compare_works_with_structure(self: Self, first_compare_result: FullCompareInfo):
1010
assert first_compare_result.fast.jakkar == pytest.approx(0.737, 0.001)
1111
assert first_compare_result.fast.operators == pytest.approx(0.667, 0.001)
1212
assert first_compare_result.fast.keywords == 1.0
@@ -28,12 +28,12 @@ def test_compare_works_without_structure(
2828
features1=first_features, features2=third_features, threshold=60
2929
)
3030

31-
assert compare_info.fast.jakkar == 0.24
32-
assert compare_info.fast.operators == 0.0
33-
assert compare_info.fast.keywords == 0.6
34-
assert compare_info.fast.literals == 0.0
35-
assert compare_info.fast.weighted_average == pytest.approx(0.218, 0.001)
36-
assert compare_info.structure is None
31+
assert isinstance(compare_info, FastCompareInfo)
32+
assert compare_info.jakkar == 0.24
33+
assert compare_info.operators == 0.0
34+
assert compare_info.keywords == 0.6
35+
assert compare_info.literals == 0.0
36+
assert compare_info.weighted_average == pytest.approx(0.218, 0.001)
3737

3838

3939
class TestFastCompare:

test/unit/codeplag/conftest.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from codeplag.algorithms.compare import compare_works
88
from codeplag.pyplag.utils import get_ast_from_filename, get_features_from_ast
9-
from codeplag.types import ASTFeatures, CompareInfo
9+
from codeplag.types import ASTFeatures, FullCompareInfo
1010

1111
CWD = Path(os.path.dirname(os.path.abspath(__file__)))
1212
FILEPATH1 = CWD / "./data/test1.py"
@@ -51,7 +51,9 @@ def third_features(third_tree: ast.Module) -> ASTFeatures:
5151

5252

5353
@pytest.fixture
54-
def first_compare_result(first_features: ASTFeatures, second_features: ASTFeatures) -> CompareInfo:
54+
def first_compare_result(
55+
first_features: ASTFeatures, second_features: ASTFeatures
56+
) -> FullCompareInfo:
5557
compare_info = compare_works(features1=first_features, features2=second_features)
56-
assert compare_info.structure is not None
58+
assert isinstance(compare_info, FullCompareInfo)
5759
return compare_info

test/unit/codeplag/handlers/test_report.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
calculate_sources_total_similarity,
1919
)
2020
from codeplag.reporters import serialize_compare_result
21-
from codeplag.types import ASTFeatures, CompareInfo, SameHead
21+
from codeplag.types import ASTFeatures, FullCompareInfo, SameHead
2222

2323

2424
@pytest.mark.parametrize(
@@ -171,7 +171,7 @@ def test__get_same_funcs(
171171
def test__get_parsed_line(
172172
first_features: ASTFeatures,
173173
second_features: ASTFeatures,
174-
first_compare_result: CompareInfo,
174+
first_compare_result: FullCompareInfo,
175175
):
176176
compare_df = serialize_compare_result(first_features, second_features, first_compare_result)
177177
compare_df.iloc[0].first_heads = str(compare_df.iloc[0].first_heads)

0 commit comments

Comments
 (0)