11"""This module contains handlers for the report command of the CLI."""
22
3+ import re
34from collections import defaultdict
45from copy import deepcopy
56from pathlib import Path
6- from typing import Generator , Literal , TypedDict
7+ from typing import Callable , Generator , Literal , TypedDict
78
89import jinja2
910import numpy as np
@@ -94,9 +95,10 @@ def html_report_create(
9495 f"There is nothing in '{ reports_path } ' to create a basic html report from."
9596 )
9697 return ExitCode .EXIT_INVAL
97- return __html_report_create_from_csv (
98+ df = read_df (reports_path )
99+ return __html_report_create (
98100 report_path ,
99- reports_path ,
101+ df ,
100102 report_type ,
101103 settings_config ["threshold" ],
102104 settings_config ["language" ],
@@ -106,7 +108,7 @@ def html_report_create(
106108 elif reports_extension == "mongo" :
107109 connection = MongoDBConnection .from_settings (settings_config )
108110 compare_info_repo = ReportRepository (connection )
109- exit_code = __html_report_create_from_mongo (
111+ exit_code = __html_report_create (
110112 report_path ,
111113 compare_info_repo ,
112114 report_type ,
@@ -126,7 +128,9 @@ def html_report_create(
126128
127129
128130def calculate_general_total_similarity (
129- df : pd .DataFrame , unique_first_paths : NDArray , unique_second_paths : NDArray
131+ compare_infos : pd .DataFrame | ReportRepository ,
132+ unique_first_paths : NDArray ,
133+ unique_second_paths : NDArray ,
130134) -> float :
131135 total_similarity = 0.0
132136 if unique_first_paths .size == 0 :
@@ -135,13 +139,24 @@ def calculate_general_total_similarity(
135139 max_similarity = 0.0
136140 for second_path in unique_second_paths :
137141 sorted_paths = sorted ([first_path , second_path ])
138- selected = df [
139- (df ["first_path" ].str .startswith (sorted_paths [0 ])) # type: ignore
140- & (df ["second_path" ].str .startswith (sorted_paths [1 ])) # type: ignore
141- ]
142- if selected is None or selected .size == 0 :
143- continue
144- module_similarity = float (selected .iloc [0 ]["weighted_average" ])
142+ if isinstance (compare_infos , ReportRepository ):
143+ selected = compare_infos .collection .find_one (
144+ {
145+ "first_path" : re .compile (rf"{ sorted_paths [0 ]} [/.\w]*" ),
146+ "second_path" : re .compile (rf"{ sorted_paths [1 ]} [/.\w]*" ),
147+ }
148+ )
149+ if selected is None :
150+ continue
151+ module_similarity = selected ["compare_result" ]["fast" ]["weighted_average" ]
152+ else :
153+ selected = compare_infos [
154+ (compare_infos ["first_path" ].str .startswith (sorted_paths [0 ])) # type: ignore
155+ & (compare_infos ["second_path" ].str .startswith (sorted_paths [1 ])) # type: ignore
156+ ]
157+ if selected is None or selected .size == 0 :
158+ continue
159+ module_similarity = float (selected .iloc [0 ]["weighted_average" ])
145160 if module_similarity > max_similarity :
146161 max_similarity = module_similarity
147162 total_similarity += max_similarity
@@ -227,15 +242,14 @@ def _get_same_funcs(
227242
228243def _get_parsed_line (
229244 compare_results : pd .DataFrame | ReportRepository ,
245+ extract_func : Callable ,
230246 threshold : int = DEFAULT_THRESHOLD ,
231247 include_funcs_less_threshold : bool = True ,
232248) -> Generator [tuple [FullCompareInfo , SameFuncs , SameFuncs ], None , None ]:
233249 if isinstance (compare_results , ReportRepository ):
234- extract_func = lambda : compare_results .collection .find ({}) # noqa: E731
235250 handle_result_func = lambda result : result # noqa: E731
236251 deserialize_func = deserialize_compare_result_from_dict
237252 else :
238- extract_func = compare_results .iterrows
239253 handle_result_func = lambda result : result [1 ] # noqa: E731
240254 deserialize_func = deserialize_compare_result
241255 for result in extract_func ():
@@ -280,12 +294,14 @@ def _get_resulting_same_percentages(
280294
281295
282296def _search_sources (
283- compare_results : pd .DataFrame | ReportRepository , threshold : int = DEFAULT_THRESHOLD
297+ compare_results : pd .DataFrame | ReportRepository ,
298+ extract_func : Callable ,
299+ threshold : int = DEFAULT_THRESHOLD ,
284300) -> tuple [SamePartsOfAll , CntHeadNodes ]:
285301 same_parts_of_all : SamePartsOfAll = defaultdict (lambda : {})
286302 cnt_head_nodes : CntHeadNodes = {}
287303 for compare_info , same_parts_of_second , same_parts_of_first in _get_parsed_line (
288- compare_results , threshold , include_funcs_less_threshold = False
304+ compare_results , extract_func , threshold , include_funcs_less_threshold = False
289305 ):
290306 for path , heads in zip (
291307 (compare_info .first_path , compare_info .second_path ),
@@ -323,6 +339,7 @@ def _search_sources(
323339
324340def _create_general_report (
325341 compare_results : pd .DataFrame | ReportRepository ,
342+ extract_func : Callable ,
326343 save_path : Path ,
327344 environment : jinja2 .Environment ,
328345 threshold : Threshold = DEFAULT_THRESHOLD ,
@@ -331,13 +348,13 @@ def _create_general_report(
331348) -> None :
332349 if paths is not None :
333350 if isinstance (compare_results , ReportRepository ):
334- raise NotImplementedError (
335- "Creating general html report with MongoDB with provided paths is not implemented."
336- )
337- unique_first_paths = pd .unique (compare_results ["first_path" ])
338- unique_second_paths = pd .unique (compare_results ["second_path" ])
339- assert isinstance (unique_first_paths , np .ndarray )
340- assert isinstance (unique_second_paths , np .ndarray )
351+ unique_first_paths = np . array ( extract_func (). distinct ( "first_path" ))
352+ unique_second_paths = np . array ( extract_func (). distinct ( "second_path" ))
353+ else :
354+ unique_first_paths = pd .unique (compare_results ["first_path" ])
355+ unique_second_paths = pd .unique (compare_results ["second_path" ])
356+ assert isinstance (unique_first_paths , np .ndarray )
357+ assert isinstance (unique_second_paths , np .ndarray )
341358 first_root_path_sim = calculate_general_total_similarity (
342359 compare_results , unique_first_paths , unique_second_paths
343360 )
@@ -352,7 +369,7 @@ def _create_general_report(
352369 save_path = save_path / DEFAULT_GENERAL_REPORT_NAME
353370 save_path .write_text (
354371 template .render (
355- data = _get_parsed_line (compare_results ),
372+ data = _get_parsed_line (compare_results , extract_func ),
356373 list = list ,
357374 len = len ,
358375 round = round ,
@@ -367,13 +384,14 @@ def _create_general_report(
367384
368385def _create_sources_report (
369386 compare_results : pd .DataFrame | ReportRepository ,
387+ extract_func : Callable ,
370388 save_path : Path ,
371389 environment : jinja2 .Environment ,
372390 threshold : Threshold = DEFAULT_THRESHOLD ,
373391 language : Language = DEFAULT_LANGUAGE ,
374392 paths : tuple [str , str ] | None = None ,
375393) -> None :
376- data , cnt_head_nodes = _search_sources (compare_results , threshold )
394+ data , cnt_head_nodes = _search_sources (compare_results , extract_func , threshold )
377395 same_percentages = _get_resulting_same_percentages (data , cnt_head_nodes )
378396 if paths is not None :
379397 first_root_path_sim = calculate_sources_total_similarity (same_percentages , paths [0 ])
@@ -402,9 +420,9 @@ def _create_sources_report(
402420 )
403421
404422
405- def __html_report_create_from_mongo (
423+ def __html_report_create (
406424 report_path : Path ,
407- compare_info_repo : ReportRepository ,
425+ compare_infos : pd . DataFrame | ReportRepository ,
408426 report_type : ReportType ,
409427 threshold : Threshold ,
410428 language : Language ,
@@ -421,48 +439,30 @@ def __html_report_create_from_mongo(
421439 if not all_paths_provided and any ([first_root_path , second_root_path ]):
422440 raise ValueError (_ ("All paths must be provided." ))
423441
424- environment = jinja2 .Environment (extensions = ["jinja2.ext.i18n" ])
425- environment .install_gettext_translations (get_translations ()) # type: ignore
426- create_report_function (
427- compare_info_repo , # type:ignore
428- report_path ,
429- environment ,
430- threshold ,
431- language ,
432- )
433- return ExitCode .EXIT_SUCCESS
434-
435-
436- def __html_report_create_from_csv (
437- report_path : Path ,
438- reports_path : Path ,
439- report_type : ReportType ,
440- threshold : Threshold ,
441- language : Language ,
442- first_root_path : Path | str | None = None ,
443- second_root_path : Path | str | None = None ,
444- ) -> Literal [ExitCode .EXIT_SUCCESS ]:
445- if report_type == "general" :
446- create_report_function = _create_general_report
447- elif report_type == "sources" :
448- create_report_function = _create_sources_report
449- else :
450- raise ValueError (_ ("Invalid report type." ))
451- all_paths_provided = all ([first_root_path , second_root_path ])
452- if not all_paths_provided and any ([first_root_path , second_root_path ]):
453- raise ValueError (_ ("All paths must be provided." ))
454-
455- df = read_df (reports_path )
456442 if all_paths_provided :
457443 paths = tuple (sorted ([str (first_root_path ), str (second_root_path )]))
458- df = df [df ["first_path" ].str .startswith (paths [0 ])] # type: ignore
459- df = df [df ["second_path" ].str .startswith (paths [1 ])] # type: ignore
444+ if isinstance (compare_infos , pd .DataFrame ):
445+ compare_infos = compare_infos [compare_infos ["first_path" ].str .startswith (paths [0 ])] # type: ignore
446+ compare_infos = compare_infos [compare_infos ["second_path" ].str .startswith (paths [1 ])] # type: ignore
447+ extract_func = compare_infos .iterrows # type: ignore
448+ else :
449+ extract_func = lambda : compare_infos .collection .find ( # noqa: E731
450+ {
451+ "first_path" : re .compile (rf"{ paths [0 ]} [/.\w]*" ),
452+ "second_path" : re .compile (rf"{ paths [1 ]} [/.\w]*" ),
453+ }
454+ )
460455 else :
461456 paths = None
457+ if isinstance (compare_infos , ReportRepository ):
458+ extract_func = lambda : compare_infos .collection .find ({}) # noqa: E731
459+ else :
460+ extract_func = compare_infos .iterrows
462461 environment = jinja2 .Environment (extensions = ["jinja2.ext.i18n" ])
463462 environment .install_gettext_translations (get_translations ()) # type: ignore
464463 create_report_function (
465- df , # type:ignore
464+ compare_infos ,
465+ extract_func ,
466466 report_path ,
467467 environment ,
468468 threshold ,
0 commit comments