diff --git a/celery_task_router.py b/celery_task_router.py index 1d33b5958..8f4f8ee1c 100644 --- a/celery_task_router.py +++ b/celery_task_router.py @@ -4,8 +4,6 @@ from database.engine import get_db_session from database.models.core import Commit, CompareCommit, Owner, Repository -from database.models.labelanalysis import LabelAnalysisRequest -from database.models.staticanalysis import StaticAnalysisSuite def _get_user_plan_from_ownerid(db_session, ownerid, *args, **kwargs) -> str: @@ -45,34 +43,6 @@ def _get_user_plan_from_comparison_id(dbsession, comparison_id, *args, **kwargs) return DEFAULT_FREE_PLAN -def _get_user_plan_from_label_request_id(dbsession, request_id, *args, **kwargs) -> str: - result = ( - dbsession.query(Owner.plan) - .join(LabelAnalysisRequest.head_commit) - .join(Commit.repository) - .join(Repository.owner) - .filter(LabelAnalysisRequest.id_ == request_id) - .first() - ) - if result: - return result.plan - return DEFAULT_FREE_PLAN - - -def _get_user_plan_from_suite_id(dbsession, suite_id, *args, **kwargs) -> str: - result = ( - dbsession.query(Owner.plan) - .join(StaticAnalysisSuite.commit) - .join(Commit.repository) - .join(Repository.owner) - .filter(StaticAnalysisSuite.id_ == suite_id) - .first() - ) - if result: - return result.plan - return DEFAULT_FREE_PLAN - - def _get_user_plan_from_task(dbsession, task_name: str, task_kwargs: dict) -> str: owner_plan_lookup_funcs = { # from ownerid @@ -96,10 +66,6 @@ def _get_user_plan_from_task(dbsession, task_name: str, task_kwargs: dict) -> st shared_celery_config.manual_upload_completion_trigger_task_name: _get_user_plan_from_repoid, # from comparison_id shared_celery_config.compute_comparison_task_name: _get_user_plan_from_comparison_id, - # from label_request_id - shared_celery_config.label_analysis_task_name: _get_user_plan_from_label_request_id, - # from suite_id - shared_celery_config.static_analysis_task_name: _get_user_plan_from_suite_id, } func_to_use = owner_plan_lookup_funcs.get( task_name, lambda *args, **kwargs: DEFAULT_FREE_PLAN diff --git a/services/static_analysis/__init__.py b/services/static_analysis/__init__.py deleted file mode 100644 index a9672e0d4..000000000 --- a/services/static_analysis/__init__.py +++ /dev/null @@ -1,188 +0,0 @@ -import json -import logging -import typing - -import sentry_sdk -from shared.storage.exceptions import FileNotInStorageError - -from database.models.staticanalysis import ( - StaticAnalysisSingleFileSnapshot, - StaticAnalysisSuite, - StaticAnalysisSuiteFilepath, -) -from services.archive import ArchiveService -from services.static_analysis.git_diff_parser import DiffChange, DiffChangeType -from services.static_analysis.single_file_analyzer import ( - AntecessorFindingResult, - SingleFileSnapshotAnalyzer, -) - -log = logging.getLogger(__name__) - - -def _get_analysis_content_mapping(analysis: StaticAnalysisSuite, filepaths): - db_session = analysis.get_db_session() - return dict( - db_session.query( - StaticAnalysisSuiteFilepath.filepath, - StaticAnalysisSingleFileSnapshot.content_location, - ) - .join( - StaticAnalysisSuiteFilepath, - StaticAnalysisSuiteFilepath.file_snapshot_id - == StaticAnalysisSingleFileSnapshot.id_, - ) - .filter( - StaticAnalysisSuiteFilepath.filepath.in_(filepaths), - StaticAnalysisSuiteFilepath.analysis_suite_id == analysis.id_, - ) - ) - - -class StaticAnalysisComparisonService(object): - def __init__( - self, - base_static_analysis: StaticAnalysisSuite, - head_static_analysis: StaticAnalysisSuite, - git_diff: typing.List[DiffChange], - ): - self._base_static_analysis = base_static_analysis - self._head_static_analysis = head_static_analysis - self._git_diff = git_diff - self._archive_service = None - - @property - def archive_service(self): - if self._archive_service is None: - self._archive_service = ArchiveService( - self._base_static_analysis.commit.repository - ) - return self._archive_service - - @sentry_sdk.trace - def get_base_lines_relevant_to_change(self) -> typing.List[typing.Dict]: - final_result = {"all": False, "files": {}} - db_session = self._base_static_analysis.get_db_session() - head_analysis_content_locations_mapping = _get_analysis_content_mapping( - self._head_static_analysis, - [ - change.after_filepath - for change in self._git_diff - if change.after_filepath - ], - ) - base_analysis_content_locations_mapping = _get_analysis_content_mapping( - self._base_static_analysis, - [ - change.before_filepath - for change in self._git_diff - if change.before_filepath - ], - ) - # @giovanni-guidini 2023-06-14 - # NOTE: Maybe we can paralelize this bit. - # There's some level of IO involved. - for change in self._git_diff: - # This check should happen way earlier - if change.change_type == DiffChangeType.new: - return {"all": True} - final_result["files"][change.before_filepath] = self._analyze_single_change( - db_session, - change, - base_analysis_content_locations_mapping.get(change.before_filepath), - head_analysis_content_locations_mapping.get(change.after_filepath), - ) - return final_result - - def _load_snapshot_data( - self, filepath, content_location - ) -> typing.Optional[SingleFileSnapshotAnalyzer]: - if not content_location: - return None - try: - return SingleFileSnapshotAnalyzer( - filepath, - json.loads(self.archive_service.read_file(content_location)), - ) - except FileNotInStorageError: - log.warning( - "Unable to load file for static analysis comparison", - extra=dict(filepath=filepath, content_location=content_location), - ) - return None - - def _analyze_single_change( - self, - db_session, - change: DiffChange, - base_analysis_file_obj_content_location, - head_analysis_file_obj_content_location, - ): - if change.change_type == DiffChangeType.deleted: - # file simply deleted. - # all lines involved in it needs their tests rechecked - return {"all": True, "lines": None} - if change.change_type == DiffChangeType.modified: - result_so_far = {"all": False, "lines": set()} - head_analysis_file_data = self._load_snapshot_data( - change.after_filepath, head_analysis_file_obj_content_location - ) - base_analysis_file_data = self._load_snapshot_data( - change.before_filepath, base_analysis_file_obj_content_location - ) - if not head_analysis_file_data and not base_analysis_file_data: - return None - if head_analysis_file_data is None or base_analysis_file_data is None: - log.warning( - "Failed to load snapshot for file. Fallback to all lines in the file", - extra=dict( - file_path=change.after_filepath, - is_missing_head=(head_analysis_file_data is None), - is_missing_base=(base_analysis_file_data is None), - ), - ) - return {"all": True, "lines": None} - - for base_line in change.lines_only_on_base: - corresponding_exec_line = ( - base_analysis_file_data.get_corresponding_executable_line(base_line) - ) - if corresponding_exec_line is not None: - result_so_far["lines"].add(corresponding_exec_line) - affected_statement_lines = set( - x - for x in ( - head_analysis_file_data.get_corresponding_executable_line(li) - for li in change.lines_only_on_head - ) - if x is not None - ) - for head_line in affected_statement_lines: - ( - matching_type, - antecessor_head_line, - ) = head_analysis_file_data.get_antecessor_executable_line( - head_line, lines_to_not_consider=affected_statement_lines - ) - if matching_type == AntecessorFindingResult.file: - return {"all": True, "lines": None} - elif matching_type == AntecessorFindingResult.function: - matching_function = ( - base_analysis_file_data.find_function_by_identifier( - antecessor_head_line - ) - ) - if matching_function: - line_entrypoint = matching_function["start_line"] - result_so_far["lines"].add(line_entrypoint) - else: - # No matches, function does not exist on base, go to everything - return {"all": True, "lines": None} - elif matching_type == AntecessorFindingResult.line: - result_so_far["lines"].add(antecessor_head_line) - return result_so_far - log.warning( - "Unknown type of change. Fallback to all lines", - extra=dict(change_type=change.change_type), - ) - return {"all": True, "lines": None} diff --git a/services/static_analysis/git_diff_parser.py b/services/static_analysis/git_diff_parser.py deleted file mode 100644 index 6307f87e1..000000000 --- a/services/static_analysis/git_diff_parser.py +++ /dev/null @@ -1,85 +0,0 @@ -import typing -from dataclasses import dataclass -from enum import Enum - -import sentry_sdk - -from services.comparison.changes import get_segment_offsets - - -class DiffChangeType(Enum): - new = "new" - deleted = "deleted" - modified = "modified" - binary = "binary" - - @classmethod - def get_from_string(cls, string_value): - for i in cls: - if i.value == string_value: - return i - - -@dataclass -class DiffChange(object): - __slots__ = ( - "before_filepath", - "after_filepath", - "change_type", - "lines_only_on_base", - "lines_only_on_head", - ) - before_filepath: typing.Optional[str] - after_filepath: typing.Optional[str] - change_type: DiffChangeType - lines_only_on_base: typing.Optional[typing.List[int]] - lines_only_on_head: typing.Optional[typing.List[int]] - - def map_base_line_to_head_line(self, base_line: int): - return self._map_this_to_other( - base_line, self.lines_only_on_base, self.lines_only_on_head - ) - - def map_head_line_to_base_line(self, head_line: int): - return self._map_this_to_other( - head_line, self.lines_only_on_head, self.lines_only_on_base - ) - - def _map_this_to_other(self, line_number, this, other): - if self.change_type in ( - DiffChangeType.binary, - DiffChangeType.deleted, - DiffChangeType.new, - ): - return None - if line_number in this: - return None - smaller_lines = sum(1 for x in this if x < line_number) - current_point = line_number - smaller_lines - for lh in other: - if lh <= current_point: - current_point += 1 - return current_point - - -# NOTE: Computationally intensive. -@sentry_sdk.trace -def parse_git_diff_json(diff_json) -> typing.List[DiffChange]: - for key, value in diff_json["diff"]["files"].items(): - change_type = DiffChangeType.get_from_string(value["type"]) - after = None if change_type == DiffChangeType.deleted else key - before = ( - None if change_type == DiffChangeType.new else (value.get("before") or key) - ) - _, additions, removals = ( - get_segment_offsets(value["segments"]) - if change_type not in (DiffChangeType.binary, DiffChangeType.deleted) - else (None, None, None) - ) - yield DiffChange( - before_filepath=before, - after_filepath=after, - change_type=DiffChangeType.get_from_string(value["type"]), - lines_only_on_base=sorted(removals) if removals is not None else None, - lines_only_on_head=sorted(additions) if additions is not None else None, - ) diff --git a/services/static_analysis/single_file_analyzer.py b/services/static_analysis/single_file_analyzer.py deleted file mode 100644 index 8642707f8..000000000 --- a/services/static_analysis/single_file_analyzer.py +++ /dev/null @@ -1,129 +0,0 @@ -import logging -import typing -from enum import Enum, auto - -log = logging.getLogger(__name__) - - -class AntecessorFindingResult(Enum): - line = auto() - function = auto() - file = auto() - - -class SingleFileSnapshotAnalyzer(object): - """ - This is an analyzer for a single snapshot of a file (meaning a version of a file in - a particular moment of time) - - For now, the expected structure of the file snapshot is - (there can be more fields, but those are the ones being used in this context): - - empty_lines: - a list of lines that we know are empty - functions: - a list of functions/methods in this file, and its details. - The structure of a function is (some fields declared here might not be used): - declaration_line: The line where the function is declared - identifier: A unique identifier (in the global context) for the function - Something that can later help us tell that a moved function is - still the same function - start_line: The line where the function code starts - end_line: The line where the function code ends - code_hash: A hash of the function body that helps us tell when it changed - complexity_metrics: Some complexity metrics not used here - hash: The hash code of the file so its easy to tell when it has changed - language: The programming language of the file (not used here) - number_lines: The number of lines this file has - statements: A list of statements in this file. A statement structure is a tuple of two - elements: - - The first element is the line number where that statement is - - The second element is a dict with more information about that line: - - line_surety_ancestorship: It's the number of the line that we know - will be executed before this statement happens. Like - "We are sure this line will be an ancestor to this statement" - This is a way to construct a light version of the flowchart graph - of the file - start_column: The column where this code starts - line_hash: The hash of this line (to later tell line changes vs code change) - len: The number of lines (in addition to this one that this code entails) - extra_connected_lines: Which lines are not contiguous to this, but should - be considered to affect this line. One example is the "else" that indirectly - affects the "if", because it's like part of the if "jumping logic" - definition_lines: The lines where things (like classes, functions, enums) are defined - - Those don't have much use for now - import_lines: The lines where imports are. It's useful for other analysis. - But not this one - - We will eventually having a schema to validate data against this so we can ensure data - is valid when we use it. The schema will be better documentation of the format than this - """ - - def __init__(self, filepath, analysis_file_data): - self._filepath = filepath - self._analysis_file_data = analysis_file_data - self._statement_mapping = dict(analysis_file_data["statements"]) - - def get_corresponding_executable_line(self, line_number: int) -> int: - for that_line, statement_data in self._analysis_file_data["statements"]: - if ( - that_line <= line_number - and that_line + statement_data["len"] >= line_number - ): - return that_line - if line_number in statement_data["extra_connected_lines"]: - return that_line - # This is a logging.warning for now while we implement things - # But there will be a really reasonable case where customers - # change no code. So it won't have a corresponding executable line - log.warning( - "Not able to find corresponding executable line", - extra=dict( - filepath_=self._filepath, - line_number=line_number, - allstuff=self._analysis_file_data["statements"], - ), - ) - return None - - def get_antecessor_executable_line( - self, line_number: int, lines_to_not_consider: typing.List[int] - ) -> int: - current_line = line_number - while ( - current_line in lines_to_not_consider - and self._statement_mapping.get(current_line, {}).get( - "line_surety_ancestorship" - ) - and current_line - != self._statement_mapping.get(current_line, {}).get( - "line_surety_ancestorship" - ) - ): - current_line = self._statement_mapping.get(current_line, {}).get( - "line_surety_ancestorship" - ) - if current_line not in lines_to_not_consider: - return (AntecessorFindingResult.line, current_line) - for f in self._analysis_file_data["functions"]: - if ( - f.get("start_line") <= current_line - and f.get("end_line") >= current_line - ): - return (AntecessorFindingResult.function, f["identifier"]) - log.warning( - "Somehow not able to find antecessor line", - extra=dict( - filepath_=self._filepath, - line_number=line_number, - lines_to_not_consider=lines_to_not_consider, - allstuff=self._analysis_file_data["statements"], - ), - ) - return (AntecessorFindingResult.file, self._filepath) - - def find_function_by_identifier(self, function_identifier): - for func in self._analysis_file_data["functions"]: - if func["identifier"] == function_identifier: - return func - return None diff --git a/services/static_analysis/tests/__init__.py b/services/static_analysis/tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/services/static_analysis/tests/unit/__init__.py b/services/static_analysis/tests/unit/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/services/static_analysis/tests/unit/test_git_diff_parser.py b/services/static_analysis/tests/unit/test_git_diff_parser.py deleted file mode 100644 index d60358669..000000000 --- a/services/static_analysis/tests/unit/test_git_diff_parser.py +++ /dev/null @@ -1,217 +0,0 @@ -from services.static_analysis.git_diff_parser import ( - DiffChange, - DiffChangeType, - parse_git_diff_json, -) - - -class TestDiffChange(object): - def test_line_mapping_modified_file(self): - sample_git_diff_change = DiffChange( - before_filepath="README.rst", - after_filepath="README.rst", - change_type=DiffChangeType.modified, - lines_only_on_base=[12, 49, 153, 154], - lines_only_on_head=[12, 13, 50, 56, 57, 58, 59, 60, 61, 62, 161], - ) - # base to head - assert sample_git_diff_change.map_base_line_to_head_line(1) == 1 - assert sample_git_diff_change.map_base_line_to_head_line(11) == 11 - assert sample_git_diff_change.map_base_line_to_head_line(12) is None - assert sample_git_diff_change.map_base_line_to_head_line(13) == 14 - assert sample_git_diff_change.map_base_line_to_head_line(48) == 49 - assert sample_git_diff_change.map_base_line_to_head_line(49) is None - assert sample_git_diff_change.map_base_line_to_head_line(50) == 51 - # head to base - assert sample_git_diff_change.map_head_line_to_base_line(1) == 1 - assert sample_git_diff_change.map_head_line_to_base_line(11) == 11 - assert sample_git_diff_change.map_head_line_to_base_line(12) is None - assert sample_git_diff_change.map_head_line_to_base_line(13) is None - assert sample_git_diff_change.map_head_line_to_base_line(14) == 13 - assert sample_git_diff_change.map_head_line_to_base_line(49) == 48 - assert sample_git_diff_change.map_head_line_to_base_line(50) is None - assert sample_git_diff_change.map_head_line_to_base_line(51) == 50 - # next one is reasonable because there is 7 more head lines than base lines - assert sample_git_diff_change.map_head_line_to_base_line(1000) == 993 - assert sample_git_diff_change.map_base_line_to_head_line(993) == 1000 - - def test_line_mapping_deleted_file(self): - sample_git_diff_change = DiffChange( - before_filepath="README.rst", - after_filepath="README.rst", - change_type=DiffChangeType.deleted, - lines_only_on_base=None, - lines_only_on_head=None, - ) - assert sample_git_diff_change.map_head_line_to_base_line(1) is None - - def test_line_mapping_binary_file(self): - sample_git_diff_change = DiffChange( - before_filepath="README.rst", - after_filepath="README.rst", - change_type=DiffChangeType.binary, - lines_only_on_base=None, - lines_only_on_head=None, - ) - assert sample_git_diff_change.map_head_line_to_base_line(1) is None - - def test_line_mapping_new_file(self): - sample_git_diff_change = DiffChange( - before_filepath="README.rst", - after_filepath="README.rst", - change_type=DiffChangeType.new, - lines_only_on_base=None, - lines_only_on_head=None, - ) - assert sample_git_diff_change.map_head_line_to_base_line(1) is None - - -class TestParseGitDiffJson(object): - def test_parse_git_diff_json_single_file(self): - input_data = { - "diff": { - "files": { - "README.rst": { - "type": "modified", - "before": None, - "segments": [ - { - "header": ["9", "7", "9", "8"], - "lines": [ - " Overview", - " --------", - " ", - "-Main website: `Codecov `_.", - "+", - "+website: `Codecov `_.", - " ", - " .. code-block:: shell-session", - " ", - ], - }, - { - "header": ["46", "12", "47", "19"], - "lines": [ - " ", - " You may need to configure a ``.coveragerc`` file. Learn more `here `_. Start with this `generic .coveragerc `_ for example.", - " ", - "-We highly suggest adding `source` to your ``.coveragerc`` which solves a number of issues collecting coverage.", - "+We highly suggest adding ``source`` to your ``.coveragerc``, which solves a number of issues collecting coverage.", - " ", - " .. code-block:: ini", - " ", - " [run]", - " source=your_package_name", - "+ ", - "+If there are multiple sources, you instead should add ``include`` to your ``.coveragerc``", - "+", - "+.. code-block:: ini", - "+", - "+ [run]", - "+ include=your_package_name/*", - " ", - " unittests", - " ---------", - ], - }, - { - "header": ["150", "5", "158", "4"], - "lines": [ - " * Twitter: `@codecov `_.", - " * Email: `hello@codecov.io `_.", - " ", - "-We are happy to help if you have any questions. Please contact email our Support at [support@codecov.io](mailto:support@codecov.io)", - "-", - "+We are happy to help if you have any questions. Please contact email our Support at `support@codecov.io `_.", - ], - }, - ], - "stats": {"added": 11, "removed": 4}, - } - } - }, - } - res = list(parse_git_diff_json(input_data)) - assert res == [ - DiffChange( - before_filepath="README.rst", - after_filepath="README.rst", - change_type=DiffChangeType.modified, - lines_only_on_base=[12, 49, 153, 154], - lines_only_on_head=[12, 13, 50, 56, 57, 58, 59, 60, 61, 62, 161], - ) - ] - - def test_parse_git_diff_json_multiple_files(self): - input_data = { - "files": { - "banana.py": { - "type": "new", - "before": None, - "segments": [ - { - "header": ["0", "0", "1", "2"], - "lines": ["+suhduad", "+dsandsa"], - } - ], - "stats": {"added": 2, "removed": 0}, - }, - "codecov-alpine": { - "type": "binary", - "stats": {"added": 0, "removed": 0}, - }, - "codecov/settings_dev.py": { - "type": "modified", - "before": None, - "segments": [ - { - "header": ["49", "3", "49", "4"], - "lines": [ - ' SESSION_COOKIE_DOMAIN = "localhost"', - " ", - " GRAPHQL_PLAYGROUND = True", - "+IS_DEV = True", - ], - } - ], - "stats": {"added": 1, "removed": 0}, - }, - "production.yml": { - "type": "deleted", - "before": "production.yml", - "stats": {"added": 0, "removed": 0}, - }, - } - } - expected_result = [ - DiffChange( - before_filepath=None, - after_filepath="banana.py", - change_type=DiffChangeType.new, - lines_only_on_base=[], - lines_only_on_head=[1, 2], - ), - DiffChange( - before_filepath="codecov-alpine", - after_filepath="codecov-alpine", - change_type=DiffChangeType.binary, - lines_only_on_base=None, - lines_only_on_head=None, - ), - DiffChange( - before_filepath="codecov/settings_dev.py", - after_filepath="codecov/settings_dev.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[52], - ), - DiffChange( - before_filepath="production.yml", - after_filepath=None, - change_type=DiffChangeType.deleted, - lines_only_on_base=None, - lines_only_on_head=None, - ), - ] - res = list(parse_git_diff_json({"diff": input_data})) - assert res == expected_result diff --git a/services/static_analysis/tests/unit/test_single_file_analyzer.py b/services/static_analysis/tests/unit/test_single_file_analyzer.py deleted file mode 100644 index f920cb456..000000000 --- a/services/static_analysis/tests/unit/test_single_file_analyzer.py +++ /dev/null @@ -1,107 +0,0 @@ -from services.static_analysis.single_file_analyzer import ( - AntecessorFindingResult, - SingleFileSnapshotAnalyzer, -) - -# While the structure of this is correct, the data itself was manually edited -# to make interesting test cases -sample_input_data = { - "empty_lines": [4, 8, 11], - "warnings": [], - "filename": "source.py", - "functions": [ - { - "identifier": "some_function", - "start_line": 5, - "end_line": 10, - "code_hash": "e4b52b6da12184142fcd7ff2c8412662", - "complexity_metrics": { - "conditions": 1, - "mccabe_cyclomatic_complexity": 2, - "returns": 1, - "max_nested_conditional": 1, - }, - } - ], - "hash": "811d0016249a5b1400a685164e5295de", - "language": "python", - "number_lines": 11, - "statements": [ - ( - 1, - { - "line_surety_ancestorship": None, - "start_column": 0, - "line_hash": "55c30cf01e202728b6952e9cba304798", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 2, - { - "line_surety_ancestorship": 1, - "start_column": 4, - "line_hash": "1d7be9f2145760a59513a4049fcd0d1c", - "len": 1, - "extra_connected_lines": (), - }, - ), - ( - 5, - { - "line_surety_ancestorship": None, - "start_column": 4, - "line_hash": "1d7be9f2145760a59513a4049fcd0d1c", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 6, - { - "line_surety_ancestorship": 5, - "start_column": 4, - "line_hash": "52f98812dca4687f18373b87433df695", - "len": 0, - "extra_connected_lines": (14,), - }, - ), - ( - 7, - { - "line_surety_ancestorship": 6, - "start_column": 4, - "line_hash": "52f98812dca4687f18373b87433df695", - "len": 0, - "extra_connected_lines": (), - }, - ), - ], - "definition_lines": [(4, 6)], - "import_lines": [], -} - - -def test_simple_single_file_snapshot_analyzer_get_corresponding_executable_line(): - sfsa = SingleFileSnapshotAnalyzer("filepath", sample_input_data) - assert sfsa.get_corresponding_executable_line(3) == 2 - assert sfsa.get_corresponding_executable_line(2) == 2 - assert sfsa.get_corresponding_executable_line(4) is None - assert sfsa.get_corresponding_executable_line(14) == 6 - - -def test_get_antecessor_executable_line(): - sfsa = SingleFileSnapshotAnalyzer("filepath", sample_input_data) - assert sfsa.get_antecessor_executable_line(7, lines_to_not_consider=[6, 7]) == ( - AntecessorFindingResult.line, - 5, - ) - assert sfsa.get_antecessor_executable_line(2, lines_to_not_consider=[1, 2]) == ( - AntecessorFindingResult.file, - "filepath", - ) - assert sfsa.get_antecessor_executable_line(5, lines_to_not_consider=[5]) == ( - AntecessorFindingResult.function, - "some_function", - ) diff --git a/services/static_analysis/tests/unit/test_static_analysis_comparison.py b/services/static_analysis/tests/unit/test_static_analysis_comparison.py deleted file mode 100644 index c14f519e6..000000000 --- a/services/static_analysis/tests/unit/test_static_analysis_comparison.py +++ /dev/null @@ -1,954 +0,0 @@ -import json - -import pytest - -from database.tests.factories.core import RepositoryFactory -from database.tests.factories.staticanalysis import ( - StaticAnalysisSingleFileSnapshotFactory, - StaticAnalysisSuiteFactory, - StaticAnalysisSuiteFilepathFactory, -) -from services.static_analysis import ( - SingleFileSnapshotAnalyzer, - StaticAnalysisComparisonService, - _get_analysis_content_mapping, -) -from services.static_analysis.git_diff_parser import DiffChange, DiffChangeType - - -def test_get_analysis_content_mapping(dbsession): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - static_analysis_suite = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - secondary_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - dbsession.add(static_analysis_suite) - dbsession.add(secondary_static_analysis) - dbsession.flush() - snapshot_1 = StaticAnalysisSingleFileSnapshotFactory.create(repository=repository) - snapshot_2 = StaticAnalysisSingleFileSnapshotFactory.create(repository=repository) - snapshot_3 = StaticAnalysisSingleFileSnapshotFactory.create(repository=repository) - snapshot_4 = StaticAnalysisSingleFileSnapshotFactory.create(repository=repository) - snapshot_5 = StaticAnalysisSingleFileSnapshotFactory.create(repository=repository) - dbsession.add_all([snapshot_1, snapshot_2, snapshot_3, snapshot_4, snapshot_5]) - dbsession.flush() - f_1 = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=snapshot_1, analysis_suite=static_analysis_suite - ) - f_2 = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=snapshot_2, analysis_suite=static_analysis_suite - ) - f_3 = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=snapshot_3, analysis_suite=static_analysis_suite - ) - f_4 = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=snapshot_4, analysis_suite=static_analysis_suite - ) - f_s_2 = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=snapshot_2, - analysis_suite=secondary_static_analysis, - filepath=f_1.filepath, - ) - f_s_3 = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=snapshot_3, analysis_suite=secondary_static_analysis - ) - f_s_5 = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=snapshot_5, analysis_suite=secondary_static_analysis - ) - dbsession.add_all([f_1, f_2, f_3, f_4, f_s_2, f_s_3, f_s_5]) - dbsession.flush() - first_res = _get_analysis_content_mapping( - static_analysis_suite, - [f_1.filepath, f_2.filepath, f_4.filepath, "somenonexistent.gh"], - ) - assert first_res == { - f_1.filepath: snapshot_1.content_location, - f_2.filepath: snapshot_2.content_location, - f_4.filepath: snapshot_4.content_location, - } - secondary_res = _get_analysis_content_mapping( - secondary_static_analysis, - [f_s_2.filepath, f_s_3.filepath], - ) - assert secondary_res == { - f_s_2.filepath: snapshot_2.content_location, - f_s_3.filepath: snapshot_3.content_location, - } - - -@pytest.fixture() -def sample_service(dbsession): - repository = RepositoryFactory.create() - head_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - base_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - dbsession.add(head_static_analysis) - dbsession.add(base_static_analysis) - dbsession.flush() - return StaticAnalysisComparisonService( - base_static_analysis=base_static_analysis, - head_static_analysis=head_static_analysis, - git_diff=[ - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ), - ], - ) - - -class TestStaticAnalysisComparisonService(object): - def test_load_snapshot_data_unhappy_cases(self, sample_service, mock_storage): - assert sample_service._load_snapshot_data("filepath", None) is None - assert sample_service._load_snapshot_data("filepath", "fake_location") is None - - def test_load_snapshot_data_happy_cases(self, sample_service, mock_storage): - mock_storage.write_file( - "archive", - "real_content_location", - json.dumps({"statements": [(1, {"ha": "pokemon"})]}), - ) - res = sample_service._load_snapshot_data("filepath", "real_content_location") - assert isinstance(res, SingleFileSnapshotAnalyzer) - assert res._filepath == "filepath" - assert res._analysis_file_data == {"statements": [[1, {"ha": "pokemon"}]]} - assert res._statement_mapping == {1: {"ha": "pokemon"}} - - def test_get_base_lines_relevant_to_change_deleted_plus_changed_normal( - self, dbsession, mock_storage - ): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - snapshot_deleted = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - dbsession.add_all( - [ - snapshot_deleted, - changed_snapshot_base, - changed_snapshot_head, - ] - ) - dbsession.flush() - mock_storage.write_file( - "archive", snapshot_deleted.content_location, json.dumps({"statements": []}) - ) - mock_storage.write_file( - "archive", - changed_snapshot_base.content_location, - json.dumps( - { - "statements": [ - ( - 30, - { - "len": 1, - "line_surety_ancestorship": 29, - "extra_connected_lines": [35], - }, - ), - ] - } - ), - ) - mock_storage.write_file( - "archive", - changed_snapshot_head.content_location, - json.dumps( - { - "functions": [], - "statements": [ - (1, {"len": 0, "extra_connected_lines": []}), - (2, {"len": 1, "extra_connected_lines": []}), - (8, {"len": 0, "extra_connected_lines": []}), - ( - 10, - { - "len": 1, - "line_surety_ancestorship": 8, - "extra_connected_lines": [20], - }, - ), - ], - } - ), - ) - head_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - base_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - dbsession.add(head_static_analysis) - dbsession.add(base_static_analysis) - dbsession.flush() - deleted_sasff = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=snapshot_deleted, - analysis_suite=base_static_analysis, - filepath="deleted.py", - ) - old_changed_sasff = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=changed_snapshot_base, - analysis_suite=base_static_analysis, - filepath="path/changed.py", - ) - new_changed_sasff = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=changed_snapshot_head, - analysis_suite=head_static_analysis, - filepath="path/changed.py", - ) - dbsession.add_all([deleted_sasff, old_changed_sasff, new_changed_sasff]) - dbsession.flush() - service = StaticAnalysisComparisonService( - base_static_analysis=base_static_analysis, - head_static_analysis=head_static_analysis, - git_diff=[ - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[30], - lines_only_on_head=[20], - ), - DiffChange( - before_filepath="deleted.py", - after_filepath=None, - change_type=DiffChangeType.deleted, - lines_only_on_base=None, - lines_only_on_head=None, - ), - ], - ) - assert service.get_base_lines_relevant_to_change() == { - "all": False, - "files": { - "deleted.py": {"all": True, "lines": None}, - "path/changed.py": {"all": False, "lines": {8, 30}}, - }, - } - - def test_get_base_lines_relevant_to_change_one_new_file( - self, dbsession, mock_storage - ): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - snapshot_deleted = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - dbsession.add_all( - [ - snapshot_deleted, - changed_snapshot_base, - changed_snapshot_head, - ] - ) - dbsession.flush() - mock_storage.write_file( - "archive", snapshot_deleted.content_location, json.dumps({"statements": []}) - ) - mock_storage.write_file( - "archive", - changed_snapshot_base.content_location, - json.dumps({"statements": [(1, {})]}), - ) - mock_storage.write_file( - "archive", - changed_snapshot_head.content_location, - json.dumps( - { - "functions": [], - "statements": [ - (1, {"len": 0, "extra_connected_lines": []}), - (2, {"len": 1, "extra_connected_lines": []}), - (8, {"len": 0, "extra_connected_lines": []}), - ( - 10, - { - "len": 1, - "line_surety_ancestorship": 8, - "extra_connected_lines": [20], - }, - ), - ], - } - ), - ) - head_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - base_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - dbsession.add(head_static_analysis) - dbsession.add(base_static_analysis) - dbsession.flush() - deleted_sasff = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=snapshot_deleted, - analysis_suite=base_static_analysis, - filepath="deleted.py", - ) - old_changed_sasff = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=changed_snapshot_base, - analysis_suite=base_static_analysis, - filepath="path/changed.py", - ) - new_changed_sasff = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=changed_snapshot_head, - analysis_suite=head_static_analysis, - filepath="path/changed.py", - ) - dbsession.add_all([deleted_sasff, old_changed_sasff, new_changed_sasff]) - dbsession.flush() - service = StaticAnalysisComparisonService( - base_static_analysis=base_static_analysis, - head_static_analysis=head_static_analysis, - git_diff=[ - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ), - DiffChange( - before_filepath=None, - after_filepath="path/new.py", - change_type=DiffChangeType.new, - lines_only_on_base=[], - lines_only_on_head=[20], - ), - DiffChange( - before_filepath="deleted.py", - after_filepath=None, - change_type=DiffChangeType.deleted, - lines_only_on_base=None, - lines_only_on_head=None, - ), - ], - ) - assert service.get_base_lines_relevant_to_change() == {"all": True} - - def test_analyze_single_change_first_line_file(self, dbsession, mock_storage): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - dbsession.add_all( - [ - changed_snapshot_base, - changed_snapshot_head, - ] - ) - dbsession.flush() - mock_storage.write_file( - "archive", - changed_snapshot_base.content_location, - json.dumps( - { - "statements": [ - ( - 6, - { - "len": 1, - "extra_connected_lines": [9], - }, - ), - ] - } - ), - ) - mock_storage.write_file( - "archive", - changed_snapshot_head.content_location, - json.dumps( - { - "functions": [], - "statements": [ - ( - 10, - { - "len": 0, - "extra_connected_lines": [20], - }, - ), - ( - 11, - { - "len": 0, - "line_surety_ancestorship": 10, - "extra_connected_lines": [], - }, - ), - (12, {"len": 1, "extra_connected_lines": []}), - ( - 18, - { - "len": 0, - "line_surety_ancestorship": 12, - "extra_connected_lines": [], - }, - ), - ], - } - ), - ) - head_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - base_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - dbsession.add(head_static_analysis) - dbsession.add(base_static_analysis) - dbsession.flush() - change = DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[9], - lines_only_on_head=[11], - ) - service = StaticAnalysisComparisonService( - base_static_analysis=base_static_analysis, - head_static_analysis=head_static_analysis, - git_diff=[change], - ) - assert service._analyze_single_change( - dbsession, - change, - changed_snapshot_base.content_location, - changed_snapshot_head.content_location, - ) == {"all": False, "lines": {6, 10}} - - def test_analyze_single_change_base_change(self, dbsession, mock_storage): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - dbsession.add_all( - [ - changed_snapshot_base, - changed_snapshot_head, - ] - ) - dbsession.flush() - mock_storage.write_file( - "archive", - changed_snapshot_base.content_location, - json.dumps( - { - "functions": [ - { - "identifier": "banana_function", - "start_line": 3, - "end_line": 8, - } - ], - "statements": [ - ( - 1, - { - "len": 0, - "line_surety_ancestorship": None, - "extra_connected_lines": [], - }, - ), - ( - 2, - { - "len": 0, - "line_surety_ancestorship": 1, - "extra_connected_lines": [], - }, - ), - ], - } - ), - ) - mock_storage.write_file( - "archive", - changed_snapshot_head.content_location, - json.dumps( - { - "functions": [ - { - "identifier": "banana_function", - "start_line": 3, - "end_line": 8, - } - ], - "statements": [ - ( - 10, - { - "len": 0, - "extra_connected_lines": [20], - }, - ), - ( - 11, - { - "len": 0, - "line_surety_ancestorship": 10, - "extra_connected_lines": [], - }, - ), - (12, {"len": 1, "extra_connected_lines": []}), - ( - 18, - { - "len": 0, - "line_surety_ancestorship": 12, - "extra_connected_lines": [], - }, - ), - ], - } - ), - ) - head_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - base_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - dbsession.add(head_static_analysis) - dbsession.add(base_static_analysis) - dbsession.flush() - service = StaticAnalysisComparisonService( - base_static_analysis=base_static_analysis, - head_static_analysis=head_static_analysis, - git_diff=[ - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ), - ], - ) - assert service._analyze_single_change( - dbsession, - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ), - changed_snapshot_base.content_location, - changed_snapshot_head.content_location, - ) == {"all": True, "lines": None} - assert service._analyze_single_change( - dbsession, - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[11], - ), - changed_snapshot_base.content_location, - changed_snapshot_head.content_location, - ) == {"all": False, "lines": {10}} - assert service._analyze_single_change( - dbsession, - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[99, 100], - ), - changed_snapshot_base.content_location, - changed_snapshot_head.content_location, - ) == {"all": False, "lines": set()} - - def test_analyze_single_change_base_change_missing_head_snapshot( - self, dbsession, mock_storage - ): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - dbsession.add_all( - [ - changed_snapshot_base, - changed_snapshot_head, - ] - ) - dbsession.flush() - mock_storage.write_file( - "archive", - changed_snapshot_base.content_location, - json.dumps( - { - "functions": [ - { - "identifier": "banana_function", - "start_line": 3, - "end_line": 8, - } - ], - "statements": [ - ( - 1, - { - "len": 0, - "line_surety_ancestorship": None, - "extra_connected_lines": [], - }, - ), - ( - 2, - { - "len": 0, - "line_surety_ancestorship": 1, - "extra_connected_lines": [], - }, - ), - ], - } - ), - ) - head_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - base_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - dbsession.add(head_static_analysis) - dbsession.add(base_static_analysis) - dbsession.flush() - service = StaticAnalysisComparisonService( - base_static_analysis=base_static_analysis, - head_static_analysis=head_static_analysis, - git_diff=[ - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ), - ], - ) - assert service._analyze_single_change( - dbsession, - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ), - changed_snapshot_base.content_location, - changed_snapshot_head.content_location, - ) == {"all": True, "lines": None} - assert service._analyze_single_change( - dbsession, - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[11], - ), - changed_snapshot_base.content_location, - changed_snapshot_head.content_location, - ) == {"all": True, "lines": None} - assert service._analyze_single_change( - dbsession, - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[99, 100], - ), - changed_snapshot_base.content_location, - changed_snapshot_head.content_location, - ) == {"all": True, "lines": None} - - def test_analyze_single_change_function_based(self, dbsession, mock_storage): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - dbsession.add_all( - [ - changed_snapshot_base, - changed_snapshot_head, - ] - ) - dbsession.flush() - mock_storage.write_file( - "archive", - changed_snapshot_base.content_location, - json.dumps( - { - "functions": [ - { - "identifier": "banana_function", - "start_line": 3, - "end_line": 8, - } - ], - "statements": [(1, {})], - } - ), - ) - mock_storage.write_file( - "archive", - changed_snapshot_head.content_location, - json.dumps( - { - "functions": [ - { - "identifier": "banana_function", - "start_line": 9, - "end_line": 11, - } - ], - "statements": [ - ( - 10, - { - "len": 1, - "extra_connected_lines": [20], - }, - ), - ( - 11, - { - "len": 0, - "line_surety_ancestorship": 10, - "extra_connected_lines": [], - }, - ), - (12, {"len": 1, "extra_connected_lines": []}), - ( - 18, - { - "len": 0, - "line_surety_ancestorship": 12, - "extra_connected_lines": [], - }, - ), - ], - } - ), - ) - head_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - base_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - dbsession.add(head_static_analysis) - dbsession.add(base_static_analysis) - dbsession.flush() - service = StaticAnalysisComparisonService( - base_static_analysis=base_static_analysis, - head_static_analysis=head_static_analysis, - git_diff=[ - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ), - ], - ) - change = DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ) - assert service._analyze_single_change( - dbsession, - change, - changed_snapshot_base.content_location, - changed_snapshot_head.content_location, - ) == {"all": False, "lines": {3}} - - def test_analyze_single_change_no_static_analysis_found( - self, dbsession, mock_storage, mocker, sample_service - ): - mocked_load_snapshot = mocker.patch.object( - StaticAnalysisComparisonService, "_load_snapshot_data", return_value=None - ) - change = DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ) - first_location, second_location = mocker.MagicMock(), mocker.MagicMock() - assert ( - sample_service._analyze_single_change( - dbsession, - change, - first_location, - second_location, - ) - is None - ) - assert mocked_load_snapshot.call_count == 2 - mocked_load_snapshot.assert_any_call("path/changed.py", second_location) - mocked_load_snapshot.assert_any_call("path/changed.py", first_location) - - def test_analyze_single_change_function_based_no_function_found( - self, dbsession, mock_storage - ): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository - ) - dbsession.add_all( - [ - changed_snapshot_base, - changed_snapshot_head, - ] - ) - dbsession.flush() - mock_storage.write_file( - "archive", - changed_snapshot_base.content_location, - json.dumps( - { - "functions": [], - "statements": [(1, {})], - } - ), - ) - mock_storage.write_file( - "archive", - changed_snapshot_head.content_location, - json.dumps( - { - "functions": [ - { - "identifier": "banana_function", - "start_line": 9, - "end_line": 11, - } - ], - "statements": [ - ( - 10, - { - "len": 1, - "extra_connected_lines": [20], - }, - ), - ( - 11, - { - "len": 0, - "line_surety_ancestorship": 10, - "extra_connected_lines": [], - }, - ), - (12, {"len": 1, "extra_connected_lines": []}), - ( - 18, - { - "len": 0, - "line_surety_ancestorship": 12, - "extra_connected_lines": [], - }, - ), - ], - } - ), - ) - head_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - base_static_analysis = StaticAnalysisSuiteFactory.create( - commit__repository=repository - ) - dbsession.add(head_static_analysis) - dbsession.add(base_static_analysis) - dbsession.flush() - service = StaticAnalysisComparisonService( - base_static_analysis=base_static_analysis, - head_static_analysis=head_static_analysis, - git_diff=[ - DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ), - ], - ) - change = DiffChange( - before_filepath="path/changed.py", - after_filepath="path/changed.py", - change_type=DiffChangeType.modified, - lines_only_on_base=[], - lines_only_on_head=[20], - ) - assert service._analyze_single_change( - dbsession, - change, - changed_snapshot_base.content_location, - changed_snapshot_head.content_location, - ) == {"all": True, "lines": None} diff --git a/tasks/__init__.py b/tasks/__init__.py index 04c246658..2530cf824 100644 --- a/tasks/__init__.py +++ b/tasks/__init__.py @@ -41,7 +41,6 @@ from tasks.save_commit_measurements import save_commit_measurements_task from tasks.save_report_results import save_report_results_task from tasks.send_email import send_email -from tasks.static_analysis_suite_check import static_analysis_suite_check_task from tasks.status_set_error import status_set_error_task from tasks.status_set_pending import status_set_pending_task from tasks.sync_pull import pull_sync_task diff --git a/tasks/label_analysis.py b/tasks/label_analysis.py deleted file mode 100644 index d26d1308b..000000000 --- a/tasks/label_analysis.py +++ /dev/null @@ -1,562 +0,0 @@ -import logging -from typing import Dict, List, NamedTuple, Optional, Set, Tuple, TypedDict, Union - -import sentry_sdk -from asgiref.sync import async_to_sync -from shared.celery_config import label_analysis_task_name -from shared.labelanalysis import LabelAnalysisRequestState -from sqlalchemy.orm import Session - -from app import celery_app -from database.models.labelanalysis import ( - LabelAnalysisProcessingError, - LabelAnalysisProcessingErrorCode, - LabelAnalysisRequest, -) -from database.models.staticanalysis import StaticAnalysisSuite -from helpers.labels import get_all_report_labels, get_labels_per_session -from helpers.metrics import metrics -from services.report import Report, ReportService -from services.report.report_builder import SpecialLabelsEnum -from services.repository import get_repo_provider_service -from services.static_analysis import StaticAnalysisComparisonService -from services.static_analysis.git_diff_parser import DiffChange, parse_git_diff_json -from services.yaml import get_repo_yaml -from tasks.base import BaseCodecovTask - -log = logging.getLogger(__name__) - - -GLOBAL_LEVEL_LABEL = ( - SpecialLabelsEnum.CODECOV_ALL_LABELS_PLACEHOLDER.corresponding_label -) - -GLOBAL_LEVEL_LABEL_IDX = ( - SpecialLabelsEnum.CODECOV_ALL_LABELS_PLACEHOLDER.corresponding_index -) - - -class LinesRelevantToChangeInFile(TypedDict): - all: bool - lines: Set[int] - - -class LinesRelevantToChange(TypedDict): - all: bool - files: Dict[str, Optional[LinesRelevantToChangeInFile]] - - -class ExistingLabelSetsEncoded(NamedTuple): - all_report_labels: Set[int] - executable_lines_labels: Set[int] - global_level_labels: Set[int] - are_labels_encoded: bool = True - - -class ExistingLabelSetsNotEncoded(NamedTuple): - all_report_labels: Set[str] - executable_lines_labels: Set[str] - global_level_labels: Set[str] - are_labels_encoded: bool = False - - -ExistingLabelSets = Union[ExistingLabelSetsEncoded, ExistingLabelSetsNotEncoded] -PossiblyEncodedLabelSet = Union[Set[str], Set[int]] - - -class LabelAnalysisRequestProcessingTask( - BaseCodecovTask, name=label_analysis_task_name -): - errors: List[LabelAnalysisProcessingError] = None - dbsession: Session = None - - def reset_task_context(self): - """Resets the task's attributes to None to avoid spilling information - between task calls in the same process. - https://docs.celeryq.dev/en/latest/userguide/tasks.html#instantiation - """ - self.errors = None - self.dbsession = None - - def run_impl(self, db_session, request_id, *args, **kwargs): - self.errors = [] - self.dbsession = db_session - label_analysis_request = ( - db_session.query(LabelAnalysisRequest) - .filter(LabelAnalysisRequest.id_ == request_id) - .first() - ) - if label_analysis_request is None: - metrics.incr("label_analysis_task.failed_to_calculate.larq_not_found") - log.error( - "LabelAnalysisRequest not found", extra=dict(request_id=request_id) - ) - self.add_processing_error( - larq_id=request_id, - error_code=LabelAnalysisProcessingErrorCode.NOT_FOUND, - error_msg="LabelAnalysisRequest not found", - error_extra=dict(), - ) - response = { - "success": False, - "present_report_labels": [], - "present_diff_labels": [], - "absent_labels": [], - "global_level_labels": [], - "errors": self.errors, - } - self.reset_task_context() - return response - log.info( - "Starting label analysis request", - extra=dict( - request_id=request_id, - external_id=label_analysis_request.external_id, - commit=label_analysis_request.head_commit.commitid, - ), - ) - - if label_analysis_request.state_id == LabelAnalysisRequestState.FINISHED.db_id: - # Indicates that this request has been calculated already - # We might need to update the requested labels - response = self._handle_larq_already_calculated(label_analysis_request) - self.reset_task_context() - return response - - try: - lines_relevant_to_diff: Optional[LinesRelevantToChange] = ( - self._get_lines_relevant_to_diff(label_analysis_request) - ) - base_report = self._get_base_report(label_analysis_request) - - if lines_relevant_to_diff and base_report: - existing_labels: ExistingLabelSets = self._get_existing_labels( - base_report, lines_relevant_to_diff - ) - if existing_labels.are_labels_encoded: - # Translate label_ids - def partial_fn_to_apply(label_id_set): - return self._lookup_label_ids( - report=base_report, label_ids=label_id_set - ) - - existing_labels = ExistingLabelSetsNotEncoded( - all_report_labels=partial_fn_to_apply( - existing_labels.all_report_labels - ), - executable_lines_labels=partial_fn_to_apply( - existing_labels.executable_lines_labels - ), - global_level_labels=partial_fn_to_apply( - existing_labels.global_level_labels - ), - are_labels_encoded=False, - ) - - requested_labels = self._get_requested_labels(label_analysis_request) - result = self.calculate_final_result( - requested_labels=requested_labels, - existing_labels=existing_labels, - commit_sha=label_analysis_request.head_commit.commitid, - ) - label_analysis_request.result = result - label_analysis_request.state_id = ( - LabelAnalysisRequestState.FINISHED.db_id - ) - metrics.incr("label_analysis_task.success") - response = { - "success": True, - "present_report_labels": result["present_report_labels"], - "present_diff_labels": result["present_diff_labels"], - "absent_labels": result["absent_labels"], - "global_level_labels": result["global_level_labels"], - "errors": self.errors, - } - self.reset_task_context() - return response - except Exception: - # temporary general catch while we find possible problems on this - metrics.incr("label_analysis_task.failed_to_calculate.exception") - log.exception( - "Label analysis failed to calculate", - extra=dict( - request_id=request_id, - commit=label_analysis_request.head_commit.commitid, - external_id=label_analysis_request.external_id, - ), - ) - label_analysis_request.result = None - label_analysis_request.state_id = LabelAnalysisRequestState.ERROR.db_id - self.add_processing_error( - larq_id=request_id, - error_code=LabelAnalysisProcessingErrorCode.FAILED, - error_msg="Failed to calculate", - error_extra=dict(), - ) - response = { - "success": False, - "present_report_labels": [], - "present_diff_labels": [], - "absent_labels": [], - "global_level_labels": [], - "errors": self.errors, - } - self.reset_task_context() - return response - metrics.incr("label_analysis_task.failed_to_calculate.missing_info") - log.warning( - "We failed to get some information that was important to label analysis", - extra=dict( - has_relevant_lines=(lines_relevant_to_diff is not None), - has_base_report=(base_report is not None), - commit=label_analysis_request.head_commit.commitid, - external_id=label_analysis_request.external_id, - request_id=request_id, - ), - ) - label_analysis_request.state_id = LabelAnalysisRequestState.FINISHED.db_id - result_to_save = { - "success": True, - "present_report_labels": [], - "present_diff_labels": [], - "absent_labels": label_analysis_request.requested_labels, - "global_level_labels": [], - } - label_analysis_request.result = result_to_save - result_to_return = {**result_to_save, "errors": self.errors} - self.reset_task_context() - return result_to_return - - def add_processing_error( - self, - larq_id: int, - error_code: LabelAnalysisProcessingErrorCode, - error_msg: str, - error_extra: dict, - ): - error = LabelAnalysisProcessingError( - label_analysis_request_id=larq_id, - error_code=error_code.value, - error_params=dict(message=error_msg, extra=error_extra), - ) - self.errors.append(error.to_representation()) - self.dbsession.add(error) - - def _handle_larq_already_calculated(self, larq: LabelAnalysisRequest): - # This means we already calculated everything - # Except possibly the absent labels - log.info( - "Label analysis request was already calculated", - extra=dict( - request_id=larq.id, - external_id=larq.external_id, - commit=larq.head_commit.commitid, - ), - ) - if larq.requested_labels: - saved_result = larq.result - all_saved_labels = set( - saved_result.get("present_report_labels", []) - + saved_result.get("present_diff_labels", []) - + saved_result.get("global_level_labels", []) - ) - executable_lines_saved_labels = set( - saved_result.get("present_diff_labels", []) - ) - global_saved_labels = set(saved_result.get("global_level_labels", [])) - result = self.calculate_final_result( - requested_labels=larq.requested_labels, - existing_labels=ExistingLabelSetsNotEncoded( - all_saved_labels, executable_lines_saved_labels, global_saved_labels - ), - commit_sha=larq.head_commit.commitid, - ) - larq.result = result # Save the new result - metrics.incr("label_analysis_task.already_calculated.new_result") - return {**result, "success": True, "errors": []} - # No requested labels mean we don't have any new information - # So we don't need to calculate again - # This shouldn't actually happen - metrics.incr("label_analysis_task.already_calculated.same_result") - return {**larq.result, "success": True, "errors": []} - - def _lookup_label_ids(self, report: Report, label_ids: Set[int]) -> Set[str]: - labels: Set[str] = set() - for label_id in label_ids: - # This can raise shared.reports.exceptions.LabelNotFoundError - # But (1) we shouldn't let that happen and (2) there's no recovering from it - # So we should let that happen to surface bugs to us - labels.add(report.lookup_label_by_id(label_id)) - return labels - - def _get_requested_labels(self, label_analysis_request: LabelAnalysisRequest): - if label_analysis_request.requested_labels: - return label_analysis_request.requested_labels - # This is the case where the CLI PATCH the requested labels after collecting them - self.dbsession.refresh(label_analysis_request, ["requested_labels"]) - return label_analysis_request.requested_labels - - @sentry_sdk.trace - def _get_existing_labels( - self, report: Report, lines_relevant_to_diff: LinesRelevantToChange - ) -> ExistingLabelSets: - all_report_labels = self.get_all_report_labels(report) - ( - executable_lines_labels, - global_level_labels, - ) = self.get_executable_lines_labels(report, lines_relevant_to_diff) - - if len(all_report_labels) > 0: - # Check if report labels are encoded or not - test_label = all_report_labels.pop() - are_labels_encoded = isinstance(test_label, int) - all_report_labels.add(test_label) - else: - # There are no labels in the report - are_labels_encoded = False - - class_to_use = ( - ExistingLabelSetsEncoded - if are_labels_encoded - else ExistingLabelSetsNotEncoded - ) - - return class_to_use( - all_report_labels=all_report_labels, - executable_lines_labels=executable_lines_labels, - global_level_labels=global_level_labels, - ) - - @sentry_sdk.trace - def _get_lines_relevant_to_diff(self, label_analysis_request: LabelAnalysisRequest): - parsed_git_diff = self._get_parsed_git_diff(label_analysis_request) - if parsed_git_diff: - executable_lines_relevant_to_diff = self.get_relevant_executable_lines( - label_analysis_request, parsed_git_diff - ) - # This line will be useful for debugging - # And to tweak the heuristics - log.info( - "Lines relevant to diff", - extra=dict( - lines_relevant_to_diff=executable_lines_relevant_to_diff, - commit=label_analysis_request.head_commit.commitid, - external_id=label_analysis_request.external_id, - request_id=label_analysis_request.id_, - ), - ) - return executable_lines_relevant_to_diff - return None - - @sentry_sdk.trace - def _get_parsed_git_diff( - self, label_analysis_request: LabelAnalysisRequest - ) -> Optional[List[DiffChange]]: - try: - repo_service = get_repo_provider_service( - label_analysis_request.head_commit.repository - ) - git_diff = async_to_sync(repo_service.get_compare)( - label_analysis_request.base_commit.commitid, - label_analysis_request.head_commit.commitid, - ) - return list(parse_git_diff_json(git_diff)) - except Exception: - # temporary general catch while we find possible problems on this - log.exception( - "Label analysis failed to parse git diff", - extra=dict( - request_id=label_analysis_request.id, - external_id=label_analysis_request.external_id, - commit=label_analysis_request.head_commit.commitid, - ), - ) - self.add_processing_error( - larq_id=label_analysis_request.id, - error_code=LabelAnalysisProcessingErrorCode.FAILED, - error_msg="Failed to parse git diff", - error_extra=dict( - head_commit=label_analysis_request.head_commit.commitid, - base_commit=label_analysis_request.base_commit.commitid, - ), - ) - return None - - @sentry_sdk.trace - def _get_base_report( - self, label_analysis_request: LabelAnalysisRequest - ) -> Optional[Report]: - base_commit = label_analysis_request.base_commit - current_yaml = get_repo_yaml(base_commit.repository) - report_service = ReportService(current_yaml) - report: Report = report_service.get_existing_report_for_commit(base_commit) - if report is None: - log.warning( - "No report found for label analysis", - extra=dict( - request_id=label_analysis_request.id, - commit=label_analysis_request.head_commit.commitid, - ), - ) - self.add_processing_error( - larq_id=label_analysis_request.id, - error_code=LabelAnalysisProcessingErrorCode.MISSING_DATA, - error_msg="Missing base report", - error_extra=dict( - head_commit=label_analysis_request.head_commit.commitid, - base_commit=label_analysis_request.base_commit.commitid, - ), - ) - return report - - @sentry_sdk.trace - def calculate_final_result( - self, - *, - requested_labels: Optional[List[str]], - existing_labels: ExistingLabelSetsNotEncoded, - commit_sha: str, - ): - all_report_labels = existing_labels.all_report_labels - executable_lines_labels = existing_labels.executable_lines_labels - global_level_labels = existing_labels.global_level_labels - log.info( - "Final info", - extra=dict( - executable_lines_labels=sorted(executable_lines_labels), - all_report_labels=all_report_labels, - requested_labels=requested_labels, - global_level_labels=sorted(global_level_labels), - commit=commit_sha, - ), - ) - if requested_labels is not None: - requested_labels = set(requested_labels) - ans = { - "present_report_labels": sorted(all_report_labels & requested_labels), - "present_diff_labels": sorted( - executable_lines_labels & requested_labels - ), - "absent_labels": sorted(requested_labels - all_report_labels), - "global_level_labels": sorted(global_level_labels & requested_labels), - } - return ans - return { - "present_report_labels": sorted(all_report_labels), - "present_diff_labels": sorted(executable_lines_labels), - "absent_labels": [], - "global_level_labels": sorted(global_level_labels), - } - - @sentry_sdk.trace - def get_relevant_executable_lines( - self, label_analysis_request: LabelAnalysisRequest, parsed_git_diff - ): - db_session = label_analysis_request.get_db_session() - base_static_analysis: StaticAnalysisSuite = ( - db_session.query(StaticAnalysisSuite) - .filter( - StaticAnalysisSuite.commit_id == label_analysis_request.base_commit_id, - ) - .first() - ) - head_static_analysis: StaticAnalysisSuite = ( - db_session.query(StaticAnalysisSuite) - .filter( - StaticAnalysisSuite.commit_id == label_analysis_request.head_commit_id, - ) - .first() - ) - if not base_static_analysis or not head_static_analysis: - # TODO : Proper handling of this case - log.info( - "Trying to make prediction where there are no static analyses", - extra=dict( - base_static_analysis=base_static_analysis.id_ - if base_static_analysis is not None - else None, - head_static_analysis=head_static_analysis.id_ - if head_static_analysis is not None - else None, - commit=label_analysis_request.head_commit.commitid, - ), - ) - self.add_processing_error( - larq_id=label_analysis_request.id, - error_code=LabelAnalysisProcessingErrorCode.MISSING_DATA, - error_msg="Missing static analysis info", - error_extra=dict( - head_commit=label_analysis_request.head_commit.commitid, - base_commit=label_analysis_request.base_commit.commitid, - has_base_static_analysis=(base_static_analysis is not None), - has_head_static_analysis=(head_static_analysis is not None), - ), - ) - return None - static_analysis_comparison_service = StaticAnalysisComparisonService( - base_static_analysis, - head_static_analysis, - parsed_git_diff, - ) - return static_analysis_comparison_service.get_base_lines_relevant_to_change() - - @sentry_sdk.trace - def get_executable_lines_labels( - self, report: Report, executable_lines: LinesRelevantToChange - ) -> Tuple[PossiblyEncodedLabelSet, PossiblyEncodedLabelSet]: - if executable_lines["all"]: - return (self.get_all_report_labels(report), set()) - full_sessions = set() - labels: PossiblyEncodedLabelSet = set() - global_level_labels = set() - # Prime piece of code to be rust-ifyied - for name, file_executable_lines in executable_lines["files"].items(): - rf = report.get(name) - if rf and file_executable_lines: - if file_executable_lines["all"]: - for line_number, line in rf.lines: - if line and line.datapoints: - for datapoint in line.datapoints: - dp_labels = datapoint.label_ids or [] - labels.update(dp_labels) - if ( - # If labels are encoded - GLOBAL_LEVEL_LABEL_IDX in dp_labels - # If labels are NOT encoded - or GLOBAL_LEVEL_LABEL in dp_labels - ): - full_sessions.add(datapoint.sessionid) - else: - for line_number in file_executable_lines["lines"]: - line = rf.get(line_number) - if line and line.datapoints: - for datapoint in line.datapoints: - dp_labels = datapoint.label_ids or [] - labels.update(dp_labels) - if ( - # If labels are encoded - GLOBAL_LEVEL_LABEL_IDX in dp_labels - # If labels are NOT encoded - or GLOBAL_LEVEL_LABEL in dp_labels - ): - full_sessions.add(datapoint.sessionid) - for sess_id in full_sessions: - global_level_labels.update(self.get_labels_per_session(report, sess_id)) - return ( - labels - set([GLOBAL_LEVEL_LABEL_IDX, GLOBAL_LEVEL_LABEL]), - global_level_labels, - ) - - def get_labels_per_session(self, report: Report, sess_id: int): - return get_labels_per_session(report, sess_id) - - def get_all_report_labels(self, report: Report) -> set: - return get_all_report_labels(report) - - -RegisteredLabelAnalysisRequestProcessingTask = celery_app.register_task( - LabelAnalysisRequestProcessingTask() -) -label_analysis_task = celery_app.tasks[ - RegisteredLabelAnalysisRequestProcessingTask.name -] diff --git a/tasks/static_analysis_suite_check.py b/tasks/static_analysis_suite_check.py deleted file mode 100644 index 10168431b..000000000 --- a/tasks/static_analysis_suite_check.py +++ /dev/null @@ -1,75 +0,0 @@ -import logging -from typing import Optional - -from shared.celery_config import static_analysis_task_name -from shared.staticanalysis import StaticAnalysisSingleFileSnapshotState -from shared.storage.exceptions import FileNotInStorageError - -from app import celery_app -from database.models.staticanalysis import ( - StaticAnalysisSingleFileSnapshot, - StaticAnalysisSuite, - StaticAnalysisSuiteFilepath, -) -from services.archive import ArchiveService -from tasks.base import BaseCodecovTask - -log = logging.getLogger(__name__) - - -class StaticAnalysisSuiteCheckTask(BaseCodecovTask, name=static_analysis_task_name): - def run_impl( - self, - db_session, - *, - suite_id, - **kwargs, - ): - suite: Optional[StaticAnalysisSuite] = ( - db_session.query(StaticAnalysisSuite).filter_by(id_=suite_id).first() - ) - if suite is None: - log.warning("Checking Static Analysis that does not exist yet") - return {"successful": False, "changed_count": None} - log.info("Checking static analysis suite", extra=dict(suite_id=suite_id)) - query = ( - db_session.query( - StaticAnalysisSingleFileSnapshot, - StaticAnalysisSingleFileSnapshot.content_location, - ) - .join( - StaticAnalysisSuiteFilepath, - StaticAnalysisSuiteFilepath.file_snapshot_id - == StaticAnalysisSingleFileSnapshot.id_, - ) - .filter( - StaticAnalysisSuiteFilepath.analysis_suite_id == suite_id, - StaticAnalysisSingleFileSnapshot.state_id - == StaticAnalysisSingleFileSnapshotState.CREATED.db_id, - ) - ) - archive_service = ArchiveService(suite.commit.repository) - # purposefully iteration when an update would suffice, - # because we actually want to validate different stuff - changed_count = 0 - for elem, content_location in query: - try: - _ = archive_service.read_file(content_location) - elem.state_id = StaticAnalysisSingleFileSnapshotState.VALID.db_id - changed_count += 1 - except FileNotInStorageError: - log.warning( - "File not found to be analyzed", - extra=dict(filepath_id=elem.id, suite_id=suite_id), - ) - - db_session.commit() - return {"successful": True, "changed_count": changed_count} - - -RegisteredStaticAnalysisSuiteCheckTask = celery_app.register_task( - StaticAnalysisSuiteCheckTask() -) -static_analysis_suite_check_task = celery_app.tasks[ - RegisteredStaticAnalysisSuiteCheckTask.name -] diff --git a/tasks/tests/unit/test_check_static_analysis.py b/tasks/tests/unit/test_check_static_analysis.py deleted file mode 100644 index 418d3401e..000000000 --- a/tasks/tests/unit/test_check_static_analysis.py +++ /dev/null @@ -1,89 +0,0 @@ -from shared.staticanalysis import StaticAnalysisSingleFileSnapshotState - -from database.tests.factories.staticanalysis import ( - StaticAnalysisSuiteFactory, - StaticAnalysisSuiteFilepathFactory, -) -from tasks.static_analysis_suite_check import StaticAnalysisSuiteCheckTask - - -class TestStaticAnalysisCheckTask(object): - def test_simple_call_no_object_saved(self, dbsession): - task = StaticAnalysisSuiteCheckTask() - res = task.run_impl(dbsession, suite_id=987654321 * 7) - assert res == {"changed_count": None, "successful": False} - - def test_simple_call_with_suite_all_created( - self, dbsession, mock_storage, mock_configuration, mocker - ): - obj = StaticAnalysisSuiteFactory.create() - dbsession.add(obj) - dbsession.flush() - task = StaticAnalysisSuiteCheckTask() - for i in range(8): - fp_obj = StaticAnalysisSuiteFilepathFactory.create( - analysis_suite=obj, - file_snapshot__state_id=StaticAnalysisSingleFileSnapshotState.CREATED.db_id, - ) - mock_storage.write_file( - mock_configuration.params["services"]["minio"]["bucket"], - fp_obj.file_snapshot.content_location, - "aaaa", - ) - dbsession.add(fp_obj) - # adding one without writing - fp_obj = StaticAnalysisSuiteFilepathFactory.create( - analysis_suite=obj, - file_snapshot__state_id=StaticAnalysisSingleFileSnapshotState.CREATED.db_id, - ) - dbsession.add(fp_obj) - dbsession.flush() - res = task.run_impl(dbsession, suite_id=obj.id_) - assert res == {"changed_count": 8, "successful": True} - - def test_simple_call_with_suite_mix_from_other( - self, dbsession, mock_storage, mock_configuration, mocker - ): - obj = StaticAnalysisSuiteFactory.create() - another_obj_same_repo = StaticAnalysisSuiteFactory.create( - commit__repository=obj.commit.repository - ) - dbsession.add(obj) - dbsession.flush() - task = StaticAnalysisSuiteCheckTask() - for i in range(17): - fp_obj = StaticAnalysisSuiteFilepathFactory.create( - analysis_suite=another_obj_same_repo, - file_snapshot__state_id=StaticAnalysisSingleFileSnapshotState.CREATED.db_id, - ) - mock_storage.write_file( - mock_configuration.params["services"]["minio"]["bucket"], - fp_obj.file_snapshot.content_location, - "aaaa", - ) - dbsession.add(fp_obj) - for i in range(23): - fp_obj = StaticAnalysisSuiteFilepathFactory.create( - analysis_suite=obj, - file_snapshot__state_id=StaticAnalysisSingleFileSnapshotState.CREATED.db_id, - ) - mock_storage.write_file( - mock_configuration.params["services"]["minio"]["bucket"], - fp_obj.file_snapshot.content_location, - "aaaa", - ) - dbsession.add(fp_obj) - for i in range(2): - fp_obj = StaticAnalysisSuiteFilepathFactory.create( - analysis_suite=obj, - file_snapshot__state_id=StaticAnalysisSingleFileSnapshotState.VALID.db_id, - ) - mock_storage.write_file( - mock_configuration.params["services"]["minio"]["bucket"], - fp_obj.file_snapshot.content_location, - "aaaa", - ) - dbsession.add(fp_obj) - dbsession.flush() - res = task.run_impl(dbsession, suite_id=obj.id_) - assert res == {"changed_count": 23, "successful": True} diff --git a/tasks/tests/unit/test_label_analysis.py b/tasks/tests/unit/test_label_analysis.py deleted file mode 100644 index 5ebae7a28..000000000 --- a/tasks/tests/unit/test_label_analysis.py +++ /dev/null @@ -1,964 +0,0 @@ -import json - -import pytest -from mock import patch -from shared.reports.reportfile import ReportFile -from shared.reports.resources import Report -from shared.reports.types import CoverageDatapoint, LineSession, ReportLine - -from database.models.labelanalysis import LabelAnalysisRequest -from database.tests.factories import RepositoryFactory -from database.tests.factories.labelanalysis import LabelAnalysisRequestFactory -from database.tests.factories.staticanalysis import ( - StaticAnalysisSingleFileSnapshotFactory, - StaticAnalysisSuiteFactory, - StaticAnalysisSuiteFilepathFactory, -) -from services.report import ReportService -from services.static_analysis import StaticAnalysisComparisonService -from tasks.label_analysis import ( - LabelAnalysisRequestProcessingTask, - LabelAnalysisRequestState, -) - -sample_head_static_analysis_dict = { - "empty_lines": [2, 3, 11], - "warnings": [], - "filename": "source.py", - "functions": [ - { - "identifier": "some_function", - "start_line": 6, - "end_line": 10, - "code_hash": "e69c18eff7d24f8bad3370db87f64333", - "complexity_metrics": { - "conditions": 1, - "mccabe_cyclomatic_complexity": 2, - "returns": 1, - "max_nested_conditional": 1, - }, - } - ], - "hash": "84d371ab1c57d2349038ac3671428803", - "language": "python", - "number_lines": 11, - "statements": [ - ( - 1, - { - "line_surety_ancestorship": None, - "start_column": 0, - "line_hash": "55c30cf01e202728b6952e9cba304798", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 5, - { - "line_surety_ancestorship": None, - "start_column": 4, - "line_hash": "1d7be9f2145760a59513a4049fcd0d1c", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 6, - { - "line_surety_ancestorship": 5, - "start_column": 4, - "line_hash": "f802087a854c26782ee8d4ece7214425", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 7, - { - "line_surety_ancestorship": None, - "start_column": 8, - "line_hash": "6ae3393fa7880fe8a844c03256cac37b", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 8, - { - "line_surety_ancestorship": 6, - "start_column": 4, - "line_hash": "5b099d1822e9236c540a5701a657225e", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 9, - { - "line_surety_ancestorship": 8, - "start_column": 4, - "line_hash": "e5d4915bb7dddeb18f53dc9fde9a3064", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 10, - { - "line_surety_ancestorship": 9, - "start_column": 4, - "line_hash": "e70ce43136171575ee525375b10f91a1", - "len": 0, - "extra_connected_lines": (), - }, - ), - ], - "definition_lines": [(4, 6)], - "import_lines": [], -} - -sample_base_static_analysis_dict = { - "empty_lines": [2, 3, 11], - "warnings": [], - "filename": "source.py", - "functions": [ - { - "identifier": "some_function", - "start_line": 6, - "end_line": 10, - "code_hash": "e4b52b6da12184142fcd7ff2c8412662", - "complexity_metrics": { - "conditions": 1, - "mccabe_cyclomatic_complexity": 2, - "returns": 1, - "max_nested_conditional": 1, - }, - } - ], - "hash": "811d0016249a5b1400a685164e5295de", - "language": "python", - "number_lines": 11, - "statements": [ - ( - 1, - { - "line_surety_ancestorship": None, - "start_column": 0, - "line_hash": "55c30cf01e202728b6952e9cba304798", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 5, - { - "line_surety_ancestorship": None, - "start_column": 4, - "line_hash": "1d7be9f2145760a59513a4049fcd0d1c", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 6, - { - "line_surety_ancestorship": 5, - "start_column": 4, - "line_hash": "52f98812dca4687f18373b87433df695", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 7, - { - "line_surety_ancestorship": None, - "start_column": 8, - "line_hash": "6ae3393fa7880fe8a844c03256cac37b", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 8, - { - "line_surety_ancestorship": 7, - "start_column": 8, - "line_hash": "5b099d1822e9236c540a5701a657225e", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 9, - { - "line_surety_ancestorship": 6, - "start_column": 4, - "line_hash": "e5d4915bb7dddeb18f53dc9fde9a3064", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 10, - { - "line_surety_ancestorship": 9, - "start_column": 4, - "line_hash": "e70ce43136171575ee525375b10f91a1", - "len": 0, - "extra_connected_lines": (), - }, - ), - ], - "definition_lines": [(4, 6)], - "import_lines": [], -} - - -@pytest.fixture -def sample_report_with_labels(): - r = Report() - first_rf = ReportFile("source.py") - first_rf.append( - 5, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=["apple", "label_one", "pineapple", "banana"], - ) - ], - complexity=None, - ), - ) - first_rf.append( - 6, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=["label_one", "pineapple", "banana"], - ) - ], - complexity=None, - ), - ) - first_rf.append( - 7, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=["banana"], - ) - ], - complexity=None, - ), - ) - first_rf.append( - 8, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=["banana"], - ), - CoverageDatapoint( - sessionid=5, - coverage=1, - coverage_type=None, - label_ids=["orangejuice"], - ), - ], - complexity=None, - ), - ) - first_rf.append( - 99, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=5, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=5, - coverage=1, - coverage_type=None, - label_ids=["justjuice"], - ), - ], - complexity=None, - ), - ) - first_rf.append( - 8, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=["label_one", "pineapple", "banana"], - ), - CoverageDatapoint( - sessionid=5, - coverage=1, - coverage_type=None, - label_ids=["Th2dMtk4M_codecov", "applejuice"], - ), - ], - complexity=None, - ), - ) - second_rf = ReportFile("path/from/additionsonly.py") - second_rf.append( - 6, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=["whatever", "here"], - ) - ], - complexity=None, - ), - ) - random_rf = ReportFile("path/from/randomfile_no_static_analysis.html") - random_rf.append( - 1, - ReportLine.create( - coverage=1, - type=None, - sessions=[(LineSession(id=1, coverage=1))], - datapoints=None, - complexity=None, - ), - ) - r.append(first_rf) - r.append(second_rf) - r.append(random_rf) - - return r - - -def test_simple_call_without_requested_labels_then_with_requested_labels( - dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider -): - mock_metrics = mocker.patch("tasks.label_analysis.metrics") - mocker.patch.object( - LabelAnalysisRequestProcessingTask, - "_get_lines_relevant_to_diff", - return_value={ - "all": False, - "files": {"source.py": {"all": False, "lines": {8, 6}}}, - }, - ) - mocker.patch.object( - ReportService, - "get_existing_report_for_commit", - return_value=sample_report_with_labels, - ) - repository = RepositoryFactory.create() - larf = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larf) - dbsession.flush() - base_sasf = StaticAnalysisSuiteFactory.create(commit=larf.base_commit) - head_sasf = StaticAnalysisSuiteFactory.create(commit=larf.head_commit) - dbsession.add(base_sasf) - dbsession.add(head_sasf) - dbsession.flush() - first_path = "abdkasdauchudh.txt" - second_path = "0diao9u3qdsdu.txt" - mock_storage.write_file( - "archive", - first_path, - json.dumps(sample_base_static_analysis_dict), - ) - mock_storage.write_file( - "archive", - second_path, - json.dumps(sample_head_static_analysis_dict), - ) - first_snapshot = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository, content_location=first_path - ) - second_snapshot = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository, content_location=second_path - ) - dbsession.add(first_snapshot) - dbsession.add(second_snapshot) - dbsession.flush() - first_base_file = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=first_snapshot, - analysis_suite=base_sasf, - filepath="source.py", - ) - first_head_file = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=second_snapshot, - analysis_suite=head_sasf, - filepath="source.py", - ) - dbsession.add(first_base_file) - dbsession.add(first_head_file) - dbsession.flush() - - task = LabelAnalysisRequestProcessingTask() - res = task.run_impl(dbsession, larf.id) - expected_present_report_labels = [ - "apple", - "applejuice", - "banana", - "here", - "justjuice", - "label_one", - "orangejuice", - "pineapple", - "whatever", - ] - expected_present_diff_labels = sorted( - ["applejuice", "banana", "label_one", "orangejuice", "pineapple"] - ) - expected_result = { - "absent_labels": [], - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "global_level_labels": ["applejuice", "justjuice", "orangejuice"], - "success": True, - "errors": [], - } - assert res == expected_result - mock_metrics.incr.assert_called_with("label_analysis_task.success") - dbsession.flush() - dbsession.refresh(larf) - assert larf.state_id == LabelAnalysisRequestState.FINISHED.db_id - assert larf.result == { - "absent_labels": [], - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "global_level_labels": ["applejuice", "justjuice", "orangejuice"], - } - # Now we call the task again, this time with the requested labels. - # This illustrates what should happen if we patch the labels after calculating - # And trigger the task again to save the new results - larf.requested_labels = ["tangerine", "pear", "banana", "apple"] - dbsession.flush() - res = task.run_impl(dbsession, larf.id) - expected_present_diff_labels = ["banana"] - expected_present_report_labels = ["apple", "banana"] - expected_absent_labels = ["pear", "tangerine"] - assert res == { - "absent_labels": expected_absent_labels, - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "success": True, - "global_level_labels": [], - "errors": [], - } - assert larf.result == { - "absent_labels": expected_absent_labels, - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "global_level_labels": [], - } - mock_metrics.incr.assert_called_with( - "label_analysis_task.already_calculated.new_result" - ) - - -def test_simple_call_with_requested_labels( - dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider -): - mock_metrics = mocker.patch("tasks.label_analysis.metrics") - mocker.patch.object( - LabelAnalysisRequestProcessingTask, - "_get_lines_relevant_to_diff", - return_value={ - "all": False, - "files": {"source.py": {"all": False, "lines": {8, 6}}}, - }, - ) - mocker.patch.object( - ReportService, - "get_existing_report_for_commit", - return_value=sample_report_with_labels, - ) - larf = LabelAnalysisRequestFactory.create( - requested_labels=["tangerine", "pear", "banana", "apple"] - ) - dbsession.add(larf) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - res = task.run_impl(dbsession, larf.id) - expected_present_diff_labels = ["banana"] - expected_present_report_labels = ["apple", "banana"] - expected_absent_labels = ["pear", "tangerine"] - assert res == { - "absent_labels": expected_absent_labels, - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "success": True, - "global_level_labels": [], - "errors": [], - } - dbsession.flush() - dbsession.refresh(larf) - assert larf.state_id == LabelAnalysisRequestState.FINISHED.db_id - assert larf.result == { - "absent_labels": expected_absent_labels, - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "global_level_labels": [], - } - mock_metrics.incr.assert_called_with("label_analysis_task.success") - - -def test_get_requested_labels(dbsession, mocker): - larf = LabelAnalysisRequestFactory.create(requested_labels=[]) - - def side_effect(*args, **kwargs): - larf.requested_labels = ["tangerine", "pear", "banana", "apple"] - - mock_refresh = mocker.patch.object(dbsession, "refresh", side_effect=side_effect) - dbsession.add(larf) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - task.dbsession = dbsession - labels = task._get_requested_labels(larf) - mock_refresh.assert_called() - assert labels == ["tangerine", "pear", "banana", "apple"] - - -def test_call_label_analysis_no_request_object(dbsession, mocker): - task = LabelAnalysisRequestProcessingTask() - mock_metrics = mocker.patch("tasks.label_analysis.metrics") - res = task.run_impl(db_session=dbsession, request_id=-1) - assert res == { - "success": False, - "present_report_labels": [], - "present_diff_labels": [], - "absent_labels": [], - "global_level_labels": [], - "errors": [ - { - "error_code": "not found", - "error_params": { - "extra": {}, - "message": "LabelAnalysisRequest not found", - }, - } - ], - } - mock_metrics.incr.assert_called_with( - "label_analysis_task.failed_to_calculate.larq_not_found" - ) - - -def test_get_executable_lines_labels_all_labels(sample_report_with_labels): - executable_lines = {"all": True} - task = LabelAnalysisRequestProcessingTask() - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == ( - { - "banana", - "justjuice", - "here", - "pineapple", - "applejuice", - "apple", - "whatever", - "label_one", - "orangejuice", - }, - set(), - ) - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == (task.get_all_report_labels(sample_report_with_labels), set()) - - -def test_get_executable_lines_labels_all_labels_in_one_file(sample_report_with_labels): - executable_lines = {"all": False, "files": {"source.py": {"all": True}}} - task = LabelAnalysisRequestProcessingTask() - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == ( - { - "apple", - "justjuice", - "applejuice", - "label_one", - "banana", - "orangejuice", - "pineapple", - }, - {"orangejuice", "justjuice", "applejuice"}, - ) - - -def test_get_executable_lines_labels_some_labels_in_one_file(sample_report_with_labels): - executable_lines = { - "all": False, - "files": {"source.py": {"all": False, "lines": set([5, 6])}}, - } - task = LabelAnalysisRequestProcessingTask() - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == ( - {"apple", "label_one", "pineapple", "banana"}, - set(), - ) - - -def test_get_executable_lines_labels_some_labels_in_one_file_with_globals( - sample_report_with_labels, -): - executable_lines = { - "all": False, - "files": {"source.py": {"all": False, "lines": set([6, 8])}}, - } - task = LabelAnalysisRequestProcessingTask() - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == ( - {"label_one", "pineapple", "banana", "orangejuice", "applejuice"}, - {"applejuice", "justjuice", "orangejuice"}, - ) - - -def test_get_executable_lines_labels_some_labels_in_one_file_other_null( - sample_report_with_labels, -): - executable_lines = { - "all": False, - "files": { - "source.py": {"all": False, "lines": set([5, 6])}, - "path/from/randomfile_no_static_analysis.html": None, - }, - } - task = LabelAnalysisRequestProcessingTask() - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == ( - {"apple", "label_one", "pineapple", "banana"}, - set(), - ) - - -def test_get_all_labels_one_session(sample_report_with_labels): - task = LabelAnalysisRequestProcessingTask() - assert task.get_labels_per_session(sample_report_with_labels, 1) == { - "apple", - "banana", - "here", - "label_one", - "pineapple", - "whatever", - } - assert task.get_labels_per_session(sample_report_with_labels, 2) == set() - assert task.get_labels_per_session(sample_report_with_labels, 5) == { - "orangejuice", - "justjuice", - "applejuice", - } - - -def test_get_relevant_executable_lines_nothing_found(dbsession, mocker): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larf = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larf) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - task.errors = [] - task.dbsession = dbsession - parsed_git_diff = [] - assert task.get_relevant_executable_lines(larf, parsed_git_diff) is None - - -def test_get_relevant_executable_lines_with_static_analyses(dbsession, mocker): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larf = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larf) - dbsession.flush() - base_sasf = StaticAnalysisSuiteFactory.create(commit=larf.base_commit) - head_sasf = StaticAnalysisSuiteFactory.create(commit=larf.head_commit) - dbsession.add(base_sasf) - dbsession.add(head_sasf) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - parsed_git_diff = [] - mocked_res = mocker.patch.object( - StaticAnalysisComparisonService, "get_base_lines_relevant_to_change" - ) - assert ( - task.get_relevant_executable_lines(larf, parsed_git_diff) - == mocked_res.return_value - ) - - -def test_run_impl_with_error( - dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider -): - mock_metrics = mocker.patch("tasks.label_analysis.metrics") - mocker.patch.object( - LabelAnalysisRequestProcessingTask, - "_get_lines_relevant_to_diff", - side_effect=Exception("Oh no"), - ) - larf = LabelAnalysisRequestFactory.create( - requested_labels=["tangerine", "pear", "banana", "apple"] - ) - dbsession.add(larf) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - res = task.run_impl(dbsession, larf.id) - expected_result = { - "absent_labels": [], - "present_diff_labels": [], - "present_report_labels": [], - "success": False, - "global_level_labels": [], - "errors": [ - { - "error_code": "failed", - "error_params": {"extra": {}, "message": "Failed to calculate"}, - } - ], - } - assert res == expected_result - dbsession.flush() - dbsession.refresh(larf) - assert larf.state_id == LabelAnalysisRequestState.ERROR.db_id - assert larf.result is None - mock_metrics.incr.assert_called_with( - "label_analysis_task.failed_to_calculate.exception" - ) - - -def test_calculate_result_no_report( - dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider -): - mock_metrics = mocker.patch("tasks.label_analysis.metrics") - larf: LabelAnalysisRequest = LabelAnalysisRequestFactory.create( - # This being not-ordered is important in the test - # TO make sure we go through the warning at the bottom of run_impl - requested_labels=["tangerine", "pear", "banana", "apple"] - ) - dbsession.add(larf) - dbsession.flush() - mocker.patch.object( - ReportService, - "get_existing_report_for_commit", - return_value=None, - ) - mocker.patch.object( - LabelAnalysisRequestProcessingTask, - "_get_lines_relevant_to_diff", - return_value=(set(), set(), set()), - ) - task = LabelAnalysisRequestProcessingTask() - res = task.run_impl(dbsession, larf.id) - assert res == { - "success": True, - "absent_labels": larf.requested_labels, - "present_diff_labels": [], - "present_report_labels": [], - "global_level_labels": [], - "errors": [ - { - "error_code": "missing data", - "error_params": { - "extra": { - "base_commit": larf.base_commit.commitid, - "head_commit": larf.head_commit.commitid, - }, - "message": "Missing base report", - }, - } - ], - } - mock_metrics.incr.assert_called_with( - "label_analysis_task.failed_to_calculate.missing_info" - ) - - -@patch("tasks.label_analysis.parse_git_diff_json", return_value=["parsed_git_diff"]) -def test__get_parsed_git_diff(mock_parse_diff, dbsession, mock_repo_provider): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larq = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larq) - dbsession.flush() - mock_repo_provider.get_compare.return_value = {"diff": "json"} - task = LabelAnalysisRequestProcessingTask() - task.errors = [] - parsed_diff = task._get_parsed_git_diff(larq) - assert parsed_diff == ["parsed_git_diff"] - mock_parse_diff.assert_called_with({"diff": "json"}) - mock_repo_provider.get_compare.assert_called_with( - larq.base_commit.commitid, larq.head_commit.commitid - ) - - -@patch("tasks.label_analysis.parse_git_diff_json", return_value=["parsed_git_diff"]) -def test__get_parsed_git_diff_error(mock_parse_diff, dbsession, mock_repo_provider): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larq = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larq) - dbsession.flush() - mock_repo_provider.get_compare.side_effect = Exception("Oh no") - task = LabelAnalysisRequestProcessingTask() - task.errors = [] - task.dbsession = dbsession - parsed_diff = task._get_parsed_git_diff(larq) - assert parsed_diff is None - mock_parse_diff.assert_not_called() - mock_repo_provider.get_compare.assert_called_with( - larq.base_commit.commitid, larq.head_commit.commitid - ) - - -@patch( - "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_relevant_executable_lines", - return_value=[{"all": False, "files": {}}], -) -@patch( - "tasks.label_analysis.LabelAnalysisRequestProcessingTask._get_parsed_git_diff", - return_value=["parsed_git_diff"], -) -def test__get_lines_relevant_to_diff( - mock_parse_diff, mock_get_relevant_lines, dbsession -): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larq = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larq) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - lines = task._get_lines_relevant_to_diff(larq) - assert lines == [{"all": False, "files": {}}] - mock_parse_diff.assert_called_with(larq) - mock_get_relevant_lines.assert_called_with(larq, ["parsed_git_diff"]) - - -@patch( - "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_relevant_executable_lines" -) -@patch( - "tasks.label_analysis.LabelAnalysisRequestProcessingTask._get_parsed_git_diff", - return_value=None, -) -def test__get_lines_relevant_to_diff_error( - mock_parse_diff, mock_get_relevant_lines, dbsession -): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larq = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larq) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - lines = task._get_lines_relevant_to_diff(larq) - assert lines is None - mock_parse_diff.assert_called_with(larq) - mock_get_relevant_lines.assert_not_called() diff --git a/tasks/tests/unit/test_label_analysis_encoded_labels.py b/tasks/tests/unit/test_label_analysis_encoded_labels.py deleted file mode 100644 index 56d78984c..000000000 --- a/tasks/tests/unit/test_label_analysis_encoded_labels.py +++ /dev/null @@ -1,1010 +0,0 @@ -import json - -import pytest -from mock import MagicMock, patch -from shared.reports.reportfile import ReportFile -from shared.reports.resources import Report -from shared.reports.types import CoverageDatapoint, LineSession, ReportLine - -from database.models.labelanalysis import LabelAnalysisRequest -from database.tests.factories import RepositoryFactory -from database.tests.factories.core import ReportFactory -from database.tests.factories.labelanalysis import LabelAnalysisRequestFactory -from database.tests.factories.staticanalysis import ( - StaticAnalysisSingleFileSnapshotFactory, - StaticAnalysisSuiteFactory, - StaticAnalysisSuiteFilepathFactory, -) -from helpers.labels import SpecialLabelsEnum -from services.report import ReportService -from services.static_analysis import StaticAnalysisComparisonService -from tasks.label_analysis import ( - ExistingLabelSetsNotEncoded, - LabelAnalysisRequestProcessingTask, - LabelAnalysisRequestState, -) - -sample_head_static_analysis_dict = { - "empty_lines": [2, 3, 11], - "warnings": [], - "filename": "source.py", - "functions": [ - { - "identifier": "some_function", - "start_line": 6, - "end_line": 10, - "code_hash": "e69c18eff7d24f8bad3370db87f64333", - "complexity_metrics": { - "conditions": 1, - "mccabe_cyclomatic_complexity": 2, - "returns": 1, - "max_nested_conditional": 1, - }, - } - ], - "hash": "84d371ab1c57d2349038ac3671428803", - "language": "python", - "number_lines": 11, - "statements": [ - ( - 1, - { - "line_surety_ancestorship": None, - "start_column": 0, - "line_hash": "55c30cf01e202728b6952e9cba304798", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 5, - { - "line_surety_ancestorship": None, - "start_column": 4, - "line_hash": "1d7be9f2145760a59513a4049fcd0d1c", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 6, - { - "line_surety_ancestorship": 5, - "start_column": 4, - "line_hash": "f802087a854c26782ee8d4ece7214425", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 7, - { - "line_surety_ancestorship": None, - "start_column": 8, - "line_hash": "6ae3393fa7880fe8a844c03256cac37b", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 8, - { - "line_surety_ancestorship": 6, - "start_column": 4, - "line_hash": "5b099d1822e9236c540a5701a657225e", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 9, - { - "line_surety_ancestorship": 8, - "start_column": 4, - "line_hash": "e5d4915bb7dddeb18f53dc9fde9a3064", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 10, - { - "line_surety_ancestorship": 9, - "start_column": 4, - "line_hash": "e70ce43136171575ee525375b10f91a1", - "len": 0, - "extra_connected_lines": (), - }, - ), - ], - "definition_lines": [(4, 6)], - "import_lines": [], -} - -sample_base_static_analysis_dict = { - "empty_lines": [2, 3, 11], - "warnings": [], - "filename": "source.py", - "functions": [ - { - "identifier": "some_function", - "start_line": 6, - "end_line": 10, - "code_hash": "e4b52b6da12184142fcd7ff2c8412662", - "complexity_metrics": { - "conditions": 1, - "mccabe_cyclomatic_complexity": 2, - "returns": 1, - "max_nested_conditional": 1, - }, - } - ], - "hash": "811d0016249a5b1400a685164e5295de", - "language": "python", - "number_lines": 11, - "statements": [ - ( - 1, - { - "line_surety_ancestorship": None, - "start_column": 0, - "line_hash": "55c30cf01e202728b6952e9cba304798", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 5, - { - "line_surety_ancestorship": None, - "start_column": 4, - "line_hash": "1d7be9f2145760a59513a4049fcd0d1c", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 6, - { - "line_surety_ancestorship": 5, - "start_column": 4, - "line_hash": "52f98812dca4687f18373b87433df695", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 7, - { - "line_surety_ancestorship": None, - "start_column": 8, - "line_hash": "6ae3393fa7880fe8a844c03256cac37b", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 8, - { - "line_surety_ancestorship": 7, - "start_column": 8, - "line_hash": "5b099d1822e9236c540a5701a657225e", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 9, - { - "line_surety_ancestorship": 6, - "start_column": 4, - "line_hash": "e5d4915bb7dddeb18f53dc9fde9a3064", - "len": 0, - "extra_connected_lines": (), - }, - ), - ( - 10, - { - "line_surety_ancestorship": 9, - "start_column": 4, - "line_hash": "e70ce43136171575ee525375b10f91a1", - "len": 0, - "extra_connected_lines": (), - }, - ), - ], - "definition_lines": [(4, 6)], - "import_lines": [], -} - - -@pytest.fixture -def sample_report_with_labels(): - r = Report() - report_labels_index = { - 0: SpecialLabelsEnum.CODECOV_ALL_LABELS_PLACEHOLDER.corresponding_label, - 1: "apple", - 2: "label_one", - 3: "pineapple", - 4: "banana", - 5: "orangejuice", - 6: "justjuice", - 7: "whatever", - 8: "here", - 9: "applejuice", - } - first_rf = ReportFile("source.py") - first_rf.append( - 5, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=[1, 2, 3, 4], - ) - ], - complexity=None, - ), - ) - first_rf.append( - 6, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=[2, 3, 4], - ) - ], - complexity=None, - ), - ) - first_rf.append( - 7, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=[4], - ) - ], - complexity=None, - ), - ) - first_rf.append( - 8, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=[4], - ), - CoverageDatapoint( - sessionid=5, - coverage=1, - coverage_type=None, - label_ids=[5], - ), - ], - complexity=None, - ), - ) - first_rf.append( - 99, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=5, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=5, - coverage=1, - coverage_type=None, - label_ids=[6], - ), - ], - complexity=None, - ), - ) - first_rf.append( - 8, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=[2, 3, 4], - ), - CoverageDatapoint( - sessionid=5, - coverage=1, - coverage_type=None, - label_ids=[0, 9], - ), - ], - complexity=None, - ), - ) - second_rf = ReportFile("path/from/additionsonly.py") - second_rf.append( - 6, - ReportLine.create( - coverage=1, - type=None, - sessions=[ - ( - LineSession( - id=1, - coverage=1, - ) - ) - ], - datapoints=[ - CoverageDatapoint( - sessionid=1, - coverage=1, - coverage_type=None, - label_ids=[7, 8], - ) - ], - complexity=None, - ), - ) - random_rf = ReportFile("path/from/randomfile_no_static_analysis.html") - random_rf.append( - 1, - ReportLine.create( - coverage=1, - type=None, - sessions=[(LineSession(id=1, coverage=1))], - datapoints=None, - complexity=None, - ), - ) - r.append(first_rf) - r.append(second_rf) - r.append(random_rf) - r.labels_index = report_labels_index - return r - - -def test_simple_call_without_requested_labels_then_with_requested_labels( - dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider -): - mock_metrics = mocker.patch("tasks.label_analysis.metrics") - mocker.patch.object( - LabelAnalysisRequestProcessingTask, - "_get_lines_relevant_to_diff", - return_value={ - "all": False, - "files": {"source.py": {"all": False, "lines": {8, 6}}}, - }, - ) - mocker.patch.object( - ReportService, - "get_existing_report_for_commit", - return_value=sample_report_with_labels, - ) - repository = RepositoryFactory.create() - larf = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larf) - dbsession.flush() - base_sasf = StaticAnalysisSuiteFactory.create(commit=larf.base_commit) - head_sasf = StaticAnalysisSuiteFactory.create(commit=larf.head_commit) - dbsession.add(base_sasf) - dbsession.add(head_sasf) - dbsession.flush() - first_path = "abdkasdauchudh.txt" - second_path = "0diao9u3qdsdu.txt" - mock_storage.write_file( - "archive", - first_path, - json.dumps(sample_base_static_analysis_dict), - ) - mock_storage.write_file( - "archive", - second_path, - json.dumps(sample_head_static_analysis_dict), - ) - first_snapshot = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository, content_location=first_path - ) - second_snapshot = StaticAnalysisSingleFileSnapshotFactory.create( - repository=repository, content_location=second_path - ) - dbsession.add(first_snapshot) - dbsession.add(second_snapshot) - dbsession.flush() - first_base_file = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=first_snapshot, - analysis_suite=base_sasf, - filepath="source.py", - ) - first_head_file = StaticAnalysisSuiteFilepathFactory.create( - file_snapshot=second_snapshot, - analysis_suite=head_sasf, - filepath="source.py", - ) - dbsession.add(first_base_file) - dbsession.add(first_head_file) - dbsession.flush() - - task = LabelAnalysisRequestProcessingTask() - assert sample_report_with_labels.labels_index is not None - res = task.run_impl(dbsession, larf.id) - expected_present_report_labels = [ - "apple", - "applejuice", - "banana", - "here", - "justjuice", - "label_one", - "orangejuice", - "pineapple", - "whatever", - ] - expected_present_diff_labels = sorted( - ["applejuice", "banana", "label_one", "orangejuice", "pineapple"] - ) - expected_result = { - "absent_labels": [], - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "global_level_labels": ["applejuice", "justjuice", "orangejuice"], - "success": True, - "errors": [], - } - assert res == expected_result - mock_metrics.incr.assert_called_with("label_analysis_task.success") - # It's zero because the report has the _labels_index already - dbsession.flush() - dbsession.refresh(larf) - assert larf.state_id == LabelAnalysisRequestState.FINISHED.db_id - assert larf.result == { - "absent_labels": [], - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "global_level_labels": ["applejuice", "justjuice", "orangejuice"], - } - # Now we call the task again, this time with the requested labels. - # This illustrates what should happen if we patch the labels after calculating - # And trigger the task again to save the new results - larf.requested_labels = ["tangerine", "pear", "banana", "apple"] - dbsession.flush() - res = task.run_impl(dbsession, larf.id) - expected_present_diff_labels = ["banana"] - expected_present_report_labels = ["apple", "banana"] - expected_absent_labels = ["pear", "tangerine"] - assert res == { - "absent_labels": expected_absent_labels, - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "success": True, - "global_level_labels": [], - "errors": [], - } - assert larf.result == { - "absent_labels": expected_absent_labels, - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "global_level_labels": [], - } - mock_metrics.incr.assert_called_with( - "label_analysis_task.already_calculated.new_result" - ) - mock_metrics.incr.assert_called_with( - "label_analysis_task.already_calculated.new_result" - ) - - -def test_simple_call_with_requested_labels( - dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider -): - mock_metrics = mocker.patch("tasks.label_analysis.metrics") - mocker.patch.object( - LabelAnalysisRequestProcessingTask, - "_get_lines_relevant_to_diff", - return_value={ - "all": False, - "files": {"source.py": {"all": False, "lines": {8, 6}}}, - }, - ) - mocker.patch.object( - ReportService, - "get_existing_report_for_commit", - return_value=sample_report_with_labels, - ) - larf = LabelAnalysisRequestFactory.create( - requested_labels=["tangerine", "pear", "banana", "apple"] - ) - ReportFactory(commit=larf.base_commit) - dbsession.add(larf) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - res = task.run_impl(dbsession, larf.id) - expected_present_diff_labels = ["banana"] - expected_present_report_labels = ["apple", "banana"] - expected_absent_labels = ["pear", "tangerine"] - assert res == { - "absent_labels": expected_absent_labels, - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "success": True, - "global_level_labels": [], - "errors": [], - } - dbsession.flush() - dbsession.refresh(larf) - assert larf.state_id == LabelAnalysisRequestState.FINISHED.db_id - assert larf.result == { - "absent_labels": expected_absent_labels, - "present_diff_labels": expected_present_diff_labels, - "present_report_labels": expected_present_report_labels, - "global_level_labels": [], - } - mock_metrics.incr.assert_called_with("label_analysis_task.success") - mock_metrics.incr.assert_called_with("label_analysis_task.success") - - -def test_get_requested_labels(dbsession, mocker): - larf = LabelAnalysisRequestFactory.create(requested_labels=[]) - - def side_effect(*args, **kwargs): - larf.requested_labels = ["tangerine", "pear", "banana", "apple"] - - mock_refresh = mocker.patch.object(dbsession, "refresh", side_effect=side_effect) - dbsession.add(larf) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - task.dbsession = dbsession - labels = task._get_requested_labels(larf) - mock_refresh.assert_called() - assert labels == ["tangerine", "pear", "banana", "apple"] - - -def test_call_label_analysis_no_request_object(dbsession, mocker): - task = LabelAnalysisRequestProcessingTask() - mock_metrics = mocker.patch("tasks.label_analysis.metrics") - res = task.run_impl(db_session=dbsession, request_id=-1) - assert res == { - "success": False, - "present_report_labels": [], - "present_diff_labels": [], - "absent_labels": [], - "global_level_labels": [], - "errors": [ - { - "error_code": "not found", - "error_params": { - "extra": {}, - "message": "LabelAnalysisRequest not found", - }, - } - ], - } - mock_metrics.incr.assert_called_with( - "label_analysis_task.failed_to_calculate.larq_not_found" - ) - - -def test_get_executable_lines_labels_all_labels(sample_report_with_labels): - executable_lines = {"all": True} - task = LabelAnalysisRequestProcessingTask() - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == ( - { - 4, - 6, - 8, - 3, - 9, - 1, - 7, - 2, - 5, - }, - set(), - ) - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == (task.get_all_report_labels(sample_report_with_labels), set()) - - -def test_get_executable_lines_labels_all_labels_in_one_file(sample_report_with_labels): - executable_lines = {"all": False, "files": {"source.py": {"all": True}}} - task = LabelAnalysisRequestProcessingTask() - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == ( - { - 1, - 6, - 9, - 2, - 4, - 5, - 3, - }, - {5, 6, 9}, - ) - - -def test_get_executable_lines_labels_some_labels_in_one_file(sample_report_with_labels): - executable_lines = { - "all": False, - "files": {"source.py": {"all": False, "lines": set([5, 6])}}, - } - task = LabelAnalysisRequestProcessingTask() - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == ( - {1, 2, 3, 4}, - set(), - ) - - -def test_get_executable_lines_labels_some_labels_in_one_file_with_globals( - sample_report_with_labels, -): - executable_lines = { - "all": False, - "files": {"source.py": {"all": False, "lines": set([6, 8])}}, - } - task = LabelAnalysisRequestProcessingTask() - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == ( - {2, 3, 4, 5, 9}, - {9, 6, 5}, - ) - - -def test_get_executable_lines_labels_some_labels_in_one_file_other_null( - sample_report_with_labels, -): - executable_lines = { - "all": False, - "files": { - "source.py": {"all": False, "lines": set([5, 6])}, - "path/from/randomfile_no_static_analysis.html": None, - }, - } - task = LabelAnalysisRequestProcessingTask() - assert task.get_executable_lines_labels( - sample_report_with_labels, executable_lines - ) == ( - {1, 2, 3, 4}, - set(), - ) - - -def test_get_all_labels_one_session(sample_report_with_labels): - task = LabelAnalysisRequestProcessingTask() - assert task.get_labels_per_session(sample_report_with_labels, 1) == { - 1, - 4, - 8, - 2, - 3, - 7, - } - assert task.get_labels_per_session(sample_report_with_labels, 2) == set() - assert task.get_labels_per_session(sample_report_with_labels, 5) == { - 5, - 6, - 9, - } - - -def test_get_relevant_executable_lines_nothing_found(dbsession, mocker): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larf = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larf) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - task.errors = [] - task.dbsession = dbsession - parsed_git_diff = [] - assert task.get_relevant_executable_lines(larf, parsed_git_diff) is None - - -def test_get_relevant_executable_lines_with_static_analyses(dbsession, mocker): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larf = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larf) - dbsession.flush() - base_sasf = StaticAnalysisSuiteFactory.create(commit=larf.base_commit) - head_sasf = StaticAnalysisSuiteFactory.create(commit=larf.head_commit) - dbsession.add(base_sasf) - dbsession.add(head_sasf) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - parsed_git_diff = [] - mocked_res = mocker.patch.object( - StaticAnalysisComparisonService, "get_base_lines_relevant_to_change" - ) - assert ( - task.get_relevant_executable_lines(larf, parsed_git_diff) - == mocked_res.return_value - ) - - -def test_run_impl_with_error( - dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider -): - mock_metrics = mocker.patch("tasks.label_analysis.metrics") - mocker.patch.object( - LabelAnalysisRequestProcessingTask, - "_get_lines_relevant_to_diff", - side_effect=Exception("Oh no"), - ) - larf = LabelAnalysisRequestFactory.create( - requested_labels=["tangerine", "pear", "banana", "apple"] - ) - dbsession.add(larf) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - res = task.run_impl(dbsession, larf.id) - expected_result = { - "absent_labels": [], - "present_diff_labels": [], - "present_report_labels": [], - "success": False, - "global_level_labels": [], - "errors": [ - { - "error_code": "failed", - "error_params": {"extra": {}, "message": "Failed to calculate"}, - } - ], - } - assert res == expected_result - dbsession.flush() - dbsession.refresh(larf) - assert larf.state_id == LabelAnalysisRequestState.ERROR.db_id - assert larf.result is None - mock_metrics.incr.assert_called_with( - "label_analysis_task.failed_to_calculate.exception" - ) - - -def test_calculate_result_no_report( - dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider -): - mock_metrics = mocker.patch("tasks.label_analysis.metrics") - larf: LabelAnalysisRequest = LabelAnalysisRequestFactory.create( - # This being not-ordered is important in the test - # TO make sure we go through the warning at the bottom of run_impl - requested_labels=["tangerine", "pear", "banana", "apple"] - ) - dbsession.add(larf) - dbsession.flush() - mocker.patch.object( - ReportService, - "get_existing_report_for_commit", - return_value=None, - ) - mocker.patch.object( - LabelAnalysisRequestProcessingTask, - "_get_lines_relevant_to_diff", - return_value=(set(), set(), set()), - ) - task = LabelAnalysisRequestProcessingTask() - res = task.run_impl(dbsession, larf.id) - assert res == { - "success": True, - "absent_labels": larf.requested_labels, - "present_diff_labels": [], - "present_report_labels": [], - "global_level_labels": [], - "errors": [ - { - "error_code": "missing data", - "error_params": { - "extra": { - "base_commit": larf.base_commit.commitid, - "head_commit": larf.head_commit.commitid, - }, - "message": "Missing base report", - }, - } - ], - } - mock_metrics.incr.assert_called_with( - "label_analysis_task.failed_to_calculate.missing_info" - ) - - -@patch("tasks.label_analysis.parse_git_diff_json", return_value=["parsed_git_diff"]) -def test__get_parsed_git_diff(mock_parse_diff, dbsession, mock_repo_provider): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larq = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larq) - dbsession.flush() - mock_repo_provider.get_compare.return_value = {"diff": "json"} - task = LabelAnalysisRequestProcessingTask() - task.errors = [] - parsed_diff = task._get_parsed_git_diff(larq) - assert parsed_diff == ["parsed_git_diff"] - mock_parse_diff.assert_called_with({"diff": "json"}) - mock_repo_provider.get_compare.assert_called_with( - larq.base_commit.commitid, larq.head_commit.commitid - ) - - -@patch("tasks.label_analysis.parse_git_diff_json", return_value=["parsed_git_diff"]) -def test__get_parsed_git_diff_error(mock_parse_diff, dbsession, mock_repo_provider): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larq = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larq) - dbsession.flush() - mock_repo_provider.get_compare.side_effect = Exception("Oh no") - task = LabelAnalysisRequestProcessingTask() - task.errors = [] - task.dbsession = dbsession - parsed_diff = task._get_parsed_git_diff(larq) - assert parsed_diff is None - mock_parse_diff.assert_not_called() - mock_repo_provider.get_compare.assert_called_with( - larq.base_commit.commitid, larq.head_commit.commitid - ) - - -@patch( - "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_relevant_executable_lines", - return_value=[{"all": False, "files": {}}], -) -@patch( - "tasks.label_analysis.LabelAnalysisRequestProcessingTask._get_parsed_git_diff", - return_value=["parsed_git_diff"], -) -def test__get_lines_relevant_to_diff( - mock_parse_diff, mock_get_relevant_lines, dbsession -): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larq = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larq) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - lines = task._get_lines_relevant_to_diff(larq) - assert lines == [{"all": False, "files": {}}] - mock_parse_diff.assert_called_with(larq) - mock_get_relevant_lines.assert_called_with(larq, ["parsed_git_diff"]) - - -@patch( - "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_relevant_executable_lines" -) -@patch( - "tasks.label_analysis.LabelAnalysisRequestProcessingTask._get_parsed_git_diff", - return_value=None, -) -def test__get_lines_relevant_to_diff_error( - mock_parse_diff, mock_get_relevant_lines, dbsession -): - repository = RepositoryFactory.create() - dbsession.add(repository) - dbsession.flush() - larq = LabelAnalysisRequestFactory.create( - base_commit__repository=repository, head_commit__repository=repository - ) - dbsession.add(larq) - dbsession.flush() - task = LabelAnalysisRequestProcessingTask() - lines = task._get_lines_relevant_to_diff(larq) - assert lines is None - mock_parse_diff.assert_called_with(larq) - mock_get_relevant_lines.assert_not_called() - - -@patch( - "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_all_report_labels", - return_value=set(), -) -@patch( - "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_executable_lines_labels", - return_value=(set(), set()), -) -def test___get_existing_labels_no_labels_in_report( - mock_get_executable_lines_labels, mock_get_all_report_labels -): - report = MagicMock(name="fake_report") - lines_relevant = MagicMock(name="fake_lines_relevant_to_diff") - task = LabelAnalysisRequestProcessingTask() - res = task._get_existing_labels(report, lines_relevant) - expected = ExistingLabelSetsNotEncoded( - all_report_labels=set(), - executable_lines_labels=set(), - global_level_labels=set(), - ) - assert isinstance(res, ExistingLabelSetsNotEncoded) - assert res == expected