diff --git a/celery_task_router.py b/celery_task_router.py
index 1d33b5958..8f4f8ee1c 100644
--- a/celery_task_router.py
+++ b/celery_task_router.py
@@ -4,8 +4,6 @@
from database.engine import get_db_session
from database.models.core import Commit, CompareCommit, Owner, Repository
-from database.models.labelanalysis import LabelAnalysisRequest
-from database.models.staticanalysis import StaticAnalysisSuite
def _get_user_plan_from_ownerid(db_session, ownerid, *args, **kwargs) -> str:
@@ -45,34 +43,6 @@ def _get_user_plan_from_comparison_id(dbsession, comparison_id, *args, **kwargs)
return DEFAULT_FREE_PLAN
-def _get_user_plan_from_label_request_id(dbsession, request_id, *args, **kwargs) -> str:
- result = (
- dbsession.query(Owner.plan)
- .join(LabelAnalysisRequest.head_commit)
- .join(Commit.repository)
- .join(Repository.owner)
- .filter(LabelAnalysisRequest.id_ == request_id)
- .first()
- )
- if result:
- return result.plan
- return DEFAULT_FREE_PLAN
-
-
-def _get_user_plan_from_suite_id(dbsession, suite_id, *args, **kwargs) -> str:
- result = (
- dbsession.query(Owner.plan)
- .join(StaticAnalysisSuite.commit)
- .join(Commit.repository)
- .join(Repository.owner)
- .filter(StaticAnalysisSuite.id_ == suite_id)
- .first()
- )
- if result:
- return result.plan
- return DEFAULT_FREE_PLAN
-
-
def _get_user_plan_from_task(dbsession, task_name: str, task_kwargs: dict) -> str:
owner_plan_lookup_funcs = {
# from ownerid
@@ -96,10 +66,6 @@ def _get_user_plan_from_task(dbsession, task_name: str, task_kwargs: dict) -> st
shared_celery_config.manual_upload_completion_trigger_task_name: _get_user_plan_from_repoid,
# from comparison_id
shared_celery_config.compute_comparison_task_name: _get_user_plan_from_comparison_id,
- # from label_request_id
- shared_celery_config.label_analysis_task_name: _get_user_plan_from_label_request_id,
- # from suite_id
- shared_celery_config.static_analysis_task_name: _get_user_plan_from_suite_id,
}
func_to_use = owner_plan_lookup_funcs.get(
task_name, lambda *args, **kwargs: DEFAULT_FREE_PLAN
diff --git a/services/static_analysis/__init__.py b/services/static_analysis/__init__.py
deleted file mode 100644
index a9672e0d4..000000000
--- a/services/static_analysis/__init__.py
+++ /dev/null
@@ -1,188 +0,0 @@
-import json
-import logging
-import typing
-
-import sentry_sdk
-from shared.storage.exceptions import FileNotInStorageError
-
-from database.models.staticanalysis import (
- StaticAnalysisSingleFileSnapshot,
- StaticAnalysisSuite,
- StaticAnalysisSuiteFilepath,
-)
-from services.archive import ArchiveService
-from services.static_analysis.git_diff_parser import DiffChange, DiffChangeType
-from services.static_analysis.single_file_analyzer import (
- AntecessorFindingResult,
- SingleFileSnapshotAnalyzer,
-)
-
-log = logging.getLogger(__name__)
-
-
-def _get_analysis_content_mapping(analysis: StaticAnalysisSuite, filepaths):
- db_session = analysis.get_db_session()
- return dict(
- db_session.query(
- StaticAnalysisSuiteFilepath.filepath,
- StaticAnalysisSingleFileSnapshot.content_location,
- )
- .join(
- StaticAnalysisSuiteFilepath,
- StaticAnalysisSuiteFilepath.file_snapshot_id
- == StaticAnalysisSingleFileSnapshot.id_,
- )
- .filter(
- StaticAnalysisSuiteFilepath.filepath.in_(filepaths),
- StaticAnalysisSuiteFilepath.analysis_suite_id == analysis.id_,
- )
- )
-
-
-class StaticAnalysisComparisonService(object):
- def __init__(
- self,
- base_static_analysis: StaticAnalysisSuite,
- head_static_analysis: StaticAnalysisSuite,
- git_diff: typing.List[DiffChange],
- ):
- self._base_static_analysis = base_static_analysis
- self._head_static_analysis = head_static_analysis
- self._git_diff = git_diff
- self._archive_service = None
-
- @property
- def archive_service(self):
- if self._archive_service is None:
- self._archive_service = ArchiveService(
- self._base_static_analysis.commit.repository
- )
- return self._archive_service
-
- @sentry_sdk.trace
- def get_base_lines_relevant_to_change(self) -> typing.List[typing.Dict]:
- final_result = {"all": False, "files": {}}
- db_session = self._base_static_analysis.get_db_session()
- head_analysis_content_locations_mapping = _get_analysis_content_mapping(
- self._head_static_analysis,
- [
- change.after_filepath
- for change in self._git_diff
- if change.after_filepath
- ],
- )
- base_analysis_content_locations_mapping = _get_analysis_content_mapping(
- self._base_static_analysis,
- [
- change.before_filepath
- for change in self._git_diff
- if change.before_filepath
- ],
- )
- # @giovanni-guidini 2023-06-14
- # NOTE: Maybe we can paralelize this bit.
- # There's some level of IO involved.
- for change in self._git_diff:
- # This check should happen way earlier
- if change.change_type == DiffChangeType.new:
- return {"all": True}
- final_result["files"][change.before_filepath] = self._analyze_single_change(
- db_session,
- change,
- base_analysis_content_locations_mapping.get(change.before_filepath),
- head_analysis_content_locations_mapping.get(change.after_filepath),
- )
- return final_result
-
- def _load_snapshot_data(
- self, filepath, content_location
- ) -> typing.Optional[SingleFileSnapshotAnalyzer]:
- if not content_location:
- return None
- try:
- return SingleFileSnapshotAnalyzer(
- filepath,
- json.loads(self.archive_service.read_file(content_location)),
- )
- except FileNotInStorageError:
- log.warning(
- "Unable to load file for static analysis comparison",
- extra=dict(filepath=filepath, content_location=content_location),
- )
- return None
-
- def _analyze_single_change(
- self,
- db_session,
- change: DiffChange,
- base_analysis_file_obj_content_location,
- head_analysis_file_obj_content_location,
- ):
- if change.change_type == DiffChangeType.deleted:
- # file simply deleted.
- # all lines involved in it needs their tests rechecked
- return {"all": True, "lines": None}
- if change.change_type == DiffChangeType.modified:
- result_so_far = {"all": False, "lines": set()}
- head_analysis_file_data = self._load_snapshot_data(
- change.after_filepath, head_analysis_file_obj_content_location
- )
- base_analysis_file_data = self._load_snapshot_data(
- change.before_filepath, base_analysis_file_obj_content_location
- )
- if not head_analysis_file_data and not base_analysis_file_data:
- return None
- if head_analysis_file_data is None or base_analysis_file_data is None:
- log.warning(
- "Failed to load snapshot for file. Fallback to all lines in the file",
- extra=dict(
- file_path=change.after_filepath,
- is_missing_head=(head_analysis_file_data is None),
- is_missing_base=(base_analysis_file_data is None),
- ),
- )
- return {"all": True, "lines": None}
-
- for base_line in change.lines_only_on_base:
- corresponding_exec_line = (
- base_analysis_file_data.get_corresponding_executable_line(base_line)
- )
- if corresponding_exec_line is not None:
- result_so_far["lines"].add(corresponding_exec_line)
- affected_statement_lines = set(
- x
- for x in (
- head_analysis_file_data.get_corresponding_executable_line(li)
- for li in change.lines_only_on_head
- )
- if x is not None
- )
- for head_line in affected_statement_lines:
- (
- matching_type,
- antecessor_head_line,
- ) = head_analysis_file_data.get_antecessor_executable_line(
- head_line, lines_to_not_consider=affected_statement_lines
- )
- if matching_type == AntecessorFindingResult.file:
- return {"all": True, "lines": None}
- elif matching_type == AntecessorFindingResult.function:
- matching_function = (
- base_analysis_file_data.find_function_by_identifier(
- antecessor_head_line
- )
- )
- if matching_function:
- line_entrypoint = matching_function["start_line"]
- result_so_far["lines"].add(line_entrypoint)
- else:
- # No matches, function does not exist on base, go to everything
- return {"all": True, "lines": None}
- elif matching_type == AntecessorFindingResult.line:
- result_so_far["lines"].add(antecessor_head_line)
- return result_so_far
- log.warning(
- "Unknown type of change. Fallback to all lines",
- extra=dict(change_type=change.change_type),
- )
- return {"all": True, "lines": None}
diff --git a/services/static_analysis/git_diff_parser.py b/services/static_analysis/git_diff_parser.py
deleted file mode 100644
index 6307f87e1..000000000
--- a/services/static_analysis/git_diff_parser.py
+++ /dev/null
@@ -1,85 +0,0 @@
-import typing
-from dataclasses import dataclass
-from enum import Enum
-
-import sentry_sdk
-
-from services.comparison.changes import get_segment_offsets
-
-
-class DiffChangeType(Enum):
- new = "new"
- deleted = "deleted"
- modified = "modified"
- binary = "binary"
-
- @classmethod
- def get_from_string(cls, string_value):
- for i in cls:
- if i.value == string_value:
- return i
-
-
-@dataclass
-class DiffChange(object):
- __slots__ = (
- "before_filepath",
- "after_filepath",
- "change_type",
- "lines_only_on_base",
- "lines_only_on_head",
- )
- before_filepath: typing.Optional[str]
- after_filepath: typing.Optional[str]
- change_type: DiffChangeType
- lines_only_on_base: typing.Optional[typing.List[int]]
- lines_only_on_head: typing.Optional[typing.List[int]]
-
- def map_base_line_to_head_line(self, base_line: int):
- return self._map_this_to_other(
- base_line, self.lines_only_on_base, self.lines_only_on_head
- )
-
- def map_head_line_to_base_line(self, head_line: int):
- return self._map_this_to_other(
- head_line, self.lines_only_on_head, self.lines_only_on_base
- )
-
- def _map_this_to_other(self, line_number, this, other):
- if self.change_type in (
- DiffChangeType.binary,
- DiffChangeType.deleted,
- DiffChangeType.new,
- ):
- return None
- if line_number in this:
- return None
- smaller_lines = sum(1 for x in this if x < line_number)
- current_point = line_number - smaller_lines
- for lh in other:
- if lh <= current_point:
- current_point += 1
- return current_point
-
-
-# NOTE: Computationally intensive.
-@sentry_sdk.trace
-def parse_git_diff_json(diff_json) -> typing.List[DiffChange]:
- for key, value in diff_json["diff"]["files"].items():
- change_type = DiffChangeType.get_from_string(value["type"])
- after = None if change_type == DiffChangeType.deleted else key
- before = (
- None if change_type == DiffChangeType.new else (value.get("before") or key)
- )
- _, additions, removals = (
- get_segment_offsets(value["segments"])
- if change_type not in (DiffChangeType.binary, DiffChangeType.deleted)
- else (None, None, None)
- )
- yield DiffChange(
- before_filepath=before,
- after_filepath=after,
- change_type=DiffChangeType.get_from_string(value["type"]),
- lines_only_on_base=sorted(removals) if removals is not None else None,
- lines_only_on_head=sorted(additions) if additions is not None else None,
- )
diff --git a/services/static_analysis/single_file_analyzer.py b/services/static_analysis/single_file_analyzer.py
deleted file mode 100644
index 8642707f8..000000000
--- a/services/static_analysis/single_file_analyzer.py
+++ /dev/null
@@ -1,129 +0,0 @@
-import logging
-import typing
-from enum import Enum, auto
-
-log = logging.getLogger(__name__)
-
-
-class AntecessorFindingResult(Enum):
- line = auto()
- function = auto()
- file = auto()
-
-
-class SingleFileSnapshotAnalyzer(object):
- """
- This is an analyzer for a single snapshot of a file (meaning a version of a file in
- a particular moment of time)
-
- For now, the expected structure of the file snapshot is
- (there can be more fields, but those are the ones being used in this context):
-
- empty_lines:
- a list of lines that we know are empty
- functions:
- a list of functions/methods in this file, and its details.
- The structure of a function is (some fields declared here might not be used):
- declaration_line: The line where the function is declared
- identifier: A unique identifier (in the global context) for the function
- Something that can later help us tell that a moved function is
- still the same function
- start_line: The line where the function code starts
- end_line: The line where the function code ends
- code_hash: A hash of the function body that helps us tell when it changed
- complexity_metrics: Some complexity metrics not used here
- hash: The hash code of the file so its easy to tell when it has changed
- language: The programming language of the file (not used here)
- number_lines: The number of lines this file has
- statements: A list of statements in this file. A statement structure is a tuple of two
- elements:
- - The first element is the line number where that statement is
- - The second element is a dict with more information about that line:
- - line_surety_ancestorship: It's the number of the line that we know
- will be executed before this statement happens. Like
- "We are sure this line will be an ancestor to this statement"
- This is a way to construct a light version of the flowchart graph
- of the file
- start_column: The column where this code starts
- line_hash: The hash of this line (to later tell line changes vs code change)
- len: The number of lines (in addition to this one that this code entails)
- extra_connected_lines: Which lines are not contiguous to this, but should
- be considered to affect this line. One example is the "else" that indirectly
- affects the "if", because it's like part of the if "jumping logic"
- definition_lines: The lines where things (like classes, functions, enums) are defined
- - Those don't have much use for now
- import_lines: The lines where imports are. It's useful for other analysis.
- But not this one
-
- We will eventually having a schema to validate data against this so we can ensure data
- is valid when we use it. The schema will be better documentation of the format than this
- """
-
- def __init__(self, filepath, analysis_file_data):
- self._filepath = filepath
- self._analysis_file_data = analysis_file_data
- self._statement_mapping = dict(analysis_file_data["statements"])
-
- def get_corresponding_executable_line(self, line_number: int) -> int:
- for that_line, statement_data in self._analysis_file_data["statements"]:
- if (
- that_line <= line_number
- and that_line + statement_data["len"] >= line_number
- ):
- return that_line
- if line_number in statement_data["extra_connected_lines"]:
- return that_line
- # This is a logging.warning for now while we implement things
- # But there will be a really reasonable case where customers
- # change no code. So it won't have a corresponding executable line
- log.warning(
- "Not able to find corresponding executable line",
- extra=dict(
- filepath_=self._filepath,
- line_number=line_number,
- allstuff=self._analysis_file_data["statements"],
- ),
- )
- return None
-
- def get_antecessor_executable_line(
- self, line_number: int, lines_to_not_consider: typing.List[int]
- ) -> int:
- current_line = line_number
- while (
- current_line in lines_to_not_consider
- and self._statement_mapping.get(current_line, {}).get(
- "line_surety_ancestorship"
- )
- and current_line
- != self._statement_mapping.get(current_line, {}).get(
- "line_surety_ancestorship"
- )
- ):
- current_line = self._statement_mapping.get(current_line, {}).get(
- "line_surety_ancestorship"
- )
- if current_line not in lines_to_not_consider:
- return (AntecessorFindingResult.line, current_line)
- for f in self._analysis_file_data["functions"]:
- if (
- f.get("start_line") <= current_line
- and f.get("end_line") >= current_line
- ):
- return (AntecessorFindingResult.function, f["identifier"])
- log.warning(
- "Somehow not able to find antecessor line",
- extra=dict(
- filepath_=self._filepath,
- line_number=line_number,
- lines_to_not_consider=lines_to_not_consider,
- allstuff=self._analysis_file_data["statements"],
- ),
- )
- return (AntecessorFindingResult.file, self._filepath)
-
- def find_function_by_identifier(self, function_identifier):
- for func in self._analysis_file_data["functions"]:
- if func["identifier"] == function_identifier:
- return func
- return None
diff --git a/services/static_analysis/tests/__init__.py b/services/static_analysis/tests/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/services/static_analysis/tests/unit/__init__.py b/services/static_analysis/tests/unit/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/services/static_analysis/tests/unit/test_git_diff_parser.py b/services/static_analysis/tests/unit/test_git_diff_parser.py
deleted file mode 100644
index d60358669..000000000
--- a/services/static_analysis/tests/unit/test_git_diff_parser.py
+++ /dev/null
@@ -1,217 +0,0 @@
-from services.static_analysis.git_diff_parser import (
- DiffChange,
- DiffChangeType,
- parse_git_diff_json,
-)
-
-
-class TestDiffChange(object):
- def test_line_mapping_modified_file(self):
- sample_git_diff_change = DiffChange(
- before_filepath="README.rst",
- after_filepath="README.rst",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[12, 49, 153, 154],
- lines_only_on_head=[12, 13, 50, 56, 57, 58, 59, 60, 61, 62, 161],
- )
- # base to head
- assert sample_git_diff_change.map_base_line_to_head_line(1) == 1
- assert sample_git_diff_change.map_base_line_to_head_line(11) == 11
- assert sample_git_diff_change.map_base_line_to_head_line(12) is None
- assert sample_git_diff_change.map_base_line_to_head_line(13) == 14
- assert sample_git_diff_change.map_base_line_to_head_line(48) == 49
- assert sample_git_diff_change.map_base_line_to_head_line(49) is None
- assert sample_git_diff_change.map_base_line_to_head_line(50) == 51
- # head to base
- assert sample_git_diff_change.map_head_line_to_base_line(1) == 1
- assert sample_git_diff_change.map_head_line_to_base_line(11) == 11
- assert sample_git_diff_change.map_head_line_to_base_line(12) is None
- assert sample_git_diff_change.map_head_line_to_base_line(13) is None
- assert sample_git_diff_change.map_head_line_to_base_line(14) == 13
- assert sample_git_diff_change.map_head_line_to_base_line(49) == 48
- assert sample_git_diff_change.map_head_line_to_base_line(50) is None
- assert sample_git_diff_change.map_head_line_to_base_line(51) == 50
- # next one is reasonable because there is 7 more head lines than base lines
- assert sample_git_diff_change.map_head_line_to_base_line(1000) == 993
- assert sample_git_diff_change.map_base_line_to_head_line(993) == 1000
-
- def test_line_mapping_deleted_file(self):
- sample_git_diff_change = DiffChange(
- before_filepath="README.rst",
- after_filepath="README.rst",
- change_type=DiffChangeType.deleted,
- lines_only_on_base=None,
- lines_only_on_head=None,
- )
- assert sample_git_diff_change.map_head_line_to_base_line(1) is None
-
- def test_line_mapping_binary_file(self):
- sample_git_diff_change = DiffChange(
- before_filepath="README.rst",
- after_filepath="README.rst",
- change_type=DiffChangeType.binary,
- lines_only_on_base=None,
- lines_only_on_head=None,
- )
- assert sample_git_diff_change.map_head_line_to_base_line(1) is None
-
- def test_line_mapping_new_file(self):
- sample_git_diff_change = DiffChange(
- before_filepath="README.rst",
- after_filepath="README.rst",
- change_type=DiffChangeType.new,
- lines_only_on_base=None,
- lines_only_on_head=None,
- )
- assert sample_git_diff_change.map_head_line_to_base_line(1) is None
-
-
-class TestParseGitDiffJson(object):
- def test_parse_git_diff_json_single_file(self):
- input_data = {
- "diff": {
- "files": {
- "README.rst": {
- "type": "modified",
- "before": None,
- "segments": [
- {
- "header": ["9", "7", "9", "8"],
- "lines": [
- " Overview",
- " --------",
- " ",
- "-Main website: `Codecov `_.",
- "+",
- "+website: `Codecov `_.",
- " ",
- " .. code-block:: shell-session",
- " ",
- ],
- },
- {
- "header": ["46", "12", "47", "19"],
- "lines": [
- " ",
- " You may need to configure a ``.coveragerc`` file. Learn more `here `_. Start with this `generic .coveragerc `_ for example.",
- " ",
- "-We highly suggest adding `source` to your ``.coveragerc`` which solves a number of issues collecting coverage.",
- "+We highly suggest adding ``source`` to your ``.coveragerc``, which solves a number of issues collecting coverage.",
- " ",
- " .. code-block:: ini",
- " ",
- " [run]",
- " source=your_package_name",
- "+ ",
- "+If there are multiple sources, you instead should add ``include`` to your ``.coveragerc``",
- "+",
- "+.. code-block:: ini",
- "+",
- "+ [run]",
- "+ include=your_package_name/*",
- " ",
- " unittests",
- " ---------",
- ],
- },
- {
- "header": ["150", "5", "158", "4"],
- "lines": [
- " * Twitter: `@codecov `_.",
- " * Email: `hello@codecov.io `_.",
- " ",
- "-We are happy to help if you have any questions. Please contact email our Support at [support@codecov.io](mailto:support@codecov.io)",
- "-",
- "+We are happy to help if you have any questions. Please contact email our Support at `support@codecov.io `_.",
- ],
- },
- ],
- "stats": {"added": 11, "removed": 4},
- }
- }
- },
- }
- res = list(parse_git_diff_json(input_data))
- assert res == [
- DiffChange(
- before_filepath="README.rst",
- after_filepath="README.rst",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[12, 49, 153, 154],
- lines_only_on_head=[12, 13, 50, 56, 57, 58, 59, 60, 61, 62, 161],
- )
- ]
-
- def test_parse_git_diff_json_multiple_files(self):
- input_data = {
- "files": {
- "banana.py": {
- "type": "new",
- "before": None,
- "segments": [
- {
- "header": ["0", "0", "1", "2"],
- "lines": ["+suhduad", "+dsandsa"],
- }
- ],
- "stats": {"added": 2, "removed": 0},
- },
- "codecov-alpine": {
- "type": "binary",
- "stats": {"added": 0, "removed": 0},
- },
- "codecov/settings_dev.py": {
- "type": "modified",
- "before": None,
- "segments": [
- {
- "header": ["49", "3", "49", "4"],
- "lines": [
- ' SESSION_COOKIE_DOMAIN = "localhost"',
- " ",
- " GRAPHQL_PLAYGROUND = True",
- "+IS_DEV = True",
- ],
- }
- ],
- "stats": {"added": 1, "removed": 0},
- },
- "production.yml": {
- "type": "deleted",
- "before": "production.yml",
- "stats": {"added": 0, "removed": 0},
- },
- }
- }
- expected_result = [
- DiffChange(
- before_filepath=None,
- after_filepath="banana.py",
- change_type=DiffChangeType.new,
- lines_only_on_base=[],
- lines_only_on_head=[1, 2],
- ),
- DiffChange(
- before_filepath="codecov-alpine",
- after_filepath="codecov-alpine",
- change_type=DiffChangeType.binary,
- lines_only_on_base=None,
- lines_only_on_head=None,
- ),
- DiffChange(
- before_filepath="codecov/settings_dev.py",
- after_filepath="codecov/settings_dev.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[52],
- ),
- DiffChange(
- before_filepath="production.yml",
- after_filepath=None,
- change_type=DiffChangeType.deleted,
- lines_only_on_base=None,
- lines_only_on_head=None,
- ),
- ]
- res = list(parse_git_diff_json({"diff": input_data}))
- assert res == expected_result
diff --git a/services/static_analysis/tests/unit/test_single_file_analyzer.py b/services/static_analysis/tests/unit/test_single_file_analyzer.py
deleted file mode 100644
index f920cb456..000000000
--- a/services/static_analysis/tests/unit/test_single_file_analyzer.py
+++ /dev/null
@@ -1,107 +0,0 @@
-from services.static_analysis.single_file_analyzer import (
- AntecessorFindingResult,
- SingleFileSnapshotAnalyzer,
-)
-
-# While the structure of this is correct, the data itself was manually edited
-# to make interesting test cases
-sample_input_data = {
- "empty_lines": [4, 8, 11],
- "warnings": [],
- "filename": "source.py",
- "functions": [
- {
- "identifier": "some_function",
- "start_line": 5,
- "end_line": 10,
- "code_hash": "e4b52b6da12184142fcd7ff2c8412662",
- "complexity_metrics": {
- "conditions": 1,
- "mccabe_cyclomatic_complexity": 2,
- "returns": 1,
- "max_nested_conditional": 1,
- },
- }
- ],
- "hash": "811d0016249a5b1400a685164e5295de",
- "language": "python",
- "number_lines": 11,
- "statements": [
- (
- 1,
- {
- "line_surety_ancestorship": None,
- "start_column": 0,
- "line_hash": "55c30cf01e202728b6952e9cba304798",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 2,
- {
- "line_surety_ancestorship": 1,
- "start_column": 4,
- "line_hash": "1d7be9f2145760a59513a4049fcd0d1c",
- "len": 1,
- "extra_connected_lines": (),
- },
- ),
- (
- 5,
- {
- "line_surety_ancestorship": None,
- "start_column": 4,
- "line_hash": "1d7be9f2145760a59513a4049fcd0d1c",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 6,
- {
- "line_surety_ancestorship": 5,
- "start_column": 4,
- "line_hash": "52f98812dca4687f18373b87433df695",
- "len": 0,
- "extra_connected_lines": (14,),
- },
- ),
- (
- 7,
- {
- "line_surety_ancestorship": 6,
- "start_column": 4,
- "line_hash": "52f98812dca4687f18373b87433df695",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- ],
- "definition_lines": [(4, 6)],
- "import_lines": [],
-}
-
-
-def test_simple_single_file_snapshot_analyzer_get_corresponding_executable_line():
- sfsa = SingleFileSnapshotAnalyzer("filepath", sample_input_data)
- assert sfsa.get_corresponding_executable_line(3) == 2
- assert sfsa.get_corresponding_executable_line(2) == 2
- assert sfsa.get_corresponding_executable_line(4) is None
- assert sfsa.get_corresponding_executable_line(14) == 6
-
-
-def test_get_antecessor_executable_line():
- sfsa = SingleFileSnapshotAnalyzer("filepath", sample_input_data)
- assert sfsa.get_antecessor_executable_line(7, lines_to_not_consider=[6, 7]) == (
- AntecessorFindingResult.line,
- 5,
- )
- assert sfsa.get_antecessor_executable_line(2, lines_to_not_consider=[1, 2]) == (
- AntecessorFindingResult.file,
- "filepath",
- )
- assert sfsa.get_antecessor_executable_line(5, lines_to_not_consider=[5]) == (
- AntecessorFindingResult.function,
- "some_function",
- )
diff --git a/services/static_analysis/tests/unit/test_static_analysis_comparison.py b/services/static_analysis/tests/unit/test_static_analysis_comparison.py
deleted file mode 100644
index c14f519e6..000000000
--- a/services/static_analysis/tests/unit/test_static_analysis_comparison.py
+++ /dev/null
@@ -1,954 +0,0 @@
-import json
-
-import pytest
-
-from database.tests.factories.core import RepositoryFactory
-from database.tests.factories.staticanalysis import (
- StaticAnalysisSingleFileSnapshotFactory,
- StaticAnalysisSuiteFactory,
- StaticAnalysisSuiteFilepathFactory,
-)
-from services.static_analysis import (
- SingleFileSnapshotAnalyzer,
- StaticAnalysisComparisonService,
- _get_analysis_content_mapping,
-)
-from services.static_analysis.git_diff_parser import DiffChange, DiffChangeType
-
-
-def test_get_analysis_content_mapping(dbsession):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- static_analysis_suite = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- secondary_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- dbsession.add(static_analysis_suite)
- dbsession.add(secondary_static_analysis)
- dbsession.flush()
- snapshot_1 = StaticAnalysisSingleFileSnapshotFactory.create(repository=repository)
- snapshot_2 = StaticAnalysisSingleFileSnapshotFactory.create(repository=repository)
- snapshot_3 = StaticAnalysisSingleFileSnapshotFactory.create(repository=repository)
- snapshot_4 = StaticAnalysisSingleFileSnapshotFactory.create(repository=repository)
- snapshot_5 = StaticAnalysisSingleFileSnapshotFactory.create(repository=repository)
- dbsession.add_all([snapshot_1, snapshot_2, snapshot_3, snapshot_4, snapshot_5])
- dbsession.flush()
- f_1 = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=snapshot_1, analysis_suite=static_analysis_suite
- )
- f_2 = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=snapshot_2, analysis_suite=static_analysis_suite
- )
- f_3 = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=snapshot_3, analysis_suite=static_analysis_suite
- )
- f_4 = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=snapshot_4, analysis_suite=static_analysis_suite
- )
- f_s_2 = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=snapshot_2,
- analysis_suite=secondary_static_analysis,
- filepath=f_1.filepath,
- )
- f_s_3 = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=snapshot_3, analysis_suite=secondary_static_analysis
- )
- f_s_5 = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=snapshot_5, analysis_suite=secondary_static_analysis
- )
- dbsession.add_all([f_1, f_2, f_3, f_4, f_s_2, f_s_3, f_s_5])
- dbsession.flush()
- first_res = _get_analysis_content_mapping(
- static_analysis_suite,
- [f_1.filepath, f_2.filepath, f_4.filepath, "somenonexistent.gh"],
- )
- assert first_res == {
- f_1.filepath: snapshot_1.content_location,
- f_2.filepath: snapshot_2.content_location,
- f_4.filepath: snapshot_4.content_location,
- }
- secondary_res = _get_analysis_content_mapping(
- secondary_static_analysis,
- [f_s_2.filepath, f_s_3.filepath],
- )
- assert secondary_res == {
- f_s_2.filepath: snapshot_2.content_location,
- f_s_3.filepath: snapshot_3.content_location,
- }
-
-
-@pytest.fixture()
-def sample_service(dbsession):
- repository = RepositoryFactory.create()
- head_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- base_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- dbsession.add(head_static_analysis)
- dbsession.add(base_static_analysis)
- dbsession.flush()
- return StaticAnalysisComparisonService(
- base_static_analysis=base_static_analysis,
- head_static_analysis=head_static_analysis,
- git_diff=[
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- ),
- ],
- )
-
-
-class TestStaticAnalysisComparisonService(object):
- def test_load_snapshot_data_unhappy_cases(self, sample_service, mock_storage):
- assert sample_service._load_snapshot_data("filepath", None) is None
- assert sample_service._load_snapshot_data("filepath", "fake_location") is None
-
- def test_load_snapshot_data_happy_cases(self, sample_service, mock_storage):
- mock_storage.write_file(
- "archive",
- "real_content_location",
- json.dumps({"statements": [(1, {"ha": "pokemon"})]}),
- )
- res = sample_service._load_snapshot_data("filepath", "real_content_location")
- assert isinstance(res, SingleFileSnapshotAnalyzer)
- assert res._filepath == "filepath"
- assert res._analysis_file_data == {"statements": [[1, {"ha": "pokemon"}]]}
- assert res._statement_mapping == {1: {"ha": "pokemon"}}
-
- def test_get_base_lines_relevant_to_change_deleted_plus_changed_normal(
- self, dbsession, mock_storage
- ):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- snapshot_deleted = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- dbsession.add_all(
- [
- snapshot_deleted,
- changed_snapshot_base,
- changed_snapshot_head,
- ]
- )
- dbsession.flush()
- mock_storage.write_file(
- "archive", snapshot_deleted.content_location, json.dumps({"statements": []})
- )
- mock_storage.write_file(
- "archive",
- changed_snapshot_base.content_location,
- json.dumps(
- {
- "statements": [
- (
- 30,
- {
- "len": 1,
- "line_surety_ancestorship": 29,
- "extra_connected_lines": [35],
- },
- ),
- ]
- }
- ),
- )
- mock_storage.write_file(
- "archive",
- changed_snapshot_head.content_location,
- json.dumps(
- {
- "functions": [],
- "statements": [
- (1, {"len": 0, "extra_connected_lines": []}),
- (2, {"len": 1, "extra_connected_lines": []}),
- (8, {"len": 0, "extra_connected_lines": []}),
- (
- 10,
- {
- "len": 1,
- "line_surety_ancestorship": 8,
- "extra_connected_lines": [20],
- },
- ),
- ],
- }
- ),
- )
- head_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- base_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- dbsession.add(head_static_analysis)
- dbsession.add(base_static_analysis)
- dbsession.flush()
- deleted_sasff = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=snapshot_deleted,
- analysis_suite=base_static_analysis,
- filepath="deleted.py",
- )
- old_changed_sasff = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=changed_snapshot_base,
- analysis_suite=base_static_analysis,
- filepath="path/changed.py",
- )
- new_changed_sasff = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=changed_snapshot_head,
- analysis_suite=head_static_analysis,
- filepath="path/changed.py",
- )
- dbsession.add_all([deleted_sasff, old_changed_sasff, new_changed_sasff])
- dbsession.flush()
- service = StaticAnalysisComparisonService(
- base_static_analysis=base_static_analysis,
- head_static_analysis=head_static_analysis,
- git_diff=[
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[30],
- lines_only_on_head=[20],
- ),
- DiffChange(
- before_filepath="deleted.py",
- after_filepath=None,
- change_type=DiffChangeType.deleted,
- lines_only_on_base=None,
- lines_only_on_head=None,
- ),
- ],
- )
- assert service.get_base_lines_relevant_to_change() == {
- "all": False,
- "files": {
- "deleted.py": {"all": True, "lines": None},
- "path/changed.py": {"all": False, "lines": {8, 30}},
- },
- }
-
- def test_get_base_lines_relevant_to_change_one_new_file(
- self, dbsession, mock_storage
- ):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- snapshot_deleted = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- dbsession.add_all(
- [
- snapshot_deleted,
- changed_snapshot_base,
- changed_snapshot_head,
- ]
- )
- dbsession.flush()
- mock_storage.write_file(
- "archive", snapshot_deleted.content_location, json.dumps({"statements": []})
- )
- mock_storage.write_file(
- "archive",
- changed_snapshot_base.content_location,
- json.dumps({"statements": [(1, {})]}),
- )
- mock_storage.write_file(
- "archive",
- changed_snapshot_head.content_location,
- json.dumps(
- {
- "functions": [],
- "statements": [
- (1, {"len": 0, "extra_connected_lines": []}),
- (2, {"len": 1, "extra_connected_lines": []}),
- (8, {"len": 0, "extra_connected_lines": []}),
- (
- 10,
- {
- "len": 1,
- "line_surety_ancestorship": 8,
- "extra_connected_lines": [20],
- },
- ),
- ],
- }
- ),
- )
- head_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- base_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- dbsession.add(head_static_analysis)
- dbsession.add(base_static_analysis)
- dbsession.flush()
- deleted_sasff = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=snapshot_deleted,
- analysis_suite=base_static_analysis,
- filepath="deleted.py",
- )
- old_changed_sasff = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=changed_snapshot_base,
- analysis_suite=base_static_analysis,
- filepath="path/changed.py",
- )
- new_changed_sasff = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=changed_snapshot_head,
- analysis_suite=head_static_analysis,
- filepath="path/changed.py",
- )
- dbsession.add_all([deleted_sasff, old_changed_sasff, new_changed_sasff])
- dbsession.flush()
- service = StaticAnalysisComparisonService(
- base_static_analysis=base_static_analysis,
- head_static_analysis=head_static_analysis,
- git_diff=[
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- ),
- DiffChange(
- before_filepath=None,
- after_filepath="path/new.py",
- change_type=DiffChangeType.new,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- ),
- DiffChange(
- before_filepath="deleted.py",
- after_filepath=None,
- change_type=DiffChangeType.deleted,
- lines_only_on_base=None,
- lines_only_on_head=None,
- ),
- ],
- )
- assert service.get_base_lines_relevant_to_change() == {"all": True}
-
- def test_analyze_single_change_first_line_file(self, dbsession, mock_storage):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- dbsession.add_all(
- [
- changed_snapshot_base,
- changed_snapshot_head,
- ]
- )
- dbsession.flush()
- mock_storage.write_file(
- "archive",
- changed_snapshot_base.content_location,
- json.dumps(
- {
- "statements": [
- (
- 6,
- {
- "len": 1,
- "extra_connected_lines": [9],
- },
- ),
- ]
- }
- ),
- )
- mock_storage.write_file(
- "archive",
- changed_snapshot_head.content_location,
- json.dumps(
- {
- "functions": [],
- "statements": [
- (
- 10,
- {
- "len": 0,
- "extra_connected_lines": [20],
- },
- ),
- (
- 11,
- {
- "len": 0,
- "line_surety_ancestorship": 10,
- "extra_connected_lines": [],
- },
- ),
- (12, {"len": 1, "extra_connected_lines": []}),
- (
- 18,
- {
- "len": 0,
- "line_surety_ancestorship": 12,
- "extra_connected_lines": [],
- },
- ),
- ],
- }
- ),
- )
- head_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- base_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- dbsession.add(head_static_analysis)
- dbsession.add(base_static_analysis)
- dbsession.flush()
- change = DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[9],
- lines_only_on_head=[11],
- )
- service = StaticAnalysisComparisonService(
- base_static_analysis=base_static_analysis,
- head_static_analysis=head_static_analysis,
- git_diff=[change],
- )
- assert service._analyze_single_change(
- dbsession,
- change,
- changed_snapshot_base.content_location,
- changed_snapshot_head.content_location,
- ) == {"all": False, "lines": {6, 10}}
-
- def test_analyze_single_change_base_change(self, dbsession, mock_storage):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- dbsession.add_all(
- [
- changed_snapshot_base,
- changed_snapshot_head,
- ]
- )
- dbsession.flush()
- mock_storage.write_file(
- "archive",
- changed_snapshot_base.content_location,
- json.dumps(
- {
- "functions": [
- {
- "identifier": "banana_function",
- "start_line": 3,
- "end_line": 8,
- }
- ],
- "statements": [
- (
- 1,
- {
- "len": 0,
- "line_surety_ancestorship": None,
- "extra_connected_lines": [],
- },
- ),
- (
- 2,
- {
- "len": 0,
- "line_surety_ancestorship": 1,
- "extra_connected_lines": [],
- },
- ),
- ],
- }
- ),
- )
- mock_storage.write_file(
- "archive",
- changed_snapshot_head.content_location,
- json.dumps(
- {
- "functions": [
- {
- "identifier": "banana_function",
- "start_line": 3,
- "end_line": 8,
- }
- ],
- "statements": [
- (
- 10,
- {
- "len": 0,
- "extra_connected_lines": [20],
- },
- ),
- (
- 11,
- {
- "len": 0,
- "line_surety_ancestorship": 10,
- "extra_connected_lines": [],
- },
- ),
- (12, {"len": 1, "extra_connected_lines": []}),
- (
- 18,
- {
- "len": 0,
- "line_surety_ancestorship": 12,
- "extra_connected_lines": [],
- },
- ),
- ],
- }
- ),
- )
- head_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- base_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- dbsession.add(head_static_analysis)
- dbsession.add(base_static_analysis)
- dbsession.flush()
- service = StaticAnalysisComparisonService(
- base_static_analysis=base_static_analysis,
- head_static_analysis=head_static_analysis,
- git_diff=[
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- ),
- ],
- )
- assert service._analyze_single_change(
- dbsession,
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- ),
- changed_snapshot_base.content_location,
- changed_snapshot_head.content_location,
- ) == {"all": True, "lines": None}
- assert service._analyze_single_change(
- dbsession,
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[11],
- ),
- changed_snapshot_base.content_location,
- changed_snapshot_head.content_location,
- ) == {"all": False, "lines": {10}}
- assert service._analyze_single_change(
- dbsession,
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[99, 100],
- ),
- changed_snapshot_base.content_location,
- changed_snapshot_head.content_location,
- ) == {"all": False, "lines": set()}
-
- def test_analyze_single_change_base_change_missing_head_snapshot(
- self, dbsession, mock_storage
- ):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- dbsession.add_all(
- [
- changed_snapshot_base,
- changed_snapshot_head,
- ]
- )
- dbsession.flush()
- mock_storage.write_file(
- "archive",
- changed_snapshot_base.content_location,
- json.dumps(
- {
- "functions": [
- {
- "identifier": "banana_function",
- "start_line": 3,
- "end_line": 8,
- }
- ],
- "statements": [
- (
- 1,
- {
- "len": 0,
- "line_surety_ancestorship": None,
- "extra_connected_lines": [],
- },
- ),
- (
- 2,
- {
- "len": 0,
- "line_surety_ancestorship": 1,
- "extra_connected_lines": [],
- },
- ),
- ],
- }
- ),
- )
- head_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- base_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- dbsession.add(head_static_analysis)
- dbsession.add(base_static_analysis)
- dbsession.flush()
- service = StaticAnalysisComparisonService(
- base_static_analysis=base_static_analysis,
- head_static_analysis=head_static_analysis,
- git_diff=[
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- ),
- ],
- )
- assert service._analyze_single_change(
- dbsession,
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- ),
- changed_snapshot_base.content_location,
- changed_snapshot_head.content_location,
- ) == {"all": True, "lines": None}
- assert service._analyze_single_change(
- dbsession,
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[11],
- ),
- changed_snapshot_base.content_location,
- changed_snapshot_head.content_location,
- ) == {"all": True, "lines": None}
- assert service._analyze_single_change(
- dbsession,
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[99, 100],
- ),
- changed_snapshot_base.content_location,
- changed_snapshot_head.content_location,
- ) == {"all": True, "lines": None}
-
- def test_analyze_single_change_function_based(self, dbsession, mock_storage):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- dbsession.add_all(
- [
- changed_snapshot_base,
- changed_snapshot_head,
- ]
- )
- dbsession.flush()
- mock_storage.write_file(
- "archive",
- changed_snapshot_base.content_location,
- json.dumps(
- {
- "functions": [
- {
- "identifier": "banana_function",
- "start_line": 3,
- "end_line": 8,
- }
- ],
- "statements": [(1, {})],
- }
- ),
- )
- mock_storage.write_file(
- "archive",
- changed_snapshot_head.content_location,
- json.dumps(
- {
- "functions": [
- {
- "identifier": "banana_function",
- "start_line": 9,
- "end_line": 11,
- }
- ],
- "statements": [
- (
- 10,
- {
- "len": 1,
- "extra_connected_lines": [20],
- },
- ),
- (
- 11,
- {
- "len": 0,
- "line_surety_ancestorship": 10,
- "extra_connected_lines": [],
- },
- ),
- (12, {"len": 1, "extra_connected_lines": []}),
- (
- 18,
- {
- "len": 0,
- "line_surety_ancestorship": 12,
- "extra_connected_lines": [],
- },
- ),
- ],
- }
- ),
- )
- head_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- base_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- dbsession.add(head_static_analysis)
- dbsession.add(base_static_analysis)
- dbsession.flush()
- service = StaticAnalysisComparisonService(
- base_static_analysis=base_static_analysis,
- head_static_analysis=head_static_analysis,
- git_diff=[
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- ),
- ],
- )
- change = DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- )
- assert service._analyze_single_change(
- dbsession,
- change,
- changed_snapshot_base.content_location,
- changed_snapshot_head.content_location,
- ) == {"all": False, "lines": {3}}
-
- def test_analyze_single_change_no_static_analysis_found(
- self, dbsession, mock_storage, mocker, sample_service
- ):
- mocked_load_snapshot = mocker.patch.object(
- StaticAnalysisComparisonService, "_load_snapshot_data", return_value=None
- )
- change = DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- )
- first_location, second_location = mocker.MagicMock(), mocker.MagicMock()
- assert (
- sample_service._analyze_single_change(
- dbsession,
- change,
- first_location,
- second_location,
- )
- is None
- )
- assert mocked_load_snapshot.call_count == 2
- mocked_load_snapshot.assert_any_call("path/changed.py", second_location)
- mocked_load_snapshot.assert_any_call("path/changed.py", first_location)
-
- def test_analyze_single_change_function_based_no_function_found(
- self, dbsession, mock_storage
- ):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository
- )
- dbsession.add_all(
- [
- changed_snapshot_base,
- changed_snapshot_head,
- ]
- )
- dbsession.flush()
- mock_storage.write_file(
- "archive",
- changed_snapshot_base.content_location,
- json.dumps(
- {
- "functions": [],
- "statements": [(1, {})],
- }
- ),
- )
- mock_storage.write_file(
- "archive",
- changed_snapshot_head.content_location,
- json.dumps(
- {
- "functions": [
- {
- "identifier": "banana_function",
- "start_line": 9,
- "end_line": 11,
- }
- ],
- "statements": [
- (
- 10,
- {
- "len": 1,
- "extra_connected_lines": [20],
- },
- ),
- (
- 11,
- {
- "len": 0,
- "line_surety_ancestorship": 10,
- "extra_connected_lines": [],
- },
- ),
- (12, {"len": 1, "extra_connected_lines": []}),
- (
- 18,
- {
- "len": 0,
- "line_surety_ancestorship": 12,
- "extra_connected_lines": [],
- },
- ),
- ],
- }
- ),
- )
- head_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- base_static_analysis = StaticAnalysisSuiteFactory.create(
- commit__repository=repository
- )
- dbsession.add(head_static_analysis)
- dbsession.add(base_static_analysis)
- dbsession.flush()
- service = StaticAnalysisComparisonService(
- base_static_analysis=base_static_analysis,
- head_static_analysis=head_static_analysis,
- git_diff=[
- DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- ),
- ],
- )
- change = DiffChange(
- before_filepath="path/changed.py",
- after_filepath="path/changed.py",
- change_type=DiffChangeType.modified,
- lines_only_on_base=[],
- lines_only_on_head=[20],
- )
- assert service._analyze_single_change(
- dbsession,
- change,
- changed_snapshot_base.content_location,
- changed_snapshot_head.content_location,
- ) == {"all": True, "lines": None}
diff --git a/tasks/__init__.py b/tasks/__init__.py
index 04c246658..2530cf824 100644
--- a/tasks/__init__.py
+++ b/tasks/__init__.py
@@ -41,7 +41,6 @@
from tasks.save_commit_measurements import save_commit_measurements_task
from tasks.save_report_results import save_report_results_task
from tasks.send_email import send_email
-from tasks.static_analysis_suite_check import static_analysis_suite_check_task
from tasks.status_set_error import status_set_error_task
from tasks.status_set_pending import status_set_pending_task
from tasks.sync_pull import pull_sync_task
diff --git a/tasks/label_analysis.py b/tasks/label_analysis.py
deleted file mode 100644
index d26d1308b..000000000
--- a/tasks/label_analysis.py
+++ /dev/null
@@ -1,562 +0,0 @@
-import logging
-from typing import Dict, List, NamedTuple, Optional, Set, Tuple, TypedDict, Union
-
-import sentry_sdk
-from asgiref.sync import async_to_sync
-from shared.celery_config import label_analysis_task_name
-from shared.labelanalysis import LabelAnalysisRequestState
-from sqlalchemy.orm import Session
-
-from app import celery_app
-from database.models.labelanalysis import (
- LabelAnalysisProcessingError,
- LabelAnalysisProcessingErrorCode,
- LabelAnalysisRequest,
-)
-from database.models.staticanalysis import StaticAnalysisSuite
-from helpers.labels import get_all_report_labels, get_labels_per_session
-from helpers.metrics import metrics
-from services.report import Report, ReportService
-from services.report.report_builder import SpecialLabelsEnum
-from services.repository import get_repo_provider_service
-from services.static_analysis import StaticAnalysisComparisonService
-from services.static_analysis.git_diff_parser import DiffChange, parse_git_diff_json
-from services.yaml import get_repo_yaml
-from tasks.base import BaseCodecovTask
-
-log = logging.getLogger(__name__)
-
-
-GLOBAL_LEVEL_LABEL = (
- SpecialLabelsEnum.CODECOV_ALL_LABELS_PLACEHOLDER.corresponding_label
-)
-
-GLOBAL_LEVEL_LABEL_IDX = (
- SpecialLabelsEnum.CODECOV_ALL_LABELS_PLACEHOLDER.corresponding_index
-)
-
-
-class LinesRelevantToChangeInFile(TypedDict):
- all: bool
- lines: Set[int]
-
-
-class LinesRelevantToChange(TypedDict):
- all: bool
- files: Dict[str, Optional[LinesRelevantToChangeInFile]]
-
-
-class ExistingLabelSetsEncoded(NamedTuple):
- all_report_labels: Set[int]
- executable_lines_labels: Set[int]
- global_level_labels: Set[int]
- are_labels_encoded: bool = True
-
-
-class ExistingLabelSetsNotEncoded(NamedTuple):
- all_report_labels: Set[str]
- executable_lines_labels: Set[str]
- global_level_labels: Set[str]
- are_labels_encoded: bool = False
-
-
-ExistingLabelSets = Union[ExistingLabelSetsEncoded, ExistingLabelSetsNotEncoded]
-PossiblyEncodedLabelSet = Union[Set[str], Set[int]]
-
-
-class LabelAnalysisRequestProcessingTask(
- BaseCodecovTask, name=label_analysis_task_name
-):
- errors: List[LabelAnalysisProcessingError] = None
- dbsession: Session = None
-
- def reset_task_context(self):
- """Resets the task's attributes to None to avoid spilling information
- between task calls in the same process.
- https://docs.celeryq.dev/en/latest/userguide/tasks.html#instantiation
- """
- self.errors = None
- self.dbsession = None
-
- def run_impl(self, db_session, request_id, *args, **kwargs):
- self.errors = []
- self.dbsession = db_session
- label_analysis_request = (
- db_session.query(LabelAnalysisRequest)
- .filter(LabelAnalysisRequest.id_ == request_id)
- .first()
- )
- if label_analysis_request is None:
- metrics.incr("label_analysis_task.failed_to_calculate.larq_not_found")
- log.error(
- "LabelAnalysisRequest not found", extra=dict(request_id=request_id)
- )
- self.add_processing_error(
- larq_id=request_id,
- error_code=LabelAnalysisProcessingErrorCode.NOT_FOUND,
- error_msg="LabelAnalysisRequest not found",
- error_extra=dict(),
- )
- response = {
- "success": False,
- "present_report_labels": [],
- "present_diff_labels": [],
- "absent_labels": [],
- "global_level_labels": [],
- "errors": self.errors,
- }
- self.reset_task_context()
- return response
- log.info(
- "Starting label analysis request",
- extra=dict(
- request_id=request_id,
- external_id=label_analysis_request.external_id,
- commit=label_analysis_request.head_commit.commitid,
- ),
- )
-
- if label_analysis_request.state_id == LabelAnalysisRequestState.FINISHED.db_id:
- # Indicates that this request has been calculated already
- # We might need to update the requested labels
- response = self._handle_larq_already_calculated(label_analysis_request)
- self.reset_task_context()
- return response
-
- try:
- lines_relevant_to_diff: Optional[LinesRelevantToChange] = (
- self._get_lines_relevant_to_diff(label_analysis_request)
- )
- base_report = self._get_base_report(label_analysis_request)
-
- if lines_relevant_to_diff and base_report:
- existing_labels: ExistingLabelSets = self._get_existing_labels(
- base_report, lines_relevant_to_diff
- )
- if existing_labels.are_labels_encoded:
- # Translate label_ids
- def partial_fn_to_apply(label_id_set):
- return self._lookup_label_ids(
- report=base_report, label_ids=label_id_set
- )
-
- existing_labels = ExistingLabelSetsNotEncoded(
- all_report_labels=partial_fn_to_apply(
- existing_labels.all_report_labels
- ),
- executable_lines_labels=partial_fn_to_apply(
- existing_labels.executable_lines_labels
- ),
- global_level_labels=partial_fn_to_apply(
- existing_labels.global_level_labels
- ),
- are_labels_encoded=False,
- )
-
- requested_labels = self._get_requested_labels(label_analysis_request)
- result = self.calculate_final_result(
- requested_labels=requested_labels,
- existing_labels=existing_labels,
- commit_sha=label_analysis_request.head_commit.commitid,
- )
- label_analysis_request.result = result
- label_analysis_request.state_id = (
- LabelAnalysisRequestState.FINISHED.db_id
- )
- metrics.incr("label_analysis_task.success")
- response = {
- "success": True,
- "present_report_labels": result["present_report_labels"],
- "present_diff_labels": result["present_diff_labels"],
- "absent_labels": result["absent_labels"],
- "global_level_labels": result["global_level_labels"],
- "errors": self.errors,
- }
- self.reset_task_context()
- return response
- except Exception:
- # temporary general catch while we find possible problems on this
- metrics.incr("label_analysis_task.failed_to_calculate.exception")
- log.exception(
- "Label analysis failed to calculate",
- extra=dict(
- request_id=request_id,
- commit=label_analysis_request.head_commit.commitid,
- external_id=label_analysis_request.external_id,
- ),
- )
- label_analysis_request.result = None
- label_analysis_request.state_id = LabelAnalysisRequestState.ERROR.db_id
- self.add_processing_error(
- larq_id=request_id,
- error_code=LabelAnalysisProcessingErrorCode.FAILED,
- error_msg="Failed to calculate",
- error_extra=dict(),
- )
- response = {
- "success": False,
- "present_report_labels": [],
- "present_diff_labels": [],
- "absent_labels": [],
- "global_level_labels": [],
- "errors": self.errors,
- }
- self.reset_task_context()
- return response
- metrics.incr("label_analysis_task.failed_to_calculate.missing_info")
- log.warning(
- "We failed to get some information that was important to label analysis",
- extra=dict(
- has_relevant_lines=(lines_relevant_to_diff is not None),
- has_base_report=(base_report is not None),
- commit=label_analysis_request.head_commit.commitid,
- external_id=label_analysis_request.external_id,
- request_id=request_id,
- ),
- )
- label_analysis_request.state_id = LabelAnalysisRequestState.FINISHED.db_id
- result_to_save = {
- "success": True,
- "present_report_labels": [],
- "present_diff_labels": [],
- "absent_labels": label_analysis_request.requested_labels,
- "global_level_labels": [],
- }
- label_analysis_request.result = result_to_save
- result_to_return = {**result_to_save, "errors": self.errors}
- self.reset_task_context()
- return result_to_return
-
- def add_processing_error(
- self,
- larq_id: int,
- error_code: LabelAnalysisProcessingErrorCode,
- error_msg: str,
- error_extra: dict,
- ):
- error = LabelAnalysisProcessingError(
- label_analysis_request_id=larq_id,
- error_code=error_code.value,
- error_params=dict(message=error_msg, extra=error_extra),
- )
- self.errors.append(error.to_representation())
- self.dbsession.add(error)
-
- def _handle_larq_already_calculated(self, larq: LabelAnalysisRequest):
- # This means we already calculated everything
- # Except possibly the absent labels
- log.info(
- "Label analysis request was already calculated",
- extra=dict(
- request_id=larq.id,
- external_id=larq.external_id,
- commit=larq.head_commit.commitid,
- ),
- )
- if larq.requested_labels:
- saved_result = larq.result
- all_saved_labels = set(
- saved_result.get("present_report_labels", [])
- + saved_result.get("present_diff_labels", [])
- + saved_result.get("global_level_labels", [])
- )
- executable_lines_saved_labels = set(
- saved_result.get("present_diff_labels", [])
- )
- global_saved_labels = set(saved_result.get("global_level_labels", []))
- result = self.calculate_final_result(
- requested_labels=larq.requested_labels,
- existing_labels=ExistingLabelSetsNotEncoded(
- all_saved_labels, executable_lines_saved_labels, global_saved_labels
- ),
- commit_sha=larq.head_commit.commitid,
- )
- larq.result = result # Save the new result
- metrics.incr("label_analysis_task.already_calculated.new_result")
- return {**result, "success": True, "errors": []}
- # No requested labels mean we don't have any new information
- # So we don't need to calculate again
- # This shouldn't actually happen
- metrics.incr("label_analysis_task.already_calculated.same_result")
- return {**larq.result, "success": True, "errors": []}
-
- def _lookup_label_ids(self, report: Report, label_ids: Set[int]) -> Set[str]:
- labels: Set[str] = set()
- for label_id in label_ids:
- # This can raise shared.reports.exceptions.LabelNotFoundError
- # But (1) we shouldn't let that happen and (2) there's no recovering from it
- # So we should let that happen to surface bugs to us
- labels.add(report.lookup_label_by_id(label_id))
- return labels
-
- def _get_requested_labels(self, label_analysis_request: LabelAnalysisRequest):
- if label_analysis_request.requested_labels:
- return label_analysis_request.requested_labels
- # This is the case where the CLI PATCH the requested labels after collecting them
- self.dbsession.refresh(label_analysis_request, ["requested_labels"])
- return label_analysis_request.requested_labels
-
- @sentry_sdk.trace
- def _get_existing_labels(
- self, report: Report, lines_relevant_to_diff: LinesRelevantToChange
- ) -> ExistingLabelSets:
- all_report_labels = self.get_all_report_labels(report)
- (
- executable_lines_labels,
- global_level_labels,
- ) = self.get_executable_lines_labels(report, lines_relevant_to_diff)
-
- if len(all_report_labels) > 0:
- # Check if report labels are encoded or not
- test_label = all_report_labels.pop()
- are_labels_encoded = isinstance(test_label, int)
- all_report_labels.add(test_label)
- else:
- # There are no labels in the report
- are_labels_encoded = False
-
- class_to_use = (
- ExistingLabelSetsEncoded
- if are_labels_encoded
- else ExistingLabelSetsNotEncoded
- )
-
- return class_to_use(
- all_report_labels=all_report_labels,
- executable_lines_labels=executable_lines_labels,
- global_level_labels=global_level_labels,
- )
-
- @sentry_sdk.trace
- def _get_lines_relevant_to_diff(self, label_analysis_request: LabelAnalysisRequest):
- parsed_git_diff = self._get_parsed_git_diff(label_analysis_request)
- if parsed_git_diff:
- executable_lines_relevant_to_diff = self.get_relevant_executable_lines(
- label_analysis_request, parsed_git_diff
- )
- # This line will be useful for debugging
- # And to tweak the heuristics
- log.info(
- "Lines relevant to diff",
- extra=dict(
- lines_relevant_to_diff=executable_lines_relevant_to_diff,
- commit=label_analysis_request.head_commit.commitid,
- external_id=label_analysis_request.external_id,
- request_id=label_analysis_request.id_,
- ),
- )
- return executable_lines_relevant_to_diff
- return None
-
- @sentry_sdk.trace
- def _get_parsed_git_diff(
- self, label_analysis_request: LabelAnalysisRequest
- ) -> Optional[List[DiffChange]]:
- try:
- repo_service = get_repo_provider_service(
- label_analysis_request.head_commit.repository
- )
- git_diff = async_to_sync(repo_service.get_compare)(
- label_analysis_request.base_commit.commitid,
- label_analysis_request.head_commit.commitid,
- )
- return list(parse_git_diff_json(git_diff))
- except Exception:
- # temporary general catch while we find possible problems on this
- log.exception(
- "Label analysis failed to parse git diff",
- extra=dict(
- request_id=label_analysis_request.id,
- external_id=label_analysis_request.external_id,
- commit=label_analysis_request.head_commit.commitid,
- ),
- )
- self.add_processing_error(
- larq_id=label_analysis_request.id,
- error_code=LabelAnalysisProcessingErrorCode.FAILED,
- error_msg="Failed to parse git diff",
- error_extra=dict(
- head_commit=label_analysis_request.head_commit.commitid,
- base_commit=label_analysis_request.base_commit.commitid,
- ),
- )
- return None
-
- @sentry_sdk.trace
- def _get_base_report(
- self, label_analysis_request: LabelAnalysisRequest
- ) -> Optional[Report]:
- base_commit = label_analysis_request.base_commit
- current_yaml = get_repo_yaml(base_commit.repository)
- report_service = ReportService(current_yaml)
- report: Report = report_service.get_existing_report_for_commit(base_commit)
- if report is None:
- log.warning(
- "No report found for label analysis",
- extra=dict(
- request_id=label_analysis_request.id,
- commit=label_analysis_request.head_commit.commitid,
- ),
- )
- self.add_processing_error(
- larq_id=label_analysis_request.id,
- error_code=LabelAnalysisProcessingErrorCode.MISSING_DATA,
- error_msg="Missing base report",
- error_extra=dict(
- head_commit=label_analysis_request.head_commit.commitid,
- base_commit=label_analysis_request.base_commit.commitid,
- ),
- )
- return report
-
- @sentry_sdk.trace
- def calculate_final_result(
- self,
- *,
- requested_labels: Optional[List[str]],
- existing_labels: ExistingLabelSetsNotEncoded,
- commit_sha: str,
- ):
- all_report_labels = existing_labels.all_report_labels
- executable_lines_labels = existing_labels.executable_lines_labels
- global_level_labels = existing_labels.global_level_labels
- log.info(
- "Final info",
- extra=dict(
- executable_lines_labels=sorted(executable_lines_labels),
- all_report_labels=all_report_labels,
- requested_labels=requested_labels,
- global_level_labels=sorted(global_level_labels),
- commit=commit_sha,
- ),
- )
- if requested_labels is not None:
- requested_labels = set(requested_labels)
- ans = {
- "present_report_labels": sorted(all_report_labels & requested_labels),
- "present_diff_labels": sorted(
- executable_lines_labels & requested_labels
- ),
- "absent_labels": sorted(requested_labels - all_report_labels),
- "global_level_labels": sorted(global_level_labels & requested_labels),
- }
- return ans
- return {
- "present_report_labels": sorted(all_report_labels),
- "present_diff_labels": sorted(executable_lines_labels),
- "absent_labels": [],
- "global_level_labels": sorted(global_level_labels),
- }
-
- @sentry_sdk.trace
- def get_relevant_executable_lines(
- self, label_analysis_request: LabelAnalysisRequest, parsed_git_diff
- ):
- db_session = label_analysis_request.get_db_session()
- base_static_analysis: StaticAnalysisSuite = (
- db_session.query(StaticAnalysisSuite)
- .filter(
- StaticAnalysisSuite.commit_id == label_analysis_request.base_commit_id,
- )
- .first()
- )
- head_static_analysis: StaticAnalysisSuite = (
- db_session.query(StaticAnalysisSuite)
- .filter(
- StaticAnalysisSuite.commit_id == label_analysis_request.head_commit_id,
- )
- .first()
- )
- if not base_static_analysis or not head_static_analysis:
- # TODO : Proper handling of this case
- log.info(
- "Trying to make prediction where there are no static analyses",
- extra=dict(
- base_static_analysis=base_static_analysis.id_
- if base_static_analysis is not None
- else None,
- head_static_analysis=head_static_analysis.id_
- if head_static_analysis is not None
- else None,
- commit=label_analysis_request.head_commit.commitid,
- ),
- )
- self.add_processing_error(
- larq_id=label_analysis_request.id,
- error_code=LabelAnalysisProcessingErrorCode.MISSING_DATA,
- error_msg="Missing static analysis info",
- error_extra=dict(
- head_commit=label_analysis_request.head_commit.commitid,
- base_commit=label_analysis_request.base_commit.commitid,
- has_base_static_analysis=(base_static_analysis is not None),
- has_head_static_analysis=(head_static_analysis is not None),
- ),
- )
- return None
- static_analysis_comparison_service = StaticAnalysisComparisonService(
- base_static_analysis,
- head_static_analysis,
- parsed_git_diff,
- )
- return static_analysis_comparison_service.get_base_lines_relevant_to_change()
-
- @sentry_sdk.trace
- def get_executable_lines_labels(
- self, report: Report, executable_lines: LinesRelevantToChange
- ) -> Tuple[PossiblyEncodedLabelSet, PossiblyEncodedLabelSet]:
- if executable_lines["all"]:
- return (self.get_all_report_labels(report), set())
- full_sessions = set()
- labels: PossiblyEncodedLabelSet = set()
- global_level_labels = set()
- # Prime piece of code to be rust-ifyied
- for name, file_executable_lines in executable_lines["files"].items():
- rf = report.get(name)
- if rf and file_executable_lines:
- if file_executable_lines["all"]:
- for line_number, line in rf.lines:
- if line and line.datapoints:
- for datapoint in line.datapoints:
- dp_labels = datapoint.label_ids or []
- labels.update(dp_labels)
- if (
- # If labels are encoded
- GLOBAL_LEVEL_LABEL_IDX in dp_labels
- # If labels are NOT encoded
- or GLOBAL_LEVEL_LABEL in dp_labels
- ):
- full_sessions.add(datapoint.sessionid)
- else:
- for line_number in file_executable_lines["lines"]:
- line = rf.get(line_number)
- if line and line.datapoints:
- for datapoint in line.datapoints:
- dp_labels = datapoint.label_ids or []
- labels.update(dp_labels)
- if (
- # If labels are encoded
- GLOBAL_LEVEL_LABEL_IDX in dp_labels
- # If labels are NOT encoded
- or GLOBAL_LEVEL_LABEL in dp_labels
- ):
- full_sessions.add(datapoint.sessionid)
- for sess_id in full_sessions:
- global_level_labels.update(self.get_labels_per_session(report, sess_id))
- return (
- labels - set([GLOBAL_LEVEL_LABEL_IDX, GLOBAL_LEVEL_LABEL]),
- global_level_labels,
- )
-
- def get_labels_per_session(self, report: Report, sess_id: int):
- return get_labels_per_session(report, sess_id)
-
- def get_all_report_labels(self, report: Report) -> set:
- return get_all_report_labels(report)
-
-
-RegisteredLabelAnalysisRequestProcessingTask = celery_app.register_task(
- LabelAnalysisRequestProcessingTask()
-)
-label_analysis_task = celery_app.tasks[
- RegisteredLabelAnalysisRequestProcessingTask.name
-]
diff --git a/tasks/static_analysis_suite_check.py b/tasks/static_analysis_suite_check.py
deleted file mode 100644
index 10168431b..000000000
--- a/tasks/static_analysis_suite_check.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import logging
-from typing import Optional
-
-from shared.celery_config import static_analysis_task_name
-from shared.staticanalysis import StaticAnalysisSingleFileSnapshotState
-from shared.storage.exceptions import FileNotInStorageError
-
-from app import celery_app
-from database.models.staticanalysis import (
- StaticAnalysisSingleFileSnapshot,
- StaticAnalysisSuite,
- StaticAnalysisSuiteFilepath,
-)
-from services.archive import ArchiveService
-from tasks.base import BaseCodecovTask
-
-log = logging.getLogger(__name__)
-
-
-class StaticAnalysisSuiteCheckTask(BaseCodecovTask, name=static_analysis_task_name):
- def run_impl(
- self,
- db_session,
- *,
- suite_id,
- **kwargs,
- ):
- suite: Optional[StaticAnalysisSuite] = (
- db_session.query(StaticAnalysisSuite).filter_by(id_=suite_id).first()
- )
- if suite is None:
- log.warning("Checking Static Analysis that does not exist yet")
- return {"successful": False, "changed_count": None}
- log.info("Checking static analysis suite", extra=dict(suite_id=suite_id))
- query = (
- db_session.query(
- StaticAnalysisSingleFileSnapshot,
- StaticAnalysisSingleFileSnapshot.content_location,
- )
- .join(
- StaticAnalysisSuiteFilepath,
- StaticAnalysisSuiteFilepath.file_snapshot_id
- == StaticAnalysisSingleFileSnapshot.id_,
- )
- .filter(
- StaticAnalysisSuiteFilepath.analysis_suite_id == suite_id,
- StaticAnalysisSingleFileSnapshot.state_id
- == StaticAnalysisSingleFileSnapshotState.CREATED.db_id,
- )
- )
- archive_service = ArchiveService(suite.commit.repository)
- # purposefully iteration when an update would suffice,
- # because we actually want to validate different stuff
- changed_count = 0
- for elem, content_location in query:
- try:
- _ = archive_service.read_file(content_location)
- elem.state_id = StaticAnalysisSingleFileSnapshotState.VALID.db_id
- changed_count += 1
- except FileNotInStorageError:
- log.warning(
- "File not found to be analyzed",
- extra=dict(filepath_id=elem.id, suite_id=suite_id),
- )
-
- db_session.commit()
- return {"successful": True, "changed_count": changed_count}
-
-
-RegisteredStaticAnalysisSuiteCheckTask = celery_app.register_task(
- StaticAnalysisSuiteCheckTask()
-)
-static_analysis_suite_check_task = celery_app.tasks[
- RegisteredStaticAnalysisSuiteCheckTask.name
-]
diff --git a/tasks/tests/unit/test_check_static_analysis.py b/tasks/tests/unit/test_check_static_analysis.py
deleted file mode 100644
index 418d3401e..000000000
--- a/tasks/tests/unit/test_check_static_analysis.py
+++ /dev/null
@@ -1,89 +0,0 @@
-from shared.staticanalysis import StaticAnalysisSingleFileSnapshotState
-
-from database.tests.factories.staticanalysis import (
- StaticAnalysisSuiteFactory,
- StaticAnalysisSuiteFilepathFactory,
-)
-from tasks.static_analysis_suite_check import StaticAnalysisSuiteCheckTask
-
-
-class TestStaticAnalysisCheckTask(object):
- def test_simple_call_no_object_saved(self, dbsession):
- task = StaticAnalysisSuiteCheckTask()
- res = task.run_impl(dbsession, suite_id=987654321 * 7)
- assert res == {"changed_count": None, "successful": False}
-
- def test_simple_call_with_suite_all_created(
- self, dbsession, mock_storage, mock_configuration, mocker
- ):
- obj = StaticAnalysisSuiteFactory.create()
- dbsession.add(obj)
- dbsession.flush()
- task = StaticAnalysisSuiteCheckTask()
- for i in range(8):
- fp_obj = StaticAnalysisSuiteFilepathFactory.create(
- analysis_suite=obj,
- file_snapshot__state_id=StaticAnalysisSingleFileSnapshotState.CREATED.db_id,
- )
- mock_storage.write_file(
- mock_configuration.params["services"]["minio"]["bucket"],
- fp_obj.file_snapshot.content_location,
- "aaaa",
- )
- dbsession.add(fp_obj)
- # adding one without writing
- fp_obj = StaticAnalysisSuiteFilepathFactory.create(
- analysis_suite=obj,
- file_snapshot__state_id=StaticAnalysisSingleFileSnapshotState.CREATED.db_id,
- )
- dbsession.add(fp_obj)
- dbsession.flush()
- res = task.run_impl(dbsession, suite_id=obj.id_)
- assert res == {"changed_count": 8, "successful": True}
-
- def test_simple_call_with_suite_mix_from_other(
- self, dbsession, mock_storage, mock_configuration, mocker
- ):
- obj = StaticAnalysisSuiteFactory.create()
- another_obj_same_repo = StaticAnalysisSuiteFactory.create(
- commit__repository=obj.commit.repository
- )
- dbsession.add(obj)
- dbsession.flush()
- task = StaticAnalysisSuiteCheckTask()
- for i in range(17):
- fp_obj = StaticAnalysisSuiteFilepathFactory.create(
- analysis_suite=another_obj_same_repo,
- file_snapshot__state_id=StaticAnalysisSingleFileSnapshotState.CREATED.db_id,
- )
- mock_storage.write_file(
- mock_configuration.params["services"]["minio"]["bucket"],
- fp_obj.file_snapshot.content_location,
- "aaaa",
- )
- dbsession.add(fp_obj)
- for i in range(23):
- fp_obj = StaticAnalysisSuiteFilepathFactory.create(
- analysis_suite=obj,
- file_snapshot__state_id=StaticAnalysisSingleFileSnapshotState.CREATED.db_id,
- )
- mock_storage.write_file(
- mock_configuration.params["services"]["minio"]["bucket"],
- fp_obj.file_snapshot.content_location,
- "aaaa",
- )
- dbsession.add(fp_obj)
- for i in range(2):
- fp_obj = StaticAnalysisSuiteFilepathFactory.create(
- analysis_suite=obj,
- file_snapshot__state_id=StaticAnalysisSingleFileSnapshotState.VALID.db_id,
- )
- mock_storage.write_file(
- mock_configuration.params["services"]["minio"]["bucket"],
- fp_obj.file_snapshot.content_location,
- "aaaa",
- )
- dbsession.add(fp_obj)
- dbsession.flush()
- res = task.run_impl(dbsession, suite_id=obj.id_)
- assert res == {"changed_count": 23, "successful": True}
diff --git a/tasks/tests/unit/test_label_analysis.py b/tasks/tests/unit/test_label_analysis.py
deleted file mode 100644
index 5ebae7a28..000000000
--- a/tasks/tests/unit/test_label_analysis.py
+++ /dev/null
@@ -1,964 +0,0 @@
-import json
-
-import pytest
-from mock import patch
-from shared.reports.reportfile import ReportFile
-from shared.reports.resources import Report
-from shared.reports.types import CoverageDatapoint, LineSession, ReportLine
-
-from database.models.labelanalysis import LabelAnalysisRequest
-from database.tests.factories import RepositoryFactory
-from database.tests.factories.labelanalysis import LabelAnalysisRequestFactory
-from database.tests.factories.staticanalysis import (
- StaticAnalysisSingleFileSnapshotFactory,
- StaticAnalysisSuiteFactory,
- StaticAnalysisSuiteFilepathFactory,
-)
-from services.report import ReportService
-from services.static_analysis import StaticAnalysisComparisonService
-from tasks.label_analysis import (
- LabelAnalysisRequestProcessingTask,
- LabelAnalysisRequestState,
-)
-
-sample_head_static_analysis_dict = {
- "empty_lines": [2, 3, 11],
- "warnings": [],
- "filename": "source.py",
- "functions": [
- {
- "identifier": "some_function",
- "start_line": 6,
- "end_line": 10,
- "code_hash": "e69c18eff7d24f8bad3370db87f64333",
- "complexity_metrics": {
- "conditions": 1,
- "mccabe_cyclomatic_complexity": 2,
- "returns": 1,
- "max_nested_conditional": 1,
- },
- }
- ],
- "hash": "84d371ab1c57d2349038ac3671428803",
- "language": "python",
- "number_lines": 11,
- "statements": [
- (
- 1,
- {
- "line_surety_ancestorship": None,
- "start_column": 0,
- "line_hash": "55c30cf01e202728b6952e9cba304798",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 5,
- {
- "line_surety_ancestorship": None,
- "start_column": 4,
- "line_hash": "1d7be9f2145760a59513a4049fcd0d1c",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 6,
- {
- "line_surety_ancestorship": 5,
- "start_column": 4,
- "line_hash": "f802087a854c26782ee8d4ece7214425",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 7,
- {
- "line_surety_ancestorship": None,
- "start_column": 8,
- "line_hash": "6ae3393fa7880fe8a844c03256cac37b",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 8,
- {
- "line_surety_ancestorship": 6,
- "start_column": 4,
- "line_hash": "5b099d1822e9236c540a5701a657225e",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 9,
- {
- "line_surety_ancestorship": 8,
- "start_column": 4,
- "line_hash": "e5d4915bb7dddeb18f53dc9fde9a3064",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 10,
- {
- "line_surety_ancestorship": 9,
- "start_column": 4,
- "line_hash": "e70ce43136171575ee525375b10f91a1",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- ],
- "definition_lines": [(4, 6)],
- "import_lines": [],
-}
-
-sample_base_static_analysis_dict = {
- "empty_lines": [2, 3, 11],
- "warnings": [],
- "filename": "source.py",
- "functions": [
- {
- "identifier": "some_function",
- "start_line": 6,
- "end_line": 10,
- "code_hash": "e4b52b6da12184142fcd7ff2c8412662",
- "complexity_metrics": {
- "conditions": 1,
- "mccabe_cyclomatic_complexity": 2,
- "returns": 1,
- "max_nested_conditional": 1,
- },
- }
- ],
- "hash": "811d0016249a5b1400a685164e5295de",
- "language": "python",
- "number_lines": 11,
- "statements": [
- (
- 1,
- {
- "line_surety_ancestorship": None,
- "start_column": 0,
- "line_hash": "55c30cf01e202728b6952e9cba304798",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 5,
- {
- "line_surety_ancestorship": None,
- "start_column": 4,
- "line_hash": "1d7be9f2145760a59513a4049fcd0d1c",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 6,
- {
- "line_surety_ancestorship": 5,
- "start_column": 4,
- "line_hash": "52f98812dca4687f18373b87433df695",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 7,
- {
- "line_surety_ancestorship": None,
- "start_column": 8,
- "line_hash": "6ae3393fa7880fe8a844c03256cac37b",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 8,
- {
- "line_surety_ancestorship": 7,
- "start_column": 8,
- "line_hash": "5b099d1822e9236c540a5701a657225e",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 9,
- {
- "line_surety_ancestorship": 6,
- "start_column": 4,
- "line_hash": "e5d4915bb7dddeb18f53dc9fde9a3064",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 10,
- {
- "line_surety_ancestorship": 9,
- "start_column": 4,
- "line_hash": "e70ce43136171575ee525375b10f91a1",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- ],
- "definition_lines": [(4, 6)],
- "import_lines": [],
-}
-
-
-@pytest.fixture
-def sample_report_with_labels():
- r = Report()
- first_rf = ReportFile("source.py")
- first_rf.append(
- 5,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=["apple", "label_one", "pineapple", "banana"],
- )
- ],
- complexity=None,
- ),
- )
- first_rf.append(
- 6,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=["label_one", "pineapple", "banana"],
- )
- ],
- complexity=None,
- ),
- )
- first_rf.append(
- 7,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=["banana"],
- )
- ],
- complexity=None,
- ),
- )
- first_rf.append(
- 8,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=["banana"],
- ),
- CoverageDatapoint(
- sessionid=5,
- coverage=1,
- coverage_type=None,
- label_ids=["orangejuice"],
- ),
- ],
- complexity=None,
- ),
- )
- first_rf.append(
- 99,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=5,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=5,
- coverage=1,
- coverage_type=None,
- label_ids=["justjuice"],
- ),
- ],
- complexity=None,
- ),
- )
- first_rf.append(
- 8,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=["label_one", "pineapple", "banana"],
- ),
- CoverageDatapoint(
- sessionid=5,
- coverage=1,
- coverage_type=None,
- label_ids=["Th2dMtk4M_codecov", "applejuice"],
- ),
- ],
- complexity=None,
- ),
- )
- second_rf = ReportFile("path/from/additionsonly.py")
- second_rf.append(
- 6,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=["whatever", "here"],
- )
- ],
- complexity=None,
- ),
- )
- random_rf = ReportFile("path/from/randomfile_no_static_analysis.html")
- random_rf.append(
- 1,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[(LineSession(id=1, coverage=1))],
- datapoints=None,
- complexity=None,
- ),
- )
- r.append(first_rf)
- r.append(second_rf)
- r.append(random_rf)
-
- return r
-
-
-def test_simple_call_without_requested_labels_then_with_requested_labels(
- dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider
-):
- mock_metrics = mocker.patch("tasks.label_analysis.metrics")
- mocker.patch.object(
- LabelAnalysisRequestProcessingTask,
- "_get_lines_relevant_to_diff",
- return_value={
- "all": False,
- "files": {"source.py": {"all": False, "lines": {8, 6}}},
- },
- )
- mocker.patch.object(
- ReportService,
- "get_existing_report_for_commit",
- return_value=sample_report_with_labels,
- )
- repository = RepositoryFactory.create()
- larf = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larf)
- dbsession.flush()
- base_sasf = StaticAnalysisSuiteFactory.create(commit=larf.base_commit)
- head_sasf = StaticAnalysisSuiteFactory.create(commit=larf.head_commit)
- dbsession.add(base_sasf)
- dbsession.add(head_sasf)
- dbsession.flush()
- first_path = "abdkasdauchudh.txt"
- second_path = "0diao9u3qdsdu.txt"
- mock_storage.write_file(
- "archive",
- first_path,
- json.dumps(sample_base_static_analysis_dict),
- )
- mock_storage.write_file(
- "archive",
- second_path,
- json.dumps(sample_head_static_analysis_dict),
- )
- first_snapshot = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository, content_location=first_path
- )
- second_snapshot = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository, content_location=second_path
- )
- dbsession.add(first_snapshot)
- dbsession.add(second_snapshot)
- dbsession.flush()
- first_base_file = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=first_snapshot,
- analysis_suite=base_sasf,
- filepath="source.py",
- )
- first_head_file = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=second_snapshot,
- analysis_suite=head_sasf,
- filepath="source.py",
- )
- dbsession.add(first_base_file)
- dbsession.add(first_head_file)
- dbsession.flush()
-
- task = LabelAnalysisRequestProcessingTask()
- res = task.run_impl(dbsession, larf.id)
- expected_present_report_labels = [
- "apple",
- "applejuice",
- "banana",
- "here",
- "justjuice",
- "label_one",
- "orangejuice",
- "pineapple",
- "whatever",
- ]
- expected_present_diff_labels = sorted(
- ["applejuice", "banana", "label_one", "orangejuice", "pineapple"]
- )
- expected_result = {
- "absent_labels": [],
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "global_level_labels": ["applejuice", "justjuice", "orangejuice"],
- "success": True,
- "errors": [],
- }
- assert res == expected_result
- mock_metrics.incr.assert_called_with("label_analysis_task.success")
- dbsession.flush()
- dbsession.refresh(larf)
- assert larf.state_id == LabelAnalysisRequestState.FINISHED.db_id
- assert larf.result == {
- "absent_labels": [],
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "global_level_labels": ["applejuice", "justjuice", "orangejuice"],
- }
- # Now we call the task again, this time with the requested labels.
- # This illustrates what should happen if we patch the labels after calculating
- # And trigger the task again to save the new results
- larf.requested_labels = ["tangerine", "pear", "banana", "apple"]
- dbsession.flush()
- res = task.run_impl(dbsession, larf.id)
- expected_present_diff_labels = ["banana"]
- expected_present_report_labels = ["apple", "banana"]
- expected_absent_labels = ["pear", "tangerine"]
- assert res == {
- "absent_labels": expected_absent_labels,
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "success": True,
- "global_level_labels": [],
- "errors": [],
- }
- assert larf.result == {
- "absent_labels": expected_absent_labels,
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "global_level_labels": [],
- }
- mock_metrics.incr.assert_called_with(
- "label_analysis_task.already_calculated.new_result"
- )
-
-
-def test_simple_call_with_requested_labels(
- dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider
-):
- mock_metrics = mocker.patch("tasks.label_analysis.metrics")
- mocker.patch.object(
- LabelAnalysisRequestProcessingTask,
- "_get_lines_relevant_to_diff",
- return_value={
- "all": False,
- "files": {"source.py": {"all": False, "lines": {8, 6}}},
- },
- )
- mocker.patch.object(
- ReportService,
- "get_existing_report_for_commit",
- return_value=sample_report_with_labels,
- )
- larf = LabelAnalysisRequestFactory.create(
- requested_labels=["tangerine", "pear", "banana", "apple"]
- )
- dbsession.add(larf)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- res = task.run_impl(dbsession, larf.id)
- expected_present_diff_labels = ["banana"]
- expected_present_report_labels = ["apple", "banana"]
- expected_absent_labels = ["pear", "tangerine"]
- assert res == {
- "absent_labels": expected_absent_labels,
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "success": True,
- "global_level_labels": [],
- "errors": [],
- }
- dbsession.flush()
- dbsession.refresh(larf)
- assert larf.state_id == LabelAnalysisRequestState.FINISHED.db_id
- assert larf.result == {
- "absent_labels": expected_absent_labels,
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "global_level_labels": [],
- }
- mock_metrics.incr.assert_called_with("label_analysis_task.success")
-
-
-def test_get_requested_labels(dbsession, mocker):
- larf = LabelAnalysisRequestFactory.create(requested_labels=[])
-
- def side_effect(*args, **kwargs):
- larf.requested_labels = ["tangerine", "pear", "banana", "apple"]
-
- mock_refresh = mocker.patch.object(dbsession, "refresh", side_effect=side_effect)
- dbsession.add(larf)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- task.dbsession = dbsession
- labels = task._get_requested_labels(larf)
- mock_refresh.assert_called()
- assert labels == ["tangerine", "pear", "banana", "apple"]
-
-
-def test_call_label_analysis_no_request_object(dbsession, mocker):
- task = LabelAnalysisRequestProcessingTask()
- mock_metrics = mocker.patch("tasks.label_analysis.metrics")
- res = task.run_impl(db_session=dbsession, request_id=-1)
- assert res == {
- "success": False,
- "present_report_labels": [],
- "present_diff_labels": [],
- "absent_labels": [],
- "global_level_labels": [],
- "errors": [
- {
- "error_code": "not found",
- "error_params": {
- "extra": {},
- "message": "LabelAnalysisRequest not found",
- },
- }
- ],
- }
- mock_metrics.incr.assert_called_with(
- "label_analysis_task.failed_to_calculate.larq_not_found"
- )
-
-
-def test_get_executable_lines_labels_all_labels(sample_report_with_labels):
- executable_lines = {"all": True}
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (
- {
- "banana",
- "justjuice",
- "here",
- "pineapple",
- "applejuice",
- "apple",
- "whatever",
- "label_one",
- "orangejuice",
- },
- set(),
- )
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (task.get_all_report_labels(sample_report_with_labels), set())
-
-
-def test_get_executable_lines_labels_all_labels_in_one_file(sample_report_with_labels):
- executable_lines = {"all": False, "files": {"source.py": {"all": True}}}
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (
- {
- "apple",
- "justjuice",
- "applejuice",
- "label_one",
- "banana",
- "orangejuice",
- "pineapple",
- },
- {"orangejuice", "justjuice", "applejuice"},
- )
-
-
-def test_get_executable_lines_labels_some_labels_in_one_file(sample_report_with_labels):
- executable_lines = {
- "all": False,
- "files": {"source.py": {"all": False, "lines": set([5, 6])}},
- }
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (
- {"apple", "label_one", "pineapple", "banana"},
- set(),
- )
-
-
-def test_get_executable_lines_labels_some_labels_in_one_file_with_globals(
- sample_report_with_labels,
-):
- executable_lines = {
- "all": False,
- "files": {"source.py": {"all": False, "lines": set([6, 8])}},
- }
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (
- {"label_one", "pineapple", "banana", "orangejuice", "applejuice"},
- {"applejuice", "justjuice", "orangejuice"},
- )
-
-
-def test_get_executable_lines_labels_some_labels_in_one_file_other_null(
- sample_report_with_labels,
-):
- executable_lines = {
- "all": False,
- "files": {
- "source.py": {"all": False, "lines": set([5, 6])},
- "path/from/randomfile_no_static_analysis.html": None,
- },
- }
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (
- {"apple", "label_one", "pineapple", "banana"},
- set(),
- )
-
-
-def test_get_all_labels_one_session(sample_report_with_labels):
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_labels_per_session(sample_report_with_labels, 1) == {
- "apple",
- "banana",
- "here",
- "label_one",
- "pineapple",
- "whatever",
- }
- assert task.get_labels_per_session(sample_report_with_labels, 2) == set()
- assert task.get_labels_per_session(sample_report_with_labels, 5) == {
- "orangejuice",
- "justjuice",
- "applejuice",
- }
-
-
-def test_get_relevant_executable_lines_nothing_found(dbsession, mocker):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larf = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larf)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- task.errors = []
- task.dbsession = dbsession
- parsed_git_diff = []
- assert task.get_relevant_executable_lines(larf, parsed_git_diff) is None
-
-
-def test_get_relevant_executable_lines_with_static_analyses(dbsession, mocker):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larf = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larf)
- dbsession.flush()
- base_sasf = StaticAnalysisSuiteFactory.create(commit=larf.base_commit)
- head_sasf = StaticAnalysisSuiteFactory.create(commit=larf.head_commit)
- dbsession.add(base_sasf)
- dbsession.add(head_sasf)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- parsed_git_diff = []
- mocked_res = mocker.patch.object(
- StaticAnalysisComparisonService, "get_base_lines_relevant_to_change"
- )
- assert (
- task.get_relevant_executable_lines(larf, parsed_git_diff)
- == mocked_res.return_value
- )
-
-
-def test_run_impl_with_error(
- dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider
-):
- mock_metrics = mocker.patch("tasks.label_analysis.metrics")
- mocker.patch.object(
- LabelAnalysisRequestProcessingTask,
- "_get_lines_relevant_to_diff",
- side_effect=Exception("Oh no"),
- )
- larf = LabelAnalysisRequestFactory.create(
- requested_labels=["tangerine", "pear", "banana", "apple"]
- )
- dbsession.add(larf)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- res = task.run_impl(dbsession, larf.id)
- expected_result = {
- "absent_labels": [],
- "present_diff_labels": [],
- "present_report_labels": [],
- "success": False,
- "global_level_labels": [],
- "errors": [
- {
- "error_code": "failed",
- "error_params": {"extra": {}, "message": "Failed to calculate"},
- }
- ],
- }
- assert res == expected_result
- dbsession.flush()
- dbsession.refresh(larf)
- assert larf.state_id == LabelAnalysisRequestState.ERROR.db_id
- assert larf.result is None
- mock_metrics.incr.assert_called_with(
- "label_analysis_task.failed_to_calculate.exception"
- )
-
-
-def test_calculate_result_no_report(
- dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider
-):
- mock_metrics = mocker.patch("tasks.label_analysis.metrics")
- larf: LabelAnalysisRequest = LabelAnalysisRequestFactory.create(
- # This being not-ordered is important in the test
- # TO make sure we go through the warning at the bottom of run_impl
- requested_labels=["tangerine", "pear", "banana", "apple"]
- )
- dbsession.add(larf)
- dbsession.flush()
- mocker.patch.object(
- ReportService,
- "get_existing_report_for_commit",
- return_value=None,
- )
- mocker.patch.object(
- LabelAnalysisRequestProcessingTask,
- "_get_lines_relevant_to_diff",
- return_value=(set(), set(), set()),
- )
- task = LabelAnalysisRequestProcessingTask()
- res = task.run_impl(dbsession, larf.id)
- assert res == {
- "success": True,
- "absent_labels": larf.requested_labels,
- "present_diff_labels": [],
- "present_report_labels": [],
- "global_level_labels": [],
- "errors": [
- {
- "error_code": "missing data",
- "error_params": {
- "extra": {
- "base_commit": larf.base_commit.commitid,
- "head_commit": larf.head_commit.commitid,
- },
- "message": "Missing base report",
- },
- }
- ],
- }
- mock_metrics.incr.assert_called_with(
- "label_analysis_task.failed_to_calculate.missing_info"
- )
-
-
-@patch("tasks.label_analysis.parse_git_diff_json", return_value=["parsed_git_diff"])
-def test__get_parsed_git_diff(mock_parse_diff, dbsession, mock_repo_provider):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larq = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larq)
- dbsession.flush()
- mock_repo_provider.get_compare.return_value = {"diff": "json"}
- task = LabelAnalysisRequestProcessingTask()
- task.errors = []
- parsed_diff = task._get_parsed_git_diff(larq)
- assert parsed_diff == ["parsed_git_diff"]
- mock_parse_diff.assert_called_with({"diff": "json"})
- mock_repo_provider.get_compare.assert_called_with(
- larq.base_commit.commitid, larq.head_commit.commitid
- )
-
-
-@patch("tasks.label_analysis.parse_git_diff_json", return_value=["parsed_git_diff"])
-def test__get_parsed_git_diff_error(mock_parse_diff, dbsession, mock_repo_provider):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larq = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larq)
- dbsession.flush()
- mock_repo_provider.get_compare.side_effect = Exception("Oh no")
- task = LabelAnalysisRequestProcessingTask()
- task.errors = []
- task.dbsession = dbsession
- parsed_diff = task._get_parsed_git_diff(larq)
- assert parsed_diff is None
- mock_parse_diff.assert_not_called()
- mock_repo_provider.get_compare.assert_called_with(
- larq.base_commit.commitid, larq.head_commit.commitid
- )
-
-
-@patch(
- "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_relevant_executable_lines",
- return_value=[{"all": False, "files": {}}],
-)
-@patch(
- "tasks.label_analysis.LabelAnalysisRequestProcessingTask._get_parsed_git_diff",
- return_value=["parsed_git_diff"],
-)
-def test__get_lines_relevant_to_diff(
- mock_parse_diff, mock_get_relevant_lines, dbsession
-):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larq = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larq)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- lines = task._get_lines_relevant_to_diff(larq)
- assert lines == [{"all": False, "files": {}}]
- mock_parse_diff.assert_called_with(larq)
- mock_get_relevant_lines.assert_called_with(larq, ["parsed_git_diff"])
-
-
-@patch(
- "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_relevant_executable_lines"
-)
-@patch(
- "tasks.label_analysis.LabelAnalysisRequestProcessingTask._get_parsed_git_diff",
- return_value=None,
-)
-def test__get_lines_relevant_to_diff_error(
- mock_parse_diff, mock_get_relevant_lines, dbsession
-):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larq = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larq)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- lines = task._get_lines_relevant_to_diff(larq)
- assert lines is None
- mock_parse_diff.assert_called_with(larq)
- mock_get_relevant_lines.assert_not_called()
diff --git a/tasks/tests/unit/test_label_analysis_encoded_labels.py b/tasks/tests/unit/test_label_analysis_encoded_labels.py
deleted file mode 100644
index 56d78984c..000000000
--- a/tasks/tests/unit/test_label_analysis_encoded_labels.py
+++ /dev/null
@@ -1,1010 +0,0 @@
-import json
-
-import pytest
-from mock import MagicMock, patch
-from shared.reports.reportfile import ReportFile
-from shared.reports.resources import Report
-from shared.reports.types import CoverageDatapoint, LineSession, ReportLine
-
-from database.models.labelanalysis import LabelAnalysisRequest
-from database.tests.factories import RepositoryFactory
-from database.tests.factories.core import ReportFactory
-from database.tests.factories.labelanalysis import LabelAnalysisRequestFactory
-from database.tests.factories.staticanalysis import (
- StaticAnalysisSingleFileSnapshotFactory,
- StaticAnalysisSuiteFactory,
- StaticAnalysisSuiteFilepathFactory,
-)
-from helpers.labels import SpecialLabelsEnum
-from services.report import ReportService
-from services.static_analysis import StaticAnalysisComparisonService
-from tasks.label_analysis import (
- ExistingLabelSetsNotEncoded,
- LabelAnalysisRequestProcessingTask,
- LabelAnalysisRequestState,
-)
-
-sample_head_static_analysis_dict = {
- "empty_lines": [2, 3, 11],
- "warnings": [],
- "filename": "source.py",
- "functions": [
- {
- "identifier": "some_function",
- "start_line": 6,
- "end_line": 10,
- "code_hash": "e69c18eff7d24f8bad3370db87f64333",
- "complexity_metrics": {
- "conditions": 1,
- "mccabe_cyclomatic_complexity": 2,
- "returns": 1,
- "max_nested_conditional": 1,
- },
- }
- ],
- "hash": "84d371ab1c57d2349038ac3671428803",
- "language": "python",
- "number_lines": 11,
- "statements": [
- (
- 1,
- {
- "line_surety_ancestorship": None,
- "start_column": 0,
- "line_hash": "55c30cf01e202728b6952e9cba304798",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 5,
- {
- "line_surety_ancestorship": None,
- "start_column": 4,
- "line_hash": "1d7be9f2145760a59513a4049fcd0d1c",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 6,
- {
- "line_surety_ancestorship": 5,
- "start_column": 4,
- "line_hash": "f802087a854c26782ee8d4ece7214425",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 7,
- {
- "line_surety_ancestorship": None,
- "start_column": 8,
- "line_hash": "6ae3393fa7880fe8a844c03256cac37b",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 8,
- {
- "line_surety_ancestorship": 6,
- "start_column": 4,
- "line_hash": "5b099d1822e9236c540a5701a657225e",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 9,
- {
- "line_surety_ancestorship": 8,
- "start_column": 4,
- "line_hash": "e5d4915bb7dddeb18f53dc9fde9a3064",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 10,
- {
- "line_surety_ancestorship": 9,
- "start_column": 4,
- "line_hash": "e70ce43136171575ee525375b10f91a1",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- ],
- "definition_lines": [(4, 6)],
- "import_lines": [],
-}
-
-sample_base_static_analysis_dict = {
- "empty_lines": [2, 3, 11],
- "warnings": [],
- "filename": "source.py",
- "functions": [
- {
- "identifier": "some_function",
- "start_line": 6,
- "end_line": 10,
- "code_hash": "e4b52b6da12184142fcd7ff2c8412662",
- "complexity_metrics": {
- "conditions": 1,
- "mccabe_cyclomatic_complexity": 2,
- "returns": 1,
- "max_nested_conditional": 1,
- },
- }
- ],
- "hash": "811d0016249a5b1400a685164e5295de",
- "language": "python",
- "number_lines": 11,
- "statements": [
- (
- 1,
- {
- "line_surety_ancestorship": None,
- "start_column": 0,
- "line_hash": "55c30cf01e202728b6952e9cba304798",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 5,
- {
- "line_surety_ancestorship": None,
- "start_column": 4,
- "line_hash": "1d7be9f2145760a59513a4049fcd0d1c",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 6,
- {
- "line_surety_ancestorship": 5,
- "start_column": 4,
- "line_hash": "52f98812dca4687f18373b87433df695",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 7,
- {
- "line_surety_ancestorship": None,
- "start_column": 8,
- "line_hash": "6ae3393fa7880fe8a844c03256cac37b",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 8,
- {
- "line_surety_ancestorship": 7,
- "start_column": 8,
- "line_hash": "5b099d1822e9236c540a5701a657225e",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 9,
- {
- "line_surety_ancestorship": 6,
- "start_column": 4,
- "line_hash": "e5d4915bb7dddeb18f53dc9fde9a3064",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- (
- 10,
- {
- "line_surety_ancestorship": 9,
- "start_column": 4,
- "line_hash": "e70ce43136171575ee525375b10f91a1",
- "len": 0,
- "extra_connected_lines": (),
- },
- ),
- ],
- "definition_lines": [(4, 6)],
- "import_lines": [],
-}
-
-
-@pytest.fixture
-def sample_report_with_labels():
- r = Report()
- report_labels_index = {
- 0: SpecialLabelsEnum.CODECOV_ALL_LABELS_PLACEHOLDER.corresponding_label,
- 1: "apple",
- 2: "label_one",
- 3: "pineapple",
- 4: "banana",
- 5: "orangejuice",
- 6: "justjuice",
- 7: "whatever",
- 8: "here",
- 9: "applejuice",
- }
- first_rf = ReportFile("source.py")
- first_rf.append(
- 5,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=[1, 2, 3, 4],
- )
- ],
- complexity=None,
- ),
- )
- first_rf.append(
- 6,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=[2, 3, 4],
- )
- ],
- complexity=None,
- ),
- )
- first_rf.append(
- 7,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=[4],
- )
- ],
- complexity=None,
- ),
- )
- first_rf.append(
- 8,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=[4],
- ),
- CoverageDatapoint(
- sessionid=5,
- coverage=1,
- coverage_type=None,
- label_ids=[5],
- ),
- ],
- complexity=None,
- ),
- )
- first_rf.append(
- 99,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=5,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=5,
- coverage=1,
- coverage_type=None,
- label_ids=[6],
- ),
- ],
- complexity=None,
- ),
- )
- first_rf.append(
- 8,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=[2, 3, 4],
- ),
- CoverageDatapoint(
- sessionid=5,
- coverage=1,
- coverage_type=None,
- label_ids=[0, 9],
- ),
- ],
- complexity=None,
- ),
- )
- second_rf = ReportFile("path/from/additionsonly.py")
- second_rf.append(
- 6,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[
- (
- LineSession(
- id=1,
- coverage=1,
- )
- )
- ],
- datapoints=[
- CoverageDatapoint(
- sessionid=1,
- coverage=1,
- coverage_type=None,
- label_ids=[7, 8],
- )
- ],
- complexity=None,
- ),
- )
- random_rf = ReportFile("path/from/randomfile_no_static_analysis.html")
- random_rf.append(
- 1,
- ReportLine.create(
- coverage=1,
- type=None,
- sessions=[(LineSession(id=1, coverage=1))],
- datapoints=None,
- complexity=None,
- ),
- )
- r.append(first_rf)
- r.append(second_rf)
- r.append(random_rf)
- r.labels_index = report_labels_index
- return r
-
-
-def test_simple_call_without_requested_labels_then_with_requested_labels(
- dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider
-):
- mock_metrics = mocker.patch("tasks.label_analysis.metrics")
- mocker.patch.object(
- LabelAnalysisRequestProcessingTask,
- "_get_lines_relevant_to_diff",
- return_value={
- "all": False,
- "files": {"source.py": {"all": False, "lines": {8, 6}}},
- },
- )
- mocker.patch.object(
- ReportService,
- "get_existing_report_for_commit",
- return_value=sample_report_with_labels,
- )
- repository = RepositoryFactory.create()
- larf = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larf)
- dbsession.flush()
- base_sasf = StaticAnalysisSuiteFactory.create(commit=larf.base_commit)
- head_sasf = StaticAnalysisSuiteFactory.create(commit=larf.head_commit)
- dbsession.add(base_sasf)
- dbsession.add(head_sasf)
- dbsession.flush()
- first_path = "abdkasdauchudh.txt"
- second_path = "0diao9u3qdsdu.txt"
- mock_storage.write_file(
- "archive",
- first_path,
- json.dumps(sample_base_static_analysis_dict),
- )
- mock_storage.write_file(
- "archive",
- second_path,
- json.dumps(sample_head_static_analysis_dict),
- )
- first_snapshot = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository, content_location=first_path
- )
- second_snapshot = StaticAnalysisSingleFileSnapshotFactory.create(
- repository=repository, content_location=second_path
- )
- dbsession.add(first_snapshot)
- dbsession.add(second_snapshot)
- dbsession.flush()
- first_base_file = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=first_snapshot,
- analysis_suite=base_sasf,
- filepath="source.py",
- )
- first_head_file = StaticAnalysisSuiteFilepathFactory.create(
- file_snapshot=second_snapshot,
- analysis_suite=head_sasf,
- filepath="source.py",
- )
- dbsession.add(first_base_file)
- dbsession.add(first_head_file)
- dbsession.flush()
-
- task = LabelAnalysisRequestProcessingTask()
- assert sample_report_with_labels.labels_index is not None
- res = task.run_impl(dbsession, larf.id)
- expected_present_report_labels = [
- "apple",
- "applejuice",
- "banana",
- "here",
- "justjuice",
- "label_one",
- "orangejuice",
- "pineapple",
- "whatever",
- ]
- expected_present_diff_labels = sorted(
- ["applejuice", "banana", "label_one", "orangejuice", "pineapple"]
- )
- expected_result = {
- "absent_labels": [],
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "global_level_labels": ["applejuice", "justjuice", "orangejuice"],
- "success": True,
- "errors": [],
- }
- assert res == expected_result
- mock_metrics.incr.assert_called_with("label_analysis_task.success")
- # It's zero because the report has the _labels_index already
- dbsession.flush()
- dbsession.refresh(larf)
- assert larf.state_id == LabelAnalysisRequestState.FINISHED.db_id
- assert larf.result == {
- "absent_labels": [],
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "global_level_labels": ["applejuice", "justjuice", "orangejuice"],
- }
- # Now we call the task again, this time with the requested labels.
- # This illustrates what should happen if we patch the labels after calculating
- # And trigger the task again to save the new results
- larf.requested_labels = ["tangerine", "pear", "banana", "apple"]
- dbsession.flush()
- res = task.run_impl(dbsession, larf.id)
- expected_present_diff_labels = ["banana"]
- expected_present_report_labels = ["apple", "banana"]
- expected_absent_labels = ["pear", "tangerine"]
- assert res == {
- "absent_labels": expected_absent_labels,
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "success": True,
- "global_level_labels": [],
- "errors": [],
- }
- assert larf.result == {
- "absent_labels": expected_absent_labels,
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "global_level_labels": [],
- }
- mock_metrics.incr.assert_called_with(
- "label_analysis_task.already_calculated.new_result"
- )
- mock_metrics.incr.assert_called_with(
- "label_analysis_task.already_calculated.new_result"
- )
-
-
-def test_simple_call_with_requested_labels(
- dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider
-):
- mock_metrics = mocker.patch("tasks.label_analysis.metrics")
- mocker.patch.object(
- LabelAnalysisRequestProcessingTask,
- "_get_lines_relevant_to_diff",
- return_value={
- "all": False,
- "files": {"source.py": {"all": False, "lines": {8, 6}}},
- },
- )
- mocker.patch.object(
- ReportService,
- "get_existing_report_for_commit",
- return_value=sample_report_with_labels,
- )
- larf = LabelAnalysisRequestFactory.create(
- requested_labels=["tangerine", "pear", "banana", "apple"]
- )
- ReportFactory(commit=larf.base_commit)
- dbsession.add(larf)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- res = task.run_impl(dbsession, larf.id)
- expected_present_diff_labels = ["banana"]
- expected_present_report_labels = ["apple", "banana"]
- expected_absent_labels = ["pear", "tangerine"]
- assert res == {
- "absent_labels": expected_absent_labels,
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "success": True,
- "global_level_labels": [],
- "errors": [],
- }
- dbsession.flush()
- dbsession.refresh(larf)
- assert larf.state_id == LabelAnalysisRequestState.FINISHED.db_id
- assert larf.result == {
- "absent_labels": expected_absent_labels,
- "present_diff_labels": expected_present_diff_labels,
- "present_report_labels": expected_present_report_labels,
- "global_level_labels": [],
- }
- mock_metrics.incr.assert_called_with("label_analysis_task.success")
- mock_metrics.incr.assert_called_with("label_analysis_task.success")
-
-
-def test_get_requested_labels(dbsession, mocker):
- larf = LabelAnalysisRequestFactory.create(requested_labels=[])
-
- def side_effect(*args, **kwargs):
- larf.requested_labels = ["tangerine", "pear", "banana", "apple"]
-
- mock_refresh = mocker.patch.object(dbsession, "refresh", side_effect=side_effect)
- dbsession.add(larf)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- task.dbsession = dbsession
- labels = task._get_requested_labels(larf)
- mock_refresh.assert_called()
- assert labels == ["tangerine", "pear", "banana", "apple"]
-
-
-def test_call_label_analysis_no_request_object(dbsession, mocker):
- task = LabelAnalysisRequestProcessingTask()
- mock_metrics = mocker.patch("tasks.label_analysis.metrics")
- res = task.run_impl(db_session=dbsession, request_id=-1)
- assert res == {
- "success": False,
- "present_report_labels": [],
- "present_diff_labels": [],
- "absent_labels": [],
- "global_level_labels": [],
- "errors": [
- {
- "error_code": "not found",
- "error_params": {
- "extra": {},
- "message": "LabelAnalysisRequest not found",
- },
- }
- ],
- }
- mock_metrics.incr.assert_called_with(
- "label_analysis_task.failed_to_calculate.larq_not_found"
- )
-
-
-def test_get_executable_lines_labels_all_labels(sample_report_with_labels):
- executable_lines = {"all": True}
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (
- {
- 4,
- 6,
- 8,
- 3,
- 9,
- 1,
- 7,
- 2,
- 5,
- },
- set(),
- )
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (task.get_all_report_labels(sample_report_with_labels), set())
-
-
-def test_get_executable_lines_labels_all_labels_in_one_file(sample_report_with_labels):
- executable_lines = {"all": False, "files": {"source.py": {"all": True}}}
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (
- {
- 1,
- 6,
- 9,
- 2,
- 4,
- 5,
- 3,
- },
- {5, 6, 9},
- )
-
-
-def test_get_executable_lines_labels_some_labels_in_one_file(sample_report_with_labels):
- executable_lines = {
- "all": False,
- "files": {"source.py": {"all": False, "lines": set([5, 6])}},
- }
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (
- {1, 2, 3, 4},
- set(),
- )
-
-
-def test_get_executable_lines_labels_some_labels_in_one_file_with_globals(
- sample_report_with_labels,
-):
- executable_lines = {
- "all": False,
- "files": {"source.py": {"all": False, "lines": set([6, 8])}},
- }
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (
- {2, 3, 4, 5, 9},
- {9, 6, 5},
- )
-
-
-def test_get_executable_lines_labels_some_labels_in_one_file_other_null(
- sample_report_with_labels,
-):
- executable_lines = {
- "all": False,
- "files": {
- "source.py": {"all": False, "lines": set([5, 6])},
- "path/from/randomfile_no_static_analysis.html": None,
- },
- }
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_executable_lines_labels(
- sample_report_with_labels, executable_lines
- ) == (
- {1, 2, 3, 4},
- set(),
- )
-
-
-def test_get_all_labels_one_session(sample_report_with_labels):
- task = LabelAnalysisRequestProcessingTask()
- assert task.get_labels_per_session(sample_report_with_labels, 1) == {
- 1,
- 4,
- 8,
- 2,
- 3,
- 7,
- }
- assert task.get_labels_per_session(sample_report_with_labels, 2) == set()
- assert task.get_labels_per_session(sample_report_with_labels, 5) == {
- 5,
- 6,
- 9,
- }
-
-
-def test_get_relevant_executable_lines_nothing_found(dbsession, mocker):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larf = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larf)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- task.errors = []
- task.dbsession = dbsession
- parsed_git_diff = []
- assert task.get_relevant_executable_lines(larf, parsed_git_diff) is None
-
-
-def test_get_relevant_executable_lines_with_static_analyses(dbsession, mocker):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larf = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larf)
- dbsession.flush()
- base_sasf = StaticAnalysisSuiteFactory.create(commit=larf.base_commit)
- head_sasf = StaticAnalysisSuiteFactory.create(commit=larf.head_commit)
- dbsession.add(base_sasf)
- dbsession.add(head_sasf)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- parsed_git_diff = []
- mocked_res = mocker.patch.object(
- StaticAnalysisComparisonService, "get_base_lines_relevant_to_change"
- )
- assert (
- task.get_relevant_executable_lines(larf, parsed_git_diff)
- == mocked_res.return_value
- )
-
-
-def test_run_impl_with_error(
- dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider
-):
- mock_metrics = mocker.patch("tasks.label_analysis.metrics")
- mocker.patch.object(
- LabelAnalysisRequestProcessingTask,
- "_get_lines_relevant_to_diff",
- side_effect=Exception("Oh no"),
- )
- larf = LabelAnalysisRequestFactory.create(
- requested_labels=["tangerine", "pear", "banana", "apple"]
- )
- dbsession.add(larf)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- res = task.run_impl(dbsession, larf.id)
- expected_result = {
- "absent_labels": [],
- "present_diff_labels": [],
- "present_report_labels": [],
- "success": False,
- "global_level_labels": [],
- "errors": [
- {
- "error_code": "failed",
- "error_params": {"extra": {}, "message": "Failed to calculate"},
- }
- ],
- }
- assert res == expected_result
- dbsession.flush()
- dbsession.refresh(larf)
- assert larf.state_id == LabelAnalysisRequestState.ERROR.db_id
- assert larf.result is None
- mock_metrics.incr.assert_called_with(
- "label_analysis_task.failed_to_calculate.exception"
- )
-
-
-def test_calculate_result_no_report(
- dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider
-):
- mock_metrics = mocker.patch("tasks.label_analysis.metrics")
- larf: LabelAnalysisRequest = LabelAnalysisRequestFactory.create(
- # This being not-ordered is important in the test
- # TO make sure we go through the warning at the bottom of run_impl
- requested_labels=["tangerine", "pear", "banana", "apple"]
- )
- dbsession.add(larf)
- dbsession.flush()
- mocker.patch.object(
- ReportService,
- "get_existing_report_for_commit",
- return_value=None,
- )
- mocker.patch.object(
- LabelAnalysisRequestProcessingTask,
- "_get_lines_relevant_to_diff",
- return_value=(set(), set(), set()),
- )
- task = LabelAnalysisRequestProcessingTask()
- res = task.run_impl(dbsession, larf.id)
- assert res == {
- "success": True,
- "absent_labels": larf.requested_labels,
- "present_diff_labels": [],
- "present_report_labels": [],
- "global_level_labels": [],
- "errors": [
- {
- "error_code": "missing data",
- "error_params": {
- "extra": {
- "base_commit": larf.base_commit.commitid,
- "head_commit": larf.head_commit.commitid,
- },
- "message": "Missing base report",
- },
- }
- ],
- }
- mock_metrics.incr.assert_called_with(
- "label_analysis_task.failed_to_calculate.missing_info"
- )
-
-
-@patch("tasks.label_analysis.parse_git_diff_json", return_value=["parsed_git_diff"])
-def test__get_parsed_git_diff(mock_parse_diff, dbsession, mock_repo_provider):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larq = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larq)
- dbsession.flush()
- mock_repo_provider.get_compare.return_value = {"diff": "json"}
- task = LabelAnalysisRequestProcessingTask()
- task.errors = []
- parsed_diff = task._get_parsed_git_diff(larq)
- assert parsed_diff == ["parsed_git_diff"]
- mock_parse_diff.assert_called_with({"diff": "json"})
- mock_repo_provider.get_compare.assert_called_with(
- larq.base_commit.commitid, larq.head_commit.commitid
- )
-
-
-@patch("tasks.label_analysis.parse_git_diff_json", return_value=["parsed_git_diff"])
-def test__get_parsed_git_diff_error(mock_parse_diff, dbsession, mock_repo_provider):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larq = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larq)
- dbsession.flush()
- mock_repo_provider.get_compare.side_effect = Exception("Oh no")
- task = LabelAnalysisRequestProcessingTask()
- task.errors = []
- task.dbsession = dbsession
- parsed_diff = task._get_parsed_git_diff(larq)
- assert parsed_diff is None
- mock_parse_diff.assert_not_called()
- mock_repo_provider.get_compare.assert_called_with(
- larq.base_commit.commitid, larq.head_commit.commitid
- )
-
-
-@patch(
- "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_relevant_executable_lines",
- return_value=[{"all": False, "files": {}}],
-)
-@patch(
- "tasks.label_analysis.LabelAnalysisRequestProcessingTask._get_parsed_git_diff",
- return_value=["parsed_git_diff"],
-)
-def test__get_lines_relevant_to_diff(
- mock_parse_diff, mock_get_relevant_lines, dbsession
-):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larq = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larq)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- lines = task._get_lines_relevant_to_diff(larq)
- assert lines == [{"all": False, "files": {}}]
- mock_parse_diff.assert_called_with(larq)
- mock_get_relevant_lines.assert_called_with(larq, ["parsed_git_diff"])
-
-
-@patch(
- "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_relevant_executable_lines"
-)
-@patch(
- "tasks.label_analysis.LabelAnalysisRequestProcessingTask._get_parsed_git_diff",
- return_value=None,
-)
-def test__get_lines_relevant_to_diff_error(
- mock_parse_diff, mock_get_relevant_lines, dbsession
-):
- repository = RepositoryFactory.create()
- dbsession.add(repository)
- dbsession.flush()
- larq = LabelAnalysisRequestFactory.create(
- base_commit__repository=repository, head_commit__repository=repository
- )
- dbsession.add(larq)
- dbsession.flush()
- task = LabelAnalysisRequestProcessingTask()
- lines = task._get_lines_relevant_to_diff(larq)
- assert lines is None
- mock_parse_diff.assert_called_with(larq)
- mock_get_relevant_lines.assert_not_called()
-
-
-@patch(
- "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_all_report_labels",
- return_value=set(),
-)
-@patch(
- "tasks.label_analysis.LabelAnalysisRequestProcessingTask.get_executable_lines_labels",
- return_value=(set(), set()),
-)
-def test___get_existing_labels_no_labels_in_report(
- mock_get_executable_lines_labels, mock_get_all_report_labels
-):
- report = MagicMock(name="fake_report")
- lines_relevant = MagicMock(name="fake_lines_relevant_to_diff")
- task = LabelAnalysisRequestProcessingTask()
- res = task._get_existing_labels(report, lines_relevant)
- expected = ExistingLabelSetsNotEncoded(
- all_report_labels=set(),
- executable_lines_labels=set(),
- global_level_labels=set(),
- )
- assert isinstance(res, ExistingLabelSetsNotEncoded)
- assert res == expected