fkie-cad
diff --git a/‎src/plugins/analysis/cve_lookup/code/cve_lookup.py‎
Lines changed: 76 additions & 61 deletions b/‎src/plugins/analysis/cve_lookup/code/cve_lookup.py‎
Lines changed: 76 additions & 61 deletions
diff --git a/‎src/plugins/analysis/cve_lookup/internal/busybox_cve_filter.py‎
Lines changed: 9 additions & 11 deletions b/‎src/plugins/analysis/cve_lookup/internal/busybox_cve_filter.py‎
Lines changed: 9 additions & 11 deletions
diff --git a/‎src/plugins/analysis/cve_lookup/internal/lookup.py‎
Lines changed: 36 additions & 14 deletions b/‎src/plugins/analysis/cve_lookup/internal/lookup.py‎
Lines changed: 36 additions & 14 deletions
diff --git a/‎src/plugins/analysis/cve_lookup/test/test_busybox_cve_filter.py‎
Lines changed: 2 additions & 2 deletions b/‎src/plugins/analysis/cve_lookup/test/test_busybox_cve_filter.py‎
Lines changed: 2 additions & 2 deletions
@@ -1,97 +1,112 @@
 from __future__ import annotations
 
-import sys
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, List
+
+from pydantic import BaseModel
+from semver import Version
 
 import config
-from analysis.PluginBase import AnalysisBasePlugin
+from analysis.plugin import AnalysisPluginV0, Tag
 from helperFunctions.tag import TagColor
+from plugins.analysis.cve_lookup.internal.database.db_connection import DbConnection
+from plugins.analysis.cve_lookup.internal.lookup import CveMatch, CvssScore, Lookup
 from plugins.mime_blacklists import MIME_BLACKLIST_NON_EXECUTABLE
 
 if TYPE_CHECKING:
-    from objects.file import FileObject
+    from io import FileIO
 
-try:
-    from ..internal.database.db_connection import DbConnection
-    from ..internal.lookup import Lookup
-except ImportError:
-    sys.path.append(str(Path(__file__).parent.parent / 'internal'))
-    from database.db_connection import DbConnection
-    from lookup import Lookup
+    from plugins.analysis.software_components.code.software_components import AnalysisPlugin as SoftwarePlugin
 
 DB_PATH = str(Path(__file__).parent / '../internal/database/cve_cpe.db')
 
 
-class AnalysisPlugin(AnalysisBasePlugin):
+class CveResult(BaseModel):
+    # Result entry for one software component; instances are ordered by `software_name`.
+
+    # display name of the component
+    software_name: str
+    # CVE matches found for this component
+    cve_list: List[CveMatch]
+
+    def __lt__(self, other):
+        """
+        Compare by software name so that lists of results can be sorted.
+        Raises a TypeError if `other` is not an instance of this class.
+        """
+        if isinstance(other, self.__class__):
+            return self.software_name < other.software_name
+        raise TypeError(f'Wrong type: {type(other)}')
+
+
+class AnalysisPlugin(AnalysisPluginV0):
     """
     lookup vulnerabilities from CVE feeds using ID from CPE dictionary
     """
 
-    NAME = 'cve_lookup'
-    DESCRIPTION = 'lookup CVE vulnerabilities'
-    MIME_BLACKLIST = MIME_BLACKLIST_NON_EXECUTABLE
-    DEPENDENCIES = ['software_components']  # noqa: RUF012
-    VERSION = '0.2.0'
-    FILE = __file__
-
-    def additional_setup(self):
+    class Schema(BaseModel):
+        # Result model of this plugin (the return type of `analyze`).
+        # `cve_results` holds one entry per software component for which at least one CVE was found.
+        cve_results: List[CveResult]
+
+    def __init__(self):
+        """
+        Register the plugin metadata and read the plugin options from the backend config.
+
+        Options (looked up in `config.backend.plugin` under the plugin name):
+        'min-critical-score' (default: 9.0) and 'match-any' (default: False).
+        """
+        metadata = self.MetaData(
+            name='cve_lookup',
+            description='lookup CVE vulnerabilities',
+            mime_blacklist=MIME_BLACKLIST_NON_EXECUTABLE,
+            version=Version(1, 0, 0),
+            dependencies=['software_components'],
+            Schema=self.Schema,
+        )
+        super().__init__(metadata=metadata)
-        self.min_crit_score = getattr(config.backend.plugin.get(self.NAME, {}), 'min-critical-score', 9.0)
-        self.match_any = getattr(config.backend.plugin.get(self.NAME, {}), 'match-any', False)
+        # This class no longer defines a NAME attribute of its own, so the plugin name for the
+        # config lookup is taken from the metadata object instead of relying on `self.NAME`.
+        plugin_config = config.backend.plugin.get(metadata.name, {})
+        self.min_crit_score = getattr(plugin_config, 'min-critical-score', 9.0)
+        self.match_any = getattr(plugin_config, 'match-any', False)
 
-    def process_object(self, file_object: FileObject) -> FileObject:
+    def analyze(self, file_handle: FileIO, virtual_file_path: dict, analyses: dict[str, BaseModel]) -> Schema:
         """
         Process the given file object and look up vulnerabilities for each software component.
         """
-        cves = {'cve_results': {}}
+        # The components are taken from the result of the 'software_components' analysis in `analyses`.
+        # Only the first listed version of a component is looked up; components without a name,
+        # without a version or without any CVE match do not appear in the result.
+        # `virtual_file_path` is not used by this plugin.
+        del virtual_file_path
         connection = DbConnection(f'sqlite:///{DB_PATH}')
-        lookup = Lookup(file_object, connection, match_any=self.match_any)
-        for sw_dict in file_object.processed_analysis['software_components']['result'].get('software_components', []):
-            product = sw_dict['name']
-            version = sw_dict['versions'][0] if sw_dict['versions'] else None
+
+        cve_results = []
+        # Lookup receives the path of the analyzed file (it hands this path to the BusyBox CVE filter)
+        lookup = Lookup(file_handle.name, connection, match_any=self.match_any)
+        sw_analysis: SoftwarePlugin.Schema = analyses['software_components']
+        for sw_dict in sw_analysis.software_components:
+            product = sw_dict.name
+            version = sw_dict.versions[0] if sw_dict.versions else None
             if product and version:
                 vulnerabilities = lookup.lookup_vulnerabilities(product, version)
                 if vulnerabilities:
                     component = f'{product} {version}'
-                    cves['cve_results'][component] = vulnerabilities
-
-        cves['summary'] = self._create_summary(cves['cve_results'])
-        file_object.processed_analysis[self.NAME] = cves
-        self.add_tags(cves['cve_results'], file_object)
-        return file_object
-
-    def _create_summary(self, cve_results: dict[str, dict[str, dict[str, str]]]) -> list[str]:
-        """
-        Creates a summary of the CVE results.
-        """
-        return list(
-            {
-                software if not self._software_has_critical_cve(entry) else f'{software} (CRITICAL)'
-                for software, entry in cve_results.items()
-            }
-        )
-
-    def _software_has_critical_cve(self, cve_dict: dict[str, dict[str, str]]) -> bool:
+                    cve_results.append(
+                        CveResult(
+                            software_name=component,
+                            cve_list=vulnerabilities,
+                        )
+                    )
+
+        return self.Schema(cve_results=cve_results)
+
+    def summarize(self, result: Schema) -> list[str]:
+        """
+        Create the summary of the result: the sorted, de-duplicated names of all components with
+        CVE matches. Components with at least one critical CVE get the suffix ' (CRITICAL)'.
+        """
+        labels = set()
+        for entry in result.cve_results:
+            if self._software_has_critical_cve(entry.cve_list):
+                labels.add(f'{entry.software_name} (CRITICAL)')
+            else:
+                labels.add(entry.software_name)
+        return sorted(labels)
+
+    def get_tags(self, result: Schema, summary: list[str]) -> list[Tag]:
+        """
+        Create the tags for the given result.
+
+        Returns a list with a single red, propagated 'critical CVE' tag if at least one CVE of any
+        component has a critical rating and an empty list otherwise. The summary is not used.
+        """
+        del summary
+        has_critical_cve = any(
+            self._software_has_critical_cve(component.cve_list) for component in result.cve_results
+        )
+        if not has_critical_cve:
+            return []
+        # One tag is enough: creating one tag per critical CVE would only yield identical duplicates.
+        return [Tag(name='CVE', value='critical CVE', color=TagColor.RED, propagate=True)]
+
+    def _software_has_critical_cve(self, cve_list: List[CveMatch]) -> bool:
         """
-        Check if any entry in the given dictionary of CVEs has a critical rating.
+        Check if any entry in the given list of CVE matches has a critical rating.
         """
-        return any(self._entry_has_critical_rating(entry) for entry in cve_dict.values())
-
-    def add_tags(self, cve_results: dict[str, dict[str, dict[str, str]]], file_object: FileObject):
-        """
-        Adds analysis tags to a file object based on the critical CVE results.
+        return any(self._entry_has_critical_rating(entry.scores) for entry in cve_list)
 
-        Results structure: {'component': {'cve_id': {'score2': '6.4', 'score3': 'N/A'}}}
-        """
-        for component in cve_results:
-            for cve_id in cve_results[component]:
-                entry = cve_results[component][cve_id]
-                if self._entry_has_critical_rating(entry):
-                    self.add_analysis_tag(file_object, 'CVE', 'critical CVE', TagColor.RED, True)
-                    return
-
-    def _entry_has_critical_rating(self, entry: dict[str, dict[str, str]]) -> bool:
+    def _entry_has_critical_rating(self, scores: list[CvssScore]) -> bool:
         """
-        Check if the given entry has a critical rating.
+        Check if at least one of the given CVSS scores reaches the critical threshold
+        (`self.min_crit_score`). Scores with the value 'N/A' are skipped.
         """
-        return any(value != 'N/A' and float(value) >= self.min_crit_score for value in entry['scores'].values())
+        for cvss in scores:
+            if cvss.score == 'N/A':
+                continue
+            if float(cvss.score) >= self.min_crit_score:
+                return True
+        return False
@@ -6,8 +6,6 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from objects.file import FileObject
-
     from .database.schema import Cve
 
 BASE_DIR = Path(__file__).parent
@@ -18,31 +16,31 @@
     GROUP_1 = f1.read().splitlines()
     GROUP_2 = f2.read().splitlines()
 
-PATTERNS_1 = [re.compile(rf'(?:\")(?:{re.escape(word)})(?:\-|\")') for word in GROUP_1]
-PATTERNS_2 = [re.compile(rf'(?:\b|\_)(?:{re.escape(word)})(?:\b|-)') for word in GROUP_2]
+# PATTERNS_1: a GROUP_1 word directly after a double quote and directly followed by '-' or a double quote
+# PATTERNS_2: a GROUP_2 word preceded by a word boundary or '_' and followed by a word boundary or '-'
+# The words are escaped, so they are always matched literally.
+PATTERNS_1 = [re.compile(rf'(?:"){re.escape(word)}(?:-|")') for word in GROUP_1]
+PATTERNS_2 = [re.compile(rf'(?:\b|_){re.escape(word)}(?:\b|-)') for word in GROUP_2]
 
 
-def filter_busybox_cves(file_object: FileObject, cves: dict[str, Cve]) -> dict[str, Cve]:
+def filter_busybox_cves(file_path: str, cves: dict[str, Cve]) -> dict[str, Cve]:
     """
-    Filters the BusyBox CVEs based on the components present in the binary file and the specified version.
+    Filter the given BusyBox CVEs by the components that are extracted from the binary at `file_path`.
     """
-    components = get_busybox_components(file_object)
-    return filter_cves_by_component(file_object, cves, components)
+    return filter_cves_by_component(file_path, cves, get_busybox_components(file_path))
 
 
-def get_busybox_components(file_object: FileObject) -> list[str]:
+def get_busybox_components(file_path: str) -> list[str]:
     """
-    Extracts the BusyBox components from the binary file.
+    Extract the component names that are embedded in the BusyBox binary at `file_path`.
+
+    The names are read from the first NUL-separated table that starts with the entries '[' and '[['
+    and runs up to the next double NUL byte. A ValueError is raised if the start of the table or
+    its terminating double NUL byte is not found.
     """
-    data = Path(file_object.file_path).read_bytes()
-    start_index = data.index(b'\x5b\x00\x5b\x5b\x00')
-    end_index = data.index(b'\x00\x00', start_index + 5)
-    extracted_bytes = data[start_index : end_index + 2]
-    split_bytes = extracted_bytes.split(b'\x00')
-    return [word.decode('ascii') for word in split_bytes if word]
+    content = Path(file_path).read_bytes()
+    table_start = content.index(b'\x5b\x00\x5b\x5b\x00')
+    # the slice includes both terminating NUL bytes (they only produce empty entries below)
+    table_end = content.index(b'\x00\x00', table_start + 5) + 2
+    names = []
+    for raw_name in content[table_start:table_end].split(b'\x00'):
+        if raw_name:
+            names.append(raw_name.decode('ascii'))
+    return names
 
 
-def filter_cves_by_component(file_object: FileObject, cves: dict[str, Cve], components: list[str]) -> dict[str, Cve]:
+def filter_cves_by_component(file_path: str, cves: dict[str, Cve], components: list[str]) -> dict[str, Cve]:
     """
     Filters CVEs based on the components present in the BusyBox binary file.
     """
@@ -54,7 +52,7 @@ def filter_cves_by_component(file_object: FileObject, cves: dict[str, Cve], comp
 
     num_deleted = len(cves) - len(filtered_cves)
     if num_deleted > 0:
-        logging.debug(f'{file_object}: Deleted {num_deleted} CVEs with components not found in this BusyBox binary')
+        logging.debug(f'{file_path}: Deleted {num_deleted} CVEs with components not found in this BusyBox binary')
 
     return filtered_cves
 
 
@@ -4,10 +4,11 @@
 import operator
 import re
 from itertools import combinations
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, List
 
 from packaging.version import InvalidVersion, Version
 from packaging.version import parse as parse_version
+from pydantic import BaseModel
 
 from .busybox_cve_filter import filter_busybox_cves
 from .database.db_interface import DbInterface
@@ -16,29 +17,46 @@
 if TYPE_CHECKING:
     from collections.abc import Callable
 
-    from objects.file import FileObject
-
     from .database.db_connection import DbConnection
     from .database.schema import Association, Cpe
 
 VALID_VERSION_REGEX = re.compile(r'v?(\d+!)?\d+(\.\d+)*([.-]?(a(lpha)?|b(eta)?|c|dev|post|pre(view)?|r|rc)?\d+)?')
 
 
+class CvssScore(BaseModel):
+    # A single CVSS score of a CVE.
+    # `version` is the key under which the score is stored in the CVE's `cvss_score` mapping.
+    version: str
+    # score value as a string (Lookup stores `str(score)`)
+    score: str
+
+
+class CveMatch(BaseModel):
+    # A CVE that was matched for a software component; instances are ordered by `id`.
+
+    # CVE identifier
+    id: str
+    # version string of the CPE/CVE association that matched
+    cpe_version: str
+    # CVSS scores of this CVE
+    scores: List[CvssScore]
+
+    def __lt__(self, other):
+        """
+        Compare by CVE id so that lists of matches can be sorted.
+        Raises a TypeError if `other` is not an instance of this class.
+        """
+        if isinstance(other, self.__class__):
+            return self.id < other.id
+        raise TypeError(f'Wrong type: {type(other)}')
+
+    def _get_scores_as_dict(self):
+        """
+        Return the scores as a dictionary that maps the CVSS version to the score.
+        """
+        scores_by_version = {}
+        for cvss in self.scores:
+            scores_by_version[cvss.version] = cvss.score
+        return scores_by_version
+
+
 class Lookup:
-    def __init__(self, file_object: FileObject, connection: DbConnection, match_any: bool = False):
-        self.file_object = file_object
+    def __init__(self, file_path: str, connection: DbConnection, match_any: bool = False):
+        # file_path: path of the analyzed file; it is handed to the BusyBox CVE filter during the lookup
+        # connection: database connection that gets wrapped in a DbInterface
+        # match_any: stored as-is -- NOTE(review): presumably relaxes the product term matching; confirm
+        self.file_path = file_path
         self.db_interface = DbInterface(connection)
         self.match_any = match_any
 
     def lookup_vulnerabilities(
         self,
         product_name: str,
         requested_version: str,
-    ) -> dict:
+    ) -> list[CveMatch]:
         """
         Look up vulnerabilities for a given product and requested version.
         """
-        vulnerabilities = {}
+        vulnerabilities: list[CveMatch] = []
         product_terms = self._generate_search_terms(product_name)
         version = replace_wildcards([requested_version])[0]
         cpe_matches = self.db_interface.match_cpes(product_terms)
@@ -49,16 +67,19 @@ def lookup_vulnerabilities(
             cve_ids = [association.cve_id for association in association_matches]
             cves = self.db_interface.get_cves(cve_ids)
             if 'busybox' in product_terms:
-                cves = filter_busybox_cves(self.file_object, cves)
+                cves = filter_busybox_cves(self.file_path, cves)
             for association in association_matches:
                 cve = cves.get(association.cve_id)
                 if cve:
                     cpe = cpe_matches.get(association.cpe_id)
-                    vulnerabilities[cve.cve_id] = {
-                        'scores': cve.cvss_score,
-                        'cpe_version': self._build_version_string(association, cpe),
-                    }
-
+                    scores = [CvssScore(version=version, score=str(score)) for version, score in cve.cvss_score.items()]
+                    vulnerabilities.append(
+                        CveMatch(
+                            id=association.cve_id,
+                            cpe_version=self._build_version_string(association, cpe),
+                            scores=scores,
+                        )
+                    )
         return vulnerabilities
 
     @staticmethod
@@ -157,7 +178,8 @@ def _coerce_version(version: str) -> Version:
             # try to throw away revisions and other stuff at the end as a final measure
             return parse_version(re.split(r'[^v.\d]', fixed_version)[0])
 
-    def _build_version_string(self, association: Association, cpe: Cpe) -> str:
+    @staticmethod
+    def _build_version_string(association: Association, cpe: Cpe) -> str:
         """
         Build a version string based on the cpe cve association boundaries.
         """
 
@@ -1,7 +1,7 @@
 import pytest
 
-from ..internal.busybox_cve_filter import filter_cves_by_component
-from ..internal.database.schema import Cve
+from plugins.analysis.cve_lookup.internal.busybox_cve_filter import filter_cves_by_component
+from plugins.analysis.cve_lookup.internal.database.schema import Cve
 
 CVE_DICT = {
     'CVE-2021-42385': Cve(