diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 02a86db0..ab69bf3f 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -3,10 +3,10 @@ language: python language_version: python3 entry: cycode - args: [ '--no-progress-meter', 'scan', '--scan-type', 'secret', 'pre-commit' ] + args: [ '-o', 'text', '--no-progress-meter', 'scan', '-t', 'secret', 'pre-commit' ] - id: cycode-sca name: Cycode SCA pre-commit defender language: python language_version: python3 entry: cycode - args: [ '--no-progress-meter', 'scan', '--scan-type', 'sca', 'pre-commit' ] + args: [ '-o', 'text', '--no-progress-meter', 'scan', '-t', 'sca', 'pre-commit' ] diff --git a/README.md b/README.md index 82575d6f..13e23a6f 100644 --- a/README.md +++ b/README.md @@ -221,11 +221,11 @@ Perform the following steps to install the pre-commit hook: ```yaml repos: - repo: https://github.com/cycodehq/cycode-cli - rev: v2.3.0 + rev: v3.0.0 hooks: - id: cycode stages: - - commit + - pre-commit ``` 4. Modify the created file for your specific needs. Use hook ID `cycode` to enable scan for Secrets. Use hook ID `cycode-sca` to enable SCA scan. If you want to enable both, use this configuration: @@ -233,14 +233,14 @@ Perform the following steps to install the pre-commit hook: ```yaml repos: - repo: https://github.com/cycodehq/cycode-cli - rev: v2.3.0 + rev: v3.0.0 hooks: - id: cycode stages: - - commit + - pre-commit - id: cycode-sca stages: - - commit + - pre-commit ``` 5. 
Install Cycode’s hook: diff --git a/cycode/cli/apps/scan/code_scanner.py b/cycode/cli/apps/scan/code_scanner.py index a40a066e..c6337021 100644 --- a/cycode/cli/apps/scan/code_scanner.py +++ b/cycode/cli/apps/scan/code_scanner.py @@ -48,15 +48,17 @@ logger = get_logger('Code Scanner') -def scan_sca_pre_commit(ctx: typer.Context) -> None: +def scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None: scan_type = ctx.obj['scan_type'] scan_parameters = get_scan_parameters(ctx) git_head_documents, pre_committed_documents = get_pre_commit_modified_documents( - ctx.obj['progress_bar'], ScanProgressBarSection.PREPARE_LOCAL_FILES + progress_bar=ctx.obj['progress_bar'], + progress_bar_section=ScanProgressBarSection.PREPARE_LOCAL_FILES, + repo_path=repo_path, ) git_head_documents = exclude_irrelevant_documents_to_scan(scan_type, git_head_documents) pre_committed_documents = exclude_irrelevant_documents_to_scan(scan_type, pre_committed_documents) - sca_code_scanner.perform_pre_hook_range_scan_actions(git_head_documents, pre_committed_documents) + sca_code_scanner.perform_pre_hook_range_scan_actions(repo_path, git_head_documents, pre_committed_documents) scan_commit_range_documents( ctx, git_head_documents, @@ -269,14 +271,13 @@ def scan_commit_range( commit_id = commit.hexsha commit_ids_to_scan.append(commit_id) parent = commit.parents[0] if commit.parents else git_proxy.get_null_tree() - diff = commit.diff(parent, create_patch=True, R=True) + diff_index = commit.diff(parent, create_patch=True, R=True) commit_documents_to_scan = [] - for blob in diff: - blob_path = get_path_by_os(os.path.join(path, get_diff_file_path(blob))) + for diff in diff_index: commit_documents_to_scan.append( Document( - path=blob_path, - content=blob.diff.decode('UTF-8', errors='replace'), + path=get_path_by_os(get_diff_file_path(diff)), + content=diff.diff.decode('UTF-8', errors='replace'), is_git_diff_format=True, unique_id=commit_id, ) @@ -413,10 +414,10 @@ def scan_commit_range_documents( 
_report_scan_status( cycode_client, scan_type, - local_scan_result.scan_id, + scan_id, scan_completed, - local_scan_result.relevant_detections_count, - local_scan_result.detections_count, + relevant_detections_count, + detections_count, len(to_documents_to_scan), zip_file_size, scan_command_type, @@ -658,7 +659,11 @@ def get_scan_parameters(ctx: typer.Context, paths: Optional[tuple[str, ...]] = N scan_parameters['paths'] = paths if len(paths) != 1: - # ignore remote url if multiple paths are provided + logger.debug('Multiple paths provided, going to ignore remote url') + return scan_parameters + + if not os.path.isdir(paths[0]): + logger.debug('Path is not a directory, going to ignore remote url') return scan_parameters remote_url = try_get_git_remote_url(paths[0]) diff --git a/cycode/cli/apps/scan/pre_commit/pre_commit_command.py b/cycode/cli/apps/scan/pre_commit/pre_commit_command.py index b919d659..40e6a8c1 100644 --- a/cycode/cli/apps/scan/pre_commit/pre_commit_command.py +++ b/cycode/cli/apps/scan/pre_commit/pre_commit_command.py @@ -27,14 +27,16 @@ def pre_commit_command( scan_type = ctx.obj['scan_type'] + repo_path = os.getcwd() # change locally for easy testing + progress_bar = ctx.obj['progress_bar'] progress_bar.start() if scan_type == consts.SCA_SCAN_TYPE: - scan_sca_pre_commit(ctx) + scan_sca_pre_commit(ctx, repo_path) return - diff_files = git_proxy.get_repo(os.getcwd()).index.diff('HEAD', create_patch=True, R=True) + diff_files = git_proxy.get_repo(repo_path).index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True) progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(diff_files)) diff --git a/cycode/cli/apps/scan/repository/repository_command.py b/cycode/cli/apps/scan/repository/repository_command.py index 16ad8611..c96ca577 100644 --- a/cycode/cli/apps/scan/repository/repository_command.py +++ b/cycode/cli/apps/scan/repository/repository_command.py @@ -1,4 +1,3 @@ -import os from pathlib import Path from typing 
import Annotated, Optional @@ -44,16 +43,16 @@ def repository_command( progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(file_entries)) documents_to_scan = [] - for file in file_entries: + for blob in file_entries: # FIXME(MarshalX): probably file could be tree or submodule too. we expect blob only progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES) - absolute_path = get_path_by_os(os.path.join(path, file.path)) - file_path = file.path if monitor else absolute_path + absolute_path = get_path_by_os(blob.abspath) + file_path = get_path_by_os(blob.path) if monitor else absolute_path documents_to_scan.append( Document( file_path, - file.data_stream.read().decode('UTF-8', errors='replace'), + blob.data_stream.read().decode('UTF-8', errors='replace'), absolute_path=absolute_path, ) ) diff --git a/cycode/cli/cli_types.py b/cycode/cli/cli_types.py index 9b792a01..c2fa12a2 100644 --- a/cycode/cli/cli_types.py +++ b/cycode/cli/cli_types.py @@ -3,42 +3,50 @@ from cycode.cli import consts -class OutputTypeOption(str, Enum): +class StrEnum(str, Enum): + def __str__(self) -> str: + return self.value + + +class OutputTypeOption(StrEnum): RICH = 'rich' TEXT = 'text' JSON = 'json' TABLE = 'table' -class ExportTypeOption(str, Enum): +class ExportTypeOption(StrEnum): JSON = 'json' HTML = 'html' SVG = 'svg' -class ScanTypeOption(str, Enum): +class ScanTypeOption(StrEnum): SECRET = consts.SECRET_SCAN_TYPE SCA = consts.SCA_SCAN_TYPE IAC = consts.IAC_SCAN_TYPE SAST = consts.SAST_SCAN_TYPE + def __str__(self) -> str: + return self.value + -class ScaScanTypeOption(str, Enum): +class ScaScanTypeOption(StrEnum): PACKAGE_VULNERABILITIES = 'package-vulnerabilities' LICENSE_COMPLIANCE = 'license-compliance' -class SbomFormatOption(str, Enum): +class SbomFormatOption(StrEnum): SPDX_2_2 = 'spdx-2.2' SPDX_2_3 = 'spdx-2.3' CYCLONEDX_1_4 = 'cyclonedx-1.4' -class SbomOutputFormatOption(str, Enum): +class SbomOutputFormatOption(StrEnum): JSON = 'json' -class 
SeverityOption(str, Enum): +class SeverityOption(StrEnum): INFO = 'info' LOW = 'low' MEDIUM = 'medium' diff --git a/cycode/cli/files_collector/repository_documents.py b/cycode/cli/files_collector/repository_documents.py index b524ca4c..379346f8 100644 --- a/cycode/cli/files_collector/repository_documents.py +++ b/cycode/cli/files_collector/repository_documents.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Optional, Union from cycode.cli import consts -from cycode.cli.files_collector.sca import sca_code_scanner +from cycode.cli.files_collector.sca.sca_code_scanner import get_file_content_from_commit_diff from cycode.cli.models import Document from cycode.cli.utils.git_proxy import git_proxy from cycode.cli.utils.path_utils import get_file_content, get_path_by_os @@ -38,8 +38,14 @@ def parse_commit_range(commit_range: str, path: str) -> tuple[str, str]: return from_commit_rev, to_commit_rev -def get_diff_file_path(file: 'Diff') -> Optional[str]: - return file.b_path if file.b_path else file.a_path +def get_diff_file_path(file: 'Diff', relative: bool = False) -> Optional[str]: + if relative: + # relative to the repository root + return file.b_path if file.b_path else file.a_path + + if file.b_blob: + return file.b_blob.abspath + return file.a_blob.abspath def get_diff_file_content(file: 'Diff') -> str: @@ -47,21 +53,21 @@ def get_diff_file_content(file: 'Diff') -> str: def get_pre_commit_modified_documents( - progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection' + progress_bar: 'BaseProgressBar', + progress_bar_section: 'ProgressBarSection', + repo_path: str, ) -> tuple[list[Document], list[Document]]: git_head_documents = [] pre_committed_documents = [] - repo = git_proxy.get_repo(os.getcwd()) - diff_files = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True) - progress_bar.set_section_length(progress_bar_section, len(diff_files)) - for file in diff_files: + repo = git_proxy.get_repo(repo_path) + diff_index = 
repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True) + progress_bar.set_section_length(progress_bar_section, len(diff_index)) + for diff in diff_index: progress_bar.update(progress_bar_section) - diff_file_path = get_diff_file_path(file) - file_path = get_path_by_os(diff_file_path) - - file_content = sca_code_scanner.get_file_content_from_commit(repo, consts.GIT_HEAD_COMMIT_REV, diff_file_path) + file_path = get_path_by_os(get_diff_file_path(diff)) + file_content = get_file_content_from_commit_diff(repo, consts.GIT_HEAD_COMMIT_REV, diff) if file_content is not None: git_head_documents.append(Document(file_path, file_content)) @@ -92,14 +98,13 @@ def get_commit_range_modified_documents( for blob in modified_files_diff: progress_bar.update(progress_bar_section) - diff_file_path = get_diff_file_path(blob) - file_path = get_path_by_os(diff_file_path) + file_path = get_path_by_os(get_diff_file_path(blob)) - file_content = sca_code_scanner.get_file_content_from_commit(repo, from_commit_rev, diff_file_path) + file_content = get_file_content_from_commit_diff(repo, from_commit_rev, blob) if file_content is not None: from_commit_documents.append(Document(file_path, file_content)) - file_content = sca_code_scanner.get_file_content_from_commit(repo, to_commit_rev, diff_file_path) + file_content = get_file_content_from_commit_diff(repo, to_commit_rev, blob) if file_content is not None: to_commit_documents.append(Document(file_path, file_content)) diff --git a/cycode/cli/files_collector/sca/sca_code_scanner.py b/cycode/cli/files_collector/sca/sca_code_scanner.py index e6ec0e9d..b9988122 100644 --- a/cycode/cli/files_collector/sca/sca_code_scanner.py +++ b/cycode/cli/files_collector/sca/sca_code_scanner.py @@ -1,4 +1,3 @@ -import os from typing import TYPE_CHECKING, Optional import typer @@ -18,7 +17,7 @@ from cycode.logger import get_logger if TYPE_CHECKING: - from git import Repo + from git import Diff, Repo BUILD_DEP_TREE_TIMEOUT = 180 @@ -39,9 +38,9 @@ def 
perform_pre_commit_range_scan_actions( def perform_pre_hook_range_scan_actions( - git_head_documents: list[Document], pre_committed_documents: list[Document] + repo_path: str, git_head_documents: list[Document], pre_committed_documents: list[Document] ) -> None: - repo = git_proxy.get_repo(os.getcwd()) + repo = git_proxy.get_repo(repo_path) add_ecosystem_related_files_if_exists(git_head_documents, repo, consts.GIT_HEAD_COMMIT_REV) add_ecosystem_related_files_if_exists(pre_committed_documents) @@ -69,7 +68,7 @@ def get_doc_ecosystem_related_project_files( file_to_search = join_paths(get_file_dir(doc.path), ecosystem_project_file) if not is_project_file_exists_in_documents(documents, file_to_search): if repo: - file_content = get_file_content_from_commit(repo, commit_rev, file_to_search) + file_content = get_file_content_from_commit_path(repo, commit_rev, file_to_search) else: file_content = get_file_content(file_to_search) @@ -151,13 +150,20 @@ def get_manifest_file_path(document: Document, is_monitor_action: bool, project_ return join_paths(project_path, document.path) if is_monitor_action else document.path -def get_file_content_from_commit(repo: 'Repo', commit: str, file_path: str) -> Optional[str]: +def get_file_content_from_commit_path(repo: 'Repo', commit: str, file_path: str) -> Optional[str]: try: return repo.git.show(f'{commit}:{file_path}') except git_proxy.get_git_command_error(): return None +def get_file_content_from_commit_diff(repo: 'Repo', commit: str, diff: 'Diff') -> Optional[str]: + from cycode.cli.files_collector.repository_documents import get_diff_file_path + + file_path = get_diff_file_path(diff, relative=True) + return get_file_content_from_commit_path(repo, commit, file_path) + + def perform_pre_scan_documents_actions( ctx: typer.Context, scan_type: str, documents_to_scan: list[Document], is_git_diff: bool = False ) -> None: diff --git a/cycode/cli/printers/console_printer.py b/cycode/cli/printers/console_printer.py index 17c402ff..50d48fd7 
100644 --- a/cycode/cli/printers/console_printer.py +++ b/cycode/cli/printers/console_printer.py @@ -28,9 +28,8 @@ class ConsolePrinter: 'text': TextPrinter, 'json': JsonPrinter, 'table': TablePrinter, - # overrides + # overrides: 'table_sca': ScaTablePrinter, - 'text_sca': ScaTablePrinter, } def __init__( diff --git a/cycode/cli/printers/rich_printer.py b/cycode/cli/printers/rich_printer.py index 755278d6..7ee0f853 100644 --- a/cycode/cli/printers/rich_printer.py +++ b/cycode/cli/printers/rich_printer.py @@ -54,47 +54,69 @@ def _get_details_table(self, detection: 'Detection') -> Table: severity_icon = SeverityOption.get_member_emoji(severity.lower()) details_table.add_row('Severity', f'{severity_icon} {SeverityOption(severity).__rich__()}') - detection_details = detection.detection_details - path = str(get_detection_file_path(self.scan_type, detection)) shorten_path = f'...{path[-self.MAX_PATH_LENGTH :]}' if len(path) > self.MAX_PATH_LENGTH else path details_table.add_row('In file', f'[link=file://{path}]{shorten_path}[/]') - if self.scan_type == consts.SECRET_SCAN_TYPE: - details_table.add_row('Secret SHA', detection_details.get('sha512')) - elif self.scan_type == consts.SCA_SCAN_TYPE: - details_table.add_row('CVEs', get_detection_clickable_cwe_cve(self.scan_type, detection)) - details_table.add_row('Package', detection_details.get('package_name')) - details_table.add_row('Version', detection_details.get('package_version')) - - is_package_vulnerability = 'alert' in detection_details - if is_package_vulnerability: - details_table.add_row( - 'First patched version', detection_details['alert'].get('first_patched_version', 'Not fixed') - ) - - details_table.add_row('Dependency path', detection_details.get('dependency_paths', 'N/A')) - - if not is_package_vulnerability: - details_table.add_row('License', detection_details.get('license')) - elif self.scan_type == consts.IAC_SCAN_TYPE: - details_table.add_row('IaC Provider', detection_details.get('infra_provider')) - 
elif self.scan_type == consts.SAST_SCAN_TYPE: - details_table.add_row('CWE', get_detection_clickable_cwe_cve(self.scan_type, detection)) - details_table.add_row('Subcategory', detection_details.get('category')) - details_table.add_row('Language', ', '.join(detection_details.get('languages', []))) - - engine_id_to_display_name = { - '5db84696-88dc-11ec-a8a3-0242ac120002': 'Semgrep OSS (Orchestrated by Cycode)', - '560a0abd-d7da-4e6d-a3f1-0ed74895295c': 'Bearer (Powered by Cycode)', - } - engine_id = detection.detection_details.get('external_scanner_id') - details_table.add_row('Security Tool', engine_id_to_display_name.get(engine_id, 'N/A')) + self._add_scan_related_rows(details_table, detection) details_table.add_row('Rule ID', detection.detection_rule_id) return details_table + def _add_scan_related_rows(self, details_table: Table, detection: 'Detection') -> None: + scan_type_details_handlers = { + consts.SECRET_SCAN_TYPE: self.__add_secret_scan_related_rows, + consts.SCA_SCAN_TYPE: self.__add_sca_scan_related_rows, + consts.IAC_SCAN_TYPE: self.__add_iac_scan_related_rows, + consts.SAST_SCAN_TYPE: self.__add_sast_scan_related_rows, + } + + if self.scan_type not in scan_type_details_handlers: + raise ValueError(f'Unknown scan type: {self.scan_type}') + + scan_enricher_function = scan_type_details_handlers[self.scan_type] + scan_enricher_function(details_table, detection) + + @staticmethod + def __add_secret_scan_related_rows(details_table: Table, detection: 'Detection') -> None: + details_table.add_row('Secret SHA', detection.detection_details.get('sha512')) + + @staticmethod + def __add_sca_scan_related_rows(details_table: Table, detection: 'Detection') -> None: + detection_details = detection.detection_details + + details_table.add_row('CVEs', get_detection_clickable_cwe_cve(consts.SCA_SCAN_TYPE, detection)) + details_table.add_row('Package', detection_details.get('package_name')) + details_table.add_row('Version', detection_details.get('package_version')) + + if 
detection.has_alert: + patched_version = detection_details['alert'].get('first_patched_version') + details_table.add_row('First patched version', patched_version or 'Not fixed') + + dependency_path = detection_details.get('dependency_paths') + details_table.add_row('Dependency path', dependency_path or 'N/A') + + if not detection.has_alert: + details_table.add_row('License', detection_details.get('license')) + + @staticmethod + def __add_iac_scan_related_rows(details_table: Table, detection: 'Detection') -> None: + details_table.add_row('IaC Provider', detection.detection_details.get('infra_provider')) + + @staticmethod + def __add_sast_scan_related_rows(details_table: Table, detection: 'Detection') -> None: + details_table.add_row('CWE', get_detection_clickable_cwe_cve(consts.SAST_SCAN_TYPE, detection)) + details_table.add_row('Subcategory', detection.detection_details.get('category')) + details_table.add_row('Language', ', '.join(detection.detection_details.get('languages', []))) + + engine_id_to_display_name = { + '5db84696-88dc-11ec-a8a3-0242ac120002': 'Semgrep OSS (Orchestrated by Cycode)', + '560a0abd-d7da-4e6d-a3f1-0ed74895295c': 'Bearer (Powered by Cycode)', + } + engine_id = detection.detection_details.get('external_scanner_id') + details_table.add_row('Security Tool', engine_id_to_display_name.get(engine_id, 'N/A')) + def _print_violation_card( self, document: 'Document', detection: 'Detection', detection_number: int, detections_count: int ) -> None: @@ -117,8 +139,7 @@ def _print_violation_card( title=':computer: Code Snippet', ) - is_sca_package_vulnerability = self.scan_type == consts.SCA_SCAN_TYPE and 'alert' in detection.detection_details - if is_sca_package_vulnerability: + if detection.has_alert: summary = detection.detection_details['alert'].get('description') else: summary = detection.detection_details.get('description') or detection.message diff --git a/cycode/cli/printers/text_printer.py b/cycode/cli/printers/text_printer.py index 05a360fd..51da53c5
100644 --- a/cycode/cli/printers/text_printer.py +++ b/cycode/cli/printers/text_printer.py @@ -1,5 +1,6 @@ from typing import TYPE_CHECKING, Optional +from cycode.cli import consts from cycode.cli.cli_types import SeverityOption from cycode.cli.models import CliError, CliResult, Document from cycode.cli.printers.printer_base import PrinterBase @@ -66,10 +67,34 @@ def __print_detection_summary(self, detection: 'Detection', document_path: str) self.console.print( severity_icon, severity, - f'violation: [b bright_red]{title}[/]{detection_commit_id_message}\n' + f'violation: [b bright_red]{title}[/]{detection_commit_id_message}\n', + *self.__get_intermediate_summary_lines(detection), f'[dodger_blue1]File: {clickable_document_path}[/]', ) + def __get_intermediate_summary_lines(self, detection: 'Detection') -> list[str]: + intermediate_summary_lines = [] + + if self.scan_type == consts.SCA_SCAN_TYPE: + intermediate_summary_lines.extend(self.__get_sca_related_summary_lines(detection)) + + return intermediate_summary_lines + + @staticmethod + def __get_sca_related_summary_lines(detection: 'Detection') -> list[str]: + summary_lines = [] + + if detection.has_alert: + patched_version = detection.detection_details['alert'].get('first_patched_version') + patched_version = patched_version or 'Not fixed' + + summary_lines.append(f'First patched version: [cyan]{patched_version}[/]\n') + else: + package_license = detection.detection_details.get('license', 'N/A') + summary_lines.append(f'License: [cyan]{package_license}[/]\n') + + return summary_lines + def __print_detection_code_segment(self, detection: 'Detection', document: Document) -> None: self.console.print( get_code_snippet_syntax( diff --git a/cycode/cli/printers/utils/detection_data.py b/cycode/cli/printers/utils/detection_data.py index 989a6600..37bee310 100644 --- a/cycode/cli/printers/utils/detection_data.py +++ b/cycode/cli/printers/utils/detection_data.py @@ -83,7 +83,7 @@ def get_detection_title(scan_type: str, 
detection: 'Detection') -> str: elif scan_type == consts.SECRET_SCAN_TYPE: title = f'Hardcoded {detection.type} is used' - is_sca_package_vulnerability = scan_type == consts.SCA_SCAN_TYPE and 'alert' in detection.detection_details + is_sca_package_vulnerability = scan_type == consts.SCA_SCAN_TYPE and detection.has_alert if is_sca_package_vulnerability: title = detection.detection_details['alert'].get('summary', 'N/A') diff --git a/cycode/cyclient/models.py b/cycode/cyclient/models.py index 70e3e551..ed649644 100644 --- a/cycode/cyclient/models.py +++ b/cycode/cyclient/models.py @@ -33,6 +33,15 @@ def __repr__(self) -> str: f'detection_rule_id:{self.detection_rule_id}' ) + @property + def has_alert(self) -> bool: + """Check if the detection has an alert. + + For example, for SCA, it means that the detection is a package vulnerability. + Otherwise, it is a license. + """ + return 'alert' in self.detection_details + class DetectionSchema(Schema): class Meta: