Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from vulnerabilities.pipelines.v2_importers import (
elixir_security_importer as elixir_security_importer_v2,
)
from vulnerabilities.pipelines.v2_importers import github_issue_pr as github_issue_pr_v2
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
Expand Down Expand Up @@ -115,5 +116,6 @@
ubuntu_usn.UbuntuUSNImporter,
fireeye.FireyeImporter,
oss_fuzz.OSSFuzzImporter,
github_issue_pr_v2.GithubPipelineIssuePR,
]
)
92 changes: 92 additions & 0 deletions vulnerabilities/pipelines/v2_importers/github_issue_pr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import re
from collections import defaultdict

from github import Github

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import ReferenceV2
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
from vulnerablecode.settings import env

# Optional GitHub API token. Falling back to None keeps this module importable
# when GITHUB_TOKEN is not configured; requests are then sent unauthenticated
# and are subject to GitHub's much lower anonymous rate limits.
# NOTE(review): assumes `env` is a django-environ ``Env`` whose ``str()``
# accepts a ``default`` keyword -- confirm against vulnerablecode.settings.
GITHUB_TOKEN = env.str("GITHUB_TOKEN", default=None)


class GithubPipelineIssuePR(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline to collect GitHub issues and PRs related to vulnerabilities.

    The pipeline searches a single GitHub repository for issues and pull
    requests mentioning a CVE, PYSEC or GHSA identifier, groups them by
    identifier, and yields one AdvisoryData per identifier whose references
    are the matching issue and PR URLs.
    """

    pipeline_id = "collect_issues_pr"

    # "owner/name" of the GitHub repository that is searched. Kept as a class
    # attribute so the search target and the advisory URL cannot drift apart.
    repo_name = "django/django"

    # Compiled once for the class instead of on every grouping call.
    _VULN_ID_PATTERN = re.compile(r"(CVE-\d{4}-\d+|PYSEC-\d{4}-\d+|GHSA-[\w-]+)", re.IGNORECASE)

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps."""
        return (
            cls.fetch_entries,
            cls.collect_and_store_advisories,
        )

    def fetch_entries(self):
        """
        Search GitHub for issues and PRs that mention vulnerability identifiers.

        Sets ``self.repo_url``, ``self.issues`` and ``self.pull_requestes``.
        The two result attributes hold whatever ``Github.search_issues``
        returns; they are only iterated and asked for ``totalCount`` here.
        """
        # Derive the advisory URL from the repository actually searched.
        self.repo_url = f"https://github.com/{self.repo_name}"

        g = Github(login_or_token=GITHUB_TOKEN)

        base_query = f"repo:{self.repo_name} (CVE OR PYSEC OR GHSA)"
        self.issues = g.search_issues(f"{base_query} is:issue")
        # NOTE: the attribute name is misspelled but is part of the existing
        # interface (callers and tests assign it), so it is kept as-is.
        self.pull_requestes = g.search_issues(f"{base_query} is:pr")

    def advisories_count(self) -> int:
        """
        Return total number of advisories discovered (issues + PRs).
        """
        return self.issues.totalCount + self.pull_requestes.totalCount

    @staticmethod
    def _normalize_vuln_id(raw_id):
        """
        Return ``raw_id`` in canonical letter case.

        The pattern matches case-insensitively, so the same identifier may be
        found as ``cve-2023-1`` and ``CVE-2023-1``. CVE and PYSEC identifiers
        are upper-cased; GHSA identifiers keep an upper-case prefix and a
        lower-case remainder.
        """
        prefix, _, remainder = raw_id.partition("-")
        prefix = prefix.upper()
        if prefix == "GHSA":
            return f"{prefix}-{remainder.lower()}"
        return f"{prefix}-{remainder.upper()}"

    def _extract_vuln_ids(self, item):
        """
        Return the distinct normalized identifiers found in the title and body
        of ``item``, in order of first appearance.
        """
        text = f"{item.title or ''} {item.body or ''}"
        found = (self._normalize_vuln_id(raw) for raw in self._VULN_ID_PATTERN.findall(text))
        # dict.fromkeys drops repeats while keeping first-seen order, so an
        # identifier mentioned several times yields a single reference.
        return list(dict.fromkeys(found))

    def collect_issues_and_prs(self):
        """
        Group issues and PRs by vulnerability identifiers (like CVE-xxxx-yyyy).
        Returns a dict mapping vuln_id -> [(type, html_url)].

        Each issue or PR contributes at most one entry per identifier.
        """
        self.log("Grouping GitHub issues and PRs by vulnerability identifiers.")

        grouped_items = defaultdict(list)

        # Issues first, then PRs, so references keep a stable order.
        sources = (("Issue", self.issues), ("PR", self.pull_requestes))
        for kind, items in sources:
            for item in items:
                for vuln_id in self._extract_vuln_ids(item):
                    grouped_items[vuln_id].append((kind, item.html_url))

        self.log(f"Grouped {len(grouped_items)} unique vulnerability identifiers.")
        return grouped_items

    def collect_advisories(self):
        """
        Generate AdvisoryData objects for each vulnerability ID grouped with its
        related GitHub issues and PRs.

        Yields one AdvisoryData per identifier; its ``url`` is ``self.repo_url``.
        """
        self.log("Generating AdvisoryData objects from GitHub issues and PRs.")
        grouped_data = self.collect_issues_and_prs()

        for vuln_id, refs in grouped_data.items():
            # The item kind ("Issue" or "PR") is stored as the reference_id.
            references = [ReferenceV2(reference_id=ref_id, url=url) for ref_id, url in refs]

            yield AdvisoryData(
                advisory_id=vuln_id,
                aliases=[vuln_id],
                references_v2=references,
                url=self.repo_url,
            )
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock

import pytest

from vulnerabilities.pipelines.v2_importers.github_issue_pr import GithubPipelineIssuePR
from vulnerabilities.tests import util_tests


@pytest.fixture
def pipeline():
    """Provide a GithubPipelineIssuePR with a fixed repo URL and a mocked logger."""
    instance = GithubPipelineIssuePR()
    # Silence logging so tests need no pipeline run context.
    instance.log = MagicMock()
    instance.repo_url = "https://github.com/test/repo"
    return instance


@pytest.mark.django_db
def test_collect_issues_and_prs(pipeline):
    """Identifiers in titles and bodies are grouped with their item kind and URL."""
    cve_issue = SimpleNamespace(
        title="Fix for CVE-2023-1234 found",
        body="This resolves a security issue",
        html_url="http://example.com/issue1",
    )
    # Carries no identifier, so it must not appear in the grouping.
    unrelated_issue = SimpleNamespace(
        title="No vulnerability mentioned",
        body="This is unrelated",
        html_url="http://example.com/issue2",
    )
    # One identifier in the title and another in the body.
    multi_id_pr = SimpleNamespace(
        title="Patch addressing GHSA-zzz-111",
        body="Also fixes PYSEC-2024-5678",
        html_url="http://example.com/pr1",
    )

    pipeline.issues = [cve_issue, unrelated_issue]
    pipeline.pull_requestes = [multi_id_pr]

    grouped = pipeline.collect_issues_and_prs()

    assert grouped == {
        "CVE-2023-1234": [("Issue", "http://example.com/issue1")],
        "GHSA-zzz-111": [("PR", "http://example.com/pr1")],
        "PYSEC-2024-5678": [("PR", "http://example.com/pr1")],
    }


# Fixture directory: three directory levels above this file, then
# test_data/github_issue_pr.
TEST_DATA = Path(__file__).parent.parent.parent.joinpath("test_data", "github_issue_pr")


@pytest.mark.django_db
def test_collect_advisories_from_json():
    """Advisories built from a pre-grouped JSON fixture match the expected output file."""
    expected_file = TEST_DATA / "expected_advisory_output.json"
    with open(TEST_DATA / "issues_and_pr.json", encoding="utf-8") as fixture:
        grouped = json.load(fixture)

    importer = GithubPipelineIssuePR()
    importer.log = MagicMock()
    importer.repo_url = "https://github.com/test/repo"
    # Bypass the GitHub search entirely: feed the grouped fixture straight in.
    importer.collect_issues_and_prs = MagicMock(return_value=grouped)

    result = []
    for advisory in importer.collect_advisories():
        result.append(advisory.to_dict())

    util_tests.check_results_against_json(result, expected_file)
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
[
{
"advisory_id": "CVE-2023-1234",
"aliases": [
"CVE-2023-1234"
],
"summary": "",
"affected_packages": [],
"references_v2": [
{
"reference_id": "Issue",
"reference_type": "",
"url": "https://example.com/issue1"
},
{
"reference_id": "PR",
"reference_type": "",
"url": "https://example.com/pr1"
}
],
"severities": [],
"date_published": null,
"weaknesses": [],
"url": "https://github.com/test/repo"
},
{
"advisory_id": "GHSA-zzz-111",
"aliases": [
"GHSA-zzz-111"
],
"summary": "",
"affected_packages": [],
"references_v2": [
{
"reference_id": "PR",
"reference_type": "",
"url": "https://example.com/pr1"
}
],
"severities": [],
"date_published": null,
"weaknesses": [],
"url": "https://github.com/test/repo"
},
{
"advisory_id": "PYSEC-2024-5678",
"aliases": [
"PYSEC-2024-5678"
],
"summary": "",
"affected_packages": [],
"references_v2": [
{
"reference_id": "PR",
"reference_type": "",
"url": "https://example.com/pr1"
}
],
"severities": [],
"date_published": null,
"weaknesses": [],
"url": "https://github.com/test/repo"
}
]
24 changes: 24 additions & 0 deletions vulnerabilities/tests/test_data/github_issue_pr/issues_and_pr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"CVE-2023-1234": [
[
"Issue",
"https://example.com/issue1"
],
[
"PR",
"https://example.com/pr1"
]
],
"GHSA-zzz-111": [
[
"PR",
"https://example.com/pr1"
]
],
"PYSEC-2024-5678": [
[
"PR",
"https://example.com/pr1"
]
]
}
Loading