Skip to content

Commit 454bd07

Browse files
authored
Merge pull request #1580 from aboutcode-org/gitlab-importer-pipeline
Migrate GitLab importer to aboutcode pipeline
2 parents f35640f + 0e3ec68 commit 454bd07

File tree

8 files changed

+198
-89
lines changed

8 files changed

+198
-89
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from vulnerabilities.importers import gentoo
2222
from vulnerabilities.importers import github
2323
from vulnerabilities.importers import github_osv
24-
from vulnerabilities.importers import gitlab
2524
from vulnerabilities.importers import istio
2625
from vulnerabilities.importers import mozilla
2726
from vulnerabilities.importers import nvd
@@ -39,14 +38,14 @@
3938
from vulnerabilities.importers import vulnrichment
4039
from vulnerabilities.importers import xen
4140
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
41+
from vulnerabilities.pipelines import gitlab_importer
4242
from vulnerabilities.pipelines import nginx_importer
4343
from vulnerabilities.pipelines import npm_importer
4444
from vulnerabilities.pipelines import pypa_importer
4545

4646
IMPORTERS_REGISTRY = [
4747
nvd.NVDImporter,
4848
github.GitHubAPIImporter,
49-
gitlab.GitLabAPIImporter,
5049
pysec.PyPIImporter,
5150
alpine_linux.AlpineImporter,
5251
openssl.OpensslImporter,
@@ -78,6 +77,7 @@
7877
pypa_importer.PyPaImporterPipeline,
7978
npm_importer.NpmImporterPipeline,
8079
nginx_importer.NginxImporterPipeline,
80+
gitlab_importer.GitLabImporterPipeline,
8181
]
8282

8383
IMPORTERS_REGISTRY = {

vulnerabilities/improvers/valid_versions.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from datetime import datetime
1313
from typing import Iterable
1414
from typing import List
15-
from typing import Mapping
1615
from typing import Optional
1716

1817
from django.db.models import Q
@@ -34,7 +33,6 @@
3433
from vulnerabilities.importers.elixir_security import ElixirSecurityImporter
3534
from vulnerabilities.importers.github import GitHubAPIImporter
3635
from vulnerabilities.importers.github_osv import GithubOSVImporter
37-
from vulnerabilities.importers.gitlab import GitLabAPIImporter
3836
from vulnerabilities.importers.istio import IstioImporter
3937
from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter
4038
from vulnerabilities.importers.ruby import RubyImporter
@@ -44,6 +42,7 @@
4442
from vulnerabilities.improver import Inference
4543
from vulnerabilities.models import Advisory
4644
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
45+
from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline
4746
from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline
4847
from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline
4948
from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage
@@ -367,7 +366,7 @@ class DebianBasicImprover(ValidVersionImprover):
367366

368367

369368
class GitLabBasicImprover(ValidVersionImprover):
370-
importer = GitLabAPIImporter
369+
importer = GitLabImporterPipeline
371370
ignorable_versions = []
372371

373372

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Generated by Django 4.2.15 on 2024-09-27 13:08
2+
3+
from django.db import migrations
4+
5+
"""
6+
Update the created_by field on Advisory from the old qualified_name
7+
to the new pipeline_id.
8+
"""
9+
10+
11+
def update_created_by(apps, schema_editor):
    """
    Relabel GitLab advisories with the new importer pipeline identifier.

    Rewrite ``Advisory.created_by`` from the qualified name of the legacy
    GitLab importer class to the ``pipeline_id`` of the GitLab importer
    pipeline.

    ``apps`` is the historical app registry and ``schema_editor`` is unused;
    both are supplied by ``migrations.RunPython``.
    """
    # Both identifiers are frozen as literals instead of being read from
    # ``vulnerabilities.pipelines.gitlab_importer``: a data migration has to
    # keep producing the same rows even if that module is later renamed,
    # removed, or its ``pipeline_id`` changes.
    legacy_created_by = "vulnerabilities.importers.gitlab.GitLabAPIImporter"
    pipeline_created_by = "gitlab_importer"

    # Use the historical model from the registry, not the live model class.
    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=legacy_created_by).update(
        created_by=pipeline_created_by
    )
18+
19+
20+
21+
def reverse_update_created_by(apps, schema_editor):
    """
    Restore the legacy importer name on GitLab advisories.

    Rewrite ``Advisory.created_by`` from the GitLab importer pipeline
    ``pipeline_id`` back to the qualified name of the legacy GitLab importer
    class. This is the reverse operation of the forward data migration.

    ``apps`` is the historical app registry and ``schema_editor`` is unused;
    both are supplied by ``migrations.RunPython``.
    """
    # Literals are used on purpose: importing the pipeline class would tie
    # this migration to whatever the application code looks like at the time
    # the migration is replayed.
    pipeline_created_by = "gitlab_importer"
    legacy_created_by = "vulnerabilities.importers.gitlab.GitLabAPIImporter"

    # Use the historical model from the registry, not the live model class.
    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=pipeline_created_by).update(
        created_by=legacy_created_by
    )
28+
29+
30+
class Migration(migrations.Migration):
    """Data migration updating ``created_by`` on GitLab advisories."""

    # Applied after the nginx ``created_by`` data migration.
    dependencies = [("vulnerabilities", "0065_update_nginx_advisory_created_by")]

    # A single reversible data operation: forward and reverse callables.
    operations = [
        migrations.RunPython(
            code=update_created_by,
            reverse_code=reverse_update_created_by,
        ),
    ]

vulnerabilities/importers/gitlab.py renamed to vulnerabilities/pipelines/gitlab_importer.py

Lines changed: 93 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@
1212
from pathlib import Path
1313
from typing import Iterable
1414
from typing import List
15-
from typing import Optional
15+
from typing import Tuple
1616

1717
import pytz
1818
import saneyaml
1919
from dateutil import parser as dateparser
20+
from fetchcode.vcs import fetch_via_vcs
2021
from packageurl import PackageURL
2122
from univers.version_range import RANGE_CLASS_BY_SCHEMES
2223
from univers.version_range import VersionRange
@@ -25,58 +26,88 @@
2526

2627
from vulnerabilities.importer import AdvisoryData
2728
from vulnerabilities.importer import AffectedPackage
28-
from vulnerabilities.importer import Importer
2929
from vulnerabilities.importer import Reference
30+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
3031
from vulnerabilities.utils import build_description
3132
from vulnerabilities.utils import get_advisory_url
3233
from vulnerabilities.utils import get_cwe_id
3334

34-
logger = logging.getLogger(__name__)
3535

36-
PURL_TYPE_BY_GITLAB_SCHEME = {
37-
"conan": "conan",
38-
"gem": "gem",
39-
# Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742
40-
# "go": "golang",
41-
"maven": "maven",
42-
"npm": "npm",
43-
"nuget": "nuget",
44-
"packagist": "composer",
45-
"pypi": "pypi",
46-
}
36+
class GitLabImporterPipeline(VulnerableCodeBaseImporterPipeline):
37+
"""Collect advisories from the GitLab Advisory Database (Open Source Edition)."""
4738

48-
GITLAB_SCHEME_BY_PURL_TYPE = {v: k for k, v in PURL_TYPE_BY_GITLAB_SCHEME.items()}
39+
pipeline_id = "gitlab_importer"
4940

50-
51-
class GitLabAPIImporter(Importer):
5241
spdx_license_expression = "MIT"
5342
license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE"
5443
importer_name = "GitLab Importer"
5544
repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/"
5645

57-
def advisory_data(self, _keep_clone=False) -> Iterable[AdvisoryData]:
58-
try:
59-
self.clone(repo_url=self.repo_url)
60-
base_path = Path(self.vcs_response.dest_dir)
46+
@classmethod
def steps(cls):
    """
    Return the steps of this pipeline as a tuple, in listed order:
    clone, collect and store advisories, import new advisories, clean downloads.
    """
    ordered_steps = (
        cls.clone,
        cls.collect_and_store_advisories,
        cls.import_new_advisories,
        cls.clean_downloads,
    )
    return ordered_steps
6154

62-
for file_path in base_path.glob("**/*.yml"):
63-
gitlab_type, package_slug, vuln_id = parse_advisory_path(
64-
base_path=base_path,
65-
file_path=file_path,
66-
)
55+
purl_type_by_gitlab_scheme = {
56+
"conan": "conan",
57+
"gem": "gem",
58+
# Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742
59+
# "go": "golang",
60+
"maven": "maven",
61+
"npm": "npm",
62+
"nuget": "nuget",
63+
"packagist": "composer",
64+
"pypi": "pypi",
65+
}
66+
67+
gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()}
68+
69+
def clone(self):
    """
    Fetch ``self.repo_url`` with ``fetch_via_vcs`` and keep the returned
    response on ``self.vcs_response`` for the following steps.
    """
    repo_url = self.repo_url
    self.log(f"Cloning `{repo_url}`")
    self.vcs_response = fetch_via_vcs(repo_url)
72+
73+
def advisories_count(self):
    """
    Return the number of advisory files that ``collect_advisories`` will
    attempt to parse in the cloned repository.

    Only ``*.yml`` files located below a top-level directory whose name is a
    key of ``purl_type_by_gitlab_scheme`` are counted. YAML files sitting
    directly in the repository root and files of unsupported package types
    are skipped by ``collect_advisories``, so counting them would overstate
    the total.
    """
    root = Path(self.vcs_response.dest_dir)
    supported_types = self.purl_type_by_gitlab_scheme
    return sum(
        1
        for path in root.rglob("*.yml")
        # The first relative path segment is the GitLab package type.
        if path.parent != root and path.relative_to(root).parts[0] in supported_types
    )
76+
77+
def collect_advisories(self) -> Iterable[AdvisoryData]:
    """
    Yield an AdvisoryData for each supported advisory file found in the
    cloned repository.

    Every ``*.yml`` file below the clone directory is considered. Files
    located directly in the repository root are skipped, as are files whose
    GitLab package type is not a key of ``purl_type_by_gitlab_scheme``.
    Files that ``parse_gitlab_advisory`` cannot turn into an advisory are
    skipped as well, so consumers never receive ``None``.
    """
    base_path = Path(self.vcs_response.dest_dir)
    supported_types = self.purl_type_by_gitlab_scheme

    for file_path in base_path.rglob("*.yml"):
        # A YAML file directly in the root has no package type directory
        # above it, so it cannot be an advisory path.
        if file_path.parent == base_path:
            continue

        gitlab_type, _, _ = parse_advisory_path(
            base_path=base_path,
            file_path=file_path,
        )

        # Package types without a PURL type mapping are skipped silently.
        if gitlab_type not in supported_types:
            continue

        advisory = parse_gitlab_advisory(
            file=file_path,
            base_path=base_path,
            gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type,
            purl_type_by_gitlab_scheme=supported_types,
            logger=self.log,
        )

        # parse_gitlab_advisory is documented as returning an AdvisoryData
        # or None; the problem is already logged there, so do not forward it.
        if advisory is None:
            continue

        yield advisory
70103

71-
else:
72-
logger.error(f"Unknow package type {gitlab_type!r} in {file_path!r}")
73-
continue
74-
finally:
75-
if self.vcs_response and not _keep_clone:
76-
self.vcs_response.delete()
104+
def clean_downloads(self):
    """
    Delete the cloned repository, if there is one.

    Do nothing when ``vcs_response`` is falsy or was never set, for example
    when the clone step did not run or failed before assigning it.
    """
    # getattr: the attribute is only created by the clone step.
    vcs_response = getattr(self, "vcs_response", None)
    if not vcs_response:
        return

    self.log("Removing cloned repository")
    vcs_response.delete()
77108

78109

79-
def parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryData]:
110+
def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]:
80111
"""
81112
Parse a gitlab advisory file and return a 3-tuple of:
82113
(gitlab_type, package_slug, vulnerability_id)
@@ -96,21 +127,21 @@ def parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryDa
96127
>>> parse_advisory_path(base_path=base_path, file_path=file_path)
97128
('npm', '@express/beego/beego/v2', 'CVE-2021-43831')
98129
"""
99-
relative_path_segments = str(file_path.relative_to(base_path)).strip("/").split("/")
130+
relative_path_segments = file_path.relative_to(base_path).parts
100131
gitlab_type = relative_path_segments[0]
101-
vuln_id = relative_path_segments[-1].replace(".yml", "")
132+
vuln_id = file_path.stem
102133
package_slug = "/".join(relative_path_segments[1:-1])
103134

104135
return gitlab_type, package_slug, vuln_id
105136

106137

107-
def get_purl(package_slug):
138+
def get_purl(package_slug, purl_type_by_gitlab_scheme, logger):
108139
"""
109140
Return a PackageURL object from a package slug
110141
"""
111142
parts = [p for p in package_slug.strip("/").split("/") if p]
112143
gitlab_scheme = parts[0]
113-
purl_type = PURL_TYPE_BY_GITLAB_SCHEME[gitlab_scheme]
144+
purl_type = purl_type_by_gitlab_scheme[gitlab_scheme]
114145
if gitlab_scheme == "go":
115146
name = "/".join(parts[1:])
116147
return PackageURL(type=purl_type, namespace=None, name=name)
@@ -125,7 +156,7 @@ def get_purl(package_slug):
125156
name = parts[-1]
126157
namespace = "/".join(parts[1:-1])
127158
return PackageURL(type=purl_type, namespace=namespace, name=name)
128-
logger.error(f"get_purl: package_slug can not be parsed: {package_slug!r}")
159+
logger(f"get_purl: package_slug can not be parsed: {package_slug!r}", level=logging.ERROR)
129160
return
130161

131162

@@ -140,7 +171,7 @@ def extract_affected_packages(
140171
In case of gitlab advisory data we get a list of fixed_versions and an affected_version_range,
141172
Since we can not determine which package fixes which range.
142173
We store all the fixed_versions with the same affected_version_range in the advisory.
143-
Later the advisory data is used to be infered in the GitLabBasicImprover.
174+
Later the advisory data is used to be inferred in the GitLabBasicImprover.
144175
"""
145176
for fixed_version in fixed_versions:
146177
yield AffectedPackage(
@@ -150,7 +181,9 @@ def extract_affected_packages(
150181
)
151182

152183

153-
def parse_gitlab_advisory(file, base_path):
184+
def parse_gitlab_advisory(
185+
file, base_path, gitlab_scheme_by_purl_type, purl_type_by_gitlab_scheme, logger
186+
):
154187
"""
155188
Parse a Gitlab advisory file and return an AdvisoryData or None.
156189
These files are YAML. There is a JSON schema documented at
@@ -177,8 +210,9 @@ def parse_gitlab_advisory(file, base_path):
177210
with open(file) as f:
178211
gitlab_advisory = saneyaml.load(f)
179212
if not isinstance(gitlab_advisory, dict):
180-
logger.error(
181-
f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}"
213+
logger(
214+
f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}",
215+
level=logging.ERROR,
182216
)
183217
return
184218

@@ -199,9 +233,15 @@ def parse_gitlab_advisory(file, base_path):
199233
base_path=base_path,
200234
url="https://gitlab.com/gitlab-org/advisories-community/-/blob/main/",
201235
)
202-
purl: PackageURL = get_purl(package_slug=package_slug)
236+
purl: PackageURL = get_purl(
237+
package_slug=package_slug,
238+
purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme,
239+
logger=logger,
240+
)
203241
if not purl:
204-
logger.error(f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}")
242+
logger(
243+
f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}", level=logging.ERROR
244+
)
205245
return AdvisoryData(
206246
aliases=aliases,
207247
summary=summary,
@@ -214,7 +254,7 @@ def parse_gitlab_advisory(file, base_path):
214254
affected_range = gitlab_advisory.get("affected_range")
215255
gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"])
216256
vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type]
217-
gitlab_scheme = GITLAB_SCHEME_BY_PURL_TYPE[purl.type]
257+
gitlab_scheme = gitlab_scheme_by_purl_type[purl.type]
218258
try:
219259
if affected_range:
220260
if gitlab_scheme in gitlab_native_schemes:
@@ -224,8 +264,9 @@ def parse_gitlab_advisory(file, base_path):
224264
else:
225265
affected_version_range = vrc.from_native(affected_range)
226266
except Exception as e:
227-
logger.error(
228-
f"parse_yaml_file: affected_range is not parsable: {affected_range!r} type:{purl.type!r} error: {e!r}\n {traceback.format_exc()}"
267+
logger(
268+
f"parse_yaml_file: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}",
269+
level=logging.ERROR,
229270
)
230271

231272
parsed_fixed_versions = []
@@ -234,8 +275,9 @@ def parse_gitlab_advisory(file, base_path):
234275
fixed_version = vrc.version_class(fixed_version)
235276
parsed_fixed_versions.append(fixed_version)
236277
except Exception as e:
237-
logger.error(
238-
f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}"
278+
logger(
279+
f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}",
280+
level=logging.ERROR,
239281
)
240282

241283
if parsed_fixed_versions:

vulnerabilities/pipes/advisory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def import_advisory(
9090

9191
if not vulnerability:
9292
if logger:
93-
logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING)
93+
logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.ERROR)
9494
return
9595

9696
for ref in advisory_data.references:

0 commit comments

Comments
 (0)