1212from pathlib import Path
1313from typing import Iterable
1414from typing import List
15- from typing import Optional
15+ from typing import Tuple
1616
1717import pytz
1818import saneyaml
1919from dateutil import parser as dateparser
20+ from fetchcode .vcs import fetch_via_vcs
2021from packageurl import PackageURL
2122from univers .version_range import RANGE_CLASS_BY_SCHEMES
2223from univers .version_range import VersionRange
2526
2627from vulnerabilities .importer import AdvisoryData
2728from vulnerabilities .importer import AffectedPackage
28- from vulnerabilities .importer import Importer
2929from vulnerabilities .importer import Reference
30+ from vulnerabilities .pipelines import VulnerableCodeBaseImporterPipeline
3031from vulnerabilities .utils import build_description
3132from vulnerabilities .utils import get_advisory_url
3233from vulnerabilities .utils import get_cwe_id
3334
34- logger = logging .getLogger (__name__ )
3535
36- PURL_TYPE_BY_GITLAB_SCHEME = {
37- "conan" : "conan" ,
38- "gem" : "gem" ,
39- # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742
40- # "go": "golang",
41- "maven" : "maven" ,
42- "npm" : "npm" ,
43- "nuget" : "nuget" ,
44- "packagist" : "composer" ,
45- "pypi" : "pypi" ,
46- }
class GitLabImporterPipeline(VulnerableCodeBaseImporterPipeline):
    """
    Collect advisories from the GitLab Advisory Database (Open Source Edition).

    The pipeline clones the ``advisories-community`` repository, parses the
    YAML advisory files that belong to a supported package type and finally
    removes the local clone.
    """

    pipeline_id = "gitlab_importer"

    spdx_license_expression = "MIT"
    license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE"
    importer_name = "GitLab Importer"
    repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/"

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps."""
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.import_new_advisories,
            cls.clean_downloads,
        )

    # Mapping of the top-level GitLab directory name to the Package URL type.
    purl_type_by_gitlab_scheme = {
        "conan": "conan",
        "gem": "gem",
        # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742
        # "go": "golang",
        "maven": "maven",
        "npm": "npm",
        "nuget": "nuget",
        "packagist": "composer",
        "pypi": "pypi",
    }

    # Reverse mapping: Package URL type to GitLab directory name.
    gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()}

    def clone(self):
        """Clone the advisory repository and keep the response on ``self.vcs_response``."""
        self.log(f"Cloning `{self.repo_url}`")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def _supported_advisory_files(self, base_path):
        """
        Yield the advisory YAML paths found under ``base_path`` that belong
        to a package type listed in ``purl_type_by_gitlab_scheme``.
        """
        for file_path in base_path.rglob("*.yml"):
            # YAML files directly at the checkout root are not inside a
            # package-type directory, so they are not advisories.
            if file_path.parent == base_path:
                continue

            gitlab_type, _, _ = parse_advisory_path(
                base_path=base_path,
                file_path=file_path,
            )

            # Unsupported package types are skipped without logging.
            if gitlab_type not in self.purl_type_by_gitlab_scheme:
                continue

            yield file_path

    def advisories_count(self):
        """
        Return the number of advisory files that ``collect_advisories`` will
        attempt to parse.
        """
        root = Path(self.vcs_response.dest_dir)
        return sum(1 for _ in self._supported_advisory_files(root))

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield an AdvisoryData for each parsable advisory file of a supported type."""
        base_path = Path(self.vcs_response.dest_dir)

        for file_path in self._supported_advisory_files(base_path):
            advisory = parse_gitlab_advisory(
                file=file_path,
                base_path=base_path,
                gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type,
                purl_type_by_gitlab_scheme=self.purl_type_by_gitlab_scheme,
                logger=self.log,
            )
            # parse_gitlab_advisory returns None for files it cannot parse;
            # never hand a None over to the consumers of this iterable.
            if advisory is not None:
                yield advisory

    def clean_downloads(self):
        """Delete the cloned repository, if any."""
        # ``vcs_response`` is only set by ``clone``: tolerate a missing
        # attribute so that cleanup never fails when cloning did not happen.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            self.log("Removing cloned repository")
            vcs_response.delete()
77108
78109
def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]:
    """
    Parse the path of a gitlab advisory file and return a 3-tuple of:
       (gitlab_type, package_slug, vulnerability_id)

    ``file_path`` must be located under ``base_path``: the first relative
    path segment is the gitlab type, the file name without its extension is
    the vulnerability id and every segment in between makes the package slug.
    Raise a ValueError if ``file_path`` is not under ``base_path``.

    For example::

    >>> base_path = Path("/tmp/checkout")
    >>> file_path = Path("/tmp/checkout/pypi/gradio/CVE-2021-43831.yml")
    >>> parse_advisory_path(base_path=base_path, file_path=file_path)
    ('pypi', 'gradio', 'CVE-2021-43831')

    >>> file_path = Path("/tmp/checkout/npm/@express/beego/beego/v2/CVE-2021-43831.yml")
    >>> parse_advisory_path(base_path=base_path, file_path=file_path)
    ('npm', '@express/beego/beego/v2', 'CVE-2021-43831')
    """
    # Use path parts rather than splitting a string on "/" so that the
    # result does not depend on the path separator of the platform.
    relative_path_segments = file_path.relative_to(base_path).parts
    gitlab_type = relative_path_segments[0]
    # The stem drops only the final extension of the file name.
    vuln_id = file_path.stem
    package_slug = "/".join(relative_path_segments[1:-1])

    return gitlab_type, package_slug, vuln_id
105136
106137
107- def get_purl (package_slug ):
138+ def get_purl (package_slug , purl_type_by_gitlab_scheme , logger ):
108139 """
109140 Return a PackageURL object from a package slug
110141 """
111142 parts = [p for p in package_slug .strip ("/" ).split ("/" ) if p ]
112143 gitlab_scheme = parts [0 ]
113- purl_type = PURL_TYPE_BY_GITLAB_SCHEME [gitlab_scheme ]
144+ purl_type = purl_type_by_gitlab_scheme [gitlab_scheme ]
114145 if gitlab_scheme == "go" :
115146 name = "/" .join (parts [1 :])
116147 return PackageURL (type = purl_type , namespace = None , name = name )
@@ -125,7 +156,7 @@ def get_purl(package_slug):
125156 name = parts [- 1 ]
126157 namespace = "/" .join (parts [1 :- 1 ])
127158 return PackageURL (type = purl_type , namespace = namespace , name = name )
128- logger . error (f"get_purl: package_slug can not be parsed: { package_slug !r} " )
159+ logger (f"get_purl: package_slug can not be parsed: { package_slug !r} " , level = logging . ERROR )
129160 return
130161
131162
@@ -140,7 +171,7 @@ def extract_affected_packages(
140171 In case of gitlab advisory data we get a list of fixed_versions and a affected_version_range.
141172 Since we can not determine which package fixes which range.
142173 We store the all the fixed_versions with the same affected_version_range in the advisory.
143- Later the advisory data is used to be infered in the GitLabBasicImprover.
174+ Later the advisory data is used to be inferred in the GitLabBasicImprover.
144175 """
145176 for fixed_version in fixed_versions :
146177 yield AffectedPackage (
@@ -150,7 +181,9 @@ def extract_affected_packages(
150181 )
151182
152183
153- def parse_gitlab_advisory (file , base_path ):
184+ def parse_gitlab_advisory (
185+ file , base_path , gitlab_scheme_by_purl_type , purl_type_by_gitlab_scheme , logger
186+ ):
154187 """
155188 Parse a Gitlab advisory file and return an AdvisoryData or None.
156189 These files are YAML. There is a JSON schema documented at
@@ -177,8 +210,9 @@ def parse_gitlab_advisory(file, base_path):
177210 with open (file ) as f :
178211 gitlab_advisory = saneyaml .load (f )
179212 if not isinstance (gitlab_advisory , dict ):
180- logger .error (
181- f"parse_gitlab_advisory: unknown gitlab advisory format in { file !r} with data: { gitlab_advisory !r} "
213+ logger (
214+ f"parse_gitlab_advisory: unknown gitlab advisory format in { file !r} with data: { gitlab_advisory !r} " ,
215+ level = logging .ERROR ,
182216 )
183217 return
184218
@@ -199,9 +233,15 @@ def parse_gitlab_advisory(file, base_path):
199233 base_path = base_path ,
200234 url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/" ,
201235 )
202- purl : PackageURL = get_purl (package_slug = package_slug )
236+ purl : PackageURL = get_purl (
237+ package_slug = package_slug ,
238+ purl_type_by_gitlab_scheme = purl_type_by_gitlab_scheme ,
239+ logger = logger ,
240+ )
203241 if not purl :
204- logger .error (f"parse_yaml_file: purl is not valid: { file !r} { package_slug !r} " )
242+ logger (
243+ f"parse_yaml_file: purl is not valid: { file !r} { package_slug !r} " , level = logging .ERROR
244+ )
205245 return AdvisoryData (
206246 aliases = aliases ,
207247 summary = summary ,
@@ -214,7 +254,7 @@ def parse_gitlab_advisory(file, base_path):
214254 affected_range = gitlab_advisory .get ("affected_range" )
215255 gitlab_native_schemes = set (["pypi" , "gem" , "npm" , "go" , "packagist" , "conan" ])
216256 vrc : VersionRange = RANGE_CLASS_BY_SCHEMES [purl .type ]
217- gitlab_scheme = GITLAB_SCHEME_BY_PURL_TYPE [purl .type ]
257+ gitlab_scheme = gitlab_scheme_by_purl_type [purl .type ]
218258 try :
219259 if affected_range :
220260 if gitlab_scheme in gitlab_native_schemes :
@@ -224,8 +264,9 @@ def parse_gitlab_advisory(file, base_path):
224264 else :
225265 affected_version_range = vrc .from_native (affected_range )
226266 except Exception as e :
227- logger .error (
228- f"parse_yaml_file: affected_range is not parsable: { affected_range !r} type:{ purl .type !r} error: { e !r} \n { traceback .format_exc ()} "
267+ logger (
268+ f"parse_yaml_file: affected_range is not parsable: { affected_range !r} for: { purl !s} error: { e !r} \n { traceback .format_exc ()} " ,
269+ level = logging .ERROR ,
229270 )
230271
231272 parsed_fixed_versions = []
@@ -234,8 +275,9 @@ def parse_gitlab_advisory(file, base_path):
234275 fixed_version = vrc .version_class (fixed_version )
235276 parsed_fixed_versions .append (fixed_version )
236277 except Exception as e :
237- logger .error (
238- f"parse_yaml_file: fixed_version is not parsable`: { fixed_version !r} error: { e !r} \n { traceback .format_exc ()} "
278+ logger (
279+ f"parse_yaml_file: fixed_version is not parsable`: { fixed_version !r} error: { e !r} \n { traceback .format_exc ()} " ,
280+ level = logging .ERROR ,
239281 )
240282
241283 if parsed_fixed_versions :
0 commit comments