1212from pathlib import Path
1313from typing import Iterable
1414from typing import List
15- from typing import Optional
15+ from typing import Tuple
1616
1717import pytz
1818import saneyaml
1919from dateutil import parser as dateparser
20+ from fetchcode .vcs import fetch_via_vcs
2021from packageurl import PackageURL
2122from univers .version_range import RANGE_CLASS_BY_SCHEMES
2223from univers .version_range import VersionRange
2526
2627from vulnerabilities .importer import AdvisoryData
2728from vulnerabilities .importer import AffectedPackage
28- from vulnerabilities .importer import Importer
2929from vulnerabilities .importer import Reference
30+ from vulnerabilities .pipelines import VulnerableCodeBaseImporterPipeline
3031from vulnerabilities .utils import build_description
3132from vulnerabilities .utils import get_advisory_url
3233from vulnerabilities .utils import get_cwe_id
3334
34- logger = logging .getLogger (__name__ )
3535
36- PURL_TYPE_BY_GITLAB_SCHEME = {
37- "conan" : "conan" ,
38- "gem" : "gem" ,
39- # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742
40- # "go": "golang",
41- "maven" : "maven" ,
42- "npm" : "npm" ,
43- "nuget" : "nuget" ,
44- "packagist" : "composer" ,
45- "pypi" : "pypi" ,
46- }
47-
48- GITLAB_SCHEME_BY_PURL_TYPE = {v : k for k , v in PURL_TYPE_BY_GITLAB_SCHEME .items ()}
49-
50-
51- class GitLabAPIImporter (Importer ):
36+ class GitLabImporterPipeline (VulnerableCodeBaseImporterPipeline ):
5237 spdx_license_expression = "MIT"
5338 license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE"
5439 importer_name = "GitLab Importer"
5540 repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/"
5641
57- def advisory_data (self , _keep_clone = False ) -> Iterable [AdvisoryData ]:
58- try :
59- self .clone (repo_url = self .repo_url )
60- base_path = Path (self .vcs_response .dest_dir )
42+ @classmethod
43+ def steps (cls ):
44+ return (
45+ cls .clone ,
46+ cls .collect_and_store_advisories ,
47+ cls .import_new_advisories ,
48+ cls .clean_downloads ,
49+ )
6150
62- for file_path in base_path .glob ("**/*.yml" ):
63- gitlab_type , package_slug , vuln_id = parse_advisory_path (
64- base_path = base_path ,
65- file_path = file_path ,
66- )
51+ purl_type_by_gitlab_scheme = {
52+ "conan" : "conan" ,
53+ "gem" : "gem" ,
54+ # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742
55+ # "go": "golang",
56+ "maven" : "maven" ,
57+ "npm" : "npm" ,
58+ "nuget" : "nuget" ,
59+ "packagist" : "composer" ,
60+ "pypi" : "pypi" ,
61+ }
62+
63+ gitlab_scheme_by_purl_type = {v : k for k , v in purl_type_by_gitlab_scheme .items ()}
64+
65+ def clone (self ):
66+ self .log (f"Cloning `{ self .repo_url } `" )
67+ self .vcs_response = fetch_via_vcs (self .repo_url )
68+
69+ def advisories_count (self ):
70+ root = Path (self .vcs_response .dest_dir )
71+ return sum (1 for _ in root .rglob ("*.yml" ))
72+
73+ def collect_advisories (self ) -> Iterable [AdvisoryData ]:
74+ base_path = Path (self .vcs_response .dest_dir )
75+
76+ for file_path in base_path .rglob ("*.yml" ):
77+ if file_path .parent == base_path :
78+ continue
79+
80+ gitlab_type , _ , _ = parse_advisory_path (
81+ base_path = base_path ,
82+ file_path = file_path ,
83+ )
6784
68- if gitlab_type in PURL_TYPE_BY_GITLAB_SCHEME :
69- yield parse_gitlab_advisory (file = file_path , base_path = base_path )
85+ if gitlab_type not in self .purl_type_by_gitlab_scheme :
86+ # self.log(
87+ # f"Unknown package type {gitlab_type!r} in {file_path!r}",
88+ # level=logging.ERROR,
89+ # )
90+ continue
91+
92+ yield parse_gitlab_advisory (
93+ file = file_path ,
94+ base_path = base_path ,
95+ gitlab_scheme_by_purl_type = self .gitlab_scheme_by_purl_type ,
96+ purl_type_by_gitlab_scheme = self .purl_type_by_gitlab_scheme ,
97+ logger = self .log ,
98+ )
7099
71- else :
72- logger .error (f"Unknow package type { gitlab_type !r} in { file_path !r} " )
73- continue
74- finally :
75- if self .vcs_response and not _keep_clone :
76- self .vcs_response .delete ()
100+ def clean_downloads (self ):
101+ if self .vcs_response :
102+ self .log (f"Removing cloned repository" )
103+ self .vcs_response .delete ()
77104
78105
79- def parse_advisory_path (base_path : Path , file_path : Path ) -> Optional [ AdvisoryData ]:
106+ def parse_advisory_path (base_path : Path , file_path : Path ) -> Tuple [ str , str , str ]:
80107 """
81108 Parse a gitlab advisory file path and return a 3-tuple of:
82109 (gitlab_type, package_slug, vulnerability_id)
@@ -96,21 +123,21 @@ def parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryDa
96123 >>> parse_advisory_path(base_path=base_path, file_path=file_path)
97124 ('npm', '@express/beego/beego/v2', 'CVE-2021-43831')
98125 """
99- relative_path_segments = str ( file_path .relative_to (base_path )). strip ( "/" ). split ( "/" )
126+ relative_path_segments = file_path .relative_to (base_path ). parts
100127 gitlab_type = relative_path_segments [0 ]
101- vuln_id = relative_path_segments [ - 1 ]. replace ( ".yml" , "" )
128+ vuln_id = file_path . stem
102129 package_slug = "/" .join (relative_path_segments [1 :- 1 ])
103130
104131 return gitlab_type , package_slug , vuln_id
105132
106133
107- def get_purl (package_slug ):
134+ def get_purl (package_slug , purl_type_by_gitlab_scheme , logger ):
108135 """
109136 Return a PackageURL object from a package slug
110137 """
111138 parts = [p for p in package_slug .strip ("/" ).split ("/" ) if p ]
112139 gitlab_scheme = parts [0 ]
113- purl_type = PURL_TYPE_BY_GITLAB_SCHEME [gitlab_scheme ]
140+ purl_type = purl_type_by_gitlab_scheme [gitlab_scheme ]
114141 if gitlab_scheme == "go" :
115142 name = "/" .join (parts [1 :])
116143 return PackageURL (type = purl_type , namespace = None , name = name )
@@ -125,7 +152,7 @@ def get_purl(package_slug):
125152 name = parts [- 1 ]
126153 namespace = "/" .join (parts [1 :- 1 ])
127154 return PackageURL (type = purl_type , namespace = namespace , name = name )
128- logger . error (f"get_purl: package_slug can not be parsed: { package_slug !r} " )
155+ logger (f"get_purl: package_slug can not be parsed: { package_slug !r} " , level = logging . ERROR )
129156 return
130157
131158
@@ -140,7 +167,7 @@ def extract_affected_packages(
140167 In GitLab advisory data we get a list of fixed_versions and an affected_version_range.
141168 Since we can not determine which fixed version fixes which range,
142169 we store all the fixed_versions with the same affected_version_range in the advisory.
143- Later the advisory data is used to be infered in the GitLabBasicImprover.
170+ Later the advisory data is used to be inferred in the GitLabBasicImprover.
144171 """
145172 for fixed_version in fixed_versions :
146173 yield AffectedPackage (
@@ -150,7 +177,9 @@ def extract_affected_packages(
150177 )
151178
152179
153- def parse_gitlab_advisory (file , base_path ):
180+ def parse_gitlab_advisory (
181+ file , base_path , gitlab_scheme_by_purl_type , purl_type_by_gitlab_scheme , logger
182+ ):
154183 """
155184 Parse a Gitlab advisory file and return an AdvisoryData or None.
156185 These files are YAML. There is a JSON schema documented at
@@ -177,8 +206,9 @@ def parse_gitlab_advisory(file, base_path):
177206 with open (file ) as f :
178207 gitlab_advisory = saneyaml .load (f )
179208 if not isinstance (gitlab_advisory , dict ):
180- logger .error (
181- f"parse_gitlab_advisory: unknown gitlab advisory format in { file !r} with data: { gitlab_advisory !r} "
209+ logger (
210+ f"parse_gitlab_advisory: unknown gitlab advisory format in { file !r} with data: { gitlab_advisory !r} " ,
211+ level = logging .ERROR ,
182212 )
183213 return
184214
@@ -199,9 +229,15 @@ def parse_gitlab_advisory(file, base_path):
199229 base_path = base_path ,
200230 url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/" ,
201231 )
202- purl : PackageURL = get_purl (package_slug = package_slug )
232+ purl : PackageURL = get_purl (
233+ package_slug = package_slug ,
234+ purl_type_by_gitlab_scheme = purl_type_by_gitlab_scheme ,
235+ logger = logger ,
236+ )
203237 if not purl :
204- logger .error (f"parse_yaml_file: purl is not valid: { file !r} { package_slug !r} " )
238+ logger (
239+ f"parse_yaml_file: purl is not valid: { file !r} { package_slug !r} " , level = logging .ERROR
240+ )
205241 return AdvisoryData (
206242 aliases = aliases ,
207243 summary = summary ,
@@ -214,7 +250,7 @@ def parse_gitlab_advisory(file, base_path):
214250 affected_range = gitlab_advisory .get ("affected_range" )
215251 gitlab_native_schemes = set (["pypi" , "gem" , "npm" , "go" , "packagist" , "conan" ])
216252 vrc : VersionRange = RANGE_CLASS_BY_SCHEMES [purl .type ]
217- gitlab_scheme = GITLAB_SCHEME_BY_PURL_TYPE [purl .type ]
253+ gitlab_scheme = gitlab_scheme_by_purl_type [purl .type ]
218254 try :
219255 if affected_range :
220256 if gitlab_scheme in gitlab_native_schemes :
@@ -224,8 +260,9 @@ def parse_gitlab_advisory(file, base_path):
224260 else :
225261 affected_version_range = vrc .from_native (affected_range )
226262 except Exception as e :
227- logger .error (
228- f"parse_yaml_file: affected_range is not parsable: { affected_range !r} type:{ purl .type !r} error: { e !r} \n { traceback .format_exc ()} "
263+ logger (
264+ f"parse_yaml_file: affected_range is not parsable: { affected_range !r} for: { purl !s} error: { e !r} \n { traceback .format_exc ()} " ,
265+ level = logging .ERROR ,
229266 )
230267
231268 parsed_fixed_versions = []
@@ -234,8 +271,9 @@ def parse_gitlab_advisory(file, base_path):
234271 fixed_version = vrc .version_class (fixed_version )
235272 parsed_fixed_versions .append (fixed_version )
236273 except Exception as e :
237- logger .error (
238- f"parse_yaml_file: fixed_version is not parsable`: { fixed_version !r} error: { e !r} \n { traceback .format_exc ()} "
274+ logger (
275+ f"parse_yaml_file: fixed_version is not parsable`: { fixed_version !r} error: { e !r} \n { traceback .format_exc ()} " ,
276+ level = logging .ERROR ,
239277 )
240278
241279 if parsed_fixed_versions :
0 commit comments