1010
1111import re
1212import xml .etree .ElementTree as ET
13- from typing import Set
13+ from pathlib import Path
14+ from typing import Iterable
1415
1516from packageurl import PackageURL
17+ from univers .version_constraint import VersionConstraint
18+ from univers .version_range import EbuildVersionRange
19+ from univers .versions import GentooVersion
1620
1721from vulnerabilities .importer import AdvisoryData
18- from vulnerabilities .importer import GitImporter
22+ from vulnerabilities .importer import AffectedPackage
23+ from vulnerabilities .importer import Importer
1924from vulnerabilities .importer import Reference
20- from vulnerabilities .utils import nearest_patched_package
2125
2226
23- class GentooImporter (GitImporter ):
24- def __enter__ (self ):
25- super (GentooImporter , self ).__enter__ ()
26-
27- if not getattr (self , "_added_files" , None ):
28- self ._added_files , self ._updated_files = self .file_changes (
29- recursive = True , file_ext = "xml"
30- )
31-
32- def updated_advisories (self ) -> Set [AdvisoryData ]:
33- files = self ._updated_files .union (self ._added_files )
34- advisories = []
35- for f in files :
36- processed_data = self .process_file (f )
37- advisories .extend (processed_data )
38- return self .batch_advisories (advisories )
27+ class GentooImporter (Importer ):
28+ repo_url = "git+https://anongit.gentoo.org/git/data/glsa.git"
29+ spdx_license_expression = "CC-BY-SA-4.0"
30+ # The license notice at https://anongit.gentoo.org/ says:
31+ # The contents of this document, unless otherwise expressly stated, are licensed
32+ # under the [CC-BY-SA-4.0](https://creativecommons.org/licenses/by-sa/4.0/) license.
33+ license_url = "https://creativecommons.org/licenses/by-sa/4.0/"
34+
def advisory_data(self) -> Iterable[AdvisoryData]:
    """
    Clone the GLSA repository and yield the advisories found in it.

    Every ``*.xml`` file below the checkout directory is handed to
    ``process_file`` and the advisories it produces are yielded in turn.
    The checkout is deleted once iteration finishes, fails or is abandoned.
    """
    try:
        self.clone(repo_url=self.repo_url)
        base_path = Path(self.vcs_response.dest_dir)
        for file_path in base_path.glob("**/*.xml"):
            yield from self.process_file(file_path)
    finally:
        # NOTE(review): ``vcs_response`` is presumably set by ``clone()``.
        # If cloning fails before it exists, a plain attribute access here
        # would raise AttributeError and hide the original error, so look
        # it up defensively.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            vcs_response.delete()
3944
def process_file(self, file):
    """
    Yield one ``AdvisoryData`` per CVE referenced by a GLSA XML document.

    ``file`` is passed straight to ``xml.etree.ElementTree.parse`` and may
    therefore be a path or a file object.

    The root element's ``id`` attribute, when present, becomes a
    ``GLSA-<id>`` reference. The ``<references>``, ``<synopsis>`` and
    ``<affected>`` children supply the CVE aliases, the summary and the
    affected packages respectively. Nothing is yielded when the document
    lists no CVE.
    """
    cves = []
    summary = ""
    vuln_references = []
    # Default to "no packages" so that a document without an <affected>
    # element does not hit an unbound local when advisories are built below.
    affected_packages = []

    xml_root = ET.parse(file).getroot()
    glsa_id = xml_root.attrib.get("id")
    if glsa_id:
        vuln_references = [
            Reference(
                reference_id=f"GLSA-{glsa_id}",
                url=f"https://security.gentoo.org/glsa/{glsa_id}",
            )
        ]

    for child in xml_root:
        if child.tag == "references":
            cves = self.cves_from_reference(child)
        elif child.tag == "synopsis":
            # An empty <synopsis/> has ``text`` set to None; keep a string.
            summary = child.text or ""
        elif child.tag == "affected":
            affected_packages = list(self.affected_and_safe_purls(child))

    # Creating a separate advisory for each CVE is inefficient, but there
    # seems to be no alternative: each advisory carries a single alias.
    for cve in cves:
        yield AdvisoryData(
            aliases=[cve],
            summary=summary,
            references=vuln_references,
            affected_packages=affected_packages,
        )
8079
8180 @staticmethod
8281 def cves_from_reference (reference ):
@@ -91,40 +90,63 @@ def cves_from_reference(reference):
9190
@staticmethod
def affected_and_safe_purls(affected_elem):
    """
    Yield an ``AffectedPackage`` for each package child of ``affected_elem``.

    A child's ``name`` attribute is split on its last ``/`` into the purl
    namespace and name; children without a name are skipped. The versions
    returned by ``get_safe_and_affected_versions`` become ``=`` constraints
    on an ``EbuildVersionRange``, with the constraints for safe versions
    inverted. Packages that end up with no constraint are skipped.
    """
    for pkg in affected_elem:
        name = pkg.attrib.get("name")
        if not name:
            continue
        pkg_ns, _, pkg_name = name.rpartition("/")
        purl = PackageURL(type="ebuild", name=pkg_name, namespace=pkg_ns)
        safe_versions, affected_versions = GentooImporter.get_safe_and_affected_versions(pkg)

        # Build a fresh list for every package: a list shared across loop
        # iterations would leak the constraints of earlier packages into
        # the version range of later ones.
        constraints = [
            VersionConstraint(version=GentooVersion(version), comparator="=").invert()
            for version in safe_versions
        ]
        constraints.extend(
            VersionConstraint(version=GentooVersion(version), comparator="=")
            for version in affected_versions
        )

        if not constraints:
            continue

        yield AffectedPackage(
            package=purl,
            affected_version_range=EbuildVersionRange(constraints=constraints),
        )
118+
@staticmethod
def get_safe_and_affected_versions(pkg):
    """
    Return ``(safe_versions, affected_versions)`` for a package element.

    Both items are sets of the version strings found as the text of the
    ``<unaffected>`` and ``<vulnerable>`` children of ``pkg``. A child is
    ignored when its text is empty or listed in ``skip_versions``, or when
    its ``range`` attribute is missing or longer than two characters.
    """
    # TODO: Revisit why we are skipping some versions in gentoo importer
    skip_versions = {"1.3*", "7.3*", "7.4*"}
    safe_versions = set()
    affected_versions = set()
    for info in pkg:
        version = info.text
        if not version or version in skip_versions:
            continue

        # All possible values of the range attribute are
        # {'gt', 'lt', 'rle', 'rge', 'rgt', 'le', 'ge', 'eq'}. The
        # three-letter ones ('rle', 'rge', 'rgt') compare the 'release',
        # not the 'version', and are ignored. An element without a range
        # cannot be classified and is ignored too, instead of raising
        # KeyError.
        range_op = info.attrib.get("range")
        if not range_op or len(range_op) > 2:
            continue

        # Quick hack to know whether the listed version itself lies in the
        # range: 'e' stands for 'equal', either alone or paired with
        # 'greater' or 'less'.
        includes_version = "e" in range_op

        if info.tag == "unaffected":
            if includes_version:
                safe_versions.add(version)
            else:
                affected_versions.add(version)
        elif info.tag == "vulnerable":
            if includes_version:
                affected_versions.add(version)
            else:
                safe_versions.add(version)

    return safe_versions, affected_versions
0 commit comments