1
- # Copyright (C) 2022 Intel Corporation
1
+ # Copyright (C) 2024 Intel Corporation
2
2
# SPDX-License-Identifier: GPL-3.0-or-later
3
3
4
4
import json
5
+ import re
5
6
import subprocess
6
7
from re import MULTILINE , compile , search
7
8
13
14
14
15
15
16
class PythonRequirementsParser (Parser ):
17
+ """
18
+ Parser for Python requirements files.
19
+ This parser is designed to parse Python requirements files (usually named
20
+ requirements.txt) and generate PURLs (Package URLs) for the listed packages.
21
+ """
22
+
16
23
def __init__(self, cve_db, logger):
    """Set the PURL package type to "pypi" and run the parent initializer.

    Args:
        cve_db: Forwarded unchanged to the parent initializer.
        logger: Forwarded unchanged to the parent initializer.
    """
    # The package type is assigned before delegating to the parent class.
    self.purl_pkg_type = "pypi"
    super().__init__(cve_db, logger)
18
27
28
def generate_purl(self, product, version, vendor, qualifier=None, subpath=None):
    """Generate a PURL after normalizing all components.

    Characters outside ``[a-zA-Z0-9._-]`` are stripped from ``product``,
    which is then lower-cased. Characters outside ``[a-zA-Z0-9.+-]`` are
    stripped from ``version``.

    Args:
        product (str): Package name to normalize.
        version (str): Package version to normalize.
        vendor: Ignored; "UNKNOWN" is always passed to the parent class.
        qualifier (dict | None): PURL qualifiers, forwarded to the parent
            class. ``None`` (the default) means a new empty dict.
        subpath: Forwarded unchanged to the parent class.

    Returns:
        The result of the parent class's ``generate_purl``, or ``None``
        when the product or version is empty after normalization.
    """
    # Create the dict per call: a ``{}`` default would be a single object
    # shared by every call and handed on to the parent implementation.
    if qualifier is None:
        qualifier = {}

    product = re.sub(r"[^a-zA-Z0-9._-]", "", product).lower()
    version = re.sub(r"[^a-zA-Z0-9.+-]", "", version)

    # Nothing usable is left once the disallowed characters are removed.
    if not product or not version:
        return None

    # The caller-supplied vendor is deliberately replaced with "UNKNOWN".
    return super().generate_purl(
        product,
        version,
        "UNKNOWN",
        qualifier,
        subpath,
    )
46
+
19
47
def run_checker (self , filename ):
48
+ """
49
+ Parse the requirements file and yield PURLs for the listed packages.
50
+ Args:
51
+ filename (str): The path to the requirements file.
52
+ Yields:
53
+ str: PURLs for the packages listed in the file.
54
+ """
20
55
self .filename = filename
21
56
try :
22
57
output = subprocess .check_output (
@@ -71,9 +106,36 @@ def run_checker(self, filename):
71
106
72
107
73
108
class PythonParser (Parser ):
109
+ """
110
+ Parser for Python package metadata files.
111
+ This parser is designed to parse Python package metadata files (usually named
112
+ PKG-INFO or METADATA) and generate PURLs (Package URLs) for the package.
113
+ """
114
+
74
115
def __init__(self, cve_db, logger):
    """Set the PURL package type to "pypi" and run the parent initializer.

    Args:
        cve_db: Forwarded unchanged to the parent initializer.
        logger: Forwarded unchanged to the parent initializer.
    """
    # The package type is assigned before delegating to the parent class.
    self.purl_pkg_type = "pypi"
    super().__init__(cve_db, logger)
76
119
120
def generate_purl(self, product, version, vendor, qualifier=None, subpath=None):
    """Generate a PURL after normalizing all components.

    Characters outside ``[a-zA-Z0-9._-]`` are stripped from ``product``,
    which is then lower-cased. Characters outside ``[a-zA-Z0-9.+-]`` are
    stripped from ``version``.

    Args:
        product (str): Package name to normalize.
        version (str): Package version to normalize.
        vendor: Ignored; "UNKNOWN" is always passed to the parent class.
        qualifier (dict | None): PURL qualifiers, forwarded to the parent
            class. ``None`` (the default) means a new empty dict.
        subpath: Forwarded unchanged to the parent class.

    Returns:
        The result of the parent class's ``generate_purl``, or ``None``
        when the product or version is empty after normalization.
    """
    # Create the dict per call: a ``{}`` default would be a single object
    # shared by every call and handed on to the parent implementation.
    if qualifier is None:
        qualifier = {}

    product = re.sub(r"[^a-zA-Z0-9._-]", "", product).lower()
    version = re.sub(r"[^a-zA-Z0-9.+-]", "", version)

    # Nothing usable is left once the disallowed characters are removed.
    if not product or not version:
        return None

    # The caller-supplied vendor is deliberately replaced with "UNKNOWN".
    return super().generate_purl(
        product,
        version,
        "UNKNOWN",
        qualifier,
        subpath,
    )
138
+
77
139
def run_checker (self , filename ):
78
140
"""
79
141
This generator runs only for python packages.
@@ -97,5 +159,4 @@ def run_checker(self, filename):
97
159
# There are packages with a METADATA file in them containing different data from what the tool expects
98
160
except AttributeError :
99
161
self .logger .debug (f"{ filename } is an invalid METADATA/PKG-INFO" )
100
-
101
162
self .logger .debug (f"Done scanning file: { filename } " )
0 commit comments