Skip to content

Commit da495bd

Browse files
joydeep049, inosmeet, terriko
authored
feat: PURL generation for PythonParser (#3945)
Co-authored-by: Joydeep Tripathy <[email protected]> Co-authored-by: Meet Soni <[email protected]> Co-authored-by: Terri Oda <[email protected]>
1 parent 0354939 commit da495bd

File tree

1 file changed

+63
-2
lines changed

1 file changed

+63
-2
lines changed

cve_bin_tool/parsers/python.py

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
# Copyright (C) 2022 Intel Corporation
1+
# Copyright (C) 2024 Intel Corporation
22
# SPDX-License-Identifier: GPL-3.0-or-later
33

44
import json
5+
import re
56
import subprocess
67
from re import MULTILINE, compile, search
78

@@ -13,10 +14,44 @@
1314

1415

1516
class PythonRequirementsParser(Parser):
17+
"""
18+
Parser for Python requirements files.
19+
This parser is designed to parse Python requirements files (usually named
20+
requirements.txt) and generate PURLs (Package URLs) for the listed packages.
21+
"""
22+
1623
def __init__(self, cve_db, logger):
24+
"""Initialize the Python requirements file parser.

Sets ``purl_pkg_type`` to ``"pypi"`` and then forwards ``cve_db`` and
``logger`` unchanged to the base ``Parser`` initializer.
"""
25+
# Assigned before the base initializer runs; presumably read by the base
# class when building PURLs -- confirm against Parser.
self.purl_pkg_type = "pypi"
1726
super().__init__(cve_db, logger)
1827

28+
def generate_purl(self, product, version, vendor, qualifier=None, subpath=None):
    """Generate a PURL after normalizing the product and version.

    Every character outside ``[a-zA-Z0-9._-]`` is removed from ``product``,
    which is then lower-cased; every character outside ``[a-zA-Z0-9.+-]`` is
    removed from ``version``.

    Args:
        product: Package name as found in the requirements data.
        version: Package version string.
        vendor: Accepted for interface compatibility but ignored; the base
            implementation is always called with ``"UNKNOWN"``.
        qualifier: Optional qualifier mapping forwarded to the base
            implementation. Defaults to a new empty dict on each call.
        subpath: Optional subpath forwarded unchanged.

    Returns:
        The value returned by the base ``generate_purl``, or ``None`` when
        ``product`` or ``version`` is empty after normalization.
    """
    # Build the default per call: a ``{}`` default in the signature would be a
    # single dict shared (and potentially mutated) across every invocation.
    if qualifier is None:
        qualifier = {}

    product = re.sub(r"[^a-zA-Z0-9._-]", "", product).lower()
    version = re.sub(r"[^a-zA-Z0-9.+-]", "", version)

    # Nothing meaningful is left to identify the package.
    if not product or not version:
        return None

    return super().generate_purl(
        product,
        version,
        "UNKNOWN",
        qualifier,
        subpath,
    )
46+
1947
def run_checker(self, filename):
48+
"""
49+
Parse the requirements file and yield PURLs for the listed packages.
50+
Args:
51+
filename (str): The path to the requirements file.
52+
Yields:
53+
str: PURLs for the packages listed in the file.
54+
"""
2055
self.filename = filename
2156
try:
2257
output = subprocess.check_output(
@@ -71,9 +106,36 @@ def run_checker(self, filename):
71106

72107

73108
class PythonParser(Parser):
109+
"""
110+
Parser for Python package metadata files.
111+
This parser is designed to parse Python package metadata files (usually named
112+
PKG-INFO or METADATA) and generate PURLs (Package URLs) for the package.
113+
"""
114+
74115
def __init__(self, cve_db, logger):
116+
"""Initialize the Python package metadata parser.

Sets ``purl_pkg_type`` to ``"pypi"`` and then forwards ``cve_db`` and
``logger`` unchanged to the base ``Parser`` initializer.
"""
117+
# Assigned before the base initializer runs; presumably read by the base
# class when building PURLs -- confirm against Parser.
self.purl_pkg_type = "pypi"
75118
super().__init__(cve_db, logger)
76119

120+
def generate_purl(self, product, version, vendor, qualifier=None, subpath=None):
    """Generate a PURL after normalizing the product and version.

    Every character outside ``[a-zA-Z0-9._-]`` is removed from ``product``,
    which is then lower-cased; every character outside ``[a-zA-Z0-9.+-]`` is
    removed from ``version``.

    Args:
        product: Package name as read from the package metadata.
        version: Package version string.
        vendor: Accepted for interface compatibility but ignored; the base
            implementation is always called with ``"UNKNOWN"``.
        qualifier: Optional qualifier mapping forwarded to the base
            implementation. Defaults to a new empty dict on each call.
        subpath: Optional subpath forwarded unchanged.

    Returns:
        The value returned by the base ``generate_purl``, or ``None`` when
        ``product`` or ``version`` is empty after normalization.
    """
    # Build the default per call: a ``{}`` default in the signature would be a
    # single dict shared (and potentially mutated) across every invocation.
    if qualifier is None:
        qualifier = {}

    product = re.sub(r"[^a-zA-Z0-9._-]", "", product).lower()
    version = re.sub(r"[^a-zA-Z0-9.+-]", "", version)

    # Nothing meaningful is left to identify the package.
    if not product or not version:
        return None

    return super().generate_purl(
        product,
        version,
        "UNKNOWN",
        qualifier,
        subpath,
    )
138+
77139
def run_checker(self, filename):
78140
"""
79141
This generator runs only for python packages.
@@ -97,5 +159,4 @@ def run_checker(self, filename):
97159
# There are packages with a METADATA file in them containing different data from what the tool expects
98160
except AttributeError:
99161
self.logger.debug(f"{filename} is an invalid METADATA/PKG-INFO")
100-
101162
self.logger.debug(f"Done scanning file: {filename}")

0 commit comments

Comments
 (0)