Skip to content

Commit 054acb8

Browse files
author
Orlando Barrera II
committed
Testing the sarif file parsing
1 parent b58656a commit 054acb8

File tree

1 file changed

+41
-39
lines changed

1 file changed

+41
-39
lines changed

socketsecurity/core/messages.py

Lines changed: 41 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -41,9 +41,9 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str)
4141
Supports:
4242
1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock)
4343
- Locates a dictionary entry with the matching package & version
44-
- Searches the raw text for the dependency key
44+
- Searches the raw text for the key
4545
2) Text-based (requirements.txt, package.json, yarn.lock, pnpm-lock.yaml, etc.)
46-
- Uses compiled regex patterns to detect a match line by line
46+
- Uses regex patterns to detect a match line by line
4747
"""
4848
file_type = Path(manifest_file).name
4949
logging.debug("Processing file for line lookup: %s", manifest_file)
@@ -91,35 +91,37 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str)
9191
# ----------------------------------------------------
9292
# 2) Text-based / line-based manifests
9393
# ----------------------------------------------------
94-
# Updated search patterns; note the new pattern for pnpm-lock.yaml.
95-
search_patterns = {
96-
"package.json": rf'"{packagename}":\s*"[\^~]?{re.escape(packageversion)}"',
97-
"yarn.lock": rf'{packagename}@{packageversion}',
98-
# For pnpm-lock.yaml, look for a line in the packages section like:
99-
# /bitget-main/19.4.9:
100-
"pnpm-lock.yaml": rf'^/{re.escape(packagename)}/{re.escape(packageversion)}:',
101-
"requirements.txt": rf'^{re.escape(packagename)}\s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{re.escape(packageversion)}(?:\s*;.*)?$',
102-
"pyproject.toml": rf'{packagename}\s*=\s*"{packageversion}"',
103-
"Pipfile": rf'"{packagename}"\s*=\s*"{packageversion}"',
104-
"go.mod": rf'require\s+{re.escape(packagename)}\s+{re.escape(packageversion)}',
105-
"go.sum": rf'{re.escape(packagename)}\s+{re.escape(packageversion)}',
106-
"pom.xml": rf'<artifactId>{re.escape(packagename)}</artifactId>\s*<version>{re.escape(packageversion)}</version>',
107-
"build.gradle": rf'implementation\s+"{re.escape(packagename)}:{re.escape(packageversion)}"',
108-
"Gemfile": rf'gem\s+"{re.escape(packagename)}",\s*"{re.escape(packageversion)}"',
109-
"Gemfile.lock": rf'\s+{re.escape(packagename)}\s+\({re.escape(packageversion)}\)',
110-
".csproj": rf'<PackageReference\s+Include="{re.escape(packagename)}"\s+Version="{re.escape(packageversion)}"\s*/>',
111-
".fsproj": rf'<PackageReference\s+Include="{re.escape(packagename)}"\s+Version="{re.escape(packageversion)}"\s*/>',
112-
"paket.dependencies": rf'nuget\s+{re.escape(packagename)}\s+{re.escape(packageversion)}',
113-
"Cargo.toml": rf'{re.escape(packagename)}\s*=\s*"{re.escape(packageversion)}"',
114-
"build.sbt": rf'"{re.escape(packagename)}"\s*%\s*"{re.escape(packageversion)}"',
115-
"Podfile": rf'pod\s+"{re.escape(packagename)}",\s*"{re.escape(packageversion)}"',
116-
"Package.swift": rf'\.package\(name:\s*"{re.escape(packagename)}",\s*url:\s*".*?",\s*version:\s*"{re.escape(packageversion)}"\)',
117-
"mix.exs": rf'\{{:{re.escape(packagename)},\s*"{re.escape(packageversion)}"\}}',
118-
"composer.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"',
119-
"conanfile.txt": rf'{re.escape(packagename)}/{re.escape(packageversion)}',
120-
"vcpkg.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"',
121-
}
122-
searchstring = search_patterns.get(file_type, rf'{re.escape(packagename)}.*{re.escape(packageversion)}')
94+
# For pnpm-lock.yaml, use a different pattern since its format is YAML.
95+
if file_type.lower() == "pnpm-lock.yaml":
96+
# Example pattern: /bitget-main/19.4.9:
97+
searchstring = rf'/{re.escape(packagename)}/{re.escape(packageversion)}:'
98+
else:
99+
search_patterns = {
100+
"package.json": rf'"{packagename}":\s*"[\^~]?{re.escape(packageversion)}"',
101+
"yarn.lock": rf'{packagename}@{packageversion}',
102+
"requirements.txt": rf'^{re.escape(packagename)}\s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{re.escape(packageversion)}(?:\s*;.*)?$',
103+
"pyproject.toml": rf'{packagename}\s*=\s*"{re.escape(packageversion)}"',
104+
"Pipfile": rf'"{packagename}"\s*=\s*"{re.escape(packageversion)}"',
105+
"go.mod": rf'require\s+{re.escape(packagename)}\s+{re.escape(packageversion)}',
106+
"go.sum": rf'{re.escape(packagename)}\s+{re.escape(packageversion)}',
107+
"pom.xml": rf'<artifactId>{re.escape(packagename)}</artifactId>\s*<version>{re.escape(packageversion)}</version>',
108+
"build.gradle": rf'implementation\s+"{re.escape(packagename)}:{re.escape(packageversion)}"',
109+
"Gemfile": rf'gem\s+"{re.escape(packagename)}",\s*"{re.escape(packageversion)}"',
110+
"Gemfile.lock": rf'\s+{re.escape(packagename)}\s+\({re.escape(packageversion)}\)',
111+
".csproj": rf'<PackageReference\s+Include="{re.escape(packagename)}"\s+Version="{re.escape(packageversion)}"\s*/>',
112+
".fsproj": rf'<PackageReference\s+Include="{re.escape(packagename)}"\s+Version="{re.escape(packageversion)}"\s*/>',
113+
"paket.dependencies": rf'nuget\s+{re.escape(packagename)}\s+{re.escape(packageversion)}',
114+
"Cargo.toml": rf'{re.escape(packagename)}\s*=\s*"{re.escape(packageversion)}"',
115+
"build.sbt": rf'"{re.escape(packagename)}"\s*%\s*"{re.escape(packageversion)}"',
116+
"Podfile": rf'pod\s+"{re.escape(packagename)}",\s*"{re.escape(packageversion)}"',
117+
"Package.swift": rf'\.package\(name:\s*"{re.escape(packagename)}",\s*url:\s*".*?",\s*version:\s*"{re.escape(packageversion)}"\)',
118+
"mix.exs": rf'\{{:{re.escape(packagename)},\s*"{re.escape(packageversion)}"\}}',
119+
"composer.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"',
120+
"conanfile.txt": rf'{re.escape(packagename)}/{re.escape(packageversion)}',
121+
"vcpkg.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"',
122+
}
123+
searchstring = search_patterns.get(file_type, rf'{re.escape(packagename)}.*{re.escape(packageversion)}')
124+
123125
logging.debug("Using search pattern for %s: %s", file_type, searchstring)
124126
try:
125127
with open(manifest_file, 'r', encoding="utf-8") as file:
@@ -176,7 +178,7 @@ def create_security_comment_sarif(diff) -> dict:
176178
This function now:
177179
- Accepts multiple manifest files from alert.introduced_by or alert.manifests.
178180
- Generates an individual SARIF result for each manifest file.
179-
- Appends the manifest file name to the alert name (and rule ID) to make each result unique.
181+
- Appends the manifest file name to the rule ID and name for uniqueness.
180182
- Does NOT fall back to 'requirements.txt' if no manifest file is provided.
181183
- Adds detailed logging to validate our assumptions.
182184
"""
@@ -209,6 +211,7 @@ def create_security_comment_sarif(diff) -> dict:
209211
base_rule_id = f"{pkg_name}=={pkg_version}"
210212
severity = alert.severity
211213

214+
# Log raw alert data for manifest extraction.
212215
logging.debug("Alert %s - introduced_by: %s, manifests: %s", base_rule_id, alert.introduced_by, getattr(alert, 'manifests', None))
213216

214217
manifest_files = []
@@ -223,30 +226,29 @@ def create_security_comment_sarif(diff) -> dict:
223226
manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()]
224227

225228
logging.debug("Alert %s - extracted manifest_files: %s", base_rule_id, manifest_files)
226-
227229
if not manifest_files:
228230
logging.error("Alert %s: No manifest file found; cannot determine file location.", base_rule_id)
229231
continue
230232

231233
logging.debug("Alert %s - using manifest_files for processing: %s", base_rule_id, manifest_files)
232234

233-
# For each manifest file, generate a separate result
235+
# For each manifest file, create an individual SARIF result.
234236
for mf in manifest_files:
235237
logging.debug("Alert %s - Processing manifest file: %s", base_rule_id, mf)
236238
socket_url = Messages.get_manifest_type_url(mf, pkg_name, pkg_version)
237239
line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf)
238240
if line_number < 1:
239241
line_number = 1
240242
logging.debug("Alert %s: Manifest %s, line %d: %s", base_rule_id, mf, line_number, line_content)
241-
242-
# Create a unique rule id and name by appending the manifest file name
243+
244+
# Create a unique rule id and name by appending the file name.
243245
unique_rule_id = f"{base_rule_id} ({mf})"
244246
rule_name = f"Alert {base_rule_id} ({mf})"
245-
247+
246248
short_desc = (f"{alert.props.get('note', '')}<br/><br/>Suggested Action:<br/>{alert.suggestion}"
247249
f"<br/><a href=\"{socket_url}\">{socket_url}</a>")
248250
full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '<br/>'))
249-
251+
250252
if unique_rule_id not in rules_map:
251253
rules_map[unique_rule_id] = {
252254
"id": unique_rule_id,
@@ -258,7 +260,7 @@ def create_security_comment_sarif(diff) -> dict:
258260
"level": Messages.map_severity_to_sarif(severity)
259261
},
260262
}
261-
263+
262264
result_obj = {
263265
"ruleId": unique_rule_id,
264266
"message": {"text": short_desc},

0 commit comments

Comments
 (0)