Skip to content

Commit f664b47

Browse files
author
Orlando Barrera II
committed
Improved the SARIF file parsing
1 parent 0914971 commit f664b47

File tree

1 file changed

+87
-74
lines changed

1 file changed

+87
-74
lines changed

socketsecurity/core/messages.py

Lines changed: 87 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,20 @@
1111

1212

1313
class Messages:
14-
14+
1515
@staticmethod
1616
def map_severity_to_sarif(severity: str) -> str:
1717
"""
18-
Map Socket Security severity levels to SARIF levels.
18+
Map Socket severity levels to SARIF levels (GitHub code scanning).
19+
20+
'low' -> 'note'
21+
'medium' or 'middle' -> 'warning'
22+
'high' or 'critical' -> 'error'
1923
"""
2024
severity_mapping = {
2125
"low": "note",
2226
"medium": "warning",
23-
"middle": "warning",
27+
"middle": "warning", # older data might say "middle"
2428
"high": "error",
2529
"critical": "error",
2630
}
@@ -29,81 +33,82 @@ def map_severity_to_sarif(severity: str) -> str:
2933
@staticmethod
3034
def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) -> tuple:
3135
"""
32-
Given a manifest file, find the line number and snippet where the package is declared.
33-
For JSON-based manifests (e.g. package-lock.json, package.json, Pipfile.lock, composer.lock),
34-
we first verify the package exists (via JSON parsing) and then scan the raw text using one
35-
or more needle patterns.
36-
For text-based manifests, we use regex search.
36+
Finds the line number and snippet of code for the given package/version in a manifest file.
37+
Returns a 2-tuple: (line_number, snippet_or_message).
38+
39+
Supports:
40+
1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock)
41+
- Locates a dictionary entry with the matching package & version
42+
- Does a rough line-based search to find the actual line in the raw text
43+
2) Text-based (requirements.txt, package.json, yarn.lock, etc.)
44+
- Uses compiled regex patterns to detect a match line by line
3745
"""
46+
# Extract just the file name to detect manifest type
3847
file_type = Path(manifest_file).name
3948

40-
# --------------------
41-
# 1) JSON-based manifests
42-
# --------------------
43-
if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock", "package.json"]:
49+
# ----------------------------------------------------
50+
# 1) JSON-based manifest files
51+
# ----------------------------------------------------
52+
if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock"]:
4453
try:
54+
# Read entire file so we can parse JSON and also do raw line checks
4555
with open(manifest_file, "r", encoding="utf-8") as f:
4656
raw_text = f.read()
47-
try:
48-
data = json.loads(raw_text)
49-
except json.JSONDecodeError:
50-
data = {}
51-
52-
found = False
53-
# For package.json, check both dependencies and devDependencies.
54-
if file_type == "package.json":
55-
deps = data.get("dependencies", {})
56-
deps_dev = data.get("devDependencies", {})
57-
all_deps = {**deps, **deps_dev}
58-
if packagename in all_deps:
59-
# Allow for versions with caret/tilde prefixes.
60-
actual_version = all_deps[packagename]
61-
if actual_version == packageversion or actual_version.lstrip("^~") == packageversion:
62-
found = True
63-
else:
64-
# For package-lock.json and similar, look into common keys.
65-
for key in ["packages", "default", "dependencies"]:
66-
if key in data:
67-
packages_dict = data[key]
68-
# Keys in package-lock.json can be "node_modules/<pkg>"
69-
for key_item, info in packages_dict.items():
70-
if key_item.endswith(packagename):
71-
ver = info if isinstance(info, str) else info.get("version", "")
72-
if ver == packageversion or ver.lstrip("^~") == packageversion:
73-
found = True
74-
break
75-
if found:
76-
break
7757

78-
if not found:
79-
return 1, f'"{packagename}": not found in {manifest_file}'
58+
# Attempt JSON parse
59+
data = json.loads(raw_text)
60+
61+
# In practice, you may need to check data["dependencies"], data["default"], etc.
62+
# This is an example approach.
63+
packages_dict = (
64+
data.get("packages")
65+
or data.get("default")
66+
or data.get("dependencies")
67+
or {}
68+
)
69+
70+
found_key = None
71+
found_info = None
72+
# Locate a dictionary entry whose 'version' matches
73+
for key, value in packages_dict.items():
74+
# For NPM package-lock, keys might look like "node_modules/axios"
75+
if key.endswith(packagename) and "version" in value:
76+
if value["version"] == packageversion:
77+
found_key = key
78+
found_info = value
79+
break
8080

81-
# Build one or more needle patterns. For package-lock.json, try both patterns.
82-
needles = []
83-
if file_type == "package-lock.json":
84-
# Try with "node_modules/..." first, then without.
85-
needles.append(f'"node_modules/{packagename}"')
86-
needles.append(f'"{packagename}"')
81+
if found_key and found_info:
82+
# Search lines to approximate the correct line number
83+
needle_key = f'"{found_key}":' # e.g. "node_modules/axios":
84+
needle_version = f'"version": "{packageversion}"'
85+
lines = raw_text.splitlines()
86+
best_line = 1
87+
snippet = None
88+
89+
for i, line in enumerate(lines, start=1):
90+
if (needle_key in line) or (needle_version in line):
91+
best_line = i
92+
snippet = line.strip()
93+
break # On first match, stop
94+
95+
# If we found an approximate line, return it; else fallback to line 1
96+
if best_line > 0 and snippet:
97+
return best_line, snippet
98+
else:
99+
return 1, f'"{found_key}": {found_info}'
87100
else:
88-
needles.append(f'"{packagename}"')
89-
90-
# Scan through the file's lines to locate a matching needle.
91-
lines = raw_text.splitlines()
92-
for i, line in enumerate(lines, start=1):
93-
for needle in needles:
94-
if needle in line:
95-
return i, line.strip()
96-
return 1, f'"{packagename}": declaration not found'
97-
except FileNotFoundError:
98-
return 1, f"{manifest_file} not found"
99-
except Exception as e:
100-
return 1, f"Error reading {manifest_file}: {e}"
101-
102-
# --------------------
101+
return 1, f"{packagename} {packageversion} (not found in {manifest_file})"
102+
103+
except (FileNotFoundError, json.JSONDecodeError):
104+
return 1, f"Error reading {manifest_file}"
105+
106+
# ----------------------------------------------------
103107
# 2) Text-based / line-based manifests
104-
# --------------------
105-
# Define regex patterns for common text-based manifest types.
108+
# ----------------------------------------------------
109+
# Define a dictionary of patterns for common manifest types
106110
search_patterns = {
111+
"package.json": rf'"{packagename}":\s*"{packageversion}"',
107112
"yarn.lock": rf'{packagename}@{packageversion}',
108113
"pnpm-lock.yaml": rf'"{re.escape(packagename)}"\s*:\s*\{{[^}}]*"version":\s*"{re.escape(packageversion)}"',
109114
"requirements.txt": rf'^{re.escape(packagename)}\s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{re.escape(packageversion)}(?:\s*;.*)?$',
@@ -127,24 +132,30 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str)
127132
"conanfile.txt": rf'{re.escape(packagename)}/{re.escape(packageversion)}',
128133
"vcpkg.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"',
129134
}
135+
136+
# If no specific pattern is found for this file name, fallback to a naive approach
130137
searchstring = search_patterns.get(file_type, rf'{re.escape(packagename)}.*{re.escape(packageversion)}')
131138
try:
139+
# Read file lines and search for a match
132140
with open(manifest_file, 'r', encoding="utf-8") as file:
133141
lines = [line.rstrip("\n") for line in file]
134142
for line_number, line_content in enumerate(lines, start=1):
143+
# For Python conditional dependencies, ignore everything after first ';'
135144
line_main = line_content.split(";", 1)[0].strip()
145+
# Use a case-insensitive regex search
136146
if re.search(searchstring, line_main, re.IGNORECASE):
137147
return line_number, line_content.strip()
138148
except FileNotFoundError:
139149
return 1, f"{manifest_file} not found"
140150
except Exception as e:
141151
return 1, f"Error reading {manifest_file}: {e}"
152+
142153
return 1, f"{packagename} {packageversion} (not found)"
143154

144155
@staticmethod
145156
def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) -> str:
146157
"""
147-
Determine the URL prefix based on the manifest file.
158+
Determine the correct URL path based on the manifest file type.
148159
"""
149160
manifest_to_url_prefix = {
150161
"package.json": "npm",
@@ -167,6 +178,7 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) -
167178
"composer.json": "composer",
168179
"vcpkg.json": "vcpkg",
169180
}
181+
170182
file_type = Path(manifest_file).name
171183
url_prefix = manifest_to_url_prefix.get(file_type, "unknown")
172184
return f"https://socket.dev/{url_prefix}/package/{pkg_name}/alerts/{pkg_version}"
@@ -176,7 +188,7 @@ def create_security_comment_sarif(diff) -> dict:
176188
"""
177189
Create SARIF-compliant output from the diff report, including dynamic URL generation
178190
based on manifest type and improved <br/> formatting for GitHub SARIF display.
179-
191+
180192
This function now:
181193
- Accepts multiple manifest files from alert.introduced_by or alert.manifests.
182194
- Generates one SARIF location per manifest file.
@@ -215,6 +227,7 @@ def create_security_comment_sarif(diff) -> dict:
215227
severity = alert.severity
216228

217229
# --- Determine manifest files from alert data ---
230+
# Instead of using a single manifest file, split the values.
218231
manifest_files = []
219232
if alert.introduced_by and isinstance(alert.introduced_by, list):
220233
for entry in alert.introduced_by:
@@ -232,10 +245,10 @@ def create_security_comment_sarif(diff) -> dict:
232245
# Use the first manifest for URL generation.
233246
socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version)
234247

235-
# Prepare the short and full descriptions.
248+
# Prepare descriptions with <br/> replacements.
236249
short_desc = (
237-
f"{alert.props.get('note', '')}<br/><br/>Suggested Action:<br/>"
238-
f"{alert.suggestion}<br/><a href=\"{socket_url}\">{socket_url}</a>"
250+
f"{alert.props.get('note', '')}<br/><br/>Suggested Action:<br/>{alert.suggestion}"
251+
f"<br/><a href=\"{socket_url}\">{socket_url}</a>"
239252
)
240253
full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '<br/>'))
241254

@@ -268,15 +281,15 @@ def create_security_comment_sarif(diff) -> dict:
268281
}
269282
})
270283

271-
# Create the SARIF result for this alert.
284+
# Create the SARIF result for this alert with multiple locations.
272285
result_obj = {
273286
"ruleId": rule_id,
274287
"message": {"text": short_desc},
275288
"locations": locations,
276289
}
277290
results_list.append(result_obj)
278291

279-
# Attach the collected rules and results.
292+
# Attach rules and results.
280293
sarif_data["runs"][0]["tool"]["driver"]["rules"] = list(rules_map.values())
281294
sarif_data["runs"][0]["results"] = results_list
282295

0 commit comments

Comments
 (0)