Skip to content

Commit 7cfc934

Browse files
author
Orlando Barrera II
committed
Improved the SARIF file parsing
1 parent 8b22b28 commit 7cfc934

File tree

1 file changed

+76
-100
lines changed

1 file changed

+76
-100
lines changed

socketsecurity/core/messages.py

Lines changed: 76 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,12 @@ class Messages:
1515
@staticmethod
1616
def map_severity_to_sarif(severity: str) -> str:
1717
"""
18-
Map Socket severity levels to SARIF levels (GitHub code scanning).
19-
20-
'low' -> 'note'
21-
'medium' or 'middle' -> 'warning'
22-
'high' or 'critical' -> 'error'
18+
Map Socket Security severity levels to SARIF levels.
2319
"""
2420
severity_mapping = {
2521
"low": "note",
2622
"medium": "warning",
27-
"middle": "warning", # older data might say "middle"
23+
"middle": "warning",
2824
"high": "error",
2925
"critical": "error",
3026
}
@@ -33,82 +29,67 @@ def map_severity_to_sarif(severity: str) -> str:
3329
@staticmethod
3430
def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) -> tuple:
3531
"""
36-
Finds the line number and snippet of code for the given package/version in a manifest file.
37-
Returns a 2-tuple: (line_number, snippet_or_message).
38-
39-
Supports:
40-
1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock)
41-
- Locates a dictionary entry with the matching package & version
42-
- Does a rough line-based search to find the actual line in the raw text
43-
2) Text-based (requirements.txt, package.json, yarn.lock, etc.)
44-
- Uses compiled regex patterns to detect a match line by line
32+
Given a manifest file, find the line number and snippet where the package is declared.
33+
For JSON-based manifests (package-lock.json, Pipfile.lock, composer.lock, package.json),
34+
we attempt to parse the JSON to verify the package is present, then search for the key.
35+
For text-based manifests, we use a regex search.
4536
"""
46-
# Extract just the file name to detect manifest type
4737
file_type = Path(manifest_file).name
4838

49-
# ----------------------------------------------------
50-
# 1) JSON-based manifest files
51-
# ----------------------------------------------------
52-
if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock"]:
39+
# Handle JSON-based files.
40+
if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock", "package.json"]:
5341
try:
54-
# Read entire file so we can parse JSON and also do raw line checks
5542
with open(manifest_file, "r", encoding="utf-8") as f:
5643
raw_text = f.read()
57-
58-
# Attempt JSON parse
59-
data = json.loads(raw_text)
60-
61-
# In practice, you may need to check data["dependencies"], data["default"], etc.
62-
# This is an example approach.
63-
packages_dict = (
64-
data.get("packages")
65-
or data.get("default")
66-
or data.get("dependencies")
67-
or {}
68-
)
69-
70-
found_key = None
71-
found_info = None
72-
# Locate a dictionary entry whose 'version' matches
73-
for key, value in packages_dict.items():
74-
# For NPM package-lock, keys might look like "node_modules/axios"
75-
if key.endswith(packagename) and "version" in value:
76-
if value["version"] == packageversion:
77-
found_key = key
78-
found_info = value
79-
break
80-
81-
if found_key and found_info:
82-
# Search lines to approximate the correct line number
83-
needle_key = f'"{found_key}":' # e.g. "node_modules/axios":
84-
needle_version = f'"version": "{packageversion}"'
85-
lines = raw_text.splitlines()
86-
best_line = 1
87-
snippet = None
88-
89-
for i, line in enumerate(lines, start=1):
90-
if (needle_key in line) or (needle_version in line):
91-
best_line = i
92-
snippet = line.strip()
93-
break # On first match, stop
94-
95-
# If we found an approximate line, return it; else fallback to line 1
96-
if best_line > 0 and snippet:
97-
return best_line, snippet
98-
else:
99-
return 1, f'"{found_key}": {found_info}'
44+
try:
45+
data = json.loads(raw_text)
46+
except json.JSONDecodeError:
47+
data = {}
48+
49+
found = False
50+
# For package.json, check dependencies and devDependencies.
51+
if file_type == "package.json":
52+
deps = data.get("dependencies", {})
53+
deps_dev = data.get("devDependencies", {})
54+
all_deps = {**deps, **deps_dev}
55+
if packagename in all_deps:
56+
actual_version = all_deps[packagename]
57+
# Allow for versions with caret/tilde prefixes.
58+
if actual_version == packageversion or actual_version.lstrip("^~") == packageversion:
59+
found = True
10060
else:
101-
return 1, f"{packagename} {packageversion} (not found in {manifest_file})"
102-
103-
except (FileNotFoundError, json.JSONDecodeError):
104-
return 1, f"Error reading {manifest_file}"
61+
# For other JSON-based manifests, look into common keys.
62+
for key in ["packages", "default", "dependencies"]:
63+
if key in data:
64+
packages_dict = data[key]
65+
# In package-lock.json, keys can be paths (e.g. "node_modules/axios")
66+
for key_item, info in packages_dict.items():
67+
if key_item.endswith(packagename):
68+
# info may be a dict (with "version") or a simple version string.
69+
ver = info if isinstance(info, str) else info.get("version", "")
70+
if ver == packageversion:
71+
found = True
72+
break
73+
if found:
74+
break
10575

106-
# ----------------------------------------------------
107-
# 2) Text-based / line-based manifests
108-
# ----------------------------------------------------
109-
# Define a dictionary of patterns for common manifest types
76+
if not found:
77+
return 1, f'"{packagename}": not found in {manifest_file}'
78+
79+
# Now search the raw text to locate the declaration line.
80+
needle = f'"{packagename}":'
81+
lines = raw_text.splitlines()
82+
for i, line in enumerate(lines, start=1):
83+
if needle in line:
84+
return i, line.strip()
85+
return 1, f'"{packagename}": declaration not found'
86+
except FileNotFoundError:
87+
return 1, f"{manifest_file} not found"
88+
except Exception as e:
89+
return 1, f"Error reading {manifest_file}: {e}"
90+
91+
# For text-based files, define regex search patterns for common manifest types.
11092
search_patterns = {
111-
"package.json": rf'"{packagename}":\s*"{packageversion}"',
11293
"yarn.lock": rf'{packagename}@{packageversion}',
11394
"pnpm-lock.yaml": rf'"{re.escape(packagename)}"\s*:\s*\{{[^}}]*"version":\s*"{re.escape(packageversion)}"',
11495
"requirements.txt": rf'^{re.escape(packagename)}\s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{re.escape(packageversion)}(?:\s*;.*)?$',
@@ -132,33 +113,25 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str)
132113
"conanfile.txt": rf'{re.escape(packagename)}/{re.escape(packageversion)}',
133114
"vcpkg.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"',
134115
}
135-
136-
# If no specific pattern is found for this file name, fallback to a naive approach
137116
searchstring = search_patterns.get(file_type, rf'{re.escape(packagename)}.*{re.escape(packageversion)}')
138-
139117
try:
140-
# Read file lines and search for a match
141118
with open(manifest_file, 'r', encoding="utf-8") as file:
142119
lines = [line.rstrip("\n") for line in file]
143120
for line_number, line_content in enumerate(lines, start=1):
144-
# For Python conditional dependencies, ignore everything after first ';'
121+
# For cases where dependencies have conditionals (e.g. Python), only consider the main part.
145122
line_main = line_content.split(";", 1)[0].strip()
146-
147-
# Use a case-insensitive regex search
148123
if re.search(searchstring, line_main, re.IGNORECASE):
149124
return line_number, line_content.strip()
150-
151125
except FileNotFoundError:
152126
return 1, f"{manifest_file} not found"
153127
except Exception as e:
154128
return 1, f"Error reading {manifest_file}: {e}"
155-
156129
return 1, f"{packagename} {packageversion} (not found)"
157130

158131
@staticmethod
159132
def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) -> str:
160133
"""
161-
Determine the correct URL path based on the manifest file type.
134+
Determine the URL prefix based on the manifest file.
162135
"""
163136
manifest_to_url_prefix = {
164137
"package.json": "npm",
@@ -181,18 +154,20 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) -
181154
"composer.json": "composer",
182155
"vcpkg.json": "vcpkg",
183156
}
184-
185157
file_type = Path(manifest_file).name
186158
url_prefix = manifest_to_url_prefix.get(file_type, "unknown")
187159
return f"https://socket.dev/{url_prefix}/package/{pkg_name}/alerts/{pkg_version}"
188160

189161
@staticmethod
190162
def create_security_comment_sarif(diff) -> dict:
191163
"""
192-
Create SARIF-compliant output from the diff report, including dynamic URL generation
193-
based on manifest type and improved <br/> formatting for GitHub SARIF display.
164+
Create a SARIF-compliant JSON object for alerts. This function now:
165+
- Accepts multiple manifest files (from alert.introduced_by or alert.manifests)
166+
- Generates one SARIF location per manifest file.
167+
- Supports various language-specific manifest types.
194168
"""
195169
scan_failed = False
170+
# (Optional: handle scan failure based on alert.error flags)
196171
if len(diff.new_alerts) == 0:
197172
for alert in diff.new_alerts:
198173
if alert.error:
@@ -225,27 +200,30 @@ def create_security_comment_sarif(diff) -> dict:
225200
rule_id = f"{pkg_name}=={pkg_version}"
226201
severity = alert.severity
227202

228-
# --- NEW LOGIC: Determine the list of manifest files ---
229-
if alert.introduced_by and isinstance(alert.introduced_by[0], list):
230-
# Extract file names from each introduced_by entry
231-
manifest_files = [entry[1] for entry in alert.introduced_by]
232-
elif alert.manifests:
233-
# Split semicolon-delimited manifest string if necessary
234-
manifest_files = [mf.strip() for mf in alert.manifests.split(";")]
203+
# --- Determine manifest files from alert data ---
204+
manifest_files = []
205+
if alert.introduced_by and isinstance(alert.introduced_by, list):
206+
for entry in alert.introduced_by:
207+
if isinstance(entry, list) and len(entry) >= 2:
208+
manifest_files.append(entry[1])
209+
elif isinstance(entry, str):
210+
manifest_files.extend([m.strip() for m in entry.split(";") if m.strip()])
211+
elif hasattr(alert, 'manifests') and alert.manifests:
212+
manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()]
235213
else:
236214
manifest_files = ["requirements.txt"]
237215

238-
# Use the first file for generating the help URL.
216+
# Use the first manifest for URL generation.
239217
socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version)
240218

241-
# Prepare the description messages.
219+
# Prepare descriptions with HTML <br/> for GitHub display.
242220
short_desc = (
243221
f"{alert.props.get('note', '')}<br/><br/>Suggested Action:<br/>"
244222
f"{alert.suggestion}<br/><a href=\"{socket_url}\">{socket_url}</a>"
245223
)
246224
full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '<br/>'))
247225

248-
# Create the rule if not already defined.
226+
# Create or reuse the rule definition.
249227
if rule_id not in rules_map:
250228
rules_map[rule_id] = {
251229
"id": rule_id,
@@ -258,12 +236,12 @@ def create_security_comment_sarif(diff) -> dict:
258236
},
259237
}
260238

261-
# --- NEW LOGIC: Create separate locations for each manifest file ---
239+
# --- Build SARIF locations for each manifest file ---
262240
locations = []
263241
for mf in manifest_files:
264242
line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf)
265243
if line_number < 1:
266-
line_number = 1 # Ensure SARIF compliance.
244+
line_number = 1
267245
locations.append({
268246
"physicalLocation": {
269247
"artifactLocation": {"uri": mf},
@@ -274,15 +252,13 @@ def create_security_comment_sarif(diff) -> dict:
274252
}
275253
})
276254

277-
# Add the SARIF result.
278255
result_obj = {
279256
"ruleId": rule_id,
280257
"message": {"text": short_desc},
281258
"locations": locations,
282259
}
283260
results_list.append(result_obj)
284261

285-
# Attach rules and results.
286262
sarif_data["runs"][0]["tool"]["driver"]["rules"] = list(rules_map.values())
287263
sarif_data["runs"][0]["results"] = results_list
288264

0 commit comments

Comments
 (0)