Skip to content

Commit bdb6de1

Browse files
author
Orlando Barrera II
committed
Improved the SARIF file parsing
1 parent 7cfc934 commit bdb6de1

File tree

1 file changed

+30
-22
lines changed

1 file changed

+30
-22
lines changed

socketsecurity/core/messages.py

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,16 @@ def map_severity_to_sarif(severity: str) -> str:
3030
def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) -> tuple:
3131
"""
3232
Given a manifest file, find the line number and snippet where the package is declared.
33-
For JSON-based manifests (package-lock.json, Pipfile.lock, composer.lock, package.json),
34-
we attempt to parse the JSON to verify the package is present, then search for the key.
35-
For text-based manifests, we use a regex search.
33+
For JSON-based manifests (e.g. package-lock.json, package.json, Pipfile.lock, composer.lock),
34+
we first verify the package exists (via JSON parsing) and then scan the raw text using one
35+
or more needle patterns.
36+
For text-based manifests, we use regex search.
3637
"""
3738
file_type = Path(manifest_file).name
3839

39-
# Handle JSON-based files.
40+
# --------------------
41+
# 1) JSON-based manifests
42+
# --------------------
4043
if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock", "package.json"]:
4144
try:
4245
with open(manifest_file, "r", encoding="utf-8") as f:
@@ -47,27 +50,26 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str)
4750
data = {}
4851

4952
found = False
50-
# For package.json, check dependencies and devDependencies.
53+
# For package.json, check both dependencies and devDependencies.
5154
if file_type == "package.json":
5255
deps = data.get("dependencies", {})
5356
deps_dev = data.get("devDependencies", {})
5457
all_deps = {**deps, **deps_dev}
5558
if packagename in all_deps:
56-
actual_version = all_deps[packagename]
5759
# Allow for versions with caret/tilde prefixes.
60+
actual_version = all_deps[packagename]
5861
if actual_version == packageversion or actual_version.lstrip("^~") == packageversion:
5962
found = True
6063
else:
61-
# For other JSON-based manifests, look into common keys.
64+
# For package-lock.json and similar, look into common keys.
6265
for key in ["packages", "default", "dependencies"]:
6366
if key in data:
6467
packages_dict = data[key]
65-
# In package-lock.json, keys can be paths (e.g. "node_modules/axios")
68+
# Keys in package-lock.json can be "node_modules/<pkg>"
6669
for key_item, info in packages_dict.items():
6770
if key_item.endswith(packagename):
68-
# info may be a dict (with "version") or a simple version string.
6971
ver = info if isinstance(info, str) else info.get("version", "")
70-
if ver == packageversion:
72+
if ver == packageversion or ver.lstrip("^~") == packageversion:
7173
found = True
7274
break
7375
if found:
@@ -76,19 +78,31 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str)
7678
if not found:
7779
return 1, f'"{packagename}": not found in {manifest_file}'
7880

79-
# Now search the raw text to locate the declaration line.
80-
needle = f'"{packagename}":'
81+
# Build one or more needle patterns. For package-lock.json, try both patterns.
82+
needles = []
83+
if file_type == "package-lock.json":
84+
# Try with "node_modules/..." first, then without.
85+
needles.append(f'"node_modules/{packagename}"')
86+
needles.append(f'"{packagename}"')
87+
else:
88+
needles.append(f'"{packagename}"')
89+
90+
# Scan through the file's lines to locate a matching needle.
8191
lines = raw_text.splitlines()
8292
for i, line in enumerate(lines, start=1):
83-
if needle in line:
84-
return i, line.strip()
93+
for needle in needles:
94+
if needle in line:
95+
return i, line.strip()
8596
return 1, f'"{packagename}": declaration not found'
8697
except FileNotFoundError:
8798
return 1, f"{manifest_file} not found"
8899
except Exception as e:
89100
return 1, f"Error reading {manifest_file}: {e}"
90101

91-
# For text-based files, define regex search patterns for common manifest types.
102+
# --------------------
103+
# 2) Text-based / line-based manifests
104+
# --------------------
105+
# Define regex patterns for common text-based manifest types.
92106
search_patterns = {
93107
"yarn.lock": rf'{packagename}@{packageversion}',
94108
"pnpm-lock.yaml": rf'"{re.escape(packagename)}"\s*:\s*\{{[^}}]*"version":\s*"{re.escape(packageversion)}"',
@@ -118,7 +132,6 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str)
118132
with open(manifest_file, 'r', encoding="utf-8") as file:
119133
lines = [line.rstrip("\n") for line in file]
120134
for line_number, line_content in enumerate(lines, start=1):
121-
# For cases where dependencies have conditionals (e.g. Python), only consider the main part.
122135
line_main = line_content.split(";", 1)[0].strip()
123136
if re.search(searchstring, line_main, re.IGNORECASE):
124137
return line_number, line_content.strip()
@@ -166,12 +179,10 @@ def create_security_comment_sarif(diff) -> dict:
166179
- Generates one SARIF location per manifest file.
167180
- Supports various language-specific manifest types.
168181
"""
169-
scan_failed = False
170182
# (Optional: handle scan failure based on alert.error flags)
171183
if len(diff.new_alerts) == 0:
172184
for alert in diff.new_alerts:
173185
if alert.error:
174-
scan_failed = True
175186
break
176187

177188
sarif_data = {
@@ -216,14 +227,12 @@ def create_security_comment_sarif(diff) -> dict:
216227
# Use the first manifest for URL generation.
217228
socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version)
218229

219-
# Prepare descriptions with HTML <br/> for GitHub display.
220230
short_desc = (
221231
f"{alert.props.get('note', '')}<br/><br/>Suggested Action:<br/>"
222232
f"{alert.suggestion}<br/><a href=\"{socket_url}\">{socket_url}</a>"
223233
)
224234
full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '<br/>'))
225235

226-
# Create or reuse the rule definition.
227236
if rule_id not in rules_map:
228237
rules_map[rule_id] = {
229238
"id": rule_id,
@@ -236,7 +245,6 @@ def create_security_comment_sarif(diff) -> dict:
236245
},
237246
}
238247

239-
# --- Build SARIF locations for each manifest file ---
240248
locations = []
241249
for mf in manifest_files:
242250
line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf)
@@ -263,7 +271,7 @@ def create_security_comment_sarif(diff) -> dict:
263271
sarif_data["runs"][0]["results"] = results_list
264272

265273
return sarif_data
266-
274+
267275
@staticmethod
268276
def create_security_comment_json(diff: Diff) -> dict:
269277
scan_failed = False

0 commit comments

Comments
 (0)