Skip to content

Commit fd985a8

Browse files
committed
Refactor file detection and scanning logic to fix commit file handling
- Fix file argument parsing to handle list, string, and JSON formats more robustly - Clarify git repository detection and file selection logic with better separation of concerns - Add force_api_mode to handle cases where no supported manifest files are found - Replace ambiguous should_skip_scan logic with clearer file detection flow - Add create_full_scan_with_report_url method to Core for API-mode scanning - Improve logging messages and remove unused code (get_all_scores method) - Ensure consistent diff object initialization and ID handling - Automatically enable disable_blocking when no supported files are detected
1 parent 9a1d030 commit fd985a8

File tree

4 files changed

+170
-49
lines changed

4 files changed

+170
-49
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
66

77
[project]
88
name = "socketsecurity"
9-
version = "2.1.21"
9+
version = "2.1.22"
1010
requires-python = ">= 3.10"
1111
license = {"file" = "LICENSE"}
1212
dependencies = [

socketsecurity/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
__author__ = 'socket.dev'
2-
__version__ = '2.1.21'
2+
__version__ = '2.1.22'

socketsecurity/core/__init__.py

Lines changed: 69 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from io import BytesIO
99
from pathlib import PurePath
1010
from typing import BinaryIO, Dict, List, Tuple, Set, Union
11-
import re
1211
from socketdev import socketdev
1312
from socketdev.exceptions import APIFailure
1413
from socketdev.fullscans import FullScanParams, SocketArtifact
@@ -196,7 +195,7 @@ def find_files(self, path: str) -> List[str]:
196195
for ecosystem in patterns:
197196
if ecosystem in self.config.excluded_ecosystems:
198197
continue
199-
log.info(f'Scanning ecosystem: {ecosystem}')
198+
log.debug(f'Scanning ecosystem: {ecosystem}')
200199
ecosystem_patterns = patterns[ecosystem]
201200
for file_name in ecosystem_patterns:
202201
original_pattern = ecosystem_patterns[file_name]["pattern"]
@@ -356,6 +355,72 @@ def create_full_scan(self, files: list[tuple[str, tuple[str, BytesIO]]], params:
356355

357356
return full_scan
358357

358+
def create_full_scan_with_report_url(
359+
self,
360+
path: str,
361+
params: FullScanParams,
362+
no_change: bool = False
363+
) -> dict:
364+
"""Create a new full scan and return with html_report_url.
365+
366+
Args:
367+
path: Path to look for manifest files
368+
params: Query params for the Full Scan endpoint
369+
no_change: If True, return empty result
370+
371+
Returns:
372+
Dict with full scan data including html_report_url
373+
"""
374+
log.debug(f"starting create_full_scan_with_report_url with no_change: {no_change}")
375+
if no_change:
376+
return {
377+
"id": "NO_SCAN_RAN",
378+
"html_report_url": "",
379+
"unmatchedFiles": []
380+
}
381+
382+
# Find manifest files
383+
files = self.find_files(path)
384+
files_for_sending = self.load_files_for_sending(files, path)
385+
if not files:
386+
return {
387+
"id": "NO_SCAN_RAN",
388+
"html_report_url": "",
389+
"unmatchedFiles": []
390+
}
391+
392+
try:
393+
# Create new scan
394+
new_scan_start = time.time()
395+
new_full_scan = self.create_full_scan(files_for_sending, params)
396+
new_scan_end = time.time()
397+
log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}")
398+
except APIFailure as e:
399+
log.error(f"Failed to create full scan: {e}")
400+
raise
401+
402+
# Construct report URL
403+
base_socket = "https://socket.dev/dashboard/org"
404+
report_url = f"{base_socket}/{self.config.org_slug}/sbom/{new_full_scan.id}"
405+
if not params.include_license_details:
406+
report_url += "?include_license_details=false"
407+
408+
# Return result in the format expected by the user
409+
return {
410+
"id": new_full_scan.id,
411+
"created_at": new_full_scan.created_at,
412+
"updated_at": new_full_scan.updated_at,
413+
"organization_id": new_full_scan.organization_id,
414+
"repository_id": new_full_scan.repository_id,
415+
"branch": new_full_scan.branch,
416+
"commit_message": new_full_scan.commit_message,
417+
"commit_hash": new_full_scan.commit_hash,
418+
"pull_request": new_full_scan.pull_request,
419+
"committers": new_full_scan.committers,
420+
"html_report_url": report_url,
421+
"unmatchedFiles": getattr(new_full_scan, 'unmatchedFiles', [])
422+
}
423+
359424
def check_full_scans_status(self, head_full_scan_id: str, new_full_scan_id: str) -> bool:
360425
is_ready = False
361426
current_timeout = self.config.timeout
@@ -667,13 +732,13 @@ def create_new_diff(
667732
"""
668733
log.debug(f"starting create_new_diff with no_change: {no_change}")
669734
if no_change:
670-
return Diff(id="no_diff_id", diff_url="", report_url="")
735+
return Diff(id="NO_DIFF_RAN", diff_url="", report_url="")
671736

672737
# Find manifest files
673738
files = self.find_files(path)
674739
files_for_sending = self.load_files_for_sending(files, path)
675740
if not files:
676-
return Diff(id="no_diff_id", diff_url="", report_url="")
741+
return Diff(id="NO_DIFF_RAN", diff_url="", report_url="")
677742

678743
try:
679744
# Get head scan ID
@@ -809,12 +874,6 @@ def create_diff_report(
809874

810875
return diff
811876

812-
def get_all_scores(self, packages: dict[str, Package]) -> dict[str, Package]:
813-
components = []
814-
for package_id in packages:
815-
package = packages[package_id]
816-
return packages
817-
818877
def create_purl(self, package_id: str, packages: dict[str, Package]) -> Purl:
819878
"""
820879
Creates the extended PURL data for package identification and tracking.

socketsecurity/socketcli.py

Lines changed: 99 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -75,19 +75,49 @@ def main_code():
7575
log.debug("loaded client")
7676
core = Core(socket_config, sdk)
7777
log.debug("loaded core")
78-
# Load files - files defaults to "[]" in CliConfig
78+
# Parse files argument
7979
try:
80-
files = json.loads(config.files) # Will always succeed with empty list by default
81-
is_repo = True # FIXME: This is misleading - JSON parsing success doesn't indicate repo status
80+
if isinstance(config.files, list):
81+
# Already a list, use as-is
82+
specified_files = config.files
83+
elif isinstance(config.files, str):
84+
# Handle different string formats
85+
files_str = config.files.strip()
86+
87+
# If the string is wrapped in extra quotes, strip them
88+
if ((files_str.startswith('"') and files_str.endswith('"')) or
89+
(files_str.startswith("'") and files_str.endswith("'"))):
90+
# Check if the inner content looks like JSON
91+
inner_str = files_str[1:-1]
92+
if inner_str.startswith('[') and inner_str.endswith(']'):
93+
files_str = inner_str
94+
95+
# Try to parse as JSON
96+
try:
97+
specified_files = json.loads(files_str)
98+
except json.JSONDecodeError:
99+
# If JSON parsing fails, try replacing single quotes with double quotes
100+
files_str = files_str.replace("'", '"')
101+
specified_files = json.loads(files_str)
102+
else:
103+
# Default to empty list
104+
specified_files = []
82105
except Exception as error:
83-
# Only hits this if files was manually set to invalid JSON
84-
log.error(f"Unable to parse {config.files}")
85-
log.error(error)
106+
log.error(f"Unable to parse files argument: {config.files}")
107+
log.error(f"Error details: {error}")
108+
log.debug(f"Files type: {type(config.files)}")
109+
log.debug(f"Files repr: {repr(config.files)}")
86110
sys.exit(3)
87111

112+
# Determine if files were explicitly specified
113+
files_explicitly_specified = config.files != "[]" and len(specified_files) > 0
114+
88115
# Git setup
116+
is_repo = False
117+
git_repo = None
89118
try:
90119
git_repo = Git(config.target_path)
120+
is_repo = True
91121
if not config.repo:
92122
config.repo = git_repo.repo_name
93123
if not config.commit_sha:
@@ -98,12 +128,10 @@ def main_code():
98128
config.committers = [git_repo.committer]
99129
if not config.commit_message:
100130
config.commit_message = git_repo.commit_message
101-
if files and not config.ignore_commit_files: # files is empty by default, so this is False unless files manually specified
102-
files = git_repo.changed_files # Only gets git's changed files if files were manually specified
103-
is_repo = True # Redundant since already True
104131
except InvalidGitRepositoryError:
105-
is_repo = False # Overwrites previous True - this is the REAL repo status
106-
config.ignore_commit_files = True # Silently changes config - should log this
132+
is_repo = False
133+
log.debug("Not a git repository, setting ignore_commit_files=True")
134+
config.ignore_commit_files = True
107135
except NoSuchPathError:
108136
raise Exception(f"Unable to find path {config.target_path}")
109137

@@ -125,26 +153,43 @@ def main_code():
125153
if scm is not None:
126154
config.default_branch = scm.config.is_default_branch
127155

156+
# Determine files to check based on the new logic
157+
files_to_check = []
158+
force_api_mode = False
159+
160+
if files_explicitly_specified:
161+
# Case 2: Files are specified - use them and don't check commit details
162+
files_to_check = specified_files
163+
log.debug(f"Using explicitly specified files: {files_to_check}")
164+
elif not config.ignore_commit_files and is_repo:
165+
# Case 1: Files not specified and --ignore-commit-files not set - try to find changed files from commit
166+
files_to_check = git_repo.changed_files
167+
log.debug(f"Using changed files from commit: {files_to_check}")
168+
else:
169+
# ignore_commit_files is set or not a repo - scan everything but force API mode if no supported files
170+
files_to_check = []
171+
log.debug("No files to check from commit (ignore_commit_files=True or not a repo)")
128172

129-
# Combine manually specified files with git changes if applicable
130-
files_to_check = set(json.loads(config.files)) # Start with manually specified files
131-
132-
# Add git changes if this is a repo and we're not ignoring commit files
133-
if is_repo and not config.ignore_commit_files and not files_to_check:
134-
files_to_check.update(git_repo.changed_files)
135-
136-
# Determine if we need to scan based on manifest files
137-
should_skip_scan = True # Default to skipping
138-
if config.ignore_commit_files:
139-
should_skip_scan = False # Force scan if ignoring commit files
140-
elif files_to_check: # If we have any files to check
141-
should_skip_scan = not core.has_manifest_files(list(files_to_check))
142-
log.debug(f"in elif, should_skip_scan: {should_skip_scan}")
143-
144-
if should_skip_scan:
145-
log.debug("No manifest files found in changes, skipping scan")
173+
# Check if we have supported manifest files
174+
has_supported_files = files_to_check and core.has_manifest_files(files_to_check)
175+
176+
# Case 3: If no supported files or files are empty, force API mode (no PR comments)
177+
if not has_supported_files:
178+
force_api_mode = True
179+
log.debug("No supported manifest files found, forcing API mode")
180+
181+
# Determine scan behavior
182+
should_skip_scan = False # Always perform scan, but behavior changes based on supported files
183+
if config.ignore_commit_files and not files_explicitly_specified:
184+
# Force full scan when ignoring commit files and no explicit files
185+
should_skip_scan = False
186+
log.debug("Forcing full scan due to ignore_commit_files")
187+
elif not has_supported_files:
188+
# No supported files - still scan but in API mode
189+
should_skip_scan = False
190+
log.debug("No supported files but will scan in API mode")
146191
else:
147-
log.debug("Found manifest files or forced scan, proceeding")
192+
log.debug("Found supported manifest files, proceeding with normal scan")
148193

149194
org_slug = core.config.org_slug
150195
if config.repo_is_public:
@@ -177,6 +222,8 @@ def main_code():
177222
# Initialize diff
178223
diff = Diff()
179224
diff.id = "NO_DIFF_RAN"
225+
diff.diff_url = ""
226+
diff.report_url = ""
180227

181228
# Handle SCM-specific flows
182229
if scm is not None and scm.check_event_type() == "comment":
@@ -192,11 +239,9 @@ def main_code():
192239
log.debug("Removing comment alerts")
193240
scm.remove_comment_alerts(comments)
194241

195-
elif scm is not None and scm.check_event_type() != "comment":
242+
elif scm is not None and scm.check_event_type() != "comment" and not force_api_mode:
196243
log.info("Push initiated flow")
197-
if should_skip_scan:
198-
log.info("No manifest files changes, skipping scan")
199-
elif scm.check_event_type() == "diff":
244+
if scm.check_event_type() == "diff":
200245
log.info("Starting comment logic for PR/MR event")
201246
diff = core.create_new_diff(config.target_path, params, no_change=should_skip_scan)
202247
comments = scm.get_comments_for_pr()
@@ -255,12 +300,24 @@ def main_code():
255300

256301
output_handler.handle_output(diff)
257302
else:
258-
log.info("API Mode")
259-
diff = core.create_new_diff(config.target_path, params, no_change=should_skip_scan)
260-
output_handler.handle_output(diff)
303+
if force_api_mode:
304+
log.info("No Manifest files changed, creating Socket Report")
305+
else:
306+
log.info("API Mode")
307+
full_scan_result = core.create_full_scan_with_report_url(config.target_path, params, no_change=should_skip_scan)
308+
log.info(f"Full scan created with ID: {full_scan_result['id']}")
309+
log.info(f"Full scan report URL: {full_scan_result['html_report_url']}")
310+
311+
# Create a minimal diff-like object for compatibility with downstream code
312+
diff = Diff()
313+
diff.id = full_scan_result['id']
314+
diff.report_url = full_scan_result['html_report_url']
315+
diff.diff_url = full_scan_result['html_report_url']
316+
diff.packages = {} # No package data needed for API mode
317+
# No output handling needed for API mode - just creating the scan
261318

262319
# Handle license generation
263-
if not should_skip_scan and diff.id != "no_diff_id" and config.generate_license:
320+
if not should_skip_scan and diff.id != "NO_DIFF_RAN" and diff.id != "NO_SCAN_RAN" and config.generate_license:
264321
all_packages = {}
265322
for purl in diff.packages:
266323
package = diff.packages[purl]
@@ -279,6 +336,11 @@ def main_code():
279336
all_packages[package.id] = output
280337
core.save_file(config.license_file_name, json.dumps(all_packages))
281338

339+
# If we forced API mode due to no supported files, behave as if --disable-blocking was set
340+
if force_api_mode and not config.disable_blocking:
341+
log.debug("Temporarily enabling disable_blocking due to no supported manifest files")
342+
config.disable_blocking = True
343+
282344
sys.exit(output_handler.return_exit_code(diff))
283345

284346

0 commit comments

Comments
 (0)