Skip to content

Commit 3a4f56d

Browse files
fix: vuk (#108) (#110)
Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Co-authored-by: Tianhao Xu <49143331+tianhaox@users.noreply.github.com>
1 parent 45033ab commit 3a4f56d

File tree

1 file changed

+32
-8
lines changed

1 file changed

+32
-8
lines changed

collector/deep_collector/extract_data.py

Lines changed: 32 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -232,21 +232,45 @@ def parse_ll_log_file(log_path: str) -> list[dict]:
232232
def collect_log_files(log_dir: str) -> list[Path]:
233233
"""
234234
Collect .log files directly under the directory (non-recursive).
235+
Security: Prevents path traversal and symlink attacks.
235236
"""
236237
# 1. Convert to Path object, automatically handles path separators (cross-platform compatible)
237238
log_path = Path(log_dir)
238-
# 2. Path normalization + absolute path (key: eliminates .. path traversal risk)
239+
240+
# 2. Path normalization + absolute path (eliminates .. path traversal risk)
239241
# strict=True requires that the path exist; an exception is raised if it does not
240-
safe_path = log_path.resolve(strict=True)
241-
# 3. Validate directory + read permission (Checkmarx will recognize these two security checks)
242+
try:
243+
safe_path = log_path.resolve(strict=True)
244+
except (OSError, RuntimeError) as e:
245+
raise ValueError(f"Invalid path: {log_dir}") from e
246+
247+
# 3. Security check: reject symlinks to prevent symlink-based path traversal
248+
# Check the original (unresolved) input path: resolve() follows symlinks, so the resolved path is never itself a symlink
249+
if log_path.exists() and log_path.is_symlink():
250+
raise ValueError(f"Symlinks are not allowed for security reasons: {log_dir}")
251+
252+
# 4. Validate directory
242253
if not safe_path.is_dir():
243254
raise ValueError(f"{safe_path} is not a valid directory")
244-
if not os.access(safe_path, os.R_OK):
245-
raise PermissionError(f"No permission to read directory {safe_path}")
246255

247-
# 5. Safely traverse directory (only collect .log files)
248-
# glob is safer than listdir, supports pattern matching
249-
return list(safe_path.glob("*.log"))
256+
# 5. Test read permission by attempting to list directory
257+
# This avoids os.access() which has TOCTOU (Time-of-Check-Time-of-Use) issues
258+
try:
259+
# Test if we can actually read the directory
260+
next(safe_path.iterdir(), None)
261+
except PermissionError as e:
262+
raise PermissionError(f"No permission to read directory {safe_path}") from e
263+
264+
# 6. Safely collect .log files (exclude symlinks for security)
265+
log_files = []
266+
for log_file in safe_path.glob("*.log"):
267+
# Skip symlinked files
268+
if log_file.is_symlink():
269+
print(f"Warning: Skipping symlink file: {log_file.name}")
270+
continue
271+
log_files.append(log_file)
272+
273+
return log_files
250274

251275

252276
def _extract_node_num_from_filename(path: str) -> int:

0 commit comments

Comments
 (0)