@@ -232,21 +232,45 @@ def parse_ll_log_file(log_path: str) -> list[dict]:
def collect_log_files(log_dir: str) -> list[Path]:
    """
    Collect regular ``.log`` files directly under a directory (non-recursive).

    Security: ``..`` segments are eliminated by resolving the path, a
    ``log_dir`` whose final component is a symlink is rejected, and
    symlinked entries inside the directory are skipped.

    Args:
        log_dir: Path of the directory to scan.

    Returns:
        Sorted list of paths (rooted at the resolved directory) to the
        regular files whose names match ``*.log``.

    Raises:
        ValueError: If the path cannot be resolved (for example it does not
            exist), is itself a symlink, or is not a directory.
        PermissionError: If the directory cannot be listed.
    """
    # Path handles platform-specific separators.
    log_path = Path(log_dir)

    # strict=True makes resolve() fail for a missing path. RuntimeError is
    # caught as well because older Python versions report symlink loops
    # that way instead of raising OSError.
    try:
        safe_path = log_path.resolve(strict=True)
    except (OSError, RuntimeError) as e:
        raise ValueError(f"Invalid path: {log_dir}") from e

    # Inspect the caller-supplied path rather than the resolved one:
    # resolve() has already followed any symlink, so safe_path itself is
    # never a symlink.
    if log_path.is_symlink():
        raise ValueError(f"Symlinks are not allowed for security reasons: {log_dir}")

    if not safe_path.is_dir():
        raise ValueError(f"{safe_path} is not a valid directory")

    # Probe readability by actually listing the directory instead of asking
    # os.access(), whose answer may be stale by the time it is used (TOCTOU).
    try:
        next(safe_path.iterdir(), None)
    except PermissionError as e:
        raise PermissionError(f"No permission to read directory {safe_path}") from e

    log_files = []
    for log_file in safe_path.glob("*.log"):
        # Symlinked entries could point outside the directory; skip them.
        if log_file.is_symlink():
            print(f"Warning: Skipping symlink file: {log_file.name}")
            continue
        # The pattern also matches directories (and other non-regular
        # entries) named "*.log"; only regular files are log files.
        if not log_file.is_file():
            continue
        log_files.append(log_file)

    # Directory enumeration order is platform-dependent; sort so callers
    # get a deterministic result.
    return sorted(log_files)
250274
251275
252276def _extract_node_num_from_filename (path : str ) -> int :
0 commit comments