google-labs-code · Mwessc · Jun 14, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+*.pyc
diff --git a/src/secure_file_utils.py b/src/secure_file_utils.py
@@ -0,0 +1,170 @@
+import os
+
+# Directory that secure_file_reader() confines all file access to.
+# In a real application, this would be a carefully managed, restricted path.
+# NOTE: this is a relative path, so it is resolved against the current working
+# directory of the process at call time, not against this module's location.
+# Never point this at a broad location such as '/' or a parent directory ('..').
+SECURE_BASE_DIR = "simulated_source_code"
+
+def secure_file_reader(filename: str) -> str:
+    """
+    Simulate securely reading a file located directly inside SECURE_BASE_DIR.
+
+    The name is validated in layers: it is stripped and must be non-empty,
+    must not contain '..', '/', a backslash or a null byte, must end with an
+    allowed extension, and its fully resolved path (symbolic links included)
+    must stay inside SECURE_BASE_DIR.
+
+    No content is actually read. As part of the simulation the base directory
+    is created on demand and a missing file is created with placeholder
+    content, so calling this function can write to disk.
+
+    Note:
+        SECURE_BASE_DIR is a relative path, so it is resolved against the
+        current working directory at call time.
+
+    Args:
+        filename: The name of the file to "read" (e.g., "my_script.py").
+
+    Returns:
+        A message indicating success or the type of error encountered.
+    """
+    print(f"Attempting to process filename: '{filename}'")
+
+    # --- Secure by Design Principle 1: Basic Input Sanitization ---
+    # Remove leading/trailing whitespace. This prevents subtle issues.
+    filename = filename.strip()
+    if not filename:
+        return "Error: Filename cannot be empty."
+
+    # --- Secure by Design Principle 2: Prohibit Dangerous Characters ---
+    # Reject traversal sequences ('..'), both path separators (so the name can
+    # be neither absolute nor nested), and null bytes, which can truncate
+    # strings in some systems.
+    prohibited_chars = ['..', '/', '\\', '\x00']  # \x00 is null byte
+    for char_seq in prohibited_chars:
+        if char_seq in filename:
+            return f"Error: Filename contains prohibited characters ('{char_seq}'). Input validation failed."
+
+    # --- Secure by Design Principle 3: Enforce Allowed File Extensions (Optional but Recommended) ---
+    # This adds another layer of defense, ensuring only expected file types are processed.
+    # If the "Source Code Sleuth" is only for Python files, you'd restrict it.
+    # str.endswith accepts a tuple, so a single call tests every extension.
+    allowed_extensions = ('.py', '.txt', '.c', '.cpp', '.java', '.js', '.html', '.css')
+    if not filename.endswith(allowed_extensions):
+        return f"Error: File extension not allowed. Must be one of: {', '.join(allowed_extensions)}"
+
+    # --- Secure by Design Principle 4: Path Whitelisting (Crucial for Security) ---
+    # Build the candidate path under the base directory, resolve both it and
+    # the base directory to canonical absolute paths, and require the
+    # candidate to be the base directory itself or one of its descendants.
+
+    # Create the secure base directory if it doesn't exist for simulation purposes.
+    os.makedirs(SECURE_BASE_DIR, exist_ok=True)
+
+    # os.path.join inserts the platform's path separator.
+    full_path = os.path.join(SECURE_BASE_DIR, filename)
+
+    # Canonicalize with os.path.realpath rather than os.path.abspath:
+    # abspath only normalizes the path string ('.', '..', duplicate
+    # separators) and does NOT follow symbolic links, so a symlink placed
+    # inside the base directory but pointing elsewhere would slip through the
+    # containment check below. realpath resolves the links first.
+    normalized_secure_base_dir = os.path.realpath(SECURE_BASE_DIR)
+    normalized_full_path = os.path.realpath(full_path)
+
+    # Verify that the resolved path *actually* resides within the secure base directory.
+    # The prefix test appends os.sep so that a sibling directory that merely
+    # shares the prefix (e.g. "simulated_source_code_backup") is not mistaken
+    # for a child; equality with the base directory itself is also accepted.
+    # For plain names this overlaps with the '..' and separator checks above,
+    # but it is the check that still holds when the directory's contents are
+    # not trustworthy.
+    if not normalized_full_path.startswith(normalized_secure_base_dir + os.sep) and \
+       normalized_full_path != normalized_secure_base_dir:
+        return f"Error: Attempted access outside designated secure directory. Path: {normalized_full_path}"
+
+    # --- Secure by Design Principle 5: Simulate File Operations with Error Handling ---
+    try:
+        # In a real application, you would open and read the file here
+        # (always the validated normalized_full_path, never the raw input):
+        # with open(normalized_full_path, 'r') as f:
+        #     content = f.read()
+        #     return f"Successfully read content from {filename}: {content}"
+
+        # For this simulation nothing is read. A file that passed validation
+        # but does not exist yet is created with placeholder content, and the
+        # returned message says so.
+        if not os.path.exists(normalized_full_path):
+            with open(normalized_full_path, 'w') as f:
+                f.write(f"# This is a simulated source code file for {filename}\n")
+                f.write("print('Hello, secure world!')\n")
+            return f"Successfully simulated reading content from '{filename}' (File created for simulation)."
+
+        return f"Successfully simulated reading content from '{filename}'."
+
+    except FileNotFoundError:
+        # Raised by open() if a component of the path is missing, e.g. the
+        # base directory was removed after the checks above.
+        return f"Error: File '{filename}' not found in the secure directory."
+    except PermissionError:
+        # Raised by open() when the process may not create or write the file.
+        return f"Error: Permission denied to access '{filename}'."
+    except Exception as e:
+        # Catch any other unexpected errors during file operation.
+        return f"An unexpected error occurred while processing '{filename}': {e}"
+
+# --- Demonstrating Usage with Various Test Cases ---
+
+if __name__ == "__main__":
+    print("--- Testing Secure File Reader ---")
+
+    # Test Case 1: Valid and safe filename
+    print(secure_file_reader("my_script.py"))
+    print(secure_file_reader("another_file.txt"))
+
+    # Test Case 2: Attempting directory traversal (../)
+    print(secure_file_reader("../../../etc/passwd")) # Should be blocked
+    print(secure_file_reader("sub_dir/../../file.txt")) # Should be blocked
+
+    # Test Case 3: Attempting absolute path
+    print(secure_file_reader("/etc/passwd")) # Should be blocked
+    # Windows-style absolute path; each backslash is escaped in the literal.
+    print(secure_file_reader("C:\\Windows\\System32\\drivers\\etc\\hosts")) # Should be blocked by '\\'
+
+    # Test Case 4: Filename with null byte (common injection technique)
+    print(secure_file_reader("malicious_file.txt\x00.exe")) # Should be blocked
+
+    # Test Case 5: Empty filename
+    print(secure_file_reader("   ")) # Should be blocked
+
+    # Test Case 6: Filename with disallowed extension
+    print(secure_file_reader("image.jpg")) # Should be blocked
+    print(secure_file_reader("secret.bak")) # Should be blocked
+
+    # Test Case 7: Windows-style traversal using a backslash
+    # The name contains both '..' and a backslash, so it is rejected by the
+    # prohibited_chars check ('..' is tested first) and never reaches the
+    # absolute-path containment check.
+    # That containment check is defense in depth: every name containing '..'
+    # or a path separator is already rejected earlier, so it is hard to
+    # trigger from the input string alone.
+    # The backslash is escaped ('\\') because '\s' is not a valid escape
+    # sequence in a regular string literal and newer Python versions warn
+    # about it; the value passed to the function is unchanged.
+    print(secure_file_reader("..\\safe_file.txt")) # Caught by prohibited_chars for '..'
+
+    # Test Case 8: File that already exists in the secure directory
+    # Pre-create the file so the call below takes the "file exists" path
+    # instead of the "created for simulation" path.
+    # This part is more about the simulation logic than the security checks themselves.
+    # makedirs(exist_ok=True) is a no-op when the directory is already there,
+    # so no separate existence check is needed.
+    os.makedirs(SECURE_BASE_DIR, exist_ok=True)
+    with open(os.path.join(SECURE_BASE_DIR, "example_code.py"), 'w') as f:
+        f.write("# Pre-existing simulated file\nprint('Pre-existing')\n")
+    print(secure_file_reader("example_code.py")) # Should be successful
+
+    print("\n--- End of Testing ---")
+    print(f"Check the '{SECURE_BASE_DIR}' directory. Simulated files might have been created.")
diff --git a/tests/test_secure_file_utils.py b/tests/test_secure_file_utils.py
@@ -0,0 +1,144 @@
+import unittest
+import os
+import shutil
+import sys
+from io import StringIO
+
+# Add the sibling src directory to sys.path so secure_file_utils can be imported.
+# The path is built from this file's location, so it does not depend on the working directory.
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))
+
+try:
+    from secure_file_utils import secure_file_reader, SECURE_BASE_DIR
+except ImportError:  # Print where Python looked, then re-raise so the run still fails.
+    print("Failed to import secure_file_utils. Ensure src directory is in PYTHONPATH.")
+    print(f"Current sys.path: {sys.path}")
+    print(f"Current working directory: {os.getcwd()}")
+    raise
+
+class TestSecureFileReader(unittest.TestCase):
+    """Tests for secure_file_reader().
+
+    Every test runs against SECURE_BASE_DIR resolved from the current working
+    directory. That directory is deleted and recreated around each test, so
+    anything already stored there is lost.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Resolve the base directory once and start from an empty copy of it."""
+        # Ensure the base directory for tests is clean before any tests run
+        cls.test_secure_base_dir = os.path.abspath(SECURE_BASE_DIR)
+        if os.path.exists(cls.test_secure_base_dir):
+            shutil.rmtree(cls.test_secure_base_dir)
+        os.makedirs(cls.test_secure_base_dir, exist_ok=True)
+
+    def setUp(self):
+        """Recreate the base directory with two known files and silence stdout."""
+        # Clean and recreate the secure base directory before each test
+        # This ensures tests are isolated
+        if os.path.exists(self.test_secure_base_dir):
+            shutil.rmtree(self.test_secure_base_dir)
+        os.makedirs(self.test_secure_base_dir, exist_ok=True)
+
+        # Create some dummy files for valid read tests
+        with open(os.path.join(self.test_secure_base_dir, "valid_script.py"), "w") as f:
+            f.write("print('hello')")
+        with open(os.path.join(self.test_secure_base_dir, "another_safe.txt"), "w") as f:
+            f.write("some text")
+
+        # Suppress print output from secure_file_reader during tests
+        # by redirecting stdout, unless needed for specific debug
+        self.held_stdout = sys.stdout
+        sys.stdout = StringIO()
+
+    def tearDown(self):
+        """Restore stdout and remove the base directory."""
+        # Restore stdout
+        sys.stdout = self.held_stdout
+        # Clean up the directory after each test
+        if os.path.exists(self.test_secure_base_dir):
+            shutil.rmtree(self.test_secure_base_dir)
+
+    def test_valid_file_access(self):
+        # Both files are created in setUp, so the reader finds them already present.
+        self.assertIn("Successfully simulated reading content", secure_file_reader("valid_script.py"))
+        self.assertIn("Successfully simulated reading content", secure_file_reader("another_safe.txt"))
+
+    def test_non_existent_file_simulation(self):
+        # This file doesn't exist, so it should be created by the simulation logic
+        result = secure_file_reader("new_simulated_file.py")
+        self.assertIn("File created for simulation", result)
+        self.assertTrue(os.path.exists(os.path.join(self.test_secure_base_dir, "new_simulated_file.py")))
+
+    def test_empty_filename(self):
+        # A whitespace-only name is stripped to the empty string before the check.
+        self.assertEqual(secure_file_reader(""), "Error: Filename cannot be empty.")
+        self.assertEqual(secure_file_reader("   "), "Error: Filename cannot be empty.")
+
+    def test_prohibited_chars_path_traversal(self):
+        # The Windows-style name is a raw string: in a normal literal "\b"
+        # would be turned into a backspace character instead of backslash + b.
+        self.assertIn("prohibited characters", secure_file_reader("../../../etc/passwd"))
+        self.assertIn("prohibited characters", secure_file_reader(r"..\..\..\boot.ini")) # Windows style
+        self.assertIn("prohibited characters", secure_file_reader("valid_script.py/../../../../etc/passwd")) # Mixed
+
+    def test_prohibited_chars_absolute_paths(self):
+        # Raw string keeps the backslashes literal; "\W", "\s" and "\k" are
+        # not valid escape sequences in a normal string literal.
+        self.assertIn("prohibited characters", secure_file_reader("/etc/passwd"))
+        self.assertIn("prohibited characters", secure_file_reader(r"C:\Windows\system32\kernel32.dll"))
+
+    def test_prohibited_chars_null_byte(self):
+        # chr(0) is the null byte; the name is rejected even though it starts validly.
+        self.assertIn("prohibited characters", secure_file_reader("valid_script.py" + chr(0) + "not_valid.exe"))
+
+    def test_disallowed_extensions(self):
+        # None of these suffixes is in the reader's allowed-extension list.
+        self.assertIn("File extension not allowed", secure_file_reader("image.jpg"))
+        self.assertIn("File extension not allowed", secure_file_reader("archive.zip"))
+        self.assertIn("File extension not allowed", secure_file_reader("document.docx"))
+
+    def test_access_outside_secure_directory_explicit(self):
+        # Intended target: the final containment check in secure_file_reader
+        # (the normalized path must start with the normalized base directory).
+        #
+        # secure_file_reader always prepends SECURE_BASE_DIR and rejects any
+        # name containing '..', '/' or '\' before that check runs, so with a
+        # simple base directory such as "simulated_source_code" no plain
+        # filename can reach the check and fail it.
+        #
+        # NOTE: os.path.abspath only normalizes the path string; it does not
+        # resolve symbolic links (os.path.realpath does). A symlink-based test
+        # is therefore only meaningful if the function resolves links before
+        # comparing paths.
+        #
+        # Until such a test is designed the case is reported as skipped rather
+        # than silently passing; the traversal and absolute-path tests above
+        # cover the rejections that are reachable from the input string.
+        self.skipTest("no filename reaches the containment check without failing an earlier check")
+
+    # The following tests for FileNotFoundError and PermissionError are conceptual:
+    # secure_file_reader never opens an existing file for reading, so the OS has
+    # no chance to raise these errors for the files it "reads".
+    # They are reported as skipped until the function performs real reads.
+
+    def test_simulated_file_not_found(self):
+        # The current function creates files if they don't exist in the simulation.
+        # So, a true "file not found" from the perspective of the function's internal logic
+        # (before creation) is hard to test without altering the function.
+        # The "File created for simulation" message covers this; see
+        # test_non_existent_file_simulation.
+        self.skipTest("secure_file_reader creates missing files, so 'not found' cannot be triggered")
+
+    def test_simulated_permission_error(self):
+        # Similar to FileNotFoundError, permission errors are not directly raised from
+        # the OS in a way that the current test structure can easily intercept for
+        # a "real" file it's trying to read, due to the simulation layer.
+        # If we wanted to test this, we'd have to make a file unreadable by the user
+        # running the tests, then call secure_file_reader on it, AND modify
+        # secure_file_reader to actually attempt to read it.
+        self.skipTest("secure_file_reader does not read existing files, so no permission error occurs")
+
+if __name__ == '__main__':  # Run the test suite when this file is executed directly.
+    unittest.main()