Merge pull request #17 from garland3/security-fixes

tlbauer2 · web-flow · commit 114b9280a970 · 2025-09-03T20:10:51.000-06:00
Fix critical security vulnerabilities (P0) - GitHub Issue #16
diff --git a/src/talkpipe/app/chatterlang_serve.py b/src/talkpipe/app/chatterlang_serve.py
@@ -179,6 +179,9 @@ async def favicon():
         # Configure middleware
         self._setup_middleware()
         
+        # Add security headers middleware
+        self._setup_security_headers()
+        
         # Configure routes
         self._setup_routes()
         
@@ -221,13 +224,15 @@ def get_or_create_session(self, request: Request, response: Response) -> UserSes
             )
             self.sessions[session_id] = session
             
-            # Set session cookie (expires in 24 hours)
+            # Set session cookie (expires in 24 hours) with security attributes
             response.set_cookie(
                 key="talkpipe_session_id",
                 value=session_id,
                 max_age=86400,  # 24 hours
-                httponly=True,
-                samesite="lax"
+                httponly=True,  # Prevent JavaScript access
+                samesite="lax",  # CSRF protection
+                secure=False,    # Set to True in production with HTTPS
+                path="/"         # Cookie is sent for every path on this host
             )
             
             logger.info(f"Created new session: {session_id}")
@@ -268,15 +273,61 @@ def cleanup_worker():
         logger.info("Started session cleanup background task")
     
     def _setup_middleware(self):
-        """Configure CORS middleware"""
+        """Configure CORS middleware with security restrictions"""
+        # Default allowed origins: localhost/127.0.0.1 on ports 3000, 8000 and self.port - never "*"
+        allowed_origins = [
+            "http://localhost:3000",
+            "http://localhost:8000", 
+            "http://127.0.0.1:3000",
+            "http://127.0.0.1:8000",
+            f"http://localhost:{self.port}",
+            f"http://127.0.0.1:{self.port}"
+        ]
+        
+        # Append comma-separated origins from TALKPIPE_ALLOWED_ORIGINS, skipping blank entries
+        import os
+        env_origins = os.getenv('TALKPIPE_ALLOWED_ORIGINS', '').split(',')
+        allowed_origins.extend([origin.strip() for origin in env_origins if origin.strip()])
+        
         self.app.add_middleware(
             CORSMiddleware,
-            allow_origins=["*"],
+            allow_origins=allowed_origins,  # Specific origins only - never "*"
             allow_credentials=True,
-            allow_methods=["*"],
-            allow_headers=["*"],
+            allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],  # Specific methods only
+            allow_headers=["Content-Type", "Authorization", "X-API-Key"],  # Specific headers only
+            expose_headers=["Content-Type"],
+            max_age=86400,  # Preflight cache lifetime in seconds (24 hours)
         )
     
+    def _setup_security_headers(self):
+        """Add security headers to all responses"""
+        @self.app.middleware("http")
+        async def add_security_headers(request, call_next):
+            response = await call_next(request)
+            
+            # Assigned after the handler has run, so these values replace any the route set itself
+            response.headers["X-Frame-Options"] = "DENY"
+            response.headers["X-Content-Type-Options"] = "nosniff"
+            response.headers["X-XSS-Protection"] = "1; mode=block"  # NOTE(review): deprecated, non-standard header - confirm it is still wanted
+            response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
+            response.headers["Content-Security-Policy"] = (
+                "default-src 'self'; "
+                "script-src 'self' 'unsafe-inline'; "  # NOTE(review): 'unsafe-inline' permits inline scripts, weakening XSS protection
+                "style-src 'self' 'unsafe-inline'; "
+                "img-src 'self' data:; "
+                "connect-src 'self'; "
+                "font-src 'self'; "
+                "object-src 'none'; "
+                "media-src 'self'; "
+                "child-src 'none';"
+            )
+            response.headers["Permissions-Policy"] = (
+                "camera=(), microphone=(), geolocation=(), payment=(), "
+                "usb=(), magnetometer=(), gyroscope=(), speaker=()"  # NOTE(review): verify 'speaker' is a recognized feature name
+            )
+            
+            return response
+    
     def _setup_routes(self):
         """Configure all API routes"""
         
@@ -326,12 +377,21 @@ async def get_form_config():
             return self.form_config.model_dump()
         
         @self.app.get("/output-stream")
-        async def output_stream(request: Request, response: Response):
+        async def output_stream(
+            request: Request, 
+            response: Response,
+            api_key: str = Depends(self._verify_api_key)  # presumably rejects requests without a valid X-API-Key - confirm in _verify_api_key
+        ):
             """Server-Sent Events endpoint for streaming output"""
             session = self.get_or_create_session(request, response)
             return StreamingResponse(
                 self._generate_output_stream(session),
-                media_type="text/event-stream"
+                media_type="text/event-stream",
+                headers={  # NOTE(review): the session cookie is set on the injected `response`, not on this StreamingResponse - confirm it reaches the client
+                    "Cache-Control": "no-cache",
+                    "Connection": "keep-alive",
+                    "X-Content-Type-Options": "nosniff"
+                }
             )
     
     async def _verify_api_key(self, x_api_key: Optional[str] = Header(None)):
diff --git a/src/talkpipe/util/data_manipulation.py b/src/talkpipe/util/data_manipulation.py
@@ -284,6 +284,23 @@ def compileLambda(expression: str, fail_on_error: bool = True):
     Returns:
         A callable function that takes a single 'item' parameter and returns the evaluated expression result
     """
+    # Security check: reject expressions containing any of these substrings (case-insensitive; also matches inside longer names, e.g. 'dir' in 'direction')
+    dangerous_patterns = [
+        '__import__', 'import', 'exec', 'eval', 'compile', 'open', 'file',
+        'input', 'raw_input', 'reload', 'vars', 'locals', 'globals',
+        'dir', 'hasattr', 'getattr', 'setattr', 'delattr', 'classmethod',
+        'staticmethod', 'super', 'property', '__', '.mro', '.subclasses'
+    ]
+    
+    expression_lower = expression.lower()
+    for pattern in dangerous_patterns:
+        if pattern in expression_lower:
+            raise ValueError(f"Security violation: Expression contains prohibited pattern '{pattern}'")
+    
+    # Additional check for 'getitem'/'setitem'; the '.__' test can never fire because '__' is already rejected above
+    if '.__' in expression or 'getitem' in expression_lower or 'setitem' in expression_lower:
+        raise ValueError("Security violation: Expression contains prohibited attribute access patterns")
+
     # Set of safe built-ins that can be used in expressions
     _SAFE_BUILTINS = {
         'abs': abs, 'all': all, 'any': any, 'bool': bool, 'dict': dict,
@@ -315,11 +332,18 @@ def lambda_function(item: Any) -> Any:
 
         # If item is a dictionary, add its keys as variables for convenience
         if isinstance(item, dict):
-            locals_dict.update(item)
+            # Filter dictionary keys to prevent injection of dangerous names
+            safe_keys = {k: v for k, v in item.items() 
+                        if isinstance(k, str) and not k.startswith('_') and k not in dangerous_patterns}
+            locals_dict.update(safe_keys)
+
+        # Create a completely restricted environment with no access to dangerous globals
+        restricted_globals = {'__builtins__': {}}
+        restricted_globals.update(SAFE_BUILTINS)
 
-        # Evaluate the expression in a restricted environment
+        # Evaluate the expression in a heavily restricted environment
         try:
-            result = eval(compiled_code, dict(SAFE_BUILTINS), locals_dict)
+            result = eval(compiled_code, restricted_globals, locals_dict)
             return result
         except Exception as e:
             error_msg = f"Error evaluating expression '{expression}' on item {item}: {e}"
diff --git a/src/talkpipe/util/os.py b/src/talkpipe/util/os.py
@@ -1,28 +1,145 @@
 import logging
+import subprocess
+import shlex
 
 logger = logging.getLogger(__name__)
 
 
-import subprocess
+class SecurityError(Exception):
+    """Raised when a security violation is detected."""
+    pass
 
 
 def run_command(command: str):
     """
     Runs an external command and yields each line from stdout.
+    
+    Security note: This function implements security checks to prevent
+    command injection attacks.
 
     Args:
         command: The command to run as a string.
 
     Yields:
         Each line from the command's stdout.
+        
+    Raises:
+        SecurityError: If the command contains dangerous patterns.
+        subprocess.CalledProcessError: If the command fails.
+    """
+    # Security validation - runs on first iteration, not at call time, because this function is a generator
+    _validate_command_security(command)
+    
+    logger.debug(f"Executing validated command: {command}")
+    
+    # Use shell=False and split command properly to prevent injection
+    try:
+        # Split command safely using shlex
+        command_parts = shlex.split(command)
+        
+        # Additional validation on command parts
+        if not command_parts:
+            raise ValueError("Empty command provided")
+            
+        # Reject the command unless its base name is in the allow-list (always enforced, not optional)
+        base_command = command_parts[0]
+        _validate_base_command(base_command)
+        
+        process = subprocess.Popen(
+            command_parts, 
+            stdout=subprocess.PIPE, 
+            stderr=subprocess.PIPE,  # NOTE(review): stderr is read only after stdout is exhausted; heavy stderr output could fill the pipe and stall the child
+            text=True, 
+            shell=False  # Critical: never use shell=True
+        )
+        
+        for line in process.stdout:
+            logger.debug(f"Command output: {line.rstrip()}")
+            yield line.rstrip()  # Remove trailing newline
+            
+        process.wait()  # Wait for the command to complete
+        
+        if process.returncode != 0:
+            # stderr is logged only; it is not attached to the CalledProcessError raised below
+            stderr_output = process.stderr.read() if process.stderr else "No error details available"
+            logger.error(f"Command failed with return code {process.returncode}: {stderr_output}")
+            raise subprocess.CalledProcessError(process.returncode, command)
+            
+        logger.debug("Command completed successfully")
+        
+    except subprocess.CalledProcessError:
+        raise  # Re-raise subprocess errors
+    except Exception as e:  # also re-wraps the ValueError and SecurityError raised inside this try block
+        logger.error(f"Error executing command '{command}': {e}")
+        raise SecurityError(f"Command execution failed: {e}")
+
+
+def _validate_command_security(command: str):
+    """
+    Validate that the command does not contain dangerous patterns.
+    
+    Args:
+        command: The command string to validate.
+        
+    Raises:
+        SecurityError: If dangerous patterns are detected.
+    """
+    # Substrings rejected anywhere in the raw command string; the first match in list order is reported
+    dangerous_patterns = [
+        ';',    # Command separator
+        '&&',   # Command chaining (reported as '&&' because it is checked before '&')
+        '||',   # Command chaining (reported as '||' because it is checked before '|')
+        '|',    # Pipe (could be used maliciously)
+        '$(',   # Command substitution
+        '`',    # Command substitution (backticks)
+        '>',    # Redirection
+        '<',    # Redirection
+        '&',    # Background execution
+        '\n',   # Newline injection
+        '\r',   # Carriage return injection
+    ]
+    
+    for pattern in dangerous_patterns:
+        if pattern in command:
+            raise SecurityError(f"Security violation: Command contains dangerous pattern '{pattern}'")
+    
+    # Substring check: any '..' or '~/' is rejected, even outside a path (e.g. an ellipsis in an argument)
+    if '..' in command or '~/' in command:
+        raise SecurityError("Security violation: Command contains path traversal patterns")
+    
+    # Case-insensitive substring match against a fixed list of sensitive paths
+    sensitive_paths = ['/etc/passwd', '/etc/shadow', '/root/', '~root']
+    command_lower = command.lower()
+    for path in sensitive_paths:
+        if path in command_lower:
+            raise SecurityError(f"Security violation: Command attempts to access sensitive path '{path}'")
+
+
+def _validate_base_command(base_command: str):
+    """
+    Validate that the base command is from an allowed list.
+    
+    Args:
+        base_command: The base command to validate.
+        
+    Raises:
+        SecurityError: If the command is not allowed.
     """
-    logger.debug(f"Executing command: {command}")
-    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
-    for line in process.stdout:
-        logger.debug(f"Command output: {line.rstrip()}")
-        yield line.rstrip()  # Remove trailing newline
-    process.wait()  # Wait for the command to complete
-    if process.returncode != 0:
-        logger.error(f"Command failed with return code {process.returncode}")
-        raise subprocess.CalledProcessError(process.returncode, command)
-    logger.debug("Command completed successfully")
+    # Define a whitelist of allowed commands (can be extended as needed)
+    allowed_commands = {
+        'ls', 'cat', 'echo', 'pwd', 'head', 'tail', 'grep', 'find', 'wc',
+        'sort', 'uniq', 'cut', 'awk', 'sed', 'tr', 'date', 'whoami',
+        'id', 'uptime', 'df', 'du', 'ps', 'top', 'free', 'mount',
+        'python', 'python3', 'pip', 'git', 'curl', 'wget', 'ssh',  # NOTE(review): interpreters and network clients can run arbitrary code or contact remote hosts
+        'rsync', 'tar', 'gzip', 'gunzip', 'zip', 'unzip'
+    }
+    
+    # Only the last path component is checked, so '/any/dir/ls' is accepted as 'ls'
+    command_name = base_command.split('/')[-1]
+    
+    if command_name not in allowed_commands:
+        # Log the attempt for security monitoring
+        logger.warning(f"Attempted execution of non-whitelisted command: {base_command}")
+        raise SecurityError(f"Security violation: Command '{command_name}' is not in the allowed list")
+        
+    logger.debug(f"Base command '{command_name}' validated successfully")
diff --git a/tests/talkpipe/util/test_util.py b/tests/talkpipe/util/test_util.py
@@ -55,8 +55,8 @@ def test_run_command_error_handling():
     # Use a command that's extremely unlikely to exist
     command = "this_command_definitely_does_not_exist_12345"
     
-    # Should raise CalledProcessError or print error message
-    with pytest.raises(subprocess.CalledProcessError):
+    # This command is not whitelisted, so SecurityError is expected; CalledProcessError is still accepted
+    with pytest.raises((subprocess.CalledProcessError, talkpipe.util.os.SecurityError)):
         list(talkpipe.util.os.run_command(command))
 
 def test_run_command_with_arguments():
diff --git a/tests/test_security_vulnerabilities.py b/tests/test_security_vulnerabilities.py