Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion client/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ dev = [
"mypy==1.15.0",
"types-Deprecated==1.2.15.20241117",
"types-requests==2.32.0.20241016",
"types-netifaces==0.11.0.20241025"
"types-netifaces==0.11.0.20241025",
"types-paramiko==3.5.0.20250801"
]

[project.urls]
Expand Down
95 changes: 95 additions & 0 deletions client/src/cbltest/api/syncgateway.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import re
import ssl
from abc import ABC, abstractmethod
from json import dumps, loads
from pathlib import Path
from typing import Any, cast
from urllib.parse import urljoin

import paramiko
import requests
from aiohttp import BasicAuth, ClientSession, TCPConnector
from deprecated import deprecated
Expand Down Expand Up @@ -1290,3 +1292,96 @@ async def get_document_revision_public(
self.__secure, scheme, self.__hostname, 4984, auth
) as session:
return await self._send_request("GET", path, params=params, session=session)

async def fetch_log_file(
    self,
    log_type: str,
    ssh_key_path: str,
    ssh_username: str = "ec2-user",
) -> str:
    """
    Fetches a log file from the remote Sync Gateway server via SSH

    :param log_type: The type of log to fetch (e.g., 'debug', 'info', 'error', 'warn')
    :param ssh_key_path: Path to SSH private key for authentication (must be an
        Ed25519 key; other key types are not currently supported)
    :param ssh_username: SSH username (default: ec2-user)
    :return: Contents of the log file as a string
    """
    # Get log directory from SG configuration; fall back to the default
    # path used by the bootstrap config if the server does not report one.
    server_config = await self._send_request("GET", "/_config")
    log_dir = server_config.get("logging", {}).get(
        "log_file_path", "/home/ec2-user/log"
    )
    remote_log_path = f"{log_dir}/sg_{log_type}.log"

    with self.__tracer.start_as_current_span(
        "fetch_log_file",
        attributes={
            "log.type": log_type,
            "remote.path": remote_log_path,
            "ssh.username": ssh_username,
        },
    ):
        ssh = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy skips host key verification; acceptable
        # for ephemeral test hosts, not for production use.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # Load private key (Ed25519 only)
        private_key = paramiko.Ed25519Key.from_private_key_file(ssh_key_path)

        try:
            # Connect to the remote server
            ssh.connect(
                self.__hostname,
                username=ssh_username,
                pkey=private_key,
            )

            # Read the log file over SFTP
            sftp = ssh.open_sftp()
            try:
                with sftp.open(remote_log_path, "r") as remote_file:
                    log_contents = remote_file.read().decode("utf-8")
            finally:
                sftp.close()
        finally:
            # Always release the SSH connection, even when connect, open_sftp,
            # or the file read raises (previously the client leaked on error).
            ssh.close()

        return log_contents


def scan_logs_for_untagged_sensitive_data(
    log_content: str,
    sensitive_patterns: list[str],
) -> list[str]:
    """
    Scans log content for sensitive data that is NOT wrapped in <ud>...</ud> tags

    :param log_content: The log file content as a string
    :param sensitive_patterns: List of sensitive strings to look for (e.g., doc IDs, usernames)
    :return: List of violations found (sensitive data without <ud> tags)
    """
    # Precompute the character spans enclosed by <ud>...</ud> once.  Testing
    # containment against real spans is exact, unlike a fixed-size window
    # around each match, which mis-classifies occurrences whose tags sit more
    # than the window size away (or whose nearby </ud> belongs to a different
    # tagged region).
    tagged_spans = [
        (m.start(1), m.end(1))
        for m in re.finditer(r"<ud>(.*?)</ud>", log_content, re.DOTALL)
    ]

    def _is_tagged(start: int, end: int) -> bool:
        # True when the whole [start, end) occurrence lies inside one tagged span.
        return any(s <= start and end <= e for s, e in tagged_spans)

    violations: list[str] = []
    for pattern in sensitive_patterns:
        # Escape special regex characters so the pattern is matched literally
        escaped_pattern = re.escape(pattern)
        for match in re.finditer(escaped_pattern, log_content):
            start_pos = match.start()
            end_pos = match.end()

            if _is_tagged(start_pos, end_pos):
                continue

            # Report the violation with up to 50 chars of surrounding context
            context_start = max(0, start_pos - 50)
            context_end = min(len(log_content), end_pos + 50)
            context = log_content[context_start:context_end]
            violations.append(
                f"Untagged '{pattern}' at position {start_pos}: ...{context}..."
            )
    return violations
38 changes: 36 additions & 2 deletions environment/aws/sgw_setup/config/bootstrap.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,45 @@
"log_keys": ["*"]
},
"log_file_path": "/home/ec2-user/log",
"redaction_level": "partial",
"debug": {
"enabled": true,
"rotation": {
"max_size": 512,
"rotated_logs_size_limit": 1024
"max_size": 10240,
"max_age": 365,
"rotated_logs_size_limit": 10240
}
},
"info": {
"enabled": true,
"rotation": {
"max_size": 10240,
"max_age": 365,
"rotated_logs_size_limit": 10240
}
},
"warn": {
"enabled": true,
"rotation": {
"max_size": 10240,
"max_age": 365,
"rotated_logs_size_limit": 10240
}
},
"error": {
"enabled": true,
"rotation": {
"max_size": 10240,
"max_age": 365,
"rotated_logs_size_limit": 10240
}
},
"trace": {
"enabled": true,
"rotation": {
"max_size": 10240,
"max_age": 365,
"rotated_logs_size_limit": 10240
}
}
}
Expand Down
16 changes: 16 additions & 0 deletions spec/tests/QE/test_log_redaction.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Log Redaction Tests

## test_log_redaction_partial

Test that Sync Gateway properly redacts sensitive data in logs (NEGATIVE TEST).

This test verifies that NO document IDs or usernames appear in logs WITHOUT `<ud>...</ud>` tags when log redaction is enabled at the "partial" level.

**Prerequisites**: Sync Gateway bootstrap.json must be configured with `redaction_level: "partial"` in the logging section.

1. Create bucket and default collection
2. Configure Sync Gateway with log redaction enabled
3. Create user 'autotest' with access to channels
4. Create 10 docs via Sync Gateway with xattrs
5. Verify docs were created
6. Fetch and scan SG logs for redaction violations
96 changes: 96 additions & 0 deletions tests/QE/test_log_redaction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import os
from pathlib import Path

import pytest
from cbltest import CBLPyTest
from cbltest.api.cbltestclass import CBLTestClass
from cbltest.api.syncgateway import (
DocumentUpdateEntry,
PutDatabasePayload,
scan_logs_for_untagged_sensitive_data,
)


@pytest.mark.sgw
@pytest.mark.min_test_servers(0)
@pytest.mark.min_sync_gateways(1)
@pytest.mark.min_couchbase_servers(1)
class TestLogRedaction(CBLTestClass):
    @pytest.mark.asyncio(loop_scope="session")
    async def test_log_redaction_partial(
        self, cblpytest: CBLPyTest, dataset_path: Path
    ) -> None:
        """
        Negative test: verify that no document IDs or usernames appear in the
        Sync Gateway logs without <ud>...</ud> redaction tags.

        Requires the SG bootstrap config to set redaction_level "partial" in
        its logging section, and SSH access to the SG host (key path taken
        from the SSH_KEY_PATH environment variable).
        """
        sg = cblpytest.sync_gateways[0]
        cbs = cblpytest.couchbase_servers[0]
        num_docs = 10
        sg_db = "db"
        bucket_name = "data-bucket"
        channels = ["log-redaction"]
        username = "vipul"
        password = "password"
        ssh_key_path = os.environ.get(
            "SSH_KEY_PATH", os.path.expanduser("~/.ssh/jborden.pem")
        )

        self.mark_test_step("Create bucket and default collection")
        # Drop first so a leftover bucket from a previous run doesn't skew results
        cbs.drop_bucket(bucket_name)
        cbs.create_bucket(bucket_name)

        self.mark_test_step("Configure Sync Gateway with log redaction enabled")
        db_config = {
            "bucket": bucket_name,
            "index": {"num_replicas": 0},
            "scopes": {"_default": {"collections": {"_default": {}}}},
        }
        db_payload = PutDatabasePayload(db_config)
        if await sg.database_exists(sg_db):
            await sg.delete_database(sg_db)
        await sg.put_database(sg_db, db_payload)

        self.mark_test_step(f"Create user '{username}' with access to channels")
        sg_user = await sg.create_user_client(sg, sg_db, username, password, channels)

        self.mark_test_step(f"Create {num_docs} docs via Sync Gateway")
        sg_docs: list[DocumentUpdateEntry] = []
        sg_doc_ids: list[str] = []
        for i in range(num_docs):
            doc_id = f"sg_doc_{i}"
            sg_doc_ids.append(doc_id)
            sg_docs.append(
                DocumentUpdateEntry(
                    doc_id,
                    None,
                    body={
                        "type": "test_doc",
                        "index": i,
                        "channels": channels,
                    },
                )
            )
        await sg.update_documents(sg_db, sg_docs, "_default", "_default")

        self.mark_test_step("Verify docs were created (public API)")
        all_docs = await sg_user.get_all_documents(
            sg_db, "_default", "_default", use_public_api=True
        )
        assert len(all_docs.rows) == num_docs, (
            f"Expected {num_docs} docs, got {len(all_docs.rows)}"
        )

        self.mark_test_step("Fetch and scan SG logs for redaction violations")
        try:
            log_contents = await sg.fetch_log_file("debug", ssh_key_path)
        except Exception as e:
            # Chain the cause so the SSH/SFTP failure details stay in the traceback
            raise RuntimeError(f"Could not fetch log file: {e}") from e
        # Every doc ID plus the username must appear only inside <ud> tags
        sensitive_patterns = sg_doc_ids + [username]
        violations = scan_logs_for_untagged_sensitive_data(
            log_contents, sensitive_patterns
        )
        assert len(violations) == 0, (
            f"Found {len(violations)} log redaction violations: Showing first 10:\n"
            + "\n".join(violations[:10])
        )

        await sg_user.close()
        await sg.delete_database(sg_db)
        cbs.drop_bucket(bucket_name)
Loading
Loading