diff --git a/client/pyproject.toml b/client/pyproject.toml
index 14497066..2ee760bf 100644
--- a/client/pyproject.toml
+++ b/client/pyproject.toml
@@ -74,7 +74,8 @@ dev = [
"mypy==1.15.0",
"types-Deprecated==1.2.15.20241117",
"types-requests==2.32.0.20241016",
- "types-netifaces==0.11.0.20241025"
+ "types-netifaces==0.11.0.20241025",
+ "types-paramiko==3.5.0.20250801"
]
[project.urls]
diff --git a/client/src/cbltest/api/syncgateway.py b/client/src/cbltest/api/syncgateway.py
index 6c56236b..0dc5cf38 100644
--- a/client/src/cbltest/api/syncgateway.py
+++ b/client/src/cbltest/api/syncgateway.py
@@ -1,3 +1,4 @@
+import re
import ssl
from abc import ABC, abstractmethod
from json import dumps, loads
@@ -5,6 +6,7 @@
from typing import Any, cast
from urllib.parse import urljoin
+import paramiko
import requests
from aiohttp import BasicAuth, ClientSession, TCPConnector
from deprecated import deprecated
@@ -1290,3 +1292,96 @@ async def get_document_revision_public(
self.__secure, scheme, self.__hostname, 4984, auth
) as session:
return await self._send_request("GET", path, params=params, session=session)
+
+ async def fetch_log_file(
+ self,
+ log_type: str,
+ ssh_key_path: str,
+ ssh_username: str = "ec2-user",
+ ) -> str:
+ """
+ Fetches a log file from the remote Sync Gateway server via SSH
+
+ :param log_type: The type of log to fetch (e.g., 'debug', 'info', 'error', 'warn')
+ :param ssh_key_path: Path to SSH private key for authentication
+ :param ssh_username: SSH username (default: ec2-user)
+ :return: Contents of the log file as a string
+ """
+ # Get log directory from SG configuration
+ server_config = await self._send_request("GET", "/_config")
+ log_dir = server_config.get("logging", {}).get(
+ "log_file_path", "/home/ec2-user/log"
+ )
+ remote_log_path = f"{log_dir}/sg_{log_type}.log"
+
+ with self.__tracer.start_as_current_span(
+ "fetch_log_file",
+ attributes={
+ "log.type": log_type,
+ "remote.path": remote_log_path,
+ "ssh.username": ssh_username,
+ },
+ ):
+ ssh = paramiko.SSHClient()
+ ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+
+ # Load private key
+ private_key = paramiko.Ed25519Key.from_private_key_file(ssh_key_path)
+
+ # Connect to the remote server
+ ssh.connect(
+ self.__hostname,
+ username=ssh_username,
+ pkey=private_key,
+ )
+
+ # Read the log file
+ sftp = ssh.open_sftp()
+ try:
+ with sftp.open(remote_log_path, "r") as remote_file:
+ log_contents = remote_file.read().decode("utf-8")
+ finally:
+ sftp.close()
+ ssh.close()
+
+ return log_contents
+
+
+def scan_logs_for_untagged_sensitive_data(
+ log_content: str,
+ sensitive_patterns: list[str],
+) -> list[str]:
+ """
+    Scans log content for sensitive data that is NOT wrapped in <ud>...</ud> tags
+
+ :param log_content: The log file content as a string
+ :param sensitive_patterns: List of sensitive strings to look for (e.g., doc IDs, usernames)
+ :return: List of violations found (sensitive data without tags)
+ """
+ violations = []
+ for pattern in sensitive_patterns:
+ # Escape special regex characters in the pattern
+ escaped_pattern = re.escape(pattern)
+ for match in re.finditer(escaped_pattern, log_content):
+ start_pos = match.start()
+ end_pos = match.end()
+
+            # Check if this occurrence is within <ud>...</ud> tags
+            # Look backwards for <ud> and forwards for </ud>
+ before_text = log_content[max(0, start_pos - 100) : start_pos]
+ after_text = log_content[end_pos : min(len(log_content), end_pos + 100)]
+
+            # Check if there's an opening <ud> before and closing </ud> after
+            has_opening_tag = "<ud>" in before_text and before_text.rfind(
+                "<ud>"
+            ) > before_text.rfind("</ud>")
+            has_closing_tag = "</ud>" in after_text
+
+ if not (has_opening_tag and has_closing_tag):
+ context_start = max(0, start_pos - 50)
+ context_end = min(len(log_content), end_pos + 50)
+ context = log_content[context_start:context_end]
+ violations.append(
+ f"Untagged '{pattern}' at position {start_pos}: ...{context}..."
+ )
+ return violations
diff --git a/environment/aws/sgw_setup/config/bootstrap.json b/environment/aws/sgw_setup/config/bootstrap.json
index c5484e52..848c2425 100644
--- a/environment/aws/sgw_setup/config/bootstrap.json
+++ b/environment/aws/sgw_setup/config/bootstrap.json
@@ -31,11 +31,45 @@
"log_keys": ["*"]
},
"log_file_path": "/home/ec2-user/log",
+ "redaction_level": "partial",
"debug": {
"enabled": true,
"rotation": {
- "max_size": 512,
- "rotated_logs_size_limit": 1024
+ "max_size": 10240,
+ "max_age": 365,
+ "rotated_logs_size_limit": 10240
+ }
+ },
+ "info": {
+ "enabled": true,
+ "rotation": {
+ "max_size": 10240,
+ "max_age": 365,
+ "rotated_logs_size_limit": 10240
+ }
+ },
+ "warn": {
+ "enabled": true,
+ "rotation": {
+ "max_size": 10240,
+ "max_age": 365,
+ "rotated_logs_size_limit": 10240
+ }
+ },
+ "error": {
+ "enabled": true,
+ "rotation": {
+ "max_size": 10240,
+ "max_age": 365,
+ "rotated_logs_size_limit": 10240
+ }
+ },
+ "trace": {
+ "enabled": true,
+ "rotation": {
+ "max_size": 10240,
+ "max_age": 365,
+ "rotated_logs_size_limit": 10240
}
}
}
diff --git a/spec/tests/QE/test_log_redaction.md b/spec/tests/QE/test_log_redaction.md
new file mode 100644
index 00000000..d71b9a25
--- /dev/null
+++ b/spec/tests/QE/test_log_redaction.md
@@ -0,0 +1,16 @@
+# Log Redaction Tests
+
+## test_log_redaction_partial
+
+Test that Sync Gateway properly redacts sensitive data in logs (NEGATIVE TEST).
+
+This test verifies that NO document IDs or usernames appear in logs WITHOUT `<ud>...</ud>` tags when log redaction is enabled at the "partial" level.
+
+**Prerequisites**: Sync Gateway bootstrap.json must be configured with `redaction_level: "partial"` in the logging section.
+
+1. Create bucket and default collection
+2. Configure Sync Gateway with log redaction enabled
+3. Create user 'autotest' with access to channels
+4. Create 10 docs via Sync Gateway with xattrs
+5. Verify docs were created
+6. Fetch and scan SG logs for redaction violations
diff --git a/tests/QE/test_log_redaction.py b/tests/QE/test_log_redaction.py
new file mode 100644
index 00000000..0aa2cfcf
--- /dev/null
+++ b/tests/QE/test_log_redaction.py
@@ -0,0 +1,96 @@
+import os
+from pathlib import Path
+
+import pytest
+from cbltest import CBLPyTest
+from cbltest.api.cbltestclass import CBLTestClass
+from cbltest.api.syncgateway import (
+ DocumentUpdateEntry,
+ PutDatabasePayload,
+ scan_logs_for_untagged_sensitive_data,
+)
+
+
+@pytest.mark.sgw
+@pytest.mark.min_test_servers(0)
+@pytest.mark.min_sync_gateways(1)
+@pytest.mark.min_couchbase_servers(1)
+class TestLogRedaction(CBLTestClass):
+ @pytest.mark.asyncio(loop_scope="session")
+ async def test_log_redaction_partial(
+ self, cblpytest: CBLPyTest, dataset_path: Path
+ ) -> None:
+ sg = cblpytest.sync_gateways[0]
+ cbs = cblpytest.couchbase_servers[0]
+ num_docs = 10
+ sg_db = "db"
+ bucket_name = "data-bucket"
+ channels = ["log-redaction"]
+ username = "vipul"
+ password = "password"
+ ssh_key_path = os.environ.get(
+ "SSH_KEY_PATH", os.path.expanduser("~/.ssh/jborden.pem")
+ )
+
+ self.mark_test_step("Create bucket and default collection")
+ cbs.drop_bucket(bucket_name)
+ cbs.create_bucket(bucket_name)
+
+ self.mark_test_step("Configure Sync Gateway with log redaction enabled")
+ db_config = {
+ "bucket": bucket_name,
+ "index": {"num_replicas": 0},
+ "scopes": {"_default": {"collections": {"_default": {}}}},
+ }
+ db_payload = PutDatabasePayload(db_config)
+ if await sg.database_exists(sg_db):
+ await sg.delete_database(sg_db)
+ await sg.put_database(sg_db, db_payload)
+
+ self.mark_test_step(f"Create user '{username}' with access to channels")
+ sg_user = await sg.create_user_client(sg, sg_db, username, password, channels)
+
+ self.mark_test_step(f"Create {num_docs} docs via Sync Gateway")
+ sg_docs: list[DocumentUpdateEntry] = []
+ sg_doc_ids: list[str] = []
+ for i in range(num_docs):
+ doc_id = f"sg_doc_{i}"
+ sg_doc_ids.append(doc_id)
+ sg_docs.append(
+ DocumentUpdateEntry(
+ doc_id,
+ None,
+ body={
+ "type": "test_doc",
+ "index": i,
+ "channels": channels,
+ },
+ )
+ )
+ await sg.update_documents(sg_db, sg_docs, "_default", "_default")
+
+ self.mark_test_step("Verify docs were created (public API)")
+ all_docs = await sg_user.get_all_documents(
+ sg_db, "_default", "_default", use_public_api=True
+ )
+ assert len(all_docs.rows) == num_docs, (
+ f"Expected {num_docs} docs, got {len(all_docs.rows)}"
+ )
+
+ self.mark_test_step("Fetch and scan SG logs for redaction violations")
+ try:
+ log_contents = await sg.fetch_log_file("debug", ssh_key_path)
+ except Exception as e:
+ raise Exception(f"Could not fetch log file: {e}")
+ sensitive_patterns = sg_doc_ids + [username]
+ violations = scan_logs_for_untagged_sensitive_data(
+ log_contents, sensitive_patterns
+ )
+ assert len(violations) == 0, (
+ f"Found {len(violations)} log redaction violations: Showing first 10:\n"
+ + "\n".join(violations[:10])
+ )
+
+ await sg_user.close()
+ await sg.delete_database(sg_db)
+ cbs.drop_bucket(bucket_name)
diff --git a/tests/QE/test_xattrs.py b/tests/QE/test_xattrs.py
index 7d5158dc..d03c24c4 100644
--- a/tests/QE/test_xattrs.py
+++ b/tests/QE/test_xattrs.py
@@ -23,6 +23,8 @@ async def test_offline_processing_of_external_updates(
sg = cblpytest.sync_gateways[0]
cbs = cblpytest.couchbase_servers[0]
num_docs = 1000
+ username = "vipul"
+ password = "pass"
sg_db = "db"
bucket_name = "data-bucket"
@@ -45,14 +47,11 @@ async def test_offline_processing_of_external_updates(
await sg.delete_database(sg_db)
await sg.put_database(sg_db, db_payload)
- self.mark_test_step("Create user 'vipul' with access to SG and SDK channels")
- await sg.add_user(
- sg_db,
- "vipul",
- password="pass",
- collection_access={
- "_default": {"_default": {"admin_channels": ["SG", "SDK", "*"]}}
- },
+ self.mark_test_step(
+ f"Create user {username} with access to SG and SDK channels"
+ )
+ sg_user = await sg.create_user_client(
+ sg, sg_db, username, password, ["SG", "SDK"]
)
self.mark_test_step(f"Bulk create {num_docs} docs via Sync Gateway")
@@ -87,7 +86,9 @@ async def test_offline_processing_of_external_updates(
self.mark_test_step(
"Verify all SG docs were created successfully and store revisions, versions"
)
- sg_all_docs = await sg.get_all_documents(sg_db, "_default", "_default")
+ sg_all_docs = await sg_user.get_all_documents(
+ sg_db, "_default", "_default", use_public_api=True
+ )
sg_created_count = len(
[doc for doc in sg_all_docs.rows if doc.id.startswith("sg_")]
)
@@ -130,13 +131,8 @@ async def test_offline_processing_of_external_updates(
self.mark_test_step("Restart Sync Gateway (recreate database endpoint)")
await sg.put_database(sg_db, db_payload)
- await sg.add_user(
- sg_db,
- "seth",
- password="pass",
- collection_access={
- "_default": {"_default": {"admin_channels": ["SG", "SDK", "*"]}}
- },
+ sg_user = await sg.create_user_client(
+ sg, sg_db, username, password, ["SG", "SDK"]
)
self.mark_test_step("Verify revisions, versions and contents of all documents")
@@ -178,6 +174,7 @@ async def test_offline_processing_of_external_updates(
f"{len(content_errors)} documents didn't have correct content: {content_errors}"
)
+ await sg_user.close()
await sg.delete_database(sg_db)
cbs.drop_bucket(bucket_name)
@@ -186,6 +183,8 @@ async def test_purge(self, cblpytest: CBLPyTest, dataset_path: Path) -> None:
sg = cblpytest.sync_gateways[0]
cbs = cblpytest.couchbase_servers[0]
num_docs = 1000
+ username = "vipul"
+ password = "pass"
sg_db = "db"
bucket_name = "data-bucket"
channels = ["NASA"]
@@ -207,13 +206,8 @@ async def test_purge(self, cblpytest: CBLPyTest, dataset_path: Path) -> None:
await sg.delete_database(sg_db)
await sg.put_database(sg_db, db_payload)
- self.mark_test_step("Create user 'vipul' with access to channels")
- await sg.add_user(
- sg_db,
- "vipul",
- password="pass",
- collection_access={"_default": {"_default": {"admin_channels": channels}}},
- )
+ self.mark_test_step(f"Create user {username} with access to channels")
+ sg_user = await sg.create_user_client(sg, sg_db, username, password, channels)
self.mark_test_step(f"Bulk create {num_docs} docs via Sync Gateway")
sg_docs: list[DocumentUpdateEntry] = []
@@ -257,7 +251,9 @@ async def test_purge(self, cblpytest: CBLPyTest, dataset_path: Path) -> None:
all_doc_ids = sg_doc_ids + sdk_doc_ids
self.mark_test_step("Get all docs via Sync Gateway and save revisions")
- sg_all_docs = await sg.get_all_documents(sg_db, "_default", "_default")
+ sg_all_docs = await sg_user.get_all_documents(
+ sg_db, "_default", "_default", use_public_api=True
+ )
assert len(sg_all_docs.rows) == num_docs * 2, (
f"Expected {num_docs * 2} docs via SG, got {len(sg_all_docs.rows)}"
)
@@ -346,7 +342,9 @@ async def test_purge(self, cblpytest: CBLPyTest, dataset_path: Path) -> None:
await sg.purge_document(doc_id, sg_db, "_default", "_default")
self.mark_test_step("Verify SG can't see any docs after purge")
- sg_docs_after_purge = await sg.get_all_documents(sg_db, "_default", "_default")
+ sg_docs_after_purge = await sg_user.get_all_documents(
+ sg_db, "_default", "_default", use_public_api=True
+ )
assert len(sg_docs_after_purge.rows) == 0, (
f"Expected 0 docs after purge, got {len(sg_docs_after_purge.rows)}"
)
@@ -372,6 +370,7 @@ async def test_purge(self, cblpytest: CBLPyTest, dataset_path: Path) -> None:
f"Expected 0 docs visible via SDK after purge, got {sdk_visible_after_purge}"
)
+ await sg_user.close()
await sg.delete_database(sg_db)
cbs.drop_bucket(bucket_name)
@@ -407,7 +406,7 @@ async def test_sg_sdk_interop_unique_docs(
self.mark_test_step(
f"Create user '{username}' with access to SDK and SG channels"
)
- sg_user = await SyncGateway.create_user_client(
+ sg_user = await sg.create_user_client(
sg, sg_db, username, password, ["sdk", "sg"]
)
@@ -545,6 +544,7 @@ async def test_sg_sdk_interop_unique_docs(
f"Expected {num_docs * 2} docs to be deleted via SG, got {sg_deleted_count}"
)
+ await sg_user.close()
await sg.delete_database(sg_db)
cbs.drop_bucket(bucket_name)
@@ -577,9 +577,7 @@ async def test_sg_sdk_interop_shared_docs(
await sg.put_database(sg_db, db_payload)
self.mark_test_step(f"Create user '{username}' with access to shared channel")
- sg_user = await SyncGateway.create_user_client(
- sg, sg_db, username, password, ["shared"]
- )
+ sg_user = await sg.create_user_client(sg, sg_db, username, password, ["shared"])
self.mark_test_step(
f"Bulk create {num_docs} docs via SDK with tracking properties"
@@ -807,6 +805,7 @@ def delete_from_sdk() -> int:
f"Expected {num_docs * 2} docs deleted via SG, got {sg_deleted_count}"
)
+ await sg_user.close()
await sg.delete_database(sg_db)
cbs.drop_bucket(bucket_name)
@@ -865,10 +864,10 @@ async def test_sync_xattrs_update_concurrently(
self.mark_test_step(
f"Create users '{username1}', '{username2}' with access to '{sg_channel1}', '{sg_channel2}'"
)
- sg_user1 = await SyncGateway.create_user_client(
+ sg_user1 = await sg.create_user_client(
sg, sg_db, username1, password, [sg_channel1]
)
- sg_user2 = await SyncGateway.create_user_client(
+ sg_user2 = await sg.create_user_client(
sg, sg_db, username2, password, [sg_channel2]
)
@@ -982,5 +981,7 @@ async def query_as_user2() -> None:
f"got {user1_count_after}"
)
+ await sg_user1.close()
+ await sg_user2.close()
await sg.delete_database(sg_db)
cbs.drop_bucket(bucket_name)