Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion client/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ dev = [
"mypy==1.15.0",
"types-Deprecated==1.2.15.20241117",
"types-requests==2.32.0.20241016",
"types-netifaces==0.11.0.20241025"
"types-netifaces==0.11.0.20241025",
"types-paramiko==3.5.0.20250801"
]

[project.urls]
Expand Down
95 changes: 95 additions & 0 deletions client/src/cbltest/api/syncgateway.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import re
import ssl
from abc import ABC, abstractmethod
from json import dumps, loads
from pathlib import Path
from typing import Any, cast
from urllib.parse import urljoin

import paramiko
import requests
from aiohttp import BasicAuth, ClientSession, TCPConnector
from deprecated import deprecated
Expand Down Expand Up @@ -1290,3 +1292,96 @@ async def get_document_revision_public(
self.__secure, scheme, self.__hostname, 4984, auth
) as session:
return await self._send_request("GET", path, params=params, session=session)

async def fetch_log_file(
    self,
    log_type: str,
    ssh_key_path: str,
    ssh_username: str = "ec2-user",
) -> str:
    """
    Fetches a log file from the remote Sync Gateway server via SSH

    :param log_type: The type of log to fetch (e.g., 'debug', 'info', 'error', 'warn')
    :param ssh_key_path: Path to SSH private key for authentication (must be an
        Ed25519 key; other key types are not currently supported)
    :param ssh_username: SSH username (default: ec2-user)
    :return: Contents of the log file as a string
    """
    # Get log directory from SG configuration; fall back to the default
    # path used by the bootstrap config if the server does not report one.
    server_config = await self._send_request("GET", "/_config")
    log_dir = server_config.get("logging", {}).get(
        "log_file_path", "/home/ec2-user/log"
    )
    remote_log_path = f"{log_dir}/sg_{log_type}.log"

    with self.__tracer.start_as_current_span(
        "fetch_log_file",
        attributes={
            "log.type": log_type,
            "remote.path": remote_log_path,
            "ssh.username": ssh_username,
        },
    ):
        ssh = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy skips host key verification; acceptable
        # for ephemeral test hosts, not for production use.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # Load private key (Ed25519 only)
        private_key = paramiko.Ed25519Key.from_private_key_file(ssh_key_path)

        try:
            # Connect to the remote server
            ssh.connect(
                self.__hostname,
                username=ssh_username,
                pkey=private_key,
            )

            # Read the log file over SFTP
            sftp = ssh.open_sftp()
            try:
                with sftp.open(remote_log_path, "r") as remote_file:
                    log_contents = remote_file.read().decode("utf-8")
            finally:
                sftp.close()
        finally:
            # Always release the SSH connection, even when connect, open_sftp,
            # or the file read raises (previously the client leaked on error).
            ssh.close()

        return log_contents


def scan_logs_for_untagged_sensitive_data(
    log_content: str,
    sensitive_patterns: list[str],
) -> list[str]:
    """
    Scans log content for sensitive data that is NOT wrapped in <ud>...</ud> tags

    :param log_content: The log file content as a string
    :param sensitive_patterns: List of sensitive strings to look for (e.g., doc IDs, usernames)
    :return: List of violations found (sensitive data without <ud> tags)
    """
    # Precompute the character spans enclosed by <ud>...</ud> once.  Testing
    # containment against real spans is exact, unlike a fixed-size window
    # around each match, which mis-classifies occurrences whose tags sit more
    # than the window size away (or whose nearby </ud> belongs to a different
    # tagged region).
    tagged_spans = [
        (m.start(1), m.end(1))
        for m in re.finditer(r"<ud>(.*?)</ud>", log_content, re.DOTALL)
    ]

    def _is_tagged(start: int, end: int) -> bool:
        # True when the whole [start, end) occurrence lies inside one tagged span.
        return any(s <= start and end <= e for s, e in tagged_spans)

    violations: list[str] = []
    for pattern in sensitive_patterns:
        # Escape special regex characters so the pattern is matched literally
        escaped_pattern = re.escape(pattern)
        for match in re.finditer(escaped_pattern, log_content):
            start_pos = match.start()
            end_pos = match.end()

            if _is_tagged(start_pos, end_pos):
                continue

            # Report the violation with up to 50 chars of surrounding context
            context_start = max(0, start_pos - 50)
            context_end = min(len(log_content), end_pos + 50)
            context = log_content[context_start:context_end]
            violations.append(
                f"Untagged '{pattern}' at position {start_pos}: ...{context}..."
            )
    return violations
38 changes: 36 additions & 2 deletions environment/aws/sgw_setup/config/bootstrap.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,45 @@
"log_keys": ["*"]
},
"log_file_path": "/home/ec2-user/log",
"redaction_level": "partial",
"debug": {
"enabled": true,
"rotation": {
"max_size": 512,
"rotated_logs_size_limit": 1024
"max_size": 10240,
"max_age": 365,
"rotated_logs_size_limit": 10240
}
},
"info": {
"enabled": true,
"rotation": {
"max_size": 10240,
"max_age": 365,
"rotated_logs_size_limit": 10240
}
},
"warn": {
"enabled": true,
"rotation": {
"max_size": 10240,
"max_age": 365,
"rotated_logs_size_limit": 10240
}
},
"error": {
"enabled": true,
"rotation": {
"max_size": 10240,
"max_age": 365,
"rotated_logs_size_limit": 10240
}
},
"trace": {
"enabled": true,
"rotation": {
"max_size": 10240,
"max_age": 365,
"rotated_logs_size_limit": 10240
}
}
}
Expand Down
16 changes: 16 additions & 0 deletions spec/tests/QE/test_log_redaction.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Log Redaction Tests

## test_log_redaction_partial

Test that Sync Gateway properly redacts sensitive data in logs (NEGATIVE TEST).

This test verifies that NO document IDs or usernames appear in logs WITHOUT `<ud>...</ud>` tags when log redaction is enabled at the "partial" level.

**Prerequisites**: Sync Gateway bootstrap.json must be configured with `redaction_level: "partial"` in the logging section.

1. Create bucket and default collection
2. Configure Sync Gateway with log redaction enabled
3. Create user 'autotest' with access to channels
4. Create 10 docs via Sync Gateway with xattrs
5. Verify docs were created
6. Fetch and scan SG logs for redaction violations
96 changes: 96 additions & 0 deletions tests/QE/test_log_redaction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import os
from pathlib import Path

import pytest
from cbltest import CBLPyTest
from cbltest.api.cbltestclass import CBLTestClass
from cbltest.api.syncgateway import (
DocumentUpdateEntry,
PutDatabasePayload,
scan_logs_for_untagged_sensitive_data,
)


@pytest.mark.sgw
@pytest.mark.min_test_servers(0)
@pytest.mark.min_sync_gateways(1)
@pytest.mark.min_couchbase_servers(1)
class TestLogRedaction(CBLTestClass):
    @pytest.mark.asyncio(loop_scope="session")
    async def test_log_redaction_partial(
        self, cblpytest: CBLPyTest, dataset_path: Path
    ) -> None:
        """
        Negative test: verify that no document IDs or usernames appear in the
        Sync Gateway logs without <ud>...</ud> redaction tags.

        Requires the SG bootstrap config to set redaction_level "partial" in
        its logging section, and SSH access to the SG host (key path taken
        from the SSH_KEY_PATH environment variable).
        """
        sg = cblpytest.sync_gateways[0]
        cbs = cblpytest.couchbase_servers[0]
        num_docs = 10
        sg_db = "db"
        bucket_name = "data-bucket"
        channels = ["log-redaction"]
        username = "vipul"
        password = "password"
        ssh_key_path = os.environ.get(
            "SSH_KEY_PATH", os.path.expanduser("~/.ssh/jborden.pem")
        )

        self.mark_test_step("Create bucket and default collection")
        # Drop first so a leftover bucket from a previous run doesn't skew results
        cbs.drop_bucket(bucket_name)
        cbs.create_bucket(bucket_name)

        self.mark_test_step("Configure Sync Gateway with log redaction enabled")
        db_config = {
            "bucket": bucket_name,
            "index": {"num_replicas": 0},
            "scopes": {"_default": {"collections": {"_default": {}}}},
        }
        db_payload = PutDatabasePayload(db_config)
        if await sg.database_exists(sg_db):
            await sg.delete_database(sg_db)
        await sg.put_database(sg_db, db_payload)

        self.mark_test_step(f"Create user '{username}' with access to channels")
        sg_user = await sg.create_user_client(sg, sg_db, username, password, channels)

        self.mark_test_step(f"Create {num_docs} docs via Sync Gateway")
        sg_docs: list[DocumentUpdateEntry] = []
        sg_doc_ids: list[str] = []
        for i in range(num_docs):
            doc_id = f"sg_doc_{i}"
            sg_doc_ids.append(doc_id)
            sg_docs.append(
                DocumentUpdateEntry(
                    doc_id,
                    None,
                    body={
                        "type": "test_doc",
                        "index": i,
                        "channels": channels,
                    },
                )
            )
        await sg.update_documents(sg_db, sg_docs, "_default", "_default")

        self.mark_test_step("Verify docs were created (public API)")
        all_docs = await sg_user.get_all_documents(
            sg_db, "_default", "_default", use_public_api=True
        )
        assert len(all_docs.rows) == num_docs, (
            f"Expected {num_docs} docs, got {len(all_docs.rows)}"
        )

        self.mark_test_step("Fetch and scan SG logs for redaction violations")
        try:
            log_contents = await sg.fetch_log_file("debug", ssh_key_path)
        except Exception as e:
            # Chain the cause so the SSH/SFTP failure details stay in the traceback
            raise RuntimeError(f"Could not fetch log file: {e}") from e
        # Every doc ID plus the username must appear only inside <ud> tags
        sensitive_patterns = sg_doc_ids + [username]
        violations = scan_logs_for_untagged_sensitive_data(
            log_contents, sensitive_patterns
        )
        assert len(violations) == 0, (
            f"Found {len(violations)} log redaction violations: Showing first 10:\n"
            + "\n".join(violations[:10])
        )

        await sg_user.close()
        await sg.delete_database(sg_db)
        cbs.drop_bucket(bucket_name)
Loading
Loading