Skip to content

Commit f44b68b

Browse files
committed
Reapplying this branch on top of the latest main, as they share functionality that was merged in the previous PR
1 parent 0f35bdd commit f44b68b

File tree

3 files changed

+215
-0
lines changed

3 files changed

+215
-0
lines changed

client/src/cbltest/api/syncgateway.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
import re
12
import ssl
23
from abc import ABC, abstractmethod
34
from json import dumps, loads
45
from pathlib import Path
56
from typing import Any, cast
67
from urllib.parse import urljoin
78

9+
import paramiko
810
import requests
911
from aiohttp import BasicAuth, ClientSession, TCPConnector
1012
from deprecated import deprecated
@@ -1112,6 +1114,15 @@ async def _replaced_revid(
11121114
assert revid == response_dict["_cv"] or revid == response_dict["_rev"]
11131115
return cast(dict, response)["_rev"]
11141116

1117+
async def get_server_config(self) -> dict[str, Any]:
1118+
"""
1119+
Gets the server-level configuration from the admin API.
1120+
1121+
Returns:
1122+
Dictionary containing the server configuration including logging settings
1123+
"""
1124+
return await self._send_request("GET", "/_config")
1125+
11151126
async def delete_document(
11161127
self,
11171128
doc_id: str,
@@ -1290,3 +1301,88 @@ async def get_document_revision_public(
12901301
self.__secure, scheme, self.__hostname, 4984, auth
12911302
) as session:
12921303
return await self._send_request("GET", path, params=params, session=session)
1304+
1305+
def fetch_log_file(
1306+
self,
1307+
remote_log_path: str,
1308+
ssh_key_path: str,
1309+
ssh_username: str = "ec2-user",
1310+
) -> str:
1311+
"""
1312+
Fetches a log file from the remote Sync Gateway server via SSH
1313+
1314+
:param remote_log_path: Path to the log file on the remote server
1315+
:param ssh_key_path: Path to SSH private key for authentication
1316+
:param ssh_username: SSH username (default: ec2-user)
1317+
:return: Contents of the log file as a string
1318+
"""
1319+
with self.__tracer.start_as_current_span(
1320+
"fetch_log_file",
1321+
attributes={
1322+
"remote.path": remote_log_path,
1323+
"ssh.username": ssh_username,
1324+
},
1325+
):
1326+
ssh = paramiko.SSHClient()
1327+
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
1328+
1329+
# Load private key
1330+
private_key = paramiko.Ed25519Key.from_private_key_file(ssh_key_path)
1331+
1332+
# Connect to the remote server
1333+
ssh.connect(
1334+
self.__hostname,
1335+
username=ssh_username,
1336+
pkey=private_key,
1337+
)
1338+
1339+
# Read the log file
1340+
sftp = ssh.open_sftp()
1341+
try:
1342+
with sftp.open(remote_log_path, "r") as remote_file:
1343+
log_contents = remote_file.read().decode("utf-8")
1344+
finally:
1345+
sftp.close()
1346+
ssh.close()
1347+
1348+
return log_contents
1349+
1350+
1351+
def scan_logs_for_untagged_sensitive_data(
    log_content: str,
    sensitive_patterns: list[str],
) -> list[str]:
    """
    Scans log content for sensitive data that is NOT wrapped in <ud>...</ud> tags.

    An occurrence counts as redacted only when the nearest <ud> before it is
    still open (no intervening </ud>) AND a </ud> follows it before any new
    <ud> opens — a fresh <ud> appearing first would mean the occurrence's own
    region was never closed. The check inspects a 100-character window on each
    side of the match, so tags further away are not seen (heuristic, adequate
    for typical single-line log entries).

    :param log_content: The log file content as a string
    :param sensitive_patterns: List of sensitive strings to look for (e.g., doc IDs, usernames)
    :return: List of violations found (sensitive data without <ud> tags)
    """
    violations: list[str] = []
    for pattern in sensitive_patterns:
        if not pattern:
            # An empty pattern would match at every position and flood the
            # report with meaningless violations; skip it.
            continue
        # Escape special regex characters in the pattern
        escaped_pattern = re.escape(pattern)
        for match in re.finditer(escaped_pattern, log_content):
            start_pos = match.start()
            end_pos = match.end()

            # Context windows around the occurrence (slicing clamps the end).
            before_text = log_content[max(0, start_pos - 100) : start_pos]
            after_text = log_content[end_pos : end_pos + 100]

            # Opening <ud> before the match with no </ud> closing it in between.
            has_opening_tag = "<ud>" in before_text and before_text.rfind(
                "<ud>"
            ) > before_text.rfind("</ud>")
            # Closing </ud> after the match, and before any new <ud> opens
            # (otherwise the region containing the match was never closed).
            close_idx = after_text.find("</ud>")
            open_idx = after_text.find("<ud>")
            has_closing_tag = close_idx != -1 and (
                open_idx == -1 or close_idx < open_idx
            )

            if not (has_opening_tag and has_closing_tag):
                context_start = max(0, start_pos - 50)
                context_end = min(len(log_content), end_pos + 50)
                context = log_content[context_start:context_end]
                violations.append(
                    f"Untagged '{pattern}' at position {start_pos}: ...{context}..."
                )
    return violations
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Log Redaction Tests
2+
3+
## test_log_redaction_partial
4+
5+
Test that Sync Gateway properly redacts sensitive data in logs (NEGATIVE TEST).
6+
7+
This test verifies that NO document IDs or usernames appear in logs WITHOUT `<ud>...</ud>` tags when log redaction is enabled at the "partial" level.
8+
9+
**Prerequisites**: Sync Gateway bootstrap.json must be configured with `redaction_level: "partial"` in the logging section.
10+
11+
1. Create bucket and default collection
12+
2. Configure Sync Gateway with log redaction enabled
13+
3. Create user 'vipul' with access to channels
14+
4. Create 10 docs via Sync Gateway with xattrs
15+
5. Verify docs were created
16+
6. Fetch and scan SG logs for redaction violations

tests/QE/test_log_redaction.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import os
2+
from pathlib import Path
3+
4+
import pytest
5+
from cbltest import CBLPyTest
6+
from cbltest.api.cbltestclass import CBLTestClass
7+
from cbltest.api.syncgateway import (
8+
DocumentUpdateEntry,
9+
PutDatabasePayload,
10+
scan_logs_for_untagged_sensitive_data,
11+
)
12+
13+
14+
@pytest.mark.sgw
@pytest.mark.min_test_servers(0)
@pytest.mark.min_sync_gateways(1)
@pytest.mark.min_couchbase_servers(1)
class TestLogRedaction(CBLTestClass):
    @pytest.mark.asyncio(loop_scope="session")
    async def test_log_redaction_partial(
        self, cblpytest: CBLPyTest, dataset_path: Path
    ) -> None:
        """
        Negative test: with log redaction enabled, document IDs and usernames
        must only appear in SG logs wrapped in <ud>...</ud> tags.

        Prerequisite: the Sync Gateway bootstrap config must set the logging
        redaction_level to "partial".
        """
        sg = cblpytest.sync_gateways[0]
        cbs = cblpytest.couchbase_servers[0]
        num_docs = 10
        sg_db = "db"
        bucket_name = "data-bucket"
        channels = ["log-redaction"]
        username = "vipul"
        password = "password"
        ssh_key_path = os.environ.get(
            "SSH_KEY_PATH", os.path.expanduser("~/.ssh/jborden.pem")
        )

        self.mark_test_step("Create bucket and default collection")
        cbs.drop_bucket(bucket_name)
        cbs.create_bucket(bucket_name)

        try:
            self.mark_test_step("Configure Sync Gateway with log redaction enabled")
            db_config = {
                "bucket": bucket_name,
                "index": {"num_replicas": 0},
                "scopes": {"_default": {"collections": {"_default": {}}}},
            }
            db_payload = PutDatabasePayload(db_config)
            if await sg.database_exists(sg_db):
                await sg.delete_database(sg_db)
            await sg.put_database(sg_db, db_payload)

            self.mark_test_step(f"Create user '{username}' with access to channels")
            await sg.add_user(
                sg_db,
                username,
                password=password,
                collection_access={
                    "_default": {"_default": {"admin_channels": channels}}
                },
            )

            self.mark_test_step(f"Create {num_docs} docs via Sync Gateway")
            sg_docs: list[DocumentUpdateEntry] = []
            sg_doc_ids: list[str] = []
            for i in range(num_docs):
                doc_id = f"sg_doc_{i}"
                sg_doc_ids.append(doc_id)
                sg_docs.append(
                    DocumentUpdateEntry(
                        doc_id,
                        None,
                        body={
                            "type": "test_doc",
                            "index": i,
                            "channels": channels,
                        },
                    )
                )
            await sg.update_documents(sg_db, sg_docs, "_default", "_default")

            self.mark_test_step("Verify docs were created")
            all_docs = await sg.get_all_documents(sg_db, "_default", "_default")
            assert len(all_docs.rows) == num_docs, (
                f"Expected {num_docs} docs, got {len(all_docs.rows)}"
            )

            self.mark_test_step("Fetch and scan SG logs for redaction violations")
            server_config = await sg.get_server_config()
            # Fall back to the default EC2 log location when the server config
            # does not expose a logging.log_file_path.
            log_dir = server_config.get("logging", {}).get(
                "log_file_path", "/home/ec2-user/log"
            )
            remote_log_path = f"{log_dir}/sg_debug.log"
            try:
                log_contents = sg.fetch_log_file(remote_log_path, ssh_key_path)
            except Exception as e:
                # Chain the cause so the underlying SSH/SFTP failure is kept
                # in the traceback instead of being flattened into a string.
                raise RuntimeError(f"Could not fetch log file: {e}") from e
            sensitive_patterns = sg_doc_ids + [username]
            violations = scan_logs_for_untagged_sensitive_data(
                log_contents, sensitive_patterns
            )
            assert len(violations) == 0, (
                f"Found {len(violations)} log redaction violations: Showing first 10:\n"
                + "\n".join(violations[:10])
            )
        finally:
            # Clean up even when an assertion or the log fetch fails, so later
            # tests start from a clean database and bucket.
            await sg.delete_database(sg_db)
            cbs.drop_bucket(bucket_name)

0 commit comments

Comments
 (0)