Skip to content

Commit 79e8e99

Browse files
authored
Merge pull request #701 from Altinity/24.8_leak_check_2
24.8 Scan files for secrets in _upload_file_to_s3
2 parents c7b35ab + 79ce999 commit 79e8e99

File tree

2 files changed

+49
-4
lines changed

2 files changed

+49
-4
lines changed

.github/workflows/release_branches.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -538,8 +538,8 @@ jobs:
538538
##################################### REGRESSION TESTS ######################################
539539
#############################################################################################
540540
RegressionTestsRelease:
541-
needs: [BuilderDebRelease]
542-
if: ${{ !failure() && !cancelled() }}
541+
needs: [RunConfig, BuilderDebRelease]
542+
if: ${{ !failure() && !cancelled() && !contains(fromJson(needs.RunConfig.outputs.data).ci_settings.exclude_keywords, 'regression') }}
543543
uses: ./.github/workflows/regression.yml
544544
secrets: inherit
545545
with:
@@ -549,8 +549,8 @@ jobs:
549549
build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
550550
timeout_minutes: 300
551551
RegressionTestsAarch64:
552-
needs: [BuilderDebAarch64]
553-
if: ${{ !failure() && !cancelled() }}
552+
needs: [RunConfig, BuilderDebAarch64]
553+
if: ${{ !failure() && !cancelled() && !contains(fromJson(needs.RunConfig.outputs.data).ci_settings.exclude_keywords, 'regression') && !contains(fromJson(needs.RunConfig.outputs.data).ci_settings.exclude_keywords, 'aarch64')}}
554554
uses: ./.github/workflows/regression.yml
555555
secrets: inherit
556556
with:

tests/ci/s3_helper.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from multiprocessing.dummy import Pool
77
from pathlib import Path
88
from typing import Any, List, Union
9+
import os
910

1011
import boto3 # type: ignore
1112
import botocore # type: ignore
@@ -19,6 +20,42 @@
1920
S3_URL,
2021
)
2122

23+
# Matches upper-case identifiers that look like credential names, i.e. that
# contain SECRET, PASSWORD, ACCESS_KEY or TOKEN. The look-arounds suppress
# known-harmless occurrences:
#   * the keyword is directly preceded by "WRONG_"
#   * the name is followed by "%"
#   * the name is followed by "=clickhouse" at end of line, or by "=minio"
#   * the name is followed by ": ***" at end of line (already masked)
#   * the name is followed by " '[HIDDEN]'"
sensitive_var_pattern = re.compile(
    r"\b[A-Z_]*(?<!WRONG_)(SECRET|PASSWORD|ACCESS_KEY|TOKEN)[A-Z_]*\b(?!%)(?!=clickhouse$)(?!=minio)(?!: \*{3}$)(?! '\[HIDDEN\]')"
)

# Environment variables whose names look sensitive, mapped to their values.
# Variables with an empty value are skipped: "" is a substring of every
# string, so keeping one would make every scanned line look like a leak and
# would corrupt the redacted output produced with str.replace().
sensitive_strings = {
    var: value
    for var, value in os.environ.items()
    if value and sensitive_var_pattern.match(var)
}
29+
30+
31+
def scan_file_for_sensitive_data(file_content, file_name):
    """
    Scan the content of a file for sensitive strings.

    A line is reported when it contains a credential-like variable name
    (a match of ``sensitive_var_pattern``) or the literal value of one of
    the entries in ``sensitive_strings``. Each offending line is logged once
    at ERROR level with secret values replaced by ``SECRET[<name>]``.

    Args:
        file_content: full text of the file to scan.
        file_name: name used in log messages and in the raised error.

    Returns:
        None when nothing sensitive is found.

    Raises ValueError if any sensitive values are found.
    """
    # An empty value is a substring of every line and would turn
    # str.replace() into "insert between every character", so ignore it.
    secrets = {name: value for name, value in sensitive_strings.items() if value}

    def clean_line(line):
        # Redact secret values so the log output does not leak them again.
        for name, value in secrets.items():
            line = line.replace(value, f"SECRET[{name}]")
        return line

    flagged = []
    for line_number, line in enumerate(file_content.splitlines(), start=1):
        has_sensitive_name = sensitive_var_pattern.search(line) is not None
        has_secret_value = any(value in line for value in secrets.values())
        # Record each offending line once, however many hits it contains.
        if has_sensitive_name or has_secret_value:
            flagged.append((line_number, clean_line(line)))

    if not flagged:
        return

    logging.error("Sensitive values found in %s", file_name)
    for line_number, redacted in flagged:
        logging.error("%s:%s: %s", file_name, line_number, redacted)

    raise ValueError(f"Sensitive values found in {file_name}")
58+
2259

2360
def _flatten_list(lst):
2461
result = []
@@ -45,6 +82,14 @@ def __init__(self, client: Any = None, endpoint: str = S3_URL):
4582
def _upload_file_to_s3(
4683
self, bucket_name: str, file_path: Path, s3_path: str
4784
) -> str:
85+
logging.debug("Checking %s for sensitive values", file_path)
86+
try:
87+
file_content = file_path.read_text(encoding="utf-8")
88+
except UnicodeDecodeError:
89+
logging.warning("Failed to read file %s, unknown encoding", file_path)
90+
else:
91+
scan_file_for_sensitive_data(file_content, file_path.name)
92+
4893
logging.debug(
4994
"Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path
5095
)

0 commit comments

Comments
 (0)