|
| 1 | +""" |
| 2 | +SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | +http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +
|
| 16 | +SPDX-License-Identifier: Apache-2.0 |
| 17 | +""" |
| 18 | +import base64 |
| 19 | +import re |
| 20 | +from typing import Generator, Iterable |
| 21 | + |
| 22 | + |
# Regex to match secrets in the spec. While this is not a perfect solution, it solves the majority
# of cases. Regex from: https://lookingatcomputer.substack.com/p/regex-is-almost-all-you-need
# Proper secret management:
# https://nvidia.github.io/OSMO/main/user_guide/getting_started/credentials.html
#
# Structure of the pattern, left to right:
#   1. Up to 50 word/dot/hyphen characters followed by a secret-like keyword (access, auth, api,
#      credential, creds, key, passwd/password, secret, token). Matching is case-insensitive,
#      except that "api" is only accepted spelled as "api", "Api" or "API".
#   2. Up to 20 further name characters, up to three whitespace/quote characters, then an
#      assignment-like separator (=, >, :=, ::=, :::=, ||, :, =>, ?= or a comma), optionally
#      followed by up to five backticks, quotes, whitespace characters or '='.
#   3. Capture group 1, the value that gets masked: 10-150 characters of [\w.=-], or a
#      base64-looking run of at least 12 characters with up to three '=' of padding.
#   4. A terminator: a backtick, quote, whitespace character, ';', a literal "\n" or "\r"
#      escape sequence, or the end of the string.
SECRET_REDACTION_RE = re.compile(
    r'''(?i)[\w.-]{0,50}?(?:access|auth|(?-i:[Aa]pi|API)|credential|creds|key|passw(?:or)?d|secret|token)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([\w.=-]{10,150}|[a-z0-9][a-z0-9+/]{11,}={0,3})(?:[\x60'"\s;]|\\[nr]|$)''' # pylint: disable=line-too-long
)
| 30 | + |
# Matches base64-encoded fragments: at least 16 chars of base64 alphabet with optional padding,
# not adjacent to other base64 characters (to capture complete tokens).
# Only the standard alphabet (A-Z, a-z, 0-9, '+', '/') is recognised; the URL-safe variant that
# uses '-' and '_' is not matched. The trailing lookahead also excludes '=', so a match never
# stops in the middle of a run of padding characters.
_BASE64_FRAGMENT_RE = re.compile(
    r'(?<![A-Za-z0-9+/])[A-Za-z0-9+/]{16,}={0,2}(?![A-Za-z0-9+/=])'
)
| 36 | + |
| 37 | + |
def _mask_secret_value(match: re.Match) -> str:
    """
    Return the text of a SECRET_REDACTION_RE match with only the secret value masked.

    The value is capture group 1 of the match. It is spliced out by position rather than
    with ``str.replace`` on the whole match, so a key name that happens to contain the same
    text as its value (e.g. ``API_KEY_VALUE=API_KEY_VALUE``) is left intact and only the
    value becomes [MASKED].
    """
    text = match.string
    before_value = text[match.start():match.start(1)]
    after_value = text[match.end(1):match.end()]
    return before_value + '[MASKED]' + after_value


def _mask_base64_fragment(match: re.Match) -> str:
    """
    Return [MASKED] if a base64 fragment decodes to text that contains a secret.

    Fragments that are not valid base64, do not decode to UTF-8 text, or decode to text
    without anything that looks like a secret are returned unchanged.
    """
    fragment = match.group(0)
    try:
        # Re-add any padding that was stripped so unpadded fragments can still be decoded.
        padded = fragment + '=' * (-len(fragment) % 4)
        decoded = base64.b64decode(padded, validate=True).decode('utf-8')
    except (ValueError, UnicodeDecodeError):
        # b64decode signals malformed input with binascii.Error, a ValueError subclass.
        return fragment
    # The encoded form cannot be partially redacted, so one hit masks the whole fragment.
    if SECRET_REDACTION_RE.search(decoded) is None:
        return fragment
    return '[MASKED]'


def redact_secrets(lines: Iterable[str]) -> Generator[str, None, None]:
    """
    Yield lines with secrets redacted.

    Each line is processed in two passes:

    1. Base64-encoded fragments are decoded; a fragment whose decoded text contains a
       key=value pattern that looks like a secret is replaced entirely with [MASKED].
    2. Key=value patterns that look like secrets in the resulting line have their value
       replaced with [MASKED]; the key, separator and surrounding text are preserved.

    Lines are consumed lazily, and one output line is yielded per input line.
    """
    for line in lines:
        line = _BASE64_FRAGMENT_RE.sub(_mask_base64_fragment, line)
        yield SECRET_REDACTION_RE.sub(_mask_secret_value, line)
0 commit comments