Skip to content

Commit 5cf0523

Browse files
committed
Redact secrets during the workflow file upload
Reduces the risk of secrets being uploaded to object storage.
1 parent a484225 commit 5cf0523

File tree

8 files changed

+94
-54
lines changed

8 files changed

+94
-54
lines changed

src/lib/utils/BUILD

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,8 @@ osmo_py_library(
182182
":osmo_errors",
183183
],
184184
)
185+
186+
osmo_py_library(
187+
name = "redact",
188+
srcs = ["redact.py"],
189+
)

src/lib/utils/redact.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
"""
2+
SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
16+
SPDX-License-Identifier: Apache-2.0
17+
"""
18+
import base64
19+
import re
20+
from typing import Generator, Iterable
21+
22+
23+
# Regex to match secrets in the spec. While this is not a perfect solution, it solves the majority
# of cases. Regex from: https://lookingatcomputer.substack.com/p/regex-is-almost-all-you-need
# Proper secret management:
# https://nvidia.github.io/OSMO/main/user_guide/getting_started/credentials.html
# Capture group 1 is the secret value; the rest of the match is the key, separator and quoting.
SECRET_REDACTION_RE = re.compile(
    r'''(?i)[\w.-]{0,50}?(?:access|auth|(?-i:[Aa]pi|API)|credential|creds|key|passw(?:or)?d|secret|token)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([\w.=-]{10,150}|[a-z0-9][a-z0-9+/]{11,}={0,3})(?:[\x60'"\s;]|\\[nr]|$)''' # pylint: disable=line-too-long
)

# Matches base64-encoded fragments: at least 16 chars of base64 alphabet with optional padding,
# not adjacent to other base64 characters (to capture complete tokens).
_BASE64_FRAGMENT_RE = re.compile(
    r'(?<![A-Za-z0-9+/])[A-Za-z0-9+/]{16,}={0,2}(?![A-Za-z0-9+/=])'
)

# Placeholder written in place of every redacted value.
_MASK = '[MASKED]'


def _mask_secret_value(match: re.Match) -> str:
    """
    Return the text of a SECRET_REDACTION_RE match with only the captured value masked.

    The replacement is spliced in by span instead of using str.replace on the matched text:
    str.replace would also rewrite the key whenever the key happens to contain the value
    (e.g. "token_token_token: token_token_token").
    """
    matched_text = match.group(0)
    offset = match.start(0)
    value_start = match.start(1) - offset
    value_end = match.end(1) - offset
    return matched_text[:value_start] + _MASK + matched_text[value_end:]


def _mask_base64_fragment(match: re.Match) -> str:
    """
    Return [MASKED] when a base64-looking fragment decodes to text that contains a secret.

    Fragments that are not valid base64, do not decode to UTF-8, or decode to text without
    a secret pattern are returned unchanged.
    """
    fragment = match.group(0)
    try:
        # Re-pad so that fragments whose trailing '=' were stripped can still be decoded.
        padded = fragment + '=' * (-len(fragment) % 4)
        decoded = base64.b64decode(padded, validate=True).decode('utf-8')
    except (ValueError, UnicodeDecodeError):
        return fragment
    if SECRET_REDACTION_RE.search(decoded) is None:
        return fragment
    # The encoded form cannot be partially masked, so the whole fragment is replaced.
    return _MASK


def redact_secrets(lines: Iterable[str]) -> Generator[str, None, None]:
    """
    Yield lines with secrets redacted.

    Each line is first scanned for base64-encoded fragments; a fragment whose decoded text
    contains a secret is replaced entirely with [MASKED]. The line is then scanned for
    key/value patterns that look like secrets and only the value is replaced with [MASKED].

    Args:
        lines: The lines to scan. A single string is also accepted and is split into lines
            (line endings preserved) instead of being iterated character by character,
            which would never match anything.

    Yields:
        One redacted string per input line, in order.
    """
    if isinstance(lines, str):
        lines = lines.splitlines(keepends=True)

    for line in lines:
        line = _BASE64_FRAGMENT_RE.sub(_mask_base64_fragment, line)
        yield SECRET_REDACTION_RE.sub(_mask_secret_value, line)

src/lib/utils/tests/BUILD

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,11 @@ osmo_py_test(
4242
"//src/lib/utils:jinja_sandbox",
4343
]
4444
)
45+
46+
osmo_py_test(
47+
name = "test_redact_secrets",
48+
srcs = ["test_redact_secrets.py"],
49+
deps = [
50+
"//src/lib/utils:redact",
51+
],
52+
)

src/service/core/workflow/tests/test_redact_secrets.py renamed to src/lib/utils/tests/test_redact_secrets.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import textwrap
2020
import unittest
2121

22-
from src.service.core.workflow.workflow_service import redact_secrets
22+
from src.lib.utils.redact import redact_secrets
2323

2424

2525
# The AWS keys used below are the well-known example credentials from the AWS documentation

src/service/core/workflow/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ osmo_py_library(
3939
"//src/lib/utils:login",
4040
"//src/lib/utils:priority",
4141
"//src/lib/utils:osmo_errors",
42+
"//src/lib/utils:redact",
4243
"//src/utils:static_config",
4344
"//src/utils/job:job",
4445
"//src/utils:yaml",

src/service/core/workflow/objects.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from src.lib.data import storage
2828
from src.lib.data.storage.credentials import credentials as data_credentials
2929
from src.lib.utils import credentials, common, osmo_errors, priority as wf_priority
30+
from src.lib.utils.redact import redact_secrets
3031
import src.lib.utils.logging
3132
from src.utils.job import app, common as task_common, jobs, kb_objects, task, workflow
3233
from src.utils import connectors, static_config, yaml as util_yaml
@@ -902,10 +903,16 @@ def convert_task_file_contents(curr_task_spec: Dict):
902903
convert_task_file_contents(task_spec)
903904

904905
workflow_spec = yaml.dump(workflow_dict, default_flow_style=False, allow_unicode=True)
906+
907+
# Redact secrets in the workflow spec, one line at a time. Note that list(workflow_spec)
# would split the YAML string into single characters and next() would keep only the first
# one, truncating the uploaded spec; split into lines and join the redacted lines instead.
workflow_spec = ''.join(redact_secrets(workflow_spec.splitlines(keepends=True)))
909+
905910
files = [
906911
jobs.File(path=common.WORKFLOW_SPEC_FILE_NAME, content=workflow_spec)
907912
]
908913
if original_templated_spec is not None:
914+
# Redact secrets in the original templated spec, one line at a time (assumes it is a str —
# TODO confirm). list(...) on a string yields single characters and next() would keep only
# the first one, so split into lines and join the redacted lines instead.
original_templated_spec = ''.join(
redact_secrets(original_templated_spec.splitlines(keepends=True)))
909916
files.append(jobs.File(
910917
path=common.TEMPLATED_WORKFLOW_SPEC_FILE_NAME,
911918
content=original_templated_spec))

src/service/core/workflow/tests/BUILD

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,3 @@ py_test(
4646
],
4747
)
4848

49-
py_test(
50-
name = "test_redact_secrets",
51-
srcs = ["test_redact_secrets.py"],
52-
deps = [
53-
"//src/service/core/workflow",
54-
],
55-
)
56-

src/service/core/workflow/workflow_service.py

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
SPDX-License-Identifier: Apache-2.0
1818
"""
1919

20-
import base64
2120
import collections
2221
import dataclasses
2322
import datetime
@@ -26,7 +25,7 @@
2625
import json
2726
import logging
2827
import re
29-
from typing import Any, AsyncGenerator, Dict, Generator, Iterable, List, Optional
28+
from typing import Any, AsyncGenerator, Dict, List, Optional
3029
import urllib.parse
3130
import yaml
3231

@@ -36,6 +35,7 @@
3635

3736
from src.lib.data import storage
3837
from src.lib.utils import common, credentials, login, osmo_errors, priority as wf_priority
38+
from src.lib.utils.redact import redact_secrets
3939
from src.utils.job import common as job_common, jobs, workflow, task
4040
from src.service.core.workflow import helpers, objects
4141
from src.utils import connectors
@@ -49,49 +49,6 @@
4949

5050
FETCH_TASK_LIMIT = 1000
5151

52-
# Regex to match secrets in the spec. While this is not a perfect solution, it solves the majority
53-
# of cases.
54-
# Regex from: https://lookingatcomputer.substack.com/p/regex-is-almost-all-you-need
55-
# Proper secret management:
56-
# https://nvidia.github.io/OSMO/main/user_guide/getting_started/credentials.html
57-
SECRET_REDACTION_RE = re.compile(
58-
r'''(?i)[\w.-]{0,50}?(?:access|auth|(?-i:[Aa]pi|API)|credential|creds|key|passw(?:or)?d|secret|token)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([\w.=-]{10,150}|[a-z0-9][a-z0-9+/]{11,}={0,3})(?:[\x60'"\s;]|\\[nr]|$)''' # pylint: disable=line-too-long
59-
)
60-
61-
# Matches base64-encoded fragments: at least 16 chars of base64 alphabet with optional padding,
62-
# not adjacent to other base64 characters (to capture complete tokens).
63-
_BASE64_FRAGMENT_RE = re.compile(r'(?<![A-Za-z0-9+/])[A-Za-z0-9+/]{16,}={0,2}(?![A-Za-z0-9+/=])')
64-
65-
66-
def redact_secrets(lines: Iterable[str]) -> Generator[str, None, None]:
67-
""" Yield lines with secrets in the spec redacted. """
68-
def redact_base64_fragments(line: str) -> str:
69-
"""
70-
Find base64-encoded fragments in a line, decode them, redact any secrets found inside,
71-
and replace the whole fragment with [MASKED].
72-
"""
73-
def replace_if_secrets(m: re.Match) -> str:
74-
fragment = m.group(0)
75-
try:
76-
padded = fragment + '=' * (-len(fragment) % 4)
77-
decoded = base64.b64decode(padded, validate=True).decode('utf-8')
78-
except (ValueError, UnicodeDecodeError):
79-
return fragment
80-
redacted = SECRET_REDACTION_RE.sub(
81-
lambda sm: sm.group(0).replace(sm.group(1), '[MASKED]'),
82-
decoded,
83-
)
84-
if redacted == decoded:
85-
return fragment
86-
return '[MASKED]'
87-
return _BASE64_FRAGMENT_RE.sub(replace_if_secrets, line)
88-
89-
for line in lines:
90-
line = redact_base64_fragments(line)
91-
yield SECRET_REDACTION_RE.sub(
92-
lambda m: m.group(0).replace(m.group(1), '[MASKED]'), line)
93-
94-
9552
class ActionType(enum.Enum):
9653
EXEC = 'exec'
9754
PORTFORWARD = 'portforward'

0 commit comments

Comments
 (0)