Skip to content

Commit 466f4e7

Browse files
committed
Mask sensitive data in CRs and ConfigMaps
must-gather collections can contain sensitive data in Custom Resources (e.g., customServiceConfig in cinderVolumes) and ConfigMaps. The existing mask.py only handled base64-encoded Secrets.

This adds a PlaintextMask class to recursively mask sensitive data in plain-text YAML resources (ConfigMaps and CRs). gather_crs and gather_services_cm are updated to apply masking after collection. A DO_NOT_MASK env var (default: 0) is added for CI control.

Tests:
- Created test_mask_plaintext.py: 7 comprehensive tests covering ConfigMaps, CRs, connection strings, nested structures, multi-line preservation, and edge cases
- Updated test_mask.py: added documentation for the expected error messages from edge-case tests; skip non-YAML files
- Created test sample files and README documentation in tests/samples/ and tests/samples_plaintext/

Example output:

Input:
  customServiceConfig: |
    [section]
    hpe3par_password=3parpass

Output:
  customServiceConfig: |
    [section]
    hpe3par_password=**********

Jira: OSPRH-20621
AssistedBy: claude-4-sonnet
Signed-off-by: Martin Schuppert <[email protected]>
1 parent 2c5a8f3 commit 466f4e7

File tree

12 files changed

+617
-6
lines changed

12 files changed

+617
-6
lines changed

collection-scripts/gather_crs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ if [[ -z "$DIR_NAME" ]]; then
77
source "${DIR_NAME}/common.sh"
88
fi
99

10+
# This option is used for CI only purposes and
11+
# is disabled by default
12+
DO_NOT_MASK=${DO_NOT_MASK:-0}
13+
1014
# Resource list
1115
crs=()
1216

@@ -38,3 +42,8 @@ for res in "${crs[@]}"; do
3842
done
3943

4044
[[ $CALLED -eq 1 ]] && wait_bg
45+
46+
if [[ "${DO_NOT_MASK}" -eq 0 ]]; then
47+
# All CRs have been collected, apply masking on the CRs directory tree
48+
/usr/bin/mask.py --dir "${BASE_COLLECTION_PATH}/namespaces"
49+
fi

collection-scripts/gather_services_cm

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ if [[ -z "$DIR_NAME" ]]; then
77
source "${DIR_NAME}/common.sh"
88
fi
99

10+
# This option is used for CI only purposes and
11+
# is disabled by default
12+
DO_NOT_MASK=${DO_NOT_MASK:-0}
1013

1114
get_cm() {
1215
local NS="$1"
@@ -42,6 +45,10 @@ if [[ $CALLED -eq 1 ]]; then
4245
fi
4346

4447
gather_services_cm "$NS"
45-
4648
wait_bg
49+
50+
if [[ "${DO_NOT_MASK}" -eq 0 ]]; then
51+
# All configmaps have been collected, apply masking on the configmaps directory
52+
/usr/bin/mask.py --dir "${NAMESPACE_PATH}/${NS}/configmaps"
53+
fi
4754
fi

pyscripts/mask.py

Lines changed: 141 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,11 @@
4444

4545
CONNECTION_KEYS = ["rabbit", "database_connection",
4646
"slave_connection", "sql_connection"]
47+
48+
# Error messages
4749
ERR_STR = "Not a valid string for masking"
4850
ERR_FORMAT = "Required a dict for masking"
51+
4952
# Masking string
5053
MASK_STR = "**********"
5154

@@ -64,6 +67,16 @@
6467
regexes = [gen_regex, con_regex]
6568

6669

70+
# Custom YAML representer to preserve multi-line strings as block scalars
71+
def str_representer(dumper: Any, data: str) -> Any:
    """
    Represent a Python string as a YAML string scalar.

    Strings containing at least one newline are emitted in literal block
    style ('|'); every other string is emitted with the dumper's default
    scalar style.

    :param dumper: object exposing ``represent_scalar(tag, value, style=...)``
    :param data: the string to represent
    :return: whatever ``dumper.represent_scalar`` returns
    """
    str_tag = 'tag:yaml.org,2002:str'
    if '\n' not in data:
        return dumper.represent_scalar(str_tag, data)
    # Multi-line content: request a literal block scalar
    return dumper.represent_scalar(str_tag, data, style='|')
75+
76+
77+
# Register str_representer for the ``str`` type so strings containing
# newlines are dumped as '|' block scalars.
# NOTE(review): this is a module-level registration, so it presumably also
# affects every other yaml.dump() caller in this file - confirm.
yaml.add_representer(str, str_representer)
78+
79+
6780
class SecretMask():
6881
"""
6982
Given a path to k8s secret containing sensitive base64 encoded data,
@@ -219,6 +232,127 @@ def _process_data(self, data_map: Any) -> Any:
219232
return d
220233

221234

235+
class PlaintextMask():
    """
    Mask sensitive data in plain text YAML files (ConfigMaps and CRs).
    Unlike SecretMask, this works on non-base64 encoded data.

    The file at ``path`` is loaded, every string reachable through nested
    dicts and lists is masked in place, and the result is written back to
    the same path.
    """

    def __init__(self, path: Optional[str] = None) -> None:
        # Path of the YAML file that is read and then overwritten
        self.path: Union[str, None] = path

    def mask(self) -> bool:
        """
        Read a k8s resource (ConfigMap or CR) dumped as yaml and process
        recursively to mask any sensitive information.

        :return: always True; read/write problems are reported on stdout
                 by the helpers and the file is then left untouched.
        """
        resource = self._readYaml()
        # Plain truthiness check: calling len() here would raise TypeError
        # for documents that consist of a bare scalar (e.g. a number).
        if not resource:
            return True

        # Recursively mask the entire resource
        self._applyMaskRecursive(resource)

        # Write the masked file
        self._writeYaml(resource)
        return True

    def _readYaml(self) -> Any:
        """
        Read and Load the k8s resource dumped as yaml file.

        :return: the parsed document, or an empty dict when the file is
                 empty, cannot be read/decoded, or is not valid YAML.
        """
        try:
            # Narrows Optional[str] to str for type checkers
            assert self.path is not None
            with open(self.path, 'r') as f:
                resource = yaml.safe_load(f)
                return resource if resource else {}
        # OSError covers missing files as well as permission problems and
        # directories; an unreadable file must not abort the whole run.
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
            print(f"Error while reading YAML {self.path}: {e}")
            return {}

    def _writeYaml(self, resource: Any) -> None:
        """
        Re-write the masked resource to the same path.

        The document is serialised before the file is opened for writing,
        so a serialisation error cannot leave a truncated file behind.
        """
        try:
            assert self.path is not None
            # Dump with settings that preserve readability and key order
            text = yaml.dump(resource, default_flow_style=False,
                             allow_unicode=True, sort_keys=False)
            with open(self.path, 'w') as f:
                f.write(text)
        except (OSError, yaml.YAMLError) as e:
            print(f"Error while writing the masked file {self.path}: {e}")

    def _applyMaskRecursive(self, obj: Any) -> Any:
        """
        Recursively traverse the object and mask sensitive data in place.

        Two-step strategy for string values found in a dict:
        1. If key name is sensitive AND value is single-line -> fully mask
        2. Otherwise (multi-line, or key not sensitive) -> mask only the
           parts of the text matched by the module-level regexes

        Strings that are list items have no key and always go through the
        regexes. Non-string scalars are left untouched.

        :param obj: parsed YAML node (dict, list or scalar)
        :return: the same object that was passed in
        """
        if isinstance(obj, dict):
            for key, value in obj.items():
                if isinstance(value, str):
                    # YAML keys may be ints, bools or None; re.search would
                    # raise TypeError on those, so only string keys are
                    # matched against the sensitive-key pattern.
                    sensitive_key = (isinstance(key, str)
                                     and re.search(key_regex, key))
                    if sensitive_key and '\n' not in value:
                        # e.g. password: secret123, transport_url: ...
                        obj[key] = MASK_STR
                    else:
                        # e.g. customServiceConfig blocks (multi-line)
                        obj[key] = self._applyRegex(value)
                elif isinstance(value, (dict, list)):
                    # Recursively process nested structures
                    self._applyMaskRecursive(value)
        elif isinstance(obj, list):
            for i, item in enumerate(obj):
                if isinstance(item, str):
                    obj[i] = self._applyRegex(item)
                elif isinstance(item, (dict, list)):
                    self._applyMaskRecursive(item)
        return obj

    def _applyRegex(self, text: str) -> str:
        """
        Apply regex patterns to mask sensitive information in text.
        Handles both single-line and multi-line strings.

        Each pattern in the module-level ``regexes`` list is substituted
        with its first capture group followed by MASK_STR.
        """
        # \g<1> is the unambiguous spelling of \1: it cannot merge with
        # leading characters of the mask string.
        replacement = r"\g<1>" + MASK_STR
        for pattern in regexes:
            text = re.sub(pattern, replacement, text, flags=re.MULTILINE)
        return text
323+
324+
325+
def get_resource_kind(path: str) -> Optional[str]:
    """
    Read a YAML file and return its 'kind' field to determine resource type.

    :param path: path of the YAML file to inspect
    :return: the value of the top-level 'kind' key, or None if the file
             cannot be read, is not valid YAML, is not a mapping, or has
             no 'kind' field.
    """
    try:
        with open(path, 'r') as f:
            resource = yaml.safe_load(f)
    # OSError (not just FileNotFoundError) so that directories, permission
    # problems and undecodable files are reported instead of aborting the run.
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
        print(f"Error while reading YAML to determine kind: {e}")
        return None

    if isinstance(resource, dict):
        return resource.get('kind', None)
    return None
338+
339+
340+
def mask_resource(path: str, dump_conf: bool = False) -> bool:
    """
    Pick the masking strategy for the resource stored at ``path`` and
    run it.

    - kind == "Secret": handled by SecretMask (base64 encoded data),
      which also receives ``dump_conf``
    - anything else (ConfigMaps, CRs, unknown or unreadable kind):
      handled by PlaintextMask (direct text masking)

    :param path: path of the YAML file to mask
    :param dump_conf: forwarded to SecretMask only
    :return: the result of the selected masker's ``mask()`` call
    """
    if get_resource_kind(path) != "Secret":
        # ConfigMaps, CRs, and any other resource type
        return PlaintextMask(path).mask()
    return SecretMask(path, dump_conf).mask()
354+
355+
222356
def parse_opts(argv: Any) -> Any:
223357
"""
224358
Utility for the main function: it provides a way to parse
@@ -247,7 +381,12 @@ def parse_opts(argv: Any) -> Any:
247381
# argument and process all the files found in
248382
# that directory
249383
for root, subdirs, files in os.walk(OPTS.dir):
250-
[SecretMask(os.path.join(root, f), OPTS.dump_conf).mask() for f in files]
384+
for f in files:
385+
# Skip non-YAML files
386+
if not f.endswith('.yaml') and not f.endswith('.yml'):
387+
continue
388+
file_path = os.path.join(root, f)
389+
mask_resource(file_path, OPTS.dump_conf)
251390

252391
if OPTS.path is not None and os.path.exists(OPTS.path):
253-
SecretMask(OPTS.path, OPTS.dump_conf).mask()
392+
mask_resource(OPTS.path, OPTS.dump_conf)

pyscripts/test_mask.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,15 @@ def test_mask(self):
3030
- Process using the SecretMask module
3131
- assert the 'data' content of the secret is
3232
different
33+
Note: Some test samples (secret4, secret5) intentionally
34+
contain malformed data to test error handling. The error
35+
messages printed during test execution are expected.
3336
"""
3437
for root, subdirs, files in os.walk(SAMPLE_DIR):
3538
for f in files:
39+
# Skip non-YAML files (e.g., README.md)
40+
if not f.endswith('.yaml') and not f.endswith('.yml'):
41+
continue
3642
print("Processing file %s" % f)
3743
actual = self._read_sample(os.path.join(root, f))
3844
# Mask secret by processing the data section

0 commit comments

Comments
 (0)