Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions mlos_bench/mlos_bench/services/config_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,11 @@
if any(c in json for c in ("{", "[")):
# If the path contains braces, it is likely already a json string,
# so just parse it.
_LOG.info("Load config from json string: %s", json)
_LOG.info("Load config from json string: %s", sanitize_config(json))
try:
config: Any = json5.loads(json)
except ValueError as ex:
_LOG.error("Failed to parse config from JSON string: %s", json)
_LOG.error("Failed to parse config from JSON string: %s", sanitize_config(json))
raise ValueError(f"Failed to parse config from JSON string: {json}") from ex
else:
json = self.resolve_path(json)
Expand Down Expand Up @@ -225,7 +225,7 @@
# (e.g. Azure ARM templates).
del config["$schema"]
else:
_LOG.warning("Config %s is not validated against a schema.", json)
_LOG.warning("Config %s is not validated against a schema.", sanitize_config(json))
return config # type: ignore[no-any-return]

def prepare_class_load(
Expand Down Expand Up @@ -707,7 +707,9 @@
--------
mlos_bench.services : Examples of service configurations.
"""
_LOG.info("Load services: %s parent: %s", jsons, parent.__class__.__name__)
_LOG.info(
"Load services: %s parent: %s", sanitize_config(jsons), parent.__class__.__name__
)
service = Service({}, global_config, parent)
for json in jsons:
config = self.load_config(json, ConfigSchema.SERVICE)
Expand Down Expand Up @@ -743,7 +745,7 @@
--------
mlos_bench.tunables : Examples of tunable parameter configurations.
"""
_LOG.info("Load tunables: '%s'", jsons)
_LOG.info("Load tunables: '%s'", sanitize_config(jsons))
if parent is None:
parent = TunableGroups()
tunables = parent.copy()
Expand Down
45 changes: 45 additions & 0 deletions mlos_bench/mlos_bench/tests/test_sanitize_confs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def test_sanitize_config_simple() -> None:
"other": 42,
}
sanitized = sanitize_config(config)
assert isinstance(sanitized, dict)
assert sanitized["username"] == "user1"
assert sanitized["password"] == "[REDACTED]"
assert sanitized["token"] == "[REDACTED]"
Expand All @@ -39,6 +40,7 @@ def test_sanitize_config_nested() -> None:
"api_key": "key",
}
sanitized = sanitize_config(config)
assert isinstance(sanitized, dict)
assert sanitized["outer"]["password"] == "[REDACTED]"
assert sanitized["outer"]["inner"]["token"] == "[REDACTED]"
assert sanitized["outer"]["inner"]["foo"] == "bar"
Expand All @@ -61,7 +63,50 @@ def test_sanitize_config_mixed_types() -> None:
"api_key": {"nested": "val"},
}
sanitized = sanitize_config(config)
assert isinstance(sanitized, dict)
assert sanitized["password"] == "[REDACTED]"
assert sanitized["token"] == "[REDACTED]"
assert sanitized["secret"] == "[REDACTED]"
assert sanitized["api_key"] == "[REDACTED]"


def test_sanitize_config_empty() -> None:
    """An empty configuration must come back from sanitization unchanged."""
    empty_conf = {}
    result = sanitize_config(empty_conf)
    # There is nothing to redact, so the result still compares equal to the input.
    assert result == empty_conf


def test_sanitize_array() -> None:
    """Check that every dict inside a top-level list has its sensitive keys redacted."""
    entries = [
        {"username": "user1", "password": "pass1"},
        {"username": "user2", "password": "pass2"},
    ]
    result = sanitize_config(entries)
    # The container type and its length must be preserved.
    assert isinstance(result, list)
    assert len(result) == 2
    # Non-sensitive keys survive as-is; the password is replaced by the marker.
    for item, expected_user in zip(result, ("user1", "user2")):
        assert item["username"] == expected_user
        assert item["password"] == "[REDACTED]"


def test_sanitize_config_with_non_string_values() -> None:
    """Values of assorted types under non-sensitive keys must pass through untouched."""
    raw = {
        "int_value": 42,
        "float_value": 3.14,
        "bool_value": True,
        "none_value": None,
        "list_value": [1, "password", 3],
        "dict_value": {"key": "value"},
    }
    result = sanitize_config(raw)
    assert isinstance(result, dict)
    # Plain numbers are compared by value.
    for key, expected in (("int_value", 42), ("float_value", 3.14)):
        assert result[key] == expected
    # Singletons are compared by identity.
    assert result["bool_value"] is True
    assert result["none_value"] is None
    # A bare "password" string inside a list is a value, not a key: not redacted.
    assert result["list_value"] == [1, "password", 3]
    assert result["dict_value"] == {"key": "value"}
29 changes: 19 additions & 10 deletions mlos_bench/mlos_bench/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ def datetime_parser(
return new_datetime_col


def sanitize_config(config: dict[str, Any]) -> dict[str, Any]:
def sanitize_config(config: dict[str, Any] | list[Any] | Any) -> dict[str, Any] | list[Any] | Any:
"""
Sanitize a configuration (a dictionary, a list, or a nested combination of both) by redacting the values of potentially sensitive keys.

Expand All @@ -480,16 +480,25 @@ def sanitize_config(config: dict[str, Any]) -> dict[str, Any]:
"""
sanitize_keys = {"password", "secret", "token", "api_key"}

def recursive_sanitize(conf: dict[str, Any]) -> dict[str, Any]:
def recursive_sanitize(
conf: dict[str, Any] | list[Any] | str,
) -> dict[str, Any] | list[Any] | str:
"""Recursively sanitize a dictionary."""
sanitized = {}
for k, v in conf.items():
if k in sanitize_keys:
sanitized[k] = "[REDACTED]"
elif isinstance(v, dict):
sanitized[k] = recursive_sanitize(v) # type: ignore[assignment]
else:
sanitized[k] = v
return sanitized
if isinstance(conf, list):
return [recursive_sanitize(item) for item in conf]
if isinstance(conf, dict):
for k, v in conf.items():
if k in sanitize_keys:
sanitized[k] = "[REDACTED]"
elif isinstance(v, dict):
sanitized[k] = recursive_sanitize(v)
elif isinstance(v, list):
sanitized[k] = [recursive_sanitize(item) for item in v]
else:
sanitized[k] = v
return sanitized
# Otherwise, return the value unaltered (e.g., int, float, str).
return conf

return recursive_sanitize(config)
Loading