Skip to content

Commit 6283992

Browse files
authored
Merge pull request #1217 from abhishek9sharma/main
Implemented redaction functions to obscure sensitive information in strings
2 parents 96ec4d1 + 4b6e3a7 commit 6283992

File tree

6 files changed

+247
-5
lines changed

6 files changed

+247
-5
lines changed

guardrails/telemetry/common.py

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import json
2-
from typing import Any, Callable, Dict, Optional, Union
2+
from typing import Any, Callable, Dict, Optional, Union, List
33
from opentelemetry.baggage import get_baggage
44
from opentelemetry import context
55
from opentelemetry.context import Context
@@ -124,3 +124,105 @@ def add_user_attributes(span: Span):
124124
except Exception as e:
125125
logger.warning("Error loading baggage user information", e)
126126
pass
127+
128+
129+
def redact(value: str) -> str:
    """Mask a string so that only its final four characters stay readable.

    Every character in front of the last four is replaced by an asterisk.
    A string of four characters or fewer has nothing in front of that tail,
    so it is returned unchanged.

    Args:
        value (str): The string to be masked.

    Returns:
        str: A string of the same length as ``value`` whose leading
            characters are asterisks and whose last four characters are
            copied from ``value``.
    """
    visible_tail = value[-4:]
    # Clamp at zero: inputs of four characters or fewer have nothing to mask.
    hidden_count = max(len(value) - 4, 0)
    mask = "*" * hidden_count
    return "{}{}".format(mask, visible_tail)
142+
143+
144+
def ismatchingkey(
    target_key: str,
    keys_to_match: tuple[str, ...] = ("key", "token", "password"),
) -> bool:
    """Check whether the target key contains any of the given substrings.

    The comparison is a case-insensitive substring test, so ``"API_KEY"``
    and ``"Password"`` are recognised as well as ``"api_key"``.

    Args:
        target_key (str): The key to be checked. A value that is not a
            string never matches.
        keys_to_match (tuple[str, ...], optional): Substrings to look for in
            the target key. Defaults to ("key", "token", "password").

    Returns:
        bool: True if any entry of ``keys_to_match`` occurs in
            ``target_key``, False otherwise.
    """
    if not isinstance(target_key, str):
        # Only string keys can be substring-matched; anything else is
        # reported as "no match" instead of raising.
        return False
    lowered_target = target_key.lower()
    return any(candidate.lower() in lowered_target for candidate in keys_to_match)
163+
164+
165+
def can_convert_to_dict(s: str) -> bool:
    """Report whether the input parses as JSON.

    The input is handed to ``json.loads``. If parsing succeeds the function
    returns True; a ``ValueError`` or ``TypeError`` raised by the parser is
    caught and reported as False.

    Note that the parsed value is not inspected: any valid JSON document
    (an object, an array, or a scalar such as ``"123"``) yields True, not
    only JSON objects.

    Args:
        s (str): The input string to be checked.

    Returns:
        bool: True if ``json.loads(s)`` succeeds, False otherwise.
    """
    try:
        json.loads(s)
    except (ValueError, TypeError):
        return False
    else:
        return True
183+
184+
185+
def recursive_key_operation(
    data: Optional[Union[Dict[str, Any], List[Any], str]],
    operation: Callable[[str], str],
    keys_to_match: List[str] = ["key", "token", "password"],
) -> Optional[Union[Dict[str, Any], List[Any], str]]:
    """Recursively apply an operation to the string values of matching keys.

    Traverses a dictionary, list, or JSON string and applies ``operation``
    to every string value whose key matches any entry of ``keys_to_match``.
    This is useful for masking sensitive data (e.g., keys, tokens,
    passwords) in nested structures.

    Args:
        data (Optional[Union[Dict[str, Any], List[Any], str]]): The input
            data to traverse. This can be a dictionary, a list, or a
            string. A string that parses as a JSON object, array, or string
            is decoded, processed, and encoded back to JSON text. Any other
            string (not JSON, or a JSON number/boolean/null) is returned
            untouched. Dictionaries and lists are modified in place.
        operation (Callable[[str], str]): A function that takes a string
            value and returns a modified string. It is applied to the
            string values of keys that match any in ``keys_to_match``.
        keys_to_match (List[str]): A list of keys to search for in the
            data. If a key matches any in this list, the corresponding
            string value is processed by ``operation``. Defaults to
            ["key", "token", "password"]. The list is only read, never
            modified.

    Returns:
        Optional[Union[Dict[str, Any], List[Any], str]]: The processed data.
            The return type matches the input type: the same dict or list
            object that was passed in, a string for string input, and any
            other value (such as None or a number) unchanged.
    """
    if isinstance(data, str):
        try:
            parsed = json.loads(data)
        except (ValueError, TypeError):
            # Not JSON at all: an ordinary string with nothing to traverse.
            return data
        if not isinstance(parsed, (dict, list, str)):
            # JSON numbers, booleans and null carry no keys. Returning the
            # original text avoids rewriting e.g. "true" as "True".
            return data
        # A decoded str may itself hold JSON (double-encoded payload), so it
        # is traversed as well. Re-encode with json.dumps so that JSON text
        # in yields JSON text out rather than a Python repr.
        return json.dumps(recursive_key_operation(parsed, operation, keys_to_match))

    if isinstance(data, dict):
        match_keys = tuple(keys_to_match)
        # Only values of existing keys are reassigned, so the dict does not
        # change size while it is being iterated.
        for key, value in data.items():
            if (
                isinstance(value, str)
                and isinstance(key, str)
                and ismatchingkey(key, match_keys)
            ):
                # Apply the operation to the value of the matched key.
                data[key] = operation(value)
            else:
                # Recursively process nested dictionaries, lists or strings.
                data[key] = recursive_key_operation(value, operation, keys_to_match)
    elif isinstance(data, list):
        for index, item in enumerate(data):
            data[index] = recursive_key_operation(item, operation, keys_to_match)

    return data

guardrails/telemetry/open_inference.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1+
import json
12
from typing import Any, Dict, List, Optional
23

3-
from guardrails.telemetry.common import get_span, to_dict, serialize
4+
from guardrails.telemetry.common import (
5+
get_span,
6+
to_dict,
7+
serialize,
8+
recursive_key_operation,
9+
redact,
10+
)
411

512

613
def trace_operation(
@@ -92,9 +99,18 @@ def trace_llm_call(
9299
)
93100

94101
ser_invocation_parameters = serialize(invocation_parameters)
95-
if ser_invocation_parameters:
102+
redacted_ser_invocation_parameters = recursive_key_operation(
103+
ser_invocation_parameters, redact
104+
)
105+
reser_invocation_parameters = (
106+
json.dumps(redacted_ser_invocation_parameters)
107+
if isinstance(redacted_ser_invocation_parameters, dict)
108+
or isinstance(redacted_ser_invocation_parameters, list)
109+
else redacted_ser_invocation_parameters
110+
)
111+
if reser_invocation_parameters:
96112
current_span.set_attribute(
97-
"llm.invocation_parameters", ser_invocation_parameters
113+
"llm.invocation_parameters", reser_invocation_parameters
98114
)
99115

100116
ser_model_name = serialize(model_name)

guardrails/telemetry/runner_tracing.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,13 @@
1717
from guardrails.classes.output_type import OT
1818
from guardrails.classes.validation_outcome import ValidationOutcome
1919
from guardrails.stores.context import get_guard_name
20-
from guardrails.telemetry.common import get_tracer, add_user_attributes, serialize
20+
from guardrails.telemetry.common import (
21+
get_tracer,
22+
add_user_attributes,
23+
serialize,
24+
recursive_key_operation,
25+
redact,
26+
)
2127
from guardrails.utils.safe_get import safe_get
2228
from guardrails.version import GUARDRAILS_VERSION
2329

@@ -45,10 +51,14 @@ def add_step_attributes(
4551

4652
ser_args = [serialize(arg) for arg in args]
4753
ser_kwargs = {k: serialize(v) for k, v in kwargs.items()}
54+
4855
inputs = {
4956
"args": [sarg for sarg in ser_args if sarg is not None],
5057
"kwargs": {k: v for k, v in ser_kwargs.items() if v is not None},
5158
}
59+
for k in inputs:
60+
inputs[k] = recursive_key_operation(inputs[k], redact)
61+
5262
step_span.set_attribute("input.mime_type", "application/json")
5363
step_span.set_attribute("input.value", json.dumps(inputs))
5464

@@ -239,6 +249,8 @@ def add_call_attributes(
239249
"args": [sarg for sarg in ser_args if sarg is not None],
240250
"kwargs": {k: v for k, v in ser_kwargs.items() if v is not None},
241251
}
252+
for k in inputs:
253+
inputs[k] = recursive_key_operation(inputs[k], redact)
242254
call_span.set_attribute("input.mime_type", "application/json")
243255
call_span.set_attribute("input.value", json.dumps(inputs))
244256

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import unittest
2+
from guardrails.telemetry.common import ismatchingkey
3+
4+
5+
class TestIsMatchingKey(unittest.TestCase):
    """Unit tests for ``ismatchingkey``."""

    def test_key_matches_with_default_keys(self):
        """The default substrings flag key/token/password style names."""
        expectations = {
            "api_key": True,
            "user_token": True,
            "username": False,
            "password": True,
        }
        for candidate, expected in expectations.items():
            with self.subTest(candidate=candidate):
                self.assertEqual(bool(ismatchingkey(candidate)), expected)

    def test_key_matches_with_custom_keys(self):
        """Caller-supplied substrings replace the defaults."""
        cases = [
            ("api_secret", ("secret",), True),
            ("client_id", ("id",), True),
            ("session", ("key", "token"), False),
        ]
        for candidate, patterns, expected in cases:
            with self.subTest(candidate=candidate, patterns=patterns):
                outcome = ismatchingkey(candidate, keys_to_match=patterns)
                self.assertEqual(bool(outcome), expected)

    def test_empty_key(self):
        """An empty target key contains none of the substrings."""
        outcome = ismatchingkey("", keys_to_match=("key", "token"))
        self.assertFalse(outcome)

    def test_empty_keys_to_match(self):
        """With no substrings to look for, nothing can match."""
        outcome = ismatchingkey("key", keys_to_match=())
        self.assertFalse(outcome)


if __name__ == "__main__":
    unittest.main()
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import unittest
2+
from guardrails.telemetry.common import recursive_key_operation, redact
3+
import ast
4+
5+
6+
# Test suite for recursive_key_operation function
7+
class TestRecursiveKeyOperation(unittest.TestCase):
    """Unit tests for ``recursive_key_operation`` combined with ``redact``.

    ``recursive_key_operation`` modifies dicts and lists in place and
    returns the same object, so comparing the result with the input object
    proves nothing. Every test therefore compares against an independently
    written expected value.
    """

    def test_list(self):
        """A JSON string input is processed and returned as a string."""
        data = (
            '{"init_args": [], "init_kwargs": {"model": "gpt-4o-mini", '
            '"api_base": "https://api.openai.com/v1", "api_key": "sk-1234"}}'
        )
        result = recursive_key_operation(data, redact)
        self.assertIsInstance(result, str)
        parsed = ast.literal_eval(result)
        self.assertEqual(parsed["init_kwargs"]["api_key"], "***1234")
        self.assertEqual(parsed["init_kwargs"]["model"], "gpt-4o-mini")

    def test_dict_kwargs(self):
        """JSON strings nested as dict values are redacted as well."""
        data = {
            "index": "0",
            "api": (
                '{"init_args": [], "init_kwargs": {"model": "gpt-4o-mini", '
                '"api_base": "https://api.openai.com/v1", "api_key": "sk-1234"}}'
            ),
            "messages": None,
            "prompt_params": "{}",
            "output_schema": '{"type": "string"}',
            "output": None,
        }
        result = recursive_key_operation(data, redact)
        parsed_api = ast.literal_eval(result["api"])
        self.assertEqual(parsed_api["init_kwargs"]["api_key"], "***1234")
        self.assertEqual(result["index"], "0")
        self.assertIsNone(result["messages"])
        self.assertIsNone(result["output"])

    def test_list_of_dicts(self):
        """Dictionaries inside a list are traversed."""
        data = [{"api_key": "sk-1234"}, {"model": "gpt-4o-mini"}]
        result = recursive_key_operation(data, redact)
        self.assertEqual(
            result, [{"api_key": "***1234"}, {"model": "gpt-4o-mini"}]
        )

    def test_nomatch(self):
        """Values under keys that match nothing are left untouched."""
        # The key must not contain "key", "token" or "password".
        data = {"name": "somevalue"}
        result = recursive_key_operation(data, redact)
        self.assertEqual(result, {"name": "somevalue"})

    def test_empty_dict(self):
        """An empty dictionary stays empty."""
        result = recursive_key_operation({}, redact)
        self.assertEqual(result, {})

    def test_empty_list(self):
        """An empty list stays empty."""
        result = recursive_key_operation([], redact)
        self.assertEqual(result, [])

    def test_non_string_value(self):
        """Only string values of matching keys are passed to the operation."""
        data = {"key": 123, "another_key": "value"}
        result = recursive_key_operation(data, redact)
        # 123 is not a string and is kept; "value" sits under a matching key.
        self.assertEqual(result, {"key": 123, "another_key": "*alue"})


if __name__ == "__main__":
    unittest.main()
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import unittest
2+
from guardrails.telemetry.common import redact
3+
4+
5+
class TestRedactFunction(unittest.TestCase):
    """Unit tests for ``redact``.

    ``redact`` keeps the last four characters of its input and replaces
    every character before them with an asterisk.
    """

    def test_redact_long_string(self):
        """All but the last four of nineteen characters are masked."""
        self.assertEqual(redact("supersecretpassword"), "*" * 15 + "word")

    def test_redact_short_string(self):
        """A four-character string is returned unchanged."""
        self.assertEqual(redact("test"), "test")

    def test_open_ai_example_key(self):
        """An API-key-like value keeps only its last four characters."""
        self.assertEqual(redact("sk-hp37"), "***hp37")

    def test_redact_very_short_string(self):
        """A string shorter than four characters is returned unchanged."""
        self.assertEqual(redact("abc"), "abc")

    def test_redact_empty_string(self):
        """The empty string is returned unchanged."""
        self.assertEqual(redact(""), "")

    def test_redact_exact_length(self):
        """Exactly four characters leaves nothing to mask."""
        self.assertEqual(redact("1234"), "1234")

    def test_redact_special_characters(self):
        """Punctuation is masked like any other character."""
        self.assertEqual(redact("ab!@#12"), "***@#12")

    def test_redact_single_character(self):
        """A single character is returned unchanged."""
        self.assertEqual(redact("a"), "a")

    def test_redact_spaces(self):
        """Leading spaces are masked like any other character."""
        # Six leading spaces are needed to produce six asterisks.
        self.assertEqual(redact(" " * 6 + "test"), "******test")


if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)