fix: Fix censor_string() behavior on short strings

agateau-gg · agateau-gg · commit 9fc7f21d4414 · 2025-05-06T16:09:58.000+02:00
When called with a one-char string, censor_string() would return the same char doubled (so `censor_string("a")` would return "aa"). The behavior was also wrong for 2-char strings, which were left unchanged, and for 3-char strings, where only the middle char was hidden.

Change this to make sure that:

- 1-char and 2-char strings are fully censored
- 2 chars out of 3 are censored in a 3-char string

Fixes #1086
diff --git a/changelog.d/20250506_160702_aurelien.gateau_fix_censor.md b/changelog.d/20250506_160702_aurelien.gateau_fix_censor.md
@@ -0,0 +1,3 @@
+### Fixed
+
+- Fixed a bug in the way ggshield obfuscated secrets that caused a crash for short secrets (#1086).
diff --git a/ggshield/core/filter.py b/ggshield/core/filter.py
@@ -114,6 +114,13 @@ def censor_string(text: str) -> str:
     :return: the text censored
     """
     len_match = len(text)
+
+    # Special cases for short lengths
+    if len_match <= 2:
+        return "*" * len_match
+    if len_match == 3:
+        return f"**{text[2]}"
+
     start_privy_len = min(math.ceil(len_match / 6), MAXIMUM_CENSOR_LENGTH)
     end_privy_len = len_match - min(math.ceil(len_match / 6), MAXIMUM_CENSOR_LENGTH)
 
diff --git a/tests/unit/core/test_filter.py b/tests/unit/core/test_filter.py
@@ -6,7 +6,7 @@
 from pygitguardian.models import Match, PolicyBreak
 from snapshottest import Snapshot
 
-from ggshield.core.filter import censor_match, get_ignore_sha
+from ggshield.core.filter import censor_match, censor_string, get_ignore_sha
 from tests.unit.conftest import (
     _MULTILINE_SECRET,
     _MULTIPLE_SECRETS_SCAN_RESULT,
@@ -116,3 +116,18 @@ def test_censor_match(input_match: Match, expected_value: str) -> None:
     value = censor_match(input_match)
     assert len(value) == len(input_match.match)
     assert value == expected_value
+
+
+@pytest.mark.parametrize(
+    ["text", "expected"],
+    (
+        ("hello world", "he*** ***ld"),
+        ("abcd", "a**d"),
+        ("abc", "**c"),
+        ("ab", "**"),
+        ("a", "*"),
+    ),
+)
+def test_censor_string(text: str, expected: str) -> None:
+    censored = censor_string(text)
+    assert censored == expected

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+### Fixed`
	`2`	`+`
	`3`	`+- Fixed a bug in the way ggshield obfuscated secrets that caused a crash for short secrets (#1086).`