|
| 1 | +namespace Sentry.Internal; |
| 2 | + |
/// <summary>
/// Extensions to help redact data that might contain Personally Identifiable Information (PII) before sending it to
/// Sentry.
/// </summary>
internal static class PiiExtensions
{
    /// <summary>
    /// Placeholder that replaces every redacted value.
    /// </summary>
    internal const string RedactedText = "[Filtered]";

    // Replacements are derived from RedactedText so that the placeholder is defined in exactly one place.
    private const string RedactedUser = RedactedText + "@";
    private const string RedactedUserAndPassword = RedactedText + ":" + RedactedText + "@";

    // \b                     - the scheme has to start on a word boundary, so URLs embedded in text are found
    // (?<scheme>https?://)   - "http://" or "https://" (case-insensitive via RegexOptions.IgnoreCase)
    // (?<userInfo>[^\s/?#]*@) - everything up to the LAST '@' of the authority component. Whitespace, '/', '?' and '#'
    //                          terminate the authority, so an '@' that appears later in the path, query, fragment or
    //                          in the surrounding text (e.g. an e-mail address) is never mistaken for user info.
    //                          The quantifier is greedy on purpose: a password that itself contains '@' is still
    //                          redacted in full.
    private static readonly Regex UserInfoRegex = new(
        @"\b(?<scheme>https?://)(?<userInfo>[^\s/?#]*@)",
        RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Searches for http(s) URLs in text data and redacts the user info (user name and optional password) of each one.
    /// </summary>
    /// <remarks>
    /// Every URL in the text is handled independently and everything outside the user info is left untouched.
    /// User info that contains literal whitespace is not recognised, because whitespace ends the URL.
    /// </remarks>
    /// <param name="data">The data to be searched</param>
    /// <returns>
    /// The data, if no PII data is present or a copy of the data with PII data redacted otherwise
    /// </returns>
    public static string RedactUrl(this string data)
    {
        // Null, empty or whitespace-only data cannot contain a URL, so hand it back as-is.
        if (string.IsNullOrWhiteSpace(data))
        {
            return data;
        }

        return UserInfoRegex.Replace(data, RedactUserInfo);
    }

    /// <summary>
    /// Builds the replacement for a single <see cref="UserInfoRegex"/> match: the original scheme followed by the
    /// redacted user info.
    /// </summary>
    /// <param name="match">A successful match of <see cref="UserInfoRegex"/></param>
    /// <returns>The scheme followed by "[Filtered]@" or "[Filtered]:[Filtered]@"</returns>
    private static string RedactUserInfo(Match match)
    {
        var scheme = match.Groups["scheme"].Value;
        var userInfo = match.Groups["userInfo"].Value;

        // A ':' separates user name and password; keep that shape visible in the redacted output.
        var redacted = userInfo.Contains(":") ? RedactedUserAndPassword : RedactedUser;

        return scheme + redacted;
    }
}
0 commit comments