|
| 1 | +package candle_binding |
| 2 | + |
| 3 | +import ( |
| 4 | + "strings" |
| 5 | + "testing" |
| 6 | + "time" |
| 7 | +) |
| 8 | + |
| 9 | +func TestNewRegexProvider(t *testing.T) { |
| 10 | + t.Run("ValidConfig", func(t *testing.T) { |
| 11 | + cfg := RegexProviderConfig{ |
| 12 | + MaxPatterns: 10, |
| 13 | + MaxPatternLength: 100, |
| 14 | + MaxInputLength: 1000, |
| 15 | + DefaultTimeoutMs: 50, |
| 16 | + Patterns: []RegexPattern{ |
| 17 | + {ID: "email", Pattern: `\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b`}, |
| 18 | + }, |
| 19 | + } |
| 20 | + _, err := NewRegexProvider(cfg) |
| 21 | + if err != nil { |
| 22 | + t.Fatalf("expected no error, got %v", err) |
| 23 | + } |
| 24 | + }) |
| 25 | + |
| 26 | + t.Run("TooManyPatterns", func(t *testing.T) { |
| 27 | + cfg := RegexProviderConfig{ |
| 28 | + MaxPatterns: 1, |
| 29 | + Patterns: []RegexPattern{ |
| 30 | + {ID: "p1", Pattern: "a"}, |
| 31 | + {ID: "p2", Pattern: "b"}, |
| 32 | + }, |
| 33 | + } |
| 34 | + _, err := NewRegexProvider(cfg) |
| 35 | + if err == nil { |
| 36 | + t.Fatal("expected an error for too many patterns, got nil") |
| 37 | + } |
| 38 | + }) |
| 39 | + |
| 40 | + t.Run("PatternTooLong", func(t *testing.T) { |
| 41 | + cfg := RegexProviderConfig{ |
| 42 | + MaxPatterns: 10, |
| 43 | + MaxPatternLength: 5, |
| 44 | + Patterns: []RegexPattern{ |
| 45 | + {ID: "long", Pattern: "abcdef"}, |
| 46 | + }, |
| 47 | + } |
| 48 | + _, err := NewRegexProvider(cfg) |
| 49 | + if err == nil { |
| 50 | + t.Fatal("expected an error for pattern too long, got nil") |
| 51 | + } |
| 52 | + }) |
| 53 | + |
| 54 | + t.Run("InvalidRegex", func(t *testing.T) { |
| 55 | + cfg := RegexProviderConfig{ |
| 56 | + MaxPatterns: 10, |
| 57 | + MaxPatternLength: 100, |
| 58 | + Patterns: []RegexPattern{ |
| 59 | + {ID: "invalid", Pattern: `[`}, |
| 60 | + }, |
| 61 | + } |
| 62 | + _, err := NewRegexProvider(cfg) |
| 63 | + if err == nil { |
| 64 | + t.Fatal("expected an error for invalid regex, got nil") |
| 65 | + } |
| 66 | + }) |
| 67 | +} |
| 68 | + |
| 69 | +func TestRegexProvider_Scan(t *testing.T) { |
| 70 | + cfg := RegexProviderConfig{ |
| 71 | + MaxPatterns: 10, |
| 72 | + MaxPatternLength: 100, |
| 73 | + MaxInputLength: 1000, |
| 74 | + DefaultTimeoutMs: 100, |
| 75 | + Patterns: []RegexPattern{ |
| 76 | + {ID: "email", Pattern: `\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b`, Category: "pii"}, |
| 77 | + {ID: "word", Pattern: "hello", Category: "greeting"}, |
| 78 | + {ID: "case", Pattern: "World", Flags: "i", Category: "case-test"}, |
| 79 | + }, |
| 80 | + } |
| 81 | + rp, err := NewRegexProvider(cfg) |
| 82 | + if err != nil { |
| 83 | + t.Fatalf("failed to create regex provider: %v", err) |
| 84 | + } |
| 85 | + |
| 86 | + t.Run("SimpleMatch", func(t *testing.T) { |
| 87 | + input := "say hello to the world" |
| 88 | + matches, err := rp.Scan(input) |
| 89 | + if err != nil { |
| 90 | + t.Fatalf("scan failed: %v", err) |
| 91 | + } |
| 92 | + if len(matches) != 2 { // "hello" and "world" (case-insensitive) |
| 93 | + t.Fatalf("expected 2 matches, got %d", len(matches)) |
| 94 | + } |
| 95 | + }) |
| 96 | + |
| 97 | + t.Run("CaseInsensitiveMatch", func(t *testing.T) { |
| 98 | + input := "hello WORLD" |
| 99 | + matches, err := rp.Scan(input) |
| 100 | + if err != nil { |
| 101 | + t.Fatalf("scan failed: %v", err) |
| 102 | + } |
| 103 | + if len(matches) != 2 { |
| 104 | + t.Fatalf("expected 2 matches, got %d", len(matches)) |
| 105 | + } |
| 106 | + }) |
| 107 | + |
| 108 | + t.Run("MultipleMatches", func(t *testing.T) { |
| 109 | + input := "my email is [email protected], say hello" |
| 110 | + matches, err := rp.Scan(input) |
| 111 | + if err != nil { |
| 112 | + t.Fatalf("scan failed: %v", err) |
| 113 | + } |
| 114 | + if len(matches) != 2 { |
| 115 | + t.Fatalf("expected 2 matches, got %d", len(matches)) |
| 116 | + } |
| 117 | + }) |
| 118 | + |
| 119 | + t.Run("NoMatch", func(t *testing.T) { |
| 120 | + input := "nothing to see here" |
| 121 | + matches, err := rp.Scan(input) |
| 122 | + if err != nil { |
| 123 | + t.Fatalf("scan failed: %v", err) |
| 124 | + } |
| 125 | + if len(matches) != 0 { |
| 126 | + t.Fatalf("expected 0 matches, got %d", len(matches)) |
| 127 | + } |
| 128 | + }) |
| 129 | + |
| 130 | + t.Run("InputTooLong", func(t *testing.T) { |
| 131 | + rp.maxInputLength = 5 |
| 132 | + _, err := rp.Scan("abcdef") |
| 133 | + if err == nil { |
| 134 | + t.Fatal("expected an error for input too long, got nil") |
| 135 | + } |
| 136 | + rp.maxInputLength = 1000 // reset |
| 137 | + }) |
| 138 | + |
| 139 | + t.Run("Timeout", func(t *testing.T) { |
| 140 | + cfg := RegexProviderConfig{ |
| 141 | + MaxPatterns: 1, |
| 142 | + MaxPatternLength: 100, |
| 143 | + MaxInputLength: 1000, |
| 144 | + DefaultTimeoutMs: 10, // 10ms |
| 145 | + Patterns: []RegexPattern{ |
| 146 | + {ID: "any", Pattern: `.`}, |
| 147 | + }, |
| 148 | + } |
| 149 | + // Create a provider with a 20ms delay, which is longer than the timeout |
| 150 | + rp, err := NewRegexProvider(cfg, WithTestDelay(20*time.Millisecond)) |
| 151 | + if err != nil { |
| 152 | + t.Fatalf("failed to create regex provider: %v", err) |
| 153 | + } |
| 154 | + |
| 155 | + _, err = rp.Scan("a") |
| 156 | + if err == nil { |
| 157 | + t.Fatal("expected a timeout error, got nil") |
| 158 | + } |
| 159 | + if !strings.Contains(err.Error(), "timed out") { |
| 160 | + t.Errorf("expected timeout error, got: %v", err) |
| 161 | + } |
| 162 | + }) |
| 163 | + |
| 164 | + t.Run("ReDoSAttackVector", func(t *testing.T) { |
| 165 | + // This pattern is a known ReDoS vector for backtracking regex engines. |
| 166 | + // Go's engine is not vulnerable, so this should execute quickly. |
| 167 | + cfg := RegexProviderConfig{ |
| 168 | + MaxPatterns: 1, |
| 169 | + MaxPatternLength: 100, |
| 170 | + MaxInputLength: 1000, |
| 171 | + DefaultTimeoutMs: 500, // 500ms timeout |
| 172 | + Patterns: []RegexPattern{ |
| 173 | + {ID: "redos", Pattern: `(a+)+$`}, |
| 174 | + }, |
| 175 | + } |
| 176 | + rp, err := NewRegexProvider(cfg) |
| 177 | + if err != nil { |
| 178 | + t.Fatalf("failed to create regex provider: %v", err) |
| 179 | + } |
| 180 | + |
| 181 | + // A long string of 'a's followed by a non-matching character. |
| 182 | + // In a vulnerable engine, this would cause catastrophic backtracking. |
| 183 | + input := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" |
| 184 | + |
| 185 | + _, err = rp.Scan(input) |
| 186 | + if err != nil { |
| 187 | + t.Fatalf("scan failed for ReDoS pattern: %v", err) |
| 188 | + } |
| 189 | + }) |
| 190 | +} |
0 commit comments