Skip to content

Commit 4500405

Browse files
committed
Refactor SanitizeEmailFilter for stricter final class design, improved null-safety, and simplified implementation.
1 parent 28880b4 commit 4500405

File tree

1 file changed: 17 additions and 23 deletions.

src/Filter/SanitizeEmailFilter.php

Lines changed: 17 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -19,12 +19,12 @@
1919
* - Normalizes IDN → punycode
2020
* - Preserves valid symbols (+, _, -, ., ')
2121
*/
22-
class SanitizeEmailFilter extends AbstractFilter
22+
final class SanitizeEmailFilter extends AbstractFilter
2323
{
2424
public function __construct(
2525
private readonly bool $strict = true,
2626
private readonly bool $normalizeIdn = true,
27-
private readonly bool $normalizeCase = true
27+
private readonly bool $normalizeCase = true,
2828
) {
2929
}
3030

@@ -41,19 +41,19 @@ public function apply(mixed $value, FieldDefinition $field): mixed
4141
}
4242

4343
// 1. Decode encoded CRLF (e.g. %0A)
44-
$email = str_ireplace(["%0a", "%0d"], "\n", $email);
44+
$email = str_ireplace(['%0a', '%0d'], "\n", $email);
4545

4646
// 2. Remove invisible control characters
47-
$email = preg_replace('/[^\P{C}\t\n\r]/u', '', $email ?? '') ?? '';
47+
$email = preg_replace('/[^\P{C}\t\n\r]/u', '', $email) ?? '';
4848

4949
// 3. Remove actual CR/LF
50-
$email = str_replace(["\r", "\n"], '', $email ?? '');
50+
$email = str_replace(["\r", "\n"], '', $email);
5151

52-
// 4. Strip dangerous header keywords after newline decode
53-
$email = preg_replace('/\b(?:cc|bcc|to|from|subject)\s*:\s*/iu', '', $email ?? '') ?? '';
52+
// 4. Strip dangerous header keywords
53+
$email = preg_replace('/\b(?:cc|bcc|to|from|subject)\s*:\s*/iu', '', $email) ?? '';
5454

5555
// 5. Strip angle brackets
56-
$email = str_replace(['<', '>'], '', $email ?? '');
56+
$email = str_replace(['<', '>'], '', $email);
5757

5858
// 6. Collapse multiple @ → keep first
5959
if (substr_count($email, '@') > 1) {
@@ -68,7 +68,7 @@ public function apply(mixed $value, FieldDefinition $field): mixed
6868
return $this->sanitizeLocal($email);
6969
}
7070

71-
[$local, $domain] = explode('@', $email, 2);
71+
[$local, $domain] = explode('@', $email, 2) + ['', ''];
7272

7373
// 8. Normalize domain case
7474
if ($this->normalizeCase) {
@@ -93,7 +93,7 @@ public function apply(mixed $value, FieldDefinition $field): mixed
9393
: $this->sanitizeUnicodeLocal($local);
9494

9595
// 11. Collapse double dots and trim
96-
$local = preg_replace('/\.{2,}/', '.', $local ?? '') ?? '';
96+
$local = preg_replace('/\.{2,}/', '.', $local) ?? '';
9797
$local = trim($local, '. ');
9898

9999
// 12. Recombine sanitized parts
@@ -102,27 +102,21 @@ public function apply(mixed $value, FieldDefinition $field): mixed
102102

103103
private function stripInvalidAscii(string $input): string
104104
{
105-
return preg_replace('/[^A-Za-z0-9!#$%&\'*+\/=?^_`{|}~.\-]/u', '', $input ?? '') ?? '';
105+
return preg_replace('/[^A-Za-z0-9!#$%&\'*+\/=?^_`{|}~.\-]/u', '', $input) ?? '';
106106
}
107107

108108
private function sanitizeUnicodeLocal(string $input): string
109109
{
110-
// Preserve Unicode letters, digits, and RFC-valid symbols (+ _ . ' -)
111-
// ✅ FINAL FIX: Escapes BOTH '\+' (as a literal, not \p{N} quantifier)
112-
// AND '\-' (as a literal, not a range).
113-
$input = preg_replace('/[^\p{L}\p{N}+_.\'\-]/u', '', $input ?? '') ?? '';
114-
115-
// Remove emojis explicitly
116-
$input = preg_replace('/[\x{1F300}-\x{1FAFF}\x{1F1E6}-\x{1F64F}\x{2600}-\x{27BF}]/u', '', $input ?? '') ?? '';
117-
118-
// Collapse multiple dots
119-
$input = preg_replace('/\.{2,}/', '.', $input ?? '') ?? '';
120-
110+
$input = preg_replace('/[^\p{L}\p{N}+_.\'\-]/u', '', $input) ?? '';
111+
$input = preg_replace('/[\x{1F300}-\x{1FAFF}\x{1F1E6}-\x{1F64F}\x{2600}-\x{27BF}]/u', '', $input) ?? '';
112+
$input = preg_replace('/\.{2,}/', '.', $input) ?? '';
121113
return trim($input, '. ');
122114
}
123115

124116
private function sanitizeLocal(string $input): string
125117
{
126-
return $this->strict ? $this->stripInvalidAscii($input) : $this->sanitizeUnicodeLocal($input);
118+
return $this->strict
119+
? $this->stripInvalidAscii($input)
120+
: $this->sanitizeUnicodeLocal($input);
127121
}
128122
}

0 commit comments

Comments
 (0)