Skip to content

Commit 8d18cbd

Browse files
authored
Merge pull request #53 from DirectoryTree/feature-50
Decode UTF-7 to UTF-8 characters when retrieving folder name
2 parents 6e99ab4 + 6f83368 commit 8d18cbd

File tree

4 files changed

+167
-1
lines changed

4 files changed

+167
-1
lines changed

src/Folder.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use DirectoryTree\ImapEngine\Enums\ImapFetchIdentifier;
88
use DirectoryTree\ImapEngine\Exceptions\Exception;
99
use DirectoryTree\ImapEngine\Exceptions\ImapCapabilityException;
10+
use DirectoryTree\ImapEngine\Support\Str;
1011
use Illuminate\Contracts\Support\Arrayable;
1112
use Illuminate\Support\ItemNotFoundException;
1213
use JsonSerializable;
@@ -62,7 +63,9 @@ public function delimiter(): string
6263
*/
6364
public function name(): string
{
    // The folder's own name is the final delimiter-separated segment of its
    // path; that segment is run through the IMAP modified UTF-7 decoder.
    $segments = explode($this->delimiter, $this->path);

    return Str::decodeUtf7Imap(last($segments));
}
6770

6871
/**

src/Support/Str.php

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,4 +130,73 @@ public static function escape(string $string): string
130130
// Escape backslashes first to avoid double-escaping and then escape double quotes.
131131
return str_replace(['\\', '"'], ['\\\\', '\\"'], $string);
132132
}
133+
134+
/**
 * Decode a modified UTF-7 string (IMAP specific) to UTF-8.
 *
 * Each "&...-" shift sequence is treated as modified base64 ("," instead
 * of "/", no padding) wrapping UTF-16BE code units. "&-" decodes to a
 * literal ampersand. Text outside shift sequences is returned untouched,
 * and a sequence whose payload is not valid base64 is left exactly as it
 * appeared in the input.
 *
 * Surrogate pairs are combined into a single code point so characters
 * outside the Basic Multilingual Plane (e.g. emoji) produce valid 4-byte
 * UTF-8. An unpaired surrogate is replaced with U+FFFD.
 *
 * @param  string  $string  The (possibly) modified UTF-7 encoded value.
 * @return string  The UTF-8 representation of the given value.
 */
public static function decodeUtf7Imap(string $string): string
{
    // Without an ampersand there is no shift sequence, so nothing to decode.
    if (! str_contains($string, '&')) {
        return $string;
    }

    $decoded = preg_replace_callback('/&([^-]*)-?/', static function (array $matches): string {
        // An empty payload ("&-", or a bare trailing "&") is a literal ampersand.
        if ($matches[1] === '') {
            return '&';
        }

        // Convert modified base64 to standard base64 and restore the padding.
        $base64 = strtr($matches[1], ',', '/');
        $base64 .= str_repeat('=', (4 - strlen($base64) % 4) % 4);

        $binary = base64_decode($base64, true);

        if ($binary === false) {
            // Not a real shift sequence: hand back the matched text verbatim,
            // including the "-" terminator when the pattern consumed one.
            return $matches[0];
        }

        $result = '';
        $length = strlen($binary);

        // Walk the payload two bytes (one UTF-16BE code unit) at a time. A
        // dangling final byte is ignored.
        for ($i = 0; $i + 1 < $length; $i += 2) {
            $code = (ord($binary[$i]) << 8) | ord($binary[$i + 1]);

            // A high surrogate followed by a low surrogate forms one code point.
            if ($code >= 0xD800 && $code <= 0xDBFF && $i + 3 < $length) {
                $low = (ord($binary[$i + 2]) << 8) | ord($binary[$i + 3]);

                if ($low >= 0xDC00 && $low <= 0xDFFF) {
                    $code = 0x10000 + (($code - 0xD800) << 10) + ($low - 0xDC00);

                    // Skip the low surrogate that was just consumed.
                    $i += 2;
                }
            }

            if ($code >= 0xD800 && $code <= 0xDFFF) {
                // Unpaired surrogates cannot be represented in UTF-8.
                $result .= "\u{FFFD}";
            } elseif ($code < 0x80) {
                $result .= chr($code);
            } elseif ($code < 0x800) {
                $result .= chr(0xC0 | ($code >> 6))
                    .chr(0x80 | ($code & 0x3F));
            } elseif ($code < 0x10000) {
                $result .= chr(0xE0 | ($code >> 12))
                    .chr(0x80 | (($code >> 6) & 0x3F))
                    .chr(0x80 | ($code & 0x3F));
            } else {
                $result .= chr(0xF0 | ($code >> 18))
                    .chr(0x80 | (($code >> 12) & 0x3F))
                    .chr(0x80 | (($code >> 6) & 0x3F))
                    .chr(0x80 | ($code & 0x3F));
            }
        }

        return $result;
    }, $string);

    // preg_replace_callback() yields null on a PCRE error; fall back to the
    // untouched input rather than violating the string return type.
    return $decoded ?? $string;
}
133202
}

tests/Unit/FolderTest.php

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<?php
2+
3+
use DirectoryTree\ImapEngine\Folder;
4+
use DirectoryTree\ImapEngine\Mailbox;
5+
6+
test('it properly decodes name from UTF-7', function () {
    // The final path segment is a Cyrillic name in IMAP modified UTF-7.
    $encodedPath = '[Gmail]/&BBoEPgRABDcEOAQ9BDA-';

    $folder = new Folder(
        mailbox: Mailbox::make(),
        path: $encodedPath,
        flags: ['\\HasNoChildren'],
        delimiter: '/'
    );

    // name() yields the decoded UTF-8 segment...
    expect($folder->name())->toBe('Корзина');

    // ...while path() still reports the raw, encoded value.
    expect($folder->path())->toBe($encodedPath);
});

test('it preserves existing UTF-8 characters in folder names', function () {
    $mailbox = Mailbox::make();

    // Names that are already UTF-8: a single script, then several scripts mixed.
    $names = ['Привет', 'Привет_你好_こんにちは'];

    foreach ($names as $name) {
        $folder = new Folder(
            mailbox: $mailbox,
            path: '[Gmail]/'.$name,
            flags: ['\\HasNoChildren'],
            delimiter: '/'
        );

        // Decoding must leave such names exactly as given.
        expect($folder->name())->toBe($name);
    }
});

tests/Unit/Support/StrTest.php

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,45 @@
101101

102102
expect($result)->toEqual($expected);
103103
});
104+
105+
test('decodeUtf7Imap decodes UTF-7 encoded folder names', function () {
    // Cyrillic folder name taken from the original bug report.
    expect(Str::decodeUtf7Imap('&BBoEPgRABDcEOAQ9BDA-'))->toBe('Корзина');
});

test('decodeUtf7Imap handles non-encoded strings', function () {
    // Plain ASCII without an ampersand passes straight through.
    expect(Str::decodeUtf7Imap('INBOX'))->toBe('INBOX');
});

test('decodeUtf7Imap handles special characters', function () {
    // "&-" is how modified UTF-7 spells a literal ampersand.
    expect(Str::decodeUtf7Imap('&-'))->toBe('&');
});

test('decodeUtf7Imap handles mixed content', function () {
    // Only the shift sequence is rewritten; the surrounding text is kept.
    expect(Str::decodeUtf7Imap('Hello &-'))->toBe('Hello &');
});

test('decodeUtf7Imap preserves existing UTF-8 characters', function () {
    // Already-UTF-8 input: first multi-script text, then UTF-8 mixed with ASCII.
    $samples = [
        'Привет мир 你好 こんにちは ñáéíóú',
        'Hello Привет 123',
    ];

    foreach ($samples as $sample) {
        // Each string must come back unchanged.
        expect(Str::decodeUtf7Imap($sample))->toBe($sample);
    }
});

0 commit comments

Comments
 (0)