Skip to content

Commit 5ff738b

Browse files
EvilBeaverpiksel
authored and committed
Merge PR #280: Add support for explicit extraction file name charset
* Fixed #274 and #278 encoding issue * Added Russian name sample * Added workaround for irreversible text samples. Some samples become broken when they're written to a byte array with their encoder * Renamed local variable which had the same name as a static field * Magic codepage value for detecting auto-codepage. See #278 Co-authored-by: EvilBeaver <[email protected]>
1 parent 0c5da44 commit 5ff738b

File tree

3 files changed

+32
-6
lines changed

3 files changed

+32
-6
lines changed

src/ICSharpCode.SharpZipLib/Zip/ZipStrings.cs

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ static ZipStrings()
1313
{
1414
try
1515
{
16-
var codePage = Encoding.GetEncoding(0).CodePage;
17-
SystemDefaultCodePage = (codePage == 1 || codePage == 2 || codePage == 3 || codePage == 42) ? FallbackCodePage : codePage;
16+
var platformCodepage = Encoding.GetEncoding(0).CodePage;
17+
SystemDefaultCodePage = (platformCodepage == 1 || platformCodepage == 2 || platformCodepage == 3 || platformCodepage == 42) ? FallbackCodePage : platformCodepage;
1818
}
1919
catch
2020
{
@@ -29,8 +29,12 @@ static ZipStrings()
2929
/// In practice, most zip apps use OEM or system encoding (typically cp437 on Windows).
3030
/// Let's be good citizens and default to UTF-8 http://utf8everywhere.org/
3131
/// </remarks>
32-
private static int codePage = Encoding.UTF8.CodePage;
32+
private static int codePage = AutomaticCodePage;
3333

34+
/// Automatically select codepage while opening archive
35+
/// see https://github.com/icsharpcode/SharpZipLib/pull/280#issuecomment-433608324
36+
///
37+
private const int AutomaticCodePage = -1;
3438

3539
/// <summary>
3640
/// Encoding used for string conversion. Setting this to 65001 (UTF-8) will
@@ -40,7 +44,7 @@ public static int CodePage
4044
{
4145
get
4246
{
43-
return codePage;
47+
return codePage == AutomaticCodePage? Encoding.UTF8.CodePage:codePage;
4448
}
4549
set
4650
{
@@ -125,7 +129,14 @@ public static string ConvertToString(byte[] data)
125129
private static Encoding EncodingFromFlag(int flags)
126130
=> ((flags & (int)GeneralBitFlags.UnicodeText) != 0)
127131
? Encoding.UTF8
128-
: Encoding.GetEncoding(SystemDefaultCodePage);
132+
: Encoding.GetEncoding(
133+
// if CodePage wasn't set manually and no utf flag present
134+
// then we must use SystemDefault (old behavior)
135+
// otherwise, CodePage should be preferred over SystemDefault
136+
// see https://github.com/icsharpcode/SharpZipLib/issues/274
137+
codePage == AutomaticCodePage?
138+
SystemDefaultCodePage:
139+
codePage);
129140

130141
/// <summary>
131142
/// Convert a byte array to a string using <see cref="CodePage"/>

test/ICSharpCode.SharpZipLib.Tests/TestSupport/StringTesting.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ static StringTesting()
1212
AddLanguage("Greek", "Ϗΰ.txt", "windows-1253");
1313
AddLanguage("Nordic", "Åæ.txt", "windows-1252");
1414
AddLanguage("Arabic", "ڀڅ.txt", "windows-1256");
15+
AddLanguage("Russian", "Прйвёт.txt", "windows-1251");
1516
}
1617

1718
private static void AddLanguage(string language, string filename, string encoding)

test/ICSharpCode.SharpZipLib.Tests/Zip/FastZipHandling.cs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using System;
1+
using System;
22
using System.Collections.Generic;
33
using System.IO;
44
using System.Linq;
@@ -224,6 +224,20 @@ public void NonUnicodeText()
224224
foreach((string language, string filename, string encoding) in StringTesting.GetTestSamples())
225225
{
226226
Console.WriteLine($"{language} filename \"{filename}\" using \"{encoding}\":");
227+
228+
// TODO: samples of this test must be reversible
229+
// Some samples can't be restored back with their encoding.
230+
// test wasn't failing only because SystemDefaultCodepage is 65001 on Net.Core and
231+
// old behaviour actually was using Unicode instead of user's passed codepage
232+
var encoder = Encoding.GetEncoding(encoding);
233+
var bytes = encoder.GetBytes(filename);
234+
var restoredString = encoder.GetString(bytes);
235+
if(string.CompareOrdinal(filename, restoredString) != 0)
236+
{
237+
Console.WriteLine($"Sample for language {language} with value of {filename} is skipped, because it's irreversable");
238+
continue;
239+
}
240+
227241
ZipStrings.CodePage = Encoding.GetEncoding(encoding).CodePage;
228242
TestFileNames(filename);
229243
}

0 commit comments

Comments
 (0)