Skip to content

Commit 6ee1eeb

Browse files
authored
Update ustar creation sanity check to use WCS path on Windows (libarchive#2230)
On Windows, the MBS pathname might be null if the string was set with a WCS that can't be represented by the current locale. This is handled properly by the rest of the code, but there's a sanity check that does not make the proper distinction. Note: this is a partial cherry-pick from libarchive#2095, which I'm going to go through and break into smaller pieces in hopes of getting some things in while discussion of other things can continue.
1 parent ad26e37 commit 6ee1eeb

File tree

2 files changed

+106
-0
lines changed

2 files changed

+106
-0
lines changed

libarchive/archive_write_set_format_ustar.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,11 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry)
254254
sconv = ustar->opt_sconv;
255255

256256
/* Sanity check. */
257+
#if defined(_WIN32) && !defined(__CYGWIN__)
258+
if (archive_entry_pathname_w(entry) == NULL) {
259+
#else
257260
if (archive_entry_pathname(entry) == NULL) {
261+
#endif
258262
archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
259263
"Can't record entry in tar file without pathname");
260264
return (ARCHIVE_FAILED);

libarchive/test/test_ustar_filename_encoding.c

Lines changed: 102 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -390,3 +390,105 @@ DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
390390
assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
391391
}
392392

393+
DEFINE_TEST(test_ustar_filename_encoding_UTF16_win)
394+
{
395+
#if !defined(_WIN32) || defined(__CYGWIN__)
396+
skipping("This test is meant to verify unicode string handling"
397+
" on Windows with UTF-16 names");
398+
return;
399+
#else
400+
struct archive *a;
401+
struct archive_entry *entry;
402+
char buff[4096];
403+
size_t used;
404+
405+
/*
406+
* Don't call setlocale because we're verifying that the '_w' functions
407+
* work as expected when 'hdrcharset' is UTF-8
408+
*/
409+
410+
/* Part 1: file */
411+
a = archive_write_new();
412+
assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
413+
if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
414+
skipping("This system cannot convert character-set"
415+
" from UTF-16 to UTF-8.");
416+
archive_write_free(a);
417+
return;
418+
}
419+
assertEqualInt(ARCHIVE_OK,
420+
archive_write_open_memory(a, buff, sizeof(buff), &used));
421+
422+
entry = archive_entry_new2(a);
423+
/* Set the filename using a UTF-16 string */
424+
archive_entry_copy_pathname_w(entry, L"\u8868.txt");
425+
archive_entry_set_filetype(entry, AE_IFREG);
426+
archive_entry_set_size(entry, 0);
427+
assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
428+
archive_entry_free(entry);
429+
assertEqualInt(ARCHIVE_OK, archive_write_free(a));
430+
431+
/* Check UTF-8 version. */
432+
assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
433+
434+
/* Part 2: directory */
435+
a = archive_write_new();
436+
assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
437+
assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
438+
assertEqualInt(ARCHIVE_OK,
439+
archive_write_open_memory(a, buff, sizeof(buff), &used));
440+
441+
entry = archive_entry_new2(a);
442+
/* Set the directory name using a UTF-16 string */
443+
/* NOTE: Explicitly not adding trailing slash to test that code path */
444+
archive_entry_copy_pathname_w(entry, L"\u8868");
445+
archive_entry_set_filetype(entry, AE_IFDIR);
446+
archive_entry_set_size(entry, 0);
447+
assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
448+
archive_entry_free(entry);
449+
assertEqualInt(ARCHIVE_OK, archive_write_free(a));
450+
451+
/* Check UTF-8 version. */
452+
assertEqualMem(buff, "\xE8\xA1\xA8/", 4);
453+
454+
/* Part 3: symlink */
455+
a = archive_write_new();
456+
assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
457+
assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
458+
assertEqualInt(ARCHIVE_OK,
459+
archive_write_open_memory(a, buff, sizeof(buff), &used));
460+
461+
entry = archive_entry_new2(a);
462+
/* Set the symlink target using a UTF-16 string */
463+
archive_entry_set_pathname(entry, "link.txt");
464+
archive_entry_copy_symlink_w(entry, L"\u8868.txt");
465+
archive_entry_set_filetype(entry, AE_IFLNK);
466+
archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE);
467+
archive_entry_set_size(entry, 0);
468+
assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
469+
archive_entry_free(entry);
470+
assertEqualInt(ARCHIVE_OK, archive_write_free(a));
471+
472+
/* Check UTF-8 version of the symlink target, expected at offset 157 of the written header. */
473+
assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
474+
475+
/* Part 4: hardlink */
476+
a = archive_write_new();
477+
assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
478+
assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
479+
assertEqualInt(ARCHIVE_OK,
480+
archive_write_open_memory(a, buff, sizeof(buff), &used));
481+
482+
entry = archive_entry_new2(a);
483+
/* Set the hardlink target using a UTF-16 string */
484+
archive_entry_set_pathname(entry, "link.txt");
485+
archive_entry_copy_hardlink_w(entry, L"\u8868.txt");
486+
archive_entry_set_size(entry, 0);
487+
assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
488+
archive_entry_free(entry);
489+
assertEqualInt(ARCHIVE_OK, archive_write_free(a));
490+
491+
/* Check UTF-8 version of the hardlink target, expected at offset 157 of the written header. */
492+
assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
493+
#endif
494+
}

0 commit comments

Comments
 (0)