@@ -527,3 +527,98 @@ DEFINE_TEST(test_zip_filename_encoding_CP932)
527527 assertEqualInt (0 , buff [7 ]);
528528 assertEqualMem (buff + 30 , "abcABC" , 6 );
529529}
530+
/*
 * Windows-only check that names supplied through the wide-character ('_w')
 * entry setters are written to a ZIP archive as UTF-8 when the writer is
 * configured with "hdrcharset=UTF-8".  Three cases are exercised in turn:
 * a regular file, a directory given without a trailing slash, and a
 * symlink whose target is a wide string.  On every other platform the
 * test is reported as skipped.
 */
DEFINE_TEST(test_zip_filename_encoding_UTF16_win)
{
#if defined(_WIN32) && !defined(__CYGWIN__)
	struct archive *writer;
	struct archive_entry *ae;
	char zipbuf[4096];
	size_t written;

	/*
	 * setlocale() is deliberately not called: the point is that the
	 * '_w' setters behave correctly with 'hdrcharset' set to UTF-8.
	 */

	/* ---- Case 1: regular file ---- */
	writer = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(writer));
	/* Only this first attempt may skip; later cases assert success. */
	if (archive_write_set_options(writer, "hdrcharset=UTF-8") != ARCHIVE_OK) {
		skipping("This system cannot convert character-set"
		    " from UTF-16 to UTF-8.");
		archive_write_free(writer);
		return;
	}
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(writer, zipbuf, sizeof(zipbuf),
	    &written));

	ae = archive_entry_new2(writer);
	/* The pathname is handed over as a wide (UTF-16) string. */
	archive_entry_copy_pathname_w(ae, L"\u8868.txt");
	archive_entry_set_filetype(ae, AE_IFREG);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(writer, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(writer));

	/* Bit 11 of the general purpose flag must be set, marking the
	 * filename charset as UTF-8. */
	assertEqualInt(0x08, zipbuf[7]);
	/* The stored name must be the UTF-8 encoding of the wide name. */
	assertEqualMem(zipbuf + 30, "\xE8\xA1\xA8.txt", 7);

	/* ---- Case 2: directory ---- */
	writer = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(writer));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_set_options(writer, "hdrcharset=UTF-8"));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(writer, zipbuf, sizeof(zipbuf),
	    &written));

	ae = archive_entry_new2(writer);
	/* The directory name is a wide string with NO trailing slash, on
	 * purpose, so that this code path is the one being tested. */
	archive_entry_copy_pathname_w(ae, L"\u8868");
	archive_entry_set_filetype(ae, AE_IFDIR);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(writer, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(writer));

	/* Bit 11 of the general purpose flag must be set, marking the
	 * filename charset as UTF-8. */
	assertEqualInt(0x08, zipbuf[7]);
	/* The stored name is expected in UTF-8 followed by a '/'. */
	assertEqualMem(zipbuf + 30, "\xE8\xA1\xA8/", 4);

	/* ---- Case 3: symlink ---- */
	writer = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(writer));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_set_options(writer, "hdrcharset=UTF-8"));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(writer, zipbuf, sizeof(zipbuf),
	    &written));

	ae = archive_entry_new2(writer);
	/* ASCII entry name; only the link target is a wide string. */
	archive_entry_set_pathname(ae, "link.txt");
	archive_entry_copy_symlink_w(ae, L"\u8868.txt");
	archive_entry_set_filetype(ae, AE_IFLNK);
	archive_entry_set_symlink_type(ae, AE_SYMLINK_TYPE_FILE);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(writer, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(writer));

	/* Bit 11 of the general purpose flag must be clear here, because
	 * the file name itself is plain ASCII. */
	assertEqualInt(0, zipbuf[7]);
	/* The UTF-8 form of the link target is expected at offset 38.
	 * NOTE(review): presumably that is right after the 8-byte name
	 * "link.txt" at offset 30 -- confirm against the ZIP writer. */
	assertEqualMem(zipbuf + 38, "\xE8\xA1\xA8.txt", 7);

	/* Hardlinks are not covered: ZIP does not support them. */
#else
	skipping("This test is meant to verify unicode string handling"
	    " on Windows with UTF-16 names");
#endif
}
0 commit comments