From 5ac115d939b6d37fa981048d473f886b7cfbf8eb Mon Sep 17 00:00:00 2001 From: Konstantin Obenland Date: Wed, 12 Nov 2025 12:54:58 -0600 Subject: [PATCH 1/3] Add tar.gz support to Mastodon importer Extends the Mastodon importer to accept tar.gz and tgz archives in addition to zip files. Since WordPress doesn't have built-in tar.gz support, the implementation tries multiple extraction methods: 1. PHP's PharData extension (most reliable when available) 2. System tar command via exec (fallback for servers that allow it) 3. Graceful failure with helpful error messages The extract_tar_gz() method follows WordPress conventions by returning WP_Error on failure, matching the behavior of unzip_file(). --- includes/wp-admin/import/class-mastodon.php | 122 ++++++++++++++++++-- 1 file changed, 115 insertions(+), 7 deletions(-) diff --git a/includes/wp-admin/import/class-mastodon.php b/includes/wp-admin/import/class-mastodon.php index a3aa87096..57f097366 100644 --- a/includes/wp-admin/import/class-mastodon.php +++ b/includes/wp-admin/import/class-mastodon.php @@ -109,10 +109,15 @@ public static function handle_upload() { return false; } - $file_info = \wp_check_filetype( sanitize_file_name( $_FILES['import']['name'] ), array( 'zip' => 'application/zip' ) ); - if ( 'application/zip' !== $file_info['type'] ) { + $allowed_types = array( + 'zip' => 'application/zip', + 'tar.gz' => 'application/x-gzip', + 'tgz' => 'application/x-gzip', + ); + $file_info = \wp_check_filetype( sanitize_file_name( $_FILES['import']['name'] ), $allowed_types ); + if ( ! in_array( $file_info['type'], $allowed_types, true ) ) { echo '

' . \esc_html( $error_message ) . '
'; - \esc_html_e( 'The uploaded file must be a ZIP archive. Please try again with the correct file format.', 'activitypub' ); + \esc_html_e( 'The uploaded file must be a ZIP or TAR.GZ archive. Please try again with the correct file format.', 'activitypub' ); echo '

'; return false; } @@ -211,20 +216,53 @@ public static function import_options() { public static function import() { $error_message = \__( 'Sorry, there has been an error.', 'activitypub' ); $file = \get_attached_file( self::$import_id ); + $basename = \basename( $file ); \WP_Filesystem(); global $wp_filesystem; $import_folder = $wp_filesystem->wp_content_dir() . 'import/'; - self::$archive = $import_folder . \basename( \basename( $file, '.txt' ), '.zip' ); + self::$archive = $import_folder . \preg_replace( '/\.(zip|tar\.gz|tgz)$/i', '', $basename ); // Clean up working directory. if ( $wp_filesystem->is_dir( self::$archive ) ) { $wp_filesystem->delete( self::$archive, true ); } - // Unzip package to working directory. - \unzip_file( $file, self::$archive ); + // Determine file type and extract accordingly. + $file_extension = \strtolower( \pathinfo( $file, PATHINFO_EXTENSION ) ); + $is_tar_gz = false; + + // Check if it's a tar.gz or tgz file. + if ( 'gz' === $file_extension && \str_ends_with( \strtolower( $basename ), '.tar.gz' ) ) { + $is_tar_gz = true; + } elseif ( 'tgz' === $file_extension ) { + $is_tar_gz = true; + } + + if ( 'zip' === $file_extension ) { + // Use WordPress built-in unzip function. + $unzip_result = \unzip_file( $file, self::$archive ); + if ( \is_wp_error( $unzip_result ) ) { + echo '

' . \esc_html( $error_message ) . '
'; + echo \esc_html( $unzip_result->get_error_message() ) . '

'; + return; + } + } elseif ( $is_tar_gz ) { + // Extract tar.gz file. + $extract_result = self::extract_tar_gz( $file ); + if ( \is_wp_error( $extract_result ) ) { + echo '

' . \esc_html( $error_message ) . '
'; + echo \esc_html( $extract_result->get_error_message() ) . '

'; + return; + } + } else { + echo '

' . \esc_html( $error_message ) . '
'; + \esc_html_e( 'The uploaded file must be a ZIP or TAR.GZ archive. Please try again with the correct file format.', 'activitypub' ); + echo '

'; + return; + } + $files = $wp_filesystem->dirlist( self::$archive ); if ( ! isset( $files['outbox.json'] ) ) { @@ -412,7 +450,7 @@ public static function greet() { echo '
    '; echo '
  1. ' . \wp_kses( \__( 'Log in to your Mastodon account and go to Preferences > Import and Export.', 'activitypub' ), array( 'strong' => array() ) ) . '
  2. '; echo '
  3. ' . \esc_html__( 'Request a new archive of your data and wait for the email notification.', 'activitypub' ) . '
  4. '; - echo '
  5. ' . \wp_kses( \__( 'Download the archive file (it will be a .zip file).', 'activitypub' ), array( 'code' => array() ) ) . '
  6. '; + echo '
  7. ' . \wp_kses( \__( 'Download the archive file (it will be a .zip or .tar.gz file).', 'activitypub' ), array( 'code' => array() ) ) . '
  8. '; echo '
  9. ' . \esc_html__( 'Upload that file below to begin the import process.', 'activitypub' ) . '
  10. '; echo '
'; @@ -434,4 +472,74 @@ private static function prepend_archive_path( $attachment ) { return $attachment; } + + /** + * Extract a tar.gz file. + * + * WordPress doesn't have built-in tar.gz support, so we try multiple methods: + * 1. PHP's PharData extension (most reliable) + * 2. System tar command (if exec is available) + * 3. Return WP_Error with helpful error message + * + * @param string $file Path to the tar.gz file. + * + * @return true|\WP_Error True if extraction succeeded, WP_Error on failure. + */ + private static function extract_tar_gz( $file ) { + global $wp_filesystem; + $last_error = ''; + + // Method 1: Try using PHP's PharData if available (PHP 5.3+). + if ( class_exists( 'PharData' ) ) { + try { + $phar = new \PharData( $file ); + $phar->extractTo( self::$archive ); + return true; + } catch ( \Exception $e ) { + // PharData extraction failed, try next method. + $last_error = $e->getMessage(); + } + } + + // Method 2: Try using system tar command if available. + if ( function_exists( 'exec' ) ) { + // Create the extraction directory first. + if ( ! $wp_filesystem->mkdir( self::$archive ) ) { + return new \WP_Error( + 'extract_tar_gz_mkdir_failed', + \__( 'Failed to create extraction directory.', 'activitypub' ) + ); + } + + $command = sprintf( + 'tar -xzf %s -C %s 2>&1', + \escapeshellarg( $file ), + \escapeshellarg( self::$archive ) + ); + $output = array(); + $return_var = 0; + exec( $command, $output, $return_var ); + + if ( 0 === $return_var ) { + return true; + } + + $last_error = implode( ' ', $output ); + } + + // If extraction failed, return appropriate error. + if ( ! class_exists( 'PharData' ) && ! function_exists( 'exec' ) ) { + return new \WP_Error( + 'extract_tar_gz_not_supported', + \__( 'Your server does not support tar.gz extraction. Please use a ZIP file instead, or ask your host to enable PharData or exec functions.', 'activitypub' ) + ); + } + + // Both methods were available but failed. + return new \WP_Error( + 'extract_tar_gz_failed', + \__( 'Unable to extract tar.gz archive. The file may be corrupted. Please try again with a new archive or use a ZIP file instead.', 'activitypub' ), + $last_error + ); + } } From f49dbc3e06bb253f840500e5f89f70e407f58730 Mon Sep 17 00:00:00 2001 From: Konstantin Obenland Date: Wed, 12 Nov 2025 12:58:42 -0600 Subject: [PATCH 2/3] Refactor archive extraction into dedicated method Extract the archive type detection and extraction logic from the import() method into a new extract_archive() method for better code organization and maintainability. This makes the import() method cleaner and the archive handling logic more reusable. --- includes/wp-admin/import/class-mastodon.php | 69 ++++++++++++--------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/includes/wp-admin/import/class-mastodon.php b/includes/wp-admin/import/class-mastodon.php index 57f097366..adfc99f4d 100644 --- a/includes/wp-admin/import/class-mastodon.php +++ b/includes/wp-admin/import/class-mastodon.php @@ -229,37 +229,11 @@ public static function import() { $wp_filesystem->delete( self::$archive, true ); } - // Determine file type and extract accordingly. - $file_extension = \strtolower( \pathinfo( $file, PATHINFO_EXTENSION ) ); - $is_tar_gz = false; - - // Check if it's a tar.gz or tgz file. - if ( 'gz' === $file_extension && \str_ends_with( \strtolower( $basename ), '.tar.gz' ) ) { - $is_tar_gz = true; - } elseif ( 'tgz' === $file_extension ) { - $is_tar_gz = true; - } - - if ( 'zip' === $file_extension ) { - // Use WordPress built-in unzip function. - $unzip_result = \unzip_file( $file, self::$archive ); - if ( \is_wp_error( $unzip_result ) ) { - echo '

' . \esc_html( $error_message ) . '
'; - echo \esc_html( $unzip_result->get_error_message() ) . '

'; - return; - } - } elseif ( $is_tar_gz ) { - // Extract tar.gz file. - $extract_result = self::extract_tar_gz( $file ); - if ( \is_wp_error( $extract_result ) ) { - echo '

' . \esc_html( $error_message ) . '
'; - echo \esc_html( $extract_result->get_error_message() ) . '

'; - return; - } - } else { + // Extract the archive. + $extract_result = self::extract_archive( $file, $basename ); + if ( \is_wp_error( $extract_result ) ) { echo '

' . \esc_html( $error_message ) . '
'; - \esc_html_e( 'The uploaded file must be a ZIP or TAR.GZ archive. Please try again with the correct file format.', 'activitypub' ); - echo '

'; + echo \esc_html( $extract_result->get_error_message() ) . '

'; return; } @@ -473,6 +447,41 @@ private static function prepend_archive_path( $attachment ) { return $attachment; } + /** + * Extract an archive file (ZIP or TAR.GZ). + * + * Determines the archive type and uses the appropriate extraction method. + * + * @param string $file Path to the archive file. + * @param string $basename Base name of the archive file. + * + * @return true|\WP_Error True if extraction succeeded, WP_Error on failure. + */ + private static function extract_archive( $file, $basename ) { + $file_extension = \strtolower( \pathinfo( $file, PATHINFO_EXTENSION ) ); + $is_tar_gz = false; + + // Check if it's a tar.gz or tgz file. + if ( 'gz' === $file_extension && \str_ends_with( \strtolower( $basename ), '.tar.gz' ) ) { + $is_tar_gz = true; + } elseif ( 'tgz' === $file_extension ) { + $is_tar_gz = true; + } + + if ( 'zip' === $file_extension ) { + // Use WordPress built-in unzip function. + return \unzip_file( $file, self::$archive ); + } elseif ( $is_tar_gz ) { + // Extract tar.gz file. + return self::extract_tar_gz( $file ); + } else { + return new \WP_Error( + 'unsupported_archive_format', + \__( 'The uploaded file must be a ZIP or TAR.GZ archive. Please try again with the correct file format.', 'activitypub' ) + ); + } + } + /** * Extract a tar.gz file. * From 87cd41cdde7c161be7cd313aadd47d5285e4fc88 Mon Sep 17 00:00:00 2001 From: Automattic Bot Date: Wed, 12 Nov 2025 21:04:19 +0200 Subject: [PATCH 3/3] Add changelog --- .github/changelog/2453-from-description | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .github/changelog/2453-from-description diff --git a/.github/changelog/2453-from-description b/.github/changelog/2453-from-description new file mode 100644 index 000000000..61261336c --- /dev/null +++ b/.github/changelog/2453-from-description @@ -0,0 +1,4 @@ +Significance: minor +Type: added + +Add support for tar.gz archives in Mastodon importer.