Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/changelog/2453-from-description
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: minor
Type: added

Add support for tar.gz archives in Mastodon importer.
131 changes: 124 additions & 7 deletions includes/wp-admin/import/class-mastodon.php
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,15 @@ public static function handle_upload() {
return false;
}

$file_info = \wp_check_filetype( sanitize_file_name( $_FILES['import']['name'] ), array( 'zip' => 'application/zip' ) );
if ( 'application/zip' !== $file_info['type'] ) {
$allowed_types = array(
'zip' => 'application/zip',
'tar.gz' => 'application/x-gzip',
'tgz' => 'application/x-gzip',
);
$file_info = \wp_check_filetype( sanitize_file_name( $_FILES['import']['name'] ), $allowed_types );
if ( ! in_array( $file_info['type'], $allowed_types, true ) ) {
echo '<p><strong>' . \esc_html( $error_message ) . '</strong><br />';
\esc_html_e( 'The uploaded file must be a ZIP archive. Please try again with the correct file format.', 'activitypub' );
\esc_html_e( 'The uploaded file must be a ZIP or TAR.GZ archive. Please try again with the correct file format.', 'activitypub' );
echo '</p>';
return false;
}
Expand Down Expand Up @@ -211,20 +216,27 @@ public static function import_options() {
public static function import() {
$error_message = \__( 'Sorry, there has been an error.', 'activitypub' );
$file = \get_attached_file( self::$import_id );
$basename = \basename( $file );

\WP_Filesystem();

global $wp_filesystem;
$import_folder = $wp_filesystem->wp_content_dir() . 'import/';
self::$archive = $import_folder . \basename( \basename( $file, '.txt' ), '.zip' );
self::$archive = $import_folder . \preg_replace( '/\.(zip|tar\.gz|tgz)$/i', '', $basename );

// Clean up working directory.
if ( $wp_filesystem->is_dir( self::$archive ) ) {
$wp_filesystem->delete( self::$archive, true );
}

// Unzip package to working directory.
\unzip_file( $file, self::$archive );
// Extract the archive.
$extract_result = self::extract_archive( $file, $basename );
if ( \is_wp_error( $extract_result ) ) {
echo '<p><strong>' . \esc_html( $error_message ) . '</strong><br />';
echo \esc_html( $extract_result->get_error_message() ) . '</p>';
return;
}

$files = $wp_filesystem->dirlist( self::$archive );

if ( ! isset( $files['outbox.json'] ) ) {
Expand Down Expand Up @@ -412,7 +424,7 @@ public static function greet() {
echo '<ol>';
echo '<li>' . \wp_kses( \__( 'Log in to your Mastodon account and go to <strong>Preferences > Import and Export</strong>.', 'activitypub' ), array( 'strong' => array() ) ) . '</li>';
echo '<li>' . \esc_html__( 'Request a new archive of your data and wait for the email notification.', 'activitypub' ) . '</li>';
echo '<li>' . \wp_kses( \__( 'Download the archive file (it will be a <code>.zip</code> file).', 'activitypub' ), array( 'code' => array() ) ) . '</li>';
echo '<li>' . \wp_kses( \__( 'Download the archive file (it will be a <code>.zip</code> or <code>.tar.gz</code> file).', 'activitypub' ), array( 'code' => array() ) ) . '</li>';
echo '<li>' . \esc_html__( 'Upload that file below to begin the import process.', 'activitypub' ) . '</li>';
echo '</ol>';

Expand All @@ -434,4 +446,109 @@ private static function prepend_archive_path( $attachment ) {

return $attachment;
}

/**
 * Extract an archive file (ZIP or TAR.GZ).
 *
 * Determines the archive type from the file name and delegates to the
 * matching extraction method. The archive is extracted into self::$archive.
 *
 * Stored uploads may carry a trailing ".txt" after the real extension (the
 * importer has historically stripped that suffix when deriving the working
 * directory name), so an optional ".txt" suffix is ignored when detecting
 * the archive type. Matching is case-insensitive.
 *
 * @param string $file     Path to the archive file.
 * @param string $basename Base name of the archive file.
 *
 * @return true|\WP_Error True if extraction succeeded, WP_Error on failure.
 */
private static function extract_archive( $file, $basename ) {
	// Normalize once: lower-case, then drop an optional ".txt" suffix so that
	// "export.zip.txt" is detected as a ZIP and "export.tar.gz.txt" as a tarball.
	$name = \preg_replace( '/\.txt$/', '', \strtolower( $basename ) );

	if ( \str_ends_with( $name, '.zip' ) ) {
		// Use WordPress built-in unzip function.
		return \unzip_file( $file, self::$archive );
	}

	if ( \str_ends_with( $name, '.tar.gz' ) || \str_ends_with( $name, '.tgz' ) ) {
		// Extract tar.gz file.
		return self::extract_tar_gz( $file );
	}

	return new \WP_Error(
		'unsupported_archive_format',
		\__( 'The uploaded file must be a ZIP or TAR.GZ archive. Please try again with the correct file format.', 'activitypub' )
	);
}

/**
 * Extract a tar.gz file into self::$archive.
 *
 * WordPress doesn't have built-in tar.gz support, so we try multiple methods:
 * 1. PHP's PharData class.
 * 2. System tar command (if exec is available).
 * 3. Return WP_Error with helpful error message.
 *
 * @param string $file Path to the tar.gz file.
 *
 * @return true|\WP_Error True if extraction succeeded, WP_Error on failure.
 *                        On failure after an attempt, the last low-level error
 *                        message is attached as the WP_Error data.
 */
private static function extract_tar_gz( $file ) {
	global $wp_filesystem;

	$has_phar   = class_exists( 'PharData' );
	$has_exec   = function_exists( 'exec' );
	$last_error = '';

	// Method 1: Try using PHP's PharData if available.
	if ( $has_phar ) {
		try {
			$phar = new \PharData( $file );
			$phar->extractTo( self::$archive );
			return true;
		} catch ( \Exception $e ) {
			// PharData extraction failed, remember why and try the next method.
			$last_error = $e->getMessage();
		}
	}

	// Method 2: Try using system tar command if available.
	if ( $has_exec ) {
		// A failed PharData attempt may have created (and partially filled) the
		// target directory. Remove it so tar starts clean; otherwise mkdir() on
		// the existing directory would fail and abort the fallback prematurely.
		if ( $wp_filesystem->is_dir( self::$archive ) ) {
			$wp_filesystem->delete( self::$archive, true );
		}

		// Create the extraction directory unless it is (still) present.
		if ( ! $wp_filesystem->is_dir( self::$archive ) && ! $wp_filesystem->mkdir( self::$archive ) ) {
			return new \WP_Error(
				'extract_tar_gz_mkdir_failed',
				\__( 'Failed to create extraction directory.', 'activitypub' )
			);
		}

		// Both paths are passed through escapeshellarg(), so shell metacharacters
		// in either path cannot break out of their argument. The destination is
		// the importer's own working directory (self::$archive).
		$command = sprintf(
			'tar -xzf %s -C %s 2>&1',
			\escapeshellarg( $file ),
			\escapeshellarg( self::$archive )
		);

		$output     = array();
		$return_var = 0;
		exec( $command, $output, $return_var );

		if ( 0 === $return_var ) {
			return true;
		}

		// stderr is redirected to stdout above, so $output holds tar's error text.
		$last_error = implode( ' ', $output );
	}

	// Neither extraction method exists on this server.
	if ( ! $has_phar && ! $has_exec ) {
		return new \WP_Error(
			'extract_tar_gz_not_supported',
			\__( 'Your server does not support tar.gz extraction. Please use a ZIP file instead, or ask your host to enable PharData or exec functions.', 'activitypub' )
		);
	}

	// At least one method was available, but every attempt failed.
	return new \WP_Error(
		'extract_tar_gz_failed',
		\__( 'Unable to extract tar.gz archive. The file may be corrupted. Please try again with a new archive or use a ZIP file instead.', 'activitypub' ),
		$last_error
	);
}
}
Loading