-
Notifications
You must be signed in to change notification settings - Fork 83
Add tar.gz support to Mastodon importer #2453
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
obenland
wants to merge
3
commits into
trunk
Choose a base branch
from
add/mastodon-import-tar-gz
base: trunk
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| Significance: minor | ||
| Type: added | ||
|
|
||
| Add support for tar.gz archives in Mastodon importer. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -109,10 +109,15 @@ public static function handle_upload() { | |
| return false; | ||
| } | ||
|
|
||
| $file_info = \wp_check_filetype( sanitize_file_name( $_FILES['import']['name'] ), array( 'zip' => 'application/zip' ) ); | ||
| if ( 'application/zip' !== $file_info['type'] ) { | ||
| $allowed_types = array( | ||
| 'zip' => 'application/zip', | ||
| 'tar.gz' => 'application/x-gzip', | ||
| 'tgz' => 'application/x-gzip', | ||
| ); | ||
| $file_info = \wp_check_filetype( sanitize_file_name( $_FILES['import']['name'] ), $allowed_types ); | ||
| if ( ! in_array( $file_info['type'], $allowed_types, true ) ) { | ||
| echo '<p><strong>' . \esc_html( $error_message ) . '</strong><br />'; | ||
| \esc_html_e( 'The uploaded file must be a ZIP archive. Please try again with the correct file format.', 'activitypub' ); | ||
| \esc_html_e( 'The uploaded file must be a ZIP or TAR.GZ archive. Please try again with the correct file format.', 'activitypub' ); | ||
| echo '</p>'; | ||
| return false; | ||
| } | ||
|
|
@@ -211,20 +216,27 @@ public static function import_options() { | |
| public static function import() { | ||
| $error_message = \__( 'Sorry, there has been an error.', 'activitypub' ); | ||
| $file = \get_attached_file( self::$import_id ); | ||
| $basename = \basename( $file ); | ||
|
|
||
| \WP_Filesystem(); | ||
|
|
||
| global $wp_filesystem; | ||
| $import_folder = $wp_filesystem->wp_content_dir() . 'import/'; | ||
| self::$archive = $import_folder . \basename( \basename( $file, '.txt' ), '.zip' ); | ||
| self::$archive = $import_folder . \preg_replace( '/\.(zip|tar\.gz|tgz)$/i', '', $basename ); | ||
|
|
||
| // Clean up working directory. | ||
| if ( $wp_filesystem->is_dir( self::$archive ) ) { | ||
| $wp_filesystem->delete( self::$archive, true ); | ||
| } | ||
|
|
||
| // Unzip package to working directory. | ||
| \unzip_file( $file, self::$archive ); | ||
| // Extract the archive. | ||
| $extract_result = self::extract_archive( $file, $basename ); | ||
| if ( \is_wp_error( $extract_result ) ) { | ||
| echo '<p><strong>' . \esc_html( $error_message ) . '</strong><br />'; | ||
| echo \esc_html( $extract_result->get_error_message() ) . '</p>'; | ||
| return; | ||
| } | ||
|
|
||
| $files = $wp_filesystem->dirlist( self::$archive ); | ||
|
|
||
| if ( ! isset( $files['outbox.json'] ) ) { | ||
|
|
@@ -412,7 +424,7 @@ public static function greet() { | |
| echo '<ol>'; | ||
| echo '<li>' . \wp_kses( \__( 'Log in to your Mastodon account and go to <strong>Preferences > Import and Export</strong>.', 'activitypub' ), array( 'strong' => array() ) ) . '</li>'; | ||
| echo '<li>' . \esc_html__( 'Request a new archive of your data and wait for the email notification.', 'activitypub' ) . '</li>'; | ||
| echo '<li>' . \wp_kses( \__( 'Download the archive file (it will be a <code>.zip</code> file).', 'activitypub' ), array( 'code' => array() ) ) . '</li>'; | ||
| echo '<li>' . \wp_kses( \__( 'Download the archive file (it will be a <code>.zip</code> or <code>.tar.gz</code> file).', 'activitypub' ), array( 'code' => array() ) ) . '</li>'; | ||
| echo '<li>' . \esc_html__( 'Upload that file below to begin the import process.', 'activitypub' ) . '</li>'; | ||
| echo '</ol>'; | ||
|
|
||
|
|
@@ -434,4 +446,109 @@ private static function prepend_archive_path( $attachment ) { | |
|
|
||
| return $attachment; | ||
| } | ||
|
|
||
| /** | ||
| * Extract an archive file (ZIP or TAR.GZ). | ||
| * | ||
| * Determines the archive type and uses the appropriate extraction method. | ||
| * | ||
| * @param string $file Path to the archive file. | ||
| * @param string $basename Base name of the archive file. | ||
| * | ||
| * @return true|\WP_Error True if extraction succeeded, WP_Error on failure. | ||
| */ | ||
| private static function extract_archive( $file, $basename ) { | ||
| $file_extension = \strtolower( \pathinfo( $file, PATHINFO_EXTENSION ) ); | ||
| $is_tar_gz = false; | ||
|
|
||
| // Check if it's a tar.gz or tgz file. | ||
| if ( 'gz' === $file_extension && \str_ends_with( \strtolower( $basename ), '.tar.gz' ) ) { | ||
| $is_tar_gz = true; | ||
| } elseif ( 'tgz' === $file_extension ) { | ||
| $is_tar_gz = true; | ||
| } | ||
|
|
||
| if ( 'zip' === $file_extension ) { | ||
| // Use WordPress built-in unzip function. | ||
| return \unzip_file( $file, self::$archive ); | ||
| } elseif ( $is_tar_gz ) { | ||
| // Extract tar.gz file. | ||
| return self::extract_tar_gz( $file ); | ||
| } else { | ||
| return new \WP_Error( | ||
| 'unsupported_archive_format', | ||
| \__( 'The uploaded file must be a ZIP or TAR.GZ archive. Please try again with the correct file format.', 'activitypub' ) | ||
| ); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Extract a tar.gz file. | ||
| * | ||
| * WordPress doesn't have built-in tar.gz support, so we try multiple methods: | ||
| * 1. PHP's PharData extension (most reliable) | ||
| * 2. System tar command (if exec is available) | ||
| * 3. Return WP_Error with helpful error message | ||
| * | ||
| * @param string $file Path to the tar.gz file. | ||
| * | ||
| * @return true|\WP_Error True if extraction succeeded, WP_Error on failure. | ||
| */ | ||
| private static function extract_tar_gz( $file ) { | ||
| global $wp_filesystem; | ||
| $last_error = ''; | ||
|
|
||
| // Method 1: Try using PHP's PharData if available (PHP 5.3+). | ||
| if ( class_exists( 'PharData' ) ) { | ||
| try { | ||
| $phar = new \PharData( $file ); | ||
| $phar->extractTo( self::$archive ); | ||
| return true; | ||
| } catch ( \Exception $e ) { | ||
| // PharData extraction failed, try next method. | ||
| $last_error = $e->getMessage(); | ||
| } | ||
| } | ||
|
|
||
| // Method 2: Try using system tar command if available. | ||
| if ( function_exists( 'exec' ) ) { | ||
| // Create the extraction directory first. | ||
| if ( ! $wp_filesystem->mkdir( self::$archive ) ) { | ||
| return new \WP_Error( | ||
| 'extract_tar_gz_mkdir_failed', | ||
| \__( 'Failed to create extraction directory.', 'activitypub' ) | ||
| ); | ||
| } | ||
|
|
||
| $command = sprintf( | ||
| 'tar -xzf %s -C %s 2>&1', | ||
| \escapeshellarg( $file ), | ||
| \escapeshellarg( self::$archive ) | ||
| ); | ||
| $output = array(); | ||
| $return_var = 0; | ||
| exec( $command, $output, $return_var ); | ||
|
||
|
|
||
| if ( 0 === $return_var ) { | ||
| return true; | ||
| } | ||
|
|
||
| $last_error = implode( ' ', $output ); | ||
| } | ||
|
|
||
| // If extraction failed, return appropriate error. | ||
| if ( ! class_exists( 'PharData' ) && ! function_exists( 'exec' ) ) { | ||
| return new \WP_Error( | ||
| 'extract_tar_gz_not_supported', | ||
| \__( 'Your server does not support tar.gz extraction. Please use a ZIP file instead, or ask your host to enable PharData or exec functions.', 'activitypub' ) | ||
| ); | ||
| } | ||
|
|
||
| // Both methods were available but failed. | ||
| return new \WP_Error( | ||
| 'extract_tar_gz_failed', | ||
| \__( 'Unable to extract tar.gz archive. The file may be corrupted. Please try again with a new archive or use a ZIP file instead.', 'activitypub' ), | ||
| $last_error | ||
| ); | ||
| } | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The tar command execution could be vulnerable to command injection if the file path contains shell metacharacters. While `escapeshellarg()` is used, consider validating that the `self::$archive` path doesn't contain unexpected characters, or use a safer extraction method.