Skip to content

Commit 580851d

Browse files
authored
Merge pull request #260 from magefan/2491-wordpress-import
2491-wordpress-import-fix-src-p [Fixed]
2 parents 96d0cc6 + a5e3b03 commit 580851d

File tree

1 file changed

+162
-2
lines changed

1 file changed

+162
-2
lines changed

Model/Import/Wordpress.php

Lines changed: 162 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,10 +249,13 @@ public function execute()
249249
$content = str_replace('<!--more-->', '<!-- pagebreak -->', $content);
250250

251251
$content = preg_replace(
252-
'/((http:\/\/|https:\/\/|\/\/)(.*)|(\s|"|\')|(\/[\d\w_\-\.]*))\/wp-content\/uploads(.*)((\.jpg|\.jpeg|\.gif|\.png|\.tiff|\.tif|\.svg)|(\s|"|\'))/Ui',
253-
'$4{{media url="magefan_blog$6$8"}}$9',
252+
'/src=[\'"]((http:\/\/|https:\/\/|\/\/)(.*)|(\s|"|\')|(\/[\d\w_\-\.]*))\/wp-content\/uploads(.*)((\.jpg|\.jpeg|\.gif|\.png|\.tiff|\.tif|\.svg)|(\s|"|\'))[\'"\s]/Ui',
253+
'src="$4{{media url="magefan_blog$6$8"}}$9"',
254254
$content
255255
);
256+
257+
$content = $this->wordpressOutoutWrap($content);
258+
256259
$wordpressPostId = $data['ID'];
257260
$data = [
258261
'store_ids' => [$this->getStoreId()],
@@ -329,4 +332,161 @@ public function execute()
329332

330333
mysqli_close($con);
331334
}
335+
336+
337+
/**
 * Convert WordPress-style plain line breaks into HTML paragraphs and <br /> tags.
 *
 * This is a port of WordPress' wpautop(): double line breaks become <p>...</p>
 * blocks, and (optionally) remaining single line breaks become <br /> tags.
 * Content inside <pre> is left untouched, and newlines inside <script>/<style>
 * are preserved when $br is enabled.
 *
 * @param string $pee Raw post content to convert.
 * @param bool   $br  Whether remaining single line breaks are converted to <br />.
 * @return string Converted content; an empty string when the input is blank.
 */
protected function wordpressOutoutWrap($pee, $br = true)
{
    $pre_tags = [];

    if (trim($pee) === '') {
        return '';
    }

    // Just to make things a little easier, pad the end.
    $pee = $pee . "\n";

    /*
     * Pre tags shouldn't be touched by autop.
     * Replace pre tags with placeholders and bring them back after autop.
     */
    if (strpos($pee, '<pre') !== false) {
        $pee_parts = explode('</pre>', $pee);
        $last_pee = array_pop($pee_parts);
        $pee = '';
        $i = 0;

        foreach ($pee_parts as $pee_part) {
            $start = strpos($pee_part, '<pre');

            // Malformed html? (a closing </pre> without a matching opening tag)
            if ($start === false) {
                $pee .= $pee_part;
                continue;
            }

            $name = "<pre wp-pre-tag-$i></pre>";
            $pre_tags[$name] = substr($pee_part, $start) . '</pre>';

            $pee .= substr($pee_part, 0, $start) . $name;
            $i++;
        }

        $pee .= $last_pee;
    }

    // Change multiple <br>s into two line breaks, which will turn into paragraphs.
    $pee = preg_replace('|<br\s*/?>\s*<br\s*/?>|', "\n\n", $pee);

    $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)';

    // Add a double line break above block-level opening tags.
    $pee = preg_replace('!(<' . $allblocks . '[\s/>])!', "\n\n$1", $pee);

    // Add a double line break below block-level closing tags.
    $pee = preg_replace('!(</' . $allblocks . '>)!', "$1\n\n", $pee);

    // Standardize newline characters to "\n".
    $pee = str_replace(["\r\n", "\r"], "\n", $pee);

    // Collapse line breaks before and after <option> elements so they don't get autop'd.
    if (strpos($pee, '<option') !== false) {
        $pee = preg_replace('|\s*<option|', '<option', $pee);
        $pee = preg_replace('|</option>\s*|', '</option>', $pee);
    }

    /*
     * Collapse line breaks inside <object> elements, before <param> and <embed> elements
     * so they don't get autop'd.
     */
    if (strpos($pee, '</object>') !== false) {
        $pee = preg_replace('|(<object[^>]*>)\s*|', '$1', $pee);
        $pee = preg_replace('|\s*</object>|', '</object>', $pee);
        $pee = preg_replace('%\s*(</?(?:param|embed)[^>]*>)\s*%', '$1', $pee);
    }

    /*
     * Collapse line breaks inside <audio> and <video> elements,
     * before and after <source> and <track> elements.
     */
    if (strpos($pee, '<source') !== false || strpos($pee, '<track') !== false) {
        $pee = preg_replace('%([<\[](?:audio|video)[^>\]]*[>\]])\s*%', '$1', $pee);
        $pee = preg_replace('%\s*([<\[]/(?:audio|video)[>\]])%', '$1', $pee);
        $pee = preg_replace('%\s*(<(?:source|track)[^>]*>)\s*%', '$1', $pee);
    }

    // Collapse line breaks before and after <figcaption> elements.
    if (strpos($pee, '<figcaption') !== false) {
        $pee = preg_replace('|\s*(<figcaption[^>]*>)|', '$1', $pee);
        $pee = preg_replace('|</figcaption>\s*|', '</figcaption>', $pee);
    }

    // Remove more than two contiguous line breaks.
    $pee = preg_replace("/\n\n+/", "\n\n", $pee);

    // Split up the contents into an array of strings, separated by double line breaks.
    $pees = preg_split('/\n\s*\n/', $pee, -1, PREG_SPLIT_NO_EMPTY);

    // Reset $pee prior to rebuilding.
    $pee = '';

    // Rebuild the content as a string, wrapping every bit with a <p>.
    foreach ($pees as $tinkle) {
        $pee .= '<p>' . trim($tinkle, "\n") . "</p>\n";
    }

    // Under certain strange conditions it could create a P of entirely whitespace.
    $pee = preg_replace('|<p>\s*</p>|', '', $pee);

    // Add a closing <p> inside <div>, <address>, or <form> tag if missing.
    $pee = preg_replace('!<p>([^<]+)</(div|address|form)>!', '<p>$1</p></$2>', $pee);

    // If an opening or closing block element tag is wrapped in a <p>, unwrap it.
    $pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)\s*</p>!', '$1', $pee);

    // In some cases <li> may get wrapped in <p>, fix them.
    $pee = preg_replace('|<p>(<li.+?)</p>|', '$1', $pee);

    // If a <blockquote> is wrapped with a <p>, move it inside the <blockquote>.
    $pee = preg_replace('|<p><blockquote([^>]*)>|i', '<blockquote$1><p>', $pee);
    $pee = str_replace('</blockquote></p>', '</p></blockquote>', $pee);

    // If an opening or closing block element tag is preceded by an opening <p> tag, remove it.
    $pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)!', '$1', $pee);

    // If an opening or closing block element tag is followed by a closing <p> tag, remove it.
    $pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*</p>!', '$1', $pee);

    // Optionally insert line breaks.
    if ($br) {
        /*
         * Replace newlines inside <script>/<style> with a placeholder so the
         * <br /> insertion below leaves inline JS/CSS intact. The WordPress
         * helper function this was ported from is not available here, so the
         * substitution is done with an inline callback instead.
         */
        $protected = preg_replace_callback(
            '/<(script|style).*?<\/\\1>/s',
            static function (array $matches) {
                return str_replace("\n", '<WPPreserveNewline />', $matches[0]);
            },
            $pee
        );

        // preg_replace_callback() yields null on a PCRE error; keep the text in that case.
        if ($protected !== null) {
            $pee = $protected;
        }

        // Normalize <br>
        $pee = str_replace(['<br>', '<br/>'], '<br />', $pee);

        // Replace any new line characters that aren't preceded by a <br /> with a <br />.
        $pee = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $pee);

        // Replace newline placeholders with newlines.
        $pee = str_replace('<WPPreserveNewline />', "\n", $pee);
    }

    // If a <br /> tag is after an opening or closing block tag, remove it.
    $pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*<br />!', '$1', $pee);

    // If a <br /> tag is before a subset of opening or closing block tags, remove it.
    $pee = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $pee);
    $pee = preg_replace("|\n</p>$|", '</p>', $pee);

    // Replace placeholder <pre> tags with their original content.
    if (!empty($pre_tags)) {
        $pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee);
    }

    // Restore newlines for any "<!-- wpnl -->" markers present in the content.
    if (false !== strpos($pee, '<!-- wpnl -->')) {
        $pee = str_replace([' <!-- wpnl --> ', '<!-- wpnl -->'], "\n", $pee);
    }

    return $pee;
}
332492
}

0 commit comments

Comments
 (0)