Skip to content

Commit 008ae52

Browse files
akirk and pfefferle authored
Hashtags, Mentions: Use a tag stack instead of regex for protecting tags (#455)
* Use a tag stack instead of regex for protecting tags
* Use the placeholder in the test
* Add comments
* Update comment
* Ignore HTML comments, thanks @marcS0H

---------

Co-authored-by: Matthias Pfefferle <[email protected]>
1 parent addd7dd commit 008ae52

File tree

4 files changed

+95
-62
lines changed

4 files changed

+95
-62
lines changed

includes/class-hashtag.php

Lines changed: 46 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -43,38 +43,56 @@ public static function insert_post( $id, $post ) {
4343
* @return string the filtered post-content
4444
*/
4545
public static function the_content( $the_content ) {
46-
$protected_tags = array();
47-
$protect = function( $m ) use ( &$protected_tags ) {
48-
$c = \wp_rand( 100000, 999999 );
49-
$protect = '!#!#PROTECT' . $c . '#!#!';
50-
while ( isset( $protected_tags[ $protect ] ) ) {
51-
$c = \wp_rand( 100000, 999999 );
52-
$protect = '!#!#PROTECT' . $c . '#!#!';
53-
}
54-
$protected_tags[ $protect ] = $m[0];
55-
return $protect;
56-
};
57-
$the_content = preg_replace_callback(
58-
'#<!\[CDATA\[.*?\]\]>#is',
59-
$protect,
60-
$the_content
61-
);
62-
$the_content = preg_replace_callback(
63-
'#<(pre|code|textarea|style)\b[^>]*>.*?</\1[^>]*>#is',
64-
$protect,
65-
$the_content
66-
);
67-
$the_content = preg_replace_callback(
68-
'#<[^>]+>#i',
69-
$protect,
70-
$the_content
46+
$tag_stack = array();
47+
$protected_tags = array(
48+
'pre',
49+
'code',
50+
'textarea',
51+
'style',
52+
'a',
7153
);
54+
$content_with_links = '';
55+
$in_protected_tag = false;
56+
foreach ( wp_html_split( $the_content ) as $chunk ) {
57+
if ( preg_match( '#^<!--[\s\S]*-->$#i', $chunk, $m ) ) {
58+
$content_with_links .= $chunk;
59+
continue;
60+
}
61+
62+
if ( preg_match( '#^<(/)?([a-z-]+)\b[^>]*>$#i', $chunk, $m ) ) {
63+
$tag = strtolower( $m[2] );
64+
if ( '/' === $m[1] ) {
65+
// Closing tag.
66+
$i = array_search( $tag, $tag_stack );
67+
// We can only remove the tag from the stack if it is in the stack.
68+
if ( false !== $i ) {
69+
$tag_stack = array_slice( $tag_stack, 0, $i );
70+
}
71+
} else {
72+
// Opening tag, add it to the stack.
73+
$tag_stack[] = $tag;
74+
}
7275

73-
$the_content = \preg_replace_callback( '/' . ACTIVITYPUB_HASHTAGS_REGEXP . '/i', array( '\Activitypub\Hashtag', 'replace_with_links' ), $the_content );
76+
// If we're in a protected tag, the tag_stack contains at least one protected tag string.
77+
// The protected tag state can only change when we encounter a start or end tag.
78+
$in_protected_tag = array_intersect( $tag_stack, $protected_tags );
7479

75-
$the_content = str_replace( array_reverse( array_keys( $protected_tags ) ), array_reverse( array_values( $protected_tags ) ), $the_content );
80+
// Never inspect tags.
81+
$content_with_links .= $chunk;
82+
continue;
83+
}
84+
85+
if ( $in_protected_tag ) {
86+
// Don't inspect a chunk inside a protected tag.
87+
$content_with_links .= $chunk;
88+
continue;
89+
}
90+
91+
// Only reachable when there is no protected tag in the stack.
92+
$content_with_links .= \preg_replace_callback( '/' . ACTIVITYPUB_HASHTAGS_REGEXP . '/i', array( '\Activitypub\Hashtag', 'replace_with_links' ), $chunk );
93+
}
7694

77-
return $the_content;
95+
return $content_with_links;
7896
}
7997

8098
/**

includes/class-mention.php

Lines changed: 46 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -25,43 +25,56 @@ public static function init() {
2525
* @return string the filtered post-content
2626
*/
2727
public static function the_content( $the_content ) {
28-
$protected_tags = array();
29-
$protect = function( $m ) use ( &$protected_tags ) {
30-
$c = \wp_rand( 100000, 999999 );
31-
$protect = '!#!#PROTECT' . $c . '#!#!';
32-
while ( isset( $protected_tags[ $protect ] ) ) {
33-
$c = \wp_rand( 100000, 999999 );
34-
$protect = '!#!#PROTECT' . $c . '#!#!';
35-
}
36-
$protected_tags[ $protect ] = $m[0];
37-
return $protect;
38-
};
39-
$the_content = preg_replace_callback(
40-
'#<!\[CDATA\[.*?\]\]>#is',
41-
$protect,
42-
$the_content
43-
);
44-
$the_content = preg_replace_callback(
45-
'#<(pre|code|textarea|style)\b[^>]*>.*?</\1[^>]*>#is',
46-
$protect,
47-
$the_content
48-
);
49-
$the_content = preg_replace_callback(
50-
'#<a.*?href=[^>]+>.*?</a>#i',
51-
$protect,
52-
$the_content
28+
$tag_stack = array();
29+
$protected_tags = array(
30+
'pre',
31+
'code',
32+
'textarea',
33+
'style',
34+
'a',
5335
);
36+
$content_with_links = '';
37+
$in_protected_tag = false;
38+
foreach ( wp_html_split( $the_content ) as $chunk ) {
39+
if ( preg_match( '#^<!--[\s\S]*-->$#i', $chunk, $m ) ) {
40+
$content_with_links .= $chunk;
41+
continue;
42+
}
5443

55-
$the_content = preg_replace_callback(
56-
'#<img.*?[^>]+>#i',
57-
$protect,
58-
$the_content
59-
);
44+
if ( preg_match( '#^<(/)?([a-z-]+)\b[^>]*>$#i', $chunk, $m ) ) {
45+
$tag = strtolower( $m[2] );
46+
if ( '/' === $m[1] ) {
47+
// Closing tag.
48+
$i = array_search( $tag, $tag_stack );
49+
// We can only remove the tag from the stack if it is in the stack.
50+
if ( false !== $i ) {
51+
$tag_stack = array_slice( $tag_stack, 0, $i );
52+
}
53+
} else {
54+
// Opening tag, add it to the stack.
55+
$tag_stack[] = $tag;
56+
}
57+
58+
// If we're in a protected tag, the tag_stack contains at least one protected tag string.
59+
// The protected tag state can only change when we encounter a start or end tag.
60+
$in_protected_tag = array_intersect( $tag_stack, $protected_tags );
61+
62+
// Never inspect tags.
63+
$content_with_links .= $chunk;
64+
continue;
65+
}
66+
67+
if ( $in_protected_tag ) {
68+
// Don't inspect a chunk inside a protected tag.
69+
$content_with_links .= $chunk;
70+
continue;
71+
}
6072

61-
$the_content = \preg_replace_callback( '/@' . ACTIVITYPUB_USERNAME_REGEXP . '/', array( self::class, 'replace_with_links' ), $the_content );
62-
$the_content = \str_replace( array_reverse( array_keys( $protected_tags ) ), array_reverse( array_values( $protected_tags ) ), $the_content );
73+
// Only reachable when there is no protected tag in the stack.
74+
$content_with_links .= \preg_replace_callback( '/@' . ACTIVITYPUB_USERNAME_REGEXP . '/', array( self::class, 'replace_with_links' ), $chunk );
75+
}
6376

64-
return $the_content;
77+
return $content_with_links;
6578
}
6679

6780
/**

tests/test-class-activitypub-hashtag.php

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,9 @@ public function the_content_provider() {
4141
array( 'hallo <a href="http://test.test/#object">#test</a> test', 'hallo <a href="http://test.test/#object">#test</a> test' ),
4242
array( '<div>hallo #object test</div>', '<div>hallo <a rel="tag" class="hashtag u-tag u-category" href="%s">#object</a> test</div>' ),
4343
array( '<div>hallo #object</div>', '<div>hallo <a rel="tag" class="hashtag u-tag u-category" href="%s">#object</a></div>' ),
44-
array( '<div>#object</div>', '<div>#object</div>' ),
44+
array( '<div>#object</div>', '<div><a rel="tag" class="hashtag u-tag u-category" href="%s">#object</a></div>' ),
4545
array( '<a>#object</a>', '<a>#object</a>' ),
46+
array( '<!-- #object -->', '<!-- #object -->' ),
4647
array( '<div style="color: #ccc;">object</a>', '<div style="color: #ccc;">object</a>' ),
4748
array( $code, $code ),
4849
array( $style, $style ),

tests/test-class-activitypub-mention.php

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ public function the_content_provider() {
3333
array( 'hallo <a rel="mention" class="u-url mention" href="https://notiz.blog/author/matthias-pfefferle/">@[email protected]</a> test', 'hallo <a rel="mention" class="u-url mention" href="https://notiz.blog/author/matthias-pfefferle/">@[email protected]</a> test' ),
3434
array( 'hallo <a rel="mention" class="u-url mention" href="https://notiz.blog/@pfefferle/">@[email protected]</a> test', 'hallo <a rel="mention" class="u-url mention" href="https://notiz.blog/@pfefferle/">@[email protected]</a> test' ),
3535
array( 'hallo <img src="abc" alt="https://notiz.blog/@pfefferle/" title="@[email protected]"/> test', 'hallo <img src="abc" alt="https://notiz.blog/@pfefferle/" title="@[email protected]"/> test' ),
36+
array( '<!-- @[email protected] -->', '<!-- @[email protected] -->' ),
3637
array( $code, $code ),
3738
array( $pre, $pre ),
3839
);

0 commit comments

Comments
 (0)