Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit f9eda5f

Browse files
authored
Merge pull request #90 from dshanske/fixes
MF2 Improvements and Fixes Mark 3
2 parents a9526e0 + 3a20baf commit f9eda5f

File tree

9 files changed

+1023
-215
lines changed

9 files changed

+1023
-215
lines changed

includes/Mf2/Parser.php

Lines changed: 695 additions & 155 deletions
Large diffs are not rendered by default.

includes/class-linkbacks-mf2-handler.php

Lines changed: 197 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,13 @@ public static function get_rel_mapper() {
104104
* @return array
105105
*/
106106
public static function generate_commentdata( $commentdata ) {
107-
global $wpdb;
108-
109-
// add source
110-
$source = $commentdata['comment_author_url'];
107+
// Use new webmention source meta key.
108+
if ( array_key_exists( 'webmention_source_url', $commentdata['comment_meta'] ) ) {
109+
$source = $commentdata['comment_meta']['webmention_source_url'];
110+
} // Fallback to comment author url.
111+
else {
112+
$source = $commentdata['comment_author_url'];
113+
}
111114

112115
// parse source html
113116
$parser = new Parser( $commentdata['remote_source_original'], $source );
@@ -127,75 +130,223 @@ public static function generate_commentdata( $commentdata ) {
127130
return array();
128131
}
129132

130-
// the entry properties
131-
$properties = $entry['properties'];
133+
$commentdata['remote_source_properties'] = $properties = array_filter( self::flatten_microformats( $entry ) );
134+
$commentdata['remote_source_rels'] = $rels = $mf_array['rels'];
132135

133136
// try to find some content
134137
// @link http://indiewebcamp.com/comments-presentation
135-
if ( self::check_mf_attr( 'summary', $properties ) ) {
136-
$commentdata['comment_content'] = wp_slash( $properties['summary'][0] );
137-
} elseif ( self::check_mf_attr( 'content', $properties ) ) {
138-
$commentdata['comment_content'] = wp_filter_kses( $properties['content'][0]['html'] );
139-
} elseif ( self::check_mf_attr( 'name', $properties ) ) {
140-
$commentdata['comment_content'] = wp_slash( $properties['name'][0] );
138+
if ( isset( $properties['summary'] ) ) {
139+
$commentdata['comment_content'] = wp_slash( $properties['summary'] );
140+
} elseif ( isset( $properties['content'] ) ) {
141+
$commentdata['comment_content'] = wp_filter_kses( $properties['content']['html'] );
142+
} elseif ( isset( $properties['name'] ) ) {
143+
$commentdata['comment_content'] = wp_slash( $properties['name'] );
141144
}
142145
$commentdata['comment_content'] = trim( $commentdata['comment_content'] );
143146

144147
// set the right date
145-
if ( self::check_mf_attr( 'published', $properties ) ) {
146-
$time = strtotime( $properties['published'][0] );
147-
$commentdata['comment_date'] = get_date_from_gmt( date( 'Y-m-d H:i:s', $time ), 'Y-m-d H:i:s' );
148-
} elseif ( self::check_mf_attr( 'updated', $properties ) ) {
149-
$time = strtotime( $properties['updated'][0] );
150-
$commentdata['comment_date'] = get_date_from_gmt( date( 'Y-m-d H:i:s', $time ), 'Y-m-d H:i:s' );
148+
if ( isset( $properties['published'] ) ) {
149+
$commentdata['comment_date'] = self::convert_time( $properties['published'] );
150+
} elseif ( isset( $properties['updated'] ) ) {
151+
$commentdata['comment_date'] = self::convert_time( $properties['updated'] );
151152
}
152153

153154
$author = null;
154155

155156
// check if h-card has an author
156-
if ( isset( $properties['author'] ) && isset( $properties['author'][0]['properties'] ) ) {
157-
$author = $properties['author'][0]['properties'];
157+
if ( isset( $properties['author'] ) ) {
158+
$author = $properties['author'];
158159
} else {
159160
$author = self::get_representative_author( $mf_array, $source );
160161
}
161162

162163
// if author is present use the information for the comment
163164
if ( $author ) {
164-
if ( self::check_mf_attr( 'name', $author ) ) {
165-
$commentdata['comment_author'] = wp_slash( $author['name'][0] );
165+
if ( ! is_array( $author ) ) {
166+
if ( self::is_url( $author ) ) {
167+
$response = Linkbacks_Handler::retrieve( $author );
168+
if ( ! is_wp_error( $response ) ) {
169+
$parser = new Parser( wp_remote_retrieve_body( $response ), $author );
170+
$author_array = $parser->parse( true );
171+
$properties['author'] = $author = self::get_representative_author( $author_array, $author );
172+
}
173+
} else {
174+
$comment_data['comment_author'] = wp_slash( $author );
175+
}
166176
}
167177

168-
if ( self::check_mf_attr( 'email', $author ) ) {
169-
$commentdata['comment_author_email'] = wp_slash( $author['email'][0] );
170-
}
178+
if ( is_array( $author ) ) {
179+
if ( ! isset( $author['me'] ) ) {
180+
if ( isset( $mf_array['rels']['me'] ) ) {
181+
$properties['author']['me'] = $author['me'] = $mf_array['rels']['me'];
182+
}
183+
}
184+
if ( isset( $properties['name'] ) ) {
185+
$commentdata['comment_author'] = wp_slash( $author['name'] );
186+
}
171187

172-
if ( self::check_mf_attr( 'url', $author ) ) {
173-
$commentdata['comment_meta']['semantic_linkbacks_author_url'] = esc_url_raw( $author['url'][0] );
174-
}
188+
if ( isset( $author['email'] ) ) {
189+
$commentdata['comment_author_email'] = wp_slash( $author['email'] );
190+
}
191+
192+
if ( isset( $author['url'] ) ) {
193+
$commentdata['comment_meta']['semantic_linkbacks_author_url'] = $author['url'];
194+
}
175195

176-
if ( self::check_mf_attr( 'photo', $author ) ) {
177-
$commentdata['comment_meta']['semantic_linkbacks_avatar'] = esc_url_raw( $author['photo'][0] );
196+
if ( isset( $properties['photo'] ) ) {
197+
$commentdata['comment_meta']['semantic_linkbacks_avatar'] = $author['photo'];
198+
}
178199
}
179200
}
180201

181202
// set canonical url (u-url)
182-
if ( self::check_mf_attr( 'url', $properties ) ) {
183-
$commentdata['comment_meta']['semantic_linkbacks_canonical'] = esc_url_raw( $properties['url'][0] );
203+
if ( isset( $properties['url'] ) ) {
204+
$commentdata['comment_meta']['semantic_linkbacks_canonical'] = $properties['url'];
184205
} else {
185206
$commentdata['comment_meta']['semantic_linkbacks_canonical'] = esc_url_raw( $source );
186207
}
187208

209+
// If u-syndication is not set use rel syndication
210+
if ( array_key_exists( 'syndication', $rels ) && ! array_key_exists( 'syndication', $properties ) ) {
211+
$properties['syndication'] = $rels['syndication'];
212+
}
213+
214+
// Check and parse for location property
215+
if ( array_key_exists( 'location', $properties ) ) {
216+
$location = $properties['location'];
217+
if ( is_array( $location ) ) {
218+
if ( array_key_exists( 'latitude', $location ) ) {
219+
$commentdata['comment_meta']['geo_latitude'] = $location['latitude'];
220+
}
221+
if ( array_key_exists( 'longitude', $location ) ) {
222+
$commentdata['comment_meta']['geo_longitude'] = $location['longitude'];
223+
}
224+
if ( array_key_exists( 'name', $location ) ) {
225+
$commentdata['comment_meta']['geo_address'] = $location['name'];
226+
}
227+
} else {
228+
if ( substr( $location, 0, 4 ) == 'geo:' ) {
229+
$geo = explode( ':', substr( urldecode( $location ), 4 ) );
230+
$geo = explode( ';', $geo[0] );
231+
$coords = explode( ',', $geo[0] );
232+
$commentdata['comment_meta']['geo_latitude'] = trim( $coords[0] );
233+
$commentdata['comment_meta']['geo_longitude'] = trim( $coords[1] );
234+
} else {
235+
$commentdata['comment_meta']['geo_address'] = $location;
236+
}
237+
}
238+
}
239+
188240
// check rsvp property
189-
if ( self::check_mf_attr( 'rsvp', $properties ) ) {
190-
$commentdata['comment_meta']['semantic_linkbacks_type'] = wp_slash( 'rsvp:' . $properties['rsvp'][0] );
241+
if ( isset( $properties['rsvp'] ) ) {
242+
$commentdata['comment_meta']['semantic_linkbacks_type'] = wp_slash( 'rsvp:' . $properties['rsvp'] );
191243
} else {
192244
// get post type
193245
$commentdata['comment_meta']['semantic_linkbacks_type'] = wp_slash( self::get_entry_type( $commentdata['target'], $entry, $mf_array ) );
194246
}
247+
$blacklist = array( 'name', 'content', 'summary', 'published', 'updated', 'type', 'url', 'comment', 'bridgy-omit-link' );
248+
$blacklist = apply_filters( 'semantic_linkbacks_mf2_props_blacklist', $blacklist );
249+
foreach ( $properties as $key => $value ) {
250+
if ( ! in_array( $key, $blacklist ) ) {
251+
$commentdata['comment_meta'][ 'mf2_' . $key ] = $value;
252+
}
253+
}
254+
$commentdata['comment_meta'] = array_filter( $commentdata['comment_meta'] );
195255

196256
return $commentdata;
197257
}
198258

259+
/**
 * Convert a date-time value from a microformats property into a comment date.
 *
 * The value is parsed with strtotime(), formatted as a GMT 'Y-m-d H:i:s'
 * string and handed to get_date_from_gmt().
 *
 * @param mixed $time Date-time string. An array (a property that carried several
 *                    values) is reduced to its first element before parsing.
 * @return string|null Formatted date, or null when the value cannot be parsed.
 *                     The caller assigns the result to comment_date; presumably
 *                     a null date makes WordPress fall back to the current
 *                     time — verify against the comment insertion code.
 */
public static function convert_time( $time ) {
	// A multi-valued property arrives as an array; strtotime() only accepts strings.
	if ( is_array( $time ) ) {
		$time = reset( $time );
	}
	if ( ! is_string( $time ) ) {
		return null;
	}

	$timestamp = strtotime( $time );
	// strtotime() returns false for unreadable input (a zero timestamp is treated the same way).
	if ( ! $timestamp ) {
		return null;
	}

	// gmdate() always formats in GMT, independent of the PHP default timezone.
	return get_date_from_gmt( gmdate( 'Y-m-d H:i:s', $timestamp ), 'Y-m-d H:i:s' );
}
267+
268+
/**
 * Fetch one property from a microformats properties array, de-duplicated.
 *
 * @param string $key        Property name to look up.
 * @param array  $properties Map of property name to a list of values.
 * @return mixed|null The single value when exactly one distinct value exists,
 *                    a re-indexed list when there are several, or null when the
 *                    property is missing or has no element at index 0.
 */
public static function get_property( $key, $properties ) {
	if ( ! isset( $properties[ $key ] ) ) {
		return null;
	}

	$values = $properties[ $key ];

	// A bare string is already a single value; count() and [0] would misbehave on it.
	if ( is_string( $values ) ) {
		return '' === $values ? null : $values;
	}
	if ( ! is_array( $values ) || ! isset( $values[0] ) ) {
		return null;
	}

	// De-duplicate by hand: array_unique() string-casts its elements, which
	// collapses every nested array into the same value and raises notices.
	$seen   = array();
	$unique = array();
	foreach ( $values as $value ) {
		$fingerprint = is_scalar( $value ) ? 's:' . $value : 'c:' . serialize( $value );
		if ( isset( $seen[ $fingerprint ] ) ) {
			continue;
		}
		$seen[ $fingerprint ] = true;
		// Appending re-indexes the list so it has no key gaps.
		$unique[] = $value;
	}

	if ( 1 === count( $unique ) ) {
		return $unique[0];
	}
	return $unique;
}
280+
281+
/**
 * Is string a URL.
 *
 * With WP_DEBUG enabled only the URL format is checked (filter_var()); otherwise
 * the value must pass wp_http_validate_url().
 *
 * @param mixed $string Value to test. Anything that is not a string is rejected.
 * @return bool True when the value is accepted as a URL, false otherwise.
 */
public static function is_url( $string ) {
	if ( ! is_string( $string ) ) {
		return false;
	}
	// If debugging is on just validate that URL is validly formatted.
	// defined() guards against the constant being absent.
	if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
		return false !== filter_var( $string, FILTER_VALIDATE_URL );
	}
	// If debugging is off limit based on WordPress parameters.
	// wp_http_validate_url() hands back the URL itself on success, so cast to
	// honour the documented boolean return type.
	return (bool) wp_http_validate_url( $string );
}
298+
299+
/**
 * Accepted h types.
 *
 * Used as the array_filter() callback that reduces a multi-valued microformats
 * type list to the root types this handler understands.
 *
 * @param mixed $string Candidate type name, e.g. 'h-entry'.
 * @return bool True when the value is one of the accepted type names.
 */
public static function is_h( $string ) {
	$accepted = array( 'h-cite', 'h-entry', 'h-feed', 'h-product', 'h-event', 'h-review', 'h-recipe' );

	// Strict comparison: a loose in_array() would let true (and, on PHP 7, 0) match every entry.
	return in_array( $string, $accepted, true );
}
303+
304+
/**
 * Flatten a parsed microformats item into a simple key => value array.
 *
 * - 'type' is reduced to a single type name.
 * - Each property is reduced with get_property(); values that are still
 *   arrays with more than one element are flattened recursively.
 * - Items without a 'properties' key are passed through as they are.
 * - String values accepted by is_url() are sanitised with esc_url_raw().
 * - 'name' is dropped when identical to 'url'; a list of URLs is reduced to
 *   its first element.
 * - Empty values are removed from the result.
 *
 * @param array $item Microformats item, a list of items, or a nested property value.
 * @return array Flattened representation (empty array for non-array input).
 */
public static function flatten_microformats( $item ) {
	if ( ! is_array( $item ) ) {
		return array();
	}

	// Unwrap a one-element list. The isset() check keeps one-key associative
	// arrays intact instead of reading a non-existent index 0.
	if ( 1 === count( $item ) && isset( $item[0] ) && is_array( $item[0] ) ) {
		$item = $item[0];
	}

	$flat = array();

	if ( array_key_exists( 'type', $item ) ) {
		$types = (array) $item['type'];
		// If there are multiple types strip out everything but the standard one.
		if ( 1 < count( $types ) ) {
			$types = array_filter( $types, array( 'Linkbacks_MF2_Handler', 'is_h' ) );
		}
		// array_filter() preserves keys, so index 0 may be gone: take the first survivor.
		$first_type = reset( $types );
		if ( false !== $first_type ) {
			$flat['type'] = $first_type;
		}
	}

	if ( array_key_exists( 'properties', $item ) ) {
		$properties = is_array( $item['properties'] ) ? $item['properties'] : array();
		foreach ( $properties as $key => $value ) {
			$flat[ $key ] = self::get_property( $key, $properties );
			// Only arrays can be counted; get_property() returns a scalar for single values.
			if ( is_array( $flat[ $key ] ) && 1 < count( $flat[ $key ] ) ) {
				$flat[ $key ] = self::flatten_microformats( $flat[ $key ] );
			}
		}
	} else {
		$flat = $item;
	}

	foreach ( $flat as $key => $value ) {
		// Sanitize all URL properties
		if ( self::is_url( $value ) ) {
			$flat[ $key ] = esc_url_raw( $value );
		}
	}

	// If name and URL are the same, remove name.
	if ( array_key_exists( 'name', $flat ) && array_key_exists( 'url', $flat ) ) {
		if ( $flat['name'] === $flat['url'] ) {
			unset( $flat['name'] );
		}
	}

	// Duplicate url values for a property may be caused by implied urls https://github.com/indieweb/php-mf2/issues/110
	if ( array_key_exists( 'url', $flat ) && is_array( $flat['url'] ) ) {
		// reset() rather than [0]: the nested array_filter() may have removed index 0.
		$flat['url'] = reset( $flat['url'] );
	}

	return array_filter( $flat );
}
348+
349+
199350
/**
200351
* get all h-entry items
201352
*
@@ -254,8 +405,12 @@ public static function get_representative_author( $mf_array, $source ) {
254405
// check domain
255406
if ( isset( $mf['properties'] ) && isset( $mf['properties']['url'] ) ) {
256407
foreach ( $mf['properties']['url'] as $url ) {
257-
if ( parse_url( $url, PHP_URL_HOST ) == parse_url( $source, PHP_URL_HOST ) ) {
258-
return $mf['properties'];
408+
if ( wp_parse_url( $url, PHP_URL_HOST ) == wp_parse_url( $source, PHP_URL_HOST ) ) {
409+
$flat = self::flatten_microformats( $mf );
410+
if ( isset( $mf_array['rels']['me'] ) ) {
411+
$flat['me'] = $mf_array['rels']['me'];
412+
}
413+
return $flat;
259414
break;
260415
}
261416
}
@@ -264,6 +419,11 @@ public static function get_representative_author( $mf_array, $source ) {
264419
}
265420
}
266421

422+
// If there is no h-card then return rel=author and see what can be done with it
423+
if ( isset( $mf_array['rels']['author'] ) ) {
424+
return $mf_array['rels']['author'];
425+
}
426+
267427
return null;
268428
}
269429

@@ -379,22 +539,6 @@ public static function get_entry_type( $target, $entry, $mf_array = array() ) {
379539
return 'mention';
380540
}
381541

382-
/**
383-
* checks if $node has $key
384-
*
385-
* @param string $key the array key to check
386-
* @param array $node the array to be checked
387-
*
388-
* @return boolean
389-
*/
390-
public static function check_mf_attr( $key, $node ) {
391-
if ( isset( $node[ $key ] ) && isset( $node[ $key ][0] ) ) {
392-
return true;
393-
}
394-
395-
return false;
396-
}
397-
398542
/**
399543
* compare an url with a list of urls
400544
*
@@ -405,7 +549,7 @@ public static function check_mf_attr( $key, $node ) {
405549
* @return boolean
406550
*/
407551
public static function compare_urls( $needle, $haystack, $schemeless = true ) {
408-
if ( ! is_string( $needle ) || ! is_array( $haystack ) ) {
552+
if ( ! self::is_url( $needle ) ) {
409553
return false;
410554
}
411555
if ( true === $schemeless ) {

0 commit comments

Comments
 (0)