@@ -104,10 +104,13 @@ public static function get_rel_mapper() {
104104 * @return array
105105 */
106106 public static function generate_commentdata ( $ commentdata ) {
107- global $ wpdb ;
108-
109- // add source
110- $ source = $ commentdata ['comment_author_url ' ];
107+ // Use new webmention source meta key.
108+ if ( array_key_exists ( 'webmention_source_url ' , $ commentdata ['comment_meta ' ] ) ) {
109+ $ source = $ commentdata ['comment_meta ' ]['webmention_source_url ' ];
110+ } // Fallback to comment author url.
111+ else {
112+ $ source = $ commentdata ['comment_author_url ' ];
113+ }
111114
112115 // parse source html
113116 $ parser = new Parser ( $ commentdata ['remote_source_original ' ], $ source );
@@ -127,75 +130,223 @@ public static function generate_commentdata( $commentdata ) {
127130 return array ();
128131 }
129132
130- // the entry properties
131- $ properties = $ entry [ ' properties ' ];
133+ $ commentdata [ ' remote_source_properties ' ] = $ properties = array_filter ( self :: flatten_microformats ( $ entry ) );
134+ $ commentdata [ ' remote_source_rels ' ] = $ rels = $ mf_array [ ' rels ' ];
132135
133136 // try to find some content
134137 // @link http://indiewebcamp.com/comments-presentation
135- if ( self :: check_mf_attr ( 'summary ' , $ properties ) ) {
136- $ commentdata ['comment_content ' ] = wp_slash ( $ properties ['summary ' ][ 0 ] );
137- } elseif ( self :: check_mf_attr ( 'content ' , $ properties ) ) {
138- $ commentdata ['comment_content ' ] = wp_filter_kses ( $ properties ['content ' ][0 ][ 'html ' ] );
139- } elseif ( self :: check_mf_attr ( 'name ' , $ properties ) ) {
140- $ commentdata ['comment_content ' ] = wp_slash ( $ properties ['name ' ][ 0 ] );
138+ if ( isset ( $ properties [ 'summary ' ] ) ) {
139+ $ commentdata ['comment_content ' ] = wp_slash ( $ properties ['summary ' ] );
140+ } elseif ( isset ( $ properties [ 'content ' ] ) ) {
141+ $ commentdata ['comment_content ' ] = wp_filter_kses ( $ properties ['content ' ]['html ' ] );
142+ } elseif ( isset ( $ properties [ 'name ' ] ) ) {
143+ $ commentdata ['comment_content ' ] = wp_slash ( $ properties ['name ' ] );
141144 }
142145 $ commentdata ['comment_content ' ] = trim ( $ commentdata ['comment_content ' ] );
143146
144147 // set the right date
145- if ( self ::check_mf_attr ( 'published ' , $ properties ) ) {
146- $ time = strtotime ( $ properties ['published ' ][0 ] );
147- $ commentdata ['comment_date ' ] = get_date_from_gmt ( date ( 'Y-m-d H:i:s ' , $ time ), 'Y-m-d H:i:s ' );
148- } elseif ( self ::check_mf_attr ( 'updated ' , $ properties ) ) {
149- $ time = strtotime ( $ properties ['updated ' ][0 ] );
150- $ commentdata ['comment_date ' ] = get_date_from_gmt ( date ( 'Y-m-d H:i:s ' , $ time ), 'Y-m-d H:i:s ' );
148+ if ( isset ( $ properties ['published ' ] ) ) {
149+ $ commentdata ['comment_date ' ] = self ::convert_time ( $ properties ['published ' ] );
150+ } elseif ( isset ( $ properties ['updated ' ] ) ) {
151+ $ commentdata ['comment_date ' ] = self ::convert_time ( $ properties ['updated ' ] );
151152 }
152153
153154 $ author = null ;
154155
155156 // check if h-card has an author
156- if ( isset ( $ properties ['author ' ] ) && isset ( $ properties [ ' author ' ][ 0 ][ ' properties ' ] ) ) {
157- $ author = $ properties ['author ' ][ 0 ][ ' properties ' ] ;
157+ if ( isset ( $ properties ['author ' ] ) ) {
158+ $ author = $ properties ['author ' ];
158159 } else {
159160 $ author = self ::get_representative_author ( $ mf_array , $ source );
160161 }
161162
162163 // if author is present use the informations for the comment
163164 if ( $ author ) {
164- if ( self ::check_mf_attr ( 'name ' , $ author ) ) {
165- $ commentdata ['comment_author ' ] = wp_slash ( $ author ['name ' ][0 ] );
165+ if ( ! is_array ( $ author ) ) {
166+ if ( self ::is_url ( $ author ) ) {
167+ $ response = Linkbacks_Handler::retrieve ( $ author );
168+ if ( ! is_wp_error ( $ response ) ) {
169+ $ parser = new Parser ( wp_remote_retrieve_body ( $ response ), $ author );
170+ $ author_array = $ parser ->parse ( true );
171+ $ properties ['author ' ] = $ author = self ::get_representative_author ( $ author_array , $ author );
172+ }
173+ } else {
174+ $ comment_data ['comment_author ' ] = wp_slash ( $ author );
175+ }
166176 }
167177
168- if ( self ::check_mf_attr ( 'email ' , $ author ) ) {
169- $ commentdata ['comment_author_email ' ] = wp_slash ( $ author ['email ' ][0 ] );
170- }
178+ if ( is_array ( $ author ) ) {
179+ if ( ! isset ( $ author ['me ' ] ) ) {
180+ if ( isset ( $ mf_array ['rels ' ]['me ' ] ) ) {
181+ $ properties ['author ' ]['me ' ] = $ author ['me ' ] = $ mf_array ['rels ' ]['me ' ];
182+ }
183+ }
184+ if ( isset ( $ properties ['name ' ] ) ) {
185+ $ commentdata ['comment_author ' ] = wp_slash ( $ author ['name ' ] );
186+ }
171187
172- if ( self ::check_mf_attr ( 'url ' , $ author ) ) {
173- $ commentdata ['comment_meta ' ]['semantic_linkbacks_author_url ' ] = esc_url_raw ( $ author ['url ' ][0 ] );
174- }
188+ if ( isset ( $ author ['email ' ] ) ) {
189+ $ commentdata ['comment_author_email ' ] = wp_slash ( $ author ['email ' ] );
190+ }
191+
192+ if ( isset ( $ author ['url ' ] ) ) {
193+ $ commentdata ['comment_meta ' ]['semantic_linkbacks_author_url ' ] = $ author ['url ' ];
194+ }
175195
176- if ( self ::check_mf_attr ( 'photo ' , $ author ) ) {
177- $ commentdata ['comment_meta ' ]['semantic_linkbacks_avatar ' ] = esc_url_raw ( $ author ['photo ' ][0 ] );
196+ if ( isset ( $ properties ['photo ' ] ) ) {
197+ $ commentdata ['comment_meta ' ]['semantic_linkbacks_avatar ' ] = $ author ['photo ' ];
198+ }
178199 }
179200 }
180201
181202 // set canonical url (u-url)
182- if ( self :: check_mf_attr ( 'url ' , $ properties ) ) {
183- $ commentdata ['comment_meta ' ]['semantic_linkbacks_canonical ' ] = esc_url_raw ( $ properties ['url ' ][ 0 ] ) ;
203+ if ( isset ( $ properties [ 'url ' ] ) ) {
204+ $ commentdata ['comment_meta ' ]['semantic_linkbacks_canonical ' ] = $ properties ['url ' ];
184205 } else {
185206 $ commentdata ['comment_meta ' ]['semantic_linkbacks_canonical ' ] = esc_url_raw ( $ source );
186207 }
187208
209+ // If u-syndication is not set use rel syndication
210+ if ( array_key_exists ( 'syndication ' , $ rels ) && ! array_key_exists ( 'syndication ' , $ properties ) ) {
211+ $ properties ['syndication ' ] = $ rels ['syndication ' ];
212+ }
213+
214+ // Check and parse for location property
215+ if ( array_key_exists ( 'location ' , $ properties ) ) {
216+ $ location = $ properties ['location ' ];
217+ if ( is_array ( $ location ) ) {
218+ if ( array_key_exists ( 'latitude ' , $ location ) ) {
219+ $ commentdata ['comment_meta ' ]['geo_latitude ' ] = $ location ['latitude ' ];
220+ }
221+ if ( array_key_exists ( 'longitude ' , $ location ) ) {
222+ $ commentdata ['comment_meta ' ]['geo_longitude ' ] = $ location ['longitude ' ];
223+ }
224+ if ( array_key_exists ( 'name ' , $ location ) ) {
225+ $ commentdata ['comment_meta ' ]['geo_address ' ] = $ location ['name ' ];
226+ }
227+ } else {
228+ if ( substr ( $ location , 0 , 4 ) == 'geo: ' ) {
229+ $ geo = explode ( ': ' , substr ( urldecode ( $ location ), 4 ) );
230+ $ geo = explode ( '; ' , $ geo [0 ] );
231+ $ coords = explode ( ', ' , $ geo [0 ] );
232+ $ commentdata ['comment_meta ' ]['geo_latitude ' ] = trim ( $ coords [0 ] );
233+ $ commentdata ['comment_meta ' ]['geo_longitude ' ] = trim ( $ coords [1 ] );
234+ } else {
235+ $ commentdata ['comment_meta ' ]['geo_address ' ] = $ location ;
236+ }
237+ }
238+ }
239+
188240 // check rsvp property
189- if ( self :: check_mf_attr ( 'rsvp ' , $ properties ) ) {
190- $ commentdata ['comment_meta ' ]['semantic_linkbacks_type ' ] = wp_slash ( 'rsvp: ' . $ properties ['rsvp ' ][ 0 ] );
241+ if ( isset ( $ properties [ 'rsvp ' ] ) ) {
242+ $ commentdata ['comment_meta ' ]['semantic_linkbacks_type ' ] = wp_slash ( 'rsvp: ' . $ properties ['rsvp ' ] );
191243 } else {
192244 // get post type
193245 $ commentdata ['comment_meta ' ]['semantic_linkbacks_type ' ] = wp_slash ( self ::get_entry_type ( $ commentdata ['target ' ], $ entry , $ mf_array ) );
194246 }
247+ $ blacklist = array ( 'name ' , 'content ' , 'summary ' , 'published ' , 'updated ' , 'type ' , 'url ' , 'comment ' , 'bridgy-omit-link ' );
248+ $ blacklist = apply_filters ( 'semantic_linkbacks_mf2_props_blacklist ' , $ blacklist );
249+ foreach ( $ properties as $ key => $ value ) {
250+ if ( ! in_array ( $ key , $ blacklist ) ) {
251+ $ commentdata ['comment_meta ' ][ 'mf2_ ' . $ key ] = $ value ;
252+ }
253+ }
254+ $ commentdata ['comment_meta ' ] = array_filter ( $ commentdata ['comment_meta ' ] );
195255
196256 return $ commentdata ;
197257 }
198258
/**
 * Convert a date/time string into the datetime string produced by get_date_from_gmt().
 *
 * @param string $time Date/time string in any format strtotime() understands.
 * @return string|null Datetime formatted as 'Y-m-d H:i:s', or null when the input is not a
 *                     string or cannot be parsed. Per the original note, a null result means
 *                     the comment time will be set to now.
 */
public static function convert_time( $time ) {
	// A multi-valued property can arrive as an array; strtotime() only accepts strings.
	if ( ! is_string( $time ) ) {
		return null;
	}

	$timestamp = strtotime( $time );

	// strtotime() signals failure with false. Compare strictly so that timestamp 0
	// (the Unix epoch) is still treated as a successfully parsed time.
	if ( false === $timestamp ) {
		return null;
	}

	return get_date_from_gmt( date( 'Y-m-d H:i:s', $timestamp ), 'Y-m-d H:i:s' );
}
267+
/**
 * Fetch one property from a microformats2 property map, collapsing single-value lists.
 *
 * @param string $key        Property name to look up.
 * @param array  $properties Property map (property name => list of values).
 * @return mixed|null The sole value when only one distinct value exists, a re-indexed list of
 *                    the distinct values when there are several, or null when the property is
 *                    missing, empty, or has no element at index 0.
 */
public static function get_property( $key, $properties ) {
	if ( ! isset( $properties[ $key ] ) ) {
		return null;
	}

	$values = $properties[ $key ];

	// A bare string is already a single value. Calling count() on it is an error on PHP 8,
	// and indexing it with [0] would return only its first character.
	if ( is_string( $values ) ) {
		return '' === $values ? null : $values;
	}

	if ( ! is_array( $values ) || ! isset( $values[0] ) ) {
		return null;
	}

	// De-duplicate with a strict comparison rather than array_unique(): array_unique() compares
	// the string casts of the elements, so nested arrays (embedded microformats) all look equal
	// and every one after the first would be dropped.
	$unique = array();
	foreach ( $values as $value ) {
		if ( ! in_array( $value, $unique, true ) ) {
			$unique[] = $value;
		}
	}

	if ( 1 === count( $unique ) ) {
		return $unique[0];
	}

	return $unique;
}
280+
/**
 * Is the given value a URL string.
 *
 * @param mixed $string Value to test; anything that is not a string is rejected.
 * @return bool True when the value is accepted as a URL, false otherwise.
 */
public static function is_url( $string ) {
	if ( ! is_string( $string ) ) {
		return false;
	}

	// If debugging is on, only check that the URL is validly formatted.
	if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
		return false !== filter_var( $string, FILTER_VALIDATE_URL );
	}

	// If debugging is off, limit based on WordPress parameters. wp_http_validate_url() returns
	// the URL itself on success, so compare against false to honour the bool return type.
	return false !== wp_http_validate_url( $string );
}
298+
/**
 * Check whether a value is one of the accepted microformats2 h-* types.
 *
 * @param mixed $string Type name to test, e.g. 'h-entry'.
 * @return bool True when the value is exactly one of the accepted type strings.
 */
public static function is_h( $string ) {
	$accepted = array( 'h-cite', 'h-entry', 'h-feed', 'h-product', 'h-event', 'h-review', 'h-recipe' );

	// Strict comparison: a loose in_array() would also accept non-string values such as true.
	return in_array( $string, $accepted, true );
}
303+
/**
 * Flatten a parsed microformats2 item into a simple key => value array.
 *
 * The first accepted type is stored under 'type', and every entry of the item's 'properties'
 * is reduced with get_property(). Multi-element values are flattened recursively. Values that
 * is_url() accepts are passed through esc_url_raw(), and empty values are removed.
 *
 * @param array $item A microformats2 item, or a list wrapping one.
 * @return array Flattened data; an empty array when $item is not an array.
 */
public static function flatten_microformats( $item ) {
	if ( ! is_array( $item ) ) {
		return array();
	}

	// Unwrap a single-element list. Index 0 is checked explicitly because an associative
	// array with exactly one key also has a count of 1 but has nothing at [0].
	if ( 1 === count( $item ) && isset( $item[0] ) ) {
		$item = $item[0];
		if ( ! is_array( $item ) ) {
			return array();
		}
	}

	$flat = array();

	if ( array_key_exists( 'type', $item ) ) {
		$types = (array) $item['type'];
		// If there are multiple types strip out everything but the standard one.
		if ( 1 < count( $types ) ) {
			// array_filter() preserves keys, so re-index before reading the first entry.
			$types = array_values( array_filter( $types, array( __CLASS__, 'is_h' ) ) );
		}
		if ( isset( $types[0] ) ) {
			$flat['type'] = $types[0];
		}
	}

	if ( array_key_exists( 'properties', $item ) ) {
		$properties = is_array( $item['properties'] ) ? $item['properties'] : array();
		foreach ( array_keys( $properties ) as $key ) {
			$value = self::get_property( $key, $properties );
			// get_property() returns scalars for single-valued properties, and count() on a
			// scalar is an error on PHP 8, so only multi-element arrays are flattened further.
			if ( is_array( $value ) && 1 < count( $value ) ) {
				$value = self::flatten_microformats( $value );
			}
			$flat[ $key ] = $value;
		}
	} else {
		// No properties to reduce: pass the item through as-is.
		$flat = $item;
	}

	// Sanitize all URL properties.
	foreach ( $flat as $key => $value ) {
		if ( self::is_url( $value ) ) {
			$flat[ $key ] = esc_url_raw( $value );
		}
	}

	// If name and URL are the same, remove name.
	if ( array_key_exists( 'name', $flat ) && array_key_exists( 'url', $flat ) && $flat['name'] === $flat['url'] ) {
		unset( $flat['name'] );
	}

	// Duplicate url values for a property may be caused by implied urls
	// https://github.com/indieweb/php-mf2/issues/110
	if ( array_key_exists( 'url', $flat ) && is_array( $flat['url'] ) ) {
		// A missing index 0 becomes null, which the final array_filter() drops.
		$flat['url'] = isset( $flat['url'][0] ) ? $flat['url'][0] : null;
	}

	return array_filter( $flat );
}
348+
349+
199350 /**
200351 * get all h-entry items
201352 *
@@ -254,8 +405,12 @@ public static function get_representative_author( $mf_array, $source ) {
254405 // check domain
255406 if ( isset ( $ mf ['properties ' ] ) && isset ( $ mf ['properties ' ]['url ' ] ) ) {
256407 foreach ( $ mf ['properties ' ]['url ' ] as $ url ) {
257- if ( parse_url ( $ url , PHP_URL_HOST ) == parse_url ( $ source , PHP_URL_HOST ) ) {
258- return $ mf ['properties ' ];
408+ if ( wp_parse_url ( $ url , PHP_URL_HOST ) == wp_parse_url ( $ source , PHP_URL_HOST ) ) {
409+ $ flat = self ::flatten_microformats ( $ mf );
410+ if ( isset ( $ mf_array ['rels ' ]['me ' ] ) ) {
411+ $ flat ['me ' ] = $ mf_array ['rels ' ]['me ' ];
412+ }
413+ return $ flat ;
259414 break ;
260415 }
261416 }
@@ -264,6 +419,11 @@ public static function get_representative_author( $mf_array, $source ) {
264419 }
265420 }
266421
422+ // If there is no h-card then return rel=author and see what can be done with it
423+ if ( isset ( $ mf_array ['rels ' ]['author ' ] ) ) {
424+ return $ mf_array ['rels ' ]['author ' ];
425+ }
426+
267427 return null ;
268428 }
269429
@@ -379,22 +539,6 @@ public static function get_entry_type( $target, $entry, $mf_array = array() ) {
379539 return 'mention ' ;
380540 }
381541
382- /**
383- * checks if $node has $key
384- *
385- * @param string $key the array key to check
386- * @param array $node the array to be checked
387- *
388- * @return boolean
389- */
390- public static function check_mf_attr ( $ key , $ node ) {
391- if ( isset ( $ node [ $ key ] ) && isset ( $ node [ $ key ][0 ] ) ) {
392- return true ;
393- }
394-
395- return false ;
396- }
397-
398542 /**
399543 * compare an url with a list of urls
400544 *
@@ -405,7 +549,7 @@ public static function check_mf_attr( $key, $node ) {
405549 * @return boolean
406550 */
407551 public static function compare_urls ( $ needle , $ haystack , $ schemeless = true ) {
408- if ( ! is_string ( $ needle ) || ! is_array ( $ haystack ) ) {
552+ if ( ! self :: is_url ( $ needle ) ) {
409553 return false ;
410554 }
411555 if ( true === $ schemeless ) {
0 commit comments