55* For the full copyright and license information, please view the LICENSE
66* file that was distributed with this source code.
77*
8- * @package Imdb
8+ * @package hmerritt/imdb-api
99* @author Harry Merritt
1010*/
1111namespace hmerritt \Imdb ;
@@ -67,7 +67,7 @@ public function search($query, $category="all") {
6767 $ row = [];
6868
6969 // Link object
70- $ item_link = $ section_row -> find ( " td.result_text a " );
70+ $ item_link = $ this -> htmlFind ( $ section_row , ' td.result_text a ' );
7171 // Text value
7272 $ row ["title " ] = $ item_link ->text ;
7373 // Skip item if no text value
@@ -76,15 +76,10 @@ public function search($query, $category="all") {
7676 }
7777
7878 // Image object
79- $ item_image = $ section_row ->find ("td.primary_photo img " );
80- $ row ["image " ] = "" ;
81- if (count ($ item_image ) > 0 )
79+ $ row ["image " ] = $ this ->htmlFind ($ section_row , 'td.primary_photo img ' )->src ;
80+ if (preg_match ('/@/ ' , $ row ["image " ]))
8281 {
83- $ row ["image " ] = $ item_image ->src ;
84- if (preg_match ('/@/ ' , $ row ["image " ]))
85- {
86- $ row ["image " ] = preg_split ('~@(?=[^@]*$)~ ' , $ row ["image " ])[0 ] . "@.jpg " ;
87- }
82+ $ row ["image " ] = preg_split ('~@(?=[^@]*$)~ ' , $ row ["image " ])[0 ] . "@.jpg " ;
8883 }
8984
9085 // Get the id of the link
@@ -117,6 +112,7 @@ public function film($query, $techSpecs=false) {
117112 "year " => "" ,
118113 "length " => "" ,
119114 "rating " => "" ,
115+ "rating_votes " => "" ,
120116 "poster " => "" ,
121117 "plot " => "" ,
122118 "cast " => [],
@@ -153,14 +149,22 @@ public function film($query, $techSpecs=false) {
153149 // Load page
154150 $ film_page = $ this ->loadDom ($ film_url );
155151
156- $ response ["title " ] = $ this ->textClean ($ film_page ->find ('.title_wrapper h1 ' )->text );
157- $ response ["year " ] = $ this ->textClean ($ film_page ->find ('.title_wrapper h1 #titleYear a ' )->text );
158- $ response ["rating " ] = $ this ->textClean ($ film_page ->find ('.ratings_wrapper .ratingValue strong span ' )->text );
159- $ response ["length " ] = $ this ->textClean ($ film_page ->find ('.subtext time ' )->text );
160- $ response ["plot " ] = $ this ->textClean ($ film_page ->find ('.plot_summary .summary_text ' )->text );
152+ $ response ["title " ] = $ this ->textClean ($ this ->htmlFind ($ film_page , '.title_wrapper h1 ' )->text );
153+ $ response ["year " ] = $ this ->textClean ($ this ->htmlFind ($ film_page , '.title_wrapper h1 #titleYear a ' )->text );
154+ $ response ["rating " ] = $ this ->textClean ($ this ->htmlFind ($ film_page , '.ratings_wrapper .ratingValue strong span ' )->text );
155+ $ response ["rating_votes " ] = $ this ->textClean ($ this ->htmlFind ($ film_page , '.ratings_wrapper span[itemprop=ratingCount] ' )->text );
156+ $ response ["length " ] = $ this ->textClean ($ this ->htmlFind ($ film_page , '.subtext time ' )->text );
157+ $ response ["plot " ] = $ this ->textClean ($ this ->htmlFind ($ film_page , '.plot_summary .summary_text ' )->text );
158+
159+ // If rating votes exists
160+ if ($ this ->count ($ response ["rating_votes " ]) > 0 )
161+ {
162+ // Remove all non-numbers
163+ $ response ["rating_votes " ] = preg_replace ("/[^0-9 ]/ " , "" , $ response ["rating_votes " ]);
164+ }
161165
162166 // Get poster src
163- $ response ["poster " ] = $ film_page -> find ( '.poster img ' )->src ;
167+ $ response ["poster " ] = $ this -> htmlFind ( $ film_page , '.poster img ' )->src ;
164168 // If '@' appears in poster link
165169 if (preg_match ('/@/ ' , $ response ["poster " ]))
166170 {
@@ -193,7 +197,7 @@ public function film($query, $techSpecs=false) {
193197 // If character link does not exist
194198 if (count ($ character_link ) == 0 )
195199 {
196- $ actor ["character " ] = $ this ->textClean ($ cast_row -> find ( '.character ' )->text );
200+ $ actor ["character " ] = $ this ->textClean ($ this -> htmlFind ( $ cast_row , '.character ' )->text );
197201 } else
198202 {
199203 $ actor ["character " ] = $ this ->textClean ($ character_link ->text );
@@ -219,6 +223,7 @@ public function film($query, $techSpecs=false) {
219223 }
220224 }
221225
226+
222227 // Fetch technical specs
223228 if ($ techSpecs )
224229 {
@@ -268,6 +273,56 @@ private function loadDom($url) {
268273 }
269274
270275
/**
 * Find elements within a DOM object, falling back to an empty placeholder.
 *
 * Runs the CSS selector against $dom. When nothing matches, the result of
 * emptyDomElement() is returned instead, so callers can read properties
 * such as ->text or ->src without checking for a match first.
 *
 * @param object $dom       searchable dom object (must provide find())
 * @param string $selection css selector of what to find in dom
 *
 * @return object the find() result, or the empty placeholder when nothing matched
 */
private function htmlFind($dom, $selection) {
    // Make selection within $dom object
    $found = $dom->find($selection);

    // Nothing matched: hand back the placeholder.
    // The null check keeps count() from raising a TypeError on PHP 8.
    if ($found === null || count($found) === 0)
    {
        return $this->emptyDomElement();
    }

    return $found;
}
297+
298+
/**
 * Build a placeholder DOM for selections that matched nothing.
 *
 * Creates a new Dom instance and loads a single empty anchor tag (with
 * blank src and href attributes) into it. htmlFind() returns this object
 * when its selector finds no elements.
 *
 * @return Dom
 */
private function emptyDomElement() {
    $placeholder = new Dom;
    $placeholder->load('<a src="" href=""></a>');
    return $placeholder;
}
312+
313+
/**
 * Count (either array items or string length)
 *
 * @param array|string $item item to count
 *
 * @return int number of elements for an array, otherwise the string length
 */
private function count($item) {
    if (is_array($item))
    {
        return count($item);
    }

    // Cast first so null or numeric values are measured as strings
    // (passing null to strlen() is deprecated as of PHP 8.1).
    return strlen((string) $item);
}
324+
325+
271326 /**
272327 * Extract an imdb-id from a string '/ttxxxxxxx/'
273328 * Returns string of id or empty string if none found
0 commit comments