Fixed fatal bug causing script to return error 500

hmerritt · hmerritt · commit 647f3d669044 · 2019-08-21T14:52:44.000+01:00
- check if each element exists before extracting any data
diff --git a/README.md b/README.md
@@ -15,9 +15,9 @@ $ composer require hmerritt/imdb-api
 ### Film Data
 - Title
 - Year
-- Length
 - Rating
 - Poster
+- Length
 - Plot
 - Cast
   - actor name
@@ -75,4 +75,4 @@ $imdb->film("tt0816692", $techSpecs=true);
 ## Dependencies
 > All dependencies are managed automatically by `composer`.
 
-- [php-html-parser](https://github.com/paquettg/php-html-parser)
+- [php-html-parser](https://github.com/paquettg/php-html-parser)
diff --git a/src/imdb.php b/src/imdb.php
@@ -5,7 +5,7 @@
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 *
-* @package Imdb
+* @package hmerritt/imdb-api
 * @author Harry Merritt
 */
 namespace hmerritt\Imdb;
@@ -67,7 +67,7 @@ public function search($query, $category="all") {
                            $row = [];
 
                            // Link ojbect
-                           $item_link = $section_row->find("td.result_text a");
+                           $item_link = $this->htmlFind($section_row, 'td.result_text a');
                            // Text value
                            $row["title"] = $item_link->text;
                            // Skip item if no text value
@@ -76,15 +76,10 @@ public function search($query, $category="all") {
                            }
 
                           // Image object
-                          $item_image = $section_row->find("td.primary_photo img");
-                          $row["image"] = "";
-                          if (count($item_image) > 0)
+                          $row["image"] = $this->htmlFind($section_row, 'td.primary_photo img')->src;
+                          if (preg_match('/@/', $row["image"]))
                           {
-                              $row["image"] = $item_image->src;
-                              if (preg_match('/@/', $row["image"]))
-                              {
-                                  $row["image"] = preg_split('~@(?=[^@]*$)~', $row["image"])[0] . "@.jpg";
-                              }
+                              $row["image"] = preg_split('~@(?=[^@]*$)~', $row["image"])[0] . "@.jpg";
                           }
 
                            // Get the id of the link
@@ -117,6 +112,7 @@ public function film($query, $techSpecs=false) {
           "year" => "",
           "length" => "",
           "rating" => "",
+          "rating_votes" => "",
           "poster" => "",
           "plot" => "",
           "cast" => [],
@@ -153,14 +149,22 @@ public function film($query, $techSpecs=false) {
         // Load page
         $film_page = $this->loadDom($film_url);
 
-        $response["title"] =  $this->textClean($film_page->find('.title_wrapper h1')->text);
-        $response["year"] =   $this->textClean($film_page->find('.title_wrapper h1 #titleYear a')->text);
-        $response["rating"] = $this->textClean($film_page->find('.ratings_wrapper .ratingValue strong span')->text);
-        $response["length"] = $this->textClean($film_page->find('.subtext time')->text);
-        $response["plot"] =   $this->textClean($film_page->find('.plot_summary .summary_text')->text);
+        $response["title"] =        $this->textClean($this->htmlFind($film_page, '.title_wrapper h1')->text);
+        $response["year"] =         $this->textClean($this->htmlFind($film_page, '.title_wrapper h1 #titleYear a')->text);
+        $response["rating"] =       $this->textClean($this->htmlFind($film_page, '.ratings_wrapper .ratingValue strong span')->text);
+        $response["rating_votes"] = $this->textClean($this->htmlFind($film_page, '.ratings_wrapper span[itemprop=ratingCount]')->text);
+        $response["length"] =       $this->textClean($this->htmlFind($film_page, '.subtext time')->text);
+        $response["plot"] =         $this->textClean($this->htmlFind($film_page, '.plot_summary .summary_text')->text);
+
+        // If rating votes exist
+        if ($this->count($response["rating_votes"]) > 0)
+        {
+            // Remove everything except digits and spaces
+            $response["rating_votes"] = preg_replace("/[^0-9 ]/", "", $response["rating_votes"]);
+        }
 
         // Get poster src
-        $response["poster"] = $film_page->find('.poster img')->src;
+        $response["poster"] = $this->htmlFind($film_page, '.poster img')->src;
         // If '@' appears in poster link
         if (preg_match('/@/', $response["poster"]))
         {
@@ -193,7 +197,7 @@ public function film($query, $techSpecs=false) {
                 // If character link does not exist
                 if (count($character_link) == 0)
                 {
-                    $actor["character"] = $this->textClean($cast_row->find('.character')->text);
+                    $actor["character"] = $this->textClean($this->htmlFind($cast_row, '.character')->text);
                 } else
                 {
                     $actor["character"] = $this->textClean($character_link->text);
@@ -219,6 +223,7 @@ public function film($query, $techSpecs=false) {
             }
         }
 
+
         // Fetch technical specs
         if ($techSpecs)
         {
@@ -268,6 +273,56 @@ private function loadDom($url) {
     }
 
 
+    /**
+     * Find elements within a DOM object, or return an empty placeholder if nothing matches
+     *
+     * @param $dom object - searchable dom object
+     * @param $selection string - css selector of what to find in dom
+     * Note: no attribute is extracted here; callers read text, src, href etc. from the result
+     *
+     * @return object - result of $dom->find(), or the placeholder from emptyDomElement()
+     */
+    private function htmlFind($dom, $selection) {
+        // Make selection within $dom object
+        $found = $dom->find($selection);
+        // If anything was found in selection
+        if (count($found) > 0)
+        {
+            return $found;
+        } else
+        {
+            return $this->emptyDomElement();
+        }
+    }
+
+
+    /**
+     * Create an empty placeholder DOM object
+     *
+     * Loads a single <a> tag with blank src and href attributes;
+     * htmlFind() returns this when a selector matches nothing
+     *
+     * @return Dom
+     */
+    private function emptyDomElement() {
+        $dom = new Dom;
+        $dom->load('<a src="" href=""></a>');
+        return $dom;
+    }
+
+
+    /**
+     * Count an item: number of elements for an array, otherwise its string length
+     *
+     * @param $item array|string - item to count
+     *
+     * @return int
+     */
+    private function count($item) {
+        return (is_array($item) ? count($item) : strlen($item));
+    }
+
+
     /**
      * Extract an imdb-id from a string '/ttxxxxxxx/'
      * Returns string of id or empty string if none found