Skip to content

Commit 647f3d6

Browse files
committed
Fixed fatal bug causing script to return error 500
- check if each element exists before extracting any data
1 parent b56b414 commit 647f3d6

File tree

2 files changed

+74
-19
lines changed

2 files changed

+74
-19
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ $ composer require hmerritt/imdb-api
1515
### Film Data
1616
- Title
1717
- Year
18-
- Length
1918
- Rating
2019
- Poster
20+
- Length
2121
- Plot
2222
- Cast
2323
- actor name
@@ -75,4 +75,4 @@ $imdb->film("tt0816692", $techSpecs=true);
7575
## Dependencies
7676
> All dependencies are managed automatically by `composer`.
7777
78-
- [php-html-parser](https://github.com/paquettg/php-html-parser)
78+
- [php-html-parser](https://github.com/paquettg/php-html-parser)

src/imdb.php

Lines changed: 72 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* For the full copyright and license information, please view the LICENSE
66
* file that was distributed with this source code.
77
*
8-
* @package Imdb
8+
* @package hmerritt/imdb-api
99
* @author Harry Merritt
1010
*/
1111
namespace hmerritt\Imdb;
@@ -67,7 +67,7 @@ public function search($query, $category="all") {
6767
$row = [];
6868

6969
// Link object
70-
$item_link = $section_row->find("td.result_text a");
70+
$item_link = $this->htmlFind($section_row, 'td.result_text a');
7171
// Text value
7272
$row["title"] = $item_link->text;
7373
// Skip item if no text value
@@ -76,15 +76,10 @@ public function search($query, $category="all") {
7676
}
7777

7878
// Image object
79-
$item_image = $section_row->find("td.primary_photo img");
80-
$row["image"] = "";
81-
if (count($item_image) > 0)
79+
$row["image"] = $this->htmlFind($section_row, 'td.primary_photo img')->src;
80+
if (preg_match('/@/', $row["image"]))
8281
{
83-
$row["image"] = $item_image->src;
84-
if (preg_match('/@/', $row["image"]))
85-
{
86-
$row["image"] = preg_split('~@(?=[^@]*$)~', $row["image"])[0] . "@.jpg";
87-
}
82+
$row["image"] = preg_split('~@(?=[^@]*$)~', $row["image"])[0] . "@.jpg";
8883
}
8984

9085
// Get the id of the link
@@ -117,6 +112,7 @@ public function film($query, $techSpecs=false) {
117112
"year" => "",
118113
"length" => "",
119114
"rating" => "",
115+
"rating_votes" => "",
120116
"poster" => "",
121117
"plot" => "",
122118
"cast" => [],
@@ -153,14 +149,22 @@ public function film($query, $techSpecs=false) {
153149
// Load page
154150
$film_page = $this->loadDom($film_url);
155151

156-
$response["title"] = $this->textClean($film_page->find('.title_wrapper h1')->text);
157-
$response["year"] = $this->textClean($film_page->find('.title_wrapper h1 #titleYear a')->text);
158-
$response["rating"] = $this->textClean($film_page->find('.ratings_wrapper .ratingValue strong span')->text);
159-
$response["length"] = $this->textClean($film_page->find('.subtext time')->text);
160-
$response["plot"] = $this->textClean($film_page->find('.plot_summary .summary_text')->text);
152+
$response["title"] = $this->textClean($this->htmlFind($film_page, '.title_wrapper h1')->text);
153+
$response["year"] = $this->textClean($this->htmlFind($film_page, '.title_wrapper h1 #titleYear a')->text);
154+
$response["rating"] = $this->textClean($this->htmlFind($film_page, '.ratings_wrapper .ratingValue strong span')->text);
155+
$response["rating_votes"] = $this->textClean($this->htmlFind($film_page, '.ratings_wrapper span[itemprop=ratingCount]')->text);
156+
$response["length"] = $this->textClean($this->htmlFind($film_page, '.subtext time')->text);
157+
$response["plot"] = $this->textClean($this->htmlFind($film_page, '.plot_summary .summary_text')->text);
158+
159+
// If rating votes exist
160+
if ($this->count($response["rating_votes"]) > 0)
161+
{
162+
// Remove all non-numbers
163+
$response["rating_votes"] = preg_replace("/[^0-9 ]/", "", $response["rating_votes"]);
164+
}
161165

162166
// Get poster src
163-
$response["poster"] = $film_page->find('.poster img')->src;
167+
$response["poster"] = $this->htmlFind($film_page, '.poster img')->src;
164168
// If '@' appears in poster link
165169
if (preg_match('/@/', $response["poster"]))
166170
{
@@ -193,7 +197,7 @@ public function film($query, $techSpecs=false) {
193197
// If character link does not exist
194198
if (count($character_link) == 0)
195199
{
196-
$actor["character"] = $this->textClean($cast_row->find('.character')->text);
200+
$actor["character"] = $this->textClean($this->htmlFind($cast_row, '.character')->text);
197201
} else
198202
{
199203
$actor["character"] = $this->textClean($character_link->text);
@@ -219,6 +223,7 @@ public function film($query, $techSpecs=false) {
219223
}
220224
}
221225

226+
222227
// Fetch technical specs
223228
if ($techSpecs)
224229
{
@@ -268,6 +273,56 @@ private function loadDom($url) {
268273
}
269274

270275

276+
/**
277+
* Find object within DOM and return it, or return an empty placeholder element if nothing matches
278+
*
279+
* @param $dom object - searchable dom object
280+
* @param $selection string - css selector of what to find in dom
281+
* (callers read the attribute they need from the returned object, e.g. text, src, href)
282+
*
283+
* @return object - result of $dom->find(), or an empty Dom element when nothing was found
284+
*/
285+
private function htmlFind($dom, $selection) {
286+
// Make selection within $dom object
287+
$found = $dom->find($selection);
288+
// If anything was found in selection
289+
if (count($found) > 0)
290+
{
291+
return $found;
292+
} else
293+
{
294+
return $this->emptyDomElement();
295+
}
296+
}
297+
298+
299+
/**
300+
* Create an empty placeholder DOM element
301+
* Loads an anchor tag with empty src and href attributes into a new Dom
302+
*
303+
* Used by htmlFind() as the fallback when a selector matches nothing
304+
*
305+
* @return Dom
306+
*/
307+
private function emptyDomElement() {
308+
$dom = new Dom;
309+
$dom->load('<a src="" href=""></a>');
310+
return $dom;
311+
}
312+
313+
314+
/**
315+
* Count (either array items or string length)
316+
*
317+
* @param $item array|string - item to count
318+
*
319+
* @return int
320+
*/
321+
private function count($item) {
322+
return (is_array($item) ? count($item) : strlen($item));
323+
}
324+
325+
271326
/**
272327
* Extract an imdb-id from a string '/ttxxxxxxx/'
273328
* Returns string of id or empty string if none found

0 commit comments

Comments
 (0)