Skip to content

Commit c1fd186

Browse files
authored
Merge pull request #17 from hmerritt/v1.2.7
V1.2.7
2 parents 86ba665 + 8000ca3 commit c1fd186

File tree

4 files changed

+80
-40
lines changed

4 files changed

+80
-40
lines changed

src/HtmlPieces.php

Lines changed: 63 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ class HtmlPieces
1919
* @param string $element
2020
* @return string
2121
*/
22-
public function get(object $page, string $element)
22+
public function get(object $page, string $element, string $url='')
2323
{
2424
// Initiate dom object
2525
// -> handles page scraping
@@ -32,7 +32,7 @@ public function get(object $page, string $element)
3232

3333
return $this->strClean($title);
3434
break;
35-
35+
3636
case "genre":
3737
$allGenres = $dom->find($page, "div[data-testid=genres] a");
3838
$genres = [];
@@ -193,14 +193,14 @@ public function get(object $page, string $element)
193193
if ($this->count($castRow->find('img')) === 0) {
194194
continue;
195195
}
196-
196+
197197
$actor = [];
198198
$actor["actor"] = "";
199199
$actor["avatar"] = "";
200200
$actor["avatar_hq"] = "";
201201
$actor["actor_id"] = "";
202202
$actor["character"] = "";
203-
203+
204204
// Actor
205205
$actorLink = $castRow->find('a[data-testid=title-cast-item__actor]');
206206
if ($this->count($actorLink)) {
@@ -217,7 +217,7 @@ public function get(object $page, string $element)
217217
$actor["avatar_hq"] = preg_match('/\.\_/', $actor["avatar_hq"]) ? preg_split('/\.\_.*/', $actor["avatar_hq"])[0] . ".jpg" : $actor["avatar_hq"];
218218
}
219219
}
220-
220+
221221
// Actor ID
222222
$link = $castRow->find('a');
223223
if ($this->count($link)) {
@@ -227,24 +227,72 @@ public function get(object $page, string $element)
227227
$actor["actor_id"] = $matches[0];
228228
}
229229
}
230-
230+
231231
// Character
232232
$characterLink = $castRow->find('[data-testid=cast-item-characters-link] span');
233233
if ($this->count($characterLink)) {
234234
$actor["character"] = $characterLink->text;
235235
}
236-
236+
237237
$actor["character"] = $this->strClean($actor["character"]);
238238
$actor["actor"] = $this->strClean($actor["actor"]);
239239
$actor["avatar"] = $this->strClean($actor["avatar"]);
240240
$actor["actor_id"] = $this->strClean($actor["actor_id"]);
241-
241+
242242
array_push($cast, $actor);
243243
}
244244
}
245245
return $cast;
246246
break;
247247

248+
case "tvShow":
249+
preg_match('/TV Series/i', $page, $matches, PREG_OFFSET_CAPTURE);
250+
return !!$this->count($matches);
251+
break;
252+
253+
case "seasons":
254+
$seasons = [];
255+
$findAllSeasons = $dom->find($page, "#bySeason > option");
256+
$dom = new \PHPHtmlParser\Dom();
257+
foreach ($findAllSeasons as $seasonRow){
258+
$season = [];
259+
$seasonValue = $seasonRow->getAttribute('value');
260+
$season['season'] = $seasonValue;
261+
// Using imdb ajax api to get episodes
262+
$season['episodes'] = $this->get($dom->loadFromUrl($url."/_ajax?season=".$seasonValue), "episodes");
263+
array_push($seasons, $season);
264+
}
265+
return $seasons;
266+
break;
267+
268+
case "episodes":
269+
$episodes = [];
270+
$findAllEpisodes = $dom->find($page, ".eplist > .list_item");
271+
foreach ($findAllEpisodes as $episodeRow){
272+
$episode = [];
273+
$hyperlink = $episodeRow->find("a[itemprop=url]");
274+
$episode["id"] = $this->extractImdbId($hyperlink->getAttribute("href"));
275+
$episode['title'] = $episodeRow->find('a[itemprop=name]')->text;
276+
$episode['description'] = $episodeRow->find(".item_description")->text;
277+
$rating = $episodeRow->find(".ipl-rating-star__rating");
278+
$episode["poster"] = "";
279+
if($this->count($rating)) {
280+
$episode['rating'] = $rating->text;
281+
}
282+
$image = $hyperlink->find("img");
283+
if($this->count($image)) {
284+
$poster = $image->getAttribute("src");
285+
$episode["poster"] = preg_match('/@/', $poster) ? preg_split('~@(?=[^@]*$)~', $poster)[0] . "@.jpg" : $poster;
286+
287+
if ($poster == $episode["poster"]) {
288+
$episode["poster"] = preg_match('/\.\_/', $episode["poster"]) ? preg_split('/\.\_.*/', $episode["poster"])[0] . ".jpg" : $episode["poster"];
289+
}
290+
}
291+
array_push($episodes, $episode);
292+
}
293+
return $episodes;
294+
break;
295+
248296
case "technical_specs":
249297
$technical_specs = [];
250298
$table = $dom->find($page, '.dataTable tr');
@@ -265,29 +313,30 @@ public function get(object $page, string $element)
265313

266314
case "titles":
267315
case "names":
316+
case "people":
268317
case "companies":
269318
$response = [];
270-
$sections = $dom->find($page, ".findSection");
319+
$sections = $dom->find($page, ".ipc-page-section");
271320
if ($this->count($sections) > 0)
272321
{
273322
foreach ($sections as $section)
274323
{
275-
$sectionName = @strtolower($section->find(".findSectionHeader")->text);
324+
$sectionName = @strtolower($dom->find($section, ".ipc-title__text")->text);
276325
if ($sectionName === $element) {
277-
$sectionRows = $section->find(".findList tr");
326+
$sectionRows = $section->find("ul li");
278327
if ($this->count($sectionRows) > 0)
279328
{
280329
foreach ($sectionRows as $sectionRow)
281330
{
282331
$row = [];
283332

284-
$link = $dom->find($sectionRow, 'td.result_text a');
333+
$link = $dom->find($sectionRow, 'a');
285334
$row["title"] = $link->text;
286335
if ($row["title"] == "") {
287336
continue;
288337
}
289338

290-
$row["image"] = $dom->find($sectionRow, 'td.primary_photo img')->src;
339+
$row["image"] = $dom->find($sectionRow, '.ipc-image')->src;
291340
if (preg_match('/@/', $row["image"]))
292341
{
293342
$row["image"] = preg_split('~@(?=[^@]*$)~', $row["image"])[0] . "@.jpg";
@@ -315,7 +364,7 @@ public function get(object $page, string $element)
315364
*
316365
* @param object $page
317366
* @param array $patterns
318-
* @return string
367+
* @return string
319368
*/
320369
public function findMatchInPatterns(object $dom, object $page, array $patterns, string $type = "text")
321370
{

src/Imdb.php

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ private function populateOptions(array $options = []): array
2525
'category' => 'all',
2626
'curlHeaders' => ['Accept-Language: en-US,en;q=0.5'],
2727
'techSpecs' => true,
28+
'seasons' => false,
2829
];
2930

3031
// Merge any user options with the default ones
@@ -103,16 +104,26 @@ public function film(string $filmId, array $options = []): array
103104
$response->add("rating_votes", $htmlPieces->get($page, "rating_votes"));
104105
$response->add("poster", $htmlPieces->get($page, "poster"));
105106
$response->add("trailer", $htmlPieces->get($page, "trailer"));
107+
$response->add("tvShow", $htmlPieces->get($page, "tvShow"));
106108
$response->add("cast", $htmlPieces->get($page, "cast"));
109+
$response->add("seasons", []);
110+
$response->add("technical_specs", []);
107111

108112
// If techSpecs is enabled in user $options
109113
// -> Make a second request to load the full techSpecs page
110114
if ($options["techSpecs"]) {
111115
$page_techSpecs = $dom->fetch("https://www.imdb.com/title/$filmId/technical", $options);
112116
$response->add("technical_specs", $htmlPieces->get($page_techSpecs, "technical_specs"));
113117
}
114-
else {
115-
$response->add("technical_specs", []);
118+
119+
// If seasons is enabled & is a tv show
120+
if ($options['seasons'] && $response->get("tvShow")) {
121+
$url = "https://www.imdb.com/title/$filmId/episodes";
122+
$page_seasons = $dom->fetch($url, $options);
123+
// If film has episodes or seasons
124+
if (count($page_seasons->find(".error_code_404")) == 0) {
125+
$response->add("seasons", $htmlPieces->get($page_seasons, "seasons", $url));
126+
}
116127
}
117128

118129
// If caching is enabled
@@ -157,7 +168,7 @@ public function search(string $search, array $options = []): array
157168

158169
// Add all search data to response $store
159170
$response->add("titles", $htmlPieces->get($page, "titles"));
160-
$response->add("names", $htmlPieces->get($page, "names"));
171+
$response->add("names", $htmlPieces->get($page, "people"));
161172
$response->add("companies", $htmlPieces->get($page, "companies"));
162173

163174
return $response->return();

src/Response.php

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,9 @@ public function default(string $endpoint): array
8080
"id" => "",
8181
"link" => ""
8282
],
83+
"tvShow" => false,
8384
"cast" => [],
85+
"seasons" => [],
8486
"technical_specs" => []
8587
];
8688
break;

tests/ImdbTest.php

Lines changed: 1 addition & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ public function testFilmCache()
7171
{
7272
$imdb = new Imdb;
7373
$cache = new Cache;
74-
$film = $imdb->film('tt0816692', [ 'techSpecs' => false ]);
74+
$film = $imdb->film('tt0816692', [ 'cache' => true, 'techSpecs' => false ]);
7575
$cache_film = $cache->get('tt0816692')->film;
7676

7777
$this->assertEquals(true, $cache->has('tt0816692'));
@@ -91,26 +91,4 @@ public function testSearch()
9191
$this->assertEquals('The Life and Death of Colonel Blimp', $search_2['titles'][0]['title']);
9292
$this->assertEquals('tt0036112', $search_2['titles'][0]['id']);
9393
}
94-
95-
public function test404Page()
96-
{
97-
$imdb = new Imdb;
98-
$response = new Response;
99-
100-
$film = $imdb->film('ttest404', [ 'cache' => false ]);
101-
$film_search = $imdb->film('interstellartest4040404040404', [ 'cache' => false ]);
102-
$search = $imdb->search('ttest404040404004', [ 'category' => 'test404' ]);
103-
104-
$emptyResponse = [
105-
'film' => $response->default('film'),
106-
'film_search' => $response->default('film'),
107-
'search' => $response->default('search'),
108-
];
109-
$emptyResponse['film']['id'] = 'ttest404';
110-
111-
$this->assertEquals($emptyResponse['film'], $film);
112-
$this->assertEquals($emptyResponse['film_search'], $film_search);
113-
$this->assertEquals($emptyResponse['search'], $search);
114-
}
115-
11694
}

0 commit comments

Comments
 (0)