Skip to content

Commit 7ae5d68

Browse files
committed
Get titles, names and companies from the IMDb search DOM
1 parent e7ba7cc commit 7ae5d68

File tree

3 files changed

+64
-16
lines changed

3 files changed

+64
-16
lines changed

src/HtmlPieces.php

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class HtmlPieces
1919
* @param string $element
2020
* @return string
2121
*/
22-
public function get(object $page, string $element, array $options = [])
22+
public function get(object $page, string $element)
2323
{
2424
// Initiate dom object
2525
// -> handles page scraping
@@ -124,6 +124,48 @@ public function get(object $page, string $element, array $options = [])
124124
return $technical_specs;
125125
break;
126126

127+
case "titles":
128+
case "names":
129+
case "companies":
130+
$response = [];
131+
$sections = $page->find(".findSection");
132+
if (count($sections) > 0)
133+
{
134+
foreach ($sections as $section)
135+
{
136+
$sectionName = @strtolower($section->find(".findSectionHeader")->text);
137+
if ($sectionName === $element) {
138+
$sectionRows = $section->find(".findList tr");
139+
if (count($sectionRows) > 0)
140+
{
141+
foreach ($sectionRows as $sectionRow)
142+
{
143+
$row = [];
144+
145+
$link = $dom->find($sectionRow, 'td.result_text a');
146+
$row["title"] = $link->text;
147+
if ($row["title"] == "") {
148+
continue;
149+
}
150+
151+
$row["image"] = $dom->find($sectionRow, 'td.primary_photo img')->src;
152+
if (preg_match('/@/', $row["image"]))
153+
{
154+
$row["image"] = preg_split('~@(?=[^@]*$)~', $row["image"])[0] . "@.jpg";
155+
}
156+
$row["image"] = empty($row["image"]) ? "" : $row["image"];
157+
158+
$row["id"] = $this->extractImdbId($link->href);
159+
160+
array_push($response, $row);
161+
}
162+
}
163+
}
164+
}
165+
}
166+
return $response;
167+
break;
168+
127169
default:
128170
return "";
129171
}

src/imdb.php

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,6 @@
1111
class Imdb
1212
{
1313

14-
/**
15-
* IMDB base url
16-
*
17-
* @var string
18-
*/
19-
protected $baseUrl = 'https://www.imdb.com/';
20-
2114
/**
2215
* Returns default options combined with any user options
2316
*
@@ -66,8 +59,8 @@ public function film(string $filmId, array $options = []): array
6659
// -> handles finding specific content from the dom
6760
$htmlPieces = new HtmlPieces;
6861

69-
// Load imdb page and parse the dom
70-
$page = $dom->fetch($this->baseUrl."title/".$filmId, $options);
62+
// Load imdb film page and parse the dom
63+
$page = $dom->fetch("https://www.imdb.com/title/".$filmId, $options);
7164

7265
// Add all film data to response $store
7366
$response->add("id", $filmId);
@@ -81,9 +74,10 @@ public function film(string $filmId, array $options = []): array
8174
$response->add("trailer", $htmlPieces->get($page, "trailer"));
8275
$response->add("cast", $htmlPieces->get($page, "cast"));
8376

84-
// Technical specs
77+
// If techSpecs is enabled in user $options
78+
// -> Make a second request to load the full techSpecs page
8579
if ($options["techSpecs"]) {
86-
$page_techSpecs = $dom->fetch($this->baseUrl."title/".$filmId.'/technical', $options);
80+
$page_techSpecs = $dom->fetch("https://www.imdb.com/title/$filmId/technical", $options);
8781
$response->add("technical_specs", $htmlPieces->get($page_techSpecs, "technical_specs"));
8882
}
8983
else {
@@ -109,9 +103,21 @@ public function search(string $search, array $options = []): array
109103
// -> handles what the api returns
110104
$response = new Response;
111105

112-
$response->add("titles", []);
113-
$response->add("names", []);
114-
$response->add("companies", []);
106+
// Initiate dom object
107+
// -> handles page scraping
108+
$dom = new Dom;
109+
110+
// Initiate html-pieces object
111+
// -> handles finding specific content from the dom
112+
$htmlPieces = new HtmlPieces;
113+
114+
// Load imdb search page and parse the dom
115+
$page = $dom->fetch("https://www.imdb.com/find?q=$search&s=all", $options);
116+
117+
// Add all search data to response $store
118+
$response->add("titles", $htmlPieces->get($page, "titles"));
119+
$response->add("names", $htmlPieces->get($page, "names"));
120+
$response->add("companies", $htmlPieces->get($page, "companies"));
115121

116122
return $response->return();
117123
}

tests/index.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
// Initialise Imdb
1515
// Load film data
1616
$imdb = new Imdb();
17-
$film = $imdb->film($q); // tt0816692 tt8633464
17+
$film = $imdb->search($q); // tt0816692 tt8633464
1818

1919
// Return loaded film data
2020
echo json_encode($film, JSON_PRETTY_PRINT);

0 commit comments

Comments
 (0)