@@ -27,27 +27,28 @@ public function get(object $page, string $element)
2727
2828 switch ($ element ) {
2929 case "title " :
30- $ patterns = [".title_wrapper h1 " , " h1 [data-testid=hero-title-block__title] " ];
30+ $ patterns = ["h1 [data-testid=hero-title-block__title]" , " .title_wrapper h1 " ];
3131 $ title = $ this ->findMatchInPatterns ($ dom , $ page , $ patterns );
3232
3333 return $ this ->strClean ($ title );
3434 break ;
3535
3636 case "year " :
37- $ patterns = [".title_wrapper h1 #titleYear a " , " section section div div div ul li a " ];
37+ $ patterns = ["section section div div div ul li a " , " .title_wrapper h1 #titleYear a " ];
3838 $ year = $ this ->findMatchInPatterns ($ dom , $ page , $ patterns );
3939
4040 return $ this ->strClean ($ year );
4141 break ;
4242
4343 case "length " :
44- $ patterns = [".subtext time " , " section section div div div ul li " ];
44+ $ patterns = ["section section div div div ul li " , " .subtext time " ];
4545 $ length = "" ;
4646
47- $ length = $ dom ->find ($ page , $ patterns [0 ])->text ;
47+ $ length = $ dom ->find ($ page , $ patterns [1 ])->text ;
4848 if ($ this ->count ($ length ) > 0 ) return $ this ->strClean ($ length );
4949
50- $ iter = $ dom ->find ($ page , $ patterns [1 ]);
50+ $ length = "" ;
51+ $ iter = $ dom ->find ($ page , $ patterns [0 ]);
5152 if ($ this ->count ($ iter ) === 0 ) return $ length ;
5253
5354 // Loop row below main title
@@ -69,28 +70,29 @@ public function get(object $page, string $element)
6970 break ;
7071
7172 case "plot " :
72- $ patterns = [".plot_summary .summary_text " , " p[data-testid=plot] div " ];
73+ $ patterns = ["p[data-testid=plot] div " , " .plot_summary .summary_text " ];
7374 $ plot = $ this ->findMatchInPatterns ($ dom , $ page , $ patterns );
7475
7576 return $ this ->strClean ($ plot );
7677 break ;
7778
7879 case "rating " :
79- $ patterns = [".ratings_wrapper .ratingValue span[itemprop=ratingValue] " , " div[data-testid=hero-title-block__aggregate-rating__score] " ];
80+ $ patterns = ["main div[data-testid=hero-title-block__aggregate-rating__score] " , " .ratings_wrapper .ratingValue span[itemprop=ratingValue ] " ];
8081 $ rating = $ this ->findMatchInPatterns ($ dom , $ page , $ patterns );
8182
8283 return $ this ->strClean ($ rating );
8384 break ;
8485
8586 case "rating_votes " :
86- $ patterns = [".ratings_wrapper span[itemprop=ratingCount ] " , "div[class*=TotalRatingAmount ] " ];
87+ $ patterns = ["main div[class*=TotalRatingAmount ] " , ".ratings_wrapper span[itemprop=ratingCount ] " ];
8788 $ rating_votes = $ this ->findMatchInPatterns ($ dom , $ page , $ patterns );
89+ $ rating_votes = $ this ->unwrapFormattedNumber ($ rating_votes );
8890
89- return preg_replace ("/[^0-9 ]/ " , "" , $ this ->strClean ($ rating_votes ));
91+ return preg_replace ("/[^0-9]/ " , "" , $ this ->strClean ($ rating_votes ));
9092 break ;
9193
9294 case "poster " :
93- $ patterns = [".poster img " , ".ipc- poster img " ];
95+ $ patterns = [".ipc- poster .ipc-media img " , ".poster img " ];
9496 $ poster = $ this ->findMatchInPatterns ($ dom , $ page , $ patterns , "src " );
9597 $ poster = preg_match ('/@/ ' , $ poster ) ? preg_split ('~@(?=[^@]*$)~ ' , $ poster )[0 ] . "@.jpg " : $ poster ;
9698
@@ -100,14 +102,19 @@ public function get(object $page, string $element)
100102 case "trailer " :
101103 // section section div section section div div div div div a[aria-label^=Watch]
102104 // div a[class*=hero-media][aria-label^=Watch]
103- $ patterns = [".slate a[data-video] " , "div a[aria-label^=Watch] " ];
104- $ trailerLink = $ dom ->find ($ page , $ patterns [1 ]);
105+ $ patterns = ["div a[aria-label^=Watch] " , ".slate a[data-video] " ];
106+ $ trailerLinkOld = $ dom ->find ($ page , $ patterns [1 ]);
107+ $ trailerLink = $ dom ->find ($ page , $ patterns [0 ]);
105108
106109 if ($ this ->count ($ trailerLink )) {
107110 $ href = $ trailerLink ->getAttribute ("href " );
108111 preg_match ("/\/video\/(vi[a-zA-Z0-9]+)/ " , $ href , $ matches );
109112 $ trailerId = $ this ->count ($ matches ) > 1 ? $ matches [1 ] : "" ;
110113 $ trailerLink = $ this ->count ($ trailerId ) ? "https://www.imdb.com/video/ " .$ trailerId : "" ;
114+
115+ } elseif ($ this ->count ($ trailerLinkOld )) {
116+ $ trailerId = $ this ->count ($ trailerLinkOld ) ? $ trailerLinkOld ->getAttribute ("data-video " ) : "" ;
117+ $ trailerLink = $ this ->count ($ trailerId ) ? "https://www.imdb.com/video/ " .$ trailerId : "" ;
111118 } else {
112119 $ trailerId = "" ;
113120 $ trailerLink = "" ;
@@ -121,45 +128,82 @@ public function get(object $page, string $element)
121128
122129 case "cast " :
123130 $ cast = [];
131+ $ findAllCastOld = $ dom ->find ($ page , 'table.cast_list tr ' );
124132 $ findAllCast = $ dom ->find ($ page , 'section.title-cast div.title-cast__grid div ' );
125- foreach ($ findAllCast as $ castRow )
126- {
127- if ($ this ->count ($ castRow ->find ('img ' )) === 0 ) {
128- continue ;
129- }
130133
131- $ actor = [];
132- $ actor ["actor " ] = "" ;
133- $ actor ["actor_id " ] = "" ;
134- $ actor ["character " ] = "" ;
134+ // Use $findAllCastOld
135+ if ($ this ->count ($ findAllCastOld )) {
136+ foreach ($ findAllCastOld as $ castRow )
137+ {
138+ if ($ this ->count ($ castRow ->find ('.primary_photo ' )) === 0 ) {
139+ continue ;
140+ }
141+ $ actor = [];
142+
143+ $ characterLink = $ castRow ->find ('.character a ' );
144+ $ actor ["character " ] = count ($ characterLink ) ? $ characterLink ->text : $ dom ->find ($ castRow , '.character ' )->text ;
145+
146+ $ actorRow = $ castRow ->find ('td ' )[1 ];
147+ $ actorLink = $ actorRow ->find ('a ' );
148+ if ($ this ->count ($ actorLink ) > 0 ) {
149+ // Set actor name to text within link
150+ $ actor ["actor " ] = $ actorLink ->text ;
151+ $ actor ["actor_id " ] = $ this ->extractImdbId ($ actorLink ->href );
152+ } else {
153+ // No link found
154+ // Set actor name to whatever is there
155+ $ actor ["actor " ] = $ actorRow ->text ;
156+ }
135157
136- // Actor
137- $ actorLink = $ castRow ->find ('a[data-testid=title-cast-item__actor] ' );
138- if ($ this ->count ($ actorLink )) {
139- $ actor ["actor " ] = $ actorLink ->text ;
140- }
158+ $ actor ["character " ] = $ this ->strClean ($ actor ["character " ]);
159+ $ actor ["actor " ] = $ this ->strClean ($ actor ["actor " ]);
160+ $ actor ["actor_id " ] = $ this ->strClean ($ actor ["actor_id " ]);
141161
142- // Actor ID
143- $ link = $ castRow ->find ('a ' );
144- if ($ this ->count ($ link )) {
145- $ href = $ link ->getAttribute ("href " );
146- preg_match ("/(nm[0-9]+)/ " , $ href , $ matches );
147- if ($ this ->count ($ matches )) {
148- $ actor ["actor_id " ] = $ matches [0 ];
149- }
162+ array_push ($ cast , $ actor );
150163 }
164+ }
151165
152- // Character
153- $ characterLink = $ castRow ->find ('a[data-testid=cast-item-characters-link] ' );
154- if ($ this ->count ($ characterLink )) {
155- $ actor ["character " ] = $ characterLink ->text ;
166+ // Use 'new' $findAllCast
167+ if ($ this ->count ($ findAllCast )) {
168+ foreach ($ findAllCast as $ castRow )
169+ {
170+ if ($ this ->count ($ castRow ->find ('img ' )) === 0 ) {
171+ continue ;
172+ }
173+
174+ $ actor = [];
175+ $ actor ["actor " ] = "" ;
176+ $ actor ["actor_id " ] = "" ;
177+ $ actor ["character " ] = "" ;
178+
179+ // Actor
180+ $ actorLink = $ castRow ->find ('a[data-testid=title-cast-item__actor] ' );
181+ if ($ this ->count ($ actorLink )) {
182+ $ actor ["actor " ] = $ actorLink ->text ;
183+ }
184+
185+ // Actor ID
186+ $ link = $ castRow ->find ('a ' );
187+ if ($ this ->count ($ link )) {
188+ $ href = $ link ->getAttribute ("href " );
189+ preg_match ("/(nm[0-9]+)/ " , $ href , $ matches );
190+ if ($ this ->count ($ matches )) {
191+ $ actor ["actor_id " ] = $ matches [0 ];
192+ }
193+ }
194+
195+ // Character
196+ $ characterLink = $ castRow ->find ('a[data-testid=cast-item-characters-link] ' );
197+ if ($ this ->count ($ characterLink )) {
198+ $ actor ["character " ] = $ characterLink ->text ;
199+ }
200+
201+ $ actor ["character " ] = $ this ->strClean ($ actor ["character " ]);
202+ $ actor ["actor " ] = $ this ->strClean ($ actor ["actor " ]);
203+ $ actor ["actor_id " ] = $ this ->strClean ($ actor ["actor_id " ]);
204+
205+ array_push ($ cast , $ actor );
156206 }
157-
158- $ actor ["character " ] = $ this ->strClean ($ actor ["character " ]);
159- $ actor ["actor " ] = $ this ->strClean ($ actor ["actor " ]);
160- $ actor ["actor_id " ] = $ this ->strClean ($ actor ["actor_id " ]);
161-
162- array_push ($ cast , $ actor );
163207 }
164208 return $ cast ;
165209 break ;
@@ -255,6 +299,36 @@ public function findMatchInPatterns(object $dom, object $page, array $patterns,
255299 return $ str ;
256300 }
257301
/**
 * Expand an abbreviated count into its full integer form - "1.5K" -> "1500".
 *
 * Recognised suffixes are K (thousand), M (million) and B (billion), in
 * either letter case; surrounding whitespace is ignored. The result is
 * rounded to a whole number and rendered as plain digits, so it never
 * contains a decimal point or an exponent.
 *
 * Input that has no recognised suffix, or whose leading part is not
 * numeric, is returned unchanged.
 *
 * @param string $str
 * @return string
 */
public function unwrapFormattedNumber($str)
{
    $multipliers = [
        "K" => 1000,
        "M" => 1000000,
        "B" => 1000000000,
    ];

    $trimmed = trim((string) $str);
    if ($trimmed === "") {
        return $str;
    }

    // The final character decides the scale; compare case-insensitively
    $suffix = strtoupper(substr($trimmed, -1));
    if (!isset($multipliers[$suffix])) {
        return $str;
    }

    // Everything before the suffix must be a number, otherwise this is
    // just text that happens to end in K/M/B
    $number = trim(substr($trimmed, 0, -1));
    if (!is_numeric($number)) {
        return $str;
    }

    // number_format() rounds to 0 decimals and prints plain digits, which
    // avoids float artefacts such as "1100.0000000000002" leaking into
    // the result (callers strip non-digits, which would inflate the value)
    return number_format((float) $number * $multipliers[$suffix], 0, "", "");
}
331+
258332 /**
259333 * Extract an imdb-id from a string '/ttxxxxxxx/'
260334 * Returns string of id or empty string if none found
0 commit comments