Skip to content

Commit 88e07e5

Browse files
authored
Merge pull request #1000 from AudricV/yt-streaminfoitemextractor-improvements
[YouTube] Improve YoutubeStreamInfoItemExtractor
2 parents 896d7e0 + 0766b1d commit 88e07e5

File tree

2 files changed

+106
-38
lines changed

2 files changed

+106
-38
lines changed

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -403,12 +403,7 @@ private void collectStreamsFrom(@Nonnull final StreamInfoItemsCollector collecto
403403
.map(JsonObject.class::cast)
404404
.filter(video -> video.has(PLAYLIST_VIDEO_RENDERER))
405405
.map(video -> new YoutubeStreamInfoItemExtractor(
406-
video.getObject(PLAYLIST_VIDEO_RENDERER), timeAgoParser) {
407-
@Override
408-
public long getViewCount() {
409-
return -1;
410-
}
411-
})
406+
video.getObject(PLAYLIST_VIDEO_RENDERER), timeAgoParser))
412407
.forEachOrdered(collector::commit);
413408
}
414409

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java

Lines changed: 105 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
1111
import org.schabi.newpipe.extractor.stream.StreamType;
1212
import org.schabi.newpipe.extractor.utils.JsonUtils;
13+
import org.schabi.newpipe.extractor.utils.Parser;
1314
import org.schabi.newpipe.extractor.utils.Utils;
1415

1516
import javax.annotation.Nullable;
@@ -45,6 +46,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
4546
private final JsonObject videoInfo;
4647
private final TimeAgoParser timeAgoParser;
4748
private StreamType cachedStreamType;
49+
private Boolean isPremiere;
4850

4951
/**
5052
* Creates an extractor of StreamInfoItems from a YouTube page.
@@ -66,6 +68,10 @@ public StreamType getStreamType() {
6668

6769
final JsonArray badges = videoInfo.getArray("badges");
6870
for (final Object badge : badges) {
71+
if (!(badge instanceof JsonObject)) {
72+
continue;
73+
}
74+
6975
final JsonObject badgeRenderer
7076
= ((JsonObject) badge).getObject("metadataBadgeRenderer");
7177
if (badgeRenderer.getString("style", "").equals("BADGE_STYLE_TYPE_LIVE_NOW")
@@ -76,6 +82,10 @@ public StreamType getStreamType() {
7682
}
7783

7884
for (final Object overlay : videoInfo.getArray("thumbnailOverlays")) {
85+
if (!(overlay instanceof JsonObject)) {
86+
continue;
87+
}
88+
7989
final String style = ((JsonObject) overlay)
8090
.getObject("thumbnailOverlayTimeStatusRenderer")
8191
.getString("style", "");
@@ -116,30 +126,44 @@ public String getName() throws ParsingException {
116126

117127
@Override
118128
public long getDuration() throws ParsingException {
119-
if (getStreamType() == StreamType.LIVE_STREAM || isPremiere()) {
129+
if (getStreamType() == StreamType.LIVE_STREAM) {
120130
return -1;
121131
}
122132

123133
String duration = getTextFromObject(videoInfo.getObject("lengthText"));
124134

125135
if (isNullOrEmpty(duration)) {
126-
for (final Object thumbnailOverlay : videoInfo.getArray("thumbnailOverlays")) {
127-
if (((JsonObject) thumbnailOverlay).has("thumbnailOverlayTimeStatusRenderer")) {
128-
duration = getTextFromObject(((JsonObject) thumbnailOverlay)
129-
.getObject("thumbnailOverlayTimeStatusRenderer").getObject("text"));
136+
// Available in playlists for videos
137+
duration = videoInfo.getString("lengthSeconds");
138+
139+
if (isNullOrEmpty(duration)) {
140+
final JsonObject timeOverlay = videoInfo.getArray("thumbnailOverlays")
141+
.stream()
142+
.filter(JsonObject.class::isInstance)
143+
.map(JsonObject.class::cast)
144+
.filter(thumbnailOverlay ->
145+
thumbnailOverlay.has("thumbnailOverlayTimeStatusRenderer"))
146+
.findFirst()
147+
.orElse(null);
148+
149+
if (timeOverlay != null) {
150+
duration = getTextFromObject(
151+
timeOverlay.getObject("thumbnailOverlayTimeStatusRenderer")
152+
.getObject("text"));
130153
}
131154
}
132155

133156
if (isNullOrEmpty(duration)) {
157+
if (isPremiere()) {
158+
// Premieres can be livestreams, so the duration is not available in this
159+
// case
160+
return -1;
161+
}
162+
134163
throw new ParsingException("Could not get duration");
135164
}
136165
}
137166

138-
// NewPipe#8034 - YT returns not a correct duration for "YT shorts" videos
139-
if ("SHORTS".equalsIgnoreCase(duration)) {
140-
return 0;
141-
}
142-
143167
return YoutubeParsingHelper.parseDurationString(duration);
144168
}
145169

@@ -187,7 +211,6 @@ public String getUploaderUrl() throws ParsingException {
187211
@Nullable
188212
@Override
189213
public String getUploaderAvatarUrl() throws ParsingException {
190-
191214
if (videoInfo.has("channelThumbnailSupportedRenderers")) {
192215
return JsonUtils.getArray(videoInfo, "channelThumbnailSupportedRenderers"
193216
+ ".channelThumbnailWithLinkRenderer.thumbnail.thumbnails")
@@ -218,13 +241,19 @@ public String getTextualUploadDate() throws ParsingException {
218241
return DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").format(getDateFromPremiere());
219242
}
220243

221-
final String publishedTimeText
222-
= getTextFromObject(videoInfo.getObject("publishedTimeText"));
223-
if (publishedTimeText != null && !publishedTimeText.isEmpty()) {
224-
return publishedTimeText;
244+
String publishedTimeText = getTextFromObject(videoInfo.getObject("publishedTimeText"));
245+
246+
if (isNullOrEmpty(publishedTimeText) && videoInfo.has("videoInfo")) {
247+
/*
248+
Returned in playlists, in the form: view count separator upload date
249+
*/
250+
publishedTimeText = videoInfo.getObject("videoInfo")
251+
.getArray("runs")
252+
.getObject(2)
253+
.getString("text");
225254
}
226255

227-
return null;
256+
return isNullOrEmpty(publishedTimeText) ? null : publishedTimeText;
228257
}
229258

230259
@Nullable
@@ -251,28 +280,69 @@ public DateWrapper getUploadDate() throws ParsingException {
251280

252281
@Override
253282
public long getViewCount() throws ParsingException {
254-
try {
255-
if (videoInfo.has("topStandaloneBadge") || isPremium()) {
256-
return -1;
257-
}
283+
if (videoInfo.has("topStandaloneBadge") || isPremium() || isPremiere()) {
284+
return -1;
285+
}
286+
287+
final String viewCount = getTextFromObject(videoInfo.getObject("viewCountText"));
288+
289+
if (!isNullOrEmpty(viewCount)) {
290+
try {
291+
// These approaches are language dependent
292+
if (viewCount.toLowerCase().contains("no views")) {
293+
return 0;
294+
} else if (viewCount.toLowerCase().contains("recommended")) {
295+
return -1;
296+
}
258297

259-
if (!videoInfo.has("viewCountText")) {
260-
// This object is null when a video has its views hidden.
261-
return -1;
298+
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
299+
} catch (final Exception ignored) {
300+
// Ignore all exceptions, as we can fall back to accessibility data
262301
}
302+
}
263303

264-
final String viewCount = getTextFromObject(videoInfo.getObject("viewCountText"));
304+
// Try parsing the real view count from accessibility data, unless this is a running
305+
// livestream (that data holds the total view count, not the number of people currently
306+
// watching the livestream)
307+
if (getStreamType() != StreamType.LIVE_STREAM) {
308+
try {
309+
return Long.parseLong(Utils.removeNonDigitCharacters(
310+
// This approach is language dependent
311+
Parser.matchGroup1("([\\d,]+) views$",
312+
videoInfo.getObject("title")
313+
.getObject("accessibility")
314+
.getObject("accessibilityData")
315+
.getString("label", ""))));
316+
} catch (final Exception ignored) {
317+
// Ignore all exceptions, as the view count can be hidden by creators, and so
318+
// cannot be found in this case
319+
}
320+
}
265321

266-
if (viewCount.toLowerCase().contains("no views")) {
267-
return 0;
268-
} else if (viewCount.toLowerCase().contains("recommended")) {
269-
return -1;
322+
// Fall back to a short view count, always used for livestreams (see why above)
323+
try {
324+
// Returned in playlists, in the form: view count separator upload date
325+
if (videoInfo.has("videoInfo")) {
326+
return Utils.mixedNumberWordToLong(videoInfo.getObject("videoInfo")
327+
.getArray("runs")
328+
.getObject(0)
329+
.getString("text"));
270330
}
271331

272-
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
273-
} catch (final Exception e) {
274-
throw new ParsingException("Could not get view count", e);
332+
// Returned everywhere but in playlists, used by the website to show view counts
333+
if (videoInfo.has("shortViewCountText")) {
334+
return Utils.mixedNumberWordToLong(videoInfo.getObject("shortViewCountText")
335+
.getArray("runs")
336+
.getObject(0)
337+
.getString("text"));
338+
}
339+
} catch (final Exception ignored) {
340+
// Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
341+
// found in this case
275342
}
343+
344+
// No view count extracted: return -1, as the view count can be hidden by creators on videos
345+
return -1;
276346
}
277347

278348
@Override
@@ -292,7 +362,10 @@ private boolean isPremium() {
292362
}
293363

294364
private boolean isPremiere() {
295-
return videoInfo.has("upcomingEventData");
365+
if (isPremiere == null) {
366+
isPremiere = videoInfo.has("upcomingEventData");
367+
}
368+
return isPremiere;
296369
}
297370

298371
private OffsetDateTime getDateFromPremiere() throws ParsingException {

0 commit comments

Comments
 (0)