Skip to content

Commit d7e678a

Browse files
committed
[YouTube] Improve WEB client version and API key HTML extraction
Common code in the WEB client version HTML extraction has been deduplicated, the Java 8 Stream API is now used, and the initial data fallback is now only used as a last resort. This means that the client version extraction from regexes is tried before this fallback, because the fallback doesn't contain the full client version. Using this shortened client version could be a way to fingerprint the extractor, even if that does not seem to be the case.
1 parent 6a885ef commit d7e678a

File tree

1 file changed

+59
-37
lines changed

1 file changed

+59
-37
lines changed

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 59 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
import java.util.Optional;
7474
import java.util.Random;
7575
import java.util.regex.Pattern;
76+
import java.util.stream.Stream;
7677

7778
import javax.annotation.Nonnull;
7879
import javax.annotation.Nullable;
@@ -640,59 +641,79 @@ private static void extractClientVersionAndKeyFromHtmlSearchResultsPage()
640641
if (keyAndVersionExtracted) {
641642
return;
642643
}
644+
643645
// Don't provide a search term in order to have a smaller response
644646
final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
645647
final String html = getDownloader().get(url, getCookieHeader()).responseBody();
646648
final JsonObject initialData = getInitialData(html);
647649
final JsonArray serviceTrackingParams = initialData.getObject("responseContext")
648650
.getArray("serviceTrackingParams");
649-
String shortClientVersion = null;
650651

651652
// Try to get version from initial data first
652-
for (final Object service : serviceTrackingParams) {
653-
final JsonObject s = (JsonObject) service;
654-
if (s.getString("service").equals("CSI")) {
655-
final JsonArray params = s.getArray("params");
656-
for (final Object param : params) {
657-
final JsonObject p = (JsonObject) param;
658-
final String paramKey = p.getString("key");
659-
if (paramKey != null && paramKey.equals("cver")) {
660-
clientVersion = p.getString("value");
661-
}
662-
}
663-
} else if (s.getString("service").equals("ECATCHER")) {
664-
// Fallback to get a shortened client version which does not contain the last two
665-
// digits
666-
final JsonArray params = s.getArray("params");
667-
for (final Object param : params) {
668-
final JsonObject p = (JsonObject) param;
669-
final String paramKey = p.getString("key");
670-
if (paramKey != null && paramKey.equals("client.version")) {
671-
shortClientVersion = p.getString("value");
672-
}
673-
}
653+
final Stream<JsonObject> serviceTrackingParamsStream = serviceTrackingParams.stream()
654+
.filter(JsonObject.class::isInstance)
655+
.map(JsonObject.class::cast);
656+
657+
clientVersion = getClientVersionFromServiceTrackingParam(
658+
serviceTrackingParamsStream, "CSI", "cver");
659+
660+
if (clientVersion == null) {
661+
try {
662+
clientVersion = getStringResultFromRegexArray(html,
663+
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
664+
} catch (final Parser.RegexException ignored) {
674665
}
675666
}
676667

668+
// Fallback to get a shortened client version which does not contain the last two
669+
// digits
670+
if (isNullOrEmpty(clientVersion)) {
671+
clientVersion = getClientVersionFromServiceTrackingParam(
672+
serviceTrackingParamsStream, "ECATCHER", "client.version");
673+
}
674+
677675
try {
678-
clientVersion = getStringResultFromRegexArray(html,
679-
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
676+
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
680677
} catch (final Parser.RegexException ignored) {
681678
}
682679

683-
if (!isNullOrEmpty(clientVersion) && !isNullOrEmpty(shortClientVersion)) {
684-
clientVersion = shortClientVersion;
680+
if (isNullOrEmpty(key)) {
681+
throw new ParsingException(
682+
// CHECKSTYLE:OFF
683+
"Could not extract YouTube WEB InnerTube API key from HTML search results page");
684+
// CHECKSTYLE:ON
685685
}
686686

687-
try {
688-
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
689-
} catch (final Parser.RegexException e) {
690-
throw new ParsingException("Could not extract YouTube WEB InnerTube client version "
691-
+ "and API key from HTML search results page", e);
687+
if (clientVersion == null) {
688+
throw new ParsingException(
689+
// CHECKSTYLE:OFF
690+
"Could not extract YouTube WEB InnerTube client version from HTML search results page");
691+
// CHECKSTYLE:ON
692692
}
693+
693694
keyAndVersionExtracted = true;
694695
}
695696

697+
@Nullable
698+
private static String getClientVersionFromServiceTrackingParam(
699+
@Nonnull final Stream<JsonObject> serviceTrackingParamsStream,
700+
@Nonnull final String serviceName,
701+
@Nonnull final String clientVersionKey) {
702+
return serviceTrackingParamsStream.filter(serviceTrackingParam ->
703+
serviceTrackingParam.getString("service", "")
704+
.equals(serviceName))
705+
.flatMap(serviceTrackingParam -> serviceTrackingParam.getArray("params")
706+
.stream())
707+
.filter(JsonObject.class::isInstance)
708+
.map(JsonObject.class::cast)
709+
.filter(param -> param.getString("key", "")
710+
.equals(clientVersionKey))
711+
.map(param -> param.getString("value"))
712+
.filter(paramValue -> !isNullOrEmpty(paramValue))
713+
.findFirst()
714+
.orElse(null);
715+
}
716+
696717
/**
697718
* Get the client version used by YouTube website on InnerTube requests.
698719
*/
@@ -701,8 +722,8 @@ public static String getClientVersion() throws IOException, ExtractionException
701722
return clientVersion;
702723
}
703724

704-
// Always extract latest client version, by trying first to extract it from the JavaScript
705-
// service worker, then from HTML search results page as a fallback, to prevent
725+
// Always extract the latest client version, by trying first to extract it from the
726+
// JavaScript service worker, then from HTML search results page as a fallback, to prevent
706727
// fingerprinting based on the client version used
707728
try {
708729
extractClientVersionAndKeyFromSwJs();
@@ -714,7 +735,7 @@ public static String getClientVersion() throws IOException, ExtractionException
714735
return clientVersion;
715736
}
716737

717-
// Fallback to the hardcoded one if it's valid
738+
// Fallback to the hardcoded one if it is valid
718739
if (areHardcodedClientVersionAndKeyValid()) {
719740
clientVersion = HARDCODED_CLIENT_VERSION;
720741
return clientVersion;
@@ -731,7 +752,7 @@ public static String getKey() throws IOException, ExtractionException {
731752
return key;
732753
}
733754

734-
// Always extract the key used by the webiste, by trying first to extract it from the
755+
// Always extract the key used by the website, by trying first to extract it from the
735756
// JavaScript service worker, then from HTML search results page as a fallback, to prevent
736757
// fingerprinting based on the key and/or invalid key issues
737758
try {
@@ -751,7 +772,8 @@ public static String getKey() throws IOException, ExtractionException {
751772
}
752773

753774
// The ANDROID API key is also valid with the WEB client so return it if we couldn't
754-
// extract the WEB API key.
775+
// extract the WEB API key. This can be used as a way to fingerprint the extractor in this
776+
// case
755777
return ANDROID_YOUTUBE_KEY;
756778
}
757779

0 commit comments

Comments
 (0)