Skip to content

Commit 5f71c90

Browse files
committed
[YouTube] Remove fetch of YouTube service workers and improve WEB client version and API key extraction
The client version and API key are no longer available in the service workers, so fetching them is unnecessary. Common code in the WEB client version extraction has been deduplicated, the Java 8 Stream API is now used, and the initial data fallback is only used as a last resort. This means that client version extraction from regexes is tried before this fallback, because the fallback doesn't contain the full client version. Using this shortened version could be a way to fingerprint the extractor, even if that does not seem to be the case.
1 parent d120036 commit 5f71c90

File tree

1 file changed

+67
-93
lines changed

1 file changed

+67
-93
lines changed

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 67 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
import java.util.Optional;
7474
import java.util.Random;
7575
import java.util.regex.Pattern;
76+
import java.util.stream.Stream;
7677

7778
import javax.annotation.Nonnull;
7879
import javax.annotation.Nullable;
@@ -612,87 +613,85 @@ public static boolean areHardcodedClientVersionAndKeyValid()
612613
return hardcodedClientVersionAndKeyValid.get();
613614
}
614615

615-
616-
private static void extractClientVersionAndKeyFromSwJs()
617-
throws IOException, ExtractionException {
618-
if (keyAndVersionExtracted) {
619-
return;
620-
}
621-
final String url = "https://www.youtube.com/sw.js";
622-
final Map<String, List<String>> headers = new HashMap<>();
623-
headers.put("Origin", singletonList("https://www.youtube.com"));
624-
headers.put("Referer", singletonList("https://www.youtube.com"));
625-
final String response = getDownloader().get(url, headers).responseBody();
626-
try {
627-
clientVersion = getStringResultFromRegexArray(response,
628-
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
629-
key = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
630-
} catch (final Parser.RegexException e) {
631-
throw new ParsingException("Could not extract YouTube WEB InnerTube client version "
632-
+ "and API key from sw.js", e);
633-
}
634-
keyAndVersionExtracted = true;
635-
}
636-
637616
private static void extractClientVersionAndKeyFromHtmlSearchResultsPage()
638617
throws IOException, ExtractionException {
639618
// Don't extract the client version and the InnerTube key if it has been already extracted
640619
if (keyAndVersionExtracted) {
641620
return;
642621
}
622+
643623
// Don't provide a search term in order to have a smaller response
644624
final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
645625
final String html = getDownloader().get(url, getCookieHeader()).responseBody();
646626
final JsonObject initialData = getInitialData(html);
647627
final JsonArray serviceTrackingParams = initialData.getObject("responseContext")
648628
.getArray("serviceTrackingParams");
649-
String shortClientVersion = null;
650629

651630
// Try to get version from initial data first
652-
for (final Object service : serviceTrackingParams) {
653-
final JsonObject s = (JsonObject) service;
654-
if (s.getString("service").equals("CSI")) {
655-
final JsonArray params = s.getArray("params");
656-
for (final Object param : params) {
657-
final JsonObject p = (JsonObject) param;
658-
final String paramKey = p.getString("key");
659-
if (paramKey != null && paramKey.equals("cver")) {
660-
clientVersion = p.getString("value");
661-
}
662-
}
663-
} else if (s.getString("service").equals("ECATCHER")) {
664-
// Fallback to get a shortened client version which does not contain the last two
665-
// digits
666-
final JsonArray params = s.getArray("params");
667-
for (final Object param : params) {
668-
final JsonObject p = (JsonObject) param;
669-
final String paramKey = p.getString("key");
670-
if (paramKey != null && paramKey.equals("client.version")) {
671-
shortClientVersion = p.getString("value");
672-
}
673-
}
631+
final Stream<JsonObject> serviceTrackingParamsStream = serviceTrackingParams.stream()
632+
.filter(JsonObject.class::isInstance)
633+
.map(JsonObject.class::cast);
634+
635+
clientVersion = getClientVersionFromServiceTrackingParam(
636+
serviceTrackingParamsStream, "CSI", "cver");
637+
638+
if (clientVersion == null) {
639+
try {
640+
clientVersion = getStringResultFromRegexArray(html,
641+
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
642+
} catch (final Parser.RegexException ignored) {
674643
}
675644
}
676645

646+
// Fallback to get a shortened client version which does not contain the last two
647+
// digits
648+
if (isNullOrEmpty(clientVersion)) {
649+
clientVersion = getClientVersionFromServiceTrackingParam(
650+
serviceTrackingParamsStream, "ECATCHER", "client.version");
651+
}
652+
677653
try {
678-
clientVersion = getStringResultFromRegexArray(html,
679-
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
654+
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
680655
} catch (final Parser.RegexException ignored) {
681656
}
682657

683-
if (!isNullOrEmpty(clientVersion) && !isNullOrEmpty(shortClientVersion)) {
684-
clientVersion = shortClientVersion;
658+
if (isNullOrEmpty(key)) {
659+
throw new ParsingException(
660+
// CHECKSTYLE:OFF
661+
"Could not extract YouTube WEB InnerTube API key from HTML search results page");
662+
// CHECKSTYLE:ON
685663
}
686664

687-
try {
688-
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
689-
} catch (final Parser.RegexException e) {
690-
throw new ParsingException("Could not extract YouTube WEB InnerTube client version "
691-
+ "and API key from HTML search results page", e);
665+
if (clientVersion == null) {
666+
throw new ParsingException(
667+
// CHECKSTYLE:OFF
668+
"Could not extract YouTube WEB InnerTube client version from HTML search results page");
669+
// CHECKSTYLE:ON
692670
}
671+
693672
keyAndVersionExtracted = true;
694673
}
695674

675+
@Nullable
676+
private static String getClientVersionFromServiceTrackingParam(
677+
@Nonnull final Stream<JsonObject> serviceTrackingParamsStream,
678+
@Nonnull final String serviceName,
679+
@Nonnull final String clientVersionKey) {
680+
return serviceTrackingParamsStream.filter(serviceTrackingParam ->
681+
serviceTrackingParam.getString("service", "")
682+
.equals(serviceName))
683+
.flatMap(serviceTrackingParam -> serviceTrackingParam.getArray("params")
684+
.stream())
685+
.filter(JsonObject.class::isInstance)
686+
.map(JsonObject.class::cast)
687+
.filter(param -> param.getString("key", "")
688+
.equals(clientVersionKey))
689+
.map(param -> param.getString("value"))
690+
.filter(paramValue -> !isNullOrEmpty(paramValue))
691+
.findFirst()
692+
.orElse(null);
693+
}
694+
696695
/**
697696
* Get the client version used by YouTube website on InnerTube requests.
698697
*/
@@ -701,20 +700,15 @@ public static String getClientVersion() throws IOException, ExtractionException
701700
return clientVersion;
702701
}
703702

704-
// Always extract latest client version, by trying first to extract it from the JavaScript
705-
// service worker, then from HTML search results page as a fallback, to prevent
703+
// Always extract the latest client version from HTML search results page to prevent
706704
// fingerprinting based on the client version used
707-
try {
708-
extractClientVersionAndKeyFromSwJs();
709-
} catch (final Exception e) {
710-
extractClientVersionAndKeyFromHtmlSearchResultsPage();
711-
}
705+
extractClientVersionAndKeyFromHtmlSearchResultsPage();
712706

713707
if (keyAndVersionExtracted) {
714708
return clientVersion;
715709
}
716710

717-
// Fallback to the hardcoded one if it's valid
711+
// Fallback to the hardcoded one if it is valid
718712
if (areHardcodedClientVersionAndKeyValid()) {
719713
clientVersion = HARDCODED_CLIENT_VERSION;
720714
return clientVersion;
@@ -731,14 +725,9 @@ public static String getKey() throws IOException, ExtractionException {
731725
return key;
732726
}
733727

734-
// Always extract the key used by the webiste, by trying first to extract it from the
735-
// JavaScript service worker, then from HTML search results page as a fallback, to prevent
736-
// fingerprinting based on the key and/or invalid key issues
737-
try {
738-
extractClientVersionAndKeyFromSwJs();
739-
} catch (final Exception e) {
740-
extractClientVersionAndKeyFromHtmlSearchResultsPage();
741-
}
728+
// Always extract the key used by the website from HTML search results page to prevent
729+
// fingerprinting based on the key and/or invalid key issues
730+
extractClientVersionAndKeyFromHtmlSearchResultsPage();
742731

743732
if (keyAndVersionExtracted) {
744733
return key;
@@ -751,7 +740,8 @@ public static String getKey() throws IOException, ExtractionException {
751740
}
752741

753742
// The ANDROID API key is also valid with the WEB client so return it if we couldn't
754-
// extract the WEB API key.
743+
// extract the WEB API key. This can be used as a way to fingerprint the extractor in this
744+
// case
755745
return ANDROID_YOUTUBE_KEY;
756746
}
757747

@@ -843,29 +833,13 @@ public static String[] getYoutubeMusicKey()
843833
return youtubeMusicKey;
844834
}
845835

846-
String musicClientVersion;
847-
String musicKey;
848-
String musicClientName;
849-
850-
try {
851-
final String url = "https://music.youtube.com/sw.js";
852-
final Map<String, List<String>> headers = new HashMap<>();
853-
headers.put("Origin", singletonList("https://music.youtube.com"));
854-
headers.put("Referer", singletonList("https://music.youtube.com"));
855-
final String response = getDownloader().get(url, headers).responseBody();
856-
musicClientVersion = getStringResultFromRegexArray(response,
857-
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
858-
musicKey = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
859-
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, response);
860-
} catch (final Exception e) {
861-
final String url = "https://music.youtube.com/?ucbcb=1";
862-
final String html = getDownloader().get(url, getCookieHeader()).responseBody();
836+
final String url = "https://music.youtube.com/?ucbcb=1";
837+
final String html = getDownloader().get(url, getCookieHeader()).responseBody();
863838

864-
musicKey = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
865-
musicClientVersion = getStringResultFromRegexArray(html,
866-
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES);
867-
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, html);
868-
}
839+
final String musicKey = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
840+
final String musicClientVersion = getStringResultFromRegexArray(html,
841+
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES);
842+
final String musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, html);
869843

870844
youtubeMusicKey = new String[] {musicKey, musicClientName, musicClientVersion};
871845
return youtubeMusicKey;

0 commit comments

Comments
 (0)