From 649641ab670ba0ba7434abd8a8d90095f513d20e Mon Sep 17 00:00:00 2001 From: David Asunmo <22662897+davidasunmo@users.noreply.github.com.> Date: Mon, 7 Jul 2025 04:34:42 +0100 Subject: [PATCH 01/13] asList -> List.of in YoutubeSearchQHTest --- .../youtube/search/YoutubeSearchQHTest.java | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java index 0b9937f3ea..da862424ba 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java @@ -6,10 +6,11 @@ import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory.MUSIC_SONGS; import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory.PLAYLISTS; import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory.VIDEOS; -import static java.util.Arrays.asList; import org.junit.jupiter.api.Test; +import java.util.List; + public class YoutubeSearchQHTest { @Test @@ -20,37 +21,37 @@ public void testRegularValues() throws Exception { assertEquals("https://www.youtube.com/results?search_query=G%C3%BCl%C3%BCm&sp=8AEB", YouTube.getSearchQHFactory().fromQuery("Gülüm").getUrl()); assertEquals("https://www.youtube.com/results?search_query=%3Fj%24%29H%C2%A7B&sp=8AEB", YouTube.getSearchQHFactory().fromQuery("?j$)H§B").getUrl()); - assertEquals("https://music.youtube.com/search?q=asdf", YouTube.getSearchQHFactory().fromQuery("asdf", asList(new String[]{MUSIC_SONGS}), "").getUrl()); - assertEquals("https://music.youtube.com/search?q=hans", YouTube.getSearchQHFactory().fromQuery("hans", asList(new String[]{MUSIC_SONGS}), 
"").getUrl()); - assertEquals("https://music.youtube.com/search?q=Poifj%26jaijf", YouTube.getSearchQHFactory().fromQuery("Poifj&jaijf", asList(new String[]{MUSIC_SONGS}), "").getUrl()); - assertEquals("https://music.youtube.com/search?q=G%C3%BCl%C3%BCm", YouTube.getSearchQHFactory().fromQuery("Gülüm", asList(new String[]{MUSIC_SONGS}), "").getUrl()); - assertEquals("https://music.youtube.com/search?q=%3Fj%24%29H%C2%A7B", YouTube.getSearchQHFactory().fromQuery("?j$)H§B", asList(new String[]{MUSIC_SONGS}), "").getUrl()); + assertEquals("https://music.youtube.com/search?q=asdf", YouTube.getSearchQHFactory().fromQuery("asdf", List.of(MUSIC_SONGS), "").getUrl()); + assertEquals("https://music.youtube.com/search?q=hans", YouTube.getSearchQHFactory().fromQuery("hans", List.of(MUSIC_SONGS), "").getUrl()); + assertEquals("https://music.youtube.com/search?q=Poifj%26jaijf", YouTube.getSearchQHFactory().fromQuery("Poifj&jaijf", List.of(MUSIC_SONGS), "").getUrl()); + assertEquals("https://music.youtube.com/search?q=G%C3%BCl%C3%BCm", YouTube.getSearchQHFactory().fromQuery("Gülüm", List.of(MUSIC_SONGS), "").getUrl()); + assertEquals("https://music.youtube.com/search?q=%3Fj%24%29H%C2%A7B", YouTube.getSearchQHFactory().fromQuery("?j$)H§B", List.of(MUSIC_SONGS), "").getUrl()); } @Test public void testGetContentFilter() throws Exception { assertEquals(VIDEOS, YouTube.getSearchQHFactory() - .fromQuery("", asList(new String[]{VIDEOS}), "").getContentFilters().get(0)); + .fromQuery("", List.of(VIDEOS), "").getContentFilters().get(0)); assertEquals(CHANNELS, YouTube.getSearchQHFactory() - .fromQuery("asdf", asList(new String[]{CHANNELS}), "").getContentFilters().get(0)); + .fromQuery("asdf", List.of(CHANNELS), "").getContentFilters().get(0)); assertEquals(MUSIC_SONGS, YouTube.getSearchQHFactory() - .fromQuery("asdf", asList(new String[]{MUSIC_SONGS}), "").getContentFilters().get(0)); + .fromQuery("asdf", List.of(MUSIC_SONGS), "").getContentFilters().get(0)); } @Test public void 
testWithContentfilter() throws Exception { assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAfABAQ%253D%253D", YouTube.getSearchQHFactory() - .fromQuery("asdf", asList(new String[]{VIDEOS}), "").getUrl()); + .fromQuery("asdf", List.of(VIDEOS), "").getUrl()); assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAvABAQ%253D%253D", YouTube.getSearchQHFactory() - .fromQuery("asdf", asList(new String[]{CHANNELS}), "").getUrl()); + .fromQuery("asdf", List.of(CHANNELS), "").getUrl()); assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQA_ABAQ%253D%253D", YouTube.getSearchQHFactory() - .fromQuery("asdf", asList(new String[]{PLAYLISTS}), "").getUrl()); + .fromQuery("asdf", List.of(PLAYLISTS), "").getUrl()); assertEquals("https://www.youtube.com/results?search_query=asdf&sp=8AEB", YouTube.getSearchQHFactory() - .fromQuery("asdf", asList(new String[]{"fjiijie"}), "").getUrl()); + .fromQuery("asdf", List.of("fjiijie"), "").getUrl()); assertEquals("https://music.youtube.com/search?q=asdf", YouTube.getSearchQHFactory() - .fromQuery("asdf", asList(new String[]{MUSIC_SONGS}), "").getUrl()); + .fromQuery("asdf", List.of(MUSIC_SONGS), "").getUrl()); } @Test From ca44c0445aecac6575c4d488964e351d6d3c25d8 Mon Sep 17 00:00:00 2001 From: David Asunmo <22662897+davidasunmo@users.noreply.github.com.> Date: Mon, 7 Jul 2025 04:35:15 +0100 Subject: [PATCH 02/13] [SoundCloud] Use Pattern in SoundcloudStreamLinkHandlerFactory instead of String --- .../SoundcloudStreamLinkHandlerFactory.java | 19 ++++++++++++++----- .../schabi/newpipe/extractor/utils/Utils.java | 12 +++++++++++- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java index e7809c52a1..421022ef0a 100644 --- 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java @@ -1,5 +1,7 @@ package org.schabi.newpipe.extractor.services.soundcloud.linkHandler; +import java.util.regex.Pattern; + import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper; @@ -9,11 +11,18 @@ public final class SoundcloudStreamLinkHandlerFactory extends LinkHandlerFactory { private static final SoundcloudStreamLinkHandlerFactory INSTANCE = new SoundcloudStreamLinkHandlerFactory(); - private static final String URL_PATTERN = "^https?://(www\\.|m\\.|on\\.)?" - + "soundcloud.com/[0-9a-z_-]+" - + "/(?!(tracks|albums|sets|reposts|followers|following)/?$)[0-9a-z_-]+/?([#?].*)?$"; - private static final String API_URL_PATTERN = "^https?://api-v2\\.soundcloud.com" - + "/(tracks|albums|sets|reposts|followers|following)/([0-9a-z_-]+)/"; + + private static final Pattern URL_PATTERN = Pattern.compile( + "^https?://(?:www\\.|m\\.|on\\.)?" 
+ + "soundcloud.com/[0-9a-z_-]+" + + "/(?!(?:tracks|albums|sets|reposts|followers|following)/?$)[0-9a-z_-]+/?(?:[#?].*)?$" + ); + + private static final Pattern API_URL_PATTERN = Pattern.compile( + "^https?://api-v2\\.soundcloud.com" + + "/(tracks|albums|sets|reposts|followers|following)/([0-9a-z_-]+)/" + ); + private SoundcloudStreamLinkHandlerFactory() { } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index c061ce30fa..bdf6bf20a2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -110,12 +110,22 @@ public static long mixedNumberWordToLong(final String numberWord) * @param url the url to be tested */ public static void checkUrl(final String pattern, final String url) throws ParsingException { + checkUrl(Pattern.compile(pattern), url); + } + + /** + * Check if the url matches the pattern. 
+ * + * @param pattern the pattern that will be used to check the url + * @param url the url to be tested + */ + public static void checkUrl(final Pattern pattern, final String url) throws ParsingException { if (isNullOrEmpty(url)) { throw new IllegalArgumentException("Url can't be null or empty"); } if (!Parser.isMatch(pattern, url.toLowerCase())) { - throw new ParsingException("Url don't match the pattern"); + throw new ParsingException("Url doesn't match the pattern"); } } From 2cf9a21c150ebe62ddcffda22158b98d230d912e Mon Sep 17 00:00:00 2001 From: David Asunmo <22662897+davidasunmo@users.noreply.github.com.> Date: Mon, 7 Jul 2025 04:39:50 +0100 Subject: [PATCH 03/13] Refactor AudioStream to use builder in constructor --- .../newpipe/extractor/stream/AudioStream.java | 98 ++++++++----------- 1 file changed, 40 insertions(+), 58 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/AudioStream.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/AudioStream.java index e31e1aff35..935ff1ff73 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/AudioStream.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/AudioStream.java @@ -28,7 +28,7 @@ import java.util.Locale; import java.util.Objects; -public final class AudioStream extends Stream { +public class AudioStream extends Stream { public static final int UNKNOWN_BITRATE = -1; private final int averageBitrate; @@ -60,7 +60,7 @@ public final class AudioStream extends Stream { * Class to build {@link AudioStream} objects. */ @SuppressWarnings("checkstyle:hiddenField") - public static final class Builder { + public static class Builder { private String id; private String content; private boolean isUrl; @@ -88,7 +88,8 @@ public Builder() { } /** - * Set the identifier of the {@link AudioStream}. + * Set the identifier of the {@link AudioStream} which uniquely identifies the stream, + * e.g. for YouTube this would be the itag * *
* It must not be null and should be non empty. @@ -108,14 +109,14 @@ public Builder setId(@Nonnull final String id) { } /** - * Set the content of the {@link AudioStream}. - * + * Set the content or the URL of the {@link AudioStream}, depending on whether isUrl is + * true *
* It must not be null, and should be non empty. *
* * @param content the content of the {@link AudioStream} - * @param isUrl whether the content is a URL + * @param isUrl whether content is the URL or the actual content of e.g. a DASH manifest * @return this {@link Builder} instance */ public Builder setContent(@Nonnull final String content, @@ -126,7 +127,7 @@ public Builder setContent(@Nonnull final String content, } /** - * Set the {@link MediaFormat} used by the {@link AudioStream}. + * Set the {@link MediaFormat} used by the {@link AudioStream}, which can be null * ** It should be one of the audio {@link MediaFormat}s ({@link MediaFormat#M4A M4A}, @@ -278,16 +279,22 @@ public Builder setItagItem(@Nullable final ItagItem itagItem) { * Build an {@link AudioStream} using the builder's current values. * *
- * The identifier and the content (and so the {@code isUrl} boolean) properties must have + * The identifier and the content (and thus {@code isUrl}) properties must have * been set. *
* * @return a new {@link AudioStream} using the builder's current values - * @throws IllegalStateException if {@code id}, {@code content} (and so {@code isUrl}) or + * @throws IllegalStateException if {@code id}, {@code content} (and thus {@code isUrl}) or * {@code deliveryMethod} have been not set, or have been set as {@code null} */ @Nonnull public AudioStream build() { + validateBuild(); + + return new AudioStream(this); + } + + void validateBuild() { if (id == null) { throw new IllegalStateException( "The identifier of the audio stream has been not set or is null. If you " @@ -305,64 +312,39 @@ public AudioStream build() { "The delivery method of the audio stream has been set as null, which is " + "not allowed. Pass a valid one instead with setDeliveryMethod."); } - - return new AudioStream(id, content, isUrl, mediaFormat, deliveryMethod, averageBitrate, - manifestUrl, audioTrackId, audioTrackName, audioLocale, audioTrackType, - itagItem); } } /** - * Create a new audio stream. + * Create a new audio stream using the given {@link Builder}. * - * @param id the identifier which uniquely identifies the stream, e.g. for YouTube - * this would be the itag - * @param content the content or the URL of the stream, depending on whether isUrl is - * true - * @param isUrl whether content is the URL or the actual content of e.g. 
a DASH - * manifest - * @param format the {@link MediaFormat} used by the stream, which can be null - * @param deliveryMethod the {@link DeliveryMethod} of the stream - * @param averageBitrate the average bitrate of the stream (which can be unknown, see - * {@link #UNKNOWN_BITRATE}) - * @param audioTrackId the id of the audio track - * @param audioTrackName the name of the audio track - * @param audioLocale the {@link Locale} of the audio stream, representing its language - * @param itagItem the {@link ItagItem} corresponding to the stream, which cannot be null - * @param manifestUrl the URL of the manifest this stream comes from (if applicable, - * otherwise null) + * @param builder The {@link Builder} to use to create the audio stream */ @SuppressWarnings("checkstyle:ParameterNumber") - private AudioStream(@Nonnull final String id, - @Nonnull final String content, - final boolean isUrl, - @Nullable final MediaFormat format, - @Nonnull final DeliveryMethod deliveryMethod, - final int averageBitrate, - @Nullable final String manifestUrl, - @Nullable final String audioTrackId, - @Nullable final String audioTrackName, - @Nullable final Locale audioLocale, - @Nullable final AudioTrackType audioTrackType, - @Nullable final ItagItem itagItem) { - super(id, content, isUrl, format, deliveryMethod, manifestUrl); - if (itagItem != null) { - this.itagItem = itagItem; - this.itag = itagItem.id; - this.quality = itagItem.getQuality(); - this.bitrate = itagItem.getBitrate(); - this.initStart = itagItem.getInitStart(); - this.initEnd = itagItem.getInitEnd(); - this.indexStart = itagItem.getIndexStart(); - this.indexEnd = itagItem.getIndexEnd(); - this.codec = itagItem.getCodec(); + AudioStream(final Builder builder) { + super(builder.id, + builder.content, + builder.isUrl, + builder.mediaFormat, + builder.deliveryMethod, + builder.manifestUrl); + if (builder.itagItem != null) { + this.itagItem = builder.itagItem; + this.itag = builder.itagItem.id; + this.quality = 
builder.itagItem.getQuality(); + this.bitrate = builder.itagItem.getBitrate(); + this.initStart = builder.itagItem.getInitStart(); + this.initEnd = builder.itagItem.getInitEnd(); + this.indexStart = builder.itagItem.getIndexStart(); + this.indexEnd = builder.itagItem.getIndexEnd(); + this.codec = builder.itagItem.getCodec(); } - this.averageBitrate = averageBitrate; - this.audioTrackId = audioTrackId; - this.audioTrackName = audioTrackName; - this.audioLocale = audioLocale; - this.audioTrackType = audioTrackType; + this.averageBitrate = builder.averageBitrate; + this.audioTrackId = builder.audioTrackId; + this.audioTrackName = builder.audioTrackName; + this.audioLocale = builder.audioLocale; + this.audioTrackType = builder.audioTrackType; } /** From 7433bead3375c46d8262f38e1f241422706c68a5 Mon Sep 17 00:00:00 2001 From: David Asunmo <22662897+davidasunmo@users.noreply.github.com.> Date: Mon, 7 Jul 2025 04:40:28 +0100 Subject: [PATCH 04/13] Refactor Description constructor --- .../org/schabi/newpipe/extractor/stream/Description.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/Description.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/Description.java index 2641815b12..439609a2c1 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/Description.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/Description.java @@ -17,11 +17,7 @@ public class Description implements Serializable { public Description(@Nullable final String content, final int type) { this.type = type; - if (content == null) { - this.content = ""; - } else { - this.content = content; - } + this.content = Objects.requireNonNullElse(content, ""); } public String getContent() { From 15f83a1c861b9f9fb972ad542f86ac3f503cbcb2 Mon Sep 17 00:00:00 2001 From: David Asunmo <22662897+davidasunmo@users.noreply.github.com.> Date: Mon, 7 Jul 2025 04:45:30 +0100 Subject: 
[PATCH 05/13] [SoundCloud] make SoundcloudStreamExtractor.getTimeStamp return 0 if no timestamp --- .../soundcloud/extractors/SoundcloudStreamExtractor.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java index 6b2abcf590..1577eaaacc 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java @@ -121,7 +121,8 @@ public long getLength() { @Override public long getTimeStamp() throws ParsingException { - return getTimestampSeconds("(#t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)"); + final var timestamp = getTimestampSeconds("(#t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)"); + return timestamp == -2 ? 
0 : timestamp; } @Override From 3824a806d363afe82da6f50d6271b6cfef127c01 Mon Sep 17 00:00:00 2001 From: David Asunmo <22662897+davidasunmo@users.noreply.github.com.> Date: Mon, 7 Jul 2025 04:49:06 +0100 Subject: [PATCH 06/13] [SoundCloud] Validate HTTP response codes for SoundCloud --- .../extractor/downloader/Response.java | 20 +++++++++++ .../exceptions/HttpResponseException.java | 15 ++++++++ .../soundcloud/SoundcloudParsingHelper.java | 34 +++++++++++------- .../newpipe/extractor/utils/HttpUtils.java | 36 +++++++++++++++++++ 4 files changed, 92 insertions(+), 13 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/exceptions/HttpResponseException.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/utils/HttpUtils.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java b/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java index ac792dc756..87c3577ef4 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java @@ -6,6 +6,9 @@ import java.util.List; import java.util.Map; +import org.schabi.newpipe.extractor.exceptions.HttpResponseException; +import org.schabi.newpipe.extractor.utils.HttpUtils; + /** * A Data class used to hold the results from requests made by the Downloader implementation. */ @@ -80,4 +83,21 @@ public String getHeader(final String name) { return null; } + // CHECKSTYLE:OFF + /** + * Helper function simply to make it easier to validate response code inline + * before getting the code/body/latestUrl/etc. + * Validates the response codes for the given {@link Response}, and throws a {@link HttpResponseException} if the code is invalid + * @see HttpUtils#validateResponseCode(Response, int...) 
+ * @param validResponseCodes Expected valid response codes + * @return {@code this} response + * @throws HttpResponseException Thrown when the response code is not in {@code validResponseCodes}, + * or when {@code validResponseCodes} is empty and the code is a 4xx or 5xx error. + */ + // CHECKSTYLE:ON + public Response validateResponseCode(final int... validResponseCodes) + throws HttpResponseException { + HttpUtils.validateResponseCode(this, validResponseCodes); + return this; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/exceptions/HttpResponseException.java b/extractor/src/main/java/org/schabi/newpipe/extractor/exceptions/HttpResponseException.java new file mode 100644 index 0000000000..c07850a9d3 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/exceptions/HttpResponseException.java @@ -0,0 +1,15 @@ +package org.schabi.newpipe.extractor.exceptions; + +import java.io.IOException; +import org.schabi.newpipe.extractor.downloader.Response; + +public class HttpResponseException extends IOException { + public HttpResponseException(final Response response) { + this("Error in HTTP Response for " + response.latestUrl() + "\n\t" + + response.responseCode() + " - " + response.responseMessage()); + } + + public HttpResponseException(final String message) { + super(message); + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java index 621bc360d3..9afde92420 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java @@ -5,6 +5,7 @@ import static org.schabi.newpipe.extractor.ServiceList.SoundCloud; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import static
org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps; +import static org.schabi.newpipe.extractor.utils.HttpUtils.validateResponseCode; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; @@ -12,7 +13,6 @@ import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.schabi.newpipe.extractor.MultiInfoItemsCollector; import org.schabi.newpipe.extractor.Image; @@ -103,8 +103,8 @@ public static synchronized String clientId() throws ExtractionException, IOExcep final Downloader dl = NewPipe.getDownloader(); - final Response download = dl.get("https://soundcloud.com"); - final String responseBody = download.responseBody(); + final Response downloadResponse = dl.get("https://soundcloud.com").validateResponseCode(); + final String responseBody = downloadResponse.responseBody(); final String clientIdPattern = ",client_id:\"(.*?)\""; final Document doc = Jsoup.parse(responseBody); @@ -115,11 +115,12 @@ public static synchronized String clientId() throws ExtractionException, IOExcep final var headers = Map.of("Range", List.of("bytes=0-50000")); - for (final Element element : possibleScripts) { + for (final var element : possibleScripts) { final String srcUrl = element.attr("src"); if (!isNullOrEmpty(srcUrl)) { try { clientId = Parser.matchGroup1(clientIdPattern, dl.get(srcUrl, headers) + .validateResponseCode() .responseBody()); return clientId; } catch (final RegexException ignored) { @@ -147,11 +148,13 @@ public static OffsetDateTime parseDateFrom(final String textualUploadDate) } } + // CHECKSTYLE:OFF /** - * Call the endpoint "/resolve" of the API.+ * Call the endpoint "/resolve" of the API. *
- * See https://developers.soundcloud.com/docs/api/reference#resolve + * See https://web.archive.org/web/20170804051146/https://developers.soundcloud.com/docs/api/reference#resolve */ + // CHECKSTYLE:ON public static JsonObject resolveFor(@Nonnull final Downloader downloader, final String url) throws IOException, ExtractionException { final String apiUrl = SOUNDCLOUD_API_V2_URL + "resolve" @@ -176,10 +179,11 @@ public static JsonObject resolveFor(@Nonnull final Downloader downloader, final public static String resolveUrlWithEmbedPlayer(final String apiUrl) throws IOException, ReCaptchaException { - final String response = NewPipe.getDownloader().get("https://w.soundcloud.com/player/?url=" - + Utils.encodeUrlUtf8(apiUrl), SoundCloud.getLocalization()).responseBody(); - - return Jsoup.parse(response).select("link[rel=\"canonical\"]").first() + final var response = NewPipe.getDownloader().get("https://w.soundcloud.com/player/?url=" + + Utils.encodeUrlUtf8(apiUrl), SoundCloud.getLocalization()); + validateResponseCode(response); + final var responseBody = response.responseBody(); + return Jsoup.parse(responseBody).select("link[rel=\"canonical\"]").first() .attr("abs:href"); } @@ -188,6 +192,7 @@ public static String resolveIdWithWidgetApi(final String urlString) throws IOExc * * @return the resolved id */ + // TODO: what makes this method different from the others? Don't they all return the same?
public static String resolveIdWithWidgetApi(final String urlString) throws IOException, ParsingException { String fixedUrl = urlString; @@ -223,9 +228,12 @@ public static String resolveIdWithWidgetApi(final String urlString) throws IOExc final String widgetUrl = "https://api-widget.soundcloud.com/resolve?url=" + Utils.encodeUrlUtf8(url.toString()) + "&format=json&client_id=" + SoundcloudParsingHelper.clientId(); - final String response = NewPipe.getDownloader().get(widgetUrl, - SoundCloud.getLocalization()).responseBody(); - final JsonObject o = JsonParser.object().from(response); + + final var response = NewPipe.getDownloader().get(widgetUrl, + SoundCloud.getLocalization()); + + final var responseBody = response.validateResponseCode().responseBody(); + final JsonObject o = JsonParser.object().from(responseBody); return String.valueOf(JsonUtils.getValue(o, "id")); } catch (final JsonParserException e) { throw new ParsingException("Could not parse JSON response", e); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/HttpUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/HttpUtils.java new file mode 100644 index 0000000000..421ed03459 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/HttpUtils.java @@ -0,0 +1,36 @@ +package org.schabi.newpipe.extractor.utils; + +import java.util.Arrays; + +import org.schabi.newpipe.extractor.downloader.Response; +import org.schabi.newpipe.extractor.exceptions.HttpResponseException; + +public final class HttpUtils { + + private HttpUtils() { + // Utility class, no instances allowed + } + + // CHECKSTYLE:OFF + /** + * Validates the response codes for the given {@link Response}, and throws + * a {@link HttpResponseException} if the code is invalid + * @param response The response to validate + * @param validResponseCodes Expected valid response codes + * @throws HttpResponseException Thrown when the response code is not in {@code validResponseCodes}, + * or when 
{@code validResponseCodes} is empty and the code is a 4xx or 5xx error. + */ + // CHECKSTYLE:ON + public static void validateResponseCode(final Response response, + final int... validResponseCodes) + throws HttpResponseException { + final int code = response.responseCode(); + final var throwError = (validResponseCodes == null || validResponseCodes.length == 0) + ? code >= 400 && code <= 599 + : Arrays.stream(validResponseCodes).noneMatch(c -> c == code); + + if (throwError) { + throw new HttpResponseException(response); + } + } +} From c28e0246d888522cab6115b3b4cf9e975260b2d3 Mon Sep 17 00:00:00 2001 From: David Asunmo <22662897+davidasunmo@users.noreply.github.com.> Date: Mon, 7 Jul 2025 04:52:38 +0100 Subject: [PATCH 07/13] Refactor Parser.java Remove redundant null check in Utils.java --- .../newpipe/extractor/utils/Parser.java | 123 ++++++++++++++---- .../schabi/newpipe/extractor/utils/Utils.java | 5 +- 2 files changed, 96 insertions(+), 32 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java index cb28c5e6f7..0d3d6af7b6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java @@ -44,50 +44,120 @@ public RegexException(final String message) { } } + /** + * Matches input to the pattern or throw an exception if it doesn't match + * @param pattern Pattern to match against + * @param input Input to check if it matches pattern + * @return The matcher after {@code find() == true} + * @throws RegexException if {@code find() == false} + */ + @Nonnull + public static Matcher matchOrThrow(@Nonnull final Pattern pattern, + final String input) throws RegexException { + final Matcher matcher = pattern.matcher(input); + if (matcher.find()) { + return matcher; + } else { + String errorMessage = "Failed to find pattern \"" + pattern.pattern() + "\""; + if 
(input.length() <= 1024) { + errorMessage += " inside of \"" + input + "\""; + } + throw new RegexException(errorMessage); + } + } + + /** + * Matches group 1 of the given pattern against the input + * and returns the matched group + * + * @param pattern The regex pattern to match. + * @param input The input string to match against. + * @return The matching group as a string. + * @throws RegexException If the pattern does not match the input or if the group is not found. + */ + @Nonnull public static String matchGroup1(final String pattern, final String input) throws RegexException { return matchGroup(pattern, input, 1); } - public static String matchGroup1(final Pattern pattern, - final String input) throws RegexException { + /** + * Matches group 1 of the given pattern against the input + * and returns the matched group + * + * @param pattern The regex pattern to match. + * @param input The input string to match against. + * @return The matching group as a string. + * @throws RegexException If the pattern does not match the input or if the group is not found. + */ + @Nonnull + public static String matchGroup1(final Pattern pattern, final String input) + throws RegexException { return matchGroup(pattern, input, 1); } - public static String matchGroup(final String pattern, - final String input, - final int group) throws RegexException { + /** + * Matches the specified group of the given pattern against the input, + * and returns the matched group + * + * @param pattern The regex pattern to match. + * @param input The input string to match against. + * @param group The group number to retrieve (1-based index). + * @return The matching group as a string. + * @throws RegexException If the pattern does not match the input or if the group is not found. 
+ */ + @Nonnull + public static String matchGroup(final String pattern, final String input, final int group) + throws RegexException { return matchGroup(Pattern.compile(pattern), input, group); } - public static String matchGroup(@Nonnull final Pattern pat, + /** + * Matches the specified group of the given pattern against the input, + * and returns the matched group + * + * @param pattern The regex pattern to match. + * @param input The input string to match against. + * @param group The group number to retrieve (1-based index). + * @return The matching group as a string. + * @throws RegexException If the pattern does not match the input or if the group is not found. + */ + @Nonnull + public static String matchGroup(@Nonnull final Pattern pattern, final String input, - final int group) throws RegexException { - final Matcher matcher = pat.matcher(input); - final boolean foundMatch = matcher.find(); - if (foundMatch) { - return matcher.group(group); - } else { - // only pass input to exception message when it is not too long - if (input.length() > 1024) { - throw new RegexException("Failed to find pattern \"" + pat.pattern() + "\""); - } else { - throw new RegexException("Failed to find pattern \"" + pat.pattern() - + "\" inside of \"" + input + "\""); - } - } + final int group) + throws RegexException { + return matchOrThrow(pattern, input).group(group); } + /** + * Matches multiple patterns against the input string and + * returns group 1 of the first successful match + * + * @param patterns The array of regex patterns to match. + * @param input The input string to match against. + * @return Group 1 of the first successful match. + * @throws RegexException If no patterns match the input or if {@code patterns} is empty. 
+ */ public static String matchGroup1MultiplePatterns(final Pattern[] patterns, final String input) throws RegexException { return matchMultiplePatterns(patterns, input).group(1); } + /** + * Matches multiple patterns against the input string and + * returns the first successful matcher + * + * @param patterns The array of regex patterns to match. + * @param input The input string to match against. + * @return A {@code Matcher} for the first successful match. + * @throws RegexException If no patterns match the input or if {@code patterns} is empty. + */ public static Matcher matchMultiplePatterns(final Pattern[] patterns, final String input) throws RegexException { - Parser.RegexException exception = null; - for (final Pattern pattern : patterns) { - final Matcher matcher = pattern.matcher(input); + RegexException exception = null; + for (final var pattern : patterns) { + final var matcher = pattern.matcher(input); if (matcher.find()) { return matcher; } else if (exception == null) { @@ -110,14 +180,11 @@ public static Matcher matchMultiplePatterns(final Pattern[] patterns, final Stri } public static boolean isMatch(final String pattern, final String input) { - final Pattern pat = Pattern.compile(pattern); - final Matcher mat = pat.matcher(input); - return mat.find(); + return isMatch(Pattern.compile(pattern), input); } public static boolean isMatch(@Nonnull final Pattern pattern, final String input) { - final Matcher mat = pattern.matcher(input); - return mat.find(); + return pattern.matcher(input).find(); } @Nonnull diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index bdf6bf20a2..cfe7cf4044 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -398,10 +398,7 @@ public static String getStringResultFromRegexArray(@Nonnull final String input, throws 
Parser.RegexException { for (final Pattern regex : regexes) { try { - final String result = Parser.matchGroup(regex, input, group); - if (result != null) { - return result; - } + return Parser.matchGroup(regex, input, group); // Continue if the result is null } catch (final Parser.RegexException ignored) { From bba594549cd5488d593cc0c1125eaa9a73f5157c Mon Sep 17 00:00:00 2001 From: David Asunmo <22662897+davidasunmo@users.noreply.github.com.> Date: Mon, 7 Jul 2025 04:52:57 +0100 Subject: [PATCH 08/13] Fix typo --- .../java/org/schabi/newpipe/extractor/utils/ImageSuffix.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ImageSuffix.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ImageSuffix.java index 4d8a141917..7332c75c08 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ImageSuffix.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ImageSuffix.java @@ -14,7 +14,7 @@ *
* This class is used to construct {@link org.schabi.newpipe.extractor.Image Image} * instances from a single base URL/path, in order to get all or most image resolutions provided, - * depending of the service and the resolutions provided. + * depending on the service and the resolutions provided. *
* *
From 0b0b9bbe89628d205ceebbf22a99f81f6a852778 Mon Sep 17 00:00:00 2001
From: David Asunmo <22662897+davidasunmo@users.noreply.github.com.>
Date: Mon, 7 Jul 2025 04:56:54 +0100
Subject: [PATCH 09/13] [SoundCloud] Fix SoundcloudChannelTabExtractorTest Add
annotations to remove warnings
---
.../SoundcloudChannelTabExtractorTest.java | 39 +++++++++----------
1 file changed, 19 insertions(+), 20 deletions(-)
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudChannelTabExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudChannelTabExtractorTest.java
index 475d15a69a..58c498ec93 100644
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudChannelTabExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudChannelTabExtractorTest.java
@@ -1,7 +1,9 @@
package org.schabi.newpipe.extractor.services.soundcloud;
import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
import org.schabi.newpipe.downloader.DownloaderTestImpl;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.NewPipe;
@@ -9,27 +11,23 @@
import org.schabi.newpipe.extractor.channel.tabs.ChannelTabExtractor;
import org.schabi.newpipe.extractor.channel.tabs.ChannelTabs;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
-import org.schabi.newpipe.extractor.exceptions.ParsingException;
-import org.schabi.newpipe.extractor.services.BaseListExtractorTest;
import org.schabi.newpipe.extractor.services.DefaultListExtractorTest;
import org.schabi.newpipe.extractor.services.soundcloud.extractors.SoundcloudChannelTabExtractor;
import java.io.IOException;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.schabi.newpipe.extractor.ServiceList.PeerTube;
import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
import static org.schabi.newpipe.extractor.services.DefaultTests.defaultTestGetPageInNewExtractor;
-import static org.schabi.newpipe.extractor.services.DefaultTests.defaultTestMoreItems;
-import static org.schabi.newpipe.extractor.services.DefaultTests.defaultTestRelatedItems;
class SoundcloudChannelTabExtractorTest {
- static class Tracks extends DefaultListExtractorTest
+ * - Abstract types start with 'I' (e.g., IExample).
+ * - Concrete immutable types do not have a prefix (e.g., Example).
+ * - Getters are prefixed with 'get', 'is', or no prefix.
+ * - Strict builder pattern is enforced.
+ */
+// CHECKSTYLE:ON
+@Target({ElementType.PACKAGE, ElementType.TYPE})
+@Value.Style(
+ get = {"get*", "is*", "*"}, // Methods matching these prefixes will be used as getters.
+ // Methods matching these patterns can NOT be used as setters.
+ typeAbstract = {"I*"}, // Abstract types start with I
+ typeImmutable = "*", // Generated concrete Immutable types will not have the I prefix
+ visibility = Value.Style.ImplementationVisibility.PUBLIC,
+ strictBuilder = true,
+ defaultAsDefault = true, // https://immutables.github.io/immutable.html#default-attributes
+ jdkOnly = true
+)
+public @interface ImmutableStyle { }
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/ParameterisedDefaultSoundcloudStreamExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/ParameterisedDefaultSoundcloudStreamExtractorTest.java
new file mode 100644
index 0000000000..5cdf7d9330
--- /dev/null
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/ParameterisedDefaultSoundcloudStreamExtractorTest.java
@@ -0,0 +1,57 @@
+package org.schabi.newpipe.extractor.services;
+
+import org.junit.jupiter.api.Test;
+import org.schabi.newpipe.extractor.ExtractorAsserts;
+import org.schabi.newpipe.extractor.MediaFormat;
+import org.schabi.newpipe.extractor.services.testcases.SoundcloudStreamExtractorTestCase;
+import org.schabi.newpipe.extractor.stream.AudioStream;
+import org.schabi.newpipe.extractor.stream.DeliveryMethod;
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+public abstract class ParameterisedDefaultSoundcloudStreamExtractorTest
+ extends ParameterisedDefaultStreamExtractorTest
+ * Ideally you will supply a regex matcher for the url that will automatically parse
+ * certain values for the tests.
+ * Ones that can't be derived from the url should be overridden in the test case.
+ */
+public interface DefaultStreamExtractorTestCase extends DefaultExtractorTestCase {
+ /**
+ * Returns matcher for the URL
+ * Implementations should throw IllegalArgumentException if the pattern does not match
+ */
+ Matcher urlMatcher();
+
+ default String getGroupFromUrl(String groupName) {
+ return urlMatcher().group(groupName);
+ }
+
+ default int getGroupEndIndexFromUrl(String groupName) {
+ return urlMatcher().end(groupName);
+ }
+
+ default String id() { return getGroupFromUrl("id"); }
+
+ default String uploader() { return getGroupFromUrl("uploader"); }
+
+ StreamType streamType();
+ String uploaderName();
+ default String uploaderUrl() {
+ final int groupEndIndex = getGroupEndIndexFromUrl("uploader");
+ if (groupEndIndex < 0) {
+ return ""; // no uploader group found in url
+ }
+ return url().substring(0, groupEndIndex);
+ }
+ default boolean uploaderVerified() { return false; }
+ default long uploaderSubscriberCountAtLeast() { return UNKNOWN_SUBSCRIBER_COUNT; } // default: unknown
+ default String subChannelName() { return ""; } // default: no subchannel
+ default String subChannelUrl() { return ""; } // default: no subchannel
+ default boolean descriptionIsEmpty() { return false; } // default: description is not empty
+ List
+ * https://soundcloud.com/user-904087338/nether#t=46
+ *
+ */
+ Pattern URL_PATTERN = Pattern.compile(
+ "^https?://(?:www\\.|m\\.|on\\.)?soundcloud\\.com/"
+ + "(?
- * https://soundcloud.com/user-904087338/nether#t=46
+ * ...
*
*/
Pattern URL_PATTERN = Pattern.compile(
@@ -79,14 +78,14 @@ default String urlContains() {
}
@Value.Derived
- public default StreamingService service() { return SoundCloud; }
+ default StreamingService service() { return SoundCloud; }
@Value.Derived
@Override
- public default StreamType streamType() { return StreamType.AUDIO_STREAM; }
+ default StreamType streamType() { return StreamType.AUDIO_STREAM; }
@Override
- public default int timestamp() {
+ default int timestamp() {
try {
return Integer.parseInt(getGroupFromUrl("timestamp"));
}
@@ -97,15 +96,15 @@ public default int timestamp() {
}
@Override
- public default long dislikeCountAtLeast() { return -1; } // default: soundcloud has no dislikes
+ default long dislikeCountAtLeast() { return -1; } // default: soundcloud has no dislikes
@Override
- public default boolean hasVideoStreams() { return false; } // default: soundcloud has no video streams
+ default boolean hasVideoStreams() { return false; } // default: soundcloud has no video streams
@Override
- public default boolean hasSubtitles() { return false; } // default: soundcloud has no subtitles
+ default boolean hasSubtitles() { return false; } // default: soundcloud has no subtitles
- public default boolean hasFrames() { return false; } // default: soundcloud has no frames
+ default boolean hasFrames() { return false; } // default: soundcloud has no frames
- public default int streamSegmentsCount() { return 0; }
+ default int streamSegmentsCount() { return 0; }
}