Fixed channel and playlist pagination extraction 🥳

litetex · litetex · commit 7a511c84c04b · 2022-04-21T21:28:03.000+02:00
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/invidious/InvidiousParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/invidious/InvidiousParsingHelper.java
@@ -8,7 +8,6 @@
 import com.grack.nanojson.JsonParser;
 import com.grack.nanojson.JsonParserException;
 
-import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.downloader.Response;
 import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@@ -111,10 +110,6 @@ public static String getUid(@Nonnull final String id) {
         return id;
     }
 
-    public static Page getPage(final String url, final int page) {
-        return new Page(url + "?page=" + page, String.valueOf(page));
-    }
-
     /**
      * Get thumbnail URL at a reasonable quality
      *
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/invidious/extractors/InvidiousChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/invidious/extractors/InvidiousChannelExtractor.java
@@ -11,6 +11,7 @@
 import org.schabi.newpipe.extractor.downloader.Downloader;
 import org.schabi.newpipe.extractor.downloader.Response;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
 import org.schabi.newpipe.extractor.services.youtube.invidious.InvidiousParsingHelper;
 import org.schabi.newpipe.extractor.services.youtube.invidious.InvidiousService;
@@ -21,7 +22,6 @@
 
 import javax.annotation.Nonnull;
 
-
 public class InvidiousChannelExtractor extends ChannelExtractor {
 
     private final String baseUrl;
@@ -41,31 +41,41 @@ public InfoItemsPage<StreamInfoItem> getInitialPage() throws IOException, Extrac
         return getPage(getPage(1));
     }
 
-    protected Page getPage(final int page) {
-        return InvidiousParsingHelper.getPage(
-                baseUrl + "/api/v1/channels/videos/" + json.getString("authorId"),
-                page
-        );
+    protected Page getPage(final int page) throws ParsingException {
+        return new Page(
+                baseUrl + "/api/v1/channels/videos/" + getUid(getId())
+                        + "?page=" + page,
+                String.valueOf(page));
     }
 
     @Override
     public InfoItemsPage<StreamInfoItem> getPage(
             final Page page
     ) throws IOException, ExtractionException {
+        if (page == null) {
+            return InfoItemsPage.emptyPage();
+        }
+
         final Response rp = NewPipe.getDownloader().get(page.getUrl());
         final JsonArray array =
                 InvidiousParsingHelper.getValidJsonArrayFromResponse(rp, page.getUrl());
 
+        if (array.isEmpty()) {
+            return InfoItemsPage.emptyPage();
+        }
+
         final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
         array.stream()
                 .filter(JsonObject.class::isInstance)
                 .map(JsonObject.class::cast)
                 .map(o -> new InvidiousStreamInfoItemExtractor(o, baseUrl))
                 .forEach(collector::commit);
 
-        final Page nextPage = array.size() < 59
-                // max number of items per page
-                // with Second it is 29 but next Page logic is not implemented
+        // If there are fewer than 60 results, we have reached the end
+        // CHECKSTYLE:OFF - url has to be there in one piece
+        // https://github.com/iv-org/invidious/blob/4900ce24fac163d801a56af1fcf0f4c207448adf/src/invidious/routes/api/v1/channels.cr#L178
+        // CHECKSTYLE:ON
+        final Page nextPage = array.size() < 60
                 ? null
                 : getPage(Integer.parseInt(page.getId()) + 1);
 
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/invidious/extractors/InvidiousPlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/invidious/extractors/InvidiousPlaylistExtractor.java
@@ -35,8 +35,7 @@ public InvidiousPlaylistExtractor(
     @Nonnull
     @Override
     public String getThumbnailUrl() {
-        final JsonArray thumbnails = json.getArray("authorThumbnails");
-        return InvidiousParsingHelper.getThumbnailUrl(thumbnails);
+        return InvidiousParsingHelper.getThumbnailUrl(json.getArray("authorThumbnails"));
     }
 
     @Override
@@ -63,14 +62,18 @@ public long getStreamCount() {
     @Nonnull
     @Override
     public InfoItemsPage<StreamInfoItem> getInitialPage() throws IOException, ExtractionException {
-        return getPage(getPage(1));
+        return getPage(getPageByIndex(0));
     }
 
     @Override
     public InfoItemsPage<StreamInfoItem> getPage(
             final Page page
     ) throws IOException, ExtractionException {
-        if (Integer.parseInt(page.getId()) != 1) {
+        if (page == null) {
+            return InfoItemsPage.emptyPage();
+        }
+        // Initially fetched page (onFetchPage) already contains this info, so don't fetch again
+        if (Integer.parseInt(page.getId()) != 0) {
             final Response rp = NewPipe.getDownloader().get(page.getUrl());
             json = InvidiousParsingHelper.getValidJsonObjectFromResponse(rp, page.getUrl());
         }
@@ -84,17 +87,40 @@ public InfoItemsPage<StreamInfoItem> getPage(
                 .map(commentObj -> new InvidiousStreamInfoItemExtractor(commentObj, baseUrl))
                 .forEach(collector::commit);
 
-        final Page nextPage = videos.size() < 99
-                // max number of items per page
-                ? null
-                : getPage(Integer.parseInt(page.getId()) + 1);
+        final int lastIndex = videos.isEmpty()
+                ? -1
+                : videos.getObject(videos.size() - 1).getInt("index", -1);
 
+        final Page nextPage = lastIndex == -1 || lastIndex >= getStreamCount() - 1
+                ? null
+                : getPageByIndex(lastIndex);
 
         return new InfoItemsPage<>(collector, nextPage);
     }
 
-    public Page getPage(final int page) throws ParsingException {
-        return InvidiousParsingHelper.getPage(baseUrl + "/api/v1/playlists/" + getId(), page);
+    /*
+     * Note: Querying is done by index and not pagination, because it's a lot easier
+     * // CHECKSTYLE:OFF - url has to be there in one piece
+     * https://github.com/iv-org/invidious/blob/4900ce24fac163d801a56af1fcf0f4c207448adf/src/invidious/routes/api/v1/misc.cr#L20-L22
+     * // CHECKSTYLE:ON
+     *
+     * e.g. Paging contains multiple duplicate items:
+     * Playlist-Size=505
+     * Page StartIndex EndIndex Video-Count
+     * 1 0   199 200
+     * 2 50  249 200
+     * 3 150 349 200
+     * 4 250 449 200
+     * 5 350 504 154
+     * 6 450 504 54
+     * 7 -
+     *
+     * Note that the index is also used as the offset
+     */
+    public Page getPageByIndex(final int index) throws ParsingException {
+        return new Page(
+                baseUrl + "/api/v1/playlists/" + getId() + "?index=" + index,
+                String.valueOf(index));
     }
 
     @Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/invidious/linkHandler/InvidiousLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/invidious/linkHandler/InvidiousLinkHandlerFactory.java
@@ -13,7 +13,7 @@ public interface InvidiousLinkHandlerFactory extends YoutubeLikeLinkHandlerFacto
     @Override
     default boolean isInvidiousUrl(final URL url) {
         return Utils.removeMAndWWWFromHost(url.getHost())
-                .equalsIgnoreCase(Utils.getHostOrNull(getInvidiousBaseUrl())) &&
-                YoutubeUrlHelper.isInvidioURL(url);
+                .equalsIgnoreCase(Utils.getHostOrNull(getInvidiousBaseUrl()))
+                && YoutubeUrlHelper.isInvidioURL(url);
     }
 }
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/shared/linkHandler/YoutubeLikeLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/shared/linkHandler/YoutubeLikeLinkHandlerFactory.java
@@ -14,5 +14,5 @@ default boolean isSupportedYouTubeLikeHost(final URL url) {
                 || YoutubeUrlHelper.isY2ubeURL(url);
     }
 
-    boolean isInvidiousUrl(final URL url);
+    boolean isInvidiousUrl(URL url);
 }
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemsCollector.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemsCollector.java
@@ -107,7 +107,7 @@ public void commit(final StreamInfoItemExtractor extractor) {
         try {
             addItem(extract(extractor));
         } catch (final FoundAdException ae) {
-            //System.out.println("AD_WARNING: " + ae.getMessage());
+            // Do nothing
         } catch (final Exception e) {
             addError(e);
         }

Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,7 @@ public interface InvidiousLinkHandlerFactory extends YoutubeLikeLinkHandlerFacto`
`13`	`13`	`@Override`
`14`	`14`	`default boolean isInvidiousUrl(final URL url) {`
`15`	`15`	`return Utils.removeMAndWWWFromHost(url.getHost())`
`16`		`- .equalsIgnoreCase(Utils.getHostOrNull(getInvidiousBaseUrl())) &&`
`17`		`- YoutubeUrlHelper.isInvidioURL(url);`
	`16`	`+ .equalsIgnoreCase(Utils.getHostOrNull(getInvidiousBaseUrl()))`
	`17`	`+ && YoutubeUrlHelper.isInvidioURL(url);`
`18`	`18`	`}`
`19`	`19`	`}`
Original file line number	Diff line number	Diff line change
`@@ -14,5 +14,5 @@ default boolean isSupportedYouTubeLikeHost(final URL url) {`
`14`	`14`	`\|\| YoutubeUrlHelper.isY2ubeURL(url);`
`15`	`15`	`}`
`16`	`16`
`17`		`- boolean isInvidiousUrl(final URL url);`
	`17`	`+ boolean isInvidiousUrl(URL url);`
`18`	`18`	`}`
Original file line number	Diff line number	Diff line change
`@@ -107,7 +107,7 @@ public void commit(final StreamInfoItemExtractor extractor) {`
`107`	`107`	`try {`
`108`	`108`	`addItem(extract(extractor));`
`109`	`109`	`} catch (final FoundAdException ae) {`
`110`		`- //System.out.println("AD_WARNING: " + ae.getMessage());`
	`110`	`+ // Do nothing`
`111`	`111`	`} catch (final Exception e) {`
`112`	`112`	`addError(e);`
`113`	`113`	`}`