Skip to content

Commit 69e18c8

Browse files
authored
Merge pull request #799 from litetex/imporve-yt-comments-extractor
Refactored YoutubeCommentsExtractor
2 parents dfe8716 + 1a67ea1 commit 69e18c8

File tree

1 file changed

+87
-71
lines changed

1 file changed

+87
-71
lines changed

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java

Lines changed: 87 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,10 @@
22

33
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
44
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
5-
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
65
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
76

87
import java.io.IOException;
8+
import java.nio.charset.StandardCharsets;
99
import java.util.Collections;
1010
import java.util.List;
1111
import java.util.Optional;
@@ -17,7 +17,6 @@
1717
import org.schabi.newpipe.extractor.StreamingService;
1818
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
1919
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
20-
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
2120
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
2221
import org.schabi.newpipe.extractor.downloader.Downloader;
2322
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@@ -38,7 +37,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
3837
* Caching mechanism and holder of the commentsDisabled value.
3938
* <br/>
4039
* Initial value = empty -> unknown if comments are disabled or not<br/>
41-
* Some method calls {@link YoutubeCommentsExtractor#findInitialCommentsToken()}
40+
* Some method calls {@link #findInitialCommentsToken()}
4241
* -> value is set<br/>
4342
* If the method or another one that is depending on disabled comments
4443
* is now called again, the method execution can avoid unnecessary calls
@@ -74,45 +73,45 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage()
7473

7574
/**
7675
* Finds the initial comments token and initializes commentsDisabled.
76+
* <br/>
77+
* Also sets {@link #optCommentsDisabled}.
7778
*
7879
* @return the continuation token or null if none was found
7980
*/
8081
@Nullable
8182
private String findInitialCommentsToken() throws ExtractionException {
82-
83-
final JsonArray jArray = JsonUtils.getArray(nextResponse,
84-
"contents.twoColumnWatchNextResults.results.results.contents");
85-
86-
final Optional<Object> itemSectionRenderer = jArray.stream().filter(o -> {
87-
JsonObject jObj = (JsonObject) o;
88-
89-
if (jObj.has("itemSectionRenderer")) {
90-
try {
91-
return JsonUtils.getString(jObj, "itemSectionRenderer.targetId")
92-
.equals("comments-section");
93-
} catch (final ParsingException ignored) {
94-
}
95-
}
96-
97-
return false;
98-
}).findFirst();
99-
100-
final String token;
101-
102-
if (itemSectionRenderer.isPresent()) {
103-
token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
104-
.getObject("itemSectionRenderer").getArray("contents").getObject(0),
105-
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
106-
} else {
107-
token = null;
108-
}
109-
110-
if (token == null) {
111-
optCommentsDisabled = Optional.of(true);
112-
return null;
113-
}
114-
115-
optCommentsDisabled = Optional.of(false);
83+
final String token = JsonUtils.getArray(nextResponse,
84+
"contents.twoColumnWatchNextResults.results.results.contents")
85+
.stream()
86+
// Only use JsonObjects
87+
.filter(JsonObject.class::isInstance)
88+
.map(JsonObject.class::cast)
89+
// Check if the comment-section is present
90+
.filter(jObj -> {
91+
try {
92+
return "comments-section".equals(
93+
JsonUtils.getString(jObj, "itemSectionRenderer.targetId"));
94+
} catch (final ParsingException ignored) {
95+
return false;
96+
}
97+
})
98+
.findFirst()
99+
// Extract the token (or null in case of error)
100+
.map(itemSectionRenderer -> {
101+
try {
102+
return JsonUtils.getString(
103+
itemSectionRenderer
104+
.getObject("itemSectionRenderer")
105+
.getArray("contents").getObject(0),
106+
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
107+
} catch (final ParsingException ignored) {
108+
return null;
109+
}
110+
})
111+
.orElse(null);
112+
113+
// The comments are disabled if we couldn't get a token
114+
optCommentsDisabled = Optional.of(token == null);
116115

117116
return token;
118117
}
@@ -124,25 +123,37 @@ private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
124123

125124
@Nullable
126125
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
127-
final JsonArray jsonArray;
128-
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
129-
"onResponseReceivedEndpoints");
130-
final JsonObject endpoint = onResponseReceivedEndpoints.getObject(
131-
onResponseReceivedEndpoints.size() - 1);
126+
final JsonArray onResponseReceivedEndpoints =
127+
ajaxJson.getArray("onResponseReceivedEndpoints");
132128

129+
// Prevent ArrayIndexOutOfBoundsException
130+
if (onResponseReceivedEndpoints.isEmpty()) {
131+
return null;
132+
}
133+
134+
final JsonArray continuationItemsArray;
133135
try {
134-
jsonArray = endpoint.getObject("reloadContinuationItemsCommand", endpoint.getObject(
135-
"appendContinuationItemsAction")).getArray("continuationItems");
136+
final JsonObject endpoint = onResponseReceivedEndpoints
137+
.getObject(onResponseReceivedEndpoints.size() - 1);
138+
continuationItemsArray = endpoint
139+
.getObject("reloadContinuationItemsCommand",
140+
endpoint.getObject("appendContinuationItemsAction"))
141+
.getArray("continuationItems");
136142
} catch (final Exception e) {
137143
return null;
138144
}
139-
if (jsonArray.isEmpty()) {
145+
// Prevent ArrayIndexOutOfBoundsException
146+
if (continuationItemsArray.isEmpty()) {
140147
return null;
141148
}
142149

143-
final JsonObject continuationItemRenderer = jsonArray.getObject(jsonArray.size() - 1).getObject("continuationItemRenderer");
150+
final JsonObject continuationItemRenderer = continuationItemsArray
151+
.getObject(continuationItemsArray.size() - 1)
152+
.getObject("continuationItemRenderer");
144153

145-
final String jsonPath = continuationItemRenderer.has("button") ? "button.buttonRenderer.command.continuationCommand.token" : "continuationEndpoint.continuationCommand.token";
154+
final String jsonPath = continuationItemRenderer.has("button")
155+
? "button.buttonRenderer.command.continuationCommand.token"
156+
: "continuationEndpoint.continuationCommand.token";
146157

147158
final String continuation;
148159
try {
@@ -169,11 +180,11 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
169180
}
170181

171182
final Localization localization = getExtractorLocalization();
172-
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
173-
getExtractorContentCountry())
174-
.value("continuation", page.getId())
175-
.done())
176-
.getBytes(UTF_8);
183+
final byte[] body = JsonWriter.string(
184+
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
185+
.value("continuation", page.getId())
186+
.done())
187+
.getBytes(StandardCharsets.UTF_8);
177188

178189
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
179190

@@ -186,10 +197,14 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
186197
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
187198
@Nonnull final JsonObject ajaxJson) throws ParsingException {
188199

189-
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
190-
"onResponseReceivedEndpoints");
191-
final JsonObject commentsEndpoint = onResponseReceivedEndpoints.getObject(
192-
onResponseReceivedEndpoints.size() - 1);
200+
final JsonArray onResponseReceivedEndpoints =
201+
ajaxJson.getArray("onResponseReceivedEndpoints");
202+
// Prevent ArrayIndexOutOfBoundsException
203+
if (onResponseReceivedEndpoints.isEmpty()) {
204+
return;
205+
}
206+
final JsonObject commentsEndpoint =
207+
onResponseReceivedEndpoints.getObject(onResponseReceivedEndpoints.size() - 1);
193208

194209
final String path;
195210

@@ -204,18 +219,20 @@ private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
204219

205220
final JsonArray contents;
206221
try {
207-
contents = (JsonArray) JsonUtils.getArray(commentsEndpoint, path).clone();
222+
contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path));
208223
} catch (final Exception e) {
209224
// No comments
210225
return;
211226
}
212227

213228
final int index = contents.size() - 1;
214-
if (contents.getObject(index).has("continuationItemRenderer")) {
229+
if (!contents.isEmpty() && contents.getObject(index).has("continuationItemRenderer")) {
215230
contents.remove(index);
216231
}
217232

218-
final String jsonKey = contents.getObject(0).has("commentThreadRenderer") ? "commentThreadRenderer" : "commentRenderer";
233+
final String jsonKey = contents.getObject(0).has("commentThreadRenderer")
234+
? "commentThreadRenderer"
235+
: "commentRenderer";
219236

220237
final List<Object> comments;
221238
try {
@@ -224,24 +241,23 @@ private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
224241
throw new ParsingException("Unable to get parse youtube comments", e);
225242
}
226243

227-
for (final Object c : comments) {
228-
if (c instanceof JsonObject) {
229-
final CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor(
230-
(JsonObject) c, getUrl(), getTimeAgoParser());
231-
collector.commit(extractor);
232-
}
233-
}
244+
final String url = getUrl();
245+
comments.stream()
246+
.filter(JsonObject.class::isInstance)
247+
.map(JsonObject.class::cast)
248+
.map(jObj -> new YoutubeCommentsInfoItemExtractor(jObj, url, getTimeAgoParser()))
249+
.forEach(collector::commit);
234250
}
235251

236252
@Override
237253
public void onFetchPage(@Nonnull final Downloader downloader)
238254
throws IOException, ExtractionException {
239255
final Localization localization = getExtractorLocalization();
240-
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
241-
getExtractorContentCountry())
242-
.value("videoId", getId())
243-
.done())
244-
.getBytes(UTF_8);
256+
final byte[] body = JsonWriter.string(
257+
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
258+
.value("videoId", getId())
259+
.done())
260+
.getBytes(StandardCharsets.UTF_8);
245261

246262
nextResponse = getJsonPostResponse("next", body, localization);
247263
}

0 commit comments

Comments
 (0)