Skip to content

Commit f79ce1f

Browse files
committed
Refactored YoutubeCommentsExtractor
* Use the Java Stream API * Use StandardCharsets * Prevented several NPEs/ArrayIndexOutOfBoundsExceptions * Reformatted some code so that it's easier to read
1 parent dfe8716 commit f79ce1f

File tree

1 file changed

+88
-71
lines changed

1 file changed

+88
-71
lines changed

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java

Lines changed: 88 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
44
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
5-
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
65
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
76

87
import java.io.IOException;
8+
import java.nio.charset.StandardCharsets;
99
import java.util.Collections;
1010
import java.util.List;
1111
import java.util.Optional;
@@ -17,7 +17,6 @@
1717
import org.schabi.newpipe.extractor.StreamingService;
1818
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
1919
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
20-
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
2120
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
2221
import org.schabi.newpipe.extractor.downloader.Downloader;
2322
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@@ -38,7 +37,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
3837
* Caching mechanism and holder of the commentsDisabled value.
3938
* <br/>
4039
* Initial value = empty -> unknown if comments are disabled or not<br/>
41-
* Some method calls {@link YoutubeCommentsExtractor#findInitialCommentsToken()}
40+
* Some method calls {@link #findInitialCommentsToken()}
4241
* -> value is set<br/>
4342
* If the method or another one that is depending on disabled comments
4443
* is now called again, the method execution can avoid unnecessary calls
@@ -74,45 +73,46 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage()
7473

7574
/**
7675
* Finds the initial comments token and initializes commentsDisabled.
76+
* Also sets optCommentsDisabled: true if no token could be found, false otherwise.
7777
*
7878
* @return the continuation token or null if none was found
7979
*/
8080
@Nullable
8181
private String findInitialCommentsToken() throws ExtractionException {
82-
83-
final JsonArray jArray = JsonUtils.getArray(nextResponse,
84-
"contents.twoColumnWatchNextResults.results.results.contents");
85-
86-
final Optional<Object> itemSectionRenderer = jArray.stream().filter(o -> {
87-
JsonObject jObj = (JsonObject) o;
88-
89-
if (jObj.has("itemSectionRenderer")) {
90-
try {
91-
return JsonUtils.getString(jObj, "itemSectionRenderer.targetId")
92-
.equals("comments-section");
93-
} catch (final ParsingException ignored) {
94-
}
95-
}
96-
97-
return false;
98-
}).findFirst();
99-
100-
final String token;
101-
102-
if (itemSectionRenderer.isPresent()) {
103-
token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
104-
.getObject("itemSectionRenderer").getArray("contents").getObject(0),
105-
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
106-
} else {
107-
token = null;
108-
}
109-
110-
if (token == null) {
111-
optCommentsDisabled = Optional.of(true);
112-
return null;
113-
}
114-
115-
optCommentsDisabled = Optional.of(false);
82+
final String token = JsonUtils.getArray(nextResponse,
83+
"contents.twoColumnWatchNextResults.results.results.contents")
84+
.stream()
85+
// Only use JsonObjects
86+
.filter(JsonObject.class::isInstance)
87+
.map(JsonObject.class::cast)
88+
// Only process JsonObjects that have an itemSectionRenderer
89+
.filter(jObj -> jObj.has("itemSectionRenderer"))
90+
// Check if the comment-section is present
91+
.filter(jObj -> {
92+
try {
93+
return "comments-section".equals(
94+
JsonUtils.getString(jObj, "itemSectionRenderer.targetId"));
95+
} catch (final ParsingException ex) {
96+
return false;
97+
}
98+
})
99+
.findFirst()
100+
// Extract the token (or null in case of error)
101+
.map(itemSectionRenderer -> {
102+
try {
103+
return JsonUtils.getString(
104+
itemSectionRenderer
105+
.getObject("itemSectionRenderer")
106+
.getArray("contents").getObject(0),
107+
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
108+
} catch (final ParsingException ex) {
109+
return null;
110+
}
111+
})
112+
.orElse(null);
113+
114+
// The comments are disabled if we couldn't get a token
115+
optCommentsDisabled = Optional.of(token == null);
116116

117117
return token;
118118
}
@@ -124,25 +124,37 @@ private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
124124

125125
@Nullable
126126
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
127-
final JsonArray jsonArray;
128-
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
129-
"onResponseReceivedEndpoints");
130-
final JsonObject endpoint = onResponseReceivedEndpoints.getObject(
131-
onResponseReceivedEndpoints.size() - 1);
127+
final JsonArray onResponseReceivedEndpoints =
128+
ajaxJson.getArray("onResponseReceivedEndpoints");
132129

130+
// Prevent ArrayIndexOutOfBoundsException
131+
if (onResponseReceivedEndpoints.isEmpty()) {
132+
return null;
133+
}
134+
135+
final JsonArray continuationItemsArray;
133136
try {
134-
jsonArray = endpoint.getObject("reloadContinuationItemsCommand", endpoint.getObject(
135-
"appendContinuationItemsAction")).getArray("continuationItems");
137+
final JsonObject endpoint = onResponseReceivedEndpoints
138+
.getObject(onResponseReceivedEndpoints.size() - 1);
139+
continuationItemsArray = endpoint
140+
.getObject("reloadContinuationItemsCommand",
141+
endpoint.getObject("appendContinuationItemsAction"))
142+
.getArray("continuationItems");
136143
} catch (final Exception e) {
137144
return null;
138145
}
139-
if (jsonArray.isEmpty()) {
146+
// Prevent ArrayIndexOutOfBoundsException
147+
if (continuationItemsArray.isEmpty()) {
140148
return null;
141149
}
142150

143-
final JsonObject continuationItemRenderer = jsonArray.getObject(jsonArray.size() - 1).getObject("continuationItemRenderer");
151+
final JsonObject continuationItemRenderer = continuationItemsArray
152+
.getObject(continuationItemsArray.size() - 1)
153+
.getObject("continuationItemRenderer");
144154

145-
final String jsonPath = continuationItemRenderer.has("button") ? "button.buttonRenderer.command.continuationCommand.token" : "continuationEndpoint.continuationCommand.token";
155+
final String jsonPath = continuationItemRenderer.has("button")
156+
? "button.buttonRenderer.command.continuationCommand.token"
157+
: "continuationEndpoint.continuationCommand.token";
146158

147159
final String continuation;
148160
try {
@@ -169,11 +181,11 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
169181
}
170182

171183
final Localization localization = getExtractorLocalization();
172-
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
173-
getExtractorContentCountry())
174-
.value("continuation", page.getId())
175-
.done())
176-
.getBytes(UTF_8);
184+
final byte[] body = JsonWriter.string(
185+
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
186+
.value("continuation", page.getId())
187+
.done())
188+
.getBytes(StandardCharsets.UTF_8);
177189

178190
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
179191

@@ -186,10 +198,14 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
186198
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
187199
@Nonnull final JsonObject ajaxJson) throws ParsingException {
188200

189-
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
190-
"onResponseReceivedEndpoints");
191-
final JsonObject commentsEndpoint = onResponseReceivedEndpoints.getObject(
192-
onResponseReceivedEndpoints.size() - 1);
201+
final JsonArray onResponseReceivedEndpoints =
202+
ajaxJson.getArray("onResponseReceivedEndpoints");
203+
// Prevent ArrayIndexOutOfBoundsException
204+
if (onResponseReceivedEndpoints.isEmpty()) {
205+
return;
206+
}
207+
final JsonObject commentsEndpoint =
208+
onResponseReceivedEndpoints.getObject(onResponseReceivedEndpoints.size() - 1);
193209

194210
final String path;
195211

@@ -204,18 +220,20 @@ private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
204220

205221
final JsonArray contents;
206222
try {
207-
contents = (JsonArray) JsonUtils.getArray(commentsEndpoint, path).clone();
223+
contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path));
208224
} catch (final Exception e) {
209225
// No comments
210226
return;
211227
}
212228

213229
final int index = contents.size() - 1;
214-
if (contents.getObject(index).has("continuationItemRenderer")) {
230+
if (!contents.isEmpty() && contents.getObject(index).has("continuationItemRenderer")) {
215231
contents.remove(index);
216232
}
217233

218-
final String jsonKey = contents.getObject(0).has("commentThreadRenderer") ? "commentThreadRenderer" : "commentRenderer";
234+
final String jsonKey = contents.getObject(0).has("commentThreadRenderer")
235+
? "commentThreadRenderer"
236+
: "commentRenderer";
219237

220238
final List<Object> comments;
221239
try {
@@ -224,24 +242,23 @@ private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
224242
throw new ParsingException("Unable to get parse youtube comments", e);
225243
}
226244

227-
for (final Object c : comments) {
228-
if (c instanceof JsonObject) {
229-
final CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor(
230-
(JsonObject) c, getUrl(), getTimeAgoParser());
231-
collector.commit(extractor);
232-
}
233-
}
245+
final String url = getUrl();
246+
comments.stream()
247+
.filter(JsonObject.class::isInstance)
248+
.map(JsonObject.class::cast)
249+
.map(jObj -> new YoutubeCommentsInfoItemExtractor(jObj, url, getTimeAgoParser()))
250+
.forEach(collector::commit);
234251
}
235252

236253
@Override
237254
public void onFetchPage(@Nonnull final Downloader downloader)
238255
throws IOException, ExtractionException {
239256
final Localization localization = getExtractorLocalization();
240-
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
241-
getExtractorContentCountry())
242-
.value("videoId", getId())
243-
.done())
244-
.getBytes(UTF_8);
257+
final byte[] body = JsonWriter.string(
258+
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
259+
.value("videoId", getId())
260+
.done())
261+
.getBytes(StandardCharsets.UTF_8);
245262

246263
nextResponse = getJsonPostResponse("next", body, localization);
247264
}

0 commit comments

Comments
 (0)