Skip to content

Commit 58b3779

Browse files
committed
perf: drastically improved the performance of bulk retrieval for playlists/channels, whether they contain a large or small number of videos, by parallelizing the retrieval of transcript lists
1 parent ac8d9f2 commit 58b3779

File tree

1 file changed

+37
-8
lines changed

1 file changed

+37
-8
lines changed

lib/src/main/java/io/github/thoroldvix/internal/DefaultPlaylistsTranscriptApi.java

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
import java.util.HashMap;
1010
import java.util.List;
1111
import java.util.Map;
12+
import java.util.concurrent.ExecutionException;
13+
import java.util.concurrent.ExecutorService;
14+
import java.util.concurrent.Executors;
15+
import java.util.concurrent.Future;
1216

1317
import static io.github.thoroldvix.api.YtApiV3Endpoint.*;
1418

@@ -30,14 +34,41 @@ class DefaultPlaylistsTranscriptApi implements PlaylistsTranscriptApi {
3034
public Map<String, TranscriptList> listTranscriptsForPlaylist(String playlistId, String apiKey, String cookiesPath, boolean continueOnError) throws TranscriptRetrievalException {
3135
Map<String, TranscriptList> transcriptLists = new HashMap<>();
3236
List<String> videoIds = getVideoIds(playlistId, apiKey);
37+
ExecutorService executor = Executors.newCachedThreadPool();
38+
39+
List<Future<TranscriptList>> futures = new ArrayList<>();
3340

3441
for (String videoId : videoIds) {
35-
try {
36-
TranscriptList transcriptList = getTranscriptList(videoId, cookiesPath);
37-
transcriptLists.put(videoId, transcriptList);
38-
} catch (TranscriptRetrievalException e) {
39-
if (!continueOnError) throw e;
42+
futures.add(executor.submit(() -> {
43+
try {
44+
return getTranscriptList(videoId, cookiesPath);
45+
} catch (TranscriptRetrievalException e) {
46+
if (!continueOnError) throw e;
47+
return null;
48+
}
49+
}));
50+
}
51+
52+
try {
53+
for (Future<TranscriptList> future : futures) {
54+
try {
55+
TranscriptList transcriptList = future.get();
56+
if (transcriptList != null) {
57+
transcriptLists.put(transcriptList.getVideoId(), transcriptList);
58+
}
59+
} catch (ExecutionException e) {
60+
if (!continueOnError) {
61+
executor.shutdownNow();
62+
throw new TranscriptRetrievalException("Failed to retrieve transcripts for playlist: " + playlistId, e);
63+
}
64+
} catch (InterruptedException e) {
65+
Thread.currentThread().interrupt();
66+
executor.shutdownNow();
67+
throw new TranscriptRetrievalException("Failed to retrieve transcripts for playlist: " + playlistId, e);
68+
}
4069
}
70+
} finally {
71+
executor.shutdownNow();
4172
}
4273

4374
return transcriptLists;
@@ -120,13 +151,11 @@ private List<String> getVideoIds(String playlistId, String apiKey) throws Transc
120151

121152
while (true) {
122153
String playlistJson = client.get(PLAYLIST_ITEMS, params);
123-
124154
JsonNode jsonNode = parseJson(playlistJson,
125155
"Could not parse playlist JSON for the playlist: " + playlistId);
126-
127156
extractVideoId(jsonNode, videoIds);
128-
129157
JsonNode nextPageToken = jsonNode.get("nextPageToken");
158+
130159
if (nextPageToken == null) {
131160
break;
132161
}

0 commit comments

Comments
 (0)