Skip to content

Commit efce384

Browse files
committed
[YouTube] Add support for extracting auto-translated captions
Closes #977. Based on and addresses TeamNewPipe/NewPipe#8023
1 parent 41c8dce commit efce384

File tree

3 files changed

+63
-8
lines changed

3 files changed

+63
-8
lines changed

extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeStreamExtractor.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ private void loadSubtitles() {
416416
.setMediaFormat(fmt)
417417
.setLanguageCode(languageCode)
418418
.setAutoGenerated(false)
419+
.setAutoTranslated(false)
419420
.build());
420421
}
421422
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -688,21 +688,20 @@ public List<SubtitlesStream> getSubtitlesDefault() throws ParsingException {
688688

689689
@Override
690690
@Nonnull
691-
public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws ParsingException {
691+
public List<SubtitlesStream> getSubtitles(@Nonnull final MediaFormat format) {
692692
assertPageFetched();
693693

694694
// We cannot store the subtitles list because the media format may change
695695
final List<SubtitlesStream> subtitlesToReturn = new ArrayList<>();
696696
final JsonObject renderer = playerResponse.getObject("captions")
697697
.getObject("playerCaptionsTracklistRenderer");
698698
final JsonArray captionsArray = renderer.getArray("captionTracks");
699-
// TODO: use this to apply auto translation to different language from a source language
700-
// final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages");
701699

702700
for (int i = 0; i < captionsArray.size(); i++) {
703-
final String languageCode = captionsArray.getObject(i).getString("languageCode");
704-
final String baseUrl = captionsArray.getObject(i).getString("baseUrl");
705-
final String vssId = captionsArray.getObject(i).getString("vssId");
701+
final JsonObject caption = captionsArray.getObject(i);
702+
final String languageCode = caption.getString("languageCode");
703+
final String baseUrl = caption.getString("baseUrl");
704+
final String vssId = caption.getString("vssId");
706705

707706
if (languageCode != null && baseUrl != null && vssId != null) {
708707
final boolean isAutoGenerated = vssId.startsWith("a.");
@@ -717,7 +716,24 @@ public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws Parsi
717716
.setMediaFormat(format)
718717
.setLanguageCode(languageCode)
719718
.setAutoGenerated(isAutoGenerated)
719+
.setAutoTranslated(false)
720720
.build());
721+
if (i == 0 && caption.getBoolean("isTranslatable")
722+
&& renderer.has("translationLanguages")) {
723+
final JsonArray languages = renderer.getArray("translationLanguages");
724+
for (int j = 0; j < languages.size(); j++) {
725+
final JsonObject lang = languages.getObject(j);
726+
final String tLanguageCode = lang.getString("languageCode");
727+
subtitlesToReturn.add(new SubtitlesStream.Builder()
728+
.setContent(cleanUrl + "&fmt=" + format.getSuffix()
729+
+ "&tlang=" + tLanguageCode, true)
730+
.setMediaFormat(format)
731+
.setLanguageCode(tLanguageCode)
732+
.setAutoGenerated(isAutoGenerated)
733+
.setAutoTranslated(true)
734+
.build());
735+
}
736+
}
721737
}
722738
}
723739

extractor/src/main/java/org/schabi/newpipe/extractor/stream/SubtitlesStream.java

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ public final class SubtitlesStream extends Stream {
1212
private final MediaFormat format;
1313
private final Locale locale;
1414
private final boolean autoGenerated;
15+
private final boolean autoTranslated;
1516
private final String code;
1617

1718
/**
@@ -30,6 +31,7 @@ public static final class Builder {
3031
private String languageCode;
3132
// Use of the Boolean class instead of the primitive type needed for setter call check
3233
private Boolean autoGenerated;
34+
private Boolean autoTranslated;
3335

3436
/**
3537
* Create a new {@link Builder} instance with default values.
@@ -150,6 +152,18 @@ public Builder setAutoGenerated(final boolean autoGenerated) {
150152
return this;
151153
}
152154

155+
/**
156+
* Set whether the subtitles have been automatically translated
157+
* (i.e. by a machine, such as Google Translate) by the streaming service.
158+
* @param autoTranslated whether the subtitles have been automatically translated by the
159+
* streaming service
160+
* @return this {@link Builder} instance
161+
*/
162+
public Builder setAutoTranslated(final boolean autoTranslated) {
163+
this.autoTranslated = autoTranslated;
164+
return this;
165+
}
166+
153167
/**
154168
* Build a {@link SubtitlesStream} using the builder's current values.
155169
*
@@ -194,13 +208,19 @@ public SubtitlesStream build() {
194208
+ "with setIsAutoGenerated.");
195209
}
196210

211+
if (autoTranslated == null) {
212+
throw new IllegalStateException("The subtitles stream has been not set as an "
213+
+ "automatically translated subtitles stream or not. "
214+
+ "Please specify this information with setIsAutoTranslated.");
215+
}
216+
197217
if (id == null) {
198218
id = languageCode + (mediaFormat != null ? "." + mediaFormat.suffix
199219
: "");
200220
}
201221

202222
return new SubtitlesStream(id, content, isUrl, mediaFormat, deliveryMethod,
203-
languageCode, autoGenerated, manifestUrl);
223+
languageCode, autoGenerated, autoTranslated, manifestUrl);
204224
}
205225
}
206226

@@ -217,6 +237,7 @@ public SubtitlesStream build() {
217237
* @param deliveryMethod the {@link DeliveryMethod} of the stream
218238
* @param languageCode the language code of the stream
219239
* @param autoGenerated whether the subtitles are auto-generated by the streaming service
240+
* @param autoTranslated whether the subtitles are auto-translated by the streaming service
220241
* @param manifestUrl the URL of the manifest this stream comes from (if applicable,
221242
* otherwise null)
222243
*/
@@ -228,6 +249,7 @@ private SubtitlesStream(@Nonnull final String id,
228249
@Nonnull final DeliveryMethod deliveryMethod,
229250
@Nonnull final String languageCode,
230251
final boolean autoGenerated,
252+
final boolean autoTranslated,
231253
@Nullable final String manifestUrl) {
232254
super(id, content, isUrl, mediaFormat, deliveryMethod, manifestUrl);
233255

@@ -253,6 +275,7 @@ private SubtitlesStream(@Nonnull final String id,
253275
this.code = languageCode;
254276
this.format = mediaFormat;
255277
this.autoGenerated = autoGenerated;
278+
this.autoTranslated = autoTranslated;
256279
}
257280

258281
/**
@@ -265,7 +288,7 @@ public String getExtension() {
265288
}
266289

267290
/**
268-
* Return whether if the subtitles are auto-generated.
291+
* Return whether the subtitles are auto-generated.
269292
* <p>
270293
* Some streaming services can generate subtitles for their contents, like YouTube.
271294
* </p>
@@ -276,6 +299,21 @@ public boolean isAutoGenerated() {
276299
return autoGenerated;
277300
}
278301

302+
/**
303+
* Whether the subtitles are translated automatically by a machine.
304+
*
305+
* <p>
306+
* Some streaming services provide automatically translated subtitles.
307+
* YouTube, for example, uses Google Translate to generate translated subtitles.
308+
* Automatically translated subtitles might not coincide completely with the original text.
309+
* </p>
310+
*
311+
* @return {@code true} if the subtitles are auto-translated, {@code false} otherwise
312+
*/
313+
public boolean isAutoTranslated() {
314+
return autoTranslated;
315+
}
316+
279317
/**
280318
* {@inheritDoc}
281319
*/

0 commit comments

Comments
 (0)