@@ -85,6 +85,85 @@ class YoutubeTranscript {
8585 . replace ( / \s + / g, " " ) ;
8686 }
8787
88+ /**
89+ * Calculates a preference score for a caption track to determine the best match
90+ * @param {Object } track - The caption track object from YouTube
91+ * @param {string } track.languageCode - ISO language code (e.g., 'zh-HK', 'en', 'es')
92+ * @param {string } track.kind - Track type ('asr' for auto-generated, "" for human-transcribed)
93+ * @param {string[] } preferredLanguages - Array of language codes in preference order (e.g., ['zh-HK', 'en'])
94+ * @returns {number } Preference score (lower is better)
95+ */
96+ static #calculatePreferenceScore( track , preferredLanguages ) {
97+ // Language preference: index in preferredLanguages array (0 = most preferred)
98+ const languagePreference = preferredLanguages . indexOf ( track . languageCode ) ;
99+ const languageScore = languagePreference === - 1 ? 9999 : languagePreference ;
100+
101+ // Kind bonus: prefer human-transcribed (undefined) over auto-generated ('asr')
102+ const kindBonus = track . kind === "asr" ? 0.5 : 0 ;
103+
104+ return languageScore + kindBonus ;
105+ }
106+
107+ /**
108+ * Finds the most suitable caption track based on preferred languages
109+ * @param {string } videoBody - The raw HTML response from YouTube
110+ * @param {string[] } preferredLanguages - Array of language codes in preference order
111+ * @returns {Object|null } The selected caption track or null if none found
112+ */
113+ static #findPreferredCaptionTrack( videoBody , preferredLanguages ) {
114+ const captionsConfigJson = videoBody . match (
115+ / " c a p t i o n s " : ( .* ?) , " v i d e o D e t a i l s " : / s
116+ ) ;
117+
118+ const captionsConfig = captionsConfigJson ?. [ 1 ]
119+ ? JSON . parse ( captionsConfigJson [ 1 ] )
120+ : null ;
121+
122+ const captionTracks = captionsConfig
123+ ? captionsConfig . playerCaptionsTracklistRenderer . captionTracks
124+ : null ;
125+
126+ if ( ! captionTracks || captionTracks . length === 0 ) {
127+ return null ;
128+ }
129+
130+ const sortedTracks = [ ...captionTracks ] . sort ( ( a , b ) => {
131+ const scoreA = this . #calculatePreferenceScore( a , preferredLanguages ) ;
132+ const scoreB = this . #calculatePreferenceScore( b , preferredLanguages ) ;
133+ return scoreA - scoreB ;
134+ } ) ;
135+
136+ return sortedTracks [ 0 ] ;
137+ }
138+
139+ /**
140+ * Fetches video page content and finds the preferred caption track
141+ * @param {string } videoId - YouTube video ID
142+ * @param {string[] } preferredLanguages - Array of preferred language codes
143+ * @returns {Promise<Object> } The preferred caption track
144+ * @throws {YoutubeTranscriptError } If no suitable caption track is found
145+ */
146+ static async #getPreferredCaptionTrack( videoId , preferredLanguages ) {
147+ const videoResponse = await fetch (
148+ `https://www.youtube.com/watch?v=${ videoId } ` ,
149+ { credentials : "omit" }
150+ ) ;
151+ const videoBody = await videoResponse . text ( ) ;
152+
153+ const preferredCaptionTrack = this . #findPreferredCaptionTrack(
154+ videoBody ,
155+ preferredLanguages
156+ ) ;
157+
158+ if ( ! preferredCaptionTrack ) {
159+ throw new YoutubeTranscriptError (
160+ "No suitable caption track found for the video"
161+ ) ;
162+ }
163+
164+ return preferredCaptionTrack ;
165+ }
166+
88167 /**
89168 * Fetch transcript from YouTube video
90169 * @param {string } videoId - Video URL or video identifier
@@ -93,14 +172,20 @@ class YoutubeTranscript {
93172 * @returns {Promise<string> } Video transcript text
94173 */
95174 static async fetchTranscript ( videoId , config = { } ) {
175+ const preferredLanguages = config ?. lang ? [ config ?. lang , "en" ] : [ "en" ] ;
96176 const identifier = this . retrieveVideoId ( videoId ) ;
97- const lang = config ?. lang ?? "en" ;
98177
99178 try {
179+ const preferredCaptionTrack = await this . #getPreferredCaptionTrack(
180+ identifier ,
181+ preferredLanguages
182+ ) ;
183+
100184 const innerProto = this . #getBase64Protobuf( {
101- param1 : "asr ",
102- param2 : lang ,
185+ param1 : preferredCaptionTrack . kind || " ",
186+ param2 : preferredCaptionTrack . languageCode ,
103187 } ) ;
188+
104189 const params = this . #getBase64Protobuf( {
105190 param1 : identifier ,
106191 param2 : innerProto ,
0 commit comments