@@ -39,6 +39,7 @@
     compat_str,
     compat_tokenize_tokenize,
     compat_urllib_error,
+    compat_urllib_parse,
     compat_urllib_request,
     compat_urllib_request_DataHandler,
 )
@@ -60,6 +61,7 @@
     format_bytes,
     formatSeconds,
     GeoRestrictedError,
+    HEADRequest,
     int_or_none,
     ISO3166Utils,
     locked_file,
@@ -73,6 +75,8 @@
     PostProcessingError,
     preferredencoding,
     prepend_extension,
+    process_communicate_or_kill,
+    PUTRequest,
     register_socks_protocols,
     render_table,
     replace_extension,
@@ -720,7 +724,7 @@ def prepare_filename(self, info_dict):
                 filename = encodeFilename(filename, True).decode(preferredencoding())
             return sanitize_path(filename)
         except ValueError as err:
-            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
+            self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
             return None

     def _match_entry(self, info_dict, incomplete):
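error_to_compat_str() from youtube_dl.utils replaces str() here so the message stays printable on Python 2, where str() on an exception carrying a non-ASCII unicode message raises UnicodeEncodeError. A minimal sketch of that failure mode with a hypothetical fallback, not the helper's real implementation:

def stringify_error(err):  # hypothetical stand-in for error_to_compat_str
    try:
        return str(err)
    except UnicodeEncodeError:
        # Python 2 only: fall back to a text-type conversion of the message
        return u'%s' % err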
@@ -773,11 +777,20 @@ def add_extra_info(info_dict, extra_info):

     def extract_info(self, url, download=True, ie_key=None, extra_info={},
                      process=True, force_generic_extractor=False):
-        '''
-        Returns a list with a dictionary for each video we find.
-        If 'download', also downloads the videos.
-        extra_info is a dict containing the extra values to add to each result
-        '''
+        """
+        Return a list with a dictionary for each video extracted.
+
+        Arguments:
+        url -- URL to extract
+
+        Keyword arguments:
+        download -- whether to download videos during extraction
+        ie_key -- extractor key hint
+        extra_info -- dictionary containing the extra values to add to each result
+        process -- whether to resolve all unresolved references (URLs, playlist items),
+            must be True for download to work.
+        force_generic_extractor -- force using the generic extractor
+        """

         if not ie_key and force_generic_extractor:
             ie_key = 'Generic'
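For reference, a typical call matching the new docstring (the URL is a placeholder):

from youtube_dl import YoutubeDL

# process=True (the default) resolves playlist entries and url references;
# download=False returns metadata without writing any media to disk.
with YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info('https://example.com/watch?v=dummy', download=False)
    print(info.get('title'), info.get('upload_date'))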
@@ -1511,14 +1524,18 @@ def sanitize_numeric_fields(info):
         if 'display_id' not in info_dict and 'id' in info_dict:
             info_dict['display_id'] = info_dict['id']

-        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
-            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
-            # see http://bugs.python.org/issue1646728)
-            try:
-                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
-                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
-            except (ValueError, OverflowError, OSError):
-                pass
+        for ts_key, date_key in (
+                ('timestamp', 'upload_date'),
+                ('release_timestamp', 'release_date'),
+        ):
+            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+                # see http://bugs.python.org/issue1646728)
+                try:
+                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
+                except (ValueError, OverflowError, OSError):
+                    pass

         # Auto generate title fields corresponding to the *_number fields when missing
         # in order to always have clean titles. This is very common for TV series.
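A standalone sketch of the loop above: release_date is now derived from release_timestamp the same way upload_date is derived from timestamp, and out-of-range timestamps (e.g. negative values on Windows) are skipped rather than crashing:

import datetime

from youtube_dl.compat import compat_str

def fill_dates(info_dict):
    for ts_key, date_key in (
        ('timestamp', 'upload_date'),
        ('release_timestamp', 'release_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            try:
                dt = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                info_dict[date_key] = compat_str(dt.strftime('%Y%m%d'))
            except (ValueError, OverflowError, OSError):
                pass  # leave the date unset rather than crash
    return info_dict

print(fill_dates({'timestamp': 1609459200, 'release_timestamp': 1612137600}))
# adds upload_date '20210101' and release_date '20210201'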
@@ -1556,9 +1573,6 @@ def sanitize_numeric_fields(info):
         else:
             formats = info_dict['formats']

-        if not formats:
-            raise ExtractorError('No video formats found!')
-
         def is_wellformed(f):
             url = f.get('url')
             if not url:
@@ -1571,7 +1585,10 @@ def is_wellformed(f):
             return True

         # Filter out malformed formats for better extraction robustness
-        formats = list(filter(is_wellformed, formats))
+        formats = list(filter(is_wellformed, formats or []))
+
+        if not formats:
+            raise ExtractorError('No video formats found!')

         formats_dict = {}

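Moving the check after filtering means a formats value that is None, empty, or entirely malformed now raises the same 'No video formats found!' error; a simplified sketch with is_wellformed reduced to a bare URL check:

from youtube_dl.utils import ExtractorError

def usable_formats(formats):
    def is_wellformed(f):
        return bool(f.get('url'))  # the real check also reports and sanitizes

    formats = list(filter(is_wellformed, formats or []))  # 'or []' tolerates formats=None
    if not formats:
        raise ExtractorError('No video formats found!')
    return formats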
@@ -1765,10 +1782,9 @@ def process_info(self, info_dict):

         assert info_dict.get('_type', 'video') == 'video'

-        max_downloads = self.params.get('max_downloads')
-        if max_downloads is not None:
-            if self._num_downloads >= int(max_downloads):
-                raise MaxDownloadsReached()
+        max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
+        if self._num_downloads >= max_downloads:
+            raise MaxDownloadsReached()

         # TODO: backward compatibility, to be removed
         info_dict['fulltitle'] = info_dict['title']
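int_or_none() maps a missing or unparseable value to None, and the 'or float('inf')' fallback then turns that into "no limit", so the comparison in the hunk above needs no separate None check:

from youtube_dl.utils import int_or_none

for raw in (None, '3', 'not-a-number'):
    print(raw, '->', int_or_none(raw) or float('inf'))
# None -> inf, '3' -> 3, 'not-a-number' -> inf (i.e. no limit)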
@@ -1893,8 +1909,17 @@ def ensure_dir_exists(path):

         if not self.params.get('skip_download', False):
             try:
+                def checked_get_suitable_downloader(info_dict, params):
+                    ed_args = params.get('external_downloader_args')
+                    dler = get_suitable_downloader(info_dict, params)
+                    if ed_args and not params.get('external_downloader_args'):
+                        # external_downloader_args was cleared because external_downloader was rejected
+                        self.report_warning('Requested external downloader cannot be used: '
+                                            'ignoring --external-downloader-args.')
+                    return dler
+
                 def dl(name, info):
-                    fd = get_suitable_downloader(info, self.params)(self, self.params)
+                    fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
                     for ph in self._progress_hooks:
                         fd.add_progress_hook(ph)
                     if self.params.get('verbose'):
@@ -2036,9 +2061,12 @@ def compatible_formats(formats):
                 try:
                     self.post_process(filename, info_dict)
                 except (PostProcessingError) as err:
-                    self.report_error('postprocessing: %s' % str(err))
+                    self.report_error('postprocessing: %s' % error_to_compat_str(err))
                     return
         self.record_download_archive(info_dict)
+        # avoid possible nugatory search for further items (PR #26638)
+        if self._num_downloads >= max_downloads:
+            raise MaxDownloadsReached()

     def download(self, url_list):
         """Download a given list of URLs."""
@@ -2272,6 +2300,27 @@ def urlopen(self, req):
         """ Start an HTTP download """
         if isinstance(req, compat_basestring):
             req = sanitized_Request(req)
+        # an embedded /../ sequence is not automatically handled by urllib2
+        # see https://github.com/yt-dlp/yt-dlp/issues/3355
+        url = req.get_full_url()
+        parts = url.partition('/../')
+        if parts[1]:
+            url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
+        if url:
+            # worse, URL path may have initial /../ against RFCs: work-around
+            # by stripping such prefixes, like eg Firefox
+            parts = compat_urllib_parse.urlsplit(url)
+            path = parts.path
+            while path.startswith('/../'):
+                path = path[3:]
+            url = parts._replace(path=path).geturl()
+            # get a new Request with the munged URL
+            if url != req.get_full_url():
+                req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
+                    req.get_method(), compat_urllib_request.Request)
+                req = req_type(
+                    url, data=req.data, headers=dict(req.header_items()),
+                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
         return self._opener.open(req, timeout=self._socket_timeout)

     def print_debug_header(self):
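A worked example of the URL munging added to urlopen(): an embedded '/../' segment is resolved via urljoin(), then any '/../' still left at the start of the path is stripped. Plain stdlib imports stand in here for the compat wrappers:

try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin      # Python 2

url = 'https://example.com/a/b/../c'
head, sep, tail = url.partition('/../')          # ('https://example.com/a/b', '/../', 'c')
print(urljoin(head + sep[:1], sep[1:] + tail))   # https://example.com/a/c

# the second step trims a leading /../ that resolution may leave behind:
path = '/../../d'
while path.startswith('/../'):
    path = path[3:]
print(path)                                      # /d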
@@ -2301,7 +2350,7 @@ def print_debug_header(self):
                 ['git', 'rev-parse', '--short', 'HEAD'],
                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                 cwd=os.path.dirname(os.path.abspath(__file__)))
-            out, err = sp.communicate()
+            out, err = process_communicate_or_kill(sp)
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
                 self._write_string('[debug] Git HEAD: ' + out + '\n')
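process_communicate_or_kill() (imported from youtube_dl.utils above) wraps communicate() so the child process is not left running if the wait is interrupted. Presumably it behaves roughly like the sketch below; this is an illustration of the intent, not the helper's actual code:

def communicate_or_kill(p, *args, **kwargs):  # illustrative stand-in only
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # including KeyboardInterrupt
        p.kill()
        p.wait()
        raise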