11# 25.07.25
22
33import os
4- import asyncio
54import time
5+ import struct
6+ import asyncio
67from typing import Dict , Optional
78from urllib .parse import urlparse
89from pathlib import Path
@@ -76,14 +77,46 @@ def _infer_url_ext(url: Optional[str]) -> Optional[str]:
7677 ext = Path (path ).suffix
7778 return ext .lstrip ("." ).lower () if ext else None
7879
80+ @staticmethod
81+ def _has_varying_segment_urls (segment_urls : list ) -> bool :
82+ """
83+ Check if segment URLs represent different files (not just different query params).
84+ """
85+ if not segment_urls or len (segment_urls ) <= 1 :
86+ return False
87+
88+ # Extract base paths (without query/fragment)
89+ base_paths = []
90+ for url in segment_urls :
91+ parsed = urlparse (url )
92+ base_path = parsed .path
93+ base_paths .append (base_path )
94+
95+ # If all paths are identical, URLs only differ in query params
96+ unique_paths = set (base_paths )
97+ return len (unique_paths ) > 1
98+
7999 def _get_segment_url_type (self ) -> Optional [str ]:
80100 """Prefer representation field, otherwise infer from first segment URL."""
81101 rep = self .selected_representation or {}
82102 t = (rep .get ("segment_url_type" ) or "" ).strip ().lower ()
83103 if t :
84104 return t
85- urls = rep .get ("segment_urls" ) or []
86- return self ._infer_url_ext (urls [0 ]) if urls else None
105+
106+ segment_urls = rep .get ("segment_urls" ) or []
107+ init_url = rep .get ("init_url" )
108+
109+ # NEW: Se c'è un solo segmento e init_url == segment_url, trattalo come mp4 unico
110+ if len (segment_urls ) == 1 and init_url and segment_urls [0 ] == init_url :
111+ return "mp4"
112+
113+ # Check if segment URLs vary (different files vs same file with different params)
114+ if self ._has_varying_segment_urls (segment_urls ):
115+ # Different files = treat as segments (m4s-like)
116+ return "m4s"
117+
118+ # Fallback to extension inference
119+ return self ._infer_url_ext (segment_urls [0 ]) if segment_urls else None
87120
88121 def _merged_headers (self ) -> Dict [str , str ]:
89122 """Ensure UA exists while keeping caller-provided headers."""
@@ -96,7 +129,14 @@ def get_concat_path(self, output_dir: str = None):
96129 Get the path for the concatenated output file.
97130 """
98131 rep_id = self .selected_representation ['id' ]
99- ext = "mp4" if (self ._get_segment_url_type () == "mp4" ) else "m4s"
132+ seg_type = self ._get_segment_url_type ()
133+
134+ # Use mp4 extension for both single MP4 and MP4 segments
135+ if seg_type in ("mp4" , "m4s" ):
136+ ext = "mp4"
137+ else :
138+ ext = "m4s"
139+
100140 return os .path .join (output_dir or self .tmp_folder , f"{ rep_id } _encrypted.{ ext } " )
101141
102142 def get_segments_count (self ) -> int :
@@ -260,7 +300,18 @@ async def download_segments(self, output_dir: str = None, concurrent_downloads:
260300 async def _download_init_segment (self , client , init_url , concat_path , progress_bar ):
261301 """
262302 Download the init segment and update progress/estimator.
303+ For MP4 segments, skip init segment as each segment is a complete MP4.
263304 """
305+ seg_type = self ._get_segment_url_type ()
306+
307+ # Skip init segment for MP4 segment files
308+ if seg_type == "mp4" and self ._has_varying_segment_urls (self .selected_representation .get ('segment_urls' , [])):
309+ with open (concat_path , 'wb' ) as outfile :
310+ pass
311+
312+ progress_bar .update (1 )
313+ return
314+
264315 if not init_url :
265316 with open (concat_path , 'wb' ) as outfile :
266317 pass
@@ -435,24 +486,71 @@ async def download_single(url, idx):
435486 self .info_nFailed = nFailed_this_round
436487 global_retry_count += 1
437488
489+ def _extract_moof_mdat_atoms (self , file_path ):
490+ """
491+ Extracts only 'moof' and 'mdat' atoms from a fragmented MP4 file.
492+ Returns a generator of bytes chunks.
493+ """
494+ with open (file_path , 'rb' ) as f :
495+ while True :
496+ header = f .read (8 )
497+ if len (header ) < 8 :
498+ break
499+
500+ size , atom_type = struct .unpack (">I4s" , header )
501+ atom_type = atom_type .decode ("ascii" , errors = "replace" )
502+ if size < 8 :
503+ break # Invalid atom
504+
505+ data = header + f .read (size - 8 )
506+ if atom_type in ("moof" , "mdat" ):
507+ yield data
508+
async def _concatenate_segments_in_order(self, temp_dir, concat_path, total_segments):
    """
    Concatenate all downloaded segment files, in index order, into concat_path.

    Two strategies, chosen from the detected stream type:
      - MP4 segments (type "mp4" and segment URLs that point at different
        files): concat_path is rewritten from scratch. The first available
        segment is copied in full, then only the moof/mdat atoms of each
        later segment are appended.
      - anything else: every segment is appended unchanged to whatever
        concat_path already contains (presumably the init segment written
        earlier in the download; confirm against the caller).

    Segments whose index is not in self.downloaded_segments, or whose temp
    file is missing, are skipped.

    Args:
        temp_dir: Directory holding the per-segment "seg_NNNNNN.tmp" files.
        concat_path: Destination file.
        total_segments: Number of segment indices to walk (0..total-1).
    """
    seg_type = self._get_segment_url_type()
    console.print(f"\n[cyan]Detected stream type: [green]{seg_type}")
    segment_urls = self.selected_representation.get('segment_urls', [])
    is_mp4_segments = seg_type == "mp4" and self._has_varying_segment_urls(segment_urls)

    if is_mp4_segments:
        console.print("[cyan]Concatenating MP4 segments with moof/mdat extraction...")
        mode = 'wb'
    else:
        console.print("[cyan]Concatenating m4s segments...")
        mode = 'ab'

    # For MP4 segments, exactly one segment must be written in full so the
    # output keeps its file-level header atoms. Using the first *available*
    # segment (not strictly index 0) keeps the output well-formed even when
    # segment 0 failed to download.
    full_copy_done = False

    with open(concat_path, mode) as outfile:
        for idx in range(total_segments):
            temp_file = os.path.join(temp_dir, f"seg_{idx:06d}.tmp")

            # Only concatenate successfully downloaded segments
            if idx not in self.downloaded_segments or not os.path.exists(temp_file):
                continue

            if is_mp4_segments and full_copy_done:
                # Later MP4 segments contribute only their moof/mdat atoms
                for atom in self._extract_moof_mdat_atoms(temp_file):
                    outfile.write(atom)
                continue

            # Copy in 8KB chunks to avoid loading a whole segment in memory
            with open(temp_file, 'rb') as infile:
                while True:
                    chunk = infile.read(8192)
                    if not chunk:
                        break
                    outfile.write(chunk)
            full_copy_done = True
456554
457555 def _get_bar_format (self , description : str ) -> str :
458556 """
0 commit comments