@@ -28,6 +28,7 @@ def float_to_int16(audio: np.ndarray) -> np.ndarray:
2828 am = 32767 * 32768 // am
2929 return np .multiply (audio , am ).astype (np .int16 )
3030
31+
3132def float_np_array_to_wav_buf (wav : np .ndarray , sr : int , f32 = False ) -> BytesIO :
3233 buf = BytesIO ()
3334 if f32 :
@@ -41,10 +42,12 @@ def float_np_array_to_wav_buf(wav: np.ndarray, sr: int, f32=False) -> BytesIO:
4142 buf .seek (0 , 0 )
4243 return buf
4344
45+
def save_audio(path: str, audio: np.ndarray, sr: int, f32=False):
    """Encode ``audio`` with ``float_np_array_to_wav_buf`` and write it to ``path``.

    Args:
        path: Destination file path; opened in binary write mode, so an
            existing file is overwritten.
        audio: Sample array, forwarded unchanged to the encoder.
        sr: Sample rate, forwarded unchanged to the encoder.
        f32: Forwarded unchanged to the encoder.

    The buffer is built in memory *before* the destination is opened, so a
    failure while encoding does not create or truncate the file at ``path``.
    """
    # Encode first: opening with "wb" truncates immediately, and we do not
    # want to destroy an existing file if the encoder raises.
    wav_buf = float_np_array_to_wav_buf(audio, sr, f32)
    with open(path, "wb") as f:
        f.write(wav_buf.getbuffer())
4749
50+
4851def wav2 (i : BytesIO , o : BufferedWriter , format : str ):
4952 inp = av .open (i , "r" )
5053 format = video_format_dict .get (format , format )
@@ -65,25 +68,40 @@ def wav2(i: BytesIO, o: BufferedWriter, format: str):
6568
6669
6770def load_audio (
68- file : Union [str , BytesIO , Path ],
69- sr : Optional [int ]= None ,
70- format : Optional [str ]= None ,
71- mono = True
72- ) -> Union [np .ndarray , Tuple [np .ndarray , int ]]:
73- if (isinstance (file , str ) and not Path (file ).exists ()) or (isinstance (file , Path ) and not file .exists ()):
71+ file : Union [str , BytesIO , Path ],
72+ sr : Optional [int ] = None ,
73+ format : Optional [str ] = None ,
74+ mono = True ,
75+ ) -> Union [np .ndarray , Tuple [np .ndarray , int ]]:
76+ if (isinstance (file , str ) and not Path (file ).exists ()) or (
77+ isinstance (file , Path ) and not file .exists ()
78+ ):
7479 raise FileNotFoundError (f"File not found: { file } " )
7580 rate = 0
7681
7782 container = av .open (file , format = format )
7883 audio_stream = next (s for s in container .streams if s .type == "audio" )
7984 channels = 1 if audio_stream .layout == "mono" else 2
8085 container .seek (0 )
81- resampler = AudioResampler (format = "fltp" , layout = audio_stream .layout , rate = sr ) if sr is not None else None
86+ resampler = (
87+ AudioResampler (format = "fltp" , layout = audio_stream .layout , rate = sr )
88+ if sr is not None
89+ else None
90+ )
8291
8392 # Estimated maximum total number of samples to pre-allocate the array
8493 # AV stores length in microseconds by default
85- estimated_total_samples = int (container .duration * sr // 1_000_000 ) if sr is not None else 48000
86- decoded_audio = np .zeros (estimated_total_samples + 1 if channels == 1 else (channels , estimated_total_samples + 1 ), dtype = np .float32 )
94+ estimated_total_samples = (
95+ int (container .duration * sr // 1_000_000 ) if sr is not None else 48000
96+ )
97+ decoded_audio = np .zeros (
98+ (
99+ estimated_total_samples + 1
100+ if channels == 1
101+ else (channels , estimated_total_samples + 1 )
102+ ),
103+ dtype = np .float32 ,
104+ )
87105
88106 offset = 0
89107
@@ -92,7 +110,9 @@ def process_packet(packet: List[AudioFrame]):
92110 rate = 0
93111 for frame in packet :
94112 frame .pts = None # 清除时间戳,避免重新采样问题
95- resampled_frames = resampler .resample (frame ) if resampler is not None else [frame ]
113+ resampled_frames = (
114+ resampler .resample (frame ) if resampler is not None else [frame ]
115+ )
96116 for resampled_frame in resampled_frames :
97117 frame_data = resampled_frame .to_ndarray ()
98118 rate = resampled_frame .rate
@@ -104,13 +124,16 @@ def frame_iter(container):
104124 yield p .decode ()
105125
106126 for r , frames_data in map (process_packet , frame_iter (container )):
107- if not rate : rate = r
127+ if not rate :
128+ rate = r
108129 for frame_data in frames_data :
109130 end_index = offset + len (frame_data [0 ])
110131
111132 # 检查 decoded_audio 是否有足够的空间,并在必要时调整大小
112133 if end_index > decoded_audio .shape [1 ]:
113- decoded_audio = np .resize (decoded_audio , (decoded_audio .shape [0 ], end_index * 4 ))
134+ decoded_audio = np .resize (
135+ decoded_audio , (decoded_audio .shape [0 ], end_index * 4 )
136+ )
114137
115138 np .copyto (decoded_audio [..., offset :end_index ], frame_data )
116139 offset += len (frame_data [0 ])
@@ -126,7 +149,9 @@ def frame_iter(container):
126149 return decoded_audio , rate
127150
128151
129- def downsample_audio (input_path : str , output_path : str , format : str , br = 128_000 ) -> None :
152+ def downsample_audio (
153+ input_path : str , output_path : str , format : str , br = 128_000
154+ ) -> None :
130155 """
131156 default to 128kb/s (equivalent to -q:a 2)
132157 """
0 commit comments