Skip to content

Commit 2b70095

Browse files
committed
feat: implement audio duration retrieval without ffmpeg dependencies
1 parent 6791eb7 commit 2b70095

File tree

10 files changed

+430
-139
lines changed

10 files changed

+430
-139
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ RUN go build -ldflags "-s -w -X 'github.com/QuantumNous/new-api/common.Version=$
2828
FROM alpine
2929

3030
RUN apk upgrade --no-cache \
31-
&& apk add --no-cache ca-certificates tzdata ffmpeg \
31+
&& apk add --no-cache ca-certificates tzdata \
3232
&& update-ca-certificates
3333

3434
COPY --from=builder2 /build/new-api /

common/audio.go

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
package common
2+
3+
import (
4+
"context"
5+
"encoding/binary"
6+
"fmt"
7+
"io"
8+
9+
"github.com/abema/go-mp4"
10+
"github.com/go-audio/aiff"
11+
"github.com/go-audio/wav"
12+
"github.com/jfreymuth/oggvorbis"
13+
"github.com/mewkiz/flac"
14+
"github.com/pkg/errors"
15+
"github.com/tcolgate/mp3"
16+
"github.com/yapingcat/gomedia/go-codec"
17+
)
18+
19+
// GetAudioDuration 使用纯 Go 库获取音频文件的时长(秒)。
20+
// 它不再依赖外部的 ffmpeg 或 ffprobe 程序。
21+
func GetAudioDuration(ctx context.Context, f io.ReadSeeker, ext string) (duration float64, err error) {
22+
SysLog(fmt.Sprintf("GetAudioDuration: ext=%s", ext))
23+
// 根据文件扩展名选择解析器
24+
switch ext {
25+
case ".mp3":
26+
duration, err = getMP3Duration(f)
27+
case ".wav":
28+
duration, err = getWAVDuration(f)
29+
case ".flac":
30+
duration, err = getFLACDuration(f)
31+
case ".m4a", ".mp4":
32+
duration, err = getM4ADuration(f)
33+
case ".ogg", ".oga":
34+
duration, err = getOGGDuration(f)
35+
case ".opus":
36+
duration, err = getOpusDuration(f)
37+
case ".aiff", ".aif", ".aifc":
38+
duration, err = getAIFFDuration(f)
39+
case ".webm":
40+
duration, err = getWebMDuration(f)
41+
case ".aac":
42+
duration, err = getAACDuration(f)
43+
default:
44+
return 0, fmt.Errorf("unsupported audio format: %s", ext)
45+
}
46+
SysLog(fmt.Sprintf("GetAudioDuration: duration=%f", duration))
47+
return duration, err
48+
}
49+
50+
// getMP3Duration 解析 MP3 文件以获取时长。
51+
// 注意:对于 VBR (Variable Bitrate) MP3,这个估算可能不完全精确,但通常足够好。
52+
// FFmpeg 在这种情况下会扫描整个文件来获得精确值,但这里的库提供了快速估算。
53+
func getMP3Duration(r io.Reader) (float64, error) {
54+
d := mp3.NewDecoder(r)
55+
var f mp3.Frame
56+
skipped := 0
57+
duration := 0.0
58+
59+
for {
60+
if err := d.Decode(&f, &skipped); err != nil {
61+
if err == io.EOF {
62+
break
63+
}
64+
return 0, errors.Wrap(err, "failed to decode mp3 frame")
65+
}
66+
duration += f.Duration().Seconds()
67+
}
68+
return duration, nil
69+
}
70+
71+
// getWAVDuration 解析 WAV 文件头以获取时长。
72+
func getWAVDuration(r io.ReadSeeker) (float64, error) {
73+
dec := wav.NewDecoder(r)
74+
if !dec.IsValidFile() {
75+
return 0, errors.New("invalid wav file")
76+
}
77+
d, err := dec.Duration()
78+
if err != nil {
79+
return 0, errors.Wrap(err, "failed to get wav duration")
80+
}
81+
return d.Seconds(), nil
82+
}
83+
84+
// getFLACDuration 解析 FLAC 文件的 STREAMINFO 块。
85+
func getFLACDuration(r io.Reader) (float64, error) {
86+
stream, err := flac.Parse(r)
87+
if err != nil {
88+
return 0, errors.Wrap(err, "failed to parse flac stream")
89+
}
90+
defer stream.Close()
91+
92+
// 时长 = 总采样数 / 采样率
93+
duration := float64(stream.Info.NSamples) / float64(stream.Info.SampleRate)
94+
return duration, nil
95+
}
96+
97+
// getM4ADuration 解析 M4A/MP4 文件的 'mvhd' box。
98+
func getM4ADuration(r io.ReadSeeker) (float64, error) {
99+
// go-mp4 库需要 ReadSeeker 接口
100+
info, err := mp4.Probe(r)
101+
if err != nil {
102+
return 0, errors.Wrap(err, "failed to probe m4a/mp4 file")
103+
}
104+
// 时长 = Duration / Timescale
105+
return float64(info.Duration) / float64(info.Timescale), nil
106+
}
107+
108+
// getOGGDuration 解析 OGG/Vorbis 文件以获取时长。
109+
func getOGGDuration(r io.ReadSeeker) (float64, error) {
110+
// 重置 reader 到开头
111+
if _, err := r.Seek(0, io.SeekStart); err != nil {
112+
return 0, errors.Wrap(err, "failed to seek ogg file")
113+
}
114+
115+
reader, err := oggvorbis.NewReader(r)
116+
if err != nil {
117+
return 0, errors.Wrap(err, "failed to create ogg vorbis reader")
118+
}
119+
120+
// 计算时长 = 总采样数 / 采样率
121+
// 需要读取整个文件来获取总采样数
122+
channels := reader.Channels()
123+
sampleRate := reader.SampleRate()
124+
125+
// 估算方法:读取到文件结尾
126+
var totalSamples int64
127+
buf := make([]float32, 4096*channels)
128+
for {
129+
n, err := reader.Read(buf)
130+
if err == io.EOF {
131+
break
132+
}
133+
if err != nil {
134+
return 0, errors.Wrap(err, "failed to read ogg samples")
135+
}
136+
totalSamples += int64(n / channels)
137+
}
138+
139+
duration := float64(totalSamples) / float64(sampleRate)
140+
return duration, nil
141+
}
142+
143+
// getOpusDuration returns the duration, in seconds, of an Opus stream stored
// in an Ogg container.
//
// It walks the Ogg pages from the start of r, remembers the largest granule
// position seen, subtracts the pre-skip announced in the OpusHead packet of
// the beginning-of-stream page, and divides by 48000 (Opus granule positions
// are expressed in 48 kHz samples).
//
// Bytes that do not start an Ogg page are skipped one at a time until the
// next "OggS" capture pattern. A truncated page ends the walk and the
// duration found so far is returned. An error is returned if r cannot be
// rewound or read, or if no Ogg page is found at all.
func getOpusDuration(r io.ReadSeeker) (float64, error) {
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return 0, fmt.Errorf("failed to seek opus file: %w", err)
	}

	const (
		pageHeaderSize = 27      // fixed part of an Ogg page header
		bosFlag        = 0x02    // header-type bit marking the first page of a stream
		opusSampleRate = 48000.0 // granule positions always count 48 kHz samples
	)

	var (
		header      [pageHeaderSize]byte
		segments    [255]byte // a page holds at most 255 lacing values
		lastGranule int64
		preSkip     int64
		pages       int
	)

	for {
		// io.ReadFull tolerates short reads, which a bare Read would mistake
		// for the end of the file.
		if _, err := io.ReadFull(r, header[:]); err != nil {
			if err == io.EOF || err == io.ErrUnexpectedEOF {
				break
			}
			return 0, fmt.Errorf("failed to read opus/ogg page: %w", err)
		}

		if string(header[:4]) != "OggS" {
			// Not a page boundary: step forward by a single byte and retry.
			if _, err := r.Seek(-(pageHeaderSize - 1), io.SeekCurrent); err != nil {
				break
			}
			continue
		}
		pages++

		// Granule position: bytes 6-13, little endian. The "no packet ends on
		// this page" marker (all ones) converts to -1 and is ignored by the
		// comparison.
		if granule := int64(binary.LittleEndian.Uint64(header[6:14])); granule > lastGranule {
			lastGranule = granule
		}

		// The segment table gives the payload size as a sum of lacing values.
		segTable := segments[:header[26]]
		if _, err := io.ReadFull(r, segTable); err != nil {
			break
		}
		payloadSize := 0
		for _, lacing := range segTable {
			payloadSize += int(lacing)
		}

		// The beginning-of-stream page carries OpusHead, whose bytes 10-11
		// hold the pre-skip (samples to discard at the start of playback).
		if header[5]&bosFlag != 0 && payloadSize >= 12 {
			payload := make([]byte, payloadSize)
			if _, err := io.ReadFull(r, payload); err != nil {
				break
			}
			if string(payload[:8]) == "OpusHead" {
				preSkip = int64(binary.LittleEndian.Uint16(payload[10:12]))
			}
			continue
		}

		if _, err := r.Seek(int64(payloadSize), io.SeekCurrent); err != nil {
			break
		}
	}

	if pages == 0 {
		return 0, errors.New("no ogg pages found in opus file")
	}

	samples := lastGranule - preSkip
	if samples < 0 {
		samples = 0
	}
	return float64(samples) / opusSampleRate, nil
}
203+
204+
// getAIFFDuration 解析 AIFF 文件头以获取时长。
205+
func getAIFFDuration(r io.ReadSeeker) (float64, error) {
206+
if _, err := r.Seek(0, io.SeekStart); err != nil {
207+
return 0, errors.Wrap(err, "failed to seek aiff file")
208+
}
209+
210+
dec := aiff.NewDecoder(r)
211+
if !dec.IsValidFile() {
212+
return 0, errors.New("invalid aiff file")
213+
}
214+
215+
d, err := dec.Duration()
216+
if err != nil {
217+
return 0, errors.Wrap(err, "failed to get aiff duration")
218+
}
219+
220+
return d.Seconds(), nil
221+
}
222+
223+
// getWebMDuration returns the duration, in seconds, declared in the Info
// element of a WebM (Matroska/EBML) file.
//
// It validates the leading EBML header, descends into the Segment and skips
// sibling elements until it reaches Info, from which it reads Duration and
// TimestampScale. The result is Duration * TimestampScale / 1e9.
//
// An error is returned when r cannot be rewound or read, when the data does
// not start with an EBML header or is malformed, or when no usable Duration is
// present (for example when a Cluster is reached before any Info element).
func getWebMDuration(r io.ReadSeeker) (float64, error) {
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return 0, fmt.Errorf("failed to seek webm file: %w", err)
	}

	const (
		idEBML    = 0x1A45DFA3
		idSegment = 0x18538067
		idInfo    = 0x1549A966
		idCluster = 0x1F43B675
	)

	// The file must open with an EBML header of known size; skip its body.
	id, size, err := readEBMLElementHeader(r)
	if err != nil || id != idEBML || size < 0 {
		return 0, errors.New("failed to parse webm file")
	}
	if _, err := r.Seek(size, io.SeekCurrent); err != nil {
		return 0, fmt.Errorf("failed to seek webm file: %w", err)
	}

	for {
		id, size, err := readEBMLElementHeader(r)
		if err != nil {
			if err == io.EOF || err == io.ErrUnexpectedEOF {
				return 0, errors.New("webm file has no duration information")
			}
			return 0, fmt.Errorf("failed to read webm file: %w", err)
		}

		switch id {
		case idSegment:
			// Descend: the Segment's children follow its header directly.
			continue
		case idInfo:
			return parseWebMInfoDuration(r, size)
		case idCluster:
			// Audio data has started without an Info element before it.
			return 0, errors.New("webm file has no duration information")
		}

		// Any other element is skipped; that requires a known size.
		if size < 0 {
			return 0, errors.New("failed to parse webm file")
		}
		if _, err := r.Seek(size, io.SeekCurrent); err != nil {
			return 0, fmt.Errorf("failed to seek webm file: %w", err)
		}
	}
}

// parseWebMInfoDuration scans the children of an Info element whose body
// starts at the current offset of r and is size bytes long, and converts the
// Duration it finds to seconds using TimestampScale (1,000,000 ns per tick
// when the element is absent).
func parseWebMInfoDuration(r io.ReadSeeker, size int64) (float64, error) {
	const (
		idTimestampScale = 0x2AD7B1
		idDuration       = 0x4489
	)

	if size < 0 {
		return 0, errors.New("failed to parse webm file")
	}
	start, err := r.Seek(0, io.SeekCurrent)
	if err != nil {
		return 0, fmt.Errorf("failed to seek webm file: %w", err)
	}
	end := start + size

	var (
		scale    uint64 = 1000000
		duration float64
		found    bool
	)
	for pos := start; pos < end; {
		id, childSize, err := readEBMLElementHeader(r)
		if err != nil {
			// Truncated Info element: use whatever was collected so far.
			break
		}
		if childSize < 0 {
			return 0, errors.New("failed to parse webm file")
		}

		switch {
		case id == idTimestampScale && childSize >= 1 && childSize <= 8:
			// Big-endian unsigned integer of 1 to 8 bytes, right-aligned.
			var raw [8]byte
			if _, err := io.ReadFull(r, raw[8-childSize:]); err != nil {
				return 0, fmt.Errorf("failed to read webm file: %w", err)
			}
			scale = binary.BigEndian.Uint64(raw[:])
		case id == idDuration && childSize == 4:
			var v float32
			if err := binary.Read(r, binary.BigEndian, &v); err != nil {
				return 0, fmt.Errorf("failed to read webm file: %w", err)
			}
			duration, found = float64(v), true
		case id == idDuration && childSize == 8:
			var v float64
			if err := binary.Read(r, binary.BigEndian, &v); err != nil {
				return 0, fmt.Errorf("failed to read webm file: %w", err)
			}
			duration, found = v, true
		default:
			if _, err := r.Seek(childSize, io.SeekCurrent); err != nil {
				return 0, fmt.Errorf("failed to seek webm file: %w", err)
			}
		}

		if pos, err = r.Seek(0, io.SeekCurrent); err != nil {
			return 0, fmt.Errorf("failed to seek webm file: %w", err)
		}
	}

	// duration-duration is non-zero only for NaN and infinities.
	if !found || scale == 0 || duration < 0 || duration-duration != 0 {
		return 0, errors.New("webm file has no duration information")
	}
	return duration * float64(scale) / 1e9, nil
}

// readEBMLElementHeader reads one EBML element header from r and returns the
// element ID (marker bits included) and the size of its data. A size of -1
// means the element uses the reserved "unknown size" encoding.
func readEBMLElementHeader(r io.Reader) (id uint32, size int64, err error) {
	rawID, err := readEBMLVint(r, 4)
	if err != nil {
		return 0, 0, err
	}
	for _, b := range rawID {
		id = id<<8 | uint32(b)
	}

	rawSize, err := readEBMLVint(r, 8)
	if err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return 0, 0, err
	}
	width := uint(len(rawSize))
	// Drop the length-marker bit from the first byte, then append the rest.
	value := uint64(rawSize[0] & (byte(0xFF) >> width))
	for _, b := range rawSize[1:] {
		value = value<<8 | uint64(b)
	}
	if value == uint64(1)<<(7*width)-1 {
		return id, -1, nil
	}
	return id, int64(value), nil
}

// readEBMLVint reads one EBML variable-length integer of at most maxLen bytes
// and returns its raw bytes. The number of leading zero bits in the first byte
// determines how many bytes follow.
func readEBMLVint(r io.Reader, maxLen int) ([]byte, error) {
	var buf [8]byte
	if _, err := io.ReadFull(r, buf[:1]); err != nil {
		return nil, err
	}

	n := 1
	for mask := byte(0x80); mask != 0 && buf[0]&mask == 0; mask >>= 1 {
		n++
	}
	if n > maxLen {
		return nil, errors.New("invalid EBML variable-length integer")
	}

	if _, err := io.ReadFull(r, buf[1:n]); err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return nil, err
	}
	return buf[:n], nil
}
257+
258+
// getAACDuration 解析 AAC (ADTS格式) 文件以获取时长。
259+
// 使用 gomedia 库来解析 AAC ADTS 帧
260+
func getAACDuration(r io.ReadSeeker) (float64, error) {
261+
if _, err := r.Seek(0, io.SeekStart); err != nil {
262+
return 0, errors.Wrap(err, "failed to seek aac file")
263+
}
264+
265+
// 读取整个文件内容
266+
data, err := io.ReadAll(r)
267+
if err != nil {
268+
return 0, errors.Wrap(err, "failed to read aac file")
269+
}
270+
271+
var totalFrames int64
272+
var sampleRate int
273+
274+
// 使用 gomedia 的 SplitAACFrame 函数来分割 AAC 帧
275+
codec.SplitAACFrame(data, func(aac []byte) {
276+
// 解析 ADTS 头部以获取采样率信息
277+
if len(aac) >= 7 {
278+
// 使用 ConvertADTSToASC 来获取音频配置信息
279+
asc, err := codec.ConvertADTSToASC(aac)
280+
if err == nil && sampleRate == 0 {
281+
sampleRate = codec.AACSampleIdxToSample(int(asc.Sample_freq_index))
282+
}
283+
totalFrames++
284+
}
285+
})
286+
287+
if sampleRate == 0 || totalFrames == 0 {
288+
return 0, errors.New("no valid aac frames found")
289+
}
290+
291+
// 每个 AAC ADTS 帧包含 1024 个采样
292+
totalSamples := totalFrames * 1024
293+
duration := float64(totalSamples) / float64(sampleRate)
294+
return duration, nil
295+
}

common/gin.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ func parseFormData(data []byte, v any) error {
163163
return err
164164
}
165165

166-
return json.Unmarshal(jsonData, v)
166+
return Unmarshal(jsonData, v)
167167
}
168168

169169
func parseMultipartFormData(c *gin.Context, data []byte, v any) error {
@@ -174,7 +174,7 @@ func parseMultipartFormData(c *gin.Context, data []byte, v any) error {
174174
}
175175

176176
if boundary == "" {
177-
return json.Unmarshal(data, v) // Fallback to JSON
177+
return Unmarshal(data, v) // Fallback to JSON
178178
}
179179

180180
reader := multipart.NewReader(bytes.NewReader(data), boundary)
@@ -191,10 +191,10 @@ func parseMultipartFormData(c *gin.Context, data []byte, v any) error {
191191
formMap[key] = vals
192192
}
193193
}
194-
jsonData, err := json.Marshal(formMap)
194+
jsonData, err := Marshal(formMap)
195195
if err != nil {
196196
return err
197197
}
198198

199-
return json.Unmarshal(jsonData, v)
199+
return Unmarshal(jsonData, v)
200200
}

common/utils.go

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package common
22

33
import (
4-
"bytes"
5-
"context"
64
crand "crypto/rand"
75
"encoding/base64"
86
"encoding/json"
@@ -329,43 +327,6 @@ func SaveTmpFile(filename string, data io.Reader) (string, error) {
329327
return f.Name(), nil
330328
}
331329

332-
// GetAudioDuration returns the duration of an audio file in seconds.
333-
func GetAudioDuration(ctx context.Context, filename string, ext string) (float64, error) {
334-
// ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {{input}}
335-
c := exec.CommandContext(ctx, "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", filename)
336-
output, err := c.Output()
337-
if err != nil {
338-
return 0, errors.Wrap(err, "failed to get audio duration")
339-
}
340-
durationStr := string(bytes.TrimSpace(output))
341-
if durationStr == "N/A" {
342-
// Create a temporary output file name
343-
tmpFp, err := os.CreateTemp("", "audio-*"+ext)
344-
if err != nil {
345-
return 0, errors.Wrap(err, "failed to create temporary file")
346-
}
347-
tmpName := tmpFp.Name()
348-
// Close immediately so ffmpeg can open the file on Windows.
349-
_ = tmpFp.Close()
350-
defer os.Remove(tmpName)
351-
352-
// ffmpeg -y -i filename -vcodec copy -acodec copy <tmpName>
353-
ffmpegCmd := exec.CommandContext(ctx, "ffmpeg", "-y", "-i", filename, "-vcodec", "copy", "-acodec", "copy", tmpName)
354-
if err := ffmpegCmd.Run(); err != nil {
355-
return 0, errors.Wrap(err, "failed to run ffmpeg")
356-
}
357-
358-
// Recalculate the duration of the new file
359-
c = exec.CommandContext(ctx, "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", tmpName)
360-
output, err := c.Output()
361-
if err != nil {
362-
return 0, errors.Wrap(err, "failed to get audio duration after ffmpeg")
363-
}
364-
durationStr = string(bytes.TrimSpace(output))
365-
}
366-
return strconv.ParseFloat(durationStr, 64)
367-
}
368-
369330
// BuildURL concatenates base and endpoint, returns the complete url string
370331
func BuildURL(base string, endpoint string) string {
371332
u, err := url.Parse(base)

0 commit comments

Comments
 (0)