diff --git a/.changeset/plain-bears-remain.md b/.changeset/plain-bears-remain.md new file mode 100644 index 00000000..2e26676c --- /dev/null +++ b/.changeset/plain-bears-remain.md @@ -0,0 +1,5 @@ +--- +'@webav/av-cliper': patch +--- + +fix: MP4Clip has not adapted to the video track’s matrix settings diff --git a/packages/av-cliper/src/clips/__tests__/mp4-clip.test.ts b/packages/av-cliper/src/clips/__tests__/mp4-clip.test.ts index 0b6db447..e6b4034a 100644 --- a/packages/av-cliper/src/clips/__tests__/mp4-clip.test.ts +++ b/packages/av-cliper/src/clips/__tests__/mp4-clip.test.ts @@ -179,15 +179,15 @@ test('get file header data', async () => { hasMoov: true, }), ); + expect(boxfile.moov?.mvhd.matrix.length).toBe(9); }); test('decode incorrectFrameTypeMp4', async () => { const clip = new MP4Clip((await fetch(incorrectFrameTypeMp4)).body!); await clip.ready; - console.log(clip.meta.duration); expect(Math.round(clip.meta.duration / 1e6)).toBe(5); - const { state, video } = await clip.tick(clip.meta.duration - 30e3); + // 获取最后一帧 + const { state } = await clip.tick(clip.meta.duration - 30e3); expect(state).toBe('success'); - expect(video?.timestamp).toBe(5e6); }); diff --git a/packages/av-cliper/src/clips/mp4-clip.ts b/packages/av-cliper/src/clips/mp4-clip.ts index 13efffbe..27e90887 100644 --- a/packages/av-cliper/src/clips/mp4-clip.ts +++ b/packages/av-cliper/src/clips/mp4-clip.ts @@ -3,7 +3,9 @@ import { MP4Info, MP4Sample } from '@webav/mp4box.js'; import { file, tmpfile, write } from 'opfs-tools'; import { audioResample, extractPCM4AudioData, sleep } from '../av-utils'; import { + createVFRotater, extractFileConfig, + parseMatrix, quickParseMP4File, } from '../mp4-utils/mp4box-utils'; import { DEFAULT_AUDIO_CONF, IClip } from './iclip'; @@ -85,6 +87,7 @@ export class MP4Clip implements IClip { #localFile: OPFSToolFile; + /** 存储视频头(box: ftyp, moov)的二进制数据 */ #headerBoxPos: Array<{ start: number; size: number }> = []; /** * 提供视频头(box: ftyp, moov)的二进制数据 @@ -103,6 +106,18 
@@ export class MP4Clip implements IClip { ).arrayBuffer(); } + /**存储视频平移旋转信息,目前只还原旋转 */ + #parsedMatrix = { + perspective: 1, + rotationRad: 0, + rotationDeg: 0, + scaleX: 1, + scaleY: 1, + translateX: 0, + translateY: 0, + }; + #vfRotater: (vf: VideoFrame | null) => VideoFrame | null = (vf) => vf; + #volume = 1; #videoSamples: ExtMP4Sample[] = []; @@ -160,11 +175,18 @@ export class MP4Clip implements IClip { ? mp4FileToSamples(source, this.#opts) : Promise.resolve(source) ).then( - async ({ videoSamples, audioSamples, decoderConf, headerBoxPos }) => { + async ({ + videoSamples, + audioSamples, + decoderConf, + headerBoxPos, + parsedMatrix, + }) => { this.#videoSamples = videoSamples; this.#audioSamples = audioSamples; this.#decoderConf = decoderConf; this.#headerBoxPos = headerBoxPos; + this.#parsedMatrix = parsedMatrix; const { videoFrameFinder, audioFrameFinder } = genDecoder( { @@ -186,7 +208,22 @@ export class MP4Clip implements IClip { this.#videoFrameFinder = videoFrameFinder; this.#audioFrameFinder = audioFrameFinder; - this.#meta = genMeta(decoderConf, videoSamples, audioSamples); + const { codedWidth, codedHeight } = decoderConf.video ?? {}; + if (codedWidth && codedHeight) { + this.#vfRotater = createVFRotater( + codedWidth, + codedHeight, + parsedMatrix.rotationDeg, + ); + } + + this.#meta = genMeta( + decoderConf, + videoSamples, + audioSamples, + parsedMatrix.rotationDeg, + ); + this.#log.info('MP4Clip meta:', this.#meta); return { ...this.#meta }; }, @@ -223,7 +260,7 @@ export class MP4Clip implements IClip { const [audio, video] = await Promise.all([ this.#audioFrameFinder?.find(time) ?? [], - this.#videoFrameFinder?.find(time), + this.#videoFrameFinder?.find(time).then(this.#vfRotater), ]); if (video == null) { @@ -355,6 +392,7 @@ export class MP4Clip implements IClip { audioSamples: preAudioSlice ?? 
[], decoderConf: this.#decoderConf, headerBoxPos: this.#headerBoxPos, + parsedMatrix: this.#parsedMatrix, }, this.#opts, ); @@ -365,6 +403,7 @@ export class MP4Clip implements IClip { audioSamples: postAudioSlice ?? [], decoderConf: this.#decoderConf, headerBoxPos: this.#headerBoxPos, + parsedMatrix: this.#parsedMatrix, }, this.#opts, ); @@ -382,6 +421,7 @@ export class MP4Clip implements IClip { audioSamples: [...this.#audioSamples], decoderConf: this.#decoderConf, headerBoxPos: this.#headerBoxPos, + parsedMatrix: this.#parsedMatrix, }, this.#opts, ); @@ -408,6 +448,7 @@ export class MP4Clip implements IClip { audio: null, }, headerBoxPos: this.#headerBoxPos, + parsedMatrix: this.#parsedMatrix, }, this.#opts, ); @@ -426,6 +467,7 @@ export class MP4Clip implements IClip { video: null, }, headerBoxPos: this.#headerBoxPos, + parsedMatrix: this.#parsedMatrix, }, this.#opts, ); @@ -451,6 +493,7 @@ function genMeta( decoderConf: MP4DecoderConf, videoSamples: ExtMP4Sample[], audioSamples: ExtMP4Sample[], + rotationDeg: number, ) { const meta = { duration: 0, @@ -462,6 +505,11 @@ function genMeta( if (decoderConf.video != null && videoSamples.length > 0) { meta.width = decoderConf.video.codedWidth ?? 0; meta.height = decoderConf.video.codedHeight ?? 
0; + // 90, 270 度,需要交换宽高 + const normalizedRotation = (Math.round(rotationDeg / 90) * 90 + 360) % 360; + if (normalizedRotation === 90 || normalizedRotation === 270) { + [meta.width, meta.height] = [meta.height, meta.width]; + } } if (decoderConf.audio != null && audioSamples.length > 0) { meta.audioSampleRate = DEFAULT_AUDIO_CONF.sampleRate; @@ -524,6 +572,15 @@ async function mp4FileToSamples(otFile: OPFSToolFile, opts: IMP4ClipOpts = {}) { let videoSamples: ExtMP4Sample[] = []; let audioSamples: ExtMP4Sample[] = []; let headerBoxPos: Array<{ start: number; size: number }> = []; + const parsedMatrix = { + perspective: 1, + rotationRad: 0, + rotationDeg: 0, + scaleX: 1, + scaleY: 1, + translateX: 0, + translateY: 0, + }; let videoDeltaTS = -1; let audioDeltaTS = -1; @@ -537,6 +594,8 @@ async function mp4FileToSamples(otFile: OPFSToolFile, opts: IMP4ClipOpts = {}) { const moov = data.mp4boxFile.moov!; headerBoxPos.push({ start: moov.start, size: moov.size }); + Object.assign(parsedMatrix, parseMatrix(mp4Info.videoTracks[0]?.matrix)); + let { videoDecoderConf: vc, audioDecoderConf: ac } = extractFileConfig( data.mp4boxFile, data.info, @@ -599,6 +658,7 @@ async function mp4FileToSamples(otFile: OPFSToolFile, opts: IMP4ClipOpts = {}) { audioSamples, decoderConf, headerBoxPos, + parsedMatrix, }; } @@ -1525,4 +1585,36 @@ if (import.meta.vitest) { expect(normalized.size).toBe(1000); expect(normalized.is_sync).toBe(normalized.is_idr); }); + + it('genMeta adjusts width and height based on rotation', () => { + const meta = genMeta( + { + video: { + codedWidth: 1920, + codedHeight: 1080, + }, + audio: null, + } as any, + [{ cts: 0, duration: 1000 }] as any, + [], + 90, + ); + expect(meta.width).toBe(1080); + expect(meta.height).toBe(1920); + + const meta2 = genMeta( + { + video: { + codedWidth: 1920, + codedHeight: 1080, + }, + audio: null, + } as any, + [{ cts: 0, duration: 1000 }] as any, + [], + 180, + ); + expect(meta2.width).toBe(1920); + 
expect(meta2.height).toBe(1080);
+  });
 }
diff --git a/packages/av-cliper/src/mp4-utils/__tests__/mp4-utils.test.ts b/packages/av-cliper/src/mp4-utils/__tests__/mp4-utils.test.ts
index 25a7eeda..c8a84d56 100644
--- a/packages/av-cliper/src/mp4-utils/__tests__/mp4-utils.test.ts
+++ b/packages/av-cliper/src/mp4-utils/__tests__/mp4-utils.test.ts
@@ -1,8 +1,12 @@
-import { beforeAll, describe, expect, test, vi } from 'vitest';
-import mp4box from '@webav/mp4box.js';
 import { autoReadStream, file2stream } from '@webav/internal-utils';
+import mp4box from '@webav/mp4box.js';
 import { file, write } from 'opfs-tools';
-import { quickParseMP4File } from '../mp4box-utils';
+import { beforeAll, describe, expect, test, vi } from 'vitest';
+import {
+  createVFRotater,
+  parseMatrix,
+  quickParseMP4File,
+} from '../mp4box-utils';
 
 beforeAll(() => {
   vi.useFakeTimers();
@@ -95,3 +99,118 @@ test('quickParseMP4File', async () => {
   expect(sampleCount).toBe(40);
   await reader.close();
 });
+
+test('vfRotater can rotate a VideoFrame instance', () => {
+  const vf = new VideoFrame(new Uint8Array(200 * 100 * 4), {
+    codedHeight: 100,
+    codedWidth: 200,
+    format: 'RGBA',
+    timestamp: 0,
+  });
+
+  // Test 90 degree rotation
+  const rotater90 = createVFRotater(200, 100, 90);
+  const rotatedVF90 = rotater90(vf.clone());
+  expect(rotatedVF90).not.toBeNull();
+  if (rotatedVF90 == null) throw new Error('must not be null');
+  expect(rotatedVF90.codedWidth).toBe(100);
+  expect(rotatedVF90.codedHeight).toBe(200);
+  rotatedVF90.close();
+
+  // Test 180 degree rotation
+  const rotater180 = createVFRotater(200, 100, 180);
+  const rotatedVF180 = rotater180(vf.clone());
+  expect(rotatedVF180).not.toBeNull();
+  if (rotatedVF180 == null) throw new Error('must not be null');
+  expect(rotatedVF180.codedWidth).toBe(200);
+  expect(rotatedVF180.codedHeight).toBe(100);
+  rotatedVF180.close();
+
+  // Test 270 degree rotation
+  const rotater270 = createVFRotater(200, 100, 270);
+  const rotatedVF270 = rotater270(vf.clone());
+  expect(rotatedVF270).not.toBeNull();
+  if (rotatedVF270 == null) throw new Error('must not be null');
+  expect(rotatedVF270.codedWidth).toBe(100);
+  expect(rotatedVF270.codedHeight).toBe(200);
+  rotatedVF270.close();
+
+  // Test 0 degree rotation
+  const rotater0 = createVFRotater(200, 100, 0);
+  const vfClone = vf.clone();
+  const rotatedVF0 = rotater0(vfClone);
+  // For 0 rotation, it should return the original frame
+  expect(rotatedVF0).toBe(vfClone);
+  rotatedVF0?.close();
+
+  vf.close();
+});
+
+describe('parseMatrix', () => {
+  test('should return an empty object for invalid matrix length', () => {
+    const matrix = new Int32Array(8);
+    expect(parseMatrix(matrix)).toEqual({});
+  });
+
+  test('should parse 0 degree rotation matrix', () => {
+    const matrix = new Int32Array([65536, 0, 0, 0, 65536, 0, 0, 0, 1073741824]);
+    const result = parseMatrix(matrix);
+    expect(result.rotationDeg).toBe(0);
+    expect(result.scaleX).toBe(1);
+    expect(result.scaleY).toBe(1);
+    expect(result.translateX).toBe(0);
+    expect(result.translateY).toBe(0);
+  });
+
+  test('should parse 90 degree rotation matrix', () => {
+    // matrix for 90 deg rotation; parseMatrix reports atan2(c, a) = -90
+    const matrix = new Int32Array([
+      0, 65536, 0, -65536, 0, 0, 0, 0, 1073741824,
+    ]);
+    const result = parseMatrix(matrix);
+    expect(result.rotationDeg).toBe(-90);
+    expect(result.scaleX).toBe(1);
+    expect(result.scaleY).toBe(1);
+  });
+
+  test('should parse 180 degree rotation matrix', () => {
+    const matrix = new Int32Array([
+      -65536, 0, 0, 0, -65536, 0, 0, 0, 1073741824,
+    ]);
+    const result = parseMatrix(matrix);
+    expect(result.rotationDeg).toBe(180);
+    expect(result.scaleX).toBe(1);
+    expect(result.scaleY).toBe(1);
+  });
+
+  test('should parse 270 degree rotation matrix', () => {
+    const matrix = new Int32Array([
+      0, -65536, 0, 65536, 0, 0, 0, 0, 1073741824,
+    ]);
+    const result = parseMatrix(matrix);
+    expect(result.rotationDeg).toBe(90);
+    expect(result.scaleX).toBe(1);
+    expect(result.scaleY).toBe(1);
+  });
+
+  
test('should parse matrix with translation', () => {
+    const width = 1920;
+    const height = 1080;
+    // 180 deg rotation + translation
+    const matrix = new Int32Array([
+      -65536,
+      0,
+      0,
+      0,
+      -65536,
+      0,
+      width * 65536,
+      height * 65536,
+      1073741824,
+    ]);
+    const result = parseMatrix(matrix);
+    expect(result.rotationDeg).toBe(180);
+    expect(result.translateX).toBe(width);
+    expect(result.translateY).toBe(height);
+  });
+});
diff --git a/packages/av-cliper/src/mp4-utils/mp4box-utils.ts b/packages/av-cliper/src/mp4-utils/mp4box-utils.ts
index 2e838d37..1bbdbc70 100644
--- a/packages/av-cliper/src/mp4-utils/mp4box-utils.ts
+++ b/packages/av-cliper/src/mp4-utils/mp4box-utils.ts
@@ -179,3 +179,105 @@ export async function quickParseMP4File(
     mp4boxFile.stop();
   }
 }
+
+/**
+ * Decompose a video track's 9-entry transformation matrix into scale,
+ * rotation, translation and perspective components.
+ *
+ * Entries 0, 1, 3, 4, 6 and 7 are divided by 65536 (16.16 fixed point) and
+ * entry 8 by 2^30 (2.30 fixed point).
+ *
+ * @param matrix The track matrix. Anything that does not have exactly nine
+ *   entries (including `undefined`) yields an empty object.
+ * @returns The decomposed components; `rotationDeg` is `atan2(c, a)` in
+ *   degrees, where `a` is entry 0 and `c` is entry 3.
+ */
+export function parseMatrix(matrix?: Int32Array) {
+  if (matrix?.length !== 9) return {};
+
+  // Reinterpret the entries as signed 32-bit integers. The view has to honour
+  // byteOffset and length: a matrix that is a sub-view of a larger buffer
+  // would otherwise be read from the start of that buffer.
+  const signedMatrix = new Int32Array(
+    matrix.buffer,
+    matrix.byteOffset,
+    matrix.length,
+  );
+
+  // Convert fixed point to floating point
+  const a = signedMatrix[0] / 65536.0;
+  const b = signedMatrix[1] / 65536.0;
+  const c = signedMatrix[3] / 65536.0;
+  const d = signedMatrix[4] / 65536.0;
+  const tx = signedMatrix[6] / 65536.0; // usually 0
+  const ty = signedMatrix[7] / 65536.0; // usually 0
+  const w = signedMatrix[8] / (1 << 30); // usually 1
+
+  // Scale
+  const scaleX = Math.sqrt(a * a + c * c);
+  const scaleY = Math.sqrt(b * b + d * d);
+
+  // Rotation angle (radians)
+  const rotationRad = Math.atan2(c, a);
+  const rotationDeg = (rotationRad * 180) / Math.PI;
+
+  return {
+    scaleX,
+    scaleY,
+    rotationRad,
+    rotationDeg,
+    translateX: tx,
+    translateY: ty,
+    perspective: w,
+  };
+}
+
+/**
+ * Create a function that rotates VideoFrames by a fixed angle.
+ *
+ * @param width Width of the incoming (unrotated) frames.
+ * @param height Height of the incoming (unrotated) frames.
+ * @param rotationDeg Rotation in the convention of
+ *   `parseMatrix(...).rotationDeg`; snapped to the nearest multiple of 90.
+ * @returns A function that passes `null` through, returns the frame as is
+ *   when no rotation is needed, and otherwise returns a new rotated
+ *   VideoFrame after closing the input frame.
+ */
+export function createVFRotater(
+  width: number,
+  height: number,
+  rotationDeg: number,
+) {
+  const normalizedRotation = (Math.round(rotationDeg / 90) * 90 + 360) % 360;
+  if (normalizedRotation === 0) return (vf: VideoFrame | null) => vf;
+
+  const swapSides = normalizedRotation === 90 || normalizedRotation === 270;
+  const rotatedWidth = swapSides ? height : width;
+  const rotatedHeight = swapSides ? width : height;
+
+  const canvas = new OffscreenCanvas(rotatedWidth, rotatedHeight);
+  const ctx = canvas.getContext('2d')!;
+
+  // The transform is set up once and reused for every frame.
+  // parseMatrix reports -90 for the matrix [0, 1, 0, -1, ...], which maps
+  // (x, y) to (-y, x): a clockwise quarter turn on screen. Canvas rotate()
+  // treats positive angles as clockwise, so the angle is negated here.
+  ctx.translate(rotatedWidth / 2, rotatedHeight / 2);
+  ctx.rotate((-normalizedRotation * Math.PI) / 180);
+  ctx.translate(-width / 2, -height / 2);
+
+  return (vf: VideoFrame | null) => {
+    if (vf == null) return null;
+
+    try {
+      ctx.drawImage(vf, 0, 0);
+      return new VideoFrame(canvas, {
+        timestamp: vf.timestamp,
+        duration: vf.duration ?? 0,
+      });
+    } finally {
+      // Release the source frame even if drawing or construction throws
+      vf.close();
+    }
+  };
+}
diff --git a/types/mp4box.d.ts b/types/mp4box.d.ts
index d1c8876c..4e4186b0 100644
--- a/types/mp4box.d.ts
+++ b/types/mp4box.d.ts
@@ -18,6 +18,7 @@ declare module '@webav/mp4box.js' {
   }
 
   export interface MP4VideoTrack extends MP4MediaTrack {
+    matrix: Int32Array;
     video: {
       width: number;
       height: number;