Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/plain-bears-remain.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@webav/av-cliper': patch
---

fix: MP4Clip did not apply the video track’s matrix settings
6 changes: 3 additions & 3 deletions packages/av-cliper/src/clips/__tests__/mp4-clip.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -179,15 +179,15 @@ test('get file header data', async () => {
hasMoov: true,
}),
);

expect(boxfile.moov?.mvhd.matrix.length).toBe(9);
});

test('decode incorrectFrameTypeMp4', async () => {
const clip = new MP4Clip((await fetch(incorrectFrameTypeMp4)).body!);
await clip.ready;
console.log(clip.meta.duration);
expect(Math.round(clip.meta.duration / 1e6)).toBe(5);
const { state, video } = await clip.tick(clip.meta.duration - 30e3);
// 获取最后一帧
const { state } = await clip.tick(clip.meta.duration - 30e3);
expect(state).toBe('success');
expect(video?.timestamp).toBe(5e6);
});
98 changes: 95 additions & 3 deletions packages/av-cliper/src/clips/mp4-clip.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ import { MP4Info, MP4Sample } from '@webav/mp4box.js';
import { file, tmpfile, write } from 'opfs-tools';
import { audioResample, extractPCM4AudioData, sleep } from '../av-utils';
import {
createVFRotater,
extractFileConfig,
parseMatrix,
quickParseMP4File,
} from '../mp4-utils/mp4box-utils';
import { DEFAULT_AUDIO_CONF, IClip } from './iclip';
Expand Down Expand Up @@ -85,6 +87,7 @@ export class MP4Clip implements IClip {

#localFile: OPFSToolFile;

/** Stores where the video header boxes (ftyp, moov) are located, as `start`/`size` entries. */
#headerBoxPos: Array<{ start: number; size: number }> = [];
/**
* 提供视频头(box: ftyp, moov)的二进制数据
Expand All @@ -103,6 +106,18 @@ export class MP4Clip implements IClip {
).arrayBuffer();
}

/** Stores the video's translation/rotation information; currently only the rotation is restored. */
#parsedMatrix = {
perspective: 1,
rotationRad: 0,
rotationDeg: 0,
scaleX: 1,
scaleY: 1,
translateX: 0,
translateY: 0,
};
/** Maps a (possibly null) VideoFrame to the frame that should be used in its place; the default returns its input unchanged. */
#vfRotater: (vf: VideoFrame | null) => VideoFrame | null = (vf) => vf;

#volume = 1;

#videoSamples: ExtMP4Sample[] = [];
Expand Down Expand Up @@ -160,11 +175,18 @@ export class MP4Clip implements IClip {
? mp4FileToSamples(source, this.#opts)
: Promise.resolve(source)
).then(
async ({ videoSamples, audioSamples, decoderConf, headerBoxPos }) => {
async ({
videoSamples,
audioSamples,
decoderConf,
headerBoxPos,
parsedMatrix,
}) => {
this.#videoSamples = videoSamples;
this.#audioSamples = audioSamples;
this.#decoderConf = decoderConf;
this.#headerBoxPos = headerBoxPos;
this.#parsedMatrix = parsedMatrix;

const { videoFrameFinder, audioFrameFinder } = genDecoder(
{
Expand All @@ -186,7 +208,22 @@ export class MP4Clip implements IClip {
this.#videoFrameFinder = videoFrameFinder;
this.#audioFrameFinder = audioFrameFinder;

this.#meta = genMeta(decoderConf, videoSamples, audioSamples);
const { codedWidth, codedHeight } = decoderConf.video ?? {};
if (codedWidth && codedHeight) {
this.#vfRotater = createVFRotater(
codedWidth,
codedHeight,
parsedMatrix.rotationDeg,
);
}

this.#meta = genMeta(
decoderConf,
videoSamples,
audioSamples,
parsedMatrix.rotationDeg,
);

this.#log.info('MP4Clip meta:', this.#meta);
return { ...this.#meta };
},
Expand Down Expand Up @@ -223,7 +260,7 @@ export class MP4Clip implements IClip {

const [audio, video] = await Promise.all([
this.#audioFrameFinder?.find(time) ?? [],
this.#videoFrameFinder?.find(time),
this.#videoFrameFinder?.find(time).then(this.#vfRotater),
]);

if (video == null) {
Expand Down Expand Up @@ -355,6 +392,7 @@ export class MP4Clip implements IClip {
audioSamples: preAudioSlice ?? [],
decoderConf: this.#decoderConf,
headerBoxPos: this.#headerBoxPos,
parsedMatrix: this.#parsedMatrix,
},
this.#opts,
);
Expand All @@ -365,6 +403,7 @@ export class MP4Clip implements IClip {
audioSamples: postAudioSlice ?? [],
decoderConf: this.#decoderConf,
headerBoxPos: this.#headerBoxPos,
parsedMatrix: this.#parsedMatrix,
},
this.#opts,
);
Expand All @@ -382,6 +421,7 @@ export class MP4Clip implements IClip {
audioSamples: [...this.#audioSamples],
decoderConf: this.#decoderConf,
headerBoxPos: this.#headerBoxPos,
parsedMatrix: this.#parsedMatrix,
},
this.#opts,
);
Expand All @@ -408,6 +448,7 @@ export class MP4Clip implements IClip {
audio: null,
},
headerBoxPos: this.#headerBoxPos,
parsedMatrix: this.#parsedMatrix,
},
this.#opts,
);
Expand All @@ -426,6 +467,7 @@ export class MP4Clip implements IClip {
video: null,
},
headerBoxPos: this.#headerBoxPos,
parsedMatrix: this.#parsedMatrix,
},
this.#opts,
);
Expand All @@ -451,6 +493,7 @@ function genMeta(
decoderConf: MP4DecoderConf,
videoSamples: ExtMP4Sample[],
audioSamples: ExtMP4Sample[],
rotationDeg: number,
) {
const meta = {
duration: 0,
Expand All @@ -462,6 +505,11 @@ function genMeta(
if (decoderConf.video != null && videoSamples.length > 0) {
meta.width = decoderConf.video.codedWidth ?? 0;
meta.height = decoderConf.video.codedHeight ?? 0;
// 90, 270 度,需要交换宽高
const normalizedRotation = (Math.round(rotationDeg / 90) * 90 + 360) % 360;
if (normalizedRotation === 90 || normalizedRotation === 270) {
[meta.width, meta.height] = [meta.height, meta.width];
}
}
if (decoderConf.audio != null && audioSamples.length > 0) {
meta.audioSampleRate = DEFAULT_AUDIO_CONF.sampleRate;
Expand Down Expand Up @@ -524,6 +572,15 @@ async function mp4FileToSamples(otFile: OPFSToolFile, opts: IMP4ClipOpts = {}) {
let videoSamples: ExtMP4Sample[] = [];
let audioSamples: ExtMP4Sample[] = [];
let headerBoxPos: Array<{ start: number; size: number }> = [];
const parsedMatrix = {
perspective: 1,
rotationRad: 0,
rotationDeg: 0,
scaleX: 1,
scaleY: 1,
translateX: 0,
translateY: 0,
};

let videoDeltaTS = -1;
let audioDeltaTS = -1;
Expand All @@ -537,6 +594,8 @@ async function mp4FileToSamples(otFile: OPFSToolFile, opts: IMP4ClipOpts = {}) {
const moov = data.mp4boxFile.moov!;
headerBoxPos.push({ start: moov.start, size: moov.size });

Object.assign(parsedMatrix, parseMatrix(mp4Info.videoTracks[0]?.matrix));

let { videoDecoderConf: vc, audioDecoderConf: ac } = extractFileConfig(
data.mp4boxFile,
data.info,
Expand Down Expand Up @@ -599,6 +658,7 @@ async function mp4FileToSamples(otFile: OPFSToolFile, opts: IMP4ClipOpts = {}) {
audioSamples,
decoderConf,
headerBoxPos,
parsedMatrix,
};
}

Expand Down Expand Up @@ -1525,4 +1585,36 @@ if (import.meta.vitest) {
expect(normalized.size).toBe(1000);
expect(normalized.is_sync).toBe(normalized.is_idr);
});

it('genMeta adjusts width and height based on rotation', () => {
  // Builds the meta for a 1920x1080 video track with a single sample and
  // no audio, varying only the rotation passed to genMeta.
  const metaForRotation = (deg: number) =>
    genMeta(
      {
        video: {
          codedWidth: 1920,
          codedHeight: 1080,
        },
        audio: null,
      } as any,
      [{ cts: 0, duration: 1000 }] as any,
      [],
      deg,
    );

  // 90 degrees: the reported width and height are swapped.
  const quarterTurn = metaForRotation(90);
  expect(quarterTurn.width).toBe(1080);
  expect(quarterTurn.height).toBe(1920);

  // 180 degrees: the reported size stays equal to the coded size.
  const halfTurn = metaForRotation(180);
  expect(halfTurn.width).toBe(1920);
  expect(halfTurn.height).toBe(1080);
});
}
125 changes: 122 additions & 3 deletions packages/av-cliper/src/mp4-utils/__tests__/mp4-utils.test.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import { beforeAll, describe, expect, test, vi } from 'vitest';
import mp4box from '@webav/mp4box.js';
import { autoReadStream, file2stream } from '@webav/internal-utils';
import mp4box from '@webav/mp4box.js';
import { file, write } from 'opfs-tools';
import { quickParseMP4File } from '../mp4box-utils';
import { beforeAll, describe, expect, test, vi } from 'vitest';
import {
createVFRotater,
parseMatrix,
quickParseMP4File,
} from '../mp4box-utils';

beforeAll(() => {
vi.useFakeTimers();
Expand Down Expand Up @@ -95,3 +99,118 @@ test('quickParseMP4File', async () => {
expect(sampleCount).toBe(40);
await reader.close();
});

test('vfRotater can be rotate VideoFrame instance', () => {
  // Source frame: 200x100 RGBA, backed by a zero-filled buffer.
  const vf = new VideoFrame(new Uint8Array(200 * 100 * 4), {
    codedHeight: 100,
    codedWidth: 200,
    format: 'RGBA',
    timestamp: 0,
  });

  // [rotation in degrees, expected codedWidth, expected codedHeight]
  const rotatedCases: Array<[number, number, number]> = [
    [90, 100, 200],
    [180, 200, 100],
    [270, 100, 200],
  ];
  for (const [degrees, expectedWidth, expectedHeight] of rotatedCases) {
    const rotate = createVFRotater(200, 100, degrees);
    // Each case gets its own clone so `vf` stays usable for the next one.
    const rotated = rotate(vf.clone());
    expect(rotated).not.toBeNull();
    if (rotated == null) throw new Error('must not be null');
    expect(rotated.codedWidth).toBe(expectedWidth);
    expect(rotated.codedHeight).toBe(expectedHeight);
    rotated.close();
  }

  // 0 degrees: the rotater must hand back the very same frame instance.
  const passthrough = createVFRotater(200, 100, 0);
  const untouched = vf.clone();
  const result = passthrough(untouched);
  expect(result).toBe(untouched);
  result?.close();

  vf.close();
});

/**
 * Tests for parseMatrix.
 *
 * Every valid fixture is a 9-entry Int32Array. In these fixtures 65536
 * (1 << 16) is the raw value that parseMatrix is expected to report as a
 * scale of 1, and translations are expressed as `pixels * 65536`. The last
 * entry is always 1073741824 (1 << 30).
 */
describe('parseMatrix', () => {
  // The previous title claimed a throw, but the contract asserted here is
  // that an input of the wrong length yields an empty object.
  test('should return an empty object for invalid matrix length', () => {
    const matrix = new Int32Array(8);
    expect(parseMatrix(matrix)).toEqual({});
  });

  test('should parse 0 degree rotation matrix', () => {
    const matrix = new Int32Array([65536, 0, 0, 0, 65536, 0, 0, 0, 1073741824]);
    const result = parseMatrix(matrix);
    expect(result.rotationDeg).toBe(0);
    expect(result.scaleX).toBe(1);
    expect(result.scaleY).toBe(1);
    expect(result.translateX).toBe(0);
    expect(result.translateY).toBe(0);
  });

  test('should parse 90 degree rotation matrix', () => {
    // matrix for 90 deg rotation
    const matrix = new Int32Array([
      0, 65536, 0, -65536, 0, 0, 0, 0, 1073741824,
    ]);
    const result = parseMatrix(matrix);
    // NOTE: parseMatrix reports this fixture with a negative sign (-90).
    expect(result.rotationDeg).toBe(-90);
    expect(result.scaleX).toBe(1);
    expect(result.scaleY).toBe(1);
  });

  test('should parse 180 degree rotation matrix', () => {
    const matrix = new Int32Array([
      -65536, 0, 0, 0, -65536, 0, 0, 0, 1073741824,
    ]);
    const result = parseMatrix(matrix);
    expect(result.rotationDeg).toBe(180);
    expect(result.scaleX).toBe(1);
    expect(result.scaleY).toBe(1);
  });

  test('should parse 270 degree rotation matrix', () => {
    const matrix = new Int32Array([
      0, -65536, 0, 65536, 0, 0, 0, 0, 1073741824,
    ]);
    const result = parseMatrix(matrix);
    // NOTE: parseMatrix reports this fixture as +90 rather than 270.
    expect(result.rotationDeg).toBe(90);
    expect(result.scaleX).toBe(1);
    expect(result.scaleY).toBe(1);
  });

  test('should parse matrix with translation', () => {
    const width = 1920;
    const height = 1080;
    // 180 deg rotation + translation
    const matrix = new Int32Array([
      -65536,
      0,
      0,
      0,
      -65536,
      0,
      width * 65536,
      height * 65536,
      1073741824,
    ]);
    const result = parseMatrix(matrix);
    expect(result.rotationDeg).toBe(180);
    // Translation comes back in the same units as width/height.
    expect(result.translateX).toBe(width);
    expect(result.translateY).toBe(height);
  });
});
Loading
Loading