diff --git a/packages/react-sdk/src/components/BackgroundFilters/BackgroundFilters.tsx b/packages/react-sdk/src/components/BackgroundFilters/BackgroundFilters.tsx index 717984ae2b..83a9727d32 100644 --- a/packages/react-sdk/src/components/BackgroundFilters/BackgroundFilters.tsx +++ b/packages/react-sdk/src/components/BackgroundFilters/BackgroundFilters.tsx @@ -8,7 +8,6 @@ import { useState, } from 'react'; import { flushSync } from 'react-dom'; -import clsx from 'clsx'; import { useCall } from '@stream-io/video-react-bindings'; import { Call, disposeOfMediaStream } from '@stream-io/video-client'; import { @@ -17,10 +16,20 @@ import { createRenderer, isPlatformSupported, loadTFLite, + loadMediaPipe, PlatformSupportFlags, + VirtualBackground, Renderer, TFLite, + PerformanceStats, } from '@stream-io/video-filters-web'; +import clsx from 'clsx'; + +export enum FilterEngine { + TF = 'TF', + MEDIA_PIPE = 'MEDIA_PIPE', + NONE = 'NONE', +} export type BackgroundFiltersProps = PlatformSupportFlags & { /** @@ -66,13 +75,31 @@ export type BackgroundFiltersProps = PlatformSupportFlags & { */ modelFilePath?: string; + /** + * When `true`, uses the legacy background segmentation model. + */ + useLegacyFilterModel?: boolean; + + /** + * The path to the MediaPipe model file. + * Override this prop to use a custom path to the MediaPipe model file + * (e.g., if you choose to host it yourself). + */ + mediaPipeModelFilePath?: string; + /** * When a started filter encounters an error, this callback will be executed. * The default behavior (not overridable) is unregistering a failed filter. - * Use this callback to display UI error message, disable the corresponsing stream, + * Use this callback to display UI error message, disable the corresponding stream, * or to try registering the filter again. */ onError?: (error: any) => void; + + /** + * Called every ~1s with FPS and delay stats. + * Use this to track or display performance. + */ + onStats?: (stats: PerformanceStats) => void; }; export type BackgroundFiltersAPI = { @@ -81,6 +108,11 @@ export type BackgroundFiltersAPI = { */ isSupported: boolean; + /** + * The filter engine that will be used (TF, MEDIA_PIPE, or NONE). + */ + engine: FilterEngine; + /** * Indicates whether the background filters engine is loaded and ready. */ @@ -132,6 +164,25 @@ export const useBackgroundFilters = () => { return context; }; +/** + * Determines which filter engine is available. + * MEDIA_PIPE is the default unless legacy filters are requested or MediaPipe is unsupported. + * Returns NONE if neither is available. + */ +const determineEngine = async ( + useLegacyFilterModel: boolean | undefined, + forceSafariSupport: boolean | undefined, + forceMobileSupport: boolean | undefined, +): Promise => { + const isSupported = await isPlatformSupported({ + forceSafariSupport, + forceMobileSupport, + }); + + if (!isSupported) return FilterEngine.NONE; + return useLegacyFilterModel ? FilterEngine.TF : FilterEngine.MEDIA_PIPE; +}; + /** * A provider component that enables the use of background filters in your app. 
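 *
 * A minimal usage sketch (illustrative; `MyCallUI` is a placeholder for your own call UI).
 * MediaPipe is the default engine; pass `useLegacyFilterModel` to force the TFLite engine:
 * @example
 * <BackgroundFiltersProvider
 *   useLegacyFilterModel={false}
 *   onStats={(stats) => console.log(stats.fps, stats.delay)}
 * >
 *   <MyCallUI />
 * </BackgroundFiltersProvider>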
* @@ -149,8 +200,11 @@ export const BackgroundFiltersProvider = ( backgroundBlurLevel: bgBlurLevelFromProps = undefined, tfFilePath, modelFilePath, + useLegacyFilterModel, + mediaPipeModelFilePath, basePath, onError, + onStats, forceSafariSupport, forceMobileSupport, } = props; @@ -179,22 +233,39 @@ export const BackgroundFiltersProvider = ( setBackgroundBlurLevel(undefined); }, []); + const [engine, setEngine] = useState(FilterEngine.NONE); const [isSupported, setIsSupported] = useState(false); useEffect(() => { - isPlatformSupported({ + determineEngine( + useLegacyFilterModel, forceSafariSupport, forceMobileSupport, - }).then(setIsSupported); - }, [forceMobileSupport, forceSafariSupport]); + ).then((determinedEngine) => { + setEngine(determinedEngine); + setIsSupported(determinedEngine !== FilterEngine.NONE); + }); + }, [forceMobileSupport, forceSafariSupport, useLegacyFilterModel]); const [tfLite, setTfLite] = useState(); useEffect(() => { - // don't try to load TFLite if the platform is not supported - if (!isSupported) return; + if (engine !== FilterEngine.TF) return; + loadTFLite({ basePath, modelFilePath, tfFilePath }) .then(setTfLite) .catch((err) => console.error('Failed to load TFLite', err)); - }, [basePath, isSupported, modelFilePath, tfFilePath]); + }, [basePath, engine, modelFilePath, tfFilePath]); + + const [mediaPipe, setMediaPipe] = useState(); + useEffect(() => { + if (engine !== FilterEngine.MEDIA_PIPE) return; + + loadMediaPipe({ + wasmPath: 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm', + modelPath: mediaPipeModelFilePath, + }) + .then(setMediaPipe) + .catch((err) => console.error('Failed to preload MediaPipe', err)); + }, [engine, mediaPipeModelFilePath]); const handleError = useCallback( (error: any) => { @@ -211,7 +282,9 @@ export const BackgroundFiltersProvider = ( {children} - {tfLite && } + ); }; -const BackgroundFilters = (props: { tfLite: TFLite }) => { +const BackgroundFilters = (props: { tfLite?: TFLite }) => { const call = useCall(); const { children, start } = useRenderer(props.tfLite, call); - const { backgroundFilter, onError } = useBackgroundFilters(); + const { onError, backgroundFilter } = useBackgroundFilters(); const handleErrorRef = useRef<((error: any) => void) | undefined>(undefined); handleErrorRef.current = onError; + const enabled = !!backgroundFilter; + useEffect(() => { - if (!call || !backgroundFilter) return; - const { unregister } = call.camera.registerFilter((ms) => - start(ms, (error) => handleErrorRef.current?.(error)), - ); + if (!call || !enabled) return; + + const { unregister } = call.camera.registerFilter((ms) => { + return start(ms, (error) => handleErrorRef.current?.(error)); + }); return () => { unregister().catch((err) => console.warn(`Can't unregister filter`, err)); }; - }, [backgroundFilter, call, start]); + }, [call, start, enabled]); return children; }; -const useRenderer = (tfLite: TFLite, call: Call | undefined) => { - const { backgroundFilter, backgroundBlurLevel, backgroundImage } = - useBackgroundFilters(); +const useRenderer = (tfLite: TFLite | undefined, call: Call | undefined) => { + const { + backgroundFilter, + backgroundBlurLevel, + backgroundImage, + engine, + mediaPipeModelFilePath, + onStats, + } = useBackgroundFilters(); + const videoRef = useRef(null); const canvasRef = useRef(null); const bgImageRef = useRef(null); @@ -267,6 +351,7 @@ const useRenderer = (tfLite: TFLite, call: Call | undefined) => { const start = useCallback( (ms: MediaStream, onError?: (error: any) => void) => { let 
outputStream: MediaStream | undefined; + let processor: VirtualBackground | undefined; let renderer: Renderer | undefined; const output = new Promise((resolve, reject) => { @@ -279,58 +364,121 @@ const useRenderer = (tfLite: TFLite, call: Call | undefined) => { const canvasEl = canvasRef.current; const bgImageEl = bgImageRef.current; - if (!videoEl || !canvasEl || (backgroundImage && !bgImageEl)) { - // You should start renderer in effect or event handlers - reject(new Error('Renderer started before elements are ready')); + const [track] = ms.getVideoTracks(); + + if (!track) { + reject(new Error('No video tracks in input media stream')); return; } - videoEl.srcObject = ms; - videoEl.play().then( - () => { - const [track] = ms.getVideoTracks(); - - if (!track) { - reject(new Error('No video tracks in input media stream')); - return; - } - - const trackSettings = track.getSettings(); - flushSync(() => - setVideoSize({ - width: trackSettings.width ?? 0, - height: trackSettings.height ?? 0, - }), - ); - call?.tracer.trace('backgroundFilters.enable', { - backgroundFilter, + if (engine === FilterEngine.MEDIA_PIPE) { + call?.tracer.trace('backgroundFilters.enable', { + backgroundFilter, + backgroundBlurLevel, + backgroundImage, + engine, + }); + + if (!videoEl) { + reject(new Error('Renderer started before elements are ready')); + return; + } + + const trackSettings = track.getSettings(); + flushSync(() => + setVideoSize({ + width: trackSettings.width ?? 0, + height: trackSettings.height ?? 0, + }), + ); + call?.tracer.trace('backgroundFilters.enable', { + backgroundFilter, + backgroundBlurLevel, + backgroundImage, + engine, + }); + + processor = new VirtualBackground( + track, + { + modelPath: mediaPipeModelFilePath, backgroundBlurLevel, backgroundImage, + backgroundFilter, + }, + { onError, onStats }, + ); + processor + .start() + .then((processedTrack) => { + outputStream = new MediaStream([processedTrack]); + resolve(outputStream); + }) + .catch((error) => { + reject(error); }); - renderer = createRenderer( - tfLite, - videoEl, - canvasEl, - { + + return; + } + + if (engine === FilterEngine.TF) { + if (!videoEl || !canvasEl || (backgroundImage && !bgImageEl)) { + reject(new Error('Renderer started before elements are ready')); + return; + } + + videoEl.srcObject = ms; + videoEl.play().then( + () => { + const trackSettings = track.getSettings(); + flushSync(() => + setVideoSize({ + width: trackSettings.width ?? 0, + height: trackSettings.height ?? 0, + }), + ); + call?.tracer.trace('backgroundFilters.enable', { backgroundFilter, backgroundBlurLevel, - backgroundImage: bgImageEl ?? undefined, - }, - onError, - ); - outputStream = canvasEl.captureStream(); - resolve(outputStream); - }, - () => { - reject(new Error('Could not play the source video stream')); - }, - ); + backgroundImage, + engine, + }); + + if (!tfLite) { + reject(new Error('TensorFlow Lite not loaded')); + return; + } + + renderer = createRenderer( + tfLite, + videoEl, + canvasEl, + { + backgroundFilter, + backgroundBlurLevel, + backgroundImage: bgImageEl ?? 
undefined, + }, + onError, + ); + outputStream = canvasEl.captureStream(); + + resolve(outputStream); + }, + () => { + reject(new Error('Could not play the source video stream')); + }, + ); + return; + } + + reject(new Error('No supported engine available')); }); return { output, stop: () => { call?.tracer.trace('backgroundFilters.disable', null); + processor?.stop(); renderer?.dispose(); if (videoRef.current) videoRef.current.srcObject = null; if (outputStream) disposeOfMediaStream(outputStream); @@ -338,11 +486,14 @@ const useRenderer = (tfLite: TFLite, call: Call | undefined) => { }; }, [ + onStats, backgroundBlurLevel, backgroundFilter, backgroundImage, call?.tracer, tfLite, + engine, + mediaPipeModelFilePath, ], ); @@ -378,8 +529,5 @@ const useRenderer = (tfLite: TFLite, call: Call | undefined) => { ); - return { - start, - children, - }; + return { start, children }; }; diff --git a/packages/react-sdk/src/components/Notification/DegradedPerformanceNotification.tsx b/packages/react-sdk/src/components/Notification/DegradedPerformanceNotification.tsx new file mode 100644 index 0000000000..ffe4f1e18a --- /dev/null +++ b/packages/react-sdk/src/components/Notification/DegradedPerformanceNotification.tsx @@ -0,0 +1,47 @@ +import { PropsWithChildren } from 'react'; +import { Placement } from '@floating-ui/react'; + +import { useCallStateHooks, useI18n } from '@stream-io/video-react-bindings'; +import { Notification } from './Notification'; +import { useBackgroundFilters } from '../BackgroundFilters'; +import { useLowFpsWarning } from '../../hooks/useLowFpsWarning'; + +export type DegradedPerformanceNotificationProps = { + /** + * Text message displayed by the notification. + */ + text?: string; + placement?: Placement; + className?: string; +}; + +export const DegradedPerformanceNotification = ({ + children, + text, + placement, + className, +}: PropsWithChildren) => { + const { useCallStatsReport } = useCallStateHooks(); + const callStatsReport = useCallStatsReport(); + const { backgroundFilter } = useBackgroundFilters(); + + const showLowFpsWarning = useLowFpsWarning(callStatsReport?.publisherStats); + + const { t } = useI18n(); + + const message = + text ?? + t( + 'Background filters performance is degraded. Consider disabling filters for better performance.', + ); + return ( + + {children} + + ); +}; diff --git a/packages/react-sdk/src/components/Notification/Notification.tsx b/packages/react-sdk/src/components/Notification/Notification.tsx index b927a75036..893d2d70af 100644 --- a/packages/react-sdk/src/components/Notification/Notification.tsx +++ b/packages/react-sdk/src/components/Notification/Notification.tsx @@ -1,6 +1,6 @@ import { PropsWithChildren, ReactNode, useEffect } from 'react'; +import clsx from 'clsx'; import { Placement } from '@floating-ui/react'; - import { useFloatingUIPreset } from '../../hooks'; export type NotificationProps = { @@ -9,6 +9,7 @@ export type NotificationProps = { visibilityTimeout?: number; resetIsVisible?: () => void; placement?: Placement; + className?: string; iconClassName?: string | null; close?: () => void; }; @@ -21,6 +22,7 @@ export const Notification = (props: PropsWithChildren) => { visibilityTimeout, resetIsVisible, placement = 'top', + className, iconClassName = 'str-video__notification__icon', close, } = props; @@ -44,7 +46,7 @@ export const Notification = (props: PropsWithChildren) => {
{isVisible && (
(false); + const emaRef = useRef(DEFAULT_FPS); + const outlierStreakRef = useRef(0); + + const { highestFramesPerSecond, timestamp } = stats ?? {}; + + useEffect(() => { + if (!highestFramesPerSecond) { + emaRef.current = DEFAULT_FPS; + outlierStreakRef.current = 0; + setLowFps(false); + return; + } + + const prevEma = emaRef.current; + const deviation = Math.abs(highestFramesPerSecond - prevEma) / prevEma; + + const isOutlier = + highestFramesPerSecond < prevEma && deviation > DEVIATION_LIMIT; + outlierStreakRef.current = isOutlier ? outlierStreakRef.current + 1 : 0; + if (isOutlier && outlierStreakRef.current < OUTLIER_PERSISTENCE) return; + + emaRef.current = ALPHA * highestFramesPerSecond + (1 - ALPHA) * prevEma; + + setLowFps((prev) => { + if (prev && emaRef.current > FPS_WARNING_THRESHOLD_UPPER) return false; + if (!prev && emaRef.current < FPS_WARNING_THRESHOLD_LOWER) return true; + + return prev; + }); + }, [highestFramesPerSecond, timestamp]); + + return lowFps; +} diff --git a/packages/react-sdk/src/translations/en.json b/packages/react-sdk/src/translations/en.json index 2d33d1ae0b..b650492802 100644 --- a/packages/react-sdk/src/translations/en.json +++ b/packages/react-sdk/src/translations/en.json @@ -11,6 +11,7 @@ "Speakers": "Speakers", "Video": "Video", "You are muted. Unmute to speak.": "You are muted. Unmute to speak.", + "Background filters performance is degraded. Consider disabling filters for better performance.": "Background filters performance is degraded. Consider disabling filters for better performance.", "Live": "Live", "Livestream starts soon": "Livestream starts soon", diff --git a/packages/styling/src/BackgroundFilters/BackgroundFilters-layout.scss b/packages/styling/src/BackgroundFilters/BackgroundFilters-layout.scss index 18f013d6ad..46d6d5332c 100644 --- a/packages/styling/src/BackgroundFilters/BackgroundFilters-layout.scss +++ b/packages/styling/src/BackgroundFilters/BackgroundFilters-layout.scss @@ -32,3 +32,11 @@ height: auto; } } + +.str-video__background-filters__notifications { + position: fixed; + z-index: 3; + left: 20px; + right: 20px; + top: 60px; +} diff --git a/packages/video-filters-web/index.ts b/packages/video-filters-web/index.ts index 3e2c37cdd5..eefd056ca2 100644 --- a/packages/video-filters-web/index.ts +++ b/packages/video-filters-web/index.ts @@ -2,3 +2,6 @@ export * from './src/compatibility'; export * from './src/createRenderer'; export { SegmentationLevel } from './src/segmentation'; export * from './src/tflite'; +export * from './src/mediapipe'; +export * from './src/types'; +export * from './src/VirtualBackground'; diff --git a/packages/video-filters-web/mediapipe/models/selfie_multiclass_256x256.tflite b/packages/video-filters-web/mediapipe/models/selfie_multiclass_256x256.tflite new file mode 100644 index 0000000000..a9fbaef450 Binary files /dev/null and b/packages/video-filters-web/mediapipe/models/selfie_multiclass_256x256.tflite differ diff --git a/packages/video-filters-web/mediapipe/models/selfie_segmenter.tflite b/packages/video-filters-web/mediapipe/models/selfie_segmenter.tflite new file mode 100644 index 0000000000..844ee71d4e Binary files /dev/null and b/packages/video-filters-web/mediapipe/models/selfie_segmenter.tflite differ diff --git a/packages/video-filters-web/package.json b/packages/video-filters-web/package.json index 2c5da99ddb..2ef3fda1bf 100644 --- a/packages/video-filters-web/package.json +++ b/packages/video-filters-web/package.json @@ -19,6 +19,7 @@ "files": [ "dist", "tf", + "mediapipe", "src", 
"index.ts", "package.json", @@ -27,12 +28,14 @@ "CHANGELOG.md" ], "dependencies": { + "@mediapipe/tasks-vision": "^0.10.22-rc.20250304", "@stream-io/worker-timer": "^1.2.5", "wasm-feature-detect": "^1.8.0" }, "devDependencies": { "@rollup/plugin-replace": "^6.0.2", "@rollup/plugin-typescript": "^12.1.4", + "@types/dom-mediacapture-transform": "^0.1.11", "@types/emscripten": "^1.41.2", "rimraf": "^6.0.1", "rollup": "^4.52.4", diff --git a/packages/video-filters-web/src/FallbackGenerator.ts b/packages/video-filters-web/src/FallbackGenerator.ts new file mode 100644 index 0000000000..883d0526b3 --- /dev/null +++ b/packages/video-filters-web/src/FallbackGenerator.ts @@ -0,0 +1,81 @@ +/** + * Type representing a video track generator that can be either the native + * MediaStreamTrackGenerator or the fallback implementation. + */ +export interface MediaStreamTrackGenerator extends MediaStreamTrack { + readonly writable: WritableStream; +} + +/** + * Fallback video processor for browsers that do not support + * MediaStreamTrackGenerator. + * + * Produces a video MediaStreamTrack sourced from a canvas and exposes + * a WritableStream on track.writable for writing frames. + */ +class FallbackGenerator { + constructor({ + kind, + signalTarget, + }: { + kind: 'video'; + signalTarget?: MediaStreamTrack; + }) { + if (kind !== 'video') { + throw new Error('Only video tracks are supported'); + } + + const canvas = document.createElement('canvas'); + const ctx = canvas.getContext('2d', { desynchronized: true }); + if (!ctx) { + throw new Error('Failed to get 2D context from canvas'); + } + + const mediaStream = canvas.captureStream(); + const track = mediaStream.getVideoTracks()[0] as MediaStreamVideoTrack & { + writable: WritableStream; + }; + + const height = signalTarget?.getSettings().height; + const width = signalTarget?.getSettings().width; + if (height && width) { + canvas.height = height; + canvas.width = width; + } + + if (!track) { + throw new Error('Failed to create canvas track'); + } + + if (signalTarget) { + signalTarget.addEventListener('ended', () => { + track.stop(); + }); + } + + track.writable = new WritableStream({ + write: (frame: VideoFrame) => { + canvas.width = frame.displayWidth; + canvas.height = frame.displayHeight; + + ctx.drawImage(frame, 0, 0, canvas.width, canvas.height); + frame.close(); + }, + abort: () => { + track.stop(); + }, + close: () => { + track.stop(); + }, + }); + + return track as MediaStreamTrackGenerator; + } +} + +const TrackGenerator = + typeof MediaStreamTrackGenerator !== 'undefined' + ? MediaStreamTrackGenerator + : FallbackGenerator; + +export { TrackGenerator }; diff --git a/packages/video-filters-web/src/FallbackProcessor.ts b/packages/video-filters-web/src/FallbackProcessor.ts new file mode 100644 index 0000000000..01534adce9 --- /dev/null +++ b/packages/video-filters-web/src/FallbackProcessor.ts @@ -0,0 +1,95 @@ +import { WorkerTimer } from '@stream-io/worker-timer'; + +/** + * Type representing a video track processor that can be either the native + * MediaStreamTrackProcessor or the fallback implementation. + */ +export interface MediaStreamTrackProcessor { + readable: ReadableStream; +} + +/** + * Fallback video processor for browsers that do not support + * MediaStreamTrackProcessor. + * + * Takes a video track and produces a `ReadableStream` by drawing + * frames to an `OffscreenCanvas`. 
+ */ +class FallbackProcessor implements MediaStreamTrackProcessor { + readonly readable: ReadableStream; + readonly timers: WorkerTimer; + + constructor({ track }: { track: MediaStreamTrack }) { + if (!track) throw new Error('MediaStreamTrack is required'); + if (track.kind !== 'video') { + throw new Error('MediaStreamTrack must be video'); + } + let running = true; + + const video = document.createElement('video'); + video.muted = true; + video.playsInline = true; + video.crossOrigin = 'anonymous'; + video.srcObject = new MediaStream([track]); + + const canvas = new OffscreenCanvas(1, 1); + const ctx = canvas.getContext('2d'); + + if (!ctx) throw new Error('Failed to get 2D context from OffscreenCanvas'); + + let timestamp = 0; + const frameRate = track.getSettings().frameRate || 30; + let frameDuration = 1000 / frameRate; + + const close = () => { + video.pause(); + video.srcObject = null; + video.src = ''; + + this.timers.destroy(); + }; + + this.timers = new WorkerTimer({ useWorker: true }); + this.readable = new ReadableStream({ + start: async () => { + await Promise.all([ + video.play(), + new Promise((r) => + video.addEventListener('loadeddata', r, { once: true }), + ), + ]); + frameDuration = 1000 / (track.getSettings().frameRate || 30); + timestamp = performance.now(); + }, + pull: async (controller) => { + if (!running) { + controller.close(); + close(); + return; + } + const delta = performance.now() - timestamp; + if (delta <= frameDuration) { + await new Promise((r: (value?: unknown) => void) => + this.timers.setTimeout(r, frameDuration - delta), + ); + } + timestamp = performance.now(); + canvas.width = video.videoWidth; + canvas.height = video.videoHeight; + ctx.drawImage(video, 0, 0); + controller.enqueue(new VideoFrame(canvas, { timestamp })); + }, + cancel: () => { + running = false; + close(); + }, + }); + } +} + +const TrackProcessor = + typeof MediaStreamTrackProcessor !== 'undefined' + ? 
MediaStreamTrackProcessor + : FallbackProcessor; + +export { TrackProcessor }; diff --git a/packages/video-filters-web/src/VirtualBackground.ts b/packages/video-filters-web/src/VirtualBackground.ts new file mode 100644 index 0000000000..dd164062ab --- /dev/null +++ b/packages/video-filters-web/src/VirtualBackground.ts @@ -0,0 +1,255 @@ +import { + BACKGROUND_BLUR_MAP, + BackgroundOptions, + SegmenterOptions, + VideoTrackProcessorHooks, +} from './types'; +import { FilesetResolver, ImageSegmenter } from '@mediapipe/tasks-vision'; +import { WebGLRenderer } from './WebGLRenderer'; +import { packageName, version } from './version'; +import { TrackGenerator, MediaStreamTrackGenerator } from './FallbackGenerator'; +import { MediaStreamTrackProcessor, TrackProcessor } from './FallbackProcessor'; + +/** + * Wraps a track in a real-time processing pipeline where each frame + * passes through a transformer and outputs a new `MediaStreamVideoTrack` + */ +export class VirtualBackground { + private readonly processor: MediaStreamTrackProcessor; + private readonly generator: MediaStreamTrackGenerator; + + private canvas!: OffscreenCanvas; + private segmenter: ImageSegmenter | null = null; + private isSegmenterReady = false; + private webGlRenderer!: WebGLRenderer; + private abortController: AbortController; + + private segmenterDelayTotal = 0; + private frames = 0; + private lastStatsTime = 0; + + constructor( + private readonly track: MediaStreamVideoTrack, + private readonly options: BackgroundOptions = {}, + private readonly hooks: VideoTrackProcessorHooks = {}, + ) { + this.processor = new TrackProcessor({ track }); + this.generator = new TrackGenerator({ + kind: 'video', + signalTarget: track, + }) as MediaStreamTrackGenerator; + + this.abortController = new AbortController(); + } + + public async start(): Promise { + const { onError } = this.hooks; + + const { readable } = this.processor; + const { writable } = this.generator; + + const displayWidth = this.track.getSettings().width ?? 1280; + const displayHeight = this.track.getSettings().height ?? 720; + + this.canvas = new OffscreenCanvas(displayWidth, displayHeight); + this.webGlRenderer = new WebGLRenderer(this.canvas); + + await this.initializeSegmenter(); + + const opts = await this.initializeSegmenterOptions(); + + const transformStream = new TransformStream({ + transform: async (frame, controller) => { + try { + if (this.abortController.signal.aborted) { + console.log('aborted'); + return frame.close(); + } + + const processed = await this.transform(frame, opts); + controller.enqueue(processed); + } catch (e) { + console.error('error processing frame: ', e); + controller.enqueue(frame); + console.error(e); + } finally { + frame.close(); + } + }, + flush: () => { + if (this.segmenter) { + this.segmenter.close(); + this.segmenter = null; + } + this.isSegmenterReady = false; + }, + }); + + const signal = this.abortController.signal; + + readable + .pipeThrough(transformStream, { signal }) + .pipeTo(writable) + .catch((e) => { + console.log('HEREE'); + if (e.name !== 'AbortError') { + console.error('[virtual-background] Error processing track:', e); + onError?.(e); + } + }); + + return this.generator; + } + + /** + * Loads and initializes the MediaPipe `ImageSegmenter`. 
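+   *
+   * Notes on the code below: the WASM fileset is resolved from the MediaPipe
+   * CDN, and when no `modelPath` option is provided the `selfie_segmenter.tflite`
+   * model bundled with this package (served via unpkg) is used. If initialization
+   * fails, `isSegmenterReady` remains `false` and segmentation is skipped for
+   * subsequent frames.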
+ */ + private async initializeSegmenter() { + try { + const defaultModelPath = `https://unpkg.com/${packageName}@${version}/mediapipe/models/selfie_segmenter.tflite`; + + const model = this.options?.modelPath || defaultModelPath; + + const fileset = await FilesetResolver.forVisionTasks( + 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm', + ); + + this.segmenter = await ImageSegmenter.createFromOptions(fileset, { + baseOptions: { + modelAssetPath: model, + delegate: 'GPU', + }, + runningMode: 'VIDEO', + outputCategoryMask: true, + outputConfidenceMasks: true, + canvas: this.canvas, + }); + + this.isSegmenterReady = true; + } catch (error) { + console.error( + '[virtual-background] Failed to initialize MediaPipe segmenter:', + error, + ); + this.isSegmenterReady = false; + } + } + + /** + * Processes a single video frame. + * + * Performs segmentation via MediaPipe and then composites the frame + * through the WebGL renderer to apply background effects. + * + * @param frame - The incoming frame from the processor. + * @param opts - The segmentation options to use. + * + * @returns A new `VideoFrame` containing the processed image. + */ + private async transform( + frame: VideoFrame, + opts: SegmenterOptions, + ): Promise { + if (this.isSegmenterReady && this.segmenter) { + try { + const start = performance.now(); + await new Promise((resolve) => { + this.segmenter!.segmentForVideo(frame, frame.timestamp, (result) => { + const categoryMask = result.categoryMask!.getAsWebGLTexture(); + const confidenceMask = + result.confidenceMasks![0].getAsWebGLTexture(); + + this.webGlRenderer.render( + frame, + opts, + categoryMask, + confidenceMask, + ); + + const now = performance.now(); + this.segmenterDelayTotal += now - start; + this.frames++; + + if (this.lastStatsTime === 0) { + this.lastStatsTime = now; + } + + if (now - this.lastStatsTime > 1000) { + const delay = + Math.round((this.segmenterDelayTotal / this.frames) * 100) / + 100; + const fps = Math.round( + (1000 * this.frames) / (now - this.lastStatsTime), + ); + + this.hooks.onStats?.({ delay, fps }); + + this.lastStatsTime = now; + this.segmenterDelayTotal = 0; + this.frames = 0; + } + + resolve(); + }); + }); + } catch (error) { + console.error('[virtual-background] Error during segmentation:', error); + } + } + + return new VideoFrame(this.canvas, { timestamp: frame.timestamp }); + } + + private async loadBackground(url: string | undefined) { + if (!url) { + return; + } + + const response = await fetch(url); + if (!response.ok) { + console.error( + `[virtual-background] Failed to fetch background source ${url} (status: ${response.status})`, + ); + return; + } + const blob = await response.blob(); + + const imageBitmap = await createImageBitmap(blob); + return { type: 'image', media: imageBitmap, url }; + } + + private async initializeSegmenterOptions(): Promise { + const isSelfieMode = this.options.modelPath + ? 
this.options.modelPath?.includes('selfie_segmenter') + : true; + + if (this.options.backgroundFilter === 'image') { + return { + backgroundSource: await this.loadBackground( + this.options.backgroundImage, + ), + bgBlur: 0, + bgBlurRadius: 0, + isSelfieMode, + }; + } + + return { + ...BACKGROUND_BLUR_MAP[this.options.backgroundBlurLevel || 'medium'], + backgroundSource: undefined, + isSelfieMode, + }; + } + + public stop(): void { + this.abortController.abort(); + this.webGlRenderer.close(); + this.generator.stop(); + + if (this.segmenter) { + this.segmenter.close(); + this.segmenter = null; + } + this.isSegmenterReady = false; + } +} diff --git a/packages/video-filters-web/src/WebGLRenderer.ts b/packages/video-filters-web/src/WebGLRenderer.ts new file mode 100644 index 0000000000..8af6a9f4c4 --- /dev/null +++ b/packages/video-filters-web/src/WebGLRenderer.ts @@ -0,0 +1,1194 @@ +import { BackgroundSource } from './types'; + +type ImageInfo = { + type: 'image'; + texture: WebGLTexture; + width: number; + height: number; + url: string; +}; + +type VideoInfo = { + type: 'video'; + texture: WebGLTexture; + url: string; + media: ReadableStream; + canvas: OffscreenCanvas; +}; + +type ColorInfo = { + type: 'color'; + texture: WebGLTexture; + color: readonly [number, number, number, number]; +}; + +type BackgroundRenderInfo = ImageInfo | VideoInfo | ColorInfo; + +export class WebGLRenderer { + readonly canvas: OffscreenCanvas; + readonly gl: WebGL2RenderingContext; + + readonly stateUpdateProgram: WebGLProgram; + readonly maskRefineProgram: WebGLProgram; + readonly blurProgram: WebGLProgram; + readonly blendProgram: WebGLProgram; + + readonly stateUpdateLocations: { + position: number; + texCoord: number; + categoryTexture: WebGLUniformLocation | null; + confidenceTexture: WebGLUniformLocation | null; + prevStateTexture: WebGLUniformLocation | null; + smoothingFactor: WebGLUniformLocation | null; + smoothstepMin: WebGLUniformLocation | null; + smoothstepMax: WebGLUniformLocation | null; + selfieModel: WebGLUniformLocation | null; + }; + readonly maskRefineLocations: { + position: number; + texCoord: number; + maskTexture: WebGLUniformLocation | null; + frameTexture: WebGLUniformLocation | null; + texelSize: WebGLUniformLocation | null; + sigmaSpatial: WebGLUniformLocation | null; + sigmaRange: WebGLUniformLocation | null; + }; + readonly blurLocations: { + position: number; + texCoord: number; + image: WebGLUniformLocation | null; + texelSize: WebGLUniformLocation | null; + sigma: WebGLUniformLocation | null; + radiusScale: WebGLUniformLocation | null; + personMask: WebGLUniformLocation | null; + direction: WebGLUniformLocation | null; + }; + readonly blendLocations: { + position: number; + texCoord: number; + frameTexture: WebGLUniformLocation | null; + currentStateTexture: WebGLUniformLocation | null; + backgroundTexture: WebGLUniformLocation | null; + bgImageDimensions: WebGLUniformLocation | null; + canvasDimensions: WebGLUniformLocation | null; + borderSmooth: WebGLUniformLocation | null; + bgBlur: WebGLUniformLocation | null; + bgBlurRadius: WebGLUniformLocation | null; + enabled: WebGLUniformLocation | null; + }; + // Buffers & Framebuffers + readonly positionBuffer: WebGLBuffer | null; + readonly texCoordBuffer: WebGLBuffer | null; + readonly storedStateTextures: (WebGLTexture | null)[]; + readonly fbo: WebGLFramebuffer | null; + // Refine outputs + readonly refineFbo: WebGLFramebuffer | null; + readonly refinedMaskTexture: WebGLTexture | null; + // Frame texture + readonly frameTexture: 
WebGLTexture | null; + // Blur resources + readonly blurTexture1: WebGLTexture | null; + readonly blurTexture2: WebGLTexture | null; + readonly blurFbo1: WebGLFramebuffer | null; + readonly blurFbo2: WebGLFramebuffer | null; + + private running = false; + private static readonly DEFAULT_BG_COLOR: readonly [ + number, + number, + number, + number, + ] = [33, 150, 243, 255]; + private currentStateIndex = 0; + private backgroundRenderInfo: BackgroundRenderInfo | null = null; + private activeBackgroundSourceIdentifier: string | null = null; + + constructor(canvas: OffscreenCanvas) { + this.canvas = canvas; + const gl = this.canvas.getContext('webgl2', { + alpha: false, + antialias: false, + desynchronized: true, + }); + if (!gl) throw new Error('WebGL2 not supported'); + this.gl = gl; + + const stateUpdateVertexShaderSource = `attribute vec2 a_position; attribute vec2 a_texCoord; varying vec2 v_texCoord; void main() { gl_Position = vec4(a_position, 0.0, 1.0); v_texCoord = a_texCoord; }`; + const stateUpdateFragmentShaderSource = ` + precision mediump float; + varying vec2 v_texCoord; + uniform sampler2D u_categoryTexture; + uniform sampler2D u_confidenceTexture; + uniform sampler2D u_prevStateTexture; + uniform float u_smoothingFactor; + uniform float u_smoothstepMin; + uniform float u_smoothstepMax; + uniform int u_selfieModel; + + void main() { + vec2 prevCoord = vec2(v_texCoord.x, 1.0 - v_texCoord.y); + float categoryValue = texture2D(u_categoryTexture, v_texCoord).r; + float confidenceValue = texture2D(u_confidenceTexture, v_texCoord).r; + + if (u_selfieModel == 1) { + categoryValue = 1.0 - categoryValue; + confidenceValue = 1.0 - confidenceValue; + } + + if (categoryValue > 0.0) { + categoryValue = 1.0; + confidenceValue = 1.0 - confidenceValue; + } + + float nonLinearConfidence = smoothstep(u_smoothstepMin, u_smoothstepMax, confidenceValue); + float prevCategoryValue = texture2D(u_prevStateTexture, prevCoord).r; + float alpha = u_smoothingFactor * nonLinearConfidence; + float newCategoryValue = alpha * categoryValue + (1.0 - alpha) * prevCategoryValue; + + + gl_FragColor = vec4(newCategoryValue, 0.0, 0.0, 0.0); + } + `; + this.stateUpdateProgram = this.createAndLinkProgram( + stateUpdateVertexShaderSource, + stateUpdateFragmentShaderSource, + ); + this.stateUpdateLocations = { + position: gl.getAttribLocation(this.stateUpdateProgram, 'a_position'), + texCoord: gl.getAttribLocation(this.stateUpdateProgram, 'a_texCoord'), + categoryTexture: gl.getUniformLocation( + this.stateUpdateProgram, + 'u_categoryTexture', + ), + confidenceTexture: gl.getUniformLocation( + this.stateUpdateProgram, + 'u_confidenceTexture', + ), + prevStateTexture: gl.getUniformLocation( + this.stateUpdateProgram, + 'u_prevStateTexture', + ), + smoothingFactor: gl.getUniformLocation( + this.stateUpdateProgram, + 'u_smoothingFactor', + ), + smoothstepMin: gl.getUniformLocation( + this.stateUpdateProgram, + 'u_smoothstepMin', + ), + smoothstepMax: gl.getUniformLocation( + this.stateUpdateProgram, + 'u_smoothstepMax', + ), + selfieModel: gl.getUniformLocation( + this.stateUpdateProgram, + 'u_selfieModel', + ), + }; + + const maskRefineVertexShaderSource = stateUpdateVertexShaderSource; + const maskRefineFragmentShaderSource = ` + precision mediump float; + varying vec2 v_texCoord; + + uniform sampler2D u_maskTexture; + uniform sampler2D u_frameTexture; + uniform vec2 u_texelSize; + uniform float u_sigmaSpatial; + uniform float u_sigmaRange; + + void main() { + vec2 flippedCoord = v_texCoord; + vec3 centerPixelColor = 
texture2D(u_frameTexture, v_texCoord).rgb; + float totalWeight = 0.0; + float weightedMaskSum = 0.0; + + for (int offsetX = -2; offsetX <= 2; offsetX++) { + for (int offsetY = -2; offsetY <= 2; offsetY++) { + vec2 shift = vec2(float(offsetX), float(offsetY)) * u_texelSize; + vec2 frameCoord = v_texCoord + shift; + vec2 maskCoord = flippedCoord + shift; + + vec3 neighborPixelColor = texture2D(u_frameTexture, frameCoord).rgb; + float neighborMaskValue = texture2D(u_maskTexture, maskCoord).r; + + float spatialWeight = exp(-dot(shift, shift) / (2.0 * u_sigmaSpatial * u_sigmaSpatial)); + vec3 colorDifference = neighborPixelColor - centerPixelColor; + float rangeWeight = exp(-(dot(colorDifference, colorDifference)) / (2.0 * u_sigmaRange * u_sigmaRange)); + + float combinedWeight = spatialWeight * rangeWeight; + weightedMaskSum += neighborMaskValue * combinedWeight; + totalWeight += combinedWeight; + } + } + + float refinedMaskValue = weightedMaskSum / max(totalWeight, 1e-6); + gl_FragColor = vec4(refinedMaskValue, refinedMaskValue, refinedMaskValue, 1.0); + } + `; + + this.maskRefineProgram = this.createAndLinkProgram( + maskRefineVertexShaderSource, + maskRefineFragmentShaderSource, + ); + this.maskRefineLocations = { + position: gl.getAttribLocation(this.maskRefineProgram, 'a_position'), + texCoord: gl.getAttribLocation(this.maskRefineProgram, 'a_texCoord'), + maskTexture: gl.getUniformLocation( + this.maskRefineProgram, + 'u_maskTexture', + ), + frameTexture: gl.getUniformLocation( + this.maskRefineProgram, + 'u_frameTexture', + ), + texelSize: gl.getUniformLocation(this.maskRefineProgram, 'u_texelSize'), + sigmaSpatial: gl.getUniformLocation( + this.maskRefineProgram, + 'u_sigmaSpatial', + ), + sigmaRange: gl.getUniformLocation(this.maskRefineProgram, 'u_sigmaRange'), + }; + + const blurVertexShaderSource = stateUpdateVertexShaderSource; + const blurFragmentShaderSource = ` + precision highp float; + varying vec2 v_texCoord; + + uniform sampler2D u_image; + uniform sampler2D u_personMask; + uniform vec2 u_texelSize; + uniform float u_sigma; + uniform float u_radiusScale; + uniform vec2 u_direction; + + const int KERNEL_RADIUS = 10; + + float gauss(float x, float s) { + return exp(-(x * x) / (2.0 * s * s)); + } + + void main() { + vec2 maskCoord = u_direction.y > 0.5 ? vec2(v_texCoord.x, 1.0 - v_texCoord.y) : v_texCoord; + float mCenter = texture2D(u_personMask, maskCoord).r; + float wCenter = gauss(0.0, u_sigma); + vec4 accum = texture2D(u_image, v_texCoord) * wCenter * (1.0 - mCenter); + float weightSum = wCenter * (1.0 - mCenter); + + for (int i = 1; i <= KERNEL_RADIUS; i++) { + float f = float(i); + float offset = f * u_radiusScale; + float w = gauss(offset, u_sigma); + vec2 texOffset = u_direction * offset * u_texelSize; + + vec2 uvPlus = v_texCoord + texOffset; + vec2 maskCoordPlus = u_direction.y > 0.5 ? vec2(uvPlus.x, 1.0 - uvPlus.y) : uvPlus; + float mPlus = texture2D(u_personMask, maskCoordPlus).r; + accum += texture2D(u_image, uvPlus) * w * (1.0 - mPlus); + weightSum += w * (1.0 - mPlus); + + vec2 uvMinus = v_texCoord - texOffset; + vec2 maskCoordMinus = u_direction.y > 0.5 ? 
vec2(uvMinus.x, 1.0 - uvMinus.y) : uvMinus; + float mMinus = texture2D(u_personMask, maskCoordMinus).r; + accum += texture2D(u_image, uvMinus) * w * (1.0 - mMinus); + weightSum += w * (1.0 - mMinus); + } + + vec4 blurred = accum / max(weightSum, 1e-6); + gl_FragColor = blurred; + } + `; + + this.blurProgram = this.createAndLinkProgram( + blurVertexShaderSource, + blurFragmentShaderSource, + ); + this.blurLocations = { + position: gl.getAttribLocation(this.blurProgram, 'a_position'), + texCoord: gl.getAttribLocation(this.blurProgram, 'a_texCoord'), + image: gl.getUniformLocation(this.blurProgram, 'u_image'), + personMask: gl.getUniformLocation(this.blurProgram, 'u_personMask'), + texelSize: gl.getUniformLocation(this.blurProgram, 'u_texelSize'), + sigma: gl.getUniformLocation(this.blurProgram, 'u_sigma'), + radiusScale: gl.getUniformLocation(this.blurProgram, 'u_radiusScale'), + direction: gl.getUniformLocation(this.blurProgram, 'u_direction'), + }; + + const blendVertexShaderSource = stateUpdateVertexShaderSource; + const blendFragmentShaderSource = ` + precision mediump float; + varying vec2 v_texCoord; + + uniform sampler2D u_frameTexture; + uniform sampler2D u_currentStateTexture; + uniform sampler2D u_backgroundTexture; + uniform vec2 u_bgImageDimensions; // Dimensions of the background image + uniform vec2 u_canvasDimensions; // Dimensions of the canvas + uniform float u_borderSmooth; + uniform float u_bgBlur; + uniform float u_bgBlurRadius; + uniform int u_enabled; + + vec4 getMixedFragColor(vec2 bgTexCoord, vec2 categoryCoord, vec2 offset) { + vec4 backgroundColor = texture2D(u_backgroundTexture, bgTexCoord + offset); + vec4 frameColor = texture2D(u_frameTexture, v_texCoord + offset); + float categoryValue = texture2D(u_currentStateTexture, categoryCoord + offset).r; + return mix(backgroundColor, frameColor, categoryValue); + } + + void main() { + if (u_enabled == 0) { + gl_FragColor = texture2D(u_frameTexture, v_texCoord); + return; + } + + vec2 categoryCoord = v_texCoord; + float categoryValue = texture2D(u_currentStateTexture, categoryCoord).r; + + float canvasAspect = u_canvasDimensions.x / u_canvasDimensions.y; + float bgAspect = u_bgImageDimensions.x / u_bgImageDimensions.y; + + vec2 bgTexCoord = v_texCoord; + float scaleX = 1.0; + float scaleY = 1.0; + float offsetX = 0.0; + float offsetY = 0.0; + + if (canvasAspect < bgAspect) { + scaleY = 1.0; + scaleX = bgAspect / canvasAspect; + offsetX = (1.0 - scaleX) / 2.0; + } else { + scaleX = 1.0; + scaleY = canvasAspect / bgAspect; + offsetY = (1.0 - scaleY) / 2.0; + } + + bgTexCoord = vec2((v_texCoord.x - offsetX) / scaleX, (v_texCoord.y - offsetY) / scaleY); + gl_FragColor = getMixedFragColor(bgTexCoord, categoryCoord, vec2(0.0, 0.0)); + }`; + + this.blendProgram = this.createAndLinkProgram( + blendVertexShaderSource, + blendFragmentShaderSource, + ); + this.blendLocations = { + position: gl.getAttribLocation(this.blendProgram, 'a_position'), + texCoord: gl.getAttribLocation(this.blendProgram, 'a_texCoord'), + frameTexture: gl.getUniformLocation(this.blendProgram, 'u_frameTexture'), + currentStateTexture: gl.getUniformLocation( + this.blendProgram, + 'u_currentStateTexture', + ), + backgroundTexture: gl.getUniformLocation( + this.blendProgram, + 'u_backgroundTexture', + ), + bgImageDimensions: gl.getUniformLocation( + this.blendProgram, + 'u_bgImageDimensions', + ), + canvasDimensions: gl.getUniformLocation( + this.blendProgram, + 'u_canvasDimensions', + ), + borderSmooth: gl.getUniformLocation(this.blendProgram, 
'u_borderSmooth'), + bgBlur: gl.getUniformLocation(this.blendProgram, 'u_bgBlur'), + bgBlurRadius: gl.getUniformLocation(this.blendProgram, 'u_bgBlurRadius'), + enabled: gl.getUniformLocation(this.blendProgram, 'u_enabled'), + }; + + this.positionBuffer = gl.createBuffer(); + gl.bindBuffer(gl.ARRAY_BUFFER, this.positionBuffer); + gl.bufferData( + gl.ARRAY_BUFFER, + new Float32Array([-1, -1, 1, -1, -1, 1, -1, 1, 1, -1, 1, 1]), + gl.STATIC_DRAW, + ); + + this.texCoordBuffer = gl.createBuffer(); + gl.bindBuffer(gl.ARRAY_BUFFER, this.texCoordBuffer); + gl.bufferData( + gl.ARRAY_BUFFER, + new Float32Array([0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0]), + gl.STATIC_DRAW, + ); + + this.storedStateTextures = Array.from({ length: 2 }, () => { + const tex = gl.createTexture(); + gl.bindTexture(gl.TEXTURE_2D, tex); + gl.texImage2D( + gl.TEXTURE_2D, + 0, + gl.RGBA, + 1, + 1, + 0, + gl.RGBA, + gl.UNSIGNED_BYTE, + new Uint8Array([0, 0, 0, 255]), + ); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR); + return tex; + }); + gl.bindTexture(gl.TEXTURE_2D, null); + + this.fbo = gl.createFramebuffer(); + this.refineFbo = gl.createFramebuffer(); + + const refinedTex = gl.createTexture(); + this.frameTexture = gl.createTexture(); + if (!refinedTex) throw new Error('Failed to create refined mask texture'); + gl.bindTexture(gl.TEXTURE_2D, refinedTex); + gl.texImage2D( + gl.TEXTURE_2D, + 0, + gl.RGBA, + 1, + 1, + 0, + gl.RGBA, + gl.UNSIGNED_BYTE, + null, + ); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR); + gl.bindTexture(gl.TEXTURE_2D, null); + this.refinedMaskTexture = refinedTex; + + const mkColorTex = () => { + const t = gl.createTexture(); + if (!t) throw new Error('Failed to create blur texture'); + gl.bindTexture(gl.TEXTURE_2D, t); + gl.texImage2D( + gl.TEXTURE_2D, + 0, + gl.RGBA, + 1, + 1, + 0, + gl.RGBA, + gl.UNSIGNED_BYTE, + null, + ); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR); + gl.bindTexture(gl.TEXTURE_2D, null); + return t; + }; + this.blurTexture1 = mkColorTex(); + this.blurTexture2 = mkColorTex(); + + const mkFbo = (tex: WebGLTexture | null) => { + const fb = gl.createFramebuffer(); + if (!fb || !tex) throw new Error('Failed to create blur FBO'); + gl.bindFramebuffer(gl.FRAMEBUFFER, fb); + gl.framebufferTexture2D( + gl.FRAMEBUFFER, + gl.COLOR_ATTACHMENT0, + gl.TEXTURE_2D, + tex, + 0, + ); + gl.bindFramebuffer(gl.FRAMEBUFFER, null); + return fb; + }; + this.blurFbo1 = mkFbo(this.blurTexture1); + this.blurFbo2 = mkFbo(this.blurTexture2); + + this.running = true; + } + + private createAndLinkProgram( + vsSource: string, + fsSource: string, + ): WebGLProgram { + const vs = this.createShader(this.gl.VERTEX_SHADER, vsSource); + const fs = this.createShader(this.gl.FRAGMENT_SHADER, fsSource); + const prog = this.gl.createProgram(); + if (!prog) throw new Error('Failed to create program'); + this.gl.attachShader(prog, vs); 
+ this.gl.attachShader(prog, fs); + this.gl.linkProgram(prog); + if (!this.gl.getProgramParameter(prog, this.gl.LINK_STATUS)) { + console.error('Program link error:', this.gl.getProgramInfoLog(prog)); + this.gl.deleteProgram(prog); + throw new Error('Link fail'); + } + this.gl.detachShader(prog, vs); + this.gl.detachShader(prog, fs); + this.gl.deleteShader(vs); + this.gl.deleteShader(fs); + return prog; + } + + private createShader(type: number, source: string): WebGLShader { + const shader = this.gl.createShader(type); + if (!shader) throw new Error(`Failed to create shader type: ${type}`); + this.gl.shaderSource(shader, source); + this.gl.compileShader(shader); + if (!this.gl.getShaderParameter(shader, this.gl.COMPILE_STATUS)) { + console.error('Shader compile error:', this.gl.getShaderInfoLog(shader)); + this.gl.deleteShader(shader); + throw new Error('Failed to compile shader'); + } + return shader; + } + + private createColorTexture( + r: number, + g: number, + b: number, + a: number, + ): { + texture: WebGLTexture; + color: readonly [number, number, number, number]; + } { + const texture = this.gl.createTexture(); + if (!texture) throw new Error('Failed to create texture for color'); + this.gl.bindTexture(this.gl.TEXTURE_2D, texture); + const pixel = new Uint8Array([r, g, b, a]); + this.gl.texImage2D( + this.gl.TEXTURE_2D, + 0, + this.gl.RGBA, + 1, + 1, + 0, + this.gl.RGBA, + this.gl.UNSIGNED_BYTE, + pixel, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_WRAP_S, + this.gl.CLAMP_TO_EDGE, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_WRAP_T, + this.gl.CLAMP_TO_EDGE, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_MIN_FILTER, + this.gl.NEAREST, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_MAG_FILTER, + this.gl.NEAREST, + ); + this.gl.bindTexture(this.gl.TEXTURE_2D, null); + return { texture, color: [r, g, b, a] as const }; + } + + private updateBackgroundIfNeeded(newSource?: BackgroundSource | null) { + const gl = this.gl; + let newIdentifier: string; + + if (!newSource) { + const [r, g, b, a] = WebGLRenderer.DEFAULT_BG_COLOR; + newIdentifier = `color(${r},${g},${b},${a})`; + } else { + newIdentifier = newSource.url; + } + + if ( + newIdentifier === this.activeBackgroundSourceIdentifier && + this.backgroundRenderInfo + ) { + return; + } + + if (this.backgroundRenderInfo) { + gl.deleteTexture(this.backgroundRenderInfo.texture); + this.backgroundRenderInfo = null; + } + this.activeBackgroundSourceIdentifier = newIdentifier; + + if (!newSource) { + const [r, g, b, a] = WebGLRenderer.DEFAULT_BG_COLOR; + const colorTexData = this.createColorTexture(r, g, b, a); + this.backgroundRenderInfo = { + type: 'color', + texture: colorTexData.texture, + color: colorTexData.color, + }; + this.activeBackgroundSourceIdentifier = `color(${r},${g},${b},${a})`; + } else { + if (newSource.type === 'image') { + const { media, url } = newSource as { media: ImageBitmap; url: string }; + const texture = this.gl.createTexture(); + if (!texture) { + throw new Error('Failed to create texture object for image.'); + } + this.gl.bindTexture(this.gl.TEXTURE_2D, texture); + this.gl.texImage2D( + this.gl.TEXTURE_2D, + 0, + this.gl.RGBA, + this.gl.RGBA, + this.gl.UNSIGNED_BYTE, + media, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_WRAP_S, + this.gl.CLAMP_TO_EDGE, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_WRAP_T, + this.gl.CLAMP_TO_EDGE, + ); + this.gl.texParameteri( + 
this.gl.TEXTURE_2D, + this.gl.TEXTURE_MIN_FILTER, + this.gl.LINEAR, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_MAG_FILTER, + this.gl.LINEAR, + ); + this.gl.bindTexture(this.gl.TEXTURE_2D, null); + + this.backgroundRenderInfo = { + type: 'image', + texture, + width: media.width, + height: media.height, + url, + }; + } else if (newSource.type === 'video') { + const { media, url } = newSource as { + media: ReadableStream; + url: string; + }; + + const canvas = new OffscreenCanvas(1, 1); + const ctx = canvas.getContext('2d'); + const writer = new WritableStream({ + write(videoFrame: VideoFrame) { + canvas.width = videoFrame.codedWidth; + canvas.height = videoFrame.codedHeight; + ctx?.drawImage(videoFrame, 0, 0); + videoFrame.close(); + }, + close() { + console.log('[virtual-background] video background close'); + }, + }); + media.pipeTo(writer).catch((err) => { + console.error('media.pipeTo(writer) error', err); + }); + + const texture = this.gl.createTexture(); + if (!texture) throw new Error('Failed to create texture for video'); + this.gl.bindTexture(this.gl.TEXTURE_2D, texture); + this.gl.texImage2D( + this.gl.TEXTURE_2D, + 0, + this.gl.RGBA, + 1, + 1, + 0, + this.gl.RGBA, + this.gl.UNSIGNED_BYTE, + null, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_WRAP_S, + this.gl.CLAMP_TO_EDGE, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_WRAP_T, + this.gl.CLAMP_TO_EDGE, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_MIN_FILTER, + this.gl.LINEAR, + ); + this.gl.texParameteri( + this.gl.TEXTURE_2D, + this.gl.TEXTURE_MAG_FILTER, + this.gl.LINEAR, + ); + this.gl.bindTexture(this.gl.TEXTURE_2D, null); + + this.backgroundRenderInfo = { + type: 'video', + texture, + url, + media, + canvas, + }; + } + } + + if (!this.backgroundRenderInfo) { + console.error( + 'Critical: backgroundRenderInfo is null after processing new source. 
Setting default color.', + ); + const [r, g, b, a] = WebGLRenderer.DEFAULT_BG_COLOR; + const colorTexData = this.createColorTexture(r, g, b, a); + this.backgroundRenderInfo = { + type: 'color', + texture: colorTexData.texture, + color: colorTexData.color, + }; + this.activeBackgroundSourceIdentifier = `color(${r},${g},${b},${a})`; + } + } + + public render( + videoFrame: VideoFrame, + options: { + backgroundSource?: BackgroundSource | null; + bgBlur: number; + bgBlurRadius: number; + isSelfieMode: boolean; + }, + categoryTexture?: WebGLTexture, + confidenceTexture?: WebGLTexture, + ) { + if (!this.running) return; + const { + gl, + fbo, + frameTexture, + storedStateTextures, + stateUpdateProgram, + stateUpdateLocations, + refineFbo, + refinedMaskTexture, + maskRefineProgram, + maskRefineLocations, + blendProgram, + blendLocations, + blurFbo1, + blurFbo2, + blurTexture1, + blurTexture2, + } = this; + + const { displayWidth: width, displayHeight: height } = videoFrame; + if (this.canvas.width !== width || this.canvas.height !== height) { + this.canvas.width = width; + this.canvas.height = height; + } + + if (!categoryTexture || !confidenceTexture) { + gl.viewport(0, 0, gl.drawingBufferWidth, gl.drawingBufferHeight); + gl.useProgram(blendProgram); + + const frame = gl.createTexture(); + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, frame); + gl.texImage2D( + gl.TEXTURE_2D, + 0, + gl.RGBA, + gl.RGBA, + gl.UNSIGNED_BYTE, + videoFrame, + ); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST); + gl.uniform1i(blendLocations.frameTexture, 0); + gl.uniform1i(blendLocations.enabled, 0); + + gl.enableVertexAttribArray(blendLocations.position); + gl.bindBuffer(gl.ARRAY_BUFFER, this.positionBuffer); + gl.vertexAttribPointer(blendLocations.position, 2, gl.FLOAT, false, 0, 0); + gl.enableVertexAttribArray(blendLocations.texCoord); + gl.bindBuffer(gl.ARRAY_BUFFER, this.texCoordBuffer); + gl.vertexAttribPointer(blendLocations.texCoord, 2, gl.FLOAT, false, 0, 0); + + gl.drawArrays(gl.TRIANGLES, 0, 6); + + gl.deleteTexture(frame); + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, null); + + return; + } + + // Determine read/write indices for state ping-pong + const readStateIndex = this.currentStateIndex; + const writeStateIndex = (this.currentStateIndex + 1) % 2; + const prevStateTexture = storedStateTextures[readStateIndex]; + const newStateTexture = storedStateTextures[writeStateIndex]; + + this.updateBackgroundIfNeeded(options.backgroundSource); + + // --- 1. 
State Update Pass (Calculates Moving Average) --- + gl.bindFramebuffer(gl.FRAMEBUFFER, fbo); + gl.framebufferTexture2D( + gl.FRAMEBUFFER, + gl.COLOR_ATTACHMENT0, + gl.TEXTURE_2D, + newStateTexture, + 0, + ); + + gl.bindTexture(gl.TEXTURE_2D, newStateTexture); + gl.texImage2D( + gl.TEXTURE_2D, + 0, + gl.RGBA, + width, + height, + 0, + gl.RGBA, + gl.UNSIGNED_BYTE, + null, + ); + + gl.viewport(0, 0, width, height); + gl.useProgram(stateUpdateProgram); + + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, categoryTexture); + gl.uniform1i(stateUpdateLocations.categoryTexture, 0); + gl.activeTexture(gl.TEXTURE1); + gl.bindTexture(gl.TEXTURE_2D, confidenceTexture); + gl.uniform1i(stateUpdateLocations.confidenceTexture, 1); + gl.activeTexture(gl.TEXTURE2); + gl.bindTexture(gl.TEXTURE_2D, prevStateTexture); // Read previous state + gl.uniform1i(stateUpdateLocations.prevStateTexture, 2); + + gl.uniform1f(stateUpdateLocations.smoothingFactor, 0.8); + gl.uniform1f(stateUpdateLocations.smoothstepMin, 0.0); + gl.uniform1f(stateUpdateLocations.smoothstepMax, 0.9); + + gl.uniform1i( + stateUpdateLocations.selfieModel, + options.isSelfieMode ? 1 : 0, + ); + + gl.enableVertexAttribArray(stateUpdateLocations.position); + gl.bindBuffer(gl.ARRAY_BUFFER, this.positionBuffer); + gl.vertexAttribPointer( + stateUpdateLocations.position, + 2, + gl.FLOAT, + false, + 0, + 0, + ); + gl.enableVertexAttribArray(stateUpdateLocations.texCoord); + gl.bindBuffer(gl.ARRAY_BUFFER, this.texCoordBuffer); + gl.vertexAttribPointer( + stateUpdateLocations.texCoord, + 2, + gl.FLOAT, + false, + 0, + 0, + ); + + gl.drawArrays(gl.TRIANGLES, 0, 6); + + gl.bindFramebuffer(gl.FRAMEBUFFER, refineFbo); + gl.framebufferTexture2D( + gl.FRAMEBUFFER, + gl.COLOR_ATTACHMENT0, + gl.TEXTURE_2D, + refinedMaskTexture, + 0, + ); + + gl.bindTexture(gl.TEXTURE_2D, refinedMaskTexture); + gl.texImage2D( + gl.TEXTURE_2D, + 0, + gl.RGBA, + width, + height, + 0, + gl.RGBA, + gl.UNSIGNED_BYTE, + null, + ); + + gl.viewport(0, 0, width, height); + gl.useProgram(maskRefineProgram); + + gl.enableVertexAttribArray(maskRefineLocations.position); + gl.bindBuffer(gl.ARRAY_BUFFER, this.positionBuffer); + gl.vertexAttribPointer( + maskRefineLocations.position, + 2, + gl.FLOAT, + false, + 0, + 0, + ); + gl.enableVertexAttribArray(maskRefineLocations.texCoord); + gl.bindBuffer(gl.ARRAY_BUFFER, this.texCoordBuffer); + gl.vertexAttribPointer( + maskRefineLocations.texCoord, + 2, + gl.FLOAT, + false, + 0, + 0, + ); + + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, newStateTexture); + gl.uniform1i(maskRefineLocations.maskTexture, 0); + + gl.activeTexture(gl.TEXTURE1); + gl.bindTexture(gl.TEXTURE_2D, frameTexture); + gl.texImage2D( + gl.TEXTURE_2D, + 0, + gl.RGBA, + gl.RGBA, + gl.UNSIGNED_BYTE, + videoFrame, + ); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST); + gl.uniform1i(maskRefineLocations.frameTexture, 1); + + gl.uniform2f(maskRefineLocations.texelSize, 1.0 / width, 1.0 / height); + gl.uniform1f(maskRefineLocations.sigmaSpatial, 2.0); + gl.uniform1f(maskRefineLocations.sigmaRange, 0.1); + + gl.drawArrays(gl.TRIANGLES, 0, 6); + + gl.disableVertexAttribArray(maskRefineLocations.position); + gl.disableVertexAttribArray(maskRefineLocations.texCoord); + + let backgroundTexToUse: WebGLTexture | null; + let 
bgWToSend = width; + let bgHToSend = height; + + if (options.bgBlur > 0 && options.bgBlurRadius > 0) { + const downscale = 0.5; + const blurW = Math.floor(width * downscale); + const blurH = Math.floor(height * downscale); + + gl.bindTexture(gl.TEXTURE_2D, blurTexture1); + gl.texImage2D( + gl.TEXTURE_2D, + 0, + gl.RGBA, + blurW, + blurH, + 0, + gl.RGBA, + gl.UNSIGNED_BYTE, + null, + ); + gl.bindTexture(gl.TEXTURE_2D, blurTexture2); + gl.texImage2D( + gl.TEXTURE_2D, + 0, + gl.RGBA, + blurW, + blurH, + 0, + gl.RGBA, + gl.UNSIGNED_BYTE, + null, + ); + + const KERNEL_RADIUS = 10.0; + const radiusScale = Math.max(0.0, options.bgBlurRadius) / KERNEL_RADIUS; + + gl.useProgram(this.blurProgram); + + gl.enableVertexAttribArray(this.blurLocations.position); + gl.bindBuffer(gl.ARRAY_BUFFER, this.positionBuffer); + gl.vertexAttribPointer( + this.blurLocations.position, + 2, + gl.FLOAT, + false, + 0, + 0, + ); + gl.enableVertexAttribArray(this.blurLocations.texCoord); + gl.bindBuffer(gl.ARRAY_BUFFER, this.texCoordBuffer); + gl.vertexAttribPointer( + this.blurLocations.texCoord, + 2, + gl.FLOAT, + false, + 0, + 0, + ); + + gl.activeTexture(gl.TEXTURE1); + gl.bindTexture(gl.TEXTURE_2D, refinedMaskTexture); + gl.uniform1i(this.blurLocations.personMask, 1); + gl.uniform1f(this.blurLocations.sigma, options.bgBlur * 0.7); + gl.uniform1f(this.blurLocations.radiusScale, radiusScale); + + const blurPasses = [ + { + direction: [1.0, 0.0], + input: frameTexture, + output: blurFbo1, + texelSize: [1.0 / width, 1.0 / height], + }, + { + direction: [0.0, 1.0], + input: blurTexture1, + output: blurFbo2, + texelSize: [1.0 / blurW, 1.0 / blurH], + }, + { + direction: [1.0, 0.0], + input: blurTexture2, + output: blurFbo1, + texelSize: [1.0 / blurW, 1.0 / blurH], + }, + { + direction: [0.0, 1.0], + input: blurTexture1, + output: blurFbo2, + texelSize: [1.0 / blurW, 1.0 / blurH], + }, + ]; + + for (const pass of blurPasses) { + gl.bindFramebuffer(gl.FRAMEBUFFER, pass.output); + gl.viewport(0, 0, blurW, blurH); + + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, pass.input); + gl.uniform1i(this.blurLocations.image, 0); + gl.uniform2f( + this.blurLocations.texelSize, + pass.texelSize[0], + pass.texelSize[1], + ); + gl.uniform2f( + this.blurLocations.direction, + pass.direction[0], + pass.direction[1], + ); + + gl.drawArrays(gl.TRIANGLES, 0, 6); + } + + backgroundTexToUse = blurTexture2!; + bgWToSend = blurW; + bgHToSend = blurH; + } else if (options.backgroundSource && this.backgroundRenderInfo) { + backgroundTexToUse = this.backgroundRenderInfo.texture; + if (this.backgroundRenderInfo.type === 'video') { + const { canvas } = this.backgroundRenderInfo; + bgWToSend = canvas.width || width; + bgHToSend = canvas.height || height; + } else if (this.backgroundRenderInfo.type === 'image') { + bgWToSend = this.backgroundRenderInfo.width; + bgHToSend = this.backgroundRenderInfo.height; + } else { + bgWToSend = width; + bgHToSend = height; + } + } else { + backgroundTexToUse = this.backgroundRenderInfo?.texture ?? 
null; + } + + gl.bindFramebuffer(gl.FRAMEBUFFER, null); + gl.viewport(0, 0, gl.drawingBufferWidth, gl.drawingBufferHeight); + gl.useProgram(blendProgram); + + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, frameTexture); + gl.uniform1i(blendLocations.frameTexture, 0); + gl.uniform1f(blendLocations.borderSmooth, 0); + gl.uniform1f(blendLocations.bgBlur, options.bgBlur); + gl.uniform1f(blendLocations.bgBlurRadius, options.bgBlurRadius); + gl.uniform1i(blendLocations.enabled, 1); + + gl.activeTexture(gl.TEXTURE1); + gl.bindTexture(gl.TEXTURE_2D, refinedMaskTexture); + gl.uniform1i(blendLocations.currentStateTexture, 1); + + if (backgroundTexToUse) { + gl.activeTexture(gl.TEXTURE2); + gl.bindTexture(gl.TEXTURE_2D, backgroundTexToUse); + + gl.uniform1i(blendLocations.backgroundTexture, 2); + gl.uniform2f(blendLocations.bgImageDimensions, bgWToSend, bgHToSend); + gl.uniform2f(blendLocations.canvasDimensions, width, height); + } else { + gl.uniform2f(blendLocations.bgImageDimensions, width, height); + gl.uniform2f(blendLocations.canvasDimensions, width, height); + } + + gl.enableVertexAttribArray(blendLocations.position); + gl.bindBuffer(gl.ARRAY_BUFFER, this.positionBuffer); + gl.vertexAttribPointer(blendLocations.position, 2, gl.FLOAT, false, 0, 0); + gl.enableVertexAttribArray(blendLocations.texCoord); + gl.bindBuffer(gl.ARRAY_BUFFER, this.texCoordBuffer); + gl.vertexAttribPointer(blendLocations.texCoord, 2, gl.FLOAT, false, 0, 0); + + gl.drawArrays(gl.TRIANGLES, 0, 6); + + for (let i = 0; i < 3; ++i) { + gl.activeTexture(gl.TEXTURE0 + i); + gl.bindTexture(gl.TEXTURE_2D, null); + } + + this.currentStateIndex = writeStateIndex; + } + + public close() { + if (!this.running) return; + this.running = false; + + const { gl, fbo, refineFbo, refinedMaskTexture, blurFbo1, blurFbo2 } = this; + + gl.clearColor(0, 0, 0, 0); + gl.clear(gl.COLOR_BUFFER_BIT); + gl.bindFramebuffer(gl.FRAMEBUFFER, null); + + if (fbo) gl.deleteFramebuffer(fbo); + if (refineFbo) gl.deleteFramebuffer(refineFbo); + if (blurFbo1) gl.deleteFramebuffer(blurFbo1); + if (blurFbo2) gl.deleteFramebuffer(blurFbo2); + + gl.deleteProgram(this.stateUpdateProgram); + gl.deleteProgram(this.maskRefineProgram); + gl.deleteProgram(this.blurProgram); + gl.deleteProgram(this.blendProgram); + + if (this.positionBuffer) gl.deleteBuffer(this.positionBuffer); + if (this.texCoordBuffer) gl.deleteBuffer(this.texCoordBuffer); + + if (refinedMaskTexture) gl.deleteTexture(refinedMaskTexture); + if (this.blurTexture1) gl.deleteTexture(this.blurTexture1); + if (this.blurTexture2) gl.deleteTexture(this.blurTexture2); + this.storedStateTextures.forEach((t) => t && gl.deleteTexture(t)); + this.storedStateTextures.splice(0, this.storedStateTextures.length); + if (this.backgroundRenderInfo?.texture) { + gl.deleteTexture(this.backgroundRenderInfo.texture); + this.backgroundRenderInfo = null; + } + this.activeBackgroundSourceIdentifier = null; + } +} diff --git a/packages/video-filters-web/src/createRenderer.ts b/packages/video-filters-web/src/createRenderer.ts index 20c9574a0f..c640d55290 100644 --- a/packages/video-filters-web/src/createRenderer.ts +++ b/packages/video-filters-web/src/createRenderer.ts @@ -2,9 +2,8 @@ import { WorkerTimer } from '@stream-io/worker-timer'; import { TFLite } from './tflite'; import { buildWebGL2Pipeline } from './webgl2/webgl2Pipeline'; import { getSegmentationParams, SegmentationLevel } from './segmentation'; +import { BackgroundBlurLevel, BackgroundFilter } from './types'; -export type BackgroundFilter = 'blur' | 
'image';
-export type BackgroundBlurLevel = 'low' | 'medium' | 'high' | number;
 export type Renderer = {
   /**
    * Disposes of the renderer.
diff --git a/packages/video-filters-web/src/mediapipe.ts b/packages/video-filters-web/src/mediapipe.ts
new file mode 100644
index 0000000000..577b519804
--- /dev/null
+++ b/packages/video-filters-web/src/mediapipe.ts
@@ -0,0 +1,28 @@
+import { packageName, version } from './version';
+
+let lastModelFilePath = '';
+let modelFileCache: ArrayBuffer | undefined;
+export const loadMediaPipe = async (
+  options: {
+    wasmPath?: string;
+    modelPath?: string;
+  } = {},
+): Promise<ArrayBuffer> => {
+  const basePath = `https://unpkg.com/${packageName}@${version}/mediapipe`;
+
+  const { modelPath = `${basePath}/models/selfie_segmenter.tflite` } = options;
+
+  const model =
+    modelPath === lastModelFilePath && modelFileCache
+      ? modelFileCache
+      : await fetch(modelPath).then((r) => r.arrayBuffer());
+
+  modelFileCache = model;
+  lastModelFilePath = modelPath;
+
+  return model;
+};
+
+export const isMediaPipeSupported = () =>
+  typeof MediaStreamTrackGenerator !== 'undefined' &&
+  typeof MediaStreamTrackProcessor !== 'undefined';
diff --git a/packages/video-filters-web/src/types.ts b/packages/video-filters-web/src/types.ts
new file mode 100644
index 0000000000..88f366983c
--- /dev/null
+++ b/packages/video-filters-web/src/types.ts
@@ -0,0 +1,60 @@
+export type BackgroundSource = {
+  type: string;
+  media?: ImageBitmap | ReadableStream;
+  url: string;
+  video?: HTMLVideoElement;
+  track?: MediaStreamTrack;
+};
+export type BackgroundFilter = 'blur' | 'image';
+export type BackgroundBlurLevel = 'low' | 'medium' | 'high' | number;
+
+export interface SegmenterOptions {
+  backgroundSource?: BackgroundSource | null;
+  bgBlur: number;
+  bgBlurRadius: number;
+  isSelfieMode: boolean;
+}
+/**
+ * Static configuration for the processor, defining which background
+ * effect should be applied and how it should behave.
+ */
+export interface BackgroundOptions {
+  modelPath?: string;
+  backgroundFilter?: BackgroundFilter;
+  backgroundBlurLevel?: BackgroundBlurLevel;
+  backgroundImage?: string | undefined;
+}
+
+/**
+ * Performance statistics for video processing.
+ */
+export interface PerformanceStats {
+  delay: number;
+  fps: number;
+}
+
+/**
+ * Runtime hooks for handling lifecycle or error events.
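+ *
+ * Illustrative sketch only: how a hooks object might look when handed to the
+ * processor. The exact wiring is an assumption, not prescribed by this interface.
+ * @example
+ * const hooks: VideoTrackProcessorHooks = {
+ *   onError: (error) => console.error('background filter failed', error),
+ *   onStats: ({ fps, delay }) => console.debug(`~${fps} fps, ${delay} ms delay`),
+ * };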
+ */
+export interface VideoTrackProcessorHooks {
+  onError?: (error: unknown) => void;
+  onStats?: (stats: PerformanceStats) => void;
+}
+
+export const BACKGROUND_BLUR_MAP: Record<
+  BackgroundBlurLevel,
+  { bgBlur: number; bgBlurRadius: number }
+> = {
+  low: {
+    bgBlur: 15,
+    bgBlurRadius: 5,
+  },
+  medium: {
+    bgBlur: 20,
+    bgBlurRadius: 7,
+  },
+  high: {
+    bgBlur: 25,
+    bgBlurRadius: 10,
+  },
+};
diff --git a/packages/video-filters-web/src/webgl2/backgroundBlurStage.ts b/packages/video-filters-web/src/webgl2/backgroundBlurStage.ts
index 8a290d78e0..0176277634 100644
--- a/packages/video-filters-web/src/webgl2/backgroundBlurStage.ts
+++ b/packages/video-filters-web/src/webgl2/backgroundBlurStage.ts
@@ -4,7 +4,7 @@ import {
   createTexture,
   glsl,
 } from '../helpers/webglHelper';
-import type { BackgroundBlurLevel } from '../createRenderer';
+import { BackgroundBlurLevel } from '../types';
 export type BackgroundBlurStage = {
   render(): void;
diff --git a/packages/video-filters-web/src/webgl2/webgl2Pipeline.ts b/packages/video-filters-web/src/webgl2/webgl2Pipeline.ts
index fb6c6d6854..6bcf52d274 100644
--- a/packages/video-filters-web/src/webgl2/webgl2Pipeline.ts
+++ b/packages/video-filters-web/src/webgl2/webgl2Pipeline.ts
@@ -11,8 +11,8 @@ import {
 import { buildJointBilateralFilterStage } from './jointBilateralFilterStage';
 import { buildResizingStage } from './resizingStage';
 import { buildSoftmaxStage } from './softmaxStage';
-import { BackgroundBlurLevel, BackgroundFilter } from '../createRenderer';
 import { SegmentationParams } from '../segmentation';
+import type { BackgroundBlurLevel, BackgroundFilter } from '../types';
 export function buildWebGL2Pipeline(
   videoSource: HTMLVideoElement,
diff --git a/sample-apps/react/react-dogfood/components/ToggleDualCameraButton.tsx b/sample-apps/react/react-dogfood/components/ToggleDualCameraButton.tsx
index 527fe52e27..4916965fd6 100644
--- a/sample-apps/react/react-dogfood/components/ToggleDualCameraButton.tsx
+++ b/sample-apps/react/react-dogfood/components/ToggleDualCameraButton.tsx
@@ -1,4 +1,5 @@
 import {
+  DegradedPerformanceNotification,
   DeviceSelectorVideo,
   OwnCapability,
   Restricted,
@@ -9,6 +10,7 @@ export const ToggleDualCameraButton = () => {
   return (
+ } menuPlacement="top"
diff --git a/sample-apps/react/react-dogfood/pages/join/[callId].tsx b/sample-apps/react/react-dogfood/pages/join/[callId].tsx
index 5426451a8e..35231b98f6 100644
--- a/sample-apps/react/react-dogfood/pages/join/[callId].tsx
+++ b/sample-apps/react/react-dogfood/pages/join/[callId].tsx
@@ -194,6 +194,7 @@ const CallRoom = (props: ServerSideCredentialsProps) => {
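The new mediapipe.ts module above caches the segmentation model by URL, so repeated calls to loadMediaPipe with the same modelPath reuse the already-downloaded ArrayBuffer. A minimal preloading sketch, assuming loadMediaPipe and isMediaPipeSupported are re-exported from the package entry point and using a placeholder model URL:

import {
  isMediaPipeSupported,
  loadMediaPipe,
} from '@stream-io/video-filters-web';

// Warm the model cache before the user enables a background filter.
// The URL below is a placeholder for a self-hosted copy of the model.
export const preloadSegmentationModel = async (): Promise<void> => {
  if (!isMediaPipeSupported()) return;
  const model = await loadMediaPipe({
    modelPath: 'https://example.com/models/selfie_segmenter.tflite',
  });
  console.debug(`Segmentation model preloaded (${model.byteLength} bytes)`);
};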