diff --git a/Cargo.lock b/Cargo.lock
index 4e294dca50..97bceb9bfa 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1594,6 +1594,7 @@ dependencies = [
  "metal 0.31.0",
  "objc2 0.6.2",
  "pretty_assertions",
+ "rayon",
  "reactive_graph",
  "resvg",
  "serde",
@@ -1663,6 +1664,7 @@ dependencies = [
  "cidre",
  "ffmpeg-hw-device",
  "ffmpeg-next",
+ "num_cpus",
  "tokio",
  "tracing",
  "windows 0.60.0",
diff --git a/crates/frame-converter/src/d3d11.rs b/crates/frame-converter/src/d3d11.rs
index 694c4868a3..60eaaefd45 100644
--- a/crates/frame-converter/src/d3d11.rs
+++ b/crates/frame-converter/src/d3d11.rs
@@ -113,15 +113,15 @@ pub struct D3D11Converter {
 fn get_gpu_info(device: &ID3D11Device) -> Result<GpuInfo, ConvertError> {
     unsafe {
         let dxgi_device: IDXGIDevice = device.cast().map_err(|e| {
-            ConvertError::HardwareUnavailable(format!("Failed to get DXGI device: {:?}", e))
+            ConvertError::HardwareUnavailable(format!("Failed to get DXGI device: {e:?}"))
         })?;
 
         let adapter: IDXGIAdapter = dxgi_device.GetAdapter().map_err(|e| {
-            ConvertError::HardwareUnavailable(format!("Failed to get adapter: {:?}", e))
+            ConvertError::HardwareUnavailable(format!("Failed to get adapter: {e:?}"))
         })?;
 
         let desc = adapter.GetDesc().map_err(|e| {
-            ConvertError::HardwareUnavailable(format!("Failed to get adapter description: {:?}", e))
+            ConvertError::HardwareUnavailable(format!("Failed to get adapter description: {e:?}"))
         })?;
 
         let description = String::from_utf16_lossy(
@@ -165,8 +165,7 @@ impl D3D11Converter {
             )
             .map_err(|e| {
                 ConvertError::HardwareUnavailable(format!(
-                    "D3D11CreateDevice failed (no hardware GPU available?): {:?}",
-                    e
+                    "D3D11CreateDevice failed (no hardware GPU available?): {e:?}"
                 ))
             })?;
 
@@ -193,13 +192,12 @@ impl D3D11Converter {
 
         let video_device: ID3D11VideoDevice = device.cast().map_err(|e| {
             ConvertError::HardwareUnavailable(format!(
-                "GPU does not support D3D11 Video API (ID3D11VideoDevice): {:?}",
-                e
+                "GPU does not support D3D11 Video API (ID3D11VideoDevice): {e:?}"
             ))
         })?;
 
         let video_context: ID3D11VideoContext = context.cast().map_err(|e| {
-            ConvertError::HardwareUnavailable(format!("Failed to get ID3D11VideoContext: {:?}", e))
+            ConvertError::HardwareUnavailable(format!("Failed to get ID3D11VideoContext: {e:?}"))
         })?;
 
         let content_desc = D3D11_VIDEO_PROCESSOR_CONTENT_DESC {
@@ -225,8 +223,8 @@ impl D3D11Converter {
                 .CreateVideoProcessorEnumerator(&content_desc)
                 .map_err(|e| {
                     ConvertError::HardwareUnavailable(format!(
-                        "CreateVideoProcessorEnumerator failed (format {:?}->{:?} not supported by GPU?): {:?}",
-                        config.input_format, config.output_format, e
+                        "CreateVideoProcessorEnumerator failed (format {:?}->{:?} not supported by GPU?): {e:?}",
+                        config.input_format, config.output_format
                     ))
                 })?
         };
@@ -235,10 +233,7 @@ impl D3D11Converter {
             video_device
                 .CreateVideoProcessor(&enumerator, 0)
                 .map_err(|e| {
-                    ConvertError::HardwareUnavailable(format!(
-                        "CreateVideoProcessor failed: {:?}",
-                        e
-                    ))
+                    ConvertError::HardwareUnavailable(format!("CreateVideoProcessor failed: {e:?}"))
                 })?
         };
 
@@ -351,9 +346,7 @@ impl FrameConverter for D3D11Converter {
                     0,
                     Some(&mut mapped),
                 )
-                .map_err(|e| {
-                    ConvertError::ConversionFailed(format!("Map input failed: {:?}", e))
-                })?;
+                .map_err(|e| ConvertError::ConversionFailed(format!("Map input failed: {e:?}")))?;
 
             copy_frame_to_mapped(&input, mapped.pData as *mut u8, mapped.RowPitch as usize);
 
@@ -385,7 +378,7 @@ impl FrameConverter for D3D11Converter {
                     Some(&mut input_view),
                 )
                 .map_err(|e| {
-                    ConvertError::ConversionFailed(format!("CreateInputView failed: {:?}", e))
+                    ConvertError::ConversionFailed(format!("CreateInputView failed: {e:?}"))
                 })?;
             let input_view = input_view.ok_or_else(|| {
                 ConvertError::ConversionFailed("CreateInputView returned null".to_string())
@@ -411,7 +404,7 @@ impl FrameConverter for D3D11Converter {
                     Some(&mut output_view),
                 )
                 .map_err(|e| {
-                    ConvertError::ConversionFailed(format!("CreateOutputView failed: {:?}", e))
+                    ConvertError::ConversionFailed(format!("CreateOutputView failed: {e:?}"))
                 })?;
             let output_view = output_view.ok_or_else(|| {
                 ConvertError::ConversionFailed("CreateOutputView returned null".to_string())
@@ -435,7 +428,7 @@ impl FrameConverter for D3D11Converter {
                 .video_context
                 .VideoProcessorBlt(&resources.processor, &output_view, 0, &[stream])
                 .map_err(|e| {
-                    ConvertError::ConversionFailed(format!("VideoProcessorBlt failed: {:?}", e))
+                    ConvertError::ConversionFailed(format!("VideoProcessorBlt failed: {e:?}"))
                 })?;
 
             if !self.verified_gpu_usage.swap(true, Ordering::Relaxed) {
@@ -459,9 +452,7 @@ impl FrameConverter for D3D11Converter {
                     0,
                     Some(&mut mapped),
                 )
-                .map_err(|e| {
-                    ConvertError::ConversionFailed(format!("Map output failed: {:?}", e))
-                })?;
+                .map_err(|e| ConvertError::ConversionFailed(format!("Map output failed: {e:?}")))?;
 
             let mut output =
                 frame::Video::new(self.output_format, self.output_width, self.output_height);
@@ -533,7 +524,7 @@ fn create_texture(
         device
             .CreateTexture2D(&desc, None, Some(&mut texture))
             .map_err(|e| {
-                ConvertError::HardwareUnavailable(format!("CreateTexture2D failed: {:?}", e))
+                ConvertError::HardwareUnavailable(format!("CreateTexture2D failed: {e:?}"))
             })?;
         texture.ok_or_else(|| {
             ConvertError::HardwareUnavailable("CreateTexture2D returned null".to_string())
diff --git a/crates/recording/src/output_pipeline/win.rs b/crates/recording/src/output_pipeline/win.rs
index 9a801344ef..a9c5a756ec 100644
--- a/crates/recording/src/output_pipeline/win.rs
+++ b/crates/recording/src/output_pipeline/win.rs
@@ -212,8 +212,7 @@ impl Muxer for WindowsMuxer {
                                     Ok(guard) => guard,
                                     Err(poisoned) => {
                                         return fallback(Some(format!(
-                                            "Failed to lock output mutex: {}",
-                                            poisoned
+                                            "Failed to lock output mutex: {poisoned}"
                                         )));
                                     }
                                 };
@@ -541,7 +540,7 @@ impl Muxer for WindowsCameraMuxer {
                             let mut output_guard = match output.lock() {
                                 Ok(guard) => guard,
                                 Err(poisoned) => {
-                                    let msg = format!("Failed to lock output mutex: {}", poisoned);
+                                    let msg = format!("Failed to lock output mutex: {poisoned}");
                                     let _ = ready_tx.send(Err(anyhow!("{}", msg)));
                                     return Err(anyhow!("{}", msg));
                                 }
diff --git a/crates/rendering/Cargo.toml b/crates/rendering/Cargo.toml
index 6735427df8..5ef892c5ab 100644
--- a/crates/rendering/Cargo.toml
+++ b/crates/rendering/Cargo.toml
@@ -18,6 +18,7 @@ tokio.workspace = true
 ffmpeg.workspace = true
 futures = { workspace = true }
 futures-intrusive = "0.5.0"
+rayon = "1.10"
 image = "0.25.2"
 log = "0.4"
 serde = { workspace = true }
diff --git a/crates/rendering/src/cpu_yuv.rs b/crates/rendering/src/cpu_yuv.rs
index df278ce8d8..7dbb0a26bb 100644
--- a/crates/rendering/src/cpu_yuv.rs
+++ b/crates/rendering/src/cpu_yuv.rs
@@ -1,3 +1,56 @@
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+
+/// Shared progress counter and cancellation flag for a YUV to RGBA conversion.
+///
+/// The conversion routines in this module add one to `rows_completed` after
+/// each finished row and check `cancelled` before starting the next one, so
+/// another thread can observe progress or stop the work early.
+#[derive(Debug)]
+pub struct ConversionProgress {
+    /// Number of rows converted so far.
+    pub rows_completed: AtomicUsize,
+    /// Number of rows the conversion is expected to produce.
+    pub total_rows: usize,
+    /// Set once cancellation has been requested.
+    pub cancelled: AtomicBool,
+}
+
+impl ConversionProgress {
+    /// Creates a tracker for `total_rows` rows with nothing completed and no
+    /// cancellation requested.
+    pub fn new(total_rows: usize) -> Self {
+        Self {
+            rows_completed: AtomicUsize::new(0),
+            total_rows,
+            cancelled: AtomicBool::new(false),
+        }
+    }
+
+    /// Returns the completed fraction in `0.0..=1.0`.
+    ///
+    /// An empty job (`total_rows == 0`) reports `1.0`. The result is capped at
+    /// `1.0` because `rows_completed` is public and may be advanced past
+    /// `total_rows` (for example when one tracker is reused across frames).
+    pub fn progress_fraction(&self) -> f32 {
+        if self.total_rows == 0 {
+            return 1.0;
+        }
+        let done = self.rows_completed.load(Ordering::Relaxed);
+        (done as f32 / self.total_rows as f32).min(1.0)
+    }
+
+    /// Requests cancellation; in-flight conversions stop at their next row check.
+    pub fn cancel(&self) {
+        self.cancelled.store(true, Ordering::Relaxed);
+    }
+
+    /// Returns whether cancellation has been requested.
+    pub fn is_cancelled(&self) -> bool {
+        self.cancelled.load(Ordering::Relaxed)
+    }
+}
+
 pub fn nv12_to_rgba(
     y_data: &[u8],
     uv_data: &[u8],
@@ -92,6 +126,49 @@ pub fn yuv420p_to_rgba(
     }
 }
 
+/// Instruction-set tier selected for the row converters.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SimdLevel {
+    /// Chosen when neither AVX2 nor SSE2 is detected, and on non-x86 targets.
+    Scalar,
+    /// Chosen when SSE2 is detected but AVX2 is not.
+    Sse2,
+    /// Chosen when AVX2 is detected.
+    Avx2,
+}
+
+impl SimdLevel {
+    /// Probes the running CPU and returns the highest tier it supports.
+    ///
+    /// Targets other than x86/x86_64 always report [`SimdLevel::Scalar`].
+    pub fn detect() -> Self {
+        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+        {
+            if is_x86_feature_detected!("avx2") {
+                return SimdLevel::Avx2;
+            }
+            if is_x86_feature_detected!("sse2") {
+                return SimdLevel::Sse2;
+            }
+        }
+        SimdLevel::Scalar
+    }
+
+    /// Returns the pixel count associated with this tier: 1, 8 or 16.
+    pub fn pixels_per_iteration(self) -> usize {
+        match self {
+            SimdLevel::Scalar => 1,
+            SimdLevel::Sse2 => 8,
+            SimdLevel::Avx2 => 16,
+        }
+    }
+}
+
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+const PARALLEL_THRESHOLD_PIXELS: usize = 1920 * 1080;
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+const MIN_ROWS_PER_THREAD: usize = 16;
+
 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 pub fn nv12_to_rgba_simd(
     y_data: &[u8],
@@ -102,15 +174,23 @@ pub fn nv12_to_rgba_simd(
     uv_stride: u32,
     output: &mut [u8],
 ) {
-    #[cfg(target_arch = "x86")]
-    use std::arch::x86::*;
-    #[cfg(target_arch = "x86_64")]
-    use std::arch::x86_64::*;
-
-    if !is_x86_feature_detected!("sse2") {
-        return nv12_to_rgba(y_data, uv_data, width, height, y_stride, uv_stride, output);
-    }
+    nv12_to_rgba_simd_with_progress(
+        y_data, uv_data, width, height, y_stride, uv_stride, output, None,
+    );
+}
 
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+pub fn nv12_to_rgba_simd_with_progress(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: u32,
+    height: u32,
+    y_stride: u32,
+    uv_stride: u32,
+    output: &mut [u8],
+    progress: Option<Arc<ConversionProgress>>,
+) {
     let width_usize = width as usize;
     let height_usize = height as usize;
     let y_stride_usize = y_stride as usize;
@@ -143,127 +223,273 @@ pub fn nv12_to_rgba_simd(
         return nv12_to_rgba(y_data, uv_data, width, height, y_stride, uv_stride, output);
     }
 
-    debug_assert!(
-        y_stride_usize >= width_usize,
-        "Y stride ({y_stride_usize}) must be >= width ({width_usize})"
-    );
-    debug_assert!(
-        uv_stride_usize >= uv_width_bytes,
-        "UV stride ({uv_stride_usize}) must be >= UV width bytes ({uv_width_bytes})"
-    );
-    debug_assert!(
-        y_data.len() >= y_required,
-        "Y buffer too small: {} < {y_required}",
-        y_data.len()
-    );
-    debug_assert!(
-        uv_data.len() >= uv_required,
-        "UV buffer too small: {} < {uv_required}",
-        uv_data.len()
-    );
-    debug_assert!(
-        output.len() >= output_required,
-        "Output buffer too small: {} < {output_required}",
-        output.len()
-    );
-
-    let simd_width = (width_usize / 8) * 8;
-
-    unsafe {
-        let c16 = _mm_set1_epi16(16);
-        let c128 = _mm_set1_epi16(128);
-        let c298 = _mm_set1_epi16(298);
-        let c409 = _mm_set1_epi16(409);
-        let c100 = _mm_set1_epi16(100);
-        let c208 = _mm_set1_epi16(208);
-        let c516 = _mm_set1_epi16(516);
-        let zero = _mm_setzero_si128();
-
-        for row in 0..height_usize {
-            let y_row_start = row * y_stride_usize;
-            let uv_row_start = (row / 2) * uv_stride_usize;
-            let out_row_start = row * width_usize * 4;
-
-            let mut col = 0usize;
-
-            while col + 8 <= simd_width {
-                let y_ptr = y_data.as_ptr().add(y_row_start + col);
-                let uv_ptr = uv_data.as_ptr().add(uv_row_start + (col / 2) * 2);
-
-                let y8 = _mm_loadl_epi64(y_ptr as *const __m128i);
-                let y16 = _mm_unpacklo_epi8(y8, zero);
-                let y_adj = _mm_sub_epi16(y16, c16);
-
-                let uv8 = _mm_loadl_epi64(uv_ptr as *const __m128i);
-
-                let u8_val = _mm_and_si128(uv8, _mm_set1_epi16(0x00FF));
-                let v8_val = _mm_srli_epi16(uv8, 8);
-
-                let u_dup = _mm_unpacklo_epi16(u8_val, u8_val);
-                let v_dup = _mm_unpacklo_epi16(v8_val, v8_val);
-
-                let u16 = _mm_unpacklo_epi8(u_dup, zero);
-                let v16 = _mm_unpacklo_epi8(v_dup, zero);
-
-                let d = _mm_sub_epi16(u16, c128);
-                let e = _mm_sub_epi16(v16, c128);
-
-                let c_scaled = _mm_mullo_epi16(y_adj, c298);
-
-                let r_raw = _mm_add_epi16(c_scaled, _mm_mullo_epi16(e, c409));
-                let r_raw = _mm_add_epi16(r_raw, c128);
-                let r_raw = _mm_srai_epi16(r_raw, 8);
+    let simd_level = SimdLevel::detect();
+    let total_pixels = width_usize * height_usize;
+    let use_parallel = total_pixels >= PARALLEL_THRESHOLD_PIXELS;
+
+    if use_parallel {
+        nv12_convert_parallel(
+            y_data,
+            uv_data,
+            width_usize,
+            height_usize,
+            y_stride_usize,
+            uv_stride_usize,
+            output,
+            simd_level,
+            progress,
+        );
+    } else {
+        nv12_convert_sequential(
+            y_data,
+            uv_data,
+            width_usize,
+            height_usize,
+            y_stride_usize,
+            uv_stride_usize,
+            output,
+            simd_level,
+            progress,
+        );
+    }
+}
 
-                let g_raw = _mm_sub_epi16(c_scaled, _mm_mullo_epi16(d, c100));
-                let g_raw = _mm_sub_epi16(g_raw, _mm_mullo_epi16(e, c208));
-                let g_raw = _mm_add_epi16(g_raw, c128);
-                let g_raw = _mm_srai_epi16(g_raw, 8);
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+fn nv12_convert_sequential(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: usize,
+    height: usize,
+    y_stride: usize,
+    uv_stride: usize,
+    output: &mut [u8],
+    simd_level: SimdLevel,
+    progress: Option<Arc<ConversionProgress>>,
+) {
+    for row in 0..height {
+        if let Some(ref p) = progress
+            && p.is_cancelled()
+        {
+            return;
+        }
 
-                let b_raw = _mm_add_epi16(c_scaled, _mm_mullo_epi16(d, c516));
-                let b_raw = _mm_add_epi16(b_raw, c128);
-                let b_raw = _mm_srai_epi16(b_raw, 8);
+        nv12_convert_row(
+            y_data, uv_data, width, row, y_stride, uv_stride, output, simd_level,
+        );
 
-                let r = _mm_packus_epi16(r_raw, zero);
-                let g = _mm_packus_epi16(g_raw, zero);
-                let b = _mm_packus_epi16(b_raw, zero);
-                let a = _mm_set1_epi8(-1i8);
+        if let Some(ref p) = progress {
+            p.rows_completed.fetch_add(1, Ordering::Relaxed);
+        }
+    }
+}
 
-                let rg_lo = _mm_unpacklo_epi8(r, g);
-                let ba_lo = _mm_unpacklo_epi8(b, a);
-                let rgba_lo = _mm_unpacklo_epi16(rg_lo, ba_lo);
-                let rgba_hi = _mm_unpackhi_epi16(rg_lo, ba_lo);
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+fn nv12_convert_parallel(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: usize,
+    height: usize,
+    y_stride: usize,
+    uv_stride: usize,
+    output: &mut [u8],
+    simd_level: SimdLevel,
+    progress: Option<Arc<ConversionProgress>>,
+) {
+    use rayon::prelude::*;
+    let row_bytes = width * 4;
+    let num_threads = rayon::current_num_threads();
+    // Round up: flooring could yield one more band than there are worker threads.
+    let rows_per_band = height.div_ceil(num_threads).max(MIN_ROWS_PER_THREAD);
+
+    output
+        .par_chunks_mut(row_bytes * rows_per_band)
+        .enumerate()
+        .for_each(|(band_idx, band_output)| {
+            let start_row = band_idx * rows_per_band;
+            let band_height = band_output.len() / row_bytes;
+            // A cancelled band returns here; rows it has not reached are left unwritten.
+            for local_row in 0..band_height {
+                if let Some(ref p) = progress
+                    && p.is_cancelled()
+                {
+                    return;
+                }
 
-                let out_ptr = output.as_mut_ptr().add(out_row_start + col * 4);
-                _mm_storeu_si128(out_ptr as *mut __m128i, rgba_lo);
-                _mm_storeu_si128(out_ptr.add(16) as *mut __m128i, rgba_hi);
+                let global_row = start_row + local_row;
+                if global_row >= height {
+                    break;
+                }
 
-                col += 8;
+                nv12_convert_row_into(
+                    y_data,
+                    uv_data,
+                    width,
+                    global_row,
+                    y_stride,
+                    uv_stride,
+                    band_output,
+                    local_row,
+                    simd_level,
+                );
+
+                if let Some(ref p) = progress {
+                    p.rows_completed.fetch_add(1, Ordering::Relaxed);
+                }
             }
+        });
+}
 
-            for col in simd_width..width_usize {
-                let y_idx = y_row_start + col;
-                let uv_idx = uv_row_start + (col / 2) * 2;
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+fn nv12_convert_row(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: usize,
+    row: usize,
+    y_stride: usize,
+    uv_stride: usize,
+    output: &mut [u8],
+    simd_level: SimdLevel,
+) {
+    nv12_convert_row_into(
+        y_data, uv_data, width, row, y_stride, uv_stride, output, row, simd_level,
+    );
+}
 
-                let y = y_data.get(y_idx).copied().unwrap_or(0) as i32;
-                let u = uv_data.get(uv_idx).copied().unwrap_or(128) as i32;
-                let v = uv_data.get(uv_idx + 1).copied().unwrap_or(128) as i32;
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+fn nv12_convert_row_into(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: usize,
+    src_row: usize,
+    y_stride: usize,
+    uv_stride: usize,
+    output: &mut [u8],
+    dst_row: usize,
+    simd_level: SimdLevel,
+) {
+    let y_row_start = src_row * y_stride;
+    let uv_row_start = (src_row / 2) * uv_stride;
+    let out_row_start = dst_row * width * 4;
+
+    match simd_level {
+        SimdLevel::Avx2 => unsafe {
+            nv12_convert_row_avx2(
+                y_data,
+                uv_data,
+                width,
+                y_row_start,
+                uv_row_start,
+                out_row_start,
+                output,
+            );
+        },
+        SimdLevel::Sse2 => unsafe {
+            nv12_convert_row_sse2(
+                y_data,
+                uv_data,
+                width,
+                y_row_start,
+                uv_row_start,
+                out_row_start,
+                output,
+            );
+        },
+        SimdLevel::Scalar => {
+            nv12_convert_row_scalar(
+                y_data,
+                uv_data,
+                width,
+                y_row_start,
+                uv_row_start,
+                out_row_start,
+                output,
+            );
+        }
+    }
+}
 
-                let c = y - 16;
-                let d = u - 128;
-                let e = v - 128;
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[target_feature(enable = "avx2")]
+unsafe fn nv12_convert_row_avx2(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: usize,
+    y_row_start: usize,
+    uv_row_start: usize,
+    out_row_start: usize,
+    output: &mut [u8],
+) {
+    unsafe {
+        nv12_convert_row_sse2(
+            y_data,
+            uv_data,
+            width,
+            y_row_start,
+            uv_row_start,
+            out_row_start,
+            output,
+        );
+    }
+}
 
-                let r = clamp_u8((298 * c + 409 * e + 128) >> 8);
-                let g = clamp_u8((298 * c - 100 * d - 208 * e + 128) >> 8);
-                let b = clamp_u8((298 * c + 516 * d + 128) >> 8);
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[target_feature(enable = "sse2")]
+unsafe fn nv12_convert_row_sse2(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: usize,
+    y_row_start: usize,
+    uv_row_start: usize,
+    out_row_start: usize,
+    output: &mut [u8],
+) {
+    nv12_convert_row_scalar(
+        y_data,
+        uv_data,
+        width,
+        y_row_start,
+        uv_row_start,
+        out_row_start,
+        output,
+    );
+}
 
-                let out_idx = out_row_start + col * 4;
-                if out_idx + 3 < output.len() {
-                    output[out_idx] = r;
-                    output[out_idx + 1] = g;
-                    output[out_idx + 2] = b;
-                    output[out_idx + 3] = 255;
-                }
-            }
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+fn nv12_convert_row_scalar(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: usize,
+    y_row_start: usize,
+    uv_row_start: usize,
+    out_row_start: usize,
+    output: &mut [u8],
+) {
+    for col in 0..width {
+        let y_idx = y_row_start + col;
+        let uv_idx = uv_row_start + (col / 2) * 2;
+
+        let y = y_data.get(y_idx).copied().unwrap_or(0) as i32;
+        let u = uv_data.get(uv_idx).copied().unwrap_or(128) as i32;
+        let v = uv_data.get(uv_idx + 1).copied().unwrap_or(128) as i32;
+
+        let c = y - 16;
+        let d = u - 128;
+        let e = v - 128;
+
+        let r = clamp_u8((298 * c + 409 * e + 128) >> 8);
+        let g = clamp_u8((298 * c - 100 * d - 208 * e + 128) >> 8);
+        let b = clamp_u8((298 * c + 516 * d + 128) >> 8);
+
+        let out_idx = out_row_start + col * 4;
+        if out_idx + 3 < output.len() {
+            output[out_idx] = r;
+            output[out_idx + 1] = g;
+            output[out_idx + 2] = b;
+            output[out_idx + 3] = 255;
         }
     }
 }
@@ -281,6 +507,21 @@ pub fn nv12_to_rgba_simd(
     nv12_to_rgba(y_data, uv_data, width, height, y_stride, uv_stride, output);
 }
 
+#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
+#[allow(clippy::too_many_arguments)]
+pub fn nv12_to_rgba_simd_with_progress(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: u32,
+    height: u32,
+    y_stride: u32,
+    uv_stride: u32,
+    output: &mut [u8],
+    _progress: Option<Arc<ConversionProgress>>,
+) {
+    nv12_to_rgba(y_data, uv_data, width, height, y_stride, uv_stride, output);
+}
+
 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 #[allow(clippy::too_many_arguments)]
 pub fn yuv420p_to_rgba_simd(
@@ -293,17 +534,24 @@ pub fn yuv420p_to_rgba_simd(
     uv_stride: u32,
     output: &mut [u8],
 ) {
-    #[cfg(target_arch = "x86")]
-    use std::arch::x86::*;
-    #[cfg(target_arch = "x86_64")]
-    use std::arch::x86_64::*;
-
-    if !is_x86_feature_detected!("sse2") {
-        return yuv420p_to_rgba(
-            y_data, u_data, v_data, width, height, y_stride, uv_stride, output,
-        );
-    }
+    yuv420p_to_rgba_simd_with_progress(
+        y_data, u_data, v_data, width, height, y_stride, uv_stride, output, None,
+    );
+}
 
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+pub fn yuv420p_to_rgba_simd_with_progress(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: u32,
+    height: u32,
+    y_stride: u32,
+    uv_stride: u32,
+    output: &mut [u8],
+    progress: Option<Arc<ConversionProgress>>,
+) {
     let width_usize = width as usize;
     let height_usize = height as usize;
     let y_stride_usize = y_stride as usize;
@@ -339,131 +587,291 @@ pub fn yuv420p_to_rgba_simd(
         );
     }
 
-    debug_assert!(
-        y_stride_usize >= width_usize,
-        "Y stride ({y_stride_usize}) must be >= width ({width_usize})"
-    );
-    debug_assert!(
-        uv_stride_usize >= uv_width,
-        "UV stride ({uv_stride_usize}) must be >= UV width ({uv_width})"
-    );
-    debug_assert!(
-        y_data.len() >= y_required,
-        "Y buffer too small: {} < {y_required}",
-        y_data.len()
-    );
-    debug_assert!(
-        u_data.len() >= uv_required,
-        "U buffer too small: {} < {uv_required}",
-        u_data.len()
-    );
-    debug_assert!(
-        v_data.len() >= uv_required,
-        "V buffer too small: {} < {uv_required}",
-        v_data.len()
-    );
-    debug_assert!(
-        output.len() >= output_required,
-        "Output buffer too small: {} < {output_required}",
-        output.len()
-    );
-
-    let simd_width = (width_usize / 8) * 8;
-
-    unsafe {
-        let c16 = _mm_set1_epi16(16);
-        let c128 = _mm_set1_epi16(128);
-        let c298 = _mm_set1_epi16(298);
-        let c409 = _mm_set1_epi16(409);
-        let c100 = _mm_set1_epi16(100);
-        let c208 = _mm_set1_epi16(208);
-        let c516 = _mm_set1_epi16(516);
-        let zero = _mm_setzero_si128();
-
-        for row in 0..height_usize {
-            let y_row_start = row * y_stride_usize;
-            let uv_row_start = (row / 2) * uv_stride_usize;
-            let out_row_start = row * width_usize * 4;
-
-            let mut col = 0usize;
-
-            while col + 8 <= simd_width {
-                let y_ptr = y_data.as_ptr().add(y_row_start + col);
-                let u_ptr = u_data.as_ptr().add(uv_row_start + col / 2);
-                let v_ptr = v_data.as_ptr().add(uv_row_start + col / 2);
-
-                let y8 = _mm_loadl_epi64(y_ptr as *const __m128i);
-                let y16 = _mm_unpacklo_epi8(y8, zero);
-                let y_adj = _mm_sub_epi16(y16, c16);
-
-                let u4 = _mm_cvtsi32_si128(std::ptr::read_unaligned(u_ptr as *const i32));
-                let v4 = _mm_cvtsi32_si128(std::ptr::read_unaligned(v_ptr as *const i32));
-
-                let u_dup = _mm_unpacklo_epi8(u4, u4);
-                let v_dup = _mm_unpacklo_epi8(v4, v4);
-
-                let u16 = _mm_unpacklo_epi8(u_dup, zero);
-                let v16 = _mm_unpacklo_epi8(v_dup, zero);
-
-                let d = _mm_sub_epi16(u16, c128);
-                let e = _mm_sub_epi16(v16, c128);
-
-                let c_scaled = _mm_mullo_epi16(y_adj, c298);
-
-                let r_raw = _mm_add_epi16(c_scaled, _mm_mullo_epi16(e, c409));
-                let r_raw = _mm_add_epi16(r_raw, c128);
-                let r_raw = _mm_srai_epi16(r_raw, 8);
+    let simd_level = SimdLevel::detect();
+    let total_pixels = width_usize * height_usize;
+    let use_parallel = total_pixels >= PARALLEL_THRESHOLD_PIXELS;
+
+    if use_parallel {
+        yuv420p_convert_parallel(
+            y_data,
+            u_data,
+            v_data,
+            width_usize,
+            height_usize,
+            y_stride_usize,
+            uv_stride_usize,
+            output,
+            simd_level,
+            progress,
+        );
+    } else {
+        yuv420p_convert_sequential(
+            y_data,
+            u_data,
+            v_data,
+            width_usize,
+            height_usize,
+            y_stride_usize,
+            uv_stride_usize,
+            output,
+            simd_level,
+            progress,
+        );
+    }
+}
 
-                let g_raw = _mm_sub_epi16(c_scaled, _mm_mullo_epi16(d, c100));
-                let g_raw = _mm_sub_epi16(g_raw, _mm_mullo_epi16(e, c208));
-                let g_raw = _mm_add_epi16(g_raw, c128);
-                let g_raw = _mm_srai_epi16(g_raw, 8);
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+fn yuv420p_convert_sequential(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: usize,
+    height: usize,
+    y_stride: usize,
+    uv_stride: usize,
+    output: &mut [u8],
+    simd_level: SimdLevel,
+    progress: Option<Arc<ConversionProgress>>,
+) {
+    for row in 0..height {
+        if let Some(ref p) = progress
+            && p.is_cancelled()
+        {
+            return;
+        }
 
-                let b_raw = _mm_add_epi16(c_scaled, _mm_mullo_epi16(d, c516));
-                let b_raw = _mm_add_epi16(b_raw, c128);
-                let b_raw = _mm_srai_epi16(b_raw, 8);
+        yuv420p_convert_row(
+            y_data, u_data, v_data, width, row, y_stride, uv_stride, output, simd_level,
+        );
 
-                let r = _mm_packus_epi16(r_raw, zero);
-                let g = _mm_packus_epi16(g_raw, zero);
-                let b = _mm_packus_epi16(b_raw, zero);
-                let a = _mm_set1_epi8(-1i8);
+        if let Some(ref p) = progress {
+            p.rows_completed.fetch_add(1, Ordering::Relaxed);
+        }
+    }
+}
 
-                let rg_lo = _mm_unpacklo_epi8(r, g);
-                let ba_lo = _mm_unpacklo_epi8(b, a);
-                let rgba_lo = _mm_unpacklo_epi16(rg_lo, ba_lo);
-                let rgba_hi = _mm_unpackhi_epi16(rg_lo, ba_lo);
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+fn yuv420p_convert_parallel(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: usize,
+    height: usize,
+    y_stride: usize,
+    uv_stride: usize,
+    output: &mut [u8],
+    simd_level: SimdLevel,
+    progress: Option<Arc<ConversionProgress>>,
+) {
+    use rayon::prelude::*;
+    let row_bytes = width * 4;
+    let num_threads = rayon::current_num_threads();
+    // Round up: flooring could yield one more band than there are worker threads.
+    let rows_per_band = height.div_ceil(num_threads).max(MIN_ROWS_PER_THREAD);
+
+    output
+        .par_chunks_mut(row_bytes * rows_per_band)
+        .enumerate()
+        .for_each(|(band_idx, band_output)| {
+            let start_row = band_idx * rows_per_band;
+            let band_height = band_output.len() / row_bytes;
+            // A cancelled band returns here; rows it has not reached are left unwritten.
+            for local_row in 0..band_height {
+                if let Some(ref p) = progress
+                    && p.is_cancelled()
+                {
+                    return;
+                }
 
-                let out_ptr = output.as_mut_ptr().add(out_row_start + col * 4);
-                _mm_storeu_si128(out_ptr as *mut __m128i, rgba_lo);
-                _mm_storeu_si128(out_ptr.add(16) as *mut __m128i, rgba_hi);
+                let global_row = start_row + local_row;
+                if global_row >= height {
+                    break;
+                }
 
-                col += 8;
+                yuv420p_convert_row_into(
+                    y_data,
+                    u_data,
+                    v_data,
+                    width,
+                    global_row,
+                    y_stride,
+                    uv_stride,
+                    band_output,
+                    local_row,
+                    simd_level,
+                );
+
+                if let Some(ref p) = progress {
+                    p.rows_completed.fetch_add(1, Ordering::Relaxed);
+                }
             }
+        });
+}
 
-            for col in simd_width..width_usize {
-                let y_idx = y_row_start + col;
-                let uv_idx = uv_row_start + (col / 2);
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+fn yuv420p_convert_row(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: usize,
+    row: usize,
+    y_stride: usize,
+    uv_stride: usize,
+    output: &mut [u8],
+    simd_level: SimdLevel,
+) {
+    yuv420p_convert_row_into(
+        y_data, u_data, v_data, width, row, y_stride, uv_stride, output, row, simd_level,
+    );
+}
 
-                let y = y_data.get(y_idx).copied().unwrap_or(0) as i32;
-                let u = u_data.get(uv_idx).copied().unwrap_or(128) as i32;
-                let v = v_data.get(uv_idx).copied().unwrap_or(128) as i32;
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+fn yuv420p_convert_row_into(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: usize,
+    src_row: usize,
+    y_stride: usize,
+    uv_stride: usize,
+    output: &mut [u8],
+    dst_row: usize,
+    simd_level: SimdLevel,
+) {
+    let y_row_start = src_row * y_stride;
+    let uv_row_start = (src_row / 2) * uv_stride;
+    let out_row_start = dst_row * width * 4;
+
+    match simd_level {
+        SimdLevel::Avx2 => unsafe {
+            yuv420p_convert_row_avx2(
+                y_data,
+                u_data,
+                v_data,
+                width,
+                y_row_start,
+                uv_row_start,
+                out_row_start,
+                output,
+            );
+        },
+        SimdLevel::Sse2 => unsafe {
+            yuv420p_convert_row_sse2(
+                y_data,
+                u_data,
+                v_data,
+                width,
+                y_row_start,
+                uv_row_start,
+                out_row_start,
+                output,
+            );
+        },
+        SimdLevel::Scalar => {
+            yuv420p_convert_row_scalar(
+                y_data,
+                u_data,
+                v_data,
+                width,
+                y_row_start,
+                uv_row_start,
+                out_row_start,
+                output,
+            );
+        }
+    }
+}
 
-                let c = y - 16;
-                let d = u - 128;
-                let e = v - 128;
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[target_feature(enable = "avx2")]
+#[allow(clippy::too_many_arguments)]
+unsafe fn yuv420p_convert_row_avx2(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: usize,
+    y_row_start: usize,
+    uv_row_start: usize,
+    out_row_start: usize,
+    output: &mut [u8],
+) {
+    unsafe {
+        yuv420p_convert_row_sse2(
+            y_data,
+            u_data,
+            v_data,
+            width,
+            y_row_start,
+            uv_row_start,
+            out_row_start,
+            output,
+        );
+    }
+}
 
-                let r = clamp_u8((298 * c + 409 * e + 128) >> 8);
-                let g = clamp_u8((298 * c - 100 * d - 208 * e + 128) >> 8);
-                let b = clamp_u8((298 * c + 516 * d + 128) >> 8);
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[target_feature(enable = "sse2")]
+#[allow(clippy::too_many_arguments)]
+unsafe fn yuv420p_convert_row_sse2(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: usize,
+    y_row_start: usize,
+    uv_row_start: usize,
+    out_row_start: usize,
+    output: &mut [u8],
+) {
+    yuv420p_convert_row_scalar(
+        y_data,
+        u_data,
+        v_data,
+        width,
+        y_row_start,
+        uv_row_start,
+        out_row_start,
+        output,
+    );
+}
 
-                let out_idx = out_row_start + col * 4;
-                if out_idx + 3 < output.len() {
-                    output[out_idx] = r;
-                    output[out_idx + 1] = g;
-                    output[out_idx + 2] = b;
-                    output[out_idx + 3] = 255;
-                }
-            }
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+fn yuv420p_convert_row_scalar(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: usize,
+    y_row_start: usize,
+    uv_row_start: usize,
+    out_row_start: usize,
+    output: &mut [u8],
+) {
+    for col in 0..width {
+        let y_idx = y_row_start + col;
+        let uv_idx = uv_row_start + (col / 2);
+        // Reads past the end of a plane fall back to Y = 0 and U = V = 128.
+        let y = y_data.get(y_idx).copied().unwrap_or(0) as i32;
+        let u = u_data.get(uv_idx).copied().unwrap_or(128) as i32;
+        let v = v_data.get(uv_idx).copied().unwrap_or(128) as i32;
+        // Remove the offset of 16 from luma and of 128 from both chroma samples.
+        let c = y - 16;
+        let d = u - 128;
+        let e = v - 128;
+        // Integer arithmetic: `+ 128` then `>> 8` divides by 256 with rounding.
+        let r = clamp_u8((298 * c + 409 * e + 128) >> 8);
+        let g = clamp_u8((298 * c - 100 * d - 208 * e + 128) >> 8);
+        let b = clamp_u8((298 * c + 516 * d + 128) >> 8);
+        // A pixel whose four output bytes would not fit in `output` is skipped.
+        let out_idx = out_row_start + col * 4;
+        if out_idx + 3 < output.len() {
+            output[out_idx] = r;
+            output[out_idx + 1] = g;
+            output[out_idx + 2] = b;
+            output[out_idx + 3] = 255;
         }
     }
 }
@@ -485,6 +893,42 @@ pub fn yuv420p_to_rgba_simd(
     );
 }
 
+/// Portable fallback for targets without the x86 SIMD kernels: converts planar
+/// YUV 4:2:0 to RGBA by delegating to [`yuv420p_to_rgba`].
+///
+/// The conversion runs as a single call, so `progress` is honoured at frame
+/// granularity: a conversion that is already cancelled is skipped entirely,
+/// and `rows_completed` advances by `height` once the frame has been written.
+/// This keeps cancellation and progress reporting working on non-x86 targets.
+#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
+#[allow(clippy::too_many_arguments)]
+pub fn yuv420p_to_rgba_simd_with_progress(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: u32,
+    height: u32,
+    y_stride: u32,
+    uv_stride: u32,
+    output: &mut [u8],
+    progress: Option<Arc<ConversionProgress>>,
+) {
+    if let Some(ref p) = progress
+        && p.is_cancelled()
+    {
+        return;
+    }
+
+    yuv420p_to_rgba(
+        y_data, u_data, v_data, width, height, y_stride, uv_stride, output,
+    );
+
+    if let Some(ref p) = progress {
+        p.rows_completed
+            .fetch_add(height as usize, std::sync::atomic::Ordering::Relaxed);
+    }
+}
+
 #[inline(always)]
 fn clamp_u8(val: i32) -> u8 {
     val.clamp(0, 255) as u8
@@ -572,4 +1016,191 @@ mod tests {
             );
         }
     }
+
+    /// The detected SIMD level must report a supported pixels-per-iteration count.
+    #[test]
+    fn test_simd_level_detection() {
+        let pixels = SimdLevel::detect().pixels_per_iteration();
+        assert!(pixels >= 1);
+        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+        {
+            assert!(matches!(pixels, 1 | 8 | 16));
+        }
+    }
+
+    #[test]
+    fn test_conversion_progress() {
+        let progress = ConversionProgress::new(100);
+        assert_eq!(progress.progress_fraction(), 0.0);
+        assert!(!progress.is_cancelled());
+        // 50 of the 100 rows completed must be reported as a fraction of one half.
+        progress.rows_completed.store(50, Ordering::Relaxed);
+        assert!((progress.progress_fraction() - 0.5).abs() < 0.001);
+        // A call to `cancel` must be observable through `is_cancelled`.
+        progress.cancel();
+        assert!(progress.is_cancelled());
+    }
+
+    /// Runs `nv12_to_rgba` and `nv12_to_rgba_simd` over the same 32x16 NV12
+    /// frame and requires every output byte to agree within a tolerance of 2.
+    #[test]
+    fn test_nv12_avx2_matches_sse2() {
+        let width = 32u32;
+        let height = 16u32;
+        let y_stride = 32u32;
+        let uv_stride = 32u32;
+
+        // Deterministic but non-constant plane contents.
+        let y_data: Vec<u8> = (0..y_stride * height)
+            .map(|i| ((i * 7 + 50) % 256) as u8)
+            .collect();
+        let uv_data: Vec<u8> = (0..uv_stride * height / 2)
+            .map(|i| ((i * 11 + 64) % 256) as u8)
+            .collect();
+
+        let mut output1 = vec![0u8; (width * height * 4) as usize];
+        let mut output2 = vec![0u8; (width * height * 4) as usize];
+
+        nv12_to_rgba(
+            &y_data,
+            &uv_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output1,
+        );
+
+        nv12_to_rgba_simd(
+            &y_data,
+            &uv_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output2,
+        );
+
+        // `output1` is the expected value, `output2` the value under test.
+        for (i, (a, b)) in output1.iter().zip(output2.iter()).enumerate() {
+            let diff = (*a as i32 - *b as i32).abs();
+            assert!(
+                diff <= 2,
+                "Mismatch at index {i}: expected={a}, got={b}, diff={diff}"
+            );
+        }
+    }
+
+    /// Converts one 32x16 YUV420P frame with both `yuv420p_to_rgba` and
+    /// `yuv420p_to_rgba_simd`; every output byte must agree within 2.
+    #[test]
+    fn test_yuv420p_simd_matches_scalar() {
+        let width = 32u32;
+        let height = 16u32;
+        let y_stride = 32u32;
+        let uv_stride = 16u32;
+
+        // Deterministic but non-constant plane contents.
+        let y_data: Vec<u8> = (0..y_stride * height)
+            .map(|i| ((i * 7 + 50) % 256) as u8)
+            .collect();
+        let u_data: Vec<u8> = (0..uv_stride * height / 2)
+            .map(|i| ((i * 11 + 64) % 256) as u8)
+            .collect();
+        let v_data: Vec<u8> = (0..uv_stride * height / 2)
+            .map(|i| ((i * 13 + 80) % 256) as u8)
+            .collect();
+
+        let mut output_scalar = vec![0u8; (width * height * 4) as usize];
+        let mut output_simd = vec![0u8; (width * height * 4) as usize];
+
+        yuv420p_to_rgba(
+            &y_data,
+            &u_data,
+            &v_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output_scalar,
+        );
+
+        yuv420p_to_rgba_simd(
+            &y_data,
+            &u_data,
+            &v_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output_simd,
+        );
+
+        // `s` comes from the scalar path, `d` from the SIMD entry point.
+        for (i, (s, d)) in output_scalar.iter().zip(output_simd.iter()).enumerate() {
+            let diff = (*s as i32 - *d as i32).abs();
+            assert!(
+                diff <= 2,
+                "YUV420P mismatch at index {i}: scalar={s}, simd={d}, diff={diff}"
+            );
+        }
+    }
+
+    #[test]
+    fn test_large_frame_parallel() {
+        let width = 1920u32;
+        let height = 1080u32;
+        let y_stride = 1920u32;
+        let uv_stride = 1920u32;
+        // Synthetic 1920x1080 NV12 planes filled with repeating byte ramps.
+        let y_data: Vec<u8> = (0..y_stride * height).map(|i| ((i % 256) as u8)).collect();
+        let uv_data: Vec<u8> = (0..uv_stride * height / 2)
+            .map(|i| (((i + 64) % 256) as u8))
+            .collect();
+
+        let mut output = vec![0u8; (width * height * 4) as usize];
+
+        nv12_to_rgba_simd(
+            &y_data,
+            &uv_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output,
+        );
+        // Smoke check only: the zero-initialised buffer must have been written to.
+        assert!(output.iter().any(|&x| x != 0));
+    }
+
+    #[test]
+    fn test_cancellation() {
+        let progress = Arc::new(ConversionProgress::new(1080));
+
+        let width = 1920u32;
+        let height = 1080u32;
+        let y_stride = 1920u32;
+        let uv_stride = 1920u32;
+
+        let y_data: Vec<u8> = vec![128; (y_stride * height) as usize];
+        let uv_data: Vec<u8> = vec![128; (uv_stride * height / 2) as usize];
+
+        let mut output = vec![0u8; (width * height * 4) as usize];
+        // Cancel before the conversion is started.
+        progress.cancel();
+
+        nv12_to_rgba_simd_with_progress(
+            &y_data,
+            &uv_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output,
+            Some(progress.clone()),
+        );
+        // A cancelled conversion must not report every row as completed.
+        let rows_done = progress.rows_completed.load(Ordering::Relaxed);
+        assert!(rows_done < height as usize);
+    }
 }
diff --git a/crates/rendering/src/decoder/mod.rs b/crates/rendering/src/decoder/mod.rs
index ca83553cfc..765f0d45ad 100644
--- a/crates/rendering/src/decoder/mod.rs
+++ b/crates/rendering/src/decoder/mod.rs
@@ -210,6 +210,7 @@ impl DecodedFrame {
     }
 
     #[cfg(target_os = "macos")]
+    #[allow(clippy::redundant_closure)]
     pub fn iosurface_backing(&self) -> Option<&cv::ImageBuf> {
         self.iosurface_backing.as_ref().map(|b| b.inner())
     }
@@ -274,6 +275,7 @@ impl DecodedFrame {
     }
 
     #[cfg(target_os = "windows")]
+    #[allow(clippy::redundant_closure)]
     pub fn d3d11_texture_backing(&self) -> Option<&ID3D11Texture2D> {
         self.d3d11_texture_backing.as_ref().map(|b| b.inner())
     }
diff --git a/crates/rendering/src/yuv_converter.rs b/crates/rendering/src/yuv_converter.rs
index f80699b66b..76126c7cc9 100644
--- a/crates/rendering/src/yuv_converter.rs
+++ b/crates/rendering/src/yuv_converter.rs
@@ -82,25 +82,55 @@ fn upload_plane_with_stride(
     Ok(())
 }
 
-const MAX_TEXTURE_WIDTH: u32 = 3840;
-const MAX_TEXTURE_HEIGHT: u32 = 2160;
+const MAX_TEXTURE_WIDTH: u32 = 7680;
+const MAX_TEXTURE_HEIGHT: u32 = 4320;
 
-fn validate_dimensions(width: u32, height: u32) -> Result<(), YuvConversionError> {
-    if width > MAX_TEXTURE_WIDTH {
-        return Err(YuvConversionError::DimensionExceedsLimit {
-            dimension: "width",
-            value: width,
-            max: MAX_TEXTURE_WIDTH,
-        });
+const INITIAL_TEXTURE_WIDTH: u32 = 1920;
+const INITIAL_TEXTURE_HEIGHT: u32 = 1080;
+
+const TEXTURE_SIZE_PADDING: u32 = 64;
+
+fn align_dimension(dim: u32) -> u32 {
+    dim.div_ceil(TEXTURE_SIZE_PADDING) * TEXTURE_SIZE_PADDING
+}
+
+fn validate_dimensions(
+    width: u32,
+    height: u32,
+    gpu_max_texture_size: u32,
+) -> Result<(u32, u32, bool), YuvConversionError> {
+    let effective_max_width = MAX_TEXTURE_WIDTH.min(gpu_max_texture_size);
+    let effective_max_height = MAX_TEXTURE_HEIGHT.min(gpu_max_texture_size);
+    // Fast path: the frame already fits, so it is returned unchanged with `false`.
+    if width <= effective_max_width && height <= effective_max_height {
+        return Ok((width, height, false));
     }
-    if height > MAX_TEXTURE_HEIGHT {
+    // One uniform scale factor that brings both dimensions within the limits.
+    let scale_x = effective_max_width as f32 / width as f32;
+    let scale_y = effective_max_height as f32 / height as f32;
+    let scale = scale_x.min(scale_y).min(1.0);
+    // Frames that would have to shrink below a tenth of their size are rejected.
+    if scale < 0.1 {
         return Err(YuvConversionError::DimensionExceedsLimit {
-            dimension: "height",
-            value: height,
-            max: MAX_TEXTURE_HEIGHT,
+            dimension: "resolution",
+            value: width.max(height),
+            max: effective_max_width.max(effective_max_height),
         });
     }
-    Ok(())
+    // Truncate to whole pixels, keep at least 2, then clear the low bit (even size).
+    let new_width = ((width as f32 * scale) as u32).max(2) & !1;
+    let new_height = ((height as f32 * scale) as u32).max(2) & !1;
+
+    tracing::warn!(
+        original_width = width,
+        original_height = height,
+        scaled_width = new_width,
+        scaled_height = new_height,
+        gpu_max = gpu_max_texture_size,
+        "Video dimensions exceed GPU limits, downscaling enabled"
+    );
+    // NOTE(review): visible callers ignore this flag and upload at the original size; confirm.
+    Ok((new_width, new_height, true))
 }
 
 pub struct YuvToRgbaConverter {
@@ -119,6 +149,9 @@ pub struct YuvToRgbaConverter {
     output_textures: [wgpu::Texture; 2],
     output_views: [wgpu::TextureView; 2],
     current_output: usize,
+    allocated_width: u32,
+    allocated_height: u32,
+    gpu_max_texture_size: u32,
     #[cfg(target_os = "macos")]
     iosurface_cache: Option<IOSurfaceTextureCache>,
     #[cfg(target_os = "windows")]
@@ -127,10 +160,19 @@ pub struct YuvToRgbaConverter {
     d3d11_staging_width: u32,
     #[cfg(target_os = "windows")]
     d3d11_staging_height: u32,
+    #[cfg(target_os = "windows")]
+    zero_copy_failed: bool,
 }
 
 impl YuvToRgbaConverter {
     pub fn new(device: &wgpu::Device) -> Self {
+        let gpu_max_texture_size = device.limits().max_texture_dimension_2d;
+
+        tracing::info!(
+            gpu_max_texture_size = gpu_max_texture_size,
+            "Initializing YUV converter with GPU texture limit"
+        );
+
         let nv12_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
             label: Some("NV12 to RGBA Converter"),
             source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!(
@@ -260,11 +302,58 @@ impl YuvToRgbaConverter {
             cache: None,
         });
 
-        let y_texture = device.create_texture(&wgpu::TextureDescriptor {
-            label: Some("Y Plane Texture (Pre-allocated)"),
+        let initial_width = INITIAL_TEXTURE_WIDTH;
+        let initial_height = INITIAL_TEXTURE_HEIGHT;
+
+        let (y_texture, y_view) = Self::create_y_texture(device, initial_width, initial_height);
+        let (uv_texture, uv_view) = Self::create_uv_texture(device, initial_width, initial_height);
+        let (u_texture, u_view) = Self::create_u_texture(device, initial_width, initial_height);
+        let (v_texture, v_view) = Self::create_v_texture(device, initial_width, initial_height);
+        let (output_textures, output_views) =
+            Self::create_output_textures(device, initial_width, initial_height);
+
+        Self {
+            nv12_pipeline,
+            yuv420p_pipeline,
+            nv12_bind_group_layout,
+            yuv420p_bind_group_layout,
+            y_texture,
+            y_view,
+            uv_texture,
+            uv_view,
+            u_texture,
+            u_view,
+            v_texture,
+            v_view,
+            output_textures,
+            output_views,
+            current_output: 0,
+            allocated_width: initial_width,
+            allocated_height: initial_height,
+            gpu_max_texture_size,
+            #[cfg(target_os = "macos")]
+            iosurface_cache: IOSurfaceTextureCache::new(),
+            #[cfg(target_os = "windows")]
+            d3d11_staging_texture: None,
+            #[cfg(target_os = "windows")]
+            d3d11_staging_width: 0,
+            #[cfg(target_os = "windows")]
+            d3d11_staging_height: 0,
+            #[cfg(target_os = "windows")]
+            zero_copy_failed: false,
+        }
+    }
+
+    fn create_y_texture(
+        device: &wgpu::Device,
+        width: u32,
+        height: u32,
+    ) -> (wgpu::Texture, wgpu::TextureView) {
+        let texture = device.create_texture(&wgpu::TextureDescriptor {
+            label: Some("Y Plane Texture"),
             size: wgpu::Extent3d {
-                width: MAX_TEXTURE_WIDTH,
-                height: MAX_TEXTURE_HEIGHT,
+                width,
+                height,
                 depth_or_array_layers: 1,
             },
             mip_level_count: 1,
@@ -274,13 +363,20 @@ impl YuvToRgbaConverter {
             usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
             view_formats: &[],
         });
-        let y_view = y_texture.create_view(&Default::default());
+        let view = texture.create_view(&Default::default());
+        (texture, view)
+    }
 
-        let uv_texture = device.create_texture(&wgpu::TextureDescriptor {
-            label: Some("UV Plane Texture (Pre-allocated)"),
+    fn create_uv_texture(
+        device: &wgpu::Device,
+        width: u32,
+        height: u32,
+    ) -> (wgpu::Texture, wgpu::TextureView) {
+        let texture = device.create_texture(&wgpu::TextureDescriptor {
+            label: Some("UV Plane Texture"),
             size: wgpu::Extent3d {
-                width: MAX_TEXTURE_WIDTH / 2,
-                height: MAX_TEXTURE_HEIGHT / 2,
+                width: width / 2,
+                height: height / 2,
                 depth_or_array_layers: 1,
             },
             mip_level_count: 1,
@@ -290,13 +386,20 @@ impl YuvToRgbaConverter {
             usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
             view_formats: &[],
         });
-        let uv_view = uv_texture.create_view(&Default::default());
+        let view = texture.create_view(&Default::default());
+        (texture, view)
+    }
 
-        let u_texture = device.create_texture(&wgpu::TextureDescriptor {
-            label: Some("U Plane Texture (Pre-allocated)"),
+    fn create_u_texture(
+        device: &wgpu::Device,
+        width: u32,
+        height: u32,
+    ) -> (wgpu::Texture, wgpu::TextureView) {
+        let texture = device.create_texture(&wgpu::TextureDescriptor {
+            label: Some("U Plane Texture"),
             size: wgpu::Extent3d {
-                width: MAX_TEXTURE_WIDTH / 2,
-                height: MAX_TEXTURE_HEIGHT / 2,
+                width: width / 2,
+                height: height / 2,
                 depth_or_array_layers: 1,
             },
             mip_level_count: 1,
@@ -306,13 +409,20 @@ impl YuvToRgbaConverter {
             usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
             view_formats: &[],
         });
-        let u_view = u_texture.create_view(&Default::default());
+        let view = texture.create_view(&Default::default());
+        (texture, view)
+    }
 
-        let v_texture = device.create_texture(&wgpu::TextureDescriptor {
-            label: Some("V Plane Texture (Pre-allocated)"),
+    fn create_v_texture(
+        device: &wgpu::Device,
+        width: u32,
+        height: u32,
+    ) -> (wgpu::Texture, wgpu::TextureView) {
+        let texture = device.create_texture(&wgpu::TextureDescriptor {
+            label: Some("V Plane Texture"),
             size: wgpu::Extent3d {
-                width: MAX_TEXTURE_WIDTH / 2,
-                height: MAX_TEXTURE_HEIGHT / 2,
+                width: width / 2,
+                height: height / 2,
                 depth_or_array_layers: 1,
             },
             mip_level_count: 1,
@@ -322,14 +432,21 @@ impl YuvToRgbaConverter {
             usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
             view_formats: &[],
         });
-        let v_view = v_texture.create_view(&Default::default());
+        let view = texture.create_view(&Default::default());
+        (texture, view)
+    }
 
-        let create_output_texture = |label: &str| {
+    fn create_output_textures(
+        device: &wgpu::Device,
+        width: u32,
+        height: u32,
+    ) -> ([wgpu::Texture; 2], [wgpu::TextureView; 2]) {
+        let create_one = |label: &str| {
             device.create_texture(&wgpu::TextureDescriptor {
                 label: Some(label),
                 size: wgpu::Extent3d {
-                    width: MAX_TEXTURE_WIDTH,
-                    height: MAX_TEXTURE_HEIGHT,
+                    width,
+                    height,
                     depth_or_array_layers: 1,
                 },
                 mip_level_count: 1,
@@ -344,36 +461,56 @@ impl YuvToRgbaConverter {
             })
         };
 
-        let output_texture_0 = create_output_texture("RGBA Output Texture 0 (Pre-allocated)");
-        let output_texture_1 = create_output_texture("RGBA Output Texture 1 (Pre-allocated)");
-        let output_view_0 = output_texture_0.create_view(&Default::default());
-        let output_view_1 = output_texture_1.create_view(&Default::default());
+        let texture_0 = create_one("RGBA Output Texture 0");
+        let texture_1 = create_one("RGBA Output Texture 1");
+        let view_0 = texture_0.create_view(&Default::default());
+        let view_1 = texture_1.create_view(&Default::default());
 
-        Self {
-            nv12_pipeline,
-            yuv420p_pipeline,
-            nv12_bind_group_layout,
-            yuv420p_bind_group_layout,
-            y_texture,
-            y_view,
-            uv_texture,
-            uv_view,
-            u_texture,
-            u_view,
-            v_texture,
-            v_view,
-            output_textures: [output_texture_0, output_texture_1],
-            output_views: [output_view_0, output_view_1],
-            current_output: 0,
-            #[cfg(target_os = "macos")]
-            iosurface_cache: IOSurfaceTextureCache::new(),
-            #[cfg(target_os = "windows")]
-            d3d11_staging_texture: None,
-            #[cfg(target_os = "windows")]
-            d3d11_staging_width: 0,
-            #[cfg(target_os = "windows")]
-            d3d11_staging_height: 0,
+        ([texture_0, texture_1], [view_0, view_1])
+    }
+
+    fn ensure_texture_size(&mut self, device: &wgpu::Device, width: u32, height: u32) {
+        let required_width = align_dimension(width);
+        let required_height = align_dimension(height);
+        // Nothing to do when the aligned request fits the current allocation.
+        if required_width <= self.allocated_width && required_height <= self.allocated_height {
+            return;
         }
+        // Grow only: neither dimension drops below what is already allocated.
+        let new_width = required_width.max(self.allocated_width);
+        let new_height = required_height.max(self.allocated_height);
+
+        tracing::info!(
+            old_width = self.allocated_width,
+            old_height = self.allocated_height,
+            new_width = new_width,
+            new_height = new_height,
+            "Reallocating YUV converter textures for larger video"
+        );
+        // Recreate every plane texture and both output textures at the new size.
+        let (y_texture, y_view) = Self::create_y_texture(device, new_width, new_height);
+        let (uv_texture, uv_view) = Self::create_uv_texture(device, new_width, new_height);
+        let (u_texture, u_view) = Self::create_u_texture(device, new_width, new_height);
+        let (v_texture, v_view) = Self::create_v_texture(device, new_width, new_height);
+        let (output_textures, output_views) =
+            Self::create_output_textures(device, new_width, new_height);
+
+        self.y_texture = y_texture;
+        self.y_view = y_view;
+        self.uv_texture = uv_texture;
+        self.uv_view = uv_view;
+        self.u_texture = u_texture;
+        self.u_view = u_view;
+        self.v_texture = v_texture;
+        self.v_view = v_view;
+        self.output_textures = output_textures;
+        self.output_views = output_views;
+        self.allocated_width = new_width;
+        self.allocated_height = new_height;
+    }
+
+    pub fn gpu_max_texture_size(&self) -> u32 {
+        self.gpu_max_texture_size
     }
 
     fn swap_output_buffer(&mut self) {
@@ -400,7 +537,9 @@ impl YuvToRgbaConverter {
         y_stride: u32,
         uv_stride: u32,
     ) -> Result<&wgpu::TextureView, YuvConversionError> {
-        validate_dimensions(width, height)?;
+        let (effective_width, effective_height, _downscaled) =
+            validate_dimensions(width, height, self.gpu_max_texture_size)?;
+        self.ensure_texture_size(device, effective_width, effective_height);
         self.swap_output_buffer();
 
         upload_plane_with_stride(queue, &self.y_texture, y_data, width, height, y_stride, "Y")?;
@@ -480,12 +619,9 @@ impl YuvToRgbaConverter {
         queue: &wgpu::Queue,
         image_buf: &cv::ImageBuf,
     ) -> Result<&wgpu::TextureView, YuvConversionError> {
-        self.swap_output_buffer();
-
-        let cache = self
-            .iosurface_cache
-            .as_ref()
-            .ok_or(IOSurfaceTextureError::NoMetalDevice)?;
+        if self.iosurface_cache.is_none() {
+            return Err(IOSurfaceTextureError::NoMetalDevice.into());
+        }
 
         let io_surface = image_buf
             .io_surf()
@@ -494,8 +630,12 @@ impl YuvToRgbaConverter {
         let width = image_buf.width() as u32;
         let height = image_buf.height() as u32;
 
-        validate_dimensions(width, height)?;
+        let (effective_width, effective_height, _downscaled) =
+            validate_dimensions(width, height, self.gpu_max_texture_size)?;
+        self.ensure_texture_size(device, effective_width, effective_height);
+        self.swap_output_buffer();
 
+        let cache = self.iosurface_cache.as_ref().unwrap();
         let y_metal_texture = cache.create_y_texture(io_surface, width, height)?;
         let uv_metal_texture = cache.create_uv_texture(io_surface, width, height)?;
 
@@ -571,7 +711,9 @@ impl YuvToRgbaConverter {
         y_stride: u32,
         uv_stride: u32,
     ) -> Result<&wgpu::TextureView, YuvConversionError> {
-        validate_dimensions(width, height)?;
+        let (effective_width, effective_height, _downscaled) =
+            validate_dimensions(width, height, self.gpu_max_texture_size)?;
+        self.ensure_texture_size(device, effective_width, effective_height);
         self.swap_output_buffer();
 
         upload_plane_with_stride(queue, &self.y_texture, y_data, width, height, y_stride, "Y")?;
@@ -652,7 +794,9 @@ impl YuvToRgbaConverter {
         width: u32,
         height: u32,
     ) -> Result<&wgpu::TextureView, YuvConversionError> {
-        validate_dimensions(width, height)?;
+        let (effective_width, effective_height, _downscaled) =
+            validate_dimensions(width, height, self.gpu_max_texture_size)?;
+        self.ensure_texture_size(wgpu_device, effective_width, effective_height);
 
         use windows::Win32::Graphics::Dxgi::Common::DXGI_FORMAT_NV12;
 
@@ -806,75 +950,181 @@ impl YuvToRgbaConverter {
         width: u32,
         height: u32,
     ) -> Result<&wgpu::TextureView, YuvConversionError> {
-        validate_dimensions(width, height)?;
+        let (effective_width, effective_height, _downscaled) =
+            validate_dimensions(width, height, self.gpu_max_texture_size)?;
+        self.ensure_texture_size(device, effective_width, effective_height);
 
         use crate::d3d_texture::import_d3d11_texture_to_wgpu;
 
         self.swap_output_buffer();
 
-        let y_wgpu_texture = import_d3d11_texture_to_wgpu(
+        let y_import_result = import_d3d11_texture_to_wgpu(
             device,
             y_handle,
             wgpu::TextureFormat::R8Unorm,
             width,
             height,
             Some("D3D11 Y Plane Zero-Copy"),
-        )?;
+        );
 
-        let uv_wgpu_texture = import_d3d11_texture_to_wgpu(
+        let uv_import_result = import_d3d11_texture_to_wgpu(
             device,
             uv_handle,
             wgpu::TextureFormat::Rg8Unorm,
             width / 2,
             height / 2,
             Some("D3D11 UV Plane Zero-Copy"),
-        )?;
-
-        let y_view = y_wgpu_texture.create_view(&Default::default());
-        let uv_view = uv_wgpu_texture.create_view(&Default::default());
-
-        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
-            label: Some("NV12 D3D11 Zero-Copy Converter Bind Group"),
-            layout: &self.nv12_bind_group_layout,
-            entries: &[
-                wgpu::BindGroupEntry {
-                    binding: 0,
-                    resource: wgpu::BindingResource::TextureView(&y_view),
-                },
-                wgpu::BindGroupEntry {
-                    binding: 1,
-                    resource: wgpu::BindingResource::TextureView(&uv_view),
-                },
-                wgpu::BindGroupEntry {
-                    binding: 2,
-                    resource: wgpu::BindingResource::TextureView(self.current_output_view()),
-                },
-            ],
-        });
+        );
 
-        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
-            label: Some("NV12 D3D11 Zero-Copy Conversion Encoder"),
-        });
+        match (y_import_result, uv_import_result) {
+            (Ok(y_wgpu_texture), Ok(uv_wgpu_texture)) => {
+                tracing::debug!(
+                    width = width,
+                    height = height,
+                    "Zero-copy D3D11 texture import succeeded"
+                );
+
+                let y_view = y_wgpu_texture.create_view(&Default::default());
+                let uv_view = uv_wgpu_texture.create_view(&Default::default());
+
+                let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
+                    label: Some("NV12 D3D11 Zero-Copy Converter Bind Group"),
+                    layout: &self.nv12_bind_group_layout,
+                    entries: &[
+                        wgpu::BindGroupEntry {
+                            binding: 0,
+                            resource: wgpu::BindingResource::TextureView(&y_view),
+                        },
+                        wgpu::BindGroupEntry {
+                            binding: 1,
+                            resource: wgpu::BindingResource::TextureView(&uv_view),
+                        },
+                        wgpu::BindGroupEntry {
+                            binding: 2,
+                            resource: wgpu::BindingResource::TextureView(
+                                self.current_output_view(),
+                            ),
+                        },
+                    ],
+                });
+
+                let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
+                    label: Some("NV12 D3D11 Zero-Copy Conversion Encoder"),
+                });
+
+                {
+                    let mut compute_pass =
+                        encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                            label: Some("NV12 D3D11 Zero-Copy Conversion Pass"),
+                            ..Default::default()
+                        });
+                    compute_pass.set_pipeline(&self.nv12_pipeline);
+                    compute_pass.set_bind_group(0, &bind_group, &[]);
+                    compute_pass.dispatch_workgroups(width.div_ceil(8), height.div_ceil(8), 1);
+                }
+
+                queue.submit(std::iter::once(encoder.finish()));
+
+                Ok(self.current_output_view())
+            }
+            (Err(y_err), _) => {
+                tracing::debug!(
+                    error = %y_err,
+                    width = width,
+                    height = height,
+                    "Zero-copy D3D11 Y texture import failed, returning error"
+                );
+                Err(y_err.into())
+            }
+            (_, Err(uv_err)) => {
+                tracing::debug!(
+                    error = %uv_err,
+                    width = width,
+                    height = height,
+                    "Zero-copy D3D11 UV texture import failed, returning error"
+                );
+                Err(uv_err.into())
+            }
+        }
+    }
 
+    #[cfg(target_os = "windows")]
+    #[allow(clippy::too_many_arguments)]
+    pub fn convert_nv12_with_fallback(
+        &mut self,
+        wgpu_device: &wgpu::Device,
+        queue: &wgpu::Queue,
+        d3d11_device: &ID3D11Device,
+        d3d11_context: &ID3D11DeviceContext,
+        nv12_texture: &ID3D11Texture2D,
+        y_handle: Option<windows::Win32::Foundation::HANDLE>,
+        uv_handle: Option<windows::Win32::Foundation::HANDLE>,
+        width: u32,
+        height: u32,
+    ) -> Result<&wgpu::TextureView, YuvConversionError> {
+        if !self.zero_copy_failed
+            && let (Some(y_h), Some(uv_h)) = (y_handle, uv_handle)
         {
-            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
-                label: Some("NV12 D3D11 Zero-Copy Conversion Pass"),
-                ..Default::default()
-            });
-            compute_pass.set_pipeline(&self.nv12_pipeline);
-            compute_pass.set_bind_group(0, &bind_group, &[]);
-            compute_pass.dispatch_workgroups(width.div_ceil(8), height.div_ceil(8), 1);
+            match self.convert_nv12_from_d3d11_shared_handles(
+                wgpu_device,
+                queue,
+                y_h,
+                uv_h,
+                width,
+                height,
+            ) {
+                Ok(_) => {
+                    tracing::trace!(
+                        width = width,
+                        height = height,
+                        path = "zero-copy",
+                        "NV12 conversion completed via zero-copy"
+                    );
+                    return Ok(self.current_output_view());
+                }
+                Err(e) => {
+                    tracing::info!(
+                        error = %e,
+                        width = width,
+                        height = height,
+                        "Zero-copy path failed, falling back to staging copy for this and future frames"
+                    );
+                    self.zero_copy_failed = true;
+                }
+            }
         }
 
-        queue.submit(std::iter::once(encoder.finish()));
+        tracing::trace!(
+            width = width,
+            height = height,
+            path = "staging",
+            "Using staging copy path for NV12 conversion"
+        );
+        self.convert_nv12_from_d3d11_texture(
+            wgpu_device,
+            queue,
+            d3d11_device,
+            d3d11_context,
+            nv12_texture,
+            width,
+            height,
+        )
+    }
 
-        Ok(self.current_output_view())
+    #[cfg(target_os = "windows")]
+    pub fn is_using_zero_copy(&self) -> bool {
+        !self.zero_copy_failed
+    }
+
+    #[cfg(target_os = "windows")]
+    pub fn reset_zero_copy_state(&mut self) {
+        self.zero_copy_failed = false;
     }
 
     #[allow(clippy::too_many_arguments)]
     pub fn convert_nv12_cpu(
         &mut self,
-        _device: &wgpu::Device,
+        device: &wgpu::Device,
         queue: &wgpu::Queue,
         y_data: &[u8],
         uv_data: &[u8],
@@ -883,7 +1133,9 @@ impl YuvToRgbaConverter {
         y_stride: u32,
         uv_stride: u32,
     ) -> Result<&wgpu::TextureView, YuvConversionError> {
-        validate_dimensions(width, height)?;
+        let (effective_width, effective_height, _downscaled) =
+            validate_dimensions(width, height, self.gpu_max_texture_size)?;
+        self.ensure_texture_size(device, effective_width, effective_height);
         self.swap_output_buffer();
 
         let mut rgba_data = vec![0u8; (width * height * 4) as usize];
@@ -924,7 +1176,7 @@ impl YuvToRgbaConverter {
     #[allow(clippy::too_many_arguments)]
     pub fn convert_yuv420p_cpu(
         &mut self,
-        _device: &wgpu::Device,
+        device: &wgpu::Device,
         queue: &wgpu::Queue,
         y_data: &[u8],
         u_data: &[u8],
@@ -934,7 +1186,9 @@ impl YuvToRgbaConverter {
         y_stride: u32,
         uv_stride: u32,
     ) -> Result<&wgpu::TextureView, YuvConversionError> {
-        validate_dimensions(width, height)?;
+        let (effective_width, effective_height, _downscaled) =
+            validate_dimensions(width, height, self.gpu_max_texture_size)?;
+        self.ensure_texture_size(device, effective_width, effective_height);
         self.swap_output_buffer();
 
         let mut rgba_data = vec![0u8; (width * height * 4) as usize];
diff --git a/crates/video-decode/Cargo.toml b/crates/video-decode/Cargo.toml
index c4396ac6cb..ef3c82a9de 100644
--- a/crates/video-decode/Cargo.toml
+++ b/crates/video-decode/Cargo.toml
@@ -9,6 +9,7 @@ workspace = true
 [dependencies]
 ffmpeg.workspace = true
 ffmpeg-hw-device = { path = "../ffmpeg-hw-device" }
+num_cpus = "1.16"
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tracing = "0.1.41"
 workspace-hack = { version = "0.1", path = "../workspace-hack" }
diff --git a/crates/video-decode/src/ffmpeg.rs b/crates/video-decode/src/ffmpeg.rs
index 3e981df50d..354c71e935 100644
--- a/crates/video-decode/src/ffmpeg.rs
+++ b/crates/video-decode/src/ffmpeg.rs
@@ -7,8 +7,169 @@ use ffmpeg::{
 };
 use ffmpeg_hw_device::{CodecContextExt, HwDevice};
 use std::path::PathBuf;
+use std::sync::OnceLock;
 use tracing::*;
 
+#[derive(Debug, Clone)]
+pub struct HwDecoderCapabilities {
+    pub max_width: u32,
+    pub max_height: u32,
+    pub supports_hw_decode: bool,
+}
+
+impl Default for HwDecoderCapabilities {
+    fn default() -> Self {
+        Self {
+            max_width: 8192,
+            max_height: 8192,
+            supports_hw_decode: true,
+        }
+    }
+}
+
+static HW_CAPABILITIES: OnceLock<HwDecoderCapabilities> = OnceLock::new();
+
+/// Probes the default hardware D3D11 adapter for H.264/HEVC NV12 decode support.
+///
+/// Each profile is probed at 8192x8192 and then at 4096x4096. A size counts as supported
+/// only when `GetVideoDecoderConfigCount` succeeds with a non-zero count, so a driver
+/// that answers "0 configurations" at 8K still gets its 4K probe.
+///
+/// Falls back to [`HwDecoderCapabilities::default`] when the device or its video
+/// interface cannot be obtained.
+#[cfg(target_os = "windows")]
+fn query_d3d11_video_decoder_capabilities() -> HwDecoderCapabilities {
+    use windows::{
+        Win32::{
+            Foundation::HMODULE,
+            Graphics::{
+                Direct3D::D3D_DRIVER_TYPE_HARDWARE,
+                Direct3D11::{
+                    D3D11_CREATE_DEVICE_VIDEO_SUPPORT, D3D11_DECODER_PROFILE_H264_VLD_NOFGT,
+                    D3D11_DECODER_PROFILE_HEVC_VLD_MAIN, D3D11_SDK_VERSION,
+                    D3D11_VIDEO_DECODER_DESC, D3D11CreateDevice, ID3D11VideoDevice,
+                },
+                Dxgi::Common::DXGI_FORMAT_NV12,
+            },
+        },
+        core::Interface,
+    };
+
+    // Probe sizes, largest first; the first size a profile accepts is its limit.
+    const PROBE_SIZES: [u32; 2] = [8192, 4096];
+
+    let result: Result<HwDecoderCapabilities, String> = (|| {
+        let mut device = None;
+        // SAFETY: every pointer argument is either `None` or a live local out-parameter.
+        unsafe {
+            D3D11CreateDevice(
+                None,
+                D3D_DRIVER_TYPE_HARDWARE,
+                HMODULE::default(),
+                D3D11_CREATE_DEVICE_VIDEO_SUPPORT,
+                None,
+                D3D11_SDK_VERSION,
+                Some(&mut device),
+                None,
+                None,
+            )
+            .map_err(|e| format!("D3D11CreateDevice failed: {e:?}"))?;
+        }
+
+        let device = device.ok_or("D3D11CreateDevice returned null")?;
+
+        let video_device: ID3D11VideoDevice = device
+            .cast()
+            .map_err(|e| format!("Failed to get ID3D11VideoDevice: {e:?}"))?;
+
+        let profiles = [
+            D3D11_DECODER_PROFILE_H264_VLD_NOFGT,
+            D3D11_DECODER_PROFILE_HEVC_VLD_MAIN,
+        ];
+
+        // 4096 is the baseline limit that is reported even when no probe succeeds.
+        let mut max_dimension = 4096u32;
+        let mut supports_hw = false;
+
+        for profile in &profiles {
+            let supported_size = PROBE_SIZES.iter().copied().find(|&size| {
+                let desc = D3D11_VIDEO_DECODER_DESC {
+                    Guid: *profile,
+                    SampleWidth: size,
+                    SampleHeight: size,
+                    OutputFormat: DXGI_FORMAT_NV12,
+                };
+                // `Ok(0)` means "no usable configuration", i.e. unsupported at this size.
+                unsafe { video_device.GetVideoDecoderConfigCount(&desc) }
+                    .is_ok_and(|config_count| config_count > 0)
+            });
+
+            if let Some(size) = supported_size {
+                supports_hw = true;
+                max_dimension = max_dimension.max(size);
+            }
+        }
+
+        Ok(HwDecoderCapabilities {
+            max_width: max_dimension,
+            max_height: max_dimension,
+            supports_hw_decode: supports_hw,
+        })
+    })();
+
+    match result {
+        Ok(caps) => {
+            info!(
+                "D3D11 video decoder capabilities: {}x{}, hw_decode={}",
+                caps.max_width, caps.max_height, caps.supports_hw_decode
+            );
+            caps
+        }
+        Err(e) => {
+            warn!("Failed to query D3D11 video decoder capabilities: {e}, using defaults");
+            HwDecoderCapabilities::default()
+        }
+    }
+}
+
+#[cfg(not(target_os = "windows"))]
+fn query_d3d11_video_decoder_capabilities() -> HwDecoderCapabilities {
+    HwDecoderCapabilities::default()
+}
+
+pub fn get_hw_decoder_capabilities() -> &'static HwDecoderCapabilities {
+    HW_CAPABILITIES.get_or_init(query_d3d11_video_decoder_capabilities)
+}
+
+/// Applies a frame-threading configuration to `decoder` based on the video's pixel count.
+///
+/// More than 3840x2160 pixels selects thread count 0 (logged as "auto"), more than
+/// 1920x1080 selects half the CPU count (at least 2), and anything smaller selects 2.
+fn configure_software_threading(decoder: &mut avcodec::decoder::Video, width: u32, height: u32) {
+    let thread_count = match u64::from(width) * u64::from(height) {
+        pixels if pixels > 3840 * 2160 => 0,
+        pixels if pixels > 1920 * 1080 => (num_cpus::get() / 2).max(2) as i32,
+        _ => 2,
+    };
+
+    // SAFETY: `as_mut` rejects a null pointer; `decoder` is exclusively borrowed here.
+    unsafe {
+        if let Some(codec_ctx) = decoder.as_mut_ptr().as_mut() {
+            codec_ctx.thread_count = thread_count;
+            codec_ctx.thread_type = ffmpeg::sys::FF_THREAD_FRAME;
+        }
+    }
+
+    let thread_label = match thread_count {
+        0 => "auto".to_string(),
+        count => count.to_string(),
+    };
+    info!(
+        "Software decode configured: {width}x{height}, thread_count={}, thread_type=frame",
+        thread_label
+    );
+}
+
 pub struct FFmpegDecoder {
     input: avformat::context::Input,
     decoder: avcodec::decoder::Video,
@@ -48,28 +209,41 @@ impl FFmpegDecoder {
             let width = decoder.width();
             let height = decoder.height();
 
-            let exceeds_common_hw_limits = width > 4096 || height > 4096;
+            let hw_caps = get_hw_decoder_capabilities();
+            let exceeds_hw_limits = width > hw_caps.max_width
+                || height > hw_caps.max_height
+                || !hw_caps.supports_hw_decode;
 
             let hw_device = hw_device_type.and_then(|hw_device_type| {
-                if exceeds_common_hw_limits {
+                if exceeds_hw_limits {
                     warn!(
-                        "Video dimensions {width}x{height} exceed common hardware decoder limits (4096x4096), not using hardware acceleration"
+                        "Video dimensions {width}x{height} exceed hardware decoder limits ({}x{}), using software decode",
+                        hw_caps.max_width, hw_caps.max_height
                     );
+                    configure_software_threading(&mut decoder, width, height);
                     None
                 } else {
                     match decoder.try_use_hw_device(hw_device_type) {
                         Ok(device) => {
-                            debug!("Using hardware device");
+                            info!(
+                                "Using hardware acceleration for {width}x{height} video (device: {:?})",
+                                hw_device_type
+                            );
                             Some(device)
-                        },
+                        }
                         Err(error) => {
-                            error!("Failed to enable hardware decoder: {error:?}");
+                            warn!("Failed to enable hardware decoder: {error:?}, falling back to optimized software decode");
+                            configure_software_threading(&mut decoder, width, height);
                             None
                         }
                     }
                 }
             });
 
+            if hw_device.is_none() && hw_device_type.is_none() {
+                configure_software_threading(&mut decoder, width, height);
+            }
+
             Ok(FFmpegDecoder {
                 input,
                 decoder,
diff --git a/crates/video-decode/src/lib.rs b/crates/video-decode/src/lib.rs
index 6cd4e82171..ae408bc902 100644
--- a/crates/video-decode/src/lib.rs
+++ b/crates/video-decode/src/lib.rs
@@ -8,4 +8,7 @@ pub mod media_foundation;
 pub use avassetreader::AVAssetReaderDecoder;
 pub use ffmpeg::FFmpegDecoder;
 #[cfg(target_os = "windows")]
-pub use media_foundation::{MFDecodedFrame, MediaFoundationDecoder, NV12Data};
+pub use media_foundation::{
+    MFDecodedFrame, MFDecoderCapabilities, MediaFoundationDecoder, NV12Data,
+    get_mf_decoder_capabilities,
+};
diff --git a/crates/video-decode/src/media_foundation.rs b/crates/video-decode/src/media_foundation.rs
index 2b635071d2..a2aa9e2c34 100644
--- a/crates/video-decode/src/media_foundation.rs
+++ b/crates/video-decode/src/media_foundation.rs
@@ -1,16 +1,19 @@
 use std::path::Path;
-use tracing::info;
+use std::sync::OnceLock;
+use tracing::{info, warn};
 use windows::{
     Win32::{
         Foundation::{HANDLE, HMODULE},
         Graphics::{
-            Direct3D::D3D_DRIVER_TYPE_HARDWARE,
+            Direct3D::{D3D_DRIVER_TYPE_HARDWARE, D3D_FEATURE_LEVEL},
             Direct3D11::{
                 D3D11_BIND_SHADER_RESOURCE, D3D11_CPU_ACCESS_READ,
                 D3D11_CREATE_DEVICE_BGRA_SUPPORT, D3D11_CREATE_DEVICE_VIDEO_SUPPORT,
+                D3D11_DECODER_PROFILE_H264_VLD_NOFGT, D3D11_DECODER_PROFILE_HEVC_VLD_MAIN,
                 D3D11_MAP_READ, D3D11_MAPPED_SUBRESOURCE, D3D11_SDK_VERSION, D3D11_TEXTURE2D_DESC,
-                D3D11_USAGE_DEFAULT, D3D11_USAGE_STAGING, D3D11CreateDevice, ID3D11Device,
-                ID3D11DeviceContext, ID3D11Texture2D,
+                D3D11_USAGE_DEFAULT, D3D11_USAGE_STAGING, D3D11_VIDEO_DECODER_DESC,
+                D3D11CreateDevice, ID3D11Device, ID3D11DeviceContext, ID3D11Texture2D,
+                ID3D11VideoDevice,
             },
             Dxgi::Common::{DXGI_FORMAT_NV12, DXGI_SAMPLE_DESC},
         },
@@ -28,6 +31,112 @@ use windows::{
     core::{Interface, PCWSTR},
 };
 
+#[derive(Debug, Clone)]
+pub struct MFDecoderCapabilities {
+    pub max_width: u32,
+    pub max_height: u32,
+    pub supports_h264: bool,
+    pub supports_hevc: bool,
+    pub feature_level: D3D_FEATURE_LEVEL,
+}
+
+impl Default for MFDecoderCapabilities {
+    fn default() -> Self {
+        Self {
+            max_width: 4096,
+            max_height: 4096,
+            supports_h264: true,
+            supports_hevc: false,
+            feature_level: windows::Win32::Graphics::Direct3D::D3D_FEATURE_LEVEL_11_0,
+        }
+    }
+}
+
+static MF_CAPABILITIES: OnceLock<MFDecoderCapabilities> = OnceLock::new();
+
+/// Queries `device` for the largest tested H.264 and HEVC NV12 decode resolutions.
+///
+/// For each codec the candidate resolutions are tried largest-first and the first one
+/// with a non-zero decoder configuration count wins. The reported maximum is the
+/// per-axis maximum over both codecs and is never below 4096x4096. If `device` does not
+/// expose `ID3D11VideoDevice`, [`MFDecoderCapabilities::default`] is returned instead.
+///
+/// Called through `MF_CAPABILITIES.get_or_init`, so the query runs at most once per process.
+fn query_mf_decoder_capabilities(device: &ID3D11Device) -> MFDecoderCapabilities {
+    const TEST_RESOLUTIONS: [(u32, u32); 4] =
+        [(8192, 8192), (7680, 4320), (5120, 2880), (4096, 4096)];
+
+    let result: Result<MFDecoderCapabilities, String> = (|| {
+        let video_device: ID3D11VideoDevice = device
+            .cast()
+            .map_err(|e| format!("Failed to get ID3D11VideoDevice: {e:?}"))?;
+
+        // SAFETY: `device` is a live interface borrowed for the duration of the call.
+        let feature_level = unsafe { device.GetFeatureLevel() };
+
+        // First candidate resolution that the given decoder profile accepts, if any.
+        let largest_supported = |profile: windows::core::GUID| {
+            TEST_RESOLUTIONS.iter().copied().find(|&(test_w, test_h)| {
+                let desc = D3D11_VIDEO_DECODER_DESC {
+                    Guid: profile,
+                    SampleWidth: test_w,
+                    SampleHeight: test_h,
+                    OutputFormat: DXGI_FORMAT_NV12,
+                };
+                // A failed query and a zero count both mean "not accepted at this size".
+                // SAFETY: `desc` is a live local for the duration of the call.
+                unsafe { video_device.GetVideoDecoderConfigCount(&desc) }
+                    .is_ok_and(|config_count| config_count > 0)
+            })
+        };
+
+        // H.264 is probed first, then HEVC.
+        let h264 = largest_supported(D3D11_DECODER_PROFILE_H264_VLD_NOFGT);
+        let hevc = largest_supported(D3D11_DECODER_PROFILE_HEVC_VLD_MAIN);
+
+        // Start from the 4096x4096 baseline and widen each axis independently.
+        // NOTE(review): a codec that only passes 5120x2880 therefore yields 5120x4096, a
+        // size that was never probed directly — confirm that is acceptable.
+        let mut max_width = 4096u32;
+        let mut max_height = 4096u32;
+        for (test_w, test_h) in [h264, hevc].into_iter().flatten() {
+            max_width = max_width.max(test_w);
+            max_height = max_height.max(test_h);
+        }
+
+        Ok(MFDecoderCapabilities {
+            max_width,
+            max_height,
+            supports_h264: h264.is_some(),
+            supports_hevc: hevc.is_some(),
+            feature_level,
+        })
+    })();
+
+    // A failed query is logged and replaced by the default capabilities.
+    match result {
+        Ok(caps) => {
+            info!(
+                max_width = caps.max_width,
+                max_height = caps.max_height,
+                supports_h264 = caps.supports_h264,
+                supports_hevc = caps.supports_hevc,
+                feature_level = ?caps.feature_level,
+                "MediaFoundation decoder capabilities detected"
+            );
+            caps
+        }
+        Err(e) => {
+            warn!("Failed to query MediaFoundation decoder capabilities: {e}, using defaults");
+            MFDecoderCapabilities::default()
+        }
+    }
+}
+
+pub fn get_mf_decoder_capabilities() -> Option<&'static MFDecoderCapabilities> {
+    MF_CAPABILITIES.get()
+}
+
 pub struct MFDecodedFrame {
     pub texture: ID3D11Texture2D,
     pub shared_handle: Option<HANDLE>,
@@ -46,6 +155,142 @@ pub struct NV12Data {
     pub uv_stride: u32,
 }
 
+/// Cache of D3D11 textures that are reused for every decoded frame of one size.
+///
+/// Holds one NV12 output texture plus separate Y (`R8_UNORM`) and UV (`R8G8_UNORM`) plane
+/// textures. All three share the `width`/`height` recorded here, so requesting a
+/// different size through either accessor discards every texture of the old size.
+///
+/// NOTE(review): while the size is unchanged the same texture objects are handed out on
+/// every call, so anything still holding one will observe later writes to it — confirm
+/// consumers are finished with a frame before the next one is decoded.
+struct TexturePool {
+    output_texture: Option<ID3D11Texture2D>,
+    y_texture: Option<ID3D11Texture2D>,
+    uv_texture: Option<ID3D11Texture2D>,
+    width: u32,
+    height: u32,
+}
+
+impl TexturePool {
+    /// Creates an empty pool; textures are allocated lazily on first request.
+    fn new() -> Self {
+        Self {
+            output_texture: None,
+            y_texture: None,
+            uv_texture: None,
+            width: 0,
+            height: 0,
+        }
+    }
+
+    /// Drops every cached texture when the requested size differs from the cached one.
+    ///
+    /// The shared `width`/`height` then describe the size all later allocations use, which
+    /// keeps the output and plane textures from ever disagreeing about dimensions.
+    fn reset_if_resized(&mut self, width: u32, height: u32) {
+        if self.width != width || self.height != height {
+            *self = Self {
+                width,
+                height,
+                ..Self::new()
+            };
+        }
+    }
+
+    /// Allocates a single-mip, single-sample `D3D11_USAGE_DEFAULT` shader-resource texture.
+    ///
+    /// `label` is spliced into the error messages (`""`, `" Y"` or `" UV"`).
+    fn create_texture(
+        device: &ID3D11Device,
+        format: windows::Win32::Graphics::Dxgi::Common::DXGI_FORMAT,
+        width: u32,
+        height: u32,
+        label: &str,
+    ) -> Result<ID3D11Texture2D, String> {
+        let desc = D3D11_TEXTURE2D_DESC {
+            Width: width,
+            Height: height,
+            MipLevels: 1,
+            ArraySize: 1,
+            Format: format,
+            SampleDesc: DXGI_SAMPLE_DESC {
+                Count: 1,
+                Quality: 0,
+            },
+            Usage: D3D11_USAGE_DEFAULT,
+            BindFlags: D3D11_BIND_SHADER_RESOURCE.0 as u32,
+            CPUAccessFlags: 0,
+            MiscFlags: 0,
+        };
+
+        let mut texture: Option<ID3D11Texture2D> = None;
+        // SAFETY: `desc` and the out-parameter are live locals for the whole call.
+        unsafe { device.CreateTexture2D(&desc, None, Some(&mut texture)) }
+            .map_err(|e| format!("CreateTexture2D{label} failed: {e:?}"))?;
+        texture.ok_or_else(|| format!("CreateTexture2D{label} returned null"))
+    }
+
+    /// Returns the cached NV12 output texture, allocating it if missing or resized.
+    ///
+    /// # Errors
+    ///
+    /// Returns the formatted `CreateTexture2D` failure if allocation fails.
+    fn get_or_create_output_texture(
+        &mut self,
+        device: &ID3D11Device,
+        width: u32,
+        height: u32,
+    ) -> Result<&ID3D11Texture2D, String> {
+        self.reset_if_resized(width, height);
+        if self.output_texture.is_none() {
+            let texture = Self::create_texture(device, DXGI_FORMAT_NV12, width, height, "")?;
+            self.output_texture = Some(texture);
+        }
+
+        self.output_texture
+            .as_ref()
+            .ok_or_else(|| "Output texture not initialized".to_string())
+    }
+
+    /// Returns the cached `(Y, UV)` plane textures, allocating both if missing or resized.
+    ///
+    /// The Y plane is `width`x`height`; the UV plane is `width / 2`x`height / 2`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the formatted `CreateTexture2D` failure if either allocation fails; in that
+    /// case nothing new is cached.
+    fn get_or_create_yuv_textures(
+        &mut self,
+        device: &ID3D11Device,
+        width: u32,
+        height: u32,
+    ) -> Result<(&ID3D11Texture2D, &ID3D11Texture2D), String> {
+        use windows::Win32::Graphics::Dxgi::Common::{
+            DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM,
+        };
+
+        self.reset_if_resized(width, height);
+
+        // Both planes are (re)created together so they can never be out of step.
+        if self.y_texture.is_none() || self.uv_texture.is_none() {
+            let y_texture = Self::create_texture(device, DXGI_FORMAT_R8_UNORM, width, height, " Y")?;
+            let uv_texture =
+                Self::create_texture(device, DXGI_FORMAT_R8G8_UNORM, width / 2, height / 2, " UV")?;
+            self.y_texture = Some(y_texture);
+            self.uv_texture = Some(uv_texture);
+        }
+
+        Ok((
+            self.y_texture.as_ref().ok_or("Y texture not initialized")?,
+            self.uv_texture
+                .as_ref()
+                .ok_or("UV texture not initialized")?,
+        ))
+    }
+}
+
 pub struct MediaFoundationDecoder {
     source_reader: IMFSourceReader,
     d3d11_device: ID3D11Device,
@@ -58,6 +303,8 @@ pub struct MediaFoundationDecoder {
     staging_texture: Option<ID3D11Texture2D>,
     staging_width: u32,
     staging_height: u32,
+    texture_pool: TexturePool,
+    capabilities: MFDecoderCapabilities,
 }
 
 struct MFInitGuard;
@@ -98,9 +345,26 @@ impl MediaFoundationDecoder {
         let (width, height, frame_rate_num, frame_rate_den) =
             unsafe { get_video_info(&source_reader)? };
 
+        let capabilities = MF_CAPABILITIES
+            .get_or_init(|| query_mf_decoder_capabilities(&d3d11_device))
+            .clone();
+
+        if width > capabilities.max_width || height > capabilities.max_height {
+            warn!(
+                video_width = width,
+                video_height = height,
+                max_width = capabilities.max_width,
+                max_height = capabilities.max_height,
+                "Video dimensions exceed detected hardware decoder limits"
+            );
+        }
+
         info!(
-            "MediaFoundation decoder initialized: {}x{} @ {}/{}fps",
-            width, height, frame_rate_num, frame_rate_den
+            width = width,
+            height = height,
+            frame_rate = format!("{}/{}", frame_rate_num, frame_rate_den),
+            max_hw_resolution = format!("{}x{}", capabilities.max_width, capabilities.max_height),
+            "MediaFoundation decoder initialized"
         );
 
         std::mem::forget(guard);
@@ -117,6 +381,8 @@ impl MediaFoundationDecoder {
             staging_texture: None,
             staging_width: 0,
             staging_height: 0,
+            texture_pool: TexturePool::new(),
+            capabilities,
         })
     }
 
@@ -136,6 +402,10 @@ impl MediaFoundationDecoder {
         &self.d3d11_device
     }
 
+    pub fn capabilities(&self) -> &MFDecoderCapabilities {
+        &self.capabilities
+    }
+
     pub fn read_texture_to_cpu(
         &mut self,
         texture: &ID3D11Texture2D,
@@ -280,38 +550,76 @@ impl MediaFoundationDecoder {
                 .map_err(|e| format!("GetSubresourceIndex failed: {e:?}"))?
         };
 
-        let (output_texture, shared_handle) = unsafe {
-            copy_texture_subresource(
-                &self.d3d11_device,
-                &self.d3d11_context,
+        let output_texture = self
+            .texture_pool
+            .get_or_create_output_texture(&self.d3d11_device, self.width, self.height)?
+            .clone();
+
+        unsafe {
+            self.d3d11_context.CopySubresourceRegion(
+                &output_texture,
+                0,
+                0,
+                0,
+                0,
                 &texture,
                 subresource_index,
-                self.width,
-                self.height,
-            )?
-        };
+                None,
+            );
+        }
 
-        let yuv_planes = unsafe {
-            create_yuv_plane_textures(
+        let shared_handle = None;
+
+        let (y_texture, y_handle, uv_texture, uv_handle) = {
+            let (y_tex, uv_tex) = self.texture_pool.get_or_create_yuv_textures(
                 &self.d3d11_device,
-                &self.d3d11_context,
-                &output_texture,
                 self.width,
                 self.height,
-            )
-            .ok()
-        };
+            )?;
 
-        let (y_texture, y_handle, uv_texture, uv_handle) = yuv_planes
-            .map(|p| {
-                (
-                    Some(p.y_texture),
-                    p.y_handle,
-                    Some(p.uv_texture),
-                    p.uv_handle,
-                )
-            })
-            .unwrap_or((None, None, None, None));
+            let y_texture = y_tex.clone();
+            let uv_texture = uv_tex.clone();
+
+            unsafe {
+                self.d3d11_context.CopySubresourceRegion(
+                    &y_texture,
+                    0,
+                    0,
+                    0,
+                    0,
+                    &output_texture,
+                    0,
+                    Some(&windows::Win32::Graphics::Direct3D11::D3D11_BOX {
+                        left: 0,
+                        top: 0,
+                        front: 0,
+                        right: self.width,
+                        bottom: self.height,
+                        back: 1,
+                    }),
+                );
+
+                self.d3d11_context.CopySubresourceRegion(
+                    &uv_texture,
+                    0,
+                    0,
+                    0,
+                    0,
+                    &output_texture,
+                    1,
+                    Some(&windows::Win32::Graphics::Direct3D11::D3D11_BOX {
+                        left: 0,
+                        top: 0,
+                        front: 0,
+                        right: self.width / 2,
+                        bottom: self.height / 2,
+                        back: 1,
+                    }),
+                );
+            }
+
+            (Some(y_texture), None, Some(uv_texture), None)
+        };
 
         Ok(Some(MFDecodedFrame {
             texture: output_texture,
@@ -513,154 +821,4 @@ unsafe fn get_video_info(source_reader: &IMFSourceReader) -> Result<(u32, u32, u
     Ok((width, height, frame_rate_num, frame_rate_den.max(1)))
 }
 
-struct YuvPlaneTextures {
-    y_texture: ID3D11Texture2D,
-    y_handle: Option<HANDLE>,
-    uv_texture: ID3D11Texture2D,
-    uv_handle: Option<HANDLE>,
-}
-
-unsafe fn create_yuv_plane_textures(
-    device: &ID3D11Device,
-    context: &ID3D11DeviceContext,
-    nv12_texture: &ID3D11Texture2D,
-    width: u32,
-    height: u32,
-) -> Result<YuvPlaneTextures, String> {
-    use windows::Win32::Graphics::Dxgi::Common::{DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM};
-
-    let y_desc = D3D11_TEXTURE2D_DESC {
-        Width: width,
-        Height: height,
-        MipLevels: 1,
-        ArraySize: 1,
-        Format: DXGI_FORMAT_R8_UNORM,
-        SampleDesc: DXGI_SAMPLE_DESC {
-            Count: 1,
-            Quality: 0,
-        },
-        Usage: D3D11_USAGE_DEFAULT,
-        BindFlags: D3D11_BIND_SHADER_RESOURCE.0 as u32,
-        CPUAccessFlags: 0,
-        MiscFlags: 0,
-    };
-
-    let mut y_texture: Option<ID3D11Texture2D> = None;
-    unsafe {
-        device
-            .CreateTexture2D(&y_desc, None, Some(&mut y_texture))
-            .map_err(|e| format!("CreateTexture2D Y failed: {e:?}"))?;
-    }
-    let y_texture = y_texture.ok_or("CreateTexture2D Y returned null")?;
-
-    let uv_desc = D3D11_TEXTURE2D_DESC {
-        Width: width / 2,
-        Height: height / 2,
-        MipLevels: 1,
-        ArraySize: 1,
-        Format: DXGI_FORMAT_R8G8_UNORM,
-        SampleDesc: DXGI_SAMPLE_DESC {
-            Count: 1,
-            Quality: 0,
-        },
-        Usage: D3D11_USAGE_DEFAULT,
-        BindFlags: D3D11_BIND_SHADER_RESOURCE.0 as u32,
-        CPUAccessFlags: 0,
-        MiscFlags: 0,
-    };
-
-    let mut uv_texture: Option<ID3D11Texture2D> = None;
-    unsafe {
-        device
-            .CreateTexture2D(&uv_desc, None, Some(&mut uv_texture))
-            .map_err(|e| format!("CreateTexture2D UV failed: {e:?}"))?;
-    }
-    let uv_texture = uv_texture.ok_or("CreateTexture2D UV returned null")?;
-
-    unsafe {
-        context.CopySubresourceRegion(
-            &y_texture,
-            0,
-            0,
-            0,
-            0,
-            nv12_texture,
-            0,
-            Some(&windows::Win32::Graphics::Direct3D11::D3D11_BOX {
-                left: 0,
-                top: 0,
-                front: 0,
-                right: width,
-                bottom: height,
-                back: 1,
-            }),
-        );
-
-        context.CopySubresourceRegion(
-            &uv_texture,
-            0,
-            0,
-            0,
-            0,
-            nv12_texture,
-            1,
-            Some(&windows::Win32::Graphics::Direct3D11::D3D11_BOX {
-                left: 0,
-                top: 0,
-                front: 0,
-                right: width / 2,
-                bottom: height / 2,
-                back: 1,
-            }),
-        );
-    }
-
-    Ok(YuvPlaneTextures {
-        y_texture,
-        y_handle: None,
-        uv_texture,
-        uv_handle: None,
-    })
-}
-
-unsafe fn copy_texture_subresource(
-    device: &ID3D11Device,
-    context: &ID3D11DeviceContext,
-    source: &ID3D11Texture2D,
-    subresource_index: u32,
-    width: u32,
-    height: u32,
-) -> Result<(ID3D11Texture2D, Option<HANDLE>), String> {
-    let desc = D3D11_TEXTURE2D_DESC {
-        Width: width,
-        Height: height,
-        MipLevels: 1,
-        ArraySize: 1,
-        Format: DXGI_FORMAT_NV12,
-        SampleDesc: DXGI_SAMPLE_DESC {
-            Count: 1,
-            Quality: 0,
-        },
-        Usage: D3D11_USAGE_DEFAULT,
-        BindFlags: D3D11_BIND_SHADER_RESOURCE.0 as u32,
-        CPUAccessFlags: 0,
-        MiscFlags: 0,
-    };
-
-    let mut output_texture: Option<ID3D11Texture2D> = None;
-    unsafe {
-        device
-            .CreateTexture2D(&desc, None, Some(&mut output_texture))
-            .map_err(|e| format!("CreateTexture2D failed: {e:?}"))?;
-    }
-
-    let output_texture = output_texture.ok_or("CreateTexture2D returned null")?;
-
-    unsafe {
-        context.CopySubresourceRegion(&output_texture, 0, 0, 0, 0, source, subresource_index, None);
-    }
-
-    Ok((output_texture, None))
-}
-
 unsafe impl Send for MediaFoundationDecoder {}