diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
index f9c55834..49203d3e 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
@@ -386,10 +386,20 @@ void CompositorInterface::StopRecording()
     activeVideoEncoder = nullptr;
 }
 
-void CompositorInterface::RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime, int numFrames)
+std::unique_ptr<VideoEncoder::VideoInput> CompositorInterface::GetAvailableRecordFrame()
+{
+    if (activeVideoEncoder == nullptr)
+    {
+        OutputDebugString(L"GetAvailableRecordFrame dropped, no active encoder\n");
+        return nullptr;
+    }
+    return activeVideoEncoder->GetAvailableVideoFrame();
+}
+
+void CompositorInterface::RecordFrameAsync(std::unique_ptr<VideoEncoder::VideoInput> frame, int numFrames)
 {
 #if _DEBUG
-    std::wstring debugString = L"RecordFrameAsync called, frameTime:" + std::to_wstring(frameTime) + L", numFrames:" + std::to_wstring(numFrames) + L"\n";
+    std::wstring debugString = L"RecordFrameAsync called, frameTime:" + std::to_wstring(frame->timestamp) + L", numFrames:" + std::to_wstring(numFrames) + L"\n";
     OutputDebugString(debugString.data());
 #endif
 
@@ -407,8 +417,8 @@ void CompositorInterface::RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime,
     // The encoder will update sample times internally based on the first seen sample time when recording.
     // The encoder, however, does assume that audio and video samples will be based on the same source time.
     // Providing audio and video samples with different starting times will cause issues in the generated video file.
-    LONGLONG sampleTime = frameTime;
-    activeVideoEncoder->QueueVideoFrame(videoFrame, sampleTime, numFrames * frameProvider->GetDurationHNS());
+    frame->duration = numFrames * frameProvider->GetDurationHNS();
+    activeVideoEncoder->QueueVideoFrame(std::move(frame));
 }
 
 void CompositorInterface::RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audioTime, int audioSize)
@@ -430,8 +440,9 @@ void CompositorInterface::RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audio
     // The encoder will update sample times internally based on the first seen sample time when recording.
     // The encoder, however, does assume that audio and video samples will be based on the same source time.
     // Providing audio and video samples with different starting times will cause issues in the generated video file.
-    LONGLONG sampleTime = audioTime;
-    activeVideoEncoder->QueueAudioFrame(audioFrame, audioSize, sampleTime);
+    auto frame = activeVideoEncoder->GetAvailableAudioFrame();
+    frame->SetData(audioFrame, audioSize, audioTime);
+    activeVideoEncoder->QueueAudioFrame(std::move(frame));
 }
 
 bool CompositorInterface::ProvidesYUV()
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h
index e3e155f2..b349b126 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h
@@ -80,7 +80,8 @@ class CompositorInterface
     DLLEXPORT void StopRecording();
 
     // frameTime is in hundred nanoseconds
-    DLLEXPORT void RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime, int numFrames);
+    DLLEXPORT std::unique_ptr<VideoEncoder::VideoInput> GetAvailableRecordFrame();
+    DLLEXPORT void RecordFrameAsync(std::unique_ptr<VideoEncoder::VideoInput> frame, int numFrames);
 
     // audioTime is in hundred nanoseconds
     DLLEXPORT void RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audioTime, int audioSize);
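Note on the interface change above: recording now hands out pooled frame objects and takes them back by value. The caller asks the active encoder for a reusable VideoEncoder::VideoInput, fills it, and moves it into RecordFrameAsync, so pixel data lands once in a reusable IMFMediaBuffer instead of being copied out of a raw BYTE* argument. A minimal sketch of the acquire/recycle pattern, with hypothetical Frame and FramePool standing in for VideoEncoder::VideoInput and the encoder's internal pool:

    #include <memory>
    #include <mutex>
    #include <queue>

    struct Frame { long long timestamp = 0; /* owns an IMFMediaBuffer in the real code */ };

    class FramePool
    {
    public:
        std::unique_ptr<Frame> Acquire()
        {
            std::unique_lock<std::mutex> lock(mutex);
            if (pool.empty()) { return std::make_unique<Frame>(); } // grow on demand
            auto frame = std::move(pool.front());
            pool.pop();
            return frame;
        }

        void Recycle(std::unique_ptr<Frame> frame) // writer returns the frame when done
        {
            std::unique_lock<std::mutex> lock(mutex);
            pool.push(std::move(frame));
        }

    private:
        std::mutex mutex;
        std::queue<std::unique_ptr<Frame>> pool;
    };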
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
index 5f9dcf69..a44e91a8 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
@@ -6,6 +6,8 @@
 
 #include "codecapi.h"
 
+#define NUM_VIDEO_BUFFERS 10
+
 VideoEncoder::VideoEncoder(UINT frameWidth, UINT frameHeight, UINT frameStride, UINT fps, UINT32 audioSampleRate, UINT32 audioChannels, UINT32 audioBPS, UINT32 videoBitrate, UINT32 videoMpegLevel) :
     frameWidth(frameWidth),
@@ -21,10 +23,11 @@ VideoEncoder::VideoEncoder(UINT frameWidth, UINT frameHeight, UINT frameStride,
     isRecording(false)
 {
 #if HARDWARE_ENCODE_VIDEO
-    inputFormat = MFVideoFormat_NV12;
+    inputFormat = MFVideoFormat_NV12;
 #else
-    inputFormat = MFVideoFormat_RGB32;
+    inputFormat = MFVideoFormat_RGB32;
 #endif
+    inputFormat = MFVideoFormat_RGB32;
 }
 
 VideoEncoder::~VideoEncoder()
@@ -41,11 +44,26 @@ bool VideoEncoder::Initialize(ID3D11Device* device)
 #if HARDWARE_ENCODE_VIDEO
     MFCreateDXGIDeviceManager(&resetToken, &deviceManager);
+    this->device = device;
 
     if (deviceManager != nullptr)
     {
         OutputDebugString(L"Resetting device manager with graphics device.\n");
-        deviceManager->ResetDevice(device, resetToken);
+        hr = deviceManager->ResetDevice(device, resetToken);
+    }
+
+    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
+    {
+        videoInputPool.push(std::make_unique<VideoInput>(device));
+    }
+
+    ID3D10Multithread* multithread;
+    device->QueryInterface(&multithread);
+    multithread->SetMultithreadProtected(TRUE);
+
+#else
+    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
+    {
+        videoInputPool.push(std::make_unique<VideoInput>(frameHeight * frameStride));
     }
 #endif
@@ -72,7 +90,7 @@ void VideoEncoder::StartRecording(LPCWSTR videoPath, bool encodeAudio)
     prevVideoTime = INVALID_TIMESTAMP;
     prevAudioTime = INVALID_TIMESTAMP;
 
-    HRESULT hr = E_PENDING;
+    HRESULT hr = S_OK;
     sinkWriter = NULL;
     videoStreamIndex = MAXDWORD;
@@ -87,13 +105,14 @@
 #endif
 
     IMFAttributes *attr = nullptr;
-    MFCreateAttributes(&attr, 3);
+    MFCreateAttributes(&attr, 4);
 
     if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_SINK_WRITER_DISABLE_THROTTLING, TRUE); }
 #if HARDWARE_ENCODE_VIDEO
     if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, true); }
     if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, false); }
+    if (SUCCEEDED(hr)) { hr = attr->SetUnknown(MF_SINK_WRITER_D3D_MANAGER, deviceManager); }
 #endif
 
     hr = MFCreateSinkWriterFromURL(videoPath, NULL, attr, &sinkWriter);
@@ -138,6 +157,10 @@
     if (SUCCEEDED(hr)) { hr = MFSetAttributeSize(pVideoTypeIn, MF_MT_FRAME_SIZE, frameWidth, frameHeight); }
     if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(pVideoTypeIn, MF_MT_FRAME_RATE, fps, 1); }
     if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(pVideoTypeIn, MF_MT_PIXEL_ASPECT_RATIO, 1, 1); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_DEFAULT_STRIDE, frameStride); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, TRUE); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_SAMPLE_SIZE, frameStride * frameHeight); }
     if (SUCCEEDED(hr)) { hr = sinkWriter->SetInputMediaType(videoStreamIndex, pVideoTypeIn, NULL); }
 
     if (encodeAudio)
@@ -173,12 +196,12 @@
 #endif
 }
 
-void VideoEncoder::WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp)
+void VideoEncoder::WriteAudio(std::unique_ptr<AudioInput> frame)
 {
     std::shared_lock lock(videoStateLock);
 #if _DEBUG
     {
-        std::wstring debugString = L"Writing Audio, Timestamp:" + std::to_wstring(timestamp) + L"\n";
+        std::wstring debugString = L"Writing Audio, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
         OutputDebugString(debugString.data());
     }
 #endif
@@ -186,33 +209,33 @@
 #if ENCODE_AUDIO
     if (!isRecording)
     {
-        std::wstring debugString = L"WriteAudio call failed: StartTime:" + std::to_wstring(startTime) + L", Timestamp:" + std::to_wstring(timestamp) + L"\n";
+        std::wstring debugString = L"WriteAudio call failed: StartTime:" + std::to_wstring(startTime) + L", Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
         OutputDebugString(debugString.data());
         return;
     }
     else if (startTime == INVALID_TIMESTAMP)
     {
-        startTime = timestamp;
+        startTime = frame->timestamp;
 #if _DEBUG
-        std::wstring debugString = L"Start time set from audio, Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
+        std::wstring debugString = L"Start time set from audio, Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
         OutputDebugString(debugString.data());
 #endif
     }
-    else if (timestamp < startTime)
+    else if (frame->timestamp < startTime)
     {
 #if _DEBUG
-        std::wstring debugString = L"Audio not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n"; OutputDebugString(debugString.data()); #endif return; } - LONGLONG sampleTimeNow = timestamp; + LONGLONG sampleTimeNow = frame->timestamp; LONGLONG sampleTimeStart = startTime; LONGLONG sampleTime = sampleTimeNow - sampleTimeStart; - LONGLONG duration = ((LONGLONG)((((float)AUDIO_SAMPLE_RATE * (16.0f /*bits per sample*/ / 8.0f /*bits per byte*/)) / (float)bufferSize) * 10000)); + LONGLONG duration = ((LONGLONG)((((float)AUDIO_SAMPLE_RATE * (16.0f /*bits per sample*/ / 8.0f /*bits per byte*/)) / (float)frame->currentSize) * 10000)); if (prevAudioTime != INVALID_TIMESTAMP) { duration = sampleTime - prevAudioTime; @@ -222,74 +245,61 @@ void VideoEncoder::WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp) #endif } - // Copy frame to a temporary buffer and process on a background thread. - byte* tmpAudioBuffer = new byte[bufferSize]; - memcpy(tmpAudioBuffer, buffer, bufferSize); - - concurrency::create_task([=]() + audioWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(audioWriteFuture) }]() mutable { + if (previousWriteFuture.valid()) + { + previousWriteFuture.wait(); + previousWriteFuture = {}; + } std::shared_lock lock(videoStateLock); - HRESULT hr = E_PENDING; if (sinkWriter == NULL || !isRecording) { OutputDebugString(L"Must start recording before writing audio frames.\n"); - delete[] tmpAudioBuffer; return; } IMFSample* pAudioSample = NULL; - IMFMediaBuffer* pAudioBuffer = NULL; - - const DWORD cbAudioBuffer = bufferSize; - - BYTE* pData = NULL; - - hr = MFCreateMemoryBuffer(cbAudioBuffer, &pAudioBuffer); - if (SUCCEEDED(hr)) { hr = pAudioBuffer->Lock(&pData, NULL, NULL); } - memcpy(pData, tmpAudioBuffer, cbAudioBuffer); - if (pAudioBuffer) - { - pAudioBuffer->Unlock(); - } - #if _DEBUG { - std::wstring debugString = L"Writing Audio Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(duration) + L", BufferLength:" + std::to_wstring(cbAudioBuffer) + L"\n"; + std::wstring debugString = L"Writing Audio Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(duration) + L", BufferLength:" + std::to_wstring(frame->currentSize) + L"\n"; OutputDebugString(debugString.data()); } #endif + HRESULT hr = S_OK; if (SUCCEEDED(hr)) { hr = MFCreateSample(&pAudioSample); } if (SUCCEEDED(hr)) { hr = pAudioSample->SetSampleTime(sampleTime); } if (SUCCEEDED(hr)) { hr = pAudioSample->SetSampleDuration(duration); } - if (SUCCEEDED(hr)) { hr = pAudioBuffer->SetCurrentLength(cbAudioBuffer); } - if (SUCCEEDED(hr)) { hr = pAudioSample->AddBuffer(pAudioBuffer); } + if (SUCCEEDED(hr)) { hr = pAudioSample->AddBuffer(frame->mediaBuffer); } if (SUCCEEDED(hr)) { hr = sinkWriter->WriteSample(audioStreamIndex, pAudioSample); } SafeRelease(pAudioSample); - SafeRelease(pAudioBuffer); if (FAILED(hr)) { OutputDebugString(L"Error writing audio frame.\n"); } - delete[] tmpAudioBuffer; + { + std::shared_lock lock(audioInputPoolLock); + audioInputPool.push(std::move(frame)); + } }); prevAudioTime = sampleTime; #endif } -void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duration) +void VideoEncoder::WriteVideo(std::unique_ptr frame) { std::shared_lock lock(videoStateLock); #if _DEBUG { - std::wstring debugString = L"Writing Video, Timestamp:" + std::to_wstring(timestamp) + L"\n"; + std::wstring debugString = L"Writing Video, Timestamp:" + 
@@ -302,126 +312,93 @@ void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duratio
 
     if (startTime == INVALID_TIMESTAMP)
     {
-        startTime = timestamp;
+        startTime = frame->timestamp;
 #if _DEBUG
-        std::wstring debugString = L"Start time set from video, Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
+        std::wstring debugString = L"Start time set from video, Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
         OutputDebugString(debugString.data());
 #endif
     }
-    else if (timestamp < startTime)
+    else if (frame->timestamp < startTime)
     {
 #if _DEBUG
-        std::wstring debugString = L"Video not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
+        std::wstring debugString = L"Video not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
         OutputDebugString(debugString.data());
 #endif
         return;
     }
 
-    if (timestamp == prevVideoTime)
+    if (frame->timestamp == prevVideoTime)
     {
 #if _DEBUG
-        std::wstring debugString = L"Video not recorded, Timestamp equals prevVideoTime. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(prevVideoTime) + L"\n";
+        std::wstring debugString = L"Video not recorded, Timestamp equals prevVideoTime. Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(prevVideoTime) + L"\n";
         OutputDebugString(debugString.data());
 #endif
         return;
     }
 
-    LONGLONG sampleTimeNow = timestamp;
+    LONGLONG sampleTimeNow = frame->timestamp;
     LONGLONG sampleTimeStart = startTime;
     LONGLONG sampleTime = sampleTimeNow - sampleTimeStart;
 
     if (prevVideoTime != INVALID_TIMESTAMP)
     {
-        duration = sampleTime - prevVideoTime;
+        frame->duration = sampleTime - prevVideoTime;
 #if _DEBUG
-        std::wstring debugString = L"Updated write video duration:" + std::to_wstring(duration) + L", SampleTime:" + std::to_wstring(sampleTime) + L", PrevVideoTime:" + std::to_wstring(prevVideoTime) + L"\n";
+        std::wstring debugString = L"Updated write video duration:" + std::to_wstring(frame->duration) + L", SampleTime:" + std::to_wstring(sampleTime) + L", PrevVideoTime:" + std::to_wstring(prevVideoTime) + L"\n";
         OutputDebugString(debugString.data());
 #endif
     }
 
-    // Copy frame to a temporary buffer and process on a background thread.
-#if HARDWARE_ENCODE_VIDEO
-    BYTE* tmpVideoBuffer = new BYTE[(int)(FRAME_BPP_NV12 * frameHeight * frameWidth)];
-    memcpy(tmpVideoBuffer, buffer, (int)(FRAME_BPP_NV12 * frameHeight * frameWidth));
-#else
-    BYTE* tmpVideoBuffer = new BYTE[frameHeight * frameStride];
-    memcpy(tmpVideoBuffer, buffer, frameHeight * frameStride);
-#endif
-
-    concurrency::create_task([=]()
+    videoWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(videoWriteFuture) }]() mutable
     {
+        if (previousWriteFuture.valid())
+        {
+            previousWriteFuture.wait();
+            previousWriteFuture = {};
+        }
         std::shared_lock lock(videoStateLock);
-        HRESULT hr = E_PENDING;
+        HRESULT hr = S_OK;
 
         if (sinkWriter == NULL || !isRecording)
         {
             OutputDebugString(L"Must start recording before writing video frames.\n");
-            delete[] tmpVideoBuffer;
             return;
         }
 
-        LONG cbWidth = frameStride;
-        DWORD cbBuffer = cbWidth * frameHeight;
-        DWORD imageHeight = frameHeight;
-
-#if HARDWARE_ENCODE_VIDEO
-        cbWidth = frameWidth;
-        cbBuffer = (int)(FRAME_BPP_NV12 * frameWidth * frameHeight);
-        imageHeight = (int)(FRAME_BPP_NV12 * frameHeight);
-#endif
-
+        DWORD cbBuffer = frameStride * frameHeight;
         IMFSample* pVideoSample = NULL;
-        IMFMediaBuffer* pVideoBuffer = NULL;
-        BYTE* pData = NULL;
-
-        // Create a new memory buffer.
-        hr = MFCreateMemoryBuffer(cbBuffer, &pVideoBuffer);
-
-        // Lock the buffer and copy the video frame to the buffer.
-        if (SUCCEEDED(hr)) { hr = pVideoBuffer->Lock(&pData, NULL, NULL); }
-
-        if (SUCCEEDED(hr))
-        {
-            //TODO: Can pVideoBuffer be created from an ID3D11Texture2D*?
-            hr = MFCopyImage(
-                pData,          // Destination buffer.
-                cbWidth,        // Destination stride.
-                tmpVideoBuffer,
-                cbWidth,        // Source stride.
-                cbWidth,        // Image width in bytes.
-                imageHeight     // Image height in pixels.
-            );
-        }
-
-        if (pVideoBuffer)
+#if _DEBUG
         {
-            pVideoBuffer->Unlock();
+            std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(frame->duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
+            OutputDebugString(debugString.data());
         }
+#endif
 
-#if _DEBUG
-        {
-            std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
-            OutputDebugString(debugString.data());
-        }
+#if !HARDWARE_ENCODE_VIDEO
        // In case the user locks the frame but forgets to unlock
+        frame->Unlock();
 #endif
 
         // Set the data length of the buffer.
-        if (SUCCEEDED(hr)) { hr = pVideoBuffer->SetCurrentLength(cbBuffer); }
+        if (SUCCEEDED(hr)) { hr = frame->mediaBuffer->SetCurrentLength(frameHeight * frameStride); }
 
         // Create a media sample and add the buffer to the sample.
         if (SUCCEEDED(hr)) { hr = MFCreateSample(&pVideoSample); }
-        if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(pVideoBuffer); }
+        if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(frame->mediaBuffer); }
         if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleTime(sampleTime); } //100-nanosecond units
-        if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleDuration(duration); } //100-nanosecond units
+        if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleDuration(frame->duration); } //100-nanosecond units
 
         // Send the sample to the Sink Writer.
         if (SUCCEEDED(hr)) { hr = sinkWriter->WriteSample(videoStreamIndex, pVideoSample); }
 
         SafeRelease(pVideoSample);
-        SafeRelease(pVideoBuffer);
-        delete[] tmpVideoBuffer;
+
+        {
+            std::shared_lock lock(videoInputPoolLock);
+            videoInputPool.push(std::move(frame));
+        }
 
         if (FAILED(hr))
         {
@@ -456,6 +433,11 @@ void VideoEncoder::StopRecording()
 
     concurrency::create_task([&]
     {
+        if (videoWriteFuture.valid())
+        {
+            videoWriteFuture.wait();
+            videoWriteFuture = {};
+        }
         while (!videoQueue.empty())
         {
             videoQueue.pop();
@@ -473,6 +455,11 @@ void VideoEncoder::StopRecording()
 
     concurrency::create_task([&]
     {
+        if (audioWriteFuture.valid())
+        {
+            audioWriteFuture.wait();
+            audioWriteFuture = {};
+        }
         while (!audioQueue.empty())
         {
             audioQueue.pop();
@@ -508,31 +495,65 @@ void VideoEncoder::StopRecording()
     SafeRelease(sinkWriter);
 }
 
-void VideoEncoder::QueueVideoFrame(byte* buffer, LONGLONG timestamp, LONGLONG duration)
+std::unique_ptr<VideoEncoder::VideoInput> VideoEncoder::GetAvailableVideoFrame()
+{
+    std::shared_lock lock(videoInputPoolLock);
+    if (videoInputPool.empty())
+    {
+#if HARDWARE_ENCODE_VIDEO
+        return std::make_unique<VideoInput>(device);
+#else
+        return std::make_unique<VideoInput>(frameStride * frameHeight);
+#endif
+    }
+    else
+    {
+        auto result = std::move(videoInputPool.front());
+        videoInputPool.pop();
+        return result;
+    }
+}
+
+std::unique_ptr<VideoEncoder::AudioInput> VideoEncoder::GetAvailableAudioFrame()
+{
+    std::shared_lock lock(audioInputPoolLock);
+    if (audioInputPool.empty())
+    {
+        return std::make_unique<AudioInput>();
+    }
+    else
+    {
+        auto result = std::move(audioInputPool.front());
+        audioInputPool.pop();
+        return result;
+    }
+}
+
+void VideoEncoder::QueueVideoFrame(std::unique_ptr<VideoInput> frame)
 {
     std::shared_lock lock(videoStateLock);
     if (acceptQueuedFrames)
     {
-        videoQueue.push(VideoInput(buffer, timestamp, duration));
 #if _DEBUG
-        std::wstring debugString = L"Pushed Video Input, Timestamp:" + std::to_wstring(timestamp) + L"\n";
-        OutputDebugString(debugString.data());
+        std::wstring debugString = L"Pushed Video Input, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
+        OutputDebugString(debugString.data());
 #endif
+        videoQueue.push(std::move(frame));
     }
 }
 
-void VideoEncoder::QueueAudioFrame(byte* buffer, int bufferSize, LONGLONG timestamp)
+void VideoEncoder::QueueAudioFrame(std::unique_ptr<AudioInput> frame)
 {
     std::shared_lock lock(videoStateLock);
     if (acceptQueuedFrames)
     {
-        audioQueue.push(AudioInput(buffer, bufferSize, timestamp));
 #if _DEBUG
-        std::wstring debugString = L"Pushed Audio Input, Timestamp:" + std::to_wstring(timestamp) + L"\n";
+        std::wstring debugString = L"Pushed Audio Input, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
         OutputDebugString(debugString.data());
 #endif
+        audioQueue.push(std::move(frame));
     }
 }
 
@@ -548,8 +569,7 @@ void VideoEncoder::Update()
     {
         if (isRecording)
         {
-            VideoInput input = videoQueue.front();
-            WriteVideo(input.sharedBuffer, input.timestamp, input.duration);
+            WriteVideo(std::move(videoQueue.front()));
             videoQueue.pop();
         }
     }
@@ -558,9 +578,7 @@ void VideoEncoder::Update()
     {
         if (isRecording)
         {
-            AudioInput input = audioQueue.front();
-            WriteAudio(input.buffer, input.bufferSize, input.timestamp);
-            delete[] input.buffer;
+            WriteAudio(std::move(audioQueue.front()));
             audioQueue.pop();
         }
     }
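One caveat in the pool accessors above: GetAvailableVideoFrame, GetAvailableAudioFrame, and the recycle blocks in the write tasks all mutate a std::queue while holding only a std::shared_lock, which takes shared (reader) ownership of the std::shared_mutex, so two of these calls may run concurrently. If exclusive access is intended, std::unique_lock would express it; a suggested variant (not what this diff does):

    std::unique_ptr<VideoEncoder::VideoInput> VideoEncoder::GetAvailableVideoFrame()
    {
        std::unique_lock lock(videoInputPoolLock); // exclusive: we pop from the pool
        if (videoInputPool.empty())
        {
    #if HARDWARE_ENCODE_VIDEO
            return std::make_unique<VideoInput>(device);
    #else
            return std::make_unique<VideoInput>(frameStride * frameHeight);
    #endif
        }
        auto result = std::move(videoInputPool.front());
        videoInputPool.pop();
        return result;
    }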
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
index 35030056..76598fc9 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
@@ -38,50 +38,155 @@ class VideoEncoder
     void StopRecording();
 
     // Used for recording video from a background thread.
-    void QueueVideoFrame(byte* buffer, LONGLONG timestamp, LONGLONG duration);
-    void QueueAudioFrame(byte* buffer, int bufferSize, LONGLONG timestamp);
+    class VideoInput;
+    class AudioInput;
+    std::unique_ptr<VideoInput> GetAvailableVideoFrame();
+    std::unique_ptr<AudioInput> GetAvailableAudioFrame();
+    void QueueVideoFrame(std::unique_ptr<VideoInput> frame);
+    void QueueAudioFrame(std::unique_ptr<AudioInput> frame);
 
     // Do not call this from a background thread.
     void Update();
 
-private:
-    void WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duration);
-    void WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp);
+    class VideoInputBase
+    {
+    public:
+        IMFMediaBuffer* mediaBuffer = nullptr;
+        LONGLONG timestamp = INVALID_TIMESTAMP;
+        LONGLONG duration = INVALID_TIMESTAMP;
+    };
 
-    LARGE_INTEGER freq;
+#if HARDWARE_ENCODE_VIDEO
+    class VideoInput : public VideoInputBase
+    {
+        ID3D11Device* device;
+        ID3D11DeviceContext* deviceContext;
+        ID3D11Texture2D* texture = nullptr;
+
+    public:
+        VideoInput(ID3D11Device* _device) : device(_device)
+        {
+            device->AddRef();
+            device->GetImmediateContext(&deviceContext);
+        }
+
+        ~VideoInput()
+        {
+            SafeRelease(texture);
+            SafeRelease(deviceContext);
+            SafeRelease(device);
+            SafeRelease(mediaBuffer);
+        }
 
-    class VideoInput
+        void CopyFrom(ID3D11Texture2D* source)
+        {
+            if (texture == nullptr)
+            {
+                D3D11_TEXTURE2D_DESC existingDesc;
+                source->GetDesc(&existingDesc);
+
+                D3D11_TEXTURE2D_DESC textureDesc;
+                ZeroMemory(&textureDesc, sizeof(textureDesc));
+                textureDesc.Width = existingDesc.Width;
+                textureDesc.Height = existingDesc.Height;
+                textureDesc.MipLevels = existingDesc.MipLevels;
+                textureDesc.ArraySize = existingDesc.ArraySize;
+                textureDesc.Format = existingDesc.Format;
+                textureDesc.SampleDesc.Count = existingDesc.SampleDesc.Count;
+                textureDesc.SampleDesc.Quality = existingDesc.SampleDesc.Quality;
+                textureDesc.Usage = D3D11_USAGE_DEFAULT;
+
+                HRESULT hr = device->CreateTexture2D(&textureDesc, NULL, &texture);
+                if (SUCCEEDED(hr)) { hr = MFCreateDXGISurfaceBuffer(IID_ID3D11Texture2D, texture, 0, true, &mediaBuffer); }
+                if (FAILED(hr))
+                {
+                    OutputDebugString(L"Creating video frame failed");
+                    return;
+                }
+            }
+
+            deviceContext->CopyResource(texture, source);
+        }
+    };
+#else
+    class VideoInput : public VideoInputBase
     {
+        byte* buffer = nullptr;
+
     public:
-        byte * sharedBuffer;
+        VideoInput(size_t bufferSize)
+        {
+            auto hr = MFCreateMemoryBuffer(bufferSize, &mediaBuffer);
+        }
 
-        LONGLONG timestamp;
-        LONGLONG duration;
+        ~VideoInput()
+        {
+            Unlock();
+            SafeRelease(mediaBuffer);
+        }
 
-        VideoInput(byte* buffer, LONGLONG timestamp, LONGLONG duration)
+        byte* Lock()
         {
-            this->sharedBuffer = buffer;
-            this->timestamp = timestamp;
-            this->duration = duration;
+            if (buffer == nullptr)
+            {
+                mediaBuffer->Lock(&buffer, NULL, NULL);
+            }
+            return buffer;
+        }
+
+        void Unlock()
+        {
+            if (buffer != nullptr)
+            {
+                mediaBuffer->Unlock();
+                buffer = nullptr;
+            }
         }
     };
+#endif
 
     class AudioInput
     {
     public:
-        byte* buffer;
+        IMFMediaBuffer* mediaBuffer = nullptr;
+        int capacity = 0;
+        int currentSize = 0;
         LONGLONG timestamp;
-        int bufferSize;
 
-        AudioInput(byte* buffer, int buffSize, LONGLONG timestamp)
+        ~AudioInput()
+        {
+            SafeRelease(mediaBuffer);
+        }
+
+        void SetData(const byte* buffer, int bufferSize, LONGLONG timestamp)
         {
-            bufferSize = buffSize;
-            this->buffer = new byte[buffSize];
-            memcpy(this->buffer, buffer, buffSize);
+            if (bufferSize > capacity)
+            {
+                SafeRelease(mediaBuffer);
+                auto hr = MFCreateMemoryBuffer(bufferSize, &mediaBuffer);
+                if (FAILED(hr))
+                {
+                    OutputDebugString(L"Failed to create audio memory buffer");
+                }
+                capacity = bufferSize;
+            }
+
+            byte* lockedBuffer;
+            if (FAILED(mediaBuffer->Lock(&lockedBuffer, nullptr, nullptr)))
+            {
+                return;
+            }
+            memcpy(lockedBuffer, buffer, bufferSize);
+            mediaBuffer->Unlock();
+            mediaBuffer->SetCurrentLength(bufferSize);
+            currentSize = bufferSize;
             this->timestamp = timestamp;
         }
     };
 
+private:
+    void WriteVideo(std::unique_ptr<VideoInput> frame);
+    void WriteAudio(std::unique_ptr<AudioInput> frame);
+
+    LARGE_INTEGER freq;
+
     IMFSinkWriter* sinkWriter;
     DWORD videoStreamIndex = MAXDWORD;
     DWORD audioStreamIndex = MAXDWORD;
@@ -108,12 +213,19 @@ class VideoEncoder
 
     LONGLONG startTime = INVALID_TIMESTAMP;
 
-    std::queue<VideoInput> videoQueue;
-    std::queue<AudioInput> audioQueue;
+    std::queue<std::unique_ptr<VideoInput>> videoInputPool;
+    std::queue<std::unique_ptr<VideoInput>> videoQueue;
+    std::queue<std::unique_ptr<AudioInput>> audioInputPool;
+    std::queue<std::unique_ptr<AudioInput>> audioQueue;
 
     std::shared_mutex videoStateLock;
+    std::shared_mutex videoInputPoolLock;
+    std::shared_mutex audioInputPoolLock;
+    std::future<void> videoWriteFuture;
+    std::future<void> audioWriteFuture;
 
 #if HARDWARE_ENCODE_VIDEO
+    ID3D11Device* device;
     IMFDXGIDeviceManager* deviceManager = NULL;
     UINT resetToken = 0;
 #endif
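The hardware-encode VideoInput above keeps a private ID3D11Texture2D and exposes it to Media Foundation through MFCreateDXGISurfaceBuffer, so recording a frame becomes a GPU-to-GPU CopyResource plus a buffer the sink writer can consume directly, with no CPU readback. A minimal sketch of that wrapping step (error handling elided; fBottomUpWhenLinear = true as in the diff):

    #include <d3d11.h>
    #include <mfapi.h>

    // Wrap an existing D3D11 texture in an IMFMediaBuffer for an IMFSinkWriter.
    IMFMediaBuffer* WrapTexture(ID3D11Texture2D* texture)
    {
        IMFMediaBuffer* buffer = nullptr;
        HRESULT hr = MFCreateDXGISurfaceBuffer(
            __uuidof(ID3D11Texture2D), // riid of the wrapped surface
            texture,                   // referenced, not copied
            0,                         // subresource index
            TRUE,                      // fBottomUpWhenLinear
            &buffer);
        return SUCCEEDED(hr) ? buffer : nullptr;
    }

This is presumably also why Initialize now enables ID3D10Multithread protection: the sink writer's encoder threads and the compositor share the same D3D11 device.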
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
index fb6433c6..7126a03b 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
@@ -26,56 +26,6 @@ static BYTE* depthBytes = new BYTE[FRAME_BUFSIZE_DEPTH16];
 static BYTE* bodyMaskBytes = new BYTE[FRAME_BUFSIZE_DEPTH16];
 static BYTE* holoBytes = new BYTE[FRAME_BUFSIZE_RGBA];
 
-#define NUM_VIDEO_BUFFERS 10
-
-static byte** videoBytes = nullptr;
-static int videoBufferIndex = 0;
-
-void AllocateVideoBuffers(VideoRecordingFrameLayout frameLayout)
-{
-    if (videoBytes != nullptr)
-        return;
-
-    videoBytes = new byte*[NUM_VIDEO_BUFFERS];
-
-    int frameBufferSize;
-    if (frameLayout == VideoRecordingFrameLayout::Quad)
-    {
-#if HARDWARE_ENCODE_VIDEO
-        frameBufferSize = QUAD_FRAME_BUFSIZE_NV12;
-#else
-        frameBufferSize = QUAD_FRAME_BUFSIZE_RGBA;
-#endif
-    }
-    else
-    {
-#if HARDWARE_ENCODE_VIDEO
-        frameBufferSize = FRAME_BUFSIZE_NV12;
-#else
-        frameBufferSize = FRAME_BUFSIZE_RGBA;
-#endif
-    }
-
-    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
-    {
-        videoBytes[i] = new byte[frameBufferSize];
-    }
-}
-
-void FreeVideoBuffers()
-{
-    if (videoBytes == nullptr)
-        return;
-
-    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
-    {
-        delete[] videoBytes[i];
-    }
-    delete[] videoBytes;
-    videoBytes = nullptr;
-}
-
-
 static ID3D11Texture2D* g_holoRenderTexture = nullptr;
 
 static ID3D11Texture2D* g_colorTexture = nullptr;
@@ -168,19 +118,16 @@ static int queuedVideoFrameCount = 0;
 
 void UpdateVideoRecordingFrame()
 {
+#if !HARDWARE_ENCODE_VIDEO
     //We have an old frame, lets get the data and queue it now
     if (VideoTextureBuffer.IsDataAvailable())
     {
-        videoBufferIndex = (videoBufferIndex + 1) % NUM_VIDEO_BUFFERS;
-#if HARDWARE_ENCODE_VIDEO
-        float bpp = FRAME_BPP_NV12;
-#else
-        float bpp = FRAME_BPP_RGBA;
-#endif
-
-        VideoTextureBuffer.FetchTextureData(g_pD3D11Device, videoBytes[videoBufferIndex], bpp);
-        ci->RecordFrameAsync(videoBytes[videoBufferIndex], queuedVideoFrameTime, queuedVideoFrameCount);
+        auto frame = ci->GetAvailableRecordFrame();
+        VideoTextureBuffer.FetchTextureData(g_pD3D11Device, frame->Lock(), FRAME_BPP_RGBA);
+        frame->timestamp = queuedVideoFrameTime;
+        ci->RecordFrameAsync(std::move(frame), queuedVideoFrameCount);
     }
+#endif
 
     if (lastVideoFrame >= 0 && lastRecordedVideoFrame != lastVideoFrame)
     {
@@ -210,7 +157,14 @@ void UpdateVideoRecordingFrame()
         lastRecordedVideoFrame = lastVideoFrame;
         queuedVideoFrameTime = lastVideoFrame * ci->GetColorDuration();
 
+#if HARDWARE_ENCODE_VIDEO
+        auto frame = ci->GetAvailableRecordFrame();
+        frame->CopyFrom(g_videoTexture);
+        frame->timestamp = queuedVideoFrameTime;
+        ci->RecordFrameAsync(std::move(frame), queuedVideoFrameCount);
+#else
         VideoTextureBuffer.PrepareTextureFetch(g_pD3D11Device, g_videoTexture);
+#endif
     }
 
     lastVideoFrame = ci->compositeFrameIndex;
@@ -444,8 +398,6 @@ UNITYDLL void StopFrameProvider()
     {
         ci->StopFrameProvider();
     }
-
-    FreeVideoBuffers();
 }
 
 UNITYDLL void SetAudioData(BYTE* audioData, int audioSize, double audioTime)
@@ -482,7 +434,6 @@ UNITYDLL bool StartRecording(VideoRecordingFrameLayout frameLayout, LPCWSTR lpcD
 {
     lastVideoFrame = -1;
     lastRecordedVideoFrame = -1;
-    AllocateVideoBuffers(frameLayout);
     VideoTextureBuffer.ReleaseTextures();
     VideoTextureBuffer.Reset();
     isRecording = ci->StartRecording(frameLayout, lpcDesiredFileName, desiredFileNameLength, inputFileNameLength, lpFileName, fileNameLength);
@@ -497,7 +448,6 @@ UNITYDLL void StopRecording()
     if (videoInitialized && ci != nullptr)
     {
         ci->StopRecording();
-        FreeVideoBuffers();
         isRecording = false;
     }
 }
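The CompositionManager change below replaces the one-shot audio start time (whatever dspTime/frame-index pair OnAudioFilterRead happened to see first) with an iterative estimate of the offset between Unity's DSP clock and the capture-frame clock. Each callback predicts the current frame index from the running offset; a one-frame disagreement moves the offset halfway toward the violated bound and halves the error range (a bisection), while a larger jump resets the estimate. A condensed sketch of that update rule (C++ for illustration; the actual code below is C#):

    #include <cmath>

    struct StartEstimator
    {
        double frameDuration; // seconds per capture frame
        double timeOffset;    // maps dspTime onto the capture-frame clock
        double errorRange;    // current uncertainty in timeOffset

        void Update(double dspTime, int frameIndex)
        {
            int expected = (int)((timeOffset + dspTime) / frameDuration); // rounded down
            int frameError = frameIndex - expected;
            if (frameError == 0) { return; }                 // estimate agrees
            if (std::abs(frameError) == 1)
            {
                timeOffset += frameError * errorRange / 2.0; // step toward the violated bound
                errorRange /= 2.0;                           // bisect the uncertainty interval
            }
            else                                             // clock jumped: start over
            {
                errorRange = frameDuration;
                timeOffset = frameIndex * frameDuration - dspTime;
            }
        }
    };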
diff --git a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs
index eb4bc7c0..327cf6cb 100644
--- a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs
+++ b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs
@@ -705,6 +705,55 @@ private void ResetCompositor()
         }
     }
 
+    struct AudioStartEstimation
+    {
+        public bool IsValid { get; }
+
+        public AudioStartEstimation(double dspTime, int frameIndex, double frameDuration)
+        {
+            IsValid = true;
+            this.frameDuration = frameDuration;
+            errorRange = timeOffset = 0.0; // just to initialize the struct fully
+            ResetEstimation(dspTime, frameIndex);
+        }
+
+        private void ResetEstimation(double dspTime, int frameIndex)
+        {
+            errorRange = frameDuration;
+            timeOffset = frameIndex * frameDuration - dspTime;
+        }
+
+        public void Update(double dspTime, int frameIndex)
+        {
+            int expectedFrameIndex = (int)((timeOffset + dspTime) / frameDuration); // rounded down
+            int frameError = frameIndex - expectedFrameIndex;
+
+            if (frameError == 0)
+            {
+                //Debug.Log("I was correct");
+                return;
+            }
+            else if (System.Math.Abs(frameError) == 1)
+            {
+                //Debug.Log($"Corrected {((timeOffset + dspTime) / frameDuration)} ({expectedFrameIndex}) not {frameIndex} timeOffset {frameError}, newTimeOffset {timeOffset}, newErrorRange {errorRange}");
+                double bound = frameError * errorRange; // either lower/upper depending on sign(frameError)
+                timeOffset = timeOffset + bound / 2.0;
+                errorRange /= 2.0; // as long as there are no jumps (abs(frameError) > 1) we approach the correct value
+            }
+            else
+            {
+                Debug.Log($"Time jumped too far, had to reset audio start estimation {((timeOffset + dspTime) / frameDuration)} not {frameIndex}");
+                ResetEstimation(dspTime, frameIndex);
+            }
+        }
+
+        public double GetStartTime(double curDspTime) => timeOffset + curDspTime;
+
+        private double frameDuration;
+        private double timeOffset;
+        private double errorRange;
+    }
+
+    AudioStartEstimation audioStartEstimation;
+
     // This function is not/not always called on the main thread.
     private void OnAudioFilterRead(float[] data, int channels)
     {
@@ -713,15 +762,24 @@ private void OnAudioFilterRead(float[] data, int channels)
             return;
         }
 
+        if (!audioStartEstimation.IsValid)
+        {
+            audioStartEstimation = new AudioStartEstimation(
+                AudioSettings.dspTime,
+                UnityCompositorInterface.GetCaptureFrameIndex(),
+                UnityCompositorInterface.GetColorDuration() / 10000000.0);
+        }
+        else
+        {
+            audioStartEstimation.Update(AudioSettings.dspTime, UnityCompositorInterface.GetCaptureFrameIndex());
+        }
+
         //Create new stream
         if (audioMemoryStream == null)
         {
             audioMemoryStream = new MemoryStream();
             audioStreamWriter = new BinaryWriter(audioMemoryStream);
-            double audioSettingsTime = AudioSettings.dspTime; // Audio time in seconds, more accurate than Time.time
-            double captureFrameTime = UnityCompositorInterface.GetCaptureFrameIndex() * UnityCompositorInterface.GetColorDuration() / 10000000.0; // Capture Frame Time in seconds
-            DebugLog($"Obtained Audio Sample, AudioSettingsTime:{audioSettingsTime}, CaptureFrameTime:{captureFrameTime}");
-            audioStartTime = captureFrameTime;
+            audioStartTime = audioStartEstimation.GetStartTime(AudioSettings.dspTime);
             numCachedAudioFrames = 0;
         }
diff --git a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs
index 2dbdc9b0..d53059e4 100644
--- a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs
+++ b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs
@@ -565,7 +565,8 @@ private IEnumerator OnPostRender()
             }
 
             // convert composite to the format expected by our video encoder (NV12 or BGR)
-            Graphics.Blit(videoSourceTexture, videoOutputTexture, hardwareEncodeVideo ? NV12VideoMat : BGRVideoMat);
+            //Graphics.Blit(videoSourceTexture, videoOutputTexture, hardwareEncodeVideo ? NV12VideoMat : BGRVideoMat);
+            Graphics.Blit(videoSourceTexture, videoOutputTexture, BGRVideoMat);
         }
 
         TextureRenderCompleted?.Invoke();
@@ -613,7 +614,7 @@ private void SetShaderValues()
         RGBToYUVMat.SetFloat("_Width", frameWidth);
         RGBToYUVMat.SetFloat("_Height", frameHeight);
 
-        BGRVideoMat.SetFloat("_YFlip", 0);
+        BGRVideoMat.SetFloat("_YFlip", 1);
     }
 
     ///
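Taken together with the encoder now unconditionally setting inputFormat = MFVideoFormat_RGB32, these last two changes pin recording to the BGR path (the NV12 blit is left commented out rather than removed) and compensate for the vertical orientation with _YFlip. The end-to-end handoff for one recorded video frame, condensed from UpdateVideoRecordingFrame above (nullptr check on the returned frame elided):

    auto frame = ci->GetAvailableRecordFrame(); // reuse or allocate a pooled frame
    #if HARDWARE_ENCODE_VIDEO
    frame->CopyFrom(g_videoTexture);            // GPU-to-GPU copy, no CPU readback
    #else
    VideoTextureBuffer.FetchTextureData(g_pD3D11Device, frame->Lock(), FRAME_BPP_RGBA);
    #endif
    frame->timestamp = queuedVideoFrameTime;    // hundred-nanosecond units
    ci->RecordFrameAsync(std::move(frame), queuedVideoFrameCount);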