diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
index f9c55834..49203d3e 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
@@ -386,10 +386,20 @@ void CompositorInterface::StopRecording()
     activeVideoEncoder = nullptr;
 }
 
-void CompositorInterface::RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime, int numFrames)
+std::unique_ptr<VideoEncoder::VideoInput> CompositorInterface::GetAvailableRecordFrame()
+{
+    if (activeVideoEncoder == nullptr)
+    {
+        OutputDebugString(L"GetAvailableRecordFrame dropped, no active encoder\n");
+        return nullptr;
+    }
+    return activeVideoEncoder->GetAvailableVideoFrame();
+}
+
+void CompositorInterface::RecordFrameAsync(std::unique_ptr<VideoEncoder::VideoInput> frame, int numFrames)
 {
 #if _DEBUG
-    std::wstring debugString = L"RecordFrameAsync called, frameTime:" + std::to_wstring(frameTime) + L", numFrames:" + std::to_wstring(numFrames) + L"\n";
+    std::wstring debugString = L"RecordFrameAsync called, frameTime:" + std::to_wstring(frame->timestamp) + L", numFrames:" + std::to_wstring(numFrames) + L"\n";
     OutputDebugString(debugString.data());
 #endif
 
@@ -407,8 +417,8 @@ void CompositorInterface::RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime,
     // The encoder will update sample times internally based on the first seen sample time when recording.
     // The encoder, however, does assume that audio and video samples will be based on the same source time.
     // Providing audio and video samples with different starting times will cause issues in the generated video file.
-    LONGLONG sampleTime = frameTime;
-    activeVideoEncoder->QueueVideoFrame(videoFrame, sampleTime, numFrames * frameProvider->GetDurationHNS());
+    frame->duration = numFrames * frameProvider->GetDurationHNS();
+    activeVideoEncoder->QueueVideoFrame(std::move(frame));
 }
 
 void CompositorInterface::RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audioTime, int audioSize)
@@ -430,8 +440,9 @@ void CompositorInterface::RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audio
     // The encoder will update sample times internally based on the first seen sample time when recording.
     // The encoder, however, does assume that audio and video samples will be based on the same source time.
     // Providing audio and video samples with different starting times will cause issues in the generated video file.
-    LONGLONG sampleTime = audioTime;
-    activeVideoEncoder->QueueAudioFrame(audioFrame, audioSize, sampleTime);
+    auto frame = activeVideoEncoder->GetAvailableAudioFrame();
+    frame->SetData(audioFrame, audioSize, audioTime);
+    activeVideoEncoder->QueueAudioFrame(std::move(frame));
 }
 
 bool CompositorInterface::ProvidesYUV()
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h
index e3e155f2..b349b126 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h
@@ -80,7 +80,8 @@ class CompositorInterface
     DLLEXPORT void StopRecording();
 
     // frameTime is in hundred nanoseconds
-    DLLEXPORT void RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime, int numFrames);
+    DLLEXPORT std::unique_ptr<VideoEncoder::VideoInput> GetAvailableRecordFrame();
+    DLLEXPORT void RecordFrameAsync(std::unique_ptr<VideoEncoder::VideoInput> frame, int numFrames);
 
     // audioTime is in hundred nanoseconds
     DLLEXPORT void RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audioTime, int audioSize);
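Note on the interface change above: recording now hands out pooled frame objects and takes them back by value. The caller asks the active encoder for a reusable VideoEncoder::VideoInput, fills it, and moves it into RecordFrameAsync, so pixel data lands once in a reusable IMFMediaBuffer instead of being copied out of a raw BYTE* argument. A minimal sketch of the acquire/recycle pattern, with hypothetical Frame and FramePool standing in for VideoEncoder::VideoInput and the encoder's internal pool:

    #include <memory>
    #include <mutex>
    #include <queue>

    struct Frame { long long timestamp = 0; /* owns an IMFMediaBuffer in the real code */ };

    class FramePool
    {
    public:
        std::unique_ptr<Frame> Acquire()
        {
            std::unique_lock<std::mutex> lock(mutex);
            if (pool.empty()) { return std::make_unique<Frame>(); } // grow on demand
            auto frame = std::move(pool.front());
            pool.pop();
            return frame;
        }

        void Recycle(std::unique_ptr<Frame> frame) // writer returns the frame when done
        {
            std::unique_lock<std::mutex> lock(mutex);
            pool.push(std::move(frame));
        }

    private:
        std::mutex mutex;
        std::queue<std::unique_ptr<Frame>> pool;
    };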
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
index 5f9dcf69..a44e91a8 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
@@ -6,6 +6,8 @@
 
 #include "codecapi.h"
 
+#define NUM_VIDEO_BUFFERS 10
+
 VideoEncoder::VideoEncoder(UINT frameWidth, UINT frameHeight, UINT frameStride, UINT fps, UINT32 audioSampleRate, UINT32 audioChannels, UINT32 audioBPS, UINT32 videoBitrate, UINT32 videoMpegLevel) :
     frameWidth(frameWidth),
@@ -21,10 +23,11 @@ VideoEncoder::VideoEncoder(UINT frameWidth, UINT frameHeight, UINT frameStride,
     isRecording(false)
 {
 #if HARDWARE_ENCODE_VIDEO
-    inputFormat = MFVideoFormat_NV12;
+    inputFormat = MFVideoFormat_NV12;
 #else
-    inputFormat = MFVideoFormat_RGB32;
+    inputFormat = MFVideoFormat_RGB32;
 #endif
+    inputFormat = MFVideoFormat_RGB32;
 }
 
 VideoEncoder::~VideoEncoder()
@@ -41,11 +44,26 @@ bool VideoEncoder::Initialize(ID3D11Device* device)
 #if HARDWARE_ENCODE_VIDEO
     MFCreateDXGIDeviceManager(&resetToken, &deviceManager);
+    this->device = device;
 
     if (deviceManager != nullptr)
     {
         OutputDebugString(L"Resetting device manager with graphics device.\n");
-        deviceManager->ResetDevice(device, resetToken);
+        hr = deviceManager->ResetDevice(device, resetToken);
+    }
+
+    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
+    {
+        videoInputPool.push(std::make_unique<VideoInput>(device));
+    }
+
+    ID3D10Multithread* multithread;
+    device->QueryInterface(&multithread);
+    multithread->SetMultithreadProtected(TRUE);
+
+#else
+    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
+    {
+        videoInputPool.push(std::make_unique<VideoInput>(frameHeight * frameStride));
     }
 #endif
@@ -72,7 +90,7 @@ void VideoEncoder::StartRecording(LPCWSTR videoPath, bool encodeAudio)
     prevVideoTime = INVALID_TIMESTAMP;
     prevAudioTime = INVALID_TIMESTAMP;
 
-    HRESULT hr = E_PENDING;
+    HRESULT hr = S_OK;
     sinkWriter = NULL;
     videoStreamIndex = MAXDWORD;
@@ -87,13 +105,14 @@
 #endif
 
     IMFAttributes *attr = nullptr;
-    MFCreateAttributes(&attr, 3);
+    MFCreateAttributes(&attr, 4);
 
     if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_SINK_WRITER_DISABLE_THROTTLING, TRUE); }
 #if HARDWARE_ENCODE_VIDEO
     if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, true); }
     if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, false); }
+    if (SUCCEEDED(hr)) { hr = attr->SetUnknown(MF_SINK_WRITER_D3D_MANAGER, deviceManager); }
 #endif
 
     hr = MFCreateSinkWriterFromURL(videoPath, NULL, attr, &sinkWriter);
@@ -138,6 +157,10 @@
     if (SUCCEEDED(hr)) { hr = MFSetAttributeSize(pVideoTypeIn, MF_MT_FRAME_SIZE, frameWidth, frameHeight); }
     if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(pVideoTypeIn, MF_MT_FRAME_RATE, fps, 1); }
     if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(pVideoTypeIn, MF_MT_PIXEL_ASPECT_RATIO, 1, 1); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_DEFAULT_STRIDE, frameStride); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, TRUE); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_SAMPLE_SIZE, frameStride * frameHeight); }
     if (SUCCEEDED(hr)) { hr = sinkWriter->SetInputMediaType(videoStreamIndex, pVideoTypeIn, NULL); }
 
     if (encodeAudio)
@@ -173,12 +196,12 @@
 #endif
 }
 
-void VideoEncoder::WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp)
+void VideoEncoder::WriteAudio(std::unique_ptr<AudioInput> frame)
 {
     std::shared_lock lock(videoStateLock);
 #if _DEBUG
     {
-        std::wstring debugString = L"Writing Audio, Timestamp:" + std::to_wstring(timestamp) + L"\n";
+        std::wstring debugString = L"Writing Audio, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
         OutputDebugString(debugString.data());
     }
 #endif
@@ -186,33 +209,33 @@
 #if ENCODE_AUDIO
     if (!isRecording)
     {
-        std::wstring debugString = L"WriteAudio call failed: StartTime:" + std::to_wstring(startTime) + L", Timestamp:" + std::to_wstring(timestamp) + L"\n";
+        std::wstring debugString = L"WriteAudio call failed: StartTime:" + std::to_wstring(startTime) + L", Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
         OutputDebugString(debugString.data());
         return;
     }
     else if (startTime == INVALID_TIMESTAMP)
     {
-        startTime = timestamp;
+        startTime = frame->timestamp;
 #if _DEBUG
-        std::wstring debugString = L"Start time set from audio, Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
+        std::wstring debugString = L"Start time set from audio, Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
         OutputDebugString(debugString.data());
 #endif
     }
-    else if (timestamp < startTime)
+    else if (frame->timestamp < startTime)
     {
 #if _DEBUG
-        std::wstring debugString = L"Audio not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n"; OutputDebugString(debugString.data()); #endif return; } - LONGLONG sampleTimeNow = timestamp; + LONGLONG sampleTimeNow = frame->timestamp; LONGLONG sampleTimeStart = startTime; LONGLONG sampleTime = sampleTimeNow - sampleTimeStart; - LONGLONG duration = ((LONGLONG)((((float)AUDIO_SAMPLE_RATE * (16.0f /*bits per sample*/ / 8.0f /*bits per byte*/)) / (float)bufferSize) * 10000)); + LONGLONG duration = ((LONGLONG)((((float)AUDIO_SAMPLE_RATE * (16.0f /*bits per sample*/ / 8.0f /*bits per byte*/)) / (float)frame->currentSize) * 10000)); if (prevAudioTime != INVALID_TIMESTAMP) { duration = sampleTime - prevAudioTime; @@ -222,74 +245,61 @@ void VideoEncoder::WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp) #endif } - // Copy frame to a temporary buffer and process on a background thread. - byte* tmpAudioBuffer = new byte[bufferSize]; - memcpy(tmpAudioBuffer, buffer, bufferSize); - - concurrency::create_task([=]() + audioWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(audioWriteFuture) }]() mutable { + if (previousWriteFuture.valid()) + { + previousWriteFuture.wait(); + previousWriteFuture = {}; + } std::shared_lock lock(videoStateLock); - HRESULT hr = E_PENDING; if (sinkWriter == NULL || !isRecording) { OutputDebugString(L"Must start recording before writing audio frames.\n"); - delete[] tmpAudioBuffer; return; } IMFSample* pAudioSample = NULL; - IMFMediaBuffer* pAudioBuffer = NULL; - - const DWORD cbAudioBuffer = bufferSize; - - BYTE* pData = NULL; - - hr = MFCreateMemoryBuffer(cbAudioBuffer, &pAudioBuffer); - if (SUCCEEDED(hr)) { hr = pAudioBuffer->Lock(&pData, NULL, NULL); } - memcpy(pData, tmpAudioBuffer, cbAudioBuffer); - if (pAudioBuffer) - { - pAudioBuffer->Unlock(); - } - #if _DEBUG { - std::wstring debugString = L"Writing Audio Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(duration) + L", BufferLength:" + std::to_wstring(cbAudioBuffer) + L"\n"; + std::wstring debugString = L"Writing Audio Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(duration) + L", BufferLength:" + std::to_wstring(frame->currentSize) + L"\n"; OutputDebugString(debugString.data()); } #endif + HRESULT hr = S_OK; if (SUCCEEDED(hr)) { hr = MFCreateSample(&pAudioSample); } if (SUCCEEDED(hr)) { hr = pAudioSample->SetSampleTime(sampleTime); } if (SUCCEEDED(hr)) { hr = pAudioSample->SetSampleDuration(duration); } - if (SUCCEEDED(hr)) { hr = pAudioBuffer->SetCurrentLength(cbAudioBuffer); } - if (SUCCEEDED(hr)) { hr = pAudioSample->AddBuffer(pAudioBuffer); } + if (SUCCEEDED(hr)) { hr = pAudioSample->AddBuffer(frame->mediaBuffer); } if (SUCCEEDED(hr)) { hr = sinkWriter->WriteSample(audioStreamIndex, pAudioSample); } SafeRelease(pAudioSample); - SafeRelease(pAudioBuffer); if (FAILED(hr)) { OutputDebugString(L"Error writing audio frame.\n"); } - delete[] tmpAudioBuffer; + { + std::shared_lock lock(audioInputPoolLock); + audioInputPool.push(std::move(frame)); + } }); prevAudioTime = sampleTime; #endif } -void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duration) +void VideoEncoder::WriteVideo(std::unique_ptr frame) { std::shared_lock lock(videoStateLock); #if _DEBUG { - std::wstring debugString = L"Writing Video, Timestamp:" + std::to_wstring(timestamp) + L"\n"; + std::wstring debugString = L"Writing Video, Timestamp:" + 
@@ -302,126 +312,93 @@ void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duratio
 
     if (startTime == INVALID_TIMESTAMP)
     {
-        startTime = timestamp;
+        startTime = frame->timestamp;
 #if _DEBUG
-        std::wstring debugString = L"Start time set from video, Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
+        std::wstring debugString = L"Start time set from video, Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
         OutputDebugString(debugString.data());
 #endif
     }
-    else if (timestamp < startTime)
+    else if (frame->timestamp < startTime)
     {
 #if _DEBUG
-        std::wstring debugString = L"Video not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
+        std::wstring debugString = L"Video not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
         OutputDebugString(debugString.data());
 #endif
         return;
     }
 
-    if (timestamp == prevVideoTime)
+    if (frame->timestamp == prevVideoTime)
     {
 #if _DEBUG
-        std::wstring debugString = L"Video not recorded, Timestamp equals prevVideoTime. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(prevVideoTime) + L"\n";
+        std::wstring debugString = L"Video not recorded, Timestamp equals prevVideoTime. Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(prevVideoTime) + L"\n";
         OutputDebugString(debugString.data());
 #endif
         return;
     }
 
-    LONGLONG sampleTimeNow = timestamp;
+    LONGLONG sampleTimeNow = frame->timestamp;
     LONGLONG sampleTimeStart = startTime;
     LONGLONG sampleTime = sampleTimeNow - sampleTimeStart;
 
     if (prevVideoTime != INVALID_TIMESTAMP)
     {
-        duration = sampleTime - prevVideoTime;
+        frame->duration = sampleTime - prevVideoTime;
 #if _DEBUG
-        std::wstring debugString = L"Updated write video duration:" + std::to_wstring(duration) + L", SampleTime:" + std::to_wstring(sampleTime) + L", PrevVideoTime:" + std::to_wstring(prevVideoTime) + L"\n";
+        std::wstring debugString = L"Updated write video duration:" + std::to_wstring(frame->duration) + L", SampleTime:" + std::to_wstring(sampleTime) + L", PrevVideoTime:" + std::to_wstring(prevVideoTime) + L"\n";
         OutputDebugString(debugString.data());
 #endif
     }
 
-    // Copy frame to a temporary buffer and process on a background thread.
-#if HARDWARE_ENCODE_VIDEO
-    BYTE* tmpVideoBuffer = new BYTE[(int)(FRAME_BPP_NV12 * frameHeight * frameWidth)];
-    memcpy(tmpVideoBuffer, buffer, (int)(FRAME_BPP_NV12 * frameHeight * frameWidth));
-#else
-    BYTE* tmpVideoBuffer = new BYTE[frameHeight * frameStride];
-    memcpy(tmpVideoBuffer, buffer, frameHeight * frameStride);
-#endif
-
-    concurrency::create_task([=]()
+    videoWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(videoWriteFuture) }]() mutable
     {
+        if (previousWriteFuture.valid())
+        {
+            previousWriteFuture.wait();
+            previousWriteFuture = {};
+        }
         std::shared_lock lock(videoStateLock);
-        HRESULT hr = E_PENDING;
+        HRESULT hr = S_OK;
 
         if (sinkWriter == NULL || !isRecording)
         {
             OutputDebugString(L"Must start recording before writing video frames.\n");
-            delete[] tmpVideoBuffer;
             return;
         }
 
-        LONG cbWidth = frameStride;
-        DWORD cbBuffer = cbWidth * frameHeight;
-        DWORD imageHeight = frameHeight;
-
-#if HARDWARE_ENCODE_VIDEO
-        cbWidth = frameWidth;
-        cbBuffer = (int)(FRAME_BPP_NV12 * frameWidth * frameHeight);
-        imageHeight = (int)(FRAME_BPP_NV12 * frameHeight);
-#endif
-
+        DWORD cbBuffer = frameStride * frameHeight;
         IMFSample* pVideoSample = NULL;
-        IMFMediaBuffer* pVideoBuffer = NULL;
-        BYTE* pData = NULL;
-
-        // Create a new memory buffer.
-        hr = MFCreateMemoryBuffer(cbBuffer, &pVideoBuffer);
-
-        // Lock the buffer and copy the video frame to the buffer.
-        if (SUCCEEDED(hr)) { hr = pVideoBuffer->Lock(&pData, NULL, NULL); }
-
-        if (SUCCEEDED(hr))
-        {
-            //TODO: Can pVideoBuffer be created from an ID3D11Texture2D*?
-            hr = MFCopyImage(
-                pData,          // Destination buffer.
-                cbWidth,        // Destination stride.
-                tmpVideoBuffer,
-                cbWidth,        // Source stride.
-                cbWidth,        // Image width in bytes.
-                imageHeight     // Image height in pixels.
-            );
-        }
-
-        if (pVideoBuffer)
+#if _DEBUG
         {
-            pVideoBuffer->Unlock();
+            std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(frame->duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
+            OutputDebugString(debugString.data());
         }
+#endif
 
-#if _DEBUG
-        {
-            std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
-            OutputDebugString(debugString.data());
-        }
+#if !HARDWARE_ENCODE_VIDEO
        // In case the user locks the frame but forgets to unlock
+        frame->Unlock();
 #endif
 
         // Set the data length of the buffer.
-        if (SUCCEEDED(hr)) { hr = pVideoBuffer->SetCurrentLength(cbBuffer); }
+        if (SUCCEEDED(hr)) { hr = frame->mediaBuffer->SetCurrentLength(frameHeight * frameStride); }
 
         // Create a media sample and add the buffer to the sample.
         if (SUCCEEDED(hr)) { hr = MFCreateSample(&pVideoSample); }
-        if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(pVideoBuffer); }
+        if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(frame->mediaBuffer); }
         if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleTime(sampleTime); } //100-nanosecond units
-        if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleDuration(duration); } //100-nanosecond units
+        if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleDuration(frame->duration); } //100-nanosecond units
 
         // Send the sample to the Sink Writer.
         if (SUCCEEDED(hr)) { hr = sinkWriter->WriteSample(videoStreamIndex, pVideoSample); }
 
         SafeRelease(pVideoSample);
-        SafeRelease(pVideoBuffer);
-        delete[] tmpVideoBuffer;
+
+        {
+            std::shared_lock lock(videoInputPoolLock);
+            videoInputPool.push(std::move(frame));
+        }
 
         if (FAILED(hr))
         {
@@ -456,6 +433,11 @@ void VideoEncoder::StopRecording()
 
     concurrency::create_task([&]
     {
+        if (videoWriteFuture.valid())
+        {
+            videoWriteFuture.wait();
+            videoWriteFuture = {};
+        }
         while (!videoQueue.empty())
         {
             videoQueue.pop();
@@ -473,6 +455,11 @@ void VideoEncoder::StopRecording()
 
     concurrency::create_task([&]
     {
+        if (audioWriteFuture.valid())
+        {
+            audioWriteFuture.wait();
+            audioWriteFuture = {};
+        }
         while (!audioQueue.empty())
         {
             audioQueue.pop();
@@ -508,31 +495,65 @@ void VideoEncoder::StopRecording()
     SafeRelease(sinkWriter);
 }
 
-void VideoEncoder::QueueVideoFrame(byte* buffer, LONGLONG timestamp, LONGLONG duration)
+std::unique_ptr<VideoEncoder::VideoInput> VideoEncoder::GetAvailableVideoFrame()
+{
+    std::shared_lock lock(videoInputPoolLock);
+    if (videoInputPool.empty())
+    {
+#if HARDWARE_ENCODE_VIDEO
+        return std::make_unique<VideoInput>(device);
+#else
+        return std::make_unique<VideoInput>(frameStride * frameHeight);
+#endif
+    }
+    else
+    {
+        auto result = std::move(videoInputPool.front());
+        videoInputPool.pop();
+        return result;
+    }
+}
+
+std::unique_ptr<VideoEncoder::AudioInput> VideoEncoder::GetAvailableAudioFrame()
+{
+    std::shared_lock lock(audioInputPoolLock);
+    if (audioInputPool.empty())
+    {
+        return std::make_unique<AudioInput>();
+    }
+    else
+    {
+        auto result = std::move(audioInputPool.front());
+        audioInputPool.pop();
+        return result;
+    }
+}
+
+void VideoEncoder::QueueVideoFrame(std::unique_ptr<VideoInput> frame)
 {
     std::shared_lock lock(videoStateLock);
     if (acceptQueuedFrames)
     {
-        videoQueue.push(VideoInput(buffer, timestamp, duration));
 #if _DEBUG
-        std::wstring debugString = L"Pushed Video Input, Timestamp:" + std::to_wstring(timestamp) + L"\n";
-        OutputDebugString(debugString.data());
+        std::wstring debugString = L"Pushed Video Input, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
+        OutputDebugString(debugString.data());
 #endif
+        videoQueue.push(std::move(frame));
     }
 }
 
-void VideoEncoder::QueueAudioFrame(byte* buffer, int bufferSize, LONGLONG timestamp)
+void VideoEncoder::QueueAudioFrame(std::unique_ptr<AudioInput> frame)
 {
     std::shared_lock lock(videoStateLock);
     if (acceptQueuedFrames)
     {
-        audioQueue.push(AudioInput(buffer, bufferSize, timestamp));
 #if _DEBUG
-        std::wstring debugString = L"Pushed Audio Input, Timestamp:" + std::to_wstring(timestamp) + L"\n";
+        std::wstring debugString = L"Pushed Audio Input, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
         OutputDebugString(debugString.data());
 #endif
+        audioQueue.push(std::move(frame));
     }
 }
 
@@ -548,8 +569,7 @@ void VideoEncoder::Update()
     {
         if (isRecording)
         {
-            VideoInput input = videoQueue.front();
-            WriteVideo(input.sharedBuffer, input.timestamp, input.duration);
+            WriteVideo(std::move(videoQueue.front()));
             videoQueue.pop();
         }
     }
@@ -558,9 +578,7 @@ void VideoEncoder::Update()
     {
         if (isRecording)
         {
-            AudioInput input = audioQueue.front();
-            WriteAudio(input.buffer, input.bufferSize, input.timestamp);
-            delete[] input.buffer;
+            WriteAudio(std::move(audioQueue.front()));
             audioQueue.pop();
         }
     }
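One caveat in the pool accessors above: GetAvailableVideoFrame, GetAvailableAudioFrame, and the recycle blocks in the write tasks all mutate a std::queue while holding only a std::shared_lock, which takes shared (reader) ownership of the std::shared_mutex, so two of these calls may run concurrently. If exclusive access is intended, std::unique_lock would express it; a suggested variant (not what this diff does):

    std::unique_ptr<VideoEncoder::VideoInput> VideoEncoder::GetAvailableVideoFrame()
    {
        std::unique_lock lock(videoInputPoolLock); // exclusive: we pop from the pool
        if (videoInputPool.empty())
        {
    #if HARDWARE_ENCODE_VIDEO
            return std::make_unique<VideoInput>(device);
    #else
            return std::make_unique<VideoInput>(frameStride * frameHeight);
    #endif
        }
        auto result = std::move(videoInputPool.front());
        videoInputPool.pop();
        return result;
    }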
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
index 35030056..76598fc9 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
@@ -38,50 +38,155 @@ class VideoEncoder
     void StopRecording();
 
     // Used for recording video from a background thread.
-    void QueueVideoFrame(byte* buffer, LONGLONG timestamp, LONGLONG duration);
-    void QueueAudioFrame(byte* buffer, int bufferSize, LONGLONG timestamp);
+    class VideoInput;
+    class AudioInput;
+    std::unique_ptr<VideoInput> GetAvailableVideoFrame();
+    std::unique_ptr<AudioInput> GetAvailableAudioFrame();
+    void QueueVideoFrame(std::unique_ptr<VideoInput> frame);
+    void QueueAudioFrame(std::unique_ptr<AudioInput> frame);
 
     // Do not call this from a background thread.
     void Update();
 
-private:
-    void WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duration);
-    void WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp);
+    class VideoInputBase
+    {
+    public:
+        IMFMediaBuffer* mediaBuffer = nullptr;
+        LONGLONG timestamp = INVALID_TIMESTAMP;
+        LONGLONG duration = INVALID_TIMESTAMP;
+    };
 
-    LARGE_INTEGER freq;
+#if HARDWARE_ENCODE_VIDEO
+    class VideoInput : public VideoInputBase
+    {
+        ID3D11Device* device;
+        ID3D11DeviceContext* deviceContext;
+        ID3D11Texture2D* texture = nullptr;
+
+    public:
+        VideoInput(ID3D11Device* _device) : device(_device)
+        {
+            device->AddRef();
+            device->GetImmediateContext(&deviceContext);
+        }
+
+        ~VideoInput()
+        {
+            SafeRelease(texture);
+            SafeRelease(deviceContext);
+            SafeRelease(device);
+            SafeRelease(mediaBuffer);
+        }
 
-    class VideoInput
+        void CopyFrom(ID3D11Texture2D* source)
+        {
+            if (texture == nullptr)
+            {
+                D3D11_TEXTURE2D_DESC existingDesc;
+                source->GetDesc(&existingDesc);
+
+                D3D11_TEXTURE2D_DESC textureDesc;
+                ZeroMemory(&textureDesc, sizeof(textureDesc));
+                textureDesc.Width = existingDesc.Width;
+                textureDesc.Height = existingDesc.Height;
+                textureDesc.MipLevels = existingDesc.MipLevels;
+                textureDesc.ArraySize = existingDesc.ArraySize;
+                textureDesc.Format = existingDesc.Format;
+                textureDesc.SampleDesc.Count = existingDesc.SampleDesc.Count;
+                textureDesc.SampleDesc.Quality = existingDesc.SampleDesc.Quality;
+                textureDesc.Usage = D3D11_USAGE_DEFAULT;
+
+                HRESULT hr = device->CreateTexture2D(&textureDesc, NULL, &texture);
+                if (SUCCEEDED(hr)) { hr = MFCreateDXGISurfaceBuffer(IID_ID3D11Texture2D, texture, 0, true, &mediaBuffer); }
+                if (FAILED(hr))
+                {
+                    OutputDebugString(L"Creating video frame failed");
+                    return;
+                }
+            }
+
+            deviceContext->CopyResource(texture, source);
+        }
+    };
+#else
+    class VideoInput : public VideoInputBase
     {
+        byte* buffer = nullptr;
+
     public:
-        byte * sharedBuffer;
+        VideoInput(size_t bufferSize)
+        {
+            auto hr = MFCreateMemoryBuffer(bufferSize, &mediaBuffer);
+        }
 
-        LONGLONG timestamp;
-        LONGLONG duration;
+        ~VideoInput()
+        {
+            Unlock();
+            SafeRelease(mediaBuffer);
+        }
 
-        VideoInput(byte* buffer, LONGLONG timestamp, LONGLONG duration)
+        byte* Lock()
         {
-            this->sharedBuffer = buffer;
-            this->timestamp = timestamp;
-            this->duration = duration;
+            if (buffer == nullptr)
+            {
+                mediaBuffer->Lock(&buffer, NULL, NULL);
+            }
+            return buffer;
+        }
+
+        void Unlock()
+        {
+            if (buffer != nullptr)
+            {
+                mediaBuffer->Unlock();
+                buffer = nullptr;
+            }
         }
     };
+#endif
 
     class AudioInput
     {
     public:
-        byte* buffer;
+        IMFMediaBuffer* mediaBuffer = nullptr;
+        int capacity = 0;
+        int currentSize = 0;
         LONGLONG timestamp;
-        int bufferSize;
 
-        AudioInput(byte* buffer, int buffSize, LONGLONG timestamp)
+        ~AudioInput()
+        {
+            SafeRelease(mediaBuffer);
+        }
+
+        void SetData(const byte* buffer, int bufferSize, LONGLONG timestamp)
         {
-            bufferSize = buffSize;
-            this->buffer = new byte[buffSize];
-            memcpy(this->buffer, buffer, buffSize);
+            if (bufferSize > capacity)
+            {
+                SafeRelease(mediaBuffer);
+                auto hr = MFCreateMemoryBuffer(bufferSize, &mediaBuffer);
+                if (FAILED(hr))
+                {
+                    OutputDebugString(L"Failed to create audio memory buffer");
+                }
+                capacity = bufferSize;
+            }
+
+            byte* lockedBuffer;
+            if (FAILED(mediaBuffer->Lock(&lockedBuffer, nullptr, nullptr)))
+            {
+                return;
+            }
+            memcpy(lockedBuffer, buffer, bufferSize);
+            mediaBuffer->Unlock();
+            mediaBuffer->SetCurrentLength(bufferSize);
+            currentSize = bufferSize;
             this->timestamp = timestamp;
         }
     };
 
+private:
+    void WriteVideo(std::unique_ptr<VideoInput> frame);
+    void WriteAudio(std::unique_ptr<AudioInput> frame);
+
+    LARGE_INTEGER freq;
+
     IMFSinkWriter* sinkWriter;
     DWORD videoStreamIndex = MAXDWORD;
     DWORD audioStreamIndex = MAXDWORD;
@@ -108,12 +213,19 @@ class VideoEncoder
 
     LONGLONG startTime = INVALID_TIMESTAMP;
 
-    std::queue<VideoInput> videoQueue;
-    std::queue<AudioInput> audioQueue;
+    std::queue<std::unique_ptr<VideoInput>> videoInputPool;
+    std::queue<std::unique_ptr<VideoInput>> videoQueue;
+    std::queue<std::unique_ptr<AudioInput>> audioInputPool;
+    std::queue<std::unique_ptr<AudioInput>> audioQueue;
 
     std::shared_mutex videoStateLock;
+    std::shared_mutex videoInputPoolLock;
+    std::shared_mutex audioInputPoolLock;
+    std::future<void> videoWriteFuture;
+    std::future<void> audioWriteFuture;
 
 #if HARDWARE_ENCODE_VIDEO
+    ID3D11Device* device;
     IMFDXGIDeviceManager* deviceManager = NULL;
     UINT resetToken = 0;
 #endif
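The hardware-encode VideoInput above keeps a private ID3D11Texture2D and exposes it to Media Foundation through MFCreateDXGISurfaceBuffer, so recording a frame becomes a GPU-to-GPU CopyResource plus a buffer the sink writer can consume directly, with no CPU readback. A minimal sketch of that wrapping step (error handling elided; fBottomUpWhenLinear = true as in the diff):

    #include <d3d11.h>
    #include <mfapi.h>

    // Wrap an existing D3D11 texture in an IMFMediaBuffer for an IMFSinkWriter.
    IMFMediaBuffer* WrapTexture(ID3D11Texture2D* texture)
    {
        IMFMediaBuffer* buffer = nullptr;
        HRESULT hr = MFCreateDXGISurfaceBuffer(
            __uuidof(ID3D11Texture2D), // riid of the wrapped surface
            texture,                   // referenced, not copied
            0,                         // subresource index
            TRUE,                      // fBottomUpWhenLinear
            &buffer);
        return SUCCEEDED(hr) ? buffer : nullptr;
    }

This is presumably also why Initialize now enables ID3D10Multithread protection: the sink writer's encoder threads and the compositor share the same D3D11 device.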
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
index fb6433c6..7126a03b 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
@@ -26,56 +26,6 @@ static BYTE* depthBytes = new BYTE[FRAME_BUFSIZE_DEPTH16];
 static BYTE* bodyMaskBytes = new BYTE[FRAME_BUFSIZE_DEPTH16];
 static BYTE* holoBytes = new BYTE[FRAME_BUFSIZE_RGBA];
 
-#define NUM_VIDEO_BUFFERS 10
-
-static byte** videoBytes = nullptr;
-static int videoBufferIndex = 0;
-
-void AllocateVideoBuffers(VideoRecordingFrameLayout frameLayout)
-{
-    if (videoBytes != nullptr)
-        return;
-
-    videoBytes = new byte*[NUM_VIDEO_BUFFERS];
-
-    int frameBufferSize;
-    if (frameLayout == VideoRecordingFrameLayout::Quad)
-    {
-#if HARDWARE_ENCODE_VIDEO
-        frameBufferSize = QUAD_FRAME_BUFSIZE_NV12;
-#else
-        frameBufferSize = QUAD_FRAME_BUFSIZE_RGBA;
-#endif
-    }
-    else
-    {
-#if HARDWARE_ENCODE_VIDEO
-        frameBufferSize = FRAME_BUFSIZE_NV12;
-#else
-        frameBufferSize = FRAME_BUFSIZE_RGBA;
-#endif
-    }
-
-    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
-    {
-        videoBytes[i] = new byte[frameBufferSize];
-    }
-}
-
-void FreeVideoBuffers()
-{
-    if (videoBytes == nullptr)
-        return;
-
-    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
-    {
-        delete[] videoBytes[i];
-    }
-    delete[] videoBytes;
-    videoBytes = nullptr;
-}
-
-
 static ID3D11Texture2D* g_holoRenderTexture = nullptr;
 
 static ID3D11Texture2D* g_colorTexture = nullptr;
@@ -168,19 +118,16 @@ static int queuedVideoFrameCount = 0;
 
 void UpdateVideoRecordingFrame()
 {
+#if !HARDWARE_ENCODE_VIDEO
     //We have an old frame, lets get the data and queue it now
     if (VideoTextureBuffer.IsDataAvailable())
     {
-        videoBufferIndex = (videoBufferIndex + 1) % NUM_VIDEO_BUFFERS;
-#if HARDWARE_ENCODE_VIDEO
-        float bpp = FRAME_BPP_NV12;
-#else
-        float bpp = FRAME_BPP_RGBA;
-#endif
-
-        VideoTextureBuffer.FetchTextureData(g_pD3D11Device, videoBytes[videoBufferIndex], bpp);
-        ci->RecordFrameAsync(videoBytes[videoBufferIndex], queuedVideoFrameTime, queuedVideoFrameCount);
+        auto frame = ci->GetAvailableRecordFrame();
+        VideoTextureBuffer.FetchTextureData(g_pD3D11Device, frame->Lock(), FRAME_BPP_RGBA);
+        frame->timestamp = queuedVideoFrameTime;
+        ci->RecordFrameAsync(std::move(frame), queuedVideoFrameCount);
     }
+#endif
 
     if (lastVideoFrame >= 0 && lastRecordedVideoFrame != lastVideoFrame)
     {
@@ -210,7 +157,14 @@ void UpdateVideoRecordingFrame()
         lastRecordedVideoFrame = lastVideoFrame;
         queuedVideoFrameTime = lastVideoFrame * ci->GetColorDuration();
 
+#if HARDWARE_ENCODE_VIDEO
+        auto frame = ci->GetAvailableRecordFrame();
+        frame->CopyFrom(g_videoTexture);
+        frame->timestamp = queuedVideoFrameTime;
+        ci->RecordFrameAsync(std::move(frame), queuedVideoFrameCount);
+#else
         VideoTextureBuffer.PrepareTextureFetch(g_pD3D11Device, g_videoTexture);
+#endif
     }
 
     lastVideoFrame = ci->compositeFrameIndex;
@@ -444,8 +398,6 @@ UNITYDLL void StopFrameProvider()
     {
         ci->StopFrameProvider();
     }
-
-    FreeVideoBuffers();
 }
 
 UNITYDLL void SetAudioData(BYTE* audioData, int audioSize, double audioTime)
@@ -482,7 +434,6 @@ UNITYDLL bool StartRecording(VideoRecordingFrameLayout frameLayout, LPCWSTR lpcD
 {
     lastVideoFrame = -1;
     lastRecordedVideoFrame = -1;
-    AllocateVideoBuffers(frameLayout);
     VideoTextureBuffer.ReleaseTextures();
     VideoTextureBuffer.Reset();
     isRecording = ci->StartRecording(frameLayout, lpcDesiredFileName, desiredFileNameLength, inputFileNameLength, lpFileName, fileNameLength);
@@ -497,7 +448,6 @@ UNITYDLL void StopRecording()
     if (videoInitialized && ci != nullptr)
     {
         ci->StopRecording();
-        FreeVideoBuffers();
         isRecording = false;
     }
 }
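The CompositionManager change below replaces the one-shot audio start time (whatever dspTime/frame-index pair OnAudioFilterRead happened to see first) with an iterative estimate of the offset between Unity's DSP clock and the capture-frame clock. Each callback predicts the current frame index from the running offset; a one-frame disagreement moves the offset halfway toward the violated bound and halves the error range (a bisection), while a larger jump resets the estimate. A condensed sketch of that update rule (C++ for illustration; the actual code below is C#):

    #include <cmath>

    struct StartEstimator
    {
        double frameDuration; // seconds per capture frame
        double timeOffset;    // maps dspTime onto the capture-frame clock
        double errorRange;    // current uncertainty in timeOffset

        void Update(double dspTime, int frameIndex)
        {
            int expected = (int)((timeOffset + dspTime) / frameDuration); // rounded down
            int frameError = frameIndex - expected;
            if (frameError == 0) { return; }                 // estimate agrees
            if (std::abs(frameError) == 1)
            {
                timeOffset += frameError * errorRange / 2.0; // step toward the violated bound
                errorRange /= 2.0;                           // bisect the uncertainty interval
            }
            else                                             // clock jumped: start over
            {
                errorRange = frameDuration;
                timeOffset = frameIndex * frameDuration - dspTime;
            }
        }
    };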
diff --git a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs
index eb4bc7c0..327cf6cb 100644
--- a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs
+++ b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs
@@ -705,6 +705,55 @@ private void ResetCompositor()
         }
     }
 
+    struct AudioStartEstimation
+    {
+        public bool IsValid { get; }
+
+        public AudioStartEstimation(double dspTime, int frameIndex, double frameDuration)
+        {
+            IsValid = true;
+            this.frameDuration = frameDuration;
+            errorRange = timeOffset = 0.0; // just to initialize the struct fully
+            ResetEstimation(dspTime, frameIndex);
+        }
+
+        private void ResetEstimation(double dspTime, int frameIndex)
+        {
+            errorRange = frameDuration;
+            timeOffset = frameIndex * frameDuration - dspTime;
+        }
+
+        public void Update(double dspTime, int frameIndex)
+        {
+            int expectedFrameIndex = (int)((timeOffset + dspTime) / frameDuration); // rounded down
+            int frameError = frameIndex - expectedFrameIndex;
+
+            if (frameError == 0)
+            {
+                //Debug.Log("I was correct");
+                return;
+            }
+            else if (System.Math.Abs(frameError) == 1)
+            {
+                //Debug.Log($"Corrected {((timeOffset + dspTime) / frameDuration)} ({expectedFrameIndex}) not {frameIndex} timeOffset {frameError}, newTimeOffset {timeOffset}, newErrorRange {errorRange}");
+                double bound = frameError * errorRange; // either lower/upper depending on sign(frameError)
+                timeOffset = timeOffset + bound / 2.0;
+                errorRange /= 2.0; // as long as there are no jumps (abs(frameError) > 1) we approach the correct value
+            }
+            else
+            {
+                Debug.Log($"Time jumped too far, had to reset audio start estimation {((timeOffset + dspTime) / frameDuration)} not {frameIndex}");
+                ResetEstimation(dspTime, frameIndex);
+            }
+        }
+
+        public double GetStartTime(double curDspTime) => timeOffset + curDspTime;
+
+        private double frameDuration;
+        private double timeOffset;
+        private double errorRange;
+    }
+
+    AudioStartEstimation audioStartEstimation;
+
     // This function is not/not always called on the main thread.
     private void OnAudioFilterRead(float[] data, int channels)
     {
@@ -713,15 +762,24 @@ private void OnAudioFilterRead(float[] data, int channels)
             return;
         }
 
+        if (!audioStartEstimation.IsValid)
+        {
+            audioStartEstimation = new AudioStartEstimation(
+                AudioSettings.dspTime,
+                UnityCompositorInterface.GetCaptureFrameIndex(),
+                UnityCompositorInterface.GetColorDuration() / 10000000.0);
+        }
+        else
+        {
+            audioStartEstimation.Update(AudioSettings.dspTime, UnityCompositorInterface.GetCaptureFrameIndex());
+        }
+
         //Create new stream
         if (audioMemoryStream == null)
         {
             audioMemoryStream = new MemoryStream();
             audioStreamWriter = new BinaryWriter(audioMemoryStream);
-            double audioSettingsTime = AudioSettings.dspTime; // Audio time in seconds, more accurate than Time.time
-            double captureFrameTime = UnityCompositorInterface.GetCaptureFrameIndex() * UnityCompositorInterface.GetColorDuration() / 10000000.0; // Capture Frame Time in seconds
-            DebugLog($"Obtained Audio Sample, AudioSettingsTime:{audioSettingsTime}, CaptureFrameTime:{captureFrameTime}");
-            audioStartTime = captureFrameTime;
+            audioStartTime = audioStartEstimation.GetStartTime(AudioSettings.dspTime);
             numCachedAudioFrames = 0;
         }
diff --git a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs
index 2dbdc9b0..d53059e4 100644
--- a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs
+++ b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs
@@ -565,7 +565,8 @@ private IEnumerator OnPostRender()
             }
 
             // convert composite to the format expected by our video encoder (NV12 or BGR)
-            Graphics.Blit(videoSourceTexture, videoOutputTexture, hardwareEncodeVideo ? NV12VideoMat : BGRVideoMat);
+            //Graphics.Blit(videoSourceTexture, videoOutputTexture, hardwareEncodeVideo ? NV12VideoMat : BGRVideoMat);
+            Graphics.Blit(videoSourceTexture, videoOutputTexture, BGRVideoMat);
         }
 
         TextureRenderCompleted?.Invoke();
@@ -613,7 +614,7 @@ private void SetShaderValues()
         RGBToYUVMat.SetFloat("_Width", frameWidth);
         RGBToYUVMat.SetFloat("_Height", frameHeight);
 
-        BGRVideoMat.SetFloat("_YFlip", 0);
+        BGRVideoMat.SetFloat("_YFlip", 1);
     }
 
     ///
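Taken together with the encoder now unconditionally setting inputFormat = MFVideoFormat_RGB32, these last two changes pin recording to the BGR path (the NV12 blit is left commented out rather than removed) and compensate for the vertical orientation with _YFlip. The end-to-end handoff for one recorded video frame, condensed from UpdateVideoRecordingFrame above (nullptr check on the returned frame elided):

    auto frame = ci->GetAvailableRecordFrame(); // reuse or allocate a pooled frame
    #if HARDWARE_ENCODE_VIDEO
    frame->CopyFrom(g_videoTexture);            // GPU-to-GPU copy, no CPU readback
    #else
    VideoTextureBuffer.FetchTextureData(g_pD3D11Device, frame->Lock(), FRAME_BPP_RGBA);
    #endif
    frame->timestamp = queuedVideoFrameTime;    // hundred-nanosecond units
    ci->RecordFrameAsync(std::move(frame), queuedVideoFrameCount);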