Skip to content

Commit 742a6fd

Browse files
TheSpydog authored and flibitijibibo committed
GPU: Query UnrestrictedBufferTextureCopyPitchSupported to avoid D3D12 realignment copies
1 parent 4df13e8 commit 742a6fd

File tree

2 files changed

+52
-22
lines changed

2 files changed

+52
-22
lines changed

include/SDL3/SDL_gpu.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1398,11 +1398,12 @@ typedef struct SDL_GPUViewport
13981398
* SDL_DownloadFromGPUTexture are used as default values respectively and data
13991399
* is considered to be tightly packed.
14001400
*
1401-
* **WARNING**: Direct3D 12 requires texture data row pitch to be 256 byte
1402-
* aligned, and offsets to be aligned to 512 bytes. If they are not, SDL will
1403-
* make a temporary copy of the data that is properly aligned, but this adds
1404-
* overhead to the transfer process. Apps can avoid this by aligning their
1405-
* data appropriately, or using a different GPU backend than Direct3D 12.
1401+
* **WARNING**: On some older/integrated hardware, Direct3D 12 requires texture
1402+
* data row pitch to be 256 byte aligned, and offsets to be aligned to 512 bytes.
1403+
* If they are not, SDL will make a temporary copy of the data that is properly
1404+
* aligned, but this adds overhead to the transfer process. Apps can avoid this
1405+
* by aligning their data appropriately, or using a different GPU backend than
1406+
* Direct3D 12.
14061407
*
14071408
* \since This struct is available since SDL 3.2.0.
14081409
*

src/gpu/d3d12/SDL_gpu_d3d12.c

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,7 @@ struct D3D12Renderer
935935

936936
bool debug_mode;
937937
bool GPUUploadHeapSupported;
938+
bool UnrestrictedBufferTextureCopyPitchSupported;
938939
// FIXME: these might not be necessary since we're not using custom heaps
939940
bool UMA;
940941
bool UMACacheCoherent;
@@ -5965,6 +5966,7 @@ static void D3D12_UploadToTexture(
59655966
bool cycle)
59665967
{
59675968
D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
5969+
D3D12Renderer *renderer = (D3D12Renderer *)d3d12CommandBuffer->renderer;
59685970
D3D12BufferContainer *transferBufferContainer = (D3D12BufferContainer *)source->transfer_buffer;
59695971
D3D12Buffer *temporaryBuffer = NULL;
59705972
D3D12_TEXTURE_COPY_LOCATION sourceLocation;
@@ -5992,11 +5994,12 @@ static void D3D12_UploadToTexture(
59925994
cycle,
59935995
D3D12_RESOURCE_STATE_COPY_DEST);
59945996

5995-
/* D3D12 requires texture data row pitch to be 256 byte aligned, which is obviously insane.
5996-
* Instead of exposing that restriction to the client, which is a huge rake to step on,
5997-
* and a restriction that no other backend requires, we're going to copy data to a temporary buffer,
5998-
* copy THAT data to the texture, and then get rid of the temporary buffer ASAP.
5999-
* If we're lucky and the row pitch and depth pitch are already aligned, we can skip all of that.
5997+
/* Unless the UnrestrictedBufferTextureCopyPitchSupported feature is supported, D3D12 requires
5998+
* texture data row pitch to be 256 byte aligned, which is obviously insane. Instead of exposing
5999+
* that restriction to the client, which is a huge rake to step on, and a restriction that no
6000+
* other backend requires, we're going to copy data to a temporary buffer, copy THAT data to the
6001+
* texture, and then get rid of the temporary buffer ASAP. If we're lucky and the row pitch and
6002+
* depth pitch are already aligned, we can skip all of that.
60006003
*
60016004
* D3D12 also requires offsets to be 512 byte aligned. We'll fix that for the client and warn them as well.
60026005
*
@@ -6018,10 +6021,16 @@ static void D3D12_UploadToTexture(
60186021

60196022
bytesPerSlice = rowsPerSlice * rowPitch;
60206023

6021-
alignedRowPitch = (destination->w + (blockWidth - 1)) / blockWidth * blockSize;
6022-
alignedRowPitch = D3D12_INTERNAL_Align(alignedRowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
6023-
needsRealignment = rowsPerSlice != destination->h || rowPitch != alignedRowPitch;
6024-
needsPlacementCopy = source->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0;
6024+
if (renderer->UnrestrictedBufferTextureCopyPitchSupported) {
6025+
alignedRowPitch = rowPitch;
6026+
needsRealignment = false;
6027+
needsPlacementCopy = false;
6028+
} else {
6029+
alignedRowPitch = (destination->w + (blockWidth - 1)) / blockWidth * blockSize;
6030+
alignedRowPitch = D3D12_INTERNAL_Align(alignedRowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
6031+
needsRealignment = rowsPerSlice != destination->h || rowPitch != alignedRowPitch;
6032+
needsPlacementCopy = source->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0;
6033+
}
60256034

60266035
alignedBytesPerSlice = alignedRowPitch * destination->h;
60276036

@@ -6300,6 +6309,7 @@ static void D3D12_DownloadFromTexture(
63006309
const SDL_GPUTextureTransferInfo *destination)
63016310
{
63026311
D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
6312+
D3D12Renderer *renderer = d3d12CommandBuffer->renderer;
63036313
D3D12_TEXTURE_COPY_LOCATION sourceLocation;
63046314
D3D12_TEXTURE_COPY_LOCATION destinationLocation;
63056315
Uint32 pixelsPerRow = destination->pixels_per_row;
@@ -6317,11 +6327,12 @@ static void D3D12_DownloadFromTexture(
63176327
D3D12BufferContainer *destinationContainer = (D3D12BufferContainer *)destination->transfer_buffer;
63186328
D3D12Buffer *destinationBuffer = destinationContainer->activeBuffer;
63196329

6320-
/* D3D12 requires texture data row pitch to be 256 byte aligned, which is obviously insane.
6321-
* Instead of exposing that restriction to the client, which is a huge rake to step on,
6322-
* and a restriction that no other backend requires, we're going to copy data to a temporary buffer,
6323-
* copy THAT data to the texture, and then get rid of the temporary buffer ASAP.
6324-
* If we're lucky and the row pitch and depth pitch are already aligned, we can skip all of that.
6330+
/* Unless the UnrestrictedBufferTextureCopyPitchSupported feature is supported, D3D12 requires
6331+
* texture data row pitch to be 256 byte aligned, which is obviously insane. Instead of exposing
6332+
* that restriction to the client, which is a huge rake to step on, and a restriction that no
6333+
* other backend requires, we're going to copy data to a temporary buffer, copy THAT data to the
6334+
* texture, and then get rid of the temporary buffer ASAP. If we're lucky and the row pitch and
6335+
* depth pitch are already aligned, we can skip all of that.
63256336
*
63266337
* D3D12 also requires offsets to be 512 byte aligned. We'll fix that for the client and warn them as well.
63276338
*
@@ -6341,9 +6352,15 @@ static void D3D12_DownloadFromTexture(
63416352
rowsPerSlice = source->h;
63426353
}
63436354

6344-
alignedRowPitch = D3D12_INTERNAL_Align(rowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
6345-
needsRealignment = rowsPerSlice != source->h || rowPitch != alignedRowPitch;
6346-
needsPlacementCopy = destination->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0;
6355+
if (renderer->UnrestrictedBufferTextureCopyPitchSupported) {
6356+
alignedRowPitch = rowPitch;
6357+
needsRealignment = false;
6358+
needsPlacementCopy = false;
6359+
} else {
6360+
alignedRowPitch = D3D12_INTERNAL_Align(rowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
6361+
needsRealignment = rowsPerSlice != source->h || rowPitch != alignedRowPitch;
6362+
needsPlacementCopy = destination->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0;
6363+
}
63476364

63486365
sourceLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
63496366
sourceLocation.SubresourceIndex = sourceSubresource->index;
@@ -9687,6 +9704,18 @@ static SDL_GPUDevice *D3D12_CreateDevice(bool debugMode, bool preferLowPower, SD
96879704
}
96889705
#endif
96899706

9707+
// Check for unrestricted texture-buffer copy pitch support
9708+
D3D12_FEATURE_DATA_D3D12_OPTIONS13 options13;
9709+
res = ID3D12Device_CheckFeatureSupport(
9710+
renderer->device,
9711+
D3D12_FEATURE_D3D12_OPTIONS13,
9712+
&options13,
9713+
sizeof(options13));
9714+
9715+
if (SUCCEEDED(res)) {
9716+
renderer->UnrestrictedBufferTextureCopyPitchSupported = options13.UnrestrictedBufferTextureCopyPitchSupported;
9717+
}
9718+
96909719
// Create command queue
96919720
#if (defined(SDL_PLATFORM_XBOXONE) || defined(SDL_PLATFORM_XBOXSERIES))
96929721
if (s_CommandQueue != NULL) {

0 commit comments

Comments
 (0)