Skip to content

Commit 22c7515

Browse files
authored
Merge branch 'master' into remove_config
2 parents 148d40a + e6d723c commit 22c7515

File tree

9 files changed

+216
-129
lines changed

9 files changed

+216
-129
lines changed

rpcs3/Emu/RSX/Common/TextureUtils.cpp

Lines changed: 53 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,80 +1096,65 @@ namespace rsx
10961096
fmt::throw_exception("Wrong format 0x%x", format);
10971097
}
10981098

1099-
if (word_size)
1099+
if (!word_size)
11001100
{
1101-
if (word_size == 1)
1101+
return result;
1102+
}
1103+
1104+
result.element_size = word_size;
1105+
result.block_length = words_per_block;
1106+
1107+
bool require_cpu_swizzle = !caps.supports_hw_deswizzle && is_swizzled;
1108+
bool require_cpu_byteswap = word_size > 1 && !caps.supports_byteswap;
1109+
1110+
if (is_swizzled && caps.supports_hw_deswizzle)
1111+
{
1112+
result.require_deswizzle = true;
1113+
}
1114+
1115+
if (!require_cpu_byteswap && !require_cpu_swizzle)
1116+
{
1117+
result.require_swap = (word_size > 1);
1118+
1119+
if (caps.supports_zero_copy)
11021120
{
1103-
if (is_swizzled)
1104-
{
1105-
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
1106-
}
1107-
else if (caps.supports_zero_copy)
1108-
{
1109-
result.require_upload = true;
1110-
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1111-
}
1112-
else
1113-
{
1114-
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1115-
}
1121+
result.require_upload = true;
1122+
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
11161123
}
1117-
else
1124+
else if (word_size == 1)
11181125
{
1119-
result.element_size = word_size;
1120-
result.block_length = words_per_block;
1121-
1122-
bool require_cpu_swizzle = !caps.supports_hw_deswizzle && is_swizzled;
1123-
bool require_cpu_byteswap = !caps.supports_byteswap;
1126+
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1127+
}
1128+
else if (word_size == 2)
1129+
{
1130+
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const u16>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1131+
}
1132+
else if (word_size == 4)
1133+
{
1134+
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u32>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1135+
}
11241136

1125-
if (is_swizzled && caps.supports_hw_deswizzle)
1126-
{
1127-
if (word_size == 4 || (((word_size * words_per_block) & 3) == 0))
1128-
{
1129-
result.require_deswizzle = true;
1130-
}
1131-
else
1132-
{
1133-
require_cpu_swizzle = true;
1134-
}
1135-
}
1137+
return result;
1138+
}
11361139

1137-
if (!require_cpu_byteswap && !require_cpu_swizzle)
1138-
{
1139-
result.require_swap = true;
1140-
1141-
if (caps.supports_zero_copy)
1142-
{
1143-
result.require_upload = true;
1144-
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1145-
}
1146-
else if (word_size == 2)
1147-
{
1148-
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const u16>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1149-
}
1150-
else if (word_size == 4)
1151-
{
1152-
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u32>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1153-
}
1154-
}
1155-
else
1156-
{
1157-
if (word_size == 2)
1158-
{
1159-
if (is_swizzled)
1160-
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const be_t<u16>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
1161-
else
1162-
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const be_t<u16>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1163-
}
1164-
else if (word_size == 4)
1165-
{
1166-
if (is_swizzled)
1167-
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const be_t<u32>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
1168-
else
1169-
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const be_t<u32>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1170-
}
1171-
}
1172-
}
1140+
if (word_size == 1)
1141+
{
1142+
ensure(is_swizzled);
1143+
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
1144+
}
1145+
else if (word_size == 2)
1146+
{
1147+
if (is_swizzled)
1148+
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const be_t<u16>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
1149+
else
1150+
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const be_t<u16>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
1151+
}
1152+
else if (word_size == 4)
1153+
{
1154+
if (is_swizzled)
1155+
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const be_t<u32>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
1156+
else
1157+
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const be_t<u32>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
11731158
}
11741159

11751160
return result;

rpcs3/Emu/RSX/Common/surface_store.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1219,7 +1219,7 @@ namespace rsx
12191219

12201220
if (result.size() > 1)
12211221
{
1222-
std::sort(result.begin(), result.end(), [](const auto &a, const auto &b)
1222+
result.sort([](const auto &a, const auto &b)
12231223
{
12241224
if (a.surface->last_use_tag == b.surface->last_use_tag)
12251225
{

rpcs3/Emu/RSX/Common/texture_cache_utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1496,7 +1496,7 @@ namespace rsx
14961496

14971497
void on_miss()
14981498
{
1499-
rsx_log.warning("Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
1499+
// rsx_log.trace("Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
15001500
m_tex_cache->on_miss(*derived());
15011501
}
15021502

rpcs3/Emu/RSX/GL/GLCompute.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,6 @@ namespace gl
263263

264264
cs_deswizzle_3d()
265265
{
266-
ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type"
267-
268266
initialize();
269267

270268
m_src =
@@ -294,8 +292,10 @@ namespace gl
294292
{ "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0))},
295293
{ "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(2)) },
296294
{ "%ws", std::to_string(optimal_group_size) },
297-
{ "%_wordcount", std::to_string(sizeof(_BlockType) / 4) },
298-
{ "%f", transform }
295+
{ "%_wordcount", std::to_string(std::max<u32>(sizeof(_BlockType) / 4u, 1u)) },
296+
{ "%f", transform },
297+
{ "%_8bit", sizeof(_BlockType) == 1 ? "1" : "0" },
298+
{ "%_16bit", sizeof(_BlockType) == 2 ? "1" : "0" },
299299
};
300300

301301
m_src = fmt::replace_all(m_src, syntax_replace);
@@ -339,7 +339,8 @@ namespace gl
339339
set_parameters(cmd);
340340

341341
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
342-
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
342+
const u32 texels_per_dword = std::max<u32>(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide
343+
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword;
343344
compute_task::run(cmd, linear_invocations);
344345
}
345346
};

rpcs3/Emu/RSX/GL/GLTexture.cpp

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,16 @@ namespace gl
3636
{
3737
switch (block_size)
3838
{
39+
case 1:
40+
gl::get_compute_task<gl::cs_deswizzle_3d<u8, WordType, SwapBytes>>()->run(
41+
cmd, dst, dst_offset, src, src_offset,
42+
data_length, width, height, depth, 1);
43+
break;
44+
case 2:
45+
gl::get_compute_task<gl::cs_deswizzle_3d<u16, WordType, SwapBytes>>()->run(
46+
cmd, dst, dst_offset, src, src_offset,
47+
data_length, width, height, depth, 1);
48+
break;
3949
case 4:
4050
gl::get_compute_task<gl::cs_deswizzle_3d<u32, WordType, SwapBytes>>()->run(
4151
cmd, dst, dst_offset, src, src_offset,
@@ -707,7 +717,7 @@ namespace gl
707717
}
708718

709719
rsx::io_buffer io_buf = dst_buffer;
710-
caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 4096);
720+
caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024);
711721
auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
712722

713723
// Define upload region
@@ -748,39 +758,54 @@ namespace gl
748758
g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast<u32>(image_linear_size));
749759

750760
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
751-
ensure(op.element_size == 2 || op.element_size == 4);
752761
const auto block_size = op.element_size * op.block_length;
753762

754763
if (op.require_swap)
755764
{
756765
mem_layout.swap_bytes = false;
757766

758-
if (op.element_size == 4) [[ likely ]]
767+
switch (op.element_size)
759768
{
760-
do_deswizzle_transformation<u32, true>(cmd, block_size,
769+
case 1:
770+
do_deswizzle_transformation<u8, true>(cmd, block_size,
761771
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
762772
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
763-
}
764-
else
765-
{
773+
break;
774+
case 2:
766775
do_deswizzle_transformation<u16, true>(cmd, block_size,
767776
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
768777
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
778+
break;
779+
case 4:
780+
do_deswizzle_transformation<u32, true>(cmd, block_size,
781+
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
782+
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
783+
break;
784+
default:
785+
fmt::throw_exception("Unimplemented element size deswizzle");
769786
}
770787
}
771788
else
772789
{
773-
if (op.element_size == 4) [[ likely ]]
790+
switch (op.element_size)
774791
{
775-
do_deswizzle_transformation<u32, false>(cmd, block_size,
792+
case 1:
793+
do_deswizzle_transformation<u8, false>(cmd, block_size,
776794
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
777795
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
778-
}
779-
else
780-
{
796+
break;
797+
case 2:
781798
do_deswizzle_transformation<u16, false>(cmd, block_size,
782799
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
783800
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
801+
break;
802+
case 4:
803+
do_deswizzle_transformation<u32, false>(cmd, block_size,
804+
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
805+
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
806+
break;
807+
default:
808+
fmt::throw_exception("Unimplemented element size deswizzle");
784809
}
785810
}
786811

rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl

Lines changed: 66 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ R"(
33

44
#define SSBO_LOCATION(x) (x + %loc)
55

6+
#define USE_8BIT_ADDRESSING %_8bit
7+
#define USE_16BIT_ADDRESSING %_16bit
8+
69
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
710

811
layout(%set, binding=SSBO_LOCATION(0), std430) buffer ssbo0{ uint data_in[]; };
@@ -98,12 +101,57 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_)
98101
return offset;
99102
}
100103

104+
#if USE_16BIT_ADDRESSING
105+
106+
// Packs one 16-bit texel into `accumulator` and, once both halves of the
// 32-bit word have been gathered, stores the word to the output buffer.
//   accumulator - running 32-bit word shared across calls (inout)
//   subword     - which half of the word this call fills (0 or 1)
//   src_id      - source index in 16-bit units; the containing word is data_in[src_id / 2]
//   dst_id      - destination index in 16-bit units; the word written is data_out[dst_id / 2]
void write16(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id)
107+
{
108+
const uint masks[] = { 0x0000FFFF, 0xFFFF0000 };
109+
// NOTE(review): the lane is chosen by `subword` (the destination slot), not by
// src_id % 2 - this presumes the source texel sits in the same half of its
// word as the slot being filled; confirm against get_z_index().
accumulator |= data_in[src_id / 2] & masks[subword];
110+
111+
// Only the call that fills the upper half commits the completed word.
if (subword == 1)
112+
{
113+
// %f is a placeholder token substituted by the host code before compilation.
data_out[dst_id / 2] = %f(accumulator);
114+
}
115+
}
116+
117+
#elif USE_8BIT_ADDRESSING
118+
119+
// Packs one 8-bit texel into `accumulator` and, once all four bytes of the
// 32-bit word have been gathered, stores the word to the output buffer.
//   accumulator - running 32-bit word shared across calls (inout)
//   subword     - which byte of the word this call fills (0..3)
//   src_id      - source index in bytes; the containing word is data_in[src_id / 4]
//   dst_id      - destination index in bytes; the word written is data_out[dst_id / 4]
void write8(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id)
120+
{
121+
const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 };
122+
// NOTE(review): the byte lane is chosen by `subword` (the destination slot),
// not by src_id % 4 - this presumes the source texel occupies the same byte
// position within its word; confirm against get_z_index().
accumulator |= data_in[src_id / 4] & masks[subword];
123+
124+
// Only the call that fills the last byte commits the completed word.
if (subword == 3)
125+
{
126+
// NOTE(review): unlike write16/write32, the %f transform is not applied here -
// presumably because single bytes need no reordering; confirm.
data_out[dst_id / 4] = accumulator;
127+
}
128+
}
129+
130+
#else
131+
132+
// Copies `word_count` consecutive 32-bit words from data_in (starting at
// src_id) to data_out (starting at dst_id), passing each word through %f.
//   word_count - number of 32-bit words to copy
//   src_id     - index of the first source word in data_in
//   dst_id     - index of the first destination word in data_out
void write32(const in uint word_count, in uint src_id, in uint dst_id)
133+
{
134+
for (uint i = 0; i < word_count; ++i)
135+
{
136+
uint value = data_in[src_id++];
137+
// %f is a placeholder token substituted by the host code before compilation.
data_out[dst_id++] = %f(value);
138+
}
139+
}
140+
141+
#endif
142+
101143
void main()
102144
{
103145
uint invocations_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
104146
uint texel_id = (gl_GlobalInvocationID.y * invocations_x) + gl_GlobalInvocationID.x;
105147
uint word_count = %_wordcount;
106148

149+
#if USE_8BIT_ADDRESSING
150+
texel_id *= 4; // Each invocation consumes 4 texels
151+
#elif USE_16BIT_ADDRESSING
152+
texel_id *= 2; // Each invocation consumes 2 texels
153+
#endif
154+
107155
if (!init_invocation_properties(texel_id))
108156
return;
109157

@@ -116,14 +164,25 @@ void main()
116164
uint y = (slice_offset / row_length);
117165
uint x = (slice_offset % row_length);
118166

119-
uint src_texel_id = get_z_index(x, y, z);
120-
uint dst_id = (texel_id * word_count);
121-
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
167+
#if USE_8BIT_ADDRESSING
168+
for (uint subword = 0, accumulator = 0; subword < 4; ++subword, ++x) {
169+
#elif USE_16BIT_ADDRESSING
170+
for (uint subword = 0, accumulator = 0; subword < 2; ++subword, ++x) {
171+
#endif
122172

123-
for (uint i = 0; i < word_count; ++i)
124-
{
125-
uint value = data_in[src_id++];
126-
data_out[dst_id++] = %f(value);
173+
uint src_texel_id = get_z_index(x, y, z);
174+
uint dst_id = (texel_id * word_count);
175+
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
176+
177+
#if USE_8BIT_ADDRESSING
178+
write8(accumulator, subword, src_id, dst_id);
127179
}
180+
#elif USE_16BIT_ADDRESSING
181+
write16(accumulator, subword, src_id, dst_id);
182+
}
183+
#else
184+
write32(word_count, src_id, dst_id);
185+
#endif
186+
128187
}
129188
)"

0 commit comments

Comments
 (0)