Skip to content

Commit 62636ba

Browse files
committed
Initial harness for recording BCn artifacts, also synchronize all VkQueue dispatched functions (make OneByOne mode robust)
1 parent 8f7269f commit 62636ba

File tree

9 files changed

+120
-116
lines changed

9 files changed

+120
-116
lines changed

src/vulkan/wrapper/artifacts.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#include "artifacts.h"
2+
#include "wrapper_objects.h"
3+
#include "wrapper_private.h"
4+
5+
#include <string>
6+
7+
static FILE* open_log_file(const char* postfix, int id) {
8+
static char dir[256];
9+
static bool initialized = false;
10+
if (!initialized) {
11+
initialized = true;
12+
char time_str[20];
13+
get_current_time_string(time_str, sizeof(time_str));
14+
sprintf(dir, "/sdcard/Documents/Wrapper/artifacts_%s.%s.%d", time_str, getprogname(), getpid());
15+
if (mkdir(dir, 0777) == 0) {
16+
WLOGE("Failed to create the artifacts directory %s", dir);
17+
} else {
18+
WLOGD("Logging artifacts to %s", dir);
19+
}
20+
}
21+
std::string path = std::string(dir) + "/" + std::to_string(id) + "_" + postfix;
22+
return fopen(path.c_str(), "w");
23+
}
24+
25+
extern "C"
26+
void RecordBCnArtifacts(struct wrapper_device* device, const VkBufferImageCopy* region, VkBuffer srcBuffer, VkBuffer stagingBuffer, int decode_id) {
27+
// auto fd = open_log_file("region.txt", decode_id);
28+
struct wrapper_buffer* wbuf = get_wrapper_buffer(device, srcBuffer);
29+
if (!wbuf) {
30+
WLOGE("srcBuffer not tracked, skipping (decode_id=%d)", decode_id);
31+
}
32+
// TODO: Implement this
33+
}

src/vulkan/wrapper/artifacts.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#pragma once

/* Included outside the extern "C" region; only this header's own declarations
 * need to be wrapped in the linkage block. */
#include <vulkan/vulkan_core.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Forward declaration so the prototype below refers to the file-scope type
 * even when this header is included before the struct's definition. Without
 * it, a C compiler gives the tag prototype scope only. */
struct wrapper_device;

/*
 * Records the artifacts of one BCn decode.
 *
 * device         wrapper device the buffers belong to
 * region         buffer-to-image copy region of the decode
 * srcBuffer      source buffer handle of the decode
 * stagingBuffer  staging buffer handle of the decode
 * decode_id      identifier of the decode, used to tell recordings apart
 */
void RecordBCnArtifacts(struct wrapper_device* device, const VkBufferImageCopy* region, VkBuffer srcBuffer, VkBuffer stagingBuffer, int decode_id);

#ifdef __cplusplus
}
#endif

src/vulkan/wrapper/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ wrapper_files = files(
122122
'wrapper_objects.c',
123123
'spirv_edit.cpp',
124124
'graphics_env_hooks.cpp',
125+
'artifacts.cpp',
125126
)
126127

127128
wrapper_deps = [

src/vulkan/wrapper/vk_entrypoints.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,9 @@ def _generate_trampoline(command, dispatch_table="device->dispatch_table"):
376376
handle_unwrap_logic[idx] = f" WLOGA(\"dispatch->{command.name}({', '.join(types)}) (id=%d)\", {', '.join([p.name for p in params])}, cmd_id);"
377377
handle_wrap_logic.append(f" WLOGA(\"dispatch->{command.name} {'returned %d' if command.return_type != 'void' else 'finished'} (id=%d)\"{', result' if command.return_type != 'void' else ''}, cmd_id);")
378378

379+
if params[0].type == 'VkQueue':
380+
handle_unwrap_logic.append(" simple_mtx_lock(&base->resource_mutex);")
381+
handle_wrap_logic = [" simple_mtx_unlock(&base->resource_mutex);"] + handle_wrap_logic
379382
return TRAMPOLINE_TEMPLATE.substitute(
380383
return_type=command.return_type,
381384
name=command.name,

src/vulkan/wrapper/wrapper_debug.c

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -320,33 +320,7 @@ bool use_image_view_mode() {
320320
}
321321

322322
bool use_compute_shader_mode() {
323-
static bool initialized = false;
324-
if (initialized) {
325-
return g_use_compute_shader_mode;
326-
}
327-
initialized = true;
328-
329-
bool use_image_view = use_image_view_mode();
330-
331-
char* env = getenv("USE_COMPUTE_SHADER");
332-
if (env) {
333-
if (strcmp(env, "1") == 0) {
334-
WLOG("Enabling experimental compute shader mode");
335-
g_use_compute_shader_mode = true;
336-
} else if (strcmp(env, "0") == 0) {
337-
WLOG("Disabling experimental compute shader mode");
338-
g_use_compute_shader_mode = false;
339-
use_image_view = false;
340-
g_use_image_view = false;
341-
}
342-
}
343-
344-
if (use_image_view) {
345-
g_use_compute_shader_mode = true;
346-
return true;
347-
}
348-
349-
return g_use_compute_shader_mode;
323+
return true;
350324
}
351325

352326

src/vulkan/wrapper/wrapper_device.c

Lines changed: 47 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "vk_unwrappers.h"
1919
#include "vk_printers.h"
2020
#include "spirv_edit.h"
21+
#include "artifacts.h"
2122

2223
#include "bcdec.h"
2324

@@ -335,7 +336,9 @@ WRAPPER_CreateDevice(VkPhysicalDevice physicalDevice,
335336
device->dispatch_handle);
336337

337338
// Initialize the BCn interceptor states
338-
bool use_image_view = use_image_view_mode();
339+
bool record_artifacts = CHECK_FLAG("RECORD_ARTIFACTS");
340+
bool use_image_view = use_image_view_mode() && !record_artifacts;
341+
339342
result = InterceptorState_Init(&device->s3tc,
340343
wrapper_device_to_handle(device),
341344
use_image_view ? sizeof(s3tc_iv_spv) : sizeof(s3tc_spv),
@@ -809,18 +812,18 @@ WRAPPER_BindBufferMemory2(
809812
uint32_t bindInfoCount,
810813
const VkBindBufferMemoryInfo* pBindInfos)
811814
{
812-
VK_FROM_HANDLE(wrapper_device, _device, device);
815+
VK_FROM_HANDLE(wrapper_device, wdev, device);
813816

814817
if (bindInfoCount == 0 || pBindInfos == NULL) {
815818
WLOGE("wrapper_BindBufferMemory2 called with no bind infos");
816-
return vk_error(&_device->vk, VK_ERROR_INVALID_EXTERNAL_HANDLE);
819+
return vk_error(&wdev->vk, VK_ERROR_INVALID_EXTERNAL_HANDLE);
817820
}
818821

819822
// Track all of the bindInfos
820823
for (uint32_t i = 0; i < bindInfoCount; i++) {
821-
wrapper_buffer *_buffer = get_wrapper_buffer(_device, pBindInfos[i].buffer);
824+
wrapper_buffer *_buffer = get_wrapper_buffer(wdev, pBindInfos[i].buffer);
822825
if (!_buffer) {
823-
WLOGE("wrapper_BindBufferMemory2: buffer %p not tracked", pBindInfos[i].buffer);
826+
WLOG("wrapper_BindBufferMemory2: buffer %p not tracked", pBindInfos[i].buffer);
824827
// return vk_error(&_device->vk, VK_ERROR_INVALID_EXTERNAL_HANDLE);
825828
// TODO(leegao): figure out what's going wrong here, but there are reports of this
826829
continue;
@@ -885,7 +888,6 @@ static VkResult CreateConstantsUniformBuffer(
885888
static VkResult InterceptorState_Init(InterceptorState* state, VkDevice device, size_t spv_size, const uint32_t* spv_code, bool use_image_view, int bc_mode) {
886889
VkResult result;
887890
VK_FROM_HANDLE(wrapper_device, _device, device);
888-
// 1. Create Descriptor Set Layout
889891
VkDescriptorSetLayoutBinding setLayoutBinding[3] = {
890892
{
891893
.binding = 0,
@@ -1107,7 +1109,9 @@ static VkResult SubmitOneTimeCommands(
11071109
void (*recordCommands)(struct wrapper_command_buffer*, void*),
11081110
void* pUserData
11091111
) {
1110-
WLOG("Submitting one-time commands...");
1112+
_Atomic static int counter = 0;
1113+
int id = counter++;
1114+
WLOGD("Submitting one-time commands for id=%d", id);
11111115
VkResult result;
11121116
VkDevice device = (VkDevice) _device;
11131117
VkCommandBufferAllocateInfo allocInfo = { 0 };
@@ -1152,81 +1156,24 @@ static VkResult SubmitOneTimeCommands(
11521156
return result;
11531157
}
11541158

1155-
WLOG("Submitting command buffer to queue %p", queue);
1159+
WLOGD("Submitting command buffer to queue %p for id=%d", queue, id);
11561160
result = WCHECK(QueueSubmit((VkQueue) queue, 1, &submitInfo, fence));
11571161
if (result != VK_SUCCESS) {
11581162
return result;
11591163
}
11601164

1161-
WLOG("Waiting for fence %p", fence);
1162-
WCHECK(WaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX));
1165+
WLOGD("Waiting for fence %p for id=%d", fence, id);
1166+
result = WCHECK(WaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX));
11631167
if (result != VK_SUCCESS) {
11641168
return result;
11651169
}
1166-
WLOG("Command buffer execution completed");
1170+
WLOGD("Command buffer execution completed for id=%d", id);
11671171

11681172
WCHECKV(DestroyFence(device, fence, NULL));
11691173
WCHECKV(FreeCommandBuffers(device, commandPool, 1, &commandBuffer));
11701174
return VK_SUCCESS;
11711175
}
11721176

1173-
struct CmdComputeShaderForDecompressionArgs {
1174-
struct wrapper_device* _device;
1175-
struct wrapper_image* wimg;
1176-
VkBuffer srcBuffer;
1177-
VkImage dstImage;
1178-
VkImageLayout dstImageLayout;
1179-
VkBuffer stagingBuffer;
1180-
const VkBufferImageCopy* region;
1181-
struct InterceptorState* state;
1182-
};
1183-
1184-
// void decode_bc6h_to_r16g16b16a16_sfloat(const void* compressedData, void* dstPixelBlock, int pitch, int isSigned) {
1185-
// // bcdec_bc6h_half decompresses to a 4x4 block of 3-component (RGB) half-floats.
1186-
// // We need a temporary buffer to store this intermediate result because the
1187-
// // output format (RGBA) has a different layout than the library's output (RGB).
1188-
// half_float temp_rgb_half_block[4][4][3];
1189-
1190-
// // The pitch for the temporary float buffer is the size of one row in bytes.
1191-
// // A row has 4 pixels, each with 3 half_float components.
1192-
// const int temp_pitch_bytes = 4 * 3 * sizeof(half_float);
1193-
1194-
// // Step 1: Decompress the BC6h block into the temporary half-float buffer.
1195-
// // This is the most direct and efficient path for this target format.
1196-
// bcdec_bc6h_half(compressedData, temp_rgb_half_block, temp_pitch_bytes, isSigned);
1197-
1198-
// // Step 2: Copy the RGB half-float data to the RGBA destination and add the Alpha channel.
1199-
// unsigned char* dst_row_bytes = (unsigned char*)dstPixelBlock;
1200-
1201-
// // The bit representation of 1.0f in IEEE 754 half-precision format is 0x3C00.
1202-
// // This is used for the alpha channel, as BC6H is an RGB-only format.
1203-
// const half_float alpha_one = 0x3C00;
1204-
1205-
// for (int y = 0; y < 4; ++y) {
1206-
// // Get a pointer to the start of the current pixel row in the destination.
1207-
// half_float* dst_pixel = (half_float*)dst_row_bytes;
1208-
1209-
// for (int x = 0; x < 4; ++x) {
1210-
// // Get the RGB half values from the temporary buffer.
1211-
// const half_float r_half = temp_rgb_half_block[y][x][0];
1212-
// const half_float g_half = temp_rgb_half_block[y][x][1];
1213-
// const half_float b_half = temp_rgb_half_block[y][x][2];
1214-
1215-
// // Write the RGBA values to the destination.
1216-
// dst_pixel[0] = r_half;
1217-
// dst_pixel[1] = g_half;
1218-
// dst_pixel[2] = b_half;
1219-
// dst_pixel[3] = alpha_one; // Set alpha to 1.0f
1220-
1221-
// // Move to the next pixel in the destination row (4 half_floats).
1222-
// dst_pixel += 4;
1223-
// }
1224-
// // Move to the next row in the destination buffer using the provided pitch.
1225-
// dst_row_bytes += pitch;
1226-
// }
1227-
// }
1228-
1229-
12301177
static void BCnDecompression(VkFormat format,
12311178
void* mappedSrcBase,
12321179
void* mappedDst,
@@ -1522,6 +1469,18 @@ static VkDeviceSize calculate_bc_copy_size(const VkBufferImageCopy* region, uint
15221469
return offset_to_last_row + last_row_size_in_bytes;
15231470
}
15241471

1472+
struct CmdComputeShaderForDecompressionArgs {
1473+
struct wrapper_device* _device;
1474+
struct wrapper_image* wimg;
1475+
VkBuffer srcBuffer;
1476+
VkImage dstImage;
1477+
VkImageLayout dstImageLayout;
1478+
VkBuffer stagingBuffer;
1479+
const VkBufferImageCopy* region;
1480+
struct InterceptorState* state;
1481+
bool use_image_view;
1482+
};
1483+
15251484
static void CmdComputeShaderForDecompression(
15261485
struct wrapper_command_buffer* _commandBuffer,
15271486
struct CmdComputeShaderForDecompressionArgs* pArgs)
@@ -1535,7 +1494,7 @@ static void CmdComputeShaderForDecompression(
15351494
VkImage dstImage = wimg->dispatch_handle;
15361495
struct InterceptorState* state = pArgs->state;
15371496
VkCommandBuffer commandBuffer = _commandBuffer->dispatch_handle;
1538-
bool use_image_view = use_image_view_mode();
1497+
bool use_image_view = pArgs->use_image_view;
15391498
VkResult result;
15401499

15411500
WLOG("CmdComputeShaderForDecompression: srcBuffer = %p, dstImage = %p", srcBuffer, dstImage);
@@ -1841,13 +1800,14 @@ WRAPPER_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
18411800
}
18421801

18431802
// --- Decompression Path ---
1844-
_Atomic static int count = 0;
1845-
count++;
1846-
WLOG("Emulating support for format=%d, count=%d", wimg->original_format, count);
1847-
1803+
_Atomic static int counter = 0;
1804+
int decode_id = counter++;
1805+
WLOG("Emulating support for format=%d, decode_id=%d", wimg->original_format, decode_id);
1806+
1807+
bool record_artifacts = CHECK_FLAG("RECORD_ARTIFACTS");
18481808
bool use_cpu_bcn = (get_host_decoding_bcn_masks() & (1 << (wimg->original_format - 131))) != 0;
18491809
bool use_compute_shader = use_compute_shader_mode() && !use_cpu_bcn;
1850-
bool use_image_view = use_image_view_mode() && !use_cpu_bcn;
1810+
bool use_image_view = use_image_view_mode() && !use_cpu_bcn && !record_artifacts;
18511811

18521812
// Check if the queues are the same
18531813
struct wrapper_command_pool *pool = get_wrapper_command_pool(_device, wcb->pool);
@@ -1907,6 +1867,7 @@ WRAPPER_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
19071867
.srcBuffer = srcBuffer,
19081868
.region = region,
19091869
.state = state,
1870+
.use_image_view = use_image_view,
19101871
};
19111872

19121873
if (use_image_view) {
@@ -1916,13 +1877,18 @@ WRAPPER_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
19161877
args.stagingBuffer = stagingBuffer;
19171878
}
19181879

1919-
if (CHECK_FLAG("WRAPPER_ONE_BY_ONE")) {
1920-
SubmitOneTimeCommands(
1880+
if (CHECK_FLAG("WRAPPER_ONE_BY_ONE") || record_artifacts) {
1881+
WLOGD("Submitting decode_id %d", decode_id);
1882+
result = SubmitOneTimeCommands(
19211883
_device,
19221884
wcb->pool,
19231885
_device->graphics_queue,
19241886
(void (*)(struct wrapper_command_buffer*, void*)) &CmdComputeShaderForDecompression,
19251887
&args);
1888+
if (result != VK_SUCCESS) {
1889+
WLOGE("GPU BCn decompression failed, expect visual glitches.");
1890+
return;
1891+
}
19261892
} else {
19271893
CmdComputeShaderForDecompression(wcb, &args);
19281894
}
@@ -1934,6 +1900,11 @@ WRAPPER_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
19341900
}
19351901
}
19361902

1903+
if (record_artifacts) {
1904+
// Invariant: srcBuffer contains the BCn blocks, stagingBuffer contains the output
1905+
RecordBCnArtifacts(_device, region, srcBuffer, stagingBuffer, decode_id);
1906+
}
1907+
19371908
if (!use_image_view) {
19381909
VkBufferMemoryBarrier bufferBarrier = {
19391910
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,

src/vulkan/wrapper/wrapper_device_memory.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ wrapper_device_memory_from_handle(struct wrapper_device *device,
295295
return mem;
296296
}
297297

298+
// TODO: track all memory associated with host visible data
298299
WRAPPER_AllocateMemory(VkDevice _device,
299300
const VkMemoryAllocateInfo* pAllocateInfo,
300301
const VkAllocationCallbacks* pAllocator,

src/vulkan/wrapper/wrapper_objects.h

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818

1919
#include "wrapper_trampolines.h"
2020

21+
#ifdef __cplusplus
22+
extern "C" {
23+
#endif
24+
2125
struct wrapper_instance {
2226
struct vk_instance vk;
2327

@@ -126,15 +130,15 @@ VK_DEFINE_HANDLE_CASTS(wrapper_command_buffer, vk.base, VkCommandBuffer,
126130
VK_OBJECT_TYPE_COMMAND_BUFFER)
127131

128132
struct wrapper_device_memory {
129-
struct AHardwareBuffer *ahardware_buffer;
130-
struct wrapper_device *device;
131-
struct list_head link;
132-
int dmabuf_fd;
133-
void *map_address;
134-
size_t map_size;
135-
size_t alloc_size;
136-
VkDeviceMemory dispatch_handle;
137-
const VkAllocationCallbacks *alloc;
133+
struct AHardwareBuffer *ahardware_buffer;
134+
struct wrapper_device *device;
135+
struct list_head link;
136+
int dmabuf_fd;
137+
void *map_address;
138+
size_t map_size;
139+
size_t alloc_size;
140+
VkDeviceMemory dispatch_handle;
141+
const VkAllocationCallbacks *alloc;
138142
};
139143

140144
VkResult enumerate_physical_device(struct vk_instance *_instance);
@@ -232,3 +236,7 @@ struct wrapper_command_pool {
232236
};
233237

234238
MAKE_PROTOTYPES(wrapper_command_pool, VkCommandPool);
239+
240+
#ifdef __cplusplus
241+
}
242+
#endif

0 commit comments

Comments
 (0)