diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index 59fd561a2c5..1214c89e00a 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -612,6 +612,11 @@ void ComputeGraph::prepare() {
   if (config_.enable_querypool) {
     context_->initialize_querypool();
   }
+
+  for (SharedObject& shared_object : shared_objects_) {
+    shared_object.allocate(this);
+    shared_object.bind_users(this);
+  }
 }
 
 void ComputeGraph::encode_prepack() {
@@ -636,11 +641,6 @@ void ComputeGraph::encode_execute() {
 
   context_->cmd_reset_querypool();
 
-  for (SharedObject& shared_object : shared_objects_) {
-    shared_object.allocate(this);
-    shared_object.bind_users(this);
-  }
-
   for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
     node->encode(this);
   }
diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp
index 3b6195a5c26..c4acb41b7b0 100644
--- a/backends/vulkan/test/utils/test_utils.cpp
+++ b/backends/vulkan/test/utils/test_utils.cpp
@@ -537,6 +537,59 @@ void execute_graph_and_check_output(
   }
 }
 
+vkcompute::ComputeGraph build_mm_graph(
+    int B,
+    int M,
+    int K,
+    int N,
+    vkcompute::vkapi::ScalarType dtype,
+    vkcompute::utils::StorageType in_out_stype,
+    vkcompute::utils::GPUMemoryLayout memory_layout,
+    const bool prepack_mat2,
+    const float mat2_val) {
+  using namespace vkcompute;
+  GraphConfig config;
+  ComputeGraph graph(config);
+
+  std::vector<int64_t> mat1_size = {M, K};
+  std::vector<int64_t> mat2_size = {K, N};
+  std::vector<int64_t> out_size = {M, N};
+  if (B > 1) {
+    mat1_size.resize(3);
+    mat1_size = {B, M, K};
+    mat2_size.resize(3);
+    mat2_size = {B, K, N};
+    out_size.resize(3);
+    out_size = {B, M, N};
+  }
+
+  IOValueRef mat1 =
+      graph.add_input_tensor(mat1_size, dtype, in_out_stype, memory_layout);
+  IOValueRef mat2{};
+
+  CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype);
+  if (mat2_val != 0.0f) {
+    std::fill(data_mat2_w.begin(), data_mat2_w.end(), mat2_val);
+  }
+
+  if (prepack_mat2) {
+    mat2.value = mat2_w;
+  } else {
+    mat2.value =
+        graph.add_tensor(mat2_size, dtype, in_out_stype, memory_layout);
+    mat2.staging = graph.set_input_tensor(mat2.value);
+  }
+
+  IOValueRef out;
+  out.value = graph.add_tensor(out_size, dtype, in_out_stype, memory_layout);
+
+  VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});
+
+  out.staging = graph.set_output_tensor(out.value);
+
+  return graph;
+}
+
 bool check_close(float a, float b, float atol, float rtol) {
   float max = std::max(std::abs(a), std::abs(b));
   float diff = std::abs(a - b);
diff --git a/backends/vulkan/test/utils/test_utils.h b/backends/vulkan/test/utils/test_utils.h
index f3ee2a717a5..71d6d0bc0de 100644
--- a/backends/vulkan/test/utils/test_utils.h
+++ b/backends/vulkan/test/utils/test_utils.h
@@ -8,6 +8,8 @@
 
 #pragma once
 
+#include <random>
+
 #include <gtest/gtest.h>
 
 #include <executorch/backends/vulkan/runtime/api/api.h>
@@ -16,6 +18,8 @@
 #include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>
 #include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
 
+#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>
+
 #define CREATE_FLOAT_TEXTURE(sizes, allocate_memory) \
   vkcompute::api::vTensor(                           \
       vkcompute::api::context(),                     \
@@ -135,6 +139,22 @@ void record_matmul_texture3d(
 // Input & Output Utilities
 //
 
+inline std::vector<float> create_random_float_vector(
+    const size_t numel,
+    const float min = 0.0f,
+    const float max = 1.0f) {
+  std::vector<float> result(numel);
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_real_distribution<float> dis(min, max);
+
+  for (size_t i = 0; i < numel; ++i) {
+    result[i] = dis(gen);
+  }
+
+  return result;
+}
+
 inline void fill_staging(
     vkcompute::api::StagingBuffer& staging,
     float val,
@@ -232,6 +252,22 @@ void execute_graph_and_check_output(
     std::vector<float> input_vals,
     std::vector<float> expected_outputs);
 
+#define CREATE_RAND_WEIGHT_TENSOR(name, sizes, dtype)              \
+  std::vector<float> data_##name =                                 \
+      create_random_float_buffer(utils::multiply_integers(sizes)); \
+  ValueRef name = graph.add_tensorref(sizes, dtype, data_##name.data());
+
+vkcompute::ComputeGraph build_mm_graph(
+    int B,
+    int M,
+    int K,
+    int N,
+    vkcompute::vkapi::ScalarType dtype,
+    vkcompute::utils::StorageType in_out_stype,
+    vkcompute::utils::GPUMemoryLayout memory_layout,
+    const bool prepack_mat2 = false,
+    const float mat2_val = 0.0f);
+
 //
 // Debugging Utilities
 //
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index 143e6704889..cf42a846db5 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -2753,43 +2753,8 @@ void test_mm(
     utils::StorageType storage_type,
     utils::GPUMemoryLayout memory_layout,
     bool prepack = true) {
-  GraphConfig config;
-  config.set_storage_type_override(storage_type);
-  ComputeGraph graph(config);
-
-  std::vector<int64_t> mat1_size = {M, K};
-  std::vector<int64_t> mat2_size = {K, N};
-  std::vector<int64_t> out_size = {M, N};
-  if (B > 1) {
-    mat1_size.resize(3);
-    mat1_size = {B, M, K};
-    mat2_size.resize(3);
-    mat2_size = {B, K, N};
-    out_size.resize(3);
-    out_size = {B, M, N};
-  }
-
-  IOValueRef mat2{};
-
-  CREATE_WEIGHT_TENSOR(mat2_w, mat2_size, dtype, 2.0f);
-
-  // Build graph
-
-  IOValueRef mat1 = graph.add_input_tensor(mat1_size, dtype, memory_layout);
-
-  if (prepack) {
-    mat2.value = mat2_w;
-  } else {
-    mat2.value = graph.add_tensor(mat2_size, dtype, memory_layout);
-    mat2.staging = graph.set_input_tensor(mat2.value);
-  }
-
-  IOValueRef out;
-  out.value = graph.add_tensor(out_size, dtype, memory_layout);
-
-  VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});
-
-  out.staging = graph.set_output_tensor(out.value);
+  ComputeGraph graph = build_mm_graph(
+      B, M, K, N, dtype, storage_type, memory_layout, prepack, 2.0f);
 
   graph.prepare();
   graph.encode_prepack();
@@ -2855,6 +2820,60 @@ TEST(VulkanComputeGraphOpsTest, mm_smoke_test) {
 #undef RUN_TESTS
 }
 
+void test_mm_with_resize_reencode(
+    int B,
+    int M,
+    int K,
+    int N,
+    vkapi::ScalarType dtype,
+    utils::StorageType storage_type,
+    utils::GPUMemoryLayout memory_layout) {
+  ASSERT_TRUE(M > 1);
+
+  ComputeGraph graph = build_mm_graph(
+      B, M, K, N, dtype, storage_type, memory_layout, false, 2.0f);
+
+  graph.prepare();
+  graph.encode_prepack();
+  graph.prepack();
+  graph.encode_execute();
+
+  for (int i = 1; i < 4; i++) {
+    float val_mat1 = i;
+    float val_mat2 = i + 1;
+    float val_out = K * (val_mat1 * val_mat2);
+    execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out});
+  }
+
+  // Switch to GEMV mode
+  int new_K = K / 2;
+  std::vector<int64_t> new_mat1_size = {1, new_K};
+  std::vector<int64_t> new_mat2_size = {new_K, N};
+  graph.resize_input(0, new_mat1_size);
+  graph.resize_input(1, new_mat2_size);
+  graph.propagate_resize();
+
+  graph.encode_execute();
+
+  for (int i = 1; i < 4; i++) {
+    float val_mat1 = i;
+    float val_mat2 = i + 1;
+    float val_out = new_K * (val_mat1 * val_mat2);
+    execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out});
+  }
+}
+
+TEST(VulkanComputeGraphOpsTest, test_graph_resize_reencode) {
+  test_mm_with_resize_reencode(
+      /*B = */ 1,
+      /*M = */ 31,
+      /*K = */ 127,
+      /*N = */ 23,
+      vkapi::kFloat,
+      utils::kTexture3D,
+      utils::kWidthPacked);
+}
+
 void test_max_pool2d(
     const std::vector<int64_t>& in_size,
     const int64_t base_val,
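
For reviewers, a minimal usage sketch of the flow this change enables, condensed from the new test_graph_resize_reencode test above. Moving shared-object allocation and binding from encode_execute() into prepare() is what lets the execute command buffer be re-encoded after an input resize without re-allocating or re-binding memory. The test name, sizes, and values below are illustrative only; the helpers are the ones introduced in this diff, and the sketch assumes the includes and `using namespace vkcompute;` already present in vulkan_compute_api_test.cpp.

// Hypothetical sketch of the resize + re-encode flow.
TEST(VulkanComputeGraphOpsTest, resize_reencode_usage_sketch) {
  // Shared objects are now allocated and bound once, inside prepare().
  ComputeGraph graph = build_mm_graph(
      /*B = */ 1, /*M = */ 31, /*K = */ 127, /*N = */ 23,
      vkapi::kFloat, utils::kTexture3D, utils::kWidthPacked,
      /*prepack_mat2 = */ false, /*mat2_val = */ 2.0f);

  graph.prepare();
  graph.encode_prepack();
  graph.prepack();
  graph.encode_execute();

  // mat1 = 1.0, mat2 = 2.0 -> out = K * (1.0 * 2.0) = 254.0
  execute_graph_and_check_output(graph, {1.0f, 2.0f}, {254.0f});

  // Resize the inputs, propagate the new sizes, and re-encode execute;
  // the memory allocated during prepare() is reused as-is.
  graph.resize_input(0, {1, 63});
  graph.resize_input(1, {63, 23});
  graph.propagate_resize();
  graph.encode_execute();

  // out = new_K * (1.0 * 2.0) = 126.0
  execute_graph_and_check_output(graph, {1.0f, 2.0f}, {126.0f});
}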