diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.h b/backends/vulkan/runtime/graph/ops/DispatchNode.h
index 958637218e2..7d04f7714e9 100644
--- a/backends/vulkan/runtime/graph/ops/DispatchNode.h
+++ b/backends/vulkan/runtime/graph/ops/DispatchNode.h
@@ -46,12 +46,30 @@ class PushConstantDataInfo {
     payload_.attr = attr;
   }
 
-  explicit PushConstantDataInfo(const void* data, uint32_t dataLen)
+  /*
+   * Builds a push constant entry from a raw byte blob.
+   *
+   * data: source bytes; the first dataLen bytes are copied into the payload.
+   * dataLen: number of bytes to copy from data; must be <= 16.
+   * pushConstantLen: size recorded as the entry's dataSize. 0 (the default)
+   *   means "same as dataLen"; a larger value pads the entry with zero bytes.
+   *   Must be <= 16.
+   */
+  explicit PushConstantDataInfo(
+      const void* data,
+      uint32_t dataLen,
+      uint32_t pushConstantLen = 0)
       : tensorUniformData(nullptr) {
     VK_CHECK_COND(
         dataLen <= 16, "Single push constant data size must be <= 16 bytes");
-    payload_.dataSize = dataLen;
-    memcpy(payload_.data, data, payload_.dataSize);
+    VK_CHECK_COND(
+        pushConstantLen <= 16,
+        "Single push constant padded size must be <= 16 bytes");
+    // Zero the payload first so that any bytes between dataLen and dataSize
+    // are well-defined padding rather than uninitialized memory. The dataLen
+    // check above already relies on payload_.data holding at least 16 bytes.
+    memset(payload_.data, 0, 16);
+    memcpy(payload_.data, data, dataLen);
+    payload_.dataSize = pushConstantLen ? pushConstantLen : dataLen;
   }
 
   /*
diff --git a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
index be0e1bfa20a..62aa2f810dc 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
@@ -19,11 +19,6 @@ layout(std430) buffer;
 ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
 ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
 ${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)}
-${layout_declare_ubo(B, "ivec4", "out_sizes")}
-${layout_declare_ubo(B, "ivec4", "in_sizes")}
-${layout_declare_ubo(B, "ivec4", "other_sizes")}
-${layout_declare_ubo(B, "ivec2", "broadcast_params")}
-${layout_declare_ubo(B, "float", "alpha")}
 
 #include "broadcasting_utils.h"
 #include "indexing_utils.h"
@@ -40,6 +35,14 @@ const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
 ${layout_declare_spec_const(C, "int", "other_layout", "DEFAULT_LAYOUT")}
 const lowp ivec4 other_axis_map = unhash_axis_map(other_layout);
 
+layout(push_constant) uniform restrict Block { // per-dispatch parameters, passed as push constants instead of UBOs
+  ivec4 out_sizes; // sizes of t_out; used by lpos_to_tidx in main()
+  ivec4 in_sizes; // sizes of t_in
+  ivec4 other_sizes; // sizes of t_other
+  ivec2 broadcast_params; // NOTE(review): presumably filled from BinaryOpsParams in BinaryOp.cpp - confirm member order matches
+  float alpha; // NOTE(review): presumably BinaryOpsParams::alpha_val - confirm
+};
+
 void main() {
   const ivec3 lpos = ivec3(gl_GlobalInvocationID);
   const ivec4 tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, packed_dim);
diff --git a/backends/vulkan/runtime/graph/ops/glsl/permute.glsl b/backends/vulkan/runtime/graph/ops/glsl/permute.glsl
index 5378099d03f..59d6aecdc15 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/permute.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/permute.glsl
@@ -19,15 +19,9 @@ layout(std430) buffer;
 layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
 layout(set = 0, binding = 1) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} image_in;
 
-layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits {
-  ivec3 out_limits;
-};
-
-layout(set = 0, binding = 3) uniform PRECISION restrict Sizes {
+layout(push_constant) uniform PRECISION restrict Block {
+  ivec4 out_limits;
   ivec4 sizes;
-};
-
-layout(set = 0, binding = 4) uniform PRECISION restrict Block {
   // output dims
   ivec4 out_ndims;
   // x = output channels aligned to 4, y = input channels aligned to 4
@@ -41,7 +35,7 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 void main() {
   const u16vec3 pos = u16vec3(gl_GlobalInvocationID);
 
-  if (any(greaterThanEqual(pos, out_limits))) {
+  if (any(greaterThanEqual(pos, out_limits.xyz))) {
     return;
   }
 
diff --git a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
index 33f73cd6dad..7e88982aaee 100644
--- a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
@@ -67,7 +67,10 @@ void add_binary_op_node(
     alpha_val = graph.extract_scalar<float>(alpha);
   }
 
-  const utils::ivec2 broadcast_params = create_broadcast_params(*t_in1, *t_in2);
+  const struct BinaryOpsParams { // groups both values so they are passed below as one PushConstantDataInfo
+    const utils::ivec2 broadcast_params; // result of create_broadcast_params(*t_in1, *t_in2)
+    const float alpha_val; // alpha scalar computed above
+  } binary_ops_params{create_broadcast_params(*t_in1, *t_in2), alpha_val}; // NOTE(review): sizeof must stay <= 16 for PushConstantDataInfo - confirm
 
   std::string kernel_name("binary_");
   kernel_name.reserve(kShaderNameReserve);
@@ -83,16 +86,16 @@ void add_binary_op_node(
       {{out, vkapi::MemoryAccessType::WRITE},
        {{arg1, arg2}, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
-      {t_out->sizes_ubo(),
-       t_in1->sizes_ubo(),
-       t_in2->sizes_ubo(),
-       graph.create_params_buffer(broadcast_params),
-       graph.create_params_buffer(alpha_val)},
+      {},
       // Specialization Constants
       {t_out->hashed_layout(), t_in1->hashed_layout(), t_in2->hashed_layout()},
       // Resizing Logic
       resize_binary_op_node,
-      {}));
+      {},
+      {{graph.sizes_pc_of(out),
+        graph.sizes_pc_of(arg1),
+        graph.sizes_pc_of(arg2),
+        PushConstantDataInfo(&binary_ops_params, sizeof(binary_ops_params))}}));
 }
 
 #define DEFINE_BINARY_OP_WITH_ALPHA_FN(op_name)                          \
diff --git a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
index c107f288f34..a56925751e7 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
@@ -75,13 +75,7 @@ void add_permute_node(
   int32_t out_c_aligned = utils::align_up_4(out_channels);
   int32_t in_c_aligned = utils::align_up_4(in_channels);
 
-  const struct Block final {
-    ivec4 out_ndims;
-    ivec2 ch_info;
-  } params{
-      out_dims,
-      {out_c_aligned, in_c_aligned},
-  };
+  const ivec2 ch_info = {out_c_aligned, in_c_aligned};
 
   graph.execute_nodes().emplace_back(new DispatchNode(
       graph,
@@ -90,14 +84,16 @@ void add_permute_node(
       graph.create_local_wg_size(out),
       {{out, vkapi::MemoryAccessType::WRITE},
        {in, vkapi::MemoryAccessType::READ}},
-      {t_out->logical_limits_ubo(),
-       t_out->sizes_ubo(),
-       graph.create_params_buffer(params)},
+      {},
       // Specialization Constants
       {},
       // Resizing Logic
       nullptr,
-      {}));
+      {},
+      {{graph.logical_limits_pc_of(out),
+        graph.sizes_pc_of(out),
+        PushConstantDataInfo(&out_dims, sizeof(out_dims)),
+        PushConstantDataInfo(&ch_info, sizeof(ch_info))}}));
 }
 
 void add_permute_node(
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index 77a0458d901..604ad26588d 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -1601,9 +1601,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
   auto addFn = VK_GET_OP_FN("aten.add.Tensor");
   addFn(graph, {a.value, b.value, kDummyValueRef, c});
 
-  // +2: alpha UBO, broadcast UBO for arithmetic shader
-  // +1: t.sizes_ubo() for arithmetic shader output c
-  expected_vma_allocation_count += 3;
+  // no new allocations if binary op uses push constants
   EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);
 
   IOValueRef d = graph.add_input_tensor(
@@ -1624,17 +1622,16 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
   auto mulFn = VK_GET_OP_FN("aten.mul.Tensor");
   mulFn(graph, {c, d.value, e});
 
-  // +2: alpha UBO, broadcast UBO for arithmetic shader
-  // +1: t.sizes_ubo() for arithmetic shader output e
-  expected_vma_allocation_count += 3;
+  // no new allocations if binary op uses push constants
   EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);
 
   IOValueRef out = {};
   out.value = e;
   out.staging = graph.set_output_tensor(out.value);
 
+  // +1: staging buffer for the input tensor
   // +1: staging buffer for the output tensor
-  expected_vma_allocation_count += 1;
+  expected_vma_allocation_count += 2;
   EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);
 
   graph.prepare();