Add proto [de]serialization for SelectKThunk.

khasanovaa · Google-ML-Automation · commit 5bd3de6148e4 · 2025-10-16T06:44:59.000-07:00
PiperOrigin-RevId: 820210212
diff --git a/xla/backends/gpu/runtime/BUILD b/xla/backends/gpu/runtime/BUILD
@@ -1136,12 +1136,15 @@ cc_library(
         "//xla/stream_executor:device_memory",
         "//xla/stream_executor:device_memory_allocator",
         "//xla/stream_executor:stream",
+        "//xla/tsl/platform:statusor",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/log",
         "@com_google_absl//absl/log:check",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/status:statusor",
         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
+        "@tsl//tsl/platform:statusor",
     ] + if_cuda_is_configured(
         [":select_k_exec_raft"],
         no_cuda = [":select_k_exec_stub"],
@@ -1163,6 +1166,7 @@ xla_cc_test(
         "//xla/service:buffer_assignment",
         "//xla/tsl/platform:statusor",
         "//xla/tsl/util/proto:proto_matchers",
+        "@com_google_absl//absl/status:status_matchers",
         "@com_google_googletest//:gtest_main",
     ],
 )
diff --git a/xla/backends/gpu/runtime/select_k_thunk.cc b/xla/backends/gpu/runtime/select_k_thunk.cc
@@ -16,14 +16,18 @@ limitations under the License.
 #include "xla/backends/gpu/runtime/select_k_thunk.h"
 
 #include <cstdint>
+#include <memory>
 #include <string>
+#include <utility>
+#include <vector>
 
 #include "absl/container/inlined_vector.h"
 #include "absl/log/check.h"
 #include "absl/log/log.h"
 #include "absl/status/status.h"
 #include "absl/status/statusor.h"
 #include "absl/strings/str_cat.h"
+#include "absl/types/span.h"
 #include "xla/backends/gpu/runtime/select_k_exec.h"
 #include "xla/backends/gpu/runtime/thunk.h"
 #include "xla/backends/gpu/runtime/thunk.pb.h"
@@ -32,9 +36,11 @@ limitations under the License.
 #include "xla/hlo/ir/hlo_instruction.h"
 #include "xla/primitive_util.h"
 #include "xla/service/buffer_assignment.h"
+#include "xla/shape.h"
 #include "xla/stream_executor/device_memory.h"
 #include "xla/stream_executor/device_memory_allocator.h"
 #include "xla/stream_executor/stream.h"
+#include "xla/tsl/platform/statusor.h"
 #include "xla/types.h"
 
 namespace xla::gpu {
@@ -105,10 +111,35 @@ absl::Status SelectKThunk::ExecuteOnStream(const ExecuteParams& params) {
 absl::StatusOr<ThunkProto> SelectKThunk::ToProto() const {
+  // Serializes this thunk: the generic thunk info plus a SelectKThunkProto
+  // payload holding the scalar parameters and the argument buffer slices.
   ThunkProto proto;
   *proto.mutable_thunk_info() = thunk_info().ToProto();
+  SelectKThunkProto* select_k_proto = proto.mutable_select_k_thunk();
 
-  SelectKThunkProto* select_k_thunk_proto = proto.mutable_select_k_thunk();
-  (void)select_k_thunk_proto;
-  // TODO(upwind): Add fields for SelectKThunkProto.
+  select_k_proto->set_batch_size(batch_size_);
+  select_k_proto->set_num_elements(num_elements_);
+  select_k_proto->set_k(k_);
+  select_k_proto->set_dtype(dtype_);
+
+  // Slices are appended in the order they are stored in args_. If any
+  // Slice::ToProto() call fails, its status is returned and no proto is
+  // produced.
+  for (const BufferAllocation::Slice& arg : args_) {
+    TF_ASSIGN_OR_RETURN(*select_k_proto->add_args(), arg.ToProto());
+  }
   return proto;
 }
+
+// Reconstructs a SelectKThunk from its serialized form.
+//
+// Every entry of `proto.args()` is resolved against `buffer_allocations` via
+// BufferAllocation::Slice::FromProto; the first entry that fails to resolve
+// aborts deserialization and its error status is returned.
+//
+// SelectKThunkProto carries no shape information, so each reconstructed
+// kernel argument is given a default-constructed Shape.
+absl::StatusOr<std::unique_ptr<SelectKThunk>> SelectKThunk::FromProto(
+    ThunkInfo thunk_info, const SelectKThunkProto& proto,
+    absl::Span<const BufferAllocation> buffer_allocations) {
+  std::vector<emitters::KernelArgument> arguments;
+  arguments.reserve(proto.args().size());
+  for (const xla::buffer_assignment::BufferAllocationSliceProto& arg :
+       proto.args()) {
+    TF_ASSIGN_OR_RETURN(
+        BufferAllocation::Slice slice,
+        BufferAllocation::Slice::FromProto(arg, buffer_allocations));
+    // Construct the argument in place rather than via a named temporary.
+    arguments.emplace_back(Shape{}, slice);
+  }
+  // `thunk_info` is a by-value sink parameter: move it instead of copying.
+  return std::make_unique<SelectKThunk>(
+      std::move(thunk_info), proto.batch_size(), proto.num_elements(),
+      proto.k(), proto.dtype(),
+      emitters::KernelArguments(std::move(arguments)));
+}
+
 }  // namespace xla::gpu
diff --git a/xla/backends/gpu/runtime/select_k_thunk.h b/xla/backends/gpu/runtime/select_k_thunk.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define XLA_BACKENDS_GPU_RUNTIME_SELECT_K_THUNK_H_
 
 #include <cstdint>
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -63,6 +64,10 @@ class SelectKThunk : public Thunk {
 
   absl::StatusOr<ThunkProto> ToProto() const override;
 
+  // Rebuilds a SelectKThunk from `proto`. The argument slices stored in the
+  // proto are resolved against `buffer_allocations`; any error from resolving
+  // a slice is propagated to the caller.
+  static absl::StatusOr<std::unique_ptr<SelectKThunk>> FromProto(
+      ThunkInfo thunk_info, const SelectKThunkProto& proto,
+      absl::Span<const BufferAllocation> buffer_allocations);
+
  private:
   std::uint32_t batch_size_;
   std::uint32_t num_elements_;
diff --git a/xla/backends/gpu/runtime/select_k_thunk_test.cc b/xla/backends/gpu/runtime/select_k_thunk_test.cc
@@ -21,6 +21,7 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include "absl/status/status_matchers.h"
 #include "xla/backends/gpu/runtime/thunk.h"
 #include "xla/backends/gpu/runtime/thunk.pb.h"
 #include "xla/backends/gpu/runtime/thunk_id.h"
@@ -35,6 +36,7 @@ limitations under the License.
 namespace xla::gpu {
 namespace {
 
+using ::absl_testing::IsOkAndHolds;
 using ::tsl::proto_testing::EqualsProto;
 
 TEST(SelectKThunkTest, ToProto) {
@@ -46,30 +48,45 @@ TEST(SelectKThunkTest, ToProto) {
   Thunk::ThunkInfo thunk_info =
       Thunk::ThunkInfo::WithProfileAnnotation(topKInst.get(), ThunkId{456});
 
-  BufferAllocation alloc0(/*index=*/0, /*size=*/20, /*color=*/0);
-  BufferAllocation::Slice slice0(&alloc0, /*offset=*/0, /*size=*/20);
+  std::vector<BufferAllocation> buffer_allocations = {
+      {/*index=*/0, /*size=*/20, /*color=*/0},
+      {/*index=*/1, /*size=*/12, /*color=*/0},
+      {/*index=*/2, /*size=*/12, /*color=*/0}};
 
-  BufferAllocation alloc1(/*index=*/1, /*size=*/12, /*color=*/0);
-  BufferAllocation::Slice slice1(&alloc1, /*offset=*/0, /*size=*/12);
-
-  BufferAllocation alloc2(/*index=*/2, /*size=*/12, /*color=*/0);
-  BufferAllocation::Slice slice2(&alloc2, /*offset=*/0, /*size=*/12);
+  BufferAllocation::Slice slice0(&buffer_allocations[0], /*offset=*/0,
+                                 /*size=*/20);
+  BufferAllocation::Slice slice1(&buffer_allocations[1], /*offset=*/0,
+                                 /*size=*/12);
+  BufferAllocation::Slice slice2(&buffer_allocations[2], /*offset=*/0,
+                                 /*size=*/12);
 
   emitters::KernelArgument arg0(ShapeUtil::MakeShape(F32, {1, 5}), slice0);
   emitters::KernelArgument arg1(ShapeUtil::MakeShape(F32, {1, 3}), slice1);
   emitters::KernelArgument arg2(ShapeUtil::MakeShape(U32, {1, 3}), slice2);
-  arg0.set_written(false);
-  arg1.set_written(true);
-  arg2.set_written(true);
 
   emitters::KernelArguments kernel_arguments({arg0, arg1, arg2});
 
   SelectKThunk thunk(std::move(thunk_info), 1, 5, 3, F32, kernel_arguments);
+
   TF_ASSERT_OK_AND_ASSIGN(ThunkProto proto, thunk.ToProto());
   EXPECT_THAT(proto, EqualsProto(R"pb(
                 thunk_info { profile_annotation: "custom-call" thunk_id: 456 }
-                select_k_thunk {}
+                select_k_thunk {
+                  args { buffer_allocation_index: 0 size: 20 }
+                  args { buffer_allocation_index: 1 size: 12 }
+                  args { buffer_allocation_index: 2 size: 12 }
+                  batch_size: 1
+                  num_elements: 5
+                  k: 3
+                  dtype: F32
+                }
               )pb"));
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SelectKThunk> deserialized,
+      SelectKThunk::FromProto(thunk.thunk_info(), proto.select_k_thunk(),
+                              buffer_allocations));
+  EXPECT_THAT(deserialized->ToProto(), IsOkAndHolds(EqualsProto(proto)));
 }
 
 }  // namespace
diff --git a/xla/backends/gpu/runtime/thunk.proto b/xla/backends/gpu/runtime/thunk.proto
@@ -178,7 +178,11 @@ message OutfeedThunkProto {
 }
 
 message SelectKThunkProto {
-  // TODO(upwind): Add fields for SelectKThunkProto.
+  // Buffer slices of the thunk's arguments, in the order the thunk stores
+  // them.
+  repeated xla.buffer_assignment.BufferAllocationSliceProto args = 1;
+  // NOTE(review): field number 2 is skipped — confirm whether that is
+  // intentional (and whether it should be declared `reserved`).
+  // Scalar parameters mirrored from the SelectKThunk members of the same
+  // names.
+  uint32 batch_size = 3;
+  uint32 num_elements = 4;
+  uint32 k = 5;
+  // Value of the thunk's dtype member.
+  xla.PrimitiveType dtype = 6;
 }
 
 message CublasLtMatmulThunkProto {

Original file line number	Diff line number	Diff line change
`@@ -178,7 +178,11 @@ message OutfeedThunkProto {`
`178`	`178`	`}`
`179`	`179`
`180`	`180`	`message SelectKThunkProto {`
`181`		`- // TODO(upwind): Add fields for SelectKThunkProto.`
	`181`	`+ repeated xla.buffer_assignment.BufferAllocationSliceProto args = 1;`
	`182`	`+ uint32 batch_size = 3;`
	`183`	`+ uint32 num_elements = 4;`
	`184`	`+ uint32 k = 5;`
	`185`	`+ xla.PrimitiveType dtype = 6;`
`182`	`186`	`}`
`183`	`187`
`184`	`188`	`message CublasLtMatmulThunkProto {`