Skip to content

Commit 5df263c

Browse files
Marcin Radomski authored and Google-ML-Automation committed
[XLA:GPU] Change SDC names to more descriptive ones
And dump the log proto into a file called buffer_debug_log rather than sdc_log.

Changes to implementation details:
- Renames:
  - SdcLogProto -> BufferDebugLogProto
  - SdcLog -> BufferDebugLog
  - SdcBufferId -> ThunkBufferId
  - SdcThunk -> BuffersChecksumThunk (the class is spelled BuffersDebugChecksumThunk in buffers_checksum_thunk.cc)
  - SdcXorChecksumKernel -> BufferDebugXorChecksumKernel
- Move BufferDebugLog to stream_executor/gpu from stream_executor/cuda, as it's not CUDA-specific.

PiperOrigin-RevId: 820186034
1 parent fdd293a commit 5df263c

21 files changed

+559
-516
lines changed

xla/backends/gpu/runtime/BUILD

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2895,11 +2895,11 @@ cc_library(
28952895
srcs = ["thunk_checksum_tracing_pass.cc"],
28962896
hdrs = ["thunk_checksum_tracing_pass.h"],
28972897
deps = [
2898+
":buffers_checksum_thunk",
28982899
":custom_call_thunk",
2899-
":sdc_buffer_id",
2900-
":sdc_thunk",
29012900
":sequential_thunk",
29022901
":thunk",
2902+
":thunk_buffer_id",
29032903
":thunk_pass_pipeline",
29042904
"//xla:shape_util",
29052905
"//xla:xla_data_proto_cc",
@@ -2911,7 +2911,7 @@ cc_library(
29112911
"//xla/service:dump",
29122912
"//xla/stream_executor:device_description",
29132913
"//xla/stream_executor:stream",
2914-
"//xla/stream_executor/cuda:sdc_log",
2914+
"//xla/stream_executor/gpu:buffer_debug_log",
29152915
"//xla/tsl/platform:statusor",
29162916
"@com_google_absl//absl/base:nullability",
29172917
"@com_google_absl//absl/container:flat_hash_map",
@@ -2927,11 +2927,11 @@ xla_cc_test(
29272927
name = "thunk_checksum_tracing_pass_test",
29282928
srcs = ["thunk_checksum_tracing_pass_test.cc"],
29292929
deps = [
2930+
":buffers_checksum_thunk",
29302931
":custom_call_thunk",
2931-
":sdc_buffer_id",
2932-
":sdc_thunk",
29332932
":sequential_thunk",
29342933
":thunk",
2934+
":thunk_buffer_id",
29352935
":thunk_checksum_tracing_pass",
29362936
":thunk_id",
29372937
":thunk_pass_pipeline",
@@ -2994,21 +2994,21 @@ xla_test(
29942994
)
29952995

29962996
cc_library(
2997-
name = "sdc_thunk",
2998-
srcs = ["sdc_thunk.cc"],
2999-
hdrs = ["sdc_thunk.h"],
2997+
name = "buffers_checksum_thunk",
2998+
srcs = ["buffers_checksum_thunk.cc"],
2999+
hdrs = ["buffers_checksum_thunk.h"],
30003000
deps = [
3001-
":sdc_buffer_id",
30023001
":thunk",
3002+
":thunk_buffer_id",
30033003
"//xla/service:buffer_assignment",
30043004
"//xla/stream_executor:device_memory",
30053005
"//xla/stream_executor:launch_dim",
30063006
"//xla/stream_executor:stream_executor_h",
30073007
"//xla/stream_executor/cuda:cuda_compute_capability",
30083008
"//xla/stream_executor/cuda:cuda_platform_id",
3009-
"//xla/stream_executor/cuda:sdc_log",
3009+
"//xla/stream_executor/gpu:buffer_debug_log",
3010+
"//xla/stream_executor/gpu:buffer_debug_xor_checksum_kernel",
30103011
"//xla/stream_executor/gpu:gpu_kernel_registry",
3011-
"//xla/stream_executor/gpu:sdc_xor_checksum_kernel",
30123012
"//xla/tsl/platform:errors",
30133013
"//xla/tsl/platform:statusor",
30143014
"@com_google_absl//absl/container:flat_hash_map",
@@ -3020,18 +3020,18 @@ cc_library(
30203020
)
30213021

30223022
xla_test(
3023-
name = "sdc_thunk_test",
3024-
srcs = ["sdc_thunk_test.cc"],
3023+
name = "buffers_checksum_thunk_test",
3024+
srcs = ["buffers_checksum_thunk_test.cc"],
30253025
backends = ["gpu"],
30263026
tags = [
30273027
"cuda-only",
30283028
"gpu",
30293029
],
30303030
deps = [
3031-
":sdc_buffer_id",
3032-
":sdc_log_structs",
3033-
":sdc_thunk",
3031+
":buffer_debug_log_structs",
3032+
":buffers_checksum_thunk",
30343033
":thunk",
3034+
":thunk_buffer_id",
30353035
":thunk_id",
30363036
"//xla/service:buffer_assignment",
30373037
"//xla/service:executable",
@@ -3042,21 +3042,21 @@ xla_test(
30423042
"//xla/stream_executor:platform_manager",
30433043
"//xla/stream_executor:stream",
30443044
"//xla/stream_executor:stream_executor_memory_allocator",
3045-
"//xla/stream_executor/cuda:sdc_log",
3045+
"//xla/stream_executor/gpu:buffer_debug_log",
30463046
"//xla/tsl/lib/core:status_test_util",
30473047
"//xla/tsl/platform:statusor",
30483048
"@com_google_googletest//:gtest_main",
30493049
],
30503050
)
30513051

30523052
tf_proto_library(
3053-
name = "sdc_proto",
3054-
srcs = ["sdc.proto"],
3053+
name = "buffer_debug_log_proto",
3054+
srcs = ["buffer_debug_log.proto"],
30553055
)
30563056

30573057
cc_library(
3058-
name = "sdc_buffer_id",
3059-
hdrs = ["sdc_buffer_id.h"],
3058+
name = "thunk_buffer_id",
3059+
hdrs = ["thunk_buffer_id.h"],
30603060
compatible_with = get_compatible_with_portable(),
30613061
deps = [
30623062
":thunk_id",
@@ -3067,10 +3067,10 @@ cc_library(
30673067
)
30683068

30693069
xla_cc_test(
3070-
name = "sdc_buffer_id_test",
3071-
srcs = ["sdc_buffer_id_test.cc"],
3070+
name = "thunk_buffer_id_test",
3071+
srcs = ["thunk_buffer_id_test.cc"],
30723072
deps = [
3073-
":sdc_buffer_id",
3073+
":thunk_buffer_id",
30743074
":thunk_id",
30753075
"//xla/tsl/platform:statusor",
30763076
"@com_google_absl//absl/status",
@@ -3080,10 +3080,10 @@ xla_cc_test(
30803080
)
30813081

30823082
cc_library(
3083-
name = "sdc_log_structs",
3084-
hdrs = ["sdc_log_structs.h"],
3083+
name = "buffer_debug_log_structs",
3084+
hdrs = ["buffer_debug_log_structs.h"],
30853085
compatible_with = get_compatible_with_portable(),
30863086
deps = [
3087-
":sdc_buffer_id",
3087+
":thunk_buffer_id",
30883088
],
30893089
)

xla/backends/gpu/runtime/sdc.proto renamed to xla/backends/gpu/runtime/buffer_debug_log.proto

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ syntax = "proto3";
1717

1818
package xla.gpu;
1919

20-
message SdcLogEntryProto {
20+
message BufferDebugLogEntryProto {
2121
// The ID of the thunk that produced this entry, as returned by
2222
// ThunkInfo::thunk_id().
2323
uint64 thunk_id = 1;
@@ -30,7 +30,8 @@ message SdcLogEntryProto {
3030
uint32 checksum = 3;
3131
}
3232

33-
message SdcLogProto {
34-
// The list of entries in the SDC log.
35-
repeated SdcLogEntryProto entries = 1;
33+
// A dump of a `BufferDebugLog` contents.
34+
message BufferDebugLogProto {
35+
// The list of entries in the log.
36+
repeated BufferDebugLogEntryProto entries = 1;
3637
}

xla/backends/gpu/runtime/sdc_log_structs.h renamed to xla/backends/gpu/runtime/buffer_debug_log_structs.h

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,57 +13,59 @@ See the License for the specific language governing permissions and
1313
limitations under the License.
1414
==============================================================================*/
1515

16-
#ifndef XLA_BACKENDS_GPU_RUNTIME_SDC_LOG_STRUCTS_H_
17-
#define XLA_BACKENDS_GPU_RUNTIME_SDC_LOG_STRUCTS_H_
16+
#ifndef XLA_BACKENDS_GPU_RUNTIME_BUFFER_DEBUG_LOG_STRUCTS_H_
17+
#define XLA_BACKENDS_GPU_RUNTIME_BUFFER_DEBUG_LOG_STRUCTS_H_
1818

1919
#include <cstddef>
2020
#include <cstdint>
2121
#include <tuple>
2222

23-
#include "xla/backends/gpu/runtime/sdc_buffer_id.h"
23+
#include "xla/backends/gpu/runtime/thunk_buffer_id.h"
2424

2525
namespace xla::gpu {
2626

27-
struct SdcLogEntry {
27+
struct BufferDebugLogEntry {
2828
// An ID that uniquely identifies a thunk and its specific input or output
2929
// buffer.
30-
SdcBufferId entry_id;
30+
ThunkBufferId entry_id;
3131
uint32_t checksum;
3232

3333
template <typename Sink>
34-
friend void AbslStringify(Sink& sink, const SdcLogEntry& entry) {
34+
friend void AbslStringify(Sink& sink, const BufferDebugLogEntry& entry) {
3535
absl::Format(&sink, "{entry_id: %v, checksum: %u}", entry.entry_id,
3636
entry.checksum);
3737
}
3838

39-
bool operator==(const SdcLogEntry& other) const {
39+
bool operator==(const BufferDebugLogEntry& other) const {
4040
return std::tie(entry_id, checksum) ==
4141
std::tie(other.entry_id, other.checksum);
4242
}
4343

44-
bool operator!=(const SdcLogEntry& other) const { return !(*this == other); }
44+
bool operator!=(const BufferDebugLogEntry& other) const {
45+
return !(*this == other);
46+
}
4547
};
4648

4749
// The struct layout must match on both host and device.
48-
static_assert(_Alignof(SdcLogEntry) == _Alignof(uint32_t));
49-
static_assert(sizeof(SdcLogEntry) == sizeof(uint32_t) * 2);
50-
static_assert(offsetof(SdcLogEntry, entry_id) == 0);
51-
static_assert(offsetof(SdcLogEntry, checksum) == sizeof(uint32_t));
50+
static_assert(_Alignof(BufferDebugLogEntry) == _Alignof(uint32_t));
51+
static_assert(sizeof(BufferDebugLogEntry) == sizeof(uint32_t) * 2);
52+
static_assert(offsetof(BufferDebugLogEntry, entry_id) == 0);
53+
static_assert(offsetof(BufferDebugLogEntry, checksum) == sizeof(uint32_t));
5254

53-
struct SdcLogHeader {
54-
// The first entry in `SdcLogEntry` following the header that has not
55+
struct BufferDebugLogHeader {
56+
// The first entry in `BufferDebugLogEntry` following the header that has not
5557
// been written to. May be bigger than `capacity` if the log was truncated.
5658
uint32_t write_idx;
57-
// The number of `SdcLogEntry` structs the log can hold.
59+
// The number of `BufferDebugLogEntry` structs the log can hold.
5860
uint32_t capacity;
5961
};
6062

6163
// The struct layout must match on both host and device.
62-
static_assert(_Alignof(SdcLogHeader) == _Alignof(uint32_t));
63-
static_assert(sizeof(SdcLogHeader) == sizeof(uint32_t) * 2);
64-
static_assert(offsetof(SdcLogHeader, write_idx) == 0);
65-
static_assert(offsetof(SdcLogHeader, capacity) == sizeof(uint32_t));
64+
static_assert(_Alignof(BufferDebugLogHeader) == _Alignof(uint32_t));
65+
static_assert(sizeof(BufferDebugLogHeader) == sizeof(uint32_t) * 2);
66+
static_assert(offsetof(BufferDebugLogHeader, write_idx) == 0);
67+
static_assert(offsetof(BufferDebugLogHeader, capacity) == sizeof(uint32_t));
6668

6769
} // namespace xla::gpu
6870

69-
#endif // XLA_BACKENDS_GPU_RUNTIME_SDC_LOG_STRUCTS_H_
71+
#endif // XLA_BACKENDS_GPU_RUNTIME_BUFFER_DEBUG_LOG_STRUCTS_H_

xla/backends/gpu/runtime/sdc_thunk.cc renamed to xla/backends/gpu/runtime/buffers_checksum_thunk.cc

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
1313
limitations under the License.
1414
==============================================================================*/
1515

16-
#include "xla/backends/gpu/runtime/sdc_thunk.h"
16+
#include "xla/backends/gpu/runtime/buffers_checksum_thunk.h"
1717

1818
#include <cstdint>
1919
#include <string>
@@ -24,10 +24,10 @@ limitations under the License.
2424
#include "xla/backends/gpu/runtime/thunk.h"
2525
#include "xla/stream_executor/cuda/cuda_compute_capability.h"
2626
#include "xla/stream_executor/cuda/cuda_platform_id.h"
27-
#include "xla/stream_executor/cuda/sdc_log.h"
2827
#include "xla/stream_executor/device_memory.h"
28+
#include "xla/stream_executor/gpu/buffer_debug_log.h"
29+
#include "xla/stream_executor/gpu/buffer_debug_xor_checksum_kernel.h"
2930
#include "xla/stream_executor/gpu/gpu_kernel_registry.h"
30-
#include "xla/stream_executor/gpu/sdc_xor_checksum_kernel.h"
3131
#include "xla/stream_executor/launch_dim.h"
3232
#include "xla/stream_executor/stream_executor.h"
3333
#include "xla/tsl/platform/errors.h"
@@ -37,63 +37,66 @@ namespace xla::gpu {
3737

3838
namespace se = stream_executor;
3939

40-
absl::Status SdcThunk::Initialize(const InitializeParams& params) {
40+
absl::Status BuffersDebugChecksumThunk::Initialize(
41+
const InitializeParams& params) {
4142
if (params.executor->GetPlatform()->id() != se::cuda::kCudaPlatformId) {
42-
VLOG(1) << "[SDC LOG] Not supported on non-CUDA platforms, skipping";
43+
VLOG(1)
44+
<< "Buffer checksumming not supported on non-CUDA platforms, skipping";
4345
return absl::OkStatus();
4446
}
4547
if (!params.executor->GetDeviceDescription()
4648
.cuda_compute_capability()
4749
.IsAtLeastPascal()) {
48-
VLOG(1) << "[SDC LOG] Not supported on CUDA architectures older than "
49-
"Pascal due to missing atomic fetch_add with system scope, "
50-
"skipping";
50+
VLOG(1)
51+
<< "Buffer checksumming not supported on CUDA architectures older than "
52+
"Pascal due to missing atomic fetch_add with system scope, skipping";
5153
return absl::OkStatus();
5254
}
5355

5456
se::gpu::GpuKernelRegistry registry =
5557
se::gpu::GpuKernelRegistry::GetGlobalRegistry();
5658
TF_ASSIGN_OR_RETURN(
57-
kernel_,
58-
registry.LoadKernel<se::gpu::SdcXorChecksumKernel>(params.executor));
59+
kernel_, registry.LoadKernel<se::gpu::BufferDebugXorChecksumKernel>(
60+
params.executor));
5961

60-
VLOG(1) << "[SDC LOG] SDC kernel loaded";
62+
VLOG(1) << "Checksum kernel loaded";
6163
return absl::OkStatus();
6264
}
6365

64-
absl::Status SdcThunk::ExecuteOnStream(const ExecuteParams& params) {
66+
absl::Status BuffersDebugChecksumThunk::ExecuteOnStream(
67+
const ExecuteParams& params) {
6568
se::StreamExecutor* executor = params.stream->parent();
6669
if (!kernel_.has_value()) {
6770
// Initialize didn't load the kernel. This can happen when we're running on
6871
// an unsupported platform.
69-
VLOG(1) << "[SDC LOG] SDC kernel not loaded, skipping";
72+
VLOG(1) << "Checksum kernel not loaded, skipping";
7073
return absl::OkStatus();
7174
}
7275

73-
VLOG(1) << "[SDC LOG] SdcThunk::ExecuteOnStream";
76+
VLOG(1) << "BuffersDebugChecksumThunk::ExecuteOnStream";
7477

7578
const se::ThreadDim thread_dim(
7679
executor->GetDeviceDescription().threads_per_block_limit(), 1, 1);
7780

7881
se::DeviceMemory<uint8_t> log_ptr(
7982
params.buffer_allocations->GetDeviceAddress(log_slice_));
80-
se::cuda::SdcLog sdc_log =
81-
se::cuda::SdcLog::FromDeviceMemoryUnchecked(log_ptr);
83+
se::cuda::BufferDebugLog buffer_debug_log =
84+
se::cuda::BufferDebugLog::FromDeviceMemoryUnchecked(log_ptr);
8285

8386
for (const auto& [entry_id, buffer] : buffers_) {
8487
se::DeviceMemory<uint8_t> device_buffer(
8588
params.buffer_allocations->GetDeviceAddress(buffer));
8689

87-
TF_RETURN_IF_ERROR(
88-
kernel_->Launch(thread_dim, se::BlockDim(1, 1, 1), params.stream,
89-
entry_id, device_buffer, device_buffer.size(),
90-
sdc_log.GetDeviceHeader(), sdc_log.GetDeviceEntries()));
90+
TF_RETURN_IF_ERROR(kernel_->Launch(
91+
thread_dim, se::BlockDim(1, 1, 1), params.stream, entry_id,
92+
device_buffer, device_buffer.size(), buffer_debug_log.GetDeviceHeader(),
93+
buffer_debug_log.GetDeviceEntries()));
9194
}
9295

9396
return absl::OkStatus();
9497
}
9598

96-
std::string SdcThunk::ToString(int indent) const {
99+
std::string BuffersDebugChecksumThunk::ToString(int indent) const {
97100
std::string result;
98101
absl::StrAppend(&result, ", buffers = ", buffers_.size());
99102
for (const auto& [buffer_id, buffer] : buffers_) {

0 commit comments

Comments
 (0)