Skip to content

Commit 08ae0af

Browse files
q10 authored and facebook-github-bot committed
Migrate backward cta kernel arguments to use PTA_B (#4831)
Summary:
Pull Request resolved: #4831

- Migrate backward cta kernel arguments to use PTA_B

Reviewed By: ionuthristodorescu

Differential Revision: D79978498

fbshipit-source-id: 2659f1927dfd8b319bf722bebd2ea7513c7aaa03
1 parent 49050d9 commit 08ae0af

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

fbgemm_gpu/codegen/training/backward/embedding_backward_split_kernel_cta_template.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ batch_index_select_dim0_codegen_backward_kernel_cta_per_row(
159159
const bool enable_optimizer_offloading,
160160
{%- endif %}
161161
{%- if is_index_select %}
162-
const at::PackedTensorAccessor32<int64_t, 1, at::RestrictPtrTraits> grad_offsets,
162+
const pta::PackedTensorAccessor32<int64_t, 1, at::RestrictPtrTraits> grad_offsets,
163163
const bool permute_output_dim_0_1
164164
{%- else %}
165165
{{ args.split_kernel_args | replace_pta_namespace() | join(",\n ") }}
@@ -533,7 +533,7 @@ batch_index_select_dim0_codegen_backward_kernel_cta_per_row
533533
const bool enable_optimizer_offloading,
534534
{%- endif %}
535535
{%- if is_index_select %}
536-
const at::PackedTensorAccessor32<int64_t, 1, at::RestrictPtrTraits> grad_offsets,
536+
const pta::PackedTensorAccessor32<int64_t, 1, at::RestrictPtrTraits> grad_offsets,
537537
const bool permute_output_dim_0_1
538538
{%- else %}
539539
{{ args.split_kernel_args_no_defaults |

fbgemm_gpu/codegen/training/backward/embedding_backward_split_template.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ batch_index_select_dim0_codegen_backward_kernel_cta_per_row(
134134
const bool enable_optimizer_offloading,
135135
{%- endif %}
136136
{%- if is_index_select %}
137-
const at::PackedTensorAccessor32<int64_t, 1, at::RestrictPtrTraits> grad_offsets,
137+
const pta::PackedTensorAccessor32<int64_t, 1, at::RestrictPtrTraits> grad_offsets,
138138
const bool permute_output_dim_0_1
139139
{%- else %}
140140
{{ args.split_kernel_args | replace_pta_namespace() | join(",\n ") }}
@@ -1131,7 +1131,7 @@ Tensor {{ embedding_cuda_op }}(
11311131
enable_optimizer_offloading,
11321132
{%- endif %}
11331133
{%- if is_index_select %}
1134-
grad_offsets.packed_accessor32<int64_t, 1, at::RestrictPtrTraits>(),
1134+
PTA_B(grad_offsets, int64_t, 1, 32),
11351135
permute_output_dim_0_1
11361136
{%- else %}
11371137
{{ args.split_kernel_arg_constructors | make_pta_acc_builder_format() | join(",\n ") }}

0 commit comments

Comments
 (0)