@@ -95,10 +95,10 @@ Tensor _convolution_decomp(
     bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) {
   // Ignore everything. If the user called this in the normal way,
   // then they should be fine.
-  (void*) benchmark;
-  (void*) deterministic;
-  (void*) cudnn_enabled;
-  (void*) allow_tf32;
+  (void) benchmark;
+  (void) deterministic;
+  (void) cudnn_enabled;
+  (void) allow_tf32;
   return at::convolution(
       input_r, weight_r, bias_r_opt, stride_, padding_, dilation_, transposed_, output_padding_, groups_);
 }
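
A note on the hunk above: replacing the pointer casts with plain void casts is presumably just the standard way to mark these flags as intentionally unused — (void) x; discards the value and silences unused-parameter warnings, whereas casting a bool to void* is a pointer conversion that compilers may warn about. A minimal sketch of the idiom (the function name is made up for illustration):

    // Hypothetical example of the unused-parameter idiom used in the hunk above.
    void run_decomposed_conv(bool benchmark, bool deterministic) {
      (void) benchmark;      // explicitly ignored; the decomposition does not use it
      (void) deterministic;  // same: discard the value to avoid -Wunused-parameter
    }
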
@@ -149,73 +149,73 @@ bool first_dim_has_size_1(const Tensor& value, int64_t bdim) {
   return value.size(0) == 1;
 }
 
-std::tuple<Tensor,int64_t,Tensor,int64_t> cudnn_conv_per_sample_grad_rule(
-    const Tensor& self, optional<int64_t> self_bdim,
-    const Tensor& grad_output, optional<int64_t> grad_output_bdim,
-    const Tensor& weight, optional<int64_t> weight_bdim,
-    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark,
-    bool deterministic, bool allow_tf32, std::array<bool, 2> output_mask) {
-  TORCH_INTERNAL_ASSERT(self_bdim && grad_output_bdim && !weight_bdim);
-  // TODO: No clue if this works if the first non-batch dim isn't size 1
-  TORCH_INTERNAL_ASSERT(first_dim_has_size_1(self, *self_bdim));
-  TORCH_INTERNAL_ASSERT(self.dim() == 5);
-
-  auto bdim_size = self.size(*self_bdim);
-  auto self_ = reshape_dim_into(*self_bdim, 0, self);
-  auto in_channels = self_.size(1);
-  auto grad_output_ = reshape_dim_into(*grad_output_bdim, 0, grad_output);
-
-  auto grad_self = at::cudnn_convolution_backward_input(
-      self_.sizes(), grad_output_, weight,
-      padding, stride, dilation, groups, benchmark, deterministic, allow_tf32);
-  grad_self = reshape_dim_outof(0, bdim_size, grad_self);
-
-  // Copied from https://github.com/pytorch/opacus/blob/master/opacus/grad_sample/conv.py
-  auto A = at::im2col(self_, {weight.size(2), weight.size(3)}, dilation, padding, stride);
-  auto B = grad_output_.reshape({bdim_size, -1, A.size(-1)});
-  auto grad_sample = at::einsum("noq,npq->nop", {B, A});
-  grad_sample = grad_sample.view({
-      bdim_size, groups, -1, groups, in_channels / groups,
-      weight.size(2) * weight.size(3) });
-  grad_sample = at::einsum("ngrg...->ngr...", {grad_sample});
-  grad_sample = grad_sample.reshape(
-      {bdim_size, weight.size(0), weight.size(1), weight.size(2), weight.size(3)});
-
-  return std::make_tuple(grad_self, 0, grad_sample, 0);
-}
-
-std::tuple<Tensor,Tensor> cudnn_convolution_backward_plumbing(const Tensor & self, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32, std::array<bool, 2> output_mask) {
-  auto maybe_layer = maybeCurrentDynamicLayer();
-  TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
-  int64_t cur_level = maybe_layer->layerId();
-
-  Tensor self_value;
-  optional<int64_t> self_bdim;
-  std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level);
-  Tensor grad_output_value;
-  optional<int64_t> grad_output_bdim;
-  std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level);
-  Tensor weight_value;
-  optional<int64_t> weight_bdim;
-  std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level);
-
-  if (self_bdim.has_value() && self_value.dim() == 5 && first_dim_has_size_1(self_value, *self_bdim) && grad_output_bdim.has_value() && !weight_bdim.has_value()) {
-    c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
-    auto result = cudnn_conv_per_sample_grad_rule(
-        self_value, self_bdim,
-        grad_output_value, grad_output_bdim,
-        weight_value, weight_bdim,
-        padding, stride, dilation, groups,
-        benchmark, deterministic, allow_tf32, output_mask);
-    return std::make_tuple(
-        makeBatched(std::get<0>(result), std::get<1>(result), cur_level),
-        makeBatched(std::get<2>(result), std::get<3>(result), cur_level));
-  }
-
-  static auto op = c10::Dispatcher::singleton()
-    .findSchemaOrThrow("aten::cudnn_convolution_backward", "");
-  return slow_fallback<Tensor,Tensor>(op, { self, grad_output, weight, padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, output_mask });
-}
+// std::tuple<Tensor,int64_t,Tensor,int64_t> cudnn_conv_per_sample_grad_rule(
+//     const Tensor& self, optional<int64_t> self_bdim,
+//     const Tensor& grad_output, optional<int64_t> grad_output_bdim,
+//     const Tensor& weight, optional<int64_t> weight_bdim,
+//     IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark,
+//     bool deterministic, bool allow_tf32, std::array<bool, 2> output_mask) {
+//   TORCH_INTERNAL_ASSERT(self_bdim && grad_output_bdim && !weight_bdim);
+//   // TODO: No clue if this works if the first non-batch dim isn't size 1
+//   TORCH_INTERNAL_ASSERT(first_dim_has_size_1(self, *self_bdim));
+//   TORCH_INTERNAL_ASSERT(self.dim() == 5);
+//
+//   auto bdim_size = self.size(*self_bdim);
+//   auto self_ = reshape_dim_into(*self_bdim, 0, self);
+//   auto in_channels = self_.size(1);
+//   auto grad_output_ = reshape_dim_into(*grad_output_bdim, 0, grad_output);
+//
+//   auto grad_self = at::cudnn_convolution_backward_input(
+//       self_.sizes(), grad_output_, weight,
+//       padding, stride, dilation, groups, benchmark, deterministic, allow_tf32);
+//   grad_self = reshape_dim_outof(0, bdim_size, grad_self);
+//
+//   // Copied from https://github.com/pytorch/opacus/blob/master/opacus/grad_sample/conv.py
+//   auto A = at::im2col(self_, {weight.size(2), weight.size(3)}, dilation, padding, stride);
+//   auto B = grad_output_.reshape({bdim_size, -1, A.size(-1)});
+//   auto grad_sample = at::einsum("noq,npq->nop", {B, A});
+//   grad_sample = grad_sample.view({
+//       bdim_size, groups, -1, groups, in_channels / groups,
+//       weight.size(2) * weight.size(3) });
+//   grad_sample = at::einsum("ngrg...->ngr...", {grad_sample});
+//   grad_sample = grad_sample.reshape(
+//       {bdim_size, weight.size(0), weight.size(1), weight.size(2), weight.size(3)});
+//
+//   return std::make_tuple(grad_self, 0, grad_sample, 0);
+// }
+//
+// std::tuple<Tensor,Tensor> cudnn_convolution_backward_plumbing(const Tensor & self, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32, std::array<bool, 2> output_mask) {
+//   auto maybe_layer = maybeCurrentDynamicLayer();
+//   TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
+//   int64_t cur_level = maybe_layer->layerId();
+//
+//   Tensor self_value;
+//   optional<int64_t> self_bdim;
+//   std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level);
+//   Tensor grad_output_value;
+//   optional<int64_t> grad_output_bdim;
+//   std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level);
+//   Tensor weight_value;
+//   optional<int64_t> weight_bdim;
+//   std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level);
+//
+//   if (self_bdim.has_value() && self_value.dim() == 5 && first_dim_has_size_1(self_value, *self_bdim) && grad_output_bdim.has_value() && !weight_bdim.has_value()) {
+//     c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
+//     auto result = cudnn_conv_per_sample_grad_rule(
+//         self_value, self_bdim,
+//         grad_output_value, grad_output_bdim,
+//         weight_value, weight_bdim,
+//         padding, stride, dilation, groups,
+//         benchmark, deterministic, allow_tf32, output_mask);
+//     return std::make_tuple(
+//         makeBatched(std::get<0>(result), std::get<1>(result), cur_level),
+//         makeBatched(std::get<2>(result), std::get<3>(result), cur_level));
+//   }
+//
+//   static auto op = c10::Dispatcher::singleton()
+//     .findSchemaOrThrow("aten::cudnn_convolution_backward", "");
+//   return slow_fallback<Tensor,Tensor>(op, { self, grad_output, weight, padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, output_mask });
+// }
 
 std::tuple<Tensor,optional<int64_t>> embedding_batch_rule(
     const Tensor& weight, optional<int64_t> weight_bdim,
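
For context on the code disabled above: cudnn_conv_per_sample_grad_rule returns one weight gradient per batched sample rather than the usual summed gradient, using the im2col + einsum trick borrowed from Opacus. A minimal sketch of that computation, assuming groups == 1 and a 4-D NCHW input; the helper name is hypothetical and not part of this patch:

    // Sketch only: per-sample conv2d weight gradients via im2col + einsum,
    // mirroring the logic of the commented-out rule (groups == 1 case).
    at::Tensor per_sample_weight_grad_sketch(
        const at::Tensor& input, const at::Tensor& grad_output,
        const at::Tensor& weight, at::IntArrayRef padding,
        at::IntArrayRef stride, at::IntArrayRef dilation) {
      auto N = input.size(0);
      // A: (N, in_channels * kH * kW, L) columns of unfolded input patches
      auto A = at::im2col(input, {weight.size(2), weight.size(3)},
                          dilation, padding, stride);
      // B: (N, out_channels, L) grad_output flattened over output positions
      auto B = grad_output.reshape({N, grad_output.size(1), -1});
      // Contract over the L positions separately for each sample n:
      // result is (N, out_channels, in_channels * kH * kW)
      auto grad_sample = at::einsum("noq,npq->nop", {B, A});
      return grad_sample.reshape(
          {N, weight.size(0), weight.size(1), weight.size(2), weight.size(3)});
    }

The disabled rule extends this to grouped convolutions by viewing the result as (N, groups, -1, groups, in_channels / groups, kH * kW) and taking the diagonal over the two group dimensions with einsum("ngrg...->ngr..."), and it computes grad_input separately with cudnn_convolution_backward_input on the flattened batch.
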
@@ -563,9 +563,9 @@ struct CudnnGridSampleBackwardBatchRuleHelper {
 TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
   VMAP_SUPPORT("convolution", convolution_batch_rule);
   m.impl("_convolution", _convolution_decomp);
-  m.impl("mkldnn_convolution", mkldnn_convolution_decomp);
-  m.impl("cudnn_convolution_backward", cudnn_convolution_backward_plumbing);
-  m.impl("cudnn_convolution", cudnn_convolution_plumbing);
+  // m.impl("mkldnn_convolution", mkldnn_convolution_decomp);
+  // m.impl("cudnn_convolution_backward", cudnn_convolution_backward_plumbing);
+  // m.impl("cudnn_convolution", cudnn_convolution_plumbing);
 
   EXISTING_BDIM(im2col);
   EXISTING_BDIM(im2col_backward);