|
| 1 | +#include "tensorflow/core/framework/op.h" |
| 2 | +#include "tensorflow/core/framework/op_kernel.h" |
| 3 | +#include <cstdio> |
| 4 | +#include <iostream> |
| 5 | +#include <typeinfo> |
| 6 | + |
| 7 | +using namespace tensorflow; |
| 8 | +using namespace std; |
| 9 | + |
| 10 | +REGISTER_OP("RoiPooling") |
| 11 | +.Input("input: float32") |
| 12 | +.Input("rois: int32") |
| 13 | +.Attr("pool_height: int") |
| 14 | +.Attr("pool_width: int") |
| 15 | +.Output("output: float32") |
| 16 | +.Output("argmax_output: int32"); |
| 17 | + |
| 18 | + |
| 19 | +#define Dtype float |
| 20 | + |
| 21 | +void RoiPoolingKernelLauncher(const float* input, const int* rois, int n_rois, int channels, int height, int width, |
| 22 | + int pooled_height, int pooled_width, Dtype* output, int* argmax_output); |
| 23 | + |
| 24 | +// IMPORTANT(maciek): need info about storage of the data in memory, assumed something but need the docs confirming it |
| 25 | + |
| 26 | +class RoiPoolingOp : public OpKernel { |
| 27 | + private: |
| 28 | + int pool_height_, pool_width_; |
| 29 | + public: |
| 30 | + explicit RoiPoolingOp(OpKernelConstruction* context) : OpKernel(context) { |
| 31 | + OP_REQUIRES_OK(context, |
| 32 | + context->GetAttr("pool_height", &pool_height_)); |
| 33 | + |
| 34 | + OP_REQUIRES_OK(context, |
| 35 | + context->GetAttr("pool_width", &pool_width_)); |
| 36 | + } |
| 37 | + |
| 38 | + |
| 39 | + void Compute(OpKernelContext* context) override { |
| 40 | + // Grab the input tensor |
| 41 | + const Tensor& input_tensor = context->input(0); |
| 42 | + const Tensor& rois_tensor = context->input(1); |
| 43 | + |
| 44 | + auto input = input_tensor.flat<float>(); |
| 45 | + auto rois = rois_tensor.flat<int32>(); |
| 46 | + |
| 47 | + // Create an output tensor |
| 48 | + Tensor* output_tensor = NULL; |
| 49 | + Tensor* argmax_output_tensor = NULL; |
| 50 | + |
| 51 | + auto input_shape = input_tensor.shape(); |
| 52 | + auto rois_shape = rois_tensor.shape(); |
| 53 | + |
| 54 | + int n_rois = rois_shape.dim_size(0); |
| 55 | + int height = input_shape.dim_size(1); |
| 56 | + int width = input_shape.dim_size(2); |
| 57 | + int channels = input_shape.dim_size(3); |
| 58 | + |
| 59 | + TensorShape output_shape = TensorShape({static_cast<int64>(n_rois), |
| 60 | + static_cast<int64>(channels), |
| 61 | + static_cast<int64>(pool_height_), |
| 62 | + static_cast<int64>(pool_width_)}); |
| 63 | + |
| 64 | + OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, |
| 65 | + &output_tensor)); |
| 66 | + |
| 67 | + OP_REQUIRES_OK(context, context->allocate_output(1, output_shape, |
| 68 | + &argmax_output_tensor)); |
| 69 | + |
| 70 | + auto output = output_tensor->template flat<float>(); |
| 71 | + auto argmax_output = argmax_output_tensor->template flat<int32>(); |
| 72 | + |
| 73 | + RoiPoolingKernelLauncher(input.data(), rois.data(), |
| 74 | + n_rois, channels, |
| 75 | + height, width, |
| 76 | + pool_height_, pool_width_, |
| 77 | + output.data(), argmax_output.data()); |
| 78 | + } |
| 79 | +}; |
| 80 | + |
| 81 | +REGISTER_KERNEL_BUILDER(Name("RoiPooling").Device(DEVICE_GPU), RoiPoolingOp); |
| 82 | + |
| 83 | +///////////// RoiPoolingGrad |
| 84 | + |
| 85 | + |
| 86 | +REGISTER_OP("RoiPoolingGrad") |
| 87 | +.Input("orig_input: float32") |
| 88 | +.Input("orig_rois: int32") |
| 89 | +.Input("orig_output: float32") |
| 90 | +.Input("orig_argmax_output: int32") |
| 91 | +.Input("orig_output_grad: float32") |
| 92 | +.Attr("pool_height: int") |
| 93 | +.Attr("pool_width: int") |
| 94 | +.Output("output: float32") |
| 95 | +.Doc(R"doc( |
| 96 | + region of interest pooling grad |
| 97 | +)doc"); |
| 98 | + |
| 99 | +#define Dtype float |
| 100 | +void RoiPoolingGradKernelLauncher(const Dtype* orig_input, const int* orig_rois, |
| 101 | + int mb_size, |
| 102 | + int n_rois, int channels, int height, int width, |
| 103 | + int pooled_height, int pooled_width, |
| 104 | + const Dtype* orig_output, const int* orig_argmax_output, |
| 105 | + const Dtype* orig_output_grad, |
| 106 | + Dtype* output); |
| 107 | + |
| 108 | +// IMPORTANT(maciek): need info about storage of the data in memory, assumed something but need the docs confirming it |
| 109 | + |
| 110 | +class RoiPoolingGradOp : public OpKernel { |
| 111 | + private: |
| 112 | + int pool_height_, pool_width_; |
| 113 | + public: |
| 114 | + explicit RoiPoolingGradOp(OpKernelConstruction* context) : OpKernel(context) { |
| 115 | + OP_REQUIRES_OK(context, |
| 116 | + context->GetAttr("pool_height", &pool_height_)); |
| 117 | + |
| 118 | + OP_REQUIRES_OK(context, |
| 119 | + context->GetAttr("pool_width", &pool_width_)); |
| 120 | + } |
| 121 | + |
| 122 | + |
| 123 | + void Compute(OpKernelContext* context) override { |
| 124 | + // Grab the input tensor |
| 125 | + const Tensor& orig_input_tensor = context->input(0); |
| 126 | + const Tensor& orig_rois_tensor = context->input(1); |
| 127 | + const Tensor& orig_output_tensor = context->input(2); |
| 128 | + const Tensor& orig_argmax_output_tensor = context->input(3); |
| 129 | + const Tensor& orig_output_grad_tensor = context->input(4); |
| 130 | + |
| 131 | + auto orig_input = orig_input_tensor.flat<float>(); |
| 132 | + auto orig_rois = orig_rois_tensor.flat<int32>(); |
| 133 | + auto orig_output = orig_output_tensor.flat<float>(); |
| 134 | + auto orig_argmax_output = orig_argmax_output_tensor.flat<int32>(); |
| 135 | + auto orig_output_grad = orig_output_grad_tensor.flat<float>(); |
| 136 | + |
| 137 | + // Create an output tensor |
| 138 | + Tensor* output_tensor = NULL; |
| 139 | + auto orig_input_shape = orig_input_tensor.shape(); |
| 140 | + auto orig_rois_shape = orig_rois_tensor.shape(); |
| 141 | + auto grads_shape = orig_input_shape; |
| 142 | + |
| 143 | + int mb_size = orig_input_shape.dim_size(0); |
| 144 | + int n_rois = orig_rois_shape.dim_size(0); |
| 145 | + int height = orig_input_shape.dim_size(1); |
| 146 | + int width = orig_input_shape.dim_size(2); |
| 147 | + int channels = orig_input_shape.dim_size(3); |
| 148 | + |
| 149 | + OP_REQUIRES_OK(context, context->allocate_output(0, grads_shape, |
| 150 | + &output_tensor)); |
| 151 | + |
| 152 | + auto output = output_tensor->template flat<float>(); |
| 153 | + |
| 154 | + // Call the cuda kernel launcher |
| 155 | + RoiPoolingGradKernelLauncher(orig_input.data(), orig_rois.data(), |
| 156 | + mb_size, n_rois, channels, height, width, pool_height_, pool_width_, |
| 157 | + orig_output.data(), orig_argmax_output.data(), orig_output_grad.data(), output.data()); |
| 158 | + } |
| 159 | +}; |
| 160 | + |
| 161 | + |
| 162 | +REGISTER_KERNEL_BUILDER(Name("RoiPoolingGrad").Device(DEVICE_GPU), RoiPoolingGradOp); |
// NOTE(review): removed non-code scraper residue ("0 commit comments") that
// trailed the file and would not compile.