|
5 | 5 | #include "caffe/util/math_functions.hpp" |
6 | 6 |
|
7 | 7 | namespace caffe { |
8 | | - |
9 | 8 | template <typename Dtype> |
10 | 9 | void ReduceSumLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, |
11 | 10 | const vector<Blob<Dtype>*>& top) { |
@@ -58,71 +57,75 @@ namespace caffe { |
58 | 57 | top[0]->Reshape(top_shape); |
59 | 58 | } |
60 | 59 |
|
| 60 | + |
61 | 61 | template <typename Dtype> |
62 | | - void ReduceSumLayer<Dtype>::InReduceSum(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top, |
63 | | - int b_idx, int lv_in, int t_idx) { |
64 | | - const Dtype* bottom_data = bottom[0]->cpu_data(); |
65 | | - Dtype* top_data = top[0]->mutable_cpu_data(); |
66 | | - vector<int> shape_in(reduce_sum_axis_.size(), 0); |
67 | | - for (int i = 0; i < reduce_sum_axis_.size(); ++i) { |
68 | | - shape_in[i] = bottom[0]->shape()[reduce_sum_axis_[i]]; |
69 | | - } |
70 | | - for (int i = 0; i < shape_in[lv_in]; ++i) { |
71 | | - int b_idx_add = i * bottom[0]->count(reduce_sum_axis_[lv_in] + 1); |
72 | | - if (lv_in == shape_in.size() - 1) { |
73 | | - top_data[t_idx] += bottom_data[b_idx + b_idx_add]; |
74 | | - } |
75 | | - if (lv_in < shape_in.size() - 1) { |
76 | | - InReduceSum(bottom, top, b_idx + b_idx_add, lv_in + 1, t_idx); |
77 | | - } |
78 | | - } |
79 | | - }; |
| 62 | + inline vector<int> |
| 63 | + ReduceSumLayer<Dtype>::indices(int offset, const vector<int> &shape) const { |
| 64 | + vector<int> indices(shape.size()); |
| 65 | + int r = offset; |
| 66 | + for (int i = shape.size() - 1; i >= 0; i--) { |
| 67 | + indices[i] = r % shape[i]; |
| 68 | + r /= shape[i]; |
| 69 | + } |
| 70 | + return indices; |
| 71 | + } |
80 | 72 |
|
81 | 73 | template <typename Dtype> |
82 | | - void ReduceSumLayer<Dtype>::OutReduceSum(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top, |
83 | | - int lv_out, int b_idx, int lv_in, int t_idx) { |
84 | | - // parameters: shape_out, idx_out |
85 | | - vector<int> shape_out = bottom[0]->shape(); |
86 | | - vector<int> idx_out(bottom[0]->num_axes(), 0); |
87 | | - for (int i = 0; i < idx_out.size(); ++i) { |
88 | | - idx_out[i] = i; |
89 | | - } |
90 | | - for (int i = axis_dim_ - 1; i > -1; --i) { |
91 | | - shape_out.erase(shape_out.begin() + reduce_sum_axis_[i]); |
92 | | - idx_out.erase(idx_out.begin() + reduce_sum_axis_[i]); |
93 | | - } |
94 | | - // main part |
95 | | - for (int i = 0; i < shape_out[lv_out]; ++i) { |
96 | | - int b_idx_add = i * bottom[0]->count(idx_out[lv_out] + 1); |
97 | | - int t_idx_add = i * count_shape(shape_out, lv_out + 1); |
98 | | - if (lv_out == shape_out.size() - 1) { |
99 | | - InReduceSum(bottom, top, b_idx + b_idx_add, lv_in, t_idx + t_idx_add); |
100 | | - } |
101 | | - if (lv_out < shape_out.size() - 1) { |
102 | | - OutReduceSum(bottom, top, lv_out + 1, b_idx + b_idx_add, lv_in, t_idx + t_idx_add); |
103 | | - } |
| 74 | + inline int |
| 75 | + ReduceSumLayer<Dtype>::offset(const vector<Blob<Dtype>*>& bottom, const vector<int> &shape, |
| 76 | + const vector<int> &axis_ind, const vector<int> &indices) const { |
| 77 | + int offset = 0; |
| 78 | + for (int i = 0; i < axis_ind.size(); ++i) { |
| 79 | + offset += indices[i] * bottom[0]->count(axis_ind[i] + 1); |
104 | 80 | } |
105 | | - }; |
| 81 | + return offset; |
| 82 | + } |
| 83 | + |
106 | 84 |
|
107 | 85 | template <typename Dtype> |
108 | 86 | void ReduceSumLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
109 | 87 | const vector<Blob<Dtype>*>& top) { |
110 | 88 | const Dtype* bottom_data = bottom[0]->cpu_data(); |
111 | 89 | Dtype* top_data = top[0]->mutable_cpu_data(); |
| 90 | + const vector<int> bottom_shape = bottom[0]->shape(); |
112 | 91 | const int bottom_count = bottom[0]->count(); |
| 92 | + const int top_count = top[0]->count(); |
| 93 | + caffe_set(top_count, Dtype(0), top_data); |
113 | 94 | if (axis_dim_ == 0 || axis_dim_ == bottom[0]->num_axes()) { |
114 | 95 | // no axis, add all elements |
115 | 96 | for (int i = 0; i < bottom_count; ++i) { |
116 | 97 | top_data[0] += bottom_data[i]; |
117 | | - } |
| 98 | + } |
118 | 99 | } |
119 | 100 | else { |
120 | 101 | // has axis, add all elements in dim:reduce_sum_axis_ |
121 | | - int lv_out = 0; |
122 | | - int lv_in = 0; |
123 | | - int b_idx = 0; |
124 | | - int t_idx = 0; |
125 | | - OutReduceSum(bottom, top, lv_out, b_idx, lv_in, t_idx); |
| 102 | + vector<int> shape_out = bottom[0]->shape(); |
| 103 | + vector<int> axis_out(bottom[0]->num_axes(), 0); |
| 104 | + for (int i = 0; i < axis_out.size(); ++i) { |
| 105 | + axis_out[i] = i; |
| 106 | + } |
| 107 | + for (int i = axis_dim_ - 1; i > -1; --i) { |
| 108 | + shape_out.erase(shape_out.begin() + reduce_sum_axis_[i]); |
| 109 | + axis_out.erase(axis_out.begin() + reduce_sum_axis_[i]); |
| 110 | + } |
| 111 | + |
| 112 | + vector<int> shape_in(reduce_sum_axis_.size(), 0); |
| 113 | + for (int i = 0; i < reduce_sum_axis_.size(); ++i) { |
| 114 | + shape_in[i] = bottom[0]->shape()[reduce_sum_axis_[i]]; |
| 115 | + } |
| 116 | + |
| 117 | + |
| 118 | + for (int i = 0; i < top_count; ++i){ |
| 119 | + vector<int> ind_out = indices(i, shape_out); |
| 120 | + int offset_out = offset(bottom, bottom_shape, axis_out, ind_out); |
| 121 | + for (int j = 0; j < bottom_count/top_count; ++j) { |
| 122 | + vector<int> ind_in = indices(j, shape_in); |
| 123 | + int offset_in = offset(bottom, bottom_shape, reduce_sum_axis_, ind_in); |
| 124 | + int b_idx = offset_out + offset_in; |
| 125 | + top_data[i] += bottom_data[b_idx]; |
| 126 | + |
| 127 | + } |
| 128 | + } |
126 | 129 | } |
127 | 130 | } |
128 | 131 |
|
|
0 commit comments