
Commit a948eea

Author: xutianbing (committed)
clean unused code.
1 parent 58827e3

13 files changed (+64, −557 lines)


paddle/cuda/include/hl_matrix.h

Lines changed: 0 additions & 42 deletions
@@ -188,48 +188,6 @@ extern void hl_param_relu_backward_diff(real* grad_o,
                                         int width,
                                         int height,
                                         int partial_sum);
-/**
- * @brief cos sim forward
- *
- * @param[out]  output         output data
- * @param[in]   input1         input1 data(matrix)
- * @param[in]   input2         input2 data(matrix or vector)
- * @param[in]   width          matrix width
- * @param[in]   input1_height  input1_height
- * @param[in]   input2_height  input2_height
- * @param[in]   scale          scale factor
- */
-extern void hl_cossim(real* output,
-                      real* input1,
-                      real* input2,
-                      int width,
-                      int input1_height,
-                      int input2_height,
-                      real scale);
-/**
- * @brief cos sim derivate
- *
- * @param[in]   grad           output grad
- * @param[in]   output         output data
- * @param[in]   prevOutX       input1 data
- * @param[in]   prevOutY       input2 data
- * @param[out]  prevGradX      input1 grad
- * @param[out]  prevGradY      input2 grad
- * @param[in]   width          matrix width
- * @param[in]   input1_height  input1 height
- * @param[in]   input2_height  input2 height
- * @param[in]   scale          scale factor
- */
-extern void hl_cossim_derivative(real* grad,
-                                 real* output,
-                                 real* prevOutX,
-                                 real* prevOutY,
-                                 real* prevGradX,
-                                 real* prevGradY,
-                                 int width,
-                                 int input1_height,
-                                 int input2_height,
-                                 real scale);
 
 /**
  * @brief Matrix addition: A_d[i][j] += scale * B_d[j/channel].

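For reference, the removed hl_cossim entry point computed a per-row scaled cosine similarity, broadcasting input2 across rows when it holds a single vector. A minimal CPU sketch of that contract (cossim_reference is an illustrative name and real is fixed to float here; neither is part of the codebase):

#include <cmath>

// Hypothetical CPU reference for the removed hl_cossim:
// output[i] = scale * <x_i, y_i> / (|x_i| * |y_i|) for each row i.
void cossim_reference(float* output, const float* input1, const float* input2,
                      int width, int input1_height, int input2_height,
                      float scale) {
  for (int i = 0; i < input1_height; ++i) {
    const float* x = input1 + i * width;
    // input2 is broadcast when it holds a single row (input2_height == 1).
    const float* y = input2 + (input2_height > 1 ? i * width : 0);
    float xx = 0, yy = 0, xy = 0;
    for (int j = 0; j < width; ++j) {
      xx += x[j] * x[j];
      yy += y[j] * y[j];
      xy += x[j] * y[j];
    }
    output[i] = scale * xy / (std::sqrt(xx) * std::sqrt(yy));
  }
}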
paddle/cuda/include/stub/hl_matrix_stub.h

Lines changed: 0 additions & 19 deletions
@@ -74,25 +74,6 @@ inline void hl_param_relu_backward_diff(real* grad_o,
                                         int height,
                                         int partial_sum) {}
 
-inline void hl_cossim(real* output,
-                      real* input1,
-                      real* input2,
-                      int width,
-                      int input1_height,
-                      int input2_height,
-                      real scale) {}
-
-inline void hl_cossim_derivative(real* grad,
-                                 real* output,
-                                 real* prevOutX,
-                                 real* prevOutY,
-                                 real* prevGradX,
-                                 real* prevGradY,
-                                 int width,
-                                 int input1_height,
-                                 int input2_height,
-                                 real scale) {}
-
 inline void hl_matrix_add_shared_bias(real* A_d,
                                       real* B_d,
                                       const int channel,

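These stubs exist so that builds without CUDA still compile and link code that calls the GPU entry points; removing the functions therefore means removing their no-op twins too. A minimal sketch of the pairing (HL_CPU_ONLY_BUILD is a hypothetical guard for illustration; the real project selects between hl_matrix.h and the stub header through its build configuration):

typedef float real;  // Paddle's `real` is float or double depending on build

#ifndef HL_CPU_ONLY_BUILD  // hypothetical guard, illustration only
extern void hl_example_op(real* data, int size);    // backed by a CUDA kernel
#else
inline void hl_example_op(real* data, int size) {}  // no-op stub, still links
#endif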
paddle/cuda/src/hl_cuda_matrix.cu

Lines changed: 0 additions & 171 deletions
@@ -584,177 +584,6 @@ void hl_param_relu_backward_diff(real* grad_o,
   CHECK_SYNC("hl_param_relu_backward_diff failed");
 }
 
-template<int blockSize>
-__global__ void KeCosSim(real* output,
-                         real* input1,
-                         real* input2,
-                         int width,
-                         int input1_height,
-                         int input2_height,
-                         real scale) {
-  const int ty = blockIdx.y;
-  int tid = threadIdx.x;
-
-  __shared__ real xx[blockSize];
-  __shared__ real yy[blockSize];
-  __shared__ real xy[blockSize];
-
-  xx[tid] = 0.0;
-  yy[tid] = 0.0;
-  xy[tid] = 0.0;
-  __syncthreads();
-
-  input1 += ty * width;
-  if (input2_height > 1) {
-    input2 += ty * width;
-  }
-  for (int index = tid; index < width; index += blockSize) {
-    real x = input1[index];
-    real y = input2[index];
-    xx[tid] += x * x;
-    yy[tid] += y * y;
-    xy[tid] += x * y;
-  }
-  __syncthreads();
-
-  for (int s = blockSize / 2; s > 0; s >>= 1) {
-    if (tid < s) {
-      xx[tid] += xx[tid + s];
-      yy[tid] += yy[tid + s];
-      xy[tid] += xy[tid + s];
-    }
-    __syncthreads();
-  }
-  if (tid == 0) {
-    output[ty] = scale * xy[0] / (sqrt(xx[0]) * sqrt(yy[0]));
-  }
-}
-
-void hl_cossim(real* output,
-               real* input1,
-               real* input2,
-               int width,
-               int input1_height,
-               int input2_height,
-               real scale) {
-  CHECK_NOTNULL(output);
-  CHECK_NOTNULL(input1);
-  CHECK_NOTNULL(input2);
-  const int blockSize = 256;
-  dim3 threads(blockSize, 1);
-  dim3 grid(1, input1_height);
-
-  KeCosSim<blockSize><<<grid, threads, 0, STREAM_DEFAULT>>>
-    (output, input1, input2, width, input1_height, input2_height, scale);
-  CHECK_SYNC("hl_cossim failed");
-}
-
-template<int blockSize>
-__global__ void KeCosSimDerivative(real* grad,
-                                   real* output,
-                                   real* prevOutX,
-                                   real* prevOutY,
-                                   real* prevGradX,
-                                   real* prevGradY,
-                                   int width,
-                                   int input1_height,
-                                   int input2_height,
-                                   real scale) {
-  const int ty = blockIdx.y;
-  int tid = threadIdx.x;
-
-  __shared__ real xx[blockSize];
-  __shared__ real yy[blockSize];
-  __shared__ real xy[blockSize];
-
-  xx[tid] = 0.0;
-  yy[tid] = 0.0;
-  xy[tid] = 0.0;
-  __syncthreads();
-
-  prevOutX += ty * width;
-  prevGradX += ty * width;
-  if (input2_height > 1) {
-    prevOutY += ty * width;
-    prevGradY += ty * width;
-  }
-  for (int index = tid; index < width; index += blockSize) {
-    real x = prevOutX[index];
-    real y = prevOutY[index];
-    xx[tid] += x * x;
-    yy[tid] += y * y;
-    xy[tid] += x * y;
-  }
-  __syncthreads();
-
-  for (int s = blockSize / 2; s > 0; s >>= 1) {
-    if (tid < s) {
-      xx[tid] += xx[tid + s];
-      yy[tid] += yy[tid + s];
-      xy[tid] += xy[tid + s];
-    }
-    __syncthreads();
-  }
-  if (xy[0] == 0) {
-    real reciprocal = 1.0 / (sqrt(xx[0]) * sqrt(yy[0]));
-    for (int index = tid; index < width; index += blockSize) {
-      prevGradX[index] +=
-        scale * grad[ty] * prevOutY[index] * reciprocal;
-      if (input2_height > 1) {
-        prevGradY[index] +=
-          scale * grad[ty] * prevOutX[index] * reciprocal;
-      } else {
-        paddle::paddleAtomicAdd(prevGradY + index,
-          scale * grad[ty] * prevOutX[index] * reciprocal);
-      }
-    }
-  } else {
-    real reciprocalXY = 1.0 / xy[0];
-    real reciprocalSquareSumX = 1.0 / xx[0];
-    real reciprocalSquareSumY = 1.0 / yy[0];
-    for (int index = tid; index < width; index += blockSize) {
-      prevGradX[index] += output[ty] * grad[ty] *
-        (prevOutY[index] * reciprocalXY -
-         prevOutX[index] * reciprocalSquareSumX);
-      if (input2_height > 1) {
-        prevGradY[index] += output[ty] * grad[ty] *
-          (prevOutX[index] * reciprocalXY -
-           prevOutY[index] * reciprocalSquareSumY);
-      } else {
-        paddle::paddleAtomicAdd(prevGradY + index, output[ty] * grad[ty] *
-          (prevOutX[index] * reciprocalXY -
-           prevOutY[index] * reciprocalSquareSumY));
-      }
-    }
-  }
-}
-
-
-void hl_cossim_derivative(real* grad,
-                          real* output,
-                          real* prevOutX,
-                          real* prevOutY,
-                          real* prevGradX,
-                          real* prevGradY,
-                          int width,
-                          int input1_height,
-                          int input2_height,
-                          real scale) {
-  CHECK_NOTNULL(grad);
-  CHECK_NOTNULL(output);
-  CHECK_NOTNULL(prevOutX);
-  CHECK_NOTNULL(prevOutY);
-  CHECK_NOTNULL(prevGradX);
-  CHECK_NOTNULL(prevGradY);
-  const int blockSize = 256;
-  dim3 threads(blockSize, 1);
-  dim3 grid(1, input1_height);
-  KeCosSimDerivative<blockSize><<<grid, threads, 0, STREAM_DEFAULT>>>
-    (grad, output, prevOutX, prevOutY, prevGradX, prevGradY, width,
-     input1_height, input2_height, scale);
-  CHECK_SYNC("hl_cossim_derivate failed");
-}
-
 
 __global__ void KeMatrixAddSharedBias(real* A,
                                       real* B,
                                       const int channel,

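The removed KeCosSim and KeCosSimDerivative kernels both rely on the same shared-memory tree reduction: each thread accumulates a strided partial sum into its slot, then the buffer is folded in halves until slot 0 holds the block total. A standalone CUDA sketch of just that pattern (BlockSum is illustrative, not from the commit):

// Each block reduces in[0..n) to a single value in *out.
template <int blockSize>
__global__ void BlockSum(const float* in, float* out, int n) {
  __shared__ float buf[blockSize];
  int tid = threadIdx.x;
  float sum = 0.0f;
  for (int i = tid; i < n; i += blockSize) sum += in[i];  // strided partial sum
  buf[tid] = sum;
  __syncthreads();
  for (int s = blockSize / 2; s > 0; s >>= 1) {  // fold halves: tree reduction
    if (tid < s) buf[tid] += buf[tid + s];
    __syncthreads();
  }
  if (tid == 0) *out = buf[0];
}

// Example launch, mirroring the removed kernels' configuration:
//   BlockSum<256><<<1, 256>>>(d_in, d_out, n);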
paddle/function/CosSimOp.cpp

Lines changed: 27 additions & 25 deletions
@@ -34,7 +34,6 @@ void CosSimForward<DEVICE_TYPE_CPU>(CpuMatrix* out_mat,
   CHECK(in2_mat->getHeight() == 1LU || in2_mat->getHeight() == num_samples);
   size_t inc = (in2_mat->getHeight() == 1LU) ? 0 : dim;
   for (size_t i = 0; i < num_samples; ++i, x += dim, y += inc) {
-    /// for each row, todo(tianbing), use TensorExpression square2 ?
     real square_sum_x = 0;
     real square_sum_y = 0;
     real xy = 0;
@@ -147,12 +146,15 @@ void CosSimBackward<DEVICE_TYPE_CPU>(const CpuMatrix* out_grad,
 }
 
 /**
- * \param inputs[0] output value 1, size: nSamples * 1.
- * \param inputs[1] input value 1, size: nSamples * dim.
- * \param inputs[2] input value 2, size: n2 * dim (n2 == 1 or n2 == nSamples).
- * \param inputs[3] input grad 1, size: nSamples * dim.
- * \param inputs[4] input grad 2, size: n2 * dim (n2 == 1 or n2 == nSamples).
- * \param outputs[0] output grad, size : nSamples * 1.
+ * \param inouts[0] forward input grad 1, size: nSamples * dim.
+ * \param inouts[1] forward input grad 2,
+ *                  size: n2 * dim (n2 == 1 or n2 == nSamples).
+ *
+ * \param inputs[0] backward loss output grad, size : nSamples * 1.
+ * \param inputs[1] forward output value, size: nSamples * 1.
+ * \param inputs[2] forward input value 1, size: nSamples * dim.
+ * \param inputs[3] forward input value 2,
+ *                  size: n2 * dim (n2 == 1 or n2 == nSamples).
  */
 template <DeviceType Device>
 class CosSimBackwardFunc : public FunctionBase {
@@ -163,35 +165,35 @@ class CosSimBackwardFunc : public FunctionBase {
   void calc(const Arguments& inputs,
             const Arguments& outputs,
             const Arguments& inouts) override {
-    CHECK_EQ(inputs.size(), 5);
-    CHECK_EQ(outputs.size(), 1);
-    CHECK_EQ(inouts.size(), 0);
+    CHECK_EQ(inputs.size(), 4);
+    CHECK_EQ(outputs.size(), 0);
+    CHECK_EQ(inouts.size(), 2);
     /// dim of out_grad and out_val == 1, column vector
-    CHECK_EQ(outputs[0].dims_[1], 1UL);
     CHECK_EQ(inputs[0].dims_[1], 1UL);
+    CHECK_EQ(inputs[1].dims_[1], 1UL);
     /// nSamples of out_grad == out_val == in_val1 == in_grad1
-    CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
-    CHECK_EQ(inputs[1].dims_[0], outputs[0].dims_[0]);
-    CHECK_EQ(inputs[3].dims_[0], outputs[0].dims_[0]);
+    CHECK_EQ(inputs[1].dims_[0], inputs[0].dims_[0]);
+    CHECK_EQ(inputs[0].dims_[0], inputs[0].dims_[0]);
+    CHECK_EQ(inouts[0].dims_[0], inputs[0].dims_[0]);
     /// dim of in1_val1 == in_val2 == in_grad1 == in_grad2
-    CHECK_EQ(inputs[2].dims_[1], inputs[1].dims_[1]);
-    CHECK_EQ(inputs[3].dims_[1], inputs[1].dims_[1]);
-    CHECK_EQ(inputs[4].dims_[1], inputs[1].dims_[1]);
+    CHECK_EQ(inputs[3].dims_[1], inputs[2].dims_[1]);
+    CHECK_EQ(inouts[0].dims_[1], inputs[2].dims_[1]);
+    CHECK_EQ(inouts[1].dims_[1], inputs[2].dims_[1]);
 
-    CHECK(outputs[0].getData() && inputs[0].getData() && inputs[1].getData() &&
-          inputs[2].getData() && inputs[3].getData() && inputs[4].getData());
+    CHECK(inputs[0].getData() && inputs[1].getData() && inputs[2].getData() &&
+          inputs[3].getData() && inouts[0].getData() && inouts[1].getData());
     const auto out_grad = std::make_shared<typename MatrixT<Device>::type>(
-        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
-    const auto out_val = std::make_shared<typename MatrixT<Device>::type>(
         inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
-    const auto in1_val = std::make_shared<typename MatrixT<Device>::type>(
+    const auto out_val = std::make_shared<typename MatrixT<Device>::type>(
         inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
-    const auto in2_val = std::make_shared<typename MatrixT<Device>::type>(
+    const auto in1_val = std::make_shared<typename MatrixT<Device>::type>(
         inputs[2].getData(), inputs[2].dims_[0], inputs[2].dims_[1]);
-    auto in1_grad = std::make_shared<typename MatrixT<Device>::type>(
+    const auto in2_val = std::make_shared<typename MatrixT<Device>::type>(
         inputs[3].getData(), inputs[3].dims_[0], inputs[3].dims_[1]);
+    auto in1_grad = std::make_shared<typename MatrixT<Device>::type>(
+        inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
     auto in2_grad = std::make_shared<typename MatrixT<Device>::type>(
-        inputs[4].getData(), inputs[4].dims_[0], inputs[4].dims_[1]);
+        inouts[1].getData(), inouts[1].dims_[0], inouts[1].dims_[1]);
 
     CosSimBackward<Device>(out_grad.get(),
                            out_val.get(),

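The refactor moves the two writable gradient buffers out of inputs and into inouts, so calc now takes four read-only inputs and two in/out gradients, with the CHECKs and matrix wrappers renumbered to match. For the update those buffers receive: with c = scale * <x, y> / (|x| |y|), the row gradients are dL/dx_j += dL/dc * c * (y_j / <x, y> - x_j / <x, x>), and symmetrically for y. A hypothetical single-row CPU sketch of that update, mirroring the new in/out layout (names illustrative; no broadcast or <x, y> == 0 handling):

// x_grad / y_grad play the role of inouts[0] / inouts[1] for one sample;
// out_val is the forward cosine value c (scale already folded in).
void cossim_backward_reference(float out_grad, float out_val,
                               const float* x, const float* y,
                               float* x_grad, float* y_grad, int dim) {
  float xx = 0, yy = 0, xy = 0;
  for (int j = 0; j < dim; ++j) {  // recompute the three inner products
    xx += x[j] * x[j];
    yy += y[j] * y[j];
    xy += x[j] * y[j];
  }
  for (int j = 0; j < dim; ++j) {  // accumulate into the in/out gradients
    x_grad[j] += out_val * out_grad * (y[j] / xy - x[j] / xx);
    y_grad[j] += out_val * out_grad * (x[j] / xy - y[j] / yy);
  }
}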
paddle/function/CosSimOp.h

Lines changed: 10 additions & 10 deletions
@@ -25,9 +25,9 @@ namespace paddle {
  * = scale * \sum_j (in1[i][j] * in2[i][j]) /
  *          sqrt(sum_j (in1[i][j]^2) * sum_j (in2[i][j])^2)
  *
- * \param[out]  output  output data.
- * \param[in]   intput1 input data.
- * \param[in]   intput2 input data.
+ * \param[out]  output  output value.
+ * \param[in]   intput1 input value.
+ * \param[in]   intput2 input value.
  * \param[in]   scale   default 1.0.
  *
  */
@@ -40,13 +40,13 @@ void CosSimForward(typename MatrixT<Device>::type* output,
 /**
  * \brief Cosine Similarity BackWard for Derivative.
  *
- * \param[out]  output1 backward loss output grad.
- * \param[in]   input1  forward-output value.
- * \param[in]   input2  forward input value 1.
- * \param[in]   input3  forward input value 2.
- * \param[in]   input4  forward input grad 1.
- * \param[in]   input5  forward input grad 2.
- * \param[in]   scale   default 1.0.
+ * \param[in]     output grad  backward loss output grad.
+ * \param[in]     output val   forward-output value.
+ * \param[in]     input val1   forward input value 1.
+ * \param[in]     input val2   forward input value 2.
+ * \param[in/out] input grad   forward input grad 1.
+ * \param[in/out] input grad   forward input grad 2.
+ * \param[in]     scale        default 1.0.
  *
  */
 template <DeviceType Device>

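For readability, the forward formula from the CosSimForward doc comment above, typeset in LaTeX (same quantities, nothing new):

\mathrm{out}[i] \;=\; \mathrm{scale} \cdot
  \frac{\sum_j \mathrm{in1}[i][j]\,\mathrm{in2}[i][j]}
       {\sqrt{\sum_j \mathrm{in1}[i][j]^2}\,\sqrt{\sum_j \mathrm{in2}[i][j]^2}}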