
Commit bf6f690

Author: xuwei06

Add ScalingProjection

out = w * input, where w is a parameter of size 1.

Change-Id: Ife682d62323ceb1a20cbbf6269421b20a862d888

1 parent 0ba0f02 commit bf6f690

File tree: 11 files changed (+223, -31 lines)

doc/ui/api/trainer_config_helpers/layers.rst

Lines changed: 6 additions & 0 deletions

```diff
@@ -191,6 +191,12 @@ embedding_layer
     :members: embedding_layer
     :noindex:
 
+scaling_projection
+------------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: scaling_projection
+    :noindex:
+
 dotmul_projection
 -----------------
 .. automodule:: paddle.trainer_config_helpers.layers
```

paddle/gserver/layers/FullMatrixProjection.cpp

Lines changed: 3 additions & 1 deletion

```diff
@@ -52,7 +52,9 @@ void FullMatrixProjection::backward(const UpdateCallback& callback) {
   }
 
   hl_set_sync_flag(syncFlag);
-  parameter_->incUpdate(callback);
+  if (weight_->getWGrad()) {
+    parameter_->incUpdate(callback);
+  }
 }
 
 }  // namespace paddle
```
paddle/gserver/layers/ScalingProjection.cpp (new file)

Lines changed: 53 additions & 0 deletions

```diff
@@ -0,0 +1,53 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Projection.h"
+
+namespace paddle {
+
+class ScalingProjection : public Projection {
+public:
+  ScalingProjection(const ProjectionConfig& config,
+                    const ParameterPtr& parameter, bool useGpu)
+      : Projection(config, parameter, useGpu) {
+    CHECK_EQ(parameter->getSize(), 1UL);
+    weight_.reset(new Weight(1, 1, parameter));
+  }
+
+  void forward() {
+    CHECK(in_->value);
+    out_->value->add(*in_->value, weight_->getW()->getElement(0, 0));
+  }
+
+  void backward(const UpdateCallback& callback) {
+    if (weight_->getWGrad()) {
+      auto sum = Matrix::create(in_->value->getHeight(), 1, false, useGpu_);
+      sum->sumOfProducts(*in_->value, *out_->grad,
+                         /* scaleSum= */1, /* scaleDest= */0);
+      weight_->getWGrad()->sumCols(*sum,
+                                   /* scaleSum= */1, /* scaleDest= */1);
+      parameter_->incUpdate(callback);
+    }
+    if (in_->grad) {
+      in_->grad->add(*out_->grad, weight_->getW()->getElement(0, 0));
+    }
+  }
+
+protected:
+  std::unique_ptr<Weight> weight_;
+};
+
+REGISTER_PROJECTION(scaling, ScalingProjection);
+
+}  // namespace paddle
```
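The math here is small enough to state directly: the forward pass scales the whole input matrix by the single scalar w; in the backward pass, dL/dw = sum_ij in_ij * outgrad_ij (computed row-wise by sumOfProducts, then reduced by sumCols) and dL/din = w * outgrad. A NumPy sketch of the same computation, with illustrative names rather than Paddle API:

```python
import numpy as np

def scaling_forward(x, w):
    # out = w * input, with a single scalar parameter w
    return w * x

def scaling_backward(x, w, out_grad):
    # dL/dw: sum of the elementwise product over the whole batch,
    # i.e. sumOfProducts along rows followed by sumCols in the C++ above
    w_grad = np.sum(x * out_grad)
    # dL/dinput: the same scalar w scales the output gradient
    in_grad = w * out_grad
    return w_grad, in_grad

x = np.random.randn(100, 10)          # batchSize=100, input size 10
out = scaling_forward(x, 0.5)
w_grad, in_grad = scaling_backward(x, 0.5, np.ones_like(out))
```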

paddle/gserver/tests/test_LayerGrad.cpp

Lines changed: 11 additions & 0 deletions

```diff
@@ -135,6 +135,17 @@ TEST(Projection, identity) {
   }
 }
 
+TEST(Projection, scaling) {
+  ProjectionConfig conf;
+  conf.set_type("scaling");
+  conf.set_input_size(10);
+  conf.set_output_size(10);
+  for (auto useGpu : {false}) {
+    testProjectionGrad(conf, INPUT_DATA, /* parameterSize */ 1,
+                       /* batchSize */ 100, useGpu);
+  }
+}
+
 #ifndef PADDLE_ONLY_CPU
 TEST(Projection, conv) {
   const int NUM_FILTERS = 16;
```
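testProjectionGrad verifies the analytic gradient against a numerical one. The same check for the scalar weight can be written in a few lines of NumPy; a minimal sketch, assuming a simple loss L = sum(out) so that dL/dout is all ones:

```python
import numpy as np

def scaling_forward(x, w):
    # out = w * x, the projection's forward pass
    return w * x

def check_w_grad(x, w, eps=1e-4):
    out_grad = np.ones_like(x)        # dL/dout for L = sum(out)
    analytic = np.sum(x * out_grad)   # matches sumOfProducts + sumCols
    # Central finite difference on the scalar parameter
    numeric = (np.sum(scaling_forward(x, w + eps)) -
               np.sum(scaling_forward(x, w - eps))) / (2 * eps)
    assert abs(analytic - numeric) < 1e-3 * max(1.0, abs(numeric))

check_w_grad(np.random.randn(100, 10), 0.5)
```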

paddle/math/BaseMatrix.cu

Lines changed: 40 additions & 11 deletions

```diff
@@ -1451,6 +1451,8 @@ int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(height_, numRows);
+  CHECK_EQ(width_, 1UL);
   aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows,
             numCols, offset, false_type(), true_type() /*aAsColVector*/);
 
@@ -1463,18 +1465,39 @@ int BaseMatrixT<real>::applyRow(Agg agg, Saver sv, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(height_, numRows);
+  CHECK_EQ(width_, 1UL);
   aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset,
             false_type(), true_type() /*aAsColVector*/);
 
   return 0;
 }
 
+template<>
+template <class Agg, class Op, class Saver>
+int BaseMatrixT<real>::applyRow(Agg agg, Op op, Saver sv,
+                                BaseMatrixT& b, BaseMatrixT& c) {
+  MatrixOffset offset(0, 0, 0, 0, 0, 0);
+  int numRows = b.height_;
+  int numCols = b.width_;
+  CHECK_EQ(height_, numRows);
+  CHECK_EQ(width_, 1UL);
+  CHECK_EQ(c.height_, numRows);
+  CHECK_EQ(c.width_, numCols);
+  aggregate(agg, op, sv,
+            b, c, numRows, numCols, offset,
+            false_type(), true_type() /*aAsColVector*/);
+  return 0;
+}
+
 template<>
 template <class Agg>
 int BaseMatrixT<real>::applyCol(Agg agg, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(width_, numCols);
+  CHECK_EQ(height_, 1UL);
   aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows,
             numCols, offset, true_type() /*aAsRowVector*/, false_type());
 
@@ -1487,15 +1510,17 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(width_, numCols);
+  CHECK_EQ(height_, 1UL);
   aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset,
             true_type() /*aAsRowVector*/, false_type());
 
   return 0;
 }
 
 template<>
-void BaseMatrixT<real>::sumRows(BaseMatrixT& b) {
-  applyRow(aggregate::sum(), b);
+void BaseMatrixT<real>::sumRows(BaseMatrixT& b, real scaleSum, real scaleDest) {
+  applyRow(aggregate::sum(), base::binary::add2(scaleDest, scaleSum), b);
 }
 
 template<>
@@ -1524,18 +1549,22 @@ void BaseMatrixT<real>::minCols(BaseMatrixT& b) {
 }
 
 template<>
-void BaseMatrixT<real>::sumCols(BaseMatrixT& b, real scale) {
-  applyCol(aggregate::sum(), base::binary::add2(1.0, scale), b);
+void BaseMatrixT<real>::sumCols(BaseMatrixT& b, real scaleSum, real scaleDest) {
+  applyCol(aggregate::sum(), base::binary::add2(scaleDest, scaleSum), b);
 }
 
 template<>
-void BaseMatrixT<real>::sumOfSquares(BaseMatrixT& b, BaseMatrixT& c) {
-  int numRows = b.height_;
-  int numCols = b.width_;
-  MatrixOffset offset(0, 0, 0, 0, 0, 0);
-  aggregate(aggregate::sum(), base::binary::squaredDiff(), base::binary::add(),
-            b, c, numRows, numCols, offset, false_type(),
-            true_type() /*aAsColVector*/);
+void BaseMatrixT<real>::sumOfSquaredDiffs(
+    BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) {
+  applyRow(aggregate::sum(), base::binary::squaredDiff(),
+           base::binary::add2(scaleDest, scaleSum), b, c);
+}
+
+template<>
+void BaseMatrixT<real>::sumOfProducts(
+    BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) {
+  applyRow(aggregate::sum(), base::binary::mul(),
+           base::binary::add2(scaleDest, scaleSum), b, c);
 }
 
 template class BaseMatrixT<real>;
```
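The new three-argument applyRow overload is the workhorse behind sumOfProducts and sumOfSquaredDiffs: it reduces op(b, c) along each row with agg and folds the result into the destination column vector through the saver sv. In NumPy terms (a sketch of the semantics, not the CUDA implementation):

```python
import numpy as np

def apply_row(agg, op, sv, dest, b, c):
    # dest: (numRows,) column vector; b, c: (numRows, numCols)
    # dest[i] = sv(dest[i], agg_j(op(b[i, j], c[i, j])))
    return sv(dest, agg(op(b, c), axis=1))

# sumOfProducts(b, c, scaleSum, scaleDest) is applyRow with
# agg = sum, op = mul, sv = add2(scaleDest, scaleSum):
scale_sum, scale_dest = 1.0, 0.0
b, c = np.random.randn(4, 3), np.random.randn(4, 3)
dest = apply_row(np.sum, np.multiply,
                 lambda d, s: scale_dest * d + scale_sum * s,
                 np.zeros(4), b, c)
assert np.allclose(dest, (b * c).sum(axis=1))
```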

paddle/math/BaseMatrix.h

Lines changed: 26 additions & 4 deletions

```diff
@@ -305,6 +305,18 @@ class BaseMatrixT {
   template <class Agg>
   int applyRow(Agg agg, BaseMatrixT& b);
 
+  /**
+   * an aggregate expression applied to each row of matrix b.
+   *
+   * @code
+   * for each row i & 0 <= j < b.width_, do:
+   *   dst = agg(op(b[i*ldb + j], c[i*ldc + j]))
+   *   this[i] = sv(this[i], dst)
+   * @endcode
+   */
+  template <class Agg, class Op, class Saver>
+  int applyRow(Agg agg, Op op, Saver sv, BaseMatrixT& b, BaseMatrixT& c);
+
   /**
    * an aggregate expression applied to each row of matrix b.
    *
@@ -920,7 +932,9 @@ class BaseMatrixT {
   void addRowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c);
 
   /// calculate the sum of each row of the matrix b.
-  void sumRows(BaseMatrixT& b);
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij}
+  void sumRows(BaseMatrixT& b, T scaleSum, T scaleDest);
+
   /// calculate the maximum value of each row of the matrix b.
   void maxRows(BaseMatrixT& b);
   /// calculate the minimum value of each row of the matrix b.
@@ -932,10 +946,18 @@ class BaseMatrixT {
   void maxCols(BaseMatrixT& b);
   /// calculate the minimum value of each column of the matrix b.
   void minCols(BaseMatrixT& b);
-  void sumCols(BaseMatrixT& b, T scale);
 
-  /// calculate the sum of each row of (b - c)^2.
-  void sumOfSquares(BaseMatrixT& b, BaseMatrixT& c);
+  /// calculate the sum of each column of the matrix b.
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ji}
+  void sumCols(BaseMatrixT& b, T scaleSum, T scaleDest);
+
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j (b_{ij} - c_{ij})^2
+  void sumOfSquaredDiffs(BaseMatrixT& b, BaseMatrixT& c,
+                         T scaleSum, T scaleDest);
+
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij}
+  void sumOfProducts(BaseMatrixT& b, BaseMatrixT& c,
+                     T scaleSum, T scaleDest);
 
   /**
    * @code
```
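All of these reductions share one convention, stated in the doc comments above: scaleSum scales the fresh reduction and scaleDest scales whatever is already in the destination, so scaleDest=0 overwrites while scaleDest=1 accumulates. A NumPy sketch of sumRows and sumCols under that convention (names are illustrative):

```python
import numpy as np

def sum_rows(dest, b, scale_sum, scale_dest):
    # this_i = scaleDest * this_i + scaleSum * sum_j b_ij
    return scale_dest * dest + scale_sum * b.sum(axis=1)

def sum_cols(dest, b, scale_sum, scale_dest):
    # this_i = scaleDest * this_i + scaleSum * sum_j b_ji
    return scale_dest * dest + scale_sum * b.sum(axis=0)

b = np.arange(6.0).reshape(2, 3)                    # [[0,1,2],[3,4,5]]
assert np.allclose(sum_rows(np.zeros(2), b, 1, 0), [3., 12.])     # overwrite
assert np.allclose(sum_cols(np.ones(3), b, 2, 1), [7., 11., 15.])  # accumulate
```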

paddle/math/Matrix.cpp

Lines changed: 17 additions & 13 deletions

```diff
@@ -242,7 +242,7 @@ real GpuMatrix::getSum() {
 void GpuMatrix::accumulateColSum(Matrix& src) {
   CHECK_EQ(getWidth(), src.getWidth());
   CHECK_EQ(getHeight(), (size_t)1);
-  sumCols(src, 1.0);
+  sumCols(src, 1.0, 1.0);
 }
 
 real GpuMatrix::getAbsSum() {
@@ -389,7 +389,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
   CHECK_EQ(width_, a.getWidth());
   GpuSparseMatrix* sMatPtr = dynamic_cast<GpuSparseMatrix*>(&a);
   if (!sMatPtr) {
-    sumCols(a, scale);
+    sumCols(a, /* scaleSum= */scale, /* scaleDest= */1);
   } else {
     real* data = getData();
     hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get();
@@ -589,7 +589,7 @@ void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
 void GpuMatrix::colMerge(Matrix& src) {
   CHECK(src.height_ == height_);
   if (!trans_ && !src.trans_) {
-    sumRows(src);
+    sumRows(src, /* scaleSum= */1, /* scaleDest= */0);
   } else {
     LOG(FATAL) << "Is not supported";
   }
@@ -599,7 +599,7 @@ void GpuMatrix::rowSum(Matrix& sum) {
   CHECK_EQ(sum.getHeight(), getHeight());
   CHECK_EQ(sum.getWidth(), (size_t)1);
 
-  sum.sumRows(*this);
+  sum.sumRows(*this, /* scaleSum= */1, /* scaleDest= */0);
 }
 
 void GpuMatrix::rowMax(Matrix& max) {
@@ -790,7 +790,8 @@ void GpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
     LOG(FATAL) << "not supported: GpuSparseMatrix as label";
   }
 
-  BaseMatrix::sumOfSquares(output, label);
+  BaseMatrix::sumOfSquaredDiffs(output, label,
+                                /* scaleSum= */1, /* scaleDest= */1);
 }
 
 void GpuMatrix::sumOfSquaresBp(Matrix& outputV, Matrix& label) {
@@ -1501,7 +1502,7 @@ void CpuMatrix::accumulateColSum(Matrix& src) {
   CHECK_EQ(getWidth(), src.getWidth());
   CHECK_EQ(getHeight(), (size_t)1);
 
-  sumCols(src, 1.0);
+  sumCols(src, /* scaleSum= */1, /* scaleDest= */1);
 }
 
 real CpuMatrix::getAbsSum() {
@@ -2188,7 +2189,7 @@ void CpuMatrix::collectBias(Matrix& a, real scale) {
   CHECK_EQ(width_, a.getWidth());
   CpuSparseMatrix* aptr = dynamic_cast<CpuSparseMatrix*>(&a);
   if (!aptr) {
-    sumCols(a, scale);
+    sumCols(a, /* scaleSum= */scale, /* scaleDest= */1);
   } else {
     size_t nnz = aptr->getElementCnt();
     int* cols = aptr->getCols();
@@ -2227,7 +2228,7 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
   real* dst = getData();
   real* src = a.getData();
   const int* starts = startsPos.getData();
-  MatrixPtr outMtx = Matrix::create(1, 1, false, false);
+  MatrixPtr outMtx = Matrix::create(nullptr, 1, width, false, false);
   MatrixPtr dataMtx = Matrix::create(nullptr, 1, width, false, false);
   for (size_t i = 0; i < height; i++) {
     int sequenceLength = starts[i + 1] - starts[i];
@@ -2239,13 +2240,15 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
     dataMtx->setData(src + starts[i] * width, sequenceLength, width);
     if (mode == 0) {
       // plain average
-      outMtx->sumCols(*dataMtx, (real)1 / (real)sequenceLength);
+      outMtx->sumCols(*dataMtx, (real)1 / (real)sequenceLength,
+                      /* scaleDest= */1);
     } else if (mode == 1) {
       // sum instead of average
-      outMtx->sumCols(*dataMtx, (real)1);
+      outMtx->sumCols(*dataMtx, /* scaleSum= */1, /* scaleDest= */1);
     } else if (mode == 2) {
       // divide by square root of sequenceLength
-      outMtx->sumCols(*dataMtx, (real)1 / std::sqrt(sequenceLength));
+      outMtx->sumCols(*dataMtx, (real)1 / std::sqrt(sequenceLength),
+                      /* scaleDest= */1);
     } else {
       LOG(FATAL) << "should not reach here";
     }
@@ -2932,7 +2935,7 @@ void CpuMatrix::rowSum(Matrix& sum) {
   CHECK_EQ(sum.getHeight(), getHeight());
   CHECK_EQ(sum.getWidth(), (size_t)1);
 
-  sum.sumRows(*this);
+  sum.sumRows(*this, /* scaleSum= */1, /* scaleDest= */0);
 }
 
 void CpuMatrix::rowMaxId(IVector& maxIds) {
@@ -3485,7 +3488,8 @@ void CpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
     }
   }
 
-  BaseMatrix::sumOfSquares(output, label);
+  BaseMatrix::sumOfSquaredDiffs(output, label,
+                                /* scaleSum= */1, /* scaleDest= */1);
 }
 
 /* calculate the error of outputV according to label */
```
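One call-site change in sequenceAvgForward is a genuine fix rather than a rename: the destination view outMtx used to be created as a 1x1 matrix, but sumCols writes one value per column, so the per-sequence output row has to be a 1 x width view over dst. A NumPy sketch of the three pooling modes over variable-length sequences (illustrative names, not Paddle API):

```python
import numpy as np

def sequence_avg_forward(a, starts, mode):
    # a: (total_timesteps, width); starts: sequence offsets, len = num_seqs + 1
    out = np.zeros((len(starts) - 1, a.shape[1]))
    for i in range(len(starts) - 1):
        seq = a[starts[i]:starts[i + 1]]      # one sequence, (len_i, width)
        if mode == 0:
            out[i] = seq.sum(axis=0) / len(seq)            # plain average
        elif mode == 1:
            out[i] = seq.sum(axis=0)                       # sum
        elif mode == 2:
            out[i] = seq.sum(axis=0) / np.sqrt(len(seq))   # sqrt-normalized sum
        else:
            raise ValueError("should not reach here")
    return out

a = np.arange(12.0).reshape(6, 2)
print(sequence_avg_forward(a, [0, 2, 6], mode=0))  # two sequences of length 2 and 4
```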

python/paddle/trainer/config_parser.py

Lines changed: 14 additions & 0 deletions

```diff
@@ -592,6 +592,20 @@ def calc_parameter_size(self, input_size, output_size):
     def calc_parameter_dims(self, input_size, output_size):
         return [1, output_size]
 
+# ScalingProjection
+@config_class
+class ScalingProjection(Projection):
+    type = 'scaling'
+
+    def calc_output_size(self, input_layer_config):
+        return input_layer_config.size
+
+    def calc_parameter_size(self, input_size, output_size):
+        return 1
+
+    def calc_parameter_dims(self, input_size, output_size):
+        return [1, 1]
+
 
 @config_class
 class TableProjection(Projection):
```
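With the config class registered, the projection is usable from the Python config inside a mixed layer via the scaling_projection helper documented in layers.rst. A usage sketch, assuming the v1 trainer_config_helpers API (the helper's exact signature is not shown in this diff):

```python
from paddle.trainer_config_helpers import *

data = data_layer(name='input', size=10)

# out = w * input, with one learnable scalar w (parameter size 1)
with mixed_layer(size=10) as out:
    out += scaling_projection(input=data)
```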
