Skip to content

Commit df5a95d

Browse files
authored
Merge pull request #1701 from luotao1/stride
stride pooling for seqlastin and seqfirstin.
2 parents 892cc82 + e6366e3 commit df5a95d

17 files changed

+263
-29
lines changed

paddle/gserver/layers/SequenceLastInstanceLayer.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ namespace paddle {
2525
* Input: a sequence
2626
* If SequenceLevel = kNonseq:
2727
* Output: a sequence containing only the last instance of the input sequence
28+
* If stride_ > 0:
29+
 * Output: a shortened sequence. The operation of getting the last instance of a
30+
* sequence is independently performed on every slice of the input
31+
* sequence, which is obtained by sliding a window with the window
32+
* size set to stride_.
2833
* If SequenceLevel = kSeq:
2934
 * Check input sequence must have sub-sequence
3035
* Output: a sequence containing only the last instance of each sub-sequence
@@ -37,6 +42,7 @@ class SequenceLastInstanceLayer : public SequencePoolLayer {
3742
protected:
3843
MatrixPtr tmpSrc_;
3944
MatrixPtr tmpDest_;
45+
std::vector<int> instanceIds_;
4046

4147
public:
4248
explicit SequenceLastInstanceLayer(const LayerConfig& config)
@@ -54,6 +60,7 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);
5460
bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
5561
const ParameterMap& parameterMap) {
5662
SequencePoolLayer::init(layerMap, parameterMap);
63+
reversed_ = config_.select_first();
5764

5865
tmpSrc_ =
5966
Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);
@@ -66,17 +73,19 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
6673
void SequenceLastInstanceLayer::forward(PassType passType) {
6774
SequencePoolLayer::forward(passType);
6875

69-
const int* starts = startPositions_->getData(false);
76+
auto starts = (stride_ > 0) ? stridePositions_->getData()
77+
: startPositions_->getData(false);
7078
MatrixPtr inputValue = getInputValue(0);
7179
MatrixPtr outputValue = getOutputValue();
7280

7381
{
7482
AsyncGpuBlock asyncGpuBlock;
7583
REGISTER_TIMER_INFO("SequenceLastInstanceLayerForward", getName().c_str());
7684

85+
instanceIds_.clear();
7786
for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
78-
int insId =
79-
config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
87+
int insId = reversed_ ? starts[seqId] : starts[seqId + 1] - 1;
88+
instanceIds_.push_back(insId);
8089

8190
outputValue->subMatrix(seqId, 1, tmpDest_)
8291
->assign(*(inputValue->subMatrix(insId, 1, tmpSrc_)));
@@ -96,18 +105,13 @@ void SequenceLastInstanceLayer::backward(const UpdateCallback& callback) {
96105

97106
MatrixPtr inputGrad = getInputGrad(0);
98107
MatrixPtr outputGrad = getOutputGrad();
99-
const int* starts = startPositions_->getData(false);
100-
size_t numSequences = startPositions_->getSize() - 1;
101108

102109
if (inputGrad) {
103110
AsyncGpuBlock asyncGpuBlock;
104111
REGISTER_TIMER_INFO("SequenceLastInstanceLayerBackward", getName().c_str());
105112

106-
for (size_t seqId = 0; seqId < numSequences; ++seqId) {
107-
int insId =
108-
config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
109-
110-
inputGrad->subMatrix(insId, 1, tmpDest_)
113+
for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
114+
inputGrad->subMatrix(instanceIds_[seqId], 1, tmpDest_)
111115
->add(*(outputGrad->subMatrix(seqId, 1, tmpSrc_)));
112116
}
113117
}

paddle/gserver/layers/SequencePoolLayer.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ bool SequencePoolLayer::init(const LayerMap& layerMap,
3737
} else {
3838
LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
3939
}
40+
stride_ = config_.seq_pool_stride();
4041
setNeedSequenceInfo(false);
4142
return true;
4243
}
@@ -55,8 +56,6 @@ void SequencePoolLayer::forward(PassType passType) {
5556
CHECK_EQ(starts->getData()[newBatchSize_], input.getBatchSize());
5657
CHECK_EQ(newBatchSize_, starts->getSize() - 1);
5758

58-
resetOutput(newBatchSize_, dim);
59-
6059
/* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
6160
* thus, in this case, output_ has no sequenceStartPositions.
6261
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
@@ -67,6 +66,15 @@ void SequencePoolLayer::forward(PassType passType) {
6766
<< "when trans_type = seq, input must hasSubseq";
6867
output_.degradeSequence(input);
6968
}
69+
if (stride_ > 0) {
70+
CHECK_EQ(input.hasSubseq(), 0UL)
71+
<< "sequence stride pooling is invalid for hasSubseq now";
72+
output_.poolSequenceWithStride(
73+
input, stride_, &stridePositions_, reversed_);
74+
newBatchSize_ = stridePositions_->getSize() - 1;
75+
}
76+
77+
resetOutput(newBatchSize_, dim);
7078
}
7179

7280
void SequencePoolLayer::backward(const UpdateCallback& callback) {

paddle/gserver/layers/SequencePoolLayer.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ namespace paddle {
2626
* Output: output size is the number of input sequences (NOT input instances)
2727
* output[i] = seqlastin/average/max_{for each instance in this
2828
* sequence}{input[i]}
29+
* If stride_ > 0:
30+
* Check input sequence must not have sub-sequence
31+
 * Output: a shortened sequence; pooling is performed upon a small local
32+
* area
2933
* If SequenceLevel = kSeq:
3034
 * Check input sequence must have sub-sequence
3135
* Output: output size is the number of input sub-sequences
@@ -42,6 +46,11 @@ class SequencePoolLayer : public Layer {
4246
enum SequenceLevel { kNonSeq = 0, kSeq = 1 };
4347
size_t newBatchSize_;
4448
ICpuGpuVectorPtr startPositions_;
49+
int stride_;
50+
// Store the start position of each window.
51+
IVectorPtr stridePositions_;
52+
// Whether the input sequence is reversed or not.
53+
bool reversed_ = false;
4554

4655
public:
4756
explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}

paddle/gserver/tests/test_LayerGrad.cpp

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -804,10 +804,14 @@ TEST(Layer, ExpandLayer) {
804804
testExpandLayer("seq", true); // seq expand to hasSubseq
805805
}
806806

807-
void testDegradeLayer(bool hasSubseq, string layer_type, string trans_type) {
807+
void testDegradeLayer(bool hasSubseq,
808+
string layer_type,
809+
string trans_type,
810+
int stride) {
808811
TestConfig config;
809812
config.layerConfig.set_type(layer_type);
810813
config.layerConfig.set_size(10);
814+
config.layerConfig.set_seq_pool_stride(stride);
811815
config.biasSize = 0;
812816

813817
config.inputDefs.push_back(
@@ -827,36 +831,46 @@ void testDegradeLayer(bool hasSubseq, string layer_type, string trans_type) {
827831
if (layer_type == "average") {
828832
for (auto strategy : {"average", "sum", "squarerootn"}) {
829833
LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
830-
<< " average_strategy=" << strategy;
834+
<< " average_strategy=" << strategy
835+
<< " seq_pool_stride=" << stride;
831836
config.layerConfig.set_average_strategy(strategy);
832837
testDegradeLayerGrad(config, layer_type);
833838
}
834839
} else {
835-
LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type;
840+
LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
841+
<< " seq_pool_stride=" << stride;
836842
testDegradeLayerGrad(config, layer_type);
837843
}
838844
}
839845

840846
TEST(Layer, MaxLayer) {
841-
testDegradeLayer(false, "max", "non-seq"); // seq max to non-seq
842-
testDegradeLayer(true, "max", "non-seq"); // hasSubseq max to non-seq
843-
testDegradeLayer(true, "max", "seq"); // hasSubseq max to seq
847+
testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq
848+
testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq
849+
testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq
844850
}
845851

846852
TEST(Layer, SequenceLastInstanceLayer) {
847853
testDegradeLayer(false,
848854
"seqlastins",
849-
"non-seq"); // seq seqlastins to non-seq
855+
"non-seq",
856+
-1); // seq seqlastins to non-seq
857+
testDegradeLayer(false,
858+
"seqlastins",
859+
"non-seq",
860+
5); // seq seqlastins to a shorten seq, stride window = 5
850861
testDegradeLayer(true,
851862
"seqlastins",
852-
"non-seq"); // hasSubseq seqlastins to non-seq
853-
testDegradeLayer(true, "seqlastins", "seq"); // hasSubseq seqlastins to seq
863+
"non-seq",
864+
-1); // hasSubseq seqlastins to non-seq
865+
testDegradeLayer(
866+
true, "seqlastins", "seq", -1); // hasSubseq seqlastins to seq
854867
}
855868

856869
TEST(Layer, AverageLayer) {
857-
testDegradeLayer(false, "average", "non-seq"); // seq average to non-seq
858-
testDegradeLayer(true, "average", "non-seq"); // hasSubseq average to non-seq
859-
testDegradeLayer(true, "average", "seq"); // hasSubseq average to seq
870+
testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq
871+
testDegradeLayer(
872+
true, "average", "non-seq", -1); // hasSubseq average to non-seq
873+
testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq
860874
}
861875

862876
TEST(Layer, SequenceConcatLayer) {

paddle/parameter/Argument.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,49 @@ void Argument::degradeSequence(const Argument& input) {
559559
tgtBuf[numSequences] = numSubSequences;
560560
}
561561

562+
void Argument::poolSequenceWithStride(const Argument& input,
563+
size_t stride,
564+
IVectorPtr* stridePostions,
565+
bool reversed) {
566+
// If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
567+
// then sequenceStartPositions = [0, 2, 3, 4, 7].
568+
// If reversed = false, stridePostions = [0, 5, 9, 14, 17, 22, 27, 30];
569+
// else reversed = true, stridePostions = [0, 4, 9, 14, 17, 20, 25, 30]
570+
571+
CHECK(input.sequenceStartPositions);
572+
CHECK_EQ(input.hasSubseq(), 0UL);
573+
CHECK_GT(stride, 0) << "stride must larger than 0";
574+
size_t numSequences = input.getNumSequences();
575+
ICpuGpuVector::resizeOrCreate(
576+
sequenceStartPositions, numSequences + 1, false);
577+
const int* starts = input.sequenceStartPositions->getData(false);
578+
int* tgtBuf = sequenceStartPositions->getMutableData(false);
579+
// first index of target sequence and stride positions are both 0
580+
tgtBuf[0] = 0;
581+
std::vector<int> stridePos;
582+
for (size_t seqId = 0; seqId < numSequences; ++seqId) {
583+
size_t seqLength = starts[seqId + 1] - starts[seqId];
584+
stridePos.emplace_back(starts[seqId]);
585+
if (seqLength == 0) {
586+
// empty sequence
587+
tgtBuf[seqId + 1] = tgtBuf[seqId];
588+
} else {
589+
int size = ceil((float)seqLength / stride);
590+
tgtBuf[seqId + 1] = tgtBuf[seqId] + size;
591+
for (int i = 0; i < size - 1; ++i) {
592+
int cur = reversed ? starts[seqId + 1] - (size - 1 - i) * stride
593+
: stridePos.back() + stride;
594+
stridePos.emplace_back(cur);
595+
}
596+
}
597+
}
598+
stridePos.emplace_back(starts[numSequences]);
599+
int size = stridePos.size();
600+
CHECK_EQ(size - 1, tgtBuf[numSequences]);
601+
IVector::resizeOrCreate(*stridePostions, size, false);
602+
(*stridePostions)->copyFrom(stridePos.data(), size);
603+
}
604+
562605
void Argument::getValueString(
563606
std::unordered_map<std::string, std::string>* out) const {
564607
if (value) {

paddle/parameter/Argument.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,15 @@ struct Argument {
291291
*/
292292
void degradeSequence(const Argument& input);
293293

294+
/*
295+
After pooling with stride n (n is smaller than sequence length),
296+
a long sequence will be shorten.
297+
This function is invalid for sequence having sub-sequence.
298+
*/
299+
void poolSequenceWithStride(const Argument& input,
300+
size_t stride,
301+
IVectorPtr* stridePositions,
302+
bool reversed = false);
294303
/**
295304
* @brief getValueString will return the argument's output in string. There
296305
* are several kinds of output. The keys of output dictionary are 'value',
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
add_simple_unittest(test_common)
2+
add_simple_unittest(test_argument)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include <gtest/gtest.h>
16+
#include <paddle/parameter/Argument.h>
17+
18+
using namespace paddle; // NOLINT
19+
20+
TEST(Argument, poolSequenceWithStride) {
21+
Argument input, output;
22+
ICpuGpuVector::resizeOrCreate(input.sequenceStartPositions, 5, false);
23+
int* inStart = input.sequenceStartPositions->getMutableData(false);
24+
inStart[0] = 0;
25+
inStart[1] = 9;
26+
inStart[2] = 14;
27+
inStart[3] = 17;
28+
inStart[4] = 30;
29+
30+
int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
31+
int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};
32+
33+
for (auto reversed : {false, true}) {
34+
IVectorPtr stridePositions;
35+
output.poolSequenceWithStride(
36+
input, 5 /* stride */, &stridePositions, reversed);
37+
38+
const int* outStart = output.sequenceStartPositions->getData(false);
39+
CHECK_EQ(outStart[0], 0);
40+
CHECK_EQ(outStart[1], 2);
41+
CHECK_EQ(outStart[2], 3);
42+
CHECK_EQ(outStart[3], 4);
43+
CHECK_EQ(outStart[4], 7);
44+
45+
CHECK_EQ(stridePositions->getSize(), 8);
46+
auto result = reversed ? strideResultReversed : strideResult;
47+
for (int i = 0; i < 8; i++) {
48+
CHECK_EQ(stridePositions->getData()[i], result[i]);
49+
}
50+
}
51+
}
52+
53+
int main(int argc, char** argv) {
54+
testing::InitGoogleTest(&argc, argv);
55+
initMain(argc, argv);
56+
return RUN_ALL_TESTS();
57+
}

proto/ModelConfig.proto

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,11 @@ message LayerConfig {
441441

442442
// blank label used in ctc loss
443443
optional uint32 blank = 52 [default = 0];
444+
445+
// stride parameter for seqlastins layer, AverageLayer, MaxLayer, which
446+
// controls the scope of pooling operation. can be set > 0.
447+
// leave empty or set to -1 to disable this stride pooling.
448+
optional int32 seq_pool_stride = 53 [default = -1];
444449
}
445450

446451
message EvaluatorConfig {

python/paddle/trainer/config_parser.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2485,6 +2485,7 @@ def __init__(self,
24852485
active_type='linear',
24862486
trans_type='non-seq',
24872487
bias=False,
2488+
stride=-1,
24882489
**xargs):
24892490
super(SequenceLastInstanceLayer, self).__init__(
24902491
name,
@@ -2495,10 +2496,11 @@ def __init__(self,
24952496
**xargs)
24962497
config_assert(
24972498
len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
2499+
if trans_type == 'seq':
2500+
config_assert(stride == -1, 'subseq does not support stride window')
24982501
self.config.trans_type = trans_type
2499-
for input_index in xrange(len(self.inputs)):
2500-
input_layer = self.get_input_layer(input_index)
2501-
self.set_layer_size(input_layer.size)
2502+
self.config.seq_pool_stride = stride
2503+
self.set_layer_size(self.get_input_layer(0).size)
25022504
self.create_bias_parameter(bias, self.config.size)
25032505

25042506

@@ -2510,10 +2512,16 @@ def __init__(self,
25102512
active_type='linear',
25112513
trans_type='non-seq',
25122514
bias=False,
2515+
stride=-1,
25132516
**xargs):
25142517
super(SequenceFirstInstanceLayer, self).__init__(
2515-
name, inputs=inputs, active_type=active_type, bias=bias, **xargs)
2516-
self.config.trans_type = trans_type
2518+
name,
2519+
inputs=inputs,
2520+
active_type=active_type,
2521+
trans_type=trans_type,
2522+
bias=bias,
2523+
stride=stride,
2524+
**xargs)
25172525
self.config.select_first = True
25182526

25192527

0 commit comments

Comments
 (0)