Skip to content

Commit 81b4fad

Browse files
achao2013qingqing01
authored andcommitted
add moving average absmax op and fix bug (#15155)
* Add moving average absmax op in quantization-aware training.
1 parent 92b9ce3 commit 81b4fad

File tree

8 files changed

+409
-17
lines changed

8 files changed

+409
-17
lines changed

paddle/fluid/API.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ paddle.fluid.contrib.BeamSearchDecoder.read_array (ArgSpec(args=['self', 'init',
367367
paddle.fluid.contrib.BeamSearchDecoder.update_array (ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None), ('document', '5754e9b3212b7c09497151516a0de5a7'))
368368
paddle.fluid.contrib.memory_usage (ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8fcb2f93bb743693baa8d4860a5ccc47'))
369369
paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4d43687113c4bf5b29d15aee2f4e4afa'))
370-
paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
370+
paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size', 'moving_rate'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000, 0.9)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
371371
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
372372
paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
373373
paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))

paddle/fluid/operators/fake_quantize_op.cc

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,30 @@ struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, T> {
8181

8282
template struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, float>;
8383

84+
template <typename T>
85+
struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext, T> {
86+
void operator()(const platform::CPUDeviceContext& ctx,
87+
const framework::Tensor& in_accum,
88+
const framework::Tensor& in_state, const T* cur_scale,
89+
const float rate, framework::Tensor* out_state,
90+
framework::Tensor* out_accum, framework::Tensor* out_scale) {
91+
T accum = in_accum.data<T>()[0];
92+
T state = in_state.data<T>()[0];
93+
T scale = cur_scale[0];
94+
95+
state = rate * state + 1;
96+
accum = rate * accum + scale;
97+
scale = accum / state;
98+
99+
out_state->mutable_data<T>(ctx.GetPlace())[0] = state;
100+
out_accum->mutable_data<T>(ctx.GetPlace())[0] = accum;
101+
out_scale->mutable_data<T>(ctx.GetPlace())[0] = scale;
102+
}
103+
};
104+
105+
template struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext,
106+
float>;
107+
84108
class FakeQuantizeAbsMaxOp : public framework::OperatorWithKernel {
85109
public:
86110
FakeQuantizeAbsMaxOp(const std::string& type,
@@ -255,6 +279,78 @@ FakeQuantize operator is used in static quantization.
255279
}
256280
};
257281

282+
class FakeQuantizeMovingAverageAbsMaxOp : public framework::OperatorWithKernel {
283+
public:
284+
FakeQuantizeMovingAverageAbsMaxOp(const std::string& type,
285+
const framework::VariableNameMap& inputs,
286+
const framework::VariableNameMap& outputs,
287+
const framework::AttributeMap& attrs)
288+
: OperatorWithKernel(type, inputs, outputs, attrs) {}
289+
290+
void InferShape(framework::InferShapeContext* ctx) const override {
291+
PADDLE_ENFORCE(
292+
ctx->HasInput("X"),
293+
"Input(X) of FakeQuantizeMovingAverageAbsMaxOp should not be null.");
294+
PADDLE_ENFORCE(
295+
ctx->HasOutput("Out"),
296+
"Output(Out) of FakeQuantizeMovingAverageAbsMaxOp should not be null.");
297+
PADDLE_ENFORCE(ctx->HasOutput("OutScale"),
298+
"Output(OutScale) of FakeQuantizeMovingAverageAbsMaxOp "
299+
"should not be null");
300+
if (ctx->HasOutput("OutState")) {
301+
ctx->SetOutputDim("OutState", {1});
302+
}
303+
if (ctx->HasOutput("OutAccum")) {
304+
ctx->SetOutputDim("OutAccum", {1});
305+
}
306+
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
307+
ctx->SetOutputDim("OutScale", {1});
308+
ctx->ShareLoD("X", /*->*/ "Out");
309+
}
310+
311+
protected:
312+
framework::OpKernelType GetExpectedKernelType(
313+
const framework::ExecutionContext& ctx) const override {
314+
return framework::OpKernelType(ctx.Input<framework::LoDTensor>("X")->type(),
315+
ctx.device_context());
316+
}
317+
};
318+
319+
class FakeQuantizeMovingAverageAbsMaxOpMaker
320+
: public framework::OpProtoAndCheckerMaker {
321+
public:
322+
void Make() override {
323+
AddInput("X", "(Tensor) Input is float data type.");
324+
AddInput("InScale", "Last scale.");
325+
AddInput("InAccum", "Last accum.").AsDispensable();
326+
AddInput("InState", "Last state.").AsDispensable();
327+
AddOutput("Out", "(Tensor) Output of quantized low level tensor.");
328+
AddOutput("OutScale", " Current scale");
329+
AddOutput("OutState", "(Tensor) state buffer.").AsDispensable();
330+
AddOutput("OutAccum", "(Tensor) accum buffer.").AsDispensable();
331+
AddAttr<float>("moving_rate", "(float, default 0.9) moving rate.")
332+
.SetDefault(0.9);
333+
AddAttr<int>("bit_length", "(int, default 8), quantization bit number.")
334+
.SetDefault(8)
335+
.AddCustomChecker([](const int& bit_length) {
336+
PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
337+
"'bit_length' should be between 1 and 16.");
338+
});
339+
AddAttr<bool>("is_test",
340+
"(bool, default false) Set to true for inference only, false "
341+
"for training. Some layers may run faster when this is true.")
342+
.SetDefault(false);
343+
AddComment(R"DOC(
344+
FakeQuantize operator is used in static quantization.
345+
346+
$$scale = (0.9*max(abs(x))+accum)/(0.9*state+1)$$
347+
$$range = 2^{bit_length - 1} - 1$$
348+
$$Out = round(X/scale * range)$$
349+
350+
)DOC");
351+
}
352+
};
353+
258354
} // namespace operators
259355
} // namespace paddle
260356

@@ -273,6 +369,12 @@ REGISTER_OPERATOR(fake_quantize_range_abs_max, ops::FakeQuantizeRangeAbsMaxOp,
273369
REGISTER_OP_CPU_KERNEL(fake_quantize_range_abs_max,
274370
ops::FakeQuantizeRangeAbsMaxKernel<CPU, float>);
275371

372+
REGISTER_OPERATOR(fake_quantize_moving_average_abs_max,
373+
ops::FakeQuantizeMovingAverageAbsMaxOp,
374+
ops::FakeQuantizeMovingAverageAbsMaxOpMaker,
375+
paddle::framework::EmptyGradOpMaker);
376+
REGISTER_OP_CPU_KERNEL(fake_quantize_moving_average_abs_max,
377+
ops::FakeQuantizeMovingAverageAbsMaxKernel<CPU, float>);
276378
REGISTER_OPERATOR(fake_channel_wise_quantize_abs_max,
277379
ops::FakeChannelWiseQuantizeAbsMaxOp,
278380
ops::FakeChannelWiseQuantizeAbsMaxOpMaker,

paddle/fluid/operators/fake_quantize_op.cu

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,41 @@ struct FindRangeAbsMaxFunctor<platform::CUDADeviceContext, T> {
147147

148148
template struct FindRangeAbsMaxFunctor<platform::CUDADeviceContext, float>;
149149

150+
template <typename T>
151+
struct FindMovingAverageAbsMaxFunctor<platform::CUDADeviceContext, T> {
152+
void operator()(const platform::CUDADeviceContext& ctx,
153+
const framework::Tensor& in_accum,
154+
const framework::Tensor& in_state, const T* cur_scale,
155+
const float rate, framework::Tensor* out_state,
156+
framework::Tensor* out_accum, framework::Tensor* out_scale) {
157+
const auto gpu_place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
158+
159+
T accum;
160+
memory::Copy(platform::CPUPlace(), &accum, gpu_place, in_accum.data<T>(),
161+
sizeof(T), 0);
162+
T state;
163+
memory::Copy(platform::CPUPlace(), &state, gpu_place, in_state.data<T>(),
164+
sizeof(T), 0);
165+
T scale;
166+
memory::Copy(platform::CPUPlace(), &scale, gpu_place, cur_scale, sizeof(T),
167+
0);
168+
169+
state = rate * state + 1;
170+
accum = rate * accum + scale;
171+
scale = accum / state;
172+
173+
memory::Copy(gpu_place, out_accum->mutable_data<T>(gpu_place),
174+
platform::CPUPlace(), &accum, sizeof(T), 0);
175+
memory::Copy(gpu_place, out_state->mutable_data<T>(gpu_place),
176+
platform::CPUPlace(), &state, sizeof(T), 0);
177+
memory::Copy(gpu_place, out_scale->mutable_data<T>(gpu_place),
178+
platform::CPUPlace(), &scale, sizeof(T), 0);
179+
}
180+
};
181+
182+
template struct FindMovingAverageAbsMaxFunctor<platform::CUDADeviceContext,
183+
float>;
184+
150185
template <typename T>
151186
struct ClipAndFakeQuantFunctor<platform::CUDADeviceContext, T> {
152187
void operator()(const platform::CUDADeviceContext& ctx,
@@ -178,3 +213,6 @@ REGISTER_OP_CUDA_KERNEL(fake_channel_wise_quantize_abs_max,
178213
ops::FakeChannelWiseQuantizeAbsMaxKernel<CUDA, float>);
179214
REGISTER_OP_CUDA_KERNEL(fake_quantize_range_abs_max,
180215
ops::FakeQuantizeRangeAbsMaxKernel<CUDA, float>);
216+
REGISTER_OP_CUDA_KERNEL(
217+
fake_quantize_moving_average_abs_max,
218+
ops::FakeQuantizeMovingAverageAbsMaxKernel<CUDA, float>);

paddle/fluid/operators/fake_quantize_op.h

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,20 @@ struct FindRangeAbsMaxFunctor {
4242
framework::Tensor* scales_arr, framework::Tensor* out_scale);
4343
};
4444

45+
template <typename DeviceContext, typename T>
46+
struct FindMovingAverageAbsMaxFunctor {
47+
void operator()(const DeviceContext& ctx, const framework::Tensor& in_accum,
48+
const framework::Tensor& in_state,
49+
const framework::Tensor& cur_scale,
50+
framework::Tensor* out_state, framework::Tensor* out_accum,
51+
framework::Tensor* out_scale);
52+
};
53+
4554
template <typename DeviceContext, typename T>
4655
class FakeQuantizeAbsMaxKernel : public framework::OpKernel<T> {
4756
public:
4857
void Compute(const framework::ExecutionContext& context) const override {
4958
auto* in = context.Input<framework::Tensor>("X");
50-
5159
auto* out = context.Output<framework::Tensor>("Out");
5260
auto* out_scale = context.Output<framework::Tensor>("OutScale");
5361
T* out_s = out_scale->mutable_data<T>(context.GetPlace());
@@ -138,5 +146,54 @@ class FakeQuantizeRangeAbsMaxKernel : public framework::OpKernel<T> {
138146
}
139147
};
140148

149+
template <typename DeviceContext, typename T>
150+
class FakeQuantizeMovingAverageAbsMaxKernel : public framework::OpKernel<T> {
151+
public:
152+
void Compute(const framework::ExecutionContext& context) const override {
153+
auto* in = context.Input<framework::Tensor>("X");
154+
auto* in_scale = context.Input<framework::Tensor>("InScale");
155+
auto* out = context.Output<framework::Tensor>("Out");
156+
out->mutable_data<T>(context.GetPlace());
157+
158+
bool is_test = context.Attr<bool>("is_test");
159+
int bit_length = context.Attr<int>("bit_length");
160+
int bin_cnt = std::pow(2, bit_length - 1) - 1;
161+
auto& dev_ctx = context.template device_context<DeviceContext>();
162+
163+
// testing
164+
if (is_test) {
165+
ClipAndFakeQuantFunctor<DeviceContext, T>()(dev_ctx, *in, *in_scale,
166+
bin_cnt, out);
167+
return;
168+
}
169+
170+
// training
171+
auto* in_accum = context.Input<framework::Tensor>("InAccum");
172+
auto* in_state = context.Input<framework::Tensor>("InState");
173+
auto& allocator =
174+
platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx);
175+
auto cur_scale = allocator.Allocate(1 * sizeof(T));
176+
T* cur_scale_data = static_cast<T*>(cur_scale->ptr());
177+
178+
FindAbsMaxFunctor<DeviceContext, T>()(dev_ctx, in->data<T>(), in->numel(),
179+
cur_scale_data);
180+
181+
auto* out_state = context.Output<framework::Tensor>("OutState");
182+
auto* out_accum = context.Output<framework::Tensor>("OutAccum");
183+
auto* out_scale = context.Output<framework::Tensor>("OutScale");
184+
out_state->mutable_data<T>(context.GetPlace());
185+
out_accum->mutable_data<T>(context.GetPlace());
186+
out_scale->mutable_data<T>(context.GetPlace());
187+
float moving_rate = context.Attr<float>("moving_rate");
188+
189+
FindMovingAverageAbsMaxFunctor<DeviceContext, T>()(
190+
dev_ctx, *in_accum, *in_state, cur_scale_data, moving_rate, out_state,
191+
out_accum, out_scale);
192+
193+
ClipAndFakeQuantFunctor<DeviceContext, T>()(dev_ctx, *in, *out_scale,
194+
bin_cnt, out);
195+
}
196+
};
197+
141198
} // namespace operators
142199
} // namespace paddle

0 commit comments

Comments
 (0)