Commit 553c625

committed
add tom p conv rounding for verification
1 parent da3611b commit 553c625

File tree

src/caffe/layers/conv_layer.cpp
src/caffe/layers/conv_layer.ev.inc

2 files changed: +201 −0 lines changed

src/caffe/layers/conv_layer.cpp

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@ void ConvolutionLayer<Dtype>::compute_output_shape() {
   }
 }
 
+#include "conv_layer.ev.inc"
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {

src/caffe/layers/conv_layer.ev.inc

Lines changed: 200 additions & 0 deletions
@@ -0,0 +1,200 @@
/*
The code below is inserted in caffe's conv_layer.cpp to change rounding behavior
for a convolution.

In EV hardware an accumulated value in a convolution or innerproduct is reduced
to fit into the destination blob size (e.g. 8 bits).

To implement the reduction, the accumulator is effectively multiplied by a
floating-point number with a 15-bit mantissa. Integer multiplication and shift
implement this multiplication, as the hardware does not have floating point:

- An s32 x s16 multiplication is done, producing an s48 result.
- The s48 result is shifted right and rounded with unbiased (convergent)
  rounding, also called round-to-even.

Next:
- If there is a zero point, it is added, and the result is saturated to the
  range [0, 2^N - 1] for an N-bit blob.
- Otherwise, relu bounds, if any, are applied before storing into the
  destination blob.

The computation of the 15-bit mantissa is derived from a floating-point number F.
F is the scale of the output blob divided by the scale of the accumulator.
Note: the EV notion of scale is the inverse of Tensorflow's; i.e. the
floating-point value represented by a pixel is (pixel - zero_point) / scale.
Thus in Synopsys caffe,

    F = input_scale * (double)weight_scale / output_scale;
        ^^^ compute accumulator scale ^^^

The function normalize_fractional below takes the floating-point number and
computes the 15-bit mantissa and its accompanying shift, taking care to produce
a number <= 32767 and a shift >= 1, in case hardware doesn't support a shift
of 0. In addition, if the computed integer is even, it is shifted right to
remove trailing zeros, solely for representational efficiency.

So, in summary:
- compute a floating-point number that reduces the accumulator to the desired
  output scale
- convert the floating-point number to a 15-bit integer and a 6-bit shift
- multiply the accumulator by the integer and shift with convergent rounding

The macro

    #define LL_ROUND(X,shift) \
        ((X + ((X >> (shift)) & 1) + (LLSHL1(shift-1)-1)) >> (shift))

implements round-to-even -- i.e., unbiased (convergent) rounding.

The environment variable

    set CAFFE_QUANTIZED_ROUND=EV

engages this alternative rounding.
*/
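
/*
  Worked example (added for illustration only; the numbers are made up and are
  not taken from the original commit or from a real network):

    Suppose F = 0.0123. normalize_fractional() below doubles F five times to
    bring it into [lo, hi), so frac_adjust_shift = 5 and

        mpy   = round(0.0123 * 2^(16+5)) = 25795    (odd, <= 32767)
        shift = 21

    and mpy / 2^shift = 25795 / 2097152 = 0.0123000..., i.e. multiplying the
    accumulator by mpy and shifting right by 21 applies approximately F.
    For an accumulator value of 1000:

        1000 * 25795 = 25795000
        LL_ROUND(25795000, 21) = 12        (compare: 1000 * F = 12.3)
*/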

typedef double Scale_type;
#include <stdlib.h>   // getenv
#include <cmath>      // std::round, fabs
#include <cstdio>     // printf  (added; not in the original commit)
#include <cstring>    // strcmp  (added; not in the original commit)

static void normalize_fractional(Scale_type F, unsigned &mpy, unsigned &shift) {
  // Adapted from python code in evgencnn.
  int frac_bits = 16;
  Scale_type hi = 0.5;
  int nudge_power = frac_bits;
  // Due to symmetric rounding, >= 32767.5 rounds to 32768, which is invalid
  // as a 16-bit signed number.
  // So the high value should be shifted by 32767.49/32768 so rounding
  // will produce at most 32767. Nudge avoids that.
  unsigned two_to_nudge = 1<<nudge_power;
  hi *= (Scale_type(two_to_nudge)-0.51)/two_to_nudge;
  Scale_type lo = hi/2;
  int frac_adjust_shift = 0;
  F = fabs(F);
  Scale_type oldF = F;
  while (F >= hi) {
    frac_adjust_shift -= 1; F /= 2;
  }
  while (F < lo) {
    frac_adjust_shift += 1; F *= 2;
  }

  int max_shift = 63;
  while (frac_bits + frac_adjust_shift > max_shift) {
    frac_adjust_shift--;
  }
  int total_shift = frac_bits + frac_adjust_shift;
  0 && printf("F=%f fas=%d\n",F,frac_adjust_shift);
  0 && printf("newF=%f\n",oldF*(1<<total_shift));
  mpy = std::round(oldF * (1<<frac_bits) * (1<<frac_adjust_shift));
  // Now if mpy is even, divide by 2 and reduce the shift.
  shift = frac_bits + frac_adjust_shift;
  const int MINSHIFT = 1; // Not knowing whether HW likes shift of 0, we make min 1.
  while ((mpy & 1) == 0 && shift > MINSHIFT) {
    // The end result is an odd fractional.
    mpy >>= 1; shift -= 1;
  }
}
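
// Example use (added for illustration; not part of the original commit and not
// called anywhere): prints the mantissa/shift pair for the made-up scale used
// in the worked example above.
static inline void normalize_fractional_example() {
  unsigned mpy = 0, shift = 0;
  normalize_fractional(0.0123, mpy, shift);
  // Expected: mpy = 25795 (odd, <= 32767), shift = 21,
  // so mpy / 2^shift ~= 0.0123.
  printf("normalize_fractional(0.0123) -> mpy=%u shift=%u (%.9f)\n",
         mpy, shift, mpy / double(1ULL << shift));
}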

template <typename Dtype>
void caffe_cpu_scale_better_round(const std::string &name, const int n, const Scale_type scale, Dtype* x){
  // refer to https://github.com/google/gemmlowp/blob/master/doc/quantization.md#implementation-of-quantized-matrix-multiplication
  Scale_type mul = scale; // multiplier in normalized interval [0.5, 1.0)
  enum Rmode { R_double_round, R_single_round, R_ev_round };
  auto tell = []() {
    const char* QR = getenv("CAFFE_QUANTIZED_ROUND");
    if (QR == 0) return R_double_round;
    return
      strcmp(QR,"SR")==0?R_single_round:
      strcmp(QR,"EV")==0?R_ev_round:
      (printf("Unrecognized rounding mode %s\n",QR), R_double_round);
  };
  static const Rmode QR = tell();
  switch(QR) {
  case R_double_round: case R_single_round: {
    if (QR != R_double_round)
      printf(" Layer %s: round mode %d by %18.15f\n",name.c_str(),QR,scale);
    bool SR = QR == R_single_round;
    int shift = 0;
    while (mul < 0.5) {
      mul *= 2.0;
      ++shift;
    }
    shift = (1<<shift);
    for (int i = 0; i < n; ++i) {
      x[i] = SR ? x[i] * mul : std::round(x[i] * mul);
      x[i] = std::round(x[i]/shift);
    }
  } break;
  case R_ev_round: {
#define LLSHL1(x) (1LL<<(x))
#define LL_ROUND(X,shift) /* (unbiased) round-to-even */ \
    ((X + ((X >> (shift)) & 1) + (LLSHL1(shift-1)-1)) >> (shift))
    unsigned mpy,shift;
    // Produces 15-bit mantissa and an exponent. The mantissa is
    // thus less precise than that of a 32-bit floating-point number.
    normalize_fractional(scale,mpy,shift);
    printf(" Layer %s: round mode %d by %18.15f = mpy %d shift %d\n",
           name.c_str(),QR,scale,mpy,shift);
    typedef signed long long SLL;
    for (int i = 0; i < n; ++i) {
      SLL acc = SLL(x[i]); // Assumed to be an integer already.
      acc *= mpy;
      x[i] = LL_ROUND(acc,shift);
    }
  } break;
  }
}
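
// Illustration (added; not in the original commit): LL_ROUND sends values that
// sit exactly halfway to the nearest even result, which keeps the reduction
// unbiased:
//   LL_ROUND(5LL, 1) == 2   // 2.5 -> 2 (down to even)
//   LL_ROUND(7LL, 1) == 4   // 3.5 -> 4 (up to even)
//   LL_ROUND(6LL, 2) == 2   // 1.5 -> 2 (up to even)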

//#define caffe_cpu_scale_double_round(A,B,C) \
//  caffe_cpu_scale_better_round(this->layer_param_.name(),A,B,C)

template <typename Dtype>
void Multiply_better(
    const int n, Dtype* x, const int mul,
    const int shift, const int round_mode,
    const std::string &name, const Scale_type scale)
{
  enum Rmode { R_double_round, R_ev_round };
  auto tell = []() {
    const char* QR = getenv("CAFFE_QUANTIZED_ROUND");
    if (QR == 0) return R_double_round;
    return
      strcmp(QR,"EV")==0?R_ev_round:
      (printf("Unrecognized rounding mode %s\n",QR), R_double_round);
  };
  static const Rmode QR = tell();
  static bool show_data_bool = getenv("CAFFE_SHOW_DATA") != 0;
  auto show_data = [&](const char *when) {
    printf("Data %s\n",when);
    for (int i = 0; i < n; i++) {
      printf("%4d = %f\n",i,x[i]);
    }
    return 0;
  };
  switch(QR) {
  case R_double_round: {
    MultiplyByQuantizedMultiplierVR(n, x, mul, shift, round_mode);
  } break;
  case R_ev_round: {
#define LLSHL1(x) (1LL<<(x))
#define LL_ROUND(X,shift) /* (unbiased) round-to-even */ \
    ((X + ((X >> (shift)) & 1) + (LLSHL1(shift-1)-1)) >> (shift))
    unsigned mpy,shift;
    // Produces 15-bit mantissa and an exponent. The mantissa is
    // thus less precise than that of a 32-bit floating-point number.
    normalize_fractional(scale,mpy,shift);
    printf(" Layer %s: round mode %d by %18.15f = mpy %d shift %d\n",
           name.c_str(),QR,scale,mpy,shift);
    typedef signed long long SLL;
    if (show_data_bool) show_data("before scaling {");
    for (int i = 0; i < n; ++i) {
      SLL acc = SLL(x[i]); // Assumed to be an integer already.
      acc *= mpy;
      x[i] = double(LL_ROUND(acc,shift));
    }
    if (show_data_bool) show_data("after scaling }");
  } break;
  }
}

#define MultiplyByQuantizedMultiplierVR(A,B,C,D,E) \
  Multiply_better(A,B,C,D,E,this->layer_param_.name(), out_scal)
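
// Note (added for explanation; not part of the original commit): after this
// point, a call in conv_layer.cpp of the form
//   MultiplyByQuantizedMultiplierVR(n, data, mul, shift, round_mode);
// expands to
//   Multiply_better(n, data, mul, shift, round_mode,
//                   this->layer_param_.name(), out_scal);
// so setting CAFFE_QUANTIZED_ROUND=EV switches those call sites to the EV
// rounding path without further source changes. (The argument names here are
// placeholders; out_scal must be in scope at the call site.)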
