|
1 | 1 | /* |
2 | | -The code below is inserted in caffe's conv_layer.cpp to change rounding behavior |
3 | | -for a convolution. |
4 | | -
|
5 | | -In EV hardware an accumulated value in a convolution or innerproduct is reduced |
6 | | -to fit into the destination blob size (e.g. 8 bits). |
7 | | -
|
8 | | -To implement the reduction, the accumulator is effectively multiplied by a |
9 | | -floating-point number with a 15-bit mantissa. Integer multiplication and shift |
10 | | -implement this multiplication, as the hardware does not have floating point: |
11 | | -
|
12 | | -- An s32 x s16 multiplication is done, producing an s48 result |
13 | | -- The s48 result is shifted right with round-to-even (i.e., convergent, unbiased) rounding. |
14 | | -
|
15 | | -Next: |
16 | | -- If there is a zero point, it is added, |
17 | | -  and the result is saturated to the range [0, 2^N - 1] for an N-bit blob |
18 | | -- otherwise, ReLU bounds, if any, are applied before storing into the destination blob. |
19 | | -
|
20 | | -The 15-bit mantissa is derived from a floating-point number F: |
21 | | -the scale of the output blob divided by the scale of the accumulator. |
22 | | -Note: the EV notion of scale is the inverse of Tensorflow's; i.e. the floating- |
23 | | -point value represented by a pixel is (pixel - zero_point) / scale. |
24 | | -Thus in Synopsys caffe, whose stored scales follow the Tensorflow convention, |
25 | | -
|
26 | | - F = input_scale * (double)weight_scale / output_scale; |
27 | | - ^^^ compute accumulator scale ^^^ |
28 | | -
|
29 | | -The function normalize_fractional below takes the floating-point number F and |
30 | | -computes the 15-bit mantissa and its accompanying shift, taking care to produce |
31 | | -a number <= 32767 and a shift >= 1, in case hardware doesn't support a shift of 0. |
32 | | -In addition, if the computed integer is even, it is shifted right to remove |
33 | | -trailing zeros, solely for representational efficiency. |
34 | | -
|
35 | | -So, in summary: |
36 | | -- Compute a floating-point number that reduces the accumulator to the desired |
37 | | - output scale |
38 | | -- Convert the floating-point number to a 15-bit integer and 6-bit shift |
39 | | -- Multiply the accumulator by the integer and shift with convergent rounding |
40 | | -
|
41 | | -The macro |
42 | | - define LL_ROUND(X,shift) / (unbiased) round-to-even / \ |
| 2 | + The code below is inserted in caffe's conv_layer.cpp to change rounding behavior |
| 3 | + for a convolution. |
| 4 | +
|
| 5 | + In EV hardware an accumulated value in a convolution or innerproduct is reduced |
| 6 | + to fit into the destination blob size (e.g. 8 bits). |
| 7 | +
|
| 8 | + To implement the reduction, the accumulator is effectively multiplied by a |
| 9 | + floating-point number with a 15-bit mantissa. Integer multiplication and shift |
| 10 | + implement this multiplication, as the hardware does not have floating point: |
| 11 | +
|
| 12 | + - An s32 x s16 multiplication is done, producing an s48 result |
| 13 | + - The s48 result is shifted right with round-to-even (i.e., convergent, unbiased) rounding. |
| 14 | +
|
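As an illustration of those two steps, here is a minimal standalone sketch of the multiply-and-shift reduction; the helper name and fixed-width types are ours, and the real EV datapath is not modeled:

```cpp
#include <cstdint>

// Multiply an s32 accumulator by an s16 mantissa, then shift right with
// round-to-even. The s32 x s16 product needs at most 48 bits, so a 64-bit
// integer holds it exactly. Assumes shift >= 1 and an arithmetic right
// shift for negative values (true on mainstream compilers).
int64_t mul_shift_round(int32_t acc, int16_t mpy, unsigned shift) {
    int64_t p = int64_t(acc) * mpy;             // s32 x s16 -> s48
    int64_t nearly_half = (1LL << (shift - 1)) - 1;
    int64_t odd = (p >> shift) & 1;             // 1 only if truncation is odd
    return (p + nearly_half + odd) >> shift;    // ties land on the even value
}
```

This mirrors the LL_ROUND macro quoted at the end of this comment.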
| 15 | + Next: |
| 16 | + - If there is a zero point, it is added, |
| 17 | + and the result is saturated to the range [0, 2^N - 1] for an N-bit blob |
| 18 | + - otherwise, ReLU bounds, if any, are applied before storing into the destination blob. |
| 19 | +
|
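A sketch of the zero-point branch of that store step might look as follows (the function and parameter names are hypothetical, not EV's API):

```cpp
#include <algorithm>
#include <cstdint>

// Add the zero point, then saturate to [0, 2^N - 1] for an N-bit blob.
uint32_t saturate_to_blob(int64_t rounded, int32_t zero_point, unsigned nbits) {
    int64_t v = rounded + zero_point;
    int64_t hi = (int64_t(1) << nbits) - 1;    // e.g. 255 for an 8-bit blob
    return uint32_t(std::min(std::max(v, int64_t(0)), hi));
}
```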
| 20 | + The 15-bit mantissa is derived from a floating-point number F: |
| 21 | + the scale of the output blob divided by the scale of the accumulator. |
| 22 | + Note: the EV notion of scale is the inverse of Tensorflow's; i.e. the floating- |
| 23 | + point value represented by a pixel is (pixel - zero_point) / scale. |
| 24 | + Thus in Synopsys caffe, whose stored scales follow the Tensorflow convention, |
| 25 | +
|
| 26 | + F = input_scale * (double)weight_scale / output_scale; |
| 27 | + ^^^ compute accumulator scale ^^^ |
| 28 | +
|
| 29 | + The function normalize_fractional below takes the floating-point number F and |
| 30 | + computes the 15-bit mantissa and its accompanying shift, taking care to produce |
| 31 | + a number <= 32767 and a shift >= 1, in case hardware doesn't support a shift of 0. |
| 32 | + In addition, if the computed integer is even, it is shifted right to remove |
| 33 | + trailing zeros, solely for representational efficiency. |
| 34 | +
|
| 35 | + So, in summary: |
| 36 | + - Compute a floating-point number that reduces the accumulator to the desired |
| 37 | + output scale |
| 38 | + - Convert the floating-point number to a 15-bit integer and 6-bit shift |
| 39 | + - Multiply the accumulator by the integer and shift with convergent rounding |
| 40 | +
|
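An end-to-end sketch of those three steps, using the normalize_fractional defined below (the wrapper name is ours, and F is assumed positive since normalize_fractional takes its absolute value):

```cpp
#include <cstdint>

// Requantize one s32 accumulator, given F = accumulator-to-output scale ratio.
int32_t requantize(int32_t acc, double F) {
    unsigned mpy, shift;
    normalize_fractional(F, mpy, shift);       // 15-bit mantissa + shift
    long long p = (long long)acc * mpy;        // s32 x s16 -> fits in s48
    // Convergent rounding, exactly as in the LL_ROUND macro below.
    return int32_t((p + ((p >> shift) & 1) + ((1LL << (shift - 1)) - 1)) >> shift);
}
```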
| 41 | + The macro |
| 42 | + define LL_ROUND(X,shift) / (unbiased) round-to-even / \ |
43 | 43 | ((X + ((X >> (shift)) & 1) + (LLSHL1(shift-1)-1)) >> (shift)) |
44 | | -implements round-to-even -- i.e., unbiased (convergent) rounding. |
| 44 | + implements round-to-even -- i.e., unbiased (convergent) rounding. |
45 | 45 |
|
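Two ties worked by hand (shift = 1, i.e. dividing by 2) show the unbiased behavior:

```cpp
// LL_ROUND(1,1) = (1 + ((1>>1)&1) + ((1LL<<0)-1)) >> 1 = (1+0+0) >> 1 = 0  // 0.5 -> 0
// LL_ROUND(3,1) = (3 + ((3>>1)&1) + ((1LL<<0)-1)) >> 1 = (3+1+0) >> 1 = 2  // 1.5 -> 2
```

Both halves land on even results, so rounding introduces no net upward bias.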
46 | | -The environment variable |
47 | | - set CAFFE_QUANTIZED_ROUND=EV |
48 | | -engages this alternative rounding. |
| 46 | + The environment variable |
| 47 | + set CAFFE_QUANTIZED_ROUND=EV |
| 48 | + engages this alternative rounding. |
49 | 49 |
|
50 | 50 | */ |
51 | 51 |
|
52 | | - |
53 | | - |
54 | 52 | typedef double Scale_type; |
55 | 53 | #include <stdlib.h> |
56 | 54 | #include <cmath> |
57 | 55 |
|
58 | 56 | static void normalize_fractional(Scale_type F, unsigned &mpy, unsigned &shift) { |
59 | | - // Adapted from python code in evgencnn. |
60 | | - int frac_bits = 16; |
61 | | - Scale_type hi = 0.5; |
62 | | - int nudge_power = frac_bits; |
63 | | -  // With round-to-nearest, anything >= 32767.5 would round to 32768, which |
64 | | -  // is invalid as a 16-bit signed number. |
65 | | -  // So the upper bound is scaled down slightly so that rounding |
66 | | -  // produces at most 32767; the nudge below avoids the overflow. |
67 | | - unsigned two_to_nudge = 1<<nudge_power; |
68 | | - hi *= (Scale_type(two_to_nudge)-0.51)/two_to_nudge; |
69 | | - Scale_type lo = hi/2; |
70 | | - int frac_adjust_shift = 0; |
71 | | - F = fabs(F); |
72 | | - Scale_type oldF = F; |
73 | | - while (F >= hi) { |
74 | | - frac_adjust_shift -= 1; F /= 2; |
75 | | - } |
76 | | - while (F < lo) { |
77 | | - frac_adjust_shift += 1; F *= 2; |
78 | | - } |
79 | | - |
80 | | - int max_shift = 63; |
81 | | - while (frac_bits + frac_adjust_shift > max_shift) { |
| 57 | + // Adapted from python code in evgencnn. |
| 58 | + int frac_bits = 16; |
| 59 | + Scale_type hi = 0.5; |
| 60 | + int nudge_power = frac_bits; |
| 61 | + // With round-to-nearest, anything >= 32767.5 would round to 32768, which |
| 62 | + // is invalid as a 16-bit signed number. |
| 63 | + // So the upper bound is scaled down slightly so that rounding |
| 64 | + // produces at most 32767; the nudge below avoids the overflow. |
| 65 | + unsigned two_to_nudge = 1 << nudge_power; |
| 66 | + hi *= (Scale_type(two_to_nudge) - 0.51) / two_to_nudge; |
| 67 | + Scale_type lo = hi / 2; |
| 68 | + int frac_adjust_shift = 0; |
| 69 | + F = fabs(F); |
| 70 | + Scale_type oldF = F; |
| 71 | + while (F >= hi) { |
| 72 | + frac_adjust_shift -= 1; |
| 73 | + F /= 2; |
| 74 | + } |
| 75 | + while (F < lo) { |
| 76 | + frac_adjust_shift += 1; |
| 77 | + F *= 2; |
| 78 | + } |
| 79 | + |
| 80 | + int max_shift = 63; |
| 81 | + while (frac_bits + frac_adjust_shift > max_shift) { |
82 | 82 | frac_adjust_shift--; |
83 | | - } |
84 | | - int total_shift = frac_bits + frac_adjust_shift; |
85 | | - 0 && printf("F=%f fas=%d\n",F,frac_adjust_shift); |
86 | | - 0 && printf("newF=%f\n",oldF*(1<<total_shift)); |
87 | | - mpy = std::round(oldF * (1<<frac_bits) * (1<<frac_adjust_shift)); |
88 | | - // Now if mpy is even, divide by 2 and reduce the shift. |
89 | | - shift = frac_bits + frac_adjust_shift; |
90 | | -  const int MINSHIFT = 1; // HW may not support a shift of 0, so require at least 1. |
91 | | - while ((mpy & 1) == 0 && shift > MINSHIFT) { |
92 | | - // The end result is an odd fractional. |
93 | | - mpy >>= 1; shift -= 1; |
94 | | - } |
95 | | - } |
96 | | - |
97 | | -template <typename Dtype> |
98 | | -void caffe_cpu_scale_better_round(const std::string &name, const int n, const Scale_type scale, Dtype* x){ |
99 | | - // refer to https://github.com/google/gemmlowp/blob/master/doc/quantization.md#implementation-of-quantized-matrix-multiplication |
100 | | - Scale_type mul = scale; // multiplier in normalized interval [0.5, 1.0) |
101 | | - enum Rmode { R_double_round, R_single_round, R_ev_round }; |
102 | | - auto tell = []() { |
| 83 | + } |
| 84 | + int total_shift = frac_bits + frac_adjust_shift; |
| 85 | + // printf("F=%f fas=%d\n", F, frac_adjust_shift); |
| 86 | + // printf("newF=%f\n", oldF * (1 << total_shift)); |
| 87 | + mpy = std::round(oldF * (1 << frac_bits) * (1 << frac_adjust_shift)); |
| 88 | + // Now if mpy is even, divide by 2 and reduce the shift. |
| 89 | + shift = frac_bits + frac_adjust_shift; |
| 90 | + const int MINSHIFT = 1; // HW may not support a shift of 0, so require at least 1. |
| 91 | + while ((mpy & 1) == 0 && shift > MINSHIFT) { |
| 92 | + // The end result is an odd fractional. |
| 93 | + mpy >>= 1; |
| 94 | + shift -= 1; |
| 95 | + } |
| 96 | +} |
| 97 | + |
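For example (a worked case, not from the original source): F = 0.3 already lies within [lo, hi), so no pre-scaling is needed:

```cpp
unsigned mpy, shift;
normalize_fractional(0.3, mpy, shift);
// round(0.3 * 2^16) = 19661, which is odd, so no trailing-zero reduction:
// expect mpy == 19661, shift == 16; 19661 / 2^16 = 0.3000031 approximately.
```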
| 98 | +template<typename Dtype> |
| 99 | +void caffe_cpu_scale_better_round(const std::string &name, const int n, |
| 100 | + const Scale_type scale, Dtype* x) { |
| 101 | + // refer to https://github.com/google/gemmlowp/blob/master/doc/quantization.md#implementation-of-quantized-matrix-multiplication |
| 102 | + Scale_type mul = scale; // multiplier in normalized interval [0.5, 1.0) |
| 103 | + enum Rmode { |
| 104 | + R_double_round, R_single_round, R_ev_round |
| 105 | + }; |
| 106 | + auto tell = []() { |
103 | 107 | const char* QR = getenv("CAFFE_QUANTIZED_ROUND"); |
104 | 108 | if (QR == 0) return R_double_round; |
105 | 109 | return |
106 | | - strcmp(QR,"SR")==0?R_single_round: |
107 | | - strcmp(QR,"EV")==0?R_ev_round: |
108 | | - (printf("Unrecognized rounding mode %s\n",QR), R_double_round); |
109 | | - }; |
110 | | - static const Rmode QR = tell(); |
111 | | - switch(QR) { |
112 | | - case R_double_round: case R_single_round: { |
113 | | - if (QR != R_double_round) |
114 | | - printf(" Layer %s: round mode %d by %18.15f\n",name.c_str(),QR,scale); |
115 | | - bool SR = QR == R_single_round; |
116 | | - int shift = 0; |
117 | | - while (mul < 0.5) { |
118 | | - mul *= 2.0; |
119 | | - ++shift; |
120 | | - } |
121 | | - shift = (1<<shift); |
122 | | - for (int i = 0; i < n; ++i) { |
123 | | - x[i] = SR ? x[i] * mul : std::round(x[i] * mul); |
124 | | - x[i] = std::round(x[i]/shift); |
125 | | - } |
126 | | - } break; |
127 | | - case R_ev_round: { |
128 | | - #define LLSHL1(x) (1LL<<(x)) |
129 | | - #define LL_ROUND(X,shift) /* (unbiased) round-to-even */ \ |
130 | | - ((X + ((X >> (shift)) & 1) + (LLSHL1(shift-1)-1)) >> (shift)) |
131 | | - unsigned mpy,shift; |
132 | | - // Produces 15-bit mantissa and an exponent. The mantissa is |
133 | | - // thus less precise than that of a 32-bit floating-point number. |
134 | | - normalize_fractional(scale,mpy,shift); |
135 | | - printf(" Layer %s: round mode %d by %18.15f = mpy %d shift %d\n", |
136 | | - name.c_str(),QR,scale,mpy,shift); |
137 | | - typedef signed long long SLL; |
138 | | - for (int i = 0; i < n; ++i) { |
139 | | - SLL acc = SLL(x[i]); // Assumed to be an integer already. |
140 | | - acc *= mpy; |
141 | | - x[i] = LL_ROUND(acc,shift); |
142 | | - } |
143 | | - } break; |
| 110 | + strcmp(QR,"SR")==0?R_single_round: |
| 111 | + strcmp(QR,"EV")==0?R_ev_round: |
| 112 | + (printf("Unrecognized rounding mode %s\n",QR), R_double_round); |
| 113 | + }; |
| 114 | + static const Rmode QR = tell(); |
| 115 | + switch (QR) { |
| 116 | + case R_double_round: |
| 117 | + case R_single_round: { |
| 118 | + if (QR != R_double_round) |
| 119 | + printf(" Layer %s: round mode %d by %18.15f\n", name.c_str(), QR, |
| 120 | + scale); |
| 121 | + bool SR = QR == R_single_round; |
| 122 | + int shift = 0; |
| 123 | + while (mul < 0.5) { |
| 124 | + mul *= 2.0; |
| 125 | + ++shift; |
144 | 126 | } |
| 127 | + shift = (1 << shift); |
| 128 | + for (int i = 0; i < n; ++i) { |
| 129 | + x[i] = SR ? x[i] * mul : std::round(x[i] * mul); |
| 130 | + x[i] = std::round(x[i] / shift); |
145 | 131 | } |
| 132 | + } |
| 133 | + break; |
| 134 | + case R_ev_round: { |
| 135 | +#define LLSHL1(x) (1LL<<(x)) |
| 136 | +#define LL_ROUND(X,shift) /* (unbiased) round-to-even */ \ |
| 137 | + ((X + ((X >> (shift)) & 1) + (LLSHL1(shift-1)-1)) >> (shift)) |
| 138 | + unsigned mpy, shift; |
| 139 | + // Produces 15-bit mantissa and an exponent. The mantissa is |
| 140 | + // thus less precise than that of a 32-bit floating-point number. |
| 141 | + normalize_fractional(scale, mpy, shift); |
| 142 | + printf(" Layer %s: round mode %d by %18.15f = mpy %d shift %d\n", |
| 143 | + name.c_str(), QR, scale, mpy, shift); |
| 144 | + typedef signed long long SLL; |
| 145 | + for (int i = 0; i < n; ++i) { |
| 146 | + SLL acc = SLL(x[i]); // Assumed to be an integer already. |
| 147 | + acc *= mpy; |
| 148 | + x[i] = LL_ROUND(acc, shift); |
| 149 | + } |
| 150 | + } |
| 151 | + break; |
| 152 | + } |
| 153 | +} |
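A hypothetical call site (values invented for illustration):

```cpp
// Scale four integer-valued accumulators by 0.3 in place. With
// CAFFE_QUANTIZED_ROUND=EV this computes round_to_even((x * 19661) >> 16).
double acc[4] = {100.0, -100.0, 150.0, 4096.0};
caffe_cpu_scale_better_round<double>("conv1", 4, 0.3, acc);
// acc is now {30, -30, 45, 1229}.
```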
146 | 154 |
|
147 | 155 | //#define caffe_cpu_scale_double_round(A,B,C) \ |
148 | 156 | // caffe_cpu_scale_better_round(this->layer_param_.name(),A,B,C) |
149 | 157 |
|
150 | | -template <typename Dtype> |
151 | | -void Multiply_better( |
152 | | - const int n, Dtype* x, const int mul, |
153 | | - const int shift, const int round_mode, |
154 | | - const std::string &name, const Scale_type scale) |
155 | | - { |
156 | | - enum Rmode { R_double_round, R_ev_round }; |
157 | | - auto tell = []() { |
| 158 | +template<typename Dtype> |
| 159 | +void Multiply_better(const int n, Dtype* x, const int mul, const int shift, |
| 160 | + const int round_mode, const std::string &name, const Scale_type scale) { |
| 161 | + enum Rmode { |
| 162 | + R_double_round, R_ev_round |
| 163 | + }; |
| 164 | + auto tell = []() { |
158 | 165 | const char* QR = getenv("CAFFE_QUANTIZED_ROUND"); |
159 | 166 | if (QR == 0) return R_double_round; |
160 | 167 | return |
161 | | - strcmp(QR,"EV")==0?R_ev_round: |
162 | | - (printf("Unrecognized rounding mode %s\n",QR), R_double_round); |
163 | | - }; |
164 | | - static const Rmode QR = tell(); |
165 | | - static bool show_data_bool = getenv("CAFFE_SHOW_DATA") != 0; |
166 | | - auto show_data = [&](const char *when) { |
| 168 | + strcmp(QR,"EV")==0?R_ev_round: |
| 169 | + (printf("Unrecognized rounding mode %s\n",QR), R_double_round); |
| 170 | + }; |
| 171 | + static const Rmode QR = tell(); |
| 172 | + static bool show_data_bool = getenv("CAFFE_SHOW_DATA") != 0; |
| 173 | + auto show_data = [&](const char *when) { |
167 | 174 | printf("Data %s\n",when); |
168 | 175 | for (int i = 0; i < n; i++) { |
169 | | - printf("%4d = %f\n",i,x[i]); |
170 | | - } |
| 176 | + printf("%4d = %f\n",i,x[i]); |
| 177 | + } |
171 | 178 | return 0; |
172 | | - }; |
173 | | - switch(QR) { |
174 | | - case R_double_round: { |
175 | | - MultiplyByQuantizedMultiplierVR(n, x, mul, shift, round_mode); |
176 | | - } break; |
177 | | - case R_ev_round: { |
178 | | - #define LLSHL1(x) (1LL<<(x)) |
179 | | - #define LL_ROUND(X,shift) /* (unbiased) round-to-even */ \ |
| 179 | + }; |
| 180 | + switch (QR) { |
| 181 | + case R_double_round: { |
| 182 | + MultiplyByQuantizedMultiplierVR(n, x, mul, shift, round_mode); |
| 183 | + } |
| 184 | + break; |
| 185 | + case R_ev_round: { |
| 186 | +#define LLSHL1(x) (1LL<<(x)) |
| 187 | +#define LL_ROUND(X,shift) /* (unbiased) round-to-even */ \ |
180 | 188 | ((X + ((X >> (shift)) & 1) + (LLSHL1(shift-1)-1)) >> (shift)) |
181 | | - unsigned mpy,shift; |
182 | | - // Produces 15-bit mantissa and an exponent. The mantissa is |
183 | | - // thus less precise than that of a 32-bit floating-point number. |
184 | | - normalize_fractional(scale,mpy,shift); |
185 | | - printf(" Layer %s: round mode %d by %18.15f = mpy %d shift %d\n", |
186 | | - name.c_str(),QR,scale,mpy,shift); |
187 | | - typedef signed long long SLL; |
188 | | - if (show_data_bool) show_data("before scaling {"); |
189 | | - for (int i = 0; i < n; ++i) { |
190 | | - SLL acc = SLL(x[i]); // Assumed to be an integer already. |
191 | | - acc *= mpy; |
192 | | - x[i] = double(LL_ROUND(acc,shift)); |
193 | | - } |
194 | | - if (show_data_bool) show_data("after scaling }"); |
195 | | - } break; |
196 | | - } |
| 189 | + unsigned mpy, shift; |
| 190 | + // Produces 15-bit mantissa and an exponent. The mantissa is |
| 191 | + // thus less precise than that of a 32-bit floating-point number. |
| 192 | + normalize_fractional(scale, mpy, shift); |
| 193 | + printf(" Layer %s: round mode %d by %18.15f = mpy %d shift %d\n", |
| 194 | + name.c_str(), QR, scale, mpy, shift); |
| 195 | + typedef signed long long SLL; |
| 196 | + if (show_data_bool) |
| 197 | + show_data("before scaling {"); |
| 198 | + for (int i = 0; i < n; ++i) { |
| 199 | + SLL acc = SLL(x[i]); // Assumed to be an integer already. |
| 200 | + acc *= mpy; |
| 201 | + x[i] = double(LL_ROUND(acc, shift)); |
197 | 202 | } |
| 203 | + if (show_data_bool) |
| 204 | + show_data("after scaling }"); |
| 205 | + } |
| 206 | + break; |
| 207 | + } |
| 208 | +} |
198 | 209 |
|
199 | 210 | #define MultiplyByQuantizedMultiplierVR(A,B,C,D,E) \ |
200 | 211 | Multiply_better(A,B,C,D,E,this->layer_param_.name(), out_scal) |
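Note the ordering: the default case inside Multiply_better calls the pre-existing MultiplyByQuantizedMultiplierVR function, and the macro is defined only afterwards, so only later call sites are rerouted. For example (the call-site names are illustrative):

```cpp
// Inside a layer method, an existing call such as
//   MultiplyByQuantizedMultiplierVR(count, data, qmul, qshift, rmode);
// now expands to
//   Multiply_better(count, data, qmul, qshift, rmode,
//                   this->layer_param_.name(), out_scal);
// so this->layer_param_ and out_scal must be in scope at each call site.
```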