Skip to content

Commit 11b1d7c

Browse files
committed
add more parameter for Observer
1 parent 6822b56 commit 11b1d7c

File tree

6 files changed

+86
-30
lines changed

6 files changed

+86
-30
lines changed

intel_pytorch_extension_py/__init__.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,16 @@ def get_auto_mix_precision():
5959
else:
6060
return None
6161

62+
'''
63+
def quarry_int8_configure(model, inputs_shape):
64+
dummy_input = torch.randn(input_shapes).to(DEVICE)
65+
core.enable_mix_int8_fp32()
66+
with torch.no_grad():
67+
y = model(dummy_input)
68+
observer_configures = core.get_int8_observer_configures()
69+
return observer_configures
70+
'''
71+
6272
def calibration_reset():
6373
if core.get_int8_calibration():
6474
core.calibration_reset()
@@ -93,13 +103,15 @@ def generator_context(*args, **kwargs):
93103
return generator_context
94104

95105
class int8_calibration(_DecoratorContextManager):
96-
def __init__(self, file_name):
106+
def __init__(self, file_name, observer_configure=None):
107+
#self.observer_configure = observer_configure
97108
self.configure_file = file_name
98109

99110
def __enter__(self):
100111
if not core.get_mix_int8_fp32():
101112
raise ValueError("please first run enable_auto_mix_precision(torch.int8) before int8 calibration")
102113
core.enable_int8_calibration()
114+
#core.set_int8_observer_configure(self.observer_configure)
103115

104116
def __exit__(self, *args):
105117
core.disable_int8_calibration()

torch_ipex/csrc/auto_opt_config.h

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -60,46 +60,63 @@ class AutoOptConfig {
6060
return calibration_step_;
6161
}
6262

63-
inline void insert_or_updata_observer(std::string op_name, std::vector<float> max_values) {
63+
inline void insert_or_updata_observer(std::string op_name,
64+
std::vector<float> input_min_max_values, std::vector<float> output_min_max_values) {
6465
num_ops_id++;
6566
if (observers_.size() < num_ops_id) {
66-
//Operator op = {num_ops_id - 1, op_n};
67-
Observer new_observer = {num_ops_id - 1, op_name, max_values};
67+
// this path is taken when the user has not set the int8 op's configuration; use the default configuration
68+
Observer new_observer = {num_ops_id - 1, op_name, input_min_max_values, output_min_max_values};
6869
observers_.push_back(new_observer);
6970
} else {
70-
for (auto i = 0; i < max_values.size(); i++)
71-
observers_[num_ops_id -1].max_values[i] = std::max(observers_[num_ops_id -1].max_values[i], max_values[i]);
71+
// the user has set a configuration, or at least one iteration has already run
72+
auto input_pre = observers_[num_ops_id - 1].Input_min_max_values;
73+
auto output_pre = observers_[num_ops_id - 1].Output_min_max_values;
74+
if (observers_[num_ops_id - 1].Algorithm == "min_max") {
75+
observers_[num_ops_id - 1].Input_min_max_values[0] = std::min(input_pre[0], input_min_max_values[0]);
76+
observers_[num_ops_id - 1].Input_min_max_values[1] = std::max(input_pre[1], input_min_max_values[1]);
77+
observers_[num_ops_id - 1].Output_min_max_values[0] = std::min(output_pre[0], output_min_max_values[0]);
78+
observers_[num_ops_id - 1].Output_min_max_values[1] = std::max(output_pre[1], output_min_max_values[1]);
79+
} else if(observers_[num_ops_id -1].Algorithm == "moving_averager_min_max"){
80+
auto c = observers_[num_ops_id - 1].Averaging_constant;
81+
observers_[num_ops_id - 1].Input_min_max_values[0] = (1 - c) * input_pre[0] + c * input_min_max_values[0];
82+
observers_[num_ops_id - 1].Input_min_max_values[1] = (1 - c) * input_pre[1] + c * input_min_max_values[1];
83+
observers_[num_ops_id - 1].Output_min_max_values[0] = (1 - c) * output_pre[0] + c * output_min_max_values[0];
84+
observers_[num_ops_id - 1].Output_min_max_values[1] = (1 - c) * output_pre[1] + c * output_min_max_values[1];
85+
}
7286
}
7387
}
7488

89+
/*
7590
inline void print_observer() {
7691
for (auto i = 0; i< observers_.size(); i++) {
7792
for (auto j = 0; j < observers_[i].max_values.size(); j++)
7893
std::cout<<observers_[i].max_values[j]<<std::endl;
7994
}
8095
}
81-
96+
*/
8297
inline void print_indicator() {
8398
for (auto i = 0; i< indicators_.size(); i++) {
8499
auto scales = indicators_[i].get_indicator_scales();
85100
for (auto j = 0; j< scales.size(); j++)
86101
std::cout<<scales[j]<<std::endl;
87102
}
88103
}
89-
104+
90105
inline void add_indicators() {
91106
num_ops_id = 0;
92107
// default used is s8
93108
for (auto i = 0; i < observers_.size(); i++) {
94109
std::vector<float> scales;
95-
std::vector<bool> uint8_used;
96-
for (auto j = 0; j < observers_[i].max_values.size(); j++) {
97-
scales.push_back(127.5 / observers_[i].max_values[j]);
98-
uint8_used.push_back(false);
99-
}
100-
// zero_points not used now, zero_points = 0 for u8 and 128 for s8.
101-
//zero_point = 128;
102-
Indicator new_indicator(observers_[i].Id, observers_[i].Name, scales, uint8_used, true);
110+
std::vector<float> input_values = observers_[i].Input_min_max_values;
111+
std::vector<float> output_values = observers_[i].Output_min_max_values;
112+
113+
scales.push_back(127.5 / std::max(std::abs(input_values[0]), input_values[1]));
114+
scales.push_back(127.5 / std::max(std::abs(output_values[0]), output_values[1]));
115+
// zero_points not used now, zero_points = 0 for u8 and 128 for s8.
116+
//zero_point = 128;
117+
Indicator new_indicator(observers_[i].Id, observers_[i].Name, observers_[i].Algorithm,
118+
observers_[i].Weight_granularity, scales, {observers_[i].Input_dtype_uint8, observers_[i].Output_dtype_uint8},
119+
observers_[i].Quantized);
103120
indicators_.push_back(new_indicator);
104121
}
105122
observers_.clear();

torch_ipex/csrc/cpu/DevOPs.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ at::Tensor AtenIpexCPUDev::dil_convolution(
7373

7474
dil_input = dbl::comm::try_gen_dil_tensor(input);
7575
if (bias.defined()) {
76-
std::cout<<"convolution has bias"<<std::endl;
7776
CHECK_DNNL_OP_PRE_COND(bias);
7877
if (!check_auto_mix_int8_fp32()) {
7978
dbl::comm::reorder_to_bf16_for_mix_prec(bias, true);

torch_ipex/csrc/init_python_bindings.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ void InitIpexModuleBindings(py::module m) {
141141
m.def("get_int8_calibration", []() { return AutoOptConfig::singleton().get_int8_calibration(); });
142142
m.def("calibration_reset", []() { AutoOptConfig::singleton().calibration_reset(); });
143143
m.def("add_indicators", []() { AutoOptConfig::singleton().add_indicators(); });
144-
m.def("print_observer", []() { AutoOptConfig::singleton().print_observer(); });
144+
//m.def("print_observer", []() { AutoOptConfig::singleton().print_observer(); });
145145
m.def("print_indicator", []() { AutoOptConfig::singleton().print_indicator(); });
146146
m.def("get_int8_configures", []() {
147147
py::list output_list;
@@ -150,6 +150,8 @@ void InitIpexModuleBindings(py::module m) {
150150
py::dict d;
151151
d["id"] = indicator.get_indicator_id();
152152
d["name"] = indicator.get_indicator_name();
153+
d["algorithm"] = indicator.get_indicator_algorithm();
154+
d["weight_granularity"] = indicator.get_indicator_weight_granularity();
153155
std::vector<float> scales = indicator.get_indicator_scales();
154156
d["input_scale"] = scales[0];
155157
d["output_scale"] = scales[1];
@@ -166,12 +168,14 @@ void InitIpexModuleBindings(py::module m) {
166168
for (py::handle i : l) {
167169
int64_t id = py::cast<std::int64_t>(i["id"]);
168170
std::string op_name = py::cast<std::string>(i["name"]);
171+
std::string algorithm = py::cast<std::string>(i["algorithm"]);
172+
std::string weight_granularity = py::cast<std::string>(i["weight_granularity"]);
169173
float input_scale = py::cast<float>(i["input_scale"]);
170174
float output_scale = py::cast<float>(i["output_scale"]);
171175
bool input_uint8_used = py::cast<bool>(i["input_uint8_used"]);
172176
bool output_uint8_used = py::cast<bool>(i["output_uint8_used"]);
173177
bool quantized = py::cast<bool>(i["quantized"]);
174-
Indicator temp(id, op_name, {input_scale, output_scale},
178+
Indicator temp(id, op_name, algorithm, weight_granularity, {input_scale, output_scale},
175179
{input_uint8_used, output_uint8_used}, quantized);
176180
indicators.push_back(temp);
177181
}

torch_ipex/csrc/quantization/Observer.h

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,28 @@ namespace int8 {
88
struct Observer {
99
int64_t Id;
1010
std::string Name;
11-
// the max_values of input and output for one op
12-
std::vector<float> max_values;
11+
std::vector<float> Input_min_max_values;
12+
std::vector<float> Output_min_max_values;
13+
// default is using min/max to compute the quantization parameters,
14+
// only supports min_max, MovingAverageMinMax and other non-per_channel methods
15+
std::string Algorithm = "min_max";
16+
float Averaging_constant = 0.01; // for MovingAverage method
17+
// only useful for conv; oneDNN only supports per_channel for conv's weight,
18+
// default is per_tensor
19+
std::string Weight_granularity = "per_tensor";
20+
// true means the input will be quantized to uint8, otherwise to int8.
21+
bool Input_dtype_uint8 = false;
22+
bool Output_dtype_uint8 = false;
23+
bool Quantized = true;
1324
};
1425

1526
class Indicator {
1627
public:
17-
Indicator(int64_t id = 0, std::string name = "", std::vector<float> scales = std::vector<float>(2, 1),
18-
std::vector<bool> uint8_used = std::vector<bool>(2, false) , bool quantized = true):
19-
Id(id), Name(name), Scales(scales), Uint8_used(uint8_used), Quantized(quantized) {}
28+
Indicator(int64_t id = 0, std::string name = "", std::string algorithm = "min_max",
29+
std::string weight_granularity = "per_tensor", std::vector<float> scales = std::vector<float>(2, 1),
30+
std::vector<bool> uint8_used = std::vector<bool>(2, false),bool quantized = true):
31+
Id(id), Name(name), Algorithm(algorithm), Weight_granularity(weight_granularity),
32+
Scales(scales), Uint8_used(uint8_used), Quantized(quantized) {}
2033

2134
int64_t get_indicator_id() {
2235
return Id;
@@ -26,6 +39,14 @@ class Indicator {
2639
return Name;
2740
}
2841

42+
std::string get_indicator_algorithm() {
43+
return Algorithm;
44+
}
45+
46+
std::string get_indicator_weight_granularity() {
47+
return Weight_granularity;
48+
}
49+
2950
std::vector<float> get_indicator_scales() {
3051
return Scales;
3152
}
@@ -53,6 +74,8 @@ class Indicator {
5374
private:
5475
int64_t Id;
5576
std::string Name;
77+
std::string Algorithm;
78+
std::string Weight_granularity;
5679
std::vector<float> Scales;
5780
std::vector<bool> Uint8_used;
5881
bool Quantized;

torch_ipex/csrc/utils.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -129,15 +129,16 @@ bool check_int8_calibration() {
129129
}
130130

131131
void insert_or_updata_observer(const at::Tensor& self, const at::Tensor& output, std::string op_name) {
132-
std::vector<float> max_values;
133-
auto value = self.abs().max().item<float>();
134-
max_values.push_back(value);
132+
std::vector<float> input_min_max_values, output_min_max_values;
133+
input_min_max_values.push_back(self.abs().min().item<float>());
134+
input_min_max_values.push_back(self.abs().max().item<float>());
135135
if (output.defined()) {
136-
max_values.push_back(output.abs().max().item<float>());
136+
output_min_max_values.push_back(output.abs().min().item<float>());
137+
output_min_max_values.push_back(output.abs().max().item<float>());
137138
} else {
138-
max_values.push_back(value);
139+
output_min_max_values = input_min_max_values;
139140
}
140-
AutoOptConfig::singleton().insert_or_updata_observer(op_name, max_values);
141+
AutoOptConfig::singleton().insert_or_updata_observer(op_name, input_min_max_values, output_min_max_values);
141142
}
142143

143144
std::tuple<std::vector<float>, bool> get_indicator_scales(std::vector<bool> uint8_used) {

0 commit comments

Comments
 (0)