|
9 | 9 | import _torch_ipex as core |
10 | 10 |
|
11 | 11 | DEVICE = 'dpcpp' |
12 | | -def enable_auto_optimization(mixed_dtype = None, train = False, configure_file = None): |
13 | | - r""" Enable auto-mixed-precision to improve performance. |
14 | 12 |
|
15 | | - The auto-mixed-precision auto reorders the tensor to the specified low precision data type. |
16 | | - You don't need to convert the input tensors and the model to the specified data type manually, |
17 | | - the extension will do it automatically and then dispatch the extension backend to accelerate |
18 | | - computation |
19 | | -
|
20 | | - Args: |
21 | | - mixed_dtype(torch.dtype): Auto reorder the input tensors to the specified low precision data type |
22 | | - and dispatch to oneDNN backend for computation |
23 | | -
|
24 | | - """ |
25 | | - if mixed_dtype != None: |
26 | | - core.enable_auto_dnnl() |
27 | | - enable_auto_mix_precision(mixed_dtype, train, configure_file) |
28 | | - |
29 | | -def get_auto_optimization(): |
30 | | - return get_auto_mix_precision |
31 | | - |
32 | | -def get_train(): |
33 | | - return core.get_train() |
34 | | - |
35 | | -def enable_auto_mix_precision(mixed_dtype = torch.bfloat16, train = False, configure_file = None): |
36 | | - if mixed_dtype == torch.bfloat16: |
37 | | - core.enable_mix_bf16_fp32() |
38 | | - core.disable_mix_int8_fp32() |
39 | | - elif mixed_dtype == torch.int8 or mixed_dtype == torch.uint8: |
40 | | - core.enable_mix_int8_fp32() |
41 | | - core.disable_mix_bf16_fp32() |
42 | | - if configure_file != None: |
43 | | - core.disable_int8_calibration() |
44 | | - f = open(configure_file) |
45 | | - configures = json.load(f) |
46 | | - core.load_indicators_file(configures) |
47 | | - else: |
48 | | - warnings.warn("please not forget do calibration before doing validation step") |
49 | | - else: |
50 | | - core.disable_mix_int8_fp32() |
51 | | - core.disable_mix_bf16_fp32() |
52 | | - core.set_execution_mode(train=train) |
53 | | - |
54 | | -def get_auto_mix_precision(): |
55 | | - if core.get_mix_bf16_fp32(): |
56 | | - return torch.bfloat16 |
57 | | - elif core.get_mix_int8_fp32(): |
58 | | - return torch.int8 |
59 | | - else: |
60 | | - return None |
61 | | - |
62 | | -''' |
63 | | -def quarry_int8_configure(model, inputs_shape): |
64 | | - dummy_input = torch.randn(input_shapes).to(DEVICE) |
65 | | - core.enable_mix_int8_fp32() |
66 | | - with torch.no_grad(): |
67 | | - y = model(dummy_input) |
68 | | - observer_configures = core.get_int8_observer_configures() |
69 | | - return observer_configures |
70 | | -''' |
71 | | - |
72 | | -def calibration_reset(): |
73 | | - if core.get_int8_calibration(): |
74 | | - core.calibration_reset() |
75 | | - else: |
76 | | - raise ValueError("please first run enable_calibration before calibration reset") |
| 13 | +class AmpConf(object): |
| 14 | + def __init__(self, mixed_dtype = torch.bfloat16, configure_file = None): |
| 15 | + self.dtype = mixed_dtype |
| 16 | + self.configure_file = configure_file |
| 17 | + |
| 18 | + # for the int8 path, if the user provides an existing configure file, load it. |
| 19 | + if self.configure_file != None and self.dtype != torch.bfloat16: |
| 20 | + if os.path.exists(self.configure_file) and os.stat(self.configure_file).st_size != 0: |
| 21 | + with open(self.configure_file, 'r') as f: |
| 22 | + configures = json.load(f) |
| 23 | + core.load_indicators_file(configures) |
| 24 | + else: |
| 25 | + assert False, 'Cannot load an empty or non-existent file, please run the calibration step first' |
| 26 | + |
| 27 | + # for int8 quantization, save the data gathered during the calibration step. |
| 28 | + def save(self, configure_file): |
| 29 | + core.add_indicators() |
| 30 | + configures = core.get_int8_configures() |
| 31 | + with open(configure_file, 'w') as fp: |
| 32 | + json.dump(configures, fp, indent = 4) |
77 | 33 |
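
The new `AmpConf` object carries the mixed dtype plus an optional int8 configure file, and `save()` persists the indicators collected during calibration. A minimal sketch of how it might be used, assuming the extension is importable as `ipex` (the package alias and the json file name below are placeholders, not part of this patch):

```python
# Minimal sketch; the import alias and json path are assumptions.
import torch
import intel_pytorch_extension as ipex  # assumed package name

# bfloat16 path: no configure file is needed.
bf16_conf = ipex.AmpConf(torch.bfloat16)

# int8 path: reuse a configure file written by an earlier calibration run;
# AmpConf loads the saved indicators from it at construction time.
int8_conf = ipex.AmpConf(torch.int8, configure_file='resnet50_int8.json')

# After running calibration, persist the gathered indicators for later runs.
int8_conf.save('resnet50_int8.json')
```
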
|
78 | 34 | class _DecoratorContextManager: |
79 | 35 | """Allow a context manager to be used as a decorator, copied from the PyTorch framework""" |
@@ -102,22 +58,80 @@ def generator_context(*args, **kwargs): |
102 | 58 | break |
103 | 59 | return generator_context |
104 | 60 |
|
105 | | -class int8_calibration(_DecoratorContextManager): |
106 | | - def __init__(self, file_name, observer_configure=None): |
107 | | - #self.observer_configure = observer_configure |
108 | | - self.configure_file = file_name |
| 61 | +def get_auto_mix_precision(): |
| 62 | + if core.get_mix_bf16_fp32(): |
| 63 | + return torch.bfloat16 |
| 64 | + elif core.get_mix_int8_fp32(): |
| 65 | + return torch.int8 |
| 66 | + else: |
| 67 | + return None |
| 68 | + |
| 69 | +def enable_auto_optimization(mixed_dtype = None, train = False): |
| 70 | + r""" Enable auto-mixed-precision to improve performance at global scope. |
| 71 | +
|
| 72 | + Auto-mixed-precision automatically reorders tensors to the specified low-precision data type. |
| 73 | + You don't need to convert the input tensors and the model to the specified data type manually; |
| 74 | + the extension does it automatically and then dispatches to the extension backend to accelerate |
| 75 | + the computation. |
| 76 | +
|
| 77 | + Args: |
| 78 | + mixed_dtype(torch.dtype): Automatically reorder the input tensors to the specified low-precision data type |
| 79 | + and dispatch to the oneDNN backend for computation; can be torch.bfloat16 or None. |
| 80 | + """ |
| 81 | + if mixed_dtype != None: |
| 82 | + core.enable_auto_dnnl() |
| 83 | + running_mode = 'training' if train else 'inference' |
| 84 | + enable_auto_mix_precision(AmpConf(mixed_dtype), running_mode).__enter__() |
| 85 | + |
| 86 | +def get_auto_optimization(): |
| 87 | + return get_auto_mix_precision |
| 88 | + |
| 89 | +def get_train(): |
| 90 | + return core.get_train() |
| 91 | + |
| 92 | +class enable_auto_mix_precision(_DecoratorContextManager): |
| 93 | + def __init__(self, conf, running_mode = 'inference'): |
| 94 | + self.pre_mixed_dtype = get_auto_mix_precision() |
| 95 | + self.pre_running_mode = get_train() |
| 96 | + self.pre_calibration_state = core.get_int8_calibration() |
| 97 | + self.mixed_dtype = conf.dtype |
| 98 | + self.running_mode = running_mode |
109 | 99 |
|
110 | 100 | def __enter__(self): |
111 | | - if not core.get_mix_int8_fp32(): |
112 | | - raise ValueError("please first run enable_auto_mix_precision(torch.int8) before int8 calibration") |
113 | | - core.enable_int8_calibration() |
114 | | - #core.set_int8_observer_configure(self.observer_configure) |
| 101 | + if self.mixed_dtype == torch.bfloat16: |
| 102 | + core.enable_mix_bf16_fp32() |
| 103 | + core.disable_mix_int8_fp32() |
| 104 | + elif self.mixed_dtype == torch.int8: |
| 105 | + core.enable_mix_int8_fp32() |
| 106 | + core.disable_mix_bf16_fp32() |
| 107 | + if self.running_mode == 'inference': |
| 108 | + core.disable_int8_calibration() |
| 109 | + elif self.running_mode == 'calibration': |
| 110 | + core.enable_int8_calibration() |
| 111 | + else: |
| 112 | + assert False, 'int8 quantization only supports inference and calibration running modes' |
| 113 | + else: |
| 114 | + core.disable_mix_int8_fp32() |
| 115 | + core.disable_mix_bf16_fp32() |
| 116 | + core.set_execution_mode(train = True if self.running_mode == 'training' else False) |
115 | 117 |
|
116 | 118 | def __exit__(self, *args): |
117 | | - core.disable_int8_calibration() |
118 | | - core.add_indicators() |
119 | | - configures = core.get_int8_configures() |
120 | | - with open(self.configure_file, 'w') as fp: |
121 | | - json.dump(configures, fp, indent=4) |
122 | | - return False |
| 119 | + if self.mixed_dtype == torch.int8: |
| 120 | + if self.running_mode == 'calibration': |
| 121 | + core.calibration_reset() |
| 122 | + # restore previous state |
| 123 | + if self.pre_calibration_state: |
| 124 | + core.enable_int8_calibration() |
| 125 | + else: |
| 126 | + core.disable_int8_calibration() |
| 127 | + if self.pre_mixed_dtype == torch.bfloat16: |
| 128 | + core.enable_mix_bf16_fp32() |
| 129 | + core.disable_mix_int8_fp32() |
| 130 | + elif self.pre_mixed_dtype == torch.int8: |
| 131 | + core.enable_mix_int8_fp32() |
| 132 | + core.disable_mix_bf16_fp32() |
| 133 | + else: |
| 134 | + core.disable_mix_int8_fp32() |
| 135 | + core.disable_mix_bf16_fp32() |
| 136 | + core.set_execution_mode(train = self.pre_running_mode) |
123 | 137 |
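
Taken together, the context-manager API replaces the old `int8_calibration` helper: calibration statistics are collected inside `enable_auto_mix_precision(conf, running_mode='calibration')`, saved via `AmpConf.save`, and reloaded for inference. A rough usage sketch, again assuming the `ipex` import alias, with `model`, `calib_loader`, and the json path as placeholders:

```python
# Rough sketch; `ipex`, `model`, `calib_loader`, and the json path are
# placeholders, not part of this patch.
import torch
import intel_pytorch_extension as ipex  # assumed package name

model = model.to('dpcpp')  # DEVICE used by the extension

# Option 1: global bfloat16 auto-mixed-precision for the whole process.
ipex.enable_auto_optimization(mixed_dtype=torch.bfloat16)

# Option 2: scoped int8 quantization.
# Calibration: collect statistics batch by batch, then save them.
conf = ipex.AmpConf(torch.int8)
with torch.no_grad():
    for x in calib_loader:
        with ipex.enable_auto_mix_precision(conf, running_mode='calibration'):
            model(x.to('dpcpp'))
conf.save('configure.json')

# Inference: AmpConf reloads the saved indicators, and the context manager
# restores the previous mixed-precision state on exit.
conf = ipex.AmpConf(torch.int8, configure_file='configure.json')
with torch.no_grad():
    with ipex.enable_auto_mix_precision(conf, running_mode='inference'):
        out = model(x.to('dpcpp'))
```
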
|