[Quantization] Add checkpoint quantization in low precision optimization tools. (#449)

xiaowan0322 · web-flow · commit 3537966b976c · 2022-09-20T21:10:43.000+08:00
diff --git a/tensorflow/c/quantize_embedding_variable.cc b/tensorflow/c/quantize_embedding_variable.cc
@@ -25,16 +25,8 @@ namespace tensorflow {
 namespace checkpoint {
 
 void WriteRestVariables(BundleReader& reader, BundleWriter& writer,
-                        const std::vector<string>& names,
-                        const std::set<string>& ev_suffix) {
-  std::set<string> updated_names;
-  for (int idx = 0; idx < names.size(); ++idx) {
-    updated_names.insert(names[idx] + "-values");
-    for (auto it = ev_suffix.cbegin(); it != ev_suffix.cend(); ++it) {
-      updated_names.insert(names[idx] + *it);
-    }
-  }
-
+                        const std::vector<string>& ignored_names) {
+  std::set<string> excluded_names(ignored_names.cbegin(), ignored_names.cend());
   std::vector<std::string> tensor_names;
   reader.Seek(kHeaderEntryKey);
   reader.Next();
@@ -45,7 +37,7 @@ void WriteRestVariables(BundleReader& reader, BundleWriter& writer,
     Status status;
     DataType dtype;
     TensorShape shape;
-    if (updated_names.count(tensor_name)) continue;
+    if (excluded_names.count(tensor_name)) continue;
     status = reader.LookupDtypeAndShape(tensor_name, &dtype, &shape);
     if (status.ok()) {
       Tensor tensor(dtype, shape);
@@ -55,6 +47,18 @@ void WriteRestVariables(BundleReader& reader, BundleWriter& writer,
   }
 }
 
+// Embedding-variable flavour of WriteRestVariables.
+//
+// Every entry of `ignored_names` is concatenated with every entry of
+// `ev_suffix` (name + suffix). The resulting tensor names are passed to the
+// three-argument overload, which skips tensors whose names appear in that
+// list.
+void WriteRestVariables(BundleReader& reader, BundleWriter& writer,
+                        const std::vector<string>& ignored_names,
+                        const std::set<string>& ev_suffix) {
+  std::vector<string> ev_names;
+  // Exactly one excluded tensor name is produced per (variable, suffix) pair.
+  ev_names.reserve(ignored_names.size() * ev_suffix.size());
+  for (const string& name : ignored_names) {
+    for (const string& suffix : ev_suffix) {
+      ev_names.push_back(name + suffix);
+    }
+  }
+  WriteRestVariables(reader, writer, ev_names);
+}
+
 void ConvertToBF16Value(const Tensor& in_tensor, const string name,
                         BundleWriter& writer) {
   auto in_data = in_tensor.flat<float>();
@@ -120,19 +124,21 @@ Status QuantizeEmbeddingVariable(const string& input_prefix,
                                  const std::vector<string>& names,
                                  const std::vector<string>& quant_names,
                                  const std::vector<string>& scale_names,
-                                 TF_DataType data_type) {
+                                 const TF_DataType data_type,
+                                 const bool is_ev) {
   BundleReader reader(Env::Default(), input_prefix);
   BundleWriter writer(Env::Default(), output_prefix);
   const std::set<string> ev_suffix = {
       "-freqs",         "-freqs_filtered",          "-keys",
       "-keys_filtered", "-partition_filter_offset", "-partition_offset",
-      "-versions",      "-versions_filtered"};
+      "-versions",      "-versions_filtered",       "-values"};
 
   for (int idx = 0; idx < names.size(); ++idx) {
     Status status;
     DataType dtype;
     TensorShape shape;
-    string value_name = names[idx] + "-values";
+    string suffix = is_ev ? "-values" : "";
+    string value_name = names[idx] + suffix;
     status = reader.LookupDtypeAndShape(value_name, &dtype, &shape);
     if (!status.ok()) {
       errors::InvalidArgument("Invalid variable name:", value_name);
@@ -141,7 +147,7 @@ Status QuantizeEmbeddingVariable(const string& input_prefix,
     status = reader.Lookup(value_name, &in_tensor);
     auto in_data = in_tensor.flat<float>();
 
-    string quant_name = quant_names[idx] + "-values";
+    string quant_name = quant_names[idx] + suffix;
     if (data_type == TF_DataType::TF_BFLOAT16) {
       ConvertToBF16Value(in_tensor, quant_name, writer);
     } else if (data_type == TF_DataType::TF_HALF) {
@@ -151,20 +157,36 @@ Status QuantizeEmbeddingVariable(const string& input_prefix,
     } else {
       errors::InvalidArgument("Unsupported data type:", data_type);
     }
-    for (auto it = ev_suffix.cbegin(); it != ev_suffix.cend(); ++it) {
-      string tensor_name = names[idx] + *it;
-      status = reader.LookupDtypeAndShape(tensor_name, &dtype, &shape);
-      if (status.ok()) {
-        Tensor tensor(dtype, shape);
-        status = reader.Lookup(tensor_name, &tensor);
+    if (is_ev) {
+      for (auto it = ev_suffix.cbegin(); it != ev_suffix.cend(); ++it) {
+        if (*it == "-values") continue;
+        string tensor_name = names[idx] + *it;
+        status = reader.LookupDtypeAndShape(tensor_name, &dtype, &shape);
         if (status.ok()) {
-          writer.Add(quant_names[idx] + *it, tensor);
+          Tensor tensor(dtype, shape);
+          status = reader.Lookup(tensor_name, &tensor);
+          if (status.ok()) {
+            writer.Add(quant_names[idx] + *it, tensor);
+          }
         }
       }
     }
   }
 
-  WriteRestVariables(reader, writer, names, ev_suffix);
+  if (is_ev) {
+    WriteRestVariables(reader, writer, names, ev_suffix);
+  } else {
+    WriteRestVariables(reader, writer, names);
+  }
+  writer.Finish();
+  return Status::OK();
+}
+
+// Opens the checkpoint at `input_prefix` for reading and a checkpoint at
+// `output_prefix` for writing, then calls WriteRestVariables with `names` as
+// the list of tensor names to leave out of the output.
+//
+// NOTE(review): the result of writer.Finish() is discarded and Status::OK() is
+// returned unconditionally, so a failure while reading or writing would not
+// reach the caller -- confirm whether that is intended.
+Status RemoveVariable(const string& input_prefix, const string& output_prefix,
+                      const std::vector<string>& names) {
+  BundleReader reader(Env::Default(), input_prefix);
+  BundleWriter writer(Env::Default(), output_prefix);
+  WriteRestVariables(reader, writer, names);
   writer.Finish();
   return Status::OK();
 }
diff --git a/tensorflow/c/quantize_embedding_variable.h b/tensorflow/c/quantize_embedding_variable.h
@@ -34,7 +34,10 @@ Status QuantizeEmbeddingVariable(const string& input_prefix,
                                  const std::vector<string>& names,
                                  const std::vector<string>& quant_names,
                                  const std::vector<string>& scale_names,
-                                 TF_DataType data_type);
+                                 const TF_DataType data_type, const bool is_ev);
+
+// Reads the checkpoint identified by `input_prefix` and writes a checkpoint
+// under `output_prefix`; `names` lists the tensor names to remove.
+Status RemoveVariable(const string& input_prefix, const string& output_prefix,
+                      const std::vector<string>& names);
 
 }  // namespace checkpoint
 }  // namespace tensorflow
diff --git a/tensorflow/python/util/quantize_embedding_variable.i b/tensorflow/python/util/quantize_embedding_variable.i
@@ -26,28 +26,41 @@ limitations under the License.
 %unignore tensorflow;
 %unignore tensorflow::checkpoint;
 %unignore QuantizeEmbeddingVariablesByName;
+%unignore RemoveVariablesByName;
 
 %{
+// Wrapper exposed through this SWIG interface: splits the comma-separated
+// `names_string`, `quant_names_string` and `scale_names_string` into vectors
+// and forwards them, together with `data_type` and `is_ev`, to
+// tensorflow::checkpoint::QuantizeEmbeddingVariable.
+// NOTE(review): the Status returned by that call is not inspected here.
 void QuantizeEmbeddingVariablesByName(string input_prefix, string output_prefix,
                                       string names_string,
                                       string quant_names_string,
                                       string scale_names_string,
-                                      TF_DataType data_type) {
+                                      TF_DataType data_type, bool is_ev) {
   std::vector<string> names = tensorflow::str_util::Split(names_string, ',');
   std::vector<string> quant_names =
       tensorflow::str_util::Split(quant_names_string, ',');
   std::vector<string> scale_names =
       tensorflow::str_util::Split(scale_names_string, ',');
 
   tensorflow::checkpoint::QuantizeEmbeddingVariable(
-      input_prefix, output_prefix, names, quant_names, scale_names, data_type);
+      input_prefix, output_prefix, names, quant_names, scale_names, data_type,
+      is_ev);
 }
 %}
 
 void QuantizeEmbeddingVariablesByName(string input_prefix, string output_prefix,
                                       string names_string,
                                       string quant_names_string,
                                       string scale_names_string,
-                                      TF_DataType data_type);
+                                      TF_DataType data_type, bool is_ev);
+
+%{
+// Splits the comma-separated `names_string` into individual tensor names and
+// hands them, along with both checkpoint prefixes, to
+// tensorflow::checkpoint::RemoveVariable.
+void RemoveVariablesByName(string input_prefix, string output_prefix,
+                           string names_string) {
+  const std::vector<string> tensor_names =
+      tensorflow::str_util::Split(names_string, ',');
+  tensorflow::checkpoint::RemoveVariable(input_prefix, output_prefix,
+                                         tensor_names);
+}
+%}
+
+void RemoveVariablesByName(string input_prefix, string output_prefix,
+                           string names_string);
 
 %unignoreall
diff --git a/tensorflow/python/util/quantize_embedding_variable.py b/tensorflow/python/util/quantize_embedding_variable.py
@@ -15,12 +15,15 @@
 """Exposes the Python wrapper for quantize embedding variable."""
 from __future__ import absolute_import, division, print_function
 
-from tensorflow.python.pywrap_tensorflow import QuantizeEmbeddingVariablesByName
+from tensorflow.python.pywrap_tensorflow import (
+    QuantizeEmbeddingVariablesByName,
+    RemoveVariablesByName,
+)
 from tensorflow.python.util import compat
 
 
 def quantize_by_name(
-    input_prefix, output_prefix, names, quant_names, scale_names, dtype
+    input_prefix, output_prefix, names, quant_names, scale_names, dtype, is_ev
 ):
   """Python wrapper for quantize embedding variable.
 
@@ -31,6 +34,7 @@ def quantize_by_name(
     quant_names: List of quantized tensor names.
     scale_names: List of scale tensor names.
     dtype: tf.bfloat16 or tf.int8
+    is_ev: Boolean. Whether variables are EmbeddingVariable.
   """
   input_prefix = compat.as_bytes(input_prefix)
   output_prefix = compat.as_bytes(output_prefix)
@@ -44,4 +48,19 @@ def quantize_by_name(
       quant_names_string,
       scale_names_string,
       dtype.as_datatype_enum,
+      is_ev,
   )
+
+
+def remove_variables_by_name(input_prefix, output_prefix, names):
+  """Python wrapper that removes the named tensors from a checkpoint.
+
+  Args:
+    input_prefix: String. Prefix of the checkpoint to read.
+    output_prefix: String. Prefix of the checkpoint to write.
+    names: List of names of the tensors to remove. The names are joined with
+      "," before being passed to the native wrapper.
+  """
+  RemoveVariablesByName(
+      compat.as_bytes(input_prefix),
+      compat.as_bytes(output_prefix),
+      compat.as_bytes(",".join(names)),
+  )
diff --git a/tools/low_precision_optimize/README.md b/tools/low_precision_optimize/README.md
@@ -60,3 +60,17 @@ for i in range(10):
 with open('calib_data.npy', 'wb') as f:
     np.save(f, calib_data)
 ```
+
+## 转换模型参数
+此外，鉴于部分使用场景中存在仅更新模型参数的需求，该工具提供单独量化模型参数的功能，使用方式如下：
+```python
+from low_precision_optimize import convert_ckpt
+
+# 指定输入的待优化参数checkpoint
+ckpt_prefix = 'dlrm/new_variables/variables'
+# 指定输出的优化参数checkpoint
+save_prefix = 'dlrm/opt_variables/variables'
+# 指定前一环节中优化后的saved_model目录
+opt_model_path = 'dlrm/saved_model_opt'
+convert_ckpt(ckpt_prefix, save_prefix, opt_model_path)
+```
diff --git a/tools/low_precision_optimize/low_precision_optimize.py b/tools/low_precision_optimize/low_precision_optimize.py
diff --git a/tools/low_precision_optimize/tf_graph_transform_utils.py b/tools/low_precision_optimize/tf_graph_transform_utils.py