This repository was archived by the owner on Jan 21, 2025. It is now read-only.

Commit 6b31c0f

qlzh727 authored and Mesh TensorFlow Team committed
Explicitly import estimator from tensorflow as a separate import instead of accessing it via tf.estimator and depend on the tensorflow estimator target.
PiperOrigin-RevId: 437342499
1 parent 58153bf commit 6b31c0f
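
The change is mechanical across the touched files: the Estimator module is imported once under a local alias instead of being reached through attribute access on the tf module at every call site, which lets each build target depend on the TensorFlow Estimator package explicitly. A minimal sketch of the pattern in Python (illustrative only, not code from the repository; it assumes a TensorFlow release where tensorflow.compat.v1.estimator is available):

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator  # new explicit import

# Old style, resolved through attribute access on the tf module:
#   mode_key = tf.estimator.ModeKeys.TRAIN
# New style, resolved through the separately imported, aliased module:
mode_key = tf_estimator.ModeKeys.TRAIN

# Both spellings resolve to the same Estimator APIs, so runtime behavior is
# unchanged; only the import (and the matching build dependency) is explicit.
print(mode_key, tf_estimator.EstimatorSpec is tf.estimator.EstimatorSpec)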

File tree

12 files changed (+98, -86 lines)

examples/mnist.py

Lines changed: 14 additions & 13 deletions
@@ -25,6 +25,7 @@
 import mesh_tensorflow as mtf
 import mnist_dataset as dataset  # local file import
 import tensorflow.compat.v1 as tf
+from tensorflow.compat.v1 import estimator as tf_estimator


 tf.flags.DEFINE_string("data_dir", "/tmp/mnist_data",
@@ -126,7 +127,7 @@ def model_fn(features, labels, mode, params):
   mesh_impl = mtf.placement_mesh_impl.PlacementMeshImpl(
       mesh_shape, layout_rules, mesh_devices)

-  if mode == tf.estimator.ModeKeys.TRAIN:
+  if mode == tf_estimator.ModeKeys.TRAIN:
     var_grads = mtf.gradients(
         [loss], [v.outputs[0] for v in graph.trainable_variables])
     optimizer = mtf.optimize.AdafactorOptimizer()
@@ -136,11 +137,11 @@ def model_fn(features, labels, mode, params):
   restore_hook = mtf.MtfRestoreHook(lowering)

   tf_logits = lowering.export_to_tf_tensor(logits)
-  if mode != tf.estimator.ModeKeys.PREDICT:
+  if mode != tf_estimator.ModeKeys.PREDICT:
     tf_loss = lowering.export_to_tf_tensor(loss)
     tf.summary.scalar("loss", tf_loss)

-  if mode == tf.estimator.ModeKeys.TRAIN:
+  if mode == tf_estimator.ModeKeys.TRAIN:
     tf_update_ops = [lowering.lowered_operation(op) for op in update_ops]
     tf_update_ops.append(tf.assign_add(global_step, 1))
     train_op = tf.group(tf_update_ops)
@@ -169,25 +170,25 @@ def model_fn(features, labels, mode, params):
     tf.summary.scalar("train_accuracy", accuracy[1])

     # restore_hook must come before saver_hook
-    return tf.estimator.EstimatorSpec(
-        tf.estimator.ModeKeys.TRAIN, loss=tf_loss, train_op=train_op,
+    return tf_estimator.EstimatorSpec(
+        tf_estimator.ModeKeys.TRAIN, loss=tf_loss, train_op=train_op,
         training_chief_hooks=[restore_hook, saver_hook])

-  if mode == tf.estimator.ModeKeys.PREDICT:
+  if mode == tf_estimator.ModeKeys.PREDICT:
     predictions = {
         "classes": tf.argmax(tf_logits, axis=1),
         "probabilities": tf.nn.softmax(tf_logits),
     }
-    return tf.estimator.EstimatorSpec(
-        mode=tf.estimator.ModeKeys.PREDICT,
+    return tf_estimator.EstimatorSpec(
+        mode=tf_estimator.ModeKeys.PREDICT,
         predictions=predictions,
         prediction_hooks=[restore_hook],
         export_outputs={
-            "classify": tf.estimator.export.PredictOutput(predictions)
+            "classify": tf_estimator.export.PredictOutput(predictions)
         })
-  if mode == tf.estimator.ModeKeys.EVAL:
-    return tf.estimator.EstimatorSpec(
-        mode=tf.estimator.ModeKeys.EVAL,
+  if mode == tf_estimator.ModeKeys.EVAL:
+    return tf_estimator.EstimatorSpec(
+        mode=tf_estimator.ModeKeys.EVAL,
         loss=tf_loss,
         evaluation_hooks=[restore_hook],
         eval_metric_ops={
@@ -199,7 +200,7 @@ def model_fn(features, labels, mode, params):

 def run_mnist():
   """Run MNIST training and eval loop."""
-  mnist_classifier = tf.estimator.Estimator(
+  mnist_classifier = tf_estimator.Estimator(
       model_fn=model_fn,
       model_dir=FLAGS.model_dir)


examples/toy_model_tpu.py

Lines changed: 7 additions & 6 deletions
@@ -22,6 +22,7 @@
 import mesh_tensorflow as mtf
 import numpy
 import tensorflow.compat.v1 as tf
+from tensorflow.compat.v1 import estimator as tf_estimator

 from tensorflow.python.data.ops.dataset_ops import Dataset
 from tensorflow.python.platform import flags
@@ -176,7 +177,7 @@ def model_fn(features, labels, mode, params):
   logits, loss = toy_model(features, mesh)

   # TRAIN mode
-  if mode == tf.estimator.ModeKeys.TRAIN:
+  if mode == tf_estimator.ModeKeys.TRAIN:
     var_grads = mtf.gradients([loss],
                               [v.outputs[0] for v in graph.trainable_variables])
     if FLAGS.optimizer == 'Adafactor':
@@ -193,7 +194,7 @@ def model_fn(features, labels, mode, params):

   tf_loss = tf.to_float(lowering.export_to_tf_tensor(loss))

-  if mode == tf.estimator.ModeKeys.TRAIN:
+  if mode == tf_estimator.ModeKeys.TRAIN:
     tf_update_ops = [lowering.lowered_operation(op) for op in update_ops]
     tf_update_ops.append(tf.assign_add(global_step, 1))
     tf.logging.info('tf_update_ops: {}'.format(tf_update_ops))
@@ -204,7 +205,7 @@ def model_fn(features, labels, mode, params):
   with mtf.utils.outside_all_rewrites():
     # Copy master variables to slices. Must be called first.
     restore_hook = mtf.MtfRestoreHook(lowering)
-    if mode == tf.estimator.ModeKeys.TRAIN:
+    if mode == tf_estimator.ModeKeys.TRAIN:
       saver = tf.train.Saver(
           tf.global_variables(),
           sharded=True,
@@ -221,11 +222,11 @@ def model_fn(features, labels, mode, params):
          listeners=[saver_listener])

       return tpu_estimator.TPUEstimatorSpec(
-          tf.estimator.ModeKeys.TRAIN,
+          tf_estimator.ModeKeys.TRAIN,
           loss=tf_loss,
           train_op=train_op,
           training_hooks=[restore_hook, saver_hook])
-    elif mode == tf.estimator.ModeKeys.EVAL:
+    elif mode == tf_estimator.ModeKeys.EVAL:

       def metric_fn(tf_logits):
         mean_logits = tf.metrics.mean(tf_logits)
@@ -234,7 +235,7 @@ def metric_fn(tf_logits):
       eval_metrics = (metric_fn, [tf_logits])

       return tpu_estimator.TPUEstimatorSpec(
-          tf.estimator.ModeKeys.EVAL,
+          tf_estimator.ModeKeys.EVAL,
           evaluation_hooks=[restore_hook],
           loss=tf_loss,
           eval_metrics=eval_metrics)

mesh_tensorflow/bert/run_classifier.py

Lines changed: 14 additions & 13 deletions
@@ -29,6 +29,7 @@
 import mesh_tensorflow.bert.tokenization as tokenization
 from six.moves import range
 import tensorflow.compat.v1 as tf
+from tensorflow.compat.v1 import estimator as tf_estimator

 flags = tf.flags

@@ -694,7 +695,7 @@ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
                                            [batch_dim, seq_dim])
     mtf_label_ids = mtf.import_tf_tensor(mesh, label_ids, [batch_dim])

-    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+    is_training = (mode == tf_estimator.ModeKeys.TRAIN)

     (total_loss, per_example_loss, logits,
      probabilities) = create_model(bert_config, is_training, mtf_input_ids,
@@ -705,7 +706,7 @@ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
     per_example_loss = mtf.anonymize(per_example_loss)
     logits = mtf.anonymize(logits)

-    if mode == tf.estimator.ModeKeys.TRAIN:
+    if mode == tf_estimator.ModeKeys.TRAIN:
       _, update_ops = optimization_lib.create_optimizer(
           total_loss,
           learning_rate,
@@ -718,13 +719,13 @@ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
     lowering = mtf.Lowering(graph, {mesh: mesh_impl})
     tf_loss = tf.to_float(lowering.export_to_tf_tensor(total_loss))

-    if mode == tf.estimator.ModeKeys.TRAIN:
+    if mode == tf_estimator.ModeKeys.TRAIN:
       global_step = tf.train.get_global_step()
       tf_update_ops = [lowering.lowered_operation(op) for op in update_ops]
       tf_update_ops.append(tf.assign_add(global_step, 1))
       tf.logging.info("tf_update_ops: {}".format(tf_update_ops))
       train_op = tf.group(tf_update_ops)
-    elif mode == tf.estimator.ModeKeys.EVAL:
+    elif mode == tf_estimator.ModeKeys.EVAL:

       def metric_fn(per_example_loss, label_ids, logits, is_real_example):
         predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
@@ -768,7 +769,7 @@ def tpu_scaffold():
     with mtf.utils.outside_all_rewrites():
       # Copy master variables to slices. Must be called first.
       restore_hook = mtf.MtfRestoreHook(lowering)
-      if mode == tf.estimator.ModeKeys.TRAIN:
+      if mode == tf_estimator.ModeKeys.TRAIN:
         saver = tf.train.Saver(
             tf.global_variables(),
             sharded=True,
@@ -784,21 +785,21 @@ def tpu_scaffold():
            saver=saver,
            listeners=[saver_listener])

-        return tf.estimator.tpu.TPUEstimatorSpec(
+        return tf_estimator.tpu.TPUEstimatorSpec(
            mode,
            loss=tf_loss,
            train_op=train_op,
            training_hooks=[restore_hook, saver_hook],
            scaffold_fn=scaffold_fn)
-      elif mode == tf.estimator.ModeKeys.EVAL:
-        return tf.estimator.tpu.TPUEstimatorSpec(
+      elif mode == tf_estimator.ModeKeys.EVAL:
+        return tf_estimator.tpu.TPUEstimatorSpec(
            mode,
            evaluation_hooks=[restore_hook],
            loss=tf_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
       else:
-        return tf.estimator.tpu.TPUEstimatorSpec(
+        return tf_estimator.tpu.TPUEstimatorSpec(
            mode,
            prediction_hooks=[restore_hook],
            predictions={
@@ -925,15 +926,15 @@ def main(_):
   tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
       FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

-  run_config = tf.estimator.tpu.RunConfig(
+  run_config = tf_estimator.tpu.RunConfig(
       cluster=tpu_cluster_resolver,
       master=FLAGS.master,
       model_dir=FLAGS.output_dir,
       save_checkpoints_steps=FLAGS.save_checkpoints_steps,
-      tpu_config=tf.estimator.tpu.TPUConfig(
+      tpu_config=tf_estimator.tpu.TPUConfig(
           iterations_per_loop=FLAGS.iterations_per_loop,
           num_cores_per_replica=1,
-          per_host_input_for_training=tf.estimator.tpu.InputPipelineConfig
+          per_host_input_for_training=tf_estimator.tpu.InputPipelineConfig
           .BROADCAST))

   train_examples = None
@@ -956,7 +957,7 @@ def main(_):

   # If TPU is not available, this will fall back to normal Estimator on CPU
   # or GPU.
-  estimator = tf.estimator.tpu.TPUEstimator(
+  estimator = tf_estimator.tpu.TPUEstimator(
       use_tpu=FLAGS.use_tpu,
       model_fn=model_fn,
       config=run_config,

mesh_tensorflow/bert/run_pretraining.py

Lines changed: 15 additions & 14 deletions
@@ -27,6 +27,7 @@
 import mesh_tensorflow.bert.optimization as optimization_lib
 from six.moves import range
 import tensorflow.compat.v1 as tf
+from tensorflow.compat.v1 import estimator as tf_estimator

 flags = tf.flags

@@ -201,7 +202,7 @@ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
     mtf_next_sentence_labels = mtf.import_tf_tensor(
         mesh, next_sentence_labels, [batch_dim])

-    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
+    is_training = (mode == tf_estimator.ModeKeys.TRAIN)

     model = bert_lib.BertModel(
         config=bert_config,
@@ -230,7 +231,7 @@ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
     next_sentence_logits = mtf.anonymize(next_sentence_logits)

     # TRAIN mode
-    if mode == tf.estimator.ModeKeys.TRAIN:
+    if mode == tf_estimator.ModeKeys.TRAIN:
       _, update_ops = optimization_lib.create_optimizer(
           total_loss + extra_loss,
           learning_rate,
@@ -243,13 +244,13 @@ def model_fn(features, labels, mode, params): # pylint: disable=unused-argument

     tf_loss = tf.to_float(lowering.export_to_tf_tensor(total_loss))

-    if mode == tf.estimator.ModeKeys.TRAIN:
+    if mode == tf_estimator.ModeKeys.TRAIN:
       global_step = tf.train.get_global_step()
       tf_update_ops = [lowering.lowered_operation(op) for op in update_ops]
       tf_update_ops.append(tf.assign_add(global_step, 1))
       tf.logging.info("tf_update_ops: {}".format(tf_update_ops))
       train_op = tf.group(tf_update_ops)
-    elif mode == tf.estimator.ModeKeys.EVAL:
+    elif mode == tf_estimator.ModeKeys.EVAL:

       def metric_fn(masked_lm_example_loss, masked_lm_logits, masked_lm_ids,
                     masked_lm_weights, next_sentence_example_loss,
@@ -298,7 +299,7 @@ def metric_fn(masked_lm_example_loss, masked_lm_logits, masked_lm_ids,
     with mtf.utils.outside_all_rewrites():
       # Copy master variables to slices. Must be called first.
       restore_hook = mtf.MtfRestoreHook(lowering)
-      if mode == tf.estimator.ModeKeys.TRAIN:
+      if mode == tf_estimator.ModeKeys.TRAIN:
         saver = tf.train.Saver(
             tf.global_variables(),
             sharded=True,
@@ -314,14 +315,14 @@ def metric_fn(masked_lm_example_loss, masked_lm_logits, masked_lm_ids,
            saver=saver,
            listeners=[saver_listener])

-        return tf.estimator.tpu.TPUEstimatorSpec(
-            tf.estimator.ModeKeys.TRAIN,
+        return tf_estimator.tpu.TPUEstimatorSpec(
+            tf_estimator.ModeKeys.TRAIN,
            loss=tf_loss,
            train_op=train_op,
            training_hooks=[restore_hook, saver_hook])
-      elif mode == tf.estimator.ModeKeys.EVAL:
-        return tf.estimator.tpu.TPUEstimatorSpec(
-            tf.estimator.ModeKeys.EVAL,
+      elif mode == tf_estimator.ModeKeys.EVAL:
+        return tf_estimator.tpu.TPUEstimatorSpec(
+            tf_estimator.ModeKeys.EVAL,
            evaluation_hooks=[restore_hook],
            loss=tf_loss,
            eval_metrics=eval_metrics)
@@ -439,15 +440,15 @@ def main(_):
   tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
       FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

-  run_config = tf.estimator.tpu.RunConfig(
+  run_config = tf_estimator.tpu.RunConfig(
       cluster=tpu_cluster_resolver,
       master=FLAGS.master,
       model_dir=FLAGS.output_dir,
       save_checkpoints_steps=FLAGS.save_checkpoints_steps,
-      tpu_config=tf.estimator.tpu.TPUConfig(
+      tpu_config=tf_estimator.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_cores_per_replica=1,
-          per_host_input_for_training=tf.estimator.tpu.InputPipelineConfig
+          per_host_input_for_training=tf_estimator.tpu.InputPipelineConfig
          .BROADCAST))

   model_fn = model_fn_builder(
@@ -459,7 +460,7 @@ def main(_):

   # If TPU is not available, this will fall back to normal Estimator on CPU
   # or GPU.
-  estimator = tf.estimator.tpu.TPUEstimator(
+  estimator = tf_estimator.tpu.TPUEstimator(
       use_tpu=FLAGS.use_tpu,
       model_fn=model_fn,
       config=run_config,
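
As a quick sanity check that the alias exposes every Estimator symbol the rewritten files rely on, the module can be probed directly. This is a hypothetical verification snippet, not part of the commit:

from tensorflow.compat.v1 import estimator as tf_estimator

# Top-level symbols referenced across the diff.
for name in ("ModeKeys", "Estimator", "EstimatorSpec", "export", "tpu"):
  assert hasattr(tf_estimator, name), "missing tf_estimator.%s" % name

# TPU symbols used by the BERT scripts.
for name in ("TPUEstimator", "TPUEstimatorSpec", "RunConfig", "TPUConfig",
             "InputPipelineConfig"):
  assert hasattr(tf_estimator.tpu, name), "missing tf_estimator.tpu.%s" % name

print("tf_estimator alias exposes all Estimator symbols used in this commit")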
