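"""Tests for ParameterServerStrategyV2 with TFRA dynamic_embedding.

Runs a Keras fit loop against a parameter server cluster created by the
multi-process test harness.
"""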
import os
import sys
from tensorflow.python.distribute import multi_process_lib
import multiprocessing
import tensorflow as tf
import contextlib
import functools
from tensorflow_recommenders_addons import dynamic_embedding as de

from absl.testing import parameterized
import numpy as np
from tensorflow.core.protobuf import saved_model_pb2
from tensorflow.python.checkpoint import checkpoint as tracking_util
from tensorflow.python.compat import v2_compat
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.distribute import distribute_lib
from tensorflow.python.distribute import multi_process_runner
from tensorflow.python.distribute import multi_worker_test_base
from tensorflow.python.distribute import parameter_server_strategy_v2
from tensorflow.python.distribute import ps_values
from tensorflow.python.distribute import sharded_variable
from tensorflow.python.distribute.cluster_resolver import cluster_resolver as cluster_resolver_lib
from tensorflow.python.eager import context
from tensorflow.python.eager import def_function
from tensorflow.python.eager import test
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_spec
from tensorflow.python.framework import test_util
from tensorflow.python.module import module
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import init_ops_v2
from tensorflow.python.ops import linalg_ops_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import gfile
from tensorflow.python.saved_model import save as tf_save
from tensorflow.python.trackable import autotrackable
from tensorflow.python.training import server_lib
from packaging import version

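# Keras was split out of the TensorFlow tree in TF 2.16; use the standalone
# tf_keras package there and the in-tree modules for older versions.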
if version.parse(tf.__version__) >= version.parse("2.16"):
  from tf_keras import layers
  from tf_keras import Sequential
  from tf_keras.optimizers import Adam
else:
  from tensorflow.python.keras import layers
  from tensorflow.python.keras import Sequential
  from tensorflow.python.keras.optimizers import Adam


class ParameterServerStrategyV2Test(test.TestCase):

  @classmethod
  def setUpClass(cls):
    super(ParameterServerStrategyV2Test, cls).setUpClass()
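    # Start an in-process cluster with 2 workers and 3 parameter servers
    # communicating over gRPC; all test cases share this cluster.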
    cls.cluster = multi_worker_test_base.create_multi_process_cluster(
        num_workers=2, num_ps=3, rpc_layer="grpc")
    cls.cluster_resolver = cls.cluster.cluster_resolver

  @classmethod
  def tearDownClass(cls):
    super(ParameterServerStrategyV2Test, cls).tearDownClass()
    cls.cluster.stop()

  def testKerasFit(self):
    strategy = parameter_server_strategy_v2.ParameterServerStrategyV2(
        self.cluster_resolver)
    # vocab_size = 100
    embed_dim = 8
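    # Build the model under the strategy scope so its variables, including the
    # dynamic embedding table, are managed by the parameter server strategy.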
    with strategy.scope():
      model = Sequential([
          layers.Input(shape=(1,), dtype=tf.int32),
          de.keras.layers.Embedding(embed_dim, key_dtype=tf.int32),
          # layers.Embedding(input_dim=vocab_size, output_dim=embed_dim),
          layers.Flatten(),
          layers.Dense(1, activation='sigmoid')
      ])
      optimizer = Adam(1E-3)
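      # DynamicEmbeddingOptimizer wraps the optimizer so it can also update
      # the parameters created by de.keras.layers.Embedding.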
      optimizer = de.DynamicEmbeddingOptimizer(optimizer)
      model.compile(loss='binary_crossentropy',
                    optimizer=optimizer,
                    metrics=['accuracy'])

    ids = np.random.randint(0, 100, size=(64 * 2, 1))
    labels = np.random.randint(0, 2, size=(64 * 2, 1))

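    # Each worker builds its own input pipeline; shard by input_pipeline_id so
    # the workers read disjoint slices of the data.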
    def dataset_fn(input_context):
      global_batch_size = 32
      batch_size = input_context.get_per_replica_batch_size(global_batch_size)
      dataset = tf.data.Dataset.from_tensor_slices((ids, labels))
      dataset = dataset.shard(input_context.num_input_pipelines,
                              input_context.input_pipeline_id)
      dataset = dataset.batch(batch_size).repeat()
      return dataset

    dataset = strategy.distribute_datasets_from_function(dataset_fn)

    history = model.fit(dataset, epochs=1, steps_per_epoch=len(ids) // 64)
    self.assertIn('loss', history.history)

  # def testSparselyReadForEmbeddingLookup(self):
  #   strategy = parameter_server_strategy_v2.ParameterServerStrategyV2(
  #       self.cluster_resolver)
  #
  #   class FakeModel(module.Module):
  #
  #     def __init__(self):
  #       self._var0 = variables.Variable([1.0, 2.0, 3.0, 4.0])
  #       self._var1 = variables.Variable([5.0, 6.0, 7.0, 8.0])
  #
  #     @def_function.function(input_signature=[
  #         tensor_spec.TensorSpec(shape=[2], dtype=dtypes.int32, name="inputs")
  #     ])
  #     def func(self, x):
  #       return embedding_ops.embedding_lookup([self._var0, self._var1], x)
  #
  #   with strategy.scope():
  #     model = FakeModel()
  #
  #   # Assert that ResourceGather op exists instead of Gather in training function.
  #   found_resource_gather = False
  #   found_gather = False
  #
  #   for n in model.func.get_concrete_function().graph.as_graph_def().node:
  #     if n.op == "ResourceGather":
  #       found_resource_gather = True
  #     elif n.op == "Gather":
  #       found_gather = True
  #   self.assertTrue(found_resource_gather)
  #   self.assertFalse(found_gather)
  #
  #   # Assert that ResourceGather op exists instead of Gather in saved_model.
  #   found_resource_gather = False
  #   found_gather = False
  #
  #   tmp_dir = self.get_temp_dir()
  #   tf_save.save(model, tmp_dir, signatures=model.func)
  #
  #   with gfile.Open("%s/saved_model.pb" % tmp_dir, "rb") as f:
  #     saved_model_proto = saved_model_pb2.SavedModel().FromString(f.read())
  #
  #   for function in saved_model_proto.meta_graphs[0].graph_def.library.function:
  #     for n in function.node_def:
  #       if n.op == "ResourceGather":
  #         found_resource_gather = True
  #         resource_gather_device = n.device
  #       elif n.op == "Gather":
  #         found_gather = True
  #   self.assertTrue(found_resource_gather)
  #   self.assertFalse(found_gather)
  #
  #   # We also assert that the colocate_with in embedding_ops will not result in
  #   # a hard-coded device string.
  #   self.assertEmpty(resource_gather_device)


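# multi_process_runner launches subprocesses with multiprocessing's "spawn"
# start method. Under Bazel, sys.argv[0] points at the .py module rather than
# the executable test binary, so we guess the binary path from the TEST_TARGET
# environment variable and hand it to multiprocessing.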
def custom_set_spawn_exe_path():
  print(f"custom_set_spawn_exe_path {sys.argv[0]} {os.environ.get('TEST_TARGET')}")
  if sys.argv[0].endswith('.py'):
    def guess_path(package_root):
      # If all we have is a python module path, we'll need to make a guess for
      # the actual executable path.
      if 'bazel-out' in sys.argv[0] and package_root in sys.argv[0]:
        package_root_base = sys.argv[0][:sys.argv[0].rfind(package_root)]
        binary = os.environ['TEST_TARGET'][2:].replace(':', '/', 1)
        print(f"package_root_base {package_root_base} binary {binary}")
        possible_path = os.path.join(package_root_base, package_root, binary)
        print(f"Guessed test binary path: {possible_path}")
        if os.access(possible_path, os.X_OK):
          return possible_path
      return None

    path = guess_path('tf_recommenders_addons')
    if path is None:
      print(f"Cannot determine binary path. sys.argv[0]={sys.argv[0]} "
            f"os.environ={os.environ}")
      raise RuntimeError('Cannot determine binary path')
    sys.argv[0] = path
  # Note that this sets the executable for *all* contexts.
  multiprocessing.get_context().set_executable(sys.argv[0])


if __name__ == "__main__":
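  # Patch TF's spawn-path resolver with the version above so that child
  # processes started by multi_process_runner use this package's test binary.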
  multi_process_lib._set_spawn_exe_path = custom_set_spawn_exe_path
  v2_compat.enable_v2_behavior()
  multi_process_runner.test_main()