Commit 1f4747a

Merge remote-tracking branch 'upstream/master'

2 parents d2d01f4 + a7aa25d · commit 1f4747a

116 files changed: +5135 −148 lines


official/README.md

Lines changed: 2 additions & 0 deletions
@@ -28,4 +28,6 @@ If you would like to make any fixes or improvements to the models, please [submi
 
 The *Official Models* are made available as a Python module. To run the models and associated scripts, add the top-level ***/models*** folder to the Python path with the command: `export PYTHONPATH="$PYTHONPATH:/path/to/models"`
 
+To install dependencies, pass `-r official/requirements.txt` to pip (i.e. `pip3 install --user -r official/requirements.txt`).
+
 To make Official Models easier to use, we are planning to create a pip installable Official Models package. This is being tracked in [#917](https://github.com/tensorflow/models/issues/917).

official/benchmark/datastore/schema/benchmark_run.json

Lines changed: 23 additions & 3 deletions
@@ -15,7 +15,7 @@
       "description": "The date when the test of the model is started",
       "mode": "REQUIRED",
       "name": "run_date",
-      "type": "DATETIME"
+      "type": "TIMESTAMP"
     },
     {
       "description": "The tensorflow version information.",
@@ -58,7 +58,7 @@
       "type": "RECORD"
     },
     {
-      "description": "Enviornment variables when the benchmark run is executed.",
+      "description": "Environment variables when the benchmark run is executed.",
       "fields": [
         {
           "description": "The name of the variable.",
@@ -74,7 +74,27 @@
         }
       ],
       "mode": "REPEATED",
-      "name": "enviornment_variable",
+      "name": "environment_variable",
+      "type": "RECORD"
+    },
+    {
+      "description": "TF Environment variables when the benchmark run is executed.",
+      "fields": [
+        {
+          "description": "The name of the variable.",
+          "mode": "REQUIRED",
+          "name": "name",
+          "type": "STRING"
+        },
+        {
+          "description": "The value of the variable.",
+          "mode": "NULLABLE",
+          "name": "value",
+          "type": "STRING"
+        }
+      ],
+      "mode": "REPEATED",
+      "name": "tensorflow_environment_variables",
       "type": "RECORD"
     },
     {
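For concreteness, a row conforming to the updated schema might look like the following Python literal. This is a hypothetical example — the variable names and values are illustrative, not taken from a real benchmark run:

```python
# Illustrative benchmark row reflecting the schema changes above:
# run_date is now a TIMESTAMP rather than a DATETIME, and TF-specific
# variables live in the new repeated tensorflow_environment_variables record.
row = {
    "run_date": "2018-03-28 01:02:03 UTC",
    "environment_variable": [
        {"name": "PYTHONPATH", "value": "/path/to/models"},
    ],
    "tensorflow_environment_variables": [
        {"name": "TF_ENABLE_WINOGRAD_NONFUSED", "value": "1"},
    ],
}
```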

official/mnist/mnist.py

Lines changed: 22 additions & 26 deletions
@@ -175,28 +175,31 @@ def validate_batch_size_for_multi_gpu(batch_size):
     raise ValueError(err)
 
 
-def main(_):
+def main(argv):
+  parser = MNISTArgParser()
+  flags = parser.parse_args(args=argv[1:])
+
   model_function = model_fn
 
-  if FLAGS.multi_gpu:
-    validate_batch_size_for_multi_gpu(FLAGS.batch_size)
+  if flags.multi_gpu:
+    validate_batch_size_for_multi_gpu(flags.batch_size)
 
     # There are two steps required if using multi-GPU: (1) wrap the model_fn,
     # and (2) wrap the optimizer. The first happens here, and (2) happens
     # in the model_fn itself when the optimizer is defined.
     model_function = tf.contrib.estimator.replicate_model_fn(
         model_fn, loss_reduction=tf.losses.Reduction.MEAN)
 
-  data_format = FLAGS.data_format
+  data_format = flags.data_format
   if data_format is None:
     data_format = ('channels_first'
                    if tf.test.is_built_with_cuda() else 'channels_last')
   mnist_classifier = tf.estimator.Estimator(
       model_fn=model_function,
-      model_dir=FLAGS.model_dir,
+      model_dir=flags.model_dir,
       params={
           'data_format': data_format,
-          'multi_gpu': FLAGS.multi_gpu
+          'multi_gpu': flags.multi_gpu
       })
 
   # Set up training and evaluation input functions.
@@ -206,35 +209,35 @@ def train_input_fn():
     # When choosing shuffle buffer sizes, larger sizes result in better
     # randomness, while smaller sizes use less memory. MNIST is a small
     # enough dataset that we can easily shuffle the full epoch.
-    ds = dataset.train(FLAGS.data_dir)
-    ds = ds.cache().shuffle(buffer_size=50000).batch(FLAGS.batch_size)
+    ds = dataset.train(flags.data_dir)
+    ds = ds.cache().shuffle(buffer_size=50000).batch(flags.batch_size)
 
     # Iterate through the dataset a set number (`epochs_between_evals`) of times
     # during each training session.
-    ds = ds.repeat(FLAGS.epochs_between_evals)
+    ds = ds.repeat(flags.epochs_between_evals)
     return ds
 
   def eval_input_fn():
-    return dataset.test(FLAGS.data_dir).batch(
-        FLAGS.batch_size).make_one_shot_iterator().get_next()
+    return dataset.test(flags.data_dir).batch(
+        flags.batch_size).make_one_shot_iterator().get_next()
 
   # Set up hook that outputs training logs every 100 steps.
   train_hooks = hooks_helper.get_train_hooks(
-      FLAGS.hooks, batch_size=FLAGS.batch_size)
+      flags.hooks, batch_size=flags.batch_size)
 
   # Train and evaluate model.
-  for _ in range(FLAGS.train_epochs // FLAGS.epochs_between_evals):
+  for _ in range(flags.train_epochs // flags.epochs_between_evals):
     mnist_classifier.train(input_fn=train_input_fn, hooks=train_hooks)
     eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
     print('\nEvaluation results:\n\t%s\n' % eval_results)
 
   # Export the model
-  if FLAGS.export_dir is not None:
+  if flags.export_dir is not None:
     image = tf.placeholder(tf.float32, [None, 28, 28])
     input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
        'image': image,
     })
-    mnist_classifier.export_savedmodel(FLAGS.export_dir, input_fn)
+    mnist_classifier.export_savedmodel(flags.export_dir, input_fn)
 
 
 class MNISTArgParser(argparse.ArgumentParser):
@@ -243,14 +246,9 @@ class MNISTArgParser(argparse.ArgumentParser):
   def __init__(self):
     super(MNISTArgParser, self).__init__(parents=[
         parsers.BaseParser(),
-        parsers.ImageModelParser()])
-
-    self.add_argument(
-        '--export_dir',
-        type=str,
-        help='[default: %(default)s] If set, a SavedModel serialization of the '
-             'model will be exported to this directory at the end of training. '
-             'See the README for more details and relevant links.')
+        parsers.ImageModelParser(),
+        parsers.ExportParser(),
+    ])
 
     self.set_defaults(
         data_dir='/tmp/mnist_data',
@@ -261,6 +259,4 @@ def __init__(self):
 
 if __name__ == '__main__':
   tf.logging.set_verbosity(tf.logging.INFO)
-  parser = MNISTArgParser()
-  FLAGS, unparsed = parser.parse_known_args()
-  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
+  main(argv=sys.argv)
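The thrust of this change is moving argument parsing out of module-level globals and into `main(argv)`, so the script no longer mutates a shared `FLAGS` and drops the `parse_known_args`/`tf.app.run` indirection at the bottom of the file. A minimal standalone sketch of the pattern (the single flag below is illustrative; the real script composes parsers from the shared `parsers` module):

```python
import argparse
import sys


def main(argv):
  # Parse flags inside main instead of relying on a module-level FLAGS
  # global, which makes the entry point testable with an explicit argv.
  parser = argparse.ArgumentParser()
  parser.add_argument('--batch_size', type=int, default=100)
  flags = parser.parse_args(args=argv[1:])
  print('batch_size = %d' % flags.batch_size)


if __name__ == '__main__':
  main(argv=sys.argv)
```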

official/mnist/mnist_eager.py

Lines changed: 19 additions & 20 deletions
@@ -38,8 +38,6 @@
 from official.mnist import mnist
 from official.utils.arg_parsers import parsers
 
-FLAGS = None
-
 
 def loss(logits, labels):
   return tf.reduce_mean(
@@ -97,35 +95,38 @@ def test(model, dataset):
     tf.contrib.summary.scalar('accuracy', accuracy.result())
 
 
-def main(_):
+def main(argv):
+  parser = MNISTEagerArgParser()
+  flags = parser.parse_args(args=argv[1:])
+
   tfe.enable_eager_execution()
 
   # Automatically determine device and data_format
   (device, data_format) = ('/gpu:0', 'channels_first')
-  if FLAGS.no_gpu or tfe.num_gpus() <= 0:
+  if flags.no_gpu or tfe.num_gpus() <= 0:
     (device, data_format) = ('/cpu:0', 'channels_last')
   # If data_format is defined in FLAGS, overwrite automatically set value.
-  if FLAGS.data_format is not None:
+  if flags.data_format is not None:
     data_format = data_format
   print('Using device %s, and data format %s.' % (device, data_format))
 
   # Load the datasets
-  train_ds = mnist_dataset.train(FLAGS.data_dir).shuffle(60000).batch(
-      FLAGS.batch_size)
-  test_ds = mnist_dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)
+  train_ds = mnist_dataset.train(flags.data_dir).shuffle(60000).batch(
+      flags.batch_size)
+  test_ds = mnist_dataset.test(flags.data_dir).batch(flags.batch_size)
 
   # Create the model and optimizer
   model = mnist.Model(data_format)
-  optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)
+  optimizer = tf.train.MomentumOptimizer(flags.lr, flags.momentum)
 
   # Create file writers for writing TensorBoard summaries.
-  if FLAGS.output_dir:
+  if flags.output_dir:
     # Create directories to which summaries will be written
     # tensorboard --logdir=<output_dir>
     # can then be used to see the recorded summaries.
-    train_dir = os.path.join(FLAGS.output_dir, 'train')
-    test_dir = os.path.join(FLAGS.output_dir, 'eval')
-    tf.gfile.MakeDirs(FLAGS.output_dir)
+    train_dir = os.path.join(flags.output_dir, 'train')
+    test_dir = os.path.join(flags.output_dir, 'eval')
+    tf.gfile.MakeDirs(flags.output_dir)
   else:
     train_dir = None
     test_dir = None
@@ -135,19 +136,19 @@ def main(_):
       test_dir, flush_millis=10000, name='test')
 
   # Create and restore checkpoint (if one exists on the path)
-  checkpoint_prefix = os.path.join(FLAGS.model_dir, 'ckpt')
+  checkpoint_prefix = os.path.join(flags.model_dir, 'ckpt')
   step_counter = tf.train.get_or_create_global_step()
   checkpoint = tfe.Checkpoint(
       model=model, optimizer=optimizer, step_counter=step_counter)
   # Restore variables on creation if a checkpoint exists.
-  checkpoint.restore(tf.train.latest_checkpoint(FLAGS.model_dir))
+  checkpoint.restore(tf.train.latest_checkpoint(flags.model_dir))
 
   # Train and evaluate for a set number of epochs.
   with tf.device(device):
-    for _ in range(FLAGS.train_epochs):
+    for _ in range(flags.train_epochs):
       start = time.time()
       with summary_writer.as_default():
-        train(model, optimizer, train_ds, step_counter, FLAGS.log_interval)
+        train(model, optimizer, train_ds, step_counter, flags.log_interval)
       end = time.time()
       print('\nTrain time for epoch #%d (%d total steps): %f' %
             (checkpoint.save_counter.numpy() + 1,
@@ -205,6 +206,4 @@ def __init__(self):
     )
 
 if __name__ == '__main__':
-  parser = MNISTEagerArgParser()
-  FLAGS, unparsed = parser.parse_known_args()
-  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
+  main(argv=sys.argv)
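One idiom worth noting in the hunks above: `checkpoint.restore(tf.train.latest_checkpoint(model_dir))` is a silent no-op on a first run, because `latest_checkpoint` returns `None` when no checkpoint exists. A minimal sketch of the save/restore pairing, assuming the TF 1.x `tf.contrib.eager` API (the directory path is illustrative):

```python
import tensorflow as tf
import tensorflow.contrib.eager as tfe

tfe.enable_eager_execution()

step_counter = tf.train.get_or_create_global_step()
model_var = tfe.Variable(1.0, name='model_var')
checkpoint = tfe.Checkpoint(model_var=model_var, step_counter=step_counter)

# Restores the latest checkpoint if one exists; restore(None) does
# nothing, so the first run starts from the fresh initial values.
checkpoint.restore(tf.train.latest_checkpoint('/tmp/eager_ckpts'))
checkpoint.save('/tmp/eager_ckpts/ckpt')
```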

official/mnist/mnist_tpu.py

Lines changed: 20 additions & 3 deletions
@@ -46,6 +46,10 @@
     "metadata.")
 
 # Model specific parameters
+tf.flags.DEFINE_string(
+    "master", default=None,
+    help="GRPC URL of the master (e.g. grpc://ip.address.of.tpu:8470). You "
+    "must specify either this flag or --tpu.")
 tf.flags.DEFINE_string("data_dir", "",
                        "Path to directory containing the MNIST dataset")
 tf.flags.DEFINE_string("model_dir", None, "Estimator model_dir")
@@ -132,11 +136,24 @@ def main(argv):
   del argv  # Unused.
   tf.logging.set_verbosity(tf.logging.INFO)
 
-  tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
-      FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
+  if FLAGS.master is None and FLAGS.tpu is None:
+    raise RuntimeError('You must specify either --master or --tpu.')
+  if FLAGS.master is not None:
+    if FLAGS.tpu is not None:
+      tf.logging.warn('Both --master and --tpu are set. Ignoring '
+                      '--tpu and using --master.')
+    tpu_grpc_url = FLAGS.master
+  else:
+    tpu_cluster_resolver = (
+        tf.contrib.cluster_resolver.TPUClusterResolver(
+            FLAGS.tpu,
+            zone=FLAGS.tpu_zone,
+            project=FLAGS.gcp_project))
+    tpu_grpc_url = tpu_cluster_resolver.get_master()
 
   run_config = tf.contrib.tpu.RunConfig(
-      cluster=tpu_cluster_resolver,
+      master=tpu_grpc_url,
+      evaluation_master=tpu_grpc_url,
       model_dir=FLAGS.model_dir,
       session_config=tf.ConfigProto(
           allow_soft_placement=True, log_device_placement=True),
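The new control flow boils down to a small precedence rule: `--master` wins outright, `--tpu` is resolved to a gRPC URL via `TPUClusterResolver`, and specifying neither is an error. Distilled as a standalone helper (the function name and `resolve_fn` parameter are ours, for illustration only — the script inlines this logic):

```python
def resolve_tpu_master(master, tpu, resolve_fn):
  """Return the TPU gRPC URL, preferring --master over --tpu."""
  if master is None and tpu is None:
    raise RuntimeError('You must specify either --master or --tpu.')
  if master is not None:
    # --tpu, if also given, is ignored (the script logs a warning).
    return master
  return resolve_fn(tpu)  # e.g. TPUClusterResolver(tpu, ...).get_master()
```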

official/requirements.txt

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+psutil>=5.4.3
+py-cpuinfo>=3.3.0
+google-cloud-bigquery>=0.31.0

official/resnet/README.md

Lines changed: 10 additions & 1 deletion
@@ -51,4 +51,13 @@ The model will begin training and will automatically evaluate itself on the vali
 Note that there are a number of other options you can specify, including `--model_dir` to choose where to store the model and `--resnet_size` to choose the model size (options include ResNet-18 through ResNet-200). See [`resnet.py`](resnet.py) for the full list of options.
 
 ### Pre-trained model
-You can download a 190 MB pre-trained version of ResNet-50 achieving 75.3% top-1 single-crop accuracy here: [resnet50_2017_11_30.tar.gz](http://download.tensorflow.org/models/official/resnet50_2017_11_30.tar.gz). Simply download and uncompress the file, and point the model to the extracted directory using the `--model_dir` flag.
+You can download 190 MB pre-trained versions of ResNet-50 achieving 76.3% and 75.3% (respectively) top-1 single-crop accuracy here: [resnetv2_imagenet_checkpoint.tar.gz](http://download.tensorflow.org/models/official/resnetv2_imagenet_checkpoint.tar.gz), [resnetv1_imagenet_checkpoint.tar.gz](http://download.tensorflow.org/models/official/resnetv1_imagenet_checkpoint.tar.gz). Simply download and uncompress the file, and point the model to the extracted directory using the `--model_dir` flag.
+
+Other versions and formats:
+
+* [ResNet-v2-ImageNet Checkpoint](http://download.tensorflow.org/models/official/resnetv2_imagenet_checkpoint.tar.gz)
+* [ResNet-v2-ImageNet SavedModel](http://download.tensorflow.org/models/official/resnetv2_imagenet_savedmodel.tar.gz)
+* [ResNet-v2-ImageNet Frozen Graph](http://download.tensorflow.org/models/official/resnetv2_imagenet_frozen_graph.pb)
+* [ResNet-v1-ImageNet Checkpoint](http://download.tensorflow.org/models/official/resnetv1_imagenet_checkpoint.tar.gz)
+* [ResNet-v1-ImageNet SavedModel](http://download.tensorflow.org/models/official/resnetv1_imagenet_savedmodel.tar.gz)
+* [ResNet-v1-ImageNet Frozen Graph](http://download.tensorflow.org/models/official/resnetv1_imagenet_frozen_graph.pb)
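For the frozen-graph downloads, a typical TF 1.x loading pattern is sketched below. This is an assumption about usage rather than something documented in the commit; in particular, the graph's input and output node names are not listed here, so inspect `graph.get_operations()` to find them:

```python
import tensorflow as tf

# Path assumes the frozen graph .pb was downloaded locally.
with tf.gfile.GFile('resnetv2_imagenet_frozen_graph.pb', 'rb') as f:
  graph_def = tf.GraphDef()
  graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
  tf.import_graph_def(graph_def, name='')

# Print a few operation names to locate the real input/output tensors.
for op in graph.get_operations()[:10]:
  print(op.name)
```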

official/resnet/cifar10_main.py

Lines changed: 4 additions & 1 deletion
@@ -228,7 +228,10 @@ def main(argv):
   flags = parser.parse_args(args=argv[1:])
 
   input_function = flags.use_synthetic_data and get_synth_input_fn() or input_fn
-  resnet_run_loop.resnet_main(flags, cifar10_model_fn, input_function)
+
+  resnet_run_loop.resnet_main(
+      flags, cifar10_model_fn, input_function,
+      shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
 
 
 if __name__ == '__main__':

official/resnet/imagenet_main.py

Lines changed: 4 additions & 1 deletion
@@ -305,7 +305,10 @@ def main(argv):
   flags = parser.parse_args(args=argv[1:])
 
   input_function = flags.use_synthetic_data and get_synth_input_fn() or input_fn
-  resnet_run_loop.resnet_main(flags, imagenet_model_fn, input_function)
+
+  resnet_run_loop.resnet_main(
+      flags, imagenet_model_fn, input_function,
+      shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
 
 
 if __name__ == '__main__':
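Both the CIFAR-10 and ImageNet entry points now pass the per-dataset image shape into `resnet_run_loop.resnet_main`. The commit does not include `resnet_run_loop.py`, but a plausible use, mirroring the MNIST export code earlier in this commit, is building a raw serving input receiver whose placeholder matches that shape. A sketch under that assumption (the helper name is ours, not the repo's):

```python
import tensorflow as tf

def serving_input_fn_from_shape(shape):
  # Hypothetical helper: batch dimension unknown, image dimensions taken
  # from `shape` (e.g. [_HEIGHT, _WIDTH, _NUM_CHANNELS] for CIFAR-10).
  image = tf.placeholder(tf.float32, shape=[None] + list(shape))
  return tf.estimator.export.build_raw_serving_input_receiver_fn(
      {'image': image})
```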
