From 489fd690473f49f7d0dc219b4cd0a5a32a16c44d Mon Sep 17 00:00:00 2001
From: Terry Taewoong Um
Date: Mon, 16 May 2016 21:08:35 -0400
Subject: [PATCH] About random batch sampling

To be honest, it's hard to understand the role of the ```if 0: ... else:``` statement in the minibatch training loop. I think using only ```mnist.train.next_batch(batch_size)``` already gives decent results. I realized that if I use only the *random batch sampling* branch, the accuracy drops from 91-ish to 87-ish. The reason is that sampling random indices with replacement doesn't cover the whole training set: some samples are drawn several times while others are never seen in an epoch. I changed this part to use ```np.random.permutation(n_train)``` so that every training sample is visited once per epoch.
---
 notebooks/logistic_regression_mnist.ipynb | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/notebooks/logistic_regression_mnist.ipynb b/notebooks/logistic_regression_mnist.ipynb
index 9a326c1..be5b16f 100755
--- a/notebooks/logistic_regression_mnist.ipynb
+++ b/notebooks/logistic_regression_mnist.ipynb
@@ -155,19 +155,20 @@
     "# Launch the graph\n",
     "with tf.Session() as sess:\n",
     "    sess.run(init)\n",
+    "    n_train = trainimg.shape[0]\n",
     "\n",
     "    # Training cycle\n",
     "    for epoch in range(training_epochs):\n",
     "        avg_cost = 0.\n",
     "        num_batch = int(mnist.train.num_examples/batch_size)\n",
+    "        randidx = np.random.permutation(n_train)\n",
     "        # Loop over all batches\n",
     "        for i in range(num_batch): \n",
     "            if 0: # Using tensorflow API\n",
     "                batch_xs, batch_ys = mnist.train.next_batch(batch_size)\n",
     "            else: # Random batch sampling \n",
-    "                randidx = np.random.randint(trainimg.shape[0], size=batch_size)\n",
-    "                batch_xs = trainimg[randidx, :]\n",
-    "                batch_ys = trainlabel[randidx, :] \n",
+    "                batch_xs = trainimg[randidx[i*batch_size:(i+1)*batch_size], :]\n",
+    "                batch_ys = trainlabel[randidx[i*batch_size:(i+1)*batch_size], :]\n",
     "    \n",
     "    # Fit training using batch data\n",
     "    sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})\n",
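
For reference, a minimal standalone sketch of the permutation-based minibatching pattern this patch adopts, outside the notebook. The data arrays here are hypothetical placeholders (random arrays standing in for ```trainimg```/```trainlabel```), and the actual optimizer update is only indicated in a comment. Note that with ```np.random.randint``` (sampling with replacement), the expected fraction of the training set never touched in one epoch is roughly 1/e ≈ 37%, which is why a permutation covers the data better.

```python
import numpy as np

# Hypothetical stand-ins for the notebook's trainimg / trainlabel arrays.
trainimg = np.random.rand(1000, 784)
trainlabel = np.eye(10)[np.random.randint(0, 10, size=1000)]

batch_size = 100
n_train = trainimg.shape[0]
num_batch = n_train // batch_size

for epoch in range(3):
    # Reshuffle once per epoch so each sample lands in exactly one batch.
    randidx = np.random.permutation(n_train)
    for i in range(num_batch):
        idx = randidx[i * batch_size:(i + 1) * batch_size]
        batch_xs = trainimg[idx, :]
        batch_ys = trainlabel[idx, :]
        # In the notebook this is where the update runs:
        # sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
        pass
```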