From 3fc62ff4ca44cec9cfb6d44e1dfe5605ca10d85b Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Thu, 6 Feb 2020 01:27:37 +0900 Subject: [PATCH 01/18] mpl with gpu,minibatch fix loss NaN use minibatch --- vision/mnist/mlp_gpu_minibatch.jl | 97 +++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 vision/mnist/mlp_gpu_minibatch.jl diff --git a/vision/mnist/mlp_gpu_minibatch.jl b/vision/mnist/mlp_gpu_minibatch.jl new file mode 100644 index 000000000..939e51fcb --- /dev/null +++ b/vision/mnist/mlp_gpu_minibatch.jl @@ -0,0 +1,97 @@ +#= +Julia version: 1.3.1 +Flux version : 0.10.1 +=# +__precompile__() +module MNIST_BATCH +using Flux +using Flux.Data.MNIST, Statistics +using Flux: onehotbatch, onecold, crossentropy,throttle +using Base.Iterators: repeated,partition + +using CUDAnative +using CuArrays +CuArrays.allowscalar(false) + +#= +Very important !! +ϵ is used to prevent loss NaN +=# +const ϵ = 1.0f-10 + +# Load training labels and images from Flux.Data.MNIST +@info("Loading data...") +#= +MNIST.images() : [(28x28),...,(28x28)] 60,000x28x28 training images +MNIST.labels() : 0 ~ 9 labels , 60,000x10 training labels +=# +train_imgs = MNIST.images() +train_labels = MNIST.labels() + +# use 1nd GPU : default +#CUDAnative.device!(0) +# use 2nd GPU +#CUDAnative.device!(1) + +# Bundle images together with labels and group into minibatch +function make_minibatch(imgs,labels,batch_size) + #= + reshape.(MNIST.images(),:) : [(784,),(784,),...,(784,)] 60,000개의 데이터 + X : (784x60,000) + Y : (10x60,000) + =# + X = hcat(float.(reshape.(imgs,:))...) |> gpu + Y = float.(onehotbatch(labels,0:9)) |> gpu + # Y = Float32.(onehotbatch(labels,0:9)) + + data_set = [(X[:,i],Y[:,i]) for i in partition(1:length(labels),batch_size)] + return data_set +end + +@info("Making model...") +# Model +m = Chain( + Dense(28^2,32,relu), # y1 = relu(W1*x + b1), y1 : (32x?), W1 : (32x784), b1 : (32,) + Dense(32,10), # y2 = W2*y1 + b2, y2 : (10,?), W2: (10x32), b2:(10,) + softmax +) |> gpu +loss(x,y) = crossentropy(m(x) .+ ϵ, y .+ ϵ) +accuracy(x,y) = mean(onecold(m(x)|>cpu) .== onecold(y|>cpu)) + +batch_size = 500 +train_dataset = make_minibatch(train_imgs,train_labels,batch_size) + +opt = ADAM() + + +@info("Training model...") + +epochs = 200 +# used for plots +accs = Array{Float32}(undef,0) + +dataset_len = length(train_dataset) +for i in 1:epochs + for (idx,dataset) in enumerate(train_dataset) + Flux.train!(loss,params(m),[dataset],opt) + # Flux.train!(loss,params(m),[dataset],opt,cb = throttle(()->@show(loss(dataset...)),20)) + acc = accuracy(dataset...) + if idx == dataset_len + @info("Epoch# $(i)/$(epochs) - loss: $(loss(dataset...)), accuracy: $(acc)") + push!(accs,acc) + end + end +end + +# Test Accuracy +tX = hcat(float.(reshape.(MNIST.images(:test),:))...) 
|> gpu +tY = float.(onehotbatch(MNIST.labels(:test),0:9)) |> gpu + +println("Test loss:", loss(tX,tY)) +println("Test accuracy:", accuracy(tX,tY)) + +end + +using Plots;gr() +plot(MNIST_BATCH.accs) + From 3003415875a5cbccb1c2c062361f5c5b67f8823d Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Thu, 6 Feb 2020 01:34:36 +0900 Subject: [PATCH 02/18] Update mlp_gpu_minibatch.jl --- vision/mnist/mlp_gpu_minibatch.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vision/mnist/mlp_gpu_minibatch.jl b/vision/mnist/mlp_gpu_minibatch.jl index 939e51fcb..21d2cdb6d 100644 --- a/vision/mnist/mlp_gpu_minibatch.jl +++ b/vision/mnist/mlp_gpu_minibatch.jl @@ -74,9 +74,9 @@ dataset_len = length(train_dataset) for i in 1:epochs for (idx,dataset) in enumerate(train_dataset) Flux.train!(loss,params(m),[dataset],opt) - # Flux.train!(loss,params(m),[dataset],opt,cb = throttle(()->@show(loss(dataset...)),20)) - acc = accuracy(dataset...) + # Flux.train!(loss,params(m),[dataset],opt,cb = throttle(()->@show(loss(dataset...)),20)) if idx == dataset_len + acc = accuracy(dataset...) @info("Epoch# $(i)/$(epochs) - loss: $(loss(dataset...)), accuracy: $(acc)") push!(accs,acc) end From d1114bca54ccefd5a4549bc601a3c3eeb50c5de6 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Fri, 7 Feb 2020 03:35:37 +0900 Subject: [PATCH 03/18] cifar10 gpu,minibatch version minibatch for gpu out of memory fix loss NaN add etc... --- vision/cifar10/cifar10_gpu_minibatch.jl | 210 ++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 vision/cifar10/cifar10_gpu_minibatch.jl diff --git a/vision/cifar10/cifar10_gpu_minibatch.jl b/vision/cifar10/cifar10_gpu_minibatch.jl new file mode 100644 index 000000000..4d3177a6c --- /dev/null +++ b/vision/cifar10/cifar10_gpu_minibatch.jl @@ -0,0 +1,210 @@ +#= + cifar10 dataset spec + - 60,000 images of 32x32 size + - train images : 50,000 + - test images : 10,000 + - classify item : 10 + - each class have 6,000 images and 5,000 train images, 1,000 test images + + Data format: + WHCN order : (width, height, #channels, #batches) + ex) A single 100x100 RGB image data format : 100x100x3x1 + =# + +# Julia version : 1.3.1 +# Flux version : v0.10.1 + +__precompile__() +module _CIFAR10 +using Flux, Metalhead, Statistics +using Flux: onehotbatch, onecold, crossentropy, throttle +using Metalhead: trainimgs +using Images: channelview +using Statistics: mean +using Base.Iterators: partition +using CUDAnative +using CuArrays +CuArrays.allowscalar(false) + +using BSON: @save +using Logging +using Dates + +const model_file = "./cifar10_vgg16_model.bson" +const log_file ="./cifar10_vgg16.log" + +# Very important : this prevent loss NaN +const ϵ = 1.0f-10 + +# use 1nd GPU : default +#CUDAnative.device!(0) +# use 2nd GPU +#CUDAnative.device!(1) + +log = open(log_file, "w+") +global_logger(ConsoleLogger(log)) + +@info "Start - $(now())" +@info "Config VGG16, VGG19 models ..." 
+flush(log) +# VGG16 and VGG19 models + +vgg16() = Chain( + Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + MaxPool((2,2)), + Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + MaxPool((2,2)), + Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + MaxPool((2,2)), + Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + MaxPool((2,2)), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + MaxPool((2,2)), + x -> reshape(x, :, size(x, 4)), + Dense(512, 4096, relu), + Dropout(0.5), + Dense(4096, 4096, relu), + Dropout(0.5), + Dense(4096, 10), + softmax) |> gpu + +vgg19() = Chain( + Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + MaxPool((2,2)), + Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + MaxPool((2,2)), + Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + MaxPool((2,2)), + Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + MaxPool((2,2)), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + MaxPool((2,2)), + x -> reshape(x, :, size(x, 4)), + Dense(512, 4096, relu), + Dropout(0.5), + Dense(4096, 4096, relu), + Dropout(0.5), + Dense(4096, 10), + softmax) |> gpu + +# Function to convert the RGB image to Float32 Arrays +getarray(X) = Float32.(permutedims(channelview(X), (2, 3, 1))) + + +@info "Data download and preparing ..." 
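# A minimal illustrative sketch (values are made up) of what the minibatching below
# produces: `partition` splits the sample indices into batch_size-sized blocks, and each
# block becomes one (32x32x3xN image tensor, 10xN one-hot label matrix) pair in
# WHCN order (width, height, #channels, #batches).
let demo_blocks = collect(partition(1:10, 4))
    @assert demo_blocks == [1:4, 5:8, 9:10]   # the last block may be smaller
end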
+ +function make_minibatch(imgs,labels,batch_size) + data_set = [(cat(imgs[i]..., dims = 4) |> gpu, + labels[:,i]) |> gpu + for i in partition(1:length(imgs), batch_size)] + return data_set +end + +epochs = 40 +batch_size = 100 + +X = trainimgs(CIFAR10) + +train_idxs = 1:49000 +train_imgs = [getarray(X[i].img) for i in train_idxs] +train_labels = float.(onehotbatch([X[i].ground_truth.class for i in train_idxs],1:10)) +train_dataset = make_minibatch(train_imgs,train_labels,batch_size) + +valid_idxs = 49001:50000 +valX = cat([getarray(X[i].img) for i in valid_idxs]..., dims = 4) |> gpu +valY = float.(onehotbatch([X[i].ground_truth.class for i in valid_idxs],1:10)) |> gpu + +# Defining the loss and accuracy functions + +@info "VGG16 models instantiation ..." +m = vgg16() + +loss(x, y) = crossentropy(m(x) .+ ϵ, y .+ ϵ) + +accuracy(x, y) = mean(onecold(m(x)|>cpu, 1:10) .== onecold(y|>cpu, 1:10)) + +# Defining the callback and the optimizer + +evalcb = throttle(() -> @info(accuracy(valX, valY)), 10) + +opt = ADAM() + +@info "Training model..." + + +@time begin +dataset_len = length(train_dataset) +for i in 1:epochs + for (idx,dataset) in enumerate(train_dataset) + Flux.train!(loss,params(m),[dataset],opt) + #Flux.train!(loss,params(m),[dataset],opt,cb = evalcb) + acc = accuracy(valX,valY) + @info "Epoch# $(i)/$(epochs) - #$(idx)/$(dataset_len) loss: $(loss(dataset...)), accuracy: $(acc)" + flush(log) + end + @save model_file m +end +end # end of @time + +# Fetch the test data from Metalhead and get it into proper shape. +# CIFAR-10 does not specify a validation set so valimgs fetch the testdata instead of testimgs +tX = valimgs(CIFAR10) +test_idxs = 1:10000 +test_imgs = [getarray(tX[i].img) for i in test_idxs] +test_labels = float.(onehotbatch([tX[i].ground_truth.class for i in test_idxs], 1:10)) +test_dataset = make_minibatch(test_imgs,test_labels,batch_size) + +dataset_len = length(test_dataset) +for (idx,dataset) in enumerate(test_dataset) + acc = accuracy(dataset...) +end + +@info "Test accuracy : $(mean(test_accs))" +@info "End - $(now())" +close(log) +end From 0943da3739ab98d928ecfa36e26e88f1ca2d6027 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Fri, 7 Feb 2020 21:47:05 +0900 Subject: [PATCH 04/18] update cifar10 --- vision/cifar10/cifar10_gpu_minibatch.jl | 321 +++++++++++++++--------- 1 file changed, 201 insertions(+), 120 deletions(-) diff --git a/vision/cifar10/cifar10_gpu_minibatch.jl b/vision/cifar10/cifar10_gpu_minibatch.jl index 4d3177a6c..65e7c7f58 100644 --- a/vision/cifar10/cifar10_gpu_minibatch.jl +++ b/vision/cifar10/cifar10_gpu_minibatch.jl @@ -13,143 +13,219 @@ # Julia version : 1.3.1 # Flux version : v0.10.1 - -__precompile__() -module _CIFAR10 +__precompile__(true) +using Random +using BSON +using BSON: @save,@load +using Logging +using Dates +using NNlib +using CuArrays +using CUDAdrv +using CUDAnative: device! 
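# Checkpointing note: BSON's @save writes variables by name and @load restores them
# under the same names, which is why the code below saves a CPU copy of the network
# as `model` and later reloads it with `@load model_file model`.
# A minimal sketch, assuming a writable path "./demo.bson":
#   demo = Dense(2, 2)
#   @save "./demo.bson" demo
#   @load "./demo.bson" demo   # rebinds `demo` from the file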
using Flux, Metalhead, Statistics using Flux: onehotbatch, onecold, crossentropy, throttle using Metalhead: trainimgs using Images: channelview using Statistics: mean using Base.Iterators: partition -using CUDAnative -using CuArrays -CuArrays.allowscalar(false) +using ArgParse +#= +Argument parsing +=# + +function parse_commandline() + s = ArgParseSettings() + @add_arg_table s begin + "--epoch","-e" + help = "epoch number, default=30" + arg_type = Int + default = 30 + "--batch", "-b" + help = "mini-batch size, default=200" + arg_type = Int + default = 100 + "--gpu", "-g" + help = "gpu index to use , 0,1,2,3,.., default=0" + arg_type = Int + default = 0 + "--model", "-m" + help = "use saved model file" + arg_type = Bool + default = true + "--log","-l" + help = "create log file" + arg_type = Bool + default = true + end + + return parse_args(s) +end +parsed_args = parse_commandline() + +epochs = parsed_args["epoch"] +batch_size = parsed_args["batch"] +use_saved_model = parsed_args["model"] +gpu_device = parsed_args["gpu"] +create_log_file = parsed_args["log"] + +if create_log_file + log_file ="./cifar10_vgg16_$(Dates.format(now(),"yyyymmdd-HHMMSS")).log" + log = open(log_file, "w+") +else + log = stdout +end +global_logger(ConsoleLogger(log)) + +@info "Start - $(now())";flush(log) -using BSON: @save -using Logging -using Dates + +@info "=============== Arguments ===============" +@info "epochs=$(epochs)" +@info "batch_size=$(batch_size)" +@info "use_saved_model=$(use_saved_model)" +@info "gpu_device=$(gpu_device)" +@info "=========================================";flush(log) const model_file = "./cifar10_vgg16_model.bson" -const log_file ="./cifar10_vgg16.log" # Very important : this prevent loss NaN const ϵ = 1.0f-10 + # use 1nd GPU : default #CUDAnative.device!(0) -# use 2nd GPU -#CUDAnative.device!(1) +device!(gpu_device) +CuArrays.allowscalar(false) -log = open(log_file, "w+") -global_logger(ConsoleLogger(log)) +@info "Config VGG16, VGG19 models ...";flush(log) -@info "Start - $(now())" -@info "Config VGG16, VGG19 models ..." 
-flush(log) -# VGG16 and VGG19 models - -vgg16() = Chain( - Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(64), - Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(64), - MaxPool((2,2)), - Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(128), - Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(128), - MaxPool((2,2)), - Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(256), - Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(256), - Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(256), - MaxPool((2,2)), - Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - MaxPool((2,2)), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - MaxPool((2,2)), - x -> reshape(x, :, size(x, 4)), - Dense(512, 4096, relu), - Dropout(0.5), - Dense(4096, 4096, relu), - Dropout(0.5), - Dense(4096, 10), - softmax) |> gpu - -vgg19() = Chain( - Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(64), - Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(64), - MaxPool((2,2)), - Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(128), - Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(128), - MaxPool((2,2)), - Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(256), - Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(256), - Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(256), - Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), - MaxPool((2,2)), - Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - MaxPool((2,2)), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - MaxPool((2,2)), - x -> reshape(x, :, size(x, 4)), - Dense(512, 4096, relu), - Dropout(0.5), - Dense(4096, 4096, relu), - Dropout(0.5), - Dense(4096, 10), - softmax) |> gpu - -# Function to convert the RGB image to Float32 Arrays -getarray(X) = Float32.(permutedims(channelview(X), (2, 3, 1))) +if use_saved_model && isfile(model_file) + # flush : 버퍼링 없이 즉각 log를 파일 또는 console에 write하도록 함 + @info "Load saved model $(model_file) ...";flush(log) + # model : @save시 사용한 object명 + @load model_file model + m = model |> gpu +else + @info "Create new model ...";flush(log) + # VGG16 and VGG19 models + vgg16() = Chain( + Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + MaxPool((2,2)), + Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)), + 
BatchNorm(128), + MaxPool((2,2)), + Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + MaxPool((2,2)), + Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + MaxPool((2,2)), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + MaxPool((2,2)), + x -> reshape(x, :, size(x, 4)), + Dense(512, 4096, relu), + Dropout(0.5), + Dense(4096, 4096, relu), + Dropout(0.5), + Dense(4096, 10), + softmax) + vgg19() = Chain( + Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + MaxPool((2,2)), + Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + MaxPool((2,2)), + Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + MaxPool((2,2)), + Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + MaxPool((2,2)), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + MaxPool((2,2)), + x -> reshape(x, :, size(x, 4)), + Dense(512, 4096, relu), + Dropout(0.5), + Dense(4096, 4096, relu), + Dropout(0.5), + Dense(4096, 10), + softmax) + + m = vgg16() |> gpu +end +# +# Function to convert the RGB image to Float64 Arrays +#= +1)channelview로 이미지의 color를 channel별로 분리한다. +- 분리된 channel은 맨앞에 새로운 차원을 추가 하여 channel을 분리한다. +- 예) 32x32 이미지의 채널을 분리하면 3x32x32로 3개의 채널이 추가 된다 +2)permutedims로 분리된 채널을 뒤로 보낸다. +- Flux에서 사용되는 이미지 포맷은 WHCN-width,height,#channel,#batches 이다 +- 채널분리된 이미지가 3x32x32인 경우 permutedims(img,(2,3,1))을 적용하면 +- 32x32x3으로 width,height,#channel 순으로 바뀐다. +=# +getarray(X) = Float32.(permutedims(channelview(X), (2, 3, 1))) -@info "Data download and preparing ..." +@info "Data download and preparing ...";flush(log) function make_minibatch(imgs,labels,batch_size) data_set = [(cat(imgs[i]..., dims = 4) |> gpu, labels[:,i]) |> gpu for i in partition(1:length(imgs), batch_size)] return data_set end - -epochs = 40 -batch_size = 100 +# Fetching the train and validation data and getting them into proper shape +#= +trainimgs(모듈명) : + - 모듈명이 들어 가면 모듈명에 관련된 train용 데이터를 다운받아 리턴한다. + - ex) trainimgs(CIFAR10) : 50,000개의 train data가 return 된다. +X +=# X = trainimgs(CIFAR10) - +# Training용 데이터 준비 +# 이미지 채널 분리 및 재배열, training용으로 60,000개중 50,000개를 사용한다. 
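# X holds the 50,000 CIFAR-10 training images; the first 49,000 are used for training
# below and the remaining 1,000 as the validation set (valX/valY).
# getarray() turns each RGB image into a 32x32x3 Float32 array (WHC order), and
# make_minibatch() stacks those along a 4th dimension into 32x32x3xN batches.
# Quick illustrative check on the first image:
let sample = getarray(X[1].img)
    @assert size(sample) == (32, 32, 3) && eltype(sample) == Float32
end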
train_idxs = 1:49000 train_imgs = [getarray(X[i].img) for i in train_idxs] train_labels = float.(onehotbatch([X[i].ground_truth.class for i in train_idxs],1:10)) @@ -161,8 +237,7 @@ valY = float.(onehotbatch([X[i].ground_truth.class for i in valid_idxs],1:10)) | # Defining the loss and accuracy functions -@info "VGG16 models instantiation ..." -m = vgg16() +@info "VGG16 models instantiation ...";flush(log) loss(x, y) = crossentropy(m(x) .+ ϵ, y .+ ϵ) @@ -174,23 +249,28 @@ evalcb = throttle(() -> @info(accuracy(valX, valY)), 10) opt = ADAM() -@info "Training model..." - +@info "Training model...";flush(log) +# used for plots +# accs = Array{Float32}(undef,0) @time begin dataset_len = length(train_dataset) +shuffle_idxs = collect(1:dataset_len) +shuffle!(shuffle_idxs) for i in 1:epochs - for (idx,dataset) in enumerate(train_dataset) + for (idx,data_idx) in enumerate(shuffle_idxs) + dataset = train_dataset[data_idx] Flux.train!(loss,params(m),[dataset],opt) #Flux.train!(loss,params(m),[dataset],opt,cb = evalcb) acc = accuracy(valX,valY) - @info "Epoch# $(i)/$(epochs) - #$(idx)/$(dataset_len) loss: $(loss(dataset...)), accuracy: $(acc)" - flush(log) + @info "Epoch# $(i)/$(epochs) - #$(idx)/$(dataset_len) loss: $(loss(dataset...)), accuracy: $(acc)";flush(log) + # push!(accs,acc) end - @save model_file m + model = m |> cpu + # @load 시 여기에서 사용한 "model" 로 로딩 해야 함 + @save model_file model end end # end of @time - # Fetch the test data from Metalhead and get it into proper shape. # CIFAR-10 does not specify a validation set so valimgs fetch the testdata instead of testimgs tX = valimgs(CIFAR10) @@ -199,12 +279,13 @@ test_imgs = [getarray(tX[i].img) for i in test_idxs] test_labels = float.(onehotbatch([tX[i].ground_truth.class for i in test_idxs], 1:10)) test_dataset = make_minibatch(test_imgs,test_labels,batch_size) +test_accs = Array{Float32}(undef,0) dataset_len = length(test_dataset) for (idx,dataset) in enumerate(test_dataset) acc = accuracy(dataset...) 
+ push!(test_accs,acc) end - @info "Test accuracy : $(mean(test_accs))" @info "End - $(now())" close(log) -end + From db9d29890801c4c851f5699d121fa6ad267139b6 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Thu, 13 Feb 2020 13:38:08 +0900 Subject: [PATCH 05/18] conv.jl fix version conv_gpu_minibatch.jl upload, add parser --- vision/mnist/cmd_parser.jl | 32 +++++ vision/mnist/conv_gpu_minibatch.jl | 212 +++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 vision/mnist/cmd_parser.jl create mode 100644 vision/mnist/conv_gpu_minibatch.jl diff --git a/vision/mnist/cmd_parser.jl b/vision/mnist/cmd_parser.jl new file mode 100644 index 000000000..9a8b4e28a --- /dev/null +++ b/vision/mnist/cmd_parser.jl @@ -0,0 +1,32 @@ +module CmdParser +export parse_commandline +using ArgParse + +# Argument parsing +function parse_commandline() + s = ArgParseSettings() + @add_arg_table s begin + "--epochs","-e" + help = "epoch number, default=100" + arg_type = Int + default = 100 + "--batch","-b" + help = "mini-batch size, default=128" + arg_type = Int + default = 128 + "--gpu","-g" + help = "gpu index to use, 0,1,2,3,..., default=0" + arg_type = Int + default = 0 + "--model","-m" + help = "use saved model file, default=true" + arg_type = Bool + default = true + "--log","-l" + help = "create log file, default=true" + arg_type = Bool + default = false + end + return parse_args(s) +end +end \ No newline at end of file diff --git a/vision/mnist/conv_gpu_minibatch.jl b/vision/mnist/conv_gpu_minibatch.jl new file mode 100644 index 000000000..e7165f93b --- /dev/null +++ b/vision/mnist/conv_gpu_minibatch.jl @@ -0,0 +1,212 @@ +#= +Test Environment + - Julia : v1.3.1 + - Flux : v0.10.1 + Usage: + - julia conv_gpu_minibatch.jl --help + - ex) julia conv_gpu_minibatch.jl -e 100 -b 1000 -g 0 -l false + - epochs : 100, batch size: 1000, gpu device index : 0 , log file : false +=# + +# Classifies MNIST digits with a convolution network. +# Writes out saved model to the file "mnist_conv.bson". +# Demonstrates basic model construction, training, saving, +# conditional early-exits, and learning rate scheduling. +# +# This model, while simple, should hit around 99% test +# accuracy after training for approximately 20 epochs. + +using Flux, Flux.Data.MNIST, Statistics +using Flux: onehotbatch, onecold, crossentropy, throttle +using Base.Iterators: repeated, partition +using Printf, BSON +using Logging +using Dates +using CUDAnative: device! 
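# device! (from CUDAnative) pins this script to a single GPU by 0-based index,
# e.g. device!(0) for the first card; it is called below, before any data or
# model parameters are moved to the GPU with |> gpu.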
+using CuArrays +using Random +using Dates + +working_path = dirname(@__FILE__) +file_path(file_name) = joinpath(working_path,file_name) +include(file_path("cmd_parser.jl")) + +model_file = file_path("conv_gpu_minibatch.bson") + +# Get arguments +parsed_args = CmdParser.parse_commandline() + +epochs = parsed_args["epochs"] +batch_size = parsed_args["batch"] +use_saved_model = parsed_args["model"] +gpu_device = parsed_args["gpu"] +create_log_file = parsed_args["log"] + +if create_log_file + log_file = file_path("conv_gpu_minibatch_$(Dates.format(now(),"yyyymmdd-HHMMSS")).log") + log = open(log_file,"w+") +else + log = stdout +end +global_logger(ConsoleLogger(log)) + +@info "Start - $(now())";flush(log) + +@info "============= Arguments =============" +@info "epochs=$(epochs)" +@info "batch_size=$(batch_size)" +@info "use_saved_model=$(use_saved_model)" +@info "gpu_device=$(gpu_device)" +@info "create_log_file=$(create_log_file)" +@info "=====================================";flush(log) + +# set using GPU device +device!(gpu_device) +CuArrays.allowscalar(false) + +# Load labels and images from Flux.Data.MNIST +@info "Loading data set";flush(log) + +# Bundle images together with labels and groups into minibatch +function make_minibatch(imgs,labels,batch_size) + # WHCN: width x height x #channel x #batch + # transform (28x28) to (28x28x1x#bacth) + len = length(imgs) + sz = size(imgs[1]) + data_set = + [(cat([reshape(Float32.(imgs[i]),sz...,1,1) for i in idx]...,dims=4), + float.(onehotbatch(labels[idx],0:9)) ) for idx in partition(1:len,batch_size) ] + return data_set +end + +# Train data load +# 60,000 labels +train_labels = MNIST.labels() +# 60,000 images : ((28x28),...,(28x28)) +train_imgs = MNIST.images() +# Make train data to minibatch +train_set = make_minibatch(train_imgs,train_labels,batch_size) + +# Test data load +test_labels = MNIST.labels(:test) +test_imgs = MNIST.images(:test) +test_set = make_minibatch(test_imgs,test_labels,batch_size) + +#= + Define our model. We will use a simple convolutional architecture with + three iterations of Conv -> ReLu -> MaxPool, followed by a final Dense + layer that feeds into a softmax probability output. +=# +@info "Construncting model...";flush(log) +model = Chain( + # First convolution, operating upon a 28x28 image + Conv((3,3), 1=>16, pad=(1,1), relu), + MaxPool((2,2)), + + # Second convolution, operating upon a 14x14 image + Conv((3,3), 16=>32, pad=(1,1), relu), + MaxPool((2,2)), + + # Third convolution, operating upon a 7x7 image + Conv((3,3), 32=>32, pad=(1,1), relu), + MaxPool((2,2)), + + # Reshape 3d tensor into a 2d one, at this point it should be (3,3,32,N) + # which is where we get the 288 in the `Dense` layer below: + x -> reshape(x, :, size(x,4)), + Dense(288,10), + + # Finally, softmax to get nice probabilities + softmax, +) + +m = model |> gpu + +#= +`loss()` calculates the crossentropy loss between our prediction `y_hat` + (calculated from `m(x)`) and the ground truth `y`. We augment the data + a bit, adding gaussian random noise to our image to make it more robust. 
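 Concretely, the training loop below perturbs each minibatch with
 x .+ 0.1f0*randn(eltype(x), size(x)), i.e. zero-mean Gaussian noise with a
 standard deviation of 0.1, applied before the batch is moved to the GPU.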
+ =# +function loss(x,y) + # We augment `x` a little bit here, adding in random noise + # x : (28,28,1,batch_size) + # y : (10,batch_size) + # aug = 0.1f0*randn(eltype(x),size(x)) |> gpu + # ŷ = m(x .+ aug) + ŷ = m(x) + return crossentropy(ŷ,y) +end +# Make sure our model is nicely precompiled befor starting our training loop + +function accuracy(data_set) + l = length(data_set)*batch_size + s = 0f0 + for (x,y) in data_set + s += sum((onecold(m(x|>gpu) |> cpu) .== onecold(y|>cpu))) + end + return s/l +end + +# Make sure our is nicely precompiled befor starting our training loop +# train_set[1][1] : (28,28,1,batch_size) +m(train_set[1][1] |> gpu) + +# Train our model with the given training set using the ADAM optimizer and +# printing out performance aganin the test set as we go. +opt = ADAM(0.001) + +@info "Beginning training loop...";flush(log) +best_acc = 0.0 +last_improvement = 0 + +@time begin +for epoch_idx in 1:epochs + global best_acc, last_improvement + suffle_idxs = collect(1:length(train_set)) + shuffle!(suffle_idxs) + for idx in suffle_idxs + (x,y) = train_set[idx] + # We augment `x` a little bit here, adding in random noise + x = (x .+ 0.1f0*randn(eltype(x),size(x))) |> gpu + y = y|> gpu + Flux.train!(loss, params(m), [(x, y)],opt) + end + acc = accuracy(test_set) + @info(@sprintf("[%d]: Test accuracy: %.4f",epoch_idx,acc));flush(log) + + # If our accuracy is good enough, quit out. + if acc >= 0.999 + @info " -> Early-exiting: We reached our target accuracy of 99.9%";flush(log) + break + end + + # If this is the best accuracy we've seen so far, save the model out + if acc >= best_acc + @info " -> New best accuracy! saving model out to $(model_file)"; flush(log) + model = m |> cpu + acc = acc |> cpu + BSON.@save model_file model epoch_idx acc + best_acc = acc + last_improvement = epoch_idx + end + + #If we haven't seen improvement in 5 epochs, drop out learing rate: + if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6 + opt.eta /= 10.0 + @warn " -> Haven't improved in a while, dropping learning rate to $(opt.eta)!"; flush(log) + + # After dropping learing rate, give it a few epochs to improve + last_improvement = epoch_idx + end + + if epoch_idx - last_improvement >= 10 + @warn " -> We're calling this converged.";flush(log) + break + end +end # for +end # @time +@info "End - $(now())" +if create_log_file + close(log) +end + From c8b3633f1bbbbc28625a26ecc81d4e6bdfca1cc3 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Thu, 13 Feb 2020 14:15:50 +0900 Subject: [PATCH 06/18] Update conv_gpu_minibatch.jl --- vision/mnist/conv_gpu_minibatch.jl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/vision/mnist/conv_gpu_minibatch.jl b/vision/mnist/conv_gpu_minibatch.jl index e7165f93b..1292a2166 100644 --- a/vision/mnist/conv_gpu_minibatch.jl +++ b/vision/mnist/conv_gpu_minibatch.jl @@ -128,12 +128,7 @@ m = model |> gpu a bit, adding gaussian random noise to our image to make it more robust. 
=# function loss(x,y) - # We augment `x` a little bit here, adding in random noise - # x : (28,28,1,batch_size) - # y : (10,batch_size) - # aug = 0.1f0*randn(eltype(x),size(x)) |> gpu - # ŷ = m(x .+ aug) - ŷ = m(x) + ŷ = m(x) return crossentropy(ŷ,y) end # Make sure our model is nicely precompiled befor starting our training loop From 9cd0c986b668953241f02090c668d91fd958e5ce Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Thu, 13 Feb 2020 21:05:52 +0900 Subject: [PATCH 07/18] update --- vision/cifar10/cifar10_gpu_minibatch.jl | 188 +++++++++++++----------- 1 file changed, 105 insertions(+), 83 deletions(-) diff --git a/vision/cifar10/cifar10_gpu_minibatch.jl b/vision/cifar10/cifar10_gpu_minibatch.jl index 65e7c7f58..a85c7e3f0 100644 --- a/vision/cifar10/cifar10_gpu_minibatch.jl +++ b/vision/cifar10/cifar10_gpu_minibatch.jl @@ -29,46 +29,23 @@ using Metalhead: trainimgs using Images: channelview using Statistics: mean using Base.Iterators: partition -using ArgParse -#= -Argument parsing -=# -function parse_commandline() - s = ArgParseSettings() - @add_arg_table s begin - "--epoch","-e" - help = "epoch number, default=30" - arg_type = Int - default = 30 - "--batch", "-b" - help = "mini-batch size, default=200" - arg_type = Int - default = 100 - "--gpu", "-g" - help = "gpu index to use , 0,1,2,3,.., default=0" - arg_type = Int - default = 0 - "--model", "-m" - help = "use saved model file" - arg_type = Bool - default = true - "--log","-l" - help = "create log file" - arg_type = Bool - default = true - end +working_path = dirname(@__FILE__) +file_path(file_name) = joinpath(working_path,file_name) +include(file_path("cmd_parser.jl")) - return parse_args(s) -end -parsed_args = parse_commandline() +model_file = file_path("cifar10_vgg16_model.bson") + +# Get arguments +parsed_args = CmdParser.parse_commandline() -epochs = parsed_args["epoch"] +epochs = parsed_args["epochs"] batch_size = parsed_args["batch"] use_saved_model = parsed_args["model"] gpu_device = parsed_args["gpu"] create_log_file = parsed_args["log"] + if create_log_file log_file ="./cifar10_vgg16_$(Dates.format(now(),"yyyymmdd-HHMMSS")).log" log = open(log_file, "w+") @@ -79,7 +56,6 @@ global_logger(ConsoleLogger(log)) @info "Start - $(now())";flush(log) - @info "=============== Arguments ===============" @info "epochs=$(epochs)" @info "batch_size=$(batch_size)" @@ -87,10 +63,8 @@ global_logger(ConsoleLogger(log)) @info "gpu_device=$(gpu_device)" @info "=========================================";flush(log) -const model_file = "./cifar10_vgg16_model.bson" - # Very important : this prevent loss NaN -const ϵ = 1.0f-10 +ϵ = 1.0f-10 # use 1nd GPU : default @@ -100,12 +74,14 @@ CuArrays.allowscalar(false) @info "Config VGG16, VGG19 models ...";flush(log) -if use_saved_model && isfile(model_file) +acc = 0; epoch = 0 +if use_saved_model && isfile(model_file) && filesize(model_file) > 0 # flush : 버퍼링 없이 즉각 log를 파일 또는 console에 write하도록 함 @info "Load saved model $(model_file) ...";flush(log) # model : @save시 사용한 object명 - @load model_file model + @load model_file model acc epoch m = model |> gpu + @info " -> accuracy : $(acc), epochs : $(epoch)";flush(log) else @info "Create new model ...";flush(log) # VGG16 and VGG19 models @@ -210,12 +186,12 @@ getarray(X) = Float32.(permutedims(channelview(X), (2, 3, 1))) @info "Data download and preparing ...";flush(log) function make_minibatch(imgs,labels,batch_size) - data_set = [(cat(imgs[i]..., dims = 4) |> gpu, - labels[:,i]) |> gpu + data_set = [(cat(imgs[i]..., dims = 4), + labels[:,i]) for i in 
partition(1:length(imgs), batch_size)] return data_set end -# Fetching the train and validation data and getting them into proper shape +# Fetching the train and verify data and getting them into proper shape #= trainimgs(모듈명) : - 모듈명이 들어 가면 모듈명에 관련된 train용 데이터를 다운받아 리턴한다. @@ -229,63 +205,109 @@ X = trainimgs(CIFAR10) train_idxs = 1:49000 train_imgs = [getarray(X[i].img) for i in train_idxs] train_labels = float.(onehotbatch([X[i].ground_truth.class for i in train_idxs],1:10)) -train_dataset = make_minibatch(train_imgs,train_labels,batch_size) +train_set = make_minibatch(train_imgs,train_labels,batch_size) -valid_idxs = 49001:50000 -valX = cat([getarray(X[i].img) for i in valid_idxs]..., dims = 4) |> gpu -valY = float.(onehotbatch([X[i].ground_truth.class for i in valid_idxs],1:10)) |> gpu +verify_idxs = 49001:50000 +verify_imgs = cat([getarray(X[i].img) for i in verify_idxs]..., dims = 4) +verify_labels = float.(onehotbatch([X[i].ground_truth.class for i in verify_idxs],1:10)) +verify_set = [(verify_imgs,verify_labels)] +# Fetch the test data from Metalhead and get it into proper shape. +# CIFAR-10 does not specify a verify set so valimgs fetch the testdata instead of testimgs +tX = valimgs(CIFAR10) +test_idxs = 1:10000 +test_imgs = [getarray(tX[i].img) for i in test_idxs] +test_labels = float.(onehotbatch([tX[i].ground_truth.class for i in test_idxs], 1:10)) +test_set = make_minibatch(test_imgs,test_labels,batch_size) # Defining the loss and accuracy functions @info "VGG16 models instantiation ...";flush(log) loss(x, y) = crossentropy(m(x) .+ ϵ, y .+ ϵ) -accuracy(x, y) = mean(onecold(m(x)|>cpu, 1:10) .== onecold(y|>cpu, 1:10)) - -# Defining the callback and the optimizer +# accuracy(x, y) = mean(onecold(m(x)|>cpu, 1:10) .== onecold(y|>cpu, 1:10)) +function accuracy(data_set) + batch_size = size(data_set[1][1])[end] + l = length(data_set)*batch_size + s = 0f0 + for (x,y) in data_set + s += sum((onecold(m(x|>gpu) |> cpu) .== onecold(y|>cpu))) + end + return s/l +end -evalcb = throttle(() -> @info(accuracy(valX, valY)), 10) +# Make sure our is nicely precompiled befor starting our training loop +# train_set[1][1] : (28,28,1,batch_size) +@info "Model pre-compile...";flush(log) +m(train_set[1][1] |> gpu) -opt = ADAM() +# Defining the callback and the optimizer +# evalcb = throttle(() -> @info(accuracy(verify_set)), 10) +opt = ADAM(0.001) @info "Training model...";flush(log) - +best_acc = 0.0 +last_improvement = 0 # used for plots -# accs = Array{Float32}(undef,0) -@time begin -dataset_len = length(train_dataset) -shuffle_idxs = collect(1:dataset_len) -shuffle!(shuffle_idxs) -for i in 1:epochs +for epoch_idx in 1+epoch:(epochs+=epoch) + accs = Array{Float32}(undef,0) + global best_acc, last_improvement + train_set_len = length(train_set) + shuffle_idxs = collect(1:train_set_len) + shuffle!(shuffle_idxs) + for (idx,data_idx) in enumerate(shuffle_idxs) - dataset = train_dataset[data_idx] - Flux.train!(loss,params(m),[dataset],opt) - #Flux.train!(loss,params(m),[dataset],opt,cb = evalcb) - acc = accuracy(valX,valY) - @info "Epoch# $(i)/$(epochs) - #$(idx)/$(dataset_len) loss: $(loss(dataset...)), accuracy: $(acc)";flush(log) - # push!(accs,acc) + (x,y) = train_set[data_idx] + # We augment `x` a little bit here, adding in random noise + x = (x .+ 0.1f0*randn(eltype(x),size(x))) |> gpu + y = y|> gpu + Flux.train!(loss,params(m),[(x,y)],opt) + #Flux.train!(loss,params(m),[(x,y)],opt,cb = evalcb) + v_acc = accuracy(verify_set) + @info "Epoch# $(epoch_idx)/$(epochs) - #$(idx)/$(train_set_len) loss: 
$(loss(x,y)), accuracy: $(v_acc)";flush(log) + # @info "Epoch# $(epoch_idx)/$(epochs) - #$(idx)/$(train_set_len) accuracy: $(v_acc)";flush(log) + push!(accs,v_acc) + end # for + + m_acc = mean(accs) + @info " -> Verify accuracy(mean) : $(m_acc)";flush(log) + test_acc = accuracy(test_set) + @info "Test accuracy : $(test_acc)";flush(log) + + # If our accuracy is good enough, quit out. + if test_acc >= 0.98 + @info " -> Early-exiting: We reached our target accuracy of 98%";flush(log) + model = m |> cpu;acc = test_acc;epoch = epoch_idx + @save model_file model acc epoch + break end - model = m |> cpu - # @load 시 여기에서 사용한 "model" 로 로딩 해야 함 - @save model_file model -end -end # end of @time -# Fetch the test data from Metalhead and get it into proper shape. -# CIFAR-10 does not specify a validation set so valimgs fetch the testdata instead of testimgs -tX = valimgs(CIFAR10) -test_idxs = 1:10000 -test_imgs = [getarray(tX[i].img) for i in test_idxs] -test_labels = float.(onehotbatch([tX[i].ground_truth.class for i in test_idxs], 1:10)) -test_dataset = make_minibatch(test_imgs,test_labels,batch_size) + + # If this is the best accuracy we've seen so far, save the model out + if test_acc >= best_acc + @info " -> New best accuracy! saving model out to $(model_file)"; flush(log) + model = m |> cpu;acc = test_acc;epoch = epoch_idx + # @save,@load 시 같은 이름을 사용해야 함, 여기서는 "model"을 사용함 + @save model_file model acc epoch + best_acc = test_acc + last_improvement = epoch_idx + end + + # If we haven't seen improvement in 5 epochs, drop out learning rate: + if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6 + opt.eta /= 10.0 + @info " -> Haven't improved in a while, dropping learning rate to $(opt.eta)!";flush(log) + # After dropping learning rate, give it a few epochs to improve + last_improvement = epoch_idx + end + + if epoch_idx - last_improvement >= 10 + @info " -> We're calling this converged."; flush(log) + break + end +end # end of for -test_accs = Array{Float32}(undef,0) -dataset_len = length(test_dataset) -for (idx,dataset) in enumerate(test_dataset) - acc = accuracy(dataset...) 
- push!(test_accs,acc) -end -@info "Test accuracy : $(mean(test_accs))" @info "End - $(now())" -close(log) +if create_log_file + close(log) +end From 25c5bf48af7bb77edacf48e2ace7a069a365bfad Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Thu, 13 Feb 2020 21:40:33 +0900 Subject: [PATCH 08/18] Update cifar10_gpu_minibatch.jl --- vision/cifar10/cifar10_gpu_minibatch.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vision/cifar10/cifar10_gpu_minibatch.jl b/vision/cifar10/cifar10_gpu_minibatch.jl index a85c7e3f0..44a0fd58a 100644 --- a/vision/cifar10/cifar10_gpu_minibatch.jl +++ b/vision/cifar10/cifar10_gpu_minibatch.jl @@ -246,8 +246,8 @@ m(train_set[1][1] |> gpu) opt = ADAM(0.001) @info "Training model...";flush(log) -best_acc = 0.0 -last_improvement = 0 +best_acc = acc +last_improvement = epoch # used for plots for epoch_idx in 1+epoch:(epochs+=epoch) accs = Array{Float32}(undef,0) From 12891a72b09e0313232c619f891da26527ace521 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Tue, 18 Feb 2020 03:41:41 +0900 Subject: [PATCH 09/18] char-rnn error fix test env : julia v1.3.1, flux v0.10.1 --- text/char-rnn/char_rnn_gpu_minibatch.jl | 93 +++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 text/char-rnn/char_rnn_gpu_minibatch.jl diff --git a/text/char-rnn/char_rnn_gpu_minibatch.jl b/text/char-rnn/char_rnn_gpu_minibatch.jl new file mode 100644 index 000000000..90dbf1674 --- /dev/null +++ b/text/char-rnn/char_rnn_gpu_minibatch.jl @@ -0,0 +1,93 @@ +using Flux +using Flux: onehot, chunk, batchseq, throttle, crossentropy +using StatsBase: wsample +using Base.Iterators: partition +using CuArrays +using CUDAnative: device! +using Random +using Dates +using Logging + +ϵ = 1.0f-32 + +working_path = dirname(@__FILE__) +file_path(file_name) = joinpath(working_path,file_name) + +device!(0) +CuArrays.allowscalar(false) + +input_file = file_path("input.txt") +isfile(input_file) || + download("https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt", + input_file) + +# read(input_file) : 파일에서 텍스트 읽오옴 - 바이너리 +# String(read(input_file)) : 바이너리를 스트링으로 변환 +# collect(String(read(input_file)) : 스트링을 개별 char array로 변환 - Array{Char,1} +text = collect(String(read(input_file))) + +# unique(text) : text에서 unique한 char array를 만든다 - 중복제거 - 하고 +# 맨뒤에 '_' 를 추가 한다. +# unique한 char -알파벳 array를 만든다. +alphabet = [unique(text)...,'_'] +# ch onehot을 만든다. 
onhot의 길이는 length(alphabet)이고 onehot에서 1이 있는 위치는 +# alphabet에서 ch가 있는 위치와 동일 +text = map(ch -> Float32.(onehot(ch,alphabet)),text) +stop = Float32.(onehot('_',alphabet)) + +N = length(alphabet) +seqlen = 50 +nbatch = 50 + +Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) +txt = circshift(text,-1) +txt[end] = stop +Ys = collect(partition(batchseq(chunk(txt, nbatch), stop), seqlen)) + +model = Chain( + LSTM(N, 128), + LSTM(128, 128), + Dense(128, N), + softmax) + +opt = ADAM(0.01) +m = model |>gpu + +tx, ty = (Xs[2]|>gpu, Ys[2]|>gpu) + +function loss2(xx, yy) + out = 0.0f0 + for (idx, x) in enumerate(xx) + out += crossentropy(m(x) .+ ϵ, yy[idx]) + end + Flux.reset!(m) + out +end + + +epochs = 200 +idxs = length(Xs) +for epoch_idx in 1:epochs + for (idx,(xs,ys)) in enumerate(zip(Xs, Ys)) + Flux.train!(loss2, params(m), [(xs|>gpu,ys|>gpu)], opt) + if idx % 10 == 0 + @info "epoch# $(epoch_idx)/$(epochs)-$(idx)/$(idxs) loss = $(loss2(tx,ty))";flush(stdout) + end + end +end + +# Sampling + +function sample(m, alphabet, len) + m = cpu(m) + Flux.reset!(m) + buf = IOBuffer() + c = rand(alphabet) + for i = 1:len + write(buf, c) + c = wsample(alphabet, m(onehot(c, alphabet))) + end + return String(take!(buf)) +end + +sample(m, alphabet, 1000) |> println From b50195d13d33cd6a654830cbea3609d72a419ae5 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Thu, 20 Feb 2020 10:38:51 +0900 Subject: [PATCH 10/18] Update char_rnn_gpu_minibatch.jl --- text/char-rnn/char_rnn_gpu_minibatch.jl | 130 ++++++++++++++++++++---- 1 file changed, 112 insertions(+), 18 deletions(-) diff --git a/text/char-rnn/char_rnn_gpu_minibatch.jl b/text/char-rnn/char_rnn_gpu_minibatch.jl index 90dbf1674..4c1de0c1e 100644 --- a/text/char-rnn/char_rnn_gpu_minibatch.jl +++ b/text/char-rnn/char_rnn_gpu_minibatch.jl @@ -1,3 +1,5 @@ +using BSON +using BSON: @save,@load using Flux using Flux: onehot, chunk, batchseq, throttle, crossentropy using StatsBase: wsample @@ -9,11 +11,42 @@ using Dates using Logging ϵ = 1.0f-32 - working_path = dirname(@__FILE__) file_path(file_name) = joinpath(working_path,file_name) +include(file_path("cmd_parser.jl")) + +model_file = file_path("char_rnn_gpu_minibatch.bson") + +# # Get arguments +parsed_args = CmdParser.parse_commandline() +epochs = parsed_args["epochs"] +batch_size = parsed_args["batch"] +use_saved_model = parsed_args["model"] +gpu_device = parsed_args["gpu"] +create_log_file = parsed_args["log"] +sequence = parsed_args["seq"] + +if create_log_file + log_file ="./char_rnn_gpu_minibatch_$(Dates.format(now(),"yyyymmdd-HHMMSS")).log" + log = open(log_file, "w+") +else + log = stdout +end +global_logger(ConsoleLogger(log)) + +start_time = now() +@info "Start - $(start_time)";flush(log) +@info "=============== Arguments ===============" +@info "epochs=$(epochs)" +@info "batch_size=$(batch_size)" +@info "use_saved_model=$(use_saved_model)" +@info "gpu_device=$(gpu_device)" +@info "sequence=$(sequence)" +@info "log_file=$(create_log_file)" +@info "=========================================";flush(log) + -device!(0) +device!(gpu_device) CuArrays.allowscalar(false) input_file = file_path("input.txt") @@ -36,26 +69,39 @@ text = map(ch -> Float32.(onehot(ch,alphabet)),text) stop = Float32.(onehot('_',alphabet)) N = length(alphabet) -seqlen = 50 -nbatch = 50 +seqlen = sequence +nbatch = batch_size Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen)) txt = circshift(text,-1) txt[end] = stop Ys = collect(partition(batchseq(chunk(txt, nbatch), stop), seqlen)) -model = Chain( - 
LSTM(N, 128), - LSTM(128, 128), - Dense(128, N), - softmax) +vloss=Inf; epoch = 0; t_sec = Second(0); +if use_saved_model && isfile(model_file) && filesize(model_file) > 0 + # flush : 버퍼링 없이 즉각 log를 파일 또는 console에 write하도록 함 + @info "Load saved model $(model_file) ...";flush(log) + # model : @save시 사용한 object명 + @load model_file model vloss epoch sec + t_sec = sec + m = model |> gpu + run_min = round(Second(t_sec), Minute) + @info " -> loss : $(vloss), epochs : $(epoch), run time : $(run_min)";flush(log) +else + @info "Create new model ...";flush(log) + model = Chain( + LSTM(N, 128), + LSTM(128, 256), + LSTM(256, 128), + Dense(128, N), + softmax) + m = model |>gpu +end opt = ADAM(0.01) -m = model |>gpu - -tx, ty = (Xs[2]|>gpu, Ys[2]|>gpu) +tx, ty = (Xs[1]|>gpu, Ys[1]|>gpu) -function loss2(xx, yy) +function loss(xx, yy) out = 0.0f0 for (idx, x) in enumerate(xx) out += crossentropy(m(x) .+ ϵ, yy[idx]) @@ -64,17 +110,60 @@ function loss2(xx, yy) out end +@info "Training model...";flush(log) -epochs = 200 idxs = length(Xs) -for epoch_idx in 1:epochs +best_loss = vloss +last_improvement = epoch +epoch_start_time = now() +epochs += epoch +epoch += 1 +for epoch_idx in epoch:epochs + global best_loss,last_improvement,t_sec,epoch_start_time + mean_loss = 0.0f0 for (idx,(xs,ys)) in enumerate(zip(Xs, Ys)) - Flux.train!(loss2, params(m), [(xs|>gpu,ys|>gpu)], opt) + Flux.train!(loss, params(m), [(xs|>gpu,ys|>gpu)], opt) + lss = loss(tx,ty) + mean_loss += lss if idx % 10 == 0 - @info "epoch# $(epoch_idx)/$(epochs)-$(idx)/$(idxs) loss = $(loss2(tx,ty))";flush(stdout) + @info "epoch# $(epoch_idx)/$(epochs)-$(idx)/$(idxs) loss = $(lss)";flush(log) end end + mean_loss /= idxs + + run_sec = round(Millisecond(now()-epoch_start_time), Second) + run_min = round(Second(run_sec), Minute) + t_run_min = round(Second(t_sec+run_sec), Minute) + @info "epoch# $(epoch_idx)/$(epochs)-> mean loss : $(mean_loss), running time : $(run_min)/$(t_run_min)";flush(log) + + # If this is the best accuracy we've seen so far, save the model out + if mean_loss <= best_loss + @info " -> New best loss! 
saving model out to $(model_file)"; flush(log) + model = m |> cpu + vloss = mean_loss;epoch = epoch_idx; sec = t_sec + run_sec + # @save,@load 시 같은 이름을 사용해야 함, 여기서는 "model"을 사용함 + @save model_file model vloss epoch sec + best_loss = mean_loss + last_improvement = epoch_idx + end + + # If we haven't seen improvement in 5 epochs, drop out learning rate: + if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6 + opt.eta /= 10.0 + @info " -> Haven't improved in a while, dropping learning rate to $(opt.eta)!";flush(log) + # After dropping learning rate, give it a few epochs to improve + last_improvement = epoch_idx + end + + if epoch_idx - last_improvement >= 10 + @info " -> We're calling this converged."; flush(log) + break + end end +end_time = now() +@info "End - $(end_time)";flush(log) +run_min = round(round(Millisecond(end_time - start_time), Second),Minute) +@info "Running time : $(run_min)";flush(log) # Sampling @@ -90,4 +179,9 @@ function sample(m, alphabet, len) return String(take!(buf)) end -sample(m, alphabet, 1000) |> println +@info sample(m, alphabet, 1000);flush(log) + +if create_log_file + close(log) +end + From b1021100e8787795b5027914c65f91a191b321bf Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Thu, 20 Feb 2020 10:40:47 +0900 Subject: [PATCH 11/18] command line parser --- text/char-rnn/cmd_parser.jl | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 text/char-rnn/cmd_parser.jl diff --git a/text/char-rnn/cmd_parser.jl b/text/char-rnn/cmd_parser.jl new file mode 100644 index 000000000..ba7603a7e --- /dev/null +++ b/text/char-rnn/cmd_parser.jl @@ -0,0 +1,36 @@ +module CmdParser +export parse_commandline +using ArgParse + +# Argument parsing +function parse_commandline() + s = ArgParseSettings() + @add_arg_table! s begin + "--epochs","-e" + help = "epoch number, default=100" + arg_type = Int + default = 100 + "--batch","-b" + help = "mini-batch size, default=128" + arg_type = Int + default = 50 + "--seq","-s" + help = "sequence size, default=50" + arg_type = Int + default = 50 + "--gpu","-g" + help = "gpu index to use, 0,1,2,3,..., default=0" + arg_type = Int + default = 1 + "--model","-m" + help = "use saved model file, default=true" + arg_type = Bool + default = true + "--log","-l" + help = "create log file, default=true" + arg_type = Bool + default = false + end + return parse_args(s) +end +end \ No newline at end of file From a67b160787352dcf90791fce7d079fb977327eb4 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Fri, 21 Feb 2020 10:01:10 +0900 Subject: [PATCH 12/18] Update mlp_gpu_minibatch.jl update --- vision/mnist/mlp_gpu_minibatch.jl | 34 ++++++++----------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/vision/mnist/mlp_gpu_minibatch.jl b/vision/mnist/mlp_gpu_minibatch.jl index 21d2cdb6d..84fe82734 100644 --- a/vision/mnist/mlp_gpu_minibatch.jl +++ b/vision/mnist/mlp_gpu_minibatch.jl @@ -1,8 +1,3 @@ -#= -Julia version: 1.3.1 -Flux version : 0.10.1 -=# -__precompile__() module MNIST_BATCH using Flux using Flux.Data.MNIST, Statistics @@ -13,37 +8,28 @@ using CUDAnative using CuArrays CuArrays.allowscalar(false) -#= +#= Very important !! 
ϵ is used to prevent loss NaN =# -const ϵ = 1.0f-10 +const ϵ = 1.0f-32 # Load training labels and images from Flux.Data.MNIST @info("Loading data...") -#= -MNIST.images() : [(28x28),...,(28x28)] 60,000x28x28 training images -MNIST.labels() : 0 ~ 9 labels , 60,000x10 training labels -=# + train_imgs = MNIST.images() train_labels = MNIST.labels() # use 1nd GPU : default -#CUDAnative.device!(0) +CUDAnative.device!(0) # use 2nd GPU #CUDAnative.device!(1) # Bundle images together with labels and group into minibatch function make_minibatch(imgs,labels,batch_size) - #= - reshape.(MNIST.images(),:) : [(784,),(784,),...,(784,)] 60,000개의 데이터 - X : (784x60,000) - Y : (10x60,000) - =# X = hcat(float.(reshape.(imgs,:))...) |> gpu Y = float.(onehotbatch(labels,0:9)) |> gpu - # Y = Float32.(onehotbatch(labels,0:9)) - + data_set = [(X[:,i],Y[:,i]) for i in partition(1:length(labels),batch_size)] return data_set end @@ -51,11 +37,11 @@ end @info("Making model...") # Model m = Chain( - Dense(28^2,32,relu), # y1 = relu(W1*x + b1), y1 : (32x?), W1 : (32x784), b1 : (32,) - Dense(32,10), # y2 = W2*y1 + b2, y2 : (10,?), W2: (10x32), b2:(10,) + Dense(28^2,32,relu), + Dense(32,10), softmax ) |> gpu -loss(x,y) = crossentropy(m(x) .+ ϵ, y .+ ϵ) +loss(x,y) = crossentropy(m(x) .+ ϵ, y) accuracy(x,y) = mean(onecold(m(x)|>cpu) .== onecold(y|>cpu)) batch_size = 500 @@ -74,9 +60,8 @@ dataset_len = length(train_dataset) for i in 1:epochs for (idx,dataset) in enumerate(train_dataset) Flux.train!(loss,params(m),[dataset],opt) - # Flux.train!(loss,params(m),[dataset],opt,cb = throttle(()->@show(loss(dataset...)),20)) + acc = accuracy(dataset...) if idx == dataset_len - acc = accuracy(dataset...) @info("Epoch# $(i)/$(epochs) - loss: $(loss(dataset...)), accuracy: $(acc)") push!(accs,acc) end @@ -94,4 +79,3 @@ end using Plots;gr() plot(MNIST_BATCH.accs) - From 91c92fd32849f35190e4f7cb12ce829954b880c7 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Fri, 21 Feb 2020 10:22:36 +0900 Subject: [PATCH 13/18] Update conv_gpu_minibatch.jl update --- vision/mnist/conv_gpu_minibatch.jl | 92 ++++++++---------------------- 1 file changed, 23 insertions(+), 69 deletions(-) diff --git a/vision/mnist/conv_gpu_minibatch.jl b/vision/mnist/conv_gpu_minibatch.jl index 1292a2166..b6bddabf4 100644 --- a/vision/mnist/conv_gpu_minibatch.jl +++ b/vision/mnist/conv_gpu_minibatch.jl @@ -2,10 +2,6 @@ Test Environment - Julia : v1.3.1 - Flux : v0.10.1 - Usage: - - julia conv_gpu_minibatch.jl --help - - ex) julia conv_gpu_minibatch.jl -e 100 -b 1000 -g 0 -l false - - epochs : 100, batch size: 1000, gpu device index : 0 , log file : false =# # Classifies MNIST digits with a convolution network. @@ -20,69 +16,35 @@ using Flux, Flux.Data.MNIST, Statistics using Flux: onehotbatch, onecold, crossentropy, throttle using Base.Iterators: repeated, partition using Printf, BSON -using Logging using Dates using CUDAnative: device! 
using CuArrays using Random using Dates -working_path = dirname(@__FILE__) -file_path(file_name) = joinpath(working_path,file_name) -include(file_path("cmd_parser.jl")) +model_file = joinpath(dirname(@__FILE__),"conv_gpu_minibatch.bson") -model_file = file_path("conv_gpu_minibatch.bson") - -# Get arguments -parsed_args = CmdParser.parse_commandline() - -epochs = parsed_args["epochs"] -batch_size = parsed_args["batch"] -use_saved_model = parsed_args["model"] -gpu_device = parsed_args["gpu"] -create_log_file = parsed_args["log"] - -if create_log_file - log_file = file_path("conv_gpu_minibatch_$(Dates.format(now(),"yyyymmdd-HHMMSS")).log") - log = open(log_file,"w+") -else - log = stdout -end -global_logger(ConsoleLogger(log)) - -@info "Start - $(now())";flush(log) - -@info "============= Arguments =============" -@info "epochs=$(epochs)" -@info "batch_size=$(batch_size)" -@info "use_saved_model=$(use_saved_model)" -@info "gpu_device=$(gpu_device)" -@info "create_log_file=$(create_log_file)" -@info "=====================================";flush(log) +epochs = 100 +batch_size = 128 +gpu_device = 0 # set using GPU device device!(gpu_device) CuArrays.allowscalar(false) -# Load labels and images from Flux.Data.MNIST -@info "Loading data set";flush(log) # Bundle images together with labels and groups into minibatch function make_minibatch(imgs,labels,batch_size) - # WHCN: width x height x #channel x #batch - # transform (28x28) to (28x28x1x#bacth) len = length(imgs) sz = size(imgs[1]) - data_set = + data_set = [(cat([reshape(Float32.(imgs[i]),sz...,1,1) for i in idx]...,dims=4), float.(onehotbatch(labels[idx],0:9)) ) for idx in partition(1:len,batch_size) ] return data_set end # Train data load -# 60,000 labels train_labels = MNIST.labels() -# 60,000 images : ((28x28),...,(28x28)) train_imgs = MNIST.images() # Make train data to minibatch train_set = make_minibatch(train_imgs,train_labels,batch_size) @@ -97,7 +59,7 @@ test_set = make_minibatch(test_imgs,test_labels,batch_size) three iterations of Conv -> ReLu -> MaxPool, followed by a final Dense layer that feeds into a softmax probability output. =# -@info "Construncting model...";flush(log) +@info "Construncting model..." model = Chain( # First convolution, operating upon a 28x28 image Conv((3,3), 1=>16, pad=(1,1), relu), @@ -106,34 +68,34 @@ model = Chain( # Second convolution, operating upon a 14x14 image Conv((3,3), 16=>32, pad=(1,1), relu), MaxPool((2,2)), - + # Third convolution, operating upon a 7x7 image Conv((3,3), 32=>32, pad=(1,1), relu), MaxPool((2,2)), - + # Reshape 3d tensor into a 2d one, at this point it should be (3,3,32,N) # which is where we get the 288 in the `Dense` layer below: x -> reshape(x, :, size(x,4)), Dense(288,10), - + # Finally, softmax to get nice probabilities softmax, ) m = model |> gpu -#= -`loss()` calculates the crossentropy loss between our prediction `y_hat` +#= +`loss()` calculates the crossentropy loss between our prediction `ŷ` (calculated from `m(x)`) and the ground truth `y`. We augment the data a bit, adding gaussian random noise to our image to make it more robust. 
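 Note that the augmentation happens in the training loop (x .+ 0.1f0*randn(eltype(x),size(x)))
 rather than inside loss(), so loss() itself is just the crossentropy of m(x) against y.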
 =#
 function loss(x,y)
-    ŷ = m(x)
-    return crossentropy(ŷ,y)
+    ŷ = m(x)
+    return crossentropy(ŷ,y)
 end
-# Make sure our model is nicely precompiled befor starting our training loop
-function accuracy(data_set)
+function accuracy(data_set)
+    batch_size = size(data_set[1][1])[end]
     l = length(data_set)*batch_size
     s = 0f0
     for (x,y) in data_set
@@ -143,20 +105,18 @@ end
 # Make sure our model is nicely precompiled before starting our training loop
-# train_set[1][1] : (28,28,1,batch_size)
 m(train_set[1][1] |> gpu)
 # Train our model with the given training set using the ADAM optimizer and
 # printing out performance against the test set as we go.
 opt = ADAM(0.001)
-@info "Beginning training loop...";flush(log)
+@info "Beginning training loop..."
 best_acc = 0.0
 last_improvement = 0
-@time begin
 for epoch_idx in 1:epochs
-        global best_acc, last_improvement
+    global best_acc, last_improvement
     suffle_idxs = collect(1:length(train_set))
     shuffle!(suffle_idxs)
     for idx in suffle_idxs
@@ -167,17 +127,17 @@ for epoch_idx in 1:epochs
         Flux.train!(loss, params(m), [(x, y)],opt)
     end
     acc = accuracy(test_set)
-    @info(@sprintf("[%d]: Test accuracy: %.4f",epoch_idx,acc));flush(log)
+    @info(@sprintf("[%d]: Test accuracy: %.4f",epoch_idx,acc))
     # If our accuracy is good enough, quit out.
     if acc >= 0.999
-        @info " -> Early-exiting: We reached our target accuracy of 99.9%";flush(log)
+        @info " -> Early-exiting: We reached our target accuracy of 99.9%"
         break
     end
     # If this is the best accuracy we've seen so far, save the model out
     if acc >= best_acc
-        @info " -> New best accuracy! saving model out to $(model_file)"; flush(log)
+        @info " -> New best accuracy! saving model out to $(model_file)"
         model = m |> cpu
         acc = acc |> cpu
         BSON.@save model_file model epoch_idx acc
@@ -188,20 +148,14 @@ for epoch_idx in 1:epochs
     # If we haven't seen improvement in 5 epochs, drop our learning rate:
     if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6
         opt.eta /= 10.0
-        @warn " -> Haven't improved in a while, dropping learning rate to $(opt.eta)!"; flush(log)
+        @warn " -> Haven't improved in a while, dropping learning rate to $(opt.eta)!"
        # After dropping learning rate, give it a few epochs to improve
        last_improvement = epoch_idx
    end
    if epoch_idx - last_improvement >= 10
-        @warn " -> We're calling this converged.";flush(log)
+        @warn " -> We're calling this converged."
         break
-    end
-end # for
-end # @time
-@info "End - $(now())"
-if create_log_file
-    close(log)
+    end
 end
-
From 00af6db4da90908e9f6749c3342d536f22cb17c2 Mon Sep 17 00:00:00 2001
From: Sungho Park
Date: Fri, 21 Feb 2020 10:47:55 +0900
Subject: [PATCH 14/18] Update cifar10_gpu_minibatch.jl

update
---
 vision/cifar10/cifar10_gpu_minibatch.jl | 346 ++++++++----------------
 1 file changed, 111 insertions(+), 235 deletions(-)

diff --git a/vision/cifar10/cifar10_gpu_minibatch.jl b/vision/cifar10/cifar10_gpu_minibatch.jl
index 44a0fd58a..a6b9a1b57 100644
--- a/vision/cifar10/cifar10_gpu_minibatch.jl
+++ b/vision/cifar10/cifar10_gpu_minibatch.jl
@@ -1,25 +1,8 @@
-#=
-    cifar10 dataset spec
-    - 60,000 images of 32x32 size
-    - train images : 50,000
-    - test images : 10,000
-    - classify item : 10
-    - each class have 6,000 images and 5,000 train images, 1,000 test images
-
-    Data format:
-    WHCN order : (width, height, #channels, #batches)
-    ex) A single 100x100 RGB image data format : 100x100x3x1
-    =#
-
 # Julia version : 1.3.1
 # Flux version : v0.10.1
-__precompile__(true)
+
 using Random
-using BSON
-using BSON: @save,@load
-using Logging
 using Dates
-using NNlib
 using CuArrays
 using CUDAdrv
 using CUDAnative: device!
@@ -30,178 +13,119 @@ using Images: channelview
 using Statistics: mean
 using Base.Iterators: partition
-working_path = dirname(@__FILE__)
-file_path(file_name) = joinpath(working_path,file_name)
-include(file_path("cmd_parser.jl"))
-
-model_file = file_path("cifar10_vgg16_model.bson")
+model_file = joinpath(dirname(@__FILE__),"cifar10_vgg16_model.bson")
 # Get arguments
-parsed_args = CmdParser.parse_commandline()
-
-epochs = parsed_args["epochs"]
-batch_size = parsed_args["batch"]
-use_saved_model = parsed_args["model"]
-gpu_device = parsed_args["gpu"]
-create_log_file = parsed_args["log"]
-
-
-if create_log_file
-    log_file ="./cifar10_vgg16_$(Dates.format(now(),"yyyymmdd-HHMMSS")).log"
-    log = open(log_file, "w+")
-else
-    log = stdout
-end
-global_logger(ConsoleLogger(log))
-@info "Start - $(now())";flush(log)
-
-@info "=============== Arguments ==============="
-@info "epochs=$(epochs)"
-@info "batch_size=$(batch_size)"
-@info "use_saved_model=$(use_saved_model)"
-@info "gpu_device=$(gpu_device)"
-@info "=========================================";flush(log)
+epochs = 100
+batch_size = 128
+gpu_device = 0
 # Very important : this prevents the loss from becoming NaN
-ϵ = 1.0f-10
-
+ϵ = 1.0f-32
-# use 1nd GPU : default
+# use 1st GPU
 #CUDAnative.device!(0)
 device!(gpu_device)
 CuArrays.allowscalar(false)
-@info "Config VGG16, VGG19 models ...";flush(log)
-
-acc = 0; epoch = 0
-if use_saved_model && isfile(model_file) && filesize(model_file) > 0
-    # flush : 버퍼링 없이 즉각 log를 파일 또는 console에 write하도록 함
-    @info "Load saved model $(model_file) ...";flush(log)
-    # model : @save시 사용한 object명
-    @load model_file model acc epoch
-    m = model |> gpu
-    @info " -> accuracy : $(acc), epochs : $(epoch)";flush(log)
-else
-    @info "Create new model ...";flush(log)
-    # VGG16 and VGG19 models
-    vgg16() = Chain(
-        Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)),
-        BatchNorm(64),
-        Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)),
-        BatchNorm(64),
-        MaxPool((2,2)),
-        Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)),
-        BatchNorm(128),
-        Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)),
-        BatchNorm(128),
-        MaxPool((2,2)),
-        Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)),
-        BatchNorm(256),
-        Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
-        BatchNorm(256),
-        Conv((3, 3), 256 => 256, relu,
pad=(1, 1), stride=(1, 1)), - BatchNorm(256), - MaxPool((2,2)), - Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - MaxPool((2,2)), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - MaxPool((2,2)), - x -> reshape(x, :, size(x, 4)), - Dense(512, 4096, relu), - Dropout(0.5), - Dense(4096, 4096, relu), - Dropout(0.5), - Dense(4096, 10), - softmax) - - vgg19() = Chain( - Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(64), - Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(64), - MaxPool((2,2)), - Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(128), - Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(128), - MaxPool((2,2)), - Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(256), - Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(256), - Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(256), - Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), - MaxPool((2,2)), - Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - MaxPool((2,2)), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - BatchNorm(512), - Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), - MaxPool((2,2)), - x -> reshape(x, :, size(x, 4)), - Dense(512, 4096, relu), - Dropout(0.5), - Dense(4096, 4096, relu), - Dropout(0.5), - Dense(4096, 10), - softmax) - - m = vgg16() |> gpu -end -# -# Function to convert the RGB image to Float64 Arrays -#= -1)channelview로 이미지의 color를 channel별로 분리한다. -- 분리된 channel은 맨앞에 새로운 차원을 추가 하여 channel을 분리한다. -- 예) 32x32 이미지의 채널을 분리하면 3x32x32로 3개의 채널이 추가 된다 -2)permutedims로 분리된 채널을 뒤로 보낸다. -- Flux에서 사용되는 이미지 포맷은 WHCN-width,height,#channel,#batches 이다 -- 채널분리된 이미지가 3x32x32인 경우 permutedims(img,(2,3,1))을 적용하면 -- 32x32x3으로 width,height,#channel 순으로 바뀐다. 
-=# +# VGG16 and VGG19 models +vgg16() = Chain( + Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + MaxPool((2,2)), + Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + MaxPool((2,2)), + Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + MaxPool((2,2)), + Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + MaxPool((2,2)), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + MaxPool((2,2)), + x -> reshape(x, :, size(x, 4)), + Dense(512, 4096, relu), + Dropout(0.5), + Dense(4096, 4096, relu), + Dropout(0.5), + Dense(4096, 10), + softmax) + +vgg19() = Chain( + Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(64), + MaxPool((2,2)), + Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(128), + MaxPool((2,2)), + Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(256), + Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)), + MaxPool((2,2)), + Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + MaxPool((2,2)), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + BatchNorm(512), + Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)), + MaxPool((2,2)), + x -> reshape(x, :, size(x, 4)), + Dense(512, 4096, relu), + Dropout(0.5), + Dense(4096, 4096, relu), + Dropout(0.5), + Dense(4096, 10), + softmax) + +m = vgg16() |> gpu + +# Function to convert the RGB image to Float32 Arrays getarray(X) = Float32.(permutedims(channelview(X), (2, 3, 1))) - -@info "Data download and preparing ...";flush(log) function make_minibatch(imgs,labels,batch_size) - data_set = [(cat(imgs[i]..., dims = 4), - labels[:,i]) + data_set = [(cat(imgs[i]..., dims = 4), + labels[:,i]) for i in partition(1:length(imgs), batch_size)] return data_set end -# Fetching the train and verify data and getting them into proper shape -#= -trainimgs(모듈명) : - - 모듈명이 들어 가면 모듈명에 관련된 train용 데이터를 다운받아 리턴한다. - - ex) trainimgs(CIFAR10) : 50,000개의 train data가 return 된다. -X -=# X = trainimgs(CIFAR10) -# Training용 데이터 준비 -# 이미지 채널 분리 및 재배열, training용으로 60,000개중 50,000개를 사용한다. 
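# For reference, a minimal sketch of what the helpers above produce; it assumes the
# CIFAR-10 download via trainimgs succeeded, and the sample_* names exist only for
# this illustration:
sample_labels = float.(onehotbatch([X[i].ground_truth.class for i in 1:4], 1:10))
sample_batch  = make_minibatch([getarray(X[i].img) for i in 1:4], sample_labels, 4)
size(sample_batch[1][1])   # (32, 32, 3, 4) -- WHCN image tensor, the layout Flux's Conv expects
size(sample_batch[1][2])   # (10, 4)        -- matching one-hot label columns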
train_idxs = 1:49000 train_imgs = [getarray(X[i].img) for i in train_idxs] train_labels = float.(onehotbatch([X[i].ground_truth.class for i in train_idxs],1:10)) @@ -219,14 +143,11 @@ test_idxs = 1:10000 test_imgs = [getarray(tX[i].img) for i in test_idxs] test_labels = float.(onehotbatch([tX[i].ground_truth.class for i in test_idxs], 1:10)) test_set = make_minibatch(test_imgs,test_labels,batch_size) -# Defining the loss and accuracy functions - -@info "VGG16 models instantiation ...";flush(log) -loss(x, y) = crossentropy(m(x) .+ ϵ, y .+ ϵ) +# Defining the loss and accuracy functions +loss(x, y) = crossentropy(m(x) .+ ϵ, y) -# accuracy(x, y) = mean(onecold(m(x)|>cpu, 1:10) .== onecold(y|>cpu, 1:10)) -function accuracy(data_set) +function accuracy(data_set) batch_size = size(data_set[1][1])[end] l = length(data_set)*batch_size s = 0f0 @@ -237,77 +158,32 @@ function accuracy(data_set) end # Make sure our is nicely precompiled befor starting our training loop -# train_set[1][1] : (28,28,1,batch_size) -@info "Model pre-compile...";flush(log) m(train_set[1][1] |> gpu) # Defining the callback and the optimizer -# evalcb = throttle(() -> @info(accuracy(verify_set)), 10) opt = ADAM(0.001) -@info "Training model...";flush(log) -best_acc = acc -last_improvement = epoch -# used for plots -for epoch_idx in 1+epoch:(epochs+=epoch) +@info "Training model..." + +for epoch_idx in 1:epochs accs = Array{Float32}(undef,0) - global best_acc, last_improvement + train_set_len = length(train_set) shuffle_idxs = collect(1:train_set_len) - shuffle!(shuffle_idxs) + shuffle!(shuffle_idxs) for (idx,data_idx) in enumerate(shuffle_idxs) (x,y) = train_set[data_idx] # We augment `x` a little bit here, adding in random noise - x = (x .+ 0.1f0*randn(eltype(x),size(x))) |> gpu - y = y|> gpu + x = (x .+ ϵ*randn(eltype(x),size(x))) |> gpu + y = y|> gpu Flux.train!(loss,params(m),[(x,y)],opt) - #Flux.train!(loss,params(m),[(x,y)],opt,cb = evalcb) v_acc = accuracy(verify_set) - @info "Epoch# $(epoch_idx)/$(epochs) - #$(idx)/$(train_set_len) loss: $(loss(x,y)), accuracy: $(v_acc)";flush(log) - # @info "Epoch# $(epoch_idx)/$(epochs) - #$(idx)/$(train_set_len) accuracy: $(v_acc)";flush(log) + @info "Epoch# $(epoch_idx)/$(epochs) - #$(idx)/$(train_set_len) loss: $(loss(x,y)), accuracy: $(v_acc)" push!(accs,v_acc) - end # for - - m_acc = mean(accs) - @info " -> Verify accuracy(mean) : $(m_acc)";flush(log) - test_acc = accuracy(test_set) - @info "Test accuracy : $(test_acc)";flush(log) - - # If our accuracy is good enough, quit out. - if test_acc >= 0.98 - @info " -> Early-exiting: We reached our target accuracy of 98%";flush(log) - model = m |> cpu;acc = test_acc;epoch = epoch_idx - @save model_file model acc epoch - break - end - - # If this is the best accuracy we've seen so far, save the model out - if test_acc >= best_acc - @info " -> New best accuracy! 
saving model out to $(model_file)"; flush(log) - model = m |> cpu;acc = test_acc;epoch = epoch_idx - # @save,@load 시 같은 이름을 사용해야 함, 여기서는 "model"을 사용함 - @save model_file model acc epoch - best_acc = test_acc - last_improvement = epoch_idx - end - - # If we haven't seen improvement in 5 epochs, drop out learning rate: - if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6 - opt.eta /= 10.0 - @info " -> Haven't improved in a while, dropping learning rate to $(opt.eta)!";flush(log) - # After dropping learning rate, give it a few epochs to improve - last_improvement = epoch_idx - end - - if epoch_idx - last_improvement >= 10 - @info " -> We're calling this converged."; flush(log) - break end -end # end of for - -@info "End - $(now())" -if create_log_file - close(log) + m_acc = mean(accs) + @info " -> Verify accuracy(mean) : $(m_acc)" end - +test_acc = accuracy(test_set) +@info "Test accuracy : $(test_acc)" From f0d15120f823c44ca555d90a9b97ea8b062a12b8 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Fri, 21 Feb 2020 10:49:00 +0900 Subject: [PATCH 15/18] Delete cmd_parser.jl --- vision/mnist/cmd_parser.jl | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 vision/mnist/cmd_parser.jl diff --git a/vision/mnist/cmd_parser.jl b/vision/mnist/cmd_parser.jl deleted file mode 100644 index 9a8b4e28a..000000000 --- a/vision/mnist/cmd_parser.jl +++ /dev/null @@ -1,32 +0,0 @@ -module CmdParser -export parse_commandline -using ArgParse - -# Argument parsing -function parse_commandline() - s = ArgParseSettings() - @add_arg_table s begin - "--epochs","-e" - help = "epoch number, default=100" - arg_type = Int - default = 100 - "--batch","-b" - help = "mini-batch size, default=128" - arg_type = Int - default = 128 - "--gpu","-g" - help = "gpu index to use, 0,1,2,3,..., default=0" - arg_type = Int - default = 0 - "--model","-m" - help = "use saved model file, default=true" - arg_type = Bool - default = true - "--log","-l" - help = "create log file, default=true" - arg_type = Bool - default = false - end - return parse_args(s) -end -end \ No newline at end of file From ba14cdbc6c815d0a8e23455ad4687e90460c4d69 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Fri, 21 Feb 2020 10:49:43 +0900 Subject: [PATCH 16/18] Delete cmd_parser.jl --- text/char-rnn/cmd_parser.jl | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 text/char-rnn/cmd_parser.jl diff --git a/text/char-rnn/cmd_parser.jl b/text/char-rnn/cmd_parser.jl deleted file mode 100644 index ba7603a7e..000000000 --- a/text/char-rnn/cmd_parser.jl +++ /dev/null @@ -1,36 +0,0 @@ -module CmdParser -export parse_commandline -using ArgParse - -# Argument parsing -function parse_commandline() - s = ArgParseSettings() - @add_arg_table! 
s begin - "--epochs","-e" - help = "epoch number, default=100" - arg_type = Int - default = 100 - "--batch","-b" - help = "mini-batch size, default=128" - arg_type = Int - default = 50 - "--seq","-s" - help = "sequence size, default=50" - arg_type = Int - default = 50 - "--gpu","-g" - help = "gpu index to use, 0,1,2,3,..., default=0" - arg_type = Int - default = 1 - "--model","-m" - help = "use saved model file, default=true" - arg_type = Bool - default = true - "--log","-l" - help = "create log file, default=true" - arg_type = Bool - default = false - end - return parse_args(s) -end -end \ No newline at end of file From 196976bf175a6eb96ae7ed89441873d7f5abf521 Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Fri, 21 Feb 2020 11:36:49 +0900 Subject: [PATCH 17/18] Update char_rnn_gpu_minibatch.jl --- text/char-rnn/char_rnn_gpu_minibatch.jl | 142 ++++-------------------- 1 file changed, 19 insertions(+), 123 deletions(-) diff --git a/text/char-rnn/char_rnn_gpu_minibatch.jl b/text/char-rnn/char_rnn_gpu_minibatch.jl index 4c1de0c1e..8327afc16 100644 --- a/text/char-rnn/char_rnn_gpu_minibatch.jl +++ b/text/char-rnn/char_rnn_gpu_minibatch.jl @@ -1,5 +1,3 @@ -using BSON -using BSON: @save,@load using Flux using Flux: onehot, chunk, batchseq, throttle, crossentropy using StatsBase: wsample @@ -7,64 +5,26 @@ using Base.Iterators: partition using CuArrays using CUDAnative: device! using Random -using Dates -using Logging ϵ = 1.0f-32 -working_path = dirname(@__FILE__) -file_path(file_name) = joinpath(working_path,file_name) -include(file_path("cmd_parser.jl")) - -model_file = file_path("char_rnn_gpu_minibatch.bson") - -# # Get arguments -parsed_args = CmdParser.parse_commandline() -epochs = parsed_args["epochs"] -batch_size = parsed_args["batch"] -use_saved_model = parsed_args["model"] -gpu_device = parsed_args["gpu"] -create_log_file = parsed_args["log"] -sequence = parsed_args["seq"] - -if create_log_file - log_file ="./char_rnn_gpu_minibatch_$(Dates.format(now(),"yyyymmdd-HHMMSS")).log" - log = open(log_file, "w+") -else - log = stdout -end -global_logger(ConsoleLogger(log)) - -start_time = now() -@info "Start - $(start_time)";flush(log) -@info "=============== Arguments ===============" -@info "epochs=$(epochs)" -@info "batch_size=$(batch_size)" -@info "use_saved_model=$(use_saved_model)" -@info "gpu_device=$(gpu_device)" -@info "sequence=$(sequence)" -@info "log_file=$(create_log_file)" -@info "=========================================";flush(log) +# Get arguments +epochs = 2 +batch_size = 50 +sequence = 50 +gpu_device = 0 device!(gpu_device) CuArrays.allowscalar(false) -input_file = file_path("input.txt") +input_file = joinpath(dirname(@__FILE__),"input.txt") + isfile(input_file) || download("https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt", input_file) -# read(input_file) : 파일에서 텍스트 읽오옴 - 바이너리 -# String(read(input_file)) : 바이너리를 스트링으로 변환 -# collect(String(read(input_file)) : 스트링을 개별 char array로 변환 - Array{Char,1} text = collect(String(read(input_file))) - -# unique(text) : text에서 unique한 char array를 만든다 - 중복제거 - 하고 -# 맨뒤에 '_' 를 추가 한다. -# unique한 char -알파벳 array를 만든다. alphabet = [unique(text)...,'_'] -# ch onehot을 만든다. 
onhot의 길이는 length(alphabet)이고 onehot에서 1이 있는 위치는 -# alphabet에서 ch가 있는 위치와 동일 text = map(ch -> Float32.(onehot(ch,alphabet)),text) stop = Float32.(onehot('_',alphabet)) @@ -77,96 +37,38 @@ txt = circshift(text,-1) txt[end] = stop Ys = collect(partition(batchseq(chunk(txt, nbatch), stop), seqlen)) -vloss=Inf; epoch = 0; t_sec = Second(0); -if use_saved_model && isfile(model_file) && filesize(model_file) > 0 - # flush : 버퍼링 없이 즉각 log를 파일 또는 console에 write하도록 함 - @info "Load saved model $(model_file) ...";flush(log) - # model : @save시 사용한 object명 - @load model_file model vloss epoch sec - t_sec = sec - m = model |> gpu - run_min = round(Second(t_sec), Minute) - @info " -> loss : $(vloss), epochs : $(epoch), run time : $(run_min)";flush(log) -else - @info "Create new model ...";flush(log) - model = Chain( - LSTM(N, 128), - LSTM(128, 256), - LSTM(256, 128), - Dense(128, N), - softmax) - m = model |>gpu -end +model = Chain( + LSTM(N, 128), + LSTM(128, 256), + LSTM(256, 128), + Dense(128, N), + softmax) + m = model |>gpu opt = ADAM(0.01) -tx, ty = (Xs[1]|>gpu, Ys[1]|>gpu) +tx, ty = (Xs[5]|>gpu, Ys[5]|>gpu) function loss(xx, yy) out = 0.0f0 for (idx, x) in enumerate(xx) out += crossentropy(m(x) .+ ϵ, yy[idx]) - end + end Flux.reset!(m) out end -@info "Training model...";flush(log) - idxs = length(Xs) -best_loss = vloss -last_improvement = epoch -epoch_start_time = now() -epochs += epoch -epoch += 1 -for epoch_idx in epoch:epochs - global best_loss,last_improvement,t_sec,epoch_start_time - mean_loss = 0.0f0 +for epoch_idx in 1:epochs for (idx,(xs,ys)) in enumerate(zip(Xs, Ys)) Flux.train!(loss, params(m), [(xs|>gpu,ys|>gpu)], opt) lss = loss(tx,ty) - mean_loss += lss if idx % 10 == 0 - @info "epoch# $(epoch_idx)/$(epochs)-$(idx)/$(idxs) loss = $(lss)";flush(log) + @info "epoch# $(epoch_idx)/$(epochs)-$(idx)/$(idxs) loss = $(lss)" end end - mean_loss /= idxs - - run_sec = round(Millisecond(now()-epoch_start_time), Second) - run_min = round(Second(run_sec), Minute) - t_run_min = round(Second(t_sec+run_sec), Minute) - @info "epoch# $(epoch_idx)/$(epochs)-> mean loss : $(mean_loss), running time : $(run_min)/$(t_run_min)";flush(log) - - # If this is the best accuracy we've seen so far, save the model out - if mean_loss <= best_loss - @info " -> New best loss! 
saving model out to $(model_file)"; flush(log) - model = m |> cpu - vloss = mean_loss;epoch = epoch_idx; sec = t_sec + run_sec - # @save,@load 시 같은 이름을 사용해야 함, 여기서는 "model"을 사용함 - @save model_file model vloss epoch sec - best_loss = mean_loss - last_improvement = epoch_idx - end - - # If we haven't seen improvement in 5 epochs, drop out learning rate: - if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6 - opt.eta /= 10.0 - @info " -> Haven't improved in a while, dropping learning rate to $(opt.eta)!";flush(log) - # After dropping learning rate, give it a few epochs to improve - last_improvement = epoch_idx - end - - if epoch_idx - last_improvement >= 10 - @info " -> We're calling this converged."; flush(log) - break - end end -end_time = now() -@info "End - $(end_time)";flush(log) -run_min = round(round(Millisecond(end_time - start_time), Second),Minute) -@info "Running time : $(run_min)";flush(log) # Sampling - function sample(m, alphabet, len) m = cpu(m) Flux.reset!(m) @@ -178,10 +80,4 @@ function sample(m, alphabet, len) end return String(take!(buf)) end - -@info sample(m, alphabet, 1000);flush(log) - -if create_log_file - close(log) -end - +@info sample(m, alphabet, 1000) From f2ea384598dd29d13f2fe9244a2433709a90fe2b Mon Sep 17 00:00:00 2001 From: Sungho Park Date: Fri, 21 Feb 2020 11:37:27 +0900 Subject: [PATCH 18/18] Update char_rnn_gpu_minibatch.jl update --- text/char-rnn/char_rnn_gpu_minibatch.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/text/char-rnn/char_rnn_gpu_minibatch.jl b/text/char-rnn/char_rnn_gpu_minibatch.jl index 8327afc16..d0a91a4a3 100644 --- a/text/char-rnn/char_rnn_gpu_minibatch.jl +++ b/text/char-rnn/char_rnn_gpu_minibatch.jl @@ -8,7 +8,6 @@ using Random ϵ = 1.0f-32 -# Get arguments epochs = 2 batch_size = 50 sequence = 50
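# With cmd_parser.jl removed, the settings above are fixed in the script. A minimal
# sketch of restoring run-time overrides without ArgParse; the environment-variable
# names below are only illustrative and are not defined anywhere in these patches:
epochs     = parse(Int, get(ENV, "CHAR_RNN_EPOCHS", string(epochs)))
batch_size = parse(Int, get(ENV, "CHAR_RNN_BATCH", string(batch_size)))
sequence   = parse(Int, get(ENV, "CHAR_RNN_SEQ", string(sequence)))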