The context is simple: I train a model in Flux.jl on my GPU (an NVIDIA GeForce RTX 3080, laptop), save the model state with JLD2, and everything works up to that point (the save step is sketched below). But when I try to load the model from the saved state, I get the error shown in the REPL session that follows.
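Roughly, the save side looked like this (a sketch, not the exact script: the training loop is elided and the checkpoint path is a placeholder; `Flux.state` plus `jldsave` is the save pattern from the Flux docs):

```julia
using Flux, JLD2, CUDA

model = admm_restoration_model(cfg) |> gpu   # train entirely on the GPU
# ... training loop elided ...

# Flux.state collects the model's trainable and non-trainable arrays.
# Since the model lives on the GPU, these are still CuArrays at this point.
model_state = Flux.state(model)
jldsave("checkpoint.jld2"; model_state)      # placeholder path
```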
```julia
julia> using Flux, JLD2, CUDA
julia> include("src/utilities/cfg_parse.jl")
parse_terminal_args (generic function with 1 method)
julia> include("src/nets/net_build.jl")
admm_restoration_model (generic function with 1 method)
julia> cfg = fetch_json_data("train_cfg.json")
Dict{String, Any} with 9 entries:
  "epochs"          => 130
  "lr_rate"         => 0.0004
  "im_shape"        => Any[256, 256]
  "use_iso"         => true
  "branches"        => 2
  "model_save_path" => "/models_weights"
  "train_data"      => Dict{String, Any}("x_path"=>"D:/Projects/ISETC2022/dcnn-deblur/dataset/GOPRO_Large/xt_256_0p8blur_10noise", "y_path"=>"D:/Projects/ISETC2022/dcnn-deblur/dataset/GOPRO_Large/xt_256_0p8blur_10noise")
  "batch_size"      => 3
  "eval_data"       => Dict{String, Any}("x_path"=>"D:/Projects/ISETC2022/dcnn-deblur/dataset/GOPRO_Large/xt_256_0p8blur_10noise", "y_path"=>"D:/Projects/ISETC2022/dcnn-deblur/dataset/GOPRO_Large/xt_256_0p8blur_10noise")
julia> model = admm_restoration_model(cfg)
MODEL SIZE (#parameters): 3581088Chain(
  Parallel(
    chcat,
    Chain(
      ADMMDeconv{typeof(relu6), Array{Float32, 4}, Vector{Float32}, Bool, Vector{Float32}, Int64, Bool, Float32}(NNlib.relu6, Float32[0.035917997 0.079589315 … 0.06321434 -0.08105426; 0.010757283 -0.061283972 … 0.040873725 -0.11001465; … ; 0.07982061 -0.09194836 … 0.091350466 -0.14958367; -0.053058878 0.097258545 … 0.1495896 -0.14328365;;;;], false, Float32[0.0006751783], Float32[0.5695928], 50, true, 0.0f0),  # 102 parameters
      ConvTranspose((38, 38), 3 => 18),  # 77_994 parameters
      Conv((19, 19), 18 => 18),  # 116_982 parameters
      AdaptiveMaxPool((256, 256)),
      BatchNorm(18, relu6),  # 36 parameters, plus 36
      ConvTranspose((20, 20), 18 => 32),  # 230_432 parameters
      Conv((10, 10), 32 => 32),  # 102_432 parameters
      AdaptiveMaxPool((256, 256)),
      BatchNorm(32, relu6),  # 64 parameters, plus 64
      ConvTranspose((16, 16), 32 => 64),  # 524_352 parameters
      Conv((8, 8), 64 => 64),  # 262_208 parameters
      AdaptiveMaxPool((256, 256)),
      BatchNorm(64, relu6),  # 128 parameters, plus 128
      ConvTranspose((16, 16), 64 => 64),  # 1_048_640 parameters
      Conv((8, 8), 64 => 64),  # 262_208 parameters
      AdaptiveMaxPool((256, 256)),
      BatchNorm(64, relu6),  # 128 parameters, plus 128
    ),
    Chain(
      ADMMDeconv{typeof(relu6), Array{Float32, 4}, Vector{Float32}, Bool, Vector{Float32}, Int64, Bool, Float32}(NNlib.relu6, Float32[0.075822905 -0.050852973 … 0.08122373 -0.039612506; 0.026294839 -0.009715072 … 0.03403802 0.015126286; … ; 0.052702498 -0.0404368 … 0.037942544 -0.005757671; 0.08515987 -0.02476077 … 0.06367684 -0.004382413;;;;], false, Float32[1.0818578], Float32[0.14859931], 50, true, 0.0f0),  # 402 parameters
      ConvTranspose((38, 38), 3 => 3),  # 12_999 parameters
      BatchNorm(3, relu6),  # 6 parameters, plus 6
      ADMMDeconv{typeof(relu6), Array{Float32, 4}, Vector{Float32}, Bool, Vector{Float32}, Int64, Bool, Float32}(NNlib.relu6, Float32[-0.020457862 0.124111876 … -0.096539654 0.029231917; 0.13135242 0.052027464 … 0.024933446 -0.14350384; … ; 0.15128526 0.010382508 … -0.050241567 -0.096333385; -0.030062137 0.0784706 … -0.029577373 0.13084307;;;;], false, Float32[0.034583375], Float32[1.2101591], 50, true, 0.0f0),  # 102 parameters
      Conv((19, 19), 3 => 18),  # 19_512 parameters
      BatchNorm(18, relu6),  # 36 parameters, plus 36
      Conv((10, 10), 18 => 18),  # 32_418 parameters
      BatchNorm(18, relu6),  # 36 parameters, plus 36
      Conv((8, 8), 18 => 18),  # 20_754 parameters
      BatchNorm(18, relu6),  # 36 parameters, plus 36
      AdaptiveMaxPool((256, 256)),
      ConvTranspose((16, 16), 18 => 18),  # 82_962 parameters
      BatchNorm(18, relu6),  # 36 parameters, plus 36
      ConvTranspose((20, 20), 18 => 32),  # 230_432 parameters
      BatchNorm(32, relu6),  # 64 parameters, plus 64
      ConvTranspose((16, 16), 32 => 64),  # 524_352 parameters
      BatchNorm(64, relu6),  # 128 parameters, plus 128
      AdaptiveMaxPool((256, 256)),
    ),
  ),
  ConvTranspose((9, 9), 128 => 3, relu6),  # 31_107 parameters
  AdaptiveMaxPool((256, 256)),
)  # Total: 63 trainable arrays, 3_581_088 parameters,
   # plus 22 non-trainable, 698 parameters, summarysize 13.679 MiB.
julia> model_state = JLD2.load("D:/Projects/admm-deconv/trained_models/plm/plm-ep_4-vloss_0.4733-psnr_5.8388-mse_0.2607.jld2", "model_state");
julia> Flux.loadmodel!(model, model_state)
ERROR: CUDA error: invalid device context (code 201, ERROR_INVALID_CONTEXT)
Stacktrace:
 [1] throw_api_error(res::CUDA.cudaError_enum)
   @ CUDA C:\Users\George\.julia\packages\CUDA\75aiI\lib\cudadrv\libcuda.jl:30
 [2] isvalid(ctx::CuContext)
   @ CUDA C:\Users\George\.julia\packages\CUDA\75aiI\lib\cudadrv\context.jl:75
 [3] #context!#990
   @ C:\Users\George\.julia\packages\CUDA\75aiI\lib\cudadrv\state.jl:165 [inlined]
 [4] context!
   @ C:\Users\George\.julia\packages\CUDA\75aiI\lib\cudadrv\state.jl:163 [inlined]
 [5] unsafe_copyto!(dest::Vector{Float32}, doffs::Int64, src::CuArray{Float32, 1, CUDA.DeviceMemory}, soffs::Int64, n::Int64)
   @ CUDA C:\Users\George\.julia\packages\CUDA\75aiI\src\array.jl:550
 [6] copyto!
   @ C:\Users\George\.julia\packages\CUDA\75aiI\src\array.jl:503 [inlined]
 [7] copyto!
   @ C:\Users\George\.julia\packages\CUDA\75aiI\src\array.jl:507 [inlined]
 [8] loadleaf!(dst::Vector{Float32}, src::CuArray{Float32, 1, CUDA.DeviceMemory})
   @ Flux C:\Users\George\.julia\packages\Flux\CUn7U\src\loading.jl:22
 [9] loadmodel!(dst::ADMMDeconv{typeof(relu6), Array{…}, Vector{…}, Bool, Vector{…}, Int64, Bool, Float32}, src::@NamedTuple{weight::CuArray{…}, bias::Bool, λ::CuArray{…}, ρ::CuArray{…}}; filter::Function, cache::Base.IdSet{Any})
   @ Flux C:\Users\George\.julia\packages\Flux\CUn7U\src\loading.jl:103
[10] loadmodel!(dst::Tuple{…}, src::Tuple{…}; filter::Function, cache::Base.IdSet{…})
   @ Flux C:\Users\George\.julia\packages\Flux\CUn7U\src\loading.jl:105
[11] loadmodel!(dst::Chain{Tuple{…}}, src::@NamedTuple{layers::Tuple{…}}; filter::Function, cache::Base.IdSet{Any})
   @ Flux C:\Users\George\.julia\packages\Flux\CUn7U\src\loading.jl:105
[12] loadmodel!(dst::Tuple{Chain{Tuple{…}}, Chain{Tuple{…}}}, src::Tuple{@NamedTuple{layers::Tuple{…}}, @NamedTuple{layers::Tuple{…}}}; filter::Function, cache::Base.IdSet{Any})
   @ Flux C:\Users\George\.julia\packages\Flux\CUn7U\src\loading.jl:105
[13] loadmodel!(dst::Parallel{typeof(chcat), Tuple{Chain{Tuple{…}}, Chain{Tuple{…}}}}, src::@NamedTuple{connection::Tuple{}, layers::Tuple{@NamedTuple{layers::Tuple{…}}, @NamedTuple{layers::Tuple{…}}}}; filter::Function, cache::Base.IdSet{Any})
   @ Flux C:\Users\George\.julia\packages\Flux\CUn7U\src\loading.jl:105
[14] loadmodel!(dst::Tuple{Parallel{…}, ConvTranspose{…}, AdaptiveMaxPool{…}}, src::Tuple{@NamedTuple{…}, @NamedTuple{…}, Tuple{}}; filter::Function, cache::Base.IdSet{Any})
   @ Flux C:\Users\George\.julia\packages\Flux\CUn7U\src\loading.jl:105
[15] loadmodel!(dst::Chain{Tuple{Parallel{…}, ConvTranspose{…}, AdaptiveMaxPool{…}}}, src::@NamedTuple{layers::Tuple{@NamedTuple{…}, @NamedTuple{…}, Tuple{}}}; filter::Function, cache::Base.IdSet{Any})
   @ Flux C:\Users\George\.julia\packages\Flux\CUn7U\src\loading.jl:105
[16] loadmodel!(dst::Chain{Tuple{Parallel{…}, ConvTranspose{…}, AdaptiveMaxPool{…}}}, src::@NamedTuple{layers::Tuple{@NamedTuple{…}, @NamedTuple{…}, Tuple{}}})
   @ Flux C:\Users\George\.julia\packages\Flux\CUn7U\src\loading.jl:90
[17] top-level scope
   @ REPL[7]:1
Some type information was truncated. Use `show(err)` to see complete types.
```
Note that I use a custom layer (`ADMMDeconv`) here, and I initially thought it might be the cause, so I also trained a model without it, using only built-in layers. The same error persists. My reading of the stack trace is sketched below.
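From the stack trace, the saved `model_state` appears to still contain `CuArray`s; when JLD2 reconstructs them in a fresh session, they reference the CUDA context of the training session, which no longer exists, so the `copyto!` inside `loadmodel!` fails with `ERROR_INVALID_CONTEXT`. A minimal sketch that I would expect to reproduce this with only built-in layers (hypothetical layer sizes and a throwaway path):

```julia
using Flux, JLD2, CUDA

m = Chain(Conv((3, 3), 3 => 8, relu6), BatchNorm(8)) |> gpu
jldsave("tmp_state.jld2"; model_state = Flux.state(m))  # state still holds CuArrays

# --- restart Julia, then: ---
using Flux, JLD2, CUDA
m2 = Chain(Conv((3, 3), 3 => 8, relu6), BatchNorm(8))
model_state = JLD2.load("tmp_state.jld2", "model_state")
Flux.loadmodel!(m2, model_state)  # ERROR: CUDA error: invalid device context

# NOTE (assumption): saving Flux.state(m |> cpu) instead should avoid this,
# since the saved state would then hold plain Arrays rather than CuArrays.
```

If that reading is correct, the problem lies with the deserialized GPU arrays rather than with any particular layer type.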