Skip to content

Clean/efficient way to recover R2 and other parameters in the post-processing from Script "Q10_parallel_with_threads.jl" #174

@A-Tangarife

Description

@A-Tangarife

I am working on the EasyQ10 project. After successfully training on the Fluxnet sites, it would be quite useful to have a dataframe to share that contains at least the following information (additional parameters may be useful for plotting or further analysis):

    site = site,
    Q10 = q10,
    MAT = MAT,
    Rb_mean = Rb_mean,
    RUE_mean = RUE_mean,
    GPP_mean = GPP_mean,
    GPP_median = GPP_median,
    RECO_mean = RECO_mean,
    R2 = r2,
    RUE_default = parameters.RUE[1],
    RUE_lower   = parameters.RUE[2],
    RUE_upper   = parameters.RUE[3],
    Rb_default  = parameters.Rb[1],
    Rb_lower    = parameters.Rb[2],
    Rb_upper    = parameters.Rb[3],
    Q10_default = parameters.Q10[1],
    Q10_lower   = parameters.Q10[2],
    Q10_upper   = parameters.Q10[3]

The current problem lies in recovering R2, which, as far as I understand, is derived from the NSE of the validation loss.
Is there a way to improve how the data are recovered and how this dataframe is created?

My current post-processing code is the following:

# =============================================================================
# Post-processing / forward run (on master)
# =============================================================================

# Load one site’s best model (consistent path with training)

using Pkg
#import Pkg; Pkg.add("CSV")
#import Pkg; Pkg.add("DataFrames")
#import Pkg; Pkg.add("Statistics")

using CSV, Statistics, DataFrames
@info "Starting post-processing..."

# Summary table; one row per site is appended by the loop over `selected_sites`.
dfQ10 = DataFrame()

# --- Single-site inspection ---------------------------------------------------
# NOTE(review): `site` and `output_folder` are not defined in this snippet; they
# are presumably left in scope by the training script — confirm.
output_file = joinpath(output_folder, "$(site)", "trained_model.jld2")

# Groups stored in the results file.
all_groups = get_all_groups(output_file)

# Observations and predictions are looked up under the "training" key.
obs = load_group(output_file, :observations)
keys(obs["training"])

preds = load_group(output_file, :predictions)
keys(preds["training"])
preds["training"].parameters

# Loss histories for the validation and training sets.
vloss = load_group(output_file, :validation_loss)
tloss = load_group(output_file, :training_loss)

# `1 - nse.NEE` for every entry of the training history, then the largest such
# value over the validation history.
# NOTE(review): this value is what the script treats as R2 — confirm that
# `nse.NEE` is stored so that `1 - nse.NEE` is the intended score.
[1 - entry.nse.NEE for entry in tloss[1]]
maximum(1 - entry.nse.NEE for entry in vloss[1])

"""
    summarize_site(site, output_folder, data_dir, parameters)

Build the summary row for one trained site.

Reads `trained_model.jld2` from `joinpath(output_folder, site)` and the site's
NetCDF file from `data_dir`, and returns a `NamedTuple` with the fitted `Q10`,
the mean air temperature (`MAT`), summary statistics of the training
predictions, the best validation score (`R2`), and the `RUE`/`Rb`/`Q10` entries
of `parameters`.

Returns `nothing` (after emitting a warning) when the model file does not exist.

# Arguments
- `site`: site identifier; used as the sub-folder name and the `.nc` base name.
- `output_folder`: root folder of the training outputs.
- `data_dir`: folder containing `<site>.nc`.
- `parameters`: object with `RUE`, `Rb` and `Q10` fields, each indexable at
  1, 2 and 3. Those entries are written to the `*_default`, `*_lower` and
  `*_upper` columns.
  NOTE(review): assumes `parameters` is the parameter definition left in scope
  by the training script — confirm.
"""
function summarize_site(site, output_folder, data_dir, parameters)
    output_file = joinpath(output_folder, "$(site)", "trained_model.jld2")

    @show output_file
    if !isfile(output_file)
        @warn "Output file does not exist for site $site, skipping."
        return nothing
    end

    train = load_group(output_file, :predictions)["training"]

    fluxnet_data = load_fluxnet_nc(joinpath(data_dir, "$site.nc"); timevar="date")
    df = fluxnet_data.timeseries

    # Apply `f` to an optional prediction variable, ignoring missing values;
    # yields `missing` when the variable was not stored for this site.
    stat_or_missing(f, name) =
        haskey(train, name) ? f(skipmissing(getproperty(train, name))) : missing

    # R2 is recovered per site as the largest `1 - nse.NEE` over the stored
    # validation-loss history.
    # NOTE(review): assumes `vloss[1]` holds one entry per evaluation with an
    # `nse.NEE` field — confirm against the training script.
    history = load_group(output_file, :validation_loss)[1]
    r2 = isempty(history) ? missing : maximum(1 - entry.nse.NEE for entry in history)

    return (
        site = site,
        Q10 = train.Q10[1],
        MAT = mean(skipmissing(df.TA)),
        Rb_mean = stat_or_missing(mean, :Rb),
        RUE_mean = stat_or_missing(mean, :RUE),
        GPP_mean = stat_or_missing(mean, :GPP),
        GPP_median = stat_or_missing(median, :GPP),
        RECO_mean = stat_or_missing(mean, :RECO),
        R2 = r2,
        RUE_default = parameters.RUE[1],
        RUE_lower   = parameters.RUE[2],
        RUE_upper   = parameters.RUE[3],
        Rb_default  = parameters.Rb[1],
        Rb_lower    = parameters.Rb[2],
        Rb_upper    = parameters.Rb[3],
        Q10_default = parameters.Q10[1],
        Q10_lower   = parameters.Q10[2],
        Q10_upper   = parameters.Q10[3],
    )
end

for site in selected_sites
    row = summarize_site(site, output_folder, data_dir, parameters)
    # `promote=true` lets a column widen to `Union{Missing, T}` when only some
    # sites lack a variable; without it `push!` throws on the first mismatch.
    isnothing(row) || push!(dfQ10, row; promote=true)
end

# Save the summary dataframe to a CSV file

# Write `dfQ10` to "training_summary.csv" inside `output_folder` and log the path.
csv_file = joinpath(output_folder, "training_summary.csv")
CSV.write(csv_file, dfQ10)
@info "Saved summary file: $csv_file"

# ============ PLOTTING =================================================================

# Load the plotting script from the EasyQ10 checkout; it is expected to define
# `plot_Q10_vs_MAT` (TODO confirm).
include(joinpath(EasyQ10_path, "plotting", "plot_Q10_vs_MAT.jl"))
# NOTE(review): the meaning of the positional `3.5` and of `k = 3` is defined by
# `plot_Q10_vs_MAT` and is not visible in this snippet.
fig = plot_Q10_vs_MAT(dfQ10, 3.5, ylabel="Q₁₀, from hybrid NEE partitioning", k = 3)
display(fig)
# NOTE(review): `save` is not imported in this snippet; presumably it comes from
# the plotting backend loaded elsewhere — confirm.
save(joinpath(output_folder, "Q10_vs_MAT.png"), fig)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions