diff --git a/examples/atomistic/compare-samplers-iso17.jl b/examples/atomistic/compare-samplers-iso17.jl index cb16271..ae662b3 100644 --- a/examples/atomistic/compare-samplers-iso17.jl +++ b/examples/atomistic/compare-samplers-iso17.jl @@ -11,6 +11,7 @@ include("utils/aux_sample_functions.jl") include("utils/plots.jl") include("utils/plotmetrics.jl") include("utils/atom-conf-features-extxyz.jl") +include("utils/subtract_peratom_e.jl") # Data ######################################################################### @@ -31,6 +32,17 @@ for (system, energy, forces) in chunk end confs = DataSet(confs) +# For ISO17, all confs have the same number of atoms +num_atoms = length(get_system(ds_train[1])) +all_energies = get_all_energies(confs) +avg_energy_per_atoms = mean(all_energies)/num_atoms +vref_dict = Dict(:H => avg_energy_per_atom, + :C => avg_energy_per_atom, + :O => avg_energy_per_atom) + +# This will permanently change the energies in the entire dataset !!! +adjust_energies(confs,vref_dict) + # Define basis basis = ACE(species = [:C, :O, :H], body_order = 4, @@ -41,12 +53,13 @@ basis = ACE(species = [:C, :O, :H], rcutoff = 4.4 ); # Update training dataset by adding energy and force descriptors -println("Computing energy descriptors of dataset...") -B_time = @elapsed e_descr = compute_local_descriptors(confs, basis) -println("Computing force descriptors of dataset...") -dB_time = @elapsed f_descr = compute_force_descriptors(confs, basis) -GC.gc() -ds_train = DataSet(confs .+ e_descr .+ f_descr) +#println("Computing energy descriptors of dataset...") +#B_time = @elapsed e_descr = compute_local_descriptors(confs, basis) +#println("Computing force descriptors of dataset...") +#dB_time = @elapsed f_descr = compute_force_descriptors(confs, basis) +#GC.gc() +#ds_train = DataSet(confs .+ e_descr .+ f_descr) +ds_train = deepcopy(confs) # Load test atomistic configurations (random subset of size N) M = 100 @@ -61,13 +74,16 @@ for (system, energy, forces) in chunk end confs = DataSet(confs) +# Note, I don't modify the test set energies !!! + # Update test dataset by adding energy and force descriptors -println("Computing energy descriptors of dataset...") -B_time = @elapsed e_descr = compute_local_descriptors(confs, basis) -println("Computing force descriptors of dataset...") -dB_time = @elapsed f_descr = compute_force_descriptors(confs, basis) -GC.gc() -ds_test = DataSet(confs .+ e_descr .+ f_descr) +#println("Computing energy descriptors of dataset...") +#B_time = @elapsed e_descr = compute_local_descriptors(confs, basis) +#println("Computing force descriptors of dataset...") +#dB_time = @elapsed f_descr = compute_force_descriptors(confs, basis) +#GC.gc() +#ds_test = DataSet(confs .+ e_descr .+ f_descr) +ds_test = deepcopy(confs) # Sampling experiments ######################################################### @@ -111,7 +127,7 @@ for j in 1:n_experiments for batch_size_prop in batch_size_props for sampler in samplers sample_experiment!(res_path, j, sampler, batch_size_prop, n_train, - ged_mat, ds_train_rnd, ds_test_rnd, basis, metrics) + ged_mat, ds_train_rnd, ds_test_rnd, basis, metrics; vref_dict=vref_dict) GC.gc() end end diff --git a/examples/atomistic/utils/aux_sample_functions.jl b/examples/atomistic/utils/aux_sample_functions.jl index 406de21..ac45c59 100644 --- a/examples/atomistic/utils/aux_sample_functions.jl +++ b/examples/atomistic/utils/aux_sample_functions.jl @@ -1,24 +1,31 @@ # Functions for performing sampling comparisons +include("./subtract_peratom_e.jl") # Fit function used to get errors based on sampling -function fit(path, ds_train, ds_test, basis) +function fit(path, ds_train, ds_test, basis; vref_dict=nothing) # Learn lb = PotentialLearning.LBasisPotential(basis) - ws, int = [30.0, 1.0], true - learn!(lb, ds_train, ws, int) + # no intercept , out-of-core formulation of linear learn + ws = [30.0, 1.0] + _AtWA, _AtWb = PotentialLearning.ooc_learn!(lb, ds_train;ws=ws,symmetrize=false, λ=0.01) @save_var path lb.β - @save_var path lb.β0 # Post-process output: calculate metrics, create plots, and save results ####### # Get true and predicted values n_atoms_train = length.(get_system.(ds_train)) n_atoms_test = length.(get_system.(ds_test)) - - e_train, e_train_pred = get_all_energies(ds_train) ./ n_atoms_train, - get_all_energies(ds_train, lb) ./ n_atoms_train + + # training energies were permanently modified, so need special function to recover original energies and predicted energies + if !isnothing(vref_dict) + e_train = get_all_energies_w_onebody(ds_train,vref_dict) ./n_atoms_train + e_train_pred = get_all_energies_w_onebody(ds_train,lb,vref_dict) ./n_atoms_train + else + e_train, e_train_pred = get_all_energies(ds_train) ./ n_atoms_train, + get_all_energies(ds_train, lb) ./ n_atoms_train + end f_train, f_train_pred = get_all_forces(ds_train), get_all_forces(ds_train, lb) @save_var path e_train @@ -26,8 +33,15 @@ function fit(path, ds_train, ds_test, basis) @save_var path f_train @save_var path f_train_pred - e_test, e_test_pred = get_all_energies(ds_test) ./ n_atoms_test, - get_all_energies(ds_test, lb) ./ n_atoms_test + # test energies were *not* permanently modified, so only need special function update predicted energies + if !isnothing(vref_dict) + e_test = get_all_energies(ds_test) ./ n_atoms_test + e_test_pred = get_all_energies_w_onebody(ds_test,lb,vref_dict) ./n_atoms_test + else + e_test, e_test_pred = get_all_energies(ds_test) ./ n_atoms_test, + get_all_energies(ds_test, lb) ./ n_atoms_test + end + f_test, f_test_pred = get_all_forces(ds_test), get_all_forces(ds_test, lb) @save_var path e_test @@ -84,7 +98,7 @@ end # Main sample experiment function function sample_experiment!(res_path, j, sampler, batch_size_prop, n_train, - ged_mat, ds_train_rnd, ds_test_rnd, basis, metrics) + ged_mat, ds_train_rnd, ds_test_rnd, basis, metrics; vref_dict=nothing) try print("Experiment:$j, method:$sampler, batch_size_prop:$batch_size_prop") exp_path = "$res_path/$j-$sampler-bsp$batch_size_prop/" @@ -93,7 +107,7 @@ function sample_experiment!(res_path, j, sampler, batch_size_prop, n_train, sampling_time = @elapsed begin inds = sampler(ged_mat, batch_size) end - metrics_j = fit(exp_path, (@views ds_train_rnd[Int64.(inds)]), ds_test_rnd, basis) + metrics_j = fit(exp_path, (@views ds_train_rnd[Int64.(inds)]), ds_test_rnd, basis; vref_dict=vref_dict) metrics_j = merge(OrderedDict("exp_number" => j, "method" => "$sampler", "batch_size_prop" => batch_size_prop, diff --git a/examples/atomistic/utils/subtract_peratom_e.jl b/examples/atomistic/utils/subtract_peratom_e.jl new file mode 100644 index 0000000..633cc0e --- /dev/null +++ b/examples/atomistic/utils/subtract_peratom_e.jl @@ -0,0 +1,56 @@ +using PotentialLearning + +function subtract_peratom_e(config::Configuration, vref_dict) + orig_e = get_energy(config) + e_unit = orig_e.u + new_e = get_values(orig_e) + for atom in get_system(config).particles + species = atom.atomic_symbol + new_e -= vref_dict[species] + end + + Energy(new_e,e_unit) +end + +function adjust_energies(ds, vref_dict) + for config in ds + new_energy = subtract_peratom_e(config,vref_dict) + config.data[Energy] = new_energy + end +end + +function get_all_energies_w_onebody( + ds::DataSet, + bp::BasisPotential, + vref_dict +) + energies = Float64[] + for config in ds + energy = PotentialLearning.potential_energy(config, bp) + for atom in get_system(config).particles + species = atom.atomic_symbol + energy += vref_dict[species] + end + push!(energies, energy) + end + + return energies +end + +function get_all_energies_w_onebody( + ds::DataSet, + vref_dict +) + energies = Float64[] + for config in ds + energy = get_values(get_energy(config)) + for atom in get_system(config).particles + species = atom.atomic_symbol + energy += vref_dict[species] + end + push!(energies, energy) + end + + return energies +end +