Skip to content

Sw/iso17 ready #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 29 additions & 13 deletions examples/atomistic/compare-samplers-iso17.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ include("utils/aux_sample_functions.jl")
include("utils/plots.jl")
include("utils/plotmetrics.jl")
include("utils/atom-conf-features-extxyz.jl")
include("utils/subtract_peratom_e.jl")

# Data #########################################################################

Expand All @@ -31,6 +32,17 @@ for (system, energy, forces) in chunk
end
confs = DataSet(confs)

# For ISO17, all confs have the same number of atoms
num_atoms = length(get_system(ds_train[1]))
all_energies = get_all_energies(confs)
avg_energy_per_atoms = mean(all_energies)/num_atoms
vref_dict = Dict(:H => avg_energy_per_atom,
:C => avg_energy_per_atom,
:O => avg_energy_per_atom)

# This will permanently change the energies in the entire dataset !!!
adjust_energies(confs,vref_dict)

# Define basis
basis = ACE(species = [:C, :O, :H],
body_order = 4,
Expand All @@ -41,12 +53,13 @@ basis = ACE(species = [:C, :O, :H],
rcutoff = 4.4 );

# Update training dataset by adding energy and force descriptors
println("Computing energy descriptors of dataset...")
B_time = @elapsed e_descr = compute_local_descriptors(confs, basis)
println("Computing force descriptors of dataset...")
dB_time = @elapsed f_descr = compute_force_descriptors(confs, basis)
GC.gc()
ds_train = DataSet(confs .+ e_descr .+ f_descr)
#println("Computing energy descriptors of dataset...")
#B_time = @elapsed e_descr = compute_local_descriptors(confs, basis)
#println("Computing force descriptors of dataset...")
#dB_time = @elapsed f_descr = compute_force_descriptors(confs, basis)
#GC.gc()
#ds_train = DataSet(confs .+ e_descr .+ f_descr)
ds_train = deepcopy(confs)

# Load test atomistic configurations (random subset of size N)
M = 100
Expand All @@ -61,13 +74,16 @@ for (system, energy, forces) in chunk
end
confs = DataSet(confs)

# Note, I don't modify the test set energies !!!

# Update test dataset by adding energy and force descriptors
println("Computing energy descriptors of dataset...")
B_time = @elapsed e_descr = compute_local_descriptors(confs, basis)
println("Computing force descriptors of dataset...")
dB_time = @elapsed f_descr = compute_force_descriptors(confs, basis)
GC.gc()
ds_test = DataSet(confs .+ e_descr .+ f_descr)
#println("Computing energy descriptors of dataset...")
#B_time = @elapsed e_descr = compute_local_descriptors(confs, basis)
#println("Computing force descriptors of dataset...")
#dB_time = @elapsed f_descr = compute_force_descriptors(confs, basis)
#GC.gc()
#ds_test = DataSet(confs .+ e_descr .+ f_descr)
ds_test = deepcopy(confs)


# Sampling experiments #########################################################
Expand Down Expand Up @@ -111,7 +127,7 @@ for j in 1:n_experiments
for batch_size_prop in batch_size_props
for sampler in samplers
sample_experiment!(res_path, j, sampler, batch_size_prop, n_train,
ged_mat, ds_train_rnd, ds_test_rnd, basis, metrics)
ged_mat, ds_train_rnd, ds_test_rnd, basis, metrics; vref_dict=vref_dict)
GC.gc()
end
end
Expand Down
36 changes: 25 additions & 11 deletions examples/atomistic/utils/aux_sample_functions.jl
Original file line number Diff line number Diff line change
@@ -1,33 +1,47 @@
# Functions for performing sampling comparisons

include("./subtract_peratom_e.jl")

# Fit function used to get errors based on sampling
function fit(path, ds_train, ds_test, basis)
function fit(path, ds_train, ds_test, basis; vref_dict=nothing)

# Learn
lb = PotentialLearning.LBasisPotential(basis)
ws, int = [30.0, 1.0], true
learn!(lb, ds_train, ws, int)
# no intercept , out-of-core formulation of linear learn
ws = [30.0, 1.0]
_AtWA, _AtWb = PotentialLearning.ooc_learn!(lb, ds_train;ws=ws,symmetrize=false, λ=0.01)
@save_var path lb.β
@save_var path lb.β0

# Post-process output: calculate metrics, create plots, and save results #######

# Get true and predicted values
n_atoms_train = length.(get_system.(ds_train))
n_atoms_test = length.(get_system.(ds_test))

e_train, e_train_pred = get_all_energies(ds_train) ./ n_atoms_train,
get_all_energies(ds_train, lb) ./ n_atoms_train

# training energies were permanently modified, so need special function to recover original energies and predicted energies
if !isnothing(vref_dict)
e_train = get_all_energies_w_onebody(ds_train,vref_dict) ./n_atoms_train
e_train_pred = get_all_energies_w_onebody(ds_train,lb,vref_dict) ./n_atoms_train
else
e_train, e_train_pred = get_all_energies(ds_train) ./ n_atoms_train,
get_all_energies(ds_train, lb) ./ n_atoms_train
end
f_train, f_train_pred = get_all_forces(ds_train),
get_all_forces(ds_train, lb)
@save_var path e_train
@save_var path e_train_pred
@save_var path f_train
@save_var path f_train_pred

e_test, e_test_pred = get_all_energies(ds_test) ./ n_atoms_test,
get_all_energies(ds_test, lb) ./ n_atoms_test
# test energies were *not* permanently modified, so only need special function update predicted energies
if !isnothing(vref_dict)
e_test = get_all_energies(ds_test) ./ n_atoms_test
e_test_pred = get_all_energies_w_onebody(ds_test,lb,vref_dict) ./n_atoms_test
else
e_test, e_test_pred = get_all_energies(ds_test) ./ n_atoms_test,
get_all_energies(ds_test, lb) ./ n_atoms_test
end

f_test, f_test_pred = get_all_forces(ds_test),
get_all_forces(ds_test, lb)
@save_var path e_test
Expand Down Expand Up @@ -84,7 +98,7 @@ end

# Main sample experiment function
function sample_experiment!(res_path, j, sampler, batch_size_prop, n_train,
ged_mat, ds_train_rnd, ds_test_rnd, basis, metrics)
ged_mat, ds_train_rnd, ds_test_rnd, basis, metrics; vref_dict=nothing)
try
print("Experiment:$j, method:$sampler, batch_size_prop:$batch_size_prop")
exp_path = "$res_path/$j-$sampler-bsp$batch_size_prop/"
Expand All @@ -93,7 +107,7 @@ function sample_experiment!(res_path, j, sampler, batch_size_prop, n_train,
sampling_time = @elapsed begin
inds = sampler(ged_mat, batch_size)
end
metrics_j = fit(exp_path, (@views ds_train_rnd[Int64.(inds)]), ds_test_rnd, basis)
metrics_j = fit(exp_path, (@views ds_train_rnd[Int64.(inds)]), ds_test_rnd, basis; vref_dict=vref_dict)
metrics_j = merge(OrderedDict("exp_number" => j,
"method" => "$sampler",
"batch_size_prop" => batch_size_prop,
Expand Down
56 changes: 56 additions & 0 deletions examples/atomistic/utils/subtract_peratom_e.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
using PotentialLearning

function subtract_peratom_e(config::Configuration, vref_dict)
orig_e = get_energy(config)
e_unit = orig_e.u
new_e = get_values(orig_e)
for atom in get_system(config).particles
species = atom.atomic_symbol
new_e -= vref_dict[species]
end

Energy(new_e,e_unit)
end

function adjust_energies(ds, vref_dict)
for config in ds
new_energy = subtract_peratom_e(config,vref_dict)
config.data[Energy] = new_energy
end
end

function get_all_energies_w_onebody(
ds::DataSet,
bp::BasisPotential,
vref_dict
)
energies = Float64[]
for config in ds
energy = PotentialLearning.potential_energy(config, bp)
for atom in get_system(config).particles
species = atom.atomic_symbol
energy += vref_dict[species]
end
push!(energies, energy)
end

return energies
end

function get_all_energies_w_onebody(
ds::DataSet,
vref_dict
)
energies = Float64[]
for config in ds
energy = get_values(get_energy(config))
for atom in get_system(config).particles
species = atom.atomic_symbol
energy += vref_dict[species]
end
push!(energies, energy)
end

return energies
end