From 274699d6b6c0ed3e77373b38a577431d2fa27efe Mon Sep 17 00:00:00 2001 From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com> Date: Fri, 4 Jul 2025 08:24:45 +0100 Subject: [PATCH 1/7] Merge fork into branch (#71) Merging fork into branch --- UM2N/generator/burgers_solver.py | 43 +- UM2N/generator/mesh_generator.py | 3 +- UM2N/generator/swirl_solver.py | 29 +- UM2N/generator/swirl_solver_step.py | 3 +- script/build_burgers_square.py | 670 +++++++++++-------- script/build_helmholtz_poly.py | 830 ++++++++++++----------- script/build_helmholtz_square.py | 990 ++++++++++++++-------------- script/build_poisson_poly.py | 827 ++++++++++++----------- script/build_poisson_square.py | 821 ++++++++++++----------- script/build_swirl.py | 518 +++++++++------ script/make_dataset_helm_train.sh | 2 +- 11 files changed, 2567 insertions(+), 2169 deletions(-) diff --git a/UM2N/generator/burgers_solver.py b/UM2N/generator/burgers_solver.py index ca0b77e..fb901a6 100644 --- a/UM2N/generator/burgers_solver.py +++ b/UM2N/generator/burgers_solver.py @@ -9,6 +9,8 @@ import movement as mv import numpy as np # noqa +from firedrake.__future__ import interpolate # ej321 add + __all__ = ["BurgersSolver"] @@ -203,6 +205,9 @@ def solve_problem(self, callback=None): monitor_function=self.monitor_function, rtol=1e-3, ) + # ej321 - added monitor_function for feature extraction + raw_monitor_val = self.monitor_function(self.mesh) # ej321 - is this the correct mesh to use? + adapter.move() end = time.perf_counter() dur_ms = (end - start) * 1000 @@ -216,6 +221,10 @@ def solve_problem(self, callback=None): function_space = fd.FunctionSpace(self.mesh, "CG", 1) uh_0 = fd.Function(function_space) uh_0.project(self.u[0]) + + # ej321 - added monitor_function for feature extraction + monitor_val = fd.Function(function_space) + monitor_val.assign(raw_monitor_val) # calculate solution on adapted mesh self.mesh.coordinates.dat.data[:] = self.adapt_coord @@ -240,27 +249,42 @@ def solve_problem(self, callback=None): func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1) uh_grad = fd.interpolate(fd.grad(uh_0), func_vec_space) + + # ej321 - grad_norm copied from build_helmholtz_square.py + grad_uh_interpolate = fd.assemble(interpolate(fd.grad(self.u[0]),func_vec_space)) + grad_norm = fd.Function(function_space) + grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) + grad_norm /= grad_norm.vector().max() + hessian_norm = self.f_norm hessian = self.l2_projection phi = adapter.phi phi_grad = adapter.grad_phi - sigma = adapter.sigma + # sigma = adapter.sigma + sigma = adapter.H # ej321 - this may be the updated hessian? I = fd.Identity(2) # noqa jacobian = I + sigma - jacobian_det = fd.Function(function_space, name="jacobian_det") - jacobian_det.project( + # jacobian_det = fd.Function(function_space, name="jacobian_det") + # jacobian_det = fd.Function(adapter.P1, name="jacobian_det") + self.jacob_det = fd.Function(adapter.P1, name="jacobian_det").project( jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0] ) - self.jacob_det = fd.project( - jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1) - ) - self.jacob = fd.project( - jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1) - ) + # self.jacob_det = jacobian_det + # self.jacob_det = fd.project( + # jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1) + # ) # ej321 - not needed? + # self.jacob = jacobian # ej321 - this is copied from mesh_generator.py + self.jacob = fd.Function(adapter.P1_ten, name="jacobian").project(jacobian) + # self.jacob.project(jacobian) + # self.jacob = fd.project( + # jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1) + # ) # ej321 - not needed? + callback( uh=uh_0, uh_grad=uh_grad, + grad_norm = grad_norm, # ej321 - added grad_norm hessian_norm=hessian_norm, hessian=hessian, phi=phi, @@ -280,6 +304,7 @@ def solve_problem(self, callback=None): dur=dur_ms, t=t, idx=self.idx, + monitor_val=monitor_val, # ej321 - added monitor_val ) # step forward in time diff --git a/UM2N/generator/mesh_generator.py b/UM2N/generator/mesh_generator.py index 3c50d4e..8853c50 100644 --- a/UM2N/generator/mesh_generator.py +++ b/UM2N/generator/mesh_generator.py @@ -54,7 +54,8 @@ def move_mesh(self): ) mover.move() # extract Hessian of the movement - sigma = mover.sigma + # sigma = mover.sigma + sigma = mover.H # ej321 - this may be the updated hessian? I = fd.Identity(2) # noqa jacobian = I + sigma jacobian_det = fd.Function(mover.P1, name="jacobian_det") diff --git a/UM2N/generator/swirl_solver.py b/UM2N/generator/swirl_solver.py index 06b4bb3..750d7e7 100644 --- a/UM2N/generator/swirl_solver.py +++ b/UM2N/generator/swirl_solver.py @@ -660,19 +660,24 @@ def solve_problem(self, callback=None, fail_callback=None): hessian = self.l2_projection phi = adapter.phi phi_grad = adapter.grad_phi - sigma = adapter.sigma + # sigma = adapter.sigma + sigma = adapter.H # ej321 - this may be the updated hessian? I = fd.Identity(2) # noqa jacobian = I + sigma - jacobian_det = fd.Function(function_space, name="jacobian_det") - jacobian_det.project( - jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0] - ) - self.jacob_det = fd.project( - jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1) - ) - self.jacob = fd.project( - jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1) - ) + # jacobian_det = fd.Function(function_space, name="jacobian_det") + # jacobian_det.project( + # jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0] + # ) + # self.jacob_det = fd.project( + # jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1) + # ) + self.jacob_det = fd.Function(adapter.P1, name="jacobian_det").project( + jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0] + ) + # self.jacob = fd.project( + # jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1) + # ) + self.jacob = fd.Function(adapter.P1_ten, name="jacobian").project(jacobian) if ((step + 1) % self.save_interval == 0) or (step == 0): callback( @@ -698,6 +703,8 @@ def solve_problem(self, callback=None, fail_callback=None): sigma=self.sigma, alpha=self.alpha, r_0=self.r_0, + x_0=self.x_0, + y_0=self.y_0, t=self.t, ) diff --git a/UM2N/generator/swirl_solver_step.py b/UM2N/generator/swirl_solver_step.py index 96aba99..a253ca0 100644 --- a/UM2N/generator/swirl_solver_step.py +++ b/UM2N/generator/swirl_solver_step.py @@ -755,7 +755,8 @@ def solve_problem(self, callback=None, fail_callback=None): hessian = self.l2_projection phi = adapter.phi phi_grad = adapter.grad_phi - sigma = adapter.sigma + # sigma = adapter.sigma + sigma = adapter.H # ej321 - this may be the updated hessian? I = fd.Identity(2) # noqa jacobian = I + sigma jacobian_det = fd.Function(function_space, name="jacobian_det") diff --git a/script/build_burgers_square.py b/script/build_burgers_square.py index a8ed4ff..c82043a 100644 --- a/script/build_burgers_square.py +++ b/script/build_burgers_square.py @@ -1,6 +1,7 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w +import csv import os import random import shutil @@ -8,276 +9,300 @@ import firedrake as fd import matplotlib.pyplot as plt -import pandas as pd +# import pandas as pd +# dd the parent directory to the Python path +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N -def arg_parse(): - parser = ArgumentParser() - parser.add_argument( - "--mesh_type", type=int, default=2, help="algorithm used to generate mesh" - ) - parser.add_argument( - "--max_dist", - type=int, - default=6, - help="max number of distributions used to\ - generate the dataset (only works if\ - n_dist is not set)", - ) - parser.add_argument( - "--n_dist", - type=int, - default=None, - help="number of distributions used to\ - generate the dataset (this will disable\ - max_dist)", - ) - parser.add_argument( - "--lc", - type=float, - default=6e-2, - help="the length characteristic of the elements in the\ - mesh", - ) - parser.add_argument( - "--field_type", - type=str, - default="iso", - help="anisotropic or isotropic data type(aniso/iso)", - ) +def parse_arguments(): + """Parse command-line arguments.""" + parser = ArgumentParser(description="Build Burgers dataset with square meshes.") + parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.") + parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.") + parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.") + parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.") + parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).") # use padded scheme or full-scale scheme to sample central point of the bump # noqa - parser.add_argument( - "--boundary_scheme", - type=str, - default="pad", - help="scheme used to generate the dataset (pad/full))", - ) - parser.add_argument( - "--n_case", type=int, default=5, help="number of simulation cases" - ) - parser.add_argument( - "--n_grid", - type=int, - default=20, - help="number of grids in a uniform mesh\ - only applied when mesh_type is 0", - ) - parser.add_argument( - "--rand_seed", type=int, default=63, help="number of samples generated" - ) - args_ = parser.parse_args() - print(args_) - return args_ - - -args = arg_parse() - -mesh_type = args.mesh_type - -data_type = args.field_type -use_iso = True if data_type == "iso" else False - -rand_seed = args.rand_seed -random.seed(rand_seed) - -# ==== Parameters ====================== -problem = "burgers" - -n_case = args.n_case - -# parameters for domain scale -scale_x = 1 -scale_y = 1 - -# parameters for random source -max_dist = args.max_dist -n_dist = args.n_dist -lc = args.lc -n_grid = args.n_grid - -# parameters for anisotropic data - distribution height scaler -z_min = 0 -z_max = 1 - -# parameters for isotropic data -w_min = 0.05 -w_max = 0.2 - -scheme = args.boundary_scheme -c_min = 0.2 if scheme == "pad" else 0 -c_max = 0.8 if scheme == "pad" else 1 - -# parameters for data split -p_train = 0.75 -p_test = 0.15 -p_val = 0.1 - -# ======================================= - - -df = pd.DataFrame( - { - "cmin": [c_min], - "cmax": [c_max], - "data_type": [data_type], - "scheme": [scheme], - "lc": [lc], - "mesh_type": [mesh_type], - } -) + parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).") + parser.add_argument("--n_case", type=int, default=5, help="Number of simulation cases.") + parser.add_argument("--n_grid", type=int, default=20, help="Number of grids for uniform mesh if mesh_type 0.") + parser.add_argument("--rand_seed", type=int, default=63, help="number of samples generated / Random seed for reproducibility.") + + parsed_args = parser.parse_args() + + # Handle dependency between max_dist and n_dist + # max number of distributions used to generate the dataset + # only if n_dist is not set if n_dist is set, max_dist will be disabled + if parsed_args.n_dist is not None: + parsed_args.max_dist = None # Disable max_dist if n_dist is set + print("Warning: max_dist is ignored because n_dist is set.") + # QC: + print(parsed_args) + + return parser.parse_args() + +def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): + """ + Set up directories for storing data, plots, and logs. + + Args: + base_dir (str): Base directory for the project. + parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". + - "mesh_type" (int): Type of mesh used in the simulation (default: 0). + - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). + subdirs (list, optional): List of subdirectories to create. Defaults to: + ["data", "plot", "log", "mesh", "mesh_fine"]. + Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". + dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders + matching keys in the `parameters` dictionary. Example: + "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". + If not provided, raises a ValueError. + + Returns: + dict: A dictionary mapping subdirectory names to their full paths. + + Raises: + ValueError: If `dir_format` is not provided or is invalid. + """ + + # Define the project directory + if base_dir: + project_dir = os.path.abspath(base_dir) + else: + project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # QC: + print(f"Project Directory: {project_dir}") + # Define the dataset directory + dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) -def move_data(target, source, start, num_file): + # Use the provided format string for the problem-specific directory + if dir_format is None: + problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + else: + # check if dir_format is a valid string format + if not isinstance(dir_format, str): + raise ValueError("dir_format must be a string.") + problem_specific_dir = os.path.join(dataset_dir, dir_format) + + # Define default subdirectories if not provided + if subdirs is None: + subdirs = ["data", "plot", "log", "mesh", "mesh_fine", + "plot_compare", "train", "test", "val"] + + # Create and clear directories + directories = {} + for subdir in subdirs: + dir_path = os.path.join(problem_specific_dir, subdir) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + else: + # Clear the directory by removing all files + for file in os.listdir(dir_path): + os.remove(os.path.join(dir_path, file)) + directories[subdir] = dir_path + + # QC: + print(f"Subdirectories created: {directories}") + + return directories + +def output_csv(parameters, key_list, output_dir): + """ + Write selected parameters to a CSV file. + + Args: + parameters (dict): Dictionary of parameters to write. + key_list (list): List of keys to include in the CSV. + output_dir (str): Directory where the CSV file will be saved. + """ + # Filter parameters based on key_list + csv_keys = [key for key in key_list if key in parameters] + csv_data = [parameters[key] for key in csv_keys] + + # Define the output file path + csv_file_path = os.path.join(output_dir, "info.csv") + + # Write to CSV + with open(csv_file_path, mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(csv_keys) + # Write data (values) + csv_writer.writerow(csv_data) + + print(f"Parameters saved to {csv_file_path}") + +def move_data(target, source, start, num_files): + """ + Move data files from the source directory to the target directory. + + Args: + target (str): The path to the target directory. + source (str): The path to the source directory. + start (int): The starting index of the files to move. + num_files (int): The total number of files to move. + + Raises: + FileNotFoundError: If the source directory does not exist. + ValueError: If the start index or num_files is invalid. + """ + if not os.path.exists(source): + raise FileNotFoundError(f"Source directory '{source}' does not exist.") + + if start < 0 or num_files <= 0: + raise ValueError("Invalid start index or number of files to move.") + + # Create the target directory if it doesn't exist if not os.path.exists(target): os.makedirs(target) else: - # delete all files under the directory - filelist = [f for f in os.listdir(target)] - for f in filelist: - os.remove(os.path.join(target, f)) - # copy data from data dir to train dir - for i in range(start, num_file): - shutil.copy( - os.path.join(source, "data_{}.npy".format(i)), - os.path.join(target, "data_{}.npy".format(i)), - ) - - -project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__)))) -dataset_dir = os.path.join( - project_dir, "data", f"dataset_meshtype_{mesh_type}", problem -) -problem_specific_dir = os.path.join( - dataset_dir, - "lc={}_ngrid_{}_n={}_{}_{}_meshtype_{}".format( - lc, n_grid, n_case, data_type, scheme, mesh_type - ), -) - - -problem_data_dir = os.path.join(problem_specific_dir, "data") -problem_plot_dir = os.path.join(problem_specific_dir, "plot") -problem_log_dir = os.path.join(problem_specific_dir, "log") - -problem_mesh_dir = os.path.join(problem_specific_dir, "mesh") -problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine") - -if not os.path.exists(problem_mesh_dir): - os.makedirs(problem_mesh_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_dir, f)) - -if not os.path.exists(problem_mesh_fine_dir): - os.makedirs(problem_mesh_fine_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_fine_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_fine_dir, f)) - -if not os.path.exists(problem_data_dir): - os.makedirs(problem_data_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_data_dir)] - for f in filelist: - os.remove(os.path.join(problem_data_dir, f)) - -if not os.path.exists(problem_plot_dir): - os.makedirs(problem_plot_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_plot_dir)] - for f in filelist: - os.remove(os.path.join(problem_plot_dir, f)) - -if not os.path.exists(problem_log_dir): - os.makedirs(problem_log_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_log_dir)] - for f in filelist: - os.remove(os.path.join(problem_log_dir, f)) - -df.to_csv(os.path.join(problem_specific_dir, "info.csv")) + # Clear the target directory by removing all files + for file in os.listdir(target): + os.remove(os.path.join(target, file)) + + # Copy files sequentially starting from the specified index + for i in range(start, start + num_files): + try: + # Copy the data file + shutil.copy( + os.path.join(source, f"data_{i:04d}.npy"), + os.path.join(target, f"data_{i:04d}.npy"), + ) + except FileNotFoundError: + print(f"File data_{i:04d}.npy not found in {source}. Skipping.") + continue + except Exception as e: + print(f"An error occurred while copying data_{i:04d}.npy: {e}") + continue + +def generate_mesh(parameters, dirs): + """Generate the mesh based on the specified type.""" + if parameters["mesh_type"] != 0: + mesh_gen = UM2N.UnstructuredSquareMeshGenerator(scale=parameters["scale_x"], + mesh_type=parameters["mesh_type"]) + mesh = mesh_gen.generate_mesh(res=parameters["lc"], + output_filename=os.path.join(dirs["mesh"], "mesh.msh")) + mesh_new = fd.Mesh(os.path.join(dirs["mesh"], "mesh.msh")) + mesh_fine = mesh_gen.generate_mesh(res=1e-2, + output_filename=os.path.join(dirs["mesh_fine"], "mesh.msh")) + else: + n_grid = parameters["n_grid"] + mesh = fd.UnitSquareMesh(n_grid, n_grid) + mesh_new = fd.UnitSquareMesh(n_grid, n_grid) + mesh_fine = fd.UnitSquareMesh(100, 100) + return mesh, mesh_new, mesh_fine def get_sample_param_of_nu_generalization_by_idx_train(idx_in): - gauss_list_ = [] - if idx_in == 1: - param_ = {"cx": 0.225, "cy": 0.5, "w": 0.01} - gauss_list_.append(param_) - nu_ = 0.0001 - elif idx_in == 2: - param_ = {"cx": 0.225, "cy": 0.5, "w": 0.01} - gauss_list_.append(param_) - nu_ = 0.001 - elif idx_in == 3: - param_ = {"cx": 0.225, "cy": 0.5, "w": 0.01} - gauss_list_.append(param_) - nu_ = 0.002 - elif idx_in == 4: - shift_ = 0.15 - param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01} - gauss_list_.append(param_) - param_ = {"cx": 0.15, "cy": 0.5 + shift_, "w": 0.01} - gauss_list_.append(param_) - nu_ = 0.0001 - elif idx_in == 5: - shift_ = 0.15 - param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01} - gauss_list_.append(param_) - param_ = {"cx": 0.15, "cy": 0.5 + shift_, "w": 0.01} - gauss_list_.append(param_) - nu_ = 0.001 - elif idx_in == 6: - shift_ = 0.15 - param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01} - gauss_list_.append(param_) - param_ = {"cx": 0.15, "cy": 0.5 + shift_, "w": 0.01} - gauss_list_.append(param_) - nu_ = 0.002 - elif idx_in == 7: - shift_ = 0.2 - param_ = {"cx": 0.3, "cy": 0.5 + shift_, "w": 0.01} - gauss_list_.append(param_) - param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01} - gauss_list_.append(param_) - param_ = {"cx": 0.15, "cy": 0.5, "w": 0.01} - gauss_list_.append(param_) - nu_ = 0.0001 - elif idx_in == 8: - shift_ = 0.2 - param_ = {"cx": 0.3, "cy": 0.5 + shift_, "w": 0.01} - gauss_list_.append(param_) - param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01} - gauss_list_.append(param_) - param_ = {"cx": 0.15, "cy": 0.5, "w": 0.01} - gauss_list_.append(param_) - nu_ = 0.001 - elif idx_in == 9: - shift_ = 0.2 - param_ = {"cx": 0.3, "cy": 0.5 + shift_, "w": 0.01} - gauss_list_.append(param_) - param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01} - gauss_list_.append(param_) - param_ = {"cx": 0.15, "cy": 0.5, "w": 0.01} - gauss_list_.append(param_) - nu_ = 0.002 - return gauss_list_, nu_ + """ + Retrieve sample parameters for the Burgers problem based on the given index. + + Args: + idx_in (int): Index of the sample. + + Returns: + tuple: A list of Gaussian parameters and the viscosity value (nu). + """ + # Define a mapping of indices to parameters + param_map = { + 1: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.0001), + 2: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.001), + 3: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.002), + 4: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.0001), + 5: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.001), + 6: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.002), + 7: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.0001), + 8: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.001), + 9: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.002), + } + # Retrieve the parameters and viscosity for the given index + if idx_in not in param_map: + raise ValueError(f"Invalid index: {idx_in}. Supported indices are {list(param_map.keys())}.") -i = 0 + params, nu_ = param_map[idx_in] + # Ensure params is always a list + gauss_list_ = params if isinstance(params, list) else [params] + return gauss_list_, nu_ + +# def get_sample_params(idx): +# """Retrieve sample parameters for the Burgers problem.""" +# return UM2N.get_sample_param_of_nu_generalization_by_idx_train(idx) + + +# def solve_case(idx, mesh, mesh_new, mesh_fine, dirs): +# """Solve a single case of the Burgers problem.""" +# gauss_list, nu = get_sample_params(idx) +# solver = UM2N.BurgersSolver(mesh, mesh_fine, mesh_new, gauss_list=gauss_list, nu=nu, idx=idx) +# solver.solve_problem(lambda *args: sample_from_loop(*args, dirs)) + + +# def sample_from_loop( +# uh, uh_grad, hessian, hessian_norm, phi, grad_phi, jacobian, jacobian_det, +# uh_new, mesh_og, mesh_new, function_space, function_space_fine, uh_fine, +# dur, nu, gauss_list, t, idx, dirs, error_og_list=[], error_adapt_list=[] +# ): +# """Process and save data from a single simulation loop.""" +# mesh_processor = UM2N.MeshProcessor( +# original_mesh=mesh_og, +# optimal_mesh=mesh_new, +# function_space=function_space, +# use_4_edge=True, +# feature={ +# "uh": uh.dat.data_ro.reshape(-1, 1), +# "grad_uh": uh_grad.dat.data_ro.reshape(-1, 2), +# "hessian": hessian.dat.data_ro.reshape(-1, 4), +# "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), +# "jacobian": jacobian.dat.data_ro.reshape(-1, 4), +# "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), +# "phi": phi.dat.data_ro.reshape(-1, 1), +# "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), +# }, +# raw_feature={ +# "uh": uh, +# "hessian_norm": hessian_norm, +# "jacobian": jacobian, +# "jacobian_det": jacobian_det, +# }, +# nu=nu, +# gauss_list=gauss_list, +# dur=dur, +# t=t, +# idx=idx, +# ) + +# mesh_processor.save_taining_data(os.path.join(dirs["data"], f"data_{idx}")) + +# # Plot results +# fig, axes = plt.subplots(2, 3, figsize=(15, 10)) +# fd.trisurf(uh_fine, axes=axes[0, 0]).set_title("Solution field (HR)") +# fd.trisurf(uh, axes=axes[0, 1]).set_title("Solution field (Original Mesh)") +# fd.trisurf(uh_new, axes=axes[0, 2]).set_title("Solution field (Adapted Mesh)") +# fd.triplot(mesh_og, axes=axes[1, 0]).set_title("Original Mesh") +# fd.triplot(mesh_new, axes=axes[1, 1]).set_title("Adapted Mesh") +# fd.tripcolor(uh_new, cmap="coolwarm", axes=axes[1, 2]).set_title("Solution on Optimal Mesh") +# fig.savefig(os.path.join(dirs["plot"], f"plot_{idx}.png")) +# plt.close(fig) + +# # Save error metrics +# uh = fd.project(uh, function_space_fine) +# uh_new = fd.project(uh_new, function_space_fine) +# error_original_mesh = fd.errornorm(uh, uh_fine, norm_type="L2") +# error_optimal_mesh = fd.errornorm(uh_new, uh_fine, norm_type="L2") +# pd.DataFrame( +# {"error_og": [error_original_mesh], "error_adapt": [error_optimal_mesh], "time": [dur]} +# ).to_csv(os.path.join(dirs["log"], f"log_{idx}.csv"), index=False) def sample_from_loop( uh, uh_grad, + grad_norm, # ej321 - added grad_norm hessian, hessian_norm, phi, @@ -295,6 +320,7 @@ def sample_from_loop( gauss_list, t, idx, + monitor_val, error_og_list=[], error_adapt_list=[], ): @@ -308,16 +334,19 @@ def sample_from_loop( feature={ "uh": uh.dat.data_ro.reshape(-1, 1), "grad_uh": uh_grad.dat.data_ro.reshape(-1, 2), + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added grad_norm "hessian": hessian.dat.data_ro.reshape(-1, 4), "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), "jacobian": jacobian.dat.data_ro.reshape(-1, 4), "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), "phi": phi.dat.data_ro.reshape(-1, 1), "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), }, raw_feature={ "uh": uh, "hessian_norm": hessian_norm, + "monitor_val": monitor_val, # ej321 - added monitor_val "jacobian": jacobian, "jacobian_det": jacobian_det, }, @@ -329,7 +358,7 @@ def sample_from_loop( ) mesh_processor.save_taining_data( - os.path.join(problem_data_dir, "data_{}".format(i)) + os.path.join(dirs["data"], "data_{}".format(i)) ) # ==== Plot Scripts ====================== @@ -362,7 +391,7 @@ def sample_from_loop( fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6) fd.triplot(mesh_new, axes=ax6) - fig.savefig(os.path.join(problem_plot_dir, "plot_{}.png".format(i))) + fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i))) i += 1 # fig, ax = plt.subplots() @@ -378,49 +407,124 @@ def sample_from_loop( error_original_mesh = fd.errornorm(uh, uh_fine, norm_type="L2") error_optimal_mesh = fd.errornorm(uh_new, uh_fine, norm_type="L2") - df = pd.DataFrame( - { - "error_og": error_original_mesh, - "error_adapt": error_optimal_mesh, - "time": dur, - }, - index=[0], - ) - df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i))) + + + # Write to CSV + with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(["error_og", "error_adapt", "time"]) + # Write data (values) + csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur]) + print("error og/optimal:", error_original_mesh, error_optimal_mesh) + # df = pd.DataFrame( + # { + # "error_og": error_original_mesh, + # "error_adapt": error_optimal_mesh, + # "time": dur, + # }, + # index=[0], + # ) + # df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i))) + # print("error og/optimal:", error_original_mesh, error_optimal_mesh) return -# ==== Data Generation Scripts ====================== if __name__ == "__main__": + + # parse args + args = parse_arguments() + + # ==== Parameters ====================== + parameters = { + # parameters for problem + "problem": "burgers", + "n_case": args.n_case, + # parameters for random source + "n_dist": args.n_dist, + "max_dist": args.max_dist, + "lc": args.lc, + "n_grid": args.n_grid, + # parameters for ?????? + # "n_samples": args.n_samples, + "data_type": args.field_type, + "scheme": args.boundary_scheme, + "mesh_type": int(args.mesh_type), + # parameters for domain scale + "scale_x": 1, + "scale_y": 1, + # parameters for anisotropic data - distribution height scaler + "z_max": 1, + "z_min": 0, + # parameters for ????? + "x_start": 0, + "x_end": 1, + "y_start": 0, + "y_end": 1, + # parameters for isotropic data + "w_min": 0.05, + "w_max": 0.2, + "c_min": 0.2 if args.boundary_scheme == "pad" else 0, + "c_max": 0.8 if args.boundary_scheme == "pad" else 1, + # parameters for dataset challenging level + # larger, less challenging (because the gaussian is more like a circle) + # "sigma_mean_scaler": 1 / 4, + # "sigma_sigma_scaler": 1 / 6, + # "sigma_eps": 1 / 8, + # parameters for data split + "p_train": 0.75, + "p_test": 0.15, + "p_val": 0.1, + } + + # Set random seed + random.seed(args.rand_seed) + + + # ==== Setup Directories ====================== + problem_specific_dir = "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}".format( + lc=parameters["lc"], + n_grid=parameters["n_grid"], + n_case=parameters["n_case"], + data_type=parameters["data_type"], + scheme=parameters["scheme"], + mesh_type=parameters["mesh_type"], + ) + + subdirs = ["data", "plot", "log", "mesh", "mesh_fine", + "plot_compare", "train", "test", "val" + ] + + dirs = setup_directories(problem = parameters["problem"], + mesh_type = parameters["mesh_type"], + base_dir = None, + subdirs = subdirs, + dir_format = problem_specific_dir) + + # ==== Output CSV ====================== + key_list = [ + "cmin","cmax", "data_type", "scheme", "lc", "mesh_type" + ] + output_csv(parameters, key_list, dirs["data"]) + + # ==== Data Generation Scripts ====================== + + i = 0 + + # QC: print("In build_dataset.py") # for idx in range(1, n_case + 1): - for idx in range(1, n_case + 1): + for idx in range(1, parameters["n_case"] + 1): try: + # QC: print(f"Case {idx} building ...") - mesh = None - mesh_new = None - mesh_fine = None - if mesh_type != 0: - unstructured_square_mesh_gen = UM2N.UnstructuredSquareMesh( - scale=scale_x, mesh_type=mesh_type - ) # noqa - mesh = unstructured_square_mesh_gen.generate_mesh( - res=lc, output_filename=os.path.join(problem_mesh_dir, "mesh.msh") - ) - mesh_new = fd.Mesh(os.path.join(problem_mesh_dir, "mesh.msh")) - mesh_fine = unstructured_square_mesh_gen.generate_mesh( - res=1e-2, - output_filename=os.path.join(problem_mesh_fine_dir, "mesh.msh"), - ) - else: - mesh = fd.UnitSquareMesh(n_grid, n_grid) - mesh_new = fd.UnitSquareMesh(n_grid, n_grid) - mesh_fine = fd.UnitSquareMesh(100, 100) + mesh, mesh_new, mesh_fine = generate_mesh(parameters, dirs) # Generate Random solution field gaussian_list, nu = get_sample_param_of_nu_generalization_by_idx_train(idx) # noqa solver = UM2N.BurgersSolver( - mesh, mesh_fine, mesh_new, gauss_list=gaussian_list, nu=nu, idx=idx + mesh, mesh_fine, mesh_new, + gauss_list=gaussian_list, nu=nu, idx=idx ) solver.solve_problem(sample_from_loop) print() diff --git a/script/build_helmholtz_poly.py b/script/build_helmholtz_poly.py index 080116f..870820d 100644 --- a/script/build_helmholtz_poly.py +++ b/script/build_helmholtz_poly.py @@ -1,6 +1,6 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w - +import csv import os import random import shutil @@ -10,426 +10,484 @@ import firedrake as fd import matplotlib.pyplot as plt import numpy as np -import pandas as pd +# import pandas as pd +from firedrake.__future__ import interpolate +# dd the parent directory to the Python path +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N +def parse_arguments(): + """Parse command-line arguments.""" + parser = ArgumentParser(description="Build Burgers dataset with square meshes.") + parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.") + parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.") + parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.") + parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.") + parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).") + # use padded scheme or full-scale scheme to sample central point of the bump # noqa + parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).") + parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated") + parser.add_argument("--rand_seed", type=int, default=63, help="Random seed") + + parsed_args = parser.parse_args() + + # Handle dependency between max_dist and n_dist + # max number of distributions used to generate the dataset + # only if n_dist is not set if n_dist is set, max_dist will be disabled + if parsed_args.n_dist is not None: + parsed_args.max_dist = None # Disable max_dist if n_dist is set + print("Warning: max_dist is ignored because n_dist is set.") + # QC: + print(parsed_args) + + return parser.parse_args() + +def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): + """ + Set up directories for storing data, plots, and logs. + + Args: + base_dir (str): Base directory for the project. + parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". + - "mesh_type" (int): Type of mesh used in the simulation (default: 0). + - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). + subdirs (list, optional): List of subdirectories to create. Defaults to: + ["data", "plot", "log", "mesh", "mesh_fine"]. + Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". + dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders + matching keys in the `parameters` dictionary. Example: + "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". + If not provided, raises a ValueError. + + Returns: + dict: A dictionary mapping subdirectory names to their full paths. + + Raises: + ValueError: If `dir_format` is not provided or is invalid. + """ + + # Define the project directory + if base_dir: + project_dir = os.path.abspath(base_dir) + else: + project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # QC: + print(f"Project Directory: {project_dir}") -def arg_parse(): - parser = ArgumentParser() - parser.add_argument( - "--mesh_type", type=int, default=2, help="algorithm used to generate mesh" - ) - parser.add_argument( - "--max_dist", - type=int, - default=6, - help="max number of distributions used to\ - generate the dataset (only works if\ - n_dist is not set)", - ) - parser.add_argument( - "--n_dist", - type=int, - default=None, - help="number of distributions used to\ - generate the dataset (this will disable\ - max_dist)", + # Define the dataset directory + dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) + + # Use the provided format string for the problem-specific directory + if dir_format is None: + problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + else: + # check if dir_format is a valid string format + if not isinstance(dir_format, str): + raise ValueError("dir_format must be a string.") + problem_specific_dir = os.path.join(dataset_dir, dir_format) + + # Define default subdirectories if not provided + if subdirs is None: + subdirs = ["data", "plot", "log", "mesh", "mesh_fine", + "plot_compare", "train", "test", "val"] + + # Create and clear directories + directories = {} + for subdir in subdirs: + dir_path = os.path.join(problem_specific_dir, subdir) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + else: + # Clear the directory by removing all files + for file in os.listdir(dir_path): + os.remove(os.path.join(dir_path, file)) + directories[subdir] = dir_path + + # QC: + print(f"Subdirectories created: {directories}") + + return directories + +def output_csv(parameters, key_list, output_dir): + """ + Write selected parameters to a CSV file. + + Args: + parameters (dict): Dictionary of parameters to write. + key_list (list): List of keys to include in the CSV. + output_dir (str): Directory where the CSV file will be saved. + """ + # Filter parameters based on key_list + csv_keys = [key for key in key_list if key in parameters] + csv_data = [parameters[key] for key in csv_keys] + + # Define the output file path + csv_file_path = os.path.join(output_dir, "info.csv") + + # Write to CSV + with open(csv_file_path, mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(csv_keys) + # Write data (values) + csv_writer.writerow(csv_data) + +def move_data(target, source, start, num_files): + """ + Move data files from the source directory to the target directory. + + Args: + target (str): The path to the target directory. + source (str): The path to the source directory. + start (int): The starting index of the files to move. + num_files (int): The total number of files to move. + + Raises: + FileNotFoundError: If the source directory does not exist. + ValueError: If the start index or num_files is invalid. + """ + if not os.path.exists(source): + raise FileNotFoundError(f"Source directory '{source}' does not exist.") + + if start < 0 or num_files <= 0: + raise ValueError("Invalid start index or number of files to move.") + + # Create the target directory if it doesn't exist + if not os.path.exists(target): + os.makedirs(target) + else: + # Clear the target directory by removing all files + for file in os.listdir(target): + os.remove(os.path.join(target, file)) + + # Copy files sequentially starting from the specified index + for i in range(start, start + num_files): + try: + # Copy the data file + shutil.copy( + os.path.join(source, f"data_{i:04d}.npy"), + os.path.join(target, f"data_{i:04d}.npy"), + ) + except FileNotFoundError: + print(f"File data_{i:04d}.npy not found in {source}. Skipping.") + continue + except Exception as e: + print(f"An error occurred while copying data_{i:04d}.npy: {e}") + continue + +def process_features(parameters, dirs): + + mesh_type = parameters["mesh_type"] + scale_x = parameters["scale_x"] + lc = parameters["lc"] + + # create mesh + rand_poly_mesh_gen = UM2N.UnstructuredRandomPolygonalMeshGenerator( + scale=scale_x, mesh_type=mesh_type + ) # noqa + mesh = rand_poly_mesh_gen.generate_mesh( + res=lc, output_filename=os.path.join(dirs["mesh"], f"mesh{i}.msh") ) - parser.add_argument( - "--lc", - type=float, - default=6e-2, - help="the length characteristic of the elements in the\ - mesh", + num_boundary = rand_poly_mesh_gen.num_boundary + + # Generate Random solution field + rand_u_generator = UM2N.RandSourceGenerator( + use_iso= parameters["data_type"] == "iso", + dist_params= parameters ) - parser.add_argument( - "--field_type", - type=str, - default="aniso", - help="anisotropic or isotropic data type(aniso/iso)", + + # generate equation + helmholtz_eq = UM2N.RandHelmholtzEqGenerator(rand_u_generator) + # discretise the equation + res = helmholtz_eq.discretise(mesh) + # get specific parameters used + dist_params = rand_u_generator.get_dist_params() + # Solve the equation + solver = UM2N.EquationSolver( + params={ + "function_space": res["function_space"], + "LHS": res["LHS"], + "RHS": res["RHS"], + "bc": res["bc"], + } ) - # use padded scheme or full-scale scheme to sample central point of the bump # noqa - parser.add_argument( - "--boundary_scheme", - type=str, - default="full", - help="scheme used to generate the dataset (pad/full))", + + # original solution field + uh = solver.solve_eq() + + func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) + grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space)) + + # ej321 - grad_norm copied from build_helmholtz_square.py + grad_norm = fd.Function(res["function_space"]) + grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) + grad_norm /= grad_norm.vector().max() + + # RHS of helmholtz problem + f_rhs = fd.assemble(interpolate(helmholtz_eq.f, helmholtz_eq.function_space)) + + # ej321 - this seems extra - the mesh is never used, just to build 'eq'? + # hessian = UM2N.MeshGenerator( + # params={ + # "eq": helmholtz_eq, + # "mesh": rand_poly_mesh_gen.generate_mesh( + # res=lc, + # output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), + # ), + # } + # ).get_hessian(mesh) + # ej321 - using script from build_helmholtz_square.py + mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh}) + monitor_val = mesh_gen.monitor_func(mesh) + hessian = mesh_gen.get_hessian(mesh) + hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh), + fd.FunctionSpace(mesh, "CG", 1) + ) + + + # move the mesh? + start = time.perf_counter() + new_mesh = mesh_gen.move_mesh() + end = time.perf_counter() + dur = (end - start) * 1000 + + # this is the jacobian of x with respect to xi + jacobian = mesh_gen.get_jacobian() + jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1)) + jacobian_det = mesh_gen.get_jacobian_det() + jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1)) + + # get phi/grad_phi projected to the original mesh + phi = mesh_gen.get_phi() + grad_phi = mesh_gen.get_grad_phi() + + # solve the equation on the new mesh + new_res = helmholtz_eq.discretise(new_mesh) + new_solver = UM2N.EquationSolver( + params={ + "function_space": new_res["function_space"], + "LHS": new_res["LHS"], + "RHS": new_res["RHS"], + "bc": new_res["bc"], + } ) - parser.add_argument( - "--n_samples", type=int, default=100, help="number of samples generated" + uh_new = new_solver.solve_eq() + + # process the data for training + mesh_processor = UM2N.MeshProcessor( + original_mesh=mesh, + optimal_mesh=new_mesh, + function_space=new_res["function_space"], + use_4_edge=False, + num_boundary=num_boundary, + feature={ + "uh": uh.dat.data_ro.reshape(-1, 1), + "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), #ej321 - added + "hessian": hessian.dat.data_ro.reshape(-1, 4), + "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), + "jacobian": jacobian.dat.data_ro.reshape(-1, 4), + "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), + "phi": phi.dat.data_ro.reshape(-1, 1), + "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), + "f": f_rhs.dat.data_ro.reshape(-1, 1), + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added + }, + raw_feature={ + "uh": uh, + "hessian_norm": hessian_norm, + "monitor_val": monitor_val, # ej321 - added + "grad_uh_norm": grad_norm, # ej321 - added needed for poly only + "jacobian": jacobian, + "jacobian_det": jacobian_det, + }, + dist_params=dist_params, + poly_mesh=True, ) - parser.add_argument( - "--rand_seed", type=int, default=63, help="number of samples generated" + + # save out data + mesh_processor.save_taining_data( + os.path.join(dirs["data"], "data_{}".format(i)) ) - args_ = parser.parse_args() - print(args_) - return args_ + # ==== Plot Scripts ====================== + fig = plt.figure(figsize=(15, 10)) + ax1 = fig.add_subplot(2, 3, 1, projection="3d") + # Plot the exact solution + ax1.set_title("Exact Solution") + fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1) + # Plot the solved solution + ax2 = fig.add_subplot(2, 3, 2, projection="3d") + ax2.set_title("FEM Solution") + fd.trisurf(uh, axes=ax2) + + # Plot the solution on a optimal mesh + ax3 = fig.add_subplot(2, 3, 3, projection="3d") + ax3.set_title("FEM Solution on Optimal Mesh") + fd.trisurf(uh_new, axes=ax3) + + # Plot the mesh + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_title("Original Mesh") + fd.triplot(mesh, axes=ax4) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_title("Optimal Mesh") + fd.triplot(new_mesh, axes=ax5) + + # plot mesh with function evaluated on it + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_title("Soultion Projected on optimal mesh") + fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6) + fd.triplot(new_mesh, axes=ax6) + + fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i))) + + + # ==== Log File ============================================ + high_res_mesh = rand_poly_mesh_gen.generate_mesh( + res=1e-2, + output_filename=os.path.join(dirs["mesh_fine"], f"mesh{i}.msh"), + ) -args = arg_parse() + high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1) -mesh_type = args.mesh_type + res_high_res = helmholtz_eq.discretise(high_res_mesh) + u_exact = fd.assemble(interpolate(res_high_res["u_exact"], + res_high_res["function_space"]) + ) -data_type = args.field_type -use_iso = True if data_type == "iso" else False + uh_proj = fd.project(uh, high_res_function_space) + uh_new_proj = fd.project(uh_new, high_res_function_space) -rand_seed = args.rand_seed -random.seed(rand_seed) -np.random.seed(rand_seed) + error_original_mesh = fd.errornorm(u_exact, uh_proj) + error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj) -# ==== Parameters ====================== -problem = "holmholtz_poly" + # df = pd.DataFrame( + # { + # "error_og": error_original_mesh, + # "error_adapt": error_optimal_mesh, + # "time": dur, + # }, + # index=[0], + # ) + # df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i))) -n_samples = args.n_samples + # Write to CSV + with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(["error_og", "error_adapt", "time"]) + # Write data (values) + csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur]) + print("error og/optimal:", error_original_mesh, error_optimal_mesh) -# parameters for domain scale -scale_x = 1 -scale_y = 1 -# parameters for random source -max_dist = args.max_dist -n_dist = args.n_dist -lc = args.lc -# parameters for anisotropic data - distribution height scaler -z_min = 0 -z_max = 1 +if __name__ == "__main__": -# parameters for isotropic data -w_min = 0.05 -w_max = 0.2 + # parse args + args = parse_arguments() + + # ==== Parameters ====================== + parameters = { + # parameters for problem + "problem": "holmholtz_poly", + # "n_case": args.n_case, # burgers problem only + # parameters for random source + "n_dist": args.n_dist, + "max_dist": args.max_dist, + "lc": args.lc, + # "n_grig": args.n_grid, # burgers problem only + # parameters for ?????? + "n_samples": args.n_samples, + "data_type": args.field_type, + "scheme": args.boundary_scheme, + "mesh_type": int(args.mesh_type), + # parameters for domain scale + "scale_x": 1, + "scale_y": 1, + # parameters for anisotropic data - distribution height scaler + "z_max": 1, + "z_min": 0, + # parameters for ????? + "x_start": 0, + "x_end": 1, + "y_start": 0, + "y_end": 1, + # parameters for isotropic data + "w_min": 0.05, + "w_max": 0.2, + "c_min": 0.3 if args.boundary_scheme == "pad" else 0, + "c_max": 0.7 if args.boundary_scheme == "pad" else 1, + # parameters for dataset challenging level + # larger, less challenging (because the gaussian is more like a circle) + "sigma_mean_scaler": 1 / 4, + "sigma_sigma_scaler": 1 / 6, + "sigma_eps": 1 / 8, + # parameters for data split + "p_train": 0.75, + "p_test": 0.15, + "p_val": 0.1, + } -scheme = args.boundary_scheme -c_min = 0.3 if scheme == "pad" else 0 -c_max = 0.7 if scheme == "pad" else 1 + # Set random seed + random.seed(args.rand_seed) + np.random.seed(args.rand_seed) -# parameters for data split -p_train = 0.75 -p_test = 0.15 -p_val = 0.1 + # ==== Setup Directories ====================== + problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( + parameters["z_min"], parameters["z_max"], + parameters["n_dist"],parameters["max_dist"], + parameters["lc"], parameters["n_samples"], + parameters["data_type"], parameters["scheme"], parameters["mesh_type"] + ) -num_train = int(n_samples * p_train) -num_test = int(n_samples * p_test) -num_val = int(n_samples * p_val) -# ======================================= + subdirs = [ + "data", "plot", "plot_compare", "log", "mesh", "mesh_fine", + "train", "test", "val", + ] + dirs = setup_directories(problem = parameters["problem"], + mesh_type = parameters["mesh_type"], + base_dir = None, + subdirs = subdirs, + dir_format = problem_specific_dir) -df = pd.DataFrame( - { - "cmin": [c_min], - "cmax": [c_max], - "data_type": [data_type], - "scheme": [scheme], - "n_samples": [n_samples], - "lc": [lc], - "mesh_type": [mesh_type], - } -) + # ==== Output CSV ====================== + key_list = [ + "cmin","cmax", + "data_type", "scheme", "n_samples", "lc", "mesh_type" + ] + output_csv(parameters, key_list, dirs["data"]) -def move_data(target, source, start, num_file): - if not os.path.exists(target): - os.makedirs(target) - else: - # delete all files under the directory - filelist = [f for f in os.listdir(target)] - for f in filelist: - os.remove(os.path.join(target, f)) - # copy data from data dir to train dir - for i in range(start, num_file): - shutil.copy( - os.path.join(source, "data_{}.npy".format(i)), - os.path.join(target, "data_{}.npy".format(i)), - ) - - -project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__)))) -dataset_dir = os.path.join( - project_dir, "data", f"dataset_meshtype_{mesh_type}", "helmholtz_poly" -) # noqa -problem_specific_dir = os.path.join( - dataset_dir, - "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( - z_min, z_max, n_dist, max_dist, lc, n_samples, data_type, scheme, mesh_type - ), -) - - -problem_data_dir = os.path.join(problem_specific_dir, "data") -problem_plot_dir = os.path.join(problem_specific_dir, "plot") -problem_log_dir = os.path.join(problem_specific_dir, "log") - -problem_mesh_dir = os.path.join(problem_specific_dir, "mesh") -problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine") -problem_train_dir = os.path.join(problem_specific_dir, "train") -problem_test_dir = os.path.join(problem_specific_dir, "test") -problem_val_dir = os.path.join(problem_specific_dir, "val") - -if not os.path.exists(problem_mesh_dir): - os.makedirs(problem_mesh_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_dir, f)) - -if not os.path.exists(problem_mesh_fine_dir): - os.makedirs(problem_mesh_fine_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_fine_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_fine_dir, f)) - -if not os.path.exists(problem_data_dir): - os.makedirs(problem_data_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_data_dir)] - for f in filelist: - os.remove(os.path.join(problem_data_dir, f)) - -if not os.path.exists(problem_plot_dir): - os.makedirs(problem_plot_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_plot_dir)] - for f in filelist: - os.remove(os.path.join(problem_plot_dir, f)) - -if not os.path.exists(problem_log_dir): - os.makedirs(problem_log_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_log_dir)] - for f in filelist: - os.remove(os.path.join(problem_log_dir, f)) - -df.to_csv(os.path.join(problem_specific_dir, "info.csv")) - - -# ==== Data Generation Scripts ====================== -if __name__ == "__main__": + # ==== Data Generation Scripts ====================== + # QC: print("In build_dataset.py") - i = 0 - while i < n_samples: + # i = 0 + # while i < n_samples: + for i in range(parameters["n_samples"]): try: print("Generating Sample: " + str(i)) - rand_poly_mesh_gen = UM2N.UnstructuredRandomPolygonalMeshGenerator( - scale=scale_x, mesh_type=mesh_type - ) # noqa - mesh = rand_poly_mesh_gen.generate_mesh( - res=lc, output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh") - ) - num_boundary = rand_poly_mesh_gen.num_boundary - # Generate Random solution field - rand_u_generator = UM2N.RandSourceGenerator( - use_iso=use_iso, - dist_params={ - "max_dist": max_dist, - "n_dist": n_dist, - "x_start": 0, - "x_end": 1, - "y_start": 0, - "y_end": 1, - "z_max": z_max, - "z_min": z_min, - "w_min": w_min, - "w_max": w_max, - "c_min": c_min, - "c_max": c_max, - }, - ) - helmholtz_eq = UM2N.RandHelmholtzEqGenerator(rand_u_generator) - res = helmholtz_eq.discretise(mesh) # discretise the equation - dist_params = rand_u_generator.get_dist_params() - # Solve the equation - solver = UM2N.EquationSolver( - params={ - "function_space": res["function_space"], - "LHS": res["LHS"], - "RHS": res["RHS"], - "bc": res["bc"], - } - ) - # RHS of helmholtz problem - f = fd.interpolate(helmholtz_eq.f, helmholtz_eq.function_space) - uh = solver.solve_eq() - # Generate Mesh - hessian = UM2N.MeshGenerator( - params={ - "eq": helmholtz_eq, - "mesh": rand_poly_mesh_gen.generate_mesh( - res=lc, - output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), - ), - } - ).get_hessian(mesh) - - hessian_norm = UM2N.MeshGenerator( - params={ - "eq": helmholtz_eq, - "mesh": rand_poly_mesh_gen.generate_mesh( - res=lc, - output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), - ), - } - ).monitor_func(mesh) - - hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1)) - - func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) - grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space) - - mesh_gen = UM2N.MeshGenerator( - params={ - "eq": helmholtz_eq, - "mesh": rand_poly_mesh_gen.generate_mesh( - res=lc, - output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), - ), - } - ) - - start = time.perf_counter() - new_mesh = mesh_gen.move_mesh() - end = time.perf_counter() - dur = (end - start) * 1000 - - # this is the jacobian of x with respect to xi - jacobian = mesh_gen.get_jacobian() - jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1)) - jacobian_det = mesh_gen.get_jacobian_det() - jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1)) - - # get phi/grad_phi projected to the original mesh - phi = mesh_gen.get_phi() - grad_phi = mesh_gen.get_grad_phi() - - # solve the equation on the new mesh - new_res = helmholtz_eq.discretise(new_mesh) - new_solver = UM2N.EquationSolver( - params={ - "function_space": new_res["function_space"], - "LHS": new_res["LHS"], - "RHS": new_res["RHS"], - "bc": new_res["bc"], - } - ) - uh_new = new_solver.solve_eq() - - # process the data for training - mesh_processor = UM2N.MeshProcessor( - original_mesh=mesh, - optimal_mesh=new_mesh, - function_space=new_res["function_space"], - use_4_edge=False, - num_boundary=num_boundary, - feature={ - "uh": uh.dat.data_ro.reshape(-1, 1), - "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), - "hessian": hessian.dat.data_ro.reshape(-1, 4), - "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), - "jacobian": jacobian.dat.data_ro.reshape(-1, 4), - "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), - "phi": phi.dat.data_ro.reshape(-1, 1), - "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), - "f": f.dat.data_ro.reshape(-1, 1), - }, - raw_feature={ - "uh": uh, - "hessian_norm": hessian_norm, - "jacobian": jacobian, - "jacobian_det": jacobian_det, - }, - dist_params=dist_params, - poly_mesh=True, - ) - - mesh_processor.save_taining_data( - os.path.join(problem_data_dir, "data_{}".format(i)) - ) - - # ==== Plot Scripts ====================== - fig = plt.figure(figsize=(15, 10)) - ax1 = fig.add_subplot(2, 3, 1, projection="3d") - # Plot the exact solution - ax1.set_title("Exact Solution") - fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1) - # Plot the solved solution - ax2 = fig.add_subplot(2, 3, 2, projection="3d") - ax2.set_title("FEM Solution") - fd.trisurf(uh, axes=ax2) - - # Plot the solution on a optimal mesh - ax3 = fig.add_subplot(2, 3, 3, projection="3d") - ax3.set_title("FEM Solution on Optimal Mesh") - fd.trisurf(uh_new, axes=ax3) - - # Plot the mesh - ax4 = fig.add_subplot(2, 3, 4) - ax4.set_title("Original Mesh") - fd.triplot(mesh, axes=ax4) - ax5 = fig.add_subplot(2, 3, 5) - ax5.set_title("Optimal Mesh") - fd.triplot(new_mesh, axes=ax5) - - # plot mesh with function evaluated on it - ax6 = fig.add_subplot(2, 3, 6) - ax6.set_title("Soultion Projected on optimal mesh") - fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6) - fd.triplot(new_mesh, axes=ax6) - - fig.savefig(os.path.join(problem_plot_dir, "plot_{}.png".format(i))) - - # ========================================== - - # generate log file - high_res_mesh = rand_poly_mesh_gen.generate_mesh( - res=1e-2, - output_filename=os.path.join(problem_mesh_fine_dir, f"mesh{i}.msh"), - ) - - high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1) - - res_high_res = helmholtz_eq.discretise(high_res_mesh) - u_exact = fd.interpolate( - res_high_res["u_exact"], res_high_res["function_space"] - ) - - uh = fd.project(uh, high_res_function_space) - uh_new = fd.project(uh_new, high_res_function_space) - - error_original_mesh = fd.errornorm(u_exact, uh) - error_optimal_mesh = fd.errornorm(u_exact, uh_new) - - df = pd.DataFrame( - { - "error_og": error_original_mesh, - "error_adapt": error_optimal_mesh, - "time": dur, - }, - index=[0], - ) - df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i))) - print("error og/optimal:", error_original_mesh, error_optimal_mesh) - i += 1 + + process_features(parameters, dirs) + # i += 1 except fd.exceptions.ConvergenceError: - pass + print(f"Iteration {i} did not converge.") + continue except AttributeError: pass except ValueError: pass - move_data(problem_train_dir, problem_data_dir, 0, num_train) - - move_data(problem_test_dir, problem_data_dir, num_train, num_train + num_test) + # ==== Data Splits ============================================ + num_train = int(parameters["n_samples"] * parameters["p_train"]) + num_test = int(parameters["n_samples"] * parameters["p_test"]) + num_val = parameters["n_samples"] - num_train - num_test - move_data( - problem_val_dir, - problem_data_dir, - num_train + num_test, - num_train + num_test + num_val, - ) -# ==== Data Generation Scripts ====================== + move_data(dirs["train"], dirs["data"], 0, num_train) + move_data(dirs["test"], dirs["data"], num_train, num_train + num_test) + move_data(dirs["val"], dirs["data"], num_train + num_test, num_train + num_test + num_val) diff --git a/script/build_helmholtz_square.py b/script/build_helmholtz_square.py index c8d25d1..c66d46f 100644 --- a/script/build_helmholtz_square.py +++ b/script/build_helmholtz_square.py @@ -1,6 +1,7 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w +import csv import os import random import shutil @@ -9,553 +10,526 @@ import firedrake as fd import matplotlib.pyplot as plt -import pandas as pd +from firedrake.__future__ import interpolate + +# dd the parent directory to the Python path +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N -def arg_parse(): +def parse_arguments(): + """Parse command-line arguments.""" parser = ArgumentParser() - parser.add_argument( - "--mesh_type", type=int, default=2, help="algorithm used to generate mesh" - ) - parser.add_argument( - "--max_dist", - type=int, - default=6, - help="max number of distributions used to\ - generate the dataset (only works if\ - n_dist is not set)", + parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh") + parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions") + parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions") + parser.add_argument("--lc", type=float, default=5e-2, help="Length characteristic of mesh elements") + parser.add_argument("--field_type", type=str, default="aniso", help="Data type (aniso/iso)") + # use padded scheme or full-scale scheme to sample central point of the bump # noqa + parser.add_argument("--boundary_scheme", type=str, default="full", help="Boundary scheme (pad/full)") + parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated") + parser.add_argument("--rand_seed", type=int, default=63, help="Random seed") + + parsed_args = parser.parse_args() + + # Handle dependency between max_dist and n_dist + # max number of distributions used to generate the dataset + # only if n_dist is not set if n_dist is set, max_dist will be disabled + if parsed_args.n_dist is not None: + parsed_args.max_dist = None # Disable max_dist if n_dist is set + print("Warning: max_dist is ignored because n_dist is set.") + # QC: + print(parsed_args) + + return parsed_args + + +def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): + """ + Set up directories for storing data, plots, and logs. + + Args: + base_dir (str): Base directory for the project. + parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". + - "mesh_type" (int): Type of mesh used in the simulation (default: 0). + - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). + subdirs (list, optional): List of subdirectories to create. Defaults to: + ["data", "plot", "log", "mesh", "mesh_fine"]. + Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". + dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders + matching keys in the `parameters` dictionary. Example: + "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". + If not provided, raises a ValueError. + + Returns: + dict: A dictionary mapping subdirectory names to their full paths. + + Raises: + ValueError: If `dir_format` is not provided or is invalid. + """ + + # Define the project directory + if base_dir: + project_dir = os.path.abspath(base_dir) + else: + project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # QC: + print(f"Project Directory: {project_dir}") + + # Define the dataset directory + dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) + + # Use the provided format string for the problem-specific directory + if dir_format is None: + problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + else: + # check if dir_format is a valid string format + if not isinstance(dir_format, str): + raise ValueError("dir_format must be a string.") + problem_specific_dir = os.path.join(dataset_dir, dir_format) + + # Define default subdirectories if not provided + if subdirs is None: + subdirs = ["data", "plot", "log", "mesh", "mesh_fine", + "plot_compare", "train", "test", "val"] + + # Create and clear directories + directories = {} + for subdir in subdirs: + dir_path = os.path.join(problem_specific_dir, subdir) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + else: + # Clear the directory by removing all files + for file in os.listdir(dir_path): + os.remove(os.path.join(dir_path, file)) + directories[subdir] = dir_path + + # QC: + print(f"Subdirectories created: {directories}") + + return directories + +def move_data(target, source, start, num_files): + """ + Move data files from the source directory to the target directory. + + Args: + target (str): The path to the target directory. + source (str): The path to the source directory. + start (int): The starting index of the files to move. + num_files (int): The total number of files to move. + + Raises: + FileNotFoundError: If the source directory does not exist. + ValueError: If the start index or num_files is invalid. + """ + if not os.path.exists(source): + raise FileNotFoundError(f"Source directory '{source}' does not exist.") + + if start < 0 or num_files <= 0: + raise ValueError("Invalid start index or number of files to move.") + + # Create the target directory if it doesn't exist + if not os.path.exists(target): + os.makedirs(target) + else: + # Clear the target directory by removing all files + for file in os.listdir(target): + os.remove(os.path.join(target, file)) + + # Copy files sequentially starting from the specified index + for i in range(start, start + num_files): + try: + # Copy the data file + shutil.copy( + os.path.join(source, f"data_{i:04d}.npy"), + os.path.join(target, f"data_{i:04d}.npy"), + ) + except FileNotFoundError: + print(f"File data_{i:04d}.npy not found in {source}. Skipping.") + continue + except Exception as e: + print(f"An error occurred while copying data_{i:04d}.npy: {e}") + continue + +def create_mesh(i, mesh_type, lc, scale_x, problem_mesh_dir): + """ + Generate a mesh for the given sample index. + + Args: + i: The sample index. + mesh_type: The type of mesh to generate. + lc: The length characteristic of the mesh. + scale_x: The scale of the mesh. + problem_mesh_dir: Directory to save the generated mesh. + + Returns: + The generated mesh. + """ + if mesh_type != 0: + unstructured_square_mesh_gen = UM2N.UnstructuredSquareMeshGenerator( + scale=scale_x, mesh_type=mesh_type + ) # noqa + return unstructured_square_mesh_gen.generate_mesh( + res=lc, + output_filename=os.path.join(problem_mesh_dir, f"mesh_{i:04d}.msh"), + ) + else: + n_grid = int(1 / lc) + return fd.UnitSquareMesh(n_grid, n_grid) + + +def process_features(parameters, directories): + + # create mesh + mesh = create_mesh( + i, mesh_type = parameters["mesh_type"], lc = parameters["lc"], + scale_x = parameters["scale_x"], problem_mesh_dir = directories["mesh"] ) - parser.add_argument( - "--n_dist", - type=int, - default=None, - help="number of distributions used to\ - generate the dataset (this will disable\ - max_dist)", + # Generate Random solution field + rand_u_generator = UM2N.RandSourceGenerator( + use_iso= parameters["data_type"] == "iso", + dist_params = parameters ) - parser.add_argument( - "--lc", - type=float, - default=5e-2, - help="the length characteristic of the elements in the\ - mesh", + + # generate equation + helmholtz_eq = UM2N.RandHelmholtzEqGenerator(rand_u_generator) + # discretise the equation + res = helmholtz_eq.discretise(mesh) + # get specific parameters used + dist_params = rand_u_generator.get_dist_params() + # Solve the equation + solver = UM2N.EquationSolver( + params={ + "function_space": res["function_space"], + "LHS": res["LHS"], + "RHS": res["RHS"], + "bc": res["bc"], + } ) - parser.add_argument( - "--field_type", - type=str, - default="aniso", - help="anisotropic or isotropic data type(aniso/iso)", + # original solution field + uh = solver.solve_eq() + + grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), + fd.VectorFunctionSpace(mesh, "CG", 1) + )) + grad_norm = fd.Function(res["function_space"]) + grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) + grad_norm /= grad_norm.vector().max() + + # FOR OUTPUT + # RHS of helmholtz problem + f_rhs = fd.assemble(interpolate(helmholtz_eq.f, helmholtz_eq.function_space)) + + + # generate mesh? + + mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh}) + monitor_val = mesh_gen.monitor_func(mesh) + hessian = mesh_gen.get_hessian(mesh) + hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh), + fd.FunctionSpace(mesh, "CG", 1) + ) + + # move the mesh? + start = time.perf_counter() + new_mesh = mesh_gen.move_mesh() # noqa + end = time.perf_counter() + dur = (end - start) * 1000 + + # this is the jacobian of x with respect to xi + jacobian = mesh_gen.get_jacobian() + jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1)) + jacobian_det = mesh_gen.get_jacobian_det() + jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1)) + + # get phi/grad_phi projected to the original mesh + phi = mesh_gen.get_phi() + grad_phi = mesh_gen.get_grad_phi() + + # solve the equation on the new mesh + new_res = helmholtz_eq.discretise(new_mesh) + new_solver = UM2N.EquationSolver( + params={ + "function_space": new_res["function_space"], + "LHS": new_res["LHS"], + "RHS": new_res["RHS"], + "bc": new_res["bc"], + } ) - # use padded scheme or full-scale scheme to sample central point of the bump # noqa - parser.add_argument( - "--boundary_scheme", - type=str, - default="full", - help="scheme used to generate the dataset (pad/full))", + uh_new = new_solver.solve_eq() + + # process the data for training + mesh_processor = UM2N.MeshProcessor( + original_mesh=mesh, + optimal_mesh=new_mesh, + function_space=new_res["function_space"], + use_4_edge=True, + feature={ + "uh": uh.dat.data_ro.reshape(-1, 1), + "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), + "hessian": hessian.dat.data_ro.reshape(-1, 4), + "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), + "jacobian": jacobian.dat.data_ro.reshape(-1, 4), + "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), + "phi": phi.dat.data_ro.reshape(-1, 1), + "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), + "f": f_rhs.dat.data_ro.reshape(-1, 1), + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), + }, + raw_feature={ + "uh": uh, + "hessian_norm": hessian_norm, + "monitor_val": monitor_val, + "jacobian": jacobian, + "jacobian_det": jacobian_det, + }, + dist_params=dist_params, ) - parser.add_argument( - "--n_samples", type=int, default=100, help="number of samples generated" + + # save out data + mesh_processor.save_taining_data( + os.path.join(directories["data"], f"data_{i:04d}") ) - parser.add_argument( - "--rand_seed", type=int, default=63, help="number of samples generated" + + # ==== Log File ============================================ + high_res_mesh = create_mesh( + i, mesh_type = parameters["mesh_type"], lc = 1e-2, + scale_x = parameters["scale_x"], problem_mesh_dir = directories["mesh_fine"] ) - args_ = parser.parse_args() - print(args_) - return args_ + res_high_res = helmholtz_eq.discretise(high_res_mesh) + u_exact = fd.assemble(interpolate(res_high_res["u_exact"], + res_high_res["function_space"]) + ) + + uh_proj = fd.project(uh, fd.FunctionSpace(high_res_mesh, "CG", 1)) + uh_new_proj = fd.project(uh_new, fd.FunctionSpace(high_res_mesh, "CG", 1)) -args = arg_parse() + error_original_mesh = fd.errornorm(u_exact, uh_proj) + error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj) -mesh_type = int(args.mesh_type) + # Write to CSV + with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(["error_og", "error_adapt", "time"]) + # Write data (values) + csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur]) -data_type = args.field_type -use_iso = True if data_type == "iso" else False + print("error og/optimal:", error_original_mesh, error_optimal_mesh) -rand_seed = args.rand_seed -random.seed(rand_seed) + # ==== Plot mesh, solution, error ====================== -# ==== Parameters ====================== -problem = "holmholtz" + rows, cols = 3, 3 + cmap = "seismic" -n_samples = args.n_samples + fig, ax = plt.subplots( + rows, cols, figsize=(cols * 5, rows * 5), layout="compressed" + ) -# parameters for domain scale -scale_x = 1 -scale_y = 1 + # High resolution mesh + fd.triplot(high_res_mesh, axes=ax[0, 0]) + ax[0, 0].set_title("High resolution Mesh ") + # Orginal low resolution uniform mesh + fd.triplot(mesh, axes=ax[0, 1]) + ax[0, 1].set_title("Original uniform Mesh") + # Adapted mesh + fd.triplot(new_mesh, axes=ax[0, 2]) + ax[0, 2].set_title("Adapted Mesh (MA)") + # Solution on high resolution mesh + cb = fd.tripcolor(u_exact, cmap=cmap, axes=ax[1, 0]) + ax[1, 0].set_title("Solution on High Resolution (u_exact)") + plt.colorbar(cb) + # Solution on orginal low resolution uniform mesh + cb = fd.tripcolor(uh, cmap=cmap, axes=ax[1, 1]) + ax[1, 1].set_title("Solution on uniform Mesh") + plt.colorbar(cb) + # Solution on adapted mesh + cb = fd.tripcolor(uh_new, cmap=cmap, axes=ax[1, 2]) + ax[1, 2].set_title("Solution on Adapted Mesh (MA)") + plt.colorbar(cb) + + # Error on high resolution mesh + cb = fd.tripcolor(monitor_val, cmap=cmap, axes=ax[2, 0]) + ax[2, 0].set_title("Monitor values") + plt.colorbar(cb) + + err_orignal_mesh = fd.assemble(uh_proj - u_exact) + err_adapted_mesh = fd.assemble(uh_new_proj - u_exact) + err_abs_max_val_ori = max( + abs(err_orignal_mesh.dat.data[:].max()), + abs(err_orignal_mesh.dat.data[:].min()), + ) + err_abs_max_val_adapted = max( + abs(err_adapted_mesh.dat.data[:].max()), + abs(err_adapted_mesh.dat.data[:].min()), + ) + err_abs_max_val = max(err_abs_max_val_ori, err_abs_max_val_adapted) + err_v_max = err_abs_max_val + err_v_min = -err_v_max + + # Error on high resolution mesh + cb = fd.tripcolor(monitor_val, cmap=cmap, axes=ax[2, 0]) + ax[2, 0].set_title("Monitor values") + plt.colorbar(cb) + # Error on orginal low resolution uniform mesh + cb = fd.tripcolor( + err_orignal_mesh, + cmap=cmap, + axes=ax[2, 1], + vmax=err_v_max, + vmin=err_v_min, + ) + ax[2, 1].set_title( + f"Error (u-u_exact) uniform Mesh | L2 Norm: {error_original_mesh:.5f}" + ) + plt.colorbar(cb) + # Error on adapted mesh + cb = fd.tripcolor( + err_adapted_mesh, + cmap=cmap, + axes=ax[2, 2], + vmax=err_v_max, + vmin=err_v_min, + ) + ax[2, 2].set_title( + f"Error (u-u_exact) Adapted Mesh (MA)| L2 Norm: {error_optimal_mesh:.5f} | {(error_original_mesh-error_optimal_mesh)/error_original_mesh*100:.2f}%" + ) + plt.colorbar(cb) -# parameters for random source -max_dist = args.max_dist -n_dist = args.n_dist -lc = args.lc + for rr in range(rows): + for cc in range(cols): + ax[rr, cc].set_aspect("equal", "box") -# parameters for anisotropic data - distribution height scaler -z_min = 0 -z_max = 1 + fig.savefig(os.path.join(directories["plot_compare"], f"plot_{i:04d}.png")) + plt.close() -# parameters for isotropic data -w_min = 0.05 -w_max = 0.2 +def output_csv(parameters, key_list, output_dir): + """ + Write selected parameters to a CSV file. -scheme = args.boundary_scheme -c_min = 0.2 if scheme == "pad" else 0 -c_max = 0.8 if scheme == "pad" else 1 + Args: + parameters (dict): Dictionary of parameters to write. + key_list (list): List of keys to include in the CSV. + output_dir (str): Directory where the CSV file will be saved. + """ + # Filter parameters based on key_list + csv_keys = [key for key in key_list if key in parameters] + csv_data = [parameters[key] for key in csv_keys] -# parameters for data split -p_train = 0.75 -p_test = 0.15 -p_val = 0.1 + # Define the output file path + csv_file_path = os.path.join(output_dir, "info.csv") -num_train = int(n_samples * p_train) -num_test = int(n_samples * p_test) -num_val = int(n_samples * p_val) + # Write to CSV + with open(csv_file_path, mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(csv_keys) + # Write data (values) + csv_writer.writerow(csv_data) -# parameters for dataset challenging level -sigma_mean_scaler = 1 / 4 # -sigma_sigma_scaler = ( - 1 / 6 -) # larger, less challenging (because the gaussian is more like a circle) -sigma_eps = 1 / 8 -# ======================================= + print(f"Parameters saved to {csv_file_path}") +if __name__ == "__main__": -df = pd.DataFrame( - { - "cmin": [c_min], - "cmax": [c_max], - "sigma_mean_scaler": [sigma_mean_scaler], - "sigma_sigma_scaler": [sigma_sigma_scaler], - "sigma_eps": [sigma_eps], - "data_type": [data_type], - "scheme": [scheme], - "n_samples": [n_samples], - "lc": [lc], - "mesh_type": [mesh_type], + # parse args + args = parse_arguments() + + # ==== Parameters ====================== + parameters = { + # parameters for problem + "problem": "helmholtz", + # "n_case": args.n_case, # burgers problem only + # parameters for random source + "n_dist": args.n_dist, + "max_dist": args.max_dist, + "lc": args.lc, + # "n_grig": args.n_grid, # burgers problem only + # parameters for ?????? + "n_samples": args.n_samples, + "data_type": args.field_type, + "scheme": args.boundary_scheme, + "mesh_type": int(args.mesh_type), + # parameters for domain scale + "scale_x": 1, + "scale_y": 1, + # parameters for anisotropic data - distribution height scaler + "z_max": 1, + "z_min": 0, + # parameters for ????? + "x_start": 0, + "x_end": 1, + "y_start": 0, + "y_end": 1, + # parameters for isotropic data + "w_min": 0.05, + "w_max": 0.2, + "c_min": 0.2 if args.boundary_scheme == "pad" else 0, + "c_max": 0.8 if args.boundary_scheme == "pad" else 1, + # parameters for dataset challenging level + # larger, less challenging (because the gaussian is more like a circle) + "sigma_mean_scaler": 1 / 4, + "sigma_sigma_scaler": 1 / 6, + "sigma_eps": 1 / 8, + # parameters for data split + "p_train": 0.75, + "p_test": 0.15, + "p_val": 0.1, } -) + # Set random seed + random.seed(args.rand_seed) -def move_data(target, source, start, num_file): - if not os.path.exists(target): - os.makedirs(target) - else: - # delete all files under the directory - filelist = [f for f in os.listdir(target)] - for f in filelist: - os.remove(os.path.join(target, f)) - # copy data from data dir to train dir - for i in range(start, num_file): - shutil.copy( - os.path.join(source, f"data_{i:04d}.npy"), - os.path.join(target, f"data_{i:04d}.npy"), + # ==== Setup Directories ====================== + problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( + parameters["z_min"], parameters["z_max"], + parameters["n_dist"],parameters["max_dist"], + parameters["lc"], parameters["n_samples"], + parameters["data_type"], parameters["scheme"], parameters["mesh_type"] ) + subdirs = [ + "data", "plot", "plot_compare", "log", "mesh", "mesh_fine", + "train", "test", "val", + ] -project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__)))) -dataset_dir = os.path.join( - project_dir, "data", f"dataset_meshtype_{mesh_type}", "helmholtz" -) # noqa -problem_specific_dir = os.path.join( - dataset_dir, - "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( - z_min, z_max, n_dist, max_dist, lc, n_samples, data_type, scheme, mesh_type - ), -) - - -problem_data_dir = os.path.join(problem_specific_dir, "data") -problem_plot_dir = os.path.join(problem_specific_dir, "plot") -problem_plot_compare_dir = os.path.join(problem_specific_dir, "plot_compare") -problem_log_dir = os.path.join(problem_specific_dir, "log") - -problem_mesh_dir = os.path.join(problem_specific_dir, "mesh") -problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine") -problem_train_dir = os.path.join(problem_specific_dir, "train") -problem_test_dir = os.path.join(problem_specific_dir, "test") -problem_val_dir = os.path.join(problem_specific_dir, "val") - -if not os.path.exists(problem_mesh_dir): - os.makedirs(problem_mesh_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_dir, f)) - -if not os.path.exists(problem_mesh_fine_dir): - os.makedirs(problem_mesh_fine_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_fine_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_fine_dir, f)) - -if not os.path.exists(problem_data_dir): - os.makedirs(problem_data_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_data_dir)] - for f in filelist: - os.remove(os.path.join(problem_data_dir, f)) - -if not os.path.exists(problem_plot_dir): - os.makedirs(problem_plot_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_plot_dir)] - for f in filelist: - os.remove(os.path.join(problem_plot_dir, f)) - -if not os.path.exists(problem_plot_compare_dir): - os.makedirs(problem_plot_compare_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_plot_compare_dir)] - for f in filelist: - os.remove(os.path.join(problem_plot_compare_dir, f)) - -if not os.path.exists(problem_log_dir): - os.makedirs(problem_log_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_log_dir)] - for f in filelist: - os.remove(os.path.join(problem_log_dir, f)) - -df.to_csv(os.path.join(problem_specific_dir, "info.csv")) - - -# ==== Data Generation Scripts ====================== -if __name__ == "__main__": - print("In build_dataset.py") - i = 0 - while i < n_samples: - try: - print("Generating Sample: " + str(i)) - if mesh_type != 0: - unstructured_square_mesh_gen = UM2N.UnstructuredSquareMesh( - scale=scale_x, mesh_type=mesh_type - ) # noqa - mesh = unstructured_square_mesh_gen.generate_mesh( - res=lc, - output_filename=os.path.join(problem_mesh_dir, f"mesh_{i:04d}.msh"), - ) - else: - n_grid = int(1 / lc) - mesh = fd.UnitSquareMesh(n_grid, n_grid) - - # Generate Random solution field - rand_u_generator = UM2N.RandSourceGenerator( - use_iso=use_iso, - dist_params={ - "max_dist": max_dist, - "n_dist": n_dist, - "x_start": 0, - "x_end": 1, - "y_start": 0, - "y_end": 1, - "z_max": z_max, - "z_min": z_min, - "w_min": w_min, - "w_max": w_max, - "c_min": c_min, - "c_max": c_max, - "sigma_mean_scaler": sigma_mean_scaler, - "sigma_sigma_scaler": sigma_sigma_scaler, - "sigma_eps": sigma_eps, - }, - ) - helmholtz_eq = UM2N.RandHelmholtzEqGenerator(rand_u_generator) - res = helmholtz_eq.discretise(mesh) # discretise the equation - dist_params = rand_u_generator.get_dist_params() - # Solve the equation - solver = UM2N.EquationSolver( - params={ - "function_space": res["function_space"], - "LHS": res["LHS"], - "RHS": res["RHS"], - "bc": res["bc"], - } - ) - # RHS of helmholtz problem - f = fd.interpolate(helmholtz_eq.f, helmholtz_eq.function_space) - # fd.trisurf(f) - # plt.show() - uh = solver.solve_eq() - # Generate Mesh - hessian = UM2N.MeshGenerator( - params={"eq": helmholtz_eq, "mesh": mesh} - ).get_hessian(mesh) - - hessian_norm = UM2N.MeshGenerator( - params={"eq": helmholtz_eq, "mesh": mesh} - ).get_hessian_norm(mesh) - hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1)) - - # Get monitor val - monitor_val = UM2N.MeshGenerator( - params={"eq": helmholtz_eq, "mesh": mesh} - ).monitor_func(mesh) - - # grad_uh_norm = UM2N.MeshGenerator( - # params={ - # "eq": helmholtz_eq, - # "mesh": fd.Mesh( - # os.path.join(problem_mesh_dir, f"mesh_{i:04d}.msh") - # ), # noqa - # } - # ).get_grad_norm(mesh) - - func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) - grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space) - - grad_norm = fd.Function(res["function_space"]) - grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) - grad_norm /= grad_norm.vector().max() - grad_uh_norm = grad_norm - - mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh}) - - start = time.perf_counter() - new_mesh = mesh_gen.move_mesh() # noqa - end = time.perf_counter() - dur = (end - start) * 1000 - - # Get monitor val - # monitor_val = mesh_gen.get_monitor_val() - - # this is the jacobian of x with respect to xi - jacobian = mesh_gen.get_jacobian() - jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1)) - jacobian_det = mesh_gen.get_jacobian_det() - jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1)) - - # get phi/grad_phi projected to the original mesh - phi = mesh_gen.get_phi() - # phi = fd.project( - # phi, fd.FunctionSpace(mesh, "CG", 1) - # ) - grad_phi = mesh_gen.get_grad_phi() - # grad_phi = fd.project( - # grad_phi, fd.VectorFunctionSpace(mesh, "CG", 1) - # ) - - # solve the equation on the new mesh - new_res = helmholtz_eq.discretise(new_mesh) - new_solver = UM2N.EquationSolver( - params={ - "function_space": new_res["function_space"], - "LHS": new_res["LHS"], - "RHS": new_res["RHS"], - "bc": new_res["bc"], - } - ) - uh_new = new_solver.solve_eq() - - # process the data for training - mesh_processor = UM2N.MeshProcessor( - original_mesh=mesh, - optimal_mesh=new_mesh, - function_space=new_res["function_space"], - use_4_edge=True, - feature={ - "uh": uh.dat.data_ro.reshape(-1, 1), - "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), - "grad_uh_norm": grad_uh_norm.dat.data_ro.reshape(-1, 1), - "hessian": hessian.dat.data_ro.reshape(-1, 4), - "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), - "jacobian": jacobian.dat.data_ro.reshape(-1, 4), - "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), - "phi": phi.dat.data_ro.reshape(-1, 1), - "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), - "f": f.dat.data_ro.reshape(-1, 1), - "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), - }, - raw_feature={ - "uh": uh, - "hessian_norm": hessian_norm, - "monitor_val": monitor_val, - "jacobian": jacobian, - "jacobian_det": jacobian_det, - }, - dist_params=dist_params, - ) - - mesh_processor.save_taining_data( - os.path.join(problem_data_dir, f"data_{i:04d}") - ) - - # # ==== Plot Scripts ====================== - # fig = plt.figure(figsize=(15, 10)) - # ax1 = fig.add_subplot(2, 3, 1, projection='3d') - # # Plot the exact solution - # ax1.set_title('Exact Solution') - # fd.trisurf(fd.interpolate( - # res["u_exact"], res["function_space"]), axes=ax1) - # # Plot the solved solution - # ax2 = fig.add_subplot(2, 3, 2, projection='3d') - # ax2.set_title('FEM Solution') - # fd.trisurf(uh, axes=ax2) - - # # Plot the solution on a optimal mesh - # ax3 = fig.add_subplot(2, 3, 3, projection='3d') - # ax3.set_title('FEM Solution on Optimal Mesh') - # fd.trisurf(uh_new, axes=ax3) - - # # Plot the mesh - # ax4 = fig.add_subplot(2, 3, 4) - # ax4.set_title('Original Mesh') - # fd.triplot(mesh, axes=ax4) - # ax5 = fig.add_subplot(2, 3, 5) - # ax5.set_title('Optimal Mesh') - # fd.triplot(new_mesh, axes=ax5) - - # # plot mesh with function evaluated on it - # ax6 = fig.add_subplot(2, 3, 6) - # ax6.set_title('Soultion Projected on optimal mesh') - # fd.tripcolor( - # uh_new, cmap='coolwarm', axes=ax6) - # fd.triplot(new_mesh, axes=ax6) - - # fig.savefig( - # os.path.join( - # problem_plot_dir, f"plot_{i:04d}.png") - # ) - - # ========================================== - - if mesh_type != 0: - # generate log file - high_res_mesh = unstructured_square_mesh_gen.generate_mesh( - res=1e-2, - output_filename=os.path.join( - problem_mesh_fine_dir, f"mesh_{i:04d}.msh" - ), - ) - else: - high_res_mesh = fd.UnitSquareMesh(100, 100) - high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1) - - res_high_res = helmholtz_eq.discretise(high_res_mesh) - u_exact = fd.interpolate( - res_high_res["u_exact"], res_high_res["function_space"] - ) + directories = setup_directories(problem = parameters["problem"], + mesh_type = parameters["mesh_type"], + base_dir = None, + subdirs = subdirs, + dir_format = problem_specific_dir) - uh_proj = fd.project(uh, high_res_function_space) - uh_new_proj = fd.project(uh_new, high_res_function_space) - error_original_mesh = fd.errornorm(u_exact, uh_proj) - error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj) + # ==== Output CSV ====================== + key_list = [ + "cmin","cmax", "sigma_mean_scaler", "sigma_sigma_scaler", "sigma_eps" + "data_type", "scheme", "n_samples", "lc", "mesh_type" + ] + output_csv(parameters, key_list, directories["data"]) - df = pd.DataFrame( - { - "error_og": error_original_mesh, - "error_adapt": error_optimal_mesh, - "time": dur, - }, - index=[0], - ) - df.to_csv(os.path.join(problem_log_dir, f"log_{i:04d}.csv")) - print("error og/optimal:", error_original_mesh, error_optimal_mesh) + # ==== Data Generation Scripts ====================== + for i in range(parameters["n_samples"]): + try: + print(f"Generating Sample: {i}") - # ==== Plot mesh, solution, error ====================== - rows, cols = 3, 3 - fig, ax = plt.subplots( - rows, cols, figsize=(cols * 5, rows * 5), layout="compressed" - ) + # create dataset + process_features(parameters, directories) - # High resolution mesh - fd.triplot(high_res_mesh, axes=ax[0, 0]) - ax[0, 0].set_title("High resolution Mesh ") - # Orginal low resolution uniform mesh - fd.triplot(mesh, axes=ax[0, 1]) - ax[0, 1].set_title("Original uniform Mesh") - # Adapted mesh - fd.triplot(new_mesh, axes=ax[0, 2]) - ax[0, 2].set_title("Adapted Mesh (MA)") - - cmap = "seismic" - # Solution on high resolution mesh - cb = fd.tripcolor(u_exact, cmap=cmap, axes=ax[1, 0]) - ax[1, 0].set_title("Solution on High Resolution (u_exact)") - plt.colorbar(cb) - # Solution on orginal low resolution uniform mesh - cb = fd.tripcolor(uh, cmap=cmap, axes=ax[1, 1]) - ax[1, 1].set_title("Solution on uniform Mesh") - plt.colorbar(cb) - # Solution on adapted mesh - cb = fd.tripcolor(uh_new, cmap=cmap, axes=ax[1, 2]) - ax[1, 2].set_title("Solution on Adapted Mesh (MA)") - plt.colorbar(cb) - - err_orignal_mesh = fd.assemble(uh_proj - u_exact) - err_adapted_mesh = fd.assemble(uh_new_proj - u_exact) - err_abs_max_val_ori = max( - abs(err_orignal_mesh.dat.data[:].max()), - abs(err_orignal_mesh.dat.data[:].min()), - ) - err_abs_max_val_adapted = max( - abs(err_adapted_mesh.dat.data[:].max()), - abs(err_adapted_mesh.dat.data[:].min()), - ) - err_abs_max_val = max(err_abs_max_val_ori, err_abs_max_val_adapted) - err_v_max = err_abs_max_val - err_v_min = -err_v_max - - # Error on high resolution mesh - cb = fd.tripcolor(monitor_val, cmap=cmap, axes=ax[2, 0]) - ax[2, 0].set_title("Monitor values") - plt.colorbar(cb) - # Error on orginal low resolution uniform mesh - cb = fd.tripcolor( - err_orignal_mesh, - cmap=cmap, - axes=ax[2, 1], - vmax=err_v_max, - vmin=err_v_min, - ) - ax[2, 1].set_title( - f"Error (u-u_exact) uniform Mesh | L2 Norm: {error_original_mesh:.5f}" - ) - plt.colorbar(cb) - # Error on adapted mesh - cb = fd.tripcolor( - err_adapted_mesh, - cmap=cmap, - axes=ax[2, 2], - vmax=err_v_max, - vmin=err_v_min, - ) - ax[2, 2].set_title( - f"Error (u-u_exact) Adapted Mesh (MA)| L2 Norm: {error_optimal_mesh:.5f} | {(error_original_mesh-error_optimal_mesh)/error_original_mesh*100:.2f}%" - ) - plt.colorbar(cb) + except fd.exceptions.ConvergenceError: + print(f"Iteration {i} did not converge.") + continue - for rr in range(rows): - for cc in range(cols): - ax[rr, cc].set_aspect("equal", "box") - fig.savefig(os.path.join(problem_plot_compare_dir, f"plot_{i:04d}.png")) - plt.close() + # ==== Data Splits ============================================ + num_train = int(parameters["n_samples"] * parameters["p_train"]) + num_test = int(parameters["n_samples"] * parameters["p_test"]) + num_val = parameters["n_samples"] - num_train - num_test - i += 1 - except fd.exceptions.ConvergenceError: - print(f"Iteration: {i}, not coverged.") - pass - # except AttributeError: - # print(f"AttributeError") - # pass - # except ValueError: - # pass - - move_data(problem_train_dir, problem_data_dir, 0, num_train) - - move_data(problem_test_dir, problem_data_dir, num_train, num_train + num_test) - - move_data( - problem_val_dir, - problem_data_dir, - num_train + num_test, - num_train + num_test + num_val, - ) -# ==== Data Generation Scripts ====================== + move_data(directories["train"], directories["data"], 0, num_train) + move_data(directories["test"], directories["data"], num_train, num_train + num_test) + move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val) diff --git a/script/build_poisson_poly.py b/script/build_poisson_poly.py index 87634f6..929fad8 100644 --- a/script/build_poisson_poly.py +++ b/script/build_poisson_poly.py @@ -1,6 +1,6 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w - +import csv import os import random import shutil @@ -10,408 +10,468 @@ import firedrake as fd import matplotlib.pyplot as plt import numpy as np -import pandas as pd +# import pandas as pd +from firedrake.__future__ import interpolate +# dd the parent directory to the Python path +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N - -def arg_parse(): - parser = ArgumentParser() - parser.add_argument( - "--mesh_type", type=int, default=2, help="algorithm used to generate mesh" - ) - parser.add_argument( - "--max_dist", - type=int, - default=6, - help="max number of distributions used to\ - generate the dataset (only works if\ - n_dist is not set)", - ) - parser.add_argument( - "--n_dist", - type=int, - default=None, - help="number of distributions used to\ - generate the dataset (this will disable\ - max_dist)", - ) - parser.add_argument( - "--lc", - type=float, - default=5e-2, - help="the length characteristic of the elements in the\ - mesh", - ) - parser.add_argument( - "--field_type", - type=str, - default="aniso", - help="anisotropic or isotropic data type(aniso/iso)", - ) +def parse_arguments(): + """Parse command-line arguments.""" + parser = ArgumentParser(description="Build Burgers dataset with square meshes.") + parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.") + parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.") + parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.") + parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.") + parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).") # use padded scheme or full-scale scheme to sample central point of the bump # noqa - parser.add_argument( - "--boundary_scheme", - type=str, - default="full", - help="scheme used to generate the dataset (pad/full))", - ) - parser.add_argument( - "--n_samples", type=int, default=100, help="number of samples generated" - ) - parser.add_argument( - "--rand_seed", type=int, default=63, help="number of samples generated" - ) - args_ = parser.parse_args() - print(args_) - return args_ - + parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).") + parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated") + parser.add_argument("--rand_seed", type=int, default=63, help="Random seed") + + parsed_args = parser.parse_args() + + # Handle dependency between max_dist and n_dist + # max number of distributions used to generate the dataset + # only if n_dist is not set if n_dist is set, max_dist will be disabled + if parsed_args.n_dist is not None: + parsed_args.max_dist = None # Disable max_dist if n_dist is set + print("Warning: max_dist is ignored because n_dist is set.") + # QC: + print(parsed_args) + + return parser.parse_args() + + +def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): + """ + Set up directories for storing data, plots, and logs. + + Args: + base_dir (str): Base directory for the project. + parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". + - "mesh_type" (int): Type of mesh used in the simulation (default: 0). + - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). + subdirs (list, optional): List of subdirectories to create. Defaults to: + ["data", "plot", "log", "mesh", "mesh_fine"]. + Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". + dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders + matching keys in the `parameters` dictionary. Example: + "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". + If not provided, raises a ValueError. + + Returns: + dict: A dictionary mapping subdirectory names to their full paths. + + Raises: + ValueError: If `dir_format` is not provided or is invalid. + """ + + # Define the project directory + if base_dir: + project_dir = os.path.abspath(base_dir) + else: + project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # QC: + print(f"Project Directory: {project_dir}") -args = arg_parse() + # Define the dataset directory + dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) -mesh_type = args.mesh_type + # Use the provided format string for the problem-specific directory + if dir_format is None: + problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + else: + # check if dir_format is a valid string format + if not isinstance(dir_format, str): + raise ValueError("dir_format must be a string.") + problem_specific_dir = os.path.join(dataset_dir, dir_format) + + # Define default subdirectories if not provided + if subdirs is None: + subdirs = ["data", "plot", "log", "mesh", "mesh_fine", + "plot_compare", "train", "test", "val"] + + # Create and clear directories + directories = {} + for subdir in subdirs: + dir_path = os.path.join(problem_specific_dir, subdir) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + else: + # Clear the directory by removing all files + for file in os.listdir(dir_path): + os.remove(os.path.join(dir_path, file)) + directories[subdir] = dir_path + + # QC: + print(f"Subdirectories created: {directories}") + + return directories + +def output_csv(parameters, key_list, output_dir): + """ + Write selected parameters to a CSV file. + + Args: + parameters (dict): Dictionary of parameters to write. + key_list (list): List of keys to include in the CSV. + output_dir (str): Directory where the CSV file will be saved. + """ + # Filter parameters based on key_list + csv_keys = [key for key in key_list if key in parameters] + csv_data = [parameters[key] for key in csv_keys] + + # Define the output file path + csv_file_path = os.path.join(output_dir, "info.csv") + + # Write to CSV + with open(csv_file_path, mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(csv_keys) + # Write data (values) + csv_writer.writerow(csv_data) + +def move_data(target, source, start, num_files): + """ + Move data files from the source directory to the target directory. + + Args: + target (str): The path to the target directory. + source (str): The path to the source directory. + start (int): The starting index of the files to move. + num_files (int): The total number of files to move. + + Raises: + FileNotFoundError: If the source directory does not exist. + ValueError: If the start index or num_files is invalid. + """ + if not os.path.exists(source): + raise FileNotFoundError(f"Source directory '{source}' does not exist.") + + if start < 0 or num_files <= 0: + raise ValueError("Invalid start index or number of files to move.") + + # Create the target directory if it doesn't exist + if not os.path.exists(target): + os.makedirs(target) + else: + # Clear the target directory by removing all files + for file in os.listdir(target): + os.remove(os.path.join(target, file)) -data_type = args.field_type -use_iso = True if data_type == "iso" else False + # Copy files sequentially starting from the specified index + for i in range(start, start + num_files): + try: + # Copy the data file + shutil.copy( + os.path.join(source, f"data_{i:04d}.npy"), + os.path.join(target, f"data_{i:04d}.npy"), + ) + except FileNotFoundError: + print(f"File data_{i:04d}.npy not found in {source}. Skipping.") + continue + except Exception as e: + print(f"An error occurred while copying data_{i:04d}.npy: {e}") + continue + +def process_features(parameters, dirs): + + # create mesh + scale_x = parameters["scale_x"] + mesh_type = parameters["mesh_type"] + lc = parameters["lc"] + rand_poly_mesh_gen = UM2N.UnstructuredRandomPolygonalMeshGenerator( + scale=scale_x, mesh_type=mesh_type + ) # noqa + mesh = rand_poly_mesh_gen.generate_mesh( + res=lc, output_filename=os.path.join(dirs["mesh"], f"mesh{i}.msh") + ) + num_boundary = rand_poly_mesh_gen.num_boundary -rand_seed = args.rand_seed -random.seed(rand_seed) -np.random.seed(rand_seed) + # Generate Random solution field + rand_u_generator = UM2N.RandSourceGenerator( + use_iso= parameters["data_type"] == "iso", + dist_params = parameters + ) -# ==== Parameters ====================== -problem = "poisson_poly" + # generate equation + poisson_eq = UM2N.RandPoissonEqGenerator(rand_u_generator) + # discretise the equation + res = poisson_eq.discretise(mesh) + # get specific parameters used + dist_params = rand_u_generator.get_dist_params() + # Solve the equation + solver = UM2N.EquationSolver( + params={ + "function_space": res["function_space"], + "LHS": res["LHS"], + "RHS": res["RHS"], + "bc": res["bc"], + } + ) + # original solution field + uh = solver.solve_eq() + # Generate Mesh + mesh_gen = UM2N.MeshGenerator(params={"eq": poisson_eq, "mesh": mesh}) + monitor_val = mesh_gen.monitor_func(mesh) + hessian = mesh_gen.get_hessian(mesh) + hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh), + fd.FunctionSpace(mesh, "CG", 1) + ) + # hessian = UM2N.MeshGenerator( + # params={ + # "eq": poisson_eq, + # "mesh": rand_poly_mesh_gen.generate_mesh( + # res=lc, + # output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), + # ), + # } + # ).get_hessian(mesh) + + # hessian_norm = UM2N.MeshGenerator( + # params={ + # "eq": poisson_eq, + # "mesh": rand_poly_mesh_gen.generate_mesh( + # res=lc, + # output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), + # ), + # } + # ).monitor_func(mesh) + + # is this the monitor function value? + # hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1)) + + func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) + grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space) + + # ej321 - grad_norm copied from build_helmholtz_square.py + grad_norm = fd.Function(res["function_space"]) + grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) + grad_norm /= grad_norm.vector().max() + + # mesh_gen = UM2N.MeshGenerator( + # params={ + # "eq": poisson_eq, + # "mesh": rand_poly_mesh_gen.generate_mesh( + # res=lc, + # output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), + # ), + # } + # ) + + # move the mesh? + start = time.perf_counter() + new_mesh = mesh_gen.move_mesh() + end = time.perf_counter() + dur = (end - start) * 1000 + + # this is the jacobian of x with respect to xi + jacobian = mesh_gen.get_jacobian() + jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1)) + jacobian_det = mesh_gen.get_jacobian_det() + jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1)) + + # get phi/grad_phi projected to the original mesh + phi = mesh_gen.get_phi() + grad_phi = mesh_gen.get_grad_phi() + + # solve the equation on the new mesh + new_res = poisson_eq.discretise(new_mesh) + new_solver = UM2N.EquationSolver( + params={ + "function_space": new_res["function_space"], + "LHS": new_res["LHS"], + "RHS": new_res["RHS"], + "bc": new_res["bc"], + } + ) + uh_new = new_solver.solve_eq() + + # process the data for training + mesh_processor = UM2N.MeshProcessor( + original_mesh=mesh, + optimal_mesh=new_mesh, + function_space=new_res["function_space"], + use_4_edge=False, + num_boundary=num_boundary, + feature={ + "uh": uh.dat.data_ro.reshape(-1, 1), + "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), #ej321 - added + "hessian": hessian.dat.data_ro.reshape(-1, 4), + "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), + "jacobian": jacobian.dat.data_ro.reshape(-1, 4), + "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), + "phi": phi.dat.data_ro.reshape(-1, 1), + "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added + + }, + raw_feature={ + "uh": uh, + "hessian_norm": hessian_norm, + "monitor_val": monitor_val, # ej321 - added + "grad_uh_norm": grad_norm, # ej321 - added needed for poly only + "jacobian": jacobian, + "jacobian_det": jacobian_det, + }, + dist_params=dist_params, + poly_mesh=True, + ) -n_samples = args.n_samples + mesh_processor.save_taining_data( + os.path.join(dirs["data"], "data_{}".format(i)) + ) -# parameters for domain scale -scale_x = 1 -scale_y = 1 + # ==== Plot Scripts ====================== + fig = plt.figure(figsize=(15, 10)) + ax1 = fig.add_subplot(2, 3, 1, projection="3d") + # Plot the exact solution + ax1.set_title("Exact Solution") + fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1) + # Plot the solved solution + ax2 = fig.add_subplot(2, 3, 2, projection="3d") + ax2.set_title("FEM Solution") + fd.trisurf(uh, axes=ax2) + + # Plot the solution on a optimal mesh + ax3 = fig.add_subplot(2, 3, 3, projection="3d") + ax3.set_title("FEM Solution on Optimal Mesh") + fd.trisurf(uh_new, axes=ax3) + + # Plot the mesh + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_title("Original Mesh") + fd.triplot(mesh, axes=ax4) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_title("Optimal Mesh") + fd.triplot(new_mesh, axes=ax5) + + # plot mesh with function evaluated on it + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_title("Soultion Projected on optimal mesh") + fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6) + fd.triplot(new_mesh, axes=ax6) + + fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i))) + + # ==== Log File ============================================ + high_res_mesh = rand_poly_mesh_gen.generate_mesh( + res=1e-2, + output_filename=os.path.join(dirs["mesh_fine"], f"mesh{i}.msh"), + ) -# parameters for random source -max_dist = args.max_dist -n_dist = args.n_dist -lc = args.lc + high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1) -# parameters for anisotropic data - distribution height scaler -z_min = 0 -z_max = 1 + res_high_res = poisson_eq.discretise(high_res_mesh) + u_exact = fd.interpolate( + res_high_res["u_exact"], res_high_res["function_space"] + ) -# parameters for isotropic data -w_min = 0.05 -w_max = 0.2 + uh_proj = fd.project(uh, high_res_function_space) + uh_new_proj = fd.project(uh_new, high_res_function_space) -scheme = args.boundary_scheme -c_min = 0.3 if scheme == "pad" else 0 -c_max = 0.7 if scheme == "pad" else 1 + error_original_mesh = fd.errornorm(u_exact, uh_proj) + error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj) -# parameters for data split -p_train = 0.75 -p_test = 0.15 -p_val = 0.1 + # Write to CSV + with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(["error_og", "error_adapt", "time"]) + # Write data (values) + csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur]) -num_train = int(n_samples * p_train) -num_test = int(n_samples * p_test) -num_val = int(n_samples * p_val) -# ======================================= + print("error og/optimal:", error_original_mesh, error_optimal_mesh) +if __name__ == "__main__": -df = pd.DataFrame( - { - "cmin": [c_min], - "cmax": [c_max], - "data_type": [data_type], - "scheme": [scheme], - "n_samples": [n_samples], - "lc": [lc], - "mesh_type": [mesh_type], + # parse args + args = parse_arguments() + + # ==== Parameters ====================== + parameters = { + # parameters for problem + "problem": "poisson_poly", + # "n_case": args.n_case, # burgers problem only + # parameters for random source + "n_dist": args.n_dist, + "max_dist": args.max_dist, + "lc": args.lc, + # "n_grig": args.n_grid, # burgers problem only + # parameters for ?????? + "n_samples": args.n_samples, + "data_type": args.field_type, + "scheme": args.boundary_scheme, + "mesh_type": int(args.mesh_type), + # parameters for domain scale + "scale_x": 1, + "scale_y": 1, + # parameters for anisotropic data - distribution height scaler + "z_max": 1, + "z_min": 0, + # parameters for ????? + "x_start": 0, + "x_end": 1, + "y_start": 0, + "y_end": 1, + # parameters for isotropic data + "w_min": 0.05, + "w_max": 0.2, + "c_min": 0.3 if args.boundary_scheme == "pad" else 0, + "c_max": 0.7 if args.boundary_scheme == "pad" else 1, + # parameters for dataset challenging level + # larger, less challenging (because the gaussian is more like a circle) + "sigma_mean_scaler": 1 / 4, + "sigma_sigma_scaler": 1 / 6, + "sigma_eps": 1 / 8, + # parameters for data split + "p_train": 0.75, + "p_test": 0.15, + "p_val": 0.1, } -) + # Set random seed + random.seed(args.rand_seed) -def move_data(target, source, start, num_file): - if not os.path.exists(target): - os.makedirs(target) - else: - # delete all files under the directory - filelist = [f for f in os.listdir(target)] - for f in filelist: - os.remove(os.path.join(target, f)) - # copy data from data dir to train dir - for i in range(start, num_file): - shutil.copy( - os.path.join(source, "data_{}.npy".format(i)), - os.path.join(target, "data_{}.npy".format(i)), + # ==== Setup Directories ====================== + problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( + parameters["z_min"], parameters["z_max"], + parameters["n_dist"],parameters["max_dist"], + parameters["lc"], parameters["n_samples"], + parameters["data_type"], parameters["scheme"], parameters["mesh_type"] ) + subdirs = ["data", "plot", "log", "mesh", "mesh_fine", + "train", "test", "val" + ] -project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__)))) -dataset_dir = os.path.join( - project_dir, "data", f"dataset_meshtype_{mesh_type}", problem -) # noqa -problem_specific_dir = os.path.join( - dataset_dir, - "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( - z_min, z_max, n_dist, max_dist, lc, n_samples, data_type, scheme, mesh_type - ), -) - - -problem_data_dir = os.path.join(problem_specific_dir, "data") -problem_plot_dir = os.path.join(problem_specific_dir, "plot") -problem_log_dir = os.path.join(problem_specific_dir, "log") - -problem_mesh_dir = os.path.join(problem_specific_dir, "mesh") -problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine") -problem_train_dir = os.path.join(problem_specific_dir, "train") -problem_test_dir = os.path.join(problem_specific_dir, "test") -problem_val_dir = os.path.join(problem_specific_dir, "val") - -if not os.path.exists(problem_mesh_dir): - os.makedirs(problem_mesh_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_dir, f)) - -if not os.path.exists(problem_mesh_fine_dir): - os.makedirs(problem_mesh_fine_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_fine_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_fine_dir, f)) - -if not os.path.exists(problem_data_dir): - os.makedirs(problem_data_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_data_dir)] - for f in filelist: - os.remove(os.path.join(problem_data_dir, f)) - -if not os.path.exists(problem_plot_dir): - os.makedirs(problem_plot_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_plot_dir)] - for f in filelist: - os.remove(os.path.join(problem_plot_dir, f)) - -if not os.path.exists(problem_log_dir): - os.makedirs(problem_log_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_log_dir)] - for f in filelist: - os.remove(os.path.join(problem_log_dir, f)) - -df.to_csv(os.path.join(problem_specific_dir, "info.csv")) - - -# ==== Data Generation Scripts ====================== -if __name__ == "__main__": + dirs = setup_directories(problem = parameters["problem"], + mesh_type = parameters["mesh_type"], + base_dir = None, + subdirs = subdirs, + dir_format = problem_specific_dir) + + # ==== Output CSV ====================== + key_list = [ + "cmin","cmax", "data_type", "scheme", "n_samples", "lc", "mesh_type" + ] + output_csv(parameters, key_list, dirs["data"]) + + # ==== Data Generation Scripts ====================== print("In build_dataset.py") - i = 0 - while i < n_samples: + # i = 0 + # while i < n_samples: + for i in range(parameters["n_samples"]): try: print("Generating Sample: " + str(i)) - rand_poly_mesh_gen = UM2N.UnstructuredRandomPolygonalMeshGenerator( - scale=scale_x, mesh_type=mesh_type - ) # noqa - mesh = rand_poly_mesh_gen.generate_mesh( - res=lc, output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh") - ) - num_boundary = rand_poly_mesh_gen.num_boundary - # Generate Random solution field - rand_u_generator = UM2N.RandSourceGenerator( - use_iso=use_iso, - dist_params={ - "max_dist": max_dist, - "n_dist": n_dist, - "x_start": 0, - "x_end": 1, - "y_start": 0, - "y_end": 1, - "z_max": z_max, - "z_min": z_min, - "w_min": w_min, - "w_max": w_max, - "c_min": c_min, - "c_max": c_max, - }, - ) - poisson_eq = UM2N.RandPoissonEqGenerator(rand_u_generator) - res = poisson_eq.discretise(mesh) # discretise the equation - dist_params = rand_u_generator.get_dist_params() - # Solve the equation - solver = UM2N.EquationSolver( - params={ - "function_space": res["function_space"], - "LHS": res["LHS"], - "RHS": res["RHS"], - "bc": res["bc"], - } - ) - uh = solver.solve_eq() - # Generate Mesh - hessian = UM2N.MeshGenerator( - params={ - "eq": poisson_eq, - "mesh": rand_poly_mesh_gen.generate_mesh( - res=lc, - output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), - ), - } - ).get_hessian(mesh) - - hessian_norm = UM2N.MeshGenerator( - params={ - "eq": poisson_eq, - "mesh": rand_poly_mesh_gen.generate_mesh( - res=lc, - output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), - ), - } - ).monitor_func(mesh) - - hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1)) - - func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) - grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space) - - mesh_gen = UM2N.MeshGenerator( - params={ - "eq": poisson_eq, - "mesh": rand_poly_mesh_gen.generate_mesh( - res=lc, - output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), - ), - } - ) - - start = time.perf_counter() - new_mesh = mesh_gen.move_mesh() - end = time.perf_counter() - dur = (end - start) * 1000 - - # this is the jacobian of x with respect to xi - jacobian = mesh_gen.get_jacobian() - jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1)) - jacobian_det = mesh_gen.get_jacobian_det() - jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1)) - - # get phi/grad_phi projected to the original mesh - phi = mesh_gen.get_phi() - grad_phi = mesh_gen.get_grad_phi() - - # solve the equation on the new mesh - new_res = poisson_eq.discretise(new_mesh) - new_solver = UM2N.EquationSolver( - params={ - "function_space": new_res["function_space"], - "LHS": new_res["LHS"], - "RHS": new_res["RHS"], - "bc": new_res["bc"], - } - ) - uh_new = new_solver.solve_eq() - - # process the data for training - mesh_processor = UM2N.MeshProcessor( - original_mesh=mesh, - optimal_mesh=new_mesh, - function_space=new_res["function_space"], - use_4_edge=False, - num_boundary=num_boundary, - feature={ - "uh": uh.dat.data_ro.reshape(-1, 1), - "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), - "hessian": hessian.dat.data_ro.reshape(-1, 4), - "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), - "jacobian": jacobian.dat.data_ro.reshape(-1, 4), - "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), - "phi": phi.dat.data_ro.reshape(-1, 1), - "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), - }, - raw_feature={ - "uh": uh, - "hessian_norm": hessian_norm, - "jacobian": jacobian, - "jacobian_det": jacobian_det, - }, - dist_params=dist_params, - poly_mesh=True, - ) - - mesh_processor.save_taining_data( - os.path.join(problem_data_dir, "data_{}".format(i)) - ) - - # ==== Plot Scripts ====================== - fig = plt.figure(figsize=(15, 10)) - ax1 = fig.add_subplot(2, 3, 1, projection="3d") - # Plot the exact solution - ax1.set_title("Exact Solution") - fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1) - # Plot the solved solution - ax2 = fig.add_subplot(2, 3, 2, projection="3d") - ax2.set_title("FEM Solution") - fd.trisurf(uh, axes=ax2) - - # Plot the solution on a optimal mesh - ax3 = fig.add_subplot(2, 3, 3, projection="3d") - ax3.set_title("FEM Solution on Optimal Mesh") - fd.trisurf(uh_new, axes=ax3) - - # Plot the mesh - ax4 = fig.add_subplot(2, 3, 4) - ax4.set_title("Original Mesh") - fd.triplot(mesh, axes=ax4) - ax5 = fig.add_subplot(2, 3, 5) - ax5.set_title("Optimal Mesh") - fd.triplot(new_mesh, axes=ax5) - - # plot mesh with function evaluated on it - ax6 = fig.add_subplot(2, 3, 6) - ax6.set_title("Soultion Projected on optimal mesh") - fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6) - fd.triplot(new_mesh, axes=ax6) - - fig.savefig(os.path.join(problem_plot_dir, "plot_{}.png".format(i))) - - # ========================================== - - # generate log file - high_res_mesh = rand_poly_mesh_gen.generate_mesh( - res=1e-2, - output_filename=os.path.join(problem_mesh_fine_dir, f"mesh{i}.msh"), - ) - - high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1) - - res_high_res = poisson_eq.discretise(high_res_mesh) - u_exact = fd.interpolate( - res_high_res["u_exact"], res_high_res["function_space"] - ) - - uh = fd.project(uh, high_res_function_space) - uh_new = fd.project(uh_new, high_res_function_space) - - error_original_mesh = fd.errornorm(u_exact, uh) - error_optimal_mesh = fd.errornorm(u_exact, uh_new) - - df = pd.DataFrame( - { - "error_og": error_original_mesh, - "error_adapt": error_optimal_mesh, - "time": dur, - }, - index=[0], - ) - df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i))) - print("error og/optimal:", error_original_mesh, error_optimal_mesh) - i += 1 + process_features(parameters, dirs) + # i += 1 except fd.exceptions.ConvergenceError: pass except AttributeError: @@ -419,14 +479,13 @@ def move_data(target, source, start, num_file): except ValueError: pass - move_data(problem_train_dir, problem_data_dir, 0, num_train) - move_data(problem_test_dir, problem_data_dir, num_train, num_train + num_test) - move_data( - problem_val_dir, - problem_data_dir, - num_train + num_test, - num_train + num_test + num_val, - ) -# ==== Data Generation Scripts ====================== + # ==== Data Splits ============================================ + num_train = int(parameters["n_samples"] * parameters["p_train"]) + num_test = int(parameters["n_samples"] * parameters["p_test"]) + num_val = parameters["n_samples"] - num_train - num_test + + move_data(dirs["train"], dirs["data"], 0, num_train) + move_data(dirs["test"], dirs["data"], num_train, num_train + num_test) + move_data(dirs["val"], dirs["data"], num_train + num_test, num_train + num_test + num_val) diff --git a/script/build_poisson_square.py b/script/build_poisson_square.py index 4bf9712..44b4f6e 100644 --- a/script/build_poisson_square.py +++ b/script/build_poisson_square.py @@ -1,6 +1,6 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w - +import csv import os import random import shutil @@ -9,399 +9,486 @@ import firedrake as fd import matplotlib.pyplot as plt -import pandas as pd +# import pandas as pd +from firedrake.__future__ import interpolate +# dd the parent directory to the Python path +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N +def parse_arguments(): + """Parse command-line arguments.""" + parser = ArgumentParser(description="Build Burgers dataset with square meshes.") + parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.") + parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.") + parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.") + parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.") + parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).") + # use padded scheme or full-scale scheme to sample central point of the bump # noqa + parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).") + parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated") + parser.add_argument("--rand_seed", type=int, default=63, help="Random seed") + + parsed_args = parser.parse_args() + + # Handle dependency between max_dist and n_dist + # max number of distributions used to generate the dataset + # only if n_dist is not set if n_dist is set, max_dist will be disabled + if parsed_args.n_dist is not None: + parsed_args.max_dist = None # Disable max_dist if n_dist is set + print("Warning: max_dist is ignored because n_dist is set.") + # QC: + print(parsed_args) + + return parser.parse_args() + +def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): + """ + Set up directories for storing data, plots, and logs. + + Args: + base_dir (str): Base directory for the project. + parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". + - "mesh_type" (int): Type of mesh used in the simulation (default: 0). + - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). + subdirs (list, optional): List of subdirectories to create. Defaults to: + ["data", "plot", "log", "mesh", "mesh_fine"]. + Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". + dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders + matching keys in the `parameters` dictionary. Example: + "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". + If not provided, raises a ValueError. + + Returns: + dict: A dictionary mapping subdirectory names to their full paths. + + Raises: + ValueError: If `dir_format` is not provided or is invalid. + """ + + # Define the project directory + if base_dir: + project_dir = os.path.abspath(base_dir) + else: + project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # QC: + print(f"Project Directory: {project_dir}") -def arg_parse(): - parser = ArgumentParser() - parser.add_argument( - "--mesh_type", type=int, default=2, help="algorithm used to generate mesh" - ) - parser.add_argument( - "--max_dist", - type=int, - default=6, - help="max number of distributions used to\ - generate the dataset (only works if\ - n_dist is not set)", - ) - parser.add_argument( - "--n_dist", - type=int, - default=None, - help="number of distributions used to\ - generate the dataset (this will disable\ - max_dist)", - ) - parser.add_argument( - "--lc", - type=float, - default=3e-2, - help="the length characteristic of the elements in the\ - mesh", + # Define the dataset directory + dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) + + # Use the provided format string for the problem-specific directory + if dir_format is None: + problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + else: + # check if dir_format is a valid string format + if not isinstance(dir_format, str): + raise ValueError("dir_format must be a string.") + problem_specific_dir = os.path.join(dataset_dir, dir_format) + + # Define default subdirectories if not provided + if subdirs is None: + subdirs = ["data", "plot", "log", "mesh", "mesh_fine", + "plot_compare", "train", "test", "val"] + + # Create and clear directories + directories = {} + for subdir in subdirs: + dir_path = os.path.join(problem_specific_dir, subdir) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + else: + # Clear the directory by removing all files + for file in os.listdir(dir_path): + os.remove(os.path.join(dir_path, file)) + directories[subdir] = dir_path + + # QC: + print(f"Subdirectories created: {directories}") + + return directories + +def output_csv(parameters, key_list, output_dir): + """ + Write selected parameters to a CSV file. + + Args: + parameters (dict): Dictionary of parameters to write. + key_list (list): List of keys to include in the CSV. + output_dir (str): Directory where the CSV file will be saved. + """ + # Filter parameters based on key_list + csv_keys = [key for key in key_list if key in parameters] + csv_data = [parameters[key] for key in csv_keys] + + # Define the output file path + csv_file_path = os.path.join(output_dir, "info.csv") + + # Write to CSV + with open(csv_file_path, mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(csv_keys) + # Write data (values) + csv_writer.writerow(csv_data) + +def move_data(target, source, start, num_files): + """ + Move data files from the source directory to the target directory. + + Args: + target (str): The path to the target directory. + source (str): The path to the source directory. + start (int): The starting index of the files to move. + num_files (int): The total number of files to move. + + Raises: + FileNotFoundError: If the source directory does not exist. + ValueError: If the start index or num_files is invalid. + """ + if not os.path.exists(source): + raise FileNotFoundError(f"Source directory '{source}' does not exist.") + + if start < 0 or num_files <= 0: + raise ValueError("Invalid start index or number of files to move.") + + # Create the target directory if it doesn't exist + if not os.path.exists(target): + os.makedirs(target) + else: + # Clear the target directory by removing all files + for file in os.listdir(target): + os.remove(os.path.join(target, file)) + + # Copy files sequentially starting from the specified index + for i in range(start, start + num_files): + try: + # Copy the data file + shutil.copy( + os.path.join(source, f"data_{i:04d}.npy"), + os.path.join(target, f"data_{i:04d}.npy"), + ) + except FileNotFoundError: + print(f"File data_{i:04d}.npy not found in {source}. Skipping.") + continue + except Exception as e: + print(f"An error occurred while copying data_{i:04d}.npy: {e}") + continue + +def process_features(parameters, problem_data_dir): + + # create mesh + scale_x = parameters["scale_x"] + mesh_type = parameters["mesh_type"] + lc = parameters["lc"] + unstructured_square_mesh_gen = UM2N.UnstructuredSquareMeshGenerator( + scale=scale_x, mesh_type=mesh_type + ) # noqa + mesh = unstructured_square_mesh_gen.generate_mesh( + res=lc, output_filename=os.path.join(directories["data"], f"mesh{i}.msh") ) - parser.add_argument( - "--field_type", - type=str, - default="aniso", - help="anisotropic or isotropic data type(aniso/iso)", + # Generate Random solution field + rand_u_generator = UM2N.RandSourceGenerator( + use_iso= parameters["data_type"] == "iso", + dist_params = parameters ) - # use padded scheme or full-scale scheme to sample central point of the bump # noqa - parser.add_argument( - "--boundary_scheme", - type=str, - default="full", - help="scheme used to generate the dataset (pad/full))", + + # generate equation + poisson_eq = UM2N.RandPoissonEqGenerator(rand_u_generator) + # discretise the equation + res = poisson_eq.discretise(mesh) + # get specific parameters used + dist_params = rand_u_generator.get_dist_params() + # Solve the equation + solver = UM2N.EquationSolver( + params={ + "function_space": res["function_space"], + "LHS": res["LHS"], + "RHS": res["RHS"], + "bc": res["bc"], + } ) - parser.add_argument( - "--n_samples", type=int, default=100, help="number of samples generated" + # original solution field + uh = solver.solve_eq() + + mesh_gen = UM2N.MeshGenerator(params={"eq": poisson_eq, "mesh": mesh}) + monitor_val = mesh_gen.monitor_func(mesh) + hessian = mesh_gen.get_hessian(mesh) + hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh), + fd.FunctionSpace(mesh, "CG", 1) + ) + + # Generate Mesh + # hessian = UM2N.MeshGenerator( + # params={ + # "eq": poisson_eq, + # "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")), # noqa + # } + # ).get_hessian(mesh) + + # hessian_norm = UM2N.MeshGenerator( + # params={ + # "eq": poisson_eq, + # "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")), # noqa + # } + # ).monitor_func(mesh) + + # hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1)) + + func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) + grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space) + + # ej321 - grad_norm copied from build_helmholtz_square.py + grad_norm = fd.Function(res["function_space"]) + grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) + grad_norm /= grad_norm.vector().max() + + # mesh_gen = UM2N.MeshGenerator( + # params={ + # "eq": poisson_eq, + # "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")), # noqa + # } + # ) + + start = time.perf_counter() + new_mesh = mesh_gen.move_mesh() + end = time.perf_counter() + dur = (end - start) * 1000 + + # this is the jacobian of x with respect to xi + jacobian = mesh_gen.get_jacobian() + jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1)) + jacobian_det = mesh_gen.get_jacobian_det() + jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1)) + + # get phi/grad_phi projected to the original mesh + phi = mesh_gen.get_phi() + # phi = fd.project( + # phi, fd.FunctionSpace(mesh, "CG", 1) + # ) + grad_phi = mesh_gen.get_grad_phi() + # grad_phi = fd.project( + # grad_phi, fd.VectorFunctionSpace(mesh, "CG", 1) + # ) + + # solve the equation on the new mesh + new_res = poisson_eq.discretise(new_mesh) + new_solver = UM2N.EquationSolver( + params={ + "function_space": new_res["function_space"], + "LHS": new_res["LHS"], + "RHS": new_res["RHS"], + "bc": new_res["bc"], + } ) - parser.add_argument( - "--rand_seed", type=int, default=63, help="number of samples generated" + uh_new = new_solver.solve_eq() + + # process the data for training + mesh_processor = UM2N.MeshProcessor( + original_mesh=mesh, + optimal_mesh=new_mesh, + function_space=new_res["function_space"], + use_4_edge=True, + feature={ + "uh": uh.dat.data_ro.reshape(-1, 1), + "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added grad_norm + "hessian": hessian.dat.data_ro.reshape(-1, 4), + "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), + "jacobian": jacobian.dat.data_ro.reshape(-1, 4), + "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), + "phi": phi.dat.data_ro.reshape(-1, 1), + "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added monitor_val + }, + raw_feature={ + "uh": uh, + "hessian_norm": hessian_norm, + "monitor_val": monitor_val, # ej321 - added monitor_val + "jacobian": jacobian, + "jacobian_det": jacobian_det, + }, + dist_params=dist_params, ) - args_ = parser.parse_args() - print(args_) - return args_ + mesh_processor.save_taining_data( + os.path.join(directories["data"], "data_{}".format(i)) + ) -args = arg_parse() - -mesh_type = args.mesh_type + # ==== Plot Scripts ====================== + fig = plt.figure(figsize=(15, 10)) + ax1 = fig.add_subplot(2, 3, 1, projection="3d") + # Plot the exact solution + ax1.set_title("Exact Solution") + fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1) + # Plot the solved solution + ax2 = fig.add_subplot(2, 3, 2, projection="3d") + ax2.set_title("FEM Solution") + fd.trisurf(uh, axes=ax2) + + # Plot the solution on a optimal mesh + ax3 = fig.add_subplot(2, 3, 3, projection="3d") + ax3.set_title("FEM Solution on Optimal Mesh") + fd.trisurf(uh_new, axes=ax3) + + # Plot the mesh + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_title("Original Mesh") + fd.triplot(mesh, axes=ax4) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_title("Optimal Mesh") + fd.triplot(new_mesh, axes=ax5) + + # plot mesh with function evaluated on it + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_title("Soultion Projected on optimal mesh") + fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6) + fd.triplot(new_mesh, axes=ax6) + + fig.savefig(os.path.join(directories["plot"], "plot_{}.png".format(i))) + # ========================================== + + # generate log file + high_res_mesh = unstructured_square_mesh_gen.generate_mesh( + res=1e-2, + output_filename=os.path.join(directories["mesh"], f"mesh{i}.msh"), + ) + high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1) -data_type = args.field_type -use_iso = True if data_type == "iso" else False + res_high_res = poisson_eq.discretise(high_res_mesh) + u_exact = fd.interpolate( + res_high_res["u_exact"], res_high_res["function_space"] + ) -rand_seed = args.rand_seed -random.seed(rand_seed) + uh = fd.project(uh, high_res_function_space) + uh_new = fd.project(uh_new, high_res_function_space) -# ==== Parameters ====================== -problem = "poisson" + error_original_mesh = fd.errornorm(u_exact, uh) + error_optimal_mesh = fd.errornorm(u_exact, uh_new) -n_samples = args.n_samples + # Write to CSV + with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(["error_og", "error_adapt", "time"]) + # Write data (values) + csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur]) + + print("error og/optimal:", error_original_mesh, error_optimal_mesh) + -# parameters for domain scale -scale_x = 1 -scale_y = 1 +if __name__ == "__main__": -# parameters for random source -max_dist = args.max_dist -n_dist = args.n_dist -lc = args.lc + # parse args + args = parse_arguments() + + # ==== Parameters ====================== + parameters = { + # parameters for problem + "problem": "poisson", + # "n_case": args.n_case, # burgers problem only + # parameters for random source + "n_dist": args.n_dist, + "max_dist": args.max_dist, + "lc": args.lc, + # "n_grig": args.n_grid, # burgers problem only + # parameters for ?????? + "n_samples": args.n_samples, + "data_type": args.field_type, + "scheme": args.boundary_scheme, + "mesh_type": int(args.mesh_type), + # parameters for domain scale + "scale_x": 1, + "scale_y": 1, + # parameters for anisotropic data - distribution height scaler + "z_max": 1, + "z_min": 0, + # parameters for ????? + "x_start": 0, + "x_end": 1, + "y_start": 0, + "y_end": 1, + # parameters for isotropic data + "w_min": 0.05, + "w_max": 0.2, + "c_min": 0.2 if args.boundary_scheme == "pad" else 0, + "c_max": 0.8 if args.boundary_scheme == "pad" else 1, + # parameters for dataset challenging level + # larger, less challenging (because the gaussian is more like a circle) + "sigma_mean_scaler": 1 / 4, + "sigma_sigma_scaler": 1 / 6, + "sigma_eps": 1 / 8, + # parameters for data split + "p_train": 0.75, + "p_test": 0.15, + "p_val": 0.1, + } -# parameters for anisotropic data - distribution height scaler -z_min = 0 -z_max = 1 + # Set random seed + random.seed(args.rand_seed) -# parameters for isotropic data -w_min = 0.05 -w_max = 0.2 + # ==== Setup Directories ====================== + problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( + parameters["z_min"], parameters["z_max"], + parameters["n_dist"],parameters["max_dist"], + parameters["lc"], parameters["n_samples"], + parameters["data_type"], parameters["scheme"], parameters["mesh_type"] + ) -scheme = args.boundary_scheme -c_min = 0.2 if scheme == "pad" else 0 -c_max = 0.8 if scheme == "pad" else 1 + subdirs = [ + "data", "plot", "log", "mesh", "mesh_fine", + "train", "test", "val", + ] -# parameters for data split -p_train = 0.75 -p_test = 0.15 -p_val = 0.1 + directories = setup_directories(problem = parameters["problem"], + mesh_type = parameters["mesh_type"], + base_dir = None, + subdirs = subdirs, + dir_format = problem_specific_dir) -num_train = int(n_samples * p_train) -num_test = int(n_samples * p_test) -num_val = int(n_samples * p_val) -# ======================================= + # ==== Output CSV ====================== + key_list = [ + "cmin","cmax", + "data_type", "scheme", "n_samples", "lc", "mesh_type" + ] + output_csv(parameters, key_list, directories["data"]) -df = pd.DataFrame( - { - "cmin": [c_min], - "cmax": [c_max], - "data_type": [data_type], - "scheme": [scheme], - "n_samples": [n_samples], - "lc": [lc], - "mesh_type": [mesh_type], - } -) + # ==== Data Generation Scripts ====================== + for i in range(parameters["n_samples"]): + try: + print(f"Generating Sample: {i}") + # create dataset + process_features(parameters, directories) -def move_data(target, source, start, num_file): - if not os.path.exists(target): - os.makedirs(target) - else: - # delete all files under the directory - filelist = [f for f in os.listdir(target)] - for f in filelist: - os.remove(os.path.join(target, f)) - # copy data from data dir to train dir - for i in range(start, num_file): - shutil.copy( - os.path.join(source, "data_{}.npy".format(i)), - os.path.join(target, "data_{}.npy".format(i)), - ) + except fd.exceptions.ConvergenceError: + print(f"Iteration {i} did not converge.") + continue + # ==== Data Splits ============================================ + num_train = int(parameters["n_samples"] * parameters["p_train"]) + num_test = int(parameters["n_samples"] * parameters["p_test"]) + num_val = parameters["n_samples"] - num_train - num_test -project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__)))) -dataset_dir = os.path.join( - project_dir, "data", f"dataset_meshtype_{mesh_type}", problem -) # noqa -problem_specific_dir = os.path.join( - dataset_dir, - "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( - z_min, z_max, n_dist, max_dist, lc, n_samples, data_type, scheme, mesh_type - ), -) - - -problem_data_dir = os.path.join(problem_specific_dir, "data") -problem_plot_dir = os.path.join(problem_specific_dir, "plot") -problem_log_dir = os.path.join(problem_specific_dir, "log") - -problem_mesh_dir = os.path.join(problem_specific_dir, "mesh") -problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine") -problem_train_dir = os.path.join(problem_specific_dir, "train") -problem_test_dir = os.path.join(problem_specific_dir, "test") -problem_val_dir = os.path.join(problem_specific_dir, "val") - -if not os.path.exists(problem_mesh_dir): - os.makedirs(problem_mesh_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_dir, f)) - -if not os.path.exists(problem_mesh_fine_dir): - os.makedirs(problem_mesh_fine_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_fine_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_fine_dir, f)) - -if not os.path.exists(problem_data_dir): - os.makedirs(problem_data_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_data_dir)] - for f in filelist: - os.remove(os.path.join(problem_data_dir, f)) - -if not os.path.exists(problem_plot_dir): - os.makedirs(problem_plot_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_plot_dir)] - for f in filelist: - os.remove(os.path.join(problem_plot_dir, f)) - -if not os.path.exists(problem_log_dir): - os.makedirs(problem_log_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_log_dir)] - for f in filelist: - os.remove(os.path.join(problem_log_dir, f)) - -df.to_csv(os.path.join(problem_specific_dir, "info.csv")) + move_data(directories["train"], directories["data"], 0, num_train) + move_data(directories["test"], directories["data"], num_train, num_train + num_test) + move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val) # ==== Data Generation Scripts ====================== if __name__ == "__main__": print("In build_dataset.py") - i = 0 - while i < n_samples: + # i = 0 + # while i < n_samples: + for i in range(parameters["n_samples"]): try: print("Generating Sample: " + str(i)) - unstructured_square_mesh_gen = UM2N.UnstructuredSquareMesh( - scale=scale_x, mesh_type=mesh_type - ) # noqa - mesh = unstructured_square_mesh_gen.generate_mesh( - res=lc, output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh") - ) - # Generate Random solution field - rand_u_generator = UM2N.RandSourceGenerator( - use_iso=use_iso, - dist_params={ - "max_dist": max_dist, - "n_dist": n_dist, - "x_start": 0, - "x_end": 1, - "y_start": 0, - "y_end": 1, - "z_max": z_max, - "z_min": z_min, - "w_min": w_min, - "w_max": w_max, - "c_min": c_min, - "c_max": c_max, - }, - ) - poisson_eq = UM2N.RandPoissonEqGenerator(rand_u_generator) - res = poisson_eq.discretise(mesh) # discretise the equation - dist_params = rand_u_generator.get_dist_params() - # Solve the equation - solver = UM2N.EquationSolver( - params={ - "function_space": res["function_space"], - "LHS": res["LHS"], - "RHS": res["RHS"], - "bc": res["bc"], - } - ) - uh = solver.solve_eq() - # Generate Mesh - hessian = UM2N.MeshGenerator( - params={ - "eq": poisson_eq, - "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")), # noqa - } - ).get_hessian(mesh) - - hessian_norm = UM2N.MeshGenerator( - params={ - "eq": poisson_eq, - "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")), # noqa - } - ).monitor_func(mesh) - - hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1)) - - func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) - grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space) - - mesh_gen = UM2N.MeshGenerator( - params={ - "eq": poisson_eq, - "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")), # noqa - } - ) - - start = time.perf_counter() - new_mesh = mesh_gen.move_mesh() - end = time.perf_counter() - dur = (end - start) * 1000 - - # this is the jacobian of x with respect to xi - jacobian = mesh_gen.get_jacobian() - jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1)) - jacobian_det = mesh_gen.get_jacobian_det() - jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1)) - - # get phi/grad_phi projected to the original mesh - phi = mesh_gen.get_phi() - # phi = fd.project( - # phi, fd.FunctionSpace(mesh, "CG", 1) - # ) - grad_phi = mesh_gen.get_grad_phi() - # grad_phi = fd.project( - # grad_phi, fd.VectorFunctionSpace(mesh, "CG", 1) - # ) - - # solve the equation on the new mesh - new_res = poisson_eq.discretise(new_mesh) - new_solver = UM2N.EquationSolver( - params={ - "function_space": new_res["function_space"], - "LHS": new_res["LHS"], - "RHS": new_res["RHS"], - "bc": new_res["bc"], - } - ) - uh_new = new_solver.solve_eq() - - # process the data for training - mesh_processor = UM2N.MeshProcessor( - original_mesh=mesh, - optimal_mesh=new_mesh, - function_space=new_res["function_space"], - use_4_edge=True, - feature={ - "uh": uh.dat.data_ro.reshape(-1, 1), - "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), - "hessian": hessian.dat.data_ro.reshape(-1, 4), - "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), - "jacobian": jacobian.dat.data_ro.reshape(-1, 4), - "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), - "phi": phi.dat.data_ro.reshape(-1, 1), - "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), - }, - raw_feature={ - "uh": uh, - "hessian_norm": hessian_norm, - "jacobian": jacobian, - "jacobian_det": jacobian_det, - }, - dist_params=dist_params, - ) - - mesh_processor.save_taining_data( - os.path.join(problem_data_dir, "data_{}".format(i)) - ) - - # ==== Plot Scripts ====================== - fig = plt.figure(figsize=(15, 10)) - ax1 = fig.add_subplot(2, 3, 1, projection="3d") - # Plot the exact solution - ax1.set_title("Exact Solution") - fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1) - # Plot the solved solution - ax2 = fig.add_subplot(2, 3, 2, projection="3d") - ax2.set_title("FEM Solution") - fd.trisurf(uh, axes=ax2) - - # Plot the solution on a optimal mesh - ax3 = fig.add_subplot(2, 3, 3, projection="3d") - ax3.set_title("FEM Solution on Optimal Mesh") - fd.trisurf(uh_new, axes=ax3) - - # Plot the mesh - ax4 = fig.add_subplot(2, 3, 4) - ax4.set_title("Original Mesh") - fd.triplot(mesh, axes=ax4) - ax5 = fig.add_subplot(2, 3, 5) - ax5.set_title("Optimal Mesh") - fd.triplot(new_mesh, axes=ax5) - - # plot mesh with function evaluated on it - ax6 = fig.add_subplot(2, 3, 6) - ax6.set_title("Soultion Projected on optimal mesh") - fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6) - fd.triplot(new_mesh, axes=ax6) - - fig.savefig(os.path.join(problem_plot_dir, "plot_{}.png".format(i))) - # ========================================== - - # generate log file - high_res_mesh = unstructured_square_mesh_gen.generate_mesh( - res=1e-2, - output_filename=os.path.join(problem_mesh_fine_dir, f"mesh{i}.msh"), - ) - high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1) - - res_high_res = poisson_eq.discretise(high_res_mesh) - u_exact = fd.interpolate( - res_high_res["u_exact"], res_high_res["function_space"] - ) - - uh = fd.project(uh, high_res_function_space) - uh_new = fd.project(uh_new, high_res_function_space) - - error_original_mesh = fd.errornorm(u_exact, uh) - error_optimal_mesh = fd.errornorm(u_exact, uh_new) - - df = pd.DataFrame( - { - "error_og": error_original_mesh, - "error_adapt": error_optimal_mesh, - "time": dur, - }, - index=[0], - ) - df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i))) - print("error og/optimal:", error_original_mesh, error_optimal_mesh) - i += 1 + # create dataset + process_features(parameters, directories) + # i += 1 except fd.exceptions.ConvergenceError: pass except AttributeError: @@ -409,14 +496,12 @@ def move_data(target, source, start, num_file): except ValueError: pass - move_data(problem_train_dir, problem_data_dir, 0, num_train) + # ==== Data Splits ============================================ + num_train = int(parameters["n_samples"] * parameters["p_train"]) + num_test = int(parameters["n_samples"] * parameters["p_test"]) + num_val = parameters["n_samples"] - num_train - num_test - move_data(problem_test_dir, problem_data_dir, num_train, num_train + num_test) + move_data(directories["train"], directories["data"], 0, num_train) + move_data(directories["test"], directories["data"], num_train, num_train + num_test) + move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val) - move_data( - problem_val_dir, - problem_data_dir, - num_train + num_test, - num_train + num_test + num_val, - ) -# ==== Data Generation Scripts ====================== diff --git a/script/build_swirl.py b/script/build_swirl.py index c6e0859..4ea7814 100644 --- a/script/build_swirl.py +++ b/script/build_swirl.py @@ -1,6 +1,6 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w - +import csv import os import shutil from argparse import ArgumentParser @@ -9,187 +9,179 @@ import matplotlib.pyplot as plt import pandas as pd -import UM2N - - -def arg_parse(): - parser = ArgumentParser() - parser.add_argument( - "--mesh_type", type=int, default=6, help="algorithm used to generate mesh" - ) - parser.add_argument( - "--sigma", - type=float, - default=(0.05 / 3), - help="sigma used to control the initial ring shape", - ) - parser.add_argument( - "--r_0", type=float, default=0.2, help="radius of the initial ring" - ) - parser.add_argument( - "--x_0", type=float, default=0.5, help="center of the ring in x" - ) - parser.add_argument( - "--y_0", type=float, default=0.5, help="center of the ring in y" - ) - parser.add_argument( - "--alpha", - type=float, - default=1.5, - help="scalar coefficient of the swirl (velocity)", - ) - parser.add_argument( - "--save_interval", type=int, default=10, help="interval for stroing sample file" - ) - parser.add_argument( - "--lc", - type=float, - default=5e-2, - help="the length characteristic of the elements in the\ - mesh (if using unstructured mesh)", - ) - parser.add_argument( - "--n_grid", - type=int, - default=20, - help="number of grids in a mesh (only appliable when\ - mesh_type is 0)", - ) - parser.add_argument( - "--n_monitor_smooth", - type=int, - default=10, - help="number of times for applying a Laplacian smoother for monitor function", - ) - args_ = parser.parse_args() - print(args_) - return args_ - +# import UM2N -args = arg_parse() +# import pandas as pd +from firedrake.__future__ import interpolate -mesh_type = args.mesh_type - -# ==== Parameters ====================== -problem = "swirl" +# dd the parent directory to the Python path +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import UM2N -# simulation time & time steps -T = 1 -# n_step = 1000 # * 2 # The CFL condition requires that the timestep is less than 0.0014 for fine mesh -# dt = T / n_step -dt = 1e-3 # * 2 # The CFL condition requires that the timestep is less than 0.0014 for fine mesh -n_step = 1000 +def parse_arguments(): + """Parse command-line arguments.""" + parser = ArgumentParser(description="Build Burgers dataset with square meshes.") + parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.") + parser.add_argument("--sigma", type=float, default=(0.05 / 3), help="initial ring shape control") + parser.add_argument("--r_0", type=float, default=0.2, help="initial ring radius") + parser.add_argument("--x_0", type=float, default=0.5, help="ring center x coordinate") + parser.add_argument("--y_0", type=float, default=0.5, help="ring center y coordinate") + parser.add_argument("--alpha", type=float, default=1.5, help="swirl (velocity) scalar coefficient") + parser.add_argument("--save_interval", type=int, default=10, help="output sample file interval") + parser.add_argument("--lc", type=float, default=5e-2, help="Length characteristic of unstructured mesh elements.") + parser.add_argument("--n_grid", type=int, default=20, help="number number of grids in a mesh when mesh_type is 0)") + parser.add_argument("--n_monitor_smooth", type=int, default=10, help="apply Laplacian smoother n time to monitor function") + + + parsed_args = parser.parse_args() + + # Handle dependency between max_dist and n_dist + # max number of distributions used to generate the dataset + # only if n_dist is not set if n_dist is set, max_dist will be disabled + # if parsed_args.n_dist is not None: + # parsed_args.max_dist = None # Disable max_dist if n_dist is set + # print("Warning: max_dist is ignored because n_dist is set.") + # QC: + print(parsed_args) + + return parser.parse_args() + + +def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): + """ + Set up directories for storing data, plots, and logs. + + Args: + base_dir (str): Base directory for the project. + parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". + - "mesh_type" (int): Type of mesh used in the simulation (default: 0). + - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). + subdirs (list, optional): List of subdirectories to create. Defaults to: + ["data", "plot", "log", "mesh", "mesh_fine"]. + Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". + dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders + matching keys in the `parameters` dictionary. Example: + "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". + If not provided, raises a ValueError. + + Returns: + dict: A dictionary mapping subdirectory names to their full paths. + + Raises: + ValueError: If `dir_format` is not provided or is invalid. + """ -# mesh setup -lc = args.lc -# n_grid = args.n_grid -n_grid = int(1 / lc) + # Define the project directory + if base_dir: + project_dir = os.path.abspath(base_dir) + else: + project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # QC: + print(f"Project Directory: {project_dir}") -# number of times for applying a Laplacian smoother for monitor function -n_monitor_smooth = args.n_monitor_smooth + # Define the dataset directory + dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) -# parameters for domain scale -scale_x = 1 -scale_y = 1 + # Use the provided format string for the problem-specific directory + if dir_format is None: + problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + else: + # check if dir_format is a valid string format + if not isinstance(dir_format, str): + raise ValueError("dir_format must be a string.") + problem_specific_dir = os.path.join(dataset_dir, dir_format) + + # Define default subdirectories if not provided + if subdirs is None: + subdirs = ["data", "plot", "log", "mesh", "mesh_fine", + "plot_compare", "train", "test", "val"] + + # Create and clear directories + directories = {} + for subdir in subdirs: + dir_path = os.path.join(problem_specific_dir, subdir) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + else: + # Clear the directory by removing all files + for file in os.listdir(dir_path): + os.remove(os.path.join(dir_path, file)) + directories[subdir] = dir_path + + # QC: + print(f"Subdirectories created: {directories}") + + return directories + +def output_csv(parameters, key_list, output_dir): + """ + Write selected parameters to a CSV file. -# params for initial condition -sigma = args.sigma -r_0 = args.r_0 -alpha = args.alpha -x_0 = args.x_0 -y_0 = args.y_0 + Args: + parameters (dict): Dictionary of parameters to write. + key_list (list): List of keys to include in the CSV. + output_dir (str): Directory where the CSV file will be saved. + """ + # Filter parameters based on key_list + csv_keys = [key for key in key_list if key in parameters] + csv_data = [parameters[key] for key in csv_keys] + + # Define the output file path + csv_file_path = os.path.join(output_dir, "info.csv") + + # Write to CSV + with open(csv_file_path, mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(csv_keys) + # Write data (values) + csv_writer.writerow(csv_data) + +def move_data(target, source, start, num_files): + """ + Move data files from the source directory to the target directory. -# params for stroing files -save_interval = args.save_interval -# list storing failing dts -fail_t = [] + Args: + target (str): The path to the target directory. + source (str): The path to the source directory. + start (int): The starting index of the files to move. + num_files (int): The total number of files to move. -# ======================================= + Raises: + FileNotFoundError: If the source directory does not exist. + ValueError: If the start index or num_files is invalid. + """ + if not os.path.exists(source): + raise FileNotFoundError(f"Source directory '{source}' does not exist.") + if start < 0 or num_files <= 0: + raise ValueError("Invalid start index or number of files to move.") -def move_data(target, source, start, num_file): + # Create the target directory if it doesn't exist if not os.path.exists(target): os.makedirs(target) else: - # delete all files under the directory - filelist = [f for f in os.listdir(target)] - for f in filelist: - os.remove(os.path.join(target, f)) - # copy data from data dir to train dir - for i in range(start, num_file): - shutil.copy( - os.path.join(source, "data_{}.npy".format(i)), - os.path.join(target, "data_{}.npy".format(i)), - ) - + # Clear the target directory by removing all files + for file in os.listdir(target): + os.remove(os.path.join(target, file)) + + # Copy files sequentially starting from the specified index + for i in range(start, start + num_files): + try: + # Copy the data file + shutil.copy( + os.path.join(source, f"data_{i:04d}.npy"), + os.path.join(target, f"data_{i:04d}.npy"), + ) + except FileNotFoundError: + print(f"File data_{i:04d}.npy not found in {source}. Skipping.") + continue + except Exception as e: + print(f"An error occurred while copying data_{i:04d}.npy: {e}") + continue -project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__)))) -dataset_dir = os.path.join( - project_dir, "data", f"dataset_meshtype_{mesh_type}", problem -) # noqa -problem_specific_dir = os.path.join( - dataset_dir, - f"sigma_{sigma:.3f}_alpha_{alpha}_r0_{r_0}_x0_{x_0}_y0_{y_0}_lc_{lc}_ngrid_{n_grid}_interval_{save_interval}_meshtype_{mesh_type}_smooth_{n_monitor_smooth}", -) # noqa - - -problem_data_dir = os.path.join(problem_specific_dir, "data") -problem_plot_dir = os.path.join(problem_specific_dir, "plot") -problem_plot_compare_dir = os.path.join(problem_specific_dir, "plot_compare") -problem_log_dir = os.path.join(problem_specific_dir, "log") -problem_mesh_dir = os.path.join(problem_specific_dir, "mesh") -problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine") - - -if not os.path.exists(problem_data_dir): - os.makedirs(problem_data_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_data_dir)] - for f in filelist: - os.remove(os.path.join(problem_data_dir, f)) - -if not os.path.exists(problem_plot_dir): - os.makedirs(problem_plot_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_plot_dir)] - for f in filelist: - os.remove(os.path.join(problem_plot_dir, f)) - -if not os.path.exists(problem_plot_compare_dir): - os.makedirs(problem_plot_compare_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_plot_compare_dir)] - for f in filelist: - os.remove(os.path.join(problem_plot_compare_dir, f)) - -if not os.path.exists(problem_log_dir): - os.makedirs(problem_log_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_log_dir)] - for f in filelist: - os.remove(os.path.join(problem_log_dir, f)) - -if not os.path.exists(problem_mesh_dir): - os.makedirs(problem_mesh_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_dir, f)) - -if not os.path.exists(problem_mesh_fine_dir): - os.makedirs(problem_mesh_fine_dir) -else: - # delete all files under the directory - filelist = [f for f in os.listdir(problem_mesh_fine_dir)] - for f in filelist: - os.remove(os.path.join(problem_mesh_fine_dir, f)) - -i = 0 def fail_callback(t): @@ -221,6 +213,8 @@ def sample_from_loop( sigma, alpha, r_0, + x_0, # ej321 - added x_0 + y_0, # ej321 - added y_0 t, error_og_list=[], error_adapt_list=[], @@ -267,7 +261,7 @@ def sample_from_loop( dur=dur, ) - mesh_processor.save_taining_data(os.path.join(problem_data_dir, f"data_{i:04d}")) + mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}")) # # ==== Plot Scripts ====================== # fig = plt.figure(figsize=(15, 10)) @@ -314,24 +308,24 @@ def sample_from_loop( # ax.legend() # plt.show() - # ========================================== + # ==== Log File ============================================ # function_space_fine = fd.FunctionSpace(mesh_fine, 'CG', 1) uh_proj = fd.project(uh, function_space_fine) uh_new_proj = fd.project(uh_new, function_space_fine) error_original_mesh = fd.errornorm(uh_proj, uh_fine, norm_type="L2") error_optimal_mesh = fd.errornorm(uh_new_proj, uh_fine, norm_type="L2") - df = pd.DataFrame( - { - "error_og": error_original_mesh, - "error_adapt": error_optimal_mesh, - "time": dur, - }, - index=[0], - ) - df.to_csv(os.path.join(problem_log_dir, f"log{i:04d}.csv")) + + # Write to CSV + with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(["error_og", "error_adapt", "time"]) + # Write data (values) + csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur]) print("error og/optimal:", error_original_mesh, error_optimal_mesh) + # ==== Plot mesh, solution, error ====================== rows, cols = 3, 3 fig, ax = plt.subplots( @@ -405,32 +399,141 @@ def sample_from_loop( for cc in range(cols): ax[rr, cc].set_aspect("equal", "box") - fig.savefig(os.path.join(problem_plot_compare_dir, f"plot_{i:04d}.png")) + fig.savefig(os.path.join(directories["plot_compare"], f"plot_{i:04d}.png")) plt.close() i += 1 return -# ==== Data Generation Scripts ====================== if __name__ == "__main__": + + # parse args + args = parse_arguments() + + # ==== Parameters ====================== + parameters = { + # parameters for problem + "problem": "swirl", + # parameters for simulation time & time steps + "T": 1, + "dt": 1e-3, # The CFL condition requires that the timestep is less than 0.0014 for fine mesh + "n_step": 1000, + # "n_case": args.n_case, # burgers problem only + # parameters for random source + # "n_dist": args.n_dist, + # "max_dist": args.max_dist, + "lc": args.lc, + "n_grid": args.n_grid if args.n_grid else int(1 / lc), + # parameters for ?????? + # "n_samples": args.n_samples, + # "data_type": args.field_type, + # "scheme": args.boundary_scheme, + "mesh_type": int(args.mesh_type), + "n_monitor_smooth": args.n_monitor_smooth, + # parameters for domain scale + "scale_x": 1, + "scale_y": 1, + # parameters for anisotropic data - distribution height scaler + # "z_max": 1, + # "z_min": 0, + # parameters for ????? + # "x_start": 0, + # "x_end": 1, + # "y_start": 0, + # "y_end": 1, + # parameters for initial condition + "sigma": args.sigma, + "r_0": args.r_0, + "x_0": args.x_0, + "y_0": args.y_0, + "alpha": args.alpha, + # parameters for storing files + "save_interval": args.save_interval, + "fail_t": [], # list storing failing dts + + # parameters for isotropic data + # "w_min": 0.05, + # "w_max": 0.2, + # "c_min": 0.2 if args.boundary_scheme == "pad" else 0, + # "c_max": 0.8 if args.boundary_scheme == "pad" else 1, + # parameters for dataset challenging level + # larger, less challenging (because the gaussian is more like a circle) + # "sigma_mean_scaler": 1 / 4, + # "sigma_sigma_scaler": 1 / 6, + # "sigma_eps": 1 / 8, + # parameters for data split + # "p_train": 0.75, + # "p_test": 0.15, + # "p_val": 0.1, + } + + # # Set random seed + # random.seed(args.rand_seed) + + # ==== Setup Directories ====================== + problem_specific_dir = "sigma_{:.3f}_alpha_{}_r0_{}_x0_{}_y0_{}_lc_{}_ngrid_{}_interval_{}_meshtype_{}_smooth_{}".format( + parameters["sigma"], parameters["alpha"], + parameters["r_0"], parameters["x_0"], parameters["y_0"], + parameters["lc"], parameters["n_grid"], + parameters["save_interval"], parameters["mesh_type"], + parameters["n_monitor_smooth"] + ) + + subdirs = [ + "data", "plot","plot_compare","log", "mesh", "mesh_fine", + # "train", "test", "val", + ] + + directories = setup_directories(problem = parameters["problem"], + mesh_type = parameters["mesh_type"], + base_dir = None, + subdirs = subdirs, + dir_format = problem_specific_dir) + + + # ==== Output CSV ====================== + key_list = [ + "sigma", + "alpha", + "r_0", + "x_0", + "y_0", + "save_interval", + "T", + "n_step", + "dt", + "fail_t", + "lc", + "fail_cases", + "mesh_type", + ] + output_csv(parameters, key_list, directories["data"]) + + # ==== Data Generation Scripts ====================== print("In build_dataset.py") + + i = 0 # global variable to count the number of samples mesh = None mesh_fine = None mesh_new = None + mesh_type = parameters["mesh_type"] + lc = parameters["lc"] + n_grid = parameters["n_grid"] if mesh_type != 0: - mesh_gen = UM2N.UnstructuredSquareMesh(mesh_type=mesh_type) + mesh_gen = UM2N.UnstructuredSquareMeshGenerator(mesh_type=mesh_type) mesh = mesh_gen.generate_mesh( - res=lc, output_filename=os.path.join(problem_mesh_dir, "mesh.msh") + res=lc, output_filename=os.path.join(directories["mesh"], "mesh.msh") ) mesh_new = mesh_gen.generate_mesh( - res=lc, output_filename=os.path.join(problem_mesh_dir, "mesh.msh") + res=lc, output_filename=os.path.join(directories["mesh"], "mesh.msh") ) mesh_model = mesh_gen.generate_mesh( - res=lc, output_filename=os.path.join(problem_mesh_dir, "mesh.msh") + res=lc, output_filename=os.path.join(directories["mesh"], "mesh.msh") ) - mesh_gen_fine = UM2N.UnstructuredSquareMesh(mesh_type=mesh_type) + # ej321 - is this extra call to mesh gen needed? + mesh_gen_fine = UM2N.UnstructuredSquareMeshGenerator(mesh_type=mesh_type) mesh_fine = mesh_gen_fine.generate_mesh( - res=1e-2, output_filename=os.path.join(problem_mesh_fine_dir, "mesh.msh") + res=1e-2, output_filename=os.path.join(directories["mesh_fine"], "mesh.msh") ) else: mesh = fd.UnitSquareMesh(n_grid, n_grid) @@ -438,45 +541,26 @@ def sample_from_loop( mesh_model = fd.UnitSquareMesh(n_grid, n_grid) mesh_fine = fd.UnitSquareMesh(100, 100) - df = pd.DataFrame( - { - "sigma": [sigma], - "alpha": [alpha], - "r_0": [r_0], - "x_0": [x_0], - "y_0": [y_0], - "save_interval": [save_interval], - "T": [T], - "n_step": [n_step], - "dt": [dt], - "fail_t": [fail_t], - "lc": [lc], - "num_fail_cases": [len(fail_t)], - "mesh_type": [mesh_type], - } - ) - - df.to_csv(os.path.join(problem_specific_dir, "info.csv")) - # solver defination swirl_solver = UM2N.SwirlSolver( mesh, mesh_fine, mesh_new, mesh_model=mesh_model, - sigma=sigma, - alpha=alpha, - r_0=r_0, - x_0=x_0, - y_0=y_0, - save_interval=save_interval, - T=T, - dt=dt, - n_step=n_step, - n_monitor_smooth=n_monitor_smooth, + **parameters + # sigma=sigma, + # alpha=alpha, + # r_0=r_0, + # x_0=x_0, + # y_0=y_0, + # save_interval=save_interval, + # T=T, + # dt=dt, + # n_step=n_step, + # n_monitor_smooth=n_monitor_smooth, ) swirl_solver.solve_problem(callback=sample_from_loop, fail_callback=fail_callback) print("Done!") -# ==== Data Generation Scripts ====================== + diff --git a/script/make_dataset_helm_train.sh b/script/make_dataset_helm_train.sh index eaaaa93..b919098 100644 --- a/script/make_dataset_helm_train.sh +++ b/script/make_dataset_helm_train.sh @@ -18,7 +18,7 @@ for mt in "${mesh_type[@]}"; do for i in "${lcs[@]}"; do for n_s in "${n_samples_train[@]}"; do echo "lc = $i meshtype = $mt num samples = $n_s" - python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_s --field_type="aniso" --boundary_scheme="full" --mesh_type=$mt + python build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_s --field_type="aniso" --boundary_scheme="full" --mesh_type=$mt # python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="pad" --mesh_type=$mesh_type # python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="full" --mesh_type=$mesh_type # python ./script/build_helmholtz_square.py --lc=$i --rand_seed $rand_seed --n_samples $n_samples_train --field_type "aniso" --boundary_scheme "pad" --mesh_type=$mesh_type From c4c8cf261bfcc3c57fc21ec9c53ae2175371aa92 Mon Sep 17 00:00:00 2001 From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com> Date: Fri, 4 Jul 2025 11:26:48 +0100 Subject: [PATCH 2/7] move partitioning to function and minor edits --- script/build_burgers_square.py | 209 ++++++++++++++------ script/build_helmholtz_poly.py | 274 ++++++++++++++++---------- script/build_helmholtz_square.py | 291 +++++++++++++++++---------- script/build_poisson_poly.py | 266 +++++++++++++++---------- script/build_poisson_square.py | 325 ++++++++++++++++--------------- script/build_swirl.py | 180 ++++++++++------- 6 files changed, 932 insertions(+), 613 deletions(-) diff --git a/script/build_burgers_square.py b/script/build_burgers_square.py index c82043a..6266040 100644 --- a/script/build_burgers_square.py +++ b/script/build_burgers_square.py @@ -9,28 +9,48 @@ import firedrake as fd import matplotlib.pyplot as plt -# import pandas as pd -# dd the parent directory to the Python path -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N def parse_arguments(): """Parse command-line arguments.""" parser = ArgumentParser(description="Build Burgers dataset with square meshes.") - parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.") - parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.") - parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.") - parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.") - parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).") + parser.add_argument( + "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh." + ) + parser.add_argument( + "--max_dist", type=int, default=6, help="Max number of distributions." + ) + parser.add_argument( + "--n_dist", type=int, default=None, help="Number of distributions." + ) + parser.add_argument( + "--lc", type=float, default=6e-2, help="Length characteristic of mesh elements." + ) + parser.add_argument( + "--field_type", type=str, default="iso", help="Data type (aniso/iso)." + ) # use padded scheme or full-scale scheme to sample central point of the bump # noqa - parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).") - parser.add_argument("--n_case", type=int, default=5, help="Number of simulation cases.") - parser.add_argument("--n_grid", type=int, default=20, help="Number of grids for uniform mesh if mesh_type 0.") - parser.add_argument("--rand_seed", type=int, default=63, help="number of samples generated / Random seed for reproducibility.") - + parser.add_argument( + "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)." + ) + parser.add_argument( + "--n_case", type=int, default=5, help="Number of simulation cases." + ) + parser.add_argument( + "--n_grid", + type=int, + default=20, + help="Number of grids for uniform mesh if mesh_type 0.", + ) + parser.add_argument( + "--rand_seed", + type=int, + default=63, + help="number of samples generated / Random seed for reproducibility.", + ) + parsed_args = parser.parse_args() # Handle dependency between max_dist and n_dist @@ -40,11 +60,12 @@ def parse_arguments(): parsed_args.max_dist = None # Disable max_dist if n_dist is set print("Warning: max_dist is ignored because n_dist is set.") # QC: - print(parsed_args) - + # print(parsed_args) + return parser.parse_args() -def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): + +def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): """ Set up directories for storing data, plots, and logs. @@ -73,16 +94,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form project_dir = os.path.abspath(base_dir) else: project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - + # QC: print(f"Project Directory: {project_dir}") # Define the dataset directory - dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) + dataset_dir = os.path.join( + project_dir, "data", f"dataset_meshtype_{mesh_type}", problem + ) # Use the provided format string for the problem-specific directory if dir_format is None: - problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + problem_specific_dir = os.path.join( + dataset_dir, f"{problem}_meshtype_{mesh_type}" + ) else: # check if dir_format is a valid string format if not isinstance(dir_format, str): @@ -91,8 +116,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form # Define default subdirectories if not provided if subdirs is None: - subdirs = ["data", "plot", "log", "mesh", "mesh_fine", - "plot_compare", "train", "test", "val"] + subdirs = [ + "data", + "plot", + "log", + "mesh", + "mesh_fine", + "plot_compare", + "train", + "test", + "val", + ] # Create and clear directories directories = {} @@ -107,10 +141,11 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form directories[subdir] = dir_path # QC: - print(f"Subdirectories created: {directories}") + # print(f"Subdirectories created: {directories}") return directories + def output_csv(parameters, key_list, output_dir): """ Write selected parameters to a CSV file. @@ -137,6 +172,7 @@ def output_csv(parameters, key_list, output_dir): print(f"Parameters saved to {csv_file_path}") + def move_data(target, source, start, num_files): """ Move data files from the source directory to the target directory. @@ -179,17 +215,22 @@ def move_data(target, source, start, num_files): except Exception as e: print(f"An error occurred while copying data_{i:04d}.npy: {e}") continue - -def generate_mesh(parameters, dirs): + + +def generate_mesh(parameters, directories): """Generate the mesh based on the specified type.""" if parameters["mesh_type"] != 0: - mesh_gen = UM2N.UnstructuredSquareMeshGenerator(scale=parameters["scale_x"], - mesh_type=parameters["mesh_type"]) - mesh = mesh_gen.generate_mesh(res=parameters["lc"], - output_filename=os.path.join(dirs["mesh"], "mesh.msh")) - mesh_new = fd.Mesh(os.path.join(dirs["mesh"], "mesh.msh")) - mesh_fine = mesh_gen.generate_mesh(res=1e-2, - output_filename=os.path.join(dirs["mesh_fine"], "mesh.msh")) + mesh_gen = UM2N.UnstructuredSquareMeshGenerator( + scale=parameters["scale_x"], mesh_type=parameters["mesh_type"] + ) + mesh = mesh_gen.generate_mesh( + res=parameters["lc"], + output_filename=os.path.join(directories["mesh"], "mesh.msh"), + ) + mesh_new = fd.Mesh(os.path.join(directories["mesh"], "mesh.msh")) + mesh_fine = mesh_gen.generate_mesh( + res=1e-2, output_filename=os.path.join(directories["mesh_fine"], "mesh.msh") + ) else: n_grid = parameters["n_grid"] mesh = fd.UnitSquareMesh(n_grid, n_grid) @@ -213,17 +254,49 @@ def get_sample_param_of_nu_generalization_by_idx_train(idx_in): 1: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.0001), 2: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.001), 3: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.002), - 4: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.0001), - 5: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.001), - 6: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.002), - 7: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.0001), - 8: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.001), - 9: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.002), + 4: ( + [{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], + 0.0001, + ), + 5: ( + [{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], + 0.001, + ), + 6: ( + [{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], + 0.002, + ), + 7: ( + [ + {"cx": 0.3, "cy": 0.7, "w": 0.01}, + {"cx": 0.3, "cy": 0.3, "w": 0.01}, + {"cx": 0.15, "cy": 0.5, "w": 0.01}, + ], + 0.0001, + ), + 8: ( + [ + {"cx": 0.3, "cy": 0.7, "w": 0.01}, + {"cx": 0.3, "cy": 0.3, "w": 0.01}, + {"cx": 0.15, "cy": 0.5, "w": 0.01}, + ], + 0.001, + ), + 9: ( + [ + {"cx": 0.3, "cy": 0.7, "w": 0.01}, + {"cx": 0.3, "cy": 0.3, "w": 0.01}, + {"cx": 0.15, "cy": 0.5, "w": 0.01}, + ], + 0.002, + ), } # Retrieve the parameters and viscosity for the given index if idx_in not in param_map: - raise ValueError(f"Invalid index: {idx_in}. Supported indices are {list(param_map.keys())}.") + raise ValueError( + f"Invalid index: {idx_in}. Supported indices are {list(param_map.keys())}." + ) params, nu_ = param_map[idx_in] # Ensure params is always a list @@ -231,6 +304,7 @@ def get_sample_param_of_nu_generalization_by_idx_train(idx_in): return gauss_list_, nu_ + # def get_sample_params(idx): # """Retrieve sample parameters for the Burgers problem.""" # return UM2N.get_sample_param_of_nu_generalization_by_idx_train(idx) @@ -299,6 +373,7 @@ def get_sample_param_of_nu_generalization_by_idx_train(idx_in): # {"error_og": [error_original_mesh], "error_adapt": [error_optimal_mesh], "time": [dur]} # ).to_csv(os.path.join(dirs["log"], f"log_{idx}.csv"), index=False) + def sample_from_loop( uh, uh_grad, @@ -334,7 +409,9 @@ def sample_from_loop( feature={ "uh": uh.dat.data_ro.reshape(-1, 1), "grad_uh": uh_grad.dat.data_ro.reshape(-1, 2), - "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added grad_norm + "grad_uh_norm": grad_norm.dat.data_ro.reshape( + -1, 1 + ), # ej321 - added grad_norm "hessian": hessian.dat.data_ro.reshape(-1, 4), "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), "jacobian": jacobian.dat.data_ro.reshape(-1, 4), @@ -346,7 +423,7 @@ def sample_from_loop( raw_feature={ "uh": uh, "hessian_norm": hessian_norm, - "monitor_val": monitor_val, # ej321 - added monitor_val + "monitor_val": monitor_val, # ej321 - added monitor_val "jacobian": jacobian, "jacobian_det": jacobian_det, }, @@ -357,9 +434,7 @@ def sample_from_loop( idx=idx, ) - mesh_processor.save_taining_data( - os.path.join(dirs["data"], "data_{}".format(i)) - ) + mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}")) # ==== Plot Scripts ====================== fig = plt.figure(figsize=(15, 10)) @@ -391,7 +466,7 @@ def sample_from_loop( fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6) fd.triplot(mesh_new, axes=ax6) - fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i))) + fig.savefig(os.path.join(directories["plot"], "plot_{}.png".format(i))) i += 1 # fig, ax = plt.subplots() @@ -408,9 +483,10 @@ def sample_from_loop( error_original_mesh = fd.errornorm(uh, uh_fine, norm_type="L2") error_optimal_mesh = fd.errornorm(uh_new, uh_fine, norm_type="L2") - # Write to CSV - with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + with open( + os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="" + ) as csvfile: csv_writer = csv.writer(csvfile) # Write header (keys) csv_writer.writerow(["error_og", "error_adapt", "time"]) @@ -432,7 +508,6 @@ def sample_from_loop( if __name__ == "__main__": - # parse args args = parse_arguments() @@ -481,7 +556,6 @@ def sample_from_loop( # Set random seed random.seed(args.rand_seed) - # ==== Setup Directories ====================== problem_specific_dir = "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}".format( lc=parameters["lc"], @@ -492,21 +566,29 @@ def sample_from_loop( mesh_type=parameters["mesh_type"], ) - subdirs = ["data", "plot", "log", "mesh", "mesh_fine", - "plot_compare", "train", "test", "val" - ] + subdirs = [ + "data", + "plot", + "log", + "mesh", + "mesh_fine", + "plot_compare", + "train", + "test", + "val", + ] - dirs = setup_directories(problem = parameters["problem"], - mesh_type = parameters["mesh_type"], - base_dir = None, - subdirs = subdirs, - dir_format = problem_specific_dir) + directories = setup_directories( + problem=parameters["problem"], + mesh_type=parameters["mesh_type"], + base_dir=None, + subdirs=subdirs, + dir_format=problem_specific_dir, + ) # ==== Output CSV ====================== - key_list = [ - "cmin","cmax", "data_type", "scheme", "lc", "mesh_type" - ] - output_csv(parameters, key_list, dirs["data"]) + key_list = ["cmin", "cmax", "data_type", "scheme", "lc", "mesh_type"] + output_csv(parameters, key_list, directories["log"]) # ==== Data Generation Scripts ====================== @@ -519,12 +601,11 @@ def sample_from_loop( try: # QC: print(f"Case {idx} building ...") - mesh, mesh_new, mesh_fine = generate_mesh(parameters, dirs) + mesh, mesh_new, mesh_fine = generate_mesh(parameters, directories) # Generate Random solution field gaussian_list, nu = get_sample_param_of_nu_generalization_by_idx_train(idx) # noqa solver = UM2N.BurgersSolver( - mesh, mesh_fine, mesh_new, - gauss_list=gaussian_list, nu=nu, idx=idx + mesh, mesh_fine, mesh_new, gauss_list=gaussian_list, nu=nu, idx=idx ) solver.solve_problem(sample_from_loop) print() diff --git a/script/build_helmholtz_poly.py b/script/build_helmholtz_poly.py index 870820d..c7bddb8 100644 --- a/script/build_helmholtz_poly.py +++ b/script/build_helmholtz_poly.py @@ -10,27 +10,40 @@ import firedrake as fd import matplotlib.pyplot as plt import numpy as np + # import pandas as pd from firedrake.__future__ import interpolate -# dd the parent directory to the Python path -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N + def parse_arguments(): """Parse command-line arguments.""" parser = ArgumentParser(description="Build Burgers dataset with square meshes.") - parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.") - parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.") - parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.") - parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.") - parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).") + parser.add_argument( + "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh." + ) + parser.add_argument( + "--max_dist", type=int, default=6, help="Max number of distributions." + ) + parser.add_argument( + "--n_dist", type=int, default=None, help="Number of distributions." + ) + parser.add_argument( + "--lc", type=float, default=6e-2, help="Length characteristic of mesh elements." + ) + parser.add_argument( + "--field_type", type=str, default="iso", help="Data type (aniso/iso)." + ) # use padded scheme or full-scale scheme to sample central point of the bump # noqa - parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).") - parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated") + parser.add_argument( + "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)." + ) + parser.add_argument( + "--n_samples", type=int, default=100, help="Number of samples generated" + ) parser.add_argument("--rand_seed", type=int, default=63, help="Random seed") - + parsed_args = parser.parse_args() # Handle dependency between max_dist and n_dist @@ -40,11 +53,12 @@ def parse_arguments(): parsed_args.max_dist = None # Disable max_dist if n_dist is set print("Warning: max_dist is ignored because n_dist is set.") # QC: - print(parsed_args) - + # print(parsed_args) + return parser.parse_args() -def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): + +def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): """ Set up directories for storing data, plots, and logs. @@ -73,16 +87,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form project_dir = os.path.abspath(base_dir) else: project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - + # QC: print(f"Project Directory: {project_dir}") # Define the dataset directory - dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) + dataset_dir = os.path.join( + project_dir, "data", f"dataset_meshtype_{mesh_type}", problem + ) # Use the provided format string for the problem-specific directory if dir_format is None: - problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + problem_specific_dir = os.path.join( + dataset_dir, f"{problem}_meshtype_{mesh_type}" + ) else: # check if dir_format is a valid string format if not isinstance(dir_format, str): @@ -91,8 +109,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form # Define default subdirectories if not provided if subdirs is None: - subdirs = ["data", "plot", "log", "mesh", "mesh_fine", - "plot_compare", "train", "test", "val"] + subdirs = [ + "data", + "plot", + "log", + "mesh", + "mesh_fine", + "plot_compare", + "train", + "test", + "val", + ] # Create and clear directories directories = {} @@ -107,10 +134,11 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form directories[subdir] = dir_path # QC: - print(f"Subdirectories created: {directories}") + # print(f"Subdirectories created: {directories}") return directories + def output_csv(parameters, key_list, output_dir): """ Write selected parameters to a CSV file. @@ -135,51 +163,73 @@ def output_csv(parameters, key_list, output_dir): # Write data (values) csv_writer.writerow(csv_data) -def move_data(target, source, start, num_files): + +def split_data( + source_dir, + train_dir, + test_dir, + val_dir, + train_ratio=0.75, + test_ratio=0.15, + val_ratio=0.1, +): """ - Move data files from the source directory to the target directory. + Split files in a source directory into train, test, and validation directories. Args: - target (str): The path to the target directory. - source (str): The path to the source directory. - start (int): The starting index of the files to move. - num_files (int): The total number of files to move. + source_dir (str): Path to the source directory containing files. + train_dir (str): Path to the train directory. + test_dir (str): Path to the test directory. + val_dir (str): Path to the validation directory. + train_ratio (float): Proportion of files to allocate to the train set. + test_ratio (float): Proportion of files to allocate to the test set. + val_ratio (float): Proportion of files to allocate to the validation set. Raises: - FileNotFoundError: If the source directory does not exist. - ValueError: If the start index or num_files is invalid. + ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1. """ - if not os.path.exists(source): - raise FileNotFoundError(f"Source directory '{source}' does not exist.") - - if start < 0 or num_files <= 0: - raise ValueError("Invalid start index or number of files to move.") + # Validate ratios + if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1): + raise ValueError("Ratios must be between 0 and 1.") + if train_ratio + test_ratio + val_ratio != 1: + raise ValueError( + "The sum of train_ratio, test_ratio, and val_ratio must equal 1." + ) - # Create the target directory if it doesn't exist - if not os.path.exists(target): - os.makedirs(target) - else: - # Clear the target directory by removing all files - for file in os.listdir(target): - os.remove(os.path.join(target, file)) + # Get all files in the source directory + files = [ + f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)) + ] + random.shuffle(files) # Shuffle files for unbiased distribution - # Copy files sequentially starting from the specified index - for i in range(start, start + num_files): - try: - # Copy the data file + # QC: + # print(f'files {files}') + + # Calculate split indices - preference train > test > val + total_files = len(files) + num_train = int(total_files * train_ratio) + num_test = max(int(total_files * test_ratio), total_files - num_train) + num_val = total_files - num_train - num_test + + # Distribute files + train_files = files[:num_train] + test_files = files[num_train : num_train + num_test] + val_files = files[num_train + num_test :] + + for datafiles, target_dir in zip( + [train_files, test_files, val_files], [train_dir, test_dir, val_dir] + ): + for datafile in datafiles: shutil.copy( - os.path.join(source, f"data_{i:04d}.npy"), - os.path.join(target, f"data_{i:04d}.npy"), + os.path.join(source_dir, datafile), os.path.join(target_dir, datafile) ) - except FileNotFoundError: - print(f"File data_{i:04d}.npy not found in {source}. Skipping.") - continue - except Exception as e: - print(f"An error occurred while copying data_{i:04d}.npy: {e}") - continue -def process_features(parameters, dirs): + print( + f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files." + ) + +def process_features(parameters, directories): mesh_type = parameters["mesh_type"] scale_x = parameters["scale_x"] lc = parameters["lc"] @@ -189,14 +239,13 @@ def process_features(parameters, dirs): scale=scale_x, mesh_type=mesh_type ) # noqa mesh = rand_poly_mesh_gen.generate_mesh( - res=lc, output_filename=os.path.join(dirs["mesh"], f"mesh{i}.msh") + res=lc, output_filename=os.path.join(directories["mesh"], f"mesh{i}.msh") ) num_boundary = rand_poly_mesh_gen.num_boundary # Generate Random solution field rand_u_generator = UM2N.RandSourceGenerator( - use_iso= parameters["data_type"] == "iso", - dist_params= parameters + use_iso=parameters["data_type"] == "iso", dist_params=parameters ) # generate equation @@ -243,10 +292,9 @@ def process_features(parameters, dirs): mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh}) monitor_val = mesh_gen.monitor_func(mesh) hessian = mesh_gen.get_hessian(mesh) - hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh), - fd.FunctionSpace(mesh, "CG", 1) - ) - + hessian_norm = fd.project( + mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1) + ) # move the mesh? start = time.perf_counter() @@ -286,7 +334,7 @@ def process_features(parameters, dirs): feature={ "uh": uh.dat.data_ro.reshape(-1, 1), "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), - "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), #ej321 - added + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added "hessian": hessian.dat.data_ro.reshape(-1, 4), "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), "jacobian": jacobian.dat.data_ro.reshape(-1, 4), @@ -294,13 +342,13 @@ def process_features(parameters, dirs): "phi": phi.dat.data_ro.reshape(-1, 1), "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), "f": f_rhs.dat.data_ro.reshape(-1, 1), - "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added }, raw_feature={ "uh": uh, "hessian_norm": hessian_norm, - "monitor_val": monitor_val, # ej321 - added - "grad_uh_norm": grad_norm, # ej321 - added needed for poly only + "monitor_val": monitor_val, # ej321 - added + "grad_uh_norm": grad_norm, # ej321 - added needed for poly only "jacobian": jacobian, "jacobian_det": jacobian_det, }, @@ -309,16 +357,16 @@ def process_features(parameters, dirs): ) # save out data - mesh_processor.save_taining_data( - os.path.join(dirs["data"], "data_{}".format(i)) - ) + mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}")) # ==== Plot Scripts ====================== fig = plt.figure(figsize=(15, 10)) ax1 = fig.add_subplot(2, 3, 1, projection="3d") # Plot the exact solution ax1.set_title("Exact Solution") - fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1) + fd.trisurf( + fd.assemble(interpolate(res["u_exact"], res["function_space"])), axes=ax1 + ) # Plot the solved solution ax2 = fig.add_subplot(2, 3, 2, projection="3d") ax2.set_title("FEM Solution") @@ -343,21 +391,20 @@ def process_features(parameters, dirs): fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6) fd.triplot(new_mesh, axes=ax6) - fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i))) - + fig.savefig(os.path.join(directories["plot"], "plot_{}.png".format(i))) # ==== Log File ============================================ high_res_mesh = rand_poly_mesh_gen.generate_mesh( res=1e-2, - output_filename=os.path.join(dirs["mesh_fine"], f"mesh{i}.msh"), + output_filename=os.path.join(directories["mesh_fine"], f"mesh{i}.msh"), ) high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1) res_high_res = helmholtz_eq.discretise(high_res_mesh) - u_exact = fd.assemble(interpolate(res_high_res["u_exact"], - res_high_res["function_space"]) - ) + u_exact = fd.assemble( + interpolate(res_high_res["u_exact"], res_high_res["function_space"]) + ) uh_proj = fd.project(uh, high_res_function_space) uh_new_proj = fd.project(uh_new, high_res_function_space) @@ -376,7 +423,9 @@ def process_features(parameters, dirs): # df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i))) # Write to CSV - with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + with open( + os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="" + ) as csvfile: csv_writer = csv.writer(csvfile) # Write header (keys) csv_writer.writerow(["error_og", "error_adapt", "time"]) @@ -385,12 +434,10 @@ def process_features(parameters, dirs): print("error og/optimal:", error_original_mesh, error_optimal_mesh) - if __name__ == "__main__": - # parse args args = parse_arguments() - + # ==== Parameters ====================== parameters = { # parameters for problem @@ -438,42 +485,54 @@ def process_features(parameters, dirs): np.random.seed(args.rand_seed) # ==== Setup Directories ====================== - problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( - parameters["z_min"], parameters["z_max"], - parameters["n_dist"],parameters["max_dist"], - parameters["lc"], parameters["n_samples"], - parameters["data_type"], parameters["scheme"], parameters["mesh_type"] + problem_specific_dir = ( + "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( + parameters["z_min"], + parameters["z_max"], + parameters["n_dist"], + parameters["max_dist"], + parameters["lc"], + parameters["n_samples"], + parameters["data_type"], + parameters["scheme"], + parameters["mesh_type"], ) + ) subdirs = [ - "data", "plot", "plot_compare", "log", "mesh", "mesh_fine", - "train", "test", "val", + "data", + "plot", + "plot_compare", + "log", + "mesh", + "mesh_fine", + "train", + "test", + "val", ] - dirs = setup_directories(problem = parameters["problem"], - mesh_type = parameters["mesh_type"], - base_dir = None, - subdirs = subdirs, - dir_format = problem_specific_dir) - + directories = setup_directories( + problem=parameters["problem"], + mesh_type=parameters["mesh_type"], + base_dir=None, + subdirs=subdirs, + dir_format=problem_specific_dir, + ) # ==== Output CSV ====================== - key_list = [ - "cmin","cmax", - "data_type", "scheme", "n_samples", "lc", "mesh_type" - ] - output_csv(parameters, key_list, dirs["data"]) + key_list = ["cmin", "cmax", "data_type", "scheme", "n_samples", "lc", "mesh_type"] + output_csv(parameters, key_list, directories["log"]) # ==== Data Generation Scripts ====================== - # QC: + # QC: print("In build_dataset.py") # i = 0 # while i < n_samples: for i in range(parameters["n_samples"]): try: print("Generating Sample: " + str(i)) - - process_features(parameters, dirs) + + process_features(parameters, directories) # i += 1 except fd.exceptions.ConvergenceError: print(f"Iteration {i} did not converge.") @@ -484,10 +543,13 @@ def process_features(parameters, dirs): pass # ==== Data Splits ============================================ - num_train = int(parameters["n_samples"] * parameters["p_train"]) - num_test = int(parameters["n_samples"] * parameters["p_test"]) - num_val = parameters["n_samples"] - num_train - num_test - - move_data(dirs["train"], dirs["data"], 0, num_train) - move_data(dirs["test"], dirs["data"], num_train, num_train + num_test) - move_data(dirs["val"], dirs["data"], num_train + num_test, num_train + num_test + num_val) + # TODO: this should probably be done in the training script, not the build script + split_data( + source_dir=directories["data"], + train_dir=directories["train"], + test_dir=directories["test"], + val_dir=directories["val"], + train_ratio=parameters["p_train"], + test_ratio=parameters["p_test"], + val_ratio=parameters["p_val"], + ) diff --git a/script/build_helmholtz_square.py b/script/build_helmholtz_square.py index c66d46f..ca85846 100644 --- a/script/build_helmholtz_square.py +++ b/script/build_helmholtz_square.py @@ -12,24 +12,34 @@ import matplotlib.pyplot as plt from firedrake.__future__ import interpolate - -# dd the parent directory to the Python path -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N def parse_arguments(): """Parse command-line arguments.""" parser = ArgumentParser() - parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh") - parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions") - parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions") - parser.add_argument("--lc", type=float, default=5e-2, help="Length characteristic of mesh elements") - parser.add_argument("--field_type", type=str, default="aniso", help="Data type (aniso/iso)") + parser.add_argument( + "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh" + ) + parser.add_argument( + "--max_dist", type=int, default=6, help="Max number of distributions" + ) + parser.add_argument( + "--n_dist", type=int, default=None, help="Number of distributions" + ) + parser.add_argument( + "--lc", type=float, default=5e-2, help="Length characteristic of mesh elements" + ) + parser.add_argument( + "--field_type", type=str, default="aniso", help="Data type (aniso/iso)" + ) # use padded scheme or full-scale scheme to sample central point of the bump # noqa - parser.add_argument("--boundary_scheme", type=str, default="full", help="Boundary scheme (pad/full)") - parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated") + parser.add_argument( + "--boundary_scheme", type=str, default="full", help="Boundary scheme (pad/full)" + ) + parser.add_argument( + "--n_samples", type=int, default=100, help="Number of samples generated" + ) parser.add_argument("--rand_seed", type=int, default=63, help="Random seed") parsed_args = parser.parse_args() @@ -41,12 +51,12 @@ def parse_arguments(): parsed_args.max_dist = None # Disable max_dist if n_dist is set print("Warning: max_dist is ignored because n_dist is set.") # QC: - print(parsed_args) + # print(parsed_args) return parsed_args -def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): +def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): """ Set up directories for storing data, plots, and logs. @@ -75,16 +85,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form project_dir = os.path.abspath(base_dir) else: project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - + # QC: print(f"Project Directory: {project_dir}") # Define the dataset directory - dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) + dataset_dir = os.path.join( + project_dir, "data", f"dataset_meshtype_{mesh_type}", problem + ) # Use the provided format string for the problem-specific directory if dir_format is None: - problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + problem_specific_dir = os.path.join( + dataset_dir, f"{problem}_meshtype_{mesh_type}" + ) else: # check if dir_format is a valid string format if not isinstance(dir_format, str): @@ -93,8 +107,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form # Define default subdirectories if not provided if subdirs is None: - subdirs = ["data", "plot", "log", "mesh", "mesh_fine", - "plot_compare", "train", "test", "val"] + subdirs = [ + "data", + "plot", + "log", + "mesh", + "mesh_fine", + "plot_compare", + "train", + "test", + "val", + ] # Create and clear directories directories = {} @@ -109,52 +132,10 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form directories[subdir] = dir_path # QC: - print(f"Subdirectories created: {directories}") + # print(f"Subdirectories created: {directories}") return directories -def move_data(target, source, start, num_files): - """ - Move data files from the source directory to the target directory. - - Args: - target (str): The path to the target directory. - source (str): The path to the source directory. - start (int): The starting index of the files to move. - num_files (int): The total number of files to move. - - Raises: - FileNotFoundError: If the source directory does not exist. - ValueError: If the start index or num_files is invalid. - """ - if not os.path.exists(source): - raise FileNotFoundError(f"Source directory '{source}' does not exist.") - - if start < 0 or num_files <= 0: - raise ValueError("Invalid start index or number of files to move.") - - # Create the target directory if it doesn't exist - if not os.path.exists(target): - os.makedirs(target) - else: - # Clear the target directory by removing all files - for file in os.listdir(target): - os.remove(os.path.join(target, file)) - - # Copy files sequentially starting from the specified index - for i in range(start, start + num_files): - try: - # Copy the data file - shutil.copy( - os.path.join(source, f"data_{i:04d}.npy"), - os.path.join(target, f"data_{i:04d}.npy"), - ) - except FileNotFoundError: - print(f"File data_{i:04d}.npy not found in {source}. Skipping.") - continue - except Exception as e: - print(f"An error occurred while copying data_{i:04d}.npy: {e}") - continue def create_mesh(i, mesh_type, lc, scale_x, problem_mesh_dir): """ @@ -184,16 +165,17 @@ def create_mesh(i, mesh_type, lc, scale_x, problem_mesh_dir): def process_features(parameters, directories): - # create mesh mesh = create_mesh( - i, mesh_type = parameters["mesh_type"], lc = parameters["lc"], - scale_x = parameters["scale_x"], problem_mesh_dir = directories["mesh"] + i, + mesh_type=parameters["mesh_type"], + lc=parameters["lc"], + scale_x=parameters["scale_x"], + problem_mesh_dir=directories["mesh"], ) # Generate Random solution field rand_u_generator = UM2N.RandSourceGenerator( - use_iso= parameters["data_type"] == "iso", - dist_params = parameters + use_iso=parameters["data_type"] == "iso", dist_params=parameters ) # generate equation @@ -214,9 +196,9 @@ def process_features(parameters, directories): # original solution field uh = solver.solve_eq() - grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), - fd.VectorFunctionSpace(mesh, "CG", 1) - )) + grad_uh_interpolate = fd.assemble( + interpolate(fd.grad(uh), fd.VectorFunctionSpace(mesh, "CG", 1)) + ) grad_norm = fd.Function(res["function_space"]) grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) grad_norm /= grad_norm.vector().max() @@ -225,15 +207,14 @@ def process_features(parameters, directories): # RHS of helmholtz problem f_rhs = fd.assemble(interpolate(helmholtz_eq.f, helmholtz_eq.function_space)) - # generate mesh? mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh}) monitor_val = mesh_gen.monitor_func(mesh) hessian = mesh_gen.get_hessian(mesh) - hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh), - fd.FunctionSpace(mesh, "CG", 1) - ) + hessian_norm = fd.project( + mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1) + ) # move the mesh? start = time.perf_counter() @@ -293,20 +274,21 @@ def process_features(parameters, directories): ) # save out data - mesh_processor.save_taining_data( - os.path.join(directories["data"], f"data_{i:04d}") - ) + mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}")) # ==== Log File ============================================ high_res_mesh = create_mesh( - i, mesh_type = parameters["mesh_type"], lc = 1e-2, - scale_x = parameters["scale_x"], problem_mesh_dir = directories["mesh_fine"] + i, + mesh_type=parameters["mesh_type"], + lc=1e-2, + scale_x=parameters["scale_x"], + problem_mesh_dir=directories["mesh_fine"], ) res_high_res = helmholtz_eq.discretise(high_res_mesh) - u_exact = fd.assemble(interpolate(res_high_res["u_exact"], - res_high_res["function_space"]) - ) + u_exact = fd.assemble( + interpolate(res_high_res["u_exact"], res_high_res["function_space"]) + ) uh_proj = fd.project(uh, fd.FunctionSpace(high_res_mesh, "CG", 1)) uh_new_proj = fd.project(uh_new, fd.FunctionSpace(high_res_mesh, "CG", 1)) @@ -315,7 +297,9 @@ def process_features(parameters, directories): error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj) # Write to CSV - with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + with open( + os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="" + ) as csvfile: csv_writer = csv.writer(csvfile) # Write header (keys) csv_writer.writerow(["error_og", "error_adapt", "time"]) @@ -410,6 +394,7 @@ def process_features(parameters, directories): fig.savefig(os.path.join(directories["plot_compare"], f"plot_{i:04d}.png")) plt.close() + def output_csv(parameters, key_list, output_dir): """ Write selected parameters to a CSV file. @@ -436,11 +421,76 @@ def output_csv(parameters, key_list, output_dir): print(f"Parameters saved to {csv_file_path}") -if __name__ == "__main__": +def split_data( + source_dir, + train_dir, + test_dir, + val_dir, + train_ratio=0.75, + test_ratio=0.15, + val_ratio=0.1, +): + """ + Split files in a source directory into train, test, and validation directories. + + Args: + source_dir (str): Path to the source directory containing files. + train_dir (str): Path to the train directory. + test_dir (str): Path to the test directory. + val_dir (str): Path to the validation directory. + train_ratio (float): Proportion of files to allocate to the train set. + test_ratio (float): Proportion of files to allocate to the test set. + val_ratio (float): Proportion of files to allocate to the validation set. + + Raises: + ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1. + """ + # Validate ratios + if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1): + raise ValueError("Ratios must be between 0 and 1.") + if train_ratio + test_ratio + val_ratio != 1: + raise ValueError( + "The sum of train_ratio, test_ratio, and val_ratio must equal 1." + ) + + # Get all files in the source directory + files = [ + f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)) + ] + random.shuffle(files) # Shuffle files for unbiased distribution + + # QC: + # print(f'files {files}') + + # Calculate split indices - preference train > test > val + total_files = len(files) + num_train = int(total_files * train_ratio) + num_test = max(int(total_files * test_ratio), total_files - num_train) + num_val = total_files - num_train - num_test + + # Distribute files + train_files = files[:num_train] + test_files = files[num_train : num_train + num_test] + val_files = files[num_train + num_test :] + + for datafiles, target_dir in zip( + [train_files, test_files, val_files], [train_dir, test_dir, val_dir] + ): + for datafile in datafiles: + shutil.copy( + os.path.join(source_dir, datafile), os.path.join(target_dir, datafile) + ) + + print( + f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files." + ) + + +if __name__ == "__main__": # parse args args = parse_arguments() - + # ==== Parameters ====================== parameters = { # parameters for problem @@ -450,7 +500,7 @@ def output_csv(parameters, key_list, output_dir): "n_dist": args.n_dist, "max_dist": args.max_dist, "lc": args.lc, - # "n_grig": args.n_grid, # burgers problem only + # "n_grid": args.n_grid, # burgers problem only # parameters for ?????? "n_samples": args.n_samples, "data_type": args.field_type, @@ -487,31 +537,53 @@ def output_csv(parameters, key_list, output_dir): random.seed(args.rand_seed) # ==== Setup Directories ====================== - problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( - parameters["z_min"], parameters["z_max"], - parameters["n_dist"],parameters["max_dist"], - parameters["lc"], parameters["n_samples"], - parameters["data_type"], parameters["scheme"], parameters["mesh_type"] + problem_specific_dir = ( + "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( + parameters["z_min"], + parameters["z_max"], + parameters["n_dist"], + parameters["max_dist"], + parameters["lc"], + parameters["n_samples"], + parameters["data_type"], + parameters["scheme"], + parameters["mesh_type"], ) + ) subdirs = [ - "data", "plot", "plot_compare", "log", "mesh", "mesh_fine", - "train", "test", "val", + "data", + "plot", + "plot_compare", + "log", + "mesh", + "mesh_fine", + "train", + "test", + "val", ] - directories = setup_directories(problem = parameters["problem"], - mesh_type = parameters["mesh_type"], - base_dir = None, - subdirs = subdirs, - dir_format = problem_specific_dir) - + directories = setup_directories( + problem=parameters["problem"], + mesh_type=parameters["mesh_type"], + base_dir=None, + subdirs=subdirs, + dir_format=problem_specific_dir, + ) # ==== Output CSV ====================== key_list = [ - "cmin","cmax", "sigma_mean_scaler", "sigma_sigma_scaler", "sigma_eps" - "data_type", "scheme", "n_samples", "lc", "mesh_type" + "cmin", + "cmax", + "sigma_mean_scaler", + "sigma_sigma_scaler", + "sigma_eps" "data_type", + "scheme", + "n_samples", + "lc", + "mesh_type", ] - output_csv(parameters, key_list, directories["data"]) + output_csv(parameters, key_list, directories["log"]) # ==== Data Generation Scripts ====================== for i in range(parameters["n_samples"]): @@ -526,10 +598,13 @@ def output_csv(parameters, key_list, output_dir): continue # ==== Data Splits ============================================ - num_train = int(parameters["n_samples"] * parameters["p_train"]) - num_test = int(parameters["n_samples"] * parameters["p_test"]) - num_val = parameters["n_samples"] - num_train - num_test - - move_data(directories["train"], directories["data"], 0, num_train) - move_data(directories["test"], directories["data"], num_train, num_train + num_test) - move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val) + # TODO: this should probably be done in the training script, not the build script + split_data( + source_dir=directories["data"], + train_dir=directories["train"], + test_dir=directories["test"], + val_dir=directories["val"], + train_ratio=parameters["p_train"], + test_ratio=parameters["p_test"], + val_ratio=parameters["p_val"], + ) diff --git a/script/build_poisson_poly.py b/script/build_poisson_poly.py index 929fad8..7f6403c 100644 --- a/script/build_poisson_poly.py +++ b/script/build_poisson_poly.py @@ -9,28 +9,38 @@ import firedrake as fd import matplotlib.pyplot as plt -import numpy as np -# import pandas as pd from firedrake.__future__ import interpolate -# dd the parent directory to the Python path -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N + def parse_arguments(): """Parse command-line arguments.""" parser = ArgumentParser(description="Build Burgers dataset with square meshes.") - parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.") - parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.") - parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.") - parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.") - parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).") + parser.add_argument( + "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh." + ) + parser.add_argument( + "--max_dist", type=int, default=6, help="Max number of distributions." + ) + parser.add_argument( + "--n_dist", type=int, default=None, help="Number of distributions." + ) + parser.add_argument( + "--lc", type=float, default=6e-2, help="Length characteristic of mesh elements." + ) + parser.add_argument( + "--field_type", type=str, default="iso", help="Data type (aniso/iso)." + ) # use padded scheme or full-scale scheme to sample central point of the bump # noqa - parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).") - parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated") + parser.add_argument( + "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)." + ) + parser.add_argument( + "--n_samples", type=int, default=100, help="Number of samples generated" + ) parser.add_argument("--rand_seed", type=int, default=63, help="Random seed") - + parsed_args = parser.parse_args() # Handle dependency between max_dist and n_dist @@ -40,12 +50,12 @@ def parse_arguments(): parsed_args.max_dist = None # Disable max_dist if n_dist is set print("Warning: max_dist is ignored because n_dist is set.") # QC: - print(parsed_args) - + # print(parsed_args) + return parser.parse_args() -def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): +def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): """ Set up directories for storing data, plots, and logs. @@ -74,16 +84,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form project_dir = os.path.abspath(base_dir) else: project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - + # QC: print(f"Project Directory: {project_dir}") # Define the dataset directory - dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) + dataset_dir = os.path.join( + project_dir, "data", f"dataset_meshtype_{mesh_type}", problem + ) # Use the provided format string for the problem-specific directory if dir_format is None: - problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + problem_specific_dir = os.path.join( + dataset_dir, f"{problem}_meshtype_{mesh_type}" + ) else: # check if dir_format is a valid string format if not isinstance(dir_format, str): @@ -92,8 +106,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form # Define default subdirectories if not provided if subdirs is None: - subdirs = ["data", "plot", "log", "mesh", "mesh_fine", - "plot_compare", "train", "test", "val"] + subdirs = [ + "data", + "plot", + "log", + "mesh", + "mesh_fine", + "plot_compare", + "train", + "test", + "val", + ] # Create and clear directories directories = {} @@ -108,10 +131,11 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form directories[subdir] = dir_path # QC: - print(f"Subdirectories created: {directories}") + # print(f"Subdirectories created: {directories}") return directories + def output_csv(parameters, key_list, output_dir): """ Write selected parameters to a CSV file. @@ -135,52 +159,74 @@ def output_csv(parameters, key_list, output_dir): csv_writer.writerow(csv_keys) # Write data (values) csv_writer.writerow(csv_data) - -def move_data(target, source, start, num_files): + + +def split_data( + source_dir, + train_dir, + test_dir, + val_dir, + train_ratio=0.75, + test_ratio=0.15, + val_ratio=0.1, +): """ - Move data files from the source directory to the target directory. + Split files in a source directory into train, test, and validation directories. Args: - target (str): The path to the target directory. - source (str): The path to the source directory. - start (int): The starting index of the files to move. - num_files (int): The total number of files to move. + source_dir (str): Path to the source directory containing files. + train_dir (str): Path to the train directory. + test_dir (str): Path to the test directory. + val_dir (str): Path to the validation directory. + train_ratio (float): Proportion of files to allocate to the train set. + test_ratio (float): Proportion of files to allocate to the test set. + val_ratio (float): Proportion of files to allocate to the validation set. Raises: - FileNotFoundError: If the source directory does not exist. - ValueError: If the start index or num_files is invalid. + ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1. """ - if not os.path.exists(source): - raise FileNotFoundError(f"Source directory '{source}' does not exist.") - - if start < 0 or num_files <= 0: - raise ValueError("Invalid start index or number of files to move.") + # Validate ratios + if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1): + raise ValueError("Ratios must be between 0 and 1.") + if train_ratio + test_ratio + val_ratio != 1: + raise ValueError( + "The sum of train_ratio, test_ratio, and val_ratio must equal 1." + ) - # Create the target directory if it doesn't exist - if not os.path.exists(target): - os.makedirs(target) - else: - # Clear the target directory by removing all files - for file in os.listdir(target): - os.remove(os.path.join(target, file)) + # Get all files in the source directory + files = [ + f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)) + ] + random.shuffle(files) # Shuffle files for unbiased distribution - # Copy files sequentially starting from the specified index - for i in range(start, start + num_files): - try: - # Copy the data file + # QC: + # print(f'files {files}') + + # Calculate split indices - preference train > test > val + total_files = len(files) + num_train = int(total_files * train_ratio) + num_test = max(int(total_files * test_ratio), total_files - num_train) + num_val = total_files - num_train - num_test + + # Distribute files + train_files = files[:num_train] + test_files = files[num_train : num_train + num_test] + val_files = files[num_train + num_test :] + + for datafiles, target_dir in zip( + [train_files, test_files, val_files], [train_dir, test_dir, val_dir] + ): + for datafile in datafiles: shutil.copy( - os.path.join(source, f"data_{i:04d}.npy"), - os.path.join(target, f"data_{i:04d}.npy"), + os.path.join(source_dir, datafile), os.path.join(target_dir, datafile) ) - except FileNotFoundError: - print(f"File data_{i:04d}.npy not found in {source}. Skipping.") - continue - except Exception as e: - print(f"An error occurred while copying data_{i:04d}.npy: {e}") - continue -def process_features(parameters, dirs): + print( + f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files." + ) + +def process_features(parameters, directories): # create mesh scale_x = parameters["scale_x"] mesh_type = parameters["mesh_type"] @@ -189,14 +235,13 @@ def process_features(parameters, dirs): scale=scale_x, mesh_type=mesh_type ) # noqa mesh = rand_poly_mesh_gen.generate_mesh( - res=lc, output_filename=os.path.join(dirs["mesh"], f"mesh{i}.msh") + res=lc, output_filename=os.path.join(directories["mesh"], f"mesh{i}.msh") ) num_boundary = rand_poly_mesh_gen.num_boundary # Generate Random solution field rand_u_generator = UM2N.RandSourceGenerator( - use_iso= parameters["data_type"] == "iso", - dist_params = parameters + use_iso=parameters["data_type"] == "iso", dist_params=parameters ) # generate equation @@ -220,9 +265,9 @@ def process_features(parameters, dirs): mesh_gen = UM2N.MeshGenerator(params={"eq": poisson_eq, "mesh": mesh}) monitor_val = mesh_gen.monitor_func(mesh) hessian = mesh_gen.get_hessian(mesh) - hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh), - fd.FunctionSpace(mesh, "CG", 1) - ) + hessian_norm = fd.project( + mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1) + ) # hessian = UM2N.MeshGenerator( # params={ # "eq": poisson_eq, @@ -247,7 +292,7 @@ def process_features(parameters, dirs): # hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1)) func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) - grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space) + grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space)) # ej321 - grad_norm copied from build_helmholtz_square.py grad_norm = fd.Function(res["function_space"]) @@ -263,7 +308,7 @@ def process_features(parameters, dirs): # ), # } # ) - + # move the mesh? start = time.perf_counter() new_mesh = mesh_gen.move_mesh() @@ -302,21 +347,20 @@ def process_features(parameters, dirs): feature={ "uh": uh.dat.data_ro.reshape(-1, 1), "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), - "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), #ej321 - added + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added "hessian": hessian.dat.data_ro.reshape(-1, 4), "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), "jacobian": jacobian.dat.data_ro.reshape(-1, 4), "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), "phi": phi.dat.data_ro.reshape(-1, 1), "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), - "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added - + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added }, raw_feature={ "uh": uh, "hessian_norm": hessian_norm, - "monitor_val": monitor_val, # ej321 - added - "grad_uh_norm": grad_norm, # ej321 - added needed for poly only + "monitor_val": monitor_val, # ej321 - added + "grad_uh_norm": grad_norm, # ej321 - added needed for poly only "jacobian": jacobian, "jacobian_det": jacobian_det, }, @@ -324,16 +368,16 @@ def process_features(parameters, dirs): poly_mesh=True, ) - mesh_processor.save_taining_data( - os.path.join(dirs["data"], "data_{}".format(i)) - ) + mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}")) # ==== Plot Scripts ====================== fig = plt.figure(figsize=(15, 10)) ax1 = fig.add_subplot(2, 3, 1, projection="3d") # Plot the exact solution ax1.set_title("Exact Solution") - fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1) + fd.trisurf( + fd.assemble(interpolate(res["u_exact"], res["function_space"])), axes=ax1 + ) # Plot the solved solution ax2 = fig.add_subplot(2, 3, 2, projection="3d") ax2.set_title("FEM Solution") @@ -358,19 +402,19 @@ def process_features(parameters, dirs): fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6) fd.triplot(new_mesh, axes=ax6) - fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i))) + fig.savefig(os.path.join(directories["plot"], "plot_{}.png".format(i))) # ==== Log File ============================================ high_res_mesh = rand_poly_mesh_gen.generate_mesh( res=1e-2, - output_filename=os.path.join(dirs["mesh_fine"], f"mesh{i}.msh"), + output_filename=os.path.join(directories["mesh_fine"], f"mesh{i}.msh"), ) high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1) res_high_res = poisson_eq.discretise(high_res_mesh) - u_exact = fd.interpolate( - res_high_res["u_exact"], res_high_res["function_space"] + u_exact = fd.assemble( + interpolate(res_high_res["u_exact"], res_high_res["function_space"]) ) uh_proj = fd.project(uh, high_res_function_space) @@ -379,8 +423,10 @@ def process_features(parameters, dirs): error_original_mesh = fd.errornorm(u_exact, uh_proj) error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj) - # Write to CSV - with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + # Write to CSV + with open( + os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="" + ) as csvfile: csv_writer = csv.writer(csvfile) # Write header (keys) csv_writer.writerow(["error_og", "error_adapt", "time"]) @@ -389,8 +435,8 @@ def process_features(parameters, dirs): print("error og/optimal:", error_original_mesh, error_optimal_mesh) -if __name__ == "__main__": +if __name__ == "__main__": # parse args args = parse_arguments() @@ -440,28 +486,33 @@ def process_features(parameters, dirs): random.seed(args.rand_seed) # ==== Setup Directories ====================== - problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( - parameters["z_min"], parameters["z_max"], - parameters["n_dist"],parameters["max_dist"], - parameters["lc"], parameters["n_samples"], - parameters["data_type"], parameters["scheme"], parameters["mesh_type"] + problem_specific_dir = ( + "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( + parameters["z_min"], + parameters["z_max"], + parameters["n_dist"], + parameters["max_dist"], + parameters["lc"], + parameters["n_samples"], + parameters["data_type"], + parameters["scheme"], + parameters["mesh_type"], ) + ) - subdirs = ["data", "plot", "log", "mesh", "mesh_fine", - "train", "test", "val" - ] + subdirs = ["data", "plot", "log", "mesh", "mesh_fine", "train", "test", "val"] - dirs = setup_directories(problem = parameters["problem"], - mesh_type = parameters["mesh_type"], - base_dir = None, - subdirs = subdirs, - dir_format = problem_specific_dir) + directories = setup_directories( + problem=parameters["problem"], + mesh_type=parameters["mesh_type"], + base_dir=None, + subdirs=subdirs, + dir_format=problem_specific_dir, + ) # ==== Output CSV ====================== - key_list = [ - "cmin","cmax", "data_type", "scheme", "n_samples", "lc", "mesh_type" - ] - output_csv(parameters, key_list, dirs["data"]) + key_list = ["cmin", "cmax", "data_type", "scheme", "n_samples", "lc", "mesh_type"] + output_csv(parameters, key_list, directories["log"]) # ==== Data Generation Scripts ====================== print("In build_dataset.py") @@ -470,7 +521,7 @@ def process_features(parameters, dirs): for i in range(parameters["n_samples"]): try: print("Generating Sample: " + str(i)) - process_features(parameters, dirs) + process_features(parameters, directories) # i += 1 except fd.exceptions.ConvergenceError: pass @@ -479,13 +530,14 @@ def process_features(parameters, dirs): except ValueError: pass - - # ==== Data Splits ============================================ - num_train = int(parameters["n_samples"] * parameters["p_train"]) - num_test = int(parameters["n_samples"] * parameters["p_test"]) - num_val = parameters["n_samples"] - num_train - num_test - - move_data(dirs["train"], dirs["data"], 0, num_train) - move_data(dirs["test"], dirs["data"], num_train, num_train + num_test) - move_data(dirs["val"], dirs["data"], num_train + num_test, num_train + num_test + num_val) + # TODO: this should probably be done in the training script, not the build script + split_data( + source_dir=directories["data"], + train_dir=directories["train"], + test_dir=directories["test"], + val_dir=directories["val"], + train_ratio=parameters["p_train"], + test_ratio=parameters["p_test"], + val_ratio=parameters["p_val"], + ) diff --git a/script/build_poisson_square.py b/script/build_poisson_square.py index 44b4f6e..5513e1b 100644 --- a/script/build_poisson_square.py +++ b/script/build_poisson_square.py @@ -9,27 +9,38 @@ import firedrake as fd import matplotlib.pyplot as plt -# import pandas as pd from firedrake.__future__ import interpolate -# dd the parent directory to the Python path -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N + def parse_arguments(): """Parse command-line arguments.""" parser = ArgumentParser(description="Build Burgers dataset with square meshes.") - parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.") - parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.") - parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.") - parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.") - parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).") + parser.add_argument( + "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh." + ) + parser.add_argument( + "--max_dist", type=int, default=6, help="Max number of distributions." + ) + parser.add_argument( + "--n_dist", type=int, default=None, help="Number of distributions." + ) + parser.add_argument( + "--lc", type=float, default=6e-2, help="Length characteristic of mesh elements." + ) + parser.add_argument( + "--field_type", type=str, default="iso", help="Data type (aniso/iso)." + ) # use padded scheme or full-scale scheme to sample central point of the bump # noqa - parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).") - parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated") + parser.add_argument( + "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)." + ) + parser.add_argument( + "--n_samples", type=int, default=100, help="Number of samples generated" + ) parser.add_argument("--rand_seed", type=int, default=63, help="Random seed") - + parsed_args = parser.parse_args() # Handle dependency between max_dist and n_dist @@ -39,11 +50,12 @@ def parse_arguments(): parsed_args.max_dist = None # Disable max_dist if n_dist is set print("Warning: max_dist is ignored because n_dist is set.") # QC: - print(parsed_args) - + # print(parsed_args) + return parser.parse_args() -def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): + +def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): """ Set up directories for storing data, plots, and logs. @@ -72,16 +84,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form project_dir = os.path.abspath(base_dir) else: project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - + # QC: print(f"Project Directory: {project_dir}") # Define the dataset directory - dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) + dataset_dir = os.path.join( + project_dir, "data", f"dataset_meshtype_{mesh_type}", problem + ) # Use the provided format string for the problem-specific directory if dir_format is None: - problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + problem_specific_dir = os.path.join( + dataset_dir, f"{problem}_meshtype_{mesh_type}" + ) else: # check if dir_format is a valid string format if not isinstance(dir_format, str): @@ -90,8 +106,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form # Define default subdirectories if not provided if subdirs is None: - subdirs = ["data", "plot", "log", "mesh", "mesh_fine", - "plot_compare", "train", "test", "val"] + subdirs = [ + "data", + "plot", + "log", + "mesh", + "mesh_fine", + "plot_compare", + "train", + "test", + "val", + ] # Create and clear directories directories = {} @@ -106,10 +131,11 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form directories[subdir] = dir_path # QC: - print(f"Subdirectories created: {directories}") + # print(f"Subdirectories created: {directories}") return directories + def output_csv(parameters, key_list, output_dir): """ Write selected parameters to a CSV file. @@ -134,51 +160,73 @@ def output_csv(parameters, key_list, output_dir): # Write data (values) csv_writer.writerow(csv_data) -def move_data(target, source, start, num_files): + +def split_data( + source_dir, + train_dir, + test_dir, + val_dir, + train_ratio=0.75, + test_ratio=0.15, + val_ratio=0.1, +): """ - Move data files from the source directory to the target directory. + Split files in a source directory into train, test, and validation directories. Args: - target (str): The path to the target directory. - source (str): The path to the source directory. - start (int): The starting index of the files to move. - num_files (int): The total number of files to move. + source_dir (str): Path to the source directory containing files. + train_dir (str): Path to the train directory. + test_dir (str): Path to the test directory. + val_dir (str): Path to the validation directory. + train_ratio (float): Proportion of files to allocate to the train set. + test_ratio (float): Proportion of files to allocate to the test set. + val_ratio (float): Proportion of files to allocate to the validation set. Raises: - FileNotFoundError: If the source directory does not exist. - ValueError: If the start index or num_files is invalid. + ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1. """ - if not os.path.exists(source): - raise FileNotFoundError(f"Source directory '{source}' does not exist.") - - if start < 0 or num_files <= 0: - raise ValueError("Invalid start index or number of files to move.") + # Validate ratios + if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1): + raise ValueError("Ratios must be between 0 and 1.") + if train_ratio + test_ratio + val_ratio != 1: + raise ValueError( + "The sum of train_ratio, test_ratio, and val_ratio must equal 1." + ) - # Create the target directory if it doesn't exist - if not os.path.exists(target): - os.makedirs(target) - else: - # Clear the target directory by removing all files - for file in os.listdir(target): - os.remove(os.path.join(target, file)) + # Get all files in the source directory + files = [ + f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)) + ] + random.shuffle(files) # Shuffle files for unbiased distribution - # Copy files sequentially starting from the specified index - for i in range(start, start + num_files): - try: - # Copy the data file + # QC: + # print(f'files {files}') + + # Calculate split indices - preference train > test > val + total_files = len(files) + num_train = int(total_files * train_ratio) + num_test = max(int(total_files * test_ratio), total_files - num_train) + num_val = total_files - num_train - num_test + + # Distribute files + train_files = files[:num_train] + test_files = files[num_train : num_train + num_test] + val_files = files[num_train + num_test :] + + for datafiles, target_dir in zip( + [train_files, test_files, val_files], [train_dir, test_dir, val_dir] + ): + for datafile in datafiles: shutil.copy( - os.path.join(source, f"data_{i:04d}.npy"), - os.path.join(target, f"data_{i:04d}.npy"), + os.path.join(source_dir, datafile), os.path.join(target_dir, datafile) ) - except FileNotFoundError: - print(f"File data_{i:04d}.npy not found in {source}. Skipping.") - continue - except Exception as e: - print(f"An error occurred while copying data_{i:04d}.npy: {e}") - continue + + print( + f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files." + ) + def process_features(parameters, problem_data_dir): - # create mesh scale_x = parameters["scale_x"] mesh_type = parameters["mesh_type"] @@ -187,12 +235,11 @@ def process_features(parameters, problem_data_dir): scale=scale_x, mesh_type=mesh_type ) # noqa mesh = unstructured_square_mesh_gen.generate_mesh( - res=lc, output_filename=os.path.join(directories["data"], f"mesh{i}.msh") + res=lc, output_filename=os.path.join(directories["mesh"], f"mesh{i}.msh") ) # Generate Random solution field rand_u_generator = UM2N.RandSourceGenerator( - use_iso= parameters["data_type"] == "iso", - dist_params = parameters + use_iso=parameters["data_type"] == "iso", dist_params=parameters ) # generate equation @@ -216,42 +263,18 @@ def process_features(parameters, problem_data_dir): mesh_gen = UM2N.MeshGenerator(params={"eq": poisson_eq, "mesh": mesh}) monitor_val = mesh_gen.monitor_func(mesh) hessian = mesh_gen.get_hessian(mesh) - hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh), - fd.FunctionSpace(mesh, "CG", 1) - ) - - # Generate Mesh - # hessian = UM2N.MeshGenerator( - # params={ - # "eq": poisson_eq, - # "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")), # noqa - # } - # ).get_hessian(mesh) - - # hessian_norm = UM2N.MeshGenerator( - # params={ - # "eq": poisson_eq, - # "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")), # noqa - # } - # ).monitor_func(mesh) - - # hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1)) + hessian_norm = fd.project( + mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1) + ) func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) - grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space) + grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space)) # ej321 - grad_norm copied from build_helmholtz_square.py grad_norm = fd.Function(res["function_space"]) grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) grad_norm /= grad_norm.vector().max() - # mesh_gen = UM2N.MeshGenerator( - # params={ - # "eq": poisson_eq, - # "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")), # noqa - # } - # ) - start = time.perf_counter() new_mesh = mesh_gen.move_mesh() end = time.perf_counter() @@ -265,13 +288,7 @@ def process_features(parameters, problem_data_dir): # get phi/grad_phi projected to the original mesh phi = mesh_gen.get_phi() - # phi = fd.project( - # phi, fd.FunctionSpace(mesh, "CG", 1) - # ) grad_phi = mesh_gen.get_grad_phi() - # grad_phi = fd.project( - # grad_phi, fd.VectorFunctionSpace(mesh, "CG", 1) - # ) # solve the equation on the new mesh new_res = poisson_eq.discretise(new_mesh) @@ -294,35 +311,39 @@ def process_features(parameters, problem_data_dir): feature={ "uh": uh.dat.data_ro.reshape(-1, 1), "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), - "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added grad_norm + "grad_uh_norm": grad_norm.dat.data_ro.reshape( + -1, 1 + ), # ej321 - added grad_norm "hessian": hessian.dat.data_ro.reshape(-1, 4), "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), "jacobian": jacobian.dat.data_ro.reshape(-1, 4), "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), "phi": phi.dat.data_ro.reshape(-1, 1), "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), - "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added monitor_val + "monitor_val": monitor_val.dat.data_ro.reshape( + -1, 1 + ), # ej321 - added monitor_val }, raw_feature={ "uh": uh, "hessian_norm": hessian_norm, - "monitor_val": monitor_val, # ej321 - added monitor_val + "monitor_val": monitor_val, # ej321 - added monitor_val "jacobian": jacobian, "jacobian_det": jacobian_det, }, dist_params=dist_params, ) - mesh_processor.save_taining_data( - os.path.join(directories["data"], "data_{}".format(i)) - ) + mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}")) # ==== Plot Scripts ====================== fig = plt.figure(figsize=(15, 10)) ax1 = fig.add_subplot(2, 3, 1, projection="3d") # Plot the exact solution ax1.set_title("Exact Solution") - fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1) + fd.trisurf( + fd.assemble(interpolate(res["u_exact"], res["function_space"])), axes=ax1 + ) # Plot the solved solution ax2 = fig.add_subplot(2, 3, 2, projection="3d") ax2.set_title("FEM Solution") @@ -358,8 +379,8 @@ def process_features(parameters, problem_data_dir): high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1) res_high_res = poisson_eq.discretise(high_res_mesh) - u_exact = fd.interpolate( - res_high_res["u_exact"], res_high_res["function_space"] + u_exact = fd.assemble( + interpolate(res_high_res["u_exact"], res_high_res["function_space"]) ) uh = fd.project(uh, high_res_function_space) @@ -369,21 +390,22 @@ def process_features(parameters, problem_data_dir): error_optimal_mesh = fd.errornorm(u_exact, uh_new) # Write to CSV - with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + with open( + os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="" + ) as csvfile: csv_writer = csv.writer(csvfile) # Write header (keys) csv_writer.writerow(["error_og", "error_adapt", "time"]) # Write data (values) csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur]) - + print("error og/optimal:", error_original_mesh, error_optimal_mesh) - -if __name__ == "__main__": +if __name__ == "__main__": # parse args args = parse_arguments() - + # ==== Parameters ====================== parameters = { # parameters for problem @@ -430,31 +452,42 @@ def process_features(parameters, problem_data_dir): random.seed(args.rand_seed) # ==== Setup Directories ====================== - problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( - parameters["z_min"], parameters["z_max"], - parameters["n_dist"],parameters["max_dist"], - parameters["lc"], parameters["n_samples"], - parameters["data_type"], parameters["scheme"], parameters["mesh_type"] + problem_specific_dir = ( + "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format( + parameters["z_min"], + parameters["z_max"], + parameters["n_dist"], + parameters["max_dist"], + parameters["lc"], + parameters["n_samples"], + parameters["data_type"], + parameters["scheme"], + parameters["mesh_type"], ) + ) subdirs = [ - "data", "plot", "log", "mesh", "mesh_fine", - "train", "test", "val", + "data", + "plot", + "log", + "mesh", + "mesh_fine", + "train", + "test", + "val", ] - directories = setup_directories(problem = parameters["problem"], - mesh_type = parameters["mesh_type"], - base_dir = None, - subdirs = subdirs, - dir_format = problem_specific_dir) - + directories = setup_directories( + problem=parameters["problem"], + mesh_type=parameters["mesh_type"], + base_dir=None, + subdirs=subdirs, + dir_format=problem_specific_dir, + ) # ==== Output CSV ====================== - key_list = [ - "cmin","cmax", - "data_type", "scheme", "n_samples", "lc", "mesh_type" - ] - output_csv(parameters, key_list, directories["data"]) + key_list = ["cmin", "cmax", "data_type", "scheme", "n_samples", "lc", "mesh_type"] + output_csv(parameters, key_list, directories["log"]) # ==== Data Generation Scripts ====================== for i in range(parameters["n_samples"]): @@ -469,39 +502,13 @@ def process_features(parameters, problem_data_dir): continue # ==== Data Splits ============================================ - num_train = int(parameters["n_samples"] * parameters["p_train"]) - num_test = int(parameters["n_samples"] * parameters["p_test"]) - num_val = parameters["n_samples"] - num_train - num_test - - move_data(directories["train"], directories["data"], 0, num_train) - move_data(directories["test"], directories["data"], num_train, num_train + num_test) - move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val) - - -# ==== Data Generation Scripts ====================== -if __name__ == "__main__": - print("In build_dataset.py") - # i = 0 - # while i < n_samples: - for i in range(parameters["n_samples"]): - try: - print("Generating Sample: " + str(i)) - # create dataset - process_features(parameters, directories) - # i += 1 - except fd.exceptions.ConvergenceError: - pass - except AttributeError: - pass - except ValueError: - pass - - # ==== Data Splits ============================================ - num_train = int(parameters["n_samples"] * parameters["p_train"]) - num_test = int(parameters["n_samples"] * parameters["p_test"]) - num_val = parameters["n_samples"] - num_train - num_test - - move_data(directories["train"], directories["data"], 0, num_train) - move_data(directories["test"], directories["data"], num_train, num_train + num_test) - move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val) - + # TODO: this should probably be done in the training script, not the build script + split_data( + source_dir=directories["data"], + train_dir=directories["train"], + test_dir=directories["test"], + val_dir=directories["val"], + train_ratio=parameters["p_train"], + test_ratio=parameters["p_test"], + val_ratio=parameters["p_val"], + ) diff --git a/script/build_swirl.py b/script/build_swirl.py index 4ea7814..cc8b0ff 100644 --- a/script/build_swirl.py +++ b/script/build_swirl.py @@ -7,33 +7,51 @@ import firedrake as fd import matplotlib.pyplot as plt -import pandas as pd -# import UM2N - -# import pandas as pd -from firedrake.__future__ import interpolate - -# dd the parent directory to the Python path -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import UM2N + def parse_arguments(): """Parse command-line arguments.""" parser = ArgumentParser(description="Build Burgers dataset with square meshes.") - parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.") - parser.add_argument("--sigma", type=float, default=(0.05 / 3), help="initial ring shape control") + parser.add_argument( + "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh." + ) + parser.add_argument( + "--sigma", type=float, default=(0.05 / 3), help="initial ring shape control" + ) parser.add_argument("--r_0", type=float, default=0.2, help="initial ring radius") - parser.add_argument("--x_0", type=float, default=0.5, help="ring center x coordinate") - parser.add_argument("--y_0", type=float, default=0.5, help="ring center y coordinate") - parser.add_argument("--alpha", type=float, default=1.5, help="swirl (velocity) scalar coefficient") - parser.add_argument("--save_interval", type=int, default=10, help="output sample file interval") - parser.add_argument("--lc", type=float, default=5e-2, help="Length characteristic of unstructured mesh elements.") - parser.add_argument("--n_grid", type=int, default=20, help="number number of grids in a mesh when mesh_type is 0)") - parser.add_argument("--n_monitor_smooth", type=int, default=10, help="apply Laplacian smoother n time to monitor function") - - + parser.add_argument( + "--x_0", type=float, default=0.5, help="ring center x coordinate" + ) + parser.add_argument( + "--y_0", type=float, default=0.5, help="ring center y coordinate" + ) + parser.add_argument( + "--alpha", type=float, default=1.5, help="swirl (velocity) scalar coefficient" + ) + parser.add_argument( + "--save_interval", type=int, default=10, help="output sample file interval" + ) + parser.add_argument( + "--lc", + type=float, + default=5e-2, + help="Length characteristic of unstructured mesh elements.", + ) + parser.add_argument( + "--n_grid", + type=int, + default=20, + help="number number of grids in a mesh when mesh_type is 0)", + ) + parser.add_argument( + "--n_monitor_smooth", + type=int, + default=10, + help="apply Laplacian smoother n time to monitor function", + ) + parsed_args = parser.parse_args() # Handle dependency between max_dist and n_dist @@ -43,12 +61,12 @@ def parse_arguments(): # parsed_args.max_dist = None # Disable max_dist if n_dist is set # print("Warning: max_dist is ignored because n_dist is set.") # QC: - print(parsed_args) - - return parser.parse_args() + # print(parsed_args) + + return parsed_args -def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None): +def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): """ Set up directories for storing data, plots, and logs. @@ -77,16 +95,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form project_dir = os.path.abspath(base_dir) else: project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - + # QC: print(f"Project Directory: {project_dir}") # Define the dataset directory - dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem) + dataset_dir = os.path.join( + project_dir, "data", f"dataset_meshtype_{mesh_type}", problem + ) # Use the provided format string for the problem-specific directory if dir_format is None: - problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}") + problem_specific_dir = os.path.join( + dataset_dir, f"{problem}_meshtype_{mesh_type}" + ) else: # check if dir_format is a valid string format if not isinstance(dir_format, str): @@ -95,8 +117,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form # Define default subdirectories if not provided if subdirs is None: - subdirs = ["data", "plot", "log", "mesh", "mesh_fine", - "plot_compare", "train", "test", "val"] + subdirs = [ + "data", + "plot", + "log", + "mesh", + "mesh_fine", + "plot_compare", + "train", + "test", + "val", + ] # Create and clear directories directories = {} @@ -111,10 +142,11 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form directories[subdir] = dir_path # QC: - print(f"Subdirectories created: {directories}") + # print(f"Subdirectories created: {directories}") return directories + def output_csv(parameters, key_list, output_dir): """ Write selected parameters to a CSV file. @@ -138,7 +170,8 @@ def output_csv(parameters, key_list, output_dir): csv_writer.writerow(csv_keys) # Write data (values) csv_writer.writerow(csv_data) - + + def move_data(target, source, start, num_files): """ Move data files from the source directory to the target directory. @@ -183,13 +216,13 @@ def move_data(target, source, start, num_files): continue - def fail_callback(t): """ Call back for failing cases. Log current time for those cases which MA did not converge. """ - fail_t.append(t) + print(f"fail to converge at {t}") + # fail_t.append(t) # def sample_from_loop( @@ -213,8 +246,8 @@ def sample_from_loop( sigma, alpha, r_0, - x_0, # ej321 - added x_0 - y_0, # ej321 - added y_0 + x_0, # ej321 - added x_0 + y_0, # ej321 - added y_0 t, error_og_list=[], error_adapt_list=[], @@ -317,7 +350,9 @@ def sample_from_loop( error_optimal_mesh = fd.errornorm(uh_new_proj, uh_fine, norm_type="L2") # Write to CSV - with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile: + with open( + os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="" + ) as csvfile: csv_writer = csv.writer(csvfile) # Write header (keys) csv_writer.writerow(["error_og", "error_adapt", "time"]) @@ -325,7 +360,6 @@ def sample_from_loop( csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur]) print("error og/optimal:", error_original_mesh, error_optimal_mesh) - # ==== Plot mesh, solution, error ====================== rows, cols = 3, 3 fig, ax = plt.subplots( @@ -406,10 +440,9 @@ def sample_from_loop( if __name__ == "__main__": - # parse args args = parse_arguments() - + # ==== Parameters ====================== parameters = { # parameters for problem @@ -423,7 +456,7 @@ def sample_from_loop( # "n_dist": args.n_dist, # "max_dist": args.max_dist, "lc": args.lc, - "n_grid": args.n_grid if args.n_grid else int(1 / lc), + "n_grid": args.n_grid if args.n_grid else int(1 / args.lc), # parameters for ?????? # "n_samples": args.n_samples, # "data_type": args.field_type, @@ -450,7 +483,6 @@ def sample_from_loop( # parameters for storing files "save_interval": args.save_interval, "fail_t": [], # list storing failing dts - # parameters for isotropic data # "w_min": 0.05, # "w_max": 0.2, @@ -472,42 +504,53 @@ def sample_from_loop( # ==== Setup Directories ====================== problem_specific_dir = "sigma_{:.3f}_alpha_{}_r0_{}_x0_{}_y0_{}_lc_{}_ngrid_{}_interval_{}_meshtype_{}_smooth_{}".format( - parameters["sigma"], parameters["alpha"], - parameters["r_0"], parameters["x_0"], parameters["y_0"], - parameters["lc"], parameters["n_grid"], - parameters["save_interval"], parameters["mesh_type"], - parameters["n_monitor_smooth"] + parameters["sigma"], + parameters["alpha"], + parameters["r_0"], + parameters["x_0"], + parameters["y_0"], + parameters["lc"], + parameters["n_grid"], + parameters["save_interval"], + parameters["mesh_type"], + parameters["n_monitor_smooth"], ) subdirs = [ - "data", "plot","plot_compare","log", "mesh", "mesh_fine", + "data", + "plot", + "plot_compare", + "log", + "mesh", + "mesh_fine", # "train", "test", "val", ] - directories = setup_directories(problem = parameters["problem"], - mesh_type = parameters["mesh_type"], - base_dir = None, - subdirs = subdirs, - dir_format = problem_specific_dir) - + directories = setup_directories( + problem=parameters["problem"], + mesh_type=parameters["mesh_type"], + base_dir=None, + subdirs=subdirs, + dir_format=problem_specific_dir, + ) # ==== Output CSV ====================== key_list = [ - "sigma", - "alpha", - "r_0", - "x_0", - "y_0", - "save_interval", - "T", - "n_step", - "dt", - "fail_t", - "lc", - "fail_cases", - "mesh_type", + "sigma", + "alpha", + "r_0", + "x_0", + "y_0", + "save_interval", + "T", + "n_step", + "dt", + "fail_t", + "lc", + "fail_cases", + "mesh_type", ] - output_csv(parameters, key_list, directories["data"]) + output_csv(parameters, key_list, directories["log"]) # ==== Data Generation Scripts ====================== print("In build_dataset.py") @@ -547,7 +590,7 @@ def sample_from_loop( mesh_fine, mesh_new, mesh_model=mesh_model, - **parameters + **parameters, # sigma=sigma, # alpha=alpha, # r_0=r_0, @@ -563,4 +606,3 @@ def sample_from_loop( swirl_solver.solve_problem(callback=sample_from_loop, fail_callback=fail_callback) print("Done!") - From 33c85db917456b9d21a36e8bd4b26e92ee7d44ac Mon Sep 17 00:00:00 2001 From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com> Date: Mon, 7 Jul 2025 08:33:59 +0100 Subject: [PATCH 3/7] #70 clean comments --- UM2N/generator/burgers_solver.py | 45 ++++-------- UM2N/generator/mesh_generator.py | 3 +- UM2N/generator/swirl_solver.py | 36 ++++----- UM2N/generator/swirl_solver_step.py | 14 ++-- script/build_burgers_square.py | 103 ++------------------------ script/build_helmholtz_poly.py | 42 +++-------- script/build_helmholtz_square.py | 16 ++-- script/build_poisson_poly.py | 54 +++----------- script/build_poisson_square.py | 21 ++---- script/build_swirl.py | 109 +--------------------------- script/ej321_helm_dataset_run.sh | 57 +++++++++++++++ script/make_build_all_test.sh | 15 ++++ 12 files changed, 156 insertions(+), 359 deletions(-) create mode 100644 script/ej321_helm_dataset_run.sh create mode 100644 script/make_build_all_test.sh diff --git a/UM2N/generator/burgers_solver.py b/UM2N/generator/burgers_solver.py index fb901a6..5c78c46 100644 --- a/UM2N/generator/burgers_solver.py +++ b/UM2N/generator/burgers_solver.py @@ -9,7 +9,7 @@ import movement as mv import numpy as np # noqa -from firedrake.__future__ import interpolate # ej321 add +from firedrake.__future__ import interpolate __all__ = ["BurgersSolver"] @@ -200,15 +200,15 @@ def solve_problem(self, callback=None): # solve on fine mesh fd.solve(self.F_fine == 0, self.u_fine) start = time.perf_counter() - adapter = mv.MongeAmpereMover( + adaptor = mv.MongeAmpereMover( self.mesh, monitor_function=self.monitor_function, rtol=1e-3, ) - # ej321 - added monitor_function for feature extraction - raw_monitor_val = self.monitor_function(self.mesh) # ej321 - is this the correct mesh to use? - adapter.move() + raw_monitor_val = self.monitor_function(self.mesh) + + adaptor.move() end = time.perf_counter() dur_ms = (end - start) * 1000 @@ -221,8 +221,7 @@ def solve_problem(self, callback=None): function_space = fd.FunctionSpace(self.mesh, "CG", 1) uh_0 = fd.Function(function_space) uh_0.project(self.u[0]) - - # ej321 - added monitor_function for feature extraction + monitor_val = fd.Function(function_space) monitor_val.assign(raw_monitor_val) @@ -250,41 +249,29 @@ def solve_problem(self, callback=None): func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1) uh_grad = fd.interpolate(fd.grad(uh_0), func_vec_space) - # ej321 - grad_norm copied from build_helmholtz_square.py - grad_uh_interpolate = fd.assemble(interpolate(fd.grad(self.u[0]),func_vec_space)) + grad_uh_interpolate = fd.assemble( + interpolate(fd.grad(self.u[0]), func_vec_space) + ) grad_norm = fd.Function(function_space) grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) grad_norm /= grad_norm.vector().max() hessian_norm = self.f_norm hessian = self.l2_projection - phi = adapter.phi - phi_grad = adapter.grad_phi - # sigma = adapter.sigma - sigma = adapter.H # ej321 - this may be the updated hessian? + phi = adaptor.phi + phi_grad = adaptor.grad_phi + sigma = adaptor.H I = fd.Identity(2) # noqa jacobian = I + sigma - # jacobian_det = fd.Function(function_space, name="jacobian_det") - # jacobian_det = fd.Function(adapter.P1, name="jacobian_det") - self.jacob_det = fd.Function(adapter.P1, name="jacobian_det").project( + self.jacob_det = fd.Function(adaptor.P1, name="jacobian_det").project( jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0] ) - # self.jacob_det = jacobian_det - # self.jacob_det = fd.project( - # jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1) - # ) # ej321 - not needed? - # self.jacob = jacobian # ej321 - this is copied from mesh_generator.py - self.jacob = fd.Function(adapter.P1_ten, name="jacobian").project(jacobian) - # self.jacob.project(jacobian) - # self.jacob = fd.project( - # jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1) - # ) # ej321 - not needed? - + self.jacob = fd.Function(adaptor.P1_ten, name="jacobian").project(jacobian) callback( uh=uh_0, uh_grad=uh_grad, - grad_norm = grad_norm, # ej321 - added grad_norm + grad_norm=grad_norm, hessian_norm=hessian_norm, hessian=hessian, phi=phi, @@ -304,7 +291,7 @@ def solve_problem(self, callback=None): dur=dur_ms, t=t, idx=self.idx, - monitor_val=monitor_val, # ej321 - added monitor_val + monitor_val=monitor_val, ) # step forward in time diff --git a/UM2N/generator/mesh_generator.py b/UM2N/generator/mesh_generator.py index 8853c50..1e7431f 100644 --- a/UM2N/generator/mesh_generator.py +++ b/UM2N/generator/mesh_generator.py @@ -54,8 +54,7 @@ def move_mesh(self): ) mover.move() # extract Hessian of the movement - # sigma = mover.sigma - sigma = mover.H # ej321 - this may be the updated hessian? + sigma = mover.H I = fd.Identity(2) # noqa jacobian = I + sigma jacobian_det = fd.Function(mover.P1, name="jacobian_det") diff --git a/UM2N/generator/swirl_solver.py b/UM2N/generator/swirl_solver.py index 750d7e7..2c98f35 100644 --- a/UM2N/generator/swirl_solver.py +++ b/UM2N/generator/swirl_solver.py @@ -575,7 +575,7 @@ def solve_problem(self, callback=None, fail_callback=None): print("In solve problem") self.t = 0.0 step = 0 - adapter = mv.MongeAmpereMover( + adaptor = mv.MongeAmpereMover( self.mesh, monitor_function=self.monitor_function, rtol=1e-3, maxiter=500 ) for i in range(self.n_step): @@ -602,15 +602,15 @@ def solve_problem(self, callback=None, fail_callback=None): # self.mesh.coordinates.dat.data[:] = self.adapt_coord_prev # mesh movement - calculate the adapted coords start = time.perf_counter() - # adapter = mv.MongeAmpereMover( + # adaptor = mv.MongeAmpereMover( # self.mesh, monitor_function=self.monitor_function, rtol=1e-3, maxiter=100 # ) - adapter.move() + adaptor.move() end = time.perf_counter() dur_ms = (end - start) * 1e3 # self.mesh_new.coordinates.dat.data[:] = self.adapt_coord self.mesh_new.coordinates.dat.data[:] = ( - adapter.mesh.coordinates.dat.data[:] + adaptor.mesh.coordinates.dat.data[:] ) # self.adapt_coord_prev = self.mesh_new.coordinates.dat.data[:] @@ -658,26 +658,18 @@ def solve_problem(self, callback=None, fail_callback=None): uh_grad = fd.interpolate(fd.grad(uh), func_vec_space) hessian = self.l2_projection - phi = adapter.phi - phi_grad = adapter.grad_phi - # sigma = adapter.sigma - sigma = adapter.H # ej321 - this may be the updated hessian? + phi = adaptor.phi + phi_grad = adaptor.grad_phi + sigma = adaptor.H I = fd.Identity(2) # noqa jacobian = I + sigma - # jacobian_det = fd.Function(function_space, name="jacobian_det") - # jacobian_det.project( - # jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0] - # ) - # self.jacob_det = fd.project( - # jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1) - # ) - self.jacob_det = fd.Function(adapter.P1, name="jacobian_det").project( - jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0] - ) - # self.jacob = fd.project( - # jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1) - # ) - self.jacob = fd.Function(adapter.P1_ten, name="jacobian").project(jacobian) + self.jacob_det = fd.Function(adaptor.P1, name="jacobian_det").project( + jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0] + ) + + self.jacob = fd.Function(adaptor.P1_ten, name="jacobian").project( + jacobian + ) if ((step + 1) % self.save_interval == 0) or (step == 0): callback( diff --git a/UM2N/generator/swirl_solver_step.py b/UM2N/generator/swirl_solver_step.py index a253ca0..e5d79fc 100644 --- a/UM2N/generator/swirl_solver_step.py +++ b/UM2N/generator/swirl_solver_step.py @@ -698,13 +698,13 @@ def solve_problem(self, callback=None, fail_callback=None): # self.mesh.coordinates.dat.data[:] = self.adapt_coord_prev # mesh movement - calculate the adapted coords start = time.perf_counter() - adapter = mv.MongeAmpereMover( + adaptor = mv.MongeAmpereMover( self.mesh, monitor_function=self.monitor_function, rtol=1e-3, maxiter=100, ) - adapter.move() + adaptor.move() end = time.perf_counter() dur_ms = (end - start) * 1e3 self.mesh_new.coordinates.dat.data[:] = self.adapt_coord @@ -750,13 +750,11 @@ def solve_problem(self, callback=None, fail_callback=None): func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1) uh_grad = fd.interpolate(fd.grad(self.uh), func_vec_space) - # hessian_norm = self.f_norm - # monitor_values = adapter.monitor + hessian = self.l2_projection - phi = adapter.phi - phi_grad = adapter.grad_phi - # sigma = adapter.sigma - sigma = adapter.H # ej321 - this may be the updated hessian? + phi = adaptor.phi + phi_grad = adaptor.grad_phi + sigma = adaptor.H I = fd.Identity(2) # noqa jacobian = I + sigma jacobian_det = fd.Function(function_space, name="jacobian_det") diff --git a/script/build_burgers_square.py b/script/build_burgers_square.py index 6266040..ec5504d 100644 --- a/script/build_burgers_square.py +++ b/script/build_burgers_square.py @@ -31,9 +31,11 @@ def parse_arguments(): parser.add_argument( "--field_type", type=str, default="iso", help="Data type (aniso/iso)." ) - # use padded scheme or full-scale scheme to sample central point of the bump # noqa parser.add_argument( - "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)." + "--boundary_scheme", + type=str, + default="pad", + help="use padded scheme or full-scale scheme to sample central point of the bump (pad/full).", ) parser.add_argument( "--n_case", type=int, default=5, help="Number of simulation cases." @@ -305,79 +307,10 @@ def get_sample_param_of_nu_generalization_by_idx_train(idx_in): return gauss_list_, nu_ -# def get_sample_params(idx): -# """Retrieve sample parameters for the Burgers problem.""" -# return UM2N.get_sample_param_of_nu_generalization_by_idx_train(idx) - - -# def solve_case(idx, mesh, mesh_new, mesh_fine, dirs): -# """Solve a single case of the Burgers problem.""" -# gauss_list, nu = get_sample_params(idx) -# solver = UM2N.BurgersSolver(mesh, mesh_fine, mesh_new, gauss_list=gauss_list, nu=nu, idx=idx) -# solver.solve_problem(lambda *args: sample_from_loop(*args, dirs)) - - -# def sample_from_loop( -# uh, uh_grad, hessian, hessian_norm, phi, grad_phi, jacobian, jacobian_det, -# uh_new, mesh_og, mesh_new, function_space, function_space_fine, uh_fine, -# dur, nu, gauss_list, t, idx, dirs, error_og_list=[], error_adapt_list=[] -# ): -# """Process and save data from a single simulation loop.""" -# mesh_processor = UM2N.MeshProcessor( -# original_mesh=mesh_og, -# optimal_mesh=mesh_new, -# function_space=function_space, -# use_4_edge=True, -# feature={ -# "uh": uh.dat.data_ro.reshape(-1, 1), -# "grad_uh": uh_grad.dat.data_ro.reshape(-1, 2), -# "hessian": hessian.dat.data_ro.reshape(-1, 4), -# "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), -# "jacobian": jacobian.dat.data_ro.reshape(-1, 4), -# "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), -# "phi": phi.dat.data_ro.reshape(-1, 1), -# "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), -# }, -# raw_feature={ -# "uh": uh, -# "hessian_norm": hessian_norm, -# "jacobian": jacobian, -# "jacobian_det": jacobian_det, -# }, -# nu=nu, -# gauss_list=gauss_list, -# dur=dur, -# t=t, -# idx=idx, -# ) - -# mesh_processor.save_taining_data(os.path.join(dirs["data"], f"data_{idx}")) - -# # Plot results -# fig, axes = plt.subplots(2, 3, figsize=(15, 10)) -# fd.trisurf(uh_fine, axes=axes[0, 0]).set_title("Solution field (HR)") -# fd.trisurf(uh, axes=axes[0, 1]).set_title("Solution field (Original Mesh)") -# fd.trisurf(uh_new, axes=axes[0, 2]).set_title("Solution field (Adapted Mesh)") -# fd.triplot(mesh_og, axes=axes[1, 0]).set_title("Original Mesh") -# fd.triplot(mesh_new, axes=axes[1, 1]).set_title("Adapted Mesh") -# fd.tripcolor(uh_new, cmap="coolwarm", axes=axes[1, 2]).set_title("Solution on Optimal Mesh") -# fig.savefig(os.path.join(dirs["plot"], f"plot_{idx}.png")) -# plt.close(fig) - -# # Save error metrics -# uh = fd.project(uh, function_space_fine) -# uh_new = fd.project(uh_new, function_space_fine) -# error_original_mesh = fd.errornorm(uh, uh_fine, norm_type="L2") -# error_optimal_mesh = fd.errornorm(uh_new, uh_fine, norm_type="L2") -# pd.DataFrame( -# {"error_og": [error_original_mesh], "error_adapt": [error_optimal_mesh], "time": [dur]} -# ).to_csv(os.path.join(dirs["log"], f"log_{idx}.csv"), index=False) - - def sample_from_loop( uh, uh_grad, - grad_norm, # ej321 - added grad_norm + grad_norm, hessian, hessian_norm, phi, @@ -409,9 +342,7 @@ def sample_from_loop( feature={ "uh": uh.dat.data_ro.reshape(-1, 1), "grad_uh": uh_grad.dat.data_ro.reshape(-1, 2), - "grad_uh_norm": grad_norm.dat.data_ro.reshape( - -1, 1 - ), # ej321 - added grad_norm + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), "hessian": hessian.dat.data_ro.reshape(-1, 4), "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), "jacobian": jacobian.dat.data_ro.reshape(-1, 4), @@ -423,7 +354,7 @@ def sample_from_loop( raw_feature={ "uh": uh, "hessian_norm": hessian_norm, - "monitor_val": monitor_val, # ej321 - added monitor_val + "monitor_val": monitor_val, "jacobian": jacobian, "jacobian_det": jacobian_det, }, @@ -469,13 +400,6 @@ def sample_from_loop( fig.savefig(os.path.join(directories["plot"], "plot_{}.png".format(i))) i += 1 - # fig, ax = plt.subplots() - # ax.set_title("adapt error list") - # ax.plot(error_adapt_list, linestyle='--', color='blue', label='adapt') - # # ax.plot(error_og_list, linestyle='--', color='red', label='og') - # ax.legend() - # plt.show() - # ========================================== uh = fd.project(uh, function_space_fine) uh_new = fd.project(uh_new, function_space_fine) @@ -494,16 +418,6 @@ def sample_from_loop( csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur]) print("error og/optimal:", error_original_mesh, error_optimal_mesh) - # df = pd.DataFrame( - # { - # "error_og": error_original_mesh, - # "error_adapt": error_optimal_mesh, - # "time": dur, - # }, - # index=[0], - # ) - # df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i))) - # print("error og/optimal:", error_original_mesh, error_optimal_mesh) return @@ -521,8 +435,7 @@ def sample_from_loop( "max_dist": args.max_dist, "lc": args.lc, "n_grid": args.n_grid, - # parameters for ?????? - # "n_samples": args.n_samples, + # parameters for mesh def "data_type": args.field_type, "scheme": args.boundary_scheme, "mesh_type": int(args.mesh_type), diff --git a/script/build_helmholtz_poly.py b/script/build_helmholtz_poly.py index c7bddb8..a330680 100644 --- a/script/build_helmholtz_poly.py +++ b/script/build_helmholtz_poly.py @@ -10,8 +10,6 @@ import firedrake as fd import matplotlib.pyplot as plt import numpy as np - -# import pandas as pd from firedrake.__future__ import interpolate import UM2N @@ -35,9 +33,11 @@ def parse_arguments(): parser.add_argument( "--field_type", type=str, default="iso", help="Data type (aniso/iso)." ) - # use padded scheme or full-scale scheme to sample central point of the bump # noqa parser.add_argument( - "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)." + "--boundary_scheme", + type=str, + default="pad", + help="Use padded scheme or full-scale scheme to sample central point of the bump (pad/full).", ) parser.add_argument( "--n_samples", type=int, default=100, help="Number of samples generated" @@ -270,7 +270,6 @@ def process_features(parameters, directories): func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space)) - # ej321 - grad_norm copied from build_helmholtz_square.py grad_norm = fd.Function(res["function_space"]) grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) grad_norm /= grad_norm.vector().max() @@ -278,17 +277,6 @@ def process_features(parameters, directories): # RHS of helmholtz problem f_rhs = fd.assemble(interpolate(helmholtz_eq.f, helmholtz_eq.function_space)) - # ej321 - this seems extra - the mesh is never used, just to build 'eq'? - # hessian = UM2N.MeshGenerator( - # params={ - # "eq": helmholtz_eq, - # "mesh": rand_poly_mesh_gen.generate_mesh( - # res=lc, - # output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), - # ), - # } - # ).get_hessian(mesh) - # ej321 - using script from build_helmholtz_square.py mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh}) monitor_val = mesh_gen.monitor_func(mesh) hessian = mesh_gen.get_hessian(mesh) @@ -296,7 +284,7 @@ def process_features(parameters, directories): mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1) ) - # move the mesh? + # move the mesh start = time.perf_counter() new_mesh = mesh_gen.move_mesh() end = time.perf_counter() @@ -334,7 +322,7 @@ def process_features(parameters, directories): feature={ "uh": uh.dat.data_ro.reshape(-1, 1), "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), - "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), "hessian": hessian.dat.data_ro.reshape(-1, 4), "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), "jacobian": jacobian.dat.data_ro.reshape(-1, 4), @@ -342,13 +330,13 @@ def process_features(parameters, directories): "phi": phi.dat.data_ro.reshape(-1, 1), "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), "f": f_rhs.dat.data_ro.reshape(-1, 1), - "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), }, raw_feature={ "uh": uh, "hessian_norm": hessian_norm, - "monitor_val": monitor_val, # ej321 - added - "grad_uh_norm": grad_norm, # ej321 - added needed for poly only + "monitor_val": monitor_val, + "grad_uh_norm": grad_norm, "jacobian": jacobian, "jacobian_det": jacobian_det, }, @@ -412,16 +400,6 @@ def process_features(parameters, directories): error_original_mesh = fd.errornorm(u_exact, uh_proj) error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj) - # df = pd.DataFrame( - # { - # "error_og": error_original_mesh, - # "error_adapt": error_optimal_mesh, - # "time": dur, - # }, - # index=[0], - # ) - # df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i))) - # Write to CSV with open( os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="" @@ -442,12 +420,10 @@ def process_features(parameters, directories): parameters = { # parameters for problem "problem": "holmholtz_poly", - # "n_case": args.n_case, # burgers problem only # parameters for random source "n_dist": args.n_dist, "max_dist": args.max_dist, "lc": args.lc, - # "n_grig": args.n_grid, # burgers problem only # parameters for ?????? "n_samples": args.n_samples, "data_type": args.field_type, diff --git a/script/build_helmholtz_square.py b/script/build_helmholtz_square.py index ca85846..0a3452b 100644 --- a/script/build_helmholtz_square.py +++ b/script/build_helmholtz_square.py @@ -33,9 +33,11 @@ def parse_arguments(): parser.add_argument( "--field_type", type=str, default="aniso", help="Data type (aniso/iso)" ) - # use padded scheme or full-scale scheme to sample central point of the bump # noqa parser.add_argument( - "--boundary_scheme", type=str, default="full", help="Boundary scheme (pad/full)" + "--boundary_scheme", + type=str, + default="full", + help="Use padded scheme or full-scale scheme to sample central point of the bump (pad/full)", ) parser.add_argument( "--n_samples", type=int, default=100, help="Number of samples generated" @@ -207,8 +209,7 @@ def process_features(parameters, directories): # RHS of helmholtz problem f_rhs = fd.assemble(interpolate(helmholtz_eq.f, helmholtz_eq.function_space)) - # generate mesh? - + # generate mesh mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh}) monitor_val = mesh_gen.monitor_func(mesh) hessian = mesh_gen.get_hessian(mesh) @@ -216,7 +217,7 @@ def process_features(parameters, directories): mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1) ) - # move the mesh? + # move the mesh start = time.perf_counter() new_mesh = mesh_gen.move_mesh() # noqa end = time.perf_counter() @@ -309,7 +310,6 @@ def process_features(parameters, directories): print("error og/optimal:", error_original_mesh, error_optimal_mesh) # ==== Plot mesh, solution, error ====================== - rows, cols = 3, 3 cmap = "seismic" @@ -495,13 +495,11 @@ def split_data( parameters = { # parameters for problem "problem": "helmholtz", - # "n_case": args.n_case, # burgers problem only # parameters for random source "n_dist": args.n_dist, "max_dist": args.max_dist, "lc": args.lc, - # "n_grid": args.n_grid, # burgers problem only - # parameters for ?????? + # parameters for mesh def "n_samples": args.n_samples, "data_type": args.field_type, "scheme": args.boundary_scheme, diff --git a/script/build_poisson_poly.py b/script/build_poisson_poly.py index 7f6403c..714a4f7 100644 --- a/script/build_poisson_poly.py +++ b/script/build_poisson_poly.py @@ -32,9 +32,12 @@ def parse_arguments(): parser.add_argument( "--field_type", type=str, default="iso", help="Data type (aniso/iso)." ) - # use padded scheme or full-scale scheme to sample central point of the bump # noqa + # # noqa parser.add_argument( - "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)." + "--boundary_scheme", + type=str, + default="pad", + help="Use padded scheme or full-scale scheme to sample central point of the bump (pad/full).", ) parser.add_argument( "--n_samples", type=int, default=100, help="Number of samples generated" @@ -268,48 +271,15 @@ def process_features(parameters, directories): hessian_norm = fd.project( mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1) ) - # hessian = UM2N.MeshGenerator( - # params={ - # "eq": poisson_eq, - # "mesh": rand_poly_mesh_gen.generate_mesh( - # res=lc, - # output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), - # ), - # } - # ).get_hessian(mesh) - - # hessian_norm = UM2N.MeshGenerator( - # params={ - # "eq": poisson_eq, - # "mesh": rand_poly_mesh_gen.generate_mesh( - # res=lc, - # output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), - # ), - # } - # ).monitor_func(mesh) - - # is this the monitor function value? - # hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1)) func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space)) - # ej321 - grad_norm copied from build_helmholtz_square.py grad_norm = fd.Function(res["function_space"]) grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) grad_norm /= grad_norm.vector().max() - # mesh_gen = UM2N.MeshGenerator( - # params={ - # "eq": poisson_eq, - # "mesh": rand_poly_mesh_gen.generate_mesh( - # res=lc, - # output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"), - # ), - # } - # ) - - # move the mesh? + # move the mesh start = time.perf_counter() new_mesh = mesh_gen.move_mesh() end = time.perf_counter() @@ -347,20 +317,20 @@ def process_features(parameters, directories): feature={ "uh": uh.dat.data_ro.reshape(-1, 1), "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), - "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), "hessian": hessian.dat.data_ro.reshape(-1, 4), "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), "jacobian": jacobian.dat.data_ro.reshape(-1, 4), "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), "phi": phi.dat.data_ro.reshape(-1, 1), "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), - "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), }, raw_feature={ "uh": uh, "hessian_norm": hessian_norm, - "monitor_val": monitor_val, # ej321 - added - "grad_uh_norm": grad_norm, # ej321 - added needed for poly only + "monitor_val": monitor_val, + "grad_uh_norm": grad_norm, "jacobian": jacobian, "jacobian_det": jacobian_det, }, @@ -444,13 +414,11 @@ def process_features(parameters, directories): parameters = { # parameters for problem "problem": "poisson_poly", - # "n_case": args.n_case, # burgers problem only # parameters for random source "n_dist": args.n_dist, "max_dist": args.max_dist, "lc": args.lc, - # "n_grig": args.n_grid, # burgers problem only - # parameters for ?????? + # parameters for mesh def "n_samples": args.n_samples, "data_type": args.field_type, "scheme": args.boundary_scheme, diff --git a/script/build_poisson_square.py b/script/build_poisson_square.py index 5513e1b..7b4919f 100644 --- a/script/build_poisson_square.py +++ b/script/build_poisson_square.py @@ -32,9 +32,11 @@ def parse_arguments(): parser.add_argument( "--field_type", type=str, default="iso", help="Data type (aniso/iso)." ) - # use padded scheme or full-scale scheme to sample central point of the bump # noqa parser.add_argument( - "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)." + "--boundary_scheme", + type=str, + default="pad", + help="Use padded scheme or full-scale scheme to sample central point of the bump (pad/full).", ) parser.add_argument( "--n_samples", type=int, default=100, help="Number of samples generated" @@ -270,7 +272,6 @@ def process_features(parameters, problem_data_dir): func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space)) - # ej321 - grad_norm copied from build_helmholtz_square.py grad_norm = fd.Function(res["function_space"]) grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2) grad_norm /= grad_norm.vector().max() @@ -311,23 +312,19 @@ def process_features(parameters, problem_data_dir): feature={ "uh": uh.dat.data_ro.reshape(-1, 1), "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2), - "grad_uh_norm": grad_norm.dat.data_ro.reshape( - -1, 1 - ), # ej321 - added grad_norm + "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), "hessian": hessian.dat.data_ro.reshape(-1, 4), "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1), "jacobian": jacobian.dat.data_ro.reshape(-1, 4), "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), "phi": phi.dat.data_ro.reshape(-1, 1), "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), - "monitor_val": monitor_val.dat.data_ro.reshape( - -1, 1 - ), # ej321 - added monitor_val + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), }, raw_feature={ "uh": uh, "hessian_norm": hessian_norm, - "monitor_val": monitor_val, # ej321 - added monitor_val + "monitor_val": monitor_val, "jacobian": jacobian, "jacobian_det": jacobian_det, }, @@ -410,13 +407,11 @@ def process_features(parameters, problem_data_dir): parameters = { # parameters for problem "problem": "poisson", - # "n_case": args.n_case, # burgers problem only # parameters for random source "n_dist": args.n_dist, "max_dist": args.max_dist, "lc": args.lc, - # "n_grig": args.n_grid, # burgers problem only - # parameters for ?????? + # parameters for mesh def "n_samples": args.n_samples, "data_type": args.field_type, "scheme": args.boundary_scheme, diff --git a/script/build_swirl.py b/script/build_swirl.py index cc8b0ff..8b5dd25 100644 --- a/script/build_swirl.py +++ b/script/build_swirl.py @@ -54,15 +54,6 @@ def parse_arguments(): parsed_args = parser.parse_args() - # Handle dependency between max_dist and n_dist - # max number of distributions used to generate the dataset - # only if n_dist is not set if n_dist is set, max_dist will be disabled - # if parsed_args.n_dist is not None: - # parsed_args.max_dist = None # Disable max_dist if n_dist is set - # print("Warning: max_dist is ignored because n_dist is set.") - # QC: - # print(parsed_args) - return parsed_args @@ -246,8 +237,8 @@ def sample_from_loop( sigma, alpha, r_0, - x_0, # ej321 - added x_0 - y_0, # ej321 - added y_0 + x_0, + y_0, t, error_og_list=[], error_adapt_list=[], @@ -296,51 +287,6 @@ def sample_from_loop( mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}")) - # # ==== Plot Scripts ====================== - # fig = plt.figure(figsize=(15, 10)) - # ax1 = fig.add_subplot(2, 3, 1, projection='3d') - # # Plot the exact solution - # ax1.set_title('Solution field (HR)') - # fd.trisurf(uh_fine, axes=ax1) - # # Plot the solved solution - # ax2 = fig.add_subplot(2, 3, 2, projection='3d') - # ax2.set_title('Solution field (Original Mesh)') - # fd.trisurf(uh, axes=ax2) - - # ax3 = fig.add_subplot(2, 3, 3, projection='3d') - # ax3.set_title('Solution field (Adapted Mesh)') - # fd.trisurf(uh_new, axes=ax3) - - # # Plot the mesh - # ax4 = fig.add_subplot(2, 3, 4) - # ax4.set_title('Original Mesh ') - # fd.triplot(mesh_og, axes=ax4) - - # ax5 = fig.add_subplot(2, 3, 5) - # ax5.set_title('Optimal Mesh') - # # fd.tripcolor( - # # uh, cmap='coolwarm', axes=ax5) - # fd.triplot(mesh_new, axes=ax5) - - # # plot mesh with function evaluated on it - # ax6 = fig.add_subplot(2, 3, 6) - # ax6.set_title('Solution Projected on Optimal Mesh') - # fd.tripcolor( - # uh_new, cmap='coolwarm', axes=ax6) - # fd.triplot(mesh_new, axes=ax6) - - # fig.savefig( - # os.path.join( - # problem_plot_dir, f"plot_{i:04d}.png") - # ) - # plt.close() - # fig, ax = plt.subplots() - # ax.set_title("adapt error list") - # ax.plot(error_adapt_list, linestyle='--', color='blue', label='adapt') - # # ax.plot(error_og_list, linestyle='--', color='red', label='og') - # ax.legend() - # plt.show() - # ==== Log File ============================================ # function_space_fine = fd.FunctionSpace(mesh_fine, 'CG', 1) uh_proj = fd.project(uh, function_space_fine) @@ -402,11 +348,6 @@ def sample_from_loop( err_v_max = err_abs_max_val err_v_min = -err_v_max - # # Error on high resolution mesh - # cb = fd.tripcolor(fd.assemble(uh_fine - uh_fine), cmap=cmap, axes=ax[2, 0], vmax=err_v_max, vmin=err_v_min) - # ax[2, 0].set_title(f"Error Map High Resolution") - # plt.colorbar(cb) - # Monitor values cb = fd.tripcolor(monitor_values, cmap=cmap, axes=ax[2, 0]) ax[2, 0].set_title("Monitor Values") @@ -451,29 +392,14 @@ def sample_from_loop( "T": 1, "dt": 1e-3, # The CFL condition requires that the timestep is less than 0.0014 for fine mesh "n_step": 1000, - # "n_case": args.n_case, # burgers problem only - # parameters for random source - # "n_dist": args.n_dist, - # "max_dist": args.max_dist, "lc": args.lc, "n_grid": args.n_grid if args.n_grid else int(1 / args.lc), - # parameters for ?????? - # "n_samples": args.n_samples, - # "data_type": args.field_type, - # "scheme": args.boundary_scheme, + # parameters for mesh def "mesh_type": int(args.mesh_type), "n_monitor_smooth": args.n_monitor_smooth, # parameters for domain scale "scale_x": 1, "scale_y": 1, - # parameters for anisotropic data - distribution height scaler - # "z_max": 1, - # "z_min": 0, - # parameters for ????? - # "x_start": 0, - # "x_end": 1, - # "y_start": 0, - # "y_end": 1, # parameters for initial condition "sigma": args.sigma, "r_0": args.r_0, @@ -483,25 +409,8 @@ def sample_from_loop( # parameters for storing files "save_interval": args.save_interval, "fail_t": [], # list storing failing dts - # parameters for isotropic data - # "w_min": 0.05, - # "w_max": 0.2, - # "c_min": 0.2 if args.boundary_scheme == "pad" else 0, - # "c_max": 0.8 if args.boundary_scheme == "pad" else 1, - # parameters for dataset challenging level - # larger, less challenging (because the gaussian is more like a circle) - # "sigma_mean_scaler": 1 / 4, - # "sigma_sigma_scaler": 1 / 6, - # "sigma_eps": 1 / 8, - # parameters for data split - # "p_train": 0.75, - # "p_test": 0.15, - # "p_val": 0.1, } - # # Set random seed - # random.seed(args.rand_seed) - # ==== Setup Directories ====================== problem_specific_dir = "sigma_{:.3f}_alpha_{}_r0_{}_x0_{}_y0_{}_lc_{}_ngrid_{}_interval_{}_meshtype_{}_smooth_{}".format( parameters["sigma"], @@ -573,7 +482,7 @@ def sample_from_loop( mesh_model = mesh_gen.generate_mesh( res=lc, output_filename=os.path.join(directories["mesh"], "mesh.msh") ) - # ej321 - is this extra call to mesh gen needed? + # is this extra call to mesh gen needed? mesh_gen_fine = UM2N.UnstructuredSquareMeshGenerator(mesh_type=mesh_type) mesh_fine = mesh_gen_fine.generate_mesh( res=1e-2, output_filename=os.path.join(directories["mesh_fine"], "mesh.msh") @@ -591,16 +500,6 @@ def sample_from_loop( mesh_new, mesh_model=mesh_model, **parameters, - # sigma=sigma, - # alpha=alpha, - # r_0=r_0, - # x_0=x_0, - # y_0=y_0, - # save_interval=save_interval, - # T=T, - # dt=dt, - # n_step=n_step, - # n_monitor_smooth=n_monitor_smooth, ) swirl_solver.solve_problem(callback=sample_from_loop, fail_callback=fail_callback) diff --git a/script/ej321_helm_dataset_run.sh b/script/ej321_helm_dataset_run.sh new file mode 100644 index 0000000..f3803d1 --- /dev/null +++ b/script/ej321_helm_dataset_run.sh @@ -0,0 +1,57 @@ + + +###################### Training dataset generation ###################### +# Training data +# Problem type: Helmholtz +# Meshtype: 2 +# n_samples: 300 +# Random_seed: 63 +# Resolution: 0.05, 0.055 + +# use 2 / 6 / 0 +mesh_type=(6) +# training set build +rand_seed=63 +lcs=(0.05 0.055) +n_samples_train=(300) + +# helmholtz square case +for mt in "${mesh_type[@]}"; do + for i in "${lcs[@]}"; do + for n_s in "${n_samples_train[@]}"; do + echo "lc = $i meshtype = $mt num samples = $n_s" + python build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_s --field_type="aniso" --boundary_scheme="full" --mesh_type=$mt + # python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="pad" --mesh_type=$mesh_type + # python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="full" --mesh_type=$mesh_type + # python ./script/build_helmholtz_square.py --lc=$i --rand_seed $rand_seed --n_samples $n_samples_train --field_type "aniso" --boundary_scheme "pad" --mesh_type=$mesh_type + done + done +done +###################### Training dataset generation ###################### + + +# ###################### Testing dataset generation ###################### +# # Test data +# # Problem type: Helmholtz +# # Meshtype: 0, 2, 6 +# # n_samples: 100 +# # Random_seed: 42 +# # Resolution: 0.05, 0.055, 0.028 + +# n_samples_train=101 +# rand_seed=42 +# mesh_types=(6 2 0) +# # length character for polygon mesh +# lcs=(0.05 0.028) + +# # helmholtz square case +# for m in "${mesh_types[@]}"; do +# for i in "${lcs[@]}"; do +# echo "lc = $i Meshtype = $m" +# python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m +# # python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="pad" --mesh_type=$mesh_type +# # python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="full" --mesh_type=$mesh_type +# # python ./script/build_helmholtz_square.py --lc=$i --rand_seed $rand_seed --n_samples $n_samples_train --field_type "aniso" --boundary_scheme "pad" --mesh_type=$mesh_type +# done +# done +# ###################### Testing dataset generation ###################### \ No newline at end of file diff --git a/script/make_build_all_test.sh b/script/make_build_all_test.sh new file mode 100644 index 0000000..a531450 --- /dev/null +++ b/script/make_build_all_test.sh @@ -0,0 +1,15 @@ +mesh_types=(6 2 0) +rand_seed=42 +n_samples_train=3 + +for m in "${mesh_types[@]}"; do + python build_burgers_square.py --rand_seed=$rand_seed --n_case=$n_samples_train --mesh_type=$m + + # python build_helmholtz_square.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m + # python build_helmholtz_poly.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m + # python build_poisson_square.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m + # python build_poisson_poly.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m + # python build_burgers_square.py --rand_seed=$rand_seed --n_case=$n_samples_train --mesh_type=$m + # python build_swirl.py --lc=0.028 --alpha=1.5 --r_0 0.20 --x_0 0.3 --y_0 0.3 --n_monitor_smooth=10 --mesh_type=$m + +done \ No newline at end of file From a899c3ddf9377db6901887d0f6e468f4a3fce623 Mon Sep 17 00:00:00 2001 From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com> Date: Mon, 7 Jul 2025 08:58:09 +0100 Subject: [PATCH 4/7] moved common build script to helper --- UM2N/generator/swirl_solver.py | 8 +- UM2N/generator/swirl_solver_step.py | 14 +- script/build_burgers_square.py | 157 +------------------- script/build_helmholtz_poly.py | 177 +--------------------- script/build_helmholtz_square.py | 70 +-------- script/build_helper.py | 221 ++++++++++++++++++++++++++++ script/build_poisson_poly.py | 176 +--------------------- script/build_poisson_square.py | 176 +--------------------- script/build_swirl.py | 4 +- 9 files changed, 241 insertions(+), 762 deletions(-) create mode 100644 script/build_helper.py diff --git a/UM2N/generator/swirl_solver.py b/UM2N/generator/swirl_solver.py index 2c98f35..3db102c 100644 --- a/UM2N/generator/swirl_solver.py +++ b/UM2N/generator/swirl_solver.py @@ -22,6 +22,8 @@ from tqdm import tqdm # noqa from UM2N.model.train_util import model_forward +from firedrake.__future__ import interpolate + def get_log_og(log_path, idx): """ @@ -453,7 +455,7 @@ def monitor_function(self, mesh, alpha=10, beta=5): ) func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) - uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space) + uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space)) self.grad_norm.interpolate(uh_grad[0] ** 2 + uh_grad[1] ** 2) # filter_monitor_val = np.minimum(1e3, self.f_norm.dat.data[:]) @@ -526,7 +528,7 @@ def monitor_function_on_coarse_mesh(self, mesh, alpha=10, beta=5): ) func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) - uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space) + uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space)) self.grad_norm.interpolate(uh_grad[0] ** 2 + uh_grad[1] ** 2) # Normlize the hessian @@ -655,7 +657,7 @@ def solve_problem(self, callback=None, fail_callback=None): ) func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1) - uh_grad = fd.interpolate(fd.grad(uh), func_vec_space) + uh_grad = fd.assemble(interpolate(fd.grad(uh), func_vec_space)) hessian = self.l2_projection phi = adaptor.phi diff --git a/UM2N/generator/swirl_solver_step.py b/UM2N/generator/swirl_solver_step.py index e5d79fc..5dc9dd4 100644 --- a/UM2N/generator/swirl_solver_step.py +++ b/UM2N/generator/swirl_solver_step.py @@ -17,6 +17,8 @@ from tqdm import tqdm # noqa +from firedrake.__future__ import interpolate + def get_c(x, y, t, threshold=0.5, alpha=1.5): """ @@ -459,7 +461,7 @@ def monitor_function_grad(self, mesh, alpha=5): ) func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1) - uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space) + uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space)) self.grad_norm.project(uh_grad[0] ** 2 + uh_grad[1] ** 2) self.adapt_coord = mesh.coordinates.vector().array().reshape(-1, 2) # noqa @@ -481,7 +483,7 @@ def monitor_function_smoothed_grad(self, mesh, alpha=5): ) func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1) - uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space) + uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space)) self.grad_norm.project(uh_grad[0] ** 2 + uh_grad[1] ** 2) # Normlize the grad @@ -508,7 +510,7 @@ def monitor_function_for_merge(self, mesh, alpha=10, beta=5): ) func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1) - uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space) + uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space)) self.grad_norm.project(uh_grad[0] ** 2 + uh_grad[1] ** 2) # Normlize the hessian @@ -568,7 +570,7 @@ def monitor_function(self, mesh, alpha=10, beta=5): ) func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1) - uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space) + uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space)) self.grad_norm.project(uh_grad[0] ** 2 + uh_grad[1] ** 2) # Normlize the hessian @@ -631,7 +633,7 @@ def monitor_function_on_coarse_mesh(self, mesh, alpha=10, beta=5): ) func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1) - uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space) + uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space)) self.grad_norm.project(uh_grad[0] ** 2 + uh_grad[1] ** 2) # Normlize the hessian @@ -749,7 +751,7 @@ def solve_problem(self, callback=None, fail_callback=None): uh_fine.project(self.u_cur_fine) func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1) - uh_grad = fd.interpolate(fd.grad(self.uh), func_vec_space) + uh_grad = fd.assemble(interpolate(fd.grad(self.uh), func_vec_space)) hessian = self.l2_projection phi = adaptor.phi diff --git a/script/build_burgers_square.py b/script/build_burgers_square.py index ec5504d..14a2828 100644 --- a/script/build_burgers_square.py +++ b/script/build_burgers_square.py @@ -1,14 +1,11 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w -import csv -import os -import random -import shutil from argparse import ArgumentParser import firedrake as fd import matplotlib.pyplot as plt +from build_helper import * import UM2N @@ -67,158 +64,6 @@ def parse_arguments(): return parser.parse_args() -def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): - """ - Set up directories for storing data, plots, and logs. - - Args: - base_dir (str): Base directory for the project. - parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". - - "mesh_type" (int): Type of mesh used in the simulation (default: 0). - - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). - subdirs (list, optional): List of subdirectories to create. Defaults to: - ["data", "plot", "log", "mesh", "mesh_fine"]. - Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". - dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders - matching keys in the `parameters` dictionary. Example: - "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". - If not provided, raises a ValueError. - - Returns: - dict: A dictionary mapping subdirectory names to their full paths. - - Raises: - ValueError: If `dir_format` is not provided or is invalid. - """ - - # Define the project directory - if base_dir: - project_dir = os.path.abspath(base_dir) - else: - project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - - # QC: - print(f"Project Directory: {project_dir}") - - # Define the dataset directory - dataset_dir = os.path.join( - project_dir, "data", f"dataset_meshtype_{mesh_type}", problem - ) - - # Use the provided format string for the problem-specific directory - if dir_format is None: - problem_specific_dir = os.path.join( - dataset_dir, f"{problem}_meshtype_{mesh_type}" - ) - else: - # check if dir_format is a valid string format - if not isinstance(dir_format, str): - raise ValueError("dir_format must be a string.") - problem_specific_dir = os.path.join(dataset_dir, dir_format) - - # Define default subdirectories if not provided - if subdirs is None: - subdirs = [ - "data", - "plot", - "log", - "mesh", - "mesh_fine", - "plot_compare", - "train", - "test", - "val", - ] - - # Create and clear directories - directories = {} - for subdir in subdirs: - dir_path = os.path.join(problem_specific_dir, subdir) - if not os.path.exists(dir_path): - os.makedirs(dir_path) - else: - # Clear the directory by removing all files - for file in os.listdir(dir_path): - os.remove(os.path.join(dir_path, file)) - directories[subdir] = dir_path - - # QC: - # print(f"Subdirectories created: {directories}") - - return directories - - -def output_csv(parameters, key_list, output_dir): - """ - Write selected parameters to a CSV file. - - Args: - parameters (dict): Dictionary of parameters to write. - key_list (list): List of keys to include in the CSV. - output_dir (str): Directory where the CSV file will be saved. - """ - # Filter parameters based on key_list - csv_keys = [key for key in key_list if key in parameters] - csv_data = [parameters[key] for key in csv_keys] - - # Define the output file path - csv_file_path = os.path.join(output_dir, "info.csv") - - # Write to CSV - with open(csv_file_path, mode="w", newline="") as csvfile: - csv_writer = csv.writer(csvfile) - # Write header (keys) - csv_writer.writerow(csv_keys) - # Write data (values) - csv_writer.writerow(csv_data) - - print(f"Parameters saved to {csv_file_path}") - - -def move_data(target, source, start, num_files): - """ - Move data files from the source directory to the target directory. - - Args: - target (str): The path to the target directory. - source (str): The path to the source directory. - start (int): The starting index of the files to move. - num_files (int): The total number of files to move. - - Raises: - FileNotFoundError: If the source directory does not exist. - ValueError: If the start index or num_files is invalid. - """ - if not os.path.exists(source): - raise FileNotFoundError(f"Source directory '{source}' does not exist.") - - if start < 0 or num_files <= 0: - raise ValueError("Invalid start index or number of files to move.") - - # Create the target directory if it doesn't exist - if not os.path.exists(target): - os.makedirs(target) - else: - # Clear the target directory by removing all files - for file in os.listdir(target): - os.remove(os.path.join(target, file)) - - # Copy files sequentially starting from the specified index - for i in range(start, start + num_files): - try: - # Copy the data file - shutil.copy( - os.path.join(source, f"data_{i:04d}.npy"), - os.path.join(target, f"data_{i:04d}.npy"), - ) - except FileNotFoundError: - print(f"File data_{i:04d}.npy not found in {source}. Skipping.") - continue - except Exception as e: - print(f"An error occurred while copying data_{i:04d}.npy: {e}") - continue - - def generate_mesh(parameters, directories): """Generate the mesh based on the specified type.""" if parameters["mesh_type"] != 0: diff --git a/script/build_helmholtz_poly.py b/script/build_helmholtz_poly.py index a330680..caf9b77 100644 --- a/script/build_helmholtz_poly.py +++ b/script/build_helmholtz_poly.py @@ -1,15 +1,13 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w -import csv -import os -import random -import shutil + import time from argparse import ArgumentParser import firedrake as fd import matplotlib.pyplot as plt import numpy as np +from build_helper import * from firedrake.__future__ import interpolate import UM2N @@ -58,177 +56,6 @@ def parse_arguments(): return parser.parse_args() -def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): - """ - Set up directories for storing data, plots, and logs. - - Args: - base_dir (str): Base directory for the project. - parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". - - "mesh_type" (int): Type of mesh used in the simulation (default: 0). - - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). - subdirs (list, optional): List of subdirectories to create. Defaults to: - ["data", "plot", "log", "mesh", "mesh_fine"]. - Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". - dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders - matching keys in the `parameters` dictionary. Example: - "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". - If not provided, raises a ValueError. - - Returns: - dict: A dictionary mapping subdirectory names to their full paths. - - Raises: - ValueError: If `dir_format` is not provided or is invalid. - """ - - # Define the project directory - if base_dir: - project_dir = os.path.abspath(base_dir) - else: - project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - - # QC: - print(f"Project Directory: {project_dir}") - - # Define the dataset directory - dataset_dir = os.path.join( - project_dir, "data", f"dataset_meshtype_{mesh_type}", problem - ) - - # Use the provided format string for the problem-specific directory - if dir_format is None: - problem_specific_dir = os.path.join( - dataset_dir, f"{problem}_meshtype_{mesh_type}" - ) - else: - # check if dir_format is a valid string format - if not isinstance(dir_format, str): - raise ValueError("dir_format must be a string.") - problem_specific_dir = os.path.join(dataset_dir, dir_format) - - # Define default subdirectories if not provided - if subdirs is None: - subdirs = [ - "data", - "plot", - "log", - "mesh", - "mesh_fine", - "plot_compare", - "train", - "test", - "val", - ] - - # Create and clear directories - directories = {} - for subdir in subdirs: - dir_path = os.path.join(problem_specific_dir, subdir) - if not os.path.exists(dir_path): - os.makedirs(dir_path) - else: - # Clear the directory by removing all files - for file in os.listdir(dir_path): - os.remove(os.path.join(dir_path, file)) - directories[subdir] = dir_path - - # QC: - # print(f"Subdirectories created: {directories}") - - return directories - - -def output_csv(parameters, key_list, output_dir): - """ - Write selected parameters to a CSV file. - - Args: - parameters (dict): Dictionary of parameters to write. - key_list (list): List of keys to include in the CSV. - output_dir (str): Directory where the CSV file will be saved. - """ - # Filter parameters based on key_list - csv_keys = [key for key in key_list if key in parameters] - csv_data = [parameters[key] for key in csv_keys] - - # Define the output file path - csv_file_path = os.path.join(output_dir, "info.csv") - - # Write to CSV - with open(csv_file_path, mode="w", newline="") as csvfile: - csv_writer = csv.writer(csvfile) - # Write header (keys) - csv_writer.writerow(csv_keys) - # Write data (values) - csv_writer.writerow(csv_data) - - -def split_data( - source_dir, - train_dir, - test_dir, - val_dir, - train_ratio=0.75, - test_ratio=0.15, - val_ratio=0.1, -): - """ - Split files in a source directory into train, test, and validation directories. - - Args: - source_dir (str): Path to the source directory containing files. - train_dir (str): Path to the train directory. - test_dir (str): Path to the test directory. - val_dir (str): Path to the validation directory. - train_ratio (float): Proportion of files to allocate to the train set. - test_ratio (float): Proportion of files to allocate to the test set. - val_ratio (float): Proportion of files to allocate to the validation set. - - Raises: - ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1. - """ - # Validate ratios - if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1): - raise ValueError("Ratios must be between 0 and 1.") - if train_ratio + test_ratio + val_ratio != 1: - raise ValueError( - "The sum of train_ratio, test_ratio, and val_ratio must equal 1." - ) - - # Get all files in the source directory - files = [ - f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)) - ] - random.shuffle(files) # Shuffle files for unbiased distribution - - # QC: - # print(f'files {files}') - - # Calculate split indices - preference train > test > val - total_files = len(files) - num_train = int(total_files * train_ratio) - num_test = max(int(total_files * test_ratio), total_files - num_train) - num_val = total_files - num_train - num_test - - # Distribute files - train_files = files[:num_train] - test_files = files[num_train : num_train + num_test] - val_files = files[num_train + num_test :] - - for datafiles, target_dir in zip( - [train_files, test_files, val_files], [train_dir, test_dir, val_dir] - ): - for datafile in datafiles: - shutil.copy( - os.path.join(source_dir, datafile), os.path.join(target_dir, datafile) - ) - - print( - f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files." - ) - - def process_features(parameters, directories): mesh_type = parameters["mesh_type"] scale_x = parameters["scale_x"] diff --git a/script/build_helmholtz_square.py b/script/build_helmholtz_square.py index 0a3452b..642e004 100644 --- a/script/build_helmholtz_square.py +++ b/script/build_helmholtz_square.py @@ -1,15 +1,12 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w -import csv -import os -import random -import shutil import time from argparse import ArgumentParser import firedrake as fd import matplotlib.pyplot as plt +from build_helper import * from firedrake.__future__ import interpolate import UM2N @@ -422,71 +419,6 @@ def output_csv(parameters, key_list, output_dir): print(f"Parameters saved to {csv_file_path}") -def split_data( - source_dir, - train_dir, - test_dir, - val_dir, - train_ratio=0.75, - test_ratio=0.15, - val_ratio=0.1, -): - """ - Split files in a source directory into train, test, and validation directories. - - Args: - source_dir (str): Path to the source directory containing files. - train_dir (str): Path to the train directory. - test_dir (str): Path to the test directory. - val_dir (str): Path to the validation directory. - train_ratio (float): Proportion of files to allocate to the train set. - test_ratio (float): Proportion of files to allocate to the test set. - val_ratio (float): Proportion of files to allocate to the validation set. - - Raises: - ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1. - """ - # Validate ratios - if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1): - raise ValueError("Ratios must be between 0 and 1.") - if train_ratio + test_ratio + val_ratio != 1: - raise ValueError( - "The sum of train_ratio, test_ratio, and val_ratio must equal 1." - ) - - # Get all files in the source directory - files = [ - f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)) - ] - random.shuffle(files) # Shuffle files for unbiased distribution - - # QC: - # print(f'files {files}') - - # Calculate split indices - preference train > test > val - total_files = len(files) - num_train = int(total_files * train_ratio) - num_test = max(int(total_files * test_ratio), total_files - num_train) - num_val = total_files - num_train - num_test - - # Distribute files - train_files = files[:num_train] - test_files = files[num_train : num_train + num_test] - val_files = files[num_train + num_test :] - - for datafiles, target_dir in zip( - [train_files, test_files, val_files], [train_dir, test_dir, val_dir] - ): - for datafile in datafiles: - shutil.copy( - os.path.join(source_dir, datafile), os.path.join(target_dir, datafile) - ) - - print( - f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files." - ) - - if __name__ == "__main__": # parse args args = parse_arguments() diff --git a/script/build_helper.py b/script/build_helper.py new file mode 100644 index 0000000..78ac0e2 --- /dev/null +++ b/script/build_helper.py @@ -0,0 +1,221 @@ +import csv +import os +import random +import shutil + + +def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): + """ + Set up directories for storing data, plots, and logs. + + Args: + base_dir (str): Base directory for the project. + parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". + - "mesh_type" (int): Type of mesh used in the simulation (default: 0). + - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). + subdirs (list, optional): List of subdirectories to create. Defaults to: + ["data", "plot", "log", "mesh", "mesh_fine"]. + Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". + dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders + matching keys in the `parameters` dictionary. Example: + "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". + If not provided, raises a ValueError. + + Returns: + dict: A dictionary mapping subdirectory names to their full paths. + + Raises: + ValueError: If `dir_format` is not provided or is invalid. + """ + + # Define the project directory + if base_dir: + project_dir = os.path.abspath(base_dir) + else: + project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # QC: + print(f"Project Directory: {project_dir}") + + # Define the dataset directory + dataset_dir = os.path.join( + project_dir, "data", f"dataset_meshtype_{mesh_type}", problem + ) + + # Use the provided format string for the problem-specific directory + if dir_format is None: + problem_specific_dir = os.path.join( + dataset_dir, f"{problem}_meshtype_{mesh_type}" + ) + else: + # check if dir_format is a valid string format + if not isinstance(dir_format, str): + raise ValueError("dir_format must be a string.") + problem_specific_dir = os.path.join(dataset_dir, dir_format) + + # Define default subdirectories if not provided + if subdirs is None: + subdirs = [ + "data", + "plot", + "log", + "mesh", + "mesh_fine", + "plot_compare", + "train", + "test", + "val", + ] + + # Create and clear directories + directories = {} + for subdir in subdirs: + dir_path = os.path.join(problem_specific_dir, subdir) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + else: + # Clear the directory by removing all files + for file in os.listdir(dir_path): + os.remove(os.path.join(dir_path, file)) + directories[subdir] = dir_path + + # QC: + # print(f"Subdirectories created: {directories}") + + return directories + + +def output_csv(parameters, key_list, output_dir): + """ + Write selected parameters to a CSV file. + + Args: + parameters (dict): Dictionary of parameters to write. + key_list (list): List of keys to include in the CSV. + output_dir (str): Directory where the CSV file will be saved. + """ + # Filter parameters based on key_list + csv_keys = [key for key in key_list if key in parameters] + csv_data = [parameters[key] for key in csv_keys] + + # Define the output file path + csv_file_path = os.path.join(output_dir, "info.csv") + + # Write to CSV + with open(csv_file_path, mode="w", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header (keys) + csv_writer.writerow(csv_keys) + # Write data (values) + csv_writer.writerow(csv_data) + + print(f"Parameters saved to {csv_file_path}") + + +def move_data(target, source, start, num_files): + """ + Move data files from the source directory to the target directory. + + Args: + target (str): The path to the target directory. + source (str): The path to the source directory. + start (int): The starting index of the files to move. + num_files (int): The total number of files to move. + + Raises: + FileNotFoundError: If the source directory does not exist. + ValueError: If the start index or num_files is invalid. + """ + if not os.path.exists(source): + raise FileNotFoundError(f"Source directory '{source}' does not exist.") + + if start < 0 or num_files <= 0: + raise ValueError("Invalid start index or number of files to move.") + + # Create the target directory if it doesn't exist + if not os.path.exists(target): + os.makedirs(target) + else: + # Clear the target directory by removing all files + for file in os.listdir(target): + os.remove(os.path.join(target, file)) + + # Copy files sequentially starting from the specified index + for i in range(start, start + num_files): + try: + # Copy the data file + shutil.copy( + os.path.join(source, f"data_{i:04d}.npy"), + os.path.join(target, f"data_{i:04d}.npy"), + ) + except FileNotFoundError: + print(f"File data_{i:04d}.npy not found in {source}. Skipping.") + continue + except Exception as e: + print(f"An error occurred while copying data_{i:04d}.npy: {e}") + continue + + +def split_data( + source_dir, + train_dir, + test_dir, + val_dir, + train_ratio=0.75, + test_ratio=0.15, + val_ratio=0.1, +): + """ + Split files in a source directory into train, test, and validation directories. + + Args: + source_dir (str): Path to the source directory containing files. + train_dir (str): Path to the train directory. + test_dir (str): Path to the test directory. + val_dir (str): Path to the validation directory. + train_ratio (float): Proportion of files to allocate to the train set. + test_ratio (float): Proportion of files to allocate to the test set. + val_ratio (float): Proportion of files to allocate to the validation set. + + Raises: + ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1. + """ + # Validate ratios + if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1): + raise ValueError("Ratios must be between 0 and 1.") + if train_ratio + test_ratio + val_ratio != 1: + raise ValueError( + "The sum of train_ratio, test_ratio, and val_ratio must equal 1." + ) + + # Get all files in the source directory + files = [ + f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)) + ] + random.shuffle(files) # Shuffle files for unbiased distribution + + # QC: + # print(f'files {files}') + + # Calculate split indices - preference train > test > val + total_files = len(files) + num_train = int(total_files * train_ratio) + num_test = max(int(total_files * test_ratio), total_files - num_train) + num_val = total_files - num_train - num_test + + # Distribute files + train_files = files[:num_train] + test_files = files[num_train : num_train + num_test] + val_files = files[num_train + num_test :] + + for datafiles, target_dir in zip( + [train_files, test_files, val_files], [train_dir, test_dir, val_dir] + ): + for datafile in datafiles: + shutil.copy( + os.path.join(source_dir, datafile), os.path.join(target_dir, datafile) + ) + + print( + f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files." + ) diff --git a/script/build_poisson_poly.py b/script/build_poisson_poly.py index 714a4f7..5bf33cb 100644 --- a/script/build_poisson_poly.py +++ b/script/build_poisson_poly.py @@ -1,14 +1,11 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w -import csv -import os -import random -import shutil import time from argparse import ArgumentParser import firedrake as fd import matplotlib.pyplot as plt +from build_helper import * from firedrake.__future__ import interpolate import UM2N @@ -58,177 +55,6 @@ def parse_arguments(): return parser.parse_args() -def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): - """ - Set up directories for storing data, plots, and logs. - - Args: - base_dir (str): Base directory for the project. - parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". - - "mesh_type" (int): Type of mesh used in the simulation (default: 0). - - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). - subdirs (list, optional): List of subdirectories to create. Defaults to: - ["data", "plot", "log", "mesh", "mesh_fine"]. - Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". - dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders - matching keys in the `parameters` dictionary. Example: - "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". - If not provided, raises a ValueError. - - Returns: - dict: A dictionary mapping subdirectory names to their full paths. - - Raises: - ValueError: If `dir_format` is not provided or is invalid. - """ - - # Define the project directory - if base_dir: - project_dir = os.path.abspath(base_dir) - else: - project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - - # QC: - print(f"Project Directory: {project_dir}") - - # Define the dataset directory - dataset_dir = os.path.join( - project_dir, "data", f"dataset_meshtype_{mesh_type}", problem - ) - - # Use the provided format string for the problem-specific directory - if dir_format is None: - problem_specific_dir = os.path.join( - dataset_dir, f"{problem}_meshtype_{mesh_type}" - ) - else: - # check if dir_format is a valid string format - if not isinstance(dir_format, str): - raise ValueError("dir_format must be a string.") - problem_specific_dir = os.path.join(dataset_dir, dir_format) - - # Define default subdirectories if not provided - if subdirs is None: - subdirs = [ - "data", - "plot", - "log", - "mesh", - "mesh_fine", - "plot_compare", - "train", - "test", - "val", - ] - - # Create and clear directories - directories = {} - for subdir in subdirs: - dir_path = os.path.join(problem_specific_dir, subdir) - if not os.path.exists(dir_path): - os.makedirs(dir_path) - else: - # Clear the directory by removing all files - for file in os.listdir(dir_path): - os.remove(os.path.join(dir_path, file)) - directories[subdir] = dir_path - - # QC: - # print(f"Subdirectories created: {directories}") - - return directories - - -def output_csv(parameters, key_list, output_dir): - """ - Write selected parameters to a CSV file. - - Args: - parameters (dict): Dictionary of parameters to write. - key_list (list): List of keys to include in the CSV. - output_dir (str): Directory where the CSV file will be saved. - """ - # Filter parameters based on key_list - csv_keys = [key for key in key_list if key in parameters] - csv_data = [parameters[key] for key in csv_keys] - - # Define the output file path - csv_file_path = os.path.join(output_dir, "info.csv") - - # Write to CSV - with open(csv_file_path, mode="w", newline="") as csvfile: - csv_writer = csv.writer(csvfile) - # Write header (keys) - csv_writer.writerow(csv_keys) - # Write data (values) - csv_writer.writerow(csv_data) - - -def split_data( - source_dir, - train_dir, - test_dir, - val_dir, - train_ratio=0.75, - test_ratio=0.15, - val_ratio=0.1, -): - """ - Split files in a source directory into train, test, and validation directories. - - Args: - source_dir (str): Path to the source directory containing files. - train_dir (str): Path to the train directory. - test_dir (str): Path to the test directory. - val_dir (str): Path to the validation directory. - train_ratio (float): Proportion of files to allocate to the train set. - test_ratio (float): Proportion of files to allocate to the test set. - val_ratio (float): Proportion of files to allocate to the validation set. - - Raises: - ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1. - """ - # Validate ratios - if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1): - raise ValueError("Ratios must be between 0 and 1.") - if train_ratio + test_ratio + val_ratio != 1: - raise ValueError( - "The sum of train_ratio, test_ratio, and val_ratio must equal 1." - ) - - # Get all files in the source directory - files = [ - f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)) - ] - random.shuffle(files) # Shuffle files for unbiased distribution - - # QC: - # print(f'files {files}') - - # Calculate split indices - preference train > test > val - total_files = len(files) - num_train = int(total_files * train_ratio) - num_test = max(int(total_files * test_ratio), total_files - num_train) - num_val = total_files - num_train - num_test - - # Distribute files - train_files = files[:num_train] - test_files = files[num_train : num_train + num_test] - val_files = files[num_train + num_test :] - - for datafiles, target_dir in zip( - [train_files, test_files, val_files], [train_dir, test_dir, val_dir] - ): - for datafile in datafiles: - shutil.copy( - os.path.join(source_dir, datafile), os.path.join(target_dir, datafile) - ) - - print( - f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files." - ) - - def process_features(parameters, directories): # create mesh scale_x = parameters["scale_x"] diff --git a/script/build_poisson_square.py b/script/build_poisson_square.py index 7b4919f..0a8b549 100644 --- a/script/build_poisson_square.py +++ b/script/build_poisson_square.py @@ -1,14 +1,11 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w -import csv -import os -import random -import shutil import time from argparse import ArgumentParser import firedrake as fd import matplotlib.pyplot as plt +from build_helper import * from firedrake.__future__ import interpolate import UM2N @@ -57,177 +54,6 @@ def parse_arguments(): return parser.parse_args() -def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None): - """ - Set up directories for storing data, plots, and logs. - - Args: - base_dir (str): Base directory for the project. - parameters (dict): Dictionary of parameters, including "mesh_type" and "problem". - - "mesh_type" (int): Type of mesh used in the simulation (default: 0). - - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem"). - subdirs (list, optional): List of subdirectories to create. Defaults to: - ["data", "plot", "log", "mesh", "mesh_fine"]. - Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz". - dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders - matching keys in the `parameters` dictionary. Example: - "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}". - If not provided, raises a ValueError. - - Returns: - dict: A dictionary mapping subdirectory names to their full paths. - - Raises: - ValueError: If `dir_format` is not provided or is invalid. - """ - - # Define the project directory - if base_dir: - project_dir = os.path.abspath(base_dir) - else: - project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - - # QC: - print(f"Project Directory: {project_dir}") - - # Define the dataset directory - dataset_dir = os.path.join( - project_dir, "data", f"dataset_meshtype_{mesh_type}", problem - ) - - # Use the provided format string for the problem-specific directory - if dir_format is None: - problem_specific_dir = os.path.join( - dataset_dir, f"{problem}_meshtype_{mesh_type}" - ) - else: - # check if dir_format is a valid string format - if not isinstance(dir_format, str): - raise ValueError("dir_format must be a string.") - problem_specific_dir = os.path.join(dataset_dir, dir_format) - - # Define default subdirectories if not provided - if subdirs is None: - subdirs = [ - "data", - "plot", - "log", - "mesh", - "mesh_fine", - "plot_compare", - "train", - "test", - "val", - ] - - # Create and clear directories - directories = {} - for subdir in subdirs: - dir_path = os.path.join(problem_specific_dir, subdir) - if not os.path.exists(dir_path): - os.makedirs(dir_path) - else: - # Clear the directory by removing all files - for file in os.listdir(dir_path): - os.remove(os.path.join(dir_path, file)) - directories[subdir] = dir_path - - # QC: - # print(f"Subdirectories created: {directories}") - - return directories - - -def output_csv(parameters, key_list, output_dir): - """ - Write selected parameters to a CSV file. - - Args: - parameters (dict): Dictionary of parameters to write. - key_list (list): List of keys to include in the CSV. - output_dir (str): Directory where the CSV file will be saved. - """ - # Filter parameters based on key_list - csv_keys = [key for key in key_list if key in parameters] - csv_data = [parameters[key] for key in csv_keys] - - # Define the output file path - csv_file_path = os.path.join(output_dir, "info.csv") - - # Write to CSV - with open(csv_file_path, mode="w", newline="") as csvfile: - csv_writer = csv.writer(csvfile) - # Write header (keys) - csv_writer.writerow(csv_keys) - # Write data (values) - csv_writer.writerow(csv_data) - - -def split_data( - source_dir, - train_dir, - test_dir, - val_dir, - train_ratio=0.75, - test_ratio=0.15, - val_ratio=0.1, -): - """ - Split files in a source directory into train, test, and validation directories. - - Args: - source_dir (str): Path to the source directory containing files. - train_dir (str): Path to the train directory. - test_dir (str): Path to the test directory. - val_dir (str): Path to the validation directory. - train_ratio (float): Proportion of files to allocate to the train set. - test_ratio (float): Proportion of files to allocate to the test set. - val_ratio (float): Proportion of files to allocate to the validation set. - - Raises: - ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1. - """ - # Validate ratios - if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1): - raise ValueError("Ratios must be between 0 and 1.") - if train_ratio + test_ratio + val_ratio != 1: - raise ValueError( - "The sum of train_ratio, test_ratio, and val_ratio must equal 1." - ) - - # Get all files in the source directory - files = [ - f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)) - ] - random.shuffle(files) # Shuffle files for unbiased distribution - - # QC: - # print(f'files {files}') - - # Calculate split indices - preference train > test > val - total_files = len(files) - num_train = int(total_files * train_ratio) - num_test = max(int(total_files * test_ratio), total_files - num_train) - num_val = total_files - num_train - num_test - - # Distribute files - train_files = files[:num_train] - test_files = files[num_train : num_train + num_test] - val_files = files[num_train + num_test :] - - for datafiles, target_dir in zip( - [train_files, test_files, val_files], [train_dir, test_dir, val_dir] - ): - for datafile in datafiles: - shutil.copy( - os.path.join(source_dir, datafile), os.path.join(target_dir, datafile) - ) - - print( - f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files." - ) - - def process_features(parameters, problem_data_dir): # create mesh scale_x = parameters["scale_x"] diff --git a/script/build_swirl.py b/script/build_swirl.py index 8b5dd25..f246c36 100644 --- a/script/build_swirl.py +++ b/script/build_swirl.py @@ -1,12 +1,10 @@ # Author: Chunyang Wang # GitHub Username: chunyang-w -import csv -import os -import shutil from argparse import ArgumentParser import firedrake as fd import matplotlib.pyplot as plt +from build_helper import * import UM2N From e4bd8e10b11acfdbdf791918fef4b1d595f9f914 Mon Sep 17 00:00:00 2001 From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com> Date: Wed, 16 Jul 2025 11:24:06 +0100 Subject: [PATCH 5/7] #70 add flow control for feature parameters --- UM2N/processor/processor.py | 46 ++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/UM2N/processor/processor.py b/UM2N/processor/processor.py index 485404a..40a26cb 100644 --- a/UM2N/processor/processor.py +++ b/UM2N/processor/processor.py @@ -234,16 +234,18 @@ def get_conv_feat(self, fix_reso_x=20, fix_reso_y=20): for j in range(len(conv_y_fix)): # (x, y) conv_feat conv_xy_fix[:, i, j] = np.array([conv_x_fix[i], conv_y_fix[j]]) - conv_uh_fix[:, i, j] = self.raw_feature["uh"].at( - [conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3 - ) + if "uh" in self.raw_feature: + conv_uh_fix[:, i, j] = self.raw_feature["uh"].at( + [conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3 + ) if "grad_uh_norm" in self.raw_feature: conv_grad_uh_norm_fix[:, i, j] = self.raw_feature[ "grad_uh_norm" ].at([conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3) - conv_hessian_norm_fix[:, i, j] = self.raw_feature["hessian_norm"].at( - [conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3 - ) + if "hessian_norm" in self.raw_feature: + conv_hessian_norm_fix[:, i, j] = self.raw_feature[ + "hessian_norm" + ].at([conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3) conv_monitor_val_fix[:, i, j] = self.raw_feature["monitor_val"].at( [conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3 ) @@ -316,16 +318,6 @@ def to_train_data(self): np_data = { "x": self.x, "coord": self.coordinates, - "u": self.feature["uh"], - "grad_u": self.feature["grad_uh"], - "grad_u_norm": self.feature["grad_uh_norm"], - "hessian": self.feature["hessian"], - "phi": self.feature["phi"], - "grad_phi": self.feature["grad_phi"], - "hessian_norm": self.feature["hessian_norm"], - "jacobian": self.feature["jacobian"], - "jacobian_det": self.feature["jacobian_det"], - "monitor_val": self.feature["monitor_val"], "edge_index": self.edge_T, "edge_index_bi": self.edge_bi_T, "cluster_edges": None, # this will be added if we use data_transform.py to add cluster edges # noqa @@ -365,6 +357,28 @@ def to_train_data(self): "swirl_params": self.swirl_params, "t": self.t, # time step when solving burgers eq. "idx": self.idx, # index number for picking params for burgers tracer. # noqa + "u": self.feature["uh"] if "uh" in self.feature else None, + "grad_u": self.feature["grad_uh"] if "grad_uh" in self.feature else None, + "grad_u_norm": self.feature["grad_uh_norm"] + if "grad_uh_norm" in self.feature + else None, + "hessian": self.feature["hessian"] if "hessian" in self.feature else None, + "phi": self.feature["phi"] if "phi" in self.feature else None, + "grad_phi": self.feature["grad_phi"] + if "grad_phi" in self.feature + else None, + "hessian_norm": self.feature["hessian_norm"] + if "hessian_norm" in self.feature + else None, + "jacobian": self.feature["jacobian"] + if "jacobian" in self.feature + else None, + "jacobian_det": self.feature["jacobian_det"] + if "jacobian_det" in self.feature + else None, + "monitor_val": self.feature["monitor_val"] + if "monitor_val" in self.feature + else None, "f": self.feature["f"] if "f" in self.feature else None, } if "uh_adapt" in self.feature: # currently only in swirl case From 6da913b56e82f59a617f621ad16c2468a3a48a5a Mon Sep 17 00:00:00 2001 From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com> Date: Wed, 16 Jul 2025 13:20:35 +0100 Subject: [PATCH 6/7] #70 add build from monitor only --- script/build_monitor_dataset.py | 324 ++++++++++++++++++++++++++++++++ 1 file changed, 324 insertions(+) create mode 100644 script/build_monitor_dataset.py diff --git a/script/build_monitor_dataset.py b/script/build_monitor_dataset.py new file mode 100644 index 0000000..3bb2f64 --- /dev/null +++ b/script/build_monitor_dataset.py @@ -0,0 +1,324 @@ +import time +from argparse import ArgumentParser + +import firedrake as fd +import matplotlib.pyplot as plt +import movement as mv +from build_helper import * +from matplotlib.colors import LogNorm + +import UM2N + + +def parse_arguments(): + """Parse command-line arguments.""" + parser = ArgumentParser() + parser.add_argument( + "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh" + ) + parser.add_argument( + "--max_dist", type=int, default=6, help="Max number of distributions" + ) + parser.add_argument( + "--n_dist", type=int, default=None, help="Number of distributions" + ) + parser.add_argument( + "--lc", type=float, default=5e-2, help="Length characteristic of mesh elements" + ) + parser.add_argument( + "--field_type", type=str, default="aniso", help="Data type (aniso/iso)" + ) + parser.add_argument( + "--boundary_scheme", + type=str, + default="full", + help="Use padded scheme or full-scale scheme to sample central point of the bump (pad/full)", + ) + parser.add_argument( + "--n_samples", type=int, default=100, help="Number of samples generated" + ) + parser.add_argument("--rand_seed", type=int, default=63, help="Random seed") + + parsed_args = parser.parse_args() + + # Handle dependency between max_dist and n_dist + # max number of distributions used to generate the dataset + # only if n_dist is not set if n_dist is set, max_dist will be disabled + if parsed_args.n_dist is not None: + parsed_args.max_dist = None # Disable max_dist if n_dist is set + print("Warning: max_dist is ignored because n_dist is set.") + # QC: + # print(parsed_args) + + return parsed_args + + +def create_mesh(i, mesh_type, lc, scale_x, problem_mesh_dir): + """ + Generate a mesh for the given sample index. + + Args: + i: The sample index. + mesh_type: The type of mesh to generate. + lc: The length characteristic of the mesh. + scale_x: The scale of the mesh. + problem_mesh_dir: Directory to save the generated mesh. + + Returns: + The generated mesh. + """ + if mesh_type != 0: + unstructured_square_mesh_gen = UM2N.UnstructuredSquareMeshGenerator( + scale=scale_x, mesh_type=mesh_type + ) # noqa + return unstructured_square_mesh_gen.generate_mesh( + res=lc, + output_filename=os.path.join(problem_mesh_dir, f"mesh_{i:04d}.msh"), + ) + else: + n_grid = int(1 / lc) + return fd.UnitSquareMesh(n_grid, n_grid) + + +def generate_monitor(): + """ + Generate a monitor function and its parameters. + + Returns: + monitor_func: A Function(mesh) which returns a Firedrake Form of the monitor function eq + monitor_params: A dictionary containing the parameters used to generate the monitor. + """ + # Generate random monitor parameters + # Note: These parameters are specific to the RingMonitor function + monitor_params = { + "centre": ( + round(random.uniform(0.2, 0.8), 3), # Random x-coordinate of the center + round(random.uniform(0.2, 0.8), 3), # Random y-coordinate of the center + ), + "radius": round(random.uniform(0.1, 0.5), 3), # Random radius + "amplitude": int(random.uniform(10, 100)), # Random amplitude + "width": int(random.uniform(10, 200)), # Random width + } + + # Initialize the monitor function + # The monitor function is created using the RingMonitorBuilder from the movement library. + # To modify this function for a different monitor, replace. + mb = mv.RingMonitorBuilder( + centre=monitor_params["centre"], + radius=monitor_params["radius"], + amplitude=monitor_params["amplitude"], + width=monitor_params["width"], + ) + # Get the monitor function as a Firedrake Form + monitor_func = mb.get_monitor() + + return monitor_func, monitor_params + + +def process_features(parameters, directories): + # ==== Create the mesh ====================== + mesh = create_mesh( + i, + mesh_type=parameters["mesh_type"], + lc=parameters["lc"], + scale_x=parameters["scale_x"], + problem_mesh_dir=directories["mesh"], + ) + + # ==== Generate monitor ====================== + monitor_func, monitor_params = generate_monitor() + + # output specific parameters to csv file + output_csv(monitor_params, list(monitor_params.keys()), directories["log"]) + + # get projection of the monitor function for feature output + monitor_val = monitor_func(mesh) + + # ==== Move the mesh ====================== + + # create Monge Ampere obj + mover = mv.MongeAmpereMover( + mesh, monitor_func, method="relaxation", rtol=1e-3, maxiter=500 + ) + + start = time.perf_counter() + + # move the mesh + mover.move() + + # assign new_mesh + new_mesh = mover.mesh + + # ==== Extract features from moved mesh ====================== + + # this is the jacobian of x with respect to xi + jacobian = fd.project( + fd.Identity(2) + mover.H, fd.TensorFunctionSpace(new_mesh, "CG", 1) + ) + jacobian_det = fd.Function(fd.FunctionSpace(new_mesh, "CG", 1), name="jacobian_det") + jacobian_det.project( + jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0] + ) + + # get phi/grad_phi projected to the original mesh + phi = mover.phi + grad_phi = mover.grad_phi + + end = time.perf_counter() + dur = (end - start) * 1000 + + # ==== Process data for training ====================== + mesh_processor = UM2N.MeshProcessor( + original_mesh=mesh, + optimal_mesh=new_mesh, + function_space=fd.FunctionSpace(new_mesh, "CG", 1), + use_4_edge=True, + feature={ + "jacobian": jacobian.dat.data_ro.reshape(-1, 4), + "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1), + "phi": phi.dat.data_ro.reshape(-1, 1), + "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2), + "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), + }, + raw_feature={ + "monitor_val": monitor_val, + "jacobian": jacobian, + "jacobian_det": jacobian_det, + }, + # dist_params=None, # When nothing passed the default is used + ) + + # save out data + mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}")) + + # ==== Plot mesh, solution, error ====================== + rows, cols = 2, 2 + cmap = "plasma" + + fig, ax = plt.subplots( + rows, cols, figsize=(cols * 5, rows * 5), layout="compressed" + ) + + # Orginal low resolution uniform mesh + fd.triplot(mesh, axes=ax[0, 0]) + ax[0, 0].set_title("Original uniform Mesh") + # Adapted mesh + fd.triplot(new_mesh, axes=ax[0, 1]) + ax[0, 1].set_title(f"Adapted Mesh (MA): time taken {dur:.2f} ms") + + # Monitor on high resolution mesh + fd.triplot(mesh, axes=ax[1, 0]) + cb = fd.tripcolor(monitor_val, cmap=cmap, axes=ax[1, 0], alpha=0.5, norm=LogNorm()) + ax[1, 0].set_title( + f'Monitor (c: {monitor_params["centre"]} r: {monitor_params["radius"]} a: {monitor_params["amplitude"]} w: {monitor_params["width"]} )' + ) + plt.colorbar(cb) + + # Monitor on high resolution mesh + fd.triplot(new_mesh, axes=ax[1, 1]) + cb = fd.tripcolor(monitor_val, cmap=cmap, axes=ax[1, 1], alpha=0.5, norm=LogNorm()) + ax[1, 1].set_title("Monitor overlayed with Adapted Mesh") + plt.colorbar(cb) + + for rr in range(rows): + for cc in range(cols): + ax[rr, cc].set_aspect("equal", "box") + + fig.savefig(os.path.join(directories["plot_compare"], f"plot_{i:04d}.png")) + plt.close() + + +if __name__ == "__main__": + # parse args + args = parse_arguments() + + # ==== Parameters ====================== + parameters = { + # parameters for problem + "problem": "ring_monitor_test", + "lc": args.lc, + # parameters for mesh def + "n_samples": args.n_samples, + "data_type": args.field_type, + "scheme": args.boundary_scheme, + "mesh_type": int(args.mesh_type), + # parameters for domain scale + "scale_x": 1, + "scale_y": 1, + # parameters for data split + "p_train": 0.75, + "p_test": 0.15, + "p_val": 0.1, + } + + # Set random seed + random.seed(args.rand_seed) + + # ==== Setup Directories ====================== + problem_specific_dir = "lc={}_n={}_{}_{}_meshtype_{}".format( + parameters["lc"], + parameters["n_samples"], + parameters["data_type"], + parameters["scheme"], + parameters["mesh_type"], + ) + + subdirs = [ + "data", + "plot", + "plot_compare", + "log", + "mesh", + "mesh_fine", + "train", + "test", + "val", + ] + + directories = setup_directories( + problem=parameters["problem"], + mesh_type=parameters["mesh_type"], + base_dir=None, + subdirs=subdirs, + dir_format=problem_specific_dir, + ) + + # ==== Output CSV ====================== + key_list = [ + "cmin", + "cmax", + "sigma_mean_scaler", + "sigma_sigma_scaler", + "sigma_eps" "data_type", + "scheme", + "n_samples", + "lc", + "mesh_type", + ] + output_csv(parameters, key_list, directories["log"]) + + # ==== Data Generation Scripts ====================== + i = 0 + while i < parameters["n_samples"]: + try: + print(f"Generating Sample: {i}") + + # create dataset + process_features(parameters, directories) + i += 1 + + except fd.exceptions.ConvergenceError: + print(f"Iteration {i} did not converge.") + continue + + # ==== Data Splits ============================================ + # TODO: this should probably be done in the training script, not the build script + split_data( + source_dir=directories["data"], + train_dir=directories["train"], + test_dir=directories["test"], + val_dir=directories["val"], + train_ratio=parameters["p_train"], + test_ratio=parameters["p_test"], + val_ratio=parameters["p_val"], + ) From 04808da04d940b9c75ef4463702aafb7c81839f1 Mon Sep 17 00:00:00 2001 From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com> Date: Wed, 16 Jul 2025 13:24:33 +0100 Subject: [PATCH 7/7] #70 remove extra files --- script/ej321_helm_dataset_run.sh | 57 -------------------------------- script/make_build_all_test.sh | 15 --------- 2 files changed, 72 deletions(-) delete mode 100644 script/ej321_helm_dataset_run.sh delete mode 100644 script/make_build_all_test.sh diff --git a/script/ej321_helm_dataset_run.sh b/script/ej321_helm_dataset_run.sh deleted file mode 100644 index f3803d1..0000000 --- a/script/ej321_helm_dataset_run.sh +++ /dev/null @@ -1,57 +0,0 @@ - - -###################### Training dataset generation ###################### -# Training data -# Problem type: Helmholtz -# Meshtype: 2 -# n_samples: 300 -# Random_seed: 63 -# Resolution: 0.05, 0.055 - -# use 2 / 6 / 0 -mesh_type=(6) -# training set build -rand_seed=63 -lcs=(0.05 0.055) -n_samples_train=(300) - -# helmholtz square case -for mt in "${mesh_type[@]}"; do - for i in "${lcs[@]}"; do - for n_s in "${n_samples_train[@]}"; do - echo "lc = $i meshtype = $mt num samples = $n_s" - python build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_s --field_type="aniso" --boundary_scheme="full" --mesh_type=$mt - # python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="pad" --mesh_type=$mesh_type - # python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="full" --mesh_type=$mesh_type - # python ./script/build_helmholtz_square.py --lc=$i --rand_seed $rand_seed --n_samples $n_samples_train --field_type "aniso" --boundary_scheme "pad" --mesh_type=$mesh_type - done - done -done -###################### Training dataset generation ###################### - - -# ###################### Testing dataset generation ###################### -# # Test data -# # Problem type: Helmholtz -# # Meshtype: 0, 2, 6 -# # n_samples: 100 -# # Random_seed: 42 -# # Resolution: 0.05, 0.055, 0.028 - -# n_samples_train=101 -# rand_seed=42 -# mesh_types=(6 2 0) -# # length character for polygon mesh -# lcs=(0.05 0.028) - -# # helmholtz square case -# for m in "${mesh_types[@]}"; do -# for i in "${lcs[@]}"; do -# echo "lc = $i Meshtype = $m" -# python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m -# # python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="pad" --mesh_type=$mesh_type -# # python ./script/build_helmholtz_square.py --lc=$i --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="full" --mesh_type=$mesh_type -# # python ./script/build_helmholtz_square.py --lc=$i --rand_seed $rand_seed --n_samples $n_samples_train --field_type "aniso" --boundary_scheme "pad" --mesh_type=$mesh_type -# done -# done -# ###################### Testing dataset generation ###################### \ No newline at end of file diff --git a/script/make_build_all_test.sh b/script/make_build_all_test.sh deleted file mode 100644 index a531450..0000000 --- a/script/make_build_all_test.sh +++ /dev/null @@ -1,15 +0,0 @@ -mesh_types=(6 2 0) -rand_seed=42 -n_samples_train=3 - -for m in "${mesh_types[@]}"; do - python build_burgers_square.py --rand_seed=$rand_seed --n_case=$n_samples_train --mesh_type=$m - - # python build_helmholtz_square.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m - # python build_helmholtz_poly.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m - # python build_poisson_square.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m - # python build_poisson_poly.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m - # python build_burgers_square.py --rand_seed=$rand_seed --n_case=$n_samples_train --mesh_type=$m - # python build_swirl.py --lc=0.028 --alpha=1.5 --r_0 0.20 --x_0 0.3 --y_0 0.3 --n_monitor_smooth=10 --mesh_type=$m - -done \ No newline at end of file