From 274699d6b6c0ed3e77373b38a577431d2fa27efe Mon Sep 17 00:00:00 2001
From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com>
Date: Fri, 4 Jul 2025 08:24:45 +0100
Subject: [PATCH 1/7] Merge fork into branch (#71)

Merging fork into branch
---
 UM2N/generator/burgers_solver.py    |  43 +-
 UM2N/generator/mesh_generator.py    |   3 +-
 UM2N/generator/swirl_solver.py      |  29 +-
 UM2N/generator/swirl_solver_step.py |   3 +-
 script/build_burgers_square.py      | 670 +++++++++++--------
 script/build_helmholtz_poly.py      | 830 ++++++++++++-----------
 script/build_helmholtz_square.py    | 990 ++++++++++++++--------------
 script/build_poisson_poly.py        | 827 ++++++++++++-----------
 script/build_poisson_square.py      | 821 ++++++++++++-----------
 script/build_swirl.py               | 518 +++++++++------
 script/make_dataset_helm_train.sh   |   2 +-
 11 files changed, 2567 insertions(+), 2169 deletions(-)

diff --git a/UM2N/generator/burgers_solver.py b/UM2N/generator/burgers_solver.py
index ca0b77e..fb901a6 100644
--- a/UM2N/generator/burgers_solver.py
+++ b/UM2N/generator/burgers_solver.py
@@ -9,6 +9,8 @@
 import movement as mv
 import numpy as np  # noqa
 
+from firedrake.__future__ import interpolate # ej321 add
+
 __all__ = ["BurgersSolver"]
 
 
@@ -203,6 +205,9 @@ def solve_problem(self, callback=None):
                 monitor_function=self.monitor_function,
                 rtol=1e-3,
             )
+            # ej321 - added monitor_function for feature extraction
+            raw_monitor_val = self.monitor_function(self.mesh) # ej321 - is this the correct mesh to use?
+
             adapter.move()
             end = time.perf_counter()
             dur_ms = (end - start) * 1000
@@ -216,6 +221,10 @@ def solve_problem(self, callback=None):
             function_space = fd.FunctionSpace(self.mesh, "CG", 1)
             uh_0 = fd.Function(function_space)
             uh_0.project(self.u[0])
+            
+            # ej321 - added monitor_function for feature extraction
+            monitor_val = fd.Function(function_space)
+            monitor_val.assign(raw_monitor_val)
 
             # calculate solution on adapted mesh
             self.mesh.coordinates.dat.data[:] = self.adapt_coord
@@ -240,27 +249,42 @@ def solve_problem(self, callback=None):
 
             func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1)
             uh_grad = fd.interpolate(fd.grad(uh_0), func_vec_space)
+
+            # ej321 - grad_norm copied from build_helmholtz_square.py
+            grad_uh_interpolate = fd.assemble(interpolate(fd.grad(self.u[0]),func_vec_space))
+            grad_norm = fd.Function(function_space)
+            grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
+            grad_norm /= grad_norm.vector().max()
+
             hessian_norm = self.f_norm
             hessian = self.l2_projection
             phi = adapter.phi
             phi_grad = adapter.grad_phi
-            sigma = adapter.sigma
+            # sigma = adapter.sigma
+            sigma = adapter.H # ej321 - this may be the updated hessian?
             I = fd.Identity(2)  # noqa
             jacobian = I + sigma
-            jacobian_det = fd.Function(function_space, name="jacobian_det")
-            jacobian_det.project(
+            # jacobian_det = fd.Function(function_space, name="jacobian_det")
+            # jacobian_det = fd.Function(adapter.P1, name="jacobian_det")
+            self.jacob_det = fd.Function(adapter.P1, name="jacobian_det").project(
                 jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0]
             )
-            self.jacob_det = fd.project(
-                jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1)
-            )
-            self.jacob = fd.project(
-                jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1)
-            )
+            # self.jacob_det = jacobian_det
+            # self.jacob_det = fd.project(
+            #     jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1)
+            # ) #  ej321 - not needed?
+            # self.jacob = jacobian # ej321 - this is copied from mesh_generator.py
+            self.jacob = fd.Function(adapter.P1_ten, name="jacobian").project(jacobian)
+            # self.jacob.project(jacobian)
+            # self.jacob = fd.project(
+            #     jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1)
+            # ) #  ej321 - not needed?
+            
 
             callback(
                 uh=uh_0,
                 uh_grad=uh_grad,
+                grad_norm = grad_norm,  # ej321 - added grad_norm
                 hessian_norm=hessian_norm,
                 hessian=hessian,
                 phi=phi,
@@ -280,6 +304,7 @@ def solve_problem(self, callback=None):
                 dur=dur_ms,
                 t=t,
                 idx=self.idx,
+                monitor_val=monitor_val,  # ej321 - added monitor_val
             )
 
             # step forward in time
diff --git a/UM2N/generator/mesh_generator.py b/UM2N/generator/mesh_generator.py
index 3c50d4e..8853c50 100644
--- a/UM2N/generator/mesh_generator.py
+++ b/UM2N/generator/mesh_generator.py
@@ -54,7 +54,8 @@ def move_mesh(self):
         )
         mover.move()
         # extract Hessian of the movement
-        sigma = mover.sigma
+        # sigma = mover.sigma
+        sigma = mover.H # ej321 - this may be the updated hessian?
         I = fd.Identity(2)  # noqa
         jacobian = I + sigma
         jacobian_det = fd.Function(mover.P1, name="jacobian_det")
diff --git a/UM2N/generator/swirl_solver.py b/UM2N/generator/swirl_solver.py
index 06b4bb3..750d7e7 100644
--- a/UM2N/generator/swirl_solver.py
+++ b/UM2N/generator/swirl_solver.py
@@ -660,19 +660,24 @@ def solve_problem(self, callback=None, fail_callback=None):
                 hessian = self.l2_projection
                 phi = adapter.phi
                 phi_grad = adapter.grad_phi
-                sigma = adapter.sigma
+                # sigma = adapter.sigma
+                sigma = adapter.H # ej321 - this may be the updated hessian?
                 I = fd.Identity(2)  # noqa
                 jacobian = I + sigma
-                jacobian_det = fd.Function(function_space, name="jacobian_det")
-                jacobian_det.project(
-                    jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0]
-                )
-                self.jacob_det = fd.project(
-                    jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1)
-                )
-                self.jacob = fd.project(
-                    jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1)
-                )
+                # jacobian_det = fd.Function(function_space, name="jacobian_det")
+                # jacobian_det.project(
+                #     jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0]
+                # )
+                # self.jacob_det = fd.project(
+                #     jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1)
+                # )
+                self.jacob_det = fd.Function(adapter.P1, name="jacobian_det").project(
+                jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0]
+            )
+                # self.jacob = fd.project(
+                #     jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1)
+                # )
+                self.jacob = fd.Function(adapter.P1_ten, name="jacobian").project(jacobian)
 
                 if ((step + 1) % self.save_interval == 0) or (step == 0):
                     callback(
@@ -698,6 +703,8 @@ def solve_problem(self, callback=None, fail_callback=None):
                         sigma=self.sigma,
                         alpha=self.alpha,
                         r_0=self.r_0,
+                        x_0=self.x_0,
+                        y_0=self.y_0,
                         t=self.t,
                     )
 
diff --git a/UM2N/generator/swirl_solver_step.py b/UM2N/generator/swirl_solver_step.py
index 96aba99..a253ca0 100644
--- a/UM2N/generator/swirl_solver_step.py
+++ b/UM2N/generator/swirl_solver_step.py
@@ -755,7 +755,8 @@ def solve_problem(self, callback=None, fail_callback=None):
                     hessian = self.l2_projection
                     phi = adapter.phi
                     phi_grad = adapter.grad_phi
-                    sigma = adapter.sigma
+                    # sigma = adapter.sigma
+                    sigma = adapter.H # ej321 - this may be the updated hessian?
                     I = fd.Identity(2)  # noqa
                     jacobian = I + sigma
                     jacobian_det = fd.Function(function_space, name="jacobian_det")
diff --git a/script/build_burgers_square.py b/script/build_burgers_square.py
index a8ed4ff..c82043a 100644
--- a/script/build_burgers_square.py
+++ b/script/build_burgers_square.py
@@ -1,6 +1,7 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
 
+import csv
 import os
 import random
 import shutil
@@ -8,276 +9,300 @@
 
 import firedrake as fd
 import matplotlib.pyplot as plt
-import pandas as pd
+# import pandas as pd
 
+# Add the parent directory to the Python path
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
 
-def arg_parse():
-    parser = ArgumentParser()
-    parser.add_argument(
-        "--mesh_type", type=int, default=2, help="algorithm used to generate mesh"
-    )
-    parser.add_argument(
-        "--max_dist",
-        type=int,
-        default=6,
-        help="max number of distributions used to\
-                            generate the dataset (only works if\
-                                n_dist is not set)",
-    )
-    parser.add_argument(
-        "--n_dist",
-        type=int,
-        default=None,
-        help="number of distributions used to\
-                            generate the dataset (this will disable\
-                                max_dist)",
-    )
-    parser.add_argument(
-        "--lc",
-        type=float,
-        default=6e-2,
-        help="the length characteristic of the elements in the\
-                            mesh",
-    )
-    parser.add_argument(
-        "--field_type",
-        type=str,
-        default="iso",
-        help="anisotropic or isotropic data type(aniso/iso)",
-    )
+def parse_arguments():
+    """Parse the command-line options of the Burgers dataset builder.
+
+    Returns:
+        Namespace from ``parser.parse_args()``; ``max_dist`` is reset to
+        ``None`` when ``n_dist`` is given, because ``n_dist`` takes precedence.
+    """
+    parser = ArgumentParser(description="Build Burgers dataset with square meshes.")
+    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.")
+    parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.")
+    parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.")
+    parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.")
+    parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).")
     # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
-    parser.add_argument(
-        "--boundary_scheme",
-        type=str,
-        default="pad",
-        help="scheme used to generate the dataset (pad/full))",
-    )
-    parser.add_argument(
-        "--n_case", type=int, default=5, help="number of simulation cases"
-    )
-    parser.add_argument(
-        "--n_grid",
-        type=int,
-        default=20,
-        help="number of grids in a uniform mesh\
-                            only applied when mesh_type is 0",
-    )
-    parser.add_argument(
-        "--rand_seed", type=int, default=63, help="number of samples generated"
-    )
-    args_ = parser.parse_args()
-    print(args_)
-    return args_
-
-
-args = arg_parse()
-
-mesh_type = args.mesh_type
-
-data_type = args.field_type
-use_iso = True if data_type == "iso" else False
-
-rand_seed = args.rand_seed
-random.seed(rand_seed)
-
-# ====  Parameters ======================
-problem = "burgers"
-
-n_case = args.n_case
-
-# parameters for domain scale
-scale_x = 1
-scale_y = 1
-
-# parameters for random source
-max_dist = args.max_dist
-n_dist = args.n_dist
-lc = args.lc
-n_grid = args.n_grid
-
-# parameters for anisotropic data - distribution height scaler
-z_min = 0
-z_max = 1
-
-# parameters for isotropic data
-w_min = 0.05
-w_max = 0.2
-
-scheme = args.boundary_scheme
-c_min = 0.2 if scheme == "pad" else 0
-c_max = 0.8 if scheme == "pad" else 1
-
-# parameters for data split
-p_train = 0.75
-p_test = 0.15
-p_val = 0.1
-
-# =======================================
-
-
-df = pd.DataFrame(
-    {
-        "cmin": [c_min],
-        "cmax": [c_max],
-        "data_type": [data_type],
-        "scheme": [scheme],
-        "lc": [lc],
-        "mesh_type": [mesh_type],
-    }
-)
+    parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).")
+    parser.add_argument("--n_case", type=int, default=5, help="Number of simulation cases.")
+    parser.add_argument("--n_grid", type=int, default=20, help="Number of grids for uniform mesh if mesh_type 0.")
+    parser.add_argument("--rand_seed", type=int, default=63, help="Random seed for reproducibility.")
+
+    parsed_args = parser.parse_args()
+    if parsed_args.n_dist is not None:
+        parsed_args.max_dist = None
+        print("Warning: max_dist is ignored because n_dist is set.")
+    print(parsed_args)
+    # Return the adjusted namespace; re-parsing here would discard the override.
+    return parsed_args
+
+def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+    """
+    Create (or empty) the output directories for one dataset build.
+
+    Args:
+        problem (str): Problem name; a path component of the dataset directory.
+        mesh_type (int): Mesh type id; used in the "dataset_meshtype_{...}"
+            path component and in the default leaf directory name.
+        base_dir (str, optional): Project root. When falsy, the parent of
+            the directory containing this script is used.
+        subdirs (list, optional): Subdirectory names to create. Defaults to
+            ["data", "plot", "log", "mesh", "mesh_fine", "plot_compare",
+            "train", "test", "val"].
+        dir_format (str, optional): Leaf directory name, used verbatim (no
+            placeholder substitution happens here). Defaults to
+            "{problem}_meshtype_{mesh_type}".
+
+    Returns:
+        dict: A dictionary mapping subdirectory names to their full paths.
+
+    Raises:
+        ValueError: If `dir_format` is given but is not a string.
+    """
+
+    # Define the project directory
+    if base_dir:
+        project_dir = os.path.abspath(base_dir)
+    else:
+        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    
+    # QC:
+    print(f"Project Directory: {project_dir}")
 
+    # Define the dataset directory
+    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
 
-def move_data(target, source, start, num_file):
+    # Use the provided format string for the problem-specific directory
+    if dir_format is None:
+        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+    else:
+        # check if dir_format is a valid string format
+        if not isinstance(dir_format, str):
+            raise ValueError("dir_format must be a string.")
+        problem_specific_dir = os.path.join(dataset_dir, dir_format)
+
+    # Define default subdirectories if not provided
+    if subdirs is None:
+        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
+                   "plot_compare", "train", "test", "val"]
+
+    # Create and clear directories
+    directories = {}
+    for subdir in subdirs:
+        dir_path = os.path.join(problem_specific_dir, subdir)
+        if not os.path.exists(dir_path):
+            os.makedirs(dir_path)
+        else:
+            # Clear the directory (os.remove handles plain files, not nested directories)
+            for file in os.listdir(dir_path):
+                os.remove(os.path.join(dir_path, file))
+        directories[subdir] = dir_path
+
+    # QC:
+    print(f"Subdirectories created: {directories}")
+
+    return directories
+
+def output_csv(parameters, key_list, output_dir):
+    """Dump a chosen subset of the run parameters to ``info.csv``.
+
+    The file holds exactly two rows: a header row with the selected keys and
+    one row with the matching values, in the order given by ``key_list``.
+
+    Args:
+        parameters (dict): Mapping of parameter names to values.
+        key_list (list): Names to export; names absent from ``parameters``
+            are silently left out.
+        output_dir (str): Existing directory that receives ``info.csv``.
+    """
+    header = []
+    values = []
+    for name in key_list:
+        if name in parameters:
+            header.append(name)
+            values.append(parameters[name])
+
+    destination = os.path.join(output_dir, "info.csv")
+    with open(destination, mode="w", newline="") as handle:
+        # writerows emits the header row first, then the single value row.
+        csv.writer(handle).writerows([header, values])
+
+    print(f"Parameters saved to {destination}")
+
+def move_data(target, source, start, num_files):
+    """
+    Copy ``data_NNNN.npy`` files from `source` into a freshly emptied `target`.
+
+    Args:
+        target (str): Destination directory; created, or emptied if it exists.
+        source (str): Directory holding the ``data_{i:04d}.npy`` files.
+        start (int): Index of the first file to copy.
+        num_files (int): How many consecutive indices to copy from `start`.
+
+    Raises:
+        FileNotFoundError: If the source directory does not exist.
+        ValueError: If `start` is negative or `num_files` is not positive.
+    """
+    if not os.path.exists(source):
+        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
+
+    if start < 0 or num_files <= 0:
+        raise ValueError("Invalid start index or number of files to move.")
+
+    # Create the target directory if it doesn't exist
     if not os.path.exists(target):
         os.makedirs(target)
     else:
-        # delete all files under the directory
-        filelist = [f for f in os.listdir(target)]
-        for f in filelist:
-            os.remove(os.path.join(target, f))
-    # copy data from data dir to train dir
-    for i in range(start, num_file):
-        shutil.copy(
-            os.path.join(source, "data_{}.npy".format(i)),
-            os.path.join(target, "data_{}.npy".format(i)),
-        )
-
-
-project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__))))
-dataset_dir = os.path.join(
-    project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
-)
-problem_specific_dir = os.path.join(
-    dataset_dir,
-    "lc={}_ngrid_{}_n={}_{}_{}_meshtype_{}".format(
-        lc, n_grid, n_case, data_type, scheme, mesh_type
-    ),
-)
-
-
-problem_data_dir = os.path.join(problem_specific_dir, "data")
-problem_plot_dir = os.path.join(problem_specific_dir, "plot")
-problem_log_dir = os.path.join(problem_specific_dir, "log")
-
-problem_mesh_dir = os.path.join(problem_specific_dir, "mesh")
-problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine")
-
-if not os.path.exists(problem_mesh_dir):
-    os.makedirs(problem_mesh_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_dir, f))
-
-if not os.path.exists(problem_mesh_fine_dir):
-    os.makedirs(problem_mesh_fine_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_fine_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_fine_dir, f))
-
-if not os.path.exists(problem_data_dir):
-    os.makedirs(problem_data_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_data_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_data_dir, f))
-
-if not os.path.exists(problem_plot_dir):
-    os.makedirs(problem_plot_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_plot_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_plot_dir, f))
-
-if not os.path.exists(problem_log_dir):
-    os.makedirs(problem_log_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_log_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_log_dir, f))
-
-df.to_csv(os.path.join(problem_specific_dir, "info.csv"))
+        # Clear the target directory by removing all files
+        for file in os.listdir(target):
+            os.remove(os.path.join(target, file))
+
+    # Copy files sequentially starting from the specified index
+    for i in range(start, start + num_files):
+        try:
+            # Best effort: a missing or failing file is reported and skipped
+            shutil.copy(
+                os.path.join(source, f"data_{i:04d}.npy"),
+                os.path.join(target, f"data_{i:04d}.npy"),
+            )
+        except FileNotFoundError:
+            print(f"File data_{i:04d}.npy not found in {source}. Skipping.")
+            continue
+        except Exception as e:
+            print(f"An error occurred while copying data_{i:04d}.npy: {e}")
+            continue
+            
+def generate_mesh(parameters, dirs):
+    """Return (mesh, mesh_new, mesh_fine) for the configured mesh type."""
+    if parameters["mesh_type"] == 0:
+        # Structured case: two equal n_grid x n_grid meshes plus a 100 x 100 one.
+        cells = parameters["n_grid"]
+        coarse = fd.UnitSquareMesh(cells, cells)
+        coarse_copy = fd.UnitSquareMesh(cells, cells)
+        return coarse, coarse_copy, fd.UnitSquareMesh(100, 100)
+    # Unstructured case: the coarse path goes to the generator, then to fd.Mesh.
+    builder = UM2N.UnstructuredSquareMeshGenerator(
+        scale=parameters["scale_x"], mesh_type=parameters["mesh_type"])
+    coarse_file = os.path.join(dirs["mesh"], "mesh.msh")
+    coarse = builder.generate_mesh(res=parameters["lc"], output_filename=coarse_file)
+    reloaded = fd.Mesh(coarse_file)
+    fine = builder.generate_mesh(res=1e-2, output_filename=os.path.join(dirs["mesh_fine"], "mesh.msh"))
+    return coarse, reloaded, fine
 
 
 def get_sample_param_of_nu_generalization_by_idx_train(idx_in):
-    gauss_list_ = []
-    if idx_in == 1:
-        param_ = {"cx": 0.225, "cy": 0.5, "w": 0.01}
-        gauss_list_.append(param_)
-        nu_ = 0.0001
-    elif idx_in == 2:
-        param_ = {"cx": 0.225, "cy": 0.5, "w": 0.01}
-        gauss_list_.append(param_)
-        nu_ = 0.001
-    elif idx_in == 3:
-        param_ = {"cx": 0.225, "cy": 0.5, "w": 0.01}
-        gauss_list_.append(param_)
-        nu_ = 0.002
-    elif idx_in == 4:
-        shift_ = 0.15
-        param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        param_ = {"cx": 0.15, "cy": 0.5 + shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        nu_ = 0.0001
-    elif idx_in == 5:
-        shift_ = 0.15
-        param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        param_ = {"cx": 0.15, "cy": 0.5 + shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        nu_ = 0.001
-    elif idx_in == 6:
-        shift_ = 0.15
-        param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        param_ = {"cx": 0.15, "cy": 0.5 + shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        nu_ = 0.002
-    elif idx_in == 7:
-        shift_ = 0.2
-        param_ = {"cx": 0.3, "cy": 0.5 + shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        param_ = {"cx": 0.15, "cy": 0.5, "w": 0.01}
-        gauss_list_.append(param_)
-        nu_ = 0.0001
-    elif idx_in == 8:
-        shift_ = 0.2
-        param_ = {"cx": 0.3, "cy": 0.5 + shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        param_ = {"cx": 0.15, "cy": 0.5, "w": 0.01}
-        gauss_list_.append(param_)
-        nu_ = 0.001
-    elif idx_in == 9:
-        shift_ = 0.2
-        param_ = {"cx": 0.3, "cy": 0.5 + shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        param_ = {"cx": 0.3, "cy": 0.5 - shift_, "w": 0.01}
-        gauss_list_.append(param_)
-        param_ = {"cx": 0.15, "cy": 0.5, "w": 0.01}
-        gauss_list_.append(param_)
-        nu_ = 0.002
-    return gauss_list_, nu_
+    """
+    Return the fixed Gaussian bumps and viscosity for a training sample index.
+
+    Args:
+        idx_in (int): Sample index; must be one of 1..9 (else ValueError).
+
+    Returns:
+        tuple: (list of {"cx", "cy", "w"} dicts, viscosity value nu).
+    """
+    # Each entry maps an index to (one bump dict or a list of them, viscosity)
+    param_map = {
+        1: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.0001),
+        2: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.001),
+        3: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.002),
+        4: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.0001),
+        5: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.001),
+        6: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.002),
+        7: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.0001),
+        8: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.001),
+        9: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.002),
+    }
 
+    # Reject indices that have no entry in the table
+    if idx_in not in param_map:
+        raise ValueError(f"Invalid index: {idx_in}. Supported indices are {list(param_map.keys())}.")
 
-i = 0
+    params, nu_ = param_map[idx_in]
+    # Single-bump entries are stored as a bare dict; wrap them in a list
+    gauss_list_ = params if isinstance(params, list) else [params]
 
+    return gauss_list_, nu_
+
+# def get_sample_params(idx):
+#     """Retrieve sample parameters for the Burgers problem."""
+#     return UM2N.get_sample_param_of_nu_generalization_by_idx_train(idx)
+
+
+# def solve_case(idx, mesh, mesh_new, mesh_fine, dirs):
+#     """Solve a single case of the Burgers problem."""
+#     gauss_list, nu = get_sample_params(idx)
+#     solver = UM2N.BurgersSolver(mesh, mesh_fine, mesh_new, gauss_list=gauss_list, nu=nu, idx=idx)
+#     solver.solve_problem(lambda *args: sample_from_loop(*args, dirs))
+
+
+# def sample_from_loop(
+#     uh, uh_grad, hessian, hessian_norm, phi, grad_phi, jacobian, jacobian_det,
+#     uh_new, mesh_og, mesh_new, function_space, function_space_fine, uh_fine,
+#     dur, nu, gauss_list, t, idx, dirs, error_og_list=[], error_adapt_list=[]
+# ):
+#     """Process and save data from a single simulation loop."""
+#     mesh_processor = UM2N.MeshProcessor(
+#         original_mesh=mesh_og,
+#         optimal_mesh=mesh_new,
+#         function_space=function_space,
+#         use_4_edge=True,
+#         feature={
+#             "uh": uh.dat.data_ro.reshape(-1, 1),
+#             "grad_uh": uh_grad.dat.data_ro.reshape(-1, 2),
+#             "hessian": hessian.dat.data_ro.reshape(-1, 4),
+#             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
+#             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
+#             "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
+#             "phi": phi.dat.data_ro.reshape(-1, 1),
+#             "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
+#         },
+#         raw_feature={
+#             "uh": uh,
+#             "hessian_norm": hessian_norm,
+#             "jacobian": jacobian,
+#             "jacobian_det": jacobian_det,
+#         },
+#         nu=nu,
+#         gauss_list=gauss_list,
+#         dur=dur,
+#         t=t,
+#         idx=idx,
+#     )
+
+#     mesh_processor.save_taining_data(os.path.join(dirs["data"], f"data_{idx}"))
+
+#     # Plot results
+#     fig, axes = plt.subplots(2, 3, figsize=(15, 10))
+#     fd.trisurf(uh_fine, axes=axes[0, 0]).set_title("Solution field (HR)")
+#     fd.trisurf(uh, axes=axes[0, 1]).set_title("Solution field (Original Mesh)")
+#     fd.trisurf(uh_new, axes=axes[0, 2]).set_title("Solution field (Adapted Mesh)")
+#     fd.triplot(mesh_og, axes=axes[1, 0]).set_title("Original Mesh")
+#     fd.triplot(mesh_new, axes=axes[1, 1]).set_title("Adapted Mesh")
+#     fd.tripcolor(uh_new, cmap="coolwarm", axes=axes[1, 2]).set_title("Solution on Optimal Mesh")
+#     fig.savefig(os.path.join(dirs["plot"], f"plot_{idx}.png"))
+#     plt.close(fig)
+
+#     # Save error metrics
+#     uh = fd.project(uh, function_space_fine)
+#     uh_new = fd.project(uh_new, function_space_fine)
+#     error_original_mesh = fd.errornorm(uh, uh_fine, norm_type="L2")
+#     error_optimal_mesh = fd.errornorm(uh_new, uh_fine, norm_type="L2")
+#     pd.DataFrame(
+#         {"error_og": [error_original_mesh], "error_adapt": [error_optimal_mesh], "time": [dur]}
+#     ).to_csv(os.path.join(dirs["log"], f"log_{idx}.csv"), index=False)
 
 def sample_from_loop(
     uh,
     uh_grad,
+    grad_norm,  # ej321 - added grad_norm
     hessian,
     hessian_norm,
     phi,
@@ -295,6 +320,7 @@ def sample_from_loop(
     gauss_list,
     t,
     idx,
+    monitor_val,
     error_og_list=[],
     error_adapt_list=[],
 ):
@@ -308,16 +334,19 @@ def sample_from_loop(
         feature={
             "uh": uh.dat.data_ro.reshape(-1, 1),
             "grad_uh": uh_grad.dat.data_ro.reshape(-1, 2),
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added grad_norm
             "hessian": hessian.dat.data_ro.reshape(-1, 4),
             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
             "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
             "phi": phi.dat.data_ro.reshape(-1, 1),
             "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),
         },
         raw_feature={
             "uh": uh,
             "hessian_norm": hessian_norm,
+            "monitor_val": monitor_val, # ej321 - added monitor_val
             "jacobian": jacobian,
             "jacobian_det": jacobian_det,
         },
@@ -329,7 +358,7 @@ def sample_from_loop(
     )
 
     mesh_processor.save_taining_data(
-        os.path.join(problem_data_dir, "data_{}".format(i))
+        os.path.join(dirs["data"], "data_{}".format(i))
     )
 
     # ====  Plot Scripts ======================
@@ -362,7 +391,7 @@ def sample_from_loop(
     fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6)
     fd.triplot(mesh_new, axes=ax6)
 
-    fig.savefig(os.path.join(problem_plot_dir, "plot_{}.png".format(i)))
+    fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i)))
     i += 1
 
     # fig, ax = plt.subplots()
@@ -378,49 +407,124 @@ def sample_from_loop(
 
     error_original_mesh = fd.errornorm(uh, uh_fine, norm_type="L2")
     error_optimal_mesh = fd.errornorm(uh_new, uh_fine, norm_type="L2")
-    df = pd.DataFrame(
-        {
-            "error_og": error_original_mesh,
-            "error_adapt": error_optimal_mesh,
-            "time": dur,
-        },
-        index=[0],
-    )
-    df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i)))
+
+
+    # Write to CSV
+    with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # Write header (keys)
+        csv_writer.writerow(["error_og", "error_adapt", "time"])
+        # Write data (values)
+        csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur])
+
     print("error og/optimal:", error_original_mesh, error_optimal_mesh)
+    # df = pd.DataFrame(
+    #     {
+    #         "error_og": error_original_mesh,
+    #         "error_adapt": error_optimal_mesh,
+    #         "time": dur,
+    #     },
+    #     index=[0],
+    # )
+    # df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i)))
+    # print("error og/optimal:", error_original_mesh, error_optimal_mesh)
     return
 
 
-# ====  Data Generation Scripts ======================
 if __name__ == "__main__":
+    
+    # parse args
+    args = parse_arguments()
+
+    # ====  Parameters ======================
+    parameters = {
+        # parameters for problem
+        "problem": "burgers",
+        "n_case": args.n_case,
+        # parameters for random source
+        "n_dist": args.n_dist,
+        "max_dist": args.max_dist,
+        "lc": args.lc,
+        "n_grid": args.n_grid,
+        # parameters for field type, boundary scheme and mesh type
+        # "n_samples": args.n_samples,
+        "data_type": args.field_type,
+        "scheme": args.boundary_scheme,
+        "mesh_type": int(args.mesh_type),
+        # parameters for domain scale
+        "scale_x": 1,
+        "scale_y": 1,
+        # parameters for anisotropic data - distribution height scaler
+        "z_max": 1,
+        "z_min": 0,
+        # parameters for x/y bounds
+        "x_start": 0,
+        "x_end": 1,
+        "y_start": 0,
+        "y_end": 1,
+        # parameters for isotropic data
+        "w_min": 0.05,
+        "w_max": 0.2,
+        "c_min": 0.2 if args.boundary_scheme == "pad" else 0,
+        "c_max": 0.8 if args.boundary_scheme == "pad" else 1,
+        # parameters for dataset challenging level
+        # larger, less challenging (because the gaussian is more like a circle)
+        # "sigma_mean_scaler": 1 / 4,
+        # "sigma_sigma_scaler": 1 / 6,
+        # "sigma_eps": 1 / 8,
+        # parameters for data split
+        "p_train": 0.75,
+        "p_test": 0.15,
+        "p_val": 0.1,
+    }
+
+    # Set random seed
+    random.seed(args.rand_seed)
+
+
+    # ====  Setup Directories ======================
+    problem_specific_dir = "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}".format(
+        lc=parameters["lc"],
+        n_grid=parameters["n_grid"],
+        n_case=parameters["n_case"],
+        data_type=parameters["data_type"],
+        scheme=parameters["scheme"],
+        mesh_type=parameters["mesh_type"],
+    )
+
+    subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
+               "plot_compare", "train", "test", "val"
+               ]
+
+    dirs = setup_directories(problem = parameters["problem"],
+                            mesh_type = parameters["mesh_type"],
+                            base_dir = None,
+                            subdirs = subdirs,
+                            dir_format = problem_specific_dir)
+
+    # ====  Output CSV ======================
+    key_list = [
+        "cmin","cmax", "data_type", "scheme", "lc", "mesh_type"
+    ]
+    output_csv(parameters, key_list, dirs["data"])
+
+    # ====  Data Generation Scripts ======================
+
+    i = 0
+
+    # QC:
     print("In build_dataset.py")
     # for idx in range(1, n_case + 1):
-    for idx in range(1, n_case + 1):
+    for idx in range(1, parameters["n_case"] + 1):
         try:
+            # QC:
             print(f"Case {idx} building ...")
-            mesh = None
-            mesh_new = None
-            mesh_fine = None
-            if mesh_type != 0:
-                unstructured_square_mesh_gen = UM2N.UnstructuredSquareMesh(
-                    scale=scale_x, mesh_type=mesh_type
-                )  # noqa
-                mesh = unstructured_square_mesh_gen.generate_mesh(
-                    res=lc, output_filename=os.path.join(problem_mesh_dir, "mesh.msh")
-                )
-                mesh_new = fd.Mesh(os.path.join(problem_mesh_dir, "mesh.msh"))
-                mesh_fine = unstructured_square_mesh_gen.generate_mesh(
-                    res=1e-2,
-                    output_filename=os.path.join(problem_mesh_fine_dir, "mesh.msh"),
-                )
-            else:
-                mesh = fd.UnitSquareMesh(n_grid, n_grid)
-                mesh_new = fd.UnitSquareMesh(n_grid, n_grid)
-                mesh_fine = fd.UnitSquareMesh(100, 100)
+            mesh, mesh_new, mesh_fine = generate_mesh(parameters, dirs)
             # Generate Random solution field
             gaussian_list, nu = get_sample_param_of_nu_generalization_by_idx_train(idx)  # noqa
             solver = UM2N.BurgersSolver(
-                mesh, mesh_fine, mesh_new, gauss_list=gaussian_list, nu=nu, idx=idx
+                mesh, mesh_fine, mesh_new,
+                gauss_list=gaussian_list, nu=nu, idx=idx
             )
             solver.solve_problem(sample_from_loop)
             print()
diff --git a/script/build_helmholtz_poly.py b/script/build_helmholtz_poly.py
index 080116f..870820d 100644
--- a/script/build_helmholtz_poly.py
+++ b/script/build_helmholtz_poly.py
@@ -1,6 +1,6 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
-
+import csv
 import os
 import random
 import shutil
@@ -10,426 +10,484 @@
 import firedrake as fd
 import matplotlib.pyplot as plt
 import numpy as np
-import pandas as pd
+# import pandas as pd
+from firedrake.__future__ import interpolate
 
+# Add the parent directory to the Python path
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
+def parse_arguments():
+    """Parse CLI arguments; ``max_dist`` is set to None when ``n_dist`` is given."""
+    parser = ArgumentParser(description="Build Helmholtz dataset with random polygonal meshes.")
+    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.")
+    parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.")
+    parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.")
+    parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.")
+    parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).")
+    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
+    parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).")
+    parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated")
+    parser.add_argument("--rand_seed", type=int, default=63, help="Random seed")
+
+    parsed_args = parser.parse_args()
+
+    # max_dist (max number of distributions) is only meaningful when n_dist
+    # (exact number of distributions) is not set, so it is disabled whenever
+    # n_dist is given.
+    if parsed_args.n_dist is not None:
+        parsed_args.max_dist = None  # Disable max_dist if n_dist is set
+        print("Warning: max_dist is ignored because n_dist is set.")
+    # QC:
+    print(parsed_args)
+    # Return the adjusted namespace; re-parsing here would discard the override.
+    return parsed_args
+
+def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+    """
+    Set up directories for data, plots, and logs (existing files in them are deleted).
+
+    Args:
+        problem (str): Name of the problem (e.g., "burgers" or "helmholtz");
+            used as the last component of the dataset directory.
+        mesh_type (int): Mesh type, used in "dataset_meshtype_{mesh_type}".
+        base_dir (str, optional): Project root. Defaults to the parent of the
+            directory containing this script.
+        subdirs (list, optional): List of subdirectories to create. Defaults to:
+            ["data", "plot", "log", "mesh", "mesh_fine",
+            "plot_compare", "train", "test", "val"].
+        dir_format (str, optional): Name of the problem-specific directory,
+            used as given (no placeholder substitution is performed here).
+            Defaults to "{problem}_meshtype_{mesh_type}".
+
+    Returns:
+        dict: A dictionary mapping subdirectory names to their full paths.
+
+    Raises:
+        ValueError: If `dir_format` is provided but is not a string.
+    """
+
+    # Define the project directory
+    if base_dir:
+        project_dir = os.path.abspath(base_dir)
+    else:
+        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    
+    # QC:
+    print(f"Project Directory: {project_dir}")
 
-def arg_parse():
-    parser = ArgumentParser()
-    parser.add_argument(
-        "--mesh_type", type=int, default=2, help="algorithm used to generate mesh"
-    )
-    parser.add_argument(
-        "--max_dist",
-        type=int,
-        default=6,
-        help="max number of distributions used to\
-                            generate the dataset (only works if\
-                                n_dist is not set)",
-    )
-    parser.add_argument(
-        "--n_dist",
-        type=int,
-        default=None,
-        help="number of distributions used to\
-                            generate the dataset (this will disable\
-                                max_dist)",
+    # Define the dataset directory
+    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
+
+    # Use the provided format string for the problem-specific directory
+    if dir_format is None:
+        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+    else:
+        # check if dir_format is a valid string format
+        if not isinstance(dir_format, str):
+            raise ValueError("dir_format must be a string.")
+        problem_specific_dir = os.path.join(dataset_dir, dir_format)
+
+    # Define default subdirectories if not provided
+    if subdirs is None:
+        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
+                   "plot_compare", "train", "test", "val"]
+
+    # Create and clear directories
+    directories = {}
+    for subdir in subdirs:
+        dir_path = os.path.join(problem_specific_dir, subdir)
+        if not os.path.exists(dir_path):
+            os.makedirs(dir_path)
+        else:
+            # Clear the directory by removing all files
+            for file in os.listdir(dir_path):
+                os.remove(os.path.join(dir_path, file))
+        directories[subdir] = dir_path
+
+    # QC:
+    print(f"Subdirectories created: {directories}")
+
+    return directories
+
+def output_csv(parameters, key_list, output_dir):
+    """
+    Write selected parameters to "info.csv" as a header row plus one data row.
+
+    Args:
+        parameters (dict): Dictionary of parameters to write.
+        key_list (list): Keys to include; keys absent from `parameters` are skipped.
+        output_dir (str): Directory where the CSV file will be saved.
+    """
+    # Keep only the requested keys that exist in parameters (others are dropped silently)
+    csv_keys = [key for key in key_list if key in parameters]
+    csv_data = [parameters[key] for key in csv_keys]
+
+    # Define the output file path
+    csv_file_path = os.path.join(output_dir, "info.csv")
+
+    # Write to CSV
+    with open(csv_file_path, mode="w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # Write header (keys)
+        csv_writer.writerow(csv_keys)
+        # Write data (values)
+        csv_writer.writerow(csv_data)
+
+def move_data(target, source, start, num_files):
+    """
+    Copy data files from the source directory to the target directory.
+
+    Args:
+        target (str): The path to the target directory (emptied before copying).
+        source (str): The path to the source directory.
+        start (int): The starting index of the files to copy.
+        num_files (int): The total number of files to copy.
+
+    Raises:
+        FileNotFoundError: If the source directory does not exist.
+        ValueError: If the start index or num_files is invalid.
+    """
+    if not os.path.exists(source):
+        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
+
+    if start < 0 or num_files <= 0:
+        raise ValueError("Invalid start index or number of files to move.")
+
+    # Create the target directory if it doesn't exist
+    if not os.path.exists(target):
+        os.makedirs(target)
+    else:
+        # Clear the target directory by removing all files
+        for file in os.listdir(target):
+            os.remove(os.path.join(target, file))
+
+    # Copy files sequentially starting from the specified index
+    for i in range(start, start + num_files):
+        try:
+            # File names are not zero-padded, matching how samples are saved
+            shutil.copy(
+                os.path.join(source, f"data_{i}.npy"),
+                os.path.join(target, f"data_{i}.npy"),
+            )
+        except FileNotFoundError:
+            print(f"File data_{i}.npy not found in {source}. Skipping.")
+            continue
+        except Exception as e:
+            print(f"An error occurred while copying data_{i}.npy: {e}")
+            continue
+
+def process_features(parameters, dirs):
+    """Build, solve, adapt and save one Helmholtz sample; the sample index is read from the global ``i``."""
+    mesh_type = parameters["mesh_type"]
+    scale_x = parameters["scale_x"]
+    lc = parameters["lc"]
+
+    # create mesh
+    rand_poly_mesh_gen = UM2N.UnstructuredRandomPolygonalMeshGenerator(
+        scale=scale_x, mesh_type=mesh_type
+    )  # noqa
+    mesh = rand_poly_mesh_gen.generate_mesh(
+        res=lc, output_filename=os.path.join(dirs["mesh"], f"mesh{i}.msh")
     )
-    parser.add_argument(
-        "--lc",
-        type=float,
-        default=6e-2,
-        help="the length characteristic of the elements in the\
-                            mesh",
+    num_boundary = rand_poly_mesh_gen.num_boundary
+
+    # Generate Random solution field
+    rand_u_generator = UM2N.RandSourceGenerator(
+        use_iso= parameters["data_type"] == "iso",
+        dist_params= parameters
     )
-    parser.add_argument(
-        "--field_type",
-        type=str,
-        default="aniso",
-        help="anisotropic or isotropic data type(aniso/iso)",
+
+    # generate equation
+    helmholtz_eq = UM2N.RandHelmholtzEqGenerator(rand_u_generator)
+    # discretise the equation
+    res = helmholtz_eq.discretise(mesh)
+    # get specific parameters used
+    dist_params = rand_u_generator.get_dist_params()
+    # Solve the equation
+    solver = UM2N.EquationSolver(
+        params={
+            "function_space": res["function_space"],
+            "LHS": res["LHS"],
+            "RHS": res["RHS"],
+            "bc": res["bc"],
+        }
     )
-    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
-    parser.add_argument(
-        "--boundary_scheme",
-        type=str,
-        default="full",
-        help="scheme used to generate the dataset (pad/full))",
+
+    # original solution field
+    uh = solver.solve_eq()
+
+    func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
+    grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space))
+
+    # ej321 - grad_norm copied from build_helmholtz_square.py
+    grad_norm = fd.Function(res["function_space"])
+    grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
+    grad_norm /= grad_norm.vector().max()
+
+    # RHS of helmholtz problem
+    f_rhs = fd.assemble(interpolate(helmholtz_eq.f, helmholtz_eq.function_space))
+
+    # ej321 - this seems extra - the mesh is never used, just to build 'eq'?
+    # hessian = UM2N.MeshGenerator(
+    #     params={
+    #         "eq": helmholtz_eq,
+    #         "mesh": rand_poly_mesh_gen.generate_mesh(
+    #             res=lc,
+    #             output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
+    #         ),
+    #     }
+    # ).get_hessian(mesh)
+    # ej321 - using script from build_helmholtz_square.py
+    mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh})
+    monitor_val = mesh_gen.monitor_func(mesh)
+    hessian = mesh_gen.get_hessian(mesh)
+    hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh),
+                                fd.FunctionSpace(mesh, "CG", 1)
+                                )
+
+
+    # move the mesh?
+    start = time.perf_counter()
+    new_mesh = mesh_gen.move_mesh()
+    end = time.perf_counter()
+    dur = (end - start) * 1000
+
+    # this is the jacobian of x with respect to xi
+    jacobian = mesh_gen.get_jacobian()
+    jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1))
+    jacobian_det = mesh_gen.get_jacobian_det()
+    jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1))
+
+    # get phi/grad_phi projected to the original mesh
+    phi = mesh_gen.get_phi()
+    grad_phi = mesh_gen.get_grad_phi()
+
+    # solve the equation on the new mesh
+    new_res = helmholtz_eq.discretise(new_mesh)
+    new_solver = UM2N.EquationSolver(
+        params={
+            "function_space": new_res["function_space"],
+            "LHS": new_res["LHS"],
+            "RHS": new_res["RHS"],
+            "bc": new_res["bc"],
+        }
     )
-    parser.add_argument(
-        "--n_samples", type=int, default=100, help="number of samples generated"
+    uh_new = new_solver.solve_eq()
+
+    # process the data for training
+    mesh_processor = UM2N.MeshProcessor(
+        original_mesh=mesh,
+        optimal_mesh=new_mesh,
+        function_space=new_res["function_space"],
+        use_4_edge=False,
+        num_boundary=num_boundary,
+        feature={
+            "uh": uh.dat.data_ro.reshape(-1, 1),
+            "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), #ej321 - added
+            "hessian": hessian.dat.data_ro.reshape(-1, 4),
+            "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
+            "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
+            "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
+            "phi": phi.dat.data_ro.reshape(-1, 1),
+            "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
+            "f": f_rhs.dat.data_ro.reshape(-1, 1),
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added
+        },
+        raw_feature={
+            "uh": uh,
+            "hessian_norm": hessian_norm,
+            "monitor_val": monitor_val, # ej321 - added
+            "grad_uh_norm": grad_norm, # ej321 - added needed for poly only
+            "jacobian": jacobian,
+            "jacobian_det": jacobian_det,
+        },
+        dist_params=dist_params,
+        poly_mesh=True,
     )
-    parser.add_argument(
-        "--rand_seed", type=int, default=63, help="number of samples generated"
+
+    # save out data
+    mesh_processor.save_taining_data(
+        os.path.join(dirs["data"], "data_{}".format(i))
     )
-    args_ = parser.parse_args()
-    print(args_)
-    return args_
 
+    # ====  Plot Scripts ======================
+    fig = plt.figure(figsize=(15, 10))
+    ax1 = fig.add_subplot(2, 3, 1, projection="3d")
+    # Plot the exact solution
+    ax1.set_title("Exact Solution")
+    fd.trisurf(fd.assemble(interpolate(res["u_exact"], res["function_space"])), axes=ax1)
+    # Plot the solved solution
+    ax2 = fig.add_subplot(2, 3, 2, projection="3d")
+    ax2.set_title("FEM Solution")
+    fd.trisurf(uh, axes=ax2)
+
+    # Plot the solution on a optimal mesh
+    ax3 = fig.add_subplot(2, 3, 3, projection="3d")
+    ax3.set_title("FEM Solution on Optimal Mesh")
+    fd.trisurf(uh_new, axes=ax3)
+
+    # Plot the mesh
+    ax4 = fig.add_subplot(2, 3, 4)
+    ax4.set_title("Original Mesh")
+    fd.triplot(mesh, axes=ax4)
+    ax5 = fig.add_subplot(2, 3, 5)
+    ax5.set_title("Optimal Mesh")
+    fd.triplot(new_mesh, axes=ax5)
+
+    # plot mesh with function evaluated on it
+    ax6 = fig.add_subplot(2, 3, 6)
+    ax6.set_title("Soultion Projected on optimal mesh")
+    fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6)
+    fd.triplot(new_mesh, axes=ax6)
+
+    fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i)))
+
+
+    # ====  Log File ============================================
+    high_res_mesh = rand_poly_mesh_gen.generate_mesh(
+        res=1e-2,
+        output_filename=os.path.join(dirs["mesh_fine"], f"mesh{i}.msh"),
+    )
 
-args = arg_parse()
+    high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1)
 
-mesh_type = args.mesh_type
+    res_high_res = helmholtz_eq.discretise(high_res_mesh)
+    u_exact = fd.assemble(interpolate(res_high_res["u_exact"],
+                    res_high_res["function_space"])
+                    )
 
-data_type = args.field_type
-use_iso = True if data_type == "iso" else False
+    uh_proj = fd.project(uh, high_res_function_space)
+    uh_new_proj = fd.project(uh_new, high_res_function_space)
 
-rand_seed = args.rand_seed
-random.seed(rand_seed)
-np.random.seed(rand_seed)
+    error_original_mesh = fd.errornorm(u_exact, uh_proj)
+    error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj)
 
-# ====  Parameters ======================
-problem = "holmholtz_poly"
+    # df = pd.DataFrame(
+    #     {
+    #         "error_og": error_original_mesh,
+    #         "error_adapt": error_optimal_mesh,
+    #         "time": dur,
+    #     },
+    #     index=[0],
+    # )
+    # df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i)))
 
-n_samples = args.n_samples
+    # Write to CSV
+    with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # Write header (keys)
+        csv_writer.writerow(["error_og", "error_adapt", "time"])
+        # Write data (values)
+        csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur])
+    print("error og/optimal:", error_original_mesh, error_optimal_mesh)
 
-# parameters for domain scale
-scale_x = 1
-scale_y = 1
 
-# parameters for random source
-max_dist = args.max_dist
-n_dist = args.n_dist
-lc = args.lc
 
-# parameters for anisotropic data - distribution height scaler
-z_min = 0
-z_max = 1
+if __name__ == "__main__":
 
-# parameters for isotropic data
-w_min = 0.05
-w_max = 0.2
+    # parse args
+    args = parse_arguments()
+    
+    # ====  Parameters ======================
+    parameters = {
+        # parameters for problem
+        "problem": "holmholtz_poly",
+        # "n_case": args.n_case, # burgers problem only
+        # parameters for random source
+        "n_dist": args.n_dist,
+        "max_dist": args.max_dist,
+        "lc": args.lc,
+        # "n_grid": args.n_grid, # burgers problem only
+        # parameters for sample count, field type, boundary scheme and mesh type
+        "n_samples": args.n_samples,
+        "data_type": args.field_type,
+        "scheme": args.boundary_scheme,
+        "mesh_type": int(args.mesh_type),
+        # parameters for domain scale
+        "scale_x": 1,
+        "scale_y": 1,
+        # parameters for anisotropic data - distribution height scaler
+        "z_max": 1,
+        "z_min": 0,
+        # parameters for x/y bounds
+        "x_start": 0,
+        "x_end": 1,
+        "y_start": 0,
+        "y_end": 1,
+        # parameters for isotropic data
+        "w_min": 0.05,
+        "w_max": 0.2,
+        "c_min": 0.3 if args.boundary_scheme == "pad" else 0,
+        "c_max": 0.7 if args.boundary_scheme == "pad" else 1,
+        # parameters for dataset challenging level
+        # larger, less challenging (because the gaussian is more like a circle)
+        "sigma_mean_scaler": 1 / 4,
+        "sigma_sigma_scaler": 1 / 6,
+        "sigma_eps": 1 / 8,
+        # parameters for data split
+        "p_train": 0.75,
+        "p_test": 0.15,
+        "p_val": 0.1,
+    }
 
-scheme = args.boundary_scheme
-c_min = 0.3 if scheme == "pad" else 0
-c_max = 0.7 if scheme == "pad" else 1
+    # Set random seed
+    random.seed(args.rand_seed)
+    np.random.seed(args.rand_seed)
 
-# parameters for data split
-p_train = 0.75
-p_test = 0.15
-p_val = 0.1
+    # ====  Setup Directories ======================
+    problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
+            parameters["z_min"], parameters["z_max"],
+            parameters["n_dist"],parameters["max_dist"],
+            parameters["lc"], parameters["n_samples"],
+            parameters["data_type"], parameters["scheme"], parameters["mesh_type"]
+        )
 
-num_train = int(n_samples * p_train)
-num_test = int(n_samples * p_test)
-num_val = int(n_samples * p_val)
-# =======================================
+    subdirs = [
+        "data", "plot", "plot_compare", "log", "mesh", "mesh_fine",
+        "train", "test", "val",
+    ]
 
+    dirs = setup_directories(problem = parameters["problem"],
+                        mesh_type = parameters["mesh_type"],
+                        base_dir = None,
+                        subdirs = subdirs,
+                        dir_format = problem_specific_dir)
 
-df = pd.DataFrame(
-    {
-        "cmin": [c_min],
-        "cmax": [c_max],
-        "data_type": [data_type],
-        "scheme": [scheme],
-        "n_samples": [n_samples],
-        "lc": [lc],
-        "mesh_type": [mesh_type],
-    }
-)
 
+    # ====  Output CSV ======================
+    key_list = [
+        "cmin","cmax",
+        "data_type", "scheme", "n_samples", "lc", "mesh_type"
+    ]
+    output_csv(parameters, key_list, dirs["data"])
 
-def move_data(target, source, start, num_file):
-    if not os.path.exists(target):
-        os.makedirs(target)
-    else:
-        # delete all files under the directory
-        filelist = [f for f in os.listdir(target)]
-        for f in filelist:
-            os.remove(os.path.join(target, f))
-    # copy data from data dir to train dir
-    for i in range(start, num_file):
-        shutil.copy(
-            os.path.join(source, "data_{}.npy".format(i)),
-            os.path.join(target, "data_{}.npy".format(i)),
-        )
-
-
-project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__))))
-dataset_dir = os.path.join(
-    project_dir, "data", f"dataset_meshtype_{mesh_type}", "helmholtz_poly"
-)  # noqa
-problem_specific_dir = os.path.join(
-    dataset_dir,
-    "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
-        z_min, z_max, n_dist, max_dist, lc, n_samples, data_type, scheme, mesh_type
-    ),
-)
-
-
-problem_data_dir = os.path.join(problem_specific_dir, "data")
-problem_plot_dir = os.path.join(problem_specific_dir, "plot")
-problem_log_dir = os.path.join(problem_specific_dir, "log")
-
-problem_mesh_dir = os.path.join(problem_specific_dir, "mesh")
-problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine")
-problem_train_dir = os.path.join(problem_specific_dir, "train")
-problem_test_dir = os.path.join(problem_specific_dir, "test")
-problem_val_dir = os.path.join(problem_specific_dir, "val")
-
-if not os.path.exists(problem_mesh_dir):
-    os.makedirs(problem_mesh_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_dir, f))
-
-if not os.path.exists(problem_mesh_fine_dir):
-    os.makedirs(problem_mesh_fine_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_fine_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_fine_dir, f))
-
-if not os.path.exists(problem_data_dir):
-    os.makedirs(problem_data_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_data_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_data_dir, f))
-
-if not os.path.exists(problem_plot_dir):
-    os.makedirs(problem_plot_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_plot_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_plot_dir, f))
-
-if not os.path.exists(problem_log_dir):
-    os.makedirs(problem_log_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_log_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_log_dir, f))
-
-df.to_csv(os.path.join(problem_specific_dir, "info.csv"))
-
-
-# ====  Data Generation Scripts ======================
-if __name__ == "__main__":
+    # ====  Data Generation Scripts ======================
+    # QC: 
     print("In build_dataset.py")
-    i = 0
-    while i < n_samples:
+    # i = 0
+    # while i < n_samples:
+    for i in range(parameters["n_samples"]):
         try:
             print("Generating Sample: " + str(i))
-            rand_poly_mesh_gen = UM2N.UnstructuredRandomPolygonalMeshGenerator(
-                scale=scale_x, mesh_type=mesh_type
-            )  # noqa
-            mesh = rand_poly_mesh_gen.generate_mesh(
-                res=lc, output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh")
-            )
-            num_boundary = rand_poly_mesh_gen.num_boundary
-            # Generate Random solution field
-            rand_u_generator = UM2N.RandSourceGenerator(
-                use_iso=use_iso,
-                dist_params={
-                    "max_dist": max_dist,
-                    "n_dist": n_dist,
-                    "x_start": 0,
-                    "x_end": 1,
-                    "y_start": 0,
-                    "y_end": 1,
-                    "z_max": z_max,
-                    "z_min": z_min,
-                    "w_min": w_min,
-                    "w_max": w_max,
-                    "c_min": c_min,
-                    "c_max": c_max,
-                },
-            )
-            helmholtz_eq = UM2N.RandHelmholtzEqGenerator(rand_u_generator)
-            res = helmholtz_eq.discretise(mesh)  # discretise the equation
-            dist_params = rand_u_generator.get_dist_params()
-            # Solve the equation
-            solver = UM2N.EquationSolver(
-                params={
-                    "function_space": res["function_space"],
-                    "LHS": res["LHS"],
-                    "RHS": res["RHS"],
-                    "bc": res["bc"],
-                }
-            )
-            # RHS of helmholtz problem
-            f = fd.interpolate(helmholtz_eq.f, helmholtz_eq.function_space)
-            uh = solver.solve_eq()
-            # Generate Mesh
-            hessian = UM2N.MeshGenerator(
-                params={
-                    "eq": helmholtz_eq,
-                    "mesh": rand_poly_mesh_gen.generate_mesh(
-                        res=lc,
-                        output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
-                    ),
-                }
-            ).get_hessian(mesh)
-
-            hessian_norm = UM2N.MeshGenerator(
-                params={
-                    "eq": helmholtz_eq,
-                    "mesh": rand_poly_mesh_gen.generate_mesh(
-                        res=lc,
-                        output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
-                    ),
-                }
-            ).monitor_func(mesh)
-
-            hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1))
-
-            func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
-            grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space)
-
-            mesh_gen = UM2N.MeshGenerator(
-                params={
-                    "eq": helmholtz_eq,
-                    "mesh": rand_poly_mesh_gen.generate_mesh(
-                        res=lc,
-                        output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
-                    ),
-                }
-            )
-
-            start = time.perf_counter()
-            new_mesh = mesh_gen.move_mesh()
-            end = time.perf_counter()
-            dur = (end - start) * 1000
-
-            # this is the jacobian of x with respect to xi
-            jacobian = mesh_gen.get_jacobian()
-            jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1))
-            jacobian_det = mesh_gen.get_jacobian_det()
-            jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1))
-
-            # get phi/grad_phi projected to the original mesh
-            phi = mesh_gen.get_phi()
-            grad_phi = mesh_gen.get_grad_phi()
-
-            # solve the equation on the new mesh
-            new_res = helmholtz_eq.discretise(new_mesh)
-            new_solver = UM2N.EquationSolver(
-                params={
-                    "function_space": new_res["function_space"],
-                    "LHS": new_res["LHS"],
-                    "RHS": new_res["RHS"],
-                    "bc": new_res["bc"],
-                }
-            )
-            uh_new = new_solver.solve_eq()
-
-            # process the data for training
-            mesh_processor = UM2N.MeshProcessor(
-                original_mesh=mesh,
-                optimal_mesh=new_mesh,
-                function_space=new_res["function_space"],
-                use_4_edge=False,
-                num_boundary=num_boundary,
-                feature={
-                    "uh": uh.dat.data_ro.reshape(-1, 1),
-                    "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
-                    "hessian": hessian.dat.data_ro.reshape(-1, 4),
-                    "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
-                    "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
-                    "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
-                    "phi": phi.dat.data_ro.reshape(-1, 1),
-                    "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
-                    "f": f.dat.data_ro.reshape(-1, 1),
-                },
-                raw_feature={
-                    "uh": uh,
-                    "hessian_norm": hessian_norm,
-                    "jacobian": jacobian,
-                    "jacobian_det": jacobian_det,
-                },
-                dist_params=dist_params,
-                poly_mesh=True,
-            )
-
-            mesh_processor.save_taining_data(
-                os.path.join(problem_data_dir, "data_{}".format(i))
-            )
-
-            # ====  Plot Scripts ======================
-            fig = plt.figure(figsize=(15, 10))
-            ax1 = fig.add_subplot(2, 3, 1, projection="3d")
-            # Plot the exact solution
-            ax1.set_title("Exact Solution")
-            fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1)
-            # Plot the solved solution
-            ax2 = fig.add_subplot(2, 3, 2, projection="3d")
-            ax2.set_title("FEM Solution")
-            fd.trisurf(uh, axes=ax2)
-
-            # Plot the solution on a optimal mesh
-            ax3 = fig.add_subplot(2, 3, 3, projection="3d")
-            ax3.set_title("FEM Solution on Optimal Mesh")
-            fd.trisurf(uh_new, axes=ax3)
-
-            # Plot the mesh
-            ax4 = fig.add_subplot(2, 3, 4)
-            ax4.set_title("Original Mesh")
-            fd.triplot(mesh, axes=ax4)
-            ax5 = fig.add_subplot(2, 3, 5)
-            ax5.set_title("Optimal Mesh")
-            fd.triplot(new_mesh, axes=ax5)
-
-            # plot mesh with function evaluated on it
-            ax6 = fig.add_subplot(2, 3, 6)
-            ax6.set_title("Soultion Projected on optimal mesh")
-            fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6)
-            fd.triplot(new_mesh, axes=ax6)
-
-            fig.savefig(os.path.join(problem_plot_dir, "plot_{}.png".format(i)))
-
-            # ==========================================
-
-            # generate log file
-            high_res_mesh = rand_poly_mesh_gen.generate_mesh(
-                res=1e-2,
-                output_filename=os.path.join(problem_mesh_fine_dir, f"mesh{i}.msh"),
-            )
-
-            high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1)
-
-            res_high_res = helmholtz_eq.discretise(high_res_mesh)
-            u_exact = fd.interpolate(
-                res_high_res["u_exact"], res_high_res["function_space"]
-            )
-
-            uh = fd.project(uh, high_res_function_space)
-            uh_new = fd.project(uh_new, high_res_function_space)
-
-            error_original_mesh = fd.errornorm(u_exact, uh)
-            error_optimal_mesh = fd.errornorm(u_exact, uh_new)
-
-            df = pd.DataFrame(
-                {
-                    "error_og": error_original_mesh,
-                    "error_adapt": error_optimal_mesh,
-                    "time": dur,
-                },
-                index=[0],
-            )
-            df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i)))
-            print("error og/optimal:", error_original_mesh, error_optimal_mesh)
-            i += 1
+          
+            process_features(parameters, dirs)
+            # i += 1
         except fd.exceptions.ConvergenceError:
-            pass
+            print(f"Iteration {i} did not converge.")
+            continue
         except AttributeError:
             pass
         except ValueError:
             pass
 
-    move_data(problem_train_dir, problem_data_dir, 0, num_train)
-
-    move_data(problem_test_dir, problem_data_dir, num_train, num_train + num_test)
+    # ====  Data Splits ============================================
+    num_train = int(parameters["n_samples"] * parameters["p_train"])
+    num_test = int(parameters["n_samples"] * parameters["p_test"])
+    num_val = parameters["n_samples"] - num_train - num_test
 
-    move_data(
-        problem_val_dir,
-        problem_data_dir,
-        num_train + num_test,
-        num_train + num_test + num_val,
-    )
-# ====  Data Generation Scripts ======================
+    move_data(dirs["train"], dirs["data"], 0, num_train)
+    move_data(dirs["test"], dirs["data"], num_train, num_train + num_test)
+    move_data(dirs["val"], dirs["data"], num_train + num_test, num_train + num_test + num_val)
diff --git a/script/build_helmholtz_square.py b/script/build_helmholtz_square.py
index c8d25d1..c66d46f 100644
--- a/script/build_helmholtz_square.py
+++ b/script/build_helmholtz_square.py
@@ -1,6 +1,7 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
 
+import csv
 import os
 import random
 import shutil
@@ -9,553 +10,526 @@
 
 import firedrake as fd
 import matplotlib.pyplot as plt
-import pandas as pd
+from firedrake.__future__ import interpolate
 
+
+# Add the parent directory to the Python path
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
 
-def arg_parse():
+def parse_arguments():
+    """Parse command-line arguments; max_dist is reset to None when --n_dist is given."""
     parser = ArgumentParser()
-    parser.add_argument(
-        "--mesh_type", type=int, default=2, help="algorithm used to generate mesh"
-    )
-    parser.add_argument(
-        "--max_dist",
-        type=int,
-        default=6,
-        help="max number of distributions used to\
-                            generate the dataset (only works if\
-                                n_dist is not set)",
+    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh")
+    parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions")
+    parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions")
+    parser.add_argument("--lc", type=float, default=5e-2, help="Length characteristic of mesh elements")
+    parser.add_argument("--field_type", type=str, default="aniso", help="Data type (aniso/iso)")
+    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
+    parser.add_argument("--boundary_scheme", type=str, default="full", help="Boundary scheme (pad/full)")
+    parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated")
+    parser.add_argument("--rand_seed", type=int, default=63, help="Random seed")
+
+    parsed_args = parser.parse_args()
+
+    # Handle the dependency between max_dist and n_dist:
+    # max_dist (the maximum number of distributions) only applies when n_dist
+    # is not given, so an explicit n_dist disables max_dist.
+    if parsed_args.n_dist is not None:
+        parsed_args.max_dist = None  # Disable max_dist if n_dist is set
+        print("Warning: max_dist is ignored because n_dist is set.")
+    # QC: echo the parsed arguments
+    print(parsed_args)
+
+    return parsed_args
+
+
+def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+    """
+    Create the output directories for one dataset run and return their paths.
+
+    Layout: <project>/data/dataset_meshtype_<mesh_type>/<problem>/<run dir>/<subdir>.
+    Subdirectories that already exist are emptied (os.remove on every entry).
+
+    Args:
+        problem (str): Name of the problem (e.g. "helmholtz"); used as a path component.
+        mesh_type (int): Type of mesh; used in the "dataset_meshtype_{mesh_type}" component.
+        base_dir (str, optional): Project root; if falsy, the parent of this script's directory.
+        subdirs (list, optional): List of subdirectories to create. Defaults to:
+            ["data", "plot", "log", "mesh", "mesh_fine",
+            "plot_compare", "train", "test", "val"].
+        dir_format (str, optional): Name of the run-specific directory, used exactly as given
+            (no placeholder substitution happens here). Defaults to
+            "{problem}_meshtype_{mesh_type}".
+
+    Returns:
+        dict: A dictionary mapping subdirectory names to their full paths.
+
+    Raises:
+        ValueError: If `dir_format` is given but is not a string.
+    """
+
+    # Define the project directory
+    if base_dir:
+        project_dir = os.path.abspath(base_dir)
+    else:
+        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    
+    # QC:
+    print(f"Project Directory: {project_dir}")
+
+    # Define the dataset directory
+    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
+
+    # Fall back to a default run directory name when dir_format is not given
+    if dir_format is None:
+        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+    else:
+        # dir_format must be a string; it is joined onto dataset_dir as-is
+        if not isinstance(dir_format, str):
+            raise ValueError("dir_format must be a string.")
+        problem_specific_dir = os.path.join(dataset_dir, dir_format)
+
+    # Define default subdirectories if not provided
+    if subdirs is None:
+        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
+                   "plot_compare", "train", "test", "val"]
+
+    # Create and clear directories
+    directories = {}
+    for subdir in subdirs:
+        dir_path = os.path.join(problem_specific_dir, subdir)
+        if not os.path.exists(dir_path):
+            os.makedirs(dir_path)
+        else:
+            # Clear the directory by removing all files
+            for file in os.listdir(dir_path):
+                os.remove(os.path.join(dir_path, file))
+        directories[subdir] = dir_path
+
+    # QC:
+    print(f"Subdirectories created: {directories}")
+
+    return directories
+
+def move_data(target, source, start, num_files):
+    """
+    Copy ``data_{i:04d}.npy`` files from the source directory into an emptied target directory.
+
+    Args:
+        target (str): Path to the target directory; created if missing, otherwise emptied first.
+        source (str): Path to the source directory holding the ``data_XXXX.npy`` files.
+        start (int): The index of the first file to copy.
+        num_files (int): Number of files to copy, i.e. indices start .. start + num_files - 1.
+
+    Raises:
+        FileNotFoundError: If the source directory does not exist.
+        ValueError: If ``start`` is negative or ``num_files`` is not positive.
+    """
+    if not os.path.exists(source):
+        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
+
+    if start < 0 or num_files <= 0:
+        raise ValueError("Invalid start index or number of files to move.")
+
+    # Create the target directory if it doesn't exist
+    if not os.path.exists(target):
+        os.makedirs(target)
+    else:
+        # Clear the target directory by removing all files
+        for file in os.listdir(target):
+            os.remove(os.path.join(target, file))
+
+    # NOTE(review): num_files is a count here; build_helmholtz_poly.py passes an end index (num_train + num_test) -- confirm callers.
+    for i in range(start, start + num_files):
+        try:
+            # Copy the data file
+            shutil.copy(
+                os.path.join(source, f"data_{i:04d}.npy"),
+                os.path.join(target, f"data_{i:04d}.npy"),
+            )
+        except FileNotFoundError:
+            print(f"File data_{i:04d}.npy not found in {source}. Skipping.")
+            continue
+        except Exception as e:
+            print(f"An error occurred while copying data_{i:04d}.npy: {e}")
+            continue
+
+def create_mesh(i, mesh_type, lc, scale_x, problem_mesh_dir):
+    """
+    Generate the mesh for sample ``i``.
+
+    Args:
+        i: The sample index; only used to name the output file ``mesh_{i:04d}.msh``.
+        mesh_type: 0 builds a ``fd.UnitSquareMesh``; any other value is passed to
+            ``UM2N.UnstructuredSquareMeshGenerator``.
+        lc: Mesh length characteristic (``res`` of the generator, or int(1 / lc) cells per side).
+        scale_x: Scale passed to the unstructured generator; unused when mesh_type is 0.
+        problem_mesh_dir: Directory for the ``.msh`` file; unused when mesh_type is 0.
+    Returns:
+        The generated mesh.
+    """
+    if mesh_type != 0:
+        unstructured_square_mesh_gen = UM2N.UnstructuredSquareMeshGenerator(
+            scale=scale_x, mesh_type=mesh_type
+        )  # noqa
+        return unstructured_square_mesh_gen.generate_mesh(
+            res=lc,
+            output_filename=os.path.join(problem_mesh_dir, f"mesh_{i:04d}.msh"),
+        )
+    else:
+        n_grid = int(1 / lc)
+        return fd.UnitSquareMesh(n_grid, n_grid)
+
+
+def process_features(parameters, directories):
+    """Build, solve, adapt, save, log and plot one sample; the sample index ``i`` is read from module scope."""
+    # create mesh
+    mesh = create_mesh(
+        i, mesh_type = parameters["mesh_type"], lc = parameters["lc"],
+        scale_x = parameters["scale_x"], problem_mesh_dir = directories["mesh"]
     )
-    parser.add_argument(
-        "--n_dist",
-        type=int,
-        default=None,
-        help="number of distributions used to\
-                            generate the dataset (this will disable\
-                                max_dist)",
+    # Generate Random solution field
+    rand_u_generator = UM2N.RandSourceGenerator(
+        use_iso= parameters["data_type"] == "iso",
+        dist_params = parameters
     )
-    parser.add_argument(
-        "--lc",
-        type=float,
-        default=5e-2,
-        help="the length characteristic of the elements in the\
-                            mesh",
+
+    # generate equation
+    helmholtz_eq = UM2N.RandHelmholtzEqGenerator(rand_u_generator)
+    # discretise the equation
+    res = helmholtz_eq.discretise(mesh)
+    # get specific parameters used
+    dist_params = rand_u_generator.get_dist_params()
+    # Solve the equation
+    solver = UM2N.EquationSolver(
+        params={
+            "function_space": res["function_space"],
+            "LHS": res["LHS"],
+            "RHS": res["RHS"],
+            "bc": res["bc"],
+        }
     )
-    parser.add_argument(
-        "--field_type",
-        type=str,
-        default="aniso",
-        help="anisotropic or isotropic data type(aniso/iso)",
+    # original solution field
+    uh = solver.solve_eq()
+
+    grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh),
+                                fd.VectorFunctionSpace(mesh, "CG", 1)
+                                ))
+    grad_norm = fd.Function(res["function_space"])
+    grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
+    grad_norm /= grad_norm.vector().max()
+
+    # FOR OUTPUT
+    # RHS of helmholtz problem
+    f_rhs = fd.assemble(interpolate(helmholtz_eq.f, helmholtz_eq.function_space))
+
+
+    # mesh generator: monitor and Hessian features evaluated on the original mesh
+
+    mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh})
+    monitor_val = mesh_gen.monitor_func(mesh)
+    hessian = mesh_gen.get_hessian(mesh)
+    hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh),
+                                fd.FunctionSpace(mesh, "CG", 1)
+                                )
+
+    # move the mesh and time the call (dur is in milliseconds)
+    start = time.perf_counter()
+    new_mesh = mesh_gen.move_mesh()  # noqa
+    end = time.perf_counter()
+    dur = (end - start) * 1000
+
+    # this is the jacobian of x with respect to xi
+    jacobian = mesh_gen.get_jacobian()
+    jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1))
+    jacobian_det = mesh_gen.get_jacobian_det()
+    jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1))
+
+    # get phi/grad_phi projected to the original mesh
+    phi = mesh_gen.get_phi()
+    grad_phi = mesh_gen.get_grad_phi()
+
+    # solve the equation on the new mesh
+    new_res = helmholtz_eq.discretise(new_mesh)
+    new_solver = UM2N.EquationSolver(
+        params={
+            "function_space": new_res["function_space"],
+            "LHS": new_res["LHS"],
+            "RHS": new_res["RHS"],
+            "bc": new_res["bc"],
+        }
     )
-    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
-    parser.add_argument(
-        "--boundary_scheme",
-        type=str,
-        default="full",
-        help="scheme used to generate the dataset (pad/full))",
+    uh_new = new_solver.solve_eq()
+
+    # process the data for training
+    mesh_processor = UM2N.MeshProcessor(
+        original_mesh=mesh,
+        optimal_mesh=new_mesh,
+        function_space=new_res["function_space"],
+        use_4_edge=True,
+        feature={
+            "uh": uh.dat.data_ro.reshape(-1, 1),
+            "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1),
+            "hessian": hessian.dat.data_ro.reshape(-1, 4),
+            "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
+            "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
+            "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
+            "phi": phi.dat.data_ro.reshape(-1, 1),
+            "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
+            "f": f_rhs.dat.data_ro.reshape(-1, 1),
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),
+        },
+        raw_feature={
+            "uh": uh,
+            "hessian_norm": hessian_norm,
+            "monitor_val": monitor_val,
+            "jacobian": jacobian,
+            "jacobian_det": jacobian_det,
+        },
+        dist_params=dist_params,
     )
-    parser.add_argument(
-        "--n_samples", type=int, default=100, help="number of samples generated"
+
+    # save out data
+    mesh_processor.save_taining_data(
+        os.path.join(directories["data"], f"data_{i:04d}")
     )
-    parser.add_argument(
-        "--rand_seed", type=int, default=63, help="number of samples generated"
+
+    # ====  Log File ============================================
+    high_res_mesh = create_mesh(
+        i, mesh_type = parameters["mesh_type"], lc = 1e-2,
+        scale_x = parameters["scale_x"], problem_mesh_dir = directories["mesh_fine"]
     )
-    args_ = parser.parse_args()
-    print(args_)
-    return args_
 
+    res_high_res = helmholtz_eq.discretise(high_res_mesh)
+    u_exact = fd.assemble(interpolate(res_high_res["u_exact"],
+                            res_high_res["function_space"])
+                            )
+
+    uh_proj = fd.project(uh, fd.FunctionSpace(high_res_mesh, "CG", 1))
+    uh_new_proj = fd.project(uh_new, fd.FunctionSpace(high_res_mesh, "CG", 1))
 
-args = arg_parse()
+    error_original_mesh = fd.errornorm(u_exact, uh_proj)
+    error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj)
 
-mesh_type = int(args.mesh_type)
+    # Write to CSV
+    with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # Write header (keys)
+        csv_writer.writerow(["error_og", "error_adapt", "time"])
+        # Write data (values)
+        csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur])
 
-data_type = args.field_type
-use_iso = True if data_type == "iso" else False
+    print("error og/optimal:", error_original_mesh, error_optimal_mesh)
 
-rand_seed = args.rand_seed
-random.seed(rand_seed)
+    # ====  Plot mesh, solution, error ======================
 
-# ====  Parameters ======================
-problem = "holmholtz"
+    rows, cols = 3, 3
+    cmap = "seismic"
 
-n_samples = args.n_samples
+    fig, ax = plt.subplots(
+        rows, cols, figsize=(cols * 5, rows * 5), layout="compressed"
+    )
 
-# parameters for domain scale
-scale_x = 1
-scale_y = 1
+    # High resolution mesh
+    fd.triplot(high_res_mesh, axes=ax[0, 0])
+    ax[0, 0].set_title("High resolution Mesh ")
+    # Original low resolution uniform mesh
+    fd.triplot(mesh, axes=ax[0, 1])
+    ax[0, 1].set_title("Original uniform Mesh")
+    # Adapted mesh
+    fd.triplot(new_mesh, axes=ax[0, 2])
+    ax[0, 2].set_title("Adapted Mesh (MA)")
+    # Solution on high resolution mesh
+    cb = fd.tripcolor(u_exact, cmap=cmap, axes=ax[1, 0])
+    ax[1, 0].set_title("Solution on High Resolution (u_exact)")
+    plt.colorbar(cb)
+    # Solution on original low resolution uniform mesh
+    cb = fd.tripcolor(uh, cmap=cmap, axes=ax[1, 1])
+    ax[1, 1].set_title("Solution on uniform Mesh")
+    plt.colorbar(cb)
+    # Solution on adapted mesh
+    cb = fd.tripcolor(uh_new, cmap=cmap, axes=ax[1, 2])
+    ax[1, 2].set_title("Solution on Adapted Mesh (MA)")
+    plt.colorbar(cb)
+
+    # Monitor values (drawn once; a second draw would add a duplicate colorbar)
+    cb = fd.tripcolor(monitor_val, cmap=cmap, axes=ax[2, 0])
+    ax[2, 0].set_title("Monitor values")
+    plt.colorbar(cb)
+
+    err_orignal_mesh = fd.assemble(uh_proj - u_exact)
+    err_adapted_mesh = fd.assemble(uh_new_proj - u_exact)
+    err_abs_max_val_ori = max(
+        abs(err_orignal_mesh.dat.data[:].max()),
+        abs(err_orignal_mesh.dat.data[:].min()),
+    )
+    err_abs_max_val_adapted = max(
+        abs(err_adapted_mesh.dat.data[:].max()),
+        abs(err_adapted_mesh.dat.data[:].min()),
+    )
+    err_abs_max_val = max(err_abs_max_val_ori, err_abs_max_val_adapted)
+    err_v_max = err_abs_max_val
+    err_v_min = -err_v_max
+
+    # Both error panels below share the symmetric colour range
+    # [err_v_min, err_v_max], where err_v_max is the largest absolute
+    # error over the two meshes, so the uniform-mesh and adapted-mesh
+    # plots use the same scale and can be compared directly.
+    # Error on original low resolution uniform mesh
+    cb = fd.tripcolor(
+        err_orignal_mesh,
+        cmap=cmap,
+        axes=ax[2, 1],
+        vmax=err_v_max,
+        vmin=err_v_min,
+    )
+    ax[2, 1].set_title(
+        f"Error (u-u_exact) uniform Mesh | L2 Norm: {error_original_mesh:.5f}"
+    )
+    plt.colorbar(cb)
+    # Error on adapted mesh
+    cb = fd.tripcolor(
+        err_adapted_mesh,
+        cmap=cmap,
+        axes=ax[2, 2],
+        vmax=err_v_max,
+        vmin=err_v_min,
+    )
+    ax[2, 2].set_title(
+        f"Error (u-u_exact) Adapted Mesh (MA)| L2 Norm: {error_optimal_mesh:.5f} | {(error_original_mesh-error_optimal_mesh)/error_original_mesh*100:.2f}%"
+    )
+    plt.colorbar(cb)
 
-# parameters for random source
-max_dist = args.max_dist
-n_dist = args.n_dist
-lc = args.lc
+    for rr in range(rows):
+        for cc in range(cols):
+            ax[rr, cc].set_aspect("equal", "box")
 
-# parameters for anisotropic data - distribution height scaler
-z_min = 0
-z_max = 1
+    fig.savefig(os.path.join(directories["plot_compare"], f"plot_{i:04d}.png"))
+    plt.close()
 
-# parameters for isotropic data
-w_min = 0.05
-w_max = 0.2
+def output_csv(parameters, key_list, output_dir):
+    """
+    Write selected parameters to ``info.csv`` as one header row and one value row.
 
-scheme = args.boundary_scheme
-c_min = 0.2 if scheme == "pad" else 0
-c_max = 0.8 if scheme == "pad" else 1
+    Args:
+        parameters (dict): Dictionary of parameters to write.
+        key_list (list): Keys to include, in column order; keys missing from parameters are skipped.
+        output_dir (str): Directory where the CSV file will be saved.
+    """
+    # Keep only the requested keys that are present, preserving key_list order
+    csv_keys = [key for key in key_list if key in parameters]
+    csv_data = [parameters[key] for key in csv_keys]
 
-# parameters for data split
-p_train = 0.75
-p_test = 0.15
-p_val = 0.1
+    # Define the output file path
+    csv_file_path = os.path.join(output_dir, "info.csv")
 
-num_train = int(n_samples * p_train)
-num_test = int(n_samples * p_test)
-num_val = int(n_samples * p_val)
+    # Write to CSV
+    with open(csv_file_path, mode="w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # Write header (keys)
+        csv_writer.writerow(csv_keys)
+        # Write data (values)
+        csv_writer.writerow(csv_data)
 
-# parameters for dataset challenging level
-sigma_mean_scaler = 1 / 4  #
-sigma_sigma_scaler = (
-    1 / 6
-)  # larger, less challenging (because the gaussian is more like a circle)
-sigma_eps = 1 / 8
-# =======================================
+    print(f"Parameters saved to {csv_file_path}")
 
+if __name__ == "__main__":
 
-df = pd.DataFrame(
-    {
-        "cmin": [c_min],
-        "cmax": [c_max],
-        "sigma_mean_scaler": [sigma_mean_scaler],
-        "sigma_sigma_scaler": [sigma_sigma_scaler],
-        "sigma_eps": [sigma_eps],
-        "data_type": [data_type],
-        "scheme": [scheme],
-        "n_samples": [n_samples],
-        "lc": [lc],
-        "mesh_type": [mesh_type],
+    # parse args
+    args = parse_arguments()
+    
+    # ====  Parameters ======================
+    parameters = {
+        # parameters for problem
+        "problem": "helmholtz",
+        # "n_case": args.n_case, # burgers problem only
+        # parameters for random source
+        "n_dist": args.n_dist,
+        "max_dist": args.max_dist,
+        "lc": args.lc,
+        # "n_grig": args.n_grid, # burgers problem only
+        # parameters for ??????
+        "n_samples": args.n_samples,
+        "data_type": args.field_type,
+        "scheme": args.boundary_scheme,
+        "mesh_type": int(args.mesh_type),
+        # parameters for domain scale
+        "scale_x": 1,
+        "scale_y": 1,
+        # parameters for anisotropic data - distribution height scaler
+        "z_max": 1,
+        "z_min": 0,
+        # parameters for ?????
+        "x_start": 0,
+        "x_end": 1,
+        "y_start": 0,
+        "y_end": 1,
+        # parameters for isotropic data
+        "w_min": 0.05,
+        "w_max": 0.2,
+        "c_min": 0.2 if args.boundary_scheme == "pad" else 0,
+        "c_max": 0.8 if args.boundary_scheme == "pad" else 1,
+        # parameters for dataset challenging level
+        # larger, less challenging (because the gaussian is more like a circle)
+        "sigma_mean_scaler": 1 / 4,
+        "sigma_sigma_scaler": 1 / 6,
+        "sigma_eps": 1 / 8,
+        # parameters for data split
+        "p_train": 0.75,
+        "p_test": 0.15,
+        "p_val": 0.1,
     }
-)
 
+    # Set random seed
+    random.seed(args.rand_seed)
 
-def move_data(target, source, start, num_file):
-    if not os.path.exists(target):
-        os.makedirs(target)
-    else:
-        # delete all files under the directory
-        filelist = [f for f in os.listdir(target)]
-        for f in filelist:
-            os.remove(os.path.join(target, f))
-    # copy data from data dir to train dir
-    for i in range(start, num_file):
-        shutil.copy(
-            os.path.join(source, f"data_{i:04d}.npy"),
-            os.path.join(target, f"data_{i:04d}.npy"),
+    # ====  Setup Directories ======================
+    problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
+            parameters["z_min"], parameters["z_max"],
+            parameters["n_dist"],parameters["max_dist"],
+            parameters["lc"], parameters["n_samples"],
+            parameters["data_type"], parameters["scheme"], parameters["mesh_type"]
         )
 
+    subdirs = [
+        "data", "plot", "plot_compare", "log", "mesh", "mesh_fine",
+        "train", "test", "val",
+    ]
 
-project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__))))
-dataset_dir = os.path.join(
-    project_dir, "data", f"dataset_meshtype_{mesh_type}", "helmholtz"
-)  # noqa
-problem_specific_dir = os.path.join(
-    dataset_dir,
-    "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
-        z_min, z_max, n_dist, max_dist, lc, n_samples, data_type, scheme, mesh_type
-    ),
-)
-
-
-problem_data_dir = os.path.join(problem_specific_dir, "data")
-problem_plot_dir = os.path.join(problem_specific_dir, "plot")
-problem_plot_compare_dir = os.path.join(problem_specific_dir, "plot_compare")
-problem_log_dir = os.path.join(problem_specific_dir, "log")
-
-problem_mesh_dir = os.path.join(problem_specific_dir, "mesh")
-problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine")
-problem_train_dir = os.path.join(problem_specific_dir, "train")
-problem_test_dir = os.path.join(problem_specific_dir, "test")
-problem_val_dir = os.path.join(problem_specific_dir, "val")
-
-if not os.path.exists(problem_mesh_dir):
-    os.makedirs(problem_mesh_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_dir, f))
-
-if not os.path.exists(problem_mesh_fine_dir):
-    os.makedirs(problem_mesh_fine_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_fine_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_fine_dir, f))
-
-if not os.path.exists(problem_data_dir):
-    os.makedirs(problem_data_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_data_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_data_dir, f))
-
-if not os.path.exists(problem_plot_dir):
-    os.makedirs(problem_plot_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_plot_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_plot_dir, f))
-
-if not os.path.exists(problem_plot_compare_dir):
-    os.makedirs(problem_plot_compare_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_plot_compare_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_plot_compare_dir, f))
-
-if not os.path.exists(problem_log_dir):
-    os.makedirs(problem_log_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_log_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_log_dir, f))
-
-df.to_csv(os.path.join(problem_specific_dir, "info.csv"))
-
-
-# ====  Data Generation Scripts ======================
-if __name__ == "__main__":
-    print("In build_dataset.py")
-    i = 0
-    while i < n_samples:
-        try:
-            print("Generating Sample: " + str(i))
-            if mesh_type != 0:
-                unstructured_square_mesh_gen = UM2N.UnstructuredSquareMesh(
-                    scale=scale_x, mesh_type=mesh_type
-                )  # noqa
-                mesh = unstructured_square_mesh_gen.generate_mesh(
-                    res=lc,
-                    output_filename=os.path.join(problem_mesh_dir, f"mesh_{i:04d}.msh"),
-                )
-            else:
-                n_grid = int(1 / lc)
-                mesh = fd.UnitSquareMesh(n_grid, n_grid)
-
-            # Generate Random solution field
-            rand_u_generator = UM2N.RandSourceGenerator(
-                use_iso=use_iso,
-                dist_params={
-                    "max_dist": max_dist,
-                    "n_dist": n_dist,
-                    "x_start": 0,
-                    "x_end": 1,
-                    "y_start": 0,
-                    "y_end": 1,
-                    "z_max": z_max,
-                    "z_min": z_min,
-                    "w_min": w_min,
-                    "w_max": w_max,
-                    "c_min": c_min,
-                    "c_max": c_max,
-                    "sigma_mean_scaler": sigma_mean_scaler,
-                    "sigma_sigma_scaler": sigma_sigma_scaler,
-                    "sigma_eps": sigma_eps,
-                },
-            )
-            helmholtz_eq = UM2N.RandHelmholtzEqGenerator(rand_u_generator)
-            res = helmholtz_eq.discretise(mesh)  # discretise the equation
-            dist_params = rand_u_generator.get_dist_params()
-            # Solve the equation
-            solver = UM2N.EquationSolver(
-                params={
-                    "function_space": res["function_space"],
-                    "LHS": res["LHS"],
-                    "RHS": res["RHS"],
-                    "bc": res["bc"],
-                }
-            )
-            # RHS of helmholtz problem
-            f = fd.interpolate(helmholtz_eq.f, helmholtz_eq.function_space)
-            # fd.trisurf(f)
-            # plt.show()
-            uh = solver.solve_eq()
-            # Generate Mesh
-            hessian = UM2N.MeshGenerator(
-                params={"eq": helmholtz_eq, "mesh": mesh}
-            ).get_hessian(mesh)
-
-            hessian_norm = UM2N.MeshGenerator(
-                params={"eq": helmholtz_eq, "mesh": mesh}
-            ).get_hessian_norm(mesh)
-            hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1))
-
-            # Get monitor val
-            monitor_val = UM2N.MeshGenerator(
-                params={"eq": helmholtz_eq, "mesh": mesh}
-            ).monitor_func(mesh)
-
-            # grad_uh_norm = UM2N.MeshGenerator(
-            #     params={
-            #         "eq": helmholtz_eq,
-            #         "mesh": fd.Mesh(
-            #             os.path.join(problem_mesh_dir, f"mesh_{i:04d}.msh")
-            #         ),  # noqa
-            #     }
-            # ).get_grad_norm(mesh)
-
-            func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
-            grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space)
-
-            grad_norm = fd.Function(res["function_space"])
-            grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
-            grad_norm /= grad_norm.vector().max()
-            grad_uh_norm = grad_norm
-
-            mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh})
-
-            start = time.perf_counter()
-            new_mesh = mesh_gen.move_mesh()  # noqa
-            end = time.perf_counter()
-            dur = (end - start) * 1000
-
-            # Get monitor val
-            # monitor_val = mesh_gen.get_monitor_val()
-
-            # this is the jacobian of x with respect to xi
-            jacobian = mesh_gen.get_jacobian()
-            jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1))
-            jacobian_det = mesh_gen.get_jacobian_det()
-            jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1))
-
-            # get phi/grad_phi projected to the original mesh
-            phi = mesh_gen.get_phi()
-            # phi = fd.project(
-            #     phi, fd.FunctionSpace(mesh, "CG", 1)
-            # )
-            grad_phi = mesh_gen.get_grad_phi()
-            # grad_phi = fd.project(
-            #     grad_phi, fd.VectorFunctionSpace(mesh, "CG", 1)
-            # )
-
-            # solve the equation on the new mesh
-            new_res = helmholtz_eq.discretise(new_mesh)
-            new_solver = UM2N.EquationSolver(
-                params={
-                    "function_space": new_res["function_space"],
-                    "LHS": new_res["LHS"],
-                    "RHS": new_res["RHS"],
-                    "bc": new_res["bc"],
-                }
-            )
-            uh_new = new_solver.solve_eq()
-
-            # process the data for training
-            mesh_processor = UM2N.MeshProcessor(
-                original_mesh=mesh,
-                optimal_mesh=new_mesh,
-                function_space=new_res["function_space"],
-                use_4_edge=True,
-                feature={
-                    "uh": uh.dat.data_ro.reshape(-1, 1),
-                    "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
-                    "grad_uh_norm": grad_uh_norm.dat.data_ro.reshape(-1, 1),
-                    "hessian": hessian.dat.data_ro.reshape(-1, 4),
-                    "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
-                    "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
-                    "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
-                    "phi": phi.dat.data_ro.reshape(-1, 1),
-                    "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
-                    "f": f.dat.data_ro.reshape(-1, 1),
-                    "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),
-                },
-                raw_feature={
-                    "uh": uh,
-                    "hessian_norm": hessian_norm,
-                    "monitor_val": monitor_val,
-                    "jacobian": jacobian,
-                    "jacobian_det": jacobian_det,
-                },
-                dist_params=dist_params,
-            )
-
-            mesh_processor.save_taining_data(
-                os.path.join(problem_data_dir, f"data_{i:04d}")
-            )
-
-            # # ====  Plot Scripts ======================
-            # fig = plt.figure(figsize=(15, 10))
-            # ax1 = fig.add_subplot(2, 3, 1, projection='3d')
-            # # Plot the exact solution
-            # ax1.set_title('Exact Solution')
-            # fd.trisurf(fd.interpolate(
-            #     res["u_exact"], res["function_space"]), axes=ax1)
-            # # Plot the solved solution
-            # ax2 = fig.add_subplot(2, 3, 2, projection='3d')
-            # ax2.set_title('FEM Solution')
-            # fd.trisurf(uh, axes=ax2)
-
-            # # Plot the solution on a optimal mesh
-            # ax3 = fig.add_subplot(2, 3, 3, projection='3d')
-            # ax3.set_title('FEM Solution on Optimal Mesh')
-            # fd.trisurf(uh_new, axes=ax3)
-
-            # # Plot the mesh
-            # ax4 = fig.add_subplot(2, 3, 4)
-            # ax4.set_title('Original Mesh')
-            # fd.triplot(mesh, axes=ax4)
-            # ax5 = fig.add_subplot(2, 3, 5)
-            # ax5.set_title('Optimal Mesh')
-            # fd.triplot(new_mesh, axes=ax5)
-
-            # # plot mesh with function evaluated on it
-            # ax6 = fig.add_subplot(2, 3, 6)
-            # ax6.set_title('Soultion Projected on optimal mesh')
-            # fd.tripcolor(
-            #     uh_new, cmap='coolwarm', axes=ax6)
-            # fd.triplot(new_mesh, axes=ax6)
-
-            # fig.savefig(
-            #     os.path.join(
-            #         problem_plot_dir, f"plot_{i:04d}.png")
-            # )
-
-            # ==========================================
-
-            if mesh_type != 0:
-                # generate log file
-                high_res_mesh = unstructured_square_mesh_gen.generate_mesh(
-                    res=1e-2,
-                    output_filename=os.path.join(
-                        problem_mesh_fine_dir, f"mesh_{i:04d}.msh"
-                    ),
-                )
-            else:
-                high_res_mesh = fd.UnitSquareMesh(100, 100)
-            high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1)
-
-            res_high_res = helmholtz_eq.discretise(high_res_mesh)
-            u_exact = fd.interpolate(
-                res_high_res["u_exact"], res_high_res["function_space"]
-            )
+    directories = setup_directories(problem = parameters["problem"],
+                        mesh_type = parameters["mesh_type"],
+                        base_dir = None,
+                        subdirs = subdirs,
+                        dir_format = problem_specific_dir)
 
-            uh_proj = fd.project(uh, high_res_function_space)
-            uh_new_proj = fd.project(uh_new, high_res_function_space)
 
-            error_original_mesh = fd.errornorm(u_exact, uh_proj)
-            error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj)
+    # ====  Output CSV ======================
+    key_list = [
+        "cmin", "cmax", "sigma_mean_scaler", "sigma_sigma_scaler", "sigma_eps",
+        "data_type", "scheme", "n_samples", "lc", "mesh_type",
+    ]
+    output_csv(parameters, key_list, directories["data"])
 
-            df = pd.DataFrame(
-                {
-                    "error_og": error_original_mesh,
-                    "error_adapt": error_optimal_mesh,
-                    "time": dur,
-                },
-                index=[0],
-            )
-            df.to_csv(os.path.join(problem_log_dir, f"log_{i:04d}.csv"))
-            print("error og/optimal:", error_original_mesh, error_optimal_mesh)
+    # ====  Data Generation Scripts ======================
+    for i in range(parameters["n_samples"]):
+        try:
+            print(f"Generating Sample: {i}")
 
-            # ====  Plot mesh, solution, error ======================
-            rows, cols = 3, 3
-            fig, ax = plt.subplots(
-                rows, cols, figsize=(cols * 5, rows * 5), layout="compressed"
-            )
+            # create dataset
+            process_features(parameters, directories)
 
-            # High resolution mesh
-            fd.triplot(high_res_mesh, axes=ax[0, 0])
-            ax[0, 0].set_title("High resolution Mesh ")
-            # Orginal low resolution uniform mesh
-            fd.triplot(mesh, axes=ax[0, 1])
-            ax[0, 1].set_title("Original uniform Mesh")
-            # Adapted mesh
-            fd.triplot(new_mesh, axes=ax[0, 2])
-            ax[0, 2].set_title("Adapted Mesh (MA)")
-
-            cmap = "seismic"
-            # Solution on high resolution mesh
-            cb = fd.tripcolor(u_exact, cmap=cmap, axes=ax[1, 0])
-            ax[1, 0].set_title("Solution on High Resolution (u_exact)")
-            plt.colorbar(cb)
-            # Solution on orginal low resolution uniform mesh
-            cb = fd.tripcolor(uh, cmap=cmap, axes=ax[1, 1])
-            ax[1, 1].set_title("Solution on uniform Mesh")
-            plt.colorbar(cb)
-            # Solution on adapted mesh
-            cb = fd.tripcolor(uh_new, cmap=cmap, axes=ax[1, 2])
-            ax[1, 2].set_title("Solution on Adapted Mesh (MA)")
-            plt.colorbar(cb)
-
-            err_orignal_mesh = fd.assemble(uh_proj - u_exact)
-            err_adapted_mesh = fd.assemble(uh_new_proj - u_exact)
-            err_abs_max_val_ori = max(
-                abs(err_orignal_mesh.dat.data[:].max()),
-                abs(err_orignal_mesh.dat.data[:].min()),
-            )
-            err_abs_max_val_adapted = max(
-                abs(err_adapted_mesh.dat.data[:].max()),
-                abs(err_adapted_mesh.dat.data[:].min()),
-            )
-            err_abs_max_val = max(err_abs_max_val_ori, err_abs_max_val_adapted)
-            err_v_max = err_abs_max_val
-            err_v_min = -err_v_max
-
-            # Error on high resolution mesh
-            cb = fd.tripcolor(monitor_val, cmap=cmap, axes=ax[2, 0])
-            ax[2, 0].set_title("Monitor values")
-            plt.colorbar(cb)
-            # Error on orginal low resolution uniform mesh
-            cb = fd.tripcolor(
-                err_orignal_mesh,
-                cmap=cmap,
-                axes=ax[2, 1],
-                vmax=err_v_max,
-                vmin=err_v_min,
-            )
-            ax[2, 1].set_title(
-                f"Error (u-u_exact) uniform Mesh | L2 Norm: {error_original_mesh:.5f}"
-            )
-            plt.colorbar(cb)
-            # Error on adapted mesh
-            cb = fd.tripcolor(
-                err_adapted_mesh,
-                cmap=cmap,
-                axes=ax[2, 2],
-                vmax=err_v_max,
-                vmin=err_v_min,
-            )
-            ax[2, 2].set_title(
-                f"Error (u-u_exact) Adapted Mesh (MA)| L2 Norm: {error_optimal_mesh:.5f} | {(error_original_mesh-error_optimal_mesh)/error_original_mesh*100:.2f}%"
-            )
-            plt.colorbar(cb)
+        except fd.exceptions.ConvergenceError:
+            print(f"Iteration {i} did not converge.")
+            continue
 
-            for rr in range(rows):
-                for cc in range(cols):
-                    ax[rr, cc].set_aspect("equal", "box")
-            fig.savefig(os.path.join(problem_plot_compare_dir, f"plot_{i:04d}.png"))
-            plt.close()
+    # ====  Data Splits ============================================
+    num_train = int(parameters["n_samples"] * parameters["p_train"])
+    num_test = int(parameters["n_samples"] * parameters["p_test"])
+    num_val = parameters["n_samples"] - num_train - num_test
 
-            i += 1
-        except fd.exceptions.ConvergenceError:
-            print(f"Iteration: {i}, not coverged.")
-            pass
-        # except AttributeError:
-        #     print(f"AttributeError")
-        #     pass
-        # except ValueError:
-        #     pass
-
-    move_data(problem_train_dir, problem_data_dir, 0, num_train)
-
-    move_data(problem_test_dir, problem_data_dir, num_train, num_train + num_test)
-
-    move_data(
-        problem_val_dir,
-        problem_data_dir,
-        num_train + num_test,
-        num_train + num_test + num_val,
-    )
-# ====  Data Generation Scripts ======================
+    move_data(directories["train"], directories["data"], 0, num_train)
+    move_data(directories["test"], directories["data"], num_train, num_train + num_test)
+    move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val)
diff --git a/script/build_poisson_poly.py b/script/build_poisson_poly.py
index 87634f6..929fad8 100644
--- a/script/build_poisson_poly.py
+++ b/script/build_poisson_poly.py
@@ -1,6 +1,6 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
-
+import csv
 import os
 import random
 import shutil
@@ -10,408 +10,468 @@
 import firedrake as fd
 import matplotlib.pyplot as plt
 import numpy as np
-import pandas as pd
+# import pandas as pd
+from firedrake.__future__ import interpolate
 
+# Add the parent directory to the Python path
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
-
-def arg_parse():
-    parser = ArgumentParser()
-    parser.add_argument(
-        "--mesh_type", type=int, default=2, help="algorithm used to generate mesh"
-    )
-    parser.add_argument(
-        "--max_dist",
-        type=int,
-        default=6,
-        help="max number of distributions used to\
-                            generate the dataset (only works if\
-                                n_dist is not set)",
-    )
-    parser.add_argument(
-        "--n_dist",
-        type=int,
-        default=None,
-        help="number of distributions used to\
-                            generate the dataset (this will disable\
-                                max_dist)",
-    )
-    parser.add_argument(
-        "--lc",
-        type=float,
-        default=5e-2,
-        help="the length characteristic of the elements in the\
-                            mesh",
-    )
-    parser.add_argument(
-        "--field_type",
-        type=str,
-        default="aniso",
-        help="anisotropic or isotropic data type(aniso/iso)",
-    )
+def parse_arguments():
+    """Parse the command-line arguments of the Poisson polygonal-mesh dataset builder."""
+    parser = ArgumentParser(description="Build Poisson dataset with polygonal meshes.")
+    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.")
+    parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.")
+    parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.")
+    parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.")
+    parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).")
     # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
-    parser.add_argument(
-        "--boundary_scheme",
-        type=str,
-        default="full",
-        help="scheme used to generate the dataset (pad/full))",
-    )
-    parser.add_argument(
-        "--n_samples", type=int, default=100, help="number of samples generated"
-    )
-    parser.add_argument(
-        "--rand_seed", type=int, default=63, help="number of samples generated"
-    )
-    args_ = parser.parse_args()
-    print(args_)
-    return args_
-
+    parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).")
+    parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated")
+    parser.add_argument("--rand_seed", type=int, default=63, help="Random seed")
+    
+    parsed_args = parser.parse_args()
+
+    # Handle the dependency between max_dist and n_dist: max_dist (the max
+    # number of distributions used to generate the dataset) only applies
+    # when n_dist is not set; an explicit n_dist disables it.
+    if parsed_args.n_dist is not None:
+        parsed_args.max_dist = None  # Disable max_dist if n_dist is set
+        print("Warning: max_dist is ignored because n_dist is set.")
+    # QC:
+    print(parsed_args)
+    # Return the adjusted namespace; parsing a second time would undo the override above.
+    return parsed_args
+
+
+def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+    """
+    Create the output subdirectories for a dataset build.
+
+    Existing subdirectories are emptied: each entry in them is passed to os.remove.
+
+    Args:
+        problem (str): Problem name; used as a path component of the dataset directory.
+        mesh_type (int): Mesh type; used in the "dataset_meshtype_{mesh_type}" path component.
+        base_dir (str, optional): Project root. If falsy, the parent of the directory
+            containing this script is used.
+        subdirs (list, optional): Subdirectory names to create. Defaults to data, plot,
+            log, mesh, mesh_fine, plot_compare, train, test and val.
+        dir_format (str, optional): Name of the problem-specific directory. It is joined
+            to the dataset directory verbatim (no placeholder substitution happens here).
+            If None, "{problem}_meshtype_{mesh_type}" is used instead.
+
+    Returns:
+        dict: A dictionary mapping subdirectory names to their full paths.
+
+    Raises:
+        ValueError: If `dir_format` is given but is not a string.
+    """
+
+    # Define the project directory
+    if base_dir:
+        project_dir = os.path.abspath(base_dir)
+    else:
+        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    
+    # QC:
+    print(f"Project Directory: {project_dir}")
 
-args = arg_parse()
+    # Define the dataset directory
+    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
 
-mesh_type = args.mesh_type
+    # Use the provided format string for the problem-specific directory
+    if dir_format is None:
+        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+    else:
+        # check if dir_format is a valid string format
+        if not isinstance(dir_format, str):
+            raise ValueError("dir_format must be a string.")
+        problem_specific_dir = os.path.join(dataset_dir, dir_format)
+
+    # Define default subdirectories if not provided
+    if subdirs is None:
+        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
+                   "plot_compare", "train", "test", "val"]
+
+    # Create and clear directories
+    directories = {}
+    for subdir in subdirs:
+        dir_path = os.path.join(problem_specific_dir, subdir)
+        if not os.path.exists(dir_path):
+            os.makedirs(dir_path)
+        else:
+            # Clear the directory by removing all files
+            for file in os.listdir(dir_path):
+                os.remove(os.path.join(dir_path, file))
+        directories[subdir] = dir_path
+
+    # QC:
+    print(f"Subdirectories created: {directories}")
+
+    return directories
+
+def output_csv(parameters, key_list, output_dir):
+    """
+    Write selected parameters to "info.csv" as one header row and one value row.
+
+    Args:
+        parameters (dict): Dictionary of parameters to write.
+        key_list (list): Keys to include, in column order; keys absent from `parameters` are skipped.
+        output_dir (str): Directory where "info.csv" is written (an existing file is overwritten).
+    """
+    # Keep only the requested keys that exist in parameters (missing keys are dropped silently)
+    csv_keys = [key for key in key_list if key in parameters]
+    csv_data = [parameters[key] for key in csv_keys]
+
+    # Define the output file path
+    csv_file_path = os.path.join(output_dir, "info.csv")
+
+    # Write to CSV (newline="" lets the csv module control line endings)
+    with open(csv_file_path, mode="w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # Write header (keys)
+        csv_writer.writerow(csv_keys)
+        # Write data (values)
+        csv_writer.writerow(csv_data)
+        
+def move_data(target, source, start, num_files):
+    """
+    Copy data_{i:04d}.npy for i in [start, start + num_files) from source to target.
+
+    Args:
+        target (str): Destination directory; created if missing, otherwise emptied first.
+        source (str): Directory holding the data files (they are copied, not moved).
+        start (int): Index of the first file to copy.
+        num_files (int): A count, not an end index. NOTE(review): confirm callers pass a count.
+
+    Raises:
+        FileNotFoundError: If the source directory does not exist.
+        ValueError: If start < 0 or num_files <= 0.
+    """
+    if not os.path.exists(source):
+        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
+
+    if start < 0 or num_files <= 0:
+        raise ValueError("Invalid start index or number of files to move.")
+
+    # Create the target directory if it doesn't exist
+    if not os.path.exists(target):
+        os.makedirs(target)
+    else:
+        # Clear the target directory by removing all files
+        for file in os.listdir(target):
+            os.remove(os.path.join(target, file))
 
-data_type = args.field_type
-use_iso = True if data_type == "iso" else False
+    # Copy files sequentially starting from the specified index
+    for i in range(start, start + num_files):
+        try:
+            # Copy the data file
+            shutil.copy(
+                os.path.join(source, f"data_{i:04d}.npy"),
+                os.path.join(target, f"data_{i:04d}.npy"),
+            )
+        except FileNotFoundError:
+            print(f"File data_{i:04d}.npy not found in {source}. Skipping.")
+            continue
+        except Exception as e:
+            print(f"An error occurred while copying data_{i:04d}.npy: {e}")
+            continue
+
+def process_features(parameters, dirs):
+    """Build, save, plot and log one sample. NOTE: reads the sample index from a global `i`, which is not a parameter."""
+    # create mesh
+    scale_x = parameters["scale_x"]
+    mesh_type = parameters["mesh_type"]
+    lc = parameters["lc"]
+    rand_poly_mesh_gen = UM2N.UnstructuredRandomPolygonalMeshGenerator(
+        scale=scale_x, mesh_type=mesh_type
+    )  # noqa
+    mesh = rand_poly_mesh_gen.generate_mesh(
+        res=lc, output_filename=os.path.join(dirs["mesh"], f"mesh{i}.msh")
+    )
+    num_boundary = rand_poly_mesh_gen.num_boundary
 
-rand_seed = args.rand_seed
-random.seed(rand_seed)
-np.random.seed(rand_seed)
+    # Generate Random solution field
+    rand_u_generator = UM2N.RandSourceGenerator(
+        use_iso= parameters["data_type"] == "iso",
+        dist_params = parameters
+    )
 
-# ====  Parameters ======================
-problem = "poisson_poly"
+    # generate equation
+    poisson_eq = UM2N.RandPoissonEqGenerator(rand_u_generator)
+    # discretise the equation
+    res = poisson_eq.discretise(mesh)
+    # get specific parameters used
+    dist_params = rand_u_generator.get_dist_params()
+    # Solve the equation
+    solver = UM2N.EquationSolver(
+        params={
+            "function_space": res["function_space"],
+            "LHS": res["LHS"],
+            "RHS": res["RHS"],
+            "bc": res["bc"],
+        }
+    )
+    # original solution field
+    uh = solver.solve_eq()
+    # Generate Mesh
+    mesh_gen = UM2N.MeshGenerator(params={"eq": poisson_eq, "mesh": mesh})
+    monitor_val = mesh_gen.monitor_func(mesh)
+    hessian = mesh_gen.get_hessian(mesh)
+    hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh),
+                                fd.FunctionSpace(mesh, "CG", 1)
+                                )
+    # hessian = UM2N.MeshGenerator(
+    #     params={
+    #         "eq": poisson_eq,
+    #         "mesh": rand_poly_mesh_gen.generate_mesh(
+    #             res=lc,
+    #             output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
+    #         ),
+    #     }
+    # ).get_hessian(mesh)
+
+    # hessian_norm = UM2N.MeshGenerator(
+    #     params={
+    #         "eq": poisson_eq,
+    #         "mesh": rand_poly_mesh_gen.generate_mesh(
+    #             res=lc,
+    #             output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
+    #         ),
+    #     }
+    # ).monitor_func(mesh)
+
+    # is this the monitor function value?
+    # hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1))
+
+    func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
+    grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space)
+
+    # ej321 - grad_norm copied from build_helmholtz_square.py
+    grad_norm = fd.Function(res["function_space"])
+    grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
+    grad_norm /= grad_norm.vector().max()
+
+    # mesh_gen = UM2N.MeshGenerator(
+    #     params={
+    #         "eq": poisson_eq,
+    #         "mesh": rand_poly_mesh_gen.generate_mesh(
+    #             res=lc,
+    #             output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
+    #         ),
+    #     }
+    # )
+    
+    # move the mesh?
+    start = time.perf_counter()
+    new_mesh = mesh_gen.move_mesh()
+    end = time.perf_counter()
+    dur = (end - start) * 1000
+
+    # this is the jacobian of x with respect to xi
+    jacobian = mesh_gen.get_jacobian()
+    jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1))
+    jacobian_det = mesh_gen.get_jacobian_det()
+    jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1))
+
+    # get phi/grad_phi projected to the original mesh
+    phi = mesh_gen.get_phi()
+    grad_phi = mesh_gen.get_grad_phi()
+
+    # solve the equation on the new mesh
+    new_res = poisson_eq.discretise(new_mesh)
+    new_solver = UM2N.EquationSolver(
+        params={
+            "function_space": new_res["function_space"],
+            "LHS": new_res["LHS"],
+            "RHS": new_res["RHS"],
+            "bc": new_res["bc"],
+        }
+    )
+    uh_new = new_solver.solve_eq()
+
+    # process the data for training
+    mesh_processor = UM2N.MeshProcessor(
+        original_mesh=mesh,
+        optimal_mesh=new_mesh,
+        function_space=new_res["function_space"],
+        use_4_edge=False,
+        num_boundary=num_boundary,
+        feature={
+            "uh": uh.dat.data_ro.reshape(-1, 1),
+            "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), #ej321 - added
+            "hessian": hessian.dat.data_ro.reshape(-1, 4),
+            "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
+            "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
+            "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
+            "phi": phi.dat.data_ro.reshape(-1, 1),
+            "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added
+
+        },
+        raw_feature={
+            "uh": uh,
+            "hessian_norm": hessian_norm,
+            "monitor_val": monitor_val, # ej321 - added
+            "grad_uh_norm": grad_norm, # ej321 - added needed for poly only
+            "jacobian": jacobian,
+            "jacobian_det": jacobian_det,
+        },
+        dist_params=dist_params,
+        poly_mesh=True,
+    )
 
-n_samples = args.n_samples
+    mesh_processor.save_taining_data(
+        os.path.join(dirs["data"], f"data_{i:04d}")  # zero-padded to match the data_{i:04d}.npy names move_data copies
+    )
 
-# parameters for domain scale
-scale_x = 1
-scale_y = 1
+    # ====  Plot Scripts ======================
+    fig = plt.figure(figsize=(15, 10))
+    ax1 = fig.add_subplot(2, 3, 1, projection="3d")
+    # Plot the exact solution
+    ax1.set_title("Exact Solution")
+    fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1)
+    # Plot the solved solution
+    ax2 = fig.add_subplot(2, 3, 2, projection="3d")
+    ax2.set_title("FEM Solution")
+    fd.trisurf(uh, axes=ax2)
+
+    # Plot the solution on a optimal mesh
+    ax3 = fig.add_subplot(2, 3, 3, projection="3d")
+    ax3.set_title("FEM Solution on Optimal Mesh")
+    fd.trisurf(uh_new, axes=ax3)
+
+    # Plot the mesh
+    ax4 = fig.add_subplot(2, 3, 4)
+    ax4.set_title("Original Mesh")
+    fd.triplot(mesh, axes=ax4)
+    ax5 = fig.add_subplot(2, 3, 5)
+    ax5.set_title("Optimal Mesh")
+    fd.triplot(new_mesh, axes=ax5)
+
+    # plot mesh with function evaluated on it
+    ax6 = fig.add_subplot(2, 3, 6)
+    ax6.set_title("Soultion Projected on optimal mesh")
+    fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6)
+    fd.triplot(new_mesh, axes=ax6)
+
+    fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i)))
+
+    # ====  Log File ============================================
+    high_res_mesh = rand_poly_mesh_gen.generate_mesh(
+        res=1e-2,
+        output_filename=os.path.join(dirs["mesh_fine"], f"mesh{i}.msh"),
+    )
 
-# parameters for random source
-max_dist = args.max_dist
-n_dist = args.n_dist
-lc = args.lc
+    high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1)
 
-# parameters for anisotropic data - distribution height scaler
-z_min = 0
-z_max = 1
+    res_high_res = poisson_eq.discretise(high_res_mesh)
+    u_exact = fd.interpolate(
+        res_high_res["u_exact"], res_high_res["function_space"]
+    )
 
-# parameters for isotropic data
-w_min = 0.05
-w_max = 0.2
+    uh_proj = fd.project(uh, high_res_function_space)
+    uh_new_proj = fd.project(uh_new, high_res_function_space)
 
-scheme = args.boundary_scheme
-c_min = 0.3 if scheme == "pad" else 0
-c_max = 0.7 if scheme == "pad" else 1
+    error_original_mesh = fd.errornorm(u_exact, uh_proj)
+    error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj)
 
-# parameters for data split
-p_train = 0.75
-p_test = 0.15
-p_val = 0.1
+     # Write to CSV
+    with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # Write header (keys)
+        csv_writer.writerow(["error_og", "error_adapt", "time"])
+        # Write data (values)
+        csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur])
 
-num_train = int(n_samples * p_train)
-num_test = int(n_samples * p_test)
-num_val = int(n_samples * p_val)
-# =======================================
+    print("error og/optimal:", error_original_mesh, error_optimal_mesh)
 
+if __name__ == "__main__":
 
-df = pd.DataFrame(
-    {
-        "cmin": [c_min],
-        "cmax": [c_max],
-        "data_type": [data_type],
-        "scheme": [scheme],
-        "n_samples": [n_samples],
-        "lc": [lc],
-        "mesh_type": [mesh_type],
+    # parse args
+    args = parse_arguments()
+
+    # ====  Parameters ======================
+    parameters = {
+        # parameters for problem
+        "problem": "poisson_poly",
+        # "n_case": args.n_case, # burgers problem only
+        # parameters for random source
+        "n_dist": args.n_dist,
+        "max_dist": args.max_dist,
+        "lc": args.lc,
+        # "n_grid": args.n_grid, # burgers problem only
+        # parameters for dataset size, field type, boundary scheme and mesh type
+        "n_samples": args.n_samples,
+        "data_type": args.field_type,
+        "scheme": args.boundary_scheme,
+        "mesh_type": int(args.mesh_type),
+        # parameters for domain scale
+        "scale_x": 1,
+        "scale_y": 1,
+        # parameters for anisotropic data - distribution height scaler
+        "z_max": 1,
+        "z_min": 0,
+        # x/y range for the random source (previously part of dist_params) - TODO confirm
+        "x_start": 0,
+        "x_end": 1,
+        "y_start": 0,
+        "y_end": 1,
+        # parameters for isotropic data
+        "w_min": 0.05,
+        "w_max": 0.2,
+        "c_min": 0.3 if args.boundary_scheme == "pad" else 0,
+        "c_max": 0.7 if args.boundary_scheme == "pad" else 1,
+        # parameters for dataset challenging level
+        # larger, less challenging (because the gaussian is more like a circle)
+        "sigma_mean_scaler": 1 / 4,
+        "sigma_sigma_scaler": 1 / 6,
+        "sigma_eps": 1 / 8,
+        # parameters for data split
+        "p_train": 0.75,
+        "p_test": 0.15,
+        "p_val": 0.1,
     }
-)
 
+    # Set random seed
+    random.seed(args.rand_seed)
 
-def move_data(target, source, start, num_file):
-    if not os.path.exists(target):
-        os.makedirs(target)
-    else:
-        # delete all files under the directory
-        filelist = [f for f in os.listdir(target)]
-        for f in filelist:
-            os.remove(os.path.join(target, f))
-    # copy data from data dir to train dir
-    for i in range(start, num_file):
-        shutil.copy(
-            os.path.join(source, "data_{}.npy".format(i)),
-            os.path.join(target, "data_{}.npy".format(i)),
+    # ====  Setup Directories ======================
+    problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
+            parameters["z_min"], parameters["z_max"],
+            parameters["n_dist"],parameters["max_dist"],
+            parameters["lc"], parameters["n_samples"],
+            parameters["data_type"], parameters["scheme"], parameters["mesh_type"]
         )
 
+    subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
+               "train", "test", "val"
+               ]
 
-project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__))))
-dataset_dir = os.path.join(
-    project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
-)  # noqa
-problem_specific_dir = os.path.join(
-    dataset_dir,
-    "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
-        z_min, z_max, n_dist, max_dist, lc, n_samples, data_type, scheme, mesh_type
-    ),
-)
-
-
-problem_data_dir = os.path.join(problem_specific_dir, "data")
-problem_plot_dir = os.path.join(problem_specific_dir, "plot")
-problem_log_dir = os.path.join(problem_specific_dir, "log")
-
-problem_mesh_dir = os.path.join(problem_specific_dir, "mesh")
-problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine")
-problem_train_dir = os.path.join(problem_specific_dir, "train")
-problem_test_dir = os.path.join(problem_specific_dir, "test")
-problem_val_dir = os.path.join(problem_specific_dir, "val")
-
-if not os.path.exists(problem_mesh_dir):
-    os.makedirs(problem_mesh_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_dir, f))
-
-if not os.path.exists(problem_mesh_fine_dir):
-    os.makedirs(problem_mesh_fine_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_fine_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_fine_dir, f))
-
-if not os.path.exists(problem_data_dir):
-    os.makedirs(problem_data_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_data_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_data_dir, f))
-
-if not os.path.exists(problem_plot_dir):
-    os.makedirs(problem_plot_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_plot_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_plot_dir, f))
-
-if not os.path.exists(problem_log_dir):
-    os.makedirs(problem_log_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_log_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_log_dir, f))
-
-df.to_csv(os.path.join(problem_specific_dir, "info.csv"))
-
-
-# ====  Data Generation Scripts ======================
-if __name__ == "__main__":
+    dirs = setup_directories(problem = parameters["problem"],
+                            mesh_type = parameters["mesh_type"],
+                            base_dir = None,
+                            subdirs = subdirs,
+                            dir_format = problem_specific_dir)
+
+    # ====  Output CSV (names must match keys of `parameters` exactly) ======
+    key_list = [
+        "c_min", "c_max", "data_type", "scheme", "n_samples", "lc", "mesh_type"
+    ]
+    output_csv(parameters, key_list, dirs["data"])
+
+    # ====  Data Generation Scripts ======================
     print("In build_dataset.py")
-    i = 0
-    while i < n_samples:
+    # i = 0
+    # while i < n_samples:
+    for i in range(parameters["n_samples"]):
         try:
             print("Generating Sample: " + str(i))
-            rand_poly_mesh_gen = UM2N.UnstructuredRandomPolygonalMeshGenerator(
-                scale=scale_x, mesh_type=mesh_type
-            )  # noqa
-            mesh = rand_poly_mesh_gen.generate_mesh(
-                res=lc, output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh")
-            )
-            num_boundary = rand_poly_mesh_gen.num_boundary
-            # Generate Random solution field
-            rand_u_generator = UM2N.RandSourceGenerator(
-                use_iso=use_iso,
-                dist_params={
-                    "max_dist": max_dist,
-                    "n_dist": n_dist,
-                    "x_start": 0,
-                    "x_end": 1,
-                    "y_start": 0,
-                    "y_end": 1,
-                    "z_max": z_max,
-                    "z_min": z_min,
-                    "w_min": w_min,
-                    "w_max": w_max,
-                    "c_min": c_min,
-                    "c_max": c_max,
-                },
-            )
-            poisson_eq = UM2N.RandPoissonEqGenerator(rand_u_generator)
-            res = poisson_eq.discretise(mesh)  # discretise the equation
-            dist_params = rand_u_generator.get_dist_params()
-            # Solve the equation
-            solver = UM2N.EquationSolver(
-                params={
-                    "function_space": res["function_space"],
-                    "LHS": res["LHS"],
-                    "RHS": res["RHS"],
-                    "bc": res["bc"],
-                }
-            )
-            uh = solver.solve_eq()
-            # Generate Mesh
-            hessian = UM2N.MeshGenerator(
-                params={
-                    "eq": poisson_eq,
-                    "mesh": rand_poly_mesh_gen.generate_mesh(
-                        res=lc,
-                        output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
-                    ),
-                }
-            ).get_hessian(mesh)
-
-            hessian_norm = UM2N.MeshGenerator(
-                params={
-                    "eq": poisson_eq,
-                    "mesh": rand_poly_mesh_gen.generate_mesh(
-                        res=lc,
-                        output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
-                    ),
-                }
-            ).monitor_func(mesh)
-
-            hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1))
-
-            func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
-            grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space)
-
-            mesh_gen = UM2N.MeshGenerator(
-                params={
-                    "eq": poisson_eq,
-                    "mesh": rand_poly_mesh_gen.generate_mesh(
-                        res=lc,
-                        output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
-                    ),
-                }
-            )
-
-            start = time.perf_counter()
-            new_mesh = mesh_gen.move_mesh()
-            end = time.perf_counter()
-            dur = (end - start) * 1000
-
-            # this is the jacobian of x with respect to xi
-            jacobian = mesh_gen.get_jacobian()
-            jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1))
-            jacobian_det = mesh_gen.get_jacobian_det()
-            jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1))
-
-            # get phi/grad_phi projected to the original mesh
-            phi = mesh_gen.get_phi()
-            grad_phi = mesh_gen.get_grad_phi()
-
-            # solve the equation on the new mesh
-            new_res = poisson_eq.discretise(new_mesh)
-            new_solver = UM2N.EquationSolver(
-                params={
-                    "function_space": new_res["function_space"],
-                    "LHS": new_res["LHS"],
-                    "RHS": new_res["RHS"],
-                    "bc": new_res["bc"],
-                }
-            )
-            uh_new = new_solver.solve_eq()
-
-            # process the data for training
-            mesh_processor = UM2N.MeshProcessor(
-                original_mesh=mesh,
-                optimal_mesh=new_mesh,
-                function_space=new_res["function_space"],
-                use_4_edge=False,
-                num_boundary=num_boundary,
-                feature={
-                    "uh": uh.dat.data_ro.reshape(-1, 1),
-                    "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
-                    "hessian": hessian.dat.data_ro.reshape(-1, 4),
-                    "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
-                    "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
-                    "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
-                    "phi": phi.dat.data_ro.reshape(-1, 1),
-                    "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
-                },
-                raw_feature={
-                    "uh": uh,
-                    "hessian_norm": hessian_norm,
-                    "jacobian": jacobian,
-                    "jacobian_det": jacobian_det,
-                },
-                dist_params=dist_params,
-                poly_mesh=True,
-            )
-
-            mesh_processor.save_taining_data(
-                os.path.join(problem_data_dir, "data_{}".format(i))
-            )
-
-            # ====  Plot Scripts ======================
-            fig = plt.figure(figsize=(15, 10))
-            ax1 = fig.add_subplot(2, 3, 1, projection="3d")
-            # Plot the exact solution
-            ax1.set_title("Exact Solution")
-            fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1)
-            # Plot the solved solution
-            ax2 = fig.add_subplot(2, 3, 2, projection="3d")
-            ax2.set_title("FEM Solution")
-            fd.trisurf(uh, axes=ax2)
-
-            # Plot the solution on a optimal mesh
-            ax3 = fig.add_subplot(2, 3, 3, projection="3d")
-            ax3.set_title("FEM Solution on Optimal Mesh")
-            fd.trisurf(uh_new, axes=ax3)
-
-            # Plot the mesh
-            ax4 = fig.add_subplot(2, 3, 4)
-            ax4.set_title("Original Mesh")
-            fd.triplot(mesh, axes=ax4)
-            ax5 = fig.add_subplot(2, 3, 5)
-            ax5.set_title("Optimal Mesh")
-            fd.triplot(new_mesh, axes=ax5)
-
-            # plot mesh with function evaluated on it
-            ax6 = fig.add_subplot(2, 3, 6)
-            ax6.set_title("Soultion Projected on optimal mesh")
-            fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6)
-            fd.triplot(new_mesh, axes=ax6)
-
-            fig.savefig(os.path.join(problem_plot_dir, "plot_{}.png".format(i)))
-
-            # ==========================================
-
-            # generate log file
-            high_res_mesh = rand_poly_mesh_gen.generate_mesh(
-                res=1e-2,
-                output_filename=os.path.join(problem_mesh_fine_dir, f"mesh{i}.msh"),
-            )
-
-            high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1)
-
-            res_high_res = poisson_eq.discretise(high_res_mesh)
-            u_exact = fd.interpolate(
-                res_high_res["u_exact"], res_high_res["function_space"]
-            )
-
-            uh = fd.project(uh, high_res_function_space)
-            uh_new = fd.project(uh_new, high_res_function_space)
-
-            error_original_mesh = fd.errornorm(u_exact, uh)
-            error_optimal_mesh = fd.errornorm(u_exact, uh_new)
-
-            df = pd.DataFrame(
-                {
-                    "error_og": error_original_mesh,
-                    "error_adapt": error_optimal_mesh,
-                    "time": dur,
-                },
-                index=[0],
-            )
-            df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i)))
-            print("error og/optimal:", error_original_mesh, error_optimal_mesh)
-            i += 1
+            process_features(parameters, dirs)
+            # i += 1
         except fd.exceptions.ConvergenceError:
             pass
         except AttributeError:
@@ -419,14 +479,13 @@ def move_data(target, source, start, num_file):
         except ValueError:
             pass
 
-    move_data(problem_train_dir, problem_data_dir, 0, num_train)
 
-    move_data(problem_test_dir, problem_data_dir, num_train, num_train + num_test)
 
-    move_data(
-        problem_val_dir,
-        problem_data_dir,
-        num_train + num_test,
-        num_train + num_test + num_val,
-    )
-# ====  Data Generation Scripts ======================
+    # ====  Data Splits (NOTE(review): 4th arg below is a cumulative end index - confirm move_data does not expect a count) ====
+    num_train = int(parameters["n_samples"] * parameters["p_train"])
+    num_test = int(parameters["n_samples"] * parameters["p_test"])
+    num_val = parameters["n_samples"] - num_train - num_test
+
+    move_data(dirs["train"], dirs["data"], 0, num_train)
+    move_data(dirs["test"], dirs["data"], num_train, num_train + num_test)
+    move_data(dirs["val"], dirs["data"], num_train + num_test, num_train + num_test + num_val)
diff --git a/script/build_poisson_square.py b/script/build_poisson_square.py
index 4bf9712..44b4f6e 100644
--- a/script/build_poisson_square.py
+++ b/script/build_poisson_square.py
@@ -1,6 +1,6 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
-
+import csv
 import os
 import random
 import shutil
@@ -9,399 +9,486 @@
 
 import firedrake as fd
 import matplotlib.pyplot as plt
-import pandas as pd
+# import pandas as pd
+from firedrake.__future__ import interpolate
 
+# Add the parent directory to the Python path
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
+def parse_arguments():
+    """Parse command-line arguments; ``max_dist`` is reset to None when ``n_dist`` is given."""
+    parser = ArgumentParser(description="Build Poisson dataset with square meshes.")
+    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.")
+    parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.")
+    parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.")
+    parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.")
+    parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).")
+    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
+    parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).")
+    parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated")
+    parser.add_argument("--rand_seed", type=int, default=63, help="Random seed")
+
+    parsed_args = parser.parse_args()
+
+    # max_dist (the maximum number of distributions) is only meaningful while
+    # n_dist is unset; an explicit n_dist fixes the count, so max_dist is
+    # cleared to make that precedence visible to every consumer of the args.
+    if parsed_args.n_dist is not None:
+        parsed_args.max_dist = None  # Disable max_dist if n_dist is set
+        print("Warning: max_dist is ignored because n_dist is set.")
+    # QC:
+    print(parsed_args)
+    # Return the adjusted namespace; re-parsing here would discard the override
+    return parsed_args
+
+def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+    """
+    Create (or empty) the output directories for data, plots, and logs.
+
+    Args:
+        problem (str): Name of the problem; used as a path component.
+        mesh_type (int): Mesh type; used in the dataset directory name.
+        base_dir (str, optional): Base directory for the project. When
+            falsy, the parent of this script's directory is used.
+        subdirs (list, optional): List of subdirectories to create. Defaults to:
+            ["data", "plot", "log", "mesh", "mesh_fine",
+             "plot_compare", "train", "test", "val"].
+        dir_format (str, optional): Name of the problem-specific directory,
+            joined to the dataset directory as-is (no placeholder
+            substitution happens here). When None, the name
+            "{problem}_meshtype_{mesh_type}" is used.
+
+    Returns:
+        dict: A dictionary mapping subdirectory names to their full paths.
+
+    Raises:
+        ValueError: If `dir_format` is given but is not a string.
+    """
+
+    # Define the project directory
+    if base_dir:
+        project_dir = os.path.abspath(base_dir)
+    else:
+        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    
+    # QC:
+    print(f"Project Directory: {project_dir}")
 
-def arg_parse():
-    parser = ArgumentParser()
-    parser.add_argument(
-        "--mesh_type", type=int, default=2, help="algorithm used to generate mesh"
-    )
-    parser.add_argument(
-        "--max_dist",
-        type=int,
-        default=6,
-        help="max number of distributions used to\
-                            generate the dataset (only works if\
-                                n_dist is not set)",
-    )
-    parser.add_argument(
-        "--n_dist",
-        type=int,
-        default=None,
-        help="number of distributions used to\
-                            generate the dataset (this will disable\
-                                max_dist)",
-    )
-    parser.add_argument(
-        "--lc",
-        type=float,
-        default=3e-2,
-        help="the length characteristic of the elements in the\
-                            mesh",
+    # Define the dataset directory
+    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
+
+    # Use the provided name for the problem-specific directory, else a default
+    if dir_format is None:
+        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+    else:
+        # only the type is checked; the string itself is used verbatim
+        if not isinstance(dir_format, str):
+            raise ValueError("dir_format must be a string.")
+        problem_specific_dir = os.path.join(dataset_dir, dir_format)
+
+    # Define default subdirectories if not provided
+    if subdirs is None:
+        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
+                   "plot_compare", "train", "test", "val"]
+
+    # Create and clear directories
+    directories = {}
+    for subdir in subdirs:
+        dir_path = os.path.join(problem_specific_dir, subdir)
+        if not os.path.exists(dir_path):
+            os.makedirs(dir_path)
+        else:
+            # Clear the directory so results from an earlier run do not leak in
+            for file in os.listdir(dir_path):
+                os.remove(os.path.join(dir_path, file))
+        directories[subdir] = dir_path
+
+    # QC:
+    print(f"Subdirectories created: {directories}")
+
+    return directories
+
+def output_csv(parameters, key_list, output_dir):
+    """
+    Write the requested parameters to ``info.csv`` as a header row and a value row.
+
+    Args:
+        parameters (dict): Dictionary of parameters to write.
+        key_list (list): Keys to include, in column order; keys that are
+            missing from ``parameters`` are skipped without error.
+        output_dir (str): Directory where ``info.csv`` is written.
+    """
+    # Pair every requested key that is present with its value
+    pairs = [(name, parameters[name]) for name in key_list if name in parameters]
+    header = [name for name, _ in pairs]
+    values = [value for _, value in pairs]
+
+    # The file name is fixed; only the directory is configurable
+    target = os.path.join(output_dir, "info.csv")
+    with open(target, mode="w", newline="") as handle:
+        writer = csv.writer(handle)
+        # First row: column names
+        writer.writerow(header)
+        # Second row: the matching values
+        writer.writerow(values)
+
+def move_data(target, source, start, num_files):
+    """
+    Copy a consecutive run of ``data_XXXX.npy`` files from source to target.
+
+    Args:
+        target (str): Target directory; created if missing, emptied if present.
+        source (str): Directory holding the ``data_{index:04d}.npy`` files.
+        start (int): Index of the first file to copy.
+        num_files (int): Number of files to copy (not an end index); 0 is
+            allowed and yields an empty target directory.
+
+    Raises:
+        FileNotFoundError: If the source directory does not exist.
+        ValueError: If ``start`` or ``num_files`` is negative.
+    """
+    if not os.path.exists(source):
+        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
+
+    # An empty split (num_files == 0) is valid, so only negatives are rejected
+    if start < 0 or num_files < 0:
+        raise ValueError("Invalid start index or number of files to move.")
+
+    # Create the target directory, or clear out files left by a previous run
+    if not os.path.exists(target):
+        os.makedirs(target)
+    else:
+        for file in os.listdir(target):
+            os.remove(os.path.join(target, file))
+
+    # Copy files sequentially; a missing or unreadable file is reported and
+    # skipped so that one failed sample does not abort the whole split.
+    for i in range(start, start + num_files):
+        file_name = f"data_{i:04d}.npy"
+        try:
+            shutil.copy(
+                os.path.join(source, file_name),
+                os.path.join(target, file_name),
+            )
+        except FileNotFoundError:
+            print(f"File {file_name} not found in {source}. Skipping.")
+        except Exception as e:
+            print(f"An error occurred while copying {file_name}: {e}")
+
+def process_features(parameters, problem_data_dir):
+    """Build, save, plot and log one Poisson sample; reads `i` and `directories` from module scope."""
+    # create mesh
+    scale_x = parameters["scale_x"]
+    mesh_type = parameters["mesh_type"]
+    lc = parameters["lc"]
+    unstructured_square_mesh_gen = UM2N.UnstructuredSquareMeshGenerator(
+        scale=scale_x, mesh_type=mesh_type
+    )  # noqa
+    mesh = unstructured_square_mesh_gen.generate_mesh(
+        res=lc, output_filename=os.path.join(directories["data"], f"mesh{i}.msh")
     )
-    parser.add_argument(
-        "--field_type",
-        type=str,
-        default="aniso",
-        help="anisotropic or isotropic data type(aniso/iso)",
+    # Generate Random solution field
+    rand_u_generator = UM2N.RandSourceGenerator(
+        use_iso= parameters["data_type"] == "iso",
+        dist_params = parameters
     )
-    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
-    parser.add_argument(
-        "--boundary_scheme",
-        type=str,
-        default="full",
-        help="scheme used to generate the dataset (pad/full))",
+
+    # generate equation
+    poisson_eq = UM2N.RandPoissonEqGenerator(rand_u_generator)
+    # discretise the equation
+    res = poisson_eq.discretise(mesh)
+    # get specific parameters used
+    dist_params = rand_u_generator.get_dist_params()
+    # Solve the equation
+    solver = UM2N.EquationSolver(
+        params={
+            "function_space": res["function_space"],
+            "LHS": res["LHS"],
+            "RHS": res["RHS"],
+            "bc": res["bc"],
+        }
     )
-    parser.add_argument(
-        "--n_samples", type=int, default=100, help="number of samples generated"
+    # original solution field
+    uh = solver.solve_eq()
+
+    mesh_gen = UM2N.MeshGenerator(params={"eq": poisson_eq, "mesh": mesh})
+    monitor_val = mesh_gen.monitor_func(mesh)
+    hessian = mesh_gen.get_hessian(mesh)
+    hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh),
+                                fd.FunctionSpace(mesh, "CG", 1)
+                                )
+
+    # Generate Mesh
+    # hessian = UM2N.MeshGenerator(
+    #     params={
+    #         "eq": poisson_eq,
+    #         "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")),  # noqa
+    #     }
+    # ).get_hessian(mesh)
+
+    # hessian_norm = UM2N.MeshGenerator(
+    #     params={
+    #         "eq": poisson_eq,
+    #         "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")),  # noqa
+    #     }
+    # ).monitor_func(mesh)
+
+    # hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1))
+
+    func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
+    grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space)
+
+    # ej321 - grad_norm copied from build_helmholtz_square.py
+    grad_norm = fd.Function(res["function_space"])
+    grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
+    grad_norm /= grad_norm.vector().max()
+
+    # mesh_gen = UM2N.MeshGenerator(
+    #     params={
+    #         "eq": poisson_eq,
+    #         "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")),  # noqa
+    #     }
+    # )
+
+    start = time.perf_counter()
+    new_mesh = mesh_gen.move_mesh()
+    end = time.perf_counter()
+    dur = (end - start) * 1000
+
+    # this is the jacobian of x with respect to xi
+    jacobian = mesh_gen.get_jacobian()
+    jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1))
+    jacobian_det = mesh_gen.get_jacobian_det()
+    jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1))
+
+    # get phi/grad_phi projected to the original mesh
+    phi = mesh_gen.get_phi()
+    # phi = fd.project(
+    #     phi, fd.FunctionSpace(mesh, "CG", 1)
+    # )
+    grad_phi = mesh_gen.get_grad_phi()
+    # grad_phi = fd.project(
+    #     grad_phi, fd.VectorFunctionSpace(mesh, "CG", 1)
+    # )
+
+    # solve the equation on the new mesh
+    new_res = poisson_eq.discretise(new_mesh)
+    new_solver = UM2N.EquationSolver(
+        params={
+            "function_space": new_res["function_space"],
+            "LHS": new_res["LHS"],
+            "RHS": new_res["RHS"],
+            "bc": new_res["bc"],
+        }
     )
-    parser.add_argument(
-        "--rand_seed", type=int, default=63, help="number of samples generated"
+    uh_new = new_solver.solve_eq()
+
+    # process the data for training
+    mesh_processor = UM2N.MeshProcessor(
+        original_mesh=mesh,
+        optimal_mesh=new_mesh,
+        function_space=new_res["function_space"],
+        use_4_edge=True,
+        feature={
+            "uh": uh.dat.data_ro.reshape(-1, 1),
+            "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added grad_norm
+            "hessian": hessian.dat.data_ro.reshape(-1, 4),
+            "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
+            "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
+            "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
+            "phi": phi.dat.data_ro.reshape(-1, 1),
+            "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added monitor_val
+        },
+        raw_feature={
+            "uh": uh,
+            "hessian_norm": hessian_norm,
+            "monitor_val": monitor_val, # ej321 - added monitor_val
+            "jacobian": jacobian,
+            "jacobian_det": jacobian_det,
+        },
+        dist_params=dist_params,
     )
-    args_ = parser.parse_args()
-    print(args_)
-    return args_
 
+    mesh_processor.save_taining_data(
+        os.path.join(directories["data"], f"data_{i:04d}")  # zero-padded so move_data's data_{i:04d}.npy lookup matches (assumes .npy is appended on save)
+    )
 
-args = arg_parse()
-
-mesh_type = args.mesh_type
+    # ====  Plot Scripts ======================
+    fig = plt.figure(figsize=(15, 10))
+    ax1 = fig.add_subplot(2, 3, 1, projection="3d")
+    # Plot the exact solution
+    ax1.set_title("Exact Solution")
+    fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1)
+    # Plot the solved solution
+    ax2 = fig.add_subplot(2, 3, 2, projection="3d")
+    ax2.set_title("FEM Solution")
+    fd.trisurf(uh, axes=ax2)
+
+    # Plot the solution on a optimal mesh
+    ax3 = fig.add_subplot(2, 3, 3, projection="3d")
+    ax3.set_title("FEM Solution on Optimal Mesh")
+    fd.trisurf(uh_new, axes=ax3)
+
+    # Plot the mesh
+    ax4 = fig.add_subplot(2, 3, 4)
+    ax4.set_title("Original Mesh")
+    fd.triplot(mesh, axes=ax4)
+    ax5 = fig.add_subplot(2, 3, 5)
+    ax5.set_title("Optimal Mesh")
+    fd.triplot(new_mesh, axes=ax5)
+
+    # plot mesh with function evaluated on it
+    ax6 = fig.add_subplot(2, 3, 6)
+    ax6.set_title("Soultion Projected on optimal mesh")
+    fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6)
+    fd.triplot(new_mesh, axes=ax6)
+
+    fig.savefig(os.path.join(directories["plot"], "plot_{}.png".format(i)))
+    # ==========================================
+
+    # generate log file
+    high_res_mesh = unstructured_square_mesh_gen.generate_mesh(
+        res=1e-2,
+        output_filename=os.path.join(directories["mesh"], f"mesh{i}.msh"),
+    )
+    high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1)
 
-data_type = args.field_type
-use_iso = True if data_type == "iso" else False
+    res_high_res = poisson_eq.discretise(high_res_mesh)
+    u_exact = fd.interpolate(
+        res_high_res["u_exact"], res_high_res["function_space"]
+    )
 
-rand_seed = args.rand_seed
-random.seed(rand_seed)
+    uh = fd.project(uh, high_res_function_space)
+    uh_new = fd.project(uh_new, high_res_function_space)
 
-# ====  Parameters ======================
-problem = "poisson"
+    error_original_mesh = fd.errornorm(u_exact, uh)
+    error_optimal_mesh = fd.errornorm(u_exact, uh_new)
 
-n_samples = args.n_samples
+    # Write to CSV
+    with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # Write header (keys)
+        csv_writer.writerow(["error_og", "error_adapt", "time"])
+        # Write data (values)
+        csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur])
+        
+    print("error og/optimal:", error_original_mesh, error_optimal_mesh)
+    
 
-# parameters for domain scale
-scale_x = 1
-scale_y = 1
+if __name__ == "__main__":
 
-# parameters for random source
-max_dist = args.max_dist
-n_dist = args.n_dist
-lc = args.lc
+    # parse args
+    args = parse_arguments()
+    
+    # ====  Parameters ======================
+    parameters = {
+        # parameters for problem
+        "problem": "poisson",
+        # "n_case": args.n_case, # burgers problem only
+        # parameters for random source
+        "n_dist": args.n_dist,
+        "max_dist": args.max_dist,
+        "lc": args.lc,
+        # "n_grid": args.n_grid, # burgers problem only
+        # parameters for dataset size, field type, boundary scheme and mesh type
+        "n_samples": args.n_samples,
+        "data_type": args.field_type,
+        "scheme": args.boundary_scheme,
+        "mesh_type": int(args.mesh_type),
+        # parameters for domain scale
+        "scale_x": 1,
+        "scale_y": 1,
+        # parameters for anisotropic data - distribution height scaler
+        "z_max": 1,
+        "z_min": 0,
+        # parameters for the x/y bounds (presumably of the random source region - TODO confirm)
+        "x_start": 0,
+        "x_end": 1,
+        "y_start": 0,
+        "y_end": 1,
+        # parameters for isotropic data
+        "w_min": 0.05,
+        "w_max": 0.2,
+        "c_min": 0.2 if args.boundary_scheme == "pad" else 0,
+        "c_max": 0.8 if args.boundary_scheme == "pad" else 1,
+        # parameters for dataset challenging level
+        # larger, less challenging (because the gaussian is more like a circle)
+        "sigma_mean_scaler": 1 / 4,
+        "sigma_sigma_scaler": 1 / 6,
+        "sigma_eps": 1 / 8,
+        # parameters for data split
+        "p_train": 0.75,
+        "p_test": 0.15,
+        "p_val": 0.1,
+    }
 
-# parameters for anisotropic data - distribution height scaler
-z_min = 0
-z_max = 1
+    # Set random seed
+    random.seed(args.rand_seed)
 
-# parameters for isotropic data
-w_min = 0.05
-w_max = 0.2
+    # ====  Setup Directories ======================
+    problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
+            parameters["z_min"], parameters["z_max"],
+            parameters["n_dist"],parameters["max_dist"],
+            parameters["lc"], parameters["n_samples"],
+            parameters["data_type"], parameters["scheme"], parameters["mesh_type"]
+        )
 
-scheme = args.boundary_scheme
-c_min = 0.2 if scheme == "pad" else 0
-c_max = 0.8 if scheme == "pad" else 1
+    subdirs = [
+        "data", "plot", "log", "mesh", "mesh_fine",
+        "train", "test", "val",
+    ]
 
-# parameters for data split
-p_train = 0.75
-p_test = 0.15
-p_val = 0.1
+    directories = setup_directories(problem = parameters["problem"],
+                        mesh_type = parameters["mesh_type"],
+                        base_dir = None,
+                        subdirs = subdirs,
+                        dir_format = problem_specific_dir)
 
-num_train = int(n_samples * p_train)
-num_test = int(n_samples * p_test)
-num_val = int(n_samples * p_val)
-# =======================================
 
+    # ====  Output CSV ======================
+    # Keys must match the names used in `parameters` ("c_min"/"c_max", not
+    # "cmin"/"cmax"); unknown keys cannot be looked up there.
+    key_list = ["c_min", "c_max", "data_type", "scheme", "n_samples", "lc",
+                "mesh_type"]
+    output_csv(parameters, key_list, directories["data"])
 
-df = pd.DataFrame(
-    {
-        "cmin": [c_min],
-        "cmax": [c_max],
-        "data_type": [data_type],
-        "scheme": [scheme],
-        "n_samples": [n_samples],
-        "lc": [lc],
-        "mesh_type": [mesh_type],
-    }
-)
+    # ====  Data Generation Scripts ======================
+    for i in range(parameters["n_samples"]):
+        try:
+            print(f"Generating Sample: {i}")
 
+            # create dataset
+            process_features(parameters, directories)
 
-def move_data(target, source, start, num_file):
-    if not os.path.exists(target):
-        os.makedirs(target)
-    else:
-        # delete all files under the directory
-        filelist = [f for f in os.listdir(target)]
-        for f in filelist:
-            os.remove(os.path.join(target, f))
-    # copy data from data dir to train dir
-    for i in range(start, num_file):
-        shutil.copy(
-            os.path.join(source, "data_{}.npy".format(i)),
-            os.path.join(target, "data_{}.npy".format(i)),
-        )
+        except fd.exceptions.ConvergenceError:
+            print(f"Iteration {i} did not converge.")
+            continue
 
+    # ====  Data Splits ============================================
+    num_train = int(parameters["n_samples"] * parameters["p_train"])
+    num_test = int(parameters["n_samples"] * parameters["p_test"])
+    num_val = parameters["n_samples"] - num_train - num_test
 
-project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__))))
-dataset_dir = os.path.join(
-    project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
-)  # noqa
-problem_specific_dir = os.path.join(
-    dataset_dir,
-    "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
-        z_min, z_max, n_dist, max_dist, lc, n_samples, data_type, scheme, mesh_type
-    ),
-)
-
-
-problem_data_dir = os.path.join(problem_specific_dir, "data")
-problem_plot_dir = os.path.join(problem_specific_dir, "plot")
-problem_log_dir = os.path.join(problem_specific_dir, "log")
-
-problem_mesh_dir = os.path.join(problem_specific_dir, "mesh")
-problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine")
-problem_train_dir = os.path.join(problem_specific_dir, "train")
-problem_test_dir = os.path.join(problem_specific_dir, "test")
-problem_val_dir = os.path.join(problem_specific_dir, "val")
-
-if not os.path.exists(problem_mesh_dir):
-    os.makedirs(problem_mesh_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_dir, f))
-
-if not os.path.exists(problem_mesh_fine_dir):
-    os.makedirs(problem_mesh_fine_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_fine_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_fine_dir, f))
-
-if not os.path.exists(problem_data_dir):
-    os.makedirs(problem_data_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_data_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_data_dir, f))
-
-if not os.path.exists(problem_plot_dir):
-    os.makedirs(problem_plot_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_plot_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_plot_dir, f))
-
-if not os.path.exists(problem_log_dir):
-    os.makedirs(problem_log_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_log_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_log_dir, f))
-
-df.to_csv(os.path.join(problem_specific_dir, "info.csv"))
+    move_data(directories["train"], directories["data"], 0, num_train)
+    move_data(directories["test"], directories["data"], num_train, num_train + num_test)
+    move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val)
 
 
 # ====  Data Generation Scripts ======================
 if __name__ == "__main__":
     print("In build_dataset.py")
-    i = 0
-    while i < n_samples:
+    # i = 0
+    # while i < n_samples:
+    for i in range(parameters["n_samples"]):
         try:
             print("Generating Sample: " + str(i))
-            unstructured_square_mesh_gen = UM2N.UnstructuredSquareMesh(
-                scale=scale_x, mesh_type=mesh_type
-            )  # noqa
-            mesh = unstructured_square_mesh_gen.generate_mesh(
-                res=lc, output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh")
-            )
-            # Generate Random solution field
-            rand_u_generator = UM2N.RandSourceGenerator(
-                use_iso=use_iso,
-                dist_params={
-                    "max_dist": max_dist,
-                    "n_dist": n_dist,
-                    "x_start": 0,
-                    "x_end": 1,
-                    "y_start": 0,
-                    "y_end": 1,
-                    "z_max": z_max,
-                    "z_min": z_min,
-                    "w_min": w_min,
-                    "w_max": w_max,
-                    "c_min": c_min,
-                    "c_max": c_max,
-                },
-            )
-            poisson_eq = UM2N.RandPoissonEqGenerator(rand_u_generator)
-            res = poisson_eq.discretise(mesh)  # discretise the equation
-            dist_params = rand_u_generator.get_dist_params()
-            # Solve the equation
-            solver = UM2N.EquationSolver(
-                params={
-                    "function_space": res["function_space"],
-                    "LHS": res["LHS"],
-                    "RHS": res["RHS"],
-                    "bc": res["bc"],
-                }
-            )
-            uh = solver.solve_eq()
-            # Generate Mesh
-            hessian = UM2N.MeshGenerator(
-                params={
-                    "eq": poisson_eq,
-                    "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")),  # noqa
-                }
-            ).get_hessian(mesh)
-
-            hessian_norm = UM2N.MeshGenerator(
-                params={
-                    "eq": poisson_eq,
-                    "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")),  # noqa
-                }
-            ).monitor_func(mesh)
-
-            hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1))
-
-            func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
-            grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space)
-
-            mesh_gen = UM2N.MeshGenerator(
-                params={
-                    "eq": poisson_eq,
-                    "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")),  # noqa
-                }
-            )
-
-            start = time.perf_counter()
-            new_mesh = mesh_gen.move_mesh()
-            end = time.perf_counter()
-            dur = (end - start) * 1000
-
-            # this is the jacobian of x with respect to xi
-            jacobian = mesh_gen.get_jacobian()
-            jacobian = fd.project(jacobian, fd.TensorFunctionSpace(new_mesh, "CG", 1))
-            jacobian_det = mesh_gen.get_jacobian_det()
-            jacobian_det = fd.project(jacobian_det, fd.FunctionSpace(new_mesh, "CG", 1))
-
-            # get phi/grad_phi projected to the original mesh
-            phi = mesh_gen.get_phi()
-            # phi = fd.project(
-            #     phi, fd.FunctionSpace(mesh, "CG", 1)
-            # )
-            grad_phi = mesh_gen.get_grad_phi()
-            # grad_phi = fd.project(
-            #     grad_phi, fd.VectorFunctionSpace(mesh, "CG", 1)
-            # )
-
-            # solve the equation on the new mesh
-            new_res = poisson_eq.discretise(new_mesh)
-            new_solver = UM2N.EquationSolver(
-                params={
-                    "function_space": new_res["function_space"],
-                    "LHS": new_res["LHS"],
-                    "RHS": new_res["RHS"],
-                    "bc": new_res["bc"],
-                }
-            )
-            uh_new = new_solver.solve_eq()
-
-            # process the data for training
-            mesh_processor = UM2N.MeshProcessor(
-                original_mesh=mesh,
-                optimal_mesh=new_mesh,
-                function_space=new_res["function_space"],
-                use_4_edge=True,
-                feature={
-                    "uh": uh.dat.data_ro.reshape(-1, 1),
-                    "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
-                    "hessian": hessian.dat.data_ro.reshape(-1, 4),
-                    "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
-                    "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
-                    "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
-                    "phi": phi.dat.data_ro.reshape(-1, 1),
-                    "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
-                },
-                raw_feature={
-                    "uh": uh,
-                    "hessian_norm": hessian_norm,
-                    "jacobian": jacobian,
-                    "jacobian_det": jacobian_det,
-                },
-                dist_params=dist_params,
-            )
-
-            mesh_processor.save_taining_data(
-                os.path.join(problem_data_dir, "data_{}".format(i))
-            )
-
-            # ====  Plot Scripts ======================
-            fig = plt.figure(figsize=(15, 10))
-            ax1 = fig.add_subplot(2, 3, 1, projection="3d")
-            # Plot the exact solution
-            ax1.set_title("Exact Solution")
-            fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1)
-            # Plot the solved solution
-            ax2 = fig.add_subplot(2, 3, 2, projection="3d")
-            ax2.set_title("FEM Solution")
-            fd.trisurf(uh, axes=ax2)
-
-            # Plot the solution on a optimal mesh
-            ax3 = fig.add_subplot(2, 3, 3, projection="3d")
-            ax3.set_title("FEM Solution on Optimal Mesh")
-            fd.trisurf(uh_new, axes=ax3)
-
-            # Plot the mesh
-            ax4 = fig.add_subplot(2, 3, 4)
-            ax4.set_title("Original Mesh")
-            fd.triplot(mesh, axes=ax4)
-            ax5 = fig.add_subplot(2, 3, 5)
-            ax5.set_title("Optimal Mesh")
-            fd.triplot(new_mesh, axes=ax5)
-
-            # plot mesh with function evaluated on it
-            ax6 = fig.add_subplot(2, 3, 6)
-            ax6.set_title("Soultion Projected on optimal mesh")
-            fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6)
-            fd.triplot(new_mesh, axes=ax6)
-
-            fig.savefig(os.path.join(problem_plot_dir, "plot_{}.png".format(i)))
-            # ==========================================
-
-            # generate log file
-            high_res_mesh = unstructured_square_mesh_gen.generate_mesh(
-                res=1e-2,
-                output_filename=os.path.join(problem_mesh_fine_dir, f"mesh{i}.msh"),
-            )
-            high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1)
-
-            res_high_res = poisson_eq.discretise(high_res_mesh)
-            u_exact = fd.interpolate(
-                res_high_res["u_exact"], res_high_res["function_space"]
-            )
-
-            uh = fd.project(uh, high_res_function_space)
-            uh_new = fd.project(uh_new, high_res_function_space)
-
-            error_original_mesh = fd.errornorm(u_exact, uh)
-            error_optimal_mesh = fd.errornorm(u_exact, uh_new)
-
-            df = pd.DataFrame(
-                {
-                    "error_og": error_original_mesh,
-                    "error_adapt": error_optimal_mesh,
-                    "time": dur,
-                },
-                index=[0],
-            )
-            df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i)))
-            print("error og/optimal:", error_original_mesh, error_optimal_mesh)
-            i += 1
+            # create dataset
+            process_features(parameters, directories)
+        #    i += 1
         except fd.exceptions.ConvergenceError:
             pass
         except AttributeError:
@@ -409,14 +496,12 @@ def move_data(target, source, start, num_file):
         except ValueError:
             pass
 
-    move_data(problem_train_dir, problem_data_dir, 0, num_train)
+    # ====  Data Splits ============================================
+    num_train = int(parameters["n_samples"] * parameters["p_train"])
+    num_test = int(parameters["n_samples"] * parameters["p_test"])
+    num_val = parameters["n_samples"] - num_train - num_test
 
-    move_data(problem_test_dir, problem_data_dir, num_train, num_train + num_test)
+    move_data(directories["train"], directories["data"], 0, num_train)
+    move_data(directories["test"], directories["data"], num_train, num_train + num_test)
+    move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val)
 
-    move_data(
-        problem_val_dir,
-        problem_data_dir,
-        num_train + num_test,
-        num_train + num_test + num_val,
-    )
-# ====  Data Generation Scripts ======================
diff --git a/script/build_swirl.py b/script/build_swirl.py
index c6e0859..4ea7814 100644
--- a/script/build_swirl.py
+++ b/script/build_swirl.py
@@ -1,6 +1,6 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
-
+import csv
 import os
 import shutil
 from argparse import ArgumentParser
@@ -9,187 +9,179 @@
 import matplotlib.pyplot as plt
 import pandas as pd
 
-import UM2N
-
-
-def arg_parse():
-    parser = ArgumentParser()
-    parser.add_argument(
-        "--mesh_type", type=int, default=6, help="algorithm used to generate mesh"
-    )
-    parser.add_argument(
-        "--sigma",
-        type=float,
-        default=(0.05 / 3),
-        help="sigma used to control the initial ring shape",
-    )
-    parser.add_argument(
-        "--r_0", type=float, default=0.2, help="radius of the initial ring"
-    )
-    parser.add_argument(
-        "--x_0", type=float, default=0.5, help="center of the ring in x"
-    )
-    parser.add_argument(
-        "--y_0", type=float, default=0.5, help="center of the ring in y"
-    )
-    parser.add_argument(
-        "--alpha",
-        type=float,
-        default=1.5,
-        help="scalar coefficient of the swirl (velocity)",
-    )
-    parser.add_argument(
-        "--save_interval", type=int, default=10, help="interval for stroing sample file"
-    )
-    parser.add_argument(
-        "--lc",
-        type=float,
-        default=5e-2,
-        help="the length characteristic of the elements in the\
-                            mesh (if using unstructured mesh)",
-    )
-    parser.add_argument(
-        "--n_grid",
-        type=int,
-        default=20,
-        help="number of grids in a mesh (only appliable when\
-                                mesh_type is 0)",
-    )
-    parser.add_argument(
-        "--n_monitor_smooth",
-        type=int,
-        default=10,
-        help="number of times for applying a Laplacian smoother for monitor function",
-    )
-    args_ = parser.parse_args()
-    print(args_)
-    return args_
-
+# import UM2N
 
-args = arg_parse()
+# import pandas as pd
+from firedrake.__future__ import interpolate
 
-mesh_type = args.mesh_type
-
-# ====  Parameters ======================
-problem = "swirl"
+# Add the parent directory to the Python path
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import UM2N
 
-# simulation time & time steps
-T = 1
-# n_step = 1000 # * 2 # The CFL condition requires that the timestep is less than 0.0014 for fine mesh
-# dt = T / n_step
-dt = 1e-3  # * 2 # The CFL condition requires that the timestep is less than 0.0014 for fine mesh
-n_step = 1000
+def parse_arguments():
+    """Parse the command-line arguments of the swirl dataset builder.
+
+    Returns:
+        argparse.Namespace: The parsed options (mesh_type, sigma, r_0, x_0,
+        y_0, alpha, save_interval, lc, n_grid, n_monitor_smooth).
+    """
+    parser = ArgumentParser(description="Build swirl dataset.")
+    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.")
+    parser.add_argument("--sigma", type=float, default=(0.05 / 3), help="initial ring shape control")
+    parser.add_argument("--r_0", type=float, default=0.2, help="initial ring radius")
+    parser.add_argument("--x_0", type=float, default=0.5, help="ring center x coordinate")
+    parser.add_argument("--y_0", type=float, default=0.5, help="ring center y coordinate")
+    parser.add_argument("--alpha", type=float, default=1.5, help="swirl (velocity) scalar coefficient")
+    parser.add_argument("--save_interval", type=int, default=10, help="output sample file interval")
+    parser.add_argument("--lc", type=float, default=5e-2, help="Length characteristic of unstructured mesh elements.")
+    parser.add_argument("--n_grid", type=int, default=20, help="number number of grids in a mesh when mesh_type is 0)")
+    parser.add_argument("--n_monitor_smooth", type=int, default=10, help="apply Laplacian smoother n time to monitor function")
+
+    # Parse exactly once so that the namespace echoed below is the same
+    # object that is returned to the caller.
+    parsed_args = parser.parse_args()
+
+    # QC:
+    print(parsed_args)
+
+    return parsed_args
+
+
+def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+    """
+    Create the output directories of one dataset build, deleting files already in them.
+
+    Paths: <project>/data/dataset_meshtype_<mesh_type>/<problem>/<run_dir>/<subdir>.
+
+    Args:
+        problem (str): Problem name; used as a path component.
+        mesh_type: Mesh type identifier; used in the dataset directory name.
+        base_dir (str, optional): Project root. If falsy, the parent of the
+            directory containing this script is used.
+        subdirs (list, optional): Subdirectory names to create. Defaults to
+            data, plot, log, mesh, mesh_fine, plot_compare, train, test, val.
+        dir_format (str, optional): Name of the run directory, used verbatim
+            (no placeholder substitution happens here). Defaults to
+            "<problem>_meshtype_<mesh_type>".
+
+    Returns:
+        dict: Maps each subdirectory name to its full path.
+
+    Raises:
+        ValueError: If `dir_format` is given but is not a string.
+    """
 
-# mesh setup
-lc = args.lc
-# n_grid = args.n_grid
-n_grid = int(1 / lc)
+    # Define the project directory
+    if base_dir:
+        project_dir = os.path.abspath(base_dir)
+    else:
+        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    
+    # QC:
+    print(f"Project Directory: {project_dir}")
 
-# number of times for applying a Laplacian smoother for monitor function
-n_monitor_smooth = args.n_monitor_smooth
+    # Define the dataset directory
+    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
 
-# parameters for domain scale
-scale_x = 1
-scale_y = 1
+    # Pick the run directory name: default name, or the caller-supplied one
+    if dir_format is None:
+        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+    else:
+        # dir_format is joined into the path as-is, so it must be a string
+        if not isinstance(dir_format, str):
+            raise ValueError("dir_format must be a string.")
+        problem_specific_dir = os.path.join(dataset_dir, dir_format)
+
+    # Define default subdirectories if not provided
+    if subdirs is None:
+        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
+                   "plot_compare", "train", "test", "val"]
+
+    # Create and clear directories
+    directories = {}
+    for subdir in subdirs:
+        dir_path = os.path.join(problem_specific_dir, subdir)
+        if not os.path.exists(dir_path):
+            os.makedirs(dir_path)
+        else:
+            # Clear the directory (os.remove handles files only, not nested directories)
+            for file in os.listdir(dir_path):
+                os.remove(os.path.join(dir_path, file))
+        directories[subdir] = dir_path
+
+    # QC:
+    print(f"Subdirectories created: {directories}")
+
+    return directories
+
+def output_csv(parameters, key_list, output_dir):
+    """
+    Dump the chosen entries of `parameters` to ``info.csv`` (header + one row).
 
-# params for initial condition
-sigma = args.sigma
-r_0 = args.r_0
-alpha = args.alpha
-x_0 = args.x_0
-y_0 = args.y_0
+    Args:
+        parameters (dict): Source of the values.
+        key_list (list): Keys to export, in column order; keys missing from
+            `parameters` are skipped silently.
+        output_dir (str): Existing directory that receives ``info.csv``.
+    """
+    # Collect header and values in one pass, preserving key_list order
+    header, row = [], []
+    for key in key_list:
+        if key not in parameters:
+            continue
+        header.append(key)
+        row.append(parameters[key])
+
+    # newline="" lets the csv module control line endings itself
+    target = os.path.join(output_dir, "info.csv")
+    with open(target, mode="w", newline="") as csvfile:
+        # First the header (keys), then the single data row (values)
+        csv.writer(csvfile).writerows([header, row])
+        
+def move_data(target, source, start, num_files):
+    """
+    Copy (not move) data_<index>.npy files, index zero-padded to 4 digits, from source to target.
 
-# params for stroing files
-save_interval = args.save_interval
-# list storing failing dts
-fail_t = []
+    Args:
+        target (str): Destination directory; created if missing, emptied of files otherwise.
+        source (str): Directory holding the data_XXXX.npy files; left untouched.
+        start (int): Index of the first file to copy.
+        num_files (int): Number of files to copy, i.e. indices start .. start + num_files - 1.
 
-# =======================================
+    Raises:
+        FileNotFoundError: If the source directory does not exist.
+        ValueError: If start is negative or num_files is not positive.
+    """
+    if not os.path.exists(source):
+        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
 
+    if start < 0 or num_files <= 0:
+        raise ValueError("Invalid start index or number of files to move.")
 
-def move_data(target, source, start, num_file):
+    # Create the target directory if it doesn't exist
     if not os.path.exists(target):
         os.makedirs(target)
     else:
-        # delete all files under the directory
-        filelist = [f for f in os.listdir(target)]
-        for f in filelist:
-            os.remove(os.path.join(target, f))
-    # copy data from data dir to train dir
-    for i in range(start, num_file):
-        shutil.copy(
-            os.path.join(source, "data_{}.npy".format(i)),
-            os.path.join(target, "data_{}.npy".format(i)),
-        )
-
+        # Clear the target directory by removing all files
+        for file in os.listdir(target):
+            os.remove(os.path.join(target, file))
+
+    # Copy indices [start, start + num_files); missing/failing files are reported and skipped
+    for i in range(start, start + num_files):
+        try:
+            # Copy the data file under the same name
+            shutil.copy(
+                os.path.join(source, f"data_{i:04d}.npy"),
+                os.path.join(target, f"data_{i:04d}.npy"),
+            )
+        except FileNotFoundError:
+            print(f"File data_{i:04d}.npy not found in {source}. Skipping.")
+            continue
+        except Exception as e:
+            print(f"An error occurred while copying data_{i:04d}.npy: {e}")
+            continue
 
-project_dir = os.path.dirname(os.path.dirname((os.path.abspath(__file__))))
-dataset_dir = os.path.join(
-    project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
-)  # noqa
-problem_specific_dir = os.path.join(
-    dataset_dir,
-    f"sigma_{sigma:.3f}_alpha_{alpha}_r0_{r_0}_x0_{x_0}_y0_{y_0}_lc_{lc}_ngrid_{n_grid}_interval_{save_interval}_meshtype_{mesh_type}_smooth_{n_monitor_smooth}",
-)  # noqa
-
-
-problem_data_dir = os.path.join(problem_specific_dir, "data")
-problem_plot_dir = os.path.join(problem_specific_dir, "plot")
-problem_plot_compare_dir = os.path.join(problem_specific_dir, "plot_compare")
-problem_log_dir = os.path.join(problem_specific_dir, "log")
-problem_mesh_dir = os.path.join(problem_specific_dir, "mesh")
-problem_mesh_fine_dir = os.path.join(problem_specific_dir, "mesh_fine")
-
-
-if not os.path.exists(problem_data_dir):
-    os.makedirs(problem_data_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_data_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_data_dir, f))
-
-if not os.path.exists(problem_plot_dir):
-    os.makedirs(problem_plot_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_plot_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_plot_dir, f))
-
-if not os.path.exists(problem_plot_compare_dir):
-    os.makedirs(problem_plot_compare_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_plot_compare_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_plot_compare_dir, f))
-
-if not os.path.exists(problem_log_dir):
-    os.makedirs(problem_log_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_log_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_log_dir, f))
-
-if not os.path.exists(problem_mesh_dir):
-    os.makedirs(problem_mesh_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_dir, f))
-
-if not os.path.exists(problem_mesh_fine_dir):
-    os.makedirs(problem_mesh_fine_dir)
-else:
-    # delete all files under the directory
-    filelist = [f for f in os.listdir(problem_mesh_fine_dir)]
-    for f in filelist:
-        os.remove(os.path.join(problem_mesh_fine_dir, f))
-
-i = 0
 
 
 def fail_callback(t):
@@ -221,6 +213,8 @@ def sample_from_loop(
     sigma,
     alpha,
     r_0,
+    x_0, # ej321 - added x_0
+    y_0, # ej321 - added y_0
     t,
     error_og_list=[],
     error_adapt_list=[],
@@ -267,7 +261,7 @@ def sample_from_loop(
         dur=dur,
     )
 
-    mesh_processor.save_taining_data(os.path.join(problem_data_dir, f"data_{i:04d}"))
+    mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}"))
 
     # # ====  Plot Scripts ======================
     # fig = plt.figure(figsize=(15, 10))
@@ -314,24 +308,24 @@ def sample_from_loop(
     # ax.legend()
     # plt.show()
 
-    # ==========================================
+    # ====  Log File ============================================
     # function_space_fine = fd.FunctionSpace(mesh_fine, 'CG', 1)
     uh_proj = fd.project(uh, function_space_fine)
     uh_new_proj = fd.project(uh_new, function_space_fine)
 
     error_original_mesh = fd.errornorm(uh_proj, uh_fine, norm_type="L2")
     error_optimal_mesh = fd.errornorm(uh_new_proj, uh_fine, norm_type="L2")
-    df = pd.DataFrame(
-        {
-            "error_og": error_original_mesh,
-            "error_adapt": error_optimal_mesh,
-            "time": dur,
-        },
-        index=[0],
-    )
-    df.to_csv(os.path.join(problem_log_dir, f"log{i:04d}.csv"))
+
+    # Write to CSV
+    with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # Write header (keys)
+        csv_writer.writerow(["error_og", "error_adapt", "time"])
+        # Write data (values)
+        csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur])
     print("error og/optimal:", error_original_mesh, error_optimal_mesh)
 
+
     # ====  Plot mesh, solution, error ======================
     rows, cols = 3, 3
     fig, ax = plt.subplots(
@@ -405,32 +399,141 @@ def sample_from_loop(
         for cc in range(cols):
             ax[rr, cc].set_aspect("equal", "box")
 
-    fig.savefig(os.path.join(problem_plot_compare_dir, f"plot_{i:04d}.png"))
+    fig.savefig(os.path.join(directories["plot_compare"], f"plot_{i:04d}.png"))
     plt.close()
     i += 1
     return
 
 
-# ====  Data Generation Scripts ======================
 if __name__ == "__main__":
+
+    # parse args
+    args = parse_arguments()
+    
+    # ====  Parameters ======================
+    parameters = {
+        # parameters for problem
+        "problem": "swirl",
+        # parameters for simulation time & time steps
+        "T": 1,
+        "dt": 1e-3,  # The CFL condition requires that the timestep is less than 0.0014 for fine mesh
+        "n_step": 1000,
+        # "n_case": args.n_case, # burgers problem only
+        # parameters for random source
+        # "n_dist": args.n_dist,
+        # "max_dist": args.max_dist,
+        "lc": args.lc,
+        "n_grid": args.n_grid if args.n_grid else int(1 / args.lc),  # `lc` is not bound yet here; read it from args
+        # parameters for ??????
+        # "n_samples": args.n_samples,
+        # "data_type": args.field_type,
+        # "scheme": args.boundary_scheme,
+        "mesh_type": int(args.mesh_type),
+        "n_monitor_smooth": args.n_monitor_smooth,
+        # parameters for domain scale
+        "scale_x": 1,
+        "scale_y": 1,
+        # parameters for anisotropic data - distribution height scaler
+        # "z_max": 1,
+        # "z_min": 0,
+        # parameters for ?????
+        # "x_start": 0,
+        # "x_end": 1,
+        # "y_start": 0,
+        # "y_end": 1,
+        # parameters for initial condition
+        "sigma": args.sigma,
+        "r_0": args.r_0,
+        "x_0": args.x_0,
+        "y_0": args.y_0,
+        "alpha": args.alpha,
+        # parameters for storing files
+        "save_interval": args.save_interval,
+        "fail_t": [],  # list storing failing dts
+
+        # parameters for isotropic data
+        # "w_min": 0.05,
+        # "w_max": 0.2,
+        # "c_min": 0.2 if args.boundary_scheme == "pad" else 0,
+        # "c_max": 0.8 if args.boundary_scheme == "pad" else 1,
+        # parameters for dataset challenging level
+        # larger, less challenging (because the gaussian is more like a circle)
+        # "sigma_mean_scaler": 1 / 4,
+        # "sigma_sigma_scaler": 1 / 6,
+        # "sigma_eps": 1 / 8,
+        # parameters for data split
+        # "p_train": 0.75,
+        # "p_test": 0.15,
+        # "p_val": 0.1,
+    }
+
+    # # Set random seed
+    # random.seed(args.rand_seed)
+
+    # ====  Setup Directories ======================
+    problem_specific_dir = "sigma_{:.3f}_alpha_{}_r0_{}_x0_{}_y0_{}_lc_{}_ngrid_{}_interval_{}_meshtype_{}_smooth_{}".format(
+            parameters["sigma"], parameters["alpha"],
+            parameters["r_0"], parameters["x_0"], parameters["y_0"],
+            parameters["lc"], parameters["n_grid"],
+            parameters["save_interval"], parameters["mesh_type"],
+            parameters["n_monitor_smooth"]
+    )
+
+    subdirs = [
+        "data", "plot","plot_compare","log", "mesh", "mesh_fine",
+        # "train", "test", "val",
+    ]
+
+    directories = setup_directories(problem = parameters["problem"],
+                        mesh_type = parameters["mesh_type"],
+                        base_dir = None,
+                        subdirs = subdirs,
+                        dir_format = problem_specific_dir)
+
+
+    # ====  Output CSV ======================
+    key_list = [
+            "sigma",
+            "alpha",
+            "r_0",
+            "x_0",
+            "y_0",
+            "save_interval",
+            "T",
+            "n_step",
+            "dt",
+            "fail_t",
+            "lc",
+            "fail_cases",
+            "mesh_type",
+    ]
+    output_csv(parameters, key_list, directories["data"])
+
+    # ====  Data Generation Scripts ======================
     print("In build_dataset.py")
+
+    i = 0  # global variable to count the number of samples
     mesh = None
     mesh_fine = None
     mesh_new = None
+    mesh_type = parameters["mesh_type"]
+    lc = parameters["lc"]
+    n_grid = parameters["n_grid"]
     if mesh_type != 0:
-        mesh_gen = UM2N.UnstructuredSquareMesh(mesh_type=mesh_type)
+        mesh_gen = UM2N.UnstructuredSquareMeshGenerator(mesh_type=mesh_type)
         mesh = mesh_gen.generate_mesh(
-            res=lc, output_filename=os.path.join(problem_mesh_dir, "mesh.msh")
+            res=lc, output_filename=os.path.join(directories["mesh"], "mesh.msh")
         )
         mesh_new = mesh_gen.generate_mesh(
-            res=lc, output_filename=os.path.join(problem_mesh_dir, "mesh.msh")
+            res=lc, output_filename=os.path.join(directories["mesh"], "mesh.msh")
         )
         mesh_model = mesh_gen.generate_mesh(
-            res=lc, output_filename=os.path.join(problem_mesh_dir, "mesh.msh")
+            res=lc, output_filename=os.path.join(directories["mesh"], "mesh.msh")
         )
-        mesh_gen_fine = UM2N.UnstructuredSquareMesh(mesh_type=mesh_type)
+        # ej321 - is this extra call to mesh gen needed?
+        mesh_gen_fine = UM2N.UnstructuredSquareMeshGenerator(mesh_type=mesh_type)
         mesh_fine = mesh_gen_fine.generate_mesh(
-            res=1e-2, output_filename=os.path.join(problem_mesh_fine_dir, "mesh.msh")
+            res=1e-2, output_filename=os.path.join(directories["mesh_fine"], "mesh.msh")
         )
     else:
         mesh = fd.UnitSquareMesh(n_grid, n_grid)
@@ -438,45 +541,26 @@ def sample_from_loop(
         mesh_model = fd.UnitSquareMesh(n_grid, n_grid)
         mesh_fine = fd.UnitSquareMesh(100, 100)
 
-    df = pd.DataFrame(
-        {
-            "sigma": [sigma],
-            "alpha": [alpha],
-            "r_0": [r_0],
-            "x_0": [x_0],
-            "y_0": [y_0],
-            "save_interval": [save_interval],
-            "T": [T],
-            "n_step": [n_step],
-            "dt": [dt],
-            "fail_t": [fail_t],
-            "lc": [lc],
-            "num_fail_cases": [len(fail_t)],
-            "mesh_type": [mesh_type],
-        }
-    )
-
-    df.to_csv(os.path.join(problem_specific_dir, "info.csv"))
-
     # solver defination
     swirl_solver = UM2N.SwirlSolver(
         mesh,
         mesh_fine,
         mesh_new,
         mesh_model=mesh_model,
-        sigma=sigma,
-        alpha=alpha,
-        r_0=r_0,
-        x_0=x_0,
-        y_0=y_0,
-        save_interval=save_interval,
-        T=T,
-        dt=dt,
-        n_step=n_step,
-        n_monitor_smooth=n_monitor_smooth,
+        **parameters
+        # sigma=sigma,
+        # alpha=alpha,
+        # r_0=r_0,
+        # x_0=x_0,
+        # y_0=y_0,
+        # save_interval=save_interval,
+        # T=T,
+        # dt=dt,
+        # n_step=n_step,
+        # n_monitor_smooth=n_monitor_smooth,
     )
 
     swirl_solver.solve_problem(callback=sample_from_loop, fail_callback=fail_callback)
 
     print("Done!")
-# ====  Data Generation Scripts ======================
+
diff --git a/script/make_dataset_helm_train.sh b/script/make_dataset_helm_train.sh
index eaaaa93..b919098 100644
--- a/script/make_dataset_helm_train.sh
+++ b/script/make_dataset_helm_train.sh
@@ -18,7 +18,7 @@ for mt in "${mesh_type[@]}"; do
     for i in "${lcs[@]}"; do
         for n_s in "${n_samples_train[@]}"; do
             echo "lc = $i meshtype = $mt num samples = $n_s"
-            python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_s --field_type="aniso" --boundary_scheme="full" --mesh_type=$mt
+            python build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_s --field_type="aniso" --boundary_scheme="full" --mesh_type=$mt
             # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="pad" --mesh_type=$mesh_type
             # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="full" --mesh_type=$mesh_type
             # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed $rand_seed --n_samples $n_samples_train --field_type "aniso" --boundary_scheme "pad" --mesh_type=$mesh_type

From c4c8cf261bfcc3c57fc21ec9c53ae2175371aa92 Mon Sep 17 00:00:00 2001
From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com>
Date: Fri, 4 Jul 2025 11:26:48 +0100
Subject: [PATCH 2/7] move partitioning to function and minor edits

---
 script/build_burgers_square.py   | 209 ++++++++++++++------
 script/build_helmholtz_poly.py   | 274 ++++++++++++++++----------
 script/build_helmholtz_square.py | 291 +++++++++++++++++----------
 script/build_poisson_poly.py     | 266 +++++++++++++++----------
 script/build_poisson_square.py   | 325 ++++++++++++++++---------------
 script/build_swirl.py            | 180 ++++++++++-------
 6 files changed, 932 insertions(+), 613 deletions(-)

diff --git a/script/build_burgers_square.py b/script/build_burgers_square.py
index c82043a..6266040 100644
--- a/script/build_burgers_square.py
+++ b/script/build_burgers_square.py
@@ -9,28 +9,48 @@
 
 import firedrake as fd
 import matplotlib.pyplot as plt
-# import pandas as pd
 
-# dd the parent directory to the Python path
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
 
 def parse_arguments():
     """Parse command-line arguments."""
     parser = ArgumentParser(description="Build Burgers dataset with square meshes.")
-    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.")
-    parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.")
-    parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.")
-    parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.")
-    parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).")
+    parser.add_argument(
+        "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh."
+    )
+    parser.add_argument(
+        "--max_dist", type=int, default=6, help="Max number of distributions."
+    )
+    parser.add_argument(
+        "--n_dist", type=int, default=None, help="Number of distributions."
+    )
+    parser.add_argument(
+        "--lc", type=float, default=6e-2, help="Length characteristic of mesh elements."
+    )
+    parser.add_argument(
+        "--field_type", type=str, default="iso", help="Data type (aniso/iso)."
+    )
     # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
-    parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).")
-    parser.add_argument("--n_case", type=int, default=5, help="Number of simulation cases.")
-    parser.add_argument("--n_grid", type=int, default=20, help="Number of grids for uniform mesh if mesh_type 0.")
-    parser.add_argument("--rand_seed", type=int, default=63, help="number of samples generated / Random seed for reproducibility.")
-    
+    parser.add_argument(
+        "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)."
+    )
+    parser.add_argument(
+        "--n_case", type=int, default=5, help="Number of simulation cases."
+    )
+    parser.add_argument(
+        "--n_grid",
+        type=int,
+        default=20,
+        help="Number of grids for uniform mesh if mesh_type 0.",
+    )
+    parser.add_argument(
+        "--rand_seed",
+        type=int,
+        default=63,
+        help="number of samples generated / Random seed for reproducibility.",
+    )
+
     parsed_args = parser.parse_args()
 
     # Handle dependency between max_dist and n_dist
@@ -40,11 +60,12 @@ def parse_arguments():
         parsed_args.max_dist = None  # Disable max_dist if n_dist is set
         print("Warning: max_dist is ignored because n_dist is set.")
     # QC:
-    print(parsed_args)
-    
+    # print(parsed_args)
+
     return parser.parse_args()
 
-def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+
+def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
     """
     Set up directories for storing data, plots, and logs.
 
@@ -73,16 +94,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         project_dir = os.path.abspath(base_dir)
     else:
         project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    
+
     # QC:
     print(f"Project Directory: {project_dir}")
 
     # Define the dataset directory
-    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
+    dataset_dir = os.path.join(
+        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
+    )
 
     # Use the provided format string for the problem-specific directory
     if dir_format is None:
-        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+        problem_specific_dir = os.path.join(
+            dataset_dir, f"{problem}_meshtype_{mesh_type}"
+        )
     else:
         # check if dir_format is a valid string format
         if not isinstance(dir_format, str):
@@ -91,8 +116,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
 
     # Define default subdirectories if not provided
     if subdirs is None:
-        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
-                   "plot_compare", "train", "test", "val"]
+        subdirs = [
+            "data",
+            "plot",
+            "log",
+            "mesh",
+            "mesh_fine",
+            "plot_compare",
+            "train",
+            "test",
+            "val",
+        ]
 
     # Create and clear directories
     directories = {}
@@ -107,10 +141,11 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         directories[subdir] = dir_path
 
     # QC:
-    print(f"Subdirectories created: {directories}")
+    # print(f"Subdirectories created: {directories}")
 
     return directories
 
+
 def output_csv(parameters, key_list, output_dir):
     """
     Write selected parameters to a CSV file.
@@ -137,6 +172,7 @@ def output_csv(parameters, key_list, output_dir):
 
     print(f"Parameters saved to {csv_file_path}")
 
+
 def move_data(target, source, start, num_files):
     """
     Move data files from the source directory to the target directory.
@@ -179,17 +215,22 @@ def move_data(target, source, start, num_files):
         except Exception as e:
             print(f"An error occurred while copying data_{i:04d}.npy: {e}")
             continue
-            
-def generate_mesh(parameters, dirs):
+
+
+def generate_mesh(parameters, directories):
     """Generate the mesh based on the specified type."""
     if parameters["mesh_type"] != 0:
-        mesh_gen = UM2N.UnstructuredSquareMeshGenerator(scale=parameters["scale_x"],
-        mesh_type=parameters["mesh_type"])
-        mesh = mesh_gen.generate_mesh(res=parameters["lc"],
-        output_filename=os.path.join(dirs["mesh"], "mesh.msh"))
-        mesh_new = fd.Mesh(os.path.join(dirs["mesh"], "mesh.msh"))
-        mesh_fine = mesh_gen.generate_mesh(res=1e-2,
-        output_filename=os.path.join(dirs["mesh_fine"], "mesh.msh"))
+        mesh_gen = UM2N.UnstructuredSquareMeshGenerator(
+            scale=parameters["scale_x"], mesh_type=parameters["mesh_type"]
+        )
+        mesh = mesh_gen.generate_mesh(
+            res=parameters["lc"],
+            output_filename=os.path.join(directories["mesh"], "mesh.msh"),
+        )
+        mesh_new = fd.Mesh(os.path.join(directories["mesh"], "mesh.msh"))
+        mesh_fine = mesh_gen.generate_mesh(
+            res=1e-2, output_filename=os.path.join(directories["mesh_fine"], "mesh.msh")
+        )
     else:
         n_grid = parameters["n_grid"]
         mesh = fd.UnitSquareMesh(n_grid, n_grid)
@@ -213,17 +254,49 @@ def get_sample_param_of_nu_generalization_by_idx_train(idx_in):
         1: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.0001),
         2: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.001),
         3: ({"cx": 0.225, "cy": 0.5, "w": 0.01}, 0.002),
-        4: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.0001),
-        5: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.001),
-        6: ([{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}], 0.002),
-        7: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.0001),
-        8: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.001),
-        9: ([{"cx": 0.3, "cy": 0.7, "w": 0.01}, {"cx": 0.3, "cy": 0.3, "w": 0.01}, {"cx": 0.15, "cy": 0.5, "w": 0.01}], 0.002),
+        4: (
+            [{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}],
+            0.0001,
+        ),
+        5: (
+            [{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}],
+            0.001,
+        ),
+        6: (
+            [{"cx": 0.3, "cy": 0.35, "w": 0.01}, {"cx": 0.15, "cy": 0.65, "w": 0.01}],
+            0.002,
+        ),
+        7: (
+            [
+                {"cx": 0.3, "cy": 0.7, "w": 0.01},
+                {"cx": 0.3, "cy": 0.3, "w": 0.01},
+                {"cx": 0.15, "cy": 0.5, "w": 0.01},
+            ],
+            0.0001,
+        ),
+        8: (
+            [
+                {"cx": 0.3, "cy": 0.7, "w": 0.01},
+                {"cx": 0.3, "cy": 0.3, "w": 0.01},
+                {"cx": 0.15, "cy": 0.5, "w": 0.01},
+            ],
+            0.001,
+        ),
+        9: (
+            [
+                {"cx": 0.3, "cy": 0.7, "w": 0.01},
+                {"cx": 0.3, "cy": 0.3, "w": 0.01},
+                {"cx": 0.15, "cy": 0.5, "w": 0.01},
+            ],
+            0.002,
+        ),
     }
 
     # Retrieve the parameters and viscosity for the given index
     if idx_in not in param_map:
-        raise ValueError(f"Invalid index: {idx_in}. Supported indices are {list(param_map.keys())}.")
+        raise ValueError(
+            f"Invalid index: {idx_in}. Supported indices are {list(param_map.keys())}."
+        )
 
     params, nu_ = param_map[idx_in]
     # Ensure params is always a list
@@ -231,6 +304,7 @@ def get_sample_param_of_nu_generalization_by_idx_train(idx_in):
 
     return gauss_list_, nu_
 
+
 # def get_sample_params(idx):
 #     """Retrieve sample parameters for the Burgers problem."""
 #     return UM2N.get_sample_param_of_nu_generalization_by_idx_train(idx)
@@ -299,6 +373,7 @@ def get_sample_param_of_nu_generalization_by_idx_train(idx_in):
 #         {"error_og": [error_original_mesh], "error_adapt": [error_optimal_mesh], "time": [dur]}
 #     ).to_csv(os.path.join(dirs["log"], f"log_{idx}.csv"), index=False)
 
+
 def sample_from_loop(
     uh,
     uh_grad,
@@ -334,7 +409,9 @@ def sample_from_loop(
         feature={
             "uh": uh.dat.data_ro.reshape(-1, 1),
             "grad_uh": uh_grad.dat.data_ro.reshape(-1, 2),
-            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added grad_norm
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(
+                -1, 1
+            ),  # ej321 - added grad_norm
             "hessian": hessian.dat.data_ro.reshape(-1, 4),
             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
@@ -346,7 +423,7 @@ def sample_from_loop(
         raw_feature={
             "uh": uh,
             "hessian_norm": hessian_norm,
-            "monitor_val": monitor_val, # ej321 - added monitor_val
+            "monitor_val": monitor_val,  # ej321 - added monitor_val
             "jacobian": jacobian,
             "jacobian_det": jacobian_det,
         },
@@ -357,9 +434,7 @@ def sample_from_loop(
         idx=idx,
     )
 
-    mesh_processor.save_taining_data(
-        os.path.join(dirs["data"], "data_{}".format(i))
-    )
+    mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}"))
 
     # ====  Plot Scripts ======================
     fig = plt.figure(figsize=(15, 10))
@@ -391,7 +466,7 @@ def sample_from_loop(
     fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6)
     fd.triplot(mesh_new, axes=ax6)
 
-    fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i)))
+    fig.savefig(os.path.join(directories["plot"], "plot_{}.png".format(i)))
     i += 1
 
     # fig, ax = plt.subplots()
@@ -408,9 +483,10 @@ def sample_from_loop(
     error_original_mesh = fd.errornorm(uh, uh_fine, norm_type="L2")
     error_optimal_mesh = fd.errornorm(uh_new, uh_fine, norm_type="L2")
 
-
     # Write to CSV
-    with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+    with open(
+        os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline=""
+    ) as csvfile:
         csv_writer = csv.writer(csvfile)
         # Write header (keys)
         csv_writer.writerow(["error_og", "error_adapt", "time"])
@@ -432,7 +508,6 @@ def sample_from_loop(
 
 
 if __name__ == "__main__":
-    
     # parse args
     args = parse_arguments()
 
@@ -481,7 +556,6 @@ def sample_from_loop(
     # Set random seed
     random.seed(args.rand_seed)
 
-
     # ====  Setup Directories ======================
     problem_specific_dir = "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}".format(
         lc=parameters["lc"],
@@ -492,21 +566,29 @@ def sample_from_loop(
         mesh_type=parameters["mesh_type"],
     )
 
-    subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
-               "plot_compare", "train", "test", "val"
-               ]
+    subdirs = [
+        "data",
+        "plot",
+        "log",
+        "mesh",
+        "mesh_fine",
+        "plot_compare",
+        "train",
+        "test",
+        "val",
+    ]
 
-    dirs = setup_directories(problem = parameters["problem"],
-                            mesh_type = parameters["mesh_type"],
-                            base_dir = None,
-                            subdirs = subdirs,
-                            dir_format = problem_specific_dir)
+    directories = setup_directories(
+        problem=parameters["problem"],
+        mesh_type=parameters["mesh_type"],
+        base_dir=None,
+        subdirs=subdirs,
+        dir_format=problem_specific_dir,
+    )
 
     # ====  Output CSV ======================
-    key_list = [
-        "cmin","cmax", "data_type", "scheme", "lc", "mesh_type"
-    ]
-    output_csv(parameters, key_list, dirs["data"])
+    key_list = ["cmin", "cmax", "data_type", "scheme", "lc", "mesh_type"]
+    output_csv(parameters, key_list, directories["log"])
 
     # ====  Data Generation Scripts ======================
 
@@ -519,12 +601,11 @@ def sample_from_loop(
         try:
             # QC:
             print(f"Case {idx} building ...")
-            mesh, mesh_new, mesh_fine = generate_mesh(parameters, dirs)
+            mesh, mesh_new, mesh_fine = generate_mesh(parameters, directories)
             # Generate Random solution field
             gaussian_list, nu = get_sample_param_of_nu_generalization_by_idx_train(idx)  # noqa
             solver = UM2N.BurgersSolver(
-                mesh, mesh_fine, mesh_new,
-                gauss_list=gaussian_list, nu=nu, idx=idx
+                mesh, mesh_fine, mesh_new, gauss_list=gaussian_list, nu=nu, idx=idx
             )
             solver.solve_problem(sample_from_loop)
             print()
diff --git a/script/build_helmholtz_poly.py b/script/build_helmholtz_poly.py
index 870820d..c7bddb8 100644
--- a/script/build_helmholtz_poly.py
+++ b/script/build_helmholtz_poly.py
@@ -10,27 +10,40 @@
 import firedrake as fd
 import matplotlib.pyplot as plt
 import numpy as np
+
 # import pandas as pd
 from firedrake.__future__ import interpolate
 
-# dd the parent directory to the Python path
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
+
 def parse_arguments():
     """Parse command-line arguments."""
     parser = ArgumentParser(description="Build Burgers dataset with square meshes.")
-    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.")
-    parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.")
-    parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.")
-    parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.")
-    parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).")
+    parser.add_argument(
+        "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh."
+    )
+    parser.add_argument(
+        "--max_dist", type=int, default=6, help="Max number of distributions."
+    )
+    parser.add_argument(
+        "--n_dist", type=int, default=None, help="Number of distributions."
+    )
+    parser.add_argument(
+        "--lc", type=float, default=6e-2, help="Length characteristic of mesh elements."
+    )
+    parser.add_argument(
+        "--field_type", type=str, default="iso", help="Data type (aniso/iso)."
+    )
     # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
-    parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).")
-    parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated")
+    parser.add_argument(
+        "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)."
+    )
+    parser.add_argument(
+        "--n_samples", type=int, default=100, help="Number of samples generated"
+    )
     parser.add_argument("--rand_seed", type=int, default=63, help="Random seed")
-    
+
     parsed_args = parser.parse_args()
 
     # Handle dependency between max_dist and n_dist
@@ -40,11 +53,12 @@ def parse_arguments():
         parsed_args.max_dist = None  # Disable max_dist if n_dist is set
         print("Warning: max_dist is ignored because n_dist is set.")
     # QC:
-    print(parsed_args)
-    
+    # print(parsed_args)
+
     return parser.parse_args()
 
-def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+
+def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
     """
     Set up directories for storing data, plots, and logs.
 
@@ -73,16 +87,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         project_dir = os.path.abspath(base_dir)
     else:
         project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    
+
     # QC:
     print(f"Project Directory: {project_dir}")
 
     # Define the dataset directory
-    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
+    dataset_dir = os.path.join(
+        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
+    )
 
     # Use the provided format string for the problem-specific directory
     if dir_format is None:
-        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+        problem_specific_dir = os.path.join(
+            dataset_dir, f"{problem}_meshtype_{mesh_type}"
+        )
     else:
         # check if dir_format is a valid string format
         if not isinstance(dir_format, str):
@@ -91,8 +109,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
 
     # Define default subdirectories if not provided
     if subdirs is None:
-        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
-                   "plot_compare", "train", "test", "val"]
+        subdirs = [
+            "data",
+            "plot",
+            "log",
+            "mesh",
+            "mesh_fine",
+            "plot_compare",
+            "train",
+            "test",
+            "val",
+        ]
 
     # Create and clear directories
     directories = {}
@@ -107,10 +134,11 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         directories[subdir] = dir_path
 
     # QC:
-    print(f"Subdirectories created: {directories}")
+    # print(f"Subdirectories created: {directories}")
 
     return directories
 
+
 def output_csv(parameters, key_list, output_dir):
     """
     Write selected parameters to a CSV file.
@@ -135,51 +163,73 @@ def output_csv(parameters, key_list, output_dir):
         # Write data (values)
         csv_writer.writerow(csv_data)
 
-def move_data(target, source, start, num_files):
+
+def split_data(
+    source_dir,
+    train_dir,
+    test_dir,
+    val_dir,
+    train_ratio=0.75,
+    test_ratio=0.15,
+    val_ratio=0.1,
+):
     """
-    Move data files from the source directory to the target directory.
+    Shuffle the files in a source directory and copy them into train, test, and validation directories.
 
     Args:
-        target (str): The path to the target directory.
-        source (str): The path to the source directory.
-        start (int): The starting index of the files to move.
-        num_files (int): The total number of files to move.
+        source_dir (str): Path to the source directory containing files.
+        train_dir (str): Path to the train directory.
+        test_dir (str): Path to the test directory.
+        val_dir (str): Path to the validation directory.
+        train_ratio (float): Proportion of files to allocate to the train set.
+        test_ratio (float): Proportion of files to allocate to the test set.
+        val_ratio (float): Proportion of files to allocate to the validation set.
 
     Raises:
-        FileNotFoundError: If the source directory does not exist.
-        ValueError: If the start index or num_files is invalid.
+        ValueError: If any ratio is outside [0, 1] or the three ratios do not sum to 1.
     """
-    if not os.path.exists(source):
-        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
-
-    if start < 0 or num_files <= 0:
-        raise ValueError("Invalid start index or number of files to move.")
+    # Validate ratios; the sum is checked with a tolerance because e.g. 0.7 + 0.2 + 0.1 != 1 in floats
+    if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1):
+        raise ValueError("Ratios must be between 0 and 1.")
+    if abs(train_ratio + test_ratio + val_ratio - 1) > 1e-9:
+        raise ValueError(
+            "The sum of train_ratio, test_ratio, and val_ratio must equal 1."
+        )
 
-    # Create the target directory if it doesn't exist
-    if not os.path.exists(target):
-        os.makedirs(target)
-    else:
-        # Clear the target directory by removing all files
-        for file in os.listdir(target):
-            os.remove(os.path.join(target, file))
+    # Get all files in the source directory
+    files = [
+        f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))
+    ]
+    random.shuffle(files)  # Shuffle files for unbiased distribution
 
-    # Copy files sequentially starting from the specified index
-    for i in range(start, start + num_files):
-        try:
-            # Copy the data file
+    # QC:
+    # print(f'files {files}')
+
+    # Calculate split sizes: train and test are floored, test is capped at what train left, val gets the rest
+    total_files = len(files)
+    num_train = int(total_files * train_ratio)
+    num_test = min(int(total_files * test_ratio), total_files - num_train)
+    num_val = total_files - num_train - num_test
+
+    # Distribute files
+    train_files = files[:num_train]
+    test_files = files[num_train : num_train + num_test]
+    val_files = files[num_train + num_test :]
+
+    for datafiles, target_dir in zip(
+        [train_files, test_files, val_files], [train_dir, test_dir, val_dir]
+    ):
+        for datafile in datafiles:
             shutil.copy(
-                os.path.join(source, f"data_{i:04d}.npy"),
-                os.path.join(target, f"data_{i:04d}.npy"),
+                os.path.join(source_dir, datafile), os.path.join(target_dir, datafile)
             )
-        except FileNotFoundError:
-            print(f"File data_{i:04d}.npy not found in {source}. Skipping.")
-            continue
-        except Exception as e:
-            print(f"An error occurred while copying data_{i:04d}.npy: {e}")
-            continue
 
-def process_features(parameters, dirs):
+    print(
+        f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files."
+    )
+
 
+def process_features(parameters, directories):
     mesh_type = parameters["mesh_type"]
     scale_x = parameters["scale_x"]
     lc = parameters["lc"]
@@ -189,14 +239,13 @@ def process_features(parameters, dirs):
         scale=scale_x, mesh_type=mesh_type
     )  # noqa
     mesh = rand_poly_mesh_gen.generate_mesh(
-        res=lc, output_filename=os.path.join(dirs["mesh"], f"mesh{i}.msh")
+        res=lc, output_filename=os.path.join(directories["mesh"], f"mesh{i}.msh")
     )
     num_boundary = rand_poly_mesh_gen.num_boundary
 
     # Generate Random solution field
     rand_u_generator = UM2N.RandSourceGenerator(
-        use_iso= parameters["data_type"] == "iso",
-        dist_params= parameters
+        use_iso=parameters["data_type"] == "iso", dist_params=parameters
     )
 
     # generate equation
@@ -243,10 +292,9 @@ def process_features(parameters, dirs):
     mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh})
     monitor_val = mesh_gen.monitor_func(mesh)
     hessian = mesh_gen.get_hessian(mesh)
-    hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh),
-                                fd.FunctionSpace(mesh, "CG", 1)
-                                )
-
+    hessian_norm = fd.project(
+        mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1)
+    )
 
     # move the mesh?
     start = time.perf_counter()
@@ -286,7 +334,7 @@ def process_features(parameters, dirs):
         feature={
             "uh": uh.dat.data_ro.reshape(-1, 1),
             "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
-            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), #ej321 - added
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1),  # ej321 - added
             "hessian": hessian.dat.data_ro.reshape(-1, 4),
             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
@@ -294,13 +342,13 @@ def process_features(parameters, dirs):
             "phi": phi.dat.data_ro.reshape(-1, 1),
             "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
             "f": f_rhs.dat.data_ro.reshape(-1, 1),
-            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),  # ej321 - added
         },
         raw_feature={
             "uh": uh,
             "hessian_norm": hessian_norm,
-            "monitor_val": monitor_val, # ej321 - added
-            "grad_uh_norm": grad_norm, # ej321 - added needed for poly only
+            "monitor_val": monitor_val,  # ej321 - added
+            "grad_uh_norm": grad_norm,  # ej321 - added needed for poly only
             "jacobian": jacobian,
             "jacobian_det": jacobian_det,
         },
@@ -309,16 +357,16 @@ def process_features(parameters, dirs):
     )
 
     # save out data
-    mesh_processor.save_taining_data(
-        os.path.join(dirs["data"], "data_{}".format(i))
-    )
+    mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}"))
 
     # ====  Plot Scripts ======================
     fig = plt.figure(figsize=(15, 10))
     ax1 = fig.add_subplot(2, 3, 1, projection="3d")
     # Plot the exact solution
     ax1.set_title("Exact Solution")
-    fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1)
+    fd.trisurf(
+        fd.assemble(interpolate(res["u_exact"], res["function_space"])), axes=ax1
+    )
     # Plot the solved solution
     ax2 = fig.add_subplot(2, 3, 2, projection="3d")
     ax2.set_title("FEM Solution")
@@ -343,21 +391,20 @@ def process_features(parameters, dirs):
     fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6)
     fd.triplot(new_mesh, axes=ax6)
 
-    fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i)))
-
+    fig.savefig(os.path.join(directories["plot"], "plot_{}.png".format(i)))
 
     # ====  Log File ============================================
     high_res_mesh = rand_poly_mesh_gen.generate_mesh(
         res=1e-2,
-        output_filename=os.path.join(dirs["mesh_fine"], f"mesh{i}.msh"),
+        output_filename=os.path.join(directories["mesh_fine"], f"mesh{i}.msh"),
     )
 
     high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1)
 
     res_high_res = helmholtz_eq.discretise(high_res_mesh)
-    u_exact = fd.assemble(interpolate(res_high_res["u_exact"],
-                    res_high_res["function_space"])
-                    )
+    u_exact = fd.assemble(
+        interpolate(res_high_res["u_exact"], res_high_res["function_space"])
+    )
 
     uh_proj = fd.project(uh, high_res_function_space)
     uh_new_proj = fd.project(uh_new, high_res_function_space)
@@ -376,7 +423,9 @@ def process_features(parameters, dirs):
     # df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i)))
 
     # Write to CSV
-    with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+    with open(
+        os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline=""
+    ) as csvfile:
         csv_writer = csv.writer(csvfile)
         # Write header (keys)
         csv_writer.writerow(["error_og", "error_adapt", "time"])
@@ -385,12 +434,10 @@ def process_features(parameters, dirs):
     print("error og/optimal:", error_original_mesh, error_optimal_mesh)
 
 
-
 if __name__ == "__main__":
-
     # parse args
     args = parse_arguments()
-    
+
     # ====  Parameters ======================
     parameters = {
         # parameters for problem
@@ -438,42 +485,54 @@ def process_features(parameters, dirs):
     np.random.seed(args.rand_seed)
 
     # ====  Setup Directories ======================
-    problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
-            parameters["z_min"], parameters["z_max"],
-            parameters["n_dist"],parameters["max_dist"],
-            parameters["lc"], parameters["n_samples"],
-            parameters["data_type"], parameters["scheme"], parameters["mesh_type"]
+    problem_specific_dir = (
+        "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
+            parameters["z_min"],
+            parameters["z_max"],
+            parameters["n_dist"],
+            parameters["max_dist"],
+            parameters["lc"],
+            parameters["n_samples"],
+            parameters["data_type"],
+            parameters["scheme"],
+            parameters["mesh_type"],
         )
+    )
 
     subdirs = [
-        "data", "plot", "plot_compare", "log", "mesh", "mesh_fine",
-        "train", "test", "val",
+        "data",
+        "plot",
+        "plot_compare",
+        "log",
+        "mesh",
+        "mesh_fine",
+        "train",
+        "test",
+        "val",
     ]
 
-    dirs = setup_directories(problem = parameters["problem"],
-                        mesh_type = parameters["mesh_type"],
-                        base_dir = None,
-                        subdirs = subdirs,
-                        dir_format = problem_specific_dir)
-
+    directories = setup_directories(
+        problem=parameters["problem"],
+        mesh_type=parameters["mesh_type"],
+        base_dir=None,
+        subdirs=subdirs,
+        dir_format=problem_specific_dir,
+    )
 
     # ====  Output CSV ======================
-    key_list = [
-        "cmin","cmax",
-        "data_type", "scheme", "n_samples", "lc", "mesh_type"
-    ]
-    output_csv(parameters, key_list, dirs["data"])
+    key_list = ["cmin", "cmax", "data_type", "scheme", "n_samples", "lc", "mesh_type"]
+    output_csv(parameters, key_list, directories["log"])
 
     # ====  Data Generation Scripts ======================
-    # QC: 
+    # QC:
     print("In build_dataset.py")
     # i = 0
     # while i < n_samples:
     for i in range(parameters["n_samples"]):
         try:
             print("Generating Sample: " + str(i))
-          
-            process_features(parameters, dirs)
+
+            process_features(parameters, directories)
             # i += 1
         except fd.exceptions.ConvergenceError:
             print(f"Iteration {i} did not converge.")
@@ -484,10 +543,13 @@ def process_features(parameters, dirs):
             pass
 
     # ====  Data Splits ============================================
-    num_train = int(parameters["n_samples"] * parameters["p_train"])
-    num_test = int(parameters["n_samples"] * parameters["p_test"])
-    num_val = parameters["n_samples"] - num_train - num_test
-
-    move_data(dirs["train"], dirs["data"], 0, num_train)
-    move_data(dirs["test"], dirs["data"], num_train, num_train + num_test)
-    move_data(dirs["val"], dirs["data"], num_train + num_test, num_train + num_test + num_val)
+    # TODO: this should probably be done in the training script, not the build script
+    split_data(
+        source_dir=directories["data"],
+        train_dir=directories["train"],
+        test_dir=directories["test"],
+        val_dir=directories["val"],
+        train_ratio=parameters["p_train"],
+        test_ratio=parameters["p_test"],
+        val_ratio=parameters["p_val"],
+    )
diff --git a/script/build_helmholtz_square.py b/script/build_helmholtz_square.py
index c66d46f..ca85846 100644
--- a/script/build_helmholtz_square.py
+++ b/script/build_helmholtz_square.py
@@ -12,24 +12,34 @@
 import matplotlib.pyplot as plt
 from firedrake.__future__ import interpolate
 
-
-# dd the parent directory to the Python path
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
 
 def parse_arguments():
     """Parse command-line arguments."""
     parser = ArgumentParser()
-    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh")
-    parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions")
-    parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions")
-    parser.add_argument("--lc", type=float, default=5e-2, help="Length characteristic of mesh elements")
-    parser.add_argument("--field_type", type=str, default="aniso", help="Data type (aniso/iso)")
+    parser.add_argument(
+        "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh"
+    )
+    parser.add_argument(
+        "--max_dist", type=int, default=6, help="Max number of distributions"
+    )
+    parser.add_argument(
+        "--n_dist", type=int, default=None, help="Number of distributions"
+    )
+    parser.add_argument(
+        "--lc", type=float, default=5e-2, help="Length characteristic of mesh elements"
+    )
+    parser.add_argument(
+        "--field_type", type=str, default="aniso", help="Data type (aniso/iso)"
+    )
     # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
-    parser.add_argument("--boundary_scheme", type=str, default="full", help="Boundary scheme (pad/full)")
-    parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated")
+    parser.add_argument(
+        "--boundary_scheme", type=str, default="full", help="Boundary scheme (pad/full)"
+    )
+    parser.add_argument(
+        "--n_samples", type=int, default=100, help="Number of samples generated"
+    )
     parser.add_argument("--rand_seed", type=int, default=63, help="Random seed")
 
     parsed_args = parser.parse_args()
@@ -41,12 +51,12 @@ def parse_arguments():
         parsed_args.max_dist = None  # Disable max_dist if n_dist is set
         print("Warning: max_dist is ignored because n_dist is set.")
     # QC:
-    print(parsed_args)
+    # print(parsed_args)
 
     return parsed_args
 
 
-def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
     """
     Set up directories for storing data, plots, and logs.
 
@@ -75,16 +85,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         project_dir = os.path.abspath(base_dir)
     else:
         project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    
+
     # QC:
     print(f"Project Directory: {project_dir}")
 
     # Define the dataset directory
-    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
+    dataset_dir = os.path.join(
+        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
+    )
 
     # Use the provided format string for the problem-specific directory
     if dir_format is None:
-        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+        problem_specific_dir = os.path.join(
+            dataset_dir, f"{problem}_meshtype_{mesh_type}"
+        )
     else:
         # check if dir_format is a valid string format
         if not isinstance(dir_format, str):
@@ -93,8 +107,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
 
     # Define default subdirectories if not provided
     if subdirs is None:
-        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
-                   "plot_compare", "train", "test", "val"]
+        subdirs = [
+            "data",
+            "plot",
+            "log",
+            "mesh",
+            "mesh_fine",
+            "plot_compare",
+            "train",
+            "test",
+            "val",
+        ]
 
     # Create and clear directories
     directories = {}
@@ -109,52 +132,10 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         directories[subdir] = dir_path
 
     # QC:
-    print(f"Subdirectories created: {directories}")
+    # print(f"Subdirectories created: {directories}")
 
     return directories
 
-def move_data(target, source, start, num_files):
-    """
-    Move data files from the source directory to the target directory.
-
-    Args:
-        target (str): The path to the target directory.
-        source (str): The path to the source directory.
-        start (int): The starting index of the files to move.
-        num_files (int): The total number of files to move.
-
-    Raises:
-        FileNotFoundError: If the source directory does not exist.
-        ValueError: If the start index or num_files is invalid.
-    """
-    if not os.path.exists(source):
-        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
-
-    if start < 0 or num_files <= 0:
-        raise ValueError("Invalid start index or number of files to move.")
-
-    # Create the target directory if it doesn't exist
-    if not os.path.exists(target):
-        os.makedirs(target)
-    else:
-        # Clear the target directory by removing all files
-        for file in os.listdir(target):
-            os.remove(os.path.join(target, file))
-
-    # Copy files sequentially starting from the specified index
-    for i in range(start, start + num_files):
-        try:
-            # Copy the data file
-            shutil.copy(
-                os.path.join(source, f"data_{i:04d}.npy"),
-                os.path.join(target, f"data_{i:04d}.npy"),
-            )
-        except FileNotFoundError:
-            print(f"File data_{i:04d}.npy not found in {source}. Skipping.")
-            continue
-        except Exception as e:
-            print(f"An error occurred while copying data_{i:04d}.npy: {e}")
-            continue
 
 def create_mesh(i, mesh_type, lc, scale_x, problem_mesh_dir):
     """
@@ -184,16 +165,17 @@ def create_mesh(i, mesh_type, lc, scale_x, problem_mesh_dir):
 
 
 def process_features(parameters, directories):
-
     # create mesh
     mesh = create_mesh(
-        i, mesh_type = parameters["mesh_type"], lc = parameters["lc"],
-        scale_x = parameters["scale_x"], problem_mesh_dir = directories["mesh"]
+        i,
+        mesh_type=parameters["mesh_type"],
+        lc=parameters["lc"],
+        scale_x=parameters["scale_x"],
+        problem_mesh_dir=directories["mesh"],
     )
     # Generate Random solution field
     rand_u_generator = UM2N.RandSourceGenerator(
-        use_iso= parameters["data_type"] == "iso",
-        dist_params = parameters
+        use_iso=parameters["data_type"] == "iso", dist_params=parameters
     )
 
     # generate equation
@@ -214,9 +196,9 @@ def process_features(parameters, directories):
     # original solution field
     uh = solver.solve_eq()
 
-    grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh),
-                                fd.VectorFunctionSpace(mesh, "CG", 1)
-                                ))
+    grad_uh_interpolate = fd.assemble(
+        interpolate(fd.grad(uh), fd.VectorFunctionSpace(mesh, "CG", 1))
+    )
     grad_norm = fd.Function(res["function_space"])
     grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
     grad_norm /= grad_norm.vector().max()
@@ -225,15 +207,14 @@ def process_features(parameters, directories):
     # RHS of helmholtz problem
     f_rhs = fd.assemble(interpolate(helmholtz_eq.f, helmholtz_eq.function_space))
 
-
     # generate mesh?
 
     mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh})
     monitor_val = mesh_gen.monitor_func(mesh)
     hessian = mesh_gen.get_hessian(mesh)
-    hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh),
-                                fd.FunctionSpace(mesh, "CG", 1)
-                                )
+    hessian_norm = fd.project(
+        mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1)
+    )
 
     # move the mesh?
     start = time.perf_counter()
@@ -293,20 +274,21 @@ def process_features(parameters, directories):
     )
 
     # save out data
-    mesh_processor.save_taining_data(
-        os.path.join(directories["data"], f"data_{i:04d}")
-    )
+    mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}"))
 
     # ====  Log File ============================================
     high_res_mesh = create_mesh(
-        i, mesh_type = parameters["mesh_type"], lc = 1e-2,
-        scale_x = parameters["scale_x"], problem_mesh_dir = directories["mesh_fine"]
+        i,
+        mesh_type=parameters["mesh_type"],
+        lc=1e-2,
+        scale_x=parameters["scale_x"],
+        problem_mesh_dir=directories["mesh_fine"],
     )
 
     res_high_res = helmholtz_eq.discretise(high_res_mesh)
-    u_exact = fd.assemble(interpolate(res_high_res["u_exact"],
-                            res_high_res["function_space"])
-                            )
+    u_exact = fd.assemble(
+        interpolate(res_high_res["u_exact"], res_high_res["function_space"])
+    )
 
     uh_proj = fd.project(uh, fd.FunctionSpace(high_res_mesh, "CG", 1))
     uh_new_proj = fd.project(uh_new, fd.FunctionSpace(high_res_mesh, "CG", 1))
@@ -315,7 +297,9 @@ def process_features(parameters, directories):
     error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj)
 
     # Write to CSV
-    with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+    with open(
+        os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline=""
+    ) as csvfile:
         csv_writer = csv.writer(csvfile)
         # Write header (keys)
         csv_writer.writerow(["error_og", "error_adapt", "time"])
@@ -410,6 +394,7 @@ def process_features(parameters, directories):
     fig.savefig(os.path.join(directories["plot_compare"], f"plot_{i:04d}.png"))
     plt.close()
 
+
 def output_csv(parameters, key_list, output_dir):
     """
     Write selected parameters to a CSV file.
@@ -436,11 +421,76 @@ def output_csv(parameters, key_list, output_dir):
 
     print(f"Parameters saved to {csv_file_path}")
 
-if __name__ == "__main__":
 
+def split_data(
+    source_dir,
+    train_dir,
+    test_dir,
+    val_dir,
+    train_ratio=0.75,
+    test_ratio=0.15,
+    val_ratio=0.1,
+):
+    """
+    Copy the files of a source directory into train, test, and validation directories.
+
+    Args:
+        source_dir (str): Path to the source directory containing files.
+        train_dir (str): Path to the train directory.
+        test_dir (str): Path to the test directory.
+        val_dir (str): Path to the validation directory.
+        train_ratio (float): Proportion of files to allocate to the train set.
+        test_ratio (float): Proportion of files to allocate to the test set.
+        val_ratio (float): Proportion of files to allocate to the validation set.
+
+    Raises:
+        ValueError: If a ratio is outside [0, 1] or the ratios do not sum to 1.
+    """
+    # Validate ratios
+    if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1):
+        raise ValueError("Ratios must be between 0 and 1.")
+    if abs(train_ratio + test_ratio + val_ratio - 1) > 1e-9:  # tolerate float rounding
+        raise ValueError(
+            "The sum of train_ratio, test_ratio, and val_ratio must equal 1."
+        )
+
+    # Get all files in the source directory. os.listdir order is arbitrary, so
+    # sort first to keep the shuffle reproducible for a given random seed.
+    files = sorted(
+        f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))
+    )
+    random.shuffle(files)  # Shuffle files for unbiased distribution
+
+    # Calculate split sizes. int() truncates, so train and test are rounded
+    # down and validation receives the remainder; min() guards against the
+    # test share overrunning what is left after train.
+    total_files = len(files)
+    num_train = int(total_files * train_ratio)
+    num_test = min(int(total_files * test_ratio), total_files - num_train)
+    num_val = total_files - num_train - num_test
+
+    # Distribute files
+    train_files = files[:num_train]
+    test_files = files[num_train : num_train + num_test]
+    val_files = files[num_train + num_test :]
+
+    for datafiles, target_dir in zip(
+        [train_files, test_files, val_files], [train_dir, test_dir, val_dir]
+    ):
+        for datafile in datafiles:
+            shutil.copy(
+                os.path.join(source_dir, datafile), os.path.join(target_dir, datafile)
+            )
+
+    print(
+        f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files."
+    )
+
+
+if __name__ == "__main__":
     # parse args
     args = parse_arguments()
-    
+
     # ====  Parameters ======================
     parameters = {
         # parameters for problem
@@ -450,7 +500,7 @@ def output_csv(parameters, key_list, output_dir):
         "n_dist": args.n_dist,
         "max_dist": args.max_dist,
         "lc": args.lc,
-        # "n_grig": args.n_grid, # burgers problem only
+        # "n_grid": args.n_grid, # burgers problem only
         # parameters for ??????
         "n_samples": args.n_samples,
         "data_type": args.field_type,
@@ -487,31 +537,53 @@ def output_csv(parameters, key_list, output_dir):
     random.seed(args.rand_seed)
 
     # ====  Setup Directories ======================
-    problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
-            parameters["z_min"], parameters["z_max"],
-            parameters["n_dist"],parameters["max_dist"],
-            parameters["lc"], parameters["n_samples"],
-            parameters["data_type"], parameters["scheme"], parameters["mesh_type"]
+    problem_specific_dir = (
+        "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
+            parameters["z_min"],
+            parameters["z_max"],
+            parameters["n_dist"],
+            parameters["max_dist"],
+            parameters["lc"],
+            parameters["n_samples"],
+            parameters["data_type"],
+            parameters["scheme"],
+            parameters["mesh_type"],
         )
+    )
 
     subdirs = [
-        "data", "plot", "plot_compare", "log", "mesh", "mesh_fine",
-        "train", "test", "val",
+        "data",
+        "plot",
+        "plot_compare",
+        "log",
+        "mesh",
+        "mesh_fine",
+        "train",
+        "test",
+        "val",
     ]
 
-    directories = setup_directories(problem = parameters["problem"],
-                        mesh_type = parameters["mesh_type"],
-                        base_dir = None,
-                        subdirs = subdirs,
-                        dir_format = problem_specific_dir)
-
+    directories = setup_directories(
+        problem=parameters["problem"],
+        mesh_type=parameters["mesh_type"],
+        base_dir=None,
+        subdirs=subdirs,
+        dir_format=problem_specific_dir,
+    )
 
     # ====  Output CSV ======================
     key_list = [
-        "cmin","cmax", "sigma_mean_scaler", "sigma_sigma_scaler", "sigma_eps"
-        "data_type", "scheme", "n_samples", "lc", "mesh_type"
+        "cmin",
+        "cmax",
+        "sigma_mean_scaler",
+        "sigma_sigma_scaler",
+        "sigma_eps", "data_type",
+        "scheme",
+        "n_samples",
+        "lc",
+        "mesh_type",
     ]
-    output_csv(parameters, key_list, directories["data"])
+    output_csv(parameters, key_list, directories["log"])
 
     # ====  Data Generation Scripts ======================
     for i in range(parameters["n_samples"]):
@@ -526,10 +598,13 @@ def output_csv(parameters, key_list, output_dir):
             continue
 
     # ====  Data Splits ============================================
-    num_train = int(parameters["n_samples"] * parameters["p_train"])
-    num_test = int(parameters["n_samples"] * parameters["p_test"])
-    num_val = parameters["n_samples"] - num_train - num_test
-
-    move_data(directories["train"], directories["data"], 0, num_train)
-    move_data(directories["test"], directories["data"], num_train, num_train + num_test)
-    move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val)
+    # TODO: this should probably be done in the training script, not the build script
+    split_data(
+        source_dir=directories["data"],
+        train_dir=directories["train"],
+        test_dir=directories["test"],
+        val_dir=directories["val"],
+        train_ratio=parameters["p_train"],
+        test_ratio=parameters["p_test"],
+        val_ratio=parameters["p_val"],
+    )
diff --git a/script/build_poisson_poly.py b/script/build_poisson_poly.py
index 929fad8..7f6403c 100644
--- a/script/build_poisson_poly.py
+++ b/script/build_poisson_poly.py
@@ -9,28 +9,38 @@
 
 import firedrake as fd
 import matplotlib.pyplot as plt
-import numpy as np
-# import pandas as pd
 from firedrake.__future__ import interpolate
 
-# dd the parent directory to the Python path
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
+
 def parse_arguments():
     """Parse command-line arguments."""
     parser = ArgumentParser(description="Build Burgers dataset with square meshes.")
-    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.")
-    parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.")
-    parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.")
-    parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.")
-    parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).")
+    parser.add_argument(
+        "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh."
+    )
+    parser.add_argument(
+        "--max_dist", type=int, default=6, help="Max number of distributions."
+    )
+    parser.add_argument(
+        "--n_dist", type=int, default=None, help="Number of distributions."
+    )
+    parser.add_argument(
+        "--lc", type=float, default=6e-2, help="Length characteristic of mesh elements."
+    )
+    parser.add_argument(
+        "--field_type", type=str, default="iso", help="Data type (aniso/iso)."
+    )
     # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
-    parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).")
-    parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated")
+    parser.add_argument(
+        "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)."
+    )
+    parser.add_argument(
+        "--n_samples", type=int, default=100, help="Number of samples generated"
+    )
     parser.add_argument("--rand_seed", type=int, default=63, help="Random seed")
-    
+
     parsed_args = parser.parse_args()
 
     # Handle dependency between max_dist and n_dist
@@ -40,12 +50,12 @@ def parse_arguments():
         parsed_args.max_dist = None  # Disable max_dist if n_dist is set
         print("Warning: max_dist is ignored because n_dist is set.")
     # QC:
-    print(parsed_args)
-    
+    # print(parsed_args)
+
     return parser.parse_args()
 
 
-def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
     """
     Set up directories for storing data, plots, and logs.
 
@@ -74,16 +84,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         project_dir = os.path.abspath(base_dir)
     else:
         project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    
+
     # QC:
     print(f"Project Directory: {project_dir}")
 
     # Define the dataset directory
-    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
+    dataset_dir = os.path.join(
+        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
+    )
 
     # Use the provided format string for the problem-specific directory
     if dir_format is None:
-        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+        problem_specific_dir = os.path.join(
+            dataset_dir, f"{problem}_meshtype_{mesh_type}"
+        )
     else:
         # check if dir_format is a valid string format
         if not isinstance(dir_format, str):
@@ -92,8 +106,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
 
     # Define default subdirectories if not provided
     if subdirs is None:
-        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
-                   "plot_compare", "train", "test", "val"]
+        subdirs = [
+            "data",
+            "plot",
+            "log",
+            "mesh",
+            "mesh_fine",
+            "plot_compare",
+            "train",
+            "test",
+            "val",
+        ]
 
     # Create and clear directories
     directories = {}
@@ -108,10 +131,11 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         directories[subdir] = dir_path
 
     # QC:
-    print(f"Subdirectories created: {directories}")
+    # print(f"Subdirectories created: {directories}")
 
     return directories
 
+
 def output_csv(parameters, key_list, output_dir):
     """
     Write selected parameters to a CSV file.
@@ -135,52 +159,74 @@ def output_csv(parameters, key_list, output_dir):
         csv_writer.writerow(csv_keys)
         # Write data (values)
         csv_writer.writerow(csv_data)
-        
-def move_data(target, source, start, num_files):
+
+
+def split_data(
+    source_dir,
+    train_dir,
+    test_dir,
+    val_dir,
+    train_ratio=0.75,
+    test_ratio=0.15,
+    val_ratio=0.1,
+):
     """
-    Move data files from the source directory to the target directory.
+    Shuffle the files in source_dir and copy them into train/test/validation directories.
 
     Args:
-        target (str): The path to the target directory.
-        source (str): The path to the source directory.
-        start (int): The starting index of the files to move.
-        num_files (int): The total number of files to move.
+        source_dir (str): Path to the source directory containing files.
+        train_dir (str): Path to the train directory.
+        test_dir (str): Path to the test directory.
+        val_dir (str): Path to the validation directory.
+        train_ratio (float): Proportion of files to allocate to the train set.
+        test_ratio (float): Proportion of files to allocate to the test set.
+        val_ratio (float): Proportion for the validation set, which receives the remainder.
 
     Raises:
-        FileNotFoundError: If the source directory does not exist.
-        ValueError: If the start index or num_files is invalid.
+        ValueError: If any ratio is outside [0, 1] or the ratios do not sum to 1.
     """
-    if not os.path.exists(source):
-        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
-
-    if start < 0 or num_files <= 0:
-        raise ValueError("Invalid start index or number of files to move.")
+    # Validate ratios (the sum is compared with a tolerance: 0.7 + 0.2 + 0.1 != 1.0)
+    if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1):
+        raise ValueError("Ratios must be between 0 and 1.")
+    if abs(train_ratio + test_ratio + val_ratio - 1.0) > 1e-9:
+        raise ValueError(
+            "The sum of train_ratio, test_ratio, and val_ratio must equal 1."
+        )
 
-    # Create the target directory if it doesn't exist
-    if not os.path.exists(target):
-        os.makedirs(target)
-    else:
-        # Clear the target directory by removing all files
-        for file in os.listdir(target):
-            os.remove(os.path.join(target, file))
+    # Collect regular files; sort first so the seeded shuffle below is reproducible
+    files = sorted(
+        f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))
+    )
+    random.shuffle(files)  # Shuffle files for unbiased distribution
 
-    # Copy files sequentially starting from the specified index
-    for i in range(start, start + num_files):
-        try:
-            # Copy the data file
+    # QC:
+    # print(f'files {files}')
+
+    # Calculate split sizes: floor train and test, validation takes the remainder
+    total_files = len(files)
+    num_train = int(total_files * train_ratio)
+    num_test = min(int(total_files * test_ratio), total_files - num_train)
+    num_val = total_files - num_train - num_test
+
+    # Distribute files
+    train_files = files[:num_train]
+    test_files = files[num_train : num_train + num_test]
+    val_files = files[num_train + num_test :]
+
+    for datafiles, target_dir in zip(
+        [train_files, test_files, val_files], [train_dir, test_dir, val_dir]
+    ):
+        for datafile in datafiles:
             shutil.copy(
-                os.path.join(source, f"data_{i:04d}.npy"),
-                os.path.join(target, f"data_{i:04d}.npy"),
+                os.path.join(source_dir, datafile), os.path.join(target_dir, datafile)
             )
-        except FileNotFoundError:
-            print(f"File data_{i:04d}.npy not found in {source}. Skipping.")
-            continue
-        except Exception as e:
-            print(f"An error occurred while copying data_{i:04d}.npy: {e}")
-            continue
 
-def process_features(parameters, dirs):
+    print(
+        f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files."
+    )
 
+
+def process_features(parameters, directories):
     # create mesh
     scale_x = parameters["scale_x"]
     mesh_type = parameters["mesh_type"]
@@ -189,14 +235,13 @@ def process_features(parameters, dirs):
         scale=scale_x, mesh_type=mesh_type
     )  # noqa
     mesh = rand_poly_mesh_gen.generate_mesh(
-        res=lc, output_filename=os.path.join(dirs["mesh"], f"mesh{i}.msh")
+        res=lc, output_filename=os.path.join(directories["mesh"], f"mesh{i}.msh")
     )
     num_boundary = rand_poly_mesh_gen.num_boundary
 
     # Generate Random solution field
     rand_u_generator = UM2N.RandSourceGenerator(
-        use_iso= parameters["data_type"] == "iso",
-        dist_params = parameters
+        use_iso=parameters["data_type"] == "iso", dist_params=parameters
     )
 
     # generate equation
@@ -220,9 +265,9 @@ def process_features(parameters, dirs):
     mesh_gen = UM2N.MeshGenerator(params={"eq": poisson_eq, "mesh": mesh})
     monitor_val = mesh_gen.monitor_func(mesh)
     hessian = mesh_gen.get_hessian(mesh)
-    hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh),
-                                fd.FunctionSpace(mesh, "CG", 1)
-                                )
+    hessian_norm = fd.project(
+        mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1)
+    )
     # hessian = UM2N.MeshGenerator(
     #     params={
     #         "eq": poisson_eq,
@@ -247,7 +292,7 @@ def process_features(parameters, dirs):
     # hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1))
 
     func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
-    grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space)
+    grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space))
 
     # ej321 - grad_norm copied from build_helmholtz_square.py
     grad_norm = fd.Function(res["function_space"])
@@ -263,7 +308,7 @@ def process_features(parameters, dirs):
     #         ),
     #     }
     # )
-    
+
     # move the mesh?
     start = time.perf_counter()
     new_mesh = mesh_gen.move_mesh()
@@ -302,21 +347,20 @@ def process_features(parameters, dirs):
         feature={
             "uh": uh.dat.data_ro.reshape(-1, 1),
             "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
-            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), #ej321 - added
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1),  # ej321 - added
             "hessian": hessian.dat.data_ro.reshape(-1, 4),
             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
             "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
             "phi": phi.dat.data_ro.reshape(-1, 1),
             "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
-            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added
-
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),  # ej321 - added
         },
         raw_feature={
             "uh": uh,
             "hessian_norm": hessian_norm,
-            "monitor_val": monitor_val, # ej321 - added
-            "grad_uh_norm": grad_norm, # ej321 - added needed for poly only
+            "monitor_val": monitor_val,  # ej321 - added
+            "grad_uh_norm": grad_norm,  # ej321 - added needed for poly only
             "jacobian": jacobian,
             "jacobian_det": jacobian_det,
         },
@@ -324,16 +368,16 @@ def process_features(parameters, dirs):
         poly_mesh=True,
     )
 
-    mesh_processor.save_taining_data(
-        os.path.join(dirs["data"], "data_{}".format(i))
-    )
+    mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}"))
 
     # ====  Plot Scripts ======================
     fig = plt.figure(figsize=(15, 10))
     ax1 = fig.add_subplot(2, 3, 1, projection="3d")
     # Plot the exact solution
     ax1.set_title("Exact Solution")
-    fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1)
+    fd.trisurf(
+        fd.assemble(interpolate(res["u_exact"], res["function_space"])), axes=ax1
+    )
     # Plot the solved solution
     ax2 = fig.add_subplot(2, 3, 2, projection="3d")
     ax2.set_title("FEM Solution")
@@ -358,19 +402,19 @@ def process_features(parameters, dirs):
     fd.tripcolor(uh_new, cmap="coolwarm", axes=ax6)
     fd.triplot(new_mesh, axes=ax6)
 
-    fig.savefig(os.path.join(dirs["plot"], "plot_{}.png".format(i)))
+    fig.savefig(os.path.join(directories["plot"], "plot_{}.png".format(i)))
 
     # ====  Log File ============================================
     high_res_mesh = rand_poly_mesh_gen.generate_mesh(
         res=1e-2,
-        output_filename=os.path.join(dirs["mesh_fine"], f"mesh{i}.msh"),
+        output_filename=os.path.join(directories["mesh_fine"], f"mesh{i}.msh"),
     )
 
     high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1)
 
     res_high_res = poisson_eq.discretise(high_res_mesh)
-    u_exact = fd.interpolate(
-        res_high_res["u_exact"], res_high_res["function_space"]
+    u_exact = fd.assemble(
+        interpolate(res_high_res["u_exact"], res_high_res["function_space"])
     )
 
     uh_proj = fd.project(uh, high_res_function_space)
@@ -379,8 +423,10 @@ def process_features(parameters, dirs):
     error_original_mesh = fd.errornorm(u_exact, uh_proj)
     error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj)
 
-     # Write to CSV
-    with open(os.path.join(dirs["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+    # Write to CSV
+    with open(
+        os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline=""
+    ) as csvfile:
         csv_writer = csv.writer(csvfile)
         # Write header (keys)
         csv_writer.writerow(["error_og", "error_adapt", "time"])
@@ -389,8 +435,8 @@ def process_features(parameters, dirs):
 
     print("error og/optimal:", error_original_mesh, error_optimal_mesh)
 
-if __name__ == "__main__":
 
+if __name__ == "__main__":
     # parse args
     args = parse_arguments()
 
@@ -440,28 +486,33 @@ def process_features(parameters, dirs):
     random.seed(args.rand_seed)
 
     # ====  Setup Directories ======================
-    problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
-            parameters["z_min"], parameters["z_max"],
-            parameters["n_dist"],parameters["max_dist"],
-            parameters["lc"], parameters["n_samples"],
-            parameters["data_type"], parameters["scheme"], parameters["mesh_type"]
+    problem_specific_dir = (
+        "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
+            parameters["z_min"],
+            parameters["z_max"],
+            parameters["n_dist"],
+            parameters["max_dist"],
+            parameters["lc"],
+            parameters["n_samples"],
+            parameters["data_type"],
+            parameters["scheme"],
+            parameters["mesh_type"],
         )
+    )
 
-    subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
-               "train", "test", "val"
-               ]
+    subdirs = ["data", "plot", "log", "mesh", "mesh_fine", "train", "test", "val"]
 
-    dirs = setup_directories(problem = parameters["problem"],
-                            mesh_type = parameters["mesh_type"],
-                            base_dir = None,
-                            subdirs = subdirs,
-                            dir_format = problem_specific_dir)
+    directories = setup_directories(
+        problem=parameters["problem"],
+        mesh_type=parameters["mesh_type"],
+        base_dir=None,
+        subdirs=subdirs,
+        dir_format=problem_specific_dir,
+    )
 
     # ====  Output CSV ======================
-    key_list = [
-        "cmin","cmax", "data_type", "scheme", "n_samples", "lc", "mesh_type"
-    ]
-    output_csv(parameters, key_list, dirs["data"])
+    key_list = ["cmin", "cmax", "data_type", "scheme", "n_samples", "lc", "mesh_type"]
+    output_csv(parameters, key_list, directories["log"])
 
     # ====  Data Generation Scripts ======================
     print("In build_dataset.py")
@@ -470,7 +521,7 @@ def process_features(parameters, dirs):
     for i in range(parameters["n_samples"]):
         try:
             print("Generating Sample: " + str(i))
-            process_features(parameters, dirs)
+            process_features(parameters, directories)
             # i += 1
         except fd.exceptions.ConvergenceError:
             pass
@@ -479,13 +530,14 @@ def process_features(parameters, dirs):
         except ValueError:
             pass
 
-
-
     # ====  Data Splits ============================================
-    num_train = int(parameters["n_samples"] * parameters["p_train"])
-    num_test = int(parameters["n_samples"] * parameters["p_test"])
-    num_val = parameters["n_samples"] - num_train - num_test
-
-    move_data(dirs["train"], dirs["data"], 0, num_train)
-    move_data(dirs["test"], dirs["data"], num_train, num_train + num_test)
-    move_data(dirs["val"], dirs["data"], num_train + num_test, num_train + num_test + num_val)
+    # TODO: this should probably be done in the training script, not the build script
+    split_data(
+        source_dir=directories["data"],
+        train_dir=directories["train"],
+        test_dir=directories["test"],
+        val_dir=directories["val"],
+        train_ratio=parameters["p_train"],
+        test_ratio=parameters["p_test"],
+        val_ratio=parameters["p_val"],
+    )
diff --git a/script/build_poisson_square.py b/script/build_poisson_square.py
index 44b4f6e..5513e1b 100644
--- a/script/build_poisson_square.py
+++ b/script/build_poisson_square.py
@@ -9,27 +9,38 @@
 
 import firedrake as fd
 import matplotlib.pyplot as plt
-# import pandas as pd
 from firedrake.__future__ import interpolate
 
-# dd the parent directory to the Python path
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
+
 def parse_arguments():
     """Parse command-line arguments."""
     parser = ArgumentParser(description="Build Burgers dataset with square meshes.")
-    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.")
-    parser.add_argument("--max_dist", type=int, default=6, help="Max number of distributions.")
-    parser.add_argument("--n_dist", type=int, default=None, help="Number of distributions.")
-    parser.add_argument("--lc", type=float, default=6e-2, help="Length characteristic of mesh elements.")
-    parser.add_argument("--field_type", type=str, default="iso", help="Data type (aniso/iso).")
+    parser.add_argument(
+        "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh."
+    )
+    parser.add_argument(
+        "--max_dist", type=int, default=6, help="Max number of distributions."
+    )
+    parser.add_argument(
+        "--n_dist", type=int, default=None, help="Number of distributions."
+    )
+    parser.add_argument(
+        "--lc", type=float, default=6e-2, help="Length characteristic of mesh elements."
+    )
+    parser.add_argument(
+        "--field_type", type=str, default="iso", help="Data type (aniso/iso)."
+    )
     # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
-    parser.add_argument("--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full).")
-    parser.add_argument("--n_samples", type=int, default=100, help="Number of samples generated")
+    parser.add_argument(
+        "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)."
+    )
+    parser.add_argument(
+        "--n_samples", type=int, default=100, help="Number of samples generated"
+    )
     parser.add_argument("--rand_seed", type=int, default=63, help="Random seed")
-    
+
     parsed_args = parser.parse_args()
 
     # Handle dependency between max_dist and n_dist
@@ -39,11 +50,12 @@ def parse_arguments():
         parsed_args.max_dist = None  # Disable max_dist if n_dist is set
         print("Warning: max_dist is ignored because n_dist is set.")
     # QC:
-    print(parsed_args)
-    
+    # print(parsed_args)
+
     return parser.parse_args()
 
-def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+
+def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
     """
     Set up directories for storing data, plots, and logs.
 
@@ -72,16 +84,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         project_dir = os.path.abspath(base_dir)
     else:
         project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    
+
     # QC:
     print(f"Project Directory: {project_dir}")
 
     # Define the dataset directory
-    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
+    dataset_dir = os.path.join(
+        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
+    )
 
     # Use the provided format string for the problem-specific directory
     if dir_format is None:
-        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+        problem_specific_dir = os.path.join(
+            dataset_dir, f"{problem}_meshtype_{mesh_type}"
+        )
     else:
         # check if dir_format is a valid string format
         if not isinstance(dir_format, str):
@@ -90,8 +106,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
 
     # Define default subdirectories if not provided
     if subdirs is None:
-        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
-                   "plot_compare", "train", "test", "val"]
+        subdirs = [
+            "data",
+            "plot",
+            "log",
+            "mesh",
+            "mesh_fine",
+            "plot_compare",
+            "train",
+            "test",
+            "val",
+        ]
 
     # Create and clear directories
     directories = {}
@@ -106,10 +131,11 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         directories[subdir] = dir_path
 
     # QC:
-    print(f"Subdirectories created: {directories}")
+    # print(f"Subdirectories created: {directories}")
 
     return directories
 
+
 def output_csv(parameters, key_list, output_dir):
     """
     Write selected parameters to a CSV file.
@@ -134,51 +160,73 @@ def output_csv(parameters, key_list, output_dir):
         # Write data (values)
         csv_writer.writerow(csv_data)
 
-def move_data(target, source, start, num_files):
+
+def split_data(
+    source_dir,
+    train_dir,
+    test_dir,
+    val_dir,
+    train_ratio=0.75,
+    test_ratio=0.15,
+    val_ratio=0.1,
+):
     """
-    Move data files from the source directory to the target directory.
+    Shuffle the files in source_dir and copy them into train/test/validation directories.
 
     Args:
-        target (str): The path to the target directory.
-        source (str): The path to the source directory.
-        start (int): The starting index of the files to move.
-        num_files (int): The total number of files to move.
+        source_dir (str): Path to the source directory containing files.
+        train_dir (str): Path to the train directory.
+        test_dir (str): Path to the test directory.
+        val_dir (str): Path to the validation directory.
+        train_ratio (float): Proportion of files to allocate to the train set.
+        test_ratio (float): Proportion of files to allocate to the test set.
+        val_ratio (float): Proportion for the validation set, which receives the remainder.
 
     Raises:
-        FileNotFoundError: If the source directory does not exist.
-        ValueError: If the start index or num_files is invalid.
+        ValueError: If any ratio is outside [0, 1] or the ratios do not sum to 1.
     """
-    if not os.path.exists(source):
-        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
-
-    if start < 0 or num_files <= 0:
-        raise ValueError("Invalid start index or number of files to move.")
+    # Validate ratios (the sum is compared with a tolerance: 0.7 + 0.2 + 0.1 != 1.0)
+    if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1):
+        raise ValueError("Ratios must be between 0 and 1.")
+    if abs(train_ratio + test_ratio + val_ratio - 1.0) > 1e-9:
+        raise ValueError(
+            "The sum of train_ratio, test_ratio, and val_ratio must equal 1."
+        )
 
-    # Create the target directory if it doesn't exist
-    if not os.path.exists(target):
-        os.makedirs(target)
-    else:
-        # Clear the target directory by removing all files
-        for file in os.listdir(target):
-            os.remove(os.path.join(target, file))
+    # Collect regular files; sort first so the seeded shuffle below is reproducible
+    files = sorted(
+        f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))
+    )
+    random.shuffle(files)  # Shuffle files for unbiased distribution
 
-    # Copy files sequentially starting from the specified index
-    for i in range(start, start + num_files):
-        try:
-            # Copy the data file
+    # QC:
+    # print(f'files {files}')
+
+    # Calculate split sizes: floor train and test, validation takes the remainder
+    total_files = len(files)
+    num_train = int(total_files * train_ratio)
+    num_test = min(int(total_files * test_ratio), total_files - num_train)
+    num_val = total_files - num_train - num_test
+
+    # Distribute files
+    train_files = files[:num_train]
+    test_files = files[num_train : num_train + num_test]
+    val_files = files[num_train + num_test :]
+
+    for datafiles, target_dir in zip(
+        [train_files, test_files, val_files], [train_dir, test_dir, val_dir]
+    ):
+        for datafile in datafiles:
             shutil.copy(
-                os.path.join(source, f"data_{i:04d}.npy"),
-                os.path.join(target, f"data_{i:04d}.npy"),
+                os.path.join(source_dir, datafile), os.path.join(target_dir, datafile)
             )
-        except FileNotFoundError:
-            print(f"File data_{i:04d}.npy not found in {source}. Skipping.")
-            continue
-        except Exception as e:
-            print(f"An error occurred while copying data_{i:04d}.npy: {e}")
-            continue
+
+    print(
+        f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files."
+    )
+
 
 def process_features(parameters, problem_data_dir):
-    
     # create mesh
     scale_x = parameters["scale_x"]
     mesh_type = parameters["mesh_type"]
@@ -187,12 +235,11 @@ def process_features(parameters, problem_data_dir):
         scale=scale_x, mesh_type=mesh_type
     )  # noqa
     mesh = unstructured_square_mesh_gen.generate_mesh(
-        res=lc, output_filename=os.path.join(directories["data"], f"mesh{i}.msh")
+        res=lc, output_filename=os.path.join(directories["mesh"], f"mesh{i}.msh")
     )
     # Generate Random solution field
     rand_u_generator = UM2N.RandSourceGenerator(
-        use_iso= parameters["data_type"] == "iso",
-        dist_params = parameters
+        use_iso=parameters["data_type"] == "iso", dist_params=parameters
     )
 
     # generate equation
@@ -216,42 +263,18 @@ def process_features(parameters, problem_data_dir):
     mesh_gen = UM2N.MeshGenerator(params={"eq": poisson_eq, "mesh": mesh})
     monitor_val = mesh_gen.monitor_func(mesh)
     hessian = mesh_gen.get_hessian(mesh)
-    hessian_norm = fd.project(mesh_gen.get_hessian_norm(mesh),
-                                fd.FunctionSpace(mesh, "CG", 1)
-                                )
-
-    # Generate Mesh
-    # hessian = UM2N.MeshGenerator(
-    #     params={
-    #         "eq": poisson_eq,
-    #         "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")),  # noqa
-    #     }
-    # ).get_hessian(mesh)
-
-    # hessian_norm = UM2N.MeshGenerator(
-    #     params={
-    #         "eq": poisson_eq,
-    #         "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")),  # noqa
-    #     }
-    # ).monitor_func(mesh)
-
-    # hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1))
+    hessian_norm = fd.project(
+        mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1)
+    )
 
     func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
-    grad_uh_interpolate = fd.interpolate(fd.grad(uh), func_vec_space)
+    grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space))
 
     # ej321 - grad_norm copied from build_helmholtz_square.py
     grad_norm = fd.Function(res["function_space"])
     grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
     grad_norm /= grad_norm.vector().max()
 
-    # mesh_gen = UM2N.MeshGenerator(
-    #     params={
-    #         "eq": poisson_eq,
-    #         "mesh": fd.Mesh(os.path.join(problem_mesh_dir, f"mesh{i}.msh")),  # noqa
-    #     }
-    # )
-
     start = time.perf_counter()
     new_mesh = mesh_gen.move_mesh()
     end = time.perf_counter()
@@ -265,13 +288,7 @@ def process_features(parameters, problem_data_dir):
 
     # get phi/grad_phi projected to the original mesh
     phi = mesh_gen.get_phi()
-    # phi = fd.project(
-    #     phi, fd.FunctionSpace(mesh, "CG", 1)
-    # )
     grad_phi = mesh_gen.get_grad_phi()
-    # grad_phi = fd.project(
-    #     grad_phi, fd.VectorFunctionSpace(mesh, "CG", 1)
-    # )
 
     # solve the equation on the new mesh
     new_res = poisson_eq.discretise(new_mesh)
@@ -294,35 +311,39 @@ def process_features(parameters, problem_data_dir):
         feature={
             "uh": uh.dat.data_ro.reshape(-1, 1),
             "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
-            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1), # ej321 - added grad_norm
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(
+                -1, 1
+            ),  # ej321 - added grad_norm
             "hessian": hessian.dat.data_ro.reshape(-1, 4),
             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
             "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
             "phi": phi.dat.data_ro.reshape(-1, 1),
             "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
-            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1), # ej321 - added monitor_val
+            "monitor_val": monitor_val.dat.data_ro.reshape(
+                -1, 1
+            ),  # ej321 - added monitor_val
         },
         raw_feature={
             "uh": uh,
             "hessian_norm": hessian_norm,
-            "monitor_val": monitor_val, # ej321 - added monitor_val
+            "monitor_val": monitor_val,  # ej321 - added monitor_val
             "jacobian": jacobian,
             "jacobian_det": jacobian_det,
         },
         dist_params=dist_params,
     )
 
-    mesh_processor.save_taining_data(
-        os.path.join(directories["data"], "data_{}".format(i))
-    )
+    mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}"))
 
     # ====  Plot Scripts ======================
     fig = plt.figure(figsize=(15, 10))
     ax1 = fig.add_subplot(2, 3, 1, projection="3d")
     # Plot the exact solution
     ax1.set_title("Exact Solution")
-    fd.trisurf(fd.interpolate(res["u_exact"], res["function_space"]), axes=ax1)
+    fd.trisurf(
+        fd.assemble(interpolate(res["u_exact"], res["function_space"])), axes=ax1
+    )
     # Plot the solved solution
     ax2 = fig.add_subplot(2, 3, 2, projection="3d")
     ax2.set_title("FEM Solution")
@@ -358,8 +379,8 @@ def process_features(parameters, problem_data_dir):
     high_res_function_space = fd.FunctionSpace(high_res_mesh, "CG", 1)
 
     res_high_res = poisson_eq.discretise(high_res_mesh)
-    u_exact = fd.interpolate(
-        res_high_res["u_exact"], res_high_res["function_space"]
+    u_exact = fd.assemble(
+        interpolate(res_high_res["u_exact"], res_high_res["function_space"])
     )
 
     uh = fd.project(uh, high_res_function_space)
@@ -369,21 +390,22 @@ def process_features(parameters, problem_data_dir):
     error_optimal_mesh = fd.errornorm(u_exact, uh_new)
 
     # Write to CSV
-    with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+    with open(
+        os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline=""
+    ) as csvfile:
         csv_writer = csv.writer(csvfile)
         # Write header (keys)
         csv_writer.writerow(["error_og", "error_adapt", "time"])
         # Write data (values)
         csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur])
-        
+
     print("error og/optimal:", error_original_mesh, error_optimal_mesh)
-    
 
-if __name__ == "__main__":
 
+if __name__ == "__main__":
     # parse args
     args = parse_arguments()
-    
+
     # ====  Parameters ======================
     parameters = {
         # parameters for problem
@@ -430,31 +452,42 @@ def process_features(parameters, problem_data_dir):
     random.seed(args.rand_seed)
 
     # ====  Setup Directories ======================
-    problem_specific_dir = "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
-            parameters["z_min"], parameters["z_max"],
-            parameters["n_dist"],parameters["max_dist"],
-            parameters["lc"], parameters["n_samples"],
-            parameters["data_type"], parameters["scheme"], parameters["mesh_type"]
+    problem_specific_dir = (
+        "z=<{},{}>_ndist={}_max_dist={}_lc={}_n={}_{}_{}_meshtype_{}".format(
+            parameters["z_min"],
+            parameters["z_max"],
+            parameters["n_dist"],
+            parameters["max_dist"],
+            parameters["lc"],
+            parameters["n_samples"],
+            parameters["data_type"],
+            parameters["scheme"],
+            parameters["mesh_type"],
         )
+    )
 
     subdirs = [
-        "data", "plot", "log", "mesh", "mesh_fine",
-        "train", "test", "val",
+        "data",
+        "plot",
+        "log",
+        "mesh",
+        "mesh_fine",
+        "train",
+        "test",
+        "val",
     ]
 
-    directories = setup_directories(problem = parameters["problem"],
-                        mesh_type = parameters["mesh_type"],
-                        base_dir = None,
-                        subdirs = subdirs,
-                        dir_format = problem_specific_dir)
-
+    directories = setup_directories(
+        problem=parameters["problem"],
+        mesh_type=parameters["mesh_type"],
+        base_dir=None,
+        subdirs=subdirs,
+        dir_format=problem_specific_dir,
+    )
 
     # ====  Output CSV ======================
-    key_list = [
-        "cmin","cmax",
-        "data_type", "scheme", "n_samples", "lc", "mesh_type"
-    ]
-    output_csv(parameters, key_list, directories["data"])
+    key_list = ["cmin", "cmax", "data_type", "scheme", "n_samples", "lc", "mesh_type"]
+    output_csv(parameters, key_list, directories["log"])
 
     # ====  Data Generation Scripts ======================
     for i in range(parameters["n_samples"]):
@@ -469,39 +502,13 @@ def process_features(parameters, problem_data_dir):
             continue
 
     # ====  Data Splits ============================================
-    num_train = int(parameters["n_samples"] * parameters["p_train"])
-    num_test = int(parameters["n_samples"] * parameters["p_test"])
-    num_val = parameters["n_samples"] - num_train - num_test
-
-    move_data(directories["train"], directories["data"], 0, num_train)
-    move_data(directories["test"], directories["data"], num_train, num_train + num_test)
-    move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val)
-
-
-# ====  Data Generation Scripts ======================
-if __name__ == "__main__":
-    print("In build_dataset.py")
-    # i = 0
-    # while i < n_samples:
-    for i in range(parameters["n_samples"]):
-        try:
-            print("Generating Sample: " + str(i))
-            # create dataset
-            process_features(parameters, directories)
-        #    i += 1
-        except fd.exceptions.ConvergenceError:
-            pass
-        except AttributeError:
-            pass
-        except ValueError:
-            pass
-
-    # ====  Data Splits ============================================
-    num_train = int(parameters["n_samples"] * parameters["p_train"])
-    num_test = int(parameters["n_samples"] * parameters["p_test"])
-    num_val = parameters["n_samples"] - num_train - num_test
-
-    move_data(directories["train"], directories["data"], 0, num_train)
-    move_data(directories["test"], directories["data"], num_train, num_train + num_test)
-    move_data(directories["val"], directories["data"], num_train + num_test, num_train + num_test + num_val)
-
+    # TODO: this should probably be done in the training script, not the build script
+    split_data(
+        source_dir=directories["data"],
+        train_dir=directories["train"],
+        test_dir=directories["test"],
+        val_dir=directories["val"],
+        train_ratio=parameters["p_train"],
+        test_ratio=parameters["p_test"],
+        val_ratio=parameters["p_val"],
+    )
diff --git a/script/build_swirl.py b/script/build_swirl.py
index 4ea7814..cc8b0ff 100644
--- a/script/build_swirl.py
+++ b/script/build_swirl.py
@@ -7,33 +7,51 @@
 
 import firedrake as fd
 import matplotlib.pyplot as plt
-import pandas as pd
 
-# import UM2N
-
-# import pandas as pd
-from firedrake.__future__ import interpolate
-
-# dd the parent directory to the Python path
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import UM2N
 
+
 def parse_arguments():
     """Parse command-line arguments."""
     parser = ArgumentParser(description="Build Burgers dataset with square meshes.")
-    parser.add_argument("--mesh_type", type=int, default=2, help="Algorithm used to generate mesh.")
-    parser.add_argument("--sigma", type=float, default=(0.05 / 3), help="initial ring shape control")
+    parser.add_argument(
+        "--mesh_type", type=int, default=2, help="Algorithm used to generate mesh."
+    )
+    parser.add_argument(
+        "--sigma", type=float, default=(0.05 / 3), help="initial ring shape control"
+    )
     parser.add_argument("--r_0", type=float, default=0.2, help="initial ring radius")
-    parser.add_argument("--x_0", type=float, default=0.5, help="ring center x coordinate")
-    parser.add_argument("--y_0", type=float, default=0.5, help="ring center y coordinate")
-    parser.add_argument("--alpha", type=float, default=1.5, help="swirl (velocity) scalar coefficient")
-    parser.add_argument("--save_interval", type=int, default=10, help="output sample file interval")
-    parser.add_argument("--lc", type=float, default=5e-2, help="Length characteristic of unstructured mesh elements.")
-    parser.add_argument("--n_grid", type=int, default=20, help="number number of grids in a mesh when mesh_type is 0)")
-    parser.add_argument("--n_monitor_smooth", type=int, default=10, help="apply Laplacian smoother n time to monitor function")
-   
-    
+    parser.add_argument(
+        "--x_0", type=float, default=0.5, help="ring center x coordinate"
+    )
+    parser.add_argument(
+        "--y_0", type=float, default=0.5, help="ring center y coordinate"
+    )
+    parser.add_argument(
+        "--alpha", type=float, default=1.5, help="swirl (velocity) scalar coefficient"
+    )
+    parser.add_argument(
+        "--save_interval", type=int, default=10, help="output sample file interval"
+    )
+    parser.add_argument(
+        "--lc",
+        type=float,
+        default=5e-2,
+        help="Length characteristic of unstructured mesh elements.",
+    )
+    parser.add_argument(
+        "--n_grid",
+        type=int,
+        default=20,
+        help="number number of grids in a mesh when mesh_type is 0)",
+    )
+    parser.add_argument(
+        "--n_monitor_smooth",
+        type=int,
+        default=10,
+        help="apply Laplacian smoother n time to monitor function",
+    )
+
     parsed_args = parser.parse_args()
 
     # Handle dependency between max_dist and n_dist
@@ -43,12 +61,12 @@ def parse_arguments():
     #     parsed_args.max_dist = None  # Disable max_dist if n_dist is set
     #     print("Warning: max_dist is ignored because n_dist is set.")
     # QC:
-    print(parsed_args)
-    
-    return parser.parse_args()
+    # print(parsed_args)
+
+    return parsed_args
 
 
-def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_format=None):
+def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
     """
     Set up directories for storing data, plots, and logs.
 
@@ -77,16 +95,20 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         project_dir = os.path.abspath(base_dir)
     else:
         project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    
+
     # QC:
     print(f"Project Directory: {project_dir}")
 
     # Define the dataset directory
-    dataset_dir = os.path.join(project_dir, "data", f"dataset_meshtype_{mesh_type}", problem)
+    dataset_dir = os.path.join(
+        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
+    )
 
     # Use the provided format string for the problem-specific directory
     if dir_format is None:
-        problem_specific_dir = os.path.join(dataset_dir, f"{problem}_meshtype_{mesh_type}")
+        problem_specific_dir = os.path.join(
+            dataset_dir, f"{problem}_meshtype_{mesh_type}"
+        )
     else:
         # check if dir_format is a valid string format
         if not isinstance(dir_format, str):
@@ -95,8 +117,17 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
 
     # Define default subdirectories if not provided
     if subdirs is None:
-        subdirs = ["data", "plot", "log", "mesh", "mesh_fine",
-                   "plot_compare", "train", "test", "val"]
+        subdirs = [
+            "data",
+            "plot",
+            "log",
+            "mesh",
+            "mesh_fine",
+            "plot_compare",
+            "train",
+            "test",
+            "val",
+        ]
 
     # Create and clear directories
     directories = {}
@@ -111,10 +142,11 @@ def setup_directories(problem, mesh_type, base_dir= None, subdirs=None, dir_form
         directories[subdir] = dir_path
 
     # QC:
-    print(f"Subdirectories created: {directories}")
+    # print(f"Subdirectories created: {directories}")
 
     return directories
 
+
 def output_csv(parameters, key_list, output_dir):
     """
     Write selected parameters to a CSV file.
@@ -138,7 +170,8 @@ def output_csv(parameters, key_list, output_dir):
         csv_writer.writerow(csv_keys)
         # Write data (values)
         csv_writer.writerow(csv_data)
-        
+
+
 def move_data(target, source, start, num_files):
     """
     Move data files from the source directory to the target directory.
@@ -183,13 +216,13 @@ def move_data(target, source, start, num_files):
             continue
 
 
-
 def fail_callback(t):
     """
     Call back for failing cases.
     Log current time for those cases which MA did not converge.
     """
-    fail_t.append(t)
+    print(f"fail to converge at {t}")
+    # fail_t.append(t) #
 
 
 def sample_from_loop(
@@ -213,8 +246,8 @@ def sample_from_loop(
     sigma,
     alpha,
     r_0,
-    x_0, # ej321 - added x_0
-    y_0, # ej321 - added y_0
+    x_0,  # ej321 - added x_0
+    y_0,  # ej321 - added y_0
     t,
     error_og_list=[],
     error_adapt_list=[],
@@ -317,7 +350,9 @@ def sample_from_loop(
     error_optimal_mesh = fd.errornorm(uh_new_proj, uh_fine, norm_type="L2")
 
     # Write to CSV
-    with open(os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline="") as csvfile:
+    with open(
+        os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline=""
+    ) as csvfile:
         csv_writer = csv.writer(csvfile)
         # Write header (keys)
         csv_writer.writerow(["error_og", "error_adapt", "time"])
@@ -325,7 +360,6 @@ def sample_from_loop(
         csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur])
     print("error og/optimal:", error_original_mesh, error_optimal_mesh)
 
-
     # ====  Plot mesh, solution, error ======================
     rows, cols = 3, 3
     fig, ax = plt.subplots(
@@ -406,10 +440,9 @@ def sample_from_loop(
 
 
 if __name__ == "__main__":
-
     # parse args
     args = parse_arguments()
-    
+
     # ====  Parameters ======================
     parameters = {
         # parameters for problem
@@ -423,7 +456,7 @@ def sample_from_loop(
         # "n_dist": args.n_dist,
         # "max_dist": args.max_dist,
         "lc": args.lc,
-        "n_grid": args.n_grid if args.n_grid else int(1 / lc),
+        "n_grid": args.n_grid if args.n_grid else int(1 / args.lc),
         # parameters for ??????
         # "n_samples": args.n_samples,
         # "data_type": args.field_type,
@@ -450,7 +483,6 @@ def sample_from_loop(
         # parameters for storing files
         "save_interval": args.save_interval,
         "fail_t": [],  # list storing failing dts
-        
         # parameters for isotropic data
         # "w_min": 0.05,
         # "w_max": 0.2,
@@ -472,42 +504,53 @@ def sample_from_loop(
 
     # ====  Setup Directories ======================
     problem_specific_dir = "sigma_{:.3f}_alpha_{}_r0_{}_x0_{}_y0_{}_lc_{}_ngrid_{}_interval_{}_meshtype_{}_smooth_{}".format(
-            parameters["sigma"], parameters["alpha"],
-            parameters["r_0"], parameters["x_0"], parameters["y_0"],
-            parameters["lc"], parameters["n_grid"],
-            parameters["save_interval"], parameters["mesh_type"],
-            parameters["n_monitor_smooth"]
+        parameters["sigma"],
+        parameters["alpha"],
+        parameters["r_0"],
+        parameters["x_0"],
+        parameters["y_0"],
+        parameters["lc"],
+        parameters["n_grid"],
+        parameters["save_interval"],
+        parameters["mesh_type"],
+        parameters["n_monitor_smooth"],
     )
 
     subdirs = [
-        "data", "plot","plot_compare","log", "mesh", "mesh_fine",
+        "data",
+        "plot",
+        "plot_compare",
+        "log",
+        "mesh",
+        "mesh_fine",
         # "train", "test", "val",
     ]
 
-    directories = setup_directories(problem = parameters["problem"],
-                        mesh_type = parameters["mesh_type"],
-                        base_dir = None,
-                        subdirs = subdirs,
-                        dir_format = problem_specific_dir)
-
+    directories = setup_directories(
+        problem=parameters["problem"],
+        mesh_type=parameters["mesh_type"],
+        base_dir=None,
+        subdirs=subdirs,
+        dir_format=problem_specific_dir,
+    )
 
     # ====  Output CSV ======================
     key_list = [
-            "sigma",
-            "alpha",
-            "r_0",
-            "x_0",
-            "y_0",
-            "save_interval",
-            "T",
-            "n_step",
-            "dt",
-            "fail_t",
-            "lc",
-            "fail_cases",
-            "mesh_type",
+        "sigma",
+        "alpha",
+        "r_0",
+        "x_0",
+        "y_0",
+        "save_interval",
+        "T",
+        "n_step",
+        "dt",
+        "fail_t",
+        "lc",
+        "fail_cases",
+        "mesh_type",
     ]
-    output_csv(parameters, key_list, directories["data"])
+    output_csv(parameters, key_list, directories["log"])
 
     # ====  Data Generation Scripts ======================
     print("In build_dataset.py")
@@ -547,7 +590,7 @@ def sample_from_loop(
         mesh_fine,
         mesh_new,
         mesh_model=mesh_model,
-        **parameters
+        **parameters,
         # sigma=sigma,
         # alpha=alpha,
         # r_0=r_0,
@@ -563,4 +606,3 @@ def sample_from_loop(
     swirl_solver.solve_problem(callback=sample_from_loop, fail_callback=fail_callback)
 
     print("Done!")
-

From 33c85db917456b9d21a36e8bd4b26e92ee7d44ac Mon Sep 17 00:00:00 2001
From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com>
Date: Mon, 7 Jul 2025 08:33:59 +0100
Subject: [PATCH 3/7] #70 clean comments

---
 UM2N/generator/burgers_solver.py    |  45 ++++--------
 UM2N/generator/mesh_generator.py    |   3 +-
 UM2N/generator/swirl_solver.py      |  36 ++++-----
 UM2N/generator/swirl_solver_step.py |  14 ++--
 script/build_burgers_square.py      | 103 ++------------------------
 script/build_helmholtz_poly.py      |  42 +++--------
 script/build_helmholtz_square.py    |  16 ++--
 script/build_poisson_poly.py        |  54 +++-----------
 script/build_poisson_square.py      |  21 ++----
 script/build_swirl.py               | 109 +---------------------------
 script/ej321_helm_dataset_run.sh    |  57 +++++++++++++++
 script/make_build_all_test.sh       |  15 ++++
 12 files changed, 156 insertions(+), 359 deletions(-)
 create mode 100644 script/ej321_helm_dataset_run.sh
 create mode 100644 script/make_build_all_test.sh

diff --git a/UM2N/generator/burgers_solver.py b/UM2N/generator/burgers_solver.py
index fb901a6..5c78c46 100644
--- a/UM2N/generator/burgers_solver.py
+++ b/UM2N/generator/burgers_solver.py
@@ -9,7 +9,7 @@
 import movement as mv
 import numpy as np  # noqa
 
-from firedrake.__future__ import interpolate # ej321 add
+from firedrake.__future__ import interpolate
 
 __all__ = ["BurgersSolver"]
 
@@ -200,15 +200,15 @@ def solve_problem(self, callback=None):
             # solve on fine mesh
             fd.solve(self.F_fine == 0, self.u_fine)
             start = time.perf_counter()
-            adapter = mv.MongeAmpereMover(
+            adaptor = mv.MongeAmpereMover(
                 self.mesh,
                 monitor_function=self.monitor_function,
                 rtol=1e-3,
             )
-            # ej321 - added monitor_function for feature extraction
-            raw_monitor_val = self.monitor_function(self.mesh) # ej321 - is this the correct mesh to use?
 
-            adapter.move()
+            raw_monitor_val = self.monitor_function(self.mesh)
+
+            adaptor.move()
             end = time.perf_counter()
             dur_ms = (end - start) * 1000
 
@@ -221,8 +221,7 @@ def solve_problem(self, callback=None):
             function_space = fd.FunctionSpace(self.mesh, "CG", 1)
             uh_0 = fd.Function(function_space)
             uh_0.project(self.u[0])
-            
-            # ej321 - added monitor_function for feature extraction
+
             monitor_val = fd.Function(function_space)
             monitor_val.assign(raw_monitor_val)
 
@@ -250,41 +249,29 @@ def solve_problem(self, callback=None):
             func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1)
             uh_grad = fd.interpolate(fd.grad(uh_0), func_vec_space)
 
-            # ej321 - grad_norm copied from build_helmholtz_square.py
-            grad_uh_interpolate = fd.assemble(interpolate(fd.grad(self.u[0]),func_vec_space))
+            grad_uh_interpolate = fd.assemble(
+                interpolate(fd.grad(self.u[0]), func_vec_space)
+            )
             grad_norm = fd.Function(function_space)
             grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
             grad_norm /= grad_norm.vector().max()
 
             hessian_norm = self.f_norm
             hessian = self.l2_projection
-            phi = adapter.phi
-            phi_grad = adapter.grad_phi
-            # sigma = adapter.sigma
-            sigma = adapter.H # ej321 - this may be the updated hessian?
+            phi = adaptor.phi
+            phi_grad = adaptor.grad_phi
+            sigma = adaptor.H
             I = fd.Identity(2)  # noqa
             jacobian = I + sigma
-            # jacobian_det = fd.Function(function_space, name="jacobian_det")
-            # jacobian_det = fd.Function(adapter.P1, name="jacobian_det")
-            self.jacob_det = fd.Function(adapter.P1, name="jacobian_det").project(
+            self.jacob_det = fd.Function(adaptor.P1, name="jacobian_det").project(
                 jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0]
             )
-            # self.jacob_det = jacobian_det
-            # self.jacob_det = fd.project(
-            #     jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1)
-            # ) #  ej321 - not needed?
-            # self.jacob = jacobian # ej321 - this is copied from mesh_generator.py
-            self.jacob = fd.Function(adapter.P1_ten, name="jacobian").project(jacobian)
-            # self.jacob.project(jacobian)
-            # self.jacob = fd.project(
-            #     jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1)
-            # ) #  ej321 - not needed?
-            
+            self.jacob = fd.Function(adaptor.P1_ten, name="jacobian").project(jacobian)
 
             callback(
                 uh=uh_0,
                 uh_grad=uh_grad,
-                grad_norm = grad_norm,  # ej321 - added grad_norm
+                grad_norm=grad_norm,
                 hessian_norm=hessian_norm,
                 hessian=hessian,
                 phi=phi,
@@ -304,7 +291,7 @@ def solve_problem(self, callback=None):
                 dur=dur_ms,
                 t=t,
                 idx=self.idx,
-                monitor_val=monitor_val,  # ej321 - added monitor_val
+                monitor_val=monitor_val,
             )
 
             # step forward in time
diff --git a/UM2N/generator/mesh_generator.py b/UM2N/generator/mesh_generator.py
index 8853c50..1e7431f 100644
--- a/UM2N/generator/mesh_generator.py
+++ b/UM2N/generator/mesh_generator.py
@@ -54,8 +54,7 @@ def move_mesh(self):
         )
         mover.move()
         # extract Hessian of the movement
-        # sigma = mover.sigma
-        sigma = mover.H # ej321 - this may be the updated hessian?
+        sigma = mover.H
         I = fd.Identity(2)  # noqa
         jacobian = I + sigma
         jacobian_det = fd.Function(mover.P1, name="jacobian_det")
diff --git a/UM2N/generator/swirl_solver.py b/UM2N/generator/swirl_solver.py
index 750d7e7..2c98f35 100644
--- a/UM2N/generator/swirl_solver.py
+++ b/UM2N/generator/swirl_solver.py
@@ -575,7 +575,7 @@ def solve_problem(self, callback=None, fail_callback=None):
         print("In solve problem")
         self.t = 0.0
         step = 0
-        adapter = mv.MongeAmpereMover(
+        adaptor = mv.MongeAmpereMover(
             self.mesh, monitor_function=self.monitor_function, rtol=1e-3, maxiter=500
         )
         for i in range(self.n_step):
@@ -602,15 +602,15 @@ def solve_problem(self, callback=None, fail_callback=None):
                 # self.mesh.coordinates.dat.data[:] = self.adapt_coord_prev
                 # mesh movement - calculate the adapted coords
                 start = time.perf_counter()
-                # adapter = mv.MongeAmpereMover(
+                # adaptor = mv.MongeAmpereMover(
                 #     self.mesh, monitor_function=self.monitor_function, rtol=1e-3, maxiter=100
                 # )
-                adapter.move()
+                adaptor.move()
                 end = time.perf_counter()
                 dur_ms = (end - start) * 1e3
                 # self.mesh_new.coordinates.dat.data[:] = self.adapt_coord
                 self.mesh_new.coordinates.dat.data[:] = (
-                    adapter.mesh.coordinates.dat.data[:]
+                    adaptor.mesh.coordinates.dat.data[:]
                 )
                 # self.adapt_coord_prev = self.mesh_new.coordinates.dat.data[:]
 
@@ -658,26 +658,18 @@ def solve_problem(self, callback=None, fail_callback=None):
                 uh_grad = fd.interpolate(fd.grad(uh), func_vec_space)
 
                 hessian = self.l2_projection
-                phi = adapter.phi
-                phi_grad = adapter.grad_phi
-                # sigma = adapter.sigma
-                sigma = adapter.H # ej321 - this may be the updated hessian?
+                phi = adaptor.phi
+                phi_grad = adaptor.grad_phi
+                sigma = adaptor.H
                 I = fd.Identity(2)  # noqa
                 jacobian = I + sigma
-                # jacobian_det = fd.Function(function_space, name="jacobian_det")
-                # jacobian_det.project(
-                #     jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0]
-                # )
-                # self.jacob_det = fd.project(
-                #     jacobian_det, fd.FunctionSpace(self.mesh, "CG", 1)
-                # )
-                self.jacob_det = fd.Function(adapter.P1, name="jacobian_det").project(
-                jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0]
-            )
-                # self.jacob = fd.project(
-                #     jacobian, fd.TensorFunctionSpace(self.mesh, "CG", 1)
-                # )
-                self.jacob = fd.Function(adapter.P1_ten, name="jacobian").project(jacobian)
+                self.jacob_det = fd.Function(adaptor.P1, name="jacobian_det").project(
+                    jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0]
+                )
+
+                self.jacob = fd.Function(adaptor.P1_ten, name="jacobian").project(
+                    jacobian
+                )
 
                 if ((step + 1) % self.save_interval == 0) or (step == 0):
                     callback(
diff --git a/UM2N/generator/swirl_solver_step.py b/UM2N/generator/swirl_solver_step.py
index a253ca0..e5d79fc 100644
--- a/UM2N/generator/swirl_solver_step.py
+++ b/UM2N/generator/swirl_solver_step.py
@@ -698,13 +698,13 @@ def solve_problem(self, callback=None, fail_callback=None):
                     # self.mesh.coordinates.dat.data[:] = self.adapt_coord_prev
                     # mesh movement - calculate the adapted coords
                     start = time.perf_counter()
-                    adapter = mv.MongeAmpereMover(
+                    adaptor = mv.MongeAmpereMover(
                         self.mesh,
                         monitor_function=self.monitor_function,
                         rtol=1e-3,
                         maxiter=100,
                     )
-                    adapter.move()
+                    adaptor.move()
                     end = time.perf_counter()
                     dur_ms = (end - start) * 1e3
                     self.mesh_new.coordinates.dat.data[:] = self.adapt_coord
@@ -750,13 +750,11 @@ def solve_problem(self, callback=None, fail_callback=None):
 
                     func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1)
                     uh_grad = fd.interpolate(fd.grad(self.uh), func_vec_space)
-                    # hessian_norm = self.f_norm
-                    # monitor_values = adapter.monitor
+
                     hessian = self.l2_projection
-                    phi = adapter.phi
-                    phi_grad = adapter.grad_phi
-                    # sigma = adapter.sigma
-                    sigma = adapter.H # ej321 - this may be the updated hessian?
+                    phi = adaptor.phi
+                    phi_grad = adaptor.grad_phi
+                    sigma = adaptor.H
                     I = fd.Identity(2)  # noqa
                     jacobian = I + sigma
                     jacobian_det = fd.Function(function_space, name="jacobian_det")
diff --git a/script/build_burgers_square.py b/script/build_burgers_square.py
index 6266040..ec5504d 100644
--- a/script/build_burgers_square.py
+++ b/script/build_burgers_square.py
@@ -31,9 +31,11 @@ def parse_arguments():
     parser.add_argument(
         "--field_type", type=str, default="iso", help="Data type (aniso/iso)."
     )
-    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
     parser.add_argument(
-        "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)."
+        "--boundary_scheme",
+        type=str,
+        default="pad",
+        help="use padded scheme or full-scale scheme to sample central point of the bump (pad/full).",
     )
     parser.add_argument(
         "--n_case", type=int, default=5, help="Number of simulation cases."
@@ -305,79 +307,10 @@ def get_sample_param_of_nu_generalization_by_idx_train(idx_in):
     return gauss_list_, nu_
 
 
-# def get_sample_params(idx):
-#     """Retrieve sample parameters for the Burgers problem."""
-#     return UM2N.get_sample_param_of_nu_generalization_by_idx_train(idx)
-
-
-# def solve_case(idx, mesh, mesh_new, mesh_fine, dirs):
-#     """Solve a single case of the Burgers problem."""
-#     gauss_list, nu = get_sample_params(idx)
-#     solver = UM2N.BurgersSolver(mesh, mesh_fine, mesh_new, gauss_list=gauss_list, nu=nu, idx=idx)
-#     solver.solve_problem(lambda *args: sample_from_loop(*args, dirs))
-
-
-# def sample_from_loop(
-#     uh, uh_grad, hessian, hessian_norm, phi, grad_phi, jacobian, jacobian_det,
-#     uh_new, mesh_og, mesh_new, function_space, function_space_fine, uh_fine,
-#     dur, nu, gauss_list, t, idx, dirs, error_og_list=[], error_adapt_list=[]
-# ):
-#     """Process and save data from a single simulation loop."""
-#     mesh_processor = UM2N.MeshProcessor(
-#         original_mesh=mesh_og,
-#         optimal_mesh=mesh_new,
-#         function_space=function_space,
-#         use_4_edge=True,
-#         feature={
-#             "uh": uh.dat.data_ro.reshape(-1, 1),
-#             "grad_uh": uh_grad.dat.data_ro.reshape(-1, 2),
-#             "hessian": hessian.dat.data_ro.reshape(-1, 4),
-#             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
-#             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
-#             "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
-#             "phi": phi.dat.data_ro.reshape(-1, 1),
-#             "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
-#         },
-#         raw_feature={
-#             "uh": uh,
-#             "hessian_norm": hessian_norm,
-#             "jacobian": jacobian,
-#             "jacobian_det": jacobian_det,
-#         },
-#         nu=nu,
-#         gauss_list=gauss_list,
-#         dur=dur,
-#         t=t,
-#         idx=idx,
-#     )
-
-#     mesh_processor.save_taining_data(os.path.join(dirs["data"], f"data_{idx}"))
-
-#     # Plot results
-#     fig, axes = plt.subplots(2, 3, figsize=(15, 10))
-#     fd.trisurf(uh_fine, axes=axes[0, 0]).set_title("Solution field (HR)")
-#     fd.trisurf(uh, axes=axes[0, 1]).set_title("Solution field (Original Mesh)")
-#     fd.trisurf(uh_new, axes=axes[0, 2]).set_title("Solution field (Adapted Mesh)")
-#     fd.triplot(mesh_og, axes=axes[1, 0]).set_title("Original Mesh")
-#     fd.triplot(mesh_new, axes=axes[1, 1]).set_title("Adapted Mesh")
-#     fd.tripcolor(uh_new, cmap="coolwarm", axes=axes[1, 2]).set_title("Solution on Optimal Mesh")
-#     fig.savefig(os.path.join(dirs["plot"], f"plot_{idx}.png"))
-#     plt.close(fig)
-
-#     # Save error metrics
-#     uh = fd.project(uh, function_space_fine)
-#     uh_new = fd.project(uh_new, function_space_fine)
-#     error_original_mesh = fd.errornorm(uh, uh_fine, norm_type="L2")
-#     error_optimal_mesh = fd.errornorm(uh_new, uh_fine, norm_type="L2")
-#     pd.DataFrame(
-#         {"error_og": [error_original_mesh], "error_adapt": [error_optimal_mesh], "time": [dur]}
-#     ).to_csv(os.path.join(dirs["log"], f"log_{idx}.csv"), index=False)
-
-
 def sample_from_loop(
     uh,
     uh_grad,
-    grad_norm,  # ej321 - added grad_norm
+    grad_norm,
     hessian,
     hessian_norm,
     phi,
@@ -409,9 +342,7 @@ def sample_from_loop(
         feature={
             "uh": uh.dat.data_ro.reshape(-1, 1),
             "grad_uh": uh_grad.dat.data_ro.reshape(-1, 2),
-            "grad_uh_norm": grad_norm.dat.data_ro.reshape(
-                -1, 1
-            ),  # ej321 - added grad_norm
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1),
             "hessian": hessian.dat.data_ro.reshape(-1, 4),
             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
@@ -423,7 +354,7 @@ def sample_from_loop(
         raw_feature={
             "uh": uh,
             "hessian_norm": hessian_norm,
-            "monitor_val": monitor_val,  # ej321 - added monitor_val
+            "monitor_val": monitor_val,
             "jacobian": jacobian,
             "jacobian_det": jacobian_det,
         },
@@ -469,13 +400,6 @@ def sample_from_loop(
     fig.savefig(os.path.join(directories["plot"], "plot_{}.png".format(i)))
     i += 1
 
-    # fig, ax = plt.subplots()
-    # ax.set_title("adapt error list")
-    # ax.plot(error_adapt_list, linestyle='--', color='blue', label='adapt')
-    # # ax.plot(error_og_list, linestyle='--', color='red', label='og')
-    # ax.legend()
-    # plt.show()
-
     # ==========================================
     uh = fd.project(uh, function_space_fine)
     uh_new = fd.project(uh_new, function_space_fine)
@@ -494,16 +418,6 @@ def sample_from_loop(
         csv_writer.writerow([error_original_mesh, error_optimal_mesh, dur])
 
     print("error og/optimal:", error_original_mesh, error_optimal_mesh)
-    # df = pd.DataFrame(
-    #     {
-    #         "error_og": error_original_mesh,
-    #         "error_adapt": error_optimal_mesh,
-    #         "time": dur,
-    #     },
-    #     index=[0],
-    # )
-    # df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i)))
-    # print("error og/optimal:", error_original_mesh, error_optimal_mesh)
     return
 
 
@@ -521,8 +435,7 @@ def sample_from_loop(
         "max_dist": args.max_dist,
         "lc": args.lc,
         "n_grid": args.n_grid,
-        # parameters for ??????
-        # "n_samples": args.n_samples,
+        # parameters for mesh def
         "data_type": args.field_type,
         "scheme": args.boundary_scheme,
         "mesh_type": int(args.mesh_type),
diff --git a/script/build_helmholtz_poly.py b/script/build_helmholtz_poly.py
index c7bddb8..a330680 100644
--- a/script/build_helmholtz_poly.py
+++ b/script/build_helmholtz_poly.py
@@ -10,8 +10,6 @@
 import firedrake as fd
 import matplotlib.pyplot as plt
 import numpy as np
-
-# import pandas as pd
 from firedrake.__future__ import interpolate
 
 import UM2N
@@ -35,9 +33,11 @@ def parse_arguments():
     parser.add_argument(
         "--field_type", type=str, default="iso", help="Data type (aniso/iso)."
     )
-    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
     parser.add_argument(
-        "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)."
+        "--boundary_scheme",
+        type=str,
+        default="pad",
+        help="Use padded scheme or full-scale scheme to sample central point of the bump (pad/full).",
     )
     parser.add_argument(
         "--n_samples", type=int, default=100, help="Number of samples generated"
@@ -270,7 +270,6 @@ def process_features(parameters, directories):
     func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
     grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space))
 
-    # ej321 - grad_norm copied from build_helmholtz_square.py
     grad_norm = fd.Function(res["function_space"])
     grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
     grad_norm /= grad_norm.vector().max()
@@ -278,17 +277,6 @@ def process_features(parameters, directories):
     # RHS of helmholtz problem
     f_rhs = fd.assemble(interpolate(helmholtz_eq.f, helmholtz_eq.function_space))
 
-    # ej321 - this seems extra - the mesh is never used, just to build 'eq'?
-    # hessian = UM2N.MeshGenerator(
-    #     params={
-    #         "eq": helmholtz_eq,
-    #         "mesh": rand_poly_mesh_gen.generate_mesh(
-    #             res=lc,
-    #             output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
-    #         ),
-    #     }
-    # ).get_hessian(mesh)
-    # ej321 - using script from build_helmholtz_square.py
     mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh})
     monitor_val = mesh_gen.monitor_func(mesh)
     hessian = mesh_gen.get_hessian(mesh)
@@ -296,7 +284,7 @@ def process_features(parameters, directories):
         mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1)
     )
 
-    # move the mesh?
+    # move the mesh
     start = time.perf_counter()
     new_mesh = mesh_gen.move_mesh()
     end = time.perf_counter()
@@ -334,7 +322,7 @@ def process_features(parameters, directories):
         feature={
             "uh": uh.dat.data_ro.reshape(-1, 1),
             "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
-            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1),  # ej321 - added
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1),
             "hessian": hessian.dat.data_ro.reshape(-1, 4),
             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
@@ -342,13 +330,13 @@ def process_features(parameters, directories):
             "phi": phi.dat.data_ro.reshape(-1, 1),
             "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
             "f": f_rhs.dat.data_ro.reshape(-1, 1),
-            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),  # ej321 - added
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),
         },
         raw_feature={
             "uh": uh,
             "hessian_norm": hessian_norm,
-            "monitor_val": monitor_val,  # ej321 - added
-            "grad_uh_norm": grad_norm,  # ej321 - added needed for poly only
+            "monitor_val": monitor_val,
+            "grad_uh_norm": grad_norm,
             "jacobian": jacobian,
             "jacobian_det": jacobian_det,
         },
@@ -412,16 +400,6 @@ def process_features(parameters, directories):
     error_original_mesh = fd.errornorm(u_exact, uh_proj)
     error_optimal_mesh = fd.errornorm(u_exact, uh_new_proj)
 
-    # df = pd.DataFrame(
-    #     {
-    #         "error_og": error_original_mesh,
-    #         "error_adapt": error_optimal_mesh,
-    #         "time": dur,
-    #     },
-    #     index=[0],
-    # )
-    # df.to_csv(os.path.join(problem_log_dir, "log{}.csv".format(i)))
-
     # Write to CSV
     with open(
         os.path.join(directories["log"], f"log_{i:04d}.csv"), mode="w", newline=""
@@ -442,12 +420,10 @@ def process_features(parameters, directories):
     parameters = {
         # parameters for problem
         "problem": "holmholtz_poly",
-        # "n_case": args.n_case, # burgers problem only
         # parameters for random source
         "n_dist": args.n_dist,
         "max_dist": args.max_dist,
         "lc": args.lc,
-        # "n_grig": args.n_grid, # burgers problem only
         # parameters for ??????
         "n_samples": args.n_samples,
         "data_type": args.field_type,
diff --git a/script/build_helmholtz_square.py b/script/build_helmholtz_square.py
index ca85846..0a3452b 100644
--- a/script/build_helmholtz_square.py
+++ b/script/build_helmholtz_square.py
@@ -33,9 +33,11 @@ def parse_arguments():
     parser.add_argument(
         "--field_type", type=str, default="aniso", help="Data type (aniso/iso)"
     )
-    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
     parser.add_argument(
-        "--boundary_scheme", type=str, default="full", help="Boundary scheme (pad/full)"
+        "--boundary_scheme",
+        type=str,
+        default="full",
+        help="Use padded scheme or full-scale scheme to sample central point of the bump (pad/full)",
     )
     parser.add_argument(
         "--n_samples", type=int, default=100, help="Number of samples generated"
@@ -207,8 +209,7 @@ def process_features(parameters, directories):
     # RHS of helmholtz problem
     f_rhs = fd.assemble(interpolate(helmholtz_eq.f, helmholtz_eq.function_space))
 
-    # generate mesh?
-
+    # generate mesh
     mesh_gen = UM2N.MeshGenerator(params={"eq": helmholtz_eq, "mesh": mesh})
     monitor_val = mesh_gen.monitor_func(mesh)
     hessian = mesh_gen.get_hessian(mesh)
@@ -216,7 +217,7 @@ def process_features(parameters, directories):
         mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1)
     )
 
-    # move the mesh?
+    # move the mesh
     start = time.perf_counter()
     new_mesh = mesh_gen.move_mesh()  # noqa
     end = time.perf_counter()
@@ -309,7 +310,6 @@ def process_features(parameters, directories):
     print("error og/optimal:", error_original_mesh, error_optimal_mesh)
 
     # ====  Plot mesh, solution, error ======================
-
     rows, cols = 3, 3
     cmap = "seismic"
 
@@ -495,13 +495,11 @@ def split_data(
     parameters = {
         # parameters for problem
         "problem": "helmholtz",
-        # "n_case": args.n_case, # burgers problem only
         # parameters for random source
         "n_dist": args.n_dist,
         "max_dist": args.max_dist,
         "lc": args.lc,
-        # "n_grid": args.n_grid, # burgers problem only
-        # parameters for ??????
+        # parameters for mesh definition
         "n_samples": args.n_samples,
         "data_type": args.field_type,
         "scheme": args.boundary_scheme,
diff --git a/script/build_poisson_poly.py b/script/build_poisson_poly.py
index 7f6403c..714a4f7 100644
--- a/script/build_poisson_poly.py
+++ b/script/build_poisson_poly.py
@@ -32,9 +32,12 @@ def parse_arguments():
     parser.add_argument(
         "--field_type", type=str, default="iso", help="Data type (aniso/iso)."
     )
-    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
+    # Boundary scheme: padded or full-scale sampling of the bump centre.
     parser.add_argument(
-        "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)."
+        "--boundary_scheme",
+        type=str,
+        default="pad",
+        help="Use padded scheme or full-scale scheme to sample central point of the bump (pad/full).",
     )
     parser.add_argument(
         "--n_samples", type=int, default=100, help="Number of samples generated"
@@ -268,48 +271,15 @@ def process_features(parameters, directories):
     hessian_norm = fd.project(
         mesh_gen.get_hessian_norm(mesh), fd.FunctionSpace(mesh, "CG", 1)
     )
-    # hessian = UM2N.MeshGenerator(
-    #     params={
-    #         "eq": poisson_eq,
-    #         "mesh": rand_poly_mesh_gen.generate_mesh(
-    #             res=lc,
-    #             output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
-    #         ),
-    #     }
-    # ).get_hessian(mesh)
-
-    # hessian_norm = UM2N.MeshGenerator(
-    #     params={
-    #         "eq": poisson_eq,
-    #         "mesh": rand_poly_mesh_gen.generate_mesh(
-    #             res=lc,
-    #             output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
-    #         ),
-    #     }
-    # ).monitor_func(mesh)
-
-    # is this the monitor function value?
-    # hessian_norm = fd.project(hessian_norm, fd.FunctionSpace(mesh, "CG", 1))
 
     func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
     grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space))
 
-    # ej321 - grad_norm copied from build_helmholtz_square.py
     grad_norm = fd.Function(res["function_space"])
     grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
     grad_norm /= grad_norm.vector().max()
 
-    # mesh_gen = UM2N.MeshGenerator(
-    #     params={
-    #         "eq": poisson_eq,
-    #         "mesh": rand_poly_mesh_gen.generate_mesh(
-    #             res=lc,
-    #             output_filename=os.path.join(problem_mesh_dir, f"mesh{i}.msh"),
-    #         ),
-    #     }
-    # )
-
-    # move the mesh?
+    # move the mesh
     start = time.perf_counter()
     new_mesh = mesh_gen.move_mesh()
     end = time.perf_counter()
@@ -347,20 +317,20 @@ def process_features(parameters, directories):
         feature={
             "uh": uh.dat.data_ro.reshape(-1, 1),
             "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
-            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1),  # ej321 - added
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1),
             "hessian": hessian.dat.data_ro.reshape(-1, 4),
             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
             "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
             "phi": phi.dat.data_ro.reshape(-1, 1),
             "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
-            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),  # ej321 - added
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),
         },
         raw_feature={
             "uh": uh,
             "hessian_norm": hessian_norm,
-            "monitor_val": monitor_val,  # ej321 - added
-            "grad_uh_norm": grad_norm,  # ej321 - added needed for poly only
+            "monitor_val": monitor_val,
+            "grad_uh_norm": grad_norm,
             "jacobian": jacobian,
             "jacobian_det": jacobian_det,
         },
@@ -444,13 +414,11 @@ def process_features(parameters, directories):
     parameters = {
         # parameters for problem
         "problem": "poisson_poly",
-        # "n_case": args.n_case, # burgers problem only
         # parameters for random source
         "n_dist": args.n_dist,
         "max_dist": args.max_dist,
         "lc": args.lc,
-        # "n_grig": args.n_grid, # burgers problem only
-        # parameters for ??????
+        # parameters for mesh definition
         "n_samples": args.n_samples,
         "data_type": args.field_type,
         "scheme": args.boundary_scheme,
diff --git a/script/build_poisson_square.py b/script/build_poisson_square.py
index 5513e1b..7b4919f 100644
--- a/script/build_poisson_square.py
+++ b/script/build_poisson_square.py
@@ -32,9 +32,11 @@ def parse_arguments():
     parser.add_argument(
         "--field_type", type=str, default="iso", help="Data type (aniso/iso)."
     )
-    # use padded scheme or full-scale scheme to sample central point of the bump  # noqa
     parser.add_argument(
-        "--boundary_scheme", type=str, default="pad", help="Boundary scheme (pad/full)."
+        "--boundary_scheme",
+        type=str,
+        default="pad",
+        help="Use padded scheme or full-scale scheme to sample central point of the bump (pad/full).",
     )
     parser.add_argument(
         "--n_samples", type=int, default=100, help="Number of samples generated"
@@ -270,7 +272,6 @@ def process_features(parameters, problem_data_dir):
     func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
     grad_uh_interpolate = fd.assemble(interpolate(fd.grad(uh), func_vec_space))
 
-    # ej321 - grad_norm copied from build_helmholtz_square.py
     grad_norm = fd.Function(res["function_space"])
     grad_norm.project(grad_uh_interpolate[0] ** 2 + grad_uh_interpolate[1] ** 2)
     grad_norm /= grad_norm.vector().max()
@@ -311,23 +312,19 @@ def process_features(parameters, problem_data_dir):
         feature={
             "uh": uh.dat.data_ro.reshape(-1, 1),
             "grad_uh": grad_uh_interpolate.dat.data_ro.reshape(-1, 2),
-            "grad_uh_norm": grad_norm.dat.data_ro.reshape(
-                -1, 1
-            ),  # ej321 - added grad_norm
+            "grad_uh_norm": grad_norm.dat.data_ro.reshape(-1, 1),
             "hessian": hessian.dat.data_ro.reshape(-1, 4),
             "hessian_norm": hessian_norm.dat.data_ro.reshape(-1, 1),
             "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
             "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
             "phi": phi.dat.data_ro.reshape(-1, 1),
             "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
-            "monitor_val": monitor_val.dat.data_ro.reshape(
-                -1, 1
-            ),  # ej321 - added monitor_val
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),
         },
         raw_feature={
             "uh": uh,
             "hessian_norm": hessian_norm,
-            "monitor_val": monitor_val,  # ej321 - added monitor_val
+            "monitor_val": monitor_val,
             "jacobian": jacobian,
             "jacobian_det": jacobian_det,
         },
@@ -410,13 +407,11 @@ def process_features(parameters, problem_data_dir):
     parameters = {
         # parameters for problem
         "problem": "poisson",
-        # "n_case": args.n_case, # burgers problem only
         # parameters for random source
         "n_dist": args.n_dist,
         "max_dist": args.max_dist,
         "lc": args.lc,
-        # "n_grig": args.n_grid, # burgers problem only
-        # parameters for ??????
+        # parameters for mesh definition
         "n_samples": args.n_samples,
         "data_type": args.field_type,
         "scheme": args.boundary_scheme,
diff --git a/script/build_swirl.py b/script/build_swirl.py
index cc8b0ff..8b5dd25 100644
--- a/script/build_swirl.py
+++ b/script/build_swirl.py
@@ -54,15 +54,6 @@ def parse_arguments():
 
     parsed_args = parser.parse_args()
 
-    # Handle dependency between max_dist and n_dist
-    # max number of distributions used to generate the dataset
-    # only if n_dist is not set if n_dist is set, max_dist will be disabled
-    # if parsed_args.n_dist is not None:
-    #     parsed_args.max_dist = None  # Disable max_dist if n_dist is set
-    #     print("Warning: max_dist is ignored because n_dist is set.")
-    # QC:
-    # print(parsed_args)
-
     return parsed_args
 
 
@@ -246,8 +237,8 @@ def sample_from_loop(
     sigma,
     alpha,
     r_0,
-    x_0,  # ej321 - added x_0
-    y_0,  # ej321 - added y_0
+    x_0,
+    y_0,
     t,
     error_og_list=[],
     error_adapt_list=[],
@@ -296,51 +287,6 @@ def sample_from_loop(
 
     mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}"))
 
-    # # ====  Plot Scripts ======================
-    # fig = plt.figure(figsize=(15, 10))
-    # ax1 = fig.add_subplot(2, 3, 1, projection='3d')
-    # # Plot the exact solution
-    # ax1.set_title('Solution field (HR)')
-    # fd.trisurf(uh_fine, axes=ax1)
-    # # Plot the solved solution
-    # ax2 = fig.add_subplot(2, 3, 2, projection='3d')
-    # ax2.set_title('Solution field (Original Mesh)')
-    # fd.trisurf(uh, axes=ax2)
-
-    # ax3 = fig.add_subplot(2, 3, 3, projection='3d')
-    # ax3.set_title('Solution field (Adapted Mesh)')
-    # fd.trisurf(uh_new, axes=ax3)
-
-    # # Plot the mesh
-    # ax4 = fig.add_subplot(2, 3, 4)
-    # ax4.set_title('Original Mesh ')
-    # fd.triplot(mesh_og, axes=ax4)
-
-    # ax5 = fig.add_subplot(2, 3, 5)
-    # ax5.set_title('Optimal Mesh')
-    # # fd.tripcolor(
-    # #     uh, cmap='coolwarm', axes=ax5)
-    # fd.triplot(mesh_new, axes=ax5)
-
-    # # plot mesh with function evaluated on it
-    # ax6 = fig.add_subplot(2, 3, 6)
-    # ax6.set_title('Solution Projected on Optimal Mesh')
-    # fd.tripcolor(
-    #     uh_new, cmap='coolwarm', axes=ax6)
-    # fd.triplot(mesh_new, axes=ax6)
-
-    # fig.savefig(
-    #     os.path.join(
-    #         problem_plot_dir, f"plot_{i:04d}.png")
-    # )
-    # plt.close()
-    # fig, ax = plt.subplots()
-    # ax.set_title("adapt error list")
-    # ax.plot(error_adapt_list, linestyle='--', color='blue', label='adapt')
-    # # ax.plot(error_og_list, linestyle='--', color='red', label='og')
-    # ax.legend()
-    # plt.show()
-
     # ====  Log File ============================================
     # function_space_fine = fd.FunctionSpace(mesh_fine, 'CG', 1)
     uh_proj = fd.project(uh, function_space_fine)
@@ -402,11 +348,6 @@ def sample_from_loop(
     err_v_max = err_abs_max_val
     err_v_min = -err_v_max
 
-    # # Error on high resolution mesh
-    # cb = fd.tripcolor(fd.assemble(uh_fine - uh_fine), cmap=cmap, axes=ax[2, 0], vmax=err_v_max, vmin=err_v_min)
-    # ax[2, 0].set_title(f"Error Map High Resolution")
-    # plt.colorbar(cb)
-
     # Monitor values
     cb = fd.tripcolor(monitor_values, cmap=cmap, axes=ax[2, 0])
     ax[2, 0].set_title("Monitor Values")
@@ -451,29 +392,14 @@ def sample_from_loop(
         "T": 1,
         "dt": 1e-3,  # The CFL condition requires that the timestep is less than 0.0014 for fine mesh
         "n_step": 1000,
-        # "n_case": args.n_case, # burgers problem only
-        # parameters for random source
-        # "n_dist": args.n_dist,
-        # "max_dist": args.max_dist,
         "lc": args.lc,
         "n_grid": args.n_grid if args.n_grid else int(1 / args.lc),
-        # parameters for ??????
-        # "n_samples": args.n_samples,
-        # "data_type": args.field_type,
-        # "scheme": args.boundary_scheme,
+        # parameters for mesh definition
         "mesh_type": int(args.mesh_type),
         "n_monitor_smooth": args.n_monitor_smooth,
         # parameters for domain scale
         "scale_x": 1,
         "scale_y": 1,
-        # parameters for anisotropic data - distribution height scaler
-        # "z_max": 1,
-        # "z_min": 0,
-        # parameters for ?????
-        # "x_start": 0,
-        # "x_end": 1,
-        # "y_start": 0,
-        # "y_end": 1,
         # parameters for initial condition
         "sigma": args.sigma,
         "r_0": args.r_0,
@@ -483,25 +409,8 @@ def sample_from_loop(
         # parameters for storing files
         "save_interval": args.save_interval,
         "fail_t": [],  # list storing failing dts
-        # parameters for isotropic data
-        # "w_min": 0.05,
-        # "w_max": 0.2,
-        # "c_min": 0.2 if args.boundary_scheme == "pad" else 0,
-        # "c_max": 0.8 if args.boundary_scheme == "pad" else 1,
-        # parameters for dataset challenging level
-        # larger, less challenging (because the gaussian is more like a circle)
-        # "sigma_mean_scaler": 1 / 4,
-        # "sigma_sigma_scaler": 1 / 6,
-        # "sigma_eps": 1 / 8,
-        # parameters for data split
-        # "p_train": 0.75,
-        # "p_test": 0.15,
-        # "p_val": 0.1,
     }
 
-    # # Set random seed
-    # random.seed(args.rand_seed)
-
     # ====  Setup Directories ======================
     problem_specific_dir = "sigma_{:.3f}_alpha_{}_r0_{}_x0_{}_y0_{}_lc_{}_ngrid_{}_interval_{}_meshtype_{}_smooth_{}".format(
         parameters["sigma"],
@@ -573,7 +482,7 @@ def sample_from_loop(
         mesh_model = mesh_gen.generate_mesh(
             res=lc, output_filename=os.path.join(directories["mesh"], "mesh.msh")
         )
-        # ej321 - is this extra call to mesh gen needed?
+        # TODO: is a separate generator instance needed for the fine mesh?
         mesh_gen_fine = UM2N.UnstructuredSquareMeshGenerator(mesh_type=mesh_type)
         mesh_fine = mesh_gen_fine.generate_mesh(
             res=1e-2, output_filename=os.path.join(directories["mesh_fine"], "mesh.msh")
@@ -591,16 +500,6 @@ def sample_from_loop(
         mesh_new,
         mesh_model=mesh_model,
         **parameters,
-        # sigma=sigma,
-        # alpha=alpha,
-        # r_0=r_0,
-        # x_0=x_0,
-        # y_0=y_0,
-        # save_interval=save_interval,
-        # T=T,
-        # dt=dt,
-        # n_step=n_step,
-        # n_monitor_smooth=n_monitor_smooth,
     )
 
     swirl_solver.solve_problem(callback=sample_from_loop, fail_callback=fail_callback)
diff --git a/script/ej321_helm_dataset_run.sh b/script/ej321_helm_dataset_run.sh
new file mode 100644
index 0000000..f3803d1
--- /dev/null
+++ b/script/ej321_helm_dataset_run.sh
@@ -0,0 +1,57 @@
+
+
+###################### Training dataset generation ######################
+# Training data
+# Problem type: Helmholtz
+# Meshtype: 6 (see mesh_type below)
+# n_samples: 300
+# Random_seed: 63
+# Resolution: 0.05, 0.055
+
+# use 2 / 6 / 0
+mesh_type=(6)
+# training set build
+rand_seed=63
+lcs=(0.05 0.055)
+n_samples_train=(300)
+
+# helmholtz square case
+for mt in "${mesh_type[@]}"; do
+    for i in "${lcs[@]}"; do
+        for n_s in "${n_samples_train[@]}"; do
+            echo "lc = $i meshtype = $mt num samples = $n_s"
+            python build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_s --field_type="aniso" --boundary_scheme="full" --mesh_type=$mt
+            # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="pad" --mesh_type=$mesh_type
+            # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="full" --mesh_type=$mesh_type
+            # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed $rand_seed --n_samples $n_samples_train --field_type "aniso" --boundary_scheme "pad" --mesh_type=$mesh_type
+        done
+    done
+done
+###################### Training dataset generation ######################
+
+
+# ###################### Testing dataset generation ######################
+# # Test data
+# # Problem type: Helmholtz
+# # Meshtype: 0, 2, 6  
+# # n_samples: 100
+# # Random_seed: 42
+# # Resolution: 0.05, 0.055, 0.028
+
+# n_samples_train=101
+# rand_seed=42
+# mesh_types=(6 2 0)
+# # characteristic length for polygon mesh
+# lcs=(0.05 0.028)
+
+# # helmholtz square case
+# for m in "${mesh_types[@]}"; do
+#     for i in "${lcs[@]}"; do
+#         echo "lc = $i Meshtype = $m"
+#         python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m
+#         # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="pad" --mesh_type=$mesh_type
+#         # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="full" --mesh_type=$mesh_type
+#         # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed $rand_seed --n_samples $n_samples_train --field_type "aniso" --boundary_scheme "pad" --mesh_type=$mesh_type
+#     done
+# done
+# ###################### Testing dataset generation ######################
\ No newline at end of file
diff --git a/script/make_build_all_test.sh b/script/make_build_all_test.sh
new file mode 100644
index 0000000..a531450
--- /dev/null
+++ b/script/make_build_all_test.sh
@@ -0,0 +1,15 @@
+mesh_types=(6 2 0)
+rand_seed=42
+n_samples_train=3
+
+for m in "${mesh_types[@]}"; do
+    python build_burgers_square.py --rand_seed=$rand_seed --n_case=$n_samples_train --mesh_type=$m
+
+    # python build_helmholtz_square.py  --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m
+    # python build_helmholtz_poly.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m
+    # python build_poisson_square.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m
+    # python build_poisson_poly.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m
+    # python build_burgers_square.py --rand_seed=$rand_seed --n_case=$n_samples_train --mesh_type=$m
+    # python build_swirl.py --lc=0.028 --alpha=1.5 --r_0 0.20 --x_0 0.3 --y_0 0.3 --n_monitor_smooth=10 --mesh_type=$m
+
+done
\ No newline at end of file

From a899c3ddf9377db6901887d0f6e468f4a3fce623 Mon Sep 17 00:00:00 2001
From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com>
Date: Mon, 7 Jul 2025 08:58:09 +0100
Subject: [PATCH 4/7] moved common build script to helper

---
 UM2N/generator/swirl_solver.py      |   8 +-
 UM2N/generator/swirl_solver_step.py |  14 +-
 script/build_burgers_square.py      | 157 +-------------------
 script/build_helmholtz_poly.py      | 177 +---------------------
 script/build_helmholtz_square.py    |  70 +--------
 script/build_helper.py              | 221 ++++++++++++++++++++++++++++
 script/build_poisson_poly.py        | 176 +---------------------
 script/build_poisson_square.py      | 176 +---------------------
 script/build_swirl.py               |   4 +-
 9 files changed, 241 insertions(+), 762 deletions(-)
 create mode 100644 script/build_helper.py

diff --git a/UM2N/generator/swirl_solver.py b/UM2N/generator/swirl_solver.py
index 2c98f35..3db102c 100644
--- a/UM2N/generator/swirl_solver.py
+++ b/UM2N/generator/swirl_solver.py
@@ -22,6 +22,8 @@
 from tqdm import tqdm  # noqa
 from UM2N.model.train_util import model_forward
 
+from firedrake.__future__ import interpolate
+
 
 def get_log_og(log_path, idx):
     """
@@ -453,7 +455,7 @@ def monitor_function(self, mesh, alpha=10, beta=5):
         )
 
         func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
-        uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space)
+        uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space))
         self.grad_norm.interpolate(uh_grad[0] ** 2 + uh_grad[1] ** 2)
 
         # filter_monitor_val = np.minimum(1e3, self.f_norm.dat.data[:])
@@ -526,7 +528,7 @@ def monitor_function_on_coarse_mesh(self, mesh, alpha=10, beta=5):
         )
 
         func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
-        uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space)
+        uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space))
         self.grad_norm.interpolate(uh_grad[0] ** 2 + uh_grad[1] ** 2)
 
         # Normlize the hessian
@@ -655,7 +657,7 @@ def solve_problem(self, callback=None, fail_callback=None):
                 )
 
                 func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1)
-                uh_grad = fd.interpolate(fd.grad(uh), func_vec_space)
+                uh_grad = fd.assemble(interpolate(fd.grad(uh), func_vec_space))
 
                 hessian = self.l2_projection
                 phi = adaptor.phi
diff --git a/UM2N/generator/swirl_solver_step.py b/UM2N/generator/swirl_solver_step.py
index e5d79fc..5dc9dd4 100644
--- a/UM2N/generator/swirl_solver_step.py
+++ b/UM2N/generator/swirl_solver_step.py
@@ -17,6 +17,8 @@
 
 from tqdm import tqdm  # noqa
 
+from firedrake.__future__ import interpolate
+
 
 def get_c(x, y, t, threshold=0.5, alpha=1.5):
     """
@@ -459,7 +461,7 @@ def monitor_function_grad(self, mesh, alpha=5):
         )
 
         func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1)
-        uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space)
+        uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space))
         self.grad_norm.project(uh_grad[0] ** 2 + uh_grad[1] ** 2)
 
         self.adapt_coord = mesh.coordinates.vector().array().reshape(-1, 2)  # noqa
@@ -481,7 +483,7 @@ def monitor_function_smoothed_grad(self, mesh, alpha=5):
         )
 
         func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1)
-        uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space)
+        uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space))
         self.grad_norm.project(uh_grad[0] ** 2 + uh_grad[1] ** 2)
 
         # Normlize the grad
@@ -508,7 +510,7 @@ def monitor_function_for_merge(self, mesh, alpha=10, beta=5):
         )
 
         func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1)
-        uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space)
+        uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space))
         self.grad_norm.project(uh_grad[0] ** 2 + uh_grad[1] ** 2)
 
         # Normlize the hessian
@@ -568,7 +570,7 @@ def monitor_function(self, mesh, alpha=10, beta=5):
         )
 
         func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1)
-        uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space)
+        uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space))
         self.grad_norm.project(uh_grad[0] ** 2 + uh_grad[1] ** 2)
 
         # Normlize the hessian
@@ -631,7 +633,7 @@ def monitor_function_on_coarse_mesh(self, mesh, alpha=10, beta=5):
         )
 
         func_vec_space = fd.VectorFunctionSpace(mesh, "CG", 1)
-        uh_grad = fd.interpolate(fd.grad(self.u_cur), func_vec_space)
+        uh_grad = fd.assemble(interpolate(fd.grad(self.u_cur), func_vec_space))
         self.grad_norm.project(uh_grad[0] ** 2 + uh_grad[1] ** 2)
 
         # Normlize the hessian
@@ -749,7 +751,7 @@ def solve_problem(self, callback=None, fail_callback=None):
                     uh_fine.project(self.u_cur_fine)
 
                     func_vec_space = fd.VectorFunctionSpace(self.mesh, "CG", 1)
-                    uh_grad = fd.interpolate(fd.grad(self.uh), func_vec_space)
+                    uh_grad = fd.assemble(interpolate(fd.grad(self.uh), func_vec_space))
 
                     hessian = self.l2_projection
                     phi = adaptor.phi
diff --git a/script/build_burgers_square.py b/script/build_burgers_square.py
index ec5504d..14a2828 100644
--- a/script/build_burgers_square.py
+++ b/script/build_burgers_square.py
@@ -1,14 +1,11 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
 
-import csv
-import os
-import random
-import shutil
 from argparse import ArgumentParser
 
 import firedrake as fd
 import matplotlib.pyplot as plt
+from build_helper import *
 
 import UM2N
 
@@ -67,158 +64,6 @@ def parse_arguments():
     return parser.parse_args()
 
 
-def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
-    """
-    Set up directories for storing data, plots, and logs.
-
-    Args:
-        base_dir (str): Base directory for the project.
-        parameters (dict): Dictionary of parameters, including "mesh_type" and "problem".
-            - "mesh_type" (int): Type of mesh used in the simulation (default: 0).
-            - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem").
-        subdirs (list, optional): List of subdirectories to create. Defaults to:
-            ["data", "plot", "log", "mesh", "mesh_fine"].
-            Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz".
-        dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders
-            matching keys in the `parameters` dictionary. Example:
-            "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}".
-            If not provided, raises a ValueError.
-
-    Returns:
-        dict: A dictionary mapping subdirectory names to their full paths.
-
-    Raises:
-        ValueError: If `dir_format` is not provided or is invalid.
-    """
-
-    # Define the project directory
-    if base_dir:
-        project_dir = os.path.abspath(base_dir)
-    else:
-        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-
-    # QC:
-    print(f"Project Directory: {project_dir}")
-
-    # Define the dataset directory
-    dataset_dir = os.path.join(
-        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
-    )
-
-    # Use the provided format string for the problem-specific directory
-    if dir_format is None:
-        problem_specific_dir = os.path.join(
-            dataset_dir, f"{problem}_meshtype_{mesh_type}"
-        )
-    else:
-        # check if dir_format is a valid string format
-        if not isinstance(dir_format, str):
-            raise ValueError("dir_format must be a string.")
-        problem_specific_dir = os.path.join(dataset_dir, dir_format)
-
-    # Define default subdirectories if not provided
-    if subdirs is None:
-        subdirs = [
-            "data",
-            "plot",
-            "log",
-            "mesh",
-            "mesh_fine",
-            "plot_compare",
-            "train",
-            "test",
-            "val",
-        ]
-
-    # Create and clear directories
-    directories = {}
-    for subdir in subdirs:
-        dir_path = os.path.join(problem_specific_dir, subdir)
-        if not os.path.exists(dir_path):
-            os.makedirs(dir_path)
-        else:
-            # Clear the directory by removing all files
-            for file in os.listdir(dir_path):
-                os.remove(os.path.join(dir_path, file))
-        directories[subdir] = dir_path
-
-    # QC:
-    # print(f"Subdirectories created: {directories}")
-
-    return directories
-
-
-def output_csv(parameters, key_list, output_dir):
-    """
-    Write selected parameters to a CSV file.
-
-    Args:
-        parameters (dict): Dictionary of parameters to write.
-        key_list (list): List of keys to include in the CSV.
-        output_dir (str): Directory where the CSV file will be saved.
-    """
-    # Filter parameters based on key_list
-    csv_keys = [key for key in key_list if key in parameters]
-    csv_data = [parameters[key] for key in csv_keys]
-
-    # Define the output file path
-    csv_file_path = os.path.join(output_dir, "info.csv")
-
-    # Write to CSV
-    with open(csv_file_path, mode="w", newline="") as csvfile:
-        csv_writer = csv.writer(csvfile)
-        # Write header (keys)
-        csv_writer.writerow(csv_keys)
-        # Write data (values)
-        csv_writer.writerow(csv_data)
-
-    print(f"Parameters saved to {csv_file_path}")
-
-
-def move_data(target, source, start, num_files):
-    """
-    Move data files from the source directory to the target directory.
-
-    Args:
-        target (str): The path to the target directory.
-        source (str): The path to the source directory.
-        start (int): The starting index of the files to move.
-        num_files (int): The total number of files to move.
-
-    Raises:
-        FileNotFoundError: If the source directory does not exist.
-        ValueError: If the start index or num_files is invalid.
-    """
-    if not os.path.exists(source):
-        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
-
-    if start < 0 or num_files <= 0:
-        raise ValueError("Invalid start index or number of files to move.")
-
-    # Create the target directory if it doesn't exist
-    if not os.path.exists(target):
-        os.makedirs(target)
-    else:
-        # Clear the target directory by removing all files
-        for file in os.listdir(target):
-            os.remove(os.path.join(target, file))
-
-    # Copy files sequentially starting from the specified index
-    for i in range(start, start + num_files):
-        try:
-            # Copy the data file
-            shutil.copy(
-                os.path.join(source, f"data_{i:04d}.npy"),
-                os.path.join(target, f"data_{i:04d}.npy"),
-            )
-        except FileNotFoundError:
-            print(f"File data_{i:04d}.npy not found in {source}. Skipping.")
-            continue
-        except Exception as e:
-            print(f"An error occurred while copying data_{i:04d}.npy: {e}")
-            continue
-
-
 def generate_mesh(parameters, directories):
     """Generate the mesh based on the specified type."""
     if parameters["mesh_type"] != 0:
diff --git a/script/build_helmholtz_poly.py b/script/build_helmholtz_poly.py
index a330680..caf9b77 100644
--- a/script/build_helmholtz_poly.py
+++ b/script/build_helmholtz_poly.py
@@ -1,15 +1,13 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
-import csv
-import os
-import random
-import shutil
+
 import time
 from argparse import ArgumentParser
 
 import firedrake as fd
 import matplotlib.pyplot as plt
 import numpy as np
+from build_helper import *
 from firedrake.__future__ import interpolate
 
 import UM2N
@@ -58,177 +56,6 @@ def parse_arguments():
     return parser.parse_args()
 
 
-def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
-    """
-    Set up directories for storing data, plots, and logs.
-
-    Args:
-        base_dir (str): Base directory for the project.
-        parameters (dict): Dictionary of parameters, including "mesh_type" and "problem".
-            - "mesh_type" (int): Type of mesh used in the simulation (default: 0).
-            - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem").
-        subdirs (list, optional): List of subdirectories to create. Defaults to:
-            ["data", "plot", "log", "mesh", "mesh_fine"].
-            Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz".
-        dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders
-            matching keys in the `parameters` dictionary. Example:
-            "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}".
-            If not provided, raises a ValueError.
-
-    Returns:
-        dict: A dictionary mapping subdirectory names to their full paths.
-
-    Raises:
-        ValueError: If `dir_format` is not provided or is invalid.
-    """
-
-    # Define the project directory
-    if base_dir:
-        project_dir = os.path.abspath(base_dir)
-    else:
-        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-
-    # QC:
-    print(f"Project Directory: {project_dir}")
-
-    # Define the dataset directory
-    dataset_dir = os.path.join(
-        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
-    )
-
-    # Use the provided format string for the problem-specific directory
-    if dir_format is None:
-        problem_specific_dir = os.path.join(
-            dataset_dir, f"{problem}_meshtype_{mesh_type}"
-        )
-    else:
-        # check if dir_format is a valid string format
-        if not isinstance(dir_format, str):
-            raise ValueError("dir_format must be a string.")
-        problem_specific_dir = os.path.join(dataset_dir, dir_format)
-
-    # Define default subdirectories if not provided
-    if subdirs is None:
-        subdirs = [
-            "data",
-            "plot",
-            "log",
-            "mesh",
-            "mesh_fine",
-            "plot_compare",
-            "train",
-            "test",
-            "val",
-        ]
-
-    # Create and clear directories
-    directories = {}
-    for subdir in subdirs:
-        dir_path = os.path.join(problem_specific_dir, subdir)
-        if not os.path.exists(dir_path):
-            os.makedirs(dir_path)
-        else:
-            # Clear the directory by removing all files
-            for file in os.listdir(dir_path):
-                os.remove(os.path.join(dir_path, file))
-        directories[subdir] = dir_path
-
-    # QC:
-    # print(f"Subdirectories created: {directories}")
-
-    return directories
-
-
-def output_csv(parameters, key_list, output_dir):
-    """
-    Write selected parameters to a CSV file.
-
-    Args:
-        parameters (dict): Dictionary of parameters to write.
-        key_list (list): List of keys to include in the CSV.
-        output_dir (str): Directory where the CSV file will be saved.
-    """
-    # Filter parameters based on key_list
-    csv_keys = [key for key in key_list if key in parameters]
-    csv_data = [parameters[key] for key in csv_keys]
-
-    # Define the output file path
-    csv_file_path = os.path.join(output_dir, "info.csv")
-
-    # Write to CSV
-    with open(csv_file_path, mode="w", newline="") as csvfile:
-        csv_writer = csv.writer(csvfile)
-        # Write header (keys)
-        csv_writer.writerow(csv_keys)
-        # Write data (values)
-        csv_writer.writerow(csv_data)
-
-
-def split_data(
-    source_dir,
-    train_dir,
-    test_dir,
-    val_dir,
-    train_ratio=0.75,
-    test_ratio=0.15,
-    val_ratio=0.1,
-):
-    """
-    Split files in a source directory into train, test, and validation directories.
-
-    Args:
-        source_dir (str): Path to the source directory containing files.
-        train_dir (str): Path to the train directory.
-        test_dir (str): Path to the test directory.
-        val_dir (str): Path to the validation directory.
-        train_ratio (float): Proportion of files to allocate to the train set.
-        test_ratio (float): Proportion of files to allocate to the test set.
-        val_ratio (float): Proportion of files to allocate to the validation set.
-
-    Raises:
-        ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1.
-    """
-    # Validate ratios
-    if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1):
-        raise ValueError("Ratios must be between 0 and 1.")
-    if train_ratio + test_ratio + val_ratio != 1:
-        raise ValueError(
-            "The sum of train_ratio, test_ratio, and val_ratio must equal 1."
-        )
-
-    # Get all files in the source directory
-    files = [
-        f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))
-    ]
-    random.shuffle(files)  # Shuffle files for unbiased distribution
-
-    # QC:
-    # print(f'files {files}')
-
-    # Calculate split indices - preference train > test > val
-    total_files = len(files)
-    num_train = int(total_files * train_ratio)
-    num_test = max(int(total_files * test_ratio), total_files - num_train)
-    num_val = total_files - num_train - num_test
-
-    # Distribute files
-    train_files = files[:num_train]
-    test_files = files[num_train : num_train + num_test]
-    val_files = files[num_train + num_test :]
-
-    for datafiles, target_dir in zip(
-        [train_files, test_files, val_files], [train_dir, test_dir, val_dir]
-    ):
-        for datafile in datafiles:
-            shutil.copy(
-                os.path.join(source_dir, datafile), os.path.join(target_dir, datafile)
-            )
-
-    print(
-        f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files."
-    )
-
-
 def process_features(parameters, directories):
     mesh_type = parameters["mesh_type"]
     scale_x = parameters["scale_x"]
diff --git a/script/build_helmholtz_square.py b/script/build_helmholtz_square.py
index 0a3452b..642e004 100644
--- a/script/build_helmholtz_square.py
+++ b/script/build_helmholtz_square.py
@@ -1,15 +1,12 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
 
-import csv
-import os
-import random
-import shutil
 import time
 from argparse import ArgumentParser
 
 import firedrake as fd
 import matplotlib.pyplot as plt
+from build_helper import *
 from firedrake.__future__ import interpolate
 
 import UM2N
@@ -422,71 +419,6 @@ def output_csv(parameters, key_list, output_dir):
     print(f"Parameters saved to {csv_file_path}")
 
 
-def split_data(
-    source_dir,
-    train_dir,
-    test_dir,
-    val_dir,
-    train_ratio=0.75,
-    test_ratio=0.15,
-    val_ratio=0.1,
-):
-    """
-    Split files in a source directory into train, test, and validation directories.
-
-    Args:
-        source_dir (str): Path to the source directory containing files.
-        train_dir (str): Path to the train directory.
-        test_dir (str): Path to the test directory.
-        val_dir (str): Path to the validation directory.
-        train_ratio (float): Proportion of files to allocate to the train set.
-        test_ratio (float): Proportion of files to allocate to the test set.
-        val_ratio (float): Proportion of files to allocate to the validation set.
-
-    Raises:
-        ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1.
-    """
-    # Validate ratios
-    if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1):
-        raise ValueError("Ratios must be between 0 and 1.")
-    if train_ratio + test_ratio + val_ratio != 1:
-        raise ValueError(
-            "The sum of train_ratio, test_ratio, and val_ratio must equal 1."
-        )
-
-    # Get all files in the source directory
-    files = [
-        f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))
-    ]
-    random.shuffle(files)  # Shuffle files for unbiased distribution
-
-    # QC:
-    # print(f'files {files}')
-
-    # Calculate split indices - preference train > test > val
-    total_files = len(files)
-    num_train = int(total_files * train_ratio)
-    num_test = max(int(total_files * test_ratio), total_files - num_train)
-    num_val = total_files - num_train - num_test
-
-    # Distribute files
-    train_files = files[:num_train]
-    test_files = files[num_train : num_train + num_test]
-    val_files = files[num_train + num_test :]
-
-    for datafiles, target_dir in zip(
-        [train_files, test_files, val_files], [train_dir, test_dir, val_dir]
-    ):
-        for datafile in datafiles:
-            shutil.copy(
-                os.path.join(source_dir, datafile), os.path.join(target_dir, datafile)
-            )
-
-    print(
-        f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files."
-    )
-
-
 if __name__ == "__main__":
     # parse args
     args = parse_arguments()
diff --git a/script/build_helper.py b/script/build_helper.py
new file mode 100644
index 0000000..78ac0e2
--- /dev/null
+++ b/script/build_helper.py
@@ -0,0 +1,221 @@
+import csv
+import os
+import random
+import shutil
+
+
+def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
+    """
+    Set up directories for storing data, plots, and logs.
+
+    Args:
+        problem (str): Name of the problem (e.g., "burgers" or "helmholtz").
+        mesh_type (int): Type of mesh used in the simulation.
+        base_dir (str, optional): Base directory for the project. If not given, the parent of the
+            directory containing this file is used.
+        subdirs (list, optional): List of subdirectories to create. Defaults to:
+            ["data", "plot", "log", "mesh", "mesh_fine", "plot_compare", "train", "test", "val"].
+            Files inside subdirectories that already exist are removed.
+        dir_format (str, optional): Name of the problem-specific directory. It is used verbatim (no
+            formatting is applied here), so the caller must fill in any placeholders first. Example template:
+            "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}".
+            If not provided, "{problem}_meshtype_{mesh_type}" is used.
+
+    Returns:
+        dict: A dictionary mapping subdirectory names to their full paths.
+
+    Raises:
+        ValueError: If `dir_format` is provided but is not a string.
+    """
+
+    # Define the project directory
+    if base_dir:
+        project_dir = os.path.abspath(base_dir)
+    else:
+        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+    # QC:
+    print(f"Project Directory: {project_dir}")
+
+    # Define the dataset directory
+    dataset_dir = os.path.join(
+        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
+    )
+
+    # Use the provided format string for the problem-specific directory
+    if dir_format is None:
+        problem_specific_dir = os.path.join(
+            dataset_dir, f"{problem}_meshtype_{mesh_type}"
+        )
+    else:
+        # check if dir_format is a valid string format
+        if not isinstance(dir_format, str):
+            raise ValueError("dir_format must be a string.")
+        problem_specific_dir = os.path.join(dataset_dir, dir_format)
+
+    # Define default subdirectories if not provided
+    if subdirs is None:
+        subdirs = [
+            "data",
+            "plot",
+            "log",
+            "mesh",
+            "mesh_fine",
+            "plot_compare",
+            "train",
+            "test",
+            "val",
+        ]
+
+    # Create and clear directories
+    directories = {}
+    for subdir in subdirs:
+        dir_path = os.path.join(problem_specific_dir, subdir)
+        if not os.path.exists(dir_path):
+            os.makedirs(dir_path)
+        else:
+            # Clear the directory by removing all files
+            for file in os.listdir(dir_path):
+                os.remove(os.path.join(dir_path, file))
+        directories[subdir] = dir_path
+
+    # QC:
+    # print(f"Subdirectories created: {directories}")
+
+    return directories
+
+
+def output_csv(parameters, key_list, output_dir):
+    """
+    Write selected parameters to a CSV file.
+
+    Args:
+        parameters (dict): Dictionary of parameters to write.
+        key_list (list): List of keys to include in the CSV.
+        output_dir (str): Directory where the CSV file will be saved.
+    """
+    # Filter parameters based on key_list
+    csv_keys = [key for key in key_list if key in parameters]
+    csv_data = [parameters[key] for key in csv_keys]
+
+    # Define the output file path
+    csv_file_path = os.path.join(output_dir, "info.csv")
+
+    # Write to CSV
+    with open(csv_file_path, mode="w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # Write header (keys)
+        csv_writer.writerow(csv_keys)
+        # Write data (values)
+        csv_writer.writerow(csv_data)
+
+    print(f"Parameters saved to {csv_file_path}")
+
+
+def move_data(target, source, start, num_files):
+    """
+    Copy data files from the source directory to the target directory (the source files are kept).
+
+    Args:
+        target (str): The path to the target directory (created if missing, otherwise emptied of files).
+        source (str): The path to the source directory.
+        start (int): Index of the first `data_{i:04d}.npy` file to copy.
+        num_files (int): The number of consecutive file indices to copy; missing files are skipped.
+
+    Raises:
+        FileNotFoundError: If the source directory does not exist.
+        ValueError: If the start index or num_files is invalid.
+    """
+    if not os.path.exists(source):
+        raise FileNotFoundError(f"Source directory '{source}' does not exist.")
+
+    if start < 0 or num_files <= 0:
+        raise ValueError("Invalid start index or number of files to move.")
+
+    # Create the target directory if it doesn't exist
+    if not os.path.exists(target):
+        os.makedirs(target)
+    else:
+        # Clear the target directory by removing all files
+        for file in os.listdir(target):
+            os.remove(os.path.join(target, file))
+
+    # Copy files sequentially starting from the specified index
+    for i in range(start, start + num_files):
+        try:
+            # Copy the data file
+            shutil.copy(
+                os.path.join(source, f"data_{i:04d}.npy"),
+                os.path.join(target, f"data_{i:04d}.npy"),
+            )
+        except FileNotFoundError:
+            print(f"File data_{i:04d}.npy not found in {source}. Skipping.")
+            continue
+        except Exception as e:
+            print(f"An error occurred while copying data_{i:04d}.npy: {e}")
+            continue
+
+
+def split_data(
+    source_dir,
+    train_dir,
+    test_dir,
+    val_dir,
+    train_ratio=0.75,
+    test_ratio=0.15,
+    val_ratio=0.1,
+):
+    """
+    Split files in a source directory into train, test, and validation directories.
+
+    Args:
+        source_dir (str): Path to the source directory containing files.
+        train_dir (str): Path to the train directory.
+        test_dir (str): Path to the test directory.
+        val_dir (str): Path to the validation directory.
+        train_ratio (float): Proportion of files to allocate to the train set.
+        test_ratio (float): Proportion of files to allocate to the test set.
+        val_ratio (float): Proportion of files to allocate to the validation set.
+
+    Raises:
+        ValueError: If any ratio is outside [0, 1] or the three ratios do not sum to 1.
+    """
+    # Validate ratios
+    if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1):
+        raise ValueError("Ratios must be between 0 and 1.")
+    if train_ratio + test_ratio + val_ratio != 1:
+        raise ValueError(
+            "The sum of train_ratio, test_ratio, and val_ratio must equal 1."
+        )
+
+    # Get all files in the source directory
+    files = [
+        f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))
+    ]
+    random.shuffle(files)  # Shuffle files for unbiased distribution
+
+    # QC:
+    # print(f'files {files}')
+
+    # Split counts, preference train > test > val. NOTE(review): max() hands test every non-train file, so num_val is always 0 - confirm intent.
+    total_files = len(files)
+    num_train = int(total_files * train_ratio)
+    num_test = max(int(total_files * test_ratio), total_files - num_train)
+    num_val = total_files - num_train - num_test
+
+    # Distribute files
+    train_files = files[:num_train]
+    test_files = files[num_train : num_train + num_test]
+    val_files = files[num_train + num_test :]
+
+    for datafiles, target_dir in zip(
+        [train_files, test_files, val_files], [train_dir, test_dir, val_dir]
+    ):
+        for datafile in datafiles:
+            shutil.copy(
+                os.path.join(source_dir, datafile), os.path.join(target_dir, datafile)
+            )
+
+    print(
+        f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files."
+    )
diff --git a/script/build_poisson_poly.py b/script/build_poisson_poly.py
index 714a4f7..5bf33cb 100644
--- a/script/build_poisson_poly.py
+++ b/script/build_poisson_poly.py
@@ -1,14 +1,11 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
-import csv
-import os
-import random
-import shutil
 import time
 from argparse import ArgumentParser
 
 import firedrake as fd
 import matplotlib.pyplot as plt
+from build_helper import *
 from firedrake.__future__ import interpolate
 
 import UM2N
@@ -58,177 +55,6 @@ def parse_arguments():
     return parser.parse_args()
 
 
-def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
-    """
-    Set up directories for storing data, plots, and logs.
-
-    Args:
-        base_dir (str): Base directory for the project.
-        parameters (dict): Dictionary of parameters, including "mesh_type" and "problem".
-            - "mesh_type" (int): Type of mesh used in the simulation (default: 0).
-            - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem").
-        subdirs (list, optional): List of subdirectories to create. Defaults to:
-            ["data", "plot", "log", "mesh", "mesh_fine"].
-            Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz".
-        dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders
-            matching keys in the `parameters` dictionary. Example:
-            "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}".
-            If not provided, raises a ValueError.
-
-    Returns:
-        dict: A dictionary mapping subdirectory names to their full paths.
-
-    Raises:
-        ValueError: If `dir_format` is not provided or is invalid.
-    """
-
-    # Define the project directory
-    if base_dir:
-        project_dir = os.path.abspath(base_dir)
-    else:
-        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-
-    # QC:
-    print(f"Project Directory: {project_dir}")
-
-    # Define the dataset directory
-    dataset_dir = os.path.join(
-        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
-    )
-
-    # Use the provided format string for the problem-specific directory
-    if dir_format is None:
-        problem_specific_dir = os.path.join(
-            dataset_dir, f"{problem}_meshtype_{mesh_type}"
-        )
-    else:
-        # check if dir_format is a valid string format
-        if not isinstance(dir_format, str):
-            raise ValueError("dir_format must be a string.")
-        problem_specific_dir = os.path.join(dataset_dir, dir_format)
-
-    # Define default subdirectories if not provided
-    if subdirs is None:
-        subdirs = [
-            "data",
-            "plot",
-            "log",
-            "mesh",
-            "mesh_fine",
-            "plot_compare",
-            "train",
-            "test",
-            "val",
-        ]
-
-    # Create and clear directories
-    directories = {}
-    for subdir in subdirs:
-        dir_path = os.path.join(problem_specific_dir, subdir)
-        if not os.path.exists(dir_path):
-            os.makedirs(dir_path)
-        else:
-            # Clear the directory by removing all files
-            for file in os.listdir(dir_path):
-                os.remove(os.path.join(dir_path, file))
-        directories[subdir] = dir_path
-
-    # QC:
-    # print(f"Subdirectories created: {directories}")
-
-    return directories
-
-
-def output_csv(parameters, key_list, output_dir):
-    """
-    Write selected parameters to a CSV file.
-
-    Args:
-        parameters (dict): Dictionary of parameters to write.
-        key_list (list): List of keys to include in the CSV.
-        output_dir (str): Directory where the CSV file will be saved.
-    """
-    # Filter parameters based on key_list
-    csv_keys = [key for key in key_list if key in parameters]
-    csv_data = [parameters[key] for key in csv_keys]
-
-    # Define the output file path
-    csv_file_path = os.path.join(output_dir, "info.csv")
-
-    # Write to CSV
-    with open(csv_file_path, mode="w", newline="") as csvfile:
-        csv_writer = csv.writer(csvfile)
-        # Write header (keys)
-        csv_writer.writerow(csv_keys)
-        # Write data (values)
-        csv_writer.writerow(csv_data)
-
-
-def split_data(
-    source_dir,
-    train_dir,
-    test_dir,
-    val_dir,
-    train_ratio=0.75,
-    test_ratio=0.15,
-    val_ratio=0.1,
-):
-    """
-    Split files in a source directory into train, test, and validation directories.
-
-    Args:
-        source_dir (str): Path to the source directory containing files.
-        train_dir (str): Path to the train directory.
-        test_dir (str): Path to the test directory.
-        val_dir (str): Path to the validation directory.
-        train_ratio (float): Proportion of files to allocate to the train set.
-        test_ratio (float): Proportion of files to allocate to the test set.
-        val_ratio (float): Proportion of files to allocate to the validation set.
-
-    Raises:
-        ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1.
-    """
-    # Validate ratios
-    if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1):
-        raise ValueError("Ratios must be between 0 and 1.")
-    if train_ratio + test_ratio + val_ratio != 1:
-        raise ValueError(
-            "The sum of train_ratio, test_ratio, and val_ratio must equal 1."
-        )
-
-    # Get all files in the source directory
-    files = [
-        f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))
-    ]
-    random.shuffle(files)  # Shuffle files for unbiased distribution
-
-    # QC:
-    # print(f'files {files}')
-
-    # Calculate split indices - preference train > test > val
-    total_files = len(files)
-    num_train = int(total_files * train_ratio)
-    num_test = max(int(total_files * test_ratio), total_files - num_train)
-    num_val = total_files - num_train - num_test
-
-    # Distribute files
-    train_files = files[:num_train]
-    test_files = files[num_train : num_train + num_test]
-    val_files = files[num_train + num_test :]
-
-    for datafiles, target_dir in zip(
-        [train_files, test_files, val_files], [train_dir, test_dir, val_dir]
-    ):
-        for datafile in datafiles:
-            shutil.copy(
-                os.path.join(source_dir, datafile), os.path.join(target_dir, datafile)
-            )
-
-    print(
-        f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files."
-    )
-
-
 def process_features(parameters, directories):
     # create mesh
     scale_x = parameters["scale_x"]
diff --git a/script/build_poisson_square.py b/script/build_poisson_square.py
index 7b4919f..0a8b549 100644
--- a/script/build_poisson_square.py
+++ b/script/build_poisson_square.py
@@ -1,14 +1,11 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
-import csv
-import os
-import random
-import shutil
 import time
 from argparse import ArgumentParser
 
 import firedrake as fd
 import matplotlib.pyplot as plt
+from build_helper import *
 from firedrake.__future__ import interpolate
 
 import UM2N
@@ -57,177 +54,6 @@ def parse_arguments():
     return parser.parse_args()
 
 
-def setup_directories(problem, mesh_type, base_dir=None, subdirs=None, dir_format=None):
-    """
-    Set up directories for storing data, plots, and logs.
-
-    Args:
-        base_dir (str): Base directory for the project.
-        parameters (dict): Dictionary of parameters, including "mesh_type" and "problem".
-            - "mesh_type" (int): Type of mesh used in the simulation (default: 0).
-            - "problem" (str): Name of the problem (e.g., "burgers" or "helmholtz") (default: "default_problem").
-        subdirs (list, optional): List of subdirectories to create. Defaults to:
-            ["data", "plot", "log", "mesh", "mesh_fine"].
-            Additional subdirectories like "plot_compare", "train", "test", and "val" are added for "helmholtz".
-        dir_format (str, optional): Format string for the problem-specific directory. Must use placeholders
-            matching keys in the `parameters` dictionary. Example:
-            "lc={lc}_ngrid_{n_grid}_n={n_case}_{data_type}_{scheme}_meshtype_{mesh_type}".
-            If not provided, raises a ValueError.
-
-    Returns:
-        dict: A dictionary mapping subdirectory names to their full paths.
-
-    Raises:
-        ValueError: If `dir_format` is not provided or is invalid.
-    """
-
-    # Define the project directory
-    if base_dir:
-        project_dir = os.path.abspath(base_dir)
-    else:
-        project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-
-    # QC:
-    print(f"Project Directory: {project_dir}")
-
-    # Define the dataset directory
-    dataset_dir = os.path.join(
-        project_dir, "data", f"dataset_meshtype_{mesh_type}", problem
-    )
-
-    # Use the provided format string for the problem-specific directory
-    if dir_format is None:
-        problem_specific_dir = os.path.join(
-            dataset_dir, f"{problem}_meshtype_{mesh_type}"
-        )
-    else:
-        # check if dir_format is a valid string format
-        if not isinstance(dir_format, str):
-            raise ValueError("dir_format must be a string.")
-        problem_specific_dir = os.path.join(dataset_dir, dir_format)
-
-    # Define default subdirectories if not provided
-    if subdirs is None:
-        subdirs = [
-            "data",
-            "plot",
-            "log",
-            "mesh",
-            "mesh_fine",
-            "plot_compare",
-            "train",
-            "test",
-            "val",
-        ]
-
-    # Create and clear directories
-    directories = {}
-    for subdir in subdirs:
-        dir_path = os.path.join(problem_specific_dir, subdir)
-        if not os.path.exists(dir_path):
-            os.makedirs(dir_path)
-        else:
-            # Clear the directory by removing all files
-            for file in os.listdir(dir_path):
-                os.remove(os.path.join(dir_path, file))
-        directories[subdir] = dir_path
-
-    # QC:
-    # print(f"Subdirectories created: {directories}")
-
-    return directories
-
-
-def output_csv(parameters, key_list, output_dir):
-    """
-    Write selected parameters to a CSV file.
-
-    Args:
-        parameters (dict): Dictionary of parameters to write.
-        key_list (list): List of keys to include in the CSV.
-        output_dir (str): Directory where the CSV file will be saved.
-    """
-    # Filter parameters based on key_list
-    csv_keys = [key for key in key_list if key in parameters]
-    csv_data = [parameters[key] for key in csv_keys]
-
-    # Define the output file path
-    csv_file_path = os.path.join(output_dir, "info.csv")
-
-    # Write to CSV
-    with open(csv_file_path, mode="w", newline="") as csvfile:
-        csv_writer = csv.writer(csvfile)
-        # Write header (keys)
-        csv_writer.writerow(csv_keys)
-        # Write data (values)
-        csv_writer.writerow(csv_data)
-
-
-def split_data(
-    source_dir,
-    train_dir,
-    test_dir,
-    val_dir,
-    train_ratio=0.75,
-    test_ratio=0.15,
-    val_ratio=0.1,
-):
-    """
-    Split files in a source directory into train, test, and validation directories.
-
-    Args:
-        source_dir (str): Path to the source directory containing files.
-        train_dir (str): Path to the train directory.
-        test_dir (str): Path to the test directory.
-        val_dir (str): Path to the validation directory.
-        train_ratio (float): Proportion of files to allocate to the train set.
-        test_ratio (float): Proportion of files to allocate to the test set.
-        val_ratio (float): Proportion of files to allocate to the validation set.
-
-    Raises:
-        ValueError: If the sum of train_ratio, test_ratio, and val_ratio is not 1.
-    """
-    # Validate ratios
-    if not (0 <= train_ratio <= 1 and 0 <= test_ratio <= 1 and 0 <= val_ratio <= 1):
-        raise ValueError("Ratios must be between 0 and 1.")
-    if train_ratio + test_ratio + val_ratio != 1:
-        raise ValueError(
-            "The sum of train_ratio, test_ratio, and val_ratio must equal 1."
-        )
-
-    # Get all files in the source directory
-    files = [
-        f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))
-    ]
-    random.shuffle(files)  # Shuffle files for unbiased distribution
-
-    # QC:
-    # print(f'files {files}')
-
-    # Calculate split indices - preference train > test > val
-    total_files = len(files)
-    num_train = int(total_files * train_ratio)
-    num_test = max(int(total_files * test_ratio), total_files - num_train)
-    num_val = total_files - num_train - num_test
-
-    # Distribute files
-    train_files = files[:num_train]
-    test_files = files[num_train : num_train + num_test]
-    val_files = files[num_train + num_test :]
-
-    for datafiles, target_dir in zip(
-        [train_files, test_files, val_files], [train_dir, test_dir, val_dir]
-    ):
-        for datafile in datafiles:
-            shutil.copy(
-                os.path.join(source_dir, datafile), os.path.join(target_dir, datafile)
-            )
-
-    print(
-        f"Data split complete: {num_train} train, {num_test} test, {num_val} validation files."
-    )
-
-
 def process_features(parameters, problem_data_dir):
     # create mesh
     scale_x = parameters["scale_x"]
diff --git a/script/build_swirl.py b/script/build_swirl.py
index 8b5dd25..f246c36 100644
--- a/script/build_swirl.py
+++ b/script/build_swirl.py
@@ -1,12 +1,10 @@
 # Author: Chunyang Wang
 # GitHub Username: chunyang-w
-import csv
-import os
-import shutil
 from argparse import ArgumentParser
 
 import firedrake as fd
 import matplotlib.pyplot as plt
+from build_helper import *
 
 import UM2N
 

From e4bd8e10b11acfdbdf791918fef4b1d595f9f914 Mon Sep 17 00:00:00 2001
From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com>
Date: Wed, 16 Jul 2025 11:24:06 +0100
Subject: [PATCH 5/7] #70 add flow control for feature parameters

---
 UM2N/processor/processor.py | 46 ++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/UM2N/processor/processor.py b/UM2N/processor/processor.py
index 485404a..40a26cb 100644
--- a/UM2N/processor/processor.py
+++ b/UM2N/processor/processor.py
@@ -234,16 +234,18 @@ def get_conv_feat(self, fix_reso_x=20, fix_reso_y=20):
             for j in range(len(conv_y_fix)):
                 # (x, y) conv_feat
                 conv_xy_fix[:, i, j] = np.array([conv_x_fix[i], conv_y_fix[j]])
-                conv_uh_fix[:, i, j] = self.raw_feature["uh"].at(
-                    [conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3
-                )
+                if "uh" in self.raw_feature:
+                    conv_uh_fix[:, i, j] = self.raw_feature["uh"].at(
+                        [conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3
+                    )
                 if "grad_uh_norm" in self.raw_feature:
                     conv_grad_uh_norm_fix[:, i, j] = self.raw_feature[
                         "grad_uh_norm"
                     ].at([conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3)
-                conv_hessian_norm_fix[:, i, j] = self.raw_feature["hessian_norm"].at(
-                    [conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3
-                )
+                if "hessian_norm" in self.raw_feature:
+                    conv_hessian_norm_fix[:, i, j] = self.raw_feature[
+                        "hessian_norm"
+                    ].at([conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3)
                 conv_monitor_val_fix[:, i, j] = self.raw_feature["monitor_val"].at(
                     [conv_x_fix[i], conv_y_fix[j]], tolerance=1e-3
                 )
@@ -316,16 +318,6 @@ def to_train_data(self):
         np_data = {
             "x": self.x,
             "coord": self.coordinates,
-            "u": self.feature["uh"],
-            "grad_u": self.feature["grad_uh"],
-            "grad_u_norm": self.feature["grad_uh_norm"],
-            "hessian": self.feature["hessian"],
-            "phi": self.feature["phi"],
-            "grad_phi": self.feature["grad_phi"],
-            "hessian_norm": self.feature["hessian_norm"],
-            "jacobian": self.feature["jacobian"],
-            "jacobian_det": self.feature["jacobian_det"],
-            "monitor_val": self.feature["monitor_val"],
             "edge_index": self.edge_T,
             "edge_index_bi": self.edge_bi_T,
             "cluster_edges": None,  # this will be added if we use data_transform.py to add cluster edges  # noqa
@@ -365,6 +357,28 @@ def to_train_data(self):
             "swirl_params": self.swirl_params,
             "t": self.t,  # time step when solving burgers eq.
             "idx": self.idx,  # index number for picking params for burgers tracer.  # noqa
+            "u": self.feature["uh"] if "uh" in self.feature else None,
+            "grad_u": self.feature["grad_uh"] if "grad_uh" in self.feature else None,
+            "grad_u_norm": self.feature["grad_uh_norm"]
+            if "grad_uh_norm" in self.feature
+            else None,
+            "hessian": self.feature["hessian"] if "hessian" in self.feature else None,
+            "phi": self.feature["phi"] if "phi" in self.feature else None,
+            "grad_phi": self.feature["grad_phi"]
+            if "grad_phi" in self.feature
+            else None,
+            "hessian_norm": self.feature["hessian_norm"]
+            if "hessian_norm" in self.feature
+            else None,
+            "jacobian": self.feature["jacobian"]
+            if "jacobian" in self.feature
+            else None,
+            "jacobian_det": self.feature["jacobian_det"]
+            if "jacobian_det" in self.feature
+            else None,
+            "monitor_val": self.feature["monitor_val"]
+            if "monitor_val" in self.feature
+            else None,
             "f": self.feature["f"] if "f" in self.feature else None,
         }
         if "uh_adapt" in self.feature:  # currently only in swirl case

From 6da913b56e82f59a617f621ad16c2468a3a48a5a Mon Sep 17 00:00:00 2001
From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com>
Date: Wed, 16 Jul 2025 13:20:35 +0100
Subject: [PATCH 6/7] #70 add build from monitor only

---
 script/build_monitor_dataset.py | 324 ++++++++++++++++++++++++++++++++
 1 file changed, 324 insertions(+)
 create mode 100644 script/build_monitor_dataset.py

diff --git a/script/build_monitor_dataset.py b/script/build_monitor_dataset.py
new file mode 100644
index 0000000..3bb2f64
--- /dev/null
+++ b/script/build_monitor_dataset.py
@@ -0,0 +1,324 @@
+import time
+from argparse import ArgumentParser
+
+import firedrake as fd
+import matplotlib.pyplot as plt
+import movement as mv
+from build_helper import *
+from matplotlib.colors import LogNorm
+
+import UM2N
+
+
+def parse_arguments():
+    """
+    Build the command-line parser and return the parsed namespace.
+
+    `--n_dist` takes precedence over `--max_dist`: when `--n_dist` is given,
+    `max_dist` is reset to None and a warning is printed.
+
+    Returns:
+        The namespace produced by `ArgumentParser.parse_args()`.
+    """
+    # (flag, type, default, help), listed in registration order
+    option_specs = [
+        ("--mesh_type", int, 2, "Algorithm used to generate mesh"),
+        ("--max_dist", int, 6, "Max number of distributions"),
+        ("--n_dist", int, None, "Number of distributions"),
+        ("--lc", float, 5e-2, "Length characteristic of mesh elements"),
+        ("--field_type", str, "aniso", "Data type (aniso/iso)"),
+        (
+            "--boundary_scheme",
+            str,
+            "full",
+            "Use padded scheme or full-scale scheme to sample central point of the bump (pad/full)",
+        ),
+        ("--n_samples", int, 100, "Number of samples generated"),
+        ("--rand_seed", int, 63, "Random seed"),
+    ]
+
+    cli = ArgumentParser()
+    for flag, arg_type, default_value, help_text in option_specs:
+        cli.add_argument(flag, type=arg_type, default=default_value, help=help_text)
+
+    namespace = cli.parse_args()
+
+    # max_dist only applies while n_dist is unset; an explicit n_dist wins,
+    # so max_dist is cleared and the user is told about it.
+    if namespace.n_dist is None:
+        return namespace
+
+    namespace.max_dist = None
+    print("Warning: max_dist is ignored because n_dist is set.")
+    return namespace
+
+
+def create_mesh(i, mesh_type, lc, scale_x, problem_mesh_dir):
+    """
+    Build the mesh for sample `i`.
+
+    Args:
+        i: Sample index; only used to name the saved `.msh` file.
+        mesh_type: 0 selects `fd.UnitSquareMesh`; any other value is passed
+            to `UM2N.UnstructuredSquareMeshGenerator`.
+        lc: Characteristic element length (`res` for the generator, or
+            `int(1 / lc)` cells per side when `mesh_type` is 0).
+        scale_x: Scale handed to the unstructured generator (unused otherwise).
+        problem_mesh_dir: Directory that receives `mesh_{i:04d}.msh`.
+
+    Returns:
+        The mesh object produced by the selected generator.
+    """
+    if mesh_type == 0:
+        cells_per_side = int(1 / lc)
+        return fd.UnitSquareMesh(cells_per_side, cells_per_side)
+
+    generator = UM2N.UnstructuredSquareMeshGenerator(
+        scale=scale_x, mesh_type=mesh_type
+    )
+    mesh_path = os.path.join(problem_mesh_dir, f"mesh_{i:04d}.msh")
+    return generator.generate_mesh(res=lc, output_filename=mesh_path)
+
+
+def generate_monitor():
+    """
+    Draw random ring parameters and build the matching monitor.
+
+    Returns:
+        monitor_func: Result of `mv.RingMonitorBuilder(...).get_monitor()`;
+            elsewhere in this script it is called as `monitor_func(mesh)`.
+        monitor_params: Dict with keys "centre", "radius", "amplitude", "width".
+    """
+    # Draw order (centre x, centre y, radius, amplitude, width) fixes how the
+    # seeded `random` stream is consumed, so keep it stable.
+    centre_x = round(random.uniform(0.2, 0.8), 3)
+    centre_y = round(random.uniform(0.2, 0.8), 3)
+    ring_radius = round(random.uniform(0.1, 0.5), 3)
+    ring_amplitude = int(random.uniform(10, 100))
+    ring_width = int(random.uniform(10, 200))
+
+    monitor_params = dict(
+        centre=(centre_x, centre_y),
+        radius=ring_radius,
+        amplitude=ring_amplitude,
+        width=ring_width,
+    )
+
+    # These parameters are specific to the ring monitor; swap the builder
+    # below to generate a different family of monitors.
+    builder = mv.RingMonitorBuilder(
+        centre=monitor_params["centre"],
+        radius=ring_radius,
+        amplitude=ring_amplitude,
+        width=ring_width,
+    )
+    return builder.get_monitor(), monitor_params
+
+
+def process_features(parameters, directories):
+    """Generate sample `i`: build mesh, draw monitor, move mesh, save features and plot."""
+    # ====  Create the mesh ======================
+    # NOTE: `i` is not a parameter; it is read from module scope (the __main__ loop counter).
+    mesh = create_mesh(
+        i,
+        mesh_type=parameters["mesh_type"],
+        lc=parameters["lc"],
+        scale_x=parameters["scale_x"],
+        problem_mesh_dir=directories["mesh"],
+    )
+
+    # ====  Generate monitor ======================
+    monitor_func, monitor_params = generate_monitor()
+
+    # NOTE(review): targets the same log dir as the run-level CSV; confirm output_csv does not overwrite it
+    output_csv(monitor_params, list(monitor_params.keys()), directories["log"])
+
+    # evaluate the monitor on `mesh` before the mover runs, for feature output
+    monitor_val = monitor_func(mesh)
+
+    # ====  Move the mesh ======================
+
+    # Monge-Ampere mover driven by the monitor (relaxation method)
+    mover = mv.MongeAmpereMover(
+        mesh, monitor_func, method="relaxation", rtol=1e-3, maxiter=500
+    )
+
+    start = time.perf_counter()
+
+    mover.move()
+
+    new_mesh = mover.mesh
+
+    # ====  Extract features from moved mesh ======================
+
+    # this is the jacobian of x with respect to xi
+    jacobian = fd.project(
+        fd.Identity(2) + mover.H, fd.TensorFunctionSpace(new_mesh, "CG", 1)
+    )
+    jacobian_det = fd.Function(fd.FunctionSpace(new_mesh, "CG", 1), name="jacobian_det")
+    jacobian_det.project(
+        jacobian[0, 0] * jacobian[1, 1] - jacobian[0, 1] * jacobian[1, 0]
+    )
+
+    # phi / grad_phi as exposed by the mover (presumably on the original mesh - confirm)
+    phi = mover.phi
+    grad_phi = mover.grad_phi
+
+    end = time.perf_counter()
+    dur = (end - start) * 1000  # ms; window covers move() and the projections above
+
+    # ====  Process data for training ======================
+    mesh_processor = UM2N.MeshProcessor(
+        original_mesh=mesh,
+        optimal_mesh=new_mesh,
+        function_space=fd.FunctionSpace(new_mesh, "CG", 1),
+        use_4_edge=True,
+        feature={
+            "jacobian": jacobian.dat.data_ro.reshape(-1, 4),
+            "jacobian_det": jacobian_det.dat.data_ro.reshape(-1, 1),
+            "phi": phi.dat.data_ro.reshape(-1, 1),
+            "grad_phi": grad_phi.dat.data_ro.reshape(-1, 2),
+            "monitor_val": monitor_val.dat.data_ro.reshape(-1, 1),
+        },
+        raw_feature={
+            "monitor_val": monitor_val,
+            "jacobian": jacobian,
+            "jacobian_det": jacobian_det,
+        },
+        # dist_params=None, # When nothing passed the default is used
+    )
+
+    # `save_taining_data` (sic) is spelled as called here; presumably matches the UM2N API - confirm
+    mesh_processor.save_taining_data(os.path.join(directories["data"], f"data_{i:04d}"))
+
+    # ====  Plot meshes and monitor ======================
+    rows, cols = 2, 2
+    cmap = "plasma"
+
+    fig, ax = plt.subplots(
+        rows, cols, figsize=(cols * 5, rows * 5), layout="compressed"
+    )
+
+    # Original (input) mesh
+    fd.triplot(mesh, axes=ax[0, 0])
+    ax[0, 0].set_title("Original uniform Mesh")
+    # Adapted mesh
+    fd.triplot(new_mesh, axes=ax[0, 1])
+    ax[0, 1].set_title(f"Adapted Mesh (MA): time taken {dur:.2f} ms")
+
+    # Monitor values (log colour scale) drawn with the original mesh
+    fd.triplot(mesh, axes=ax[1, 0])
+    cb = fd.tripcolor(monitor_val, cmap=cmap, axes=ax[1, 0], alpha=0.5, norm=LogNorm())
+    ax[1, 0].set_title(
+        f'Monitor (c: {monitor_params["centre"]} r: {monitor_params["radius"]} a: {monitor_params["amplitude"]} w: {monitor_params["width"]} )'
+    )
+    plt.colorbar(cb)
+
+    # Same monitor field drawn with the adapted mesh
+    fd.triplot(new_mesh, axes=ax[1, 1])
+    cb = fd.tripcolor(monitor_val, cmap=cmap, axes=ax[1, 1], alpha=0.5, norm=LogNorm())
+    ax[1, 1].set_title("Monitor overlayed with Adapted Mesh")
+    plt.colorbar(cb)
+
+    for rr in range(rows):
+        for cc in range(cols):
+            ax[rr, cc].set_aspect("equal", "box")
+
+    fig.savefig(os.path.join(directories["plot_compare"], f"plot_{i:04d}.png"))
+    plt.close()
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+
+    # ====  Parameters ======================
+    parameters = {
+        # parameters for problem
+        "problem": "ring_monitor_test",
+        "lc": args.lc,
+        # parameters for mesh def
+        "n_samples": args.n_samples,
+        "data_type": args.field_type,
+        "scheme": args.boundary_scheme,
+        "mesh_type": int(args.mesh_type),
+        # parameters for domain scale
+        "scale_x": 1,
+        "scale_y": 1,
+        # parameters for data split
+        "p_train": 0.75,
+        "p_test": 0.15,
+        "p_val": 0.1,
+    }
+
+    # Seed Python's `random`, which generate_monitor() draws its parameters from
+    random.seed(args.rand_seed)
+
+    # ====  Setup Directories ======================
+    problem_specific_dir = "lc={}_n={}_{}_{}_meshtype_{}".format(
+        parameters["lc"],
+        parameters["n_samples"],
+        parameters["data_type"],
+        parameters["scheme"],
+        parameters["mesh_type"],
+    )
+
+    subdirs = [
+        "data",
+        "plot",
+        "plot_compare",
+        "log",
+        "mesh",
+        "mesh_fine",
+        "train",
+        "test",
+        "val",
+    ]
+
+    directories = setup_directories(
+        problem=parameters["problem"],
+        mesh_type=parameters["mesh_type"],
+        base_dir=None,
+        subdirs=subdirs,
+        dir_format=problem_specific_dir,
+    )
+
+    # ====  Output CSV ======================
+    key_list = [
+        "cmin",
+        "cmax",
+        "sigma_mean_scaler",
+        "sigma_sigma_scaler",
+        "sigma_eps",
+        "data_type",
+        "scheme",
+        "n_samples",
+        "lc",
+        "mesh_type",
+    ]
+    output_csv(parameters, key_list, directories["log"])
+
+    # ====  Data Generation Scripts ======================
+    i = 0
+    while i < parameters["n_samples"]:
+        try:
+            print(f"Generating Sample: {i}")
+
+            # process_features reads this module-level `i` to name its outputs
+            process_features(parameters, directories)
+            i += 1
+
+        except fd.exceptions.ConvergenceError:
+            print(f"Iteration {i} did not converge.")
+            continue
+
+    # ====  Data Splits ============================================
+    # TODO: this should probably be done in the training script, not the build script
+    split_data(
+        source_dir=directories["data"],
+        train_dir=directories["train"],
+        test_dir=directories["test"],
+        val_dir=directories["val"],
+        train_ratio=parameters["p_train"],
+        test_ratio=parameters["p_test"],
+        val_ratio=parameters["p_val"],
+    )

From 04808da04d940b9c75ef4463702aafb7c81839f1 Mon Sep 17 00:00:00 2001
From: acse-ej321 <89605848+acse-ej321@users.noreply.github.com>
Date: Wed, 16 Jul 2025 13:24:33 +0100
Subject: [PATCH 7/7] #70 remove extra files

---
 script/ej321_helm_dataset_run.sh | 57 --------------------------------
 script/make_build_all_test.sh    | 15 ---------
 2 files changed, 72 deletions(-)
 delete mode 100644 script/ej321_helm_dataset_run.sh
 delete mode 100644 script/make_build_all_test.sh

diff --git a/script/ej321_helm_dataset_run.sh b/script/ej321_helm_dataset_run.sh
deleted file mode 100644
index f3803d1..0000000
--- a/script/ej321_helm_dataset_run.sh
+++ /dev/null
@@ -1,57 +0,0 @@
-
-
-###################### Training dataset generation ######################
-# Training data
-# Problem type: Helmholtz
-# Meshtype: 2
-# n_samples: 300
-# Random_seed: 63
-# Resolution: 0.05, 0.055
-
-# use 2 / 6 / 0
-mesh_type=(6)
-# training set build
-rand_seed=63
-lcs=(0.05 0.055)
-n_samples_train=(300)
-
-# helmholtz square case
-for mt in "${mesh_type[@]}"; do
-    for i in "${lcs[@]}"; do
-        for n_s in "${n_samples_train[@]}"; do
-            echo "lc = $i meshtype = $mt num samples = $n_s"
-            python build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_s --field_type="aniso" --boundary_scheme="full" --mesh_type=$mt
-            # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="pad" --mesh_type=$mesh_type
-            # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="full" --mesh_type=$mesh_type
-            # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed $rand_seed --n_samples $n_samples_train --field_type "aniso" --boundary_scheme "pad" --mesh_type=$mesh_type
-        done
-    done
-done
-###################### Training dataset generation ######################
-
-
-# ###################### Testing dataset generation ######################
-# # Test data
-# # Problem type: Helmholtz
-# # Meshtype: 0, 2, 6  
-# # n_samples: 100
-# # Random_seed: 42
-# # Resolution: 0.05, 0.055, 0.028
-
-# n_samples_train=101
-# rand_seed=42
-# mesh_types=(6 2 0)
-# # length character for polygon mesh
-# lcs=(0.05 0.028)
-
-# # helmholtz square case
-# for m in "${mesh_types[@]}"; do
-#     for i in "${lcs[@]}"; do
-#         echo "lc = $i Meshtype = $m"
-#         python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m
-#         # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="pad" --mesh_type=$mesh_type
-#         # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="iso" --boundary_scheme="full" --mesh_type=$mesh_type
-#         # python ./script/build_helmholtz_square.py --lc=$i   --rand_seed $rand_seed --n_samples $n_samples_train --field_type "aniso" --boundary_scheme "pad" --mesh_type=$mesh_type
-#     done
-# done
-# ###################### Testing dataset generation ######################
\ No newline at end of file
diff --git a/script/make_build_all_test.sh b/script/make_build_all_test.sh
deleted file mode 100644
index a531450..0000000
--- a/script/make_build_all_test.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-mesh_types=(6 2 0)
-rand_seed=42
-n_samples_train=3
-
-for m in "${mesh_types[@]}"; do
-    python build_burgers_square.py --rand_seed=$rand_seed --n_case=$n_samples_train --mesh_type=$m
-
-    # python build_helmholtz_square.py  --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m
-    # python build_helmholtz_poly.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m
-    # python build_poisson_square.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m
-    # python build_poisson_poly.py --rand_seed=$rand_seed --n_samples=$n_samples_train --field_type="aniso" --boundary_scheme="full" --mesh_type=$m
-    # python build_burgers_square.py --rand_seed=$rand_seed --n_case=$n_samples_train --mesh_type=$m
-    # python build_swirl.py --lc=0.028 --alpha=1.5 --r_0 0.20 --x_0 0.3 --y_0 0.3 --n_monitor_smooth=10 --mesh_type=$m
-
-done
\ No newline at end of file