diff --git a/docs/source/pics/2d_multi_tree_pgs.png b/docs/source/pics/2d_multi_tree_pgs.png new file mode 100644 index 000000000..31452c1ae Binary files /dev/null and b/docs/source/pics/2d_multi_tree_pgs.png differ diff --git a/docs/source/pics/2d_tree_pgs.png b/docs/source/pics/2d_tree_pgs.png new file mode 100644 index 000000000..832d6a330 Binary files /dev/null and b/docs/source/pics/2d_tree_pgs.png differ diff --git a/docs/source/pics/3d_tree_pgs.png b/docs/source/pics/3d_tree_pgs.png new file mode 100644 index 000000000..49eb7d568 Binary files /dev/null and b/docs/source/pics/3d_tree_pgs.png differ diff --git a/examples/11_plurigaussian/07_tree_pgs.py b/examples/11_plurigaussian/07_tree_pgs.py new file mode 100644 index 000000000..2b49a42ba --- /dev/null +++ b/examples/11_plurigaussian/07_tree_pgs.py @@ -0,0 +1,133 @@ +""" +PGS through decision trees +-------------------------- + +In typical PGS workflows, the lithotype is defined through a spatial rule. A +more flexible approach can be taken such that the lithotype is represented as a +decision tree. More specifically, this is given as a binary tree, where each +node is a decision based on the values of the spatial random +fields [Sektnan et al., 2024](https://doi.org/10.1007/s11004-024-10162-5). The +leaf nodes are then assigned a discrete value which is given to the cell that +is being evaluated. Here, a simple example is provided showing how to use the +tree based approach in conducting plurigaussian simulation. + +As in the previous examples, we define the simulation domain and generate the +necessary spatial random fields. +""" + +import matplotlib.pyplot as plt +import numpy as np + +import gstools as gs + +dim = 2 + +# no. of cells in both dimensions +N = [150, 150] + +x = np.arange(N[0]) +y = np.arange(N[1]) + +model = gs.Gaussian(dim=dim, var=1, len_scale=10) +srf = gs.SRF(model) +field1 = srf.structured([x, y], seed=215253419) +field2 = srf.structured([x, y], seed=192534221) + +############################################################################### +# Decisions within the tree are carried out through user defined functions. +# In this way, the lithotype is defined in a continuous space, not relying on +# discretization. In this example we will use an ellipse as a decision boundary. +# The function accepts a data dictionary, which contains the values of the +# spatial random fields, and the parameters of the ellipse. + + +def ellipse(data, key1, key2, c1, c2, s1, s2, angle=0): + x, y = data[key1] - c1, data[key2] - c2 + + if angle: + theta = np.deg2rad(angle) + c, s = np.cos(theta), np.sin(theta) + x, y = x * c + y * s, -x * s + y * c + + return (x / s1) ** 2 + (y / s2) ** 2 <= 1 + + +############################################################################### +# The decision tree is defined as a dictionary, where each node is a dictionary +# itself. The root node is the first decision, which branches into two nodes, +# one for each possible outcome. The leaf nodes are the final decisions, which +# assign a discrete value to the given cell. The `func` key in each decision +# node contains the function to be called, and the `args` key contains the +# arguments to be passed to the function. These arguments must match the +# parameters of the function. The `yes_branch` and `no_branch` keys contain the +# names of the nodes to follow based on the outcome of the decision. `root` +# must be given, but all other node names are arbitrary. +# +# Here, we define a simple decision tree with two phases. The first node +# checks if the point is inside an ellipse, and if it is, it assigns the value +# 1 to the lithotype. If it is not, it assigns the value 0. The parameters +# of the ellipse are defined in the `args` dictionary. The keys `key1` and +# `key2` refer to the spatial random fields, which are used to define the +# ellipse. In the algorithm, the fields are indexed as `Z1`, `Z2`, etc., +# depending on the order in which they are passed to the PGS object. In this +# case, `Z1` refers to `field1` and `Z2` refers to `field2`. The parameters +# `c1`, `c2`, `s1`, `s2`, and `angle` define the center, scale, and rotation of +# the ellipse, respectively. +# +# Lithotype decision functions operate in the latent Gaussian domain [-3,3]×[-3,3]. +# Since each GRF is N(0,1), approximately 99.7% of its values lie within ±3σ, +# making ±3 a natural “full” range for defining splits or shapes. + +config = { + "root": { + "type": "decision", + "func": ellipse, + "args": { + "key1": "Z1", + "key2": "Z2", + "c1": 0, + "c2": 0, + "s1": 2.5, + "s2": 0.8, + "angle": -45, + }, + "yes_branch": "phase1", + "no_branch": "phase0", + }, + "phase1": {"type": "leaf", "action": 1}, + "phase0": {"type": "leaf", "action": 0}, +} + +############################################################################### +# With the tree configuration ready, we can create the PGS object as normal, +# passing our domain size and spatial random fields. When generating the +# plurigaussian field, we pass the tree configuration so GSTools knows which +# PGS process to follow. + +pgs = gs.PGS(dim, [field1, field2]) + +P = pgs(tree=config) + +############################################################################### +# We can also compute the equivalent spatial lithotype. +# +# NB: If we want to compute L before P, we must pass the tree config to +# `pgs.compute_lithotype(tree=config)`. + +L = pgs.compute_lithotype() + +############################################################################### +# Finally, we plot `P` as well as the equivalent spatial lithotype `L`. + +fig, axs = plt.subplots(1, 2, figsize=(10, 5), dpi=150) + +im0 = axs[0].imshow(L, cmap="copper", origin="lower") +axs[0].set_title("L") +im1 = axs[1].imshow(P, cmap="copper", origin="lower") +axs[1].set_title("P") + +############################################################################### +# +# .. image:: ../../pics/2d_tree_pgs.png +# :width: 400px +# :align: center diff --git a/examples/11_plurigaussian/08_multi_field_pgs.py b/examples/11_plurigaussian/08_multi_field_pgs.py new file mode 100644 index 000000000..3c3b58ee5 --- /dev/null +++ b/examples/11_plurigaussian/08_multi_field_pgs.py @@ -0,0 +1,117 @@ +""" +From bigaussian to plurigaussian simulation +-------------------------------------------------- + +In many PGS implementations, the dimensions of the simulation domain often +matches the number of fields that are supplied. However, this is not a +requirement of the PGS algorithm. In fact, it is possible to use multiple +spatial random fields in PGS, which can be useful for more complex lithotype +definitions. In this example, we will demonstrate how to use multiple SRFs in +PGS. In GSTools, this is done by utlising the tree based architecture. + +Typically, PGS in two dimensions is carried out as a bigaussian simulation, +where two random fields are used. Here, we will employ four. We begin by +setting up the simulation domain and generating the necessary random fields, +where the length scales of two of the fields are much larger than the other two. +""" + +import matplotlib.pyplot as plt +import numpy as np + +import gstools as gs + +dim = 2 + +N = [200, 200] + +x = np.arange(N[0]) +y = np.arange(N[1]) + +model = gs.Gaussian(dim=dim, var=1, len_scale=15) +srf = gs.SRF(model) +field1 = srf.structured([x, y], seed=201519) +field2 = srf.structured([x, y], seed=199221) +model = gs.Gaussian(dim=dim, var=1, len_scale=3) +srf = gs.SRF(model) +field3 = srf.structured([x, y], seed=1345134) +field4 = srf.structured([x, y], seed=1351455) + +############################################################################### +# As in the previous example, an ellipse is used as the decision boundary. + + +def ellipse(data, key1, key2, c1, c2, s1, s2, angle=0): + x, y = data[key1] - c1, data[key2] - c2 + + if angle: + theta = np.deg2rad(angle) + c, s = np.cos(theta), np.sin(theta) + x, y = x * c + y * s, -x * s + y * c + + return (x / s1) ** 2 + (y / s2) ** 2 <= 1 + + +############################################################################### +# The configuration dictionary for the decision tree is defined as before, but +# this time we pass the additional keys `Z3` and `Z4`, which refer to the +# additional fields `field3` and `field4`. The decision tree is structured in a +# way that the first decision node is based on the first two fields, and the +# second decision node is based on the last two fields. + +config = { + "root": { + "type": "decision", + "func": ellipse, + "args": { + "key1": "Z1", + "key2": "Z2", + "c1": 0.7, + "c2": 0.7, + "s1": 1.3, + "s2": 1.3, + }, + "yes_branch": "phase1", + "no_branch": "node1", + }, + "node1": { + "type": "decision", + "func": ellipse, + "args": { + "key1": "Z3", + "key2": "Z4", + "c1": -0.7, + "c2": -0.7, + "s1": 1.3, + "s2": 1.3, + "angle": 30, + }, + "yes_branch": "phase2", + "no_branch": "phase0", + }, + "phase2": {"type": "leaf", "action": 2}, + "phase1": {"type": "leaf", "action": 1}, + "phase0": {"type": "leaf", "action": 0}, +} + +############################################################################### +# Next, we create the PGS object, passing in all four fields, and generate the +# plurigaussian field `P` + +pgs = gs.PGS(dim, [field1, field2, field3, field4]) + +P = pgs(tree=config) + +############################################################################### +# Finally, we plot `P` +# +# NB: In the current implementation, the calculation of the equivalent spatial +# lithotype `L` is not supported for multiple fields + +plt.figure(figsize=(8, 6)) +plt.imshow(P, cmap="copper", origin="lower") + +############################################################################### +# +# .. image:: ../../pics/2d_multi_tree_pgs.png +# :width: 400px +# :align: center diff --git a/examples/11_plurigaussian/09_3d_tree_pgs.py b/examples/11_plurigaussian/09_3d_tree_pgs.py new file mode 100644 index 000000000..fa3d16d20 --- /dev/null +++ b/examples/11_plurigaussian/09_3d_tree_pgs.py @@ -0,0 +1,83 @@ +""" +Three dimensional PGS through decision trees +-------------------------------------------- + +Let's apply the decision tree approach to three dimensional PGS +""" + +import numpy as np + +import gstools as gs + +dim = 3 + +# no. of cells in all dimensions +N = [60] * dim + +x = np.arange(N[0]) +y = np.arange(N[1]) +z = np.arange(N[2]) + +############################################################################### +# As we want to generate a three dimensional field, we generate three spatial +# random fields (SRF) as our input. In this example, the number of fields +# is equal to the number of dimensions, but as before, this is not a +# requirement. + +model = gs.Gaussian(dim=dim, var=1, len_scale=[15, 15, 15]) +srf = gs.SRF(model) +field1 = srf.structured([x, y, z], seed=20277519) +field2 = srf.structured([x, y, z], seed=19727221) +field3 = srf.structured([x, y, z], seed=21145612) + +############################################################################### +# The decision tree will now utilise an ellipsoid as the decision boundary, +# having a similar structure as the ellipse in the previous example. + + +def ellipsoid(data, key1, key2, key3, c1, c2, c3, s1, s2, s3): + return ((data[key1] - c1) / s1) ** 2 + ((data[key2] - c2) / s2) ** 2 + ( + (data[key3] - c3) / s3 + ) ** 2 <= 1 + + +config = { + "root": { + "type": "decision", + "func": ellipsoid, + "args": { + "key1": "Z1", + "key2": "Z2", + "key3": "Z3", + "c1": 0, + "c2": 0, + "c3": 0, + "s1": 3, + "s2": 1, + "s3": 0.4, + }, + "yes_branch": "phase1", + "no_branch": "phase0", + }, + "phase0": {"type": "leaf", "action": 0}, + "phase1": {"type": "leaf", "action": 1}, +} + +############################################################################### +# As before, we initialise the PGS process, generate `P`, and plot to +# visualise the results + +import pyvista as pv + +pgs = gs.PGS(dim, [field1, field2, field3]) +P = pgs(tree=config) + +grid = pv.ImageData(dimensions=N) +grid.point_data["PGS"] = P.reshape(-1) +grid.threshold(0.5, scalars="PGS").plot() + +############################################################################### +# +# .. image:: ../../pics/3d_tree_pgs.png +# :width: 400px +# :align: center diff --git a/examples/11_plurigaussian/README.rst b/examples/11_plurigaussian/README.rst index 66ccafa65..7a9a0835b 100644 --- a/examples/11_plurigaussian/README.rst +++ b/examples/11_plurigaussian/README.rst @@ -1,14 +1,27 @@ -Plurigaussian Field Generation -============================== +Plurigaussian Simulation +======================== -Plurigaussian field simulations (PGS) are used to simulate correlated fields +Plurigaussian simulation (PGS) is used to simulate correlated fields of categorical data, e.g. lithofacies, hydrofacies, soil types, or cementitious materials. -PGS uses one spatial random field (SRF) per dimension, e.g. two SRFs, when -working with two dimensional data. Furthermore, PGS needs a field, which -describes the categorical data and its spatial relations. -This might sound more complicated then it is, as we will see in the following -examples. +In general, we define a categorical rule which dictates the relative +frequency and connectivity of the phases present in the final realisation. +We employ spatial random fields (SRFs) to map to this rule. +This mapping determines the phase of a given point in the simulation domain. +The definition of this rule limits how we can interact with it. +For example, the rule may be defined spatially (e.g. as an image or array) +or via a decision tree. Both forms will be explored in the following +examples, highlighting their differences. +Many PGS approaches constrain the number of input SRFs to match the +dimensions of the simulation domain. This constraint stems from the +implementation, not the method itself. +With a spatial lithotype, we perform bigaussian and trigaussian +simulations for two- and three-dimensional realisations, respectively. +In contrast, the tree-based approach allows an arbitrary number of SRFs, +yielding a fully *pluri*gaussian simulation. +This may sound more complicated than it is; we will clarify everything +in the examples that follow. + Examples -------- diff --git a/src/gstools/field/pgs.py b/src/gstools/field/pgs.py index 2ab7b226c..2921c9319 100644 --- a/src/gstools/field/pgs.py +++ b/src/gstools/field/pgs.py @@ -55,11 +55,6 @@ class PGS: def __init__(self, dim, fields): # hard to test for 1d case - if dim > 1: - if dim != len(fields): - raise ValueError( - "PGS: Mismatch between dim. and no. of fields." - ) for d in range(1, dim): if not fields[0].shape == fields[d].shape: raise ValueError("PGS: Not all fields have the same shape.") @@ -67,6 +62,8 @@ def __init__(self, dim, fields): self._fields = np.array(fields) self._lithotypes = None self._pos_lith = None + self._tree = None + self._field_names = [f"Z{i+1}" for i in range(len(self._fields))] try: self._mapping = np.stack(self._fields, axis=1) except np.AxisError: @@ -78,31 +75,145 @@ def __init__(self, dim, fields): else: raise - def __call__(self, lithotypes): - """Generate the plurigaussian field. + def __call__(self, lithotypes=None, tree=None): + """ + Generate the plurigaussian field via spatial lithotype or decision tree. + + Either a lithotype field or a decision tree config must be provided. + If `lithotypes` is given, map lithotype codes to the PGS via index + scaling. If `tree` is given, build and apply a DecisionTree to assign + phase labels. Parameters ---------- - lithotypes : :class:`numpy.ndarray` - A `dim` dimensional structured field, whose values are mapped to the PGS. - It does not have to have the same shape as the `fields`, as the indices are - automatically scaled. + lithotypes : :class:`numpy.ndarray`, optional + `dim`-dimensional structured lithotype field. Shape may differ from + `fields`, as indices are automatically scaled. Mutually exclusive + with `tree`. + tree : dict, optional + Configuration dict for constructing a DecisionTree. Must contain + node specifications. Mutually exclusive with `lithotypes`. + Returns ------- pgs : :class:`numpy.ndarray` - the plurigaussian field + Plurigaussian field array: either the mapped lithotype field or + the labels assigned by the decision tree, matching the simulation + domain. + + Raises + ------ + ValueError + If neither or both of `lithotypes` and `tree` are provided. + ValueError + If `lithotypes` shape does not match `dim` or number of `fields`. + ValueError + If `dim` != len(fields) when using `lithotypes`. """ - self._lithotypes = np.array(lithotypes) - if len(self._lithotypes.shape) != self._dim: - raise ValueError("PGS: Mismatch between dim. and facies shape.") - self._pos_lith = self.calc_lithotype_axes(self._lithotypes.shape) - P_dig = [] - for d in range(self._dim): - P_dig.append(np.digitize(self._mapping[:, d], self._pos_lith[d])) + if lithotypes is not None or tree is not None: + if lithotypes is not None: + if self._dim > 1: + if self._dim != len(self._fields): + raise ValueError( + "PGS: Mismatch between dim. and no. of fields." + ) + + self._lithotypes = np.array(lithotypes) + if len(self._lithotypes.shape) != self._dim: + raise ValueError( + "PGS: Mismatch between dim. and facies shape." + ) + self._pos_lith = self.calc_lithotype_axes( + self._lithotypes.shape + ) + P_dig = [] + for d in range(self._dim): + P_dig.append( + np.digitize(self._mapping[:, d], self._pos_lith[d]) + ) + # once Py3.11 has reached its EOL, we can drop the 1-tuple :-) + return self._lithotypes[(*P_dig,)] + + if tree is not None: + self._tree = self.DecisionTree(tree) + self._tree = self._tree.build_tree() + + coords_P = np.stack( + [ + self._fields[d].ravel() + for d in range(len(self._fields)) + ], + axis=1, + ) + labels_P = np.array( + [ + self._tree.decide(dict(zip(self._field_names, pt))) + for pt in coords_P + ] + ) + return labels_P.reshape(self._fields.shape[1:]) + + raise ValueError( + "PGS: Must provide exactly one of `lithotypes` or `tree`" + ) + + def compute_lithotype(self, tree=None): + """ + Compute lithotype from input SRFs using a decision tree. - # once Py3.11 has reached its EOL, we can drop the 1-tuple :-) - return self._lithotypes[(*P_dig,)] + If `self._tree` is not set, a tree configuration must be provided via + the `tree` argument. The method then builds or reuses the decision tree + and applies it to the coordinates of the plurigaussian fields to assign + a lithotype phase at each point. + + Parameters + ---------- + tree : dict or None, optional + Configuration for the decision tree. If None, `self._tree` must + already be defined. Defaults to None. + + Returns + ------- + lithotype : :class:`numpy.ndarray` + Discrete label array of shape equal to the simulation domain, + where each entry is the phase index determined by the tree. + + Raises + ------ + ValueError + If no decision tree is available or if `self._dim` does not equal + the number of provided fields. + """ + if self._tree is None and tree is None: + raise ValueError( + "PGS: Please provide a decision tree config or compute P to assemble" + ) + if self._tree is None and tree is not None: + self._tree = self.DecisionTree(tree) + self._tree = self._tree.build_tree() + + if self._dim == len(self._fields): + axes = [ + np.linspace(-3, 3, self._fields[0].shape[0]) + for _ in self._fields.shape[1:] + ] # works 2D 2 Fields + mesh = np.meshgrid(*axes, indexing="ij") + coords_L = np.stack([m.ravel() for m in mesh], axis=1) + labels_L = np.array( + [ + self._tree.decide(dict(zip(self._field_names, pt))) + for pt in coords_L + ] + ) + L_shape = tuple( + [self._fields.shape[1]] * len(self._fields.shape[1:]) + ) + L = labels_L.reshape(L_shape) + else: + raise ValueError("PGS: Only implemented for dim == len(fields)") + + return L def calc_lithotype_axes(self, lithotypes_shape): """Calculate the axes on which the lithorypes are defined. @@ -171,3 +282,174 @@ def transform_coords(self, lithotypes_shape, pos): ) ) return pos_trans + + class DecisionTree: + """ + Build and traverse a decision tree for assigning lithotype labels. + + This class constructs a tree of DecisionNode and LeafNode instances + from a configuration mapping. Once built, it can classify input data + by following the decision branches to a leaf action. + + Parameters + ---------- + config : dict + Mapping of node IDs to node specifications. Each entry must include: + - 'type': 'decision' or 'leaf' + - For decision nodes: + • 'func' (callable) and 'args' (dict) + • Optional 'yes_branch' and 'no_branch' keys naming other nodes + - For leaf nodes: + Notes + ----- + - Call `build_tree()` to link nodes and obtain the root before using + `decide()`. + - The tree is immutable once built; rebuild to apply a new config. + """ + + def __init__(self, config): + self._config = config + self._tree = None + + def build_tree(self): + """ + Construct the decision tree structure from the configuration. + + Iterates through the config to create DecisionNode and LeafNode + instances, then links decision nodes to their yes/no branches. + + Returns + ------- + root : DecisionNode or LeafNode + The root node of the constructed decision tree. + + Raises + ------ + KeyError + If 'root' is not defined in the configuration. + """ + nodes = {} + for node_id, details in self._config.items(): + if details["type"] == "decision": + nodes[node_id] = self.DecisionNode( + func=details["func"], args=details["args"] + ) + elif details["type"] == "leaf": + nodes[node_id] = self.LeafNode(details["action"]) + for node_id, details in self._config.items(): + if details["type"] == "decision": + nodes[node_id].yes_branch = nodes.get( + details.get("yes_branch") + ) + nodes[node_id].no_branch = nodes.get( + details.get("no_branch") + ) + + return nodes["root"] + + def decide(self, data): + """ + Traverse the built tree to make a decision for the given data. + + Parameters + ---------- + data : dict + A mapping of feature names to values, passed to decision functions + in each DecisionNode. + + Returns + ------- + result + The action value from the reached LeafNode, or None if a branch + is missing. + + Raises + ------ + ValueError + If the tree has not been built (i.e., `build_tree` not called). + """ + if self._tree: + return self._tree.decide(data) + raise ValueError("The decision tree has not been built yet.") + + class DecisionNode: # pylint: disable=too-few-public-methods + """ + Internal node that evaluates a condition and routes to child branches. + + A DecisionNode wraps a boolean function and two optional branches + (yes_branch and no_branch), which may be further DecisionNode or + LeafNode instances. + + Parameters + ---------- + func : callable + A function that evaluates a condition on the input data. + Must accept `data` as first argument and keyword args. + args : dict + Keyword arguments to pass to `func` when called. + yes_branch : DecisionNode or LeafNode, optional + Node to traverse if `func(data, **args)` returns True. + no_branch : DecisionNode or LeafNode, optional + Node to traverse if `func(data, **args)` returns False. + """ + + def __init__(self, func, args, yes_branch=None, no_branch=None): + self.func = func + self.args = args + self.yes_branch = yes_branch + self.no_branch = no_branch + + def decide(self, data): + """ + Evaluate the decision function and traverse to the next node. + + Parameters + ---------- + data : dict + Feature mapping passed to `func`. + + Returns + ------- + result + The outcome of the subsequent node's `decide` method, or None + if the respective branch is not set. + """ + if self.func(data, **self.args): + return ( + self.yes_branch.decide(data) + if self.yes_branch + else None + ) + return self.no_branch.decide(data) if self.no_branch else None + + class LeafNode: # pylint: disable=too-few-public-methods + """ + Terminal node that returns a stored action when reached. + + A LeafNode represents the outcome of a decision path. Its `action` + value is returned directly by `decide()` + + Parameters + ---------- + action : any + The value to return when this leaf is reached. + """ + + def __init__(self, action): + self.action = action + + def decide(self, _data): + """ + Return the leaf action, terminating the traversal. + + Parameters + ---------- + data : dict + Ignored input data. + + Returns + ------- + action : any + The action value stored in this leaf. + """ + return self.action