Merged (changes from 10 commits)
@@ -168,7 +168,7 @@ def des_tables(
table3: FloatField,
table4: FloatField,
):
with computation(FORWARD), interval(...):
with computation(FORWARD), interval(0, -1):
des1 = max(0.0, table1[0, 0, 1] - table1)
des2 = max(0.0, table2[0, 0, 1] - table2)
des3 = max(0.0, table3[0, 0, 1] - table3)
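The narrowed interval pairs with the [0, 0, 1] offsets above: each level reads k+1, and the last level has no such neighbour, so the FORWARD pass presumably has to stop one level short. A minimal NumPy sketch of the indexing this expresses (plain NumPy, not GT4Py; nk and the table values are made-up stand-ins):

import numpy as np

nk = 6
table1 = np.linspace(0.0, 1.0, nk) ** 2      # stand-in column of a lookup table

des1 = np.zeros(nk)
# interval(0, -1): levels 0 .. nk-2 only, because each level reads k+1
des1[:-1] = np.maximum(0.0, table1[1:] - table1[:-1])
# the last level has no k+1 neighbour and is left untouched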
@@ -51,8 +51,6 @@ def calculate_derived_states(
"""
Computes derived state fields required for the rest of the GFDL single moment
microphysics module.

This stencil MUST be built using Z_INTERFACE_DIM to function properly.
"""
from __externals__ import k_end

@@ -377,7 +375,6 @@ def __call__(
dsnow_dt=dsnowdt_macro,
dgraupel_dt=dgraupeldt_macro,
)

self._calculate_derived_states(
p_interface=p_interface,
p_interface_mb=local_p_interface_mb,
@@ -1,11 +1,13 @@
from __future__ import annotations

from functools import lru_cache
from math import prod
from types import ModuleType
from typing import List, Optional, Tuple, TypeAlias
from typing import Tuple, TypeAlias

import cffi
import numpy as np
import numpy.lib as npl
import numpy.typing as npt
from _cffi_backend import _CDataBase as CFFIObj

@@ -34,9 +36,19 @@ def __exit__(self, exc_type, exc_value, traceback):
pass


@lru_cache
def _compute_fortran_strides(shape: Tuple[int, ...], dtype: npt.DTypeLike) -> tuple[int, ...]:
cshp = np.cumprod(shape)
return np.concatenate(([1], cshp[:-1])) * np.dtype(dtype).itemsize


class FortranPythonConversion:
"""
Convert Fortran arrays to NumPy and vice-versa
Convert Fortran arrays to NumPy and vice-versa.

The Python memory will be laid out Fortran-like (column-major order),
e.g. 3D element strides of [1, dim0, dim0*dim1].
"""

def __init__(
@@ -81,136 +93,97 @@ def device_sync(self):
def _fortran_to_numpy(
self,
fptr: cffi.FFI.CData,
dim: Optional[List[int]] = None,
dims: list[int] | None,
) -> np.ndarray:
"""
Input: Fortran data pointed to by fptr, of shape dims = (i, j, k)
Output: C-ordered double precision NumPy data of shape (i, j, k)
Output: 1-D NumPy view over the same buffer (length i*j*k); no copy, layout unchanged
"""
if not dim:
dim = [self._npx, self._npy, self._npz]
if not dims:
dims = [self._npx, self._npy, self._npz]
ftype = self._ffi.getctype(self._ffi.typeof(fptr).item)
if ftype not in self._TYPEMAP:
raise ValueError(f"Fortran Python memory converter: cannot convert type {ftype}")
return np.frombuffer(
self._ffi.buffer(fptr, prod(dim) * self._ffi.sizeof(ftype)),
self._ffi.buffer(fptr, prod(dims) * self._ffi.sizeof(ftype)),
self._TYPEMAP[ftype],
)

def _upload_and_transform(
self,
host_array: np.ndarray,
dim: Optional[List[int]] = None,
swap_axes: Optional[Tuple[int, int]] = None,
dims: list[int] | None,
) -> DeviceArray:
"""Upload to device & transform to Pace compatible layout"""
with self._current_stream:
device_array = cp.asarray(host_array)
final_array = self._transform_from_fortran_layout(
device_array,
dim,
swap_axes,
)
self._current_stream = (
self._stream_A if self._current_stream == self._stream_B else self._stream_B
)
return final_array
device_array = cp.asarray(host_array)
final_array = self._transform_from_fortran_layout(device_array, dims)
self._current_stream = self._stream_A if self._current_stream == self._stream_B else self._stream_B
return final_array

def _transform_from_fortran_layout(
self,
array: PythonArray,
dim: Optional[List[int]] = None,
swap_axes: Optional[Tuple[int, int]] = None,
dims: list[int] | None,
) -> PythonArray:
"""Transform from Fortran layout into a Pace compatible layout"""
if not dim:
dim = [self._npx, self._npy, self._npz]
trf_array = array.reshape(tuple(reversed(dim))).transpose().astype(Float)
if swap_axes:
trf_array = self._target_np.swapaxes(
trf_array,
swap_axes[0],
swap_axes[1],
)
if not dims:
dims = [self._npx, self._npy, self._npz]
trf_array = npl.stride_tricks.as_strided(
array,
shape=dims,
strides=_compute_fortran_strides(tuple(dims), Float),
)
return trf_array

def fortran_to_python(
self,
fptr: cffi.FFI.CData,
dim: Optional[List[int]] = None,
swap_axes: Optional[Tuple[int, int]] = None,
dims: list[int] | None,
*,
allow_device_transfer: bool = True,
) -> PythonArray:
"""Move fortran memory into python space"""
np_array = self._fortran_to_numpy(fptr, dim)
if self._python_targets_gpu:
return self._upload_and_transform(np_array, dim, swap_axes)
"""Move fortran memory into python space."""
np_array = self._fortran_to_numpy(fptr, dims)
if allow_device_transfer and self._python_targets_gpu:
return self._upload_and_transform(np_array, dims)
else:
return self._transform_from_fortran_layout(
np_array,
dim,
swap_axes,
dims,
)

def _transform_and_download(
self,
device_array: DeviceArray,
dtype: type,
swap_axes: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
with self._current_stream:
if swap_axes:
device_array = cp.swapaxes(
device_array,
swap_axes[0],
swap_axes[1],
)
host_array = cp.asnumpy(
device_array.astype(dtype).flatten(order="F"),
)
self._current_stream = (
self._stream_A if self._current_stream == self._stream_B else self._stream_B
)
return host_array
def _transform_and_download(self, device_array: DeviceArray, dtype: type) -> np.ndarray:
host_array = cp.asnumpy(device_array.astype(dtype).flatten(order="F"))
self._current_stream = self._stream_A if self._current_stream == self._stream_B else self._stream_B
return host_array

def _transform_from_python_layout(
self,
array: PythonArray,
dtype: type,
swap_axes: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
"""Copy back a numpy array in python layout to Fortran"""

if self._python_targets_gpu:
numpy_array = self._transform_and_download(array, dtype, swap_axes)
numpy_array = self._transform_and_download(array, dtype)
else:
numpy_array = array.astype(dtype).flatten(order="F")
if swap_axes:
numpy_array = np.swapaxes(
numpy_array,
swap_axes[0],
swap_axes[1],
)
return numpy_array

def python_to_fortran(
self,
array: PythonArray,
fptr: cffi.FFI.CData,
ptr_offset: int = 0,
swap_axes: Optional[Tuple[int, int]] = None,
) -> None:
"""
Input: Python array in Pace layout, of shape (i, j, k)
Output: C-ordered double precision NumPy data of shape (i, j, k)
Output: F-ordered data written into the Fortran memory pointed to by fptr
"""
ftype = self._ffi.getctype(self._ffi.typeof(fptr).item)
assert ftype in self._TYPEMAP
dtype = self._TYPEMAP[ftype]
numpy_array = self._transform_from_python_layout(
array,
dtype,
swap_axes,
)
numpy_array = self._transform_from_python_layout(array, dtype)
self._ffi.memmove(fptr + ptr_offset, numpy_array, 4 * numpy_array.size)

def cast(self, dtype: npt.DTypeLike, void_ptr: CFFIObj) -> CFFIObj:
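The new _compute_fortran_strides helper plus as_strided replace the old reshape/transpose and astype copy: the flat buffer coming from Fortran is now viewed in place with column-major strides. A small self-contained NumPy sketch of that stride formula (the shape and dtype below are arbitrary stand-ins for the real dims and Float):

import numpy as np

def fortran_strides(shape, dtype):
    # byte strides [1, dim0, dim0*dim1, ...] * itemsize, as in _compute_fortran_strides
    cshp = np.cumprod(shape)
    return tuple(int(s) for s in np.concatenate(([1], cshp[:-1])) * np.dtype(dtype).itemsize)

shape = (3, 4, 5)
assert fortran_strides(shape, np.float32) == np.zeros(shape, np.float32, order="F").strides

# Viewing a flat Fortran buffer with these strides gives a column-major (i, j, k)
# array without a copy, which is what _transform_from_fortran_layout now does.
flat = np.arange(np.prod(shape), dtype=np.float32)
view = np.lib.stride_tricks.as_strided(
    flat, shape=shape, strides=fortran_strides(shape, np.float32)
)
assert view[1, 0, 0] == flat[1] and view[0, 1, 0] == flat[3]

Note that, unlike the old astype-based path, a strided view cannot change precision, so this assumes the Fortran buffer already holds data in the Float dtype.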
@@ -4,7 +4,6 @@

from ndsl import QuantityFactory, StencilFactory
from ndsl.constants import X_DIM, Y_DIM, Z_INTERFACE_DIM
from ndsl.dsl.gt4py_utils import is_gpu_backend
from pyMoist.GFDL_1M import GFDL1M, GFDL1MConfig, GFDL1MState
from pyMoist.interface import (
InterfaceTransferType,
@@ -16,16 +15,15 @@


class GFDL1MInterface:
def __init__(self, quantity_factory: QuantityFactory, stencil_factory: StencilFactory) -> None:
def __init__(
self,
quantity_factory: QuantityFactory,
stencil_factory: StencilFactory,
transfer_type: InterfaceTransferType,
) -> None:
self._stencil_factory = stencil_factory
self._quantity_factory = quantity_factory
self._gfdl_1m: GFDL1M | None = None

if is_gpu_backend(quantity_factory.backend):
transfer_type = InterfaceTransferType.CPU_TO_GPU_TO_CPU
else:
transfer_type = InterfaceTransferType.ALL_CPU

self._managed_state = MAPLManagedState(
GFDL1MState.zeros(quantity_factory),
transfer_type,
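With this change the constructor no longer infers the transfer mode from the backend; the caller passes an InterfaceTransferType explicitly. A hedged sketch of what a call site could look like, reusing the is_gpu_backend helper the old constructor imported (the pick_transfer_type name and the CPU_COPY default are illustrative assumptions, not part of this diff):

from ndsl.dsl.gt4py_utils import is_gpu_backend
from pyMoist.interface import InterfaceTransferType

def pick_transfer_type(backend: str) -> InterfaceTransferType:
    # GPU backends round-trip host memory through the device;
    # CPU backends either copy (layout mismatch) or map (same layout).
    if is_gpu_backend(backend):
        return InterfaceTransferType.CPU_TO_GPU_TO_CPU
    return InterfaceTransferType.CPU_COPY

# interface = GFDL1MInterface(quantity_factory, stencil_factory,
#                             pick_transfer_type(quantity_factory.backend))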
@@ -4,14 +4,19 @@

from ndsl import State
from ndsl.constants import X_DIM, Y_DIM, Z_DIM, Float
from ndsl.optional_imports import cupy as cp
from pyMoist.interface import InterfaceTransferType
from pyMoist.interface.mapl.memory_factory import MAPLMemoryRepository


class MAPLManagedState:
"""Manage a NDSL <> MAPL shared state by linking MAPL pointers to NDSL state fields"""

def __init__(self, py_state: State, transfer_type: InterfaceTransferType) -> None:
def __init__(
self,
py_state: State,
transfer_type: InterfaceTransferType,
) -> None:
self._ndsl_state = py_state
self._state_to_mapl_mapping: dict[str, Tuple[MAPLMemoryRepository, str]] = {}
self._transfer_type = transfer_type
@@ -62,10 +67,16 @@ def _pull_from_fortran(
)
else:
mapl_array = mapl_state_.get_from_fortran(mapl_field_)
if self._transfer_type == InterfaceTransferType.CPU_TO_GPU_TO_CPU:
cp.cuda.runtime.deviceSynchronize()
if mapl_array is None:
setattr(ndsl_state_, ndsl_field_, None)
else:
elif self._transfer_type == InterfaceTransferType.CPU_COPY:
getattr(ndsl_state_, ndsl_field_).field[:] = mapl_array[:]
elif self._transfer_type == InterfaceTransferType.CPU_MAP:
getattr(ndsl_state_, ndsl_field_).data = mapl_array
else:
raise ValueError("Transfer type unknown for Fortran/NDSL")

for ndsl_field, (mapl_state, mapl_field) in self._state_to_mapl_mapping.items():
try:
@@ -77,6 +88,10 @@ def _push_back_to_fortran(
def ndsl_to_fortran(self) -> None:
"""Copy all Python memory back in Fortan"""

# Skip sending back - we are mapped
if self._transfer_type == InterfaceTransferType.CPU_MAP:
return

def _push_back_to_fortran(
mapl_field_: str,
mapl_state_: MAPLMemoryRepository,
@@ -93,10 +108,18 @@ def _push_back_to_fortran(
)
else:
mapl_array = mapl_state_.get_from_fortran(mapl_field_)
if mapl_array is not None:
if mapl_array is None:
pass
elif self._transfer_type == InterfaceTransferType.CPU_COPY:
ndsl_array = getattr(ndsl_state_, ndsl_field_).field[:]
mapl_array[:] = ndsl_array[:]
mapl_state_.send_to_fortran(mapl_field_)
elif self._transfer_type == InterfaceTransferType.CPU_MAP:
raise RuntimeError("Coding issue. We should never send back mapped data")
else:
raise ValueError("Transfer type unknown for NDSL/Fortran")

for ndsl_field, (mapl_state, mapl_field) in self._state_to_mapl_mapping.items():
_push_back_to_fortran(mapl_field, mapl_state, ndsl_field, self._ndsl_state)
if self._transfer_type == InterfaceTransferType.CPU_TO_GPU_TO_CPU:
cp.cuda.runtime.deviceSynchronize()
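The pull/push logic above distinguishes CPU_COPY, which duplicates values into NDSL-owned memory and must push them back, from CPU_MAP, which aliases the Fortran buffer so ndsl_to_fortran can return early. A standalone NumPy sketch of the two semantics (plain arrays standing in for the MAPL and NDSL fields):

import numpy as np

mapl_array = np.arange(4, dtype=np.float64)   # stands in for Fortran-owned memory

# CPU_COPY: values are duplicated, so writes stay local until an explicit copy-back
ndsl_copy = np.zeros(4)
ndsl_copy[:] = mapl_array[:]
ndsl_copy[0] = 42.0
assert mapl_array[0] == 0.0

# CPU_MAP: the NDSL field aliases the same buffer, writes are immediately visible
# to Fortran, so no copy-back step is needed
ndsl_map = mapl_array
ndsl_map[0] = 42.0
assert mapl_array[0] == 42.0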
@@ -29,7 +29,7 @@ class FortranMemory:
"""Is the pointer properly associated in Fortran"""
shape: tuple[int, ...]
"""Shape of the array"""
python_array: npt.NDArray
python_array: npt.NDArray | None
"""Synced python memory, check dirty"""
dirty: bool = False
"""Does the memory need to be synced"""
@@ -71,24 +71,33 @@ def register(
pointer=cast_ptr,
associated=is_associated,
shape=self._quantity_factory.sizer.get_extent(dims),
python_array=np.empty((0)),
python_array=None,
)

def get_from_fortran(
self,
name: str,
) -> npt.NDArray:
*,
allow_device_transfer: bool = True,
) -> npt.NDArray | None:
"""Retrieve the data from Fortran. Prefer using a MAPLManager."""
try:
fmem = self._fortran_pointers[name]
except KeyError:
raise KeyError(f"Pointer {name} was never registered.")

if not fmem.associated:
return
return None

# We rely here on `fmem.pointer` being constant, so we could skip the
# re-mapping below and simply `return fmem.python_array`. We don't,
# because we want to introduce a ZERO TRUST mode where we do not rely
# on the Fortran side keeping its memory constant.

fmem.python_array = self._f_py_converter.fortran_to_python(
fptr=fmem.pointer,
dim=list(fmem.shape),
dims=list(fmem.shape),
allow_device_transfer=allow_device_transfer,
)

return fmem.python_array
@@ -7,5 +7,6 @@ class MemorySpace(enum.Enum):


class InterfaceTransferType(enum.Enum):
ALL_CPU = enum.auto()
CPU_COPY = enum.auto() # Copies because of layout mismatch
CPU_MAP = enum.auto() # No copy - memory map - same layout
CPU_TO_GPU_TO_CPU = enum.auto()