Skip to content

GPU material law integration strategy #30

@mark-hobbs

Description

@mark-hobbs
@cuda.jit(device=True)
def linear_gpu(s, d, sc):
    """
    Linear constitutive model
    """

    d_temp = 0.0

    if s < sc:
        d_temp = 0.0

    elif s >= sc:
        d_temp = 1.0

    # Bond softening factor can only increase (damage is irreversible)
    if d_temp > d:
        d = d_temp

    return d


def make_material_law(sc):
    """
    Function factory: proof of concept

    Further work is required to define the integration strategy
    """
    sc_d = cuda.to_device(sc)

    @cuda.jit
    def material_law_kernel(s, d, sc):
        """
        Material law (calculate bond damage)
        """
        node_i = cuda.blockIdx.x
        thread_id = cuda.threadIdx.x
        return linear_gpu(s, d, sc[node_i, thread_id])

    def material_law(s, d):
        THREADS_PER_BLOCK = 256
        BLOCKS_PER_GRID = (s.size + THREADS_PER_BLOCK - 1) // THREADS_PER_BLOCK
        material_law_kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK](s, d, sc_d)

    return material_law


def compute_nodal_forces_gpu(
    node_force, x, u, cell_volume, nlist, d, c, surface_correction_factors
):
    """
    Compute particle forces (gpu optimised)
    """
    BLOCKS_PER_GRID = x.shape[0]
    compute_nodal_forces_kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK](
        node_force,
        x,
        u,
        cell_volume,
        nlist,
        d,
        c,
        surface_correction_factors,
    )


def make_compute_nodal_forces_kernel(material_law):

    @cuda.jit
    def compute_nodal_forces_kernel(
        node_force, x, u, cell_volume, nlist, d, c, surface_correction_factors
    ):
        """
        One block per node approach
        """

        shared_x = cuda.shared.array(THREADS_PER_BLOCK, dtype=node_force.dtype)
        shared_y = cuda.shared.array(THREADS_PER_BLOCK, dtype=node_force.dtype)

        node_i = cuda.blockIdx.x
        thread_id = cuda.threadIdx.x
        max_n_family_members = nlist.shape[1]

        val_x = 0.0
        val_y = 0.0

        if thread_id < max_n_family_members:
            node_j = nlist[node_i, thread_id]

            if node_j == -1 or node_j == node_i:
                val_x = 0.0
                val_y = 0.0
            else:
                xi_x = x[node_j, 0] - x[node_i, 0]
                xi_y = x[node_j, 1] - x[node_i, 1]

                xi_eta_x = xi_x + (u[node_j, 0] - u[node_i, 0])
                xi_eta_y = xi_y + (u[node_j, 1] - u[node_i, 1])

                xi = math.sqrt(xi_x**2 + xi_y**2)
                y = math.sqrt(xi_eta_x**2 + xi_eta_y**2)
                stretch = (y - xi) / xi

                d[node_i, thread_id] = material_law(stretch, d[node_i, thread_id])

                f = (
                    stretch
                    * c[node_i, thread_id]
                    * (1 - d[node_i, thread_id])
                    * cell_volume
                    * surface_correction_factors[node_i, thread_id]
                )

                val_x = f * xi_eta_x / y
                val_y = f * xi_eta_y / y

        shared_x[thread_id] = val_x
        shared_y[thread_id] = val_y

        cuda.syncthreads()

        # Reduction
        stride = THREADS_PER_BLOCK // 2
        while stride > 0:
            if thread_id < stride:
                shared_x[thread_id] += shared_x[thread_id + stride]
                shared_y[thread_id] += shared_y[thread_id + stride]
            cuda.syncthreads()
            stride //= 2

        if thread_id == 0:
            node_force[node_i, 0] = shared_x[0]
            node_force[node_i, 1] = shared_y[0]

    return compute_nodal_forces_kernel

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions