Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions src/matvis/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pathlib import Path
from pyuvdata import UVBeam
from pyuvsim import AnalyticBeam, simsetup
from submatrices import *
from typing import Literal

from matvis import DATA_PATH, HAVE_GPU, coordinates, cpu, simulate_vis
Expand Down Expand Up @@ -78,6 +79,7 @@ def run_profile(
naz=360,
nza=180,
pairs=None,
matsets=None,
nchunks=1,
source_buffer=1.0,
):
Expand Down Expand Up @@ -136,6 +138,7 @@ def run_profile(
beam_idx=beam_idx,
matprod_method=f"{'GPU' if gpu else 'CPU'}{method}",
antpairs=pairs,
matsets=matsets,
min_chunks=nchunks,
source_buffer=source_buffer,
)
Expand Down Expand Up @@ -278,8 +281,9 @@ def get_redundancies(bls, ndecimals: int = 2):
)
@click.option("-k", "--keep-ants", type=str, default="")
@click.option("--outriggers/--no-outriggers", default=False)
@click.option("-msm", "--matset-method", type=str, default="")
@add_common_options
def hera_profile(hex_num, nside, keep_ants, outriggers, **kwargs):
def hera_profile(hex_num, nside, keep_ants, outriggers, matset_method, **kwargs):
"""Run profiling of matvis with a HERA-like array."""
from py21cmsense.antpos import hera

Expand All @@ -291,7 +295,16 @@ def hera_profile(hex_num, nside, keep_ants, outriggers, **kwargs):
bls = antpos[np.newaxis, :, :2] - antpos[:, np.newaxis, :2]
pairs = np.array(get_redundancies(bls.value))

run_profile(nsource=12 * nside**2, nants=antpos.shape[0], pairs=pairs, **kwargs)
if matset_method == "":
matsets = None

run_profile(
nsource=12 * nside**2,
nants=antpos.shape[0],
pairs=pairs,
matsets=matsets,
**kwargs,
)


def get_line_based_stats(lstats) -> tuple[dict, float]:
Expand Down
41 changes: 40 additions & 1 deletion src/matvis/core/matprod.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@
from abc import ABC, abstractmethod
from typing import Any

from matvis import HAVE_GPU

from .._utils import get_dtypes

if HAVE_GPU:
import cupy as cp


class MatProd(ABC):
"""
Expand All @@ -20,6 +25,9 @@ class MatProd(ABC):
Number of antennas.
antpairs
The antenna pairs to sum over. If None, all pairs are used.
matsets
The list of sub-matrices to calculate. If None, all pairs are used in a single
matrix multiplication.
precision
The precision of the data (1 or 2).
"""
Expand All @@ -29,16 +37,24 @@ def __init__(
nchunks: int,
nfeed: int,
nant: int,
antpairs: np.ndarray | None,
antpairs: np.ndarray | list | None,
matsets: list | None,
precision=1,
):
if antpairs is None:
self.all_pairs = True
self.antpairs = np.array([(i, j) for i in range(nant) for j in range(nant)])
self.matsets = None
else:
self.all_pairs = False
self.antpairs = antpairs

if HAVE_GPU:
for i in range(len(matsets)):
matsets[i][0] = cp.array(matsets[i][0])
matsets[i][1] = cp.array(matsets[i][1])
self.matsets = matsets

self.nchunks = nchunks
self.nfeed = nfeed
self.nant = nant
Expand All @@ -60,10 +76,33 @@ def allocate_vis(self):
(self.nchunks, self.npairs, self.nfeed, self.nfeed), 0.0, dtype=self.ctype
)

def check_antpairs_in_matsets(self):
"""Check that all non-redundant ant pairs are included in set of sub-matrices.

If using the CPUMatChunk method, make sure that all non-redudant antpairs are included somewhere
in the set of sub-matrices to be calculated. Otherwise, throw an exception.
"""
antpairs_set = set()
matsets_set = set()

for _, (ai, aj) in enumerate(self.antpairs):
antpairs_set.add((ai, aj))
Comment on lines +88 to +89
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you could simply do this as antpairs_set = set(self.antpairs)


for _, (ai, aj) in enumerate(self.matsets):
for i in range(len(ai)):
for j in range(len(aj)):
matsets_set.add((ai[i], aj[j]))

if not antpairs_set.issubset(matsets_set):
raise Exception("Some non-redundant pairs are missing from sub-matrices. ")

def setup(self):
"""Setup the memory for the object."""
self.allocate_vis()

if self.matsets is not None:
self.check_antpairs_in_matsets()

@abstractmethod
def compute(self, z: np.ndarray, out: np.ndarray):
"""
Expand Down
27 changes: 18 additions & 9 deletions src/matvis/cpu/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def simulate(
I_sky: np.ndarray,
beam_list: Sequence[UVBeam | Callable] | None,
antpairs: np.ndarray | list[tuple[int, int]] | None = None,
matsets: list[tuple[np.ndarray[int], np.ndarray[int]]] | None = None,
precision: int = 1,
polarized: bool = False,
beam_idx: np.ndarray | None = None,
Expand Down Expand Up @@ -81,6 +82,13 @@ def simulate(
Either a 2D array, shape ``(Npairs, 2)``, or list of 2-tuples of ints, with
the list of antenna-pairs to return as visibilities (all feed-pairs are always
calculated). If None, all feed-pairs are returned.
matsets : array_like, optional
A list of containing a set of 2-tuples of numpy arrays. Each entry in the list
corresponds to a different sub-matrix. The first element of the ith tuple lists the rows of Z corresponding
to the rows of the ith sub-matrix, and the second tuple element contains the list of columns. In the case
of vector-vector loop, the number of tuples is equal to Npairs and each element of the tuple contains only
one int, such that the sub-matrices contain only one element each. Outer dimension has to be a list because
numpy doesn't like inhomogeneous arrays.
precision : int, optional
Which precision level to use for floats and complex numbers.
Allowed values:
Expand All @@ -101,14 +109,15 @@ def simulate(
allows). Default is 100.
matprod_method : str, optional
The method to use for the final matrix multiplication. Default is 'CPUMatMul',
which simply uses `np.dot` over the two full matrices. Currently, the other
option is `CPUVectorLoop`, which uses a loop over the antenna pairs,
computing the sum over sources as a vector dot product.
Whether to calculate visibilities for each antpair in antpairs as a vector
dot-product instead of using a full matrix-matrix multiplication for all
possible pairs. Default is False. Setting to True can be faster for large
arrays where `antpairs` is small (possibly from high redundancy). You should
run a performance test before using this.
which simply uses `np.dot` over the two full matrices. The second option is
`CPUVectorLoop`, which uses a loop over the antenna pairs, computing the sum
over sources as a vector dot product. The third option is `CPUMatChunk`, which
divides the product into several matrix multiplications that are optimized to have the
highest possible density of non-redundant pairs without invoking the overhead of a large
for loop over vector products. For very large arrays, with very high redundancy, CPUVectorLoop
is usually fastest, while for cases with low redundancy, `CPUMatMul` is fastest. The `CPUMatChunk`
can sometimes be fastest for intermediate levels of redundancy and intermediately sized arrays. You should
run a performance test before choosing one of these options.
max_memory : int, optional
The maximum memory (in bytes) to use for the visibility calculation. This is
not a hard-set limit, but rather a guideline for how much memory to use. If the
Expand Down Expand Up @@ -175,7 +184,7 @@ def simulate(
source_buffer=source_buffer,
)
mpcls = getattr(mp, matprod_method)
matprod = mpcls(nchunks, nfeed, nant, antpairs, precision=precision)
matprod = mpcls(nchunks, nfeed, nant, antpairs, matsets, precision=precision)
zcalc = ZMatrixCalc(
nsrc=nsrc_alloc,
nfeed=nfeed,
Expand Down
35 changes: 35 additions & 0 deletions src/matvis/cpu/matprod.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,38 @@ def compute(self, z: np.ndarray, out: np.ndarray) -> np.ndarray:
out[i] = z[ai].conj().dot(z[aj].T)

return out


class CPUMatChunk(MatProd):
"""Loop over a small set of sub-matrix products which collectively contain all nont-redundant pairs."""

def compute(self, z: np.ndarray, out: np.ndarray) -> np.ndarray:
"""Perform the source-summing operation for a single time and chunk.

Parameters
----------
z
Complex integrand. Shape=(Nfeed, Nant, Nax, Nsrc).
out
Output array, shaped as (Nfeed, Nfeed, Npairs).
"""
z = z.reshape((self.nant, self.nfeed, -1))

mat_product = np.zeros(
(self.nant, self.nant, self.nfeed, self.nfeed), dtype=z.dtype
)
Comment on lines +70 to +72
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it not be faster to just save Nsets arrays in a list, and then index from them? This way, you have to put the results into a big array so they're very separated in memory space

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean, setup something like this:

mat_product = [
    np.zeros(len(ai), len(aj), self.nfeed, self.nfeed) for ai, aj in self.matsets
]

Copy link
Collaborator Author

@ccain002 ccain002 Jan 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason I didn't do it this way is because I wanted the final array that holds all the matrix products to be indexable with the elements of antpairs. That way, I can easily grab the non-redundant pairs at the end of return an array with the same format that you'd get using the vector-vector method. Otherwise, I have to re-calculate the mapping the mapping to the antpairs indices for each sub-matrix. I can change it if you think it would help with memory management to do it the other way though.


# Chris 12/20/23: instead we will use matsets
for j in range(self.nfeed):
for k in range(self.nfeed):
for i, (ai, aj) in enumerate(self.matsets):
AI, AJ = np.meshgrid(ai, aj)
mat_product[AI, AJ, j, k] = z[ai[:], j].conj().dot(z[aj[:], k].T).T

# Now, we need to identify the non-redundant pairs and put them into the final output array
for j in range(self.nfeed):
for k in range(self.nfeed):
for i, (ai, aj) in enumerate(self.antpairs):
out[i, j, k] = mat_product[ai, aj, j, k]

return out
3 changes: 2 additions & 1 deletion src/matvis/gpu/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def simulate(
beam_list: Sequence[UVBeam | Callable] | None,
polarized: bool = False,
antpairs: np.ndarray | list[tuple[int, int]] | None = None,
matsets: list[tuple[np.ndarray[int], np.ndarray[int]]] | None = None,
beam_idx: np.ndarray | None = None,
max_memory: int = np.inf,
min_chunks: int = 1,
Expand Down Expand Up @@ -120,7 +121,7 @@ def simulate(
ctype=ctype,
)
mpcls = getattr(mp, matprod_method)
matprod = mpcls(nchunks, nfeed, nant, antpairs, precision=precision)
matprod = mpcls(nchunks, nfeed, nant, antpairs, matsets, precision=precision)

npixc = nsrc // nchunks

Expand Down
44 changes: 44 additions & 0 deletions src/matvis/gpu/matprod.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,47 @@ def sum_chunks(self, out: np.ndarray):

out[:] = self.vis[0].transpose((2, 0, 1)).get()
cp.cuda.Device().synchronize()


class GPUMatChunk(MatProd):
"""Use a loop over specific pairs, performing a vdot over the source axis."""

def allocate_vis(self):
"""Allocate memory for the visibilities."""
self.vis = [
cp.full(
(self.nfeed, self.nfeed, self.npairs), 0.0, dtype=self.ctype, order="F"
)
for _ in range(self.nchunks)
]

def compute(self, z: cp.ndarray, out: cp.ndarray) -> cp.ndarray:
"""Perform the source-summing operation for a single time and chunk."""
z = z.reshape((self.nant, self.nfeed, -1))

mat_product = cp.zeros(
(self.nant, self.nant, self.nfeed, self.nfeed), dtype=z.dtype
)

for j in range(self.nfeed):
for k in range(self.nfeed):
for i, (ai, aj) in enumerate(self.matsets):
AI, AJ = cp.meshgrid(ai, aj)
mat_product[AI, AJ, j, k] = z[ai[:], j].conj().dot(z[aj[:], k].T).T

for j in range(self.nfeed):
for k in range(self.nfeed):
for i, (ai, aj) in enumerate(self.antpairs):
out[j, k, i] = mat_product[ai, aj, j, k]

cp.cuda.Device().synchronize()
return out

def sum_chunks(self, out: np.ndarray):
"""Sum the chunks into the output array."""
if self.nchunks > 1:
for i in range(1, len(self.vis)):
self.vis[0] += self.vis[i]

out[:] = self.vis[0].transpose((2, 0, 1)).get()
cp.cuda.Device().synchronize()
22 changes: 22 additions & 0 deletions src/matvis/submatrices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Module containing several options for computing sub-matrices for the MatChunk method."""
import numpy as np


def get_matrix_sets(bls, ndecimals: int = 2):
"""Find redundant baselines."""
uvbins = set()
msets = []

# Everything here is in wavelengths
bls = np.round(bls, decimals=ndecimals)
nant = bls.shape[0]

# group redundant baselines
for i in range(nant):
for j in range(i + 1, nant):
u, v = bls[i, j]
if (u, v) not in uvbins and (-u, -v) not in uvbins:
uvbins.add((u, v))
msets.append([np.array([i]), np.array([j])])

return msets
3 changes: 3 additions & 0 deletions src/matvis/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def simulate_vis(
beam_spline_opts: dict | None = None,
beam_idx: np.ndarray | None = None,
antpairs: np.ndarray | list[tuple[int, int]] | None = None,
matsets: list[tuple[np.ndarray[int], np.ndarray[int]]]
| None = None, # set of sub-matrices
source_buffer: float = 1.0,
**backend_kwargs,
):
Expand Down Expand Up @@ -150,6 +152,7 @@ def simulate_vis(
beam_spline_opts=beam_spline_opts,
beam_idx=beam_idx,
antpairs=antpairs,
matsets=matsets,
source_buffer=source_buffer,
**backend_kwargs,
)
Expand Down
34 changes: 30 additions & 4 deletions tests/test_matprod.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Tests that the various matprod methods produce the same result."""


import pytest

import cupy as cp
import numpy as np

from matvis._utils import get_dtypes
Expand All @@ -16,13 +18,20 @@ def simple_matprod(z):
@pytest.mark.parametrize("antpairs", [True, False])
@pytest.mark.parametrize("precision", [1, 2])
@pytest.mark.parametrize(
"method", ["CPUMatMul", "CPUVectorDot", "GPUMatMul", "GPUVectorDot"]
"method",
[
"CPUMatMul",
"CPUVectorDot",
"CPUMatChunk",
"GPUMatMul",
"GPUVectorDot",
"GPUMatChunk",
],
)
def test_matprod(nfeed, antpairs, precision, method):
def test_matprod(nfeed, antpairs, matsets, precision, method):
"""Test that the various matprod methods produce the same result."""
if method.startswith("GPU"):
pytest.importorskip("cupy")
import cupy as cp

from matvis.gpu import matprod as module
else:
Expand All @@ -36,7 +45,24 @@ def test_matprod(nfeed, antpairs, precision, method):
else:
antpairs = None

obj = cls(nchunks=1, nfeed=nfeed, nant=nant, antpairs=antpairs, precision=precision)
if matsets:
matsets = [
(np.array([0, 1, 2, 3]), np.array([0, 1, 2, 3])),
(np.array([0, 1, 2, 3]), np.array([3, 4])),
(np.array([3, 4]), np.array([0, 1, 2, 3])),
(np.array([3, 4]), np.array([3, 4])),
]
else:
matsets = None

obj = cls(
nchunks=1,
nfeed=nfeed,
nant=nant,
antpairs=antpairs,
matsets=matsets,
precision=precision,
)
obj.setup()

ctype = get_dtypes(precision)[1]
Expand Down