diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ccb1c9f..2c0687c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,116 +20,78 @@ defaults:
     shell: bash -l {0}
 
 jobs:
-  style:
-    name: Style
+  code_style:
+    name: ${{ matrix.check.name }}
     runs-on: ubuntu-latest
-
+    strategy:
+      # Report every failing check instead of cancelling the others on the first failure.
+      fail-fast: false
+      matrix:
+        check:
+          - name: Code Formatting | black
+            command: black --check .
+          - name: Import Ordering | isort
+            command: isort --check .
+          # The job's virtualenv lives in ./.venv, which flake8 does not skip by default.
+          - name: Linting | flake8
+            command: flake8 . --extend-exclude=.venv
+          - name: Type Checking | mypy
+            command: mypy . --cache-dir=/dev/null
     steps:
       - uses: actions/checkout@v3
 
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
           python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
 
-      - name: Install requirements
-        run: |
-          grep -E '^black' dev-requirements.txt | xargs pip install
+      - name: Cache Python Environment
+        uses: actions/cache@v3
+        with:
+          path: .venv
+          # Key on the Python version too so a venv is never reused across interpreters.
+          key: ${{ runner.os }}-py${{ env.DEFAULT_PYTHON_VERSION }}-dev-${{ hashFiles('dev-requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-py${{ env.DEFAULT_PYTHON_VERSION }}-dev-
 
-      - name: Debug info
+      - name: Install Development Requirements
         run: |
-          pip freeze
+          python -m venv .venv
+          source .venv/bin/activate
+          pip install -r dev-requirements.txt
 
-      - name: Run black
+      - name: Run Code Style Check
         run: |
-          black --check .
-
-  checks:
-    name: ${{ matrix.task.name }}
-    runs-on: ${{ matrix.task.runs_on }}
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix:
-        task:
-          - name: Lint
-            runs_on: ubuntu-latest
-            coverage_report: false
-            platform: cpu
-            run: |
-              make flake8
-              make import-sort
-              make typecheck
-
-          - name: CPU Tests
-            runs_on: ubuntu-latest
-            coverage_report: true
-            platform: cpu
-            run: make tests
+          source .venv/bin/activate
+          ${{ matrix.check.command }}
 
+  unit_tests:
+    name: Unit Tests
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v3
 
-      - uses: conda-incubator/setup-miniconda@v2
+      - name: Setup Python
+        uses: actions/setup-python@v4
         with:
-          miniconda-version: "latest"
           python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
 
-      - name: Set build variables
-        run: |
-          # Get the exact Python version to use in the cache key.
-          echo "PYTHON_VERSION=$(python --version)" >> $GITHUB_ENV
-          echo "RUNNER_ARCH=$(uname -m)" >> $GITHUB_ENV
-          # Use week number in cache key so we can refresh the cache weekly.
-          echo "WEEK_NUMBER=$(date +%V)" >> $GITHUB_ENV
-
-      - uses: actions/cache@v3
-        id: virtualenv-cache
+      - name: Cache Python Environment
+        uses: actions/cache@v3
         with:
           path: .venv
-          key: >
-            ${{ env.CACHE_PREFIX }}-${{ env.WEEK_NUMBER }}-${{ runner.os }}-${{ env.RUNNER_ARCH }}-
-            ${{ env.PYTHON_VERSION }}-${{ matrix.task.platform }}-${{ hashFiles('setup.py') }}-
-            ${{ hashFiles('*requirements.txt') }}
-
-      - name: Setup virtual environment (no cache hit)
-        if: steps.virtualenv-cache.outputs.cache-hit != 'true'
-        run: |
-          python${{ env.DEFAULT_PYTHON_VERSION }} -m venv .venv
-          source .venv/bin/activate
-          make install
-
-      - name: Setup virtual environment (cache hit)
-        if: steps.virtualenv-cache.outputs.cache-hit == 'true'
-        run: |
-          source .venv/bin/activate
-          pip install --no-deps -e .[all]
-
-      - name: Debug info
-        run: |
-          source .venv/bin/activate
-          pip freeze
+          key: ${{ runner.os }}-py${{ env.DEFAULT_PYTHON_VERSION }}-test-${{ hashFiles('requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-py${{ env.DEFAULT_PYTHON_VERSION }}-test-
 
-      - name: ${{ matrix.task.name }}
+      - name: Install Requirements
         run: |
+          python -m venv .venv
           source .venv/bin/activate
-          ${{ matrix.task.run }}
-
-      - name: Prepare coverage report
-        if: matrix.task.coverage_report
-        run: |
-          mkdir coverage
-          mv coverage.xml coverage/
-
-      - name: Save coverage report
-        if: matrix.task.coverage_report
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ matrix.task.name }}-coverage
-          path: ./coverage
+          pip install -r requirements.txt
 
-      - name: Clean up
-        if: always()
+      - name: Run Tests
         run: |
           source .venv/bin/activate
-          pip uninstall --yes arrayfire
+          make tests
diff --git a/.isort.cfg b/.isort.cfg
deleted file mode 100755
index ec54132..0000000
--- a/.isort.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-[settings]
-line_length = 119
-profile = black
diff --git a/Makefile b/Makefile
index 6e2cbe2..5ca9372 100755
--- a/Makefile
+++ b/Makefile
@@ -14,26 +14,17 @@ endif
 version : 
 	@python -c 'from arrayfire.version import VERSION; print(f"ArrayFire Python v{VERSION}")'
 
-.PHONY : install
-install :
-	pip install --upgrade pip
-	pip install pip-tools
-	pip-compile requirements.in -o final_requirements.txt --allow-unsafe --rebuild --verbose
-	pip install -e . -r final_requirements.txt
+.PHONY : build
+build :
+	@python -m build
 
-# Testing
-
-.PHONY : flake8
-flake8 :
-	flake8 arrayfire tests examples
+# Dev
 
-.PHONY : import-sort
-import-sort :
-	isort arrayfire tests examples
+.PHONY : pre-commit
+pre-commit :
+	black --check . && isort --check . && flake8 . --extend-exclude=.venv && mypy . --cache-dir=/dev/null
 
-.PHONY : typecheck
-typecheck :
-	mypy arrayfire tests examples --cache-dir=/dev/null
+# Testing
 
 .PHONY : tests
 tests :
diff --git a/arrayfire/__init__.py b/arrayfire/__init__.py
index 0f8d850..9f1283c 100755
--- a/arrayfire/__init__.py
+++ b/arrayfire/__init__.py
@@ -95,6 +95,7 @@
     "shift",
     "tile",
     "transpose",
+    "lookup",
 ]
 
 from arrayfire.library.array_functions import (
@@ -110,6 +111,7 @@
     isnan,
     iszero,
     join,
+    lookup,
     lower,
     moddims,
     pad,
@@ -124,7 +126,18 @@
     upper,
 )
 
-__all__ += ["gloh", "orb", "sift", "dog", "fast", "harris", "susan", "hamming_matcher", "nearest_neighbour"]
+__all__ += [
+    "gloh",
+    "orb",
+    "sift",
+    "dog",
+    "fast",
+    "harris",
+    "susan",
+    "hamming_matcher",
+    "nearest_neighbour",
+    "match_template",
+]
 
 from arrayfire.library.computer_vision import (
     dog,
 from arrayfire.library.computer_vision import (
     dog,
@@ -133,6 +135,7 @@
     hamming_matcher,
     harris,
+    match_template,
     nearest_neighbour,
     orb,
     sift,
     susan,
@@ -158,7 +161,6 @@
     "Interp",
     "IterativeDeconv",
     "Pad",
-    "pi",
 ]
 
 from arrayfire.library.constants import (
@@ -181,7 +183,6 @@
     TopK,
     VarianceBias,
     YCCStd,
-    pi,
 )
 
 __all__ += [
@@ -560,6 +561,11 @@
     "approx1_uniform",
     "approx2",
     "approx2_uniform",
+    "convolve1",
+    "convolve2",
+    "convolve2_nn",
+    "convolve2_separable",
+    "convolve3",
 ]
 
 from arrayfire.library.signal_processing import (
@@ -567,6 +573,11 @@
     approx1_uniform,
     approx2,
     approx2_uniform,
+    convolve1,
+    convolve2,
+    convolve2_nn,
+    convolve2_separable,
+    convolve3,
     fft,
     fft2,
     fft2_c2r,
@@ -591,27 +604,6 @@
 
 from arrayfire.library.statistics import corrcoef, cov, mean, median, stdev, topk, var
 
-# TODO
-# Temp solution. Remove when arrayfire-binary-python-wrapper is finalized
-
-# __all__ += [
-#     "get_active_backend",
-#     "get_available_backends",
-#     "get_backend_count",
-#     "get_backend_id",
-#     "get_device_id",
-#     "set_backend",
-# ]
-
-# from arrayfire.library.unified_api_functions import (
-#     get_active_backend,
-#     get_available_backends,
-#     get_backend_count,
-#     get_backend_id,
-#     get_device_id,
-#     set_backend,
-# )
-
 __all__ += [
     "accum",
     "scan",
diff --git a/arrayfire/array_api/__init__.py b/arrayfire/array_api/__init__.py
new file mode 100644
index 0000000..798a886
--- /dev/null
+++ b/arrayfire/array_api/__init__.py
@@ -0,0 +1,234 @@
+# flake8: noqa
+# Public namespace of arrayfire.array_api; every name listed in __all__ must be imported below.
+
+__array_api_version__ = "2022.12"
+
+__all__ = ["__array_api_version__"]
+
+from ._constants import Device
+
+__all__ += ["Device"]
+
+from ._creation_function import (
+    arange,
+    asarray,
+    empty,
+    empty_like,
+    eye,
+    full,
+    full_like,
+    linspace,
+    meshgrid,
+    ones,
+    ones_like,
+    tril,
+    triu,
+    zeros,
+    zeros_like,
+)
+
+__all__ += [
+    "asarray",
+    "arange",
+    "empty",
+    "empty_like",
+    "eye",
+    "full",
+    "full_like",
+    "linspace",
+    "meshgrid",
+    "ones",
+    "ones_like",
+    "tril",
+    "triu",
+    "zeros",
+    "zeros_like",
+]
+
+from ._data_type_functions import astype, broadcast_arrays, broadcast_to, can_cast, finfo, iinfo, isdtype, result_type
+
+__all__ += ["astype", "broadcast_arrays", "broadcast_to", "can_cast", "finfo", "iinfo", "result_type", "isdtype"]
+
+from ._dtypes import (
+    bool,
+    complex64,
+    complex128,
+    float32,
+    float64,
+    int8,
+    int16,
+    int32,
+    int64,
+    uint8,
+    uint16,
+    uint32,
+    uint64,
+)
+
+__all__ += [
+    "int8",
+    "int16",
+    "int32",
+    "int64",
+    "uint8",
+    "uint16",
+    "uint32",
+    "uint64",
+    "float32",
+    "float64",
+    "complex64",
+    "complex128",
+    "bool",
+]
+
+from ._elementwise_functions import (
+    abs,
+    acos,
+    acosh,
+    add,
+    asin,
+    asinh,
+    atan,
+    atan2,
+    atanh,
+    bitwise_and,
+    bitwise_invert,
+    bitwise_left_shift,
+    bitwise_or,
+    bitwise_right_shift,
+    bitwise_xor,
+    ceil,
+    conj,
+    cos,
+    cosh,
+    divide,
+    equal,
+    exp,
+    expm1,
+    floor,
+    floor_divide,
+    greater,
+    greater_equal,
+    imag,
+    isfinite,
+    isinf,
+    isnan,
+    less,
+    less_equal,
+    log,
+    log1p,
+    log2,
+    log10,
+    logaddexp,
+    logical_and,
+    logical_not,
+    logical_or,
+    logical_xor,
+    multiply,
+    negative,
+    not_equal,
+    positive,
+    pow,
+    real,
+    remainder,
+    round,
+    sign,
+    sin,
+    sinh,
+    sqrt,
+    square,
+    subtract,
+    tan,
+    tanh,
+    trunc,
+)
+
+__all__ += [
+    "abs",
+    "acos",
+    "acosh",
+    "add",
+    "asin",
+    "asinh",
+    "atan",
+    "atan2",
+    "atanh",
+    "bitwise_and",
+    "bitwise_left_shift",
+    "bitwise_invert",
+    "bitwise_or",
+    "bitwise_right_shift",
+    "bitwise_xor",
+    "ceil",
+    "conj",
+    "cos",
+    "cosh",
+    "divide",
+    "equal",
+    "exp",
+    "expm1",
+    "floor",
+    "floor_divide",
+    "greater",
+    "greater_equal",
+    "imag",
+    "isfinite",
+    "isinf",
+    "isnan",
+    "less",
+    "less_equal",
+    "log",
+    "log1p",
+    "log2",
+    "log10",
+    "logaddexp",
+    "logical_and",
+    "logical_not",
+    "logical_or",
+    "logical_xor",
+    "multiply",
+    "negative",
+    "not_equal",
+    "positive",
+    "pow",
+    "real",
+    "remainder",
+    "round",
+    "sign",
+    "sin",
+    "sinh",
+    "square",
+    "sqrt",
+    "subtract",
+    "tan",
+    "tanh",
+    "trunc",
+]
+
+from ._indexing_functions import take
+
+__all__ += ["take"]
+
+from ._manipulation_functions import concat, expand_dims, flip, permute_dims, reshape, roll, squeeze, stack
+
+__all__ += ["concat", "expand_dims", "flip", "permute_dims", "reshape", "roll", "squeeze", "stack"]
+
+from ._searching_functions import argmax, argmin, nonzero, where
+
+__all__ += ["argmax", "argmin", "nonzero", "where"]
+
+from ._set_functions import unique_all, unique_counts, unique_inverse, unique_values
+
+__all__ += ["unique_all", "unique_counts", "unique_inverse", "unique_values"]
+
+from ._sorting_functions import argsort, sort
+
+__all__ += ["argsort", "sort"]
+
+from ._statistical_functions import max, mean, min, prod, std, sum, var
+
+__all__ += ["max", "mean", "min", "prod", "std", "sum", "var"]
+
+from ._utility_functions import all, any
+
+__all__ += ["all", "any"]
diff --git a/arrayfire/array_api/_array_object.py b/arrayfire/array_api/_array_object.py
new file mode 100644
index 0000000..b5107ca
--- /dev/null
+++ b/arrayfire/array_api/_array_object.py
@@ -0,0 +1,908 @@
+from __future__ import annotations
+
+import types
+from typing import Any
+
+import arrayfire as af
+
+from ._constants import Device, NestedSequence, PyCapsule, SupportsBufferProtocol
+from ._dtypes import (
+    all_dtypes,
+    boolean_dtypes,
+    complex_floating_dtypes,
+    dtype_categories,
+    floating_dtypes,
+    integer_dtypes,
+    integer_or_boolean_dtypes,
+    numeric_dtypes,
+    promote_types,
+)
+
+# TODO
+# - add check if two arrays are on the same device (both backend and device)
+# - - include it in magic methods here
+# - - include it in elementwise functions also
+
+
+class Array:
+    _array: af.Array
+
+    def __new__(cls, *args: Any, **kwargs: Any) -> Array:
+        # Public construction is blocked; instances are built internally through Array._new().
+        message = "The array_api Array object should not be instantiated directly. "
+        message += "Use an array creation function, such as asarray(), instead."
+        raise TypeError(message)
+
+    def _check_allowed_dtypes(self, other: bool | int | float | Array, dtype_category: str, op: str) -> Array:
+        """
+        Validate `other` for operator `op` and return it as an Array.
+
+        Python scalars are wrapped via `_promote_scalar`. Raises TypeError when
+        either operand's dtype is outside `dtype_categories[dtype_category]`, or
+        when an in-place op (name starting with "__i") would change `self.dtype`.
+        Returns NotImplemented for any other operand type, so callers must check:
+            if other is NotImplemented: return other
+        """
+        if self.dtype not in dtype_categories[dtype_category]:
+            raise TypeError(f"Only {dtype_category} dtypes are allowed in {op}")
+        if isinstance(other, int | complex | float | bool):
+            other = self._promote_scalar(other)
+        elif isinstance(other, Array):
+            if other.dtype not in dtype_categories[dtype_category]:
+                raise TypeError(f"Only {dtype_category} dtypes are allowed in {op}")
+        else:
+            return NotImplemented
+
+        # promote_types is expected to raise TypeError for dtype combinations
+        # the spec does not allow to promote (per the original note here;
+        # its definition is not visible in this module).
+        res_dtype = promote_types(self.dtype, other.dtype)
+        if op.startswith("__i"):
+            # Note: in-place operators must not change the dtype of the
+            # left-hand operand. NumPy tolerates some such cases, e.g.
+            # adding an int16 array into an int8 array:
+
+            # >>> a = np.array(1, dtype=np.int8)
+            # >>> a += np.array(1, dtype=np.int16)
+
+            # The spec explicitly disallows this.
+            if res_dtype != self.dtype:
+                raise TypeError(f"Cannot perform {op} with dtypes {self.dtype} and {other.dtype}")
+
+        return other
+
+    def _promote_scalar(self, scalar: bool | int | float | complex) -> Array:
+        """
+        Wrap a Python scalar in a one-element Array with the dtype of self.
+
+        Raises TypeError when the scalar kind does not match the dtype of
+        self (bool with non-bool arrays, int with bool arrays, float or
+        complex with arrays outside the matching dtype set), and OverflowError
+        when an int lies outside iinfo(self.dtype) for an integer array.
+        """
+        from ._data_type_functions import iinfo
+
+        # NOTE: bool is tested before int because bool is a subclass of int.
+        # Only Python scalar types that match the array dtype are allowed.
+        if isinstance(scalar, bool):
+            if self.dtype not in boolean_dtypes:
+                raise TypeError("Python bool scalars can only be promoted with bool arrays")
+
+        elif isinstance(scalar, int):
+            if self.dtype in boolean_dtypes:
+                raise TypeError("Python int scalars cannot be promoted with bool arrays")
+            if self.dtype in integer_dtypes:
+                info = iinfo(self.dtype)
+                if not (info.min <= scalar <= info.max):
+                    raise OverflowError("Python int scalars must be within the bounds of the dtype for integer arrays")
+
+        elif isinstance(scalar, float):
+            if self.dtype not in floating_dtypes:
+                raise TypeError("Python float scalars can only be promoted with floating-point arrays.")
+
+        elif isinstance(scalar, complex):
+            if self.dtype not in complex_floating_dtypes:
+                raise TypeError("Python complex scalars can only be promoted with complex floating-point arrays.")
+
+        else:
+            raise TypeError("'scalar' must be a Python scalar")
+
+        # NOTE
+        # Scalars are unconditionally cast to the same dtype as the array.
+
+        # NOTE
+        # The spec only defines integer-dtype/int promotion for ints within the bounds of the integer dtype.
+        # Out-of-range ints for integer arrays were already rejected above with OverflowError.
+        return Array._new(af.constant(scalar, dtype=self.dtype, shape=(1,)))
+
+    @staticmethod
+    def _normalize_two_args(x1: Array, x2: Array) -> tuple[Array, Array]:
+        # BUG, FIXME: currently a no-op -- (x1, x2) is returned unchanged; NumPy reference logic kept below.
+        # """
+        # Normalize inputs to two arg functions to fix type promotion rules
+
+        # NumPy deviates from the spec type promotion rules in cases where one
+        # argument is 0-dimensional and the other is not. For example:
+
+        # >>> import numpy as np
+        # >>> a = np.array([1.0], dtype=np.float32)
+        # >>> b = np.array(1.0, dtype=np.float64)
+        # >>> np.add(a, b) # The spec says this should be float64
+        # array([2.], dtype=float32)
+
+        # To fix this, we add a dimension to the 0-dimension array before passing it
+        # through. This works because a dimension would be added anyway from
+        # broadcasting, so the resulting shape is the same, but this prevents NumPy
+        # from not promoting the dtype.
+        # """
+        # # Another option would be to use signature=(x1.dtype, x2.dtype, None),
+        # # but that only works for ufuncs, so we would have to call the ufuncs
+        # # directly in the operator methods. One should also note that this
+        # # sort of trick wouldn't work for functions like searchsorted, which
+        # # don't do normal broadcasting, but there aren't any functions like
+        # # that in the array API namespace.
+        # if x1.ndim == 0 and x2.ndim != 0:
+        #     # The _array[None] workaround was chosen because it is relatively
+        #     # performant. broadcast_to(x1._array, x2.shape) is much slower. We
+        #     # could also manually type promote x2, but that is more complicated
+        #     # and about the same performance as this.
+        #     x1 = Array._new(x1._array[None])
+        # elif x2.ndim == 0 and x1.ndim != 0:
+        #     x2 = Array._new(x2._array[None])
+        return (x1, x2)
+
+    @classmethod
+    def _new(cls, x: Array | bool | int | float | complex | NestedSequence | SupportsBufferProtocol, /) -> Array:
+        """
+        Private constructor that wraps ``x`` in an array API Array.
+
+        Python scalars are first converted with ``af.constant``; a TypeError
+        is raised when the resulting dtype is not one of ``all_dtypes``.
+
+        Code outside of the array_api submodule should go through a creation
+        function such as ``asarray`` instead of calling this method.
+        """
+        # Note: The spec does not have array scalars, so Python scalars are wrapped first.
+        is_python_scalar = isinstance(x, bool | int | float | complex)
+        data = af.constant(x) if is_python_scalar else x  # type: ignore[arg-type]
+        dtype = data.dtype  # type: ignore[union-attr]
+        if dtype not in all_dtypes:
+            raise TypeError(f"The array_api namespace does not support the dtype '{dtype}'")
+
+        # Array.__new__ always raises, so the instance is allocated through the parent __new__.
+        wrapper = super().__new__(cls)
+        wrapper._array = data  # type: ignore[assignment]
+        return wrapper
+
+    def __str__(self: Array, /) -> str:
+        """Return the string form of the wrapped ArrayFire array."""
+        # The underlying text is passed through as-is (no "array" -> "Array" renaming).
+        text = self._array.__str__()
+        return text
+
+    def __repr__(self: Array, /) -> str:
+        """Return the repr of the wrapped ArrayFire array."""
+        # TODO
+        # Numpy representation:
+        # suffix = f", dtype={self.dtype.name})"
+        # if 0 in self.shape:
+        #     prefix = "empty("
+        #     mid = str(self.shape)
+        # else:
+        #     prefix = "Array("
+        #     mid = np.array2string(self._array, separator=', ', prefix=prefix, suffix=suffix)
+        # return prefix + mid + suffix
+
+        wrapped = self._array
+        return repr(wrapped)
+
+    def __abs__(self: Array, /) -> Array:
+        """
+        Delegates ``abs(self)`` to the wrapped ArrayFire array; numeric dtypes only.
+        """
+        is_numeric = self.dtype in numeric_dtypes
+        if not is_numeric:
+            raise TypeError("Only numeric dtypes are allowed in __abs__")
+        return self.__class__._new(self._array.__abs__())
+
+    def __add__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Delegates ``self + other`` to the wrapped arrays after validating numeric dtypes.
+        """
+        rhs = self._check_allowed_dtypes(other, "numeric", "__add__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        lhs, rhs = self._normalize_two_args(self, rhs)
+        total = lhs._array.__add__(rhs._array)
+        return lhs.__class__._new(total)
+
+    def __and__(self: Array, other: int | bool | Array, /) -> Array:
+        """
+        Delegates ``self & other`` to the wrapped arrays; integer or boolean dtypes only.
+        """
+        rhs = self._check_allowed_dtypes(other, "integer or boolean", "__and__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        lhs, rhs = self._normalize_two_args(self, rhs)
+        conjunction = lhs._array.__and__(rhs._array)
+        return lhs.__class__._new(conjunction)
+
+    def __array_namespace__(self: Array, /, *, api_version: str | None = None) -> types.ModuleType:
+        """Return the arrayfire.array_api module, raising ValueError for an unsupported ``api_version``."""
+        from arrayfire import array_api
+        declared = array_api.__array_api_version__  # also accepted, next to the legacy "2021." prefix
+        if api_version is not None and not (api_version.startswith("2021.") or api_version == declared):
+            raise ValueError(f"Unrecognized array API version: {api_version!r}")
+        return array_api
+
+    def __bool__(self: Array, /) -> bool:
+        """
+        Convert a 0-dimensional array to a Python bool based on its stored value.
+        """
+        # NOTE(review): relies on af.Array.scalar() returning the stored element -- confirm.
+        if self._array.ndim != 0:
+            raise TypeError("bool is only allowed on arrays with 0 dimensions")
+        return bool(self._array.scalar())  # is_bool only reports the dtype, not the value
+
+    def __complex__(self: Array, /) -> complex:
+        """
+        Convert a 0-dimensional array to a Python complex via the wrapped array.
+        """
+        # Note: arrays with one or more dimensions are rejected.
+        dims = self._array.ndim
+        if dims != 0:
+            raise TypeError("complex is only allowed on arrays with 0 dimensions")
+        return self._array.__complex__()
+
+    def __dlpack__(self: Array, /, *, stream: None = None) -> PyCapsule:
+        """Forward the DLPack export request, including ``stream``, to the wrapped array."""
+        wrapped = self._array
+        capsule = wrapped.__dlpack__(stream=stream)
+        return capsule
+
+    # FIXME
+    # def __dlpack_device__(self: Array, /) -> Tuple[IntEnum, int]:
+    #     """
+    #     Performs the operation __dlpack_device__.
+    #     """
+    #     # Note: device support is required for this
+    #     return self._array.__dlpack_device__()
+
+    def __eq__(self: Array, other: int | float | bool | Array, /) -> Array:  # type: ignore[override]
+        """
+        Delegates ``self == other`` to the wrapped arrays and returns an Array.
+        """
+        # Every dtype category is accepted ("all"), but the two operands must
+        # still be promotable with each other.
+        rhs = self._check_allowed_dtypes(other, "all", "__eq__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        lhs, rhs = self._normalize_two_args(self, rhs)
+        comparison = lhs._array.__eq__(rhs._array)
+        return lhs.__class__._new(comparison)
+
+    def __float__(self: Array, /) -> float:
+        """
+        Convert a 0-dimensional, non-complex array to a Python float.
+        """
+        # Note: the dimensionality check runs before the dtype check.
+        if self._array.ndim != 0:
+            raise TypeError("float is only allowed on arrays with 0 dimensions")
+        is_complex = self.dtype in complex_floating_dtypes
+        if is_complex:
+            raise TypeError("float is not allowed on complex floating-point arrays")
+        return self._array.__float__()
+
+    def __floordiv__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Delegates ``self // other`` to the wrapped arrays; real numeric dtypes only.
+        """
+        divisor = self._check_allowed_dtypes(other, "real numeric", "__floordiv__")
+        if divisor is NotImplemented:
+            return NotImplemented
+        dividend, divisor = self._normalize_two_args(self, divisor)
+        quotient = dividend._array.__floordiv__(divisor._array)
+        return dividend.__class__._new(quotient)
+
+    def __ge__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Delegates ``self >= other`` to the wrapped arrays; real numeric dtypes only.
+        """
+        rhs = self._check_allowed_dtypes(other, "real numeric", "__ge__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        lhs, rhs = self._normalize_two_args(self, rhs)
+        comparison = lhs._array.__ge__(rhs._array)
+        return lhs.__class__._new(comparison)
+
+    # def __getitem__(
+    #     self: Array,
+    #     key: Union[int, slice, ellipsis, Tuple[Union[int, slice, ellipsis], ...], Array],
+    #     /,
+    # ) -> Array:
+    #     """
+    #     Performs the operation __getitem__.
+    #     """
+    #     # Note: Only indices required by the spec are allowed. See the
+    #     # docstring of _validate_index
+    #     self._validate_index(key)
+    #     if isinstance(key, Array):
+    #         # Indexing self._array with array_api arrays can be erroneous
+    #         key = key._array
+    #     res = self._array.__getitem__(key)
+    #     return self._new(res)
+
+    def __gt__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Delegates ``self > other`` to the wrapped arrays; real numeric dtypes only.
+        """
+        rhs = self._check_allowed_dtypes(other, "real numeric", "__gt__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        lhs, rhs = self._normalize_two_args(self, rhs)
+        comparison = lhs._array.__gt__(rhs._array)
+        return lhs.__class__._new(comparison)
+
+    def __int__(self: Array, /) -> int:
+        """
+        Convert a 0-dimensional, non-complex array to a Python int.
+        """
+        # Note: the dimensionality check runs before the dtype check.
+        if self._array.ndim != 0:
+            raise TypeError("int is only allowed on arrays with 0 dimensions")
+        is_complex = self.dtype in complex_floating_dtypes
+        if is_complex:
+            raise TypeError("int is not allowed on complex floating-point arrays")
+        return self._array.__int__()
+
+    def __index__(self: Array, /) -> int:
+        """
+        Delegates ``operator.index(self)`` to the wrapped ArrayFire array.
+        """
+        # No dtype or dimensionality validation is performed at this level.
+        return self._array.__index__()
+
+    def __invert__(self: Array, /) -> Array:
+        """
+        Delegates ``~self`` to the wrapped ArrayFire array; integer or boolean dtypes only.
+        """
+        is_supported = self.dtype in integer_or_boolean_dtypes
+        if not is_supported:
+            raise TypeError("Only integer or boolean dtypes are allowed in __invert__")
+        return self.__class__._new(self._array.__invert__())
+
+    def __le__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Delegates ``self <= other`` to the wrapped arrays; real numeric dtypes only.
+        """
+        rhs = self._check_allowed_dtypes(other, "real numeric", "__le__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        lhs, rhs = self._normalize_two_args(self, rhs)
+        comparison = lhs._array.__le__(rhs._array)
+        return lhs.__class__._new(comparison)
+
+    def __lshift__(self: Array, other: int | Array, /) -> Array:
+        """
+        Delegates ``self << other`` to the wrapped arrays; integer dtypes only.
+        """
+        amount = self._check_allowed_dtypes(other, "integer", "__lshift__")
+        if amount is NotImplemented:
+            return NotImplemented
+        value, amount = self._normalize_two_args(self, amount)
+        shifted = value._array.__lshift__(amount._array)
+        return value.__class__._new(shifted)
+
+    def __lt__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Delegates ``self < other`` to the wrapped arrays; real numeric dtypes only.
+        """
+        rhs = self._check_allowed_dtypes(other, "real numeric", "__lt__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        lhs, rhs = self._normalize_two_args(self, rhs)
+        comparison = lhs._array.__lt__(rhs._array)
+        return lhs.__class__._new(comparison)
+
+    def __matmul__(self: Array, other: Array, /) -> Array:
+        """
+        Delegates ``self @ other`` to the wrapped ArrayFire arrays; numeric dtypes only.
+        """
+        # Unlike the other binary operators, the operands are not passed
+        # through _normalize_two_args before the product is computed.
+        rhs = self._check_allowed_dtypes(other, "numeric", "__matmul__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        product = self._array.__matmul__(rhs._array)
+        return self.__class__._new(product)
+
+    def __mod__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Delegates ``self % other`` to the wrapped arrays; real numeric dtypes only.
+        """
+        divisor = self._check_allowed_dtypes(other, "real numeric", "__mod__")
+        if divisor is NotImplemented:
+            return NotImplemented
+        dividend, divisor = self._normalize_two_args(self, divisor)
+        remainder = dividend._array.__mod__(divisor._array)
+        return dividend.__class__._new(remainder)
+
+    def __mul__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Delegates ``self * other`` to the wrapped arrays after validating numeric dtypes.
+        """
+        rhs = self._check_allowed_dtypes(other, "numeric", "__mul__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        lhs, rhs = self._normalize_two_args(self, rhs)
+        product = lhs._array.__mul__(rhs._array)
+        return lhs.__class__._new(product)
+
+    def __ne__(self: Array, other: int | float | bool | Array, /) -> Array:  # type: ignore[override]
+        """
+        Delegates ``self != other`` to the wrapped arrays; any promotable dtypes are accepted.
+        """
+        rhs = self._check_allowed_dtypes(other, "all", "__ne__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        lhs, rhs = self._normalize_two_args(self, rhs)
+        comparison = lhs._array.__ne__(rhs._array)
+        return lhs.__class__._new(comparison)
+
+    def __neg__(self: Array, /) -> Array:
+        """
+        Delegates ``-self`` to the wrapped ArrayFire array; numeric dtypes only.
+        """
+        is_numeric = self.dtype in numeric_dtypes
+        if not is_numeric:
+            raise TypeError("Only numeric dtypes are allowed in __neg__")
+        return self.__class__._new(self._array.__neg__())
+
+    def __or__(self: Array, other: int | bool | Array, /) -> Array:
+        """
+        Delegates ``self | other`` to the wrapped arrays; integer or boolean dtypes only.
+        """
+        rhs = self._check_allowed_dtypes(other, "integer or boolean", "__or__")
+        if rhs is NotImplemented:
+            return NotImplemented
+        lhs, rhs = self._normalize_two_args(self, rhs)
+        disjunction = lhs._array.__or__(rhs._array)
+        return lhs.__class__._new(disjunction)
+
+    def __pos__(self: Array, /) -> Array:
+        """
+        Delegates ``+self`` to the wrapped ArrayFire array; numeric dtypes only.
+        """
+        is_numeric = self.dtype in numeric_dtypes
+        if not is_numeric:
+            raise TypeError("Only numeric dtypes are allowed in __pos__")
+        return self.__class__._new(self._array.__pos__())
+
+    # def __pow__(self: Array, other: int | float | Array, /) -> Array:
+    #     """
+    #     Performs the operation __pow__.
+    #     """
+    #     from ._elementwise_functions import pow
+
+    #     other = self._check_allowed_dtypes(other, "numeric", "__pow__")
+    #     if other is NotImplemented:
+    #         return other
+    #     # Note: NumPy's __pow__ does not follow type promotion rules for 0-d
+    #     # arrays, so we use pow() here instead.
+    #     return pow(self, other)
+
+    def __rshift__(self: Array, other: int | Array, /) -> Array:
+        """
+        Performs the operation __rshift__.
+        """
+        other = self._check_allowed_dtypes(other, "integer", "__rshift__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__rshift__(other._array)
+        return self.__class__._new(res)
+
+    # def __setitem__(
+    #     self,
+    #     key: Union[int, slice, ellipsis, Tuple[Union[int, slice, ellipsis], ...], Array],
+    #     value: int | float | bool | Array,
+    #     /,
+    # ) -> None:
+    #     """
+    #     Performs the operation __setitem__.
+    #     """
+    #     # Note: Only indices required by the spec are allowed. See the
+    #     # docstring of _validate_index
+    #     self._validate_index(key)
+    #     if isinstance(key, Array):
+    #         # Indexing self._array with array_api arrays can be erroneous
+    #         key = key._array
+    #     self._array.__setitem__(key, asarray(value)._array)
+
+    def __sub__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __sub__.
+        """
+        other = self._check_allowed_dtypes(other, "numeric", "__sub__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__sub__(other._array)
+        return self.__class__._new(res)
+
+    # PEP 484 requires int to be a subtype of float, but __truediv__ should
+    # not accept int.
+    def __truediv__(self: Array, other: float | Array, /) -> Array:
+        """
+        Performs the operation __truediv__.
+        """
+        other = self._check_allowed_dtypes(other, "floating-point", "__truediv__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__truediv__(other._array)
+        return self.__class__._new(res)
+
+    def __xor__(self: Array, other: int | bool | Array, /) -> Array:
+        """
+        Performs the operation __xor__.
+        """
+        other = self._check_allowed_dtypes(other, "integer or boolean", "__xor__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__xor__(other._array)
+        return self.__class__._new(res)
+
+    def __iadd__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __iadd__.
+        """
+        other = self._check_allowed_dtypes(other, "numeric", "__iadd__")
+        if other is NotImplemented:
+            return other
+        self._array.__iadd__(other._array)
+        return self
+
+    def __radd__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __radd__.
+        """
+        other = self._check_allowed_dtypes(other, "numeric", "__radd__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__radd__(other._array)
+        return self.__class__._new(res)
+
+    def __iand__(self: Array, other: int | bool | Array, /) -> Array:
+        """
+        Performs the operation __iand__.
+        """
+        other = self._check_allowed_dtypes(other, "integer or boolean", "__iand__")
+        if other is NotImplemented:
+            return other
+        self._array.__iand__(other._array)
+        return self
+
+    def __rand__(self: Array, other: int | bool | Array, /) -> Array:
+        """
+        Performs the operation __rand__.
+        """
+        other = self._check_allowed_dtypes(other, "integer or boolean", "__rand__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__rand__(other._array)
+        return self.__class__._new(res)
+
+    def __ifloordiv__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __ifloordiv__.
+        """
+        other = self._check_allowed_dtypes(other, "real numeric", "__ifloordiv__")
+        if other is NotImplemented:
+            return other
+        self._array.__ifloordiv__(other._array)
+        return self
+
+    def __rfloordiv__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __rfloordiv__.
+        """
+        other = self._check_allowed_dtypes(other, "real numeric", "__rfloordiv__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__rfloordiv__(other._array)
+        return self.__class__._new(res)
+
+    def __ilshift__(self: Array, other: int | Array, /) -> Array:
+        """
+        Performs the operation __ilshift__.
+        """
+        other = self._check_allowed_dtypes(other, "integer", "__ilshift__")
+        if other is NotImplemented:
+            return other
+        self._array.__ilshift__(other._array)
+        return self
+
+    def __rlshift__(self: Array, other: int | Array, /) -> Array:
+        """
+        Performs the operation __rlshift__.
+        """
+        other = self._check_allowed_dtypes(other, "integer", "__rlshift__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__rlshift__(other._array)
+        return self.__class__._new(res)
+
+    def __imatmul__(self: Array, other: Array, /) -> Array:
+        """
+        Performs the operation __imatmul__.
+        """
+        # matmul is not defined for scalars, but without this, we may get
+        # the wrong error message from asarray.
+        other = self._check_allowed_dtypes(other, "numeric", "__imatmul__")
+        if other is NotImplemented:
+            return other
+        res = self._array.__imatmul__(other._array)
+        return self.__class__._new(res)
+
+    def __rmatmul__(self: Array, other: Array, /) -> Array:
+        """
+        Performs the operation __rmatmul__.
+        """
+        # matmul is not defined for scalars, but without this, we may get
+        # the wrong error message from asarray.
+        other = self._check_allowed_dtypes(other, "numeric", "__rmatmul__")
+        if other is NotImplemented:
+            return other
+        res = self._array.__rmatmul__(other._array)
+        return self.__class__._new(res)
+
+    def __imod__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __imod__.
+        """
+        other = self._check_allowed_dtypes(other, "real numeric", "__imod__")
+        if other is NotImplemented:
+            return other
+        self._array.__imod__(other._array)
+        return self
+
+    def __rmod__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __rmod__.
+        """
+        other = self._check_allowed_dtypes(other, "real numeric", "__rmod__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__rmod__(other._array)
+        return self.__class__._new(res)
+
+    def __imul__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __imul__.
+        """
+        other = self._check_allowed_dtypes(other, "numeric", "__imul__")
+        if other is NotImplemented:
+            return other
+        self._array.__imul__(other._array)
+        return self
+
+    def __rmul__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __rmul__.
+        """
+        other = self._check_allowed_dtypes(other, "numeric", "__rmul__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__rmul__(other._array)
+        return self.__class__._new(res)
+
+    def __ior__(self: Array, other: int | bool | Array, /) -> Array:
+        """
+        Performs the operation __ior__.
+        """
+        other = self._check_allowed_dtypes(other, "integer or boolean", "__ior__")
+        if other is NotImplemented:
+            return other
+        self._array.__ior__(other._array)
+        return self
+
+    def __ror__(self: Array, other: int | bool | Array, /) -> Array:
+        """
+        Performs the operation __ror__.
+        """
+        other = self._check_allowed_dtypes(other, "integer or boolean", "__ror__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__ror__(other._array)
+        return self.__class__._new(res)
+
+    def __ipow__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __ipow__.
+        """
+        other = self._check_allowed_dtypes(other, "numeric", "__ipow__")
+        if other is NotImplemented:
+            return other
+        self._array.__ipow__(other._array)
+        return self
+
+    def __rpow__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __rpow__.
+        """
+        other = self._check_allowed_dtypes(other, "numeric", "__rpow__")
+        if other is NotImplemented:
+            return other
+        self._array.__rpow__(other._array)
+        return self
+
+    def __irshift__(self: Array, other: int | Array, /) -> Array:
+        """
+        Performs the operation __irshift__.
+        """
+        other = self._check_allowed_dtypes(other, "integer", "__irshift__")
+        if other is NotImplemented:
+            return other
+        self._array.__irshift__(other._array)
+        return self
+
+    def __rrshift__(self: Array, other: int | Array, /) -> Array:
+        """
+        Performs the operation __rrshift__.
+        """
+        other = self._check_allowed_dtypes(other, "integer", "__rrshift__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__rrshift__(other._array)
+        return self.__class__._new(res)
+
+    def __isub__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __isub__.
+        """
+        other = self._check_allowed_dtypes(other, "numeric", "__isub__")
+        if other is NotImplemented:
+            return other
+        self._array.__isub__(other._array)
+        return self
+
+    def __rsub__(self: Array, other: int | float | Array, /) -> Array:
+        """
+        Performs the operation __rsub__.
+        """
+        other = self._check_allowed_dtypes(other, "numeric", "__rsub__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__rsub__(other._array)
+        return self.__class__._new(res)
+
+    def __itruediv__(self: Array, other: float | Array, /) -> Array:
+        """
+        Performs the operation __itruediv__.
+        """
+        other = self._check_allowed_dtypes(other, "floating-point", "__itruediv__")
+        if other is NotImplemented:
+            return other
+        self._array.__itruediv__(other._array)
+        return self
+
+    def __rtruediv__(self: Array, other: float | Array, /) -> Array:
+        """
+        Performs the operation __rtruediv__.
+        """
+        other = self._check_allowed_dtypes(other, "floating-point", "__rtruediv__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__rtruediv__(other._array)
+        return self.__class__._new(res)
+
+    def __ixor__(self: Array, other: int | bool | Array, /) -> Array:
+        """
+        Performs the operation __ixor__.
+        """
+        other = self._check_allowed_dtypes(other, "integer or boolean", "__ixor__")
+        if other is NotImplemented:
+            return other
+        self._array.__ixor__(other._array)
+        return self
+
+    def __rxor__(self: Array, other: int | bool | Array, /) -> Array:
+        """
+        Performs the operation __rxor__.
+        """
+        other = self._check_allowed_dtypes(other, "integer or boolean", "__rxor__")
+        if other is NotImplemented:
+            return other
+        self, other = self._normalize_two_args(self, other)
+        res = self._array.__rxor__(other._array)
+        return self.__class__._new(res)
+
+    def to_device(self: Array, device: Device, /, stream: None = None) -> Array:
+        # TODO
+        # Pseudocode:
+        # af_malloc_host(...)
+        # af_write_array (void* allocated on host)
+        # af_create_array(host_pointer, new_device)
+        # af_free_host(host_pointer)
+
+        if stream is not None:
+            raise ValueError("The stream argument to to_device() is not supported")
+
+        return NotImplemented
+
+    @property
+    def dtype(self) -> af.Dtype:
+        """
+        Array API compatible wrapper for :py:meth:`np.ndarray.dtype <numpy.ndarray.dtype>`.
+
+        See its docstring for more information.
+        """
+        return self._array.dtype
+
+    @property
+    def device(self) -> Device:
+        # TODO
+        # Pseudocode:
+        # return Device(af.get_backend.get_arrays_active_backend(self), af.get_arrays_device(self))
+        return NotImplemented
+
+    @property
+    def mT(self) -> Array:
+        # TODO
+        return NotImplemented
+
+    @property
+    def ndim(self) -> int:
+        """
+        Array API compatible wrapper for :py:meth:`np.ndarray.ndim <numpy.ndarray.ndim>`.
+
+        See its docstring for more information.
+        """
+        return self._array.ndim
+
+    @property
+    def shape(self) -> tuple[int, ...]:
+        """
+        Array API compatible wrapper for :py:meth:`np.ndarray.shape <numpy.ndarray.shape>`.
+
+        See its docstring for more information.
+        """
+        return self._array.shape
+
+    @property
+    def size(self) -> int:
+        """
+        Array API compatible wrapper for :py:meth:`np.ndarray.size <numpy.ndarray.size>`.
+
+        See its docstring for more information.
+        """
+        return self._array.size
+
+    @property
+    def T(self) -> Array:
+        """
+        Array API compatible wrapper for :py:meth:`np.ndarray.T <numpy.ndarray.T>`.
+
+        See its docstring for more information.
+        """
+        # NOTE: T only works on 2-dimensional arrays. See the corresponding
+        # note in the specification:
+        # https://data-apis.org/array-api/latest/API_specification/array_object.html#t
+        if self.ndim != 2:
+            raise ValueError(
+                "x.T requires x to have 2 dimensions. "
+                "Use x.mT to transpose stacks of matrices and permute_dims() to permute dimensions."
+            )
+        return self.__class__._new(self._array.T)
diff --git a/arrayfire/array_api/_constants.py b/arrayfire/array_api/_constants.py
new file mode 100644
index 0000000..a578d13
--- /dev/null
+++ b/arrayfire/array_api/_constants.py
@@ -0,0 +1,107 @@
+"""
+This file defines the types for type annotations.
+
+These names aren't part of the module namespace, but they are used in the
+annotations in the function signatures. The functions in the module are only
+valid for inputs that match the given type annotations.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import arrayfire as af
+
+__all__ = [
+    "Device",
+    "SupportsDLPack",
+    "SupportsBufferProtocol",
+    "PyCapsule",
+]
+
+from typing import Any, Iterator, Protocol, TypeVar
+
+_T_co = TypeVar("_T_co", covariant=True)
+
+
+class NestedSequence(Protocol[_T_co]):
+    """
+    Structural type for an arbitrarily nested sequence of ``_T_co`` items.
+
+    Each element is either a ``_T_co`` value or another ``NestedSequence``.
+    """
+
+    # Index access yields a leaf value or a nested sub-sequence.
+    def __getitem__(self, key: int, /) -> _T_co | NestedSequence[_T_co]: ...
+
+    # Number of elements at this nesting level.
+    def __len__(self, /) -> int: ...
+
+    # Iteration yields leaf values or nested sub-sequences.
+    def __iter__(self, /) -> Iterator[_T_co | NestedSequence[_T_co]]: ...
+
+
+@dataclass
+class Device:
+    backend_type: af.BackendType
+    device_id: int = 0
+
+    @classmethod
+    def use_default(cls) -> Device:
+        _backend = af.get_backend()
+        return cls(_backend.backend_type, af.get_device())
+
+    def __post_init__(self) -> None:
+        if not isinstance(self.backend_type, af.BackendType):
+            raise ValueError("Bad backend type. Only support ones from af.BackendType.")
+
+        if self.device_id < 0:
+            raise ValueError("Device ID can not be lesser than 0")
+
+        if self.device_id > af.get_device_count() - 1:
+            raise ValueError("Device ID can not be higher than count of available devices.")
+
+        if self.backend_type == af.BackendType.unified:
+            raise ValueError(f"Uncompatible backend type '{self.backend_type.name}' with Array API.")
+
+        if self.backend_type == af.BackendType.cpu and self.device_id != 0:
+            raise ValueError(f"Device ID can not be greater than '{self.device_id}' with cpu backend.")
+
+
+# TODO
+# @dataclass
+# class Device:
+#     backend_type: BackendType
+#     device_id: int
+
+#     # TODO
+#     def __post_init__(self) -> None:
+#         # TODO
+#         # Double check all unified mentions here and in wrapper and remove them completely
+#         if self.backend_type == BackendType.unified:
+#             raise ValueError("Unsupported backend type for Array API.")
+
+#         if self.backend_type == BackendType.cpu and self.device_id != 0:
+#             raise ValueError("Device ID cant not be greater than 0 for CPU.")
+
+# Example 1:
+# Device(BackendType.cuda, 1)
+
+# Example 2:
+# gpu_1 = Device("oneapi", 0)
+# gpu_2 = Device("cuda", 1)
+# cpu = Device("cpu", 0)
+
+# aa = empty((2,3), dtype=float32, device=(Device.cuda, 1))
+# bb = empty((2,3), dtype=float32, device=Device.CPU)
+# aa + bb -> Error: bad devices
+
+
+# class Device(enum.Enum):
+#     CPU = enum.auto()
+#     GPU = enum.auto()
+
+#     def __repr__(self) -> str:
+#         return str(self.value)
+
+#     def __iter__(self) -> Iterator[Device]:
+#         yield self
+
+
+SupportsBufferProtocol = Any
+PyCapsule = Any
+
+
+class SupportsDLPack(Protocol):
+    """Structural type for objects that expose a ``__dlpack__`` method."""
+
+    # ``stream`` is keyword-only; the method returns a PyCapsule (aliased to
+    # ``Any`` in this module).
+    def __dlpack__(self, /, *, stream: None = ...) -> PyCapsule: ...
diff --git a/arrayfire/array_api/_creation_function.py b/arrayfire/array_api/_creation_function.py
new file mode 100644
index 0000000..df7cbbe
--- /dev/null
+++ b/arrayfire/array_api/_creation_function.py
@@ -0,0 +1,1073 @@
+from __future__ import annotations
+
+from functools import wraps
+from typing import TYPE_CHECKING, Any, Callable
+
+import arrayfire as af
+
+from ._array_object import Array
+from ._constants import Device
+from ._dtypes import all_dtypes, float32, int32
+
+if TYPE_CHECKING:
+    from ._constants import NestedSequence, SupportsBufferProtocol
+
+
+def _check_valid_dtype(dtype: af.Dtype | None) -> None:
+    if dtype not in (None,) + all_dtypes:
+        raise ValueError("dtype must be one of the supported dtypes")
+
+
+def _flatten_sequence(sequence: NestedSequence[bool | int | float]) -> NestedSequence[bool | int | float]:
+    """Recursively flatten a nested list."""
+    if isinstance(sequence[0], list | tuple):
+        return [item for sublist in sequence for item in _flatten_sequence(sublist)]  # type: ignore[arg-type]  # FIXME
+    else:
+        return list(sequence)
+
+
+def _determine_shape(sequence: NestedSequence[bool | int | float]) -> tuple[int, ...]:
+    """Recursively determine the shape of the nested sequence."""
+    if isinstance(sequence, list | tuple) and isinstance(sequence[0], list | tuple):
+        return (len(sequence),) + _determine_shape(sequence[0])
+    else:
+        return (len(sequence),)
+
+
+def _process_nested_sequence(sequence: NestedSequence[bool | int | float]) -> af.Array:
+    """Process a nested sequence to create an ArrayFire array of appropriate dimensions."""
+    shape = _determine_shape(sequence)
+    flat_sequence = _flatten_sequence(sequence)
+    # TODO
+    # validate the default type
+    return af.Array(flat_sequence, shape=shape, dtype=af.float32)  # type: ignore[arg-type]  # FIXME
+
+
+def manage_device(func: Callable) -> Callable:
+    @wraps(func)
+    def wrapper(*args: Any, **kwargs: Any) -> Any:
+        _default_device = Device.use_default()
+
+        device = kwargs.get("device", _default_device)
+
+        # HACK
+        if func.__name__.endswith("_like") and device is None:
+            other = args[0]
+            device = other.device
+
+        if device is None:
+            device = _default_device
+
+        # Set the backend and device
+        af.set_backend(device.backend_type)
+        af.set_device(device.device_id)
+
+        try:
+            result = func(*args, **kwargs)
+        finally:
+            # Restore to the default backend and device settings
+            af.set_backend(_default_device.backend_type)
+            af.set_device(_default_device.device_id)
+
+        return result
+
+    return wrapper
+
+
+@manage_device
+def asarray(
+    obj: Array | bool | int | float | complex | NestedSequence[bool | int | float] | SupportsBufferProtocol,
+    /,
+    *,
+    dtype: af.Dtype | None = None,
+    device: Device | None = None,
+    copy: bool | None = None,
+) -> Array:
+    """
+    Converts the input to an array.
+
+    This function creates an array from an object, or wraps an existing array. If `obj` is already an array of the
+    desired data type and `copy` is False, no copy will be performed, and `obj` itself is returned. Otherwise, a new
+    array is created, possibly copying the data from `obj`, depending on the `copy` parameter.
+
+    Parameters
+    ----------
+    obj : Array | bool | int | float | complex | NestedSequence | SupportsBufferProtocol
+        The input object to convert to an array. This can be a scalar, nested sequence (like lists of lists), or any
+        object exposing the buffer interface, and of course, an existing array.
+    dtype : af.Dtype | None, optional
+        The desired data type for the array. If `None`, the data type is inferred from the input object. Explicitly
+        specifying the data type allows the creation of an array with the intended data type.
+    device : Device | None, optional
+        The device on which to create the array. If `None`, the array is created on the default device. This parameter
+        can be used to specify the computational device (CPU, GPU, etc.) for the array's storage.
+    copy : bool | None, optional
+        If True, a new array is always created. If False, a new array is only created if necessary (i.e., if `obj` is
+        not already an array of the specified data type). If `None`, the behavior defaults to True (i.e., always copy).
+
+    Returns
+    -------
+    Array
+        An array representation of `obj`. If `obj` is already an array, it may be returned directly based on the `copy`
+        parameter and the data type match.
+
+    Examples
+    --------
+    >>> asarray([1, 2, 3])
+    Array([1, 2, 3])
+
+    >>> asarray([1, 2, 3], dtype=float32)
+    Array([1.0, 2.0, 3.0])
+
+    >>> a = array([1, 2, 3], device='cpu')
+    >>> asarray(a, device='gpu')
+    # A new array on GPU, containing [1, 2, 3]
+
+    >>> asarray(1.0, dtype=int32)
+    Array(1)
+
+    Notes
+    -----
+    - The `asarray` function is a convenient way to create arrays or convert other objects to arrays with specified
+      data type and storage device criteria. It is particularly useful for ensuring that numerical data is in the form
+      of an array for computational tasks.
+    - The `copy` parameter offers control over whether data is duplicated when creating the array, which can be
+      important for managing memory use and computational efficiency.
+    """
+    _check_valid_dtype(dtype)
+
+    if dtype is None:
+        dtype = float32
+
+    if isinstance(obj, bool | int | float | complex):
+        afarray = af.constant(obj, dtype=dtype)
+    elif isinstance(obj, Array):
+        afarray = Array._array if not copy else af.copy_array(Array._array)
+    elif isinstance(obj, list | tuple):
+        afarray = _process_nested_sequence(obj)
+    else:
+        # HACK
+        afarray = af.Array(obj, dtype=dtype)  # type: ignore[arg-type]
+
+    return Array._new(afarray)
+
+
+def arange(
+    start: int | float,
+    /,
+    stop: int | float | None = None,
+    step: int | float = 1,
+    *,
+    dtype: af.Dtype | None = None,
+    device: Device | None = None,
+) -> Array:
+    # TODO
+    return NotImplemented
+
+
+@manage_device
+def empty(
+    shape: int | tuple[int, ...],
+    *,
+    dtype: af.Dtype | None = None,
+    device: Device | None = None,
+) -> Array:
+    """
+    Returns an uninitialized array of a specified shape. The contents of the array are uninitialized,
+    meaning they will contain arbitrary data. This function is often used for efficiency in scenarios
+    where the entire array will be populated with data immediately after its creation, eliminating the
+    need for initial zeroing of memory.
+
+    Parameters
+    ----------
+    shape : int or tuple of ints
+        The desired shape of the output array. An integer creates a one-dimensional array, while a tuple
+        creates an n-dimensional array with dimensions specified by the tuple.
+    dtype : af.Dtype | None, optional
+        The desired data type for the array. If not specified, the array's data type will be the default
+        floating-point data type determined by the array library. Specifying a data type is useful when
+        creating arrays intended for specific types of data.
+    device : Device | None, optional
+        The device on which to create the array. If not specified, the array will be created on the default
+        device. This parameter can be used to specify the computational device (e.g., CPU or GPU) for the
+        array's storage.
+
+    Returns
+    -------
+    Array
+        An array with the specified shape and data type, containing uninitialized data.
+
+    Examples
+    --------
+    >>> empty(3)
+    Array([..., ..., ...])  # Uninitialized data
+
+    >>> empty((2, 2), dtype=float64)
+    Array([[..., ...],
+           [..., ...]])  # Uninitialized data of float64 type
+
+    >>> empty((3, 4), device='gpu')
+    Array([[..., ..., ..., ...],
+           [..., ..., ..., ...],
+           [..., ..., ..., ...]])  # Uninitialized data on GPU
+
+    Notes
+    -----
+    - The contents of the returned array are uninitialized and accessing them without setting their values first
+      can result in undefined behavior.
+    - This function provides an efficient way to allocate memory for large arrays when it is known that all
+      elements will be explicitly assigned before any computation is performed on the array.
+
+    The use of `dtype` and `device` allows for control over the type and location of the allocated memory,
+    optimizing performance and compatibility with specific hardware or computational tasks.
+    """
+    _check_valid_dtype(dtype)
+
+    if isinstance(shape, int):
+        shape = (shape,)
+
+    if dtype is None:
+        dtype = float32
+
+    array = af.Array(None, dtype=dtype, shape=shape)
+
+    return Array._new(array)
+
+
+@manage_device
+def empty_like(x: Array, /, *, dtype: af.Dtype | None = None, device: Device | None = None) -> Array:
+    """
+    Returns an uninitialized array with the same shape and, optionally, the same data type as the input array `x`.
+    The contents of the new array are uninitialized, meaning they may contain arbitrary data (including potentially
+    sensitive data left over from other processes). This function is typically used for efficiency in scenarios where
+    the user intends to populate the array with data immediately after its creation.
+
+    Parameters
+    ----------
+    x : Array
+        The input array from which to derive the shape of the output array. The data type of the output array is also
+        inferred from `x` unless `dtype` is explicitly specified.
+    dtype : af.Dtype | None, optional
+        The desired data type for the new array. If `None` (the default), the data type of the input array `x` is used.
+        This parameter allows the user to specify a different data type for the new array.
+    device : Device | None, optional
+        The device on which to create the new array. If `None`, the new array is created on the same device as the
+        input array `x`. This allows for the creation of arrays on specific devices (e.g., on a GPU).
+
+    Returns
+    -------
+    Array
+        An array having the same shape as `x`, with uninitialized data. The data type of the array is determined by the
+        `dtype` parameter if specified, otherwise by the data type of `x`.
+
+    Examples
+    --------
+    >>> x = array([1, 2, 3])
+    >>> y = empty_like(x)
+    # y is an uninitialized array with the same shape and data type as x
+
+    >>> x = array([[1.0, 2.0], [3.0, 4.0]])
+    >>> y = empty_like(x, dtype=int32)
+    # y is an uninitialized array with the same shape as x but with an int32 data type
+
+    >>> x = array([1, 2, 3], device='cpu')
+    >>> y = empty_like(x, device='gpu')
+    # y is an uninitialized array with the same shape as x, created on a GPU
+
+    Notes
+    -----
+    - The contents of the returned array are uninitialized. Accessing the data without first initializing it
+      may result in unpredictable behavior.
+    - The `dtype` and `device` parameters offer flexibility in the creation of the new array, allowing the user
+      to specify the data type and the computational device for the array.
+    """
+    _check_valid_dtype(dtype)
+
+    if dtype is None:
+        dtype = x.dtype
+
+    return empty(x.shape, dtype=dtype, device=device)  # type: ignore[no-any-return]  # FIXME
+
+
+@manage_device
+def eye(
+    n_rows: int,
+    n_cols: int | None = None,
+    /,
+    *,
+    k: int = 0,
+    dtype: af.Dtype | None = None,
+    device: Device | None = None,
+) -> Array:
+    """
+    Returns a two-dimensional array with ones on the kth diagonal and zeros elsewhere.
+
+    Typical uses are identity matrices (`k=0`) and shifted variants of them.
+
+    Parameters
+    ----------
+    n_rows : int
+        The number of rows in the output array.
+    n_cols : int, optional
+        The number of columns in the output array. If None (the default), the output array is square,
+        with the number of columns equal to `n_rows`.
+    k : int, optional, default: 0
+        The index of the diagonal to be filled with ones. `k=0` refers to the main diagonal. A positive `k`
+        refers to an upper diagonal, and a negative `k` to a lower diagonal.
+    dtype : af.Dtype | None, optional
+        The desired data type of the output array. If None, `float32` is used.
+    device : Device | None, optional
+        The device on which to create the array. It is not read in the function body; it is presumably
+        consumed by the `manage_device` decorator.
+
+    Returns
+    -------
+    Array
+        A 2D array of shape (n_rows, n_cols) with ones on the kth diagonal and zeros elsewhere. If the kth
+        diagonal lies entirely outside the array (`k >= n_cols` or `-k >= n_rows`), all elements are zero.
+
+    Examples
+    --------
+    The outputs below are schematic; the default `float32` values are shown without a decimal part.
+
+    >>> eye(3)
+    Array([[1, 0, 0],
+           [0, 1, 0],
+           [0, 0, 1]])
+
+    >>> eye(3, 4, k=1)
+    Array([[0, 1, 0, 0],
+           [0, 0, 1, 0],
+           [0, 0, 0, 1]])
+
+    >>> eye(4, 3, k=-1, dtype=int32)
+    Array([[0, 0, 0],
+           [1, 0, 0],
+           [0, 1, 0],
+           [0, 0, 1]])
+
+    >>> eye(2, 4, k=2)
+    Array([[0, 0, 1, 0],
+           [0, 0, 0, 1]])
+
+    Notes
+    -----
+    - An upper diagonal is present whenever `k < n_cols`, regardless of `n_rows`, so wide matrices such as
+      `eye(2, 4, k=2)` keep their ones.
+    """
+    _check_valid_dtype(dtype)
+
+    if n_cols is None:
+        n_cols = n_rows
+
+    if dtype is None:
+        dtype = float32
+
+    # An upper diagonal (k > 0) disappears once it starts past the last column; a lower
+    # diagonal (k <= 0) disappears once it starts past the last row.
+    diagonal_absent = (k >= n_cols) if k > 0 else (-k >= n_rows)
+    if diagonal_absent:
+        return Array._new(af.constant(0, (n_rows, n_cols), dtype))
+
+    # Create an identity matrix as the base
+    array = af.identity((n_rows, n_cols), dtype=dtype)
+
+    if k == 0:
+        # No shift needed, directly return the identity matrix
+        return Array._new(array)
+
+    if k > 0:
+        # Shift the diagonal right: drop the last k columns and pad k zero columns on the left
+        zeros_padding_horizontal = af.constant(0, (n_rows, k), dtype=dtype)
+        shifted_array = af.join(1, zeros_padding_horizontal, array[:, :-k])
+    else:
+        # Shift the diagonal down (k is negative here): drop the last |k| rows and pad |k| zero rows on top
+        zeros_padding_vertical = af.constant(0, (-k, n_cols), dtype=dtype)
+        shifted_array = af.join(0, zeros_padding_vertical, array[:k, :])
+
+    return Array._new(shifted_array)
+
+
+@manage_device
+def full(
+    shape: int | tuple[int, ...],
+    fill_value: int | float,
+    *,
+    dtype: af.Dtype | None = None,
+    device: Device | None = None,
+) -> Array:
+    """
+    Creates an array of the requested shape in which every element equals `fill_value`.
+
+    Parameters
+    ----------
+    shape : int or tuple of ints
+        The shape of the new array. An integer `n` is treated as the one-dimensional shape `(n,)`;
+        a tuple gives the extent of each dimension.
+    fill_value : int or float
+        The scalar written to every element of the array.
+    dtype : af.Dtype | None, optional
+        The data type of the new array. When None, the type is chosen from `fill_value`: a Python
+        `float` selects `float32`, and any other value selects `int32`.
+        How a `fill_value` that does not fit the chosen data type is converted is left to the
+        ArrayFire backend and is not specified here.
+    device : Device | None, optional
+        The device on which to create the array. It is not read in the function body; it is
+        presumably consumed by the `manage_device` decorator.
+
+    Returns
+    -------
+    Array
+        An array of shape `shape` whose elements are all `fill_value`.
+
+    Notes
+    -----
+    - Because `bool` is a subclass of `int`, a boolean `fill_value` without an explicit `dtype`
+      yields an `int32` array.
+    - `dtype` is validated with `_check_valid_dtype` before anything else happens.
+
+    Examples
+    --------
+    The outputs below are schematic and do not reproduce the exact printed form of an `Array`.
+
+    >>> full(3, 5)
+    Array([5, 5, 5])
+
+    >>> full((2, 2), 0.5)
+    Array([[0.5, 0.5],
+           [0.5, 0.5]])
+
+    >>> full((2, 3), 1, dtype=float64)
+    Array([[1.0, 1.0, 1.0],
+           [1.0, 1.0, 1.0]])
+
+    >>> full(4, 1, dtype=int32)
+    Array([1, 1, 1, 1])
+
+    Use this function whenever a constant-valued array is needed.
+    """
+    _check_valid_dtype(dtype)
+
+    # A bare integer is shorthand for a one-dimensional shape
+    dims = (shape,) if isinstance(shape, int) else shape
+
+    # Without an explicit dtype, Python floats map to float32 and any other fill value to int32
+    if dtype is None:
+        dtype = int32 if not isinstance(fill_value, float) else float32
+
+    return Array._new(af.constant(fill_value, dims, dtype=dtype))
+
+
+@manage_device
+def full_like(
+    x: Array,
+    /,
+    fill_value: int | float,
+    *,
+    dtype: af.Dtype | None = None,
+    device: Device | None = None,
+) -> Array:
+    """
+    Creates an array shaped like `x` in which every element equals `fill_value`.
+
+    The work is delegated to `full` once the shape and data type have been resolved.
+
+    Parameters
+    ----------
+    x : Array
+        The reference array. Its shape is always reused, and its data type is reused when `dtype` is None.
+    fill_value : int | float
+        The scalar written to every element of the new array.
+    dtype : af.Dtype | None, optional
+        The data type of the new array. When None, the data type of `x` is used, so `fill_value` is
+        converted to that type rather than influencing it.
+    device : Device | None, optional
+        The device on which to place the created array. It is forwarded unchanged to `full`.
+
+    Returns
+    -------
+    Array
+        A new array with the shape of `x` whose elements are all `fill_value`.
+
+    Notes
+    -----
+    - With `dtype=None` the result has the data type of `x`, whatever the type of `fill_value`.
+    - An explicit `dtype` takes precedence over the data type of `x`.
+
+    Examples
+    --------
+    The outputs below are schematic and do not reproduce the exact printed form of an `Array`.
+
+    >>> x = array([[1, 2], [3, 4]])
+    >>> full_like(x, 5)
+    Array([[5, 5],
+           [5, 5]])
+
+    >>> full_like(x, 0.5, dtype=float32)
+    Array([[0.5, 0.5],
+           [0.5, 0.5]])
+
+    >>> full_like(x, 7, dtype=int32)
+    Array([[7, 7],
+           [7, 7]])
+    """
+    _check_valid_dtype(dtype)
+
+    # Fall back to the data type of `x` when none is requested explicitly
+    target_dtype = x.dtype if dtype is None else dtype
+
+    return full(x.shape, fill_value, dtype=target_dtype, device=device)  # type: ignore[no-any-return] # FIXME
+
+
+@manage_device
+def linspace(
+    start: int | float,
+    stop: int | float,
+    /,
+    num: int,
+    *,
+    dtype: af.Dtype | None = None,
+    device: Device | None = None,
+    endpoint: bool = True,
+) -> Array:
+    """
+    Returns `num` evenly spaced samples, calculated over the interval [`start`, `stop`].
+
+    .. warning:: This function is not implemented yet and always raises `NotImplementedError`. The
+       sections below describe the intended behavior.
+
+    Parameters
+    ----------
+    start : int | float
+        The starting value of the sequence.
+    stop : int | float
+        The end value of the sequence, unless `endpoint` is set to False. In that case, the sequence
+        consists of all but the last of `num + 1` evenly spaced samples, so that `stop` is excluded.
+        Note that the step size changes when `endpoint` is False.
+    num : int
+        Number of samples to generate. Must be non-negative.
+    dtype : af.Dtype | None, optional, default: None
+        The data type of the output array. If `None`, a floating-point data type is intended.
+    device : Device | None, optional, default: None
+        The device on which to place the created array.
+    endpoint : bool, optional, default: True
+        If True, `stop` is the last sample. Otherwise, it is not included.
+
+    Returns
+    -------
+    Array
+        An array of `num` equally spaced samples in the closed interval [`start`, `stop`] or the
+        half-open interval [`start`, `stop`) (depending on whether `endpoint` is True or False).
+
+    Raises
+    ------
+    NotImplementedError
+        Always, until the implementation is completed.
+
+    See Also
+    --------
+    arange : Similar to `linspace`, but uses a step size (instead of the number of samples).
+
+    Examples
+    --------
+    Intended results once the function is implemented:
+
+    >>> linspace(2.0, 3.0, num=5)
+    Array([2. , 2.25, 2.5 , 2.75, 3. ])
+
+    >>> linspace(2.0, 3.0, num=5, endpoint=False)
+    Array([2. , 2.2, 2.4, 2.6, 2.8])
+    """
+    # BUG, FIXME: the draft implementation below is disabled because it does not work yet.
+    # # Default dtype handling based on 'start' and 'stop' types if 'dtype' not provided
+    # if dtype is None:
+    #     dtype = float32
+
+    # # Generate the linearly spaced array
+    # if endpoint:
+    #     step = (stop - start) / (num - 1) if num > 1 else 0
+    # else:
+    #     step = (stop - start) / num
+
+    # array = af.seq(start, stop, step, dtype=dtype)
+    # result_array = array if array.size == num else af.moddims(array, (num,))
+
+    # return Array._new(result_array)
+    raise NotImplementedError("linspace is not implemented yet.")
+
+
+def meshgrid(*arrays: Array, indexing: str = "xy") -> list[Array]:
+    """
+    Returns coordinate matrices from coordinate vectors.
+
+    Make N-D coordinate arrays for vectorized evaluations of N-D scalar/vector fields over N-D grids,
+    given one-dimensional coordinate arrays x1, x2,..., xn.
+
+    .. warning:: This function is not implemented yet and always raises `NotImplementedError`. The
+       sections below describe the intended behavior.
+
+    Parameters
+    ----------
+    *arrays : Array
+        One-dimensional arrays representing the coordinates of a grid.
+    indexing : {'xy', 'ij'}, optional, default: 'xy'
+        Cartesian ('xy', default) or matrix ('ij') indexing of output.
+
+    Returns
+    -------
+    list[Array]
+        List of N arrays, where `N` is the number of provided one-dimensional input arrays.
+        Each returned array must have the same shape. If `indexing` is 'xy', the last dimension
+        of the arrays corresponds to changes in x1, and the second-to-last corresponds to changes in x2,
+        and so forth. If `indexing` is 'ij', then the first dimension of the arrays corresponds to changes
+        in x1, the second dimension to changes in x2, and so forth.
+
+    Raises
+    ------
+    NotImplementedError
+        Always, until the implementation is completed.
+
+    See Also
+    --------
+    arange, linspace
+
+    Notes
+    -----
+    For two inputs of lengths M and N, 'xy' indexing is intended to produce outputs of shape (N, M),
+    whereas 'ij' indexing is intended to produce outputs of shape (M, N), as the examples show.
+
+    Examples
+    --------
+    >>> x = array([1, 2, 3])
+    >>> y = array([4, 5, 6, 7])
+    >>> xv, yv = meshgrid(x, y)
+    >>> xv
+    Array([[1, 2, 3],
+           [1, 2, 3],
+           [1, 2, 3],
+           [1, 2, 3]])
+    >>> yv
+    Array([[4, 4, 4],
+           [5, 5, 5],
+           [6, 6, 6],
+           [7, 7, 7]])
+
+    With `indexing='ij'`, the shape is transposed.
+    >>> xv, yv = meshgrid(x, y, indexing='ij')
+    >>> xv
+    Array([[1, 1, 1, 1],
+           [2, 2, 2, 2],
+           [3, 3, 3, 3]])
+    >>> yv
+    Array([[4, 5, 6, 7],
+           [4, 5, 6, 7],
+           [4, 5, 6, 7]])
+    """
+    # BUG, FIXME: the draft implementation below is disabled because it does not work yet.
+    # arrays_af = [arr._array for arr in arrays]  # Convert custom Array to ArrayFire array for processing
+    # dims = [arr.size for arr in arrays_af]  # Get the number of elements in each array
+
+    # result = []
+    # for i, arr in enumerate(arrays_af):
+    #     shape = [1] * len(arrays)
+    #     shape[i] = dims[i]
+    #     tiled_arr = af.tile(arr, tuple(shape))
+
+    #     # Expand each array to have the correct shape
+    #     for j, dim in enumerate(dims):
+    #         if j != i:
+    #             tiled_arr = af.moddims(tiled_arr, tuple(shape))
+    #             shape[j] = dim
+    #         else:
+    #             shape[j] = 1
+
+    #     if indexing == "xy" and len(arrays) > 1 and i == 0:
+    #         # Swap the first two dimensions for the x array in 'xy' indexing
+    #         tiled_arr = af.moddims(tiled_arr, (dims[1], dims[0], *dims[2:]))
+    #     elif indexing == "xy" and len(arrays) > 1 and i == 1:
+    #         # Ensure the y array is correctly shaped in 'xy' indexing
+    #         tiled_arr = af.reorder(tiled_arr, shape=(1, 0, *range(2, len(dims))))
+
+    #     result.append(Array._new(tiled_arr))
+
+    # if indexing == "ij":
+    #     # No need to modify the order for 'ij' indexing
+    #     pass
+
+    # return result
+    raise NotImplementedError("meshgrid is not implemented yet.")
+
+
+@manage_device
+def ones(
+    shape: int | tuple[int, ...],
+    *,
+    dtype: af.Dtype | None = None,
+    device: Device | None = None,
+) -> Array:
+    """
+    Creates an array of the requested shape in which every element is one.
+
+    This is a convenience wrapper around `af.constant` for the common case of a constant value of 1.
+
+    Parameters
+    ----------
+    shape : int or tuple of ints
+        The shape of the new array, e.g., (2, 3) or 2. An integer `n` is treated as the
+        one-dimensional shape `(n,)`; a tuple gives the extent of each dimension.
+    dtype : af.Dtype | None, optional
+        The data type of the new array, e.g., int32. When None, `float32` is used.
+    device : Device | None, optional
+        The device on which to place the created array. It is not read in the function body; it is
+        presumably consumed by the `manage_device` decorator.
+
+    Returns
+    -------
+    Array
+        An array of shape `shape` whose elements are all one. Its data type is `dtype`, or `float32`
+        when `dtype` is None.
+
+    Notes
+    -----
+    - `dtype` is validated with `_check_valid_dtype` before the array is created.
+    - The array is produced by a single `af.constant` call with the value 1.
+    - Pass `dtype` explicitly whenever a data type other than `float32` is required, for example
+      when the result is combined with integer arrays.
+
+    Examples
+    --------
+    The outputs below are schematic and do not reproduce the exact printed form of an `Array`.
+
+    >>> ones(5)
+    Array([1.0, 1.0, 1.0, 1.0, 1.0])
+
+    >>> ones((2, 3), dtype=float32)
+    Array([[1.0, 1.0, 1.0],
+           [1.0, 1.0, 1.0]])
+
+    >>> ones((2, 2), dtype=int32)
+    Array([[1, 1],
+           [1, 1]])
+    """
+    _check_valid_dtype(dtype)
+
+    # A bare integer is shorthand for a one-dimensional shape
+    dims = (shape,) if isinstance(shape, int) else shape
+
+    # float32 is the fallback data type
+    element_type = float32 if dtype is None else dtype
+
+    return Array._new(af.constant(1, dims, element_type))
+
+
+@manage_device
+def ones_like(x: Array, /, *, dtype: af.Dtype | None = None, device: Device | None = None) -> Array:
+    """
+    Creates an array shaped like `x` in which every element is one.
+
+    The work is delegated to `ones` once the shape and data type have been resolved.
+
+    Parameters
+    ----------
+    x : Array
+        The reference array. Only its shape and data type are read; its contents are not used.
+    dtype : af.Dtype | None, optional
+        The data type of the new array. When None, the data type of `x` is used; otherwise the given
+        type replaces it while the shape of `x` is kept.
+    device : Device | None, optional
+        The device on which to place the created array. It is forwarded unchanged to `ones`.
+
+    Returns
+    -------
+    Array
+        An array with the shape of `x` whose elements are all one. Its data type is `dtype` when given,
+        and the data type of `x` otherwise.
+
+    Notes
+    -----
+    - `dtype` is validated with `_check_valid_dtype` before the data type of `x` is consulted.
+    - An explicit `dtype` takes precedence over the data type of `x`.
+    - The returned array is newly created; `x` itself is only inspected, never modified.
+
+    Examples
+    --------
+    The outputs below are schematic and do not reproduce the exact printed form of an `Array`.
+
+    >>> x = array([[0, 1], [2, 3]])
+    >>> ones_like(x)
+    Array([[1, 1],
+           [1, 1]])
+
+    >>> ones_like(x, dtype=float32)
+    Array([[1.0, 1.0],
+           [1.0, 1.0]])
+
+    >>> y = array([[0.5, 1.5, 2.5]])
+    >>> ones_like(y)
+    Array([[1.0, 1.0, 1.0]])
+
+    In each case the result has exactly the shape of the reference array.
+    """
+    _check_valid_dtype(dtype)
+
+    # Fall back to the data type of `x` when none is requested explicitly
+    target_dtype = x.dtype if dtype is None else dtype
+
+    return ones(x.shape, dtype=target_dtype, device=device)  # type: ignore[no-any-return]  # FIXME
+
+
+def tril(x: Array, /, *, k: int = 0) -> Array:
+    """
+    Returns the lower triangular part of the matrix `x`, with elements above the kth diagonal zeroed.
+
+    Diagonals run parallel to the main diagonal, which goes from the top-left corner towards the
+    bottom-right corner. `k = 0` selects the main diagonal, `k > 0` a diagonal above it, and `k < 0` a
+    diagonal below it.
+
+    Parameters
+    ----------
+    x : Array
+        The input matrix. It must be exactly two-dimensional, because its shape is unpacked into a
+        row count and a column count; it does not have to be square.
+    k : int, optional, default: 0
+        Diagonal above which to zero elements. `k=0` is the main diagonal, `k>0` is above the main
+        diagonal, and `k<0` is below the main diagonal.
+
+    Returns
+    -------
+    Array
+        An array with the same shape and data type as `x`, where elements above the kth diagonal are
+        zeroed and the elements on or below it are retained.
+
+    Examples
+    --------
+    >>> x = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    >>> tril(x)
+    Array([[1, 0, 0],
+           [4, 5, 0],
+           [7, 8, 9]])
+
+    >>> tril(x, k=1)
+    Array([[1, 2, 0],
+           [4, 5, 6],
+           [7, 8, 9]])
+
+    >>> tril(x, k=-1)
+    Array([[0, 0, 0],
+           [4, 0, 0],
+           [7, 8, 0]])
+
+    Notes
+    -----
+    - Elements are zeroed by multiplying `x` with a 0/1 mask cast to the data type of `x`, so NaN or
+      infinite values above the kth diagonal are presumably not replaced by zero.
+    - Arrays with more or fewer than two dimensions are rejected when the shape is unpacked, which
+      raises a `ValueError`.
+    """
+    dtype = x.dtype
+
+    _check_valid_dtype(dtype)
+
+    n_rows, n_cols = x.shape
+
+    # Both index grids have shape (n_rows, n_cols): iota over an (n_rows, 1) column tiled across the
+    # columns numbers the rows, and iota over a (1, n_cols) row tiled down the rows numbers the columns.
+    row_indices = af.iota((n_rows, 1), tile_shape=(1, n_cols))
+    col_indices = af.iota((1, n_cols), tile_shape=(n_rows, 1))
+
+    # Keep the elements on or below the kth diagonal, i.e. those with col - row <= k
+    mask = col_indices <= (row_indices + k)
+
+    return Array._new(x._array * af.cast(mask, dtype))
+
+
+def triu(x: Array, /, *, k: int = 0) -> Array:
+    """
+    Returns the upper triangular part of the matrix `x`, with elements below the kth diagonal zeroed.
+
+    Diagonals run parallel to the main diagonal, which goes from the top-left corner towards the
+    bottom-right corner. `k = 0` selects the main diagonal, `k > 0` a diagonal above it, and `k < 0` a
+    diagonal below it.
+
+    Parameters
+    ----------
+    x : Array
+        The input matrix. It must be exactly two-dimensional, because its shape is unpacked into a
+        row count and a column count; it does not have to be square.
+    k : int, optional, default: 0
+        Diagonal below which to zero elements. `k=0` is the main diagonal, `k>0` is above the main
+        diagonal, and `k<0` is below the main diagonal.
+
+    Returns
+    -------
+    Array
+        An array with the same shape and data type as `x`, where elements below the kth diagonal are
+        zeroed and the elements on or above it are retained.
+
+    Examples
+    --------
+    >>> x = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    >>> triu(x)
+    Array([[1, 2, 3],
+           [0, 5, 6],
+           [0, 0, 9]])
+
+    >>> triu(x, k=1)
+    Array([[0, 2, 3],
+           [0, 0, 6],
+           [0, 0, 0]])
+
+    >>> triu(x, k=-1)
+    Array([[1, 2, 3],
+           [4, 5, 6],
+           [0, 8, 9]])
+
+    Notes
+    -----
+    - Elements are zeroed by multiplying `x` with a 0/1 mask cast to the data type of `x`, so NaN or
+      infinite values below the kth diagonal are presumably not replaced by zero.
+    - Arrays with more or fewer than two dimensions are rejected when the shape is unpacked, which
+      raises a `ValueError`.
+    """
+    dtype = x.dtype
+
+    _check_valid_dtype(dtype)
+
+    n_rows, n_cols = x.shape
+
+    # Both index grids have shape (n_rows, n_cols): iota over an (n_rows, 1) column tiled across the
+    # columns numbers the rows, and iota over a (1, n_cols) row tiled down the rows numbers the columns.
+    row_indices = af.iota((n_rows, 1), tile_shape=(1, n_cols))
+    col_indices = af.iota((1, n_cols), tile_shape=(n_rows, 1))
+
+    # Keep the elements on or above the kth diagonal, i.e. those with col - row >= k
+    mask = row_indices <= (col_indices - k)
+
+    return Array._new(x._array * af.cast(mask, dtype))
+
+
+@manage_device
+def zeros(
+    shape: int | tuple[int, ...],
+    *,
+    dtype: af.Dtype | None = None,
+    device: Device | None = None,
+) -> Array:
+    """
+    Creates an array of the requested shape in which every element is zero.
+
+    Zero-filled arrays commonly serve as initial placeholders or accumulators in numerical
+    computations.
+
+    Parameters
+    ----------
+    shape : int or tuple of ints
+        The shape of the new array. An integer `n` is treated as the one-dimensional shape `(n,)`;
+        a tuple gives the extent of each dimension.
+    dtype : af.Dtype | None, optional
+        The data type of the new array, e.g., int32. When None, `float32` is used.
+    device : Device | None, optional
+        The device on which to create the array. It is not read in the function body; it is
+        presumably consumed by the `manage_device` decorator.
+
+    Returns
+    -------
+    Array
+        An array of shape `shape` whose elements are all zero. Its data type is `dtype`, or `float32`
+        when `dtype` is None.
+
+    Notes
+    -----
+    - `dtype` is validated with `_check_valid_dtype` before the array is created.
+    - The array is produced by a single `af.constant` call with the value 0.
+    - Pass `dtype` explicitly whenever a data type other than `float32` is required, for example
+      when the result is combined with integer arrays.
+
+    Examples
+    --------
+    The outputs below are schematic and do not reproduce the exact printed form of an `Array`.
+
+    >>> zeros(5)
+    Array([0.0, 0.0, 0.0, 0.0, 0.0])
+
+    >>> zeros((2, 3), dtype=int32)
+    Array([[0, 0, 0],
+           [0, 0, 0]])
+
+    >>> zeros((2, 2), dtype=float64)
+    Array([[0.0, 0.0],
+           [0.0, 0.0]])
+
+    Use this function whenever a zero-initialized array is needed.
+    """
+    _check_valid_dtype(dtype)
+
+    # A bare integer is shorthand for a one-dimensional shape
+    dims = (shape,) if isinstance(shape, int) else shape
+
+    # float32 is the fallback data type
+    element_type = float32 if dtype is None else dtype
+
+    return Array._new(af.constant(0, dims, element_type))
+
+
+@manage_device
+def zeros_like(x: Array, /, *, dtype: af.Dtype | None = None, device: Device | None = None) -> Array:
+    """
+    Creates an array shaped like `x` in which every element is zero.
+
+    This is the usual way to obtain a zero-initialized array whose dimensions match existing data;
+    the work is delegated to `zeros` once the shape and data type have been resolved.
+
+    Parameters
+    ----------
+    x : Array
+        The reference array. Only its shape and data type are read; its contents are not used
+        and have no influence on the result.
+    dtype : af.Dtype | None, optional
+        The data type of the new array. When None, the data type of `x` is used.
+        Otherwise the given type replaces the data type of `x`, while the shape of `x`
+        is kept.
+    device : Device | None, optional
+        The device on which to create the new array. It is forwarded unchanged to
+        `zeros`.
+
+    Returns
+    -------
+    Array
+        A new array with the shape of `x` whose elements are all zero. Its data type is `dtype` when
+        given, and the data type of `x` otherwise.
+
+    Notes
+    -----
+    - `dtype` is validated with `_check_valid_dtype` before the data type of `x` is consulted.
+    - An explicit `dtype` takes precedence over the data type of `x`.
+    - The returned array is newly created; `x` itself is only inspected, never modified.
+    - Because the data type is resolved here, `zeros` is always called with an explicit data type
+      from this function.
+
+    Examples
+    --------
+    The outputs below are schematic and do not reproduce the exact printed form of an `Array`.
+
+    >>> x = array([[1, 2, 3], [4, 5, 6]])
+    >>> zeros_like(x)
+    Array([[0, 0, 0],
+           [0, 0, 0]])
+
+    >>> zeros_like(x, dtype=float32)
+    Array([[0.0, 0.0, 0.0],
+           [0.0, 0.0, 0.0]])
+
+    >>> y = array([[0.5, 1.5, 2.5]])
+    >>> zeros_like(y)
+    Array([[0.0, 0.0, 0.0]])
+
+    In each case the result has exactly the shape of the reference array.
+    """
+    _check_valid_dtype(dtype)
+
+    # Fall back to the data type of `x` when none is requested explicitly
+    target_dtype = x.dtype if dtype is None else dtype
+
+    return zeros(x.shape, dtype=target_dtype, device=device)  # type: ignore[no-any-return]
diff --git a/arrayfire/array_api/_data_type_functions.py b/arrayfire/array_api/_data_type_functions.py
new file mode 100644
index 0000000..2cd038a
--- /dev/null
+++ b/arrayfire/array_api/_data_type_functions.py
@@ -0,0 +1,357 @@
+from dataclasses import dataclass
+
+import arrayfire as af
+
+from ._array_object import Array
+from ._dtypes import (
+    all_dtypes,
+    boolean_dtypes,
+    complex_floating_dtypes,
+    float16,
+    float32,
+    float64,
+    int8,
+    int16,
+    int32,
+    int64,
+    integer_dtypes,
+    numeric_dtypes,
+    promote_types,
+    real_floating_dtypes,
+    signed_integer_dtypes,
+    unsigned_integer_dtypes,
+)
+
+
+def astype(x: Array, dtype: af.Dtype, /, *, copy: bool = True) -> Array:
+    """
+    Casts an array to a specified data type, irrespective of type promotion rules.
+
+    The input array `x` is converted to `dtype` with an explicit cast, so conversions that type promotion would not
+    perform (such as downcasting) are possible. A complex input array can only be cast to another complex data
+    type.
+
+    Parameters
+    ----------
+    x : Array
+        The input array to cast.
+    dtype : af.Dtype
+        The desired data type for the new array.
+    copy : bool, optional
+        If True (the default), a new array is always returned. If False and `dtype` equals the data type of `x`,
+        the input array `x` itself is returned; otherwise a newly cast array is returned.
+
+    Returns
+    -------
+    Array
+        An array with the specified data type. The shape of the returned array is identical to that of the input
+        array `x`.
+
+    Notes
+    -----
+    - Casting from complex to real data types is not permitted by this function to avoid data loss and ambiguity.
+      Instead, users must explicitly decide how to handle complex numbers (e.g., taking real or imaginary parts)
+      before casting.
+    - When casting from boolean to numeric types, `True` is treated as `1` and `False` as `0`.
+    - When casting from numeric types to boolean, `0` is treated as `False` and any non-zero value as `True`.
+    - Complex to boolean conversion is currently rejected together with the other complex to non-complex casts.
+    - The behavior of casting NaN and infinity values to integral types is undefined and depends on the implementation.
+
+    Examples
+    --------
+    >>> a = asarray([1, 2, 3], dtype=int32)
+    >>> astype(a, dtype=float32)
+    Array([1.0, 2.0, 3.0])
+
+    >>> c = asarray([True, False, True], dtype=bool)
+    >>> astype(c, dtype=int32)
+    Array([1, 0, 1])
+
+    >>> d = asarray([0, -1, 1], dtype=int32)
+    >>> astype(d, dtype=bool)
+    Array([False, True, True])
+
+    Raises
+    ------
+    ValueError
+        If attempting to cast a complex array to any data type that is not complex.
+    """
+    # Only complex -> non-complex is rejected; complex64 <-> complex128 is an ordinary cast.
+    if x.dtype in complex_floating_dtypes and dtype not in complex_floating_dtypes:
+        raise ValueError("Casting is not allowed from complex dtypes.")
+
+    # With copy=False a cast to the array's own dtype is a no-op, so the input itself is handed back.
+    if not copy and dtype == x.dtype:
+        return x
+
+    source = af.copy_array(x._array) if copy else x._array
+    return Array._new(af.cast(source, dtype))
+
+
+def broadcast_arrays(*arrays: Array) -> list[Array]:
+    raise NotImplementedError("broadcast_arrays is not implemented yet.")  # not the NotImplemented sentinel
+
+
+def broadcast_to(x: Array, /, shape: tuple[int, ...]) -> Array:
+    raise NotImplementedError("broadcast_to is not implemented yet.")  # not the NotImplemented sentinel
+
+
+def can_cast(from_: af.Dtype | Array, to: af.Dtype, /) -> bool:
+    raise NotImplementedError("can_cast is not implemented yet.")  # not the (truthy) NotImplemented sentinel
+
+
+@dataclass
+class finfo_object:  # record of floating-point limits, built and returned by finfo()
+    bits: int  # number of bits occupied by the data type
+    eps: float  # smallest positive number such that 1.0 + eps != 1.0
+    max: float  # largest representable number
+    min: float  # smallest (most negative) representable number
+    smallest_normal: float  # smallest positive normal number
+    dtype: af.Dtype  # the data type these limits describe
+
+
+@dataclass
+class iinfo_object:  # record of integer limits, built and returned by iinfo()
+    bits: int  # number of bits occupied by the data type
+    max: int  # largest representable integer
+    min: int  # smallest representable integer
+    dtype: af.Dtype  # the data type these limits describe
+
+
+def finfo(type: af.Dtype | Array, /) -> finfo_object:
+    """
+    Returns information about the machine limits for floating-point data types.
+
+    This function provides detailed attributes of floating-point data types such as precision, minimum and maximum
+    values, and epsilon. Only float16, float32 and float64 are handled by the lookup in this function; any other
+    data type, including the complex data types, is rejected with a ValueError.
+
+    Parameters
+    ----------
+    type : af.Dtype | Array
+        The floating-point data type, or an array whose own data type is queried. Note that the parameter name
+        shadows the `type` builtin inside this function.
+
+    Returns
+    -------
+    finfo_object
+        An object with detailed attributes of the floating-point data type:
+        - bits (int): Number of bits occupied by the real-valued data type.
+        - eps (float): Smallest representable positive number such that 1.0 + eps != 1.0.
+        - max (float): Largest representable real-valued number.
+        - min (float): Smallest representable real-valued number.
+        - smallest_normal (float): Smallest positive normal number that can be represented accurately.
+        - dtype (af.Dtype): The data type queried.
+
+    Notes
+    -----
+    - The returned limits are constants hard-coded in this function; they are not queried from the backend.
+    - Complex data types are not supported yet, so passing one raises ValueError.
+    - This function is useful for understanding the characteristics of different floating-point types, especially
+      when precision and range are critical to the application.
+
+    Examples
+    --------
+    >>> finfo(float32)
+    finfo_object(bits=32, eps=1.19209290e-7, max=3.4028234e38, min=-3.4028234e38, smallest_normal=1.1754943e-38, dtype=float32)
+
+    >>> finfo(array([1.0, 2.0], dtype=float64))
+    finfo_object(bits=64, eps=2.2204460492503131e-16, max=1.7976931348623157e308, min=-1.7976931348623157e308, smallest_normal=2.2250738585072014e-308, dtype=float64)
+
+    Raises
+    ------
+    ValueError
+        If the `type` argument is neither a recognized floating-point `af.Dtype` nor an `Array` containing a supported
+        floating-point data type.
+
+    """  # noqa
+    if isinstance(type, af.Dtype):
+        dtype = type
+    elif isinstance(type, Array):
+        dtype = type.dtype  # dtype of the instance passed in; the class attribute `Array.dtype` is not a dtype
+    else:
+        raise ValueError("Unsupported dtype.")
+
+    if dtype == float32:
+        return finfo_object(32, 1.19209290e-7, 3.4028234e38, -3.4028234e38, 1.1754943e-38, float32)
+    if dtype == float64:
+        return finfo_object(
+            64,
+            2.2204460492503131e-16,
+            1.7976931348623157e308,
+            -1.7976931348623157e308,
+            2.2250738585072014e-308,
+            float64,
+        )
+    if dtype == float16:
+        return finfo_object(16, 0.0009765625, 65504, -65504, 0.00006103515625, float16)  # eps is exactly 2**-10
+
+    raise ValueError("Unsupported dtype.")
+
+
+def iinfo(type: af.Dtype | Array, /) -> iinfo_object:
+    """
+    Returns information about the machine limits for integer data types.
+
+    This function provides attributes of integer data types such as the number of bits and the range of representable
+    values (minimum and maximum). It can accept either an integer data type directly or an array from which the integer
+    data type is inferred.
+
+    Parameters
+    ----------
+    type : af.Dtype | Array
+        The integer data type or an array from which to derive the data type.
+
+    Returns
+    -------
+    iinfo_object
+        An object with attributes detailing the properties of the integer data type:
+        - bits (int): Number of bits occupied by the integer data type.
+        - max (int): Largest representable integer.
+        - min (int): Smallest representable integer.
+        - dtype (af.Dtype): The data type queried.
+
+    Notes
+    -----
+    - Only the signed data types int8, int16, int32 and int64 are handled by the lookup in this function; any
+      other data type, including the unsigned integer data types, raises ValueError.
+
+    Examples
+    --------
+    >>> iinfo(int32)
+    iinfo_object(bits=32, max=2147483647, min=-2147483648, dtype=int32)
+
+    >>> iinfo(array([1, 2, 3], dtype=int64))
+    iinfo_object(bits=64, max=9223372036854775807, min=-9223372036854775808, dtype=int64)
+
+    Raises
+    ------
+    ValueError
+        If the `type` argument is neither a recognized integer `af.Dtype` nor an `Array` containing a supported
+        integer data type.
+
+    """
+    if isinstance(type, af.Dtype):
+        dtype = type
+    elif isinstance(type, Array):
+        dtype = type.dtype  # dtype of the instance passed in; the class attribute `Array.dtype` is not a dtype
+    else:
+        raise ValueError("Unsupported dtype.")
+
+    if dtype == int32:
+        return iinfo_object(32, 2147483647, -2147483648, int32)  # max is 2**31 - 1, min is -(2**31)
+    if dtype == int16:
+        return iinfo_object(16, 32767, -32768, int16)
+    if dtype == int8:
+        return iinfo_object(8, 127, -128, int8)
+    if dtype == int64:
+        return iinfo_object(64, 9223372036854775807, -9223372036854775808, int64)
+
+    raise ValueError("Unsupported dtype.")
+
+
+def isdtype(dtype: af.Dtype, kind: af.Dtype | str | tuple[af.Dtype | str, ...]) -> bool:
+    """
+    Reports whether `dtype` belongs to the data type kind described by `kind`.
+
+    `kind` may be a dtype object, one of the string identifiers listed below, or a tuple mixing both. A dtype
+    object is compared for equality, a string is looked up in its category of data types, and a tuple matches as
+    soon as any one of its members matches.
+
+    Parameters
+    ----------
+    dtype : af.Dtype
+        The data type being tested.
+    kind : af.Dtype | str | tuple[af.Dtype | str, ...]
+        What `dtype` is tested against. The recognised string identifiers are:
+        - 'bool': the boolean data types.
+        - 'signed integer': the signed integer data types (e.g., int8, int16, int32, int64).
+        - 'unsigned integer': the unsigned integer data types (e.g., uint8, uint16, uint32, uint64).
+        - 'integral': every integer data type, signed or unsigned.
+        - 'real floating': the real-valued floating-point data types (e.g., float32, float64).
+        - 'complex floating': the complex floating-point data types (e.g., complex64, complex128).
+        - 'numeric': every numeric data type (integral, real floating, and complex floating).
+
+    Returns
+    -------
+    bool
+        True when `dtype` satisfies `kind`, False otherwise.
+
+    Raises
+    ------
+    ValueError
+        If `kind` is a string that is not one of the identifiers above, or if it is neither a dtype, a string,
+        nor a tuple.
+
+    Examples
+    --------
+    >>> isdtype(float32, 'real floating')
+    True
+
+    >>> isdtype(int32, 'numeric')
+    True
+
+    >>> isdtype(complex64, ('complex floating', 'real floating'))
+    True
+
+    >>> isdtype(uint32, ('signed integer', 'unsigned integer'))
+    True
+
+    """
+    if isinstance(kind, tuple):
+        for single_kind in kind:
+            if isdtype(dtype, single_kind):
+                return True
+        return False
+
+    if isinstance(kind, af.Dtype):
+        return dtype == kind
+
+    if not isinstance(kind, str):
+        raise ValueError("Kind must be a dtype, a string identifier, or a tuple of identifiers.")
+
+    members_by_kind = {
+        "bool": boolean_dtypes,
+        "signed integer": signed_integer_dtypes,
+        "unsigned integer": unsigned_integer_dtypes,
+        "integral": integer_dtypes,
+        "real floating": real_floating_dtypes,
+        "complex floating": complex_floating_dtypes,
+        "numeric": numeric_dtypes,
+    }
+    if kind not in members_by_kind:
+        raise ValueError(f"Unsupported kind: {kind}")
+
+    return dtype in members_by_kind[kind]
+
+
+def result_type(*arrays_and_dtypes: Array | af.Dtype) -> af.Dtype:
+    """
+    Returns the dtype that results from applying the type promotion rules to all the arguments.
+
+    Each argument is an array_api `Array` (its dtype is used) or a dtype from `all_dtypes`. The dtypes are
+    promoted pairwise, left to right, with `promote_types`; a single argument is returned as is.
+
+    A raw `af.Array` or an unknown dtype raises TypeError, and calling without arguments raises ValueError.
+    """
+    # FIXME
+    # This logic is duplicated from the NumPy sources.
+    # Note: only the promotions required by the spec are implemented here rather than delegating to
+    # np.result_type, which adds extra promotions such as int64 + uint64 -> float64 and does value-based casting
+    # on scalar arrays.
+    dtypes = []
+    for item in arrays_and_dtypes:
+        if isinstance(item, Array):
+            dtypes.append(item.dtype)
+            continue
+        if isinstance(item, af.Array) or item not in all_dtypes:
+            raise TypeError("result_type() inputs must be array_api arrays or dtypes")
+        dtypes.append(item)
+
+    if not dtypes:
+        raise ValueError("at least one array or dtype is required")
+
+    promoted = dtypes[0]
+    for other in dtypes[1:]:
+        promoted = promote_types(promoted, other)
+    return promoted
diff --git a/arrayfire/array_api/_dtypes.py b/arrayfire/array_api/_dtypes.py
new file mode 100644
index 0000000..ddf227d
--- /dev/null
+++ b/arrayfire/array_api/_dtypes.py
@@ -0,0 +1,172 @@
+from __future__ import annotations
+
+import ctypes
+
+__all__ = [
+    "all_dtypes",
+    "boolean_dtypes",
+    "real_floating_dtypes",
+    "floating_dtypes",
+    "complex_floating_dtypes",
+    "integer_dtypes",
+    "signed_integer_dtypes",
+    "unsigned_integer_dtypes",
+    "integer_or_boolean_dtypes",
+    "real_numeric_dtypes",
+    "numeric_dtypes",
+    "dtype_categories",
+    # Individual dtype objects. NOTE(review): float16 and promote_types are not exported — confirm this is intended.
+    "bool",
+    "complex64",
+    "complex128",
+    "float32",
+    "float64",
+    "int8",
+    "int16",
+    "int32",
+    "int64",
+    "uint8",
+    "uint16",
+    "uint32",
+    "uint64",
+]
+
+
+from arrayfire import bool
+from arrayfire import complex32 as afcomplex32
+from arrayfire import complex64 as afcomplex64
+from arrayfire import float16, float32, float64, int16, int32, int64, uint8, uint16, uint32, uint64
+from arrayfire.dtypes import Dtype
+
+int8 = Dtype("int8", "b8", ctypes.c_char, "int8", 4)  # HACK int8 is not supported in AF. Selecting same as b8
+complex64 = afcomplex32  # alias of arrayfire.complex32 under the name used by this package
+complex128 = afcomplex64  # alias of arrayfire.complex64 under the name used by this package
+
+all_dtypes = (
+    int8,
+    int16,
+    int32,
+    int64,
+    uint8,
+    uint16,
+    uint32,
+    uint64,
+    float16,
+    float32,
+    float64,
+    complex64,
+    complex128,
+    bool,
+)
+boolean_dtypes = (bool,)
+real_floating_dtypes = (float16, float32, float64)
+floating_dtypes = (float16, float32, float64, complex64, complex128)  # real and complex floating dtypes
+complex_floating_dtypes = (complex64, complex128)
+integer_dtypes = (int8, int16, int32, int64, uint8, uint16, uint32, uint64)
+signed_integer_dtypes = (int8, int16, int32, int64)
+unsigned_integer_dtypes = (uint8, uint16, uint32, uint64)
+integer_or_boolean_dtypes = boolean_dtypes + integer_dtypes
+real_numeric_dtypes = real_floating_dtypes + integer_dtypes
+numeric_dtypes = floating_dtypes + integer_dtypes  # floating_dtypes already carries the complex dtypes
+
+dtype_categories = {
+    "all": all_dtypes,
+    "real numeric": real_numeric_dtypes,
+    "numeric": numeric_dtypes,
+    "integer": integer_dtypes,
+    "integer or boolean": integer_or_boolean_dtypes,
+    "boolean": boolean_dtypes,
+    "real floating-point": real_floating_dtypes,  # real only; the combined tuple is under "floating-point"
+    "complex floating-point": complex_floating_dtypes,
+    "floating-point": floating_dtypes,
+}
+
+
+# Note: the spec defines a restricted type promotion table compared to NumPy. In particular, cross-kind
+# promotions like integer + float or boolean + integer are not allowed, even for functions that accept both
+# kinds. Additionally, NumPy promotes signed integer + uint64 to float64, but this promotion is not allowed
+# here. To be clear, Python scalar int objects are allowed to promote to floating-point dtypes, but only in
+# array operators (see the Array._promote_scalar method in _array_object.py).
+# NOTE(review): float16 is listed in all_dtypes but has no entry below, so promote_types raises TypeError for
+# any pair involving float16 (including float16 with itself) — confirm whether that is intended.
+_promotion_table = {
+    (int8, int8): int8,
+    (int8, int16): int16,
+    (int8, int32): int32,
+    (int8, int64): int64,
+    (int16, int8): int16,
+    (int16, int16): int16,
+    (int16, int32): int32,
+    (int16, int64): int64,
+    (int32, int8): int32,
+    (int32, int16): int32,
+    (int32, int32): int32,
+    (int32, int64): int64,
+    (int64, int8): int64,
+    (int64, int16): int64,
+    (int64, int32): int64,
+    (int64, int64): int64,
+    (uint8, uint8): uint8,
+    (uint8, uint16): uint16,
+    (uint8, uint32): uint32,
+    (uint8, uint64): uint64,
+    (uint16, uint8): uint16,
+    (uint16, uint16): uint16,
+    (uint16, uint32): uint32,
+    (uint16, uint64): uint64,
+    (uint32, uint8): uint32,
+    (uint32, uint16): uint32,
+    (uint32, uint32): uint32,
+    (uint32, uint64): uint64,
+    (uint64, uint8): uint64,
+    (uint64, uint16): uint64,
+    (uint64, uint32): uint64,
+    (uint64, uint64): uint64,
+    (int8, uint8): int16,
+    (int8, uint16): int32,
+    (int8, uint32): int64,
+    (int16, uint8): int16,
+    (int16, uint16): int32,
+    (int16, uint32): int64,
+    (int32, uint8): int32,
+    (int32, uint16): int32,
+    (int32, uint32): int64,
+    (int64, uint8): int64,
+    (int64, uint16): int64,
+    (int64, uint32): int64,
+    (uint8, int8): int16,
+    (uint16, int8): int32,
+    (uint32, int8): int64,
+    (uint8, int16): int16,
+    (uint16, int16): int32,
+    (uint32, int16): int64,
+    (uint8, int32): int32,
+    (uint16, int32): int32,
+    (uint32, int32): int64,
+    (uint8, int64): int64,
+    (uint16, int64): int64,
+    (uint32, int64): int64,
+    (float32, float32): float32,
+    (float32, float64): float64,
+    (float64, float32): float64,
+    (float64, float64): float64,
+    (complex64, complex64): complex64,
+    (complex64, complex128): complex128,
+    (complex128, complex64): complex128,
+    (complex128, complex128): complex128,
+    (float32, complex64): complex64,
+    (float32, complex128): complex128,
+    (float64, complex64): complex128,
+    (float64, complex128): complex128,
+    (complex64, float32): complex64,
+    (complex64, float64): complex128,
+    (complex128, float32): complex128,
+    (complex128, float64): complex128,
+    (bool, bool): bool,
+}
+
+
+def promote_types(type1: Dtype, type2: Dtype) -> Dtype:
+    if (type1, type2) not in _promotion_table:  # the table is keyed by the ordered (type1, type2) pair
+        raise TypeError(f"{type1} and {type2} cannot be type promoted together")
+    return _promotion_table[type1, type2]
diff --git a/arrayfire/array_api/_elementwise_functions.py b/arrayfire/array_api/_elementwise_functions.py
new file mode 100644
index 0000000..45f7250
--- /dev/null
+++ b/arrayfire/array_api/_elementwise_functions.py
@@ -0,0 +1,248 @@
+from __future__ import annotations
+
+import arrayfire as af
+
+from ._array_object import Array
+
+# TODO
+# Add documentation and edge cases like types checking, array values, etc.
+
+
+def abs(x: Array, /) -> Array:
+    return Array._new(af.abs(x._array))  # wraps af.abs applied to the underlying array
+
+
+def acos(x: Array, /) -> Array:
+    return Array._new(af.acos(x._array))  # wraps af.acos applied to the underlying array
+
+
+def acosh(x: Array, /) -> Array:
+    return Array._new(af.acosh(x._array))  # wraps af.acosh applied to the underlying array
+
+
+def add(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.add(x1._array, x2._array))  # wraps af.add applied to both underlying arrays
+
+
+def asin(x: Array, /) -> Array:
+    return Array._new(af.asin(x._array))  # wraps af.asin applied to the underlying array
+
+
+def asinh(x: Array, /) -> Array:
+    return Array._new(af.asinh(x._array))  # wraps af.asinh applied to the underlying array
+
+
+def atan(x: Array, /) -> Array:
+    return Array._new(af.atan(x._array))  # wraps af.atan applied to the underlying array
+
+
+def atan2(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.atan2(x1._array, x2._array))  # wraps af.atan2 applied to both underlying arrays
+
+
+def atanh(x: Array, /) -> Array:
+    return Array._new(af.atanh(x._array))  # wraps af.atanh applied to the underlying array
+
+
+def bitwise_and(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.bitand(x1._array, x2._array))  # wraps af.bitand applied to both underlying arrays
+
+
+def bitwise_left_shift(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.bitshiftl(x1._array, x2._array))  # wraps af.bitshiftl applied to both underlying arrays
+
+
+def bitwise_invert(x: Array, /) -> Array:
+    return Array._new(af.bitnot(x._array))  # wraps af.bitnot applied to the underlying array
+
+
+def bitwise_or(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.bitor(x1._array, x2._array))  # wraps af.bitor applied to both underlying arrays
+
+
+def bitwise_right_shift(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.bitshiftr(x1._array, x2._array))  # wraps af.bitshiftr applied to both underlying arrays
+
+
+def bitwise_xor(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.bitxor(x1._array, x2._array))  # wraps af.bitxor applied to both underlying arrays
+
+
+def ceil(x: Array, /) -> Array:
+    return Array._new(af.ceil(x._array))  # wraps af.ceil applied to the underlying array
+
+
+def conj(x: Array, /) -> Array:
+    return Array._new(af.conjg(x._array))  # wraps af.conjg (note the different spelling) on the underlying array
+
+
+def cos(x: Array, /) -> Array:
+    return Array._new(af.cos(x._array))  # wraps af.cos applied to the underlying array
+
+
+def cosh(x: Array, /) -> Array:
+    return Array._new(af.cosh(x._array))  # wraps af.cosh applied to the underlying array
+
+
+def divide(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.div(x1._array, x2._array))  # wraps af.div applied to both underlying arrays
+
+
+def equal(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.eq(x1._array, x2._array))  # wraps af.eq applied to both underlying arrays
+
+
+def exp(x: Array, /) -> Array:
+    return Array._new(af.exp(x._array))  # wraps af.exp applied to the underlying array
+
+
+def expm1(x: Array, /) -> Array:
+    return Array._new(af.expm1(x._array))  # wraps af.expm1 applied to the underlying array
+
+
+def floor(x: Array, /) -> Array:
+    return Array._new(af.floor(x._array))  # wraps af.floor applied to the underlying array
+
+
+def floor_divide(x1: Array, x2: Array, /) -> Array:
+    # TODO: implement; until then fail loudly instead of handing callers the NotImplemented sentinel.
+    raise NotImplementedError("floor_divide is not implemented yet.")
+
+
+def greater(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.gt(x1._array, x2._array))  # wraps af.gt applied to both underlying arrays
+
+
+def greater_equal(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.ge(x1._array, x2._array))  # wraps af.ge applied to both underlying arrays
+
+
+def imag(x: Array, /) -> Array:
+    return Array._new(af.imag(x._array))  # wraps af.imag applied to the underlying array
+
+
+def isfinite(x: Array, /) -> Array:
+    # TODO: implement; until then fail loudly instead of handing callers the NotImplemented sentinel.
+    raise NotImplementedError("isfinite is not implemented yet.")
+
+
+def isinf(x: Array, /) -> Array:
+    return Array._new(af.isinf(x._array))  # wraps af.isinf applied to the underlying array
+
+
+def isnan(x: Array, /) -> Array:
+    return Array._new(af.isnan(x._array))  # wraps af.isnan applied to the underlying array
+
+
+def less(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.lt(x1._array, x2._array))  # wraps af.lt applied to both underlying arrays
+
+
+def less_equal(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.le(x1._array, x2._array))  # wraps af.le applied to both underlying arrays
+
+
+def log(x: Array, /) -> Array:
+    return Array._new(af.log(x._array))  # wraps af.log applied to the underlying array
+
+
+def log1p(x: Array, /) -> Array:
+    return Array._new(af.log1p(x._array))  # wraps af.log1p applied to the underlying array
+
+
+def log2(x: Array, /) -> Array:
+    return Array._new(af.log2(x._array))  # wraps af.log2 applied to the underlying array
+
+
+def log10(x: Array, /) -> Array:
+    return Array._new(af.log10(x._array))  # wraps af.log10 applied to the underlying array
+
+
+def logaddexp(x1: Array, x2: Array) -> Array:
+    # TODO: implement. NOTE(review): unlike the sibling functions this signature has no `/` marker — confirm.
+    raise NotImplementedError("logaddexp is not implemented yet.")
+
+
+def logical_and(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.logical_and(x1._array, x2._array))  # wraps af.logical_and applied to both underlying arrays
+
+
+def logical_not(x: Array, /) -> Array:
+    return Array._new(af.logical_not(x._array))  # wraps af.logical_not applied to the underlying array
+
+
+def logical_or(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.logical_or(x1._array, x2._array))  # wraps af.logical_or applied to both underlying arrays
+
+
+def logical_xor(x1: Array, x2: Array, /) -> Array:
+    # TODO: implement; until then fail loudly instead of handing callers the NotImplemented sentinel.
+    raise NotImplementedError("logical_xor is not implemented yet.")
+
+
+def multiply(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.mul(x1._array, x2._array))  # wraps af.mul applied to both underlying arrays
+
+
+def negative(x: Array, /) -> Array:
+    return Array._new(-x._array)  # numerical negation via the array's unary minus, not an `x < 1` mask
+
+
+def not_equal(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.neq(x1._array, x2._array))  # wraps af.neq applied to both underlying arrays
+
+
+def positive(x: Array, /) -> Array:
+    return Array._new(af.copy_array(x._array))  # `+x` leaves values unchanged: return a copy, not an `x > 1` mask
+
+
+def pow(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.pow(x1._array, x2._array))  # wraps af.pow applied to both underlying arrays
+
+
+def real(x: Array, /) -> Array:
+    return Array._new(af.real(x._array))  # wraps af.real applied to the underlying array
+
+
+def remainder(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.rem(x1._array, x2._array))  # NOTE(review): confirm af.rem's sign convention vs the spec
+
+
+def round(x: Array, /) -> Array:
+    return Array._new(af.round(x._array))  # NOTE(review): confirm af.round's tie-breaking rule matches the spec
+
+
+def sign(x: Array, /) -> Array:
+    return Array._new(af.sign(x._array))  # NOTE(review): confirm af.sign returns -1/0/+1, not a 0/1 negative flag
+
+
+def sin(x: Array, /) -> Array:
+    return Array._new(af.sin(x._array))  # wraps af.sin applied to the underlying array
+
+
+def sinh(x: Array, /) -> Array:
+    return Array._new(af.sinh(x._array))  # wraps af.sinh applied to the underlying array
+
+
+def square(x: Array, /) -> Array:
+    return Array._new(af.pow(x._array, 2))  # squares by passing the scalar exponent 2 to af.pow
+
+
+def sqrt(x: Array, /) -> Array:
+    return Array._new(af.sqrt(x._array))  # wraps af.sqrt applied to the underlying array
+
+
+def subtract(x1: Array, x2: Array, /) -> Array:
+    return Array._new(af.sub(x1._array, x2._array))  # wraps af.sub applied to both underlying arrays
+
+
+def tan(x: Array, /) -> Array:
+    return Array._new(af.tan(x._array))  # wraps af.tan applied to the underlying array
+
+
+def tanh(x: Array, /) -> Array:
+    return Array._new(af.tanh(x._array))  # wraps af.tanh applied to the underlying array
+
+
+def trunc(x: Array, /) -> Array:
+    return Array._new(af.trunc(x._array))  # wraps af.trunc applied to the underlying array
diff --git a/arrayfire/array_api/_indexing_functions.py b/arrayfire/array_api/_indexing_functions.py
new file mode 100644
index 0000000..67e35c9
--- /dev/null
+++ b/arrayfire/array_api/_indexing_functions.py
@@ -0,0 +1,51 @@
+import arrayfire as af
+
+from ._array_object import Array
+
+
+def take(x: Array, indices: Array, /, *, axis: int | None = None) -> Array:
+    """
+    Returns elements of an array along a specified axis using a set of indices.
+
+    This function extracts elements from the input array `x` at positions specified by the `indices` array. This
+    operation is similar to fancy indexing in NumPy, but it's limited to using one-dimensional arrays for indices.
+    The function allows selection along a specified axis. If no axis is specified, `x` is flattened and the
+    elements are selected from the flattened array.
+
+    Parameters
+    ----------
+    x : Array
+        The input array from which to take elements.
+    indices : Array
+        A one-dimensional array of integer indices specifying which elements to extract.
+    axis : int | None, optional
+        The axis over which to select values. If None (default), the selection is made on the flattened `x`.
+        The value is forwarded unchanged to `af.lookup`; negative values are not normalized here.
+
+    Returns
+    -------
+    Array
+        An array that contains the selected elements. When `axis` is given, the size of the dimension along
+        `axis` is expected to correspond to the number of elements in `indices`.
+
+    Notes
+    -----
+    - The function mimics part of the behavior of advanced indexing in NumPy but is constrained by the current
+      specification that avoids __setitem__ and mutation of the array through indexing.
+    - Neither the rank of `x` nor the rank of `indices` is validated by this function.
+
+    Raises
+    ------
+    Exception
+        Whatever `af.flat` or `af.lookup` raise for invalid input; this function raises nothing on its own.
+
+    """
+    if axis is None:
+        # No axis given: index into the flattened array.
+        return Array._new(af.lookup(af.flat(x._array), indices._array))
+
+    # Select along `axis` on the array as given: flattening `x` first would discard the requested axis, and the
+    # lookup source must be bound for every axis value, including 0.
+    # NOTE(review): confirm `af.lookup` numbers axes in the same order as this wrapper's public shape.
+    source = x._array
+    return Array._new(af.lookup(source, indices._array, axis=axis))
diff --git a/arrayfire/array_api/_manipulation_functions.py b/arrayfire/array_api/_manipulation_functions.py
new file mode 100644
index 0000000..4a9c08b
--- /dev/null
+++ b/arrayfire/array_api/_manipulation_functions.py
@@ -0,0 +1,382 @@
+from __future__ import annotations
+
+from functools import reduce
+
+import arrayfire as af
+
+from ._array_object import Array
+
+
+def concat(arrays: tuple[Array, ...] | list[Array], /, *, axis: int | None = 0) -> Array:
+    """
+    Joins a sequence of arrays along an existing axis. If `axis` is None, every array is flattened first and the
+    flattened arrays are joined along axis 0. A negative `axis` counts back from the last dimension.
+
+    Parameters
+    ----------
+    arrays : tuple of Array or list of Array
+        The arrays to concatenate. All arrays must have compatible shapes except along the concatenation axis.
+        Shape compatibility is not checked in this function; it is left to `af.join`.
+    axis : int | None, optional
+        The axis along which the arrays will be joined. If None, arrays are flattened before concatenation.
+        If negative, it is resolved against the number of dimensions of the first array. The default is 0.
+
+    Returns
+    -------
+    Array
+        An array resulting from the concatenation of the input arrays along the specified axis.
+
+    Raises
+    ------
+    ValueError
+        If the `arrays` argument is empty.
+    TypeError
+        If any element in `arrays` is not an Array.
+
+    Notes
+    -----
+    - Every element is type-checked before any array data is touched, so one bad element anywhere in the sequence
+      raises TypeError.
+    - All arrays must have the same shape in every dimension except for the dimension corresponding to `axis`.
+    - The resolved `axis` is passed unchanged to `af.join`; it is not range-checked here.
+    - When `axis` is None, all input arrays are flattened into 1-D arrays before concatenation.
+    """
+    if not arrays:
+        raise ValueError("At least one array is required for concatenation.")
+
+    for array in arrays:
+        if not isinstance(array, Array):
+            raise TypeError("All elements must be Array instances.")
+
+    if axis is None:
+        # Flattened inputs are one-dimensional, so they are joined along axis 0.
+        afarrays = [af.flat(array._array) for array in arrays]
+        axis = 0
+    else:
+        afarrays = [array._array for array in arrays]
+        if axis < 0:
+            # Negative axes count back from the last dimension of the first array.
+            axis += arrays[0].ndim
+
+    return Array._new(af.join(axis, *afarrays))
+
+
+def expand_dims(x: Array, /, *, axis: int) -> Array:
+    """
+    Expands the shape of an array by inserting a new axis (dimension) of size one at the position specified by axis.
+
+    Parameters
+    ----------
+    x : Array
+        Input ArrayFire array.
+    axis : int
+        Axis position (zero-based). If x has rank N, a valid axis must reside on the closed-interval [-N-1, N].
+        If provided a negative axis, the axis position at which to insert a singleton dimension is computed
+        as N + axis + 1.
+        For example, if provided -1, the resolved axis position will be N, appending a dimension at the end.
+        If provided -N-1, the resolved axis position will be 0, prepending a dimension. An IndexError is raised
+        if provided an invalid axis position.
+
+    Returns
+    -------
+    out : Array
+        An expanded output array holding the elements of x, with a new axis of size one inserted.
+
+    Raises
+    ------
+    IndexError
+        If the specified axis is out of the valid range for the input array's dimensions.
+    """
+    N = x.ndim
+
+    if axis < -N - 1 or axis > N:
+        raise IndexError(f"axis {axis} is out of bounds for array of dimension {N}.")
+
+    if axis < 0:
+        axis += N + 1
+
+    # Insert the singleton dimension; all existing dimensions keep their sizes and relative order.
+    new_shape = list(x.shape)
+    new_shape.insert(axis, 1)
+
+    return Array._new(af.moddims(x._array, tuple(new_shape)))
+
+
+def flip(x: Array, /, *, axis: int | tuple[int, ...] | None = None) -> Array:
+    """
+    Reverses the order of elements in an array along the given axis or axes. The shape of the array is preserved.
+
+    Parameters
+    ----------
+    x : Array
+        Input ArrayFire array.
+    axis : int | Tuple[int, ...] | None, optional
+        Axis or axes along which to flip the elements. If axis is None, the function flips the elements along all axes.
+        If axis is a negative number, it counts from the last dimension. If a tuple of axes is provided, it flips only
+        the specified axes. Default is None.
+
+    Returns
+    -------
+    out : Array
+        An output array with the elements of x reversed along the specified axes.
+
+    Raises
+    ------
+    TypeError
+        If `axis` is not an integer, a tuple of integers, or None.
+    """
+    if axis is None:
+        axes: tuple[int, ...] = tuple(range(x.ndim))
+    elif isinstance(axis, int):
+        axes = (axis,)
+    elif isinstance(axis, tuple):
+        axes = axis
+    else:
+        raise TypeError("Axis must be an integer, a tuple of integers, or None")
+
+    # Flipping over several axes is the composition of single-axis flips, applied one after another.
+    result = x._array
+    for ax in axes:
+        result = af.flip(result, axis=ax + x.ndim if ax < 0 else ax)
+    return Array._new(result)
+
+
+def permute_dims(x: Array, /, axes: tuple[int, ...]) -> Array:
+    """
+    Permutes the axes (dimensions) of an array according to a specified tuple.
+
+    Parameters
+    ----------
+    x : Array
+        Input ArrayFire array.
+    axes : Tuple[int, ...]
+        Tuple containing a permutation of indices (0, 1, ..., N-1) where N is the number of dimensions of x.
+        The tuple is forwarded unchanged to `af.reorder` as its `shape` argument.
+
+    Returns
+    -------
+    out : Array
+        An array with its dimensions permuted according to the `axes` tuple.
+
+    Raises
+    ------
+    ValueError
+        If the length of `axes` differs from the number of dimensions of x, or if `axes` is not a complete
+        permutation of (0, 1, ..., N-1), i.e. it contains repeated, missing, negative or out-of-bounds
+        indices.
+    """
+    if len(axes) != x.ndim:  # compare against the rank of x, not against its shape tuple
+        raise ValueError("Length of axes tuple must match the number of axes in the array.")
+
+    if sorted(axes) != list(range(x.ndim)):
+        raise ValueError("Axes tuple must be a permutation of [0, ..., N-1] where N is the number of dimensions.")
+
+    return Array._new(af.reorder(x._array, shape=axes))
+
+
+def reshape(x: Array, /, shape: tuple[int, ...], *, copy: bool | None = None) -> Array:
+    """
+    Reshapes an array to a specified shape without changing the underlying data layout, with an option to copy the
+    data.
+
+    Parameters
+    ----------
+    x : Array
+        Input ArrayFire array to be reshaped.
+    shape : Tuple[int, ...]
+        A new shape for the array. One shape dimension can be -1, in which case that dimension is inferred from the
+        remaining dimensions and the total number of elements in the array.
+    copy : bool | None, optional
+        Specifies whether to forcibly copy the array:
+        - If True, the array is copied with `af.copy_array` and the copy is reshaped.
+        - If False, no explicit copy is made; a ValueError is raised when the underlying array is not linear
+          (`is_linear` is False).
+        - If None (default), the array is handed to `af.moddims` without an explicit copy.
+
+    Returns
+    -------
+    out : Array
+        An output array with the specified shape, having the same elements as x.
+
+    Raises
+    ------
+    ValueError
+        If more than one dimension is -1, if a -1 dimension cannot be inferred because another dimension is 0,
+        if the new shape does not hold exactly `x.size` elements, or if `copy` is False and x is not linear.
+
+    Notes
+    -----
+    - The product of the new dimensions must exactly match the total number of elements in the input array.
+    - An inferred -1 dimension is computed by integer division; a shape that does not divide `x.size` evenly is
+      rejected by the element-count check that follows.
+    - An empty shape `()` is accepted only when x holds exactly one element.
+    """
+    if -1 in shape:
+        if sum(1 for s in shape if s == -1) > 1:
+            raise ValueError("Only one dimension can be -1")
+
+        # Multiply the explicitly given dimensions; the initial value 1 covers shape == (-1,).
+        known_size = reduce(lambda a, b: a * b, (s for s in shape if s != -1), 1)
+        if known_size == 0:
+            # x.size // 0 would raise ZeroDivisionError; report an unusable shape instead.
+            raise ValueError("Cannot infer a -1 dimension when another dimension is 0")
+
+        shape = tuple(x.size // known_size if s == -1 else s for s in shape)
+
+    # The initial value 1 keeps reduce from failing on an empty shape ().
+    if reduce(lambda a, b: a * b, shape, 1) != x.size:
+        raise ValueError("Total elements mismatch between input array and new shape")
+
+    if copy is True:
+        # Explicitly copy the array if requested
+        new_array = af.copy_array(x._array)
+        return Array._new(af.moddims(new_array, shape))
+    elif copy is False and not x._array.is_linear:
+        raise ValueError("Reshape cannot be done without copying, but 'copy' is set to False")
+    else:
+        # Default case, reshape without copying if possible
+        return Array._new(af.moddims(x._array, shape))
+
+
+def roll(
+    x: Array,
+    /,
+    shift: int | tuple[int, ...],
+    *,
+    axis: int | tuple[int, ...] | None = None,
+) -> Array:
+    """
+    Rolls the elements of an array along specified axes; elements shifted past the last position wrap to the first.
+
+    Parameters
+    ----------
+    x : Array
+        Input ArrayFire array to be rolled.
+    shift : int | Tuple[int, ...]
+        The number of places by which elements are shifted. An integer `shift` is applied to every axis in `axis`.
+        If both `shift` and `axis` are tuples, they must have the same length and are paired element by element.
+    axis : int | Tuple[int, ...] | None, optional
+        The axis or axes along which elements are shifted; negative values count from the last dimension.
+        If None, the array is flattened, shifted, and then restored to its original shape.
+
+    Returns
+    -------
+    out : Array
+        An output array whose elements have been shifted as specified.
+
+    Raises
+    ------
+    ValueError
+        If `shift` is a tuple whose length differs from the number of axes given in `axis`.
+    """
+    shifts = (shift,) if isinstance(shift, int) else shift
+
+    if axis is None:
+        flat_x = af.flat(x._array)
+        rolled_x = af.shift(flat_x, shifts)
+        return Array._new(af.moddims(rolled_x, x.shape))
+
+    if isinstance(axis, int):
+        axis = (axis,)
+
+    if isinstance(shift, int):
+        # A scalar shift applies to every requested axis.
+        shifts = shifts * len(axis)
+    elif len(shifts) != len(axis):
+        raise ValueError("If both 'shift' and 'axis' are tuples, they must have the same length.")
+
+    result = x._array
+    for ax, sh in zip(axis, shifts):
+        ax = ax + x.ndim if ax < 0 else ax  # negative axes count from the last dimension
+        result = af.shift(result, tuple(sh if i == ax else 0 for i in range(x.ndim)))
+
+    return Array._new(result)
+
+
+def squeeze(x: Array, /, axis: int | tuple[int, ...]) -> Array:
+    """
+    Removes singleton dimensions from an array along specified axes.
+
+    Parameters
+    ----------
+    x : Array
+        Input ArrayFire array.
+    axis : int | Tuple[int, ...]
+        Axis or axes to remove. These must be singleton dimensions in the array (i.e., dimensions with size 1).
+        Negative values count from the last dimension.
+
+    Returns
+    -------
+    out : Array
+        An output array with singleton dimensions removed along the specified axes, having the same elements
+        as x.
+
+    Raises
+    ------
+    ValueError
+        If any specified axis is not a singleton dimension.
+
+    Notes
+    -----
+    - `axis` is required; axes that do not correspond to a dimension of x after resolution are ignored.
+    """
+    if isinstance(axis, int):
+        axis = (axis,)
+
+    # Resolve negative axes so that, for example, -1 addresses the last dimension.
+    axis = tuple(ax + x.ndim if ax < 0 else ax for ax in axis)
+    new_dims = []
+    for i, dim in enumerate(x.shape):
+        if i not in axis:
+            new_dims.append(dim)
+        elif dim != 1:
+            raise ValueError(f"Axis {i} is not a singleton dimension and cannot be squeezed.")
+
+    return Array._new(af.moddims(x._array, tuple(new_dims)))
+
+
+def stack(arrays: tuple[Array, ...] | list[Array], /, *, axis: int = 0) -> Array:
+    """
+    Joins a sequence of arrays along a new axis.
+
+    Parameters
+    ----------
+    arrays : Union[Tuple[Array, ...], List[Array]]
+        Input arrays to join. Each array must have the same shape.
+    axis : int, optional
+        Position of the new axis in the output array. If the input arrays have rank N, a valid axis lies in the
+        closed interval [-N-1, N]; a negative axis is resolved as N + axis + 1, so -1 appends the new axis
+        after the last dimension. The new axis is inserted before the existing dimension at that position.
+        Default is 0.
+
+    Returns
+    -------
+    out : Array
+        An output array having rank N+1, where N is the rank (number of dimensions) of the input arrays, and
+        whose size along the new axis equals the number of input arrays.
+
+    Raises
+    ------
+    ValueError
+        If `arrays` is empty, if not all input arrays have the same shape, or if the axis is out of the
+        allowed range.
+    """
+    if not arrays:
+        raise ValueError("No arrays provided for stacking.")
+
+    if not all(arr.shape == arrays[0].shape for arr in arrays):
+        raise ValueError("All input arrays must have the same shape.")
+
+    N = arrays[0].ndim
+    if axis < -N - 1 or axis > N:
+        raise ValueError(f"axis {axis} is out of bounds for stacking arrays of dimension {N}.")
+    if axis < 0:
+        axis += N + 1
+
+    # Insert (rather than overwrite) a singleton dimension so that moddims keeps the element count.
+    new_shape = tuple(arrays[0].shape[:axis]) + (1,) + tuple(arrays[0].shape[axis:])
+    result = af.moddims(arrays[0]._array, new_shape)
+    for arr in arrays[1:]:
+        result = af.join(axis, result, af.moddims(arr._array, new_shape))
+
+    return Array._new(result)
diff --git a/arrayfire/array_api/_searching_functions.py b/arrayfire/array_api/_searching_functions.py
new file mode 100644
index 0000000..7a2a9fe
--- /dev/null
+++ b/arrayfire/array_api/_searching_functions.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+import arrayfire as af
+
+from ._array_object import Array
+
+
+def argmax(x: Array, /, *, axis: int | None = None, keepdims: bool = False) -> Array:
+    # Returns the indices of the maximum values of `x` along `axis`, or over the flattened array when `axis` is None.
+    # TODO: add documentation, fix typings
+    # source: https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.argmax.html#argmax
+
+    if axis is None:
+        flat_array = af.flat(x._array)
+        _, indices = af.imax(flat_array, axis=0)
+    else:
+        axis = axis + x.ndim if axis < 0 else axis  # negative axes count from the last dimension
+        _, indices = af.imax(x._array, axis=axis)
+
+    if keepdims:
+        # Only the reduced dimension collapses to size 1; with axis=None every dimension is reduced.
+        shape = tuple(1 if axis is None or i == axis else s for i, s in enumerate(x.shape))
+        indices = af.moddims(indices, shape)  # type: ignore[arg-type]  # FIXME
+
+    return Array._new(indices)
+
+
+def argmin(x: Array, /, *, axis: int | None = None, keepdims: bool = False) -> Array:
+    # Returns the indices of the minimum values of `x` along `axis`, or over the flattened array when `axis` is None.
+    # TODO: add documentation, fix typings
+    # source: https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.argmin.html#argmin
+
+    if axis is None:
+        flat_array = af.flat(x._array)
+        _, indices = af.imin(flat_array, axis=0)
+    else:
+        axis = axis + x.ndim if axis < 0 else axis  # negative axes count from the last dimension
+        _, indices = af.imin(x._array, axis=axis)
+
+    if keepdims:
+        # Only the reduced dimension collapses to size 1; with axis=None every dimension is reduced.
+        shape = tuple(1 if axis is None or i == axis else s for i, s in enumerate(x.shape))
+        indices = af.moddims(indices, shape)  # type: ignore[arg-type]  # FIXME
+
+    return Array._new(indices)
+
+
+def nonzero(x: Array, /) -> tuple[Array, ...]:
+    # TODO: add documentation
+    # source: https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.nonzero.html#nonzero
+    flat_array = af.flat(x._array)
+
+    non_zero_indices = af.where(flat_array != 0)
+
+    if len(x.shape) == 1:
+        return (Array._new(non_zero_indices),)
+    else:
+        # ArrayFire flattens column-major: the first dimension varies fastest, so unravel from dim 0 upwards.
+        idx = []
+        for dim in x.shape:
+            idx.append(Array._new(non_zero_indices % dim))
+            non_zero_indices = non_zero_indices // dim
+
+        return tuple(idx)
+
+
+def where(condition: Array, x1: Array, x2: Array, /) -> Array:
+    # TODO: add documentation; for now this only forwards the raw arrays to af.select (x1, x2, then condition).
+    # source: https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.where.html#where
+    selected = af.select(x1._array, x2._array, condition._array)
+    return Array._new(selected)
diff --git a/arrayfire/array_api/_set_functions.py b/arrayfire/array_api/_set_functions.py
new file mode 100644
index 0000000..ca92f5e
--- /dev/null
+++ b/arrayfire/array_api/_set_functions.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from typing import NamedTuple
+
+from ._array_object import Array
+
+
+class UniqueAllResult(NamedTuple):  # declared return type of unique_all
+    values: Array
+    indices: Array
+    inverse_indices: Array
+    counts: Array
+
+
+class UniqueCountsResult(NamedTuple):  # declared return type of unique_counts
+    values: Array
+    counts: Array
+
+
+class UniqueInverseResult(NamedTuple):  # declared return type of unique_inverse
+    values: Array
+    inverse_indices: Array
+
+
+def unique_all(x: Array, /) -> UniqueAllResult:
+    raise NotImplementedError("unique_all is not implemented yet.")  # TODO: implement (was: return NotImplemented)
+
+
+def unique_counts(x: Array, /) -> UniqueCountsResult:
+    raise NotImplementedError("unique_counts is not implemented yet.")  # TODO: implement (was: return NotImplemented)
+
+
+def unique_inverse(x: Array, /) -> UniqueInverseResult:
+    raise NotImplementedError("unique_inverse is not implemented yet.")  # TODO: implement (was: return NotImplemented)
+
+
+def unique_values(x: Array, /) -> Array:
+    raise NotImplementedError("unique_values is not implemented yet.")  # TODO: implement (was: return NotImplemented)
diff --git a/arrayfire/array_api/_sorting_functions.py b/arrayfire/array_api/_sorting_functions.py
new file mode 100644
index 0000000..3956d19
--- /dev/null
+++ b/arrayfire/array_api/_sorting_functions.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import arrayfire as af
+
+from ._array_object import Array
+
+
+def argsort(x: Array, /, *, axis: int = -1, descending: bool = False, stable: bool = True) -> Array:
+    """
+    Returns the indices that would sort an array along a specified axis.
+
+    Parameters
+    ----------
+    x : Array
+        Input array. Should be real-valued as complex numbers have unspecified ordering in this context.
+    axis : int, optional
+        Axis along which to sort the array. Negative values count from the last axis. Default is -1 (last axis).
+    descending : bool, optional
+        Sort order. If True, sorts in descending order. If False, sorts in ascending order. Default is False.
+    stable : bool, optional
+        Accepted to match the array API signature. It is not forwarded to `af.sort`, so the relative order of
+        elements that compare as equal is whatever ArrayFire produces. Default is True.
+
+    Returns
+    -------
+    out : Array
+        An array of indices that sort the array `x` along the specified axis.
+
+    Notes
+    -----
+    - The function currently does not support complex number data types due to unspecified ordering rules.
+    - Only the index array returned by `af.sort(..., is_index_array=True)` is used; the sorted values are
+      discarded.
+    """
+    if axis < 0:
+        axis += x.ndim  # resolve any negative axis, not only -1
+
+    _, indices = af.sort(x._array, axis=axis, is_ascending=not descending, is_index_array=True)
+    return Array._new(indices)
+
+
+def sort(x: Array, /, *, axis: int = -1, descending: bool = False, stable: bool = True) -> Array:
+    """
+    Returns a sorted copy of an input array along a specified axis, with options for order and stability.
+
+    Parameters
+    ----------
+    x : Array
+        Input array. Should have a real-valued data type as the ordering of complex numbers is unspecified.
+    axis : int, optional
+        Axis along which to sort. Negative values count from the last axis. Default is -1 (the last axis).
+    descending : bool, optional
+        If True, the array is sorted in descending order. If False, the array is sorted in ascending order.
+        Default is False.
+    stable : bool, optional
+        Accepted to match the array API signature. It is not forwarded to `af.sort`, so the relative order of
+        elements that compare as equal is whatever ArrayFire produces.
+        Default is True.
+
+    Returns
+    -------
+    out : Array
+        A sorted array holding the elements of the input array.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = Array._new(af.randu(5))  # Wrap a random 1D ArrayFire array of size 5
+    >>> sorted_a = sort(a)
+    >>> print(sorted_a)  # Displays the sorted array
+
+    Notes
+    -----
+    - The function does not support complex number data types due to unspecified ordering rules for such values.
+    - The `stable` flag may be limited by the capabilities of the underlying ArrayFire library.
+    """
+    if axis < 0:
+        axis += x.ndim  # resolve any negative axis, not only -1
+
+    return Array._new(af.sort(x._array, axis=axis, is_ascending=not descending))
diff --git a/arrayfire/array_api/_statistical_functions.py b/arrayfire/array_api/_statistical_functions.py
new file mode 100644
index 0000000..97f326e
--- /dev/null
+++ b/arrayfire/array_api/_statistical_functions.py
@@ -0,0 +1,336 @@
+from __future__ import annotations
+
+from typing import Any, Callable
+
+import arrayfire as af
+
+from ._array_object import Array
+from ._data_type_functions import astype
+
+
+def max(
+    x: Array,
+    /,
+    *,
+    axis: int | tuple[int, ...] | None = None,
+    keepdims: bool = False,
+) -> Array:
+    """
+    Returns the maximum of the input array, over the whole array or along the requested axes.
+
+    Parameters
+    ----------
+    x : Array
+        Array to reduce. Should have a real-valued data type.
+    axis : int | Tuple[int, ...] | None, optional
+        Axis or axes to reduce over. None (the default) reduces over the entire array; a tuple is reduced one
+        axis after another.
+    keepdims : bool, optional
+        Whether reduced dimensions should be kept as singleton dimensions in the result. The flag is forwarded
+        unchanged to `_compute_statistic`. Default is False.
+
+    Returns
+    -------
+    Array
+        The maximum over the entire array when `axis` is None; otherwise an array holding the maxima along the
+        requested axes.
+
+    Notes
+    -----
+    - The reduction is delegated to `_compute_statistic`, which applies `af.max` to the whole array when `axis`
+      is None and otherwise once per requested axis.
+    - `axis` is not validated here; how an out-of-range axis is reported depends on `af.max`.
+    - Complex data types are not supported because their ordering is unspecified.
+    - NaN handling is determined by `af.max` and is not altered by this wrapper.
+    - No data type conversion is applied to `x` before the reduction.
+    """
+    return _compute_statistic(x, af.max, keepdims=keepdims, axis=axis)
+
+
+def mean(
+    x: Array,
+    /,
+    *,
+    axis: int | tuple[int, ...] | None = None,
+    keepdims: bool = False,
+) -> Array:
+    """
+    Returns the arithmetic mean of the input array, over the whole array or along the requested axes.
+
+    Parameters
+    ----------
+    x : Array
+        Array to average. Should be real-valued and floating-point.
+    axis : int | Tuple[int, ...] | None, optional
+        Axis or axes to average over. None (the default) averages over the entire array; a tuple is reduced
+        one axis after another.
+    keepdims : bool, optional
+        Whether reduced dimensions should be kept as singleton dimensions in the result. The flag is forwarded
+        unchanged to `_compute_statistic`. Default is False.
+
+    Returns
+    -------
+    Array
+        The mean over the entire array when `axis` is None; otherwise an array holding the means along the
+        requested axes.
+
+    Notes
+    -----
+    - The reduction is delegated to `_compute_statistic`, which applies `af.mean` to the whole array when
+      `axis` is None and otherwise once per requested axis.
+    - With a tuple of axes the result is therefore a mean of per-axis means, computed one axis at a time.
+    - `axis` is not validated here; how an out-of-range axis is reported depends on `af.mean`.
+    - The data type of `x` is not checked here, although a real-valued floating-point type is expected.
+    - NaN handling is determined by `af.mean` and is not altered by this wrapper.
+    - No data type conversion is applied to `x` before the reduction.
+    """
+    return _compute_statistic(x, af.mean, keepdims=keepdims, axis=axis)
+
+
+def min(
+    x: Array,
+    /,
+    *,
+    axis: int | tuple[int, ...] | None = None,
+    keepdims: bool = False,
+) -> Array:
+    """
+    Returns the minimum of the input array, over the whole array or along the requested axes.
+
+    Parameters
+    ----------
+    x : Array
+        Array to reduce. Should have a real-valued data type.
+    axis : int | Tuple[int, ...] | None, optional
+        Axis or axes to reduce over. None (the default) reduces over the entire array; a tuple is reduced one
+        axis after another.
+    keepdims : bool, optional
+        Whether reduced dimensions should be kept as singleton dimensions in the result. The flag is forwarded
+        unchanged to `_compute_statistic`. Default is False.
+
+    Returns
+    -------
+    Array
+        The minimum over the entire array when `axis` is None; otherwise an array holding the minima along the
+        requested axes.
+
+    Notes
+    -----
+    - The reduction is delegated to `_compute_statistic`, which applies `af.min` to the whole array when `axis`
+      is None and otherwise once per requested axis.
+    - The call mirrors `max`; only the ArrayFire reduction (`af.min`) differs.
+    - `axis` is not validated here; how an out-of-range axis is reported depends on `af.min`.
+    - Complex data types are not supported because their ordering is unspecified.
+    - NaN handling is determined by `af.min` and is not altered by this wrapper.
+    - No data type conversion is applied to `x` before the reduction.
+    """
+    return _compute_statistic(x, af.min, keepdims=keepdims, axis=axis)
+
+
+def prod(
+    x: Array,
+    /,
+    *,
+    axis: int | tuple[int, ...] | None = None,
+    dtype: af.Dtype | None = None,
+    keepdims: bool = False,
+) -> Array:
+    """
+    Returns the product of the elements of the input array, over the whole array or along the requested axes,
+    optionally casting the input to another data type first.
+
+    Parameters
+    ----------
+    x : Array
+        Array to reduce. Should have a numeric data type.
+    axis : int | Tuple[int, ...] | None, optional
+        Axis or axes to multiply over. None (the default) reduces over the entire array; a tuple is reduced one
+        axis after another.
+    dtype : dtype | None, optional
+        Data type to cast `x` to, via `astype`, before the product is computed. If None (the default), `x` is
+        reduced as it is; no automatic widening is applied here, so choose a wider type explicitly when
+        overflow is a concern.
+    keepdims : bool, optional
+        Whether reduced dimensions should be kept as singleton dimensions in the result. The flag is forwarded
+        unchanged to `_compute_statistic`. Default is False.
+
+    Returns
+    -------
+    Array
+        The product over the entire array when `axis` is None; otherwise an array holding the products along
+        the requested axes.
+
+    Notes
+    -----
+    - The reduction is delegated to `_compute_statistic` using `af.product`.
+    - `axis` is not validated here; how an out-of-range axis is reported depends on `af.product`.
+    - NaN handling is determined by `af.product` and is not altered by this wrapper.
+    """
+    # Cast up front when a dtype is requested; otherwise reduce x unchanged.
+    operand = x if dtype is None else astype(x, dtype)
+
+    return _compute_statistic(operand, af.product, axis=axis, keepdims=keepdims)
+
+
+def std(
+    x: Array,
+    /,
+    *,
+    axis: int | tuple[int, ...] | None = None,
+    correction: int | float = 0.0,
+    keepdims: bool = False,
+) -> Array:
+    """
+    Returns the standard deviation of the input array, over the whole array or along the requested axes, using
+    either the population or the sample estimator.
+
+    Parameters
+    ----------
+    x : Array
+        Array to reduce. Should have a real-valued floating-point data type.
+    axis : int | Tuple[int, ...] | None, optional
+        Axis or axes to reduce over. None (the default) reduces over the entire array; a tuple is reduced one
+        axis after another, i.e. the reduction is re-applied to the result of the previous axis rather than
+        computed jointly over all listed axes.
+    correction : int | float, optional
+        Degrees of freedom adjustment. 0 (the default) selects `af.VarianceBias.POPULATION`; 1 (Bessel's
+        correction) selects `af.VarianceBias.SAMPLE`. No other value is accepted.
+    keepdims : bool, optional
+        Whether reduced dimensions should be kept as singleton dimensions in the result. The flag is forwarded
+        unchanged to `_compute_statistic`. Default is False.
+
+    Returns
+    -------
+    out : Array
+        The standard deviation over the entire array when `axis` is None; otherwise an array holding the
+        standard deviations along the requested axes. The reduction is delegated to `_compute_statistic`
+        using `af.stdev`.
+
+    Raises
+    ------
+    ValueError
+        If `correction` is neither 0 nor 1.
+    """
+    if correction not in (0, 1):
+        raise ValueError("Correction can only be set as 0 or 1. Other values are unsupported with arrayfire.")
+
+    # 0 selects the population estimator, 1 the sample (Bessel-corrected) estimator.
+    use_sample = correction == 1
+    bias = af.VarianceBias.SAMPLE if use_sample else af.VarianceBias.POPULATION
+
+    return _compute_statistic(x, af.stdev, axis=axis, keepdims=keepdims, bias=bias)
+
+
+def sum(
+    x: Array,
+    /,
+    *,
+    axis: int | tuple[int, ...] | None = None,
+    dtype: af.Dtype | None = None,
+    keepdims: bool = False,
+) -> Array:
+    """
+    Returns the sum of the input array, over the whole array or along the requested axes.
+
+    Parameters
+    ----------
+    x : Array
+        Array to reduce. Should have a numeric data type.
+    axis : int | Tuple[int, ...] | None, optional
+        Axis or axes to sum over. None (the default) reduces over the entire array; a tuple is reduced one
+        axis after another.
+    dtype : dtype | None, optional
+        Data type to cast `x` to, via `astype`, before the sum is computed. If None (the default), `x` is
+        reduced as it is.
+    keepdims : bool, optional
+        Whether reduced dimensions should be kept as singleton dimensions in the result. The flag is forwarded
+        unchanged to `_compute_statistic`. Default is False.
+
+    Returns
+    -------
+    out : Array
+        The sum over the entire array when `axis` is None; otherwise an array holding the sums along the
+        requested axes.
+
+    Notes
+    -----
+    - The reduction is delegated to `_compute_statistic` using `af.sum`.
+    - `axis` is not validated here; how an out-of-range axis is reported depends on `af.sum`.
+    - No automatic widening is applied; pass `dtype` explicitly when integer overflow or loss of
+      floating-point precision is a concern.
+    """
+    # Cast up front when a dtype is requested; otherwise reduce x unchanged.
+    operand = x if dtype is None else astype(x, dtype)
+
+    return _compute_statistic(operand, af.sum, axis=axis, keepdims=keepdims)
+
+
+def var(
+    x: Array,
+    /,
+    *,
+    axis: int | tuple[int, ...] | None = None,
+    correction: int | float = 0.0,
+    keepdims: bool = False,
+) -> Array:
+    """
+    Returns the variance of the input array, over the whole array or along the requested axes.
+
+    Parameters
+    ----------
+    x : Array
+        Array to reduce. Should be real-valued and floating-point.
+    axis : int | Tuple[int, ...] | None, optional
+        Axis or axes to reduce over. None (the default) reduces over the entire array; a tuple is reduced one
+        axis after another rather than jointly over all listed axes.
+    correction : int | float, optional
+        Degrees of freedom adjustment. 0 (the default) selects `af.VarianceBias.POPULATION`; 1 (Bessel's
+        correction) selects `af.VarianceBias.SAMPLE`. No other value is accepted.
+    keepdims : bool, optional
+        Whether reduced dimensions should be kept as singleton dimensions in the result. The flag is forwarded
+        unchanged to `_compute_statistic`. Default is False.
+
+    Returns
+    -------
+    out : Array
+        The variance over the entire array when `axis` is None; otherwise an array holding the variances
+        along the requested axes. The reduction is delegated to `_compute_statistic` using `af.var`.
+
+    Raises
+    ------
+    ValueError
+        If `correction` is neither 0 nor 1.
+    """
+    if correction not in (0, 1):
+        raise ValueError("Correction can only be set as 0 or 1. Other values are unsupported with arrayfire.")
+
+    # 0 selects the population estimator, 1 the sample (Bessel-corrected) estimator.
+    use_sample = correction == 1
+    bias = af.VarianceBias.SAMPLE if use_sample else af.VarianceBias.POPULATION
+
+    return _compute_statistic(x, af.var, axis=axis, keepdims=keepdims, bias=bias)
+
+
+def _compute_statistic(
+    x: Array, operation: Callable, *, axis: int | tuple[int, ...] | None = None, keepdims: bool = False, **kwargs: Any
+) -> Array:
+    # Shared reduction driver: applies `operation` to the whole array, or once per requested axis in turn.
+    if axis is None:
+        result = operation(x._array, **kwargs)
+        if keepdims:
+            result = af.constant(result, tuple([1] * max(x.ndim, 1)), dtype=x.dtype)  # keep the rank of x
+        return Array._new(result)
+
+    if isinstance(axis, int):
+        axis = (axis,)
+    axis = tuple(ax + x.ndim if ax < 0 else ax for ax in axis)  # resolve negative axes
+
+    result = x._array
+    for ax in axis:
+        result = operation(result, axis=ax, **kwargs)
+
+    if keepdims:
+        new_shape = tuple(1 if i in axis else s for i, s in enumerate(x.shape))
+        result = af.moddims(result, new_shape)
+
+    return Array._new(result)
diff --git a/arrayfire/array_api/_utility_functions.py b/arrayfire/array_api/_utility_functions.py
new file mode 100644
index 0000000..12f0794
--- /dev/null
+++ b/arrayfire/array_api/_utility_functions.py
@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+import arrayfire as af
+
+from ._array_object import Array
+from ._data_type_functions import astype
+from ._dtypes import bool as array_api_bool
+from ._statistical_functions import _compute_statistic
+
+
+def all(
+    x: Array,
+    /,
+    *,
+    axis: int | tuple[int, ...] | None = None,
+    keepdims: bool = False,
+) -> Array:
+    """
+    Tests whether all input array elements evaluate to True along a specified axis.
+
+    Parameters
+    ----------
+    x : Array
+        Input array.
+    axis : int | Tuple[int, ...] | None, optional
+        Axis or axes along which to perform a logical AND reduction. If None, a logical AND reduction is
+        performed over the entire array. If a tuple of integers, logical AND reductions are performed over
+        multiple axes. Default is None.
+    keepdims : bool, optional
+        If True, keeps the reduced dimensions as singleton dimensions in the result, making the result
+        compatible with the input array. If False, reduced dimensions are not included in the result. Default is False.
+
+    Returns
+    -------
+    out : Array
+        If a logical AND reduction was performed over the entire array, the returned array is a zero-dimensional
+        array containing the test result; otherwise, the returned array is a non-zero-dimensional array containing
+        the test results. The returned array has a data type of bool.
+
+    Notes
+    -----
+    - Positive infinity, negative infinity, and NaN must evaluate to True.
+    - If x has a complex floating-point data type, elements having a non-zero component must evaluate to True.
+    - If x is an empty array or the size of the axis along which to evaluate elements is zero, the test result must
+      be True.
+    """
+    if axis is None:
+        axis = tuple(range(len(x.shape)))  # axis indices 0..ndim-1, not the extents stored in x.shape
+
+    result = _compute_statistic(x, af.all_true, axis=axis, keepdims=keepdims)
+    return astype(result, array_api_bool)
+
+
+def any(
+    x: Array,
+    /,
+    *,
+    axis: int | tuple[int, ...] | None = None,
+    keepdims: bool = False,
+) -> Array:
+    """
+    Check whether at least one element of the input array evaluates to True along the given axis or axes.
+
+    Parameters
+    ----------
+    x : Array
+        Array to test. Should have a numeric or boolean data type.
+    axis : int | Tuple[int, ...] | None, optional
+        Axis or axes over which the logical OR reduction runs. With None, the reduction covers the
+        entire array. With a tuple of integers, the reduction is applied over each listed axis.
+        A valid axis must be an integer within the interval [-N, N), where N is the rank
+        (number of dimensions) of x. Default is None.
+    keepdims : bool, optional
+        If True, the reduced axes (dimensions) are kept in the result as singleton dimensions, so the
+        result stays compatible with the input array. If False, the reduced axes are dropped from the result.
+        Default is False.
+
+    Returns
+    -------
+    out : Array
+        A zero-dimensional array holding the test result when the logical OR reduction covered the entire
+        array; otherwise a non-zero-dimensional array holding the test results. The data type of the
+        returned array is bool.
+
+    Notes
+    -----
+    - Positive infinity, negative infinity, and NaN are considered as True.
+    - If x has a complex floating-point data type, elements having a non-zero component must evaluate to True.
+    - If x is an empty array or the size of the axis along which to evaluate elements is zero, the test result must
+      be False.
+    """
+    # The reduction result is passed through astype with the array-API bool dtype before being returned.
+    return astype(_compute_statistic(x, af.any_true, axis=axis, keepdims=keepdims), array_api_bool)
diff --git a/arrayfire/array_api/tests/__init__.py b/arrayfire/array_api/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/arrayfire/array_object.py b/arrayfire/array_object.py
index 683d205..d2e2228 100755
--- a/arrayfire/array_object.py
+++ b/arrayfire/array_object.py
@@ -13,7 +13,6 @@
 from .dtypes import Dtype
 from .dtypes import bool as afbool
 from .dtypes import c_api_value_to_dtype, float32, str_to_dtype
-from .library.device import PointerSource
 
 if TYPE_CHECKING:
     from ctypes import Array as CArray
@@ -41,14 +40,13 @@ def afarray_as_array(func: Callable[P, Array]) -> Callable[P, Array]:
     def decorated(*args: P.args, **kwargs: P.kwargs) -> Array:
         result = func(*args, **kwargs)
         return Array.from_afarray(result)  # type: ignore[arg-type]  # FIXME
-
     return decorated
 
 
 class Array:
     def __init__(
         self,
-        obj: None | Array | _pyarray.array | int | AFArray | list[int | float] = None,
+        obj: None | Array | _pyarray.array | int | AFArray | list[bool | int | float] = None,
         dtype: None | Dtype | str = None,
         shape: tuple[int, ...] = (),
         to_device: bool = False,
@@ -58,6 +56,9 @@ def __init__(
         self._arr = AFArray.create_null_pointer()
         _no_initial_dtype = False  # HACK, FIXME
 
+        if len(shape) > 4:
+            raise ValueError("Can not create 5 or more -dimensional arrays.")
+
         if isinstance(dtype, str):
             dtype = str_to_dtype(dtype)  # type: ignore[arg-type]
 
@@ -126,7 +127,7 @@ def __init__(
             return
 
         self._arr = wrapper.create_strided_array(
-            shape, dtype, _array_buffer, offset, strides, PointerSource.device  # type: ignore[arg-type]
+            shape, dtype, _array_buffer, offset, strides, wrapper.PointerSource(to_device)  # type: ignore[arg-type]
         )
 
     # Arithmetic Operators
@@ -547,41 +548,41 @@ def __ne__(self, other: int | float | bool | Array, /) -> Array:  # type: ignore
 
     # Reflected Arithmetic Operators
 
-    def __radd__(self, other: Array, /) -> Array:
+    def __radd__(self, other: int | float | Array, /) -> Array:
         """
         Return other + self.
         """
         return process_c_function(other, self, wrapper.add)
 
-    def __rsub__(self, other: Array, /) -> Array:
+    def __rsub__(self, other: int | float | Array, /) -> Array:
         """
         Return other - self.
         """
         return process_c_function(other, self, wrapper.sub)
 
-    def __rmul__(self, other: Array, /) -> Array:
+    def __rmul__(self, other: int | float | Array, /) -> Array:
         """
         Return other * self.
         """
         return process_c_function(other, self, wrapper.mul)
 
-    def __rtruediv__(self, other: Array, /) -> Array:
+    def __rtruediv__(self, other: int | float | Array, /) -> Array:
         """
         Return other / self.
         """
         return process_c_function(other, self, wrapper.div)
 
-    def __rfloordiv__(self, other: Array, /) -> Array:
+    def __rfloordiv__(self, other: int | float | Array, /) -> Array:
         # TODO
         return NotImplemented
 
-    def __rmod__(self, other: Array, /) -> Array:
+    def __rmod__(self, other: int | float | Array, /) -> Array:
         """
         Return other % self.
         """
         return process_c_function(other, self, wrapper.mod)
 
-    def __rpow__(self, other: Array, /) -> Array:
+    def __rpow__(self, other: int | float | Array, /) -> Array:
         """
         Return other ** self.
         """
@@ -595,31 +596,31 @@ def __rmatmul__(self, other: Array, /) -> Array:
 
     # Reflected Bitwise Operators
 
-    def __rand__(self, other: Array, /) -> Array:
+    def __rand__(self, other: int | bool | Array, /) -> Array:
         """
         Return other & self.
         """
         return process_c_function(other, self, wrapper.bitand)
 
-    def __ror__(self, other: Array, /) -> Array:
+    def __ror__(self, other: int | bool | Array, /) -> Array:
         """
         Return other | self.
         """
         return process_c_function(other, self, wrapper.bitor)
 
-    def __rxor__(self, other: Array, /) -> Array:
+    def __rxor__(self, other: int | bool | Array, /) -> Array:
         """
         Return other ^ self.
         """
         return process_c_function(other, self, wrapper.bitxor)
 
-    def __rlshift__(self, other: Array, /) -> Array:
+    def __rlshift__(self, other: int | Array, /) -> Array:
         """
         Return other << self.
         """
         return process_c_function(other, self, wrapper.bitshiftl)
 
-    def __rrshift__(self, other: Array, /) -> Array:
+    def __rrshift__(self, other: int | Array, /) -> Array:
         """
         Return other >> self.
         """
@@ -1124,11 +1125,11 @@ def process_c_function(lhs: int | float | Array, rhs: int | float | Array, c_fun
         lhs_array = lhs.arr
         rhs_array = rhs.arr
 
-    elif isinstance(lhs, Array) and isinstance(rhs, (int, float)):
+    elif isinstance(lhs, Array) and isinstance(rhs, int | float):
         lhs_array = lhs.arr
         rhs_array = wrapper.create_constant_array(rhs, lhs.shape, lhs.dtype)
 
-    elif isinstance(lhs, (int, float)) and isinstance(rhs, Array):
+    elif isinstance(lhs, int | float) and isinstance(rhs, Array):
         lhs_array = wrapper.create_constant_array(lhs, rhs.shape, rhs.dtype)
         rhs_array = rhs.arr
 
@@ -1166,6 +1167,12 @@ def _index_to_afindex(key: int | float | complex | bool | slice | wrapper.Parall
 
 
 def _slice_to_length(key: slice, axis: int) -> int:
+    # Seed from the slice's own fields; the branches below override special cases such as start=None.
+
+    start = key.start
+    stop = key.stop
+    step = key.step
+
     if key.start is None:
         start = 0
     elif key.start < 0:
diff --git a/arrayfire/library/array_functions.py b/arrayfire/library/array_functions.py
index 2613626..e8028e5 100644
--- a/arrayfire/library/array_functions.py
+++ b/arrayfire/library/array_functions.py
@@ -23,6 +23,7 @@
     "shift",
     "tile",
     "transpose",
+    "lookup",
 ]
 
 import warnings
@@ -72,6 +73,41 @@ def constant(scalar: int | float | complex, shape: tuple[int, ...] = (1,), dtype
 
 @afarray_as_array
 def diag(array: Array, /, *, diag_index: int = 0, extract: bool = True) -> Array:
+    """
+    Extract a diagonal from or create a diagonal matrix based on an input array.
+
+    This method operates on an ArrayFire array, allowing for the extraction of a specified diagonal
+    from a 2-dimensional array or the creation of a diagonal matrix from a 1-dimensional array.
+
+    Parameters
+    ----------
+    array : Array
+        The input ArrayFire array. For diagonal extraction, this should be a 2-dimensional array.
+        For diagonal matrix creation, this should be a 1-dimensional array.
+
+    diag_index : int, optional, keyword-only, default: 0
+        The index of the diagonal that the operation pertains to.
+        - diag_index == 0 signifies the main diagonal.
+        - diag_index > 0 signifies a super diagonal.
+        - diag_index < 0 signifies a sub diagonal.
+
+    extract : bool, optional, keyword-only, default: True
+        Determines the operation to perform:
+        - If True, the method extracts the specified diagonal from a 2-dimensional array.
+        - If False, the method creates a diagonal matrix with the input array populating the specified diagonal.
+
+    Returns
+    -------
+    Array
+        - If `extract` is True, the returned Array contains the `diag_index`'th diagonal elements from the input array.
+        - If `extract` is False, the returned Array is a diagonal matrix with the input array elements placed along
+        the `diag_index`'th diagonal.
+
+    Notes
+    -----
+    The `diag_index` parameter allows for flexible selection of diagonals, enabling operations not just on the main
+    diagonal but also on any super or sub diagonals relative to the main.
+    """
     if extract:
         return cast(Array, wrapper.diag_extract(array.arr, diag_index))
 
@@ -113,34 +149,194 @@ def identity(shape: tuple[int, ...], dtype: Dtype = float32) -> Array:
     Examples
     --------
     >>> import arrayfire as af
-    >>> identity_matrix = af.identity((3, 3))  # Create a 3x3 identity matrix
-    >>> af.display(identity_matrix)
+    >>> af.identity((3, 3))  # Create a 3x3 identity matrix
     [3 3 1 1]
         1.0000     0.0000     0.0000
         0.0000     1.0000     0.0000
         0.0000     0.0000     1.0000
 
-    >>> identity_batch = af.identity((2, 2, 3))  # Create a batch of 3 identity 2x2 matrices
-    >>> af.display(identity_batch)
+    >>> af.identity((2, 2, 3))  # Create a batch of 3 identity 2x2 matrices
     [2 2 3 1]
         1.0000     0.0000     1.0000     0.0000     1.0000     0.0000
         0.0000     1.0000     0.0000     1.0000     0.0000     1.0000
     """
+    if not isinstance(shape, tuple) or len(shape) < 2:
+        raise ValueError("Argument shape must be a tuple with at least 2 values.")
+
     return cast(Array, wrapper.identity(shape, dtype))
 
 
 @afarray_as_array
-def iota(shape: tuple[int, ...], /, *, tile_shape: tuple[int, ...] = (), dtype: Dtype = float32) -> Array:
+def iota(shape: int | tuple[int, ...], /, *, tile_shape: tuple[int, ...] = (), dtype: Dtype = float32) -> Array:
+    """
+    Generate a multi-dimensional ArrayFire array with values populated based on their linear index within the array,
+    optionally tiling the result to create larger arrays.
+
+    This function creates an array where each element's value represents its linear index within the array, starting
+    from 0. It supports optional tiling, which repeats the array across specified dimensions to create a larger array.
+
+    Parameters
+    ----------
+    shape : int | tuple[int, ...]
+        The shape of the array to be generated. A bare int is treated as a one-element tuple.
+        For example, `shape=5` creates a 1-dimensional array of length 5, `shape=(5, 4)` creates a 2D array
+        of size 5x4, and so on.
+
+    tile_shape : tuple[int, ...], optional, keyword-only, default: ()
+        The shape used for tiling the generated array. Each element in the tuple represents the number of times
+        the array is repeated along that dimension. By default, no tiling is applied.
+        For example, `tile_shape=(2, 3)` will tile the generated array 2 times along the first dimension and
+        3 times along the second dimension.
+
+    dtype : Dtype, optional, keyword-only, default: float32
+        The data type of the array elements. This determines the type of the values in the generated array.
+
+    Returns
+    -------
+    Array
+        A multi-dimensional ArrayFire array with elements populated based on their linear index, optionally tiled
+        according to `tile_shape`.
+
+    Raises
+    ------
+    ValueError
+        If `shape` is neither an int nor a tuple, or is an empty tuple.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> af.iota((3, 3))  # Generate a 3x3 array without tiling
+    [3 3 1 1]
+        0.0000     3.0000     6.0000
+        1.0000     4.0000     7.0000
+        2.0000     5.0000     8.0000
+
+    >>> af.iota((3, 3), tile_shape=(1, 2))  # Generate and tile the array along the second dimension
+    [3 6 1 1]
+        0.0000     3.0000     6.0000     0.0000     3.0000     6.0000
+        1.0000     4.0000     7.0000     1.0000     4.0000     7.0000
+        2.0000     5.0000     8.0000     2.0000     5.0000     8.0000
+    """
+    if isinstance(shape, int):
+        shape = (shape,)
+
+    if not isinstance(shape, tuple) or not shape:
+        raise ValueError("Argument shape must be a tuple with at least 1 value.")
+
     return cast(Array, wrapper.iota(shape, tile_shape, dtype))
 
 
 @afarray_as_array
 def lower(array: Array, /, *, is_unit_diag: bool = False) -> Array:
+    """
+    Extract the lower triangular part of a given multi-dimensional ArrayFire array.
+
+    This function returns the lower triangular matrix from the input array. If the `is_unit_diag` flag
+    is set to True, the diagonal of the result is filled with ones instead of the input's diagonal values
+    (see the last example below).
+
+    Parameters
+    ----------
+    array : Array
+        The input ArrayFire array from which to extract the lower triangular part. This array must be
+        at least 2-dimensional.
+
+    is_unit_diag : bool, optional, keyword-only, default: False
+        A flag that specifies whether the diagonal elements of the lower triangular matrix are to be set to 1.
+        If True, the diagonal elements of the output are 1, regardless of the input's diagonal values.
+        If False, the diagonal elements are taken as they appear in the input array.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array containing the lower triangular part of the input array. If `is_unit_diag` is True,
+        the diagonal elements of the returned array are 1.
+
+    Notes
+    -----
+    - Elements above the main diagonal appear as zeros in the output (see the examples below).
+    - This function can be useful for mathematical operations that require lower triangular matrices, such as certain
+    types of matrix factorizations.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3, 3))  # Generate a random 3x3 array
+    >>> a
+    [3 3 1 1]
+        0.6010     0.2126     0.2864
+        0.0278     0.0655     0.3410
+        0.9806     0.5497     0.7509
+
+    >>> af.lower(a)  # Extract lower triangular part without unit diagonal
+    [3 3 1 1]
+        0.6010     0.0000     0.0000
+        0.0278     0.0655     0.0000
+        0.9806     0.5497     0.7509
+
+    >>> af.lower(a, is_unit_diag=True)  # Extract lower triangular part with unit diagonal
+    [3 3 1 1]
+        1.0000     0.0000     0.0000
+        0.0278     1.0000     0.0000
+        0.9806     0.5497     1.0000
+    """
     return cast(Array, wrapper.lower(array.arr, is_unit_diag))
 
 
 @afarray_as_array
 def upper(array: Array, /, *, is_unit_diag: bool = False) -> Array:
+    """
+    Extract the upper triangular part of a given multi-dimensional ArrayFire array.
+
+    This function returns the upper triangular matrix from the input array. If the `is_unit_diag` flag
+    is set to True, the diagonal of the result is filled with ones instead of the input's diagonal values
+    (see the last example below).
+
+    Parameters
+    ----------
+    array : Array
+        The input ArrayFire array from which to extract the upper triangular part. This array must be
+        at least 2-dimensional.
+
+    is_unit_diag : bool, optional, keyword-only, default: False
+        A flag that specifies whether the diagonal elements of the upper triangular matrix are to be set to 1.
+        If True, the diagonal elements of the output are 1, regardless of the input's diagonal values.
+        If False, the diagonal elements are taken as they appear in the input array.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array containing the upper triangular part of the input array. If `is_unit_diag` is True,
+        the diagonal elements of the returned array are 1.
+
+    Notes
+    -----
+    - Elements below the main diagonal appear as zeros in the output (see the examples below).
+    - This function can be useful for mathematical operations that require upper triangular matrices, such as certain
+    types of matrix factorizations.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3, 3))  # Generate a random 3x3 array
+    >>> a
+    [3 3 1 1]
+        0.8962     0.6105     0.7896
+        0.3712     0.5232     0.8966
+        0.6755     0.5567     0.0536
+
+    >>> af.upper(a)  # Extract upper triangular part without unit diagonal
+    [3 3 1 1]
+        0.8962     0.6105     0.7896
+        0.0000     0.5232     0.8966
+        0.0000     0.0000     0.0536
+
+    >>> af.upper(a, is_unit_diag=True)  # Extract upper triangular part with unit diagonal
+    [3 3 1 1]
+        1.0000     0.6105     0.7896
+        0.0000     1.0000     0.8966
+        0.0000     0.0000     1.0000
+    """
     return cast(Array, wrapper.upper(array.arr, is_unit_diag))
 
 
@@ -148,11 +344,61 @@ def upper(array: Array, /, *, is_unit_diag: bool = False) -> Array:
 def pad(
     array: Array, start_shape: tuple[int, ...], end_shape: tuple[int, ...], /, *, fill_type: Pad = Pad.ZERO
 ) -> Array:
+    """
+    Pads an ArrayFire array with specified sizes of padding around its edges and fills the padding
+    according to the selected fill type.
+
+    This function allows for the padding of an array on all sides with a variety of filling options
+    for the new elements added by the padding process. The amount of padding to add at the start and
+    end of each dimension is specified by `start_shape` and `end_shape`, respectively.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array to be padded.
+
+    start_shape : tuple[int, ...]
+        The amount of padding to add at the beginning of each dimension of the array. Each value in the
+        tuple corresponds to a dimension in the array.
+
+    end_shape : tuple[int, ...]
+        The amount of padding to add at the end of each dimension of the array. Each value in the tuple
+        corresponds to a dimension in the array.
+
+    fill_type : Pad, optional, keyword-only, default: Pad.ZERO
+        Selects how the padded areas are filled. The default is `Pad.ZERO`, which fills the padded
+        area with zeros (as in the example below). Other members of `Pad` select a different fill
+        strategy; consult the `Pad` definition for the available options.
+
+    Returns
+    -------
+    Array
+        The padded ArrayFire array. The shape of the output array will be larger than the input array by
+        the amounts specified in `start_shape` and `end_shape`.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3, 3))
+    >>> a
+    [3 3 1 1]
+        0.4107     0.1794     0.3775
+        0.8224     0.4198     0.3027
+        0.9518     0.0081     0.6456
+
+    >>> af.pad(a, (1, 1), (1, 1))
+    [5 5 1 1]
+        0.0000     0.0000     0.0000     0.0000     0.0000
+        0.0000     0.4107     0.1794     0.3775     0.0000
+        0.0000     0.8224     0.4198     0.3027     0.0000
+        0.0000     0.9518     0.0081     0.6456     0.0000
+        0.0000     0.0000     0.0000     0.0000     0.0000
+    """
     return cast(Array, wrapper.pad(array.arr, start_shape, end_shape, fill_type))
 
 
 @afarray_as_array
-def range(shape: tuple[int, ...], /, *, axis: int = 0, dtype: Dtype = float32) -> Array:
+def range(shape: int | tuple[int, ...], /, *, axis: int = 0, dtype: Dtype = float32) -> Array:
     """
     Create a multi-dimensional array using the length of a dimension as a range.
 
@@ -179,6 +425,9 @@ def range(shape: tuple[int, ...], /, *, axis: int = 0, dtype: Dtype = float32) -
     ValueError
         If axis value is greater than the number of axes in resulting Array.
 
+    ValueError
+        If `shape` is not int or tuple with less than one value.
+
     Notes
     -----
     The `shape` parameter determines the dimensions of the resulting array:
@@ -191,19 +440,25 @@ def range(shape: tuple[int, ...], /, *, axis: int = 0, dtype: Dtype = float32) -
     --------
     >>> import arrayfire as af
     >>> a = af.range((3, 2))  # axis is not specified, range is along the first dimension.
-    >>> af.display(a)  # The data ranges from [0 - 2] (3 elements along the first dimension)
+    >>> a  # The data ranges from [0 - 2] (3 elements along the first dimension)
     [3 2 1 1]
         0.0000     0.0000
         1.0000     1.0000
         2.0000     2.0000
 
     >>> a = af.range((3, 2), axis=1)  # axis is 1, range is along the second dimension.
-    >>> af.display(a)  # The data ranges from [0 - 1] (2 elements along the second dimension)
+    >>> a  # The data ranges from [0 - 1] (2 elements along the second dimension)
     [3 2 1 1]
         0.0000     1.0000
         0.0000     1.0000
         0.0000     1.0000
     """
+    if isinstance(shape, int):
+        shape = (shape,)
+
+    if not isinstance(shape, tuple) or not shape:
+        raise ValueError("Argument shape must be a tuple with at least 1 value.")
+
     if axis > len(shape):
         raise ValueError(
             f"Can not calculate along {axis} dimension. The resulting Array is set to has {len(shape)} dimensions."
@@ -217,16 +472,79 @@ def range(shape: tuple[int, ...], /, *, axis: int = 0, dtype: Dtype = float32) -
 
 @afarray_as_array
 def isinf(array: Array, /) -> Array:
+    """
+    Check if each element of the input ArrayFire array is infinity.
+
+    Every element of the input array is tested for infinity. The result is an array where each element is
+    a boolean indicating whether the corresponding element in the input array is infinity.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array of booleans, where each element indicates whether the corresponding element in the input
+        array is infinity.
+
+    Notes
+    -----
+    - The input array must not be of a complex data type.
+    """
     return cast(Array, wrapper.isinf(array.arr))
 
 
 @afarray_as_array
 def isnan(array: Array, /) -> Array:
+    """
+    Check if each element of the input ArrayFire array is NaN (Not a Number).
+
+    Every element of the input array is tested for NaN. The result is an array where each element is
+    a boolean indicating whether the corresponding element in the input array is NaN.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array of booleans, where each element indicates whether the corresponding element in the input
+        array is NaN.
+
+    Notes
+    -----
+    - The input array must not be of a complex data type.
+    """
     return cast(Array, wrapper.isnan(array.arr))
 
 
 @afarray_as_array
 def iszero(array: Array, /) -> Array:
+    """
+    Check if each element of the input ArrayFire array is zero.
+
+    Every element of the input array is tested for zero. The result is an array where each element is
+    a boolean indicating whether the corresponding element in the input array is zero.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array of booleans, where each element indicates whether the corresponding element in the input
+        array is zero.
+
+    Notes
+    -----
+    - The input array must not be of a complex data type.
+    """
     return cast(Array, wrapper.iszero(array.arr))
 
 
@@ -235,10 +553,83 @@ def iszero(array: Array, /) -> Array:
 
 @afarray_as_array
 def copy_array(array: Array, /) -> Array:
+    """
+    Performs a deep copy of the given ArrayFire array.
+
+    This function creates a new ArrayFire array that is an identical copy of the input array.
+    It ensures that the new array is a separate instance with its own memory, independent of the original array.
+
+    Parameters
+    ----------
+    array : Array
+        The input ArrayFire array to be copied.
+
+    Returns
+    -------
+    Array
+        A new ArrayFire array that is an identical copy of the input array.
+        This copy is independent of the original, allowing for modifications without affecting the original array.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> original_array = af.randu((3, 3))  # Create a random 3x3 array
+    >>> copied_array = af.copy_array(original_array)  # Make a deep copy of the original array
+    >>> original_array == copied_array
+    [3 3 1 1]
+         1          1          1
+         1          1          1
+         1          1          1
+
+    >>> original_array.arr == copied_array.arr
+    False
+    """
     return cast(Array, wrapper.copy_array(array.arr))
 
 
 def eval(*arrays: Array) -> None:
+    """
+    Forces the evaluation of one or more ArrayFire arrays.
+
+    In ArrayFire, operations are typically executed lazily, meaning that computations are not actually performed
+    until the results are needed. This function forces the evaluation of its input arrays, potentially optimizing
+    the execution by combining multiple operations into a single kernel launch.
+
+    Parameters
+    ----------
+    *arrays : Array
+        Variable number of ArrayFire arrays to be evaluated. All input arrays should be of the same size.
+
+    Note
+    ----
+    It's important to ensure that all input arrays are of the same size to avoid runtime errors. This function
+    facilitates performing multiple array operations in a single step, improving performance by reducing the
+    number of kernel launches.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.constant(1, (3, 3))
+    >>> b = af.constant(2, (3, 3))
+    >>> c = a + b
+    >>> d = a - b
+    >>> af.eval(c, d)  # Forces evaluation, optimizing the execution
+    >>> c
+    [3 3 1 1]
+        3.0000     3.0000     3.0000
+        3.0000     3.0000     3.0000
+        3.0000     3.0000     3.0000
+
+    >>> d
+    [3 3 1 1]
+        -1.0000    -1.0000    -1.0000
+        -1.0000    -1.0000    -1.0000
+        -1.0000    -1.0000    -1.0000
+
+    In this example, `eval` is used to force the evaluation of `c` and `d`. Instead of executing two separate
+    operations (addition and subtraction), ArrayFire optimizes the process into a single kernel execution, thus
+    enhancing performance.
+    """
     if len(arrays) == 1:
         wrapper.eval(arrays[0].arr)
         return
@@ -269,8 +660,7 @@ def flat(array: Array, /) -> Array:
     --------
     >>> import arrayfire as af
     >>> arr = af.randu(3, 2)  # Create a 3x2 random array
-    >>> flattened = af.flat(arr)  # Flatten the array
-    >>> af.display(flattened)
+    >>> af.flat(arr)  # Flatten the array
     [6 1 1 1]
         0.8364
         0.5604
@@ -284,11 +674,102 @@ def flat(array: Array, /) -> Array:
 
 @afarray_as_array
 def flip(array: Array, /, *, axis: int = 0) -> Array:
+    """
+    Flip an ArrayFire array along a specified dimension.
+
+    This function reverses the order of the elements of the input array along the specified axis (dimension).
+    Flipping an array along its vertical axis (0) will invert the rows, whereas flipping along the horizontal
+    axis (1) will invert the columns, and so on for higher dimensions.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array to be flipped.
+
+    axis : int, optional, keyword-only, default: 0
+        The dimension along which to flip the array. For a 2D array, 0 flips it vertically, and 1 flips it
+        horizontally.
+
+    Returns
+    -------
+    Array
+        The ArrayFire array resulting from flipping the input array along the specified axis.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3, 3))  # Generate a random 3x3 array
+    >>> a
+    [3 3 1 1]
+        0.7269     0.3569     0.3341
+        0.7104     0.1437     0.0899
+        0.5201     0.4563     0.5363
+
+    >>> af.flip(a, axis=0)  # Flip vertically
+    [3 3 1 1]
+        0.5201     0.4563     0.5363
+        0.7104     0.1437     0.0899
+        0.7269     0.3569     0.3341
+
+    >>> af.flip(a, axis=1)  # Flip horizontally
+    [3 3 1 1]
+        0.3341     0.3569     0.7269
+        0.0899     0.1437     0.7104
+        0.5363     0.4563     0.5201
+    """
     return cast(Array, wrapper.flip(array.arr, axis))
 
 
 @afarray_as_array
 def join(axis: int, /, *arrays: Array) -> Array:
+    """
+    Join two or more ArrayFire arrays along a specified dimension.
+
+    This function concatenates the given arrays along the specified axis (dimension). The arrays must have compatible
+    shapes in all dimensions except for the dimension along which they are being joined.
+
+    Parameters
+    ----------
+    axis : int
+        The dimension along which to join the arrays. For example, for 2D arrays, 0 would join the arrays vertically
+        (adding rows), and 1 would join them horizontally (adding columns).
+
+    *arrays : Array
+        A variable number of ArrayFire arrays to be joined. At least two arrays must be provided. The function is
+        capable of joining up to 10 arrays due to API limitations.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array resulting from concatenating the input arrays along the specified axis.
+
+    Raises
+    ------
+    ValueError
+        If fewer than two arrays are provided as input.
+
+    Warning
+        If more than 10 arrays are provided, only the first 10 will be processed due to API limitations.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((2, 3))
+    >>> b = af.randu((2, 3))
+    >>> c = af.join(0, a, b)  # Join vertically
+    >>> d = af.join(1, a, b)  # Join horizontally
+    >>> c
+    [4 3 1 1]
+        0.9508     0.2591     0.7928
+        0.5367     0.8359     0.8719
+        0.3266     0.6009     0.2442
+        0.6275     0.0495     0.6591
+
+    >>> d
+    [2 6 1 1]
+        0.9508     0.2591     0.7928     0.3266     0.6009     0.2442
+        0.5367     0.8359     0.8719     0.6275     0.0495     0.6591
+    """
     if len(arrays) < 2:
         raise ValueError("Shape should be at least 2 dimensional.")
     if len(arrays) == 2:
@@ -333,22 +814,139 @@ def moddims(array: Array, shape: tuple[int, ...], /) -> Array:
     This function modifies the shape of the input array without changing the
     data layout. The resulting array will have the same data, but with a
     different shape as specified by the `shape` parameter.
-
-    Examples
-    --------
-    >>> a = af.randu(2, 3, 4)  # Create a random 3D array
-    >>> b = moddims(a, (6, 2))  # Reshape to a 2D array with 6 rows and 2 columns
     """
-
+    # TODO add examples to doc
     return cast(Array, wrapper.moddims(array.arr, shape))
 
 
 @afarray_as_array
 def reorder(array: Array, /, *, shape: tuple[int, ...] = (1, 0, 2, 3)) -> Array:
+    """
+    Reorders the dimensions of the given ArrayFire array according to the specified order.
+
+    This function changes the order of the dimensions of the input array, which can be useful for data rearrangement
+    or alignment before further processing. The new dimension order is specified by the `shape` parameter.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array whose dimensions are to be reordered.
+
+    shape : tuple[int, ...], optional, keyword-only, default: (1, 0, 2, 3)
+        The new order of the dimensions. The default value swaps the first two dimensions, similar to performing
+        a transpose operation on a 2D array. Each element in the tuple represents the index of the dimension in the
+        input array that should be moved to this position.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array with its dimensions reordered according to the specified `shape`.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((5, 5, 3))  # Generate a 5x5x3 random array
+    >>> a
+    [5 5 3 1]
+        0.4107     0.0081     0.6600     0.1046     0.8395
+        0.8224     0.3775     0.0764     0.8827     0.1933
+        0.9518     0.3027     0.0901     0.1647     0.7270
+        0.1794     0.6456     0.5933     0.8060     0.0322
+        0.4198     0.5591     0.1098     0.5938     0.0012
+
+        0.8703     0.9250     0.4387     0.6530     0.4224
+        0.5259     0.3063     0.3784     0.5476     0.5293
+        0.1443     0.9313     0.4002     0.8577     0.0212
+        0.3253     0.8684     0.4390     0.8370     0.1103
+        0.5081     0.6592     0.4718     0.0618     0.4420
+
+        0.8355     0.6767     0.1033     0.9426     0.9276
+        0.4878     0.6742     0.2119     0.4817     0.8662
+        0.2055     0.4523     0.5955     0.9097     0.3578
+        0.1794     0.1236     0.3745     0.6821     0.6263
+        0.5606     0.7924     0.9165     0.6056     0.9747
+
+    >>> b = af.reorder(a, shape=(2, 0, 1))  # Reorder dimensions: move the third dimension to the first
+    >>> b
+    [3 5 5 1]
+        0.4107     0.8224     0.9518     0.1794     0.4198
+        0.8703     0.5259     0.1443     0.3253     0.5081
+        0.8355     0.4878     0.2055     0.1794     0.5606
+
+        0.0081     0.3775     0.3027     0.6456     0.5591
+        0.9250     0.3063     0.9313     0.8684     0.6592
+        0.6767     0.6742     0.4523     0.1236     0.7924
+
+        0.6600     0.0764     0.0901     0.5933     0.1098
+        0.4387     0.3784     0.4002     0.4390     0.4718
+        0.1033     0.2119     0.5955     0.3745     0.9165
+
+        0.1046     0.8827     0.1647     0.8060     0.5938
+        0.6530     0.5476     0.8577     0.8370     0.0618
+        0.9426     0.4817     0.9097     0.6821     0.6056
+
+        0.8395     0.1933     0.7270     0.0322     0.0012
+        0.4224     0.5293     0.0212     0.1103     0.4420
+        0.9276     0.8662     0.3578     0.6263     0.9747
+
+    Note
+    ----
+    - The `shape` tuple must contain all integers from 0 up to the number of dimensions in `array` - 1, without
+    repetition.
+    - Reordering dimensions can be particularly useful in preparation for operations that require a specific dimension
+    order.
+    """
     return cast(Array, wrapper.reorder(array.arr, *shape))
 
 
 def replace(lhs: Array, rhs: Array | int | float, conditional: Array, /) -> None:
+    """
+    Conditionally replaces elements of one ArrayFire array with elements from another array or a scalar value.
+
+    This function iterates over each element of the `lhs` array and replaces it with the corresponding element from
+    `rhs` if the corresponding element in the `conditional` array is True. If `rhs` is a scalar, `lhs` is updated with
+    this scalar value where the condition is True.
+
+    Parameters
+    ----------
+    lhs : Array
+        The left-hand side ArrayFire array whose elements may be replaced based on the condition.
+
+    rhs : Array | int | float
+        The right-hand side value(s) used for replacement. This can be an ArrayFire array, integer, or floating-point
+        scalar. If `rhs` is an array, it must be the same size as `lhs`.
+
+    conditional : Array
+        An ArrayFire array of boolean values indicating where replacement should occur. Must be the same size as `lhs`.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3,3))  # Generate a random 3x3 array
+    >>> a
+    [3 3 1 1]
+        0.4107     0.1794     0.3775
+        0.8224     0.4198     0.3027
+        0.9518     0.0081     0.6456
+
+    >>> cond = (a >= 0.25) & (a <= 0.75)  # Generate a condition array
+    >>> cond
+    [3 3 1 1]
+        1          0          1
+        0          1          1
+        0          0          1
+
+    >>> af.replace(a, 0.3333, cond)  # Replace where condition is True with 0.3333
+    >>> a
+    [3 3 1 1]
+        0.3333     0.1794     0.3333
+        0.8224     0.3333     0.3333
+        0.9518     0.0081     0.3333
+
+    Note
+    ----
+    - The `lhs`, `rhs` (if an array), and `conditional` arrays must be of the same size.
+    """
     if isinstance(rhs, Array):
         wrapper.replace(lhs.arr, conditional.arr, rhs.arr)
         return
@@ -356,24 +954,131 @@ def replace(lhs: Array, rhs: Array | int | float, conditional: Array, /) -> None
     wrapper.replace_scalar(lhs.arr, conditional.arr, rhs)
 
 
-def select(lhs: Array | int | float, rhs: Array | int | float, conditional: Array, /) -> None:
+def select(lhs: Array | int | float, rhs: Array | int | float, conditional: Array, /) -> Array:
+    """
+    Conditionally selects elements from one of two sources (ArrayFire arrays or scalars) based on a condition array.
+
+    This function iterates over each element of the `conditional` array. For elements where `conditional` is True,
+    it selects the corresponding element from `lhs`; otherwise, it selects from `rhs`. The `lhs` and `rhs` can be
+    either ArrayFire arrays or scalar values, but at least one of them must be an ArrayFire array.
+
+    Parameters
+    ----------
+    lhs : Array | int | float
+        The left-hand side source for selection. Can be an ArrayFire array or a scalar value. Elements from `lhs`
+        are selected where `conditional` is True.
+
+    rhs : Array | int | float
+        The right-hand side source for selection. Can be an ArrayFire array or a scalar value. Elements from `rhs`
+        are selected where `conditional` is False.
+
+    conditional : Array
+        An ArrayFire array of boolean values that serve as the condition for selection.
+
+    Raises
+    ------
+    TypeError
+        If neither `lhs` nor `rhs` is an ArrayFire array.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3,3))  # Generate a random 3x3 array
+    >>> b = af.randu((3,3))  # Generate another random 3x3 array
+    >>> cond = a > b  # Generate a boolean condition array
+
+    >>> a
+    [3 3 1 1]
+        0.4107     0.1794     0.3775
+        0.8224     0.4198     0.3027
+        0.9518     0.0081     0.6456
+
+    >>> b
+    [3 3 1 1]
+        0.7269     0.3569     0.3341
+        0.7104     0.1437     0.0899
+        0.5201     0.4563     0.5363
+
+    >>> af.select(a, b, cond)  # Conditionally select between `a` and `b`
+    [3 3 1 1]
+        0.7269     0.3569     0.3775
+        0.8224     0.4198     0.3027
+        0.9518     0.4563     0.6456
+
+    Note
+    ----
+    - The `conditional` array must be of the same size as both `lhs` and `rhs` if they are arrays.
+    - At least one of `lhs` or `rhs` must be an ArrayFire array.
+    """
     if isinstance(lhs, Array) and isinstance(rhs, Array):
-        wrapper.select(lhs.arr, conditional.arr, rhs.arr)
-        return
+        return cast(Array, wrapper.select(lhs.arr, conditional.arr, rhs.arr))
 
     if isinstance(lhs, Array) and not isinstance(rhs, Array):
-        wrapper.select_scalar_r(lhs.arr, conditional.arr, rhs)
-        return
+        return cast(Array, wrapper.select_scalar_r(lhs.arr, conditional.arr, rhs))
 
     if not isinstance(lhs, Array) and isinstance(rhs, Array):
-        wrapper.select_scalar_l(lhs, conditional.arr, rhs.arr)
-        return
+        return cast(Array, wrapper.select_scalar_l(lhs, conditional.arr, rhs.arr))
 
     raise TypeError("At least one array (lhr or rhs) must be of type af.Array.")
 
 
 @afarray_as_array
 def shift(array: Array, shape: tuple[int, ...], /) -> Array:
+    """
+    Shifts the input ArrayFire array along each dimension by specified amounts.
+
+    This function cyclically shifts the elements of the input array along each dimension. The amount of shift for each
+    dimension is specified in the `shape` tuple. A positive shift moves elements towards higher indices, while a
+    negative shift moves them towards lower indices.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array to be shifted.
+
+    shape : tuple[int, ...]
+        A tuple specifying the amount of shift along each dimension. Can contain up to four values, corresponding to
+        the shift along the first, second, third, and fourth dimensions, respectively. Unspecified dimensions are
+        assumed to have a shift of 0.
+
+    Raises
+    ------
+    ValueError
+        If the `shape` tuple contains more than four elements, as only up to 4-dimensional arrays are supported.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array of the same shape as `array`, shifted by the specified amounts along each dimension.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3, 3))  # Generate a random 3x3 array
+    >>> a
+    [3 3 1 1]
+        0.7269     0.3569     0.3341
+        0.7104     0.1437     0.0899
+        0.5201     0.4563     0.5363
+
+    >>> b = af.shift(a, (2,))  # Shift along the first dimension by 2
+    >>> b
+    [3 3 1 1]
+        0.7104     0.1437     0.0899
+        0.5201     0.4563     0.5363
+        0.7269     0.3569     0.3341
+
+    >>> c = af.shift(a, (1, -1))  # Shift along the first dimension by 1 and the second by -1
+    >>> c
+    [3 3 1 1]
+        0.4563     0.5363     0.5201
+        0.3569     0.3341     0.7269
+        0.1437     0.0899     0.7104
+
+    Note
+    ----
+    - Shifts are performed cyclically, meaning that elements shifted "off" one end of the array reappear at the other.
+    """
     if len(shape) > 4:
         raise ValueError("Max 4-dimensional arrays are supported.")
 
@@ -382,6 +1087,66 @@ def shift(array: Array, shape: tuple[int, ...], /) -> Array:
 
 @afarray_as_array
 def tile(array: Array, /, shape: tuple[int, ...]) -> Array:
+    """
+    Repeats an ArrayFire array along specified dimensions to create a tiled array.
+
+    This function creates a larger array by repeating the input array a specified number of times along each dimension.
+    The amount of repetition for each dimension is specified in the `shape` tuple. This can be used to duplicate data
+    along one or more axes.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array to be tiled.
+
+    shape : tuple[int, ...]
+        A tuple specifying the number of times the input array should be repeated along each dimension. Can contain up
+        to four values, corresponding to the repetition factor along the first, second, third, and fourth dimensions,
+        respectively. Dimensions not specified will not be tiled (i.e., treated as if they have a repetition factor
+        of 1).
+
+    Raises
+    ------
+    ValueError
+        If the `shape` tuple contains more than four elements, as only up to 4-dimensional arrays are supported.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array resulting from tiling the input array according to the specified `shape`.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((2, 3))  # Generate a 2x3 random array
+    >>> a
+    [2 3 1 1]
+        0.9508     0.2591     0.7928
+        0.5367     0.8359     0.8719
+
+    >>> af.tile(a, (2,))  # Tile along the first dimension by 2
+    [4 3 1 1]
+        0.9508     0.2591     0.7928
+        0.5367     0.8359     0.8719
+        0.9508     0.2591     0.7928
+        0.5367     0.8359     0.8719
+
+    >>> af.tile(a, (1, 2))  # Tile along the second dimension by 2
+    [2 6 1 1]
+        0.9508     0.2591     0.7928     0.9508     0.2591     0.7928
+        0.5367     0.8359     0.8719     0.5367     0.8359     0.8719
+
+    >>> af.tile(a, (2, 2))  # Tile both dimensions by 2
+    [4 6 1 1]
+        0.9508     0.2591     0.7928     0.9508     0.2591     0.7928
+        0.5367     0.8359     0.8719     0.5367     0.8359     0.8719
+        0.9508     0.2591     0.7928     0.9508     0.2591     0.7928
+        0.5367     0.8359     0.8719     0.5367     0.8359     0.8719
+
+    Note
+    ----
+    - The repetition factor of 1 means the dimension is not tiled.
+    """
     if len(shape) > 4:
         raise ValueError("Max 4-dimensional arrays are supported.")
 
@@ -390,8 +1155,110 @@ def tile(array: Array, /, shape: tuple[int, ...]) -> Array:
 
 @afarray_as_array
 def transpose(array: Array, /, *, conjugate: bool = False, inplace: bool = False) -> Array:
+    """
+    Perform the transpose (and optionally, the complex conjugate transpose) of an ArrayFire array.
+    The operation can be performed in-place for square matrices or square matrix batches.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array to be transposed. If `inplace` is True, `array` must be a square
+        matrix or a batch of square matrices.
+
+    conjugate : bool, optional, keyword-only, default: False
+        If True, performs a complex conjugate transpose. This is only relevant for complex data types and is ignored
+        for other data types.
+
+    inplace : bool, optional, keyword-only, default: False
+        If True, performs the transpose operation in-place, modifying the input `array`. The input `array` must be a
+        square matrix or a batch of square matrices.
+
+    Returns
+    -------
+    Array
+        If `inplace` is False, returns a new ArrayFire array containing the transpose of `array`. If `inplace` is True,
+        returns the modified `array` containing its own transpose. For in-place operations, the input `array` is
+        directly modified.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3, 3))  # Generate a random 3x3 array
+    >>> a
+    [3 3 1 1]
+        0.7269     0.3569     0.3341
+        0.7104     0.1437     0.0899
+        0.5201     0.4563     0.5363
+
+    >>> af.transpose(a)  # Transpose the array
+    [3 3 1 1]
+        0.7269     0.7104     0.5201
+        0.3569     0.1437     0.4563
+        0.3341     0.0899     0.5363
+
+    Note
+    ----
+    - The `inplace` operation requires the input array to be a square matrix or a batch of square matrices.
+    Attempting an in-place transpose on non-square matrices will result in an error.
+    - For complex matrices, setting `conjugate` to True applies the complex conjugate in addition to the transpose
+    operation.
+    """
     if inplace:
         wrapper.transpose_inplace(array.arr, conjugate)
         return array
 
     return cast(Array, wrapper.transpose(array.arr, conjugate))
+
+
+@afarray_as_array
+def lookup(array: Array, indices: Array, /, *, axis: int = 0) -> Array:
+    """
+    Performs a lookup operation on the input ArrayFire array based on the specified indices along a given dimension.
+
+    This function gathers elements from the input array `array` at positions specified by the `indices` array.
+    The operation is performed along the dimension specified by `axis`.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array from which elements are to be gathered.
+
+    indices : Array
+        An ArrayFire array containing the indices of elements to gather. The values in `indices` should be of integer
+        type.
+
+    axis : int, optional, keyword-only, default: 0
+        The dimension along which the lookup is performed.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array containing the elements of `array` at the locations specified by `indices`. The shape of
+        the output array is determined by the shape of `indices` and the dimension specified by `axis`.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.Array([1, 0, 3, 4, 5, 6], shape=(2, 3))  # Create a 2x3 array
+    >>> a
+    [2 3 1 1]
+        1.0000     3.0000     5.0000
+        0.0000     4.0000     6.0000
+
+    >>> idx = af.Array([0, 2])  # Indices for lookup
+    >>> af.lookup(a, idx, axis=1)  # Lookup along the second dimension
+    [2 2 1 1]
+        1.0000     5.0000
+        0.0000     6.0000
+
+    >>> idx = af.Array([0])  # Indices for lookup
+    >>> af.lookup(a, idx, axis=0)  # Lookup along the first dimension
+    [1 3 1 1]
+        1.0000     3.0000     5.0000
+
+    Note
+    ----
+    - The `indices` array must contain integer values indicating the positions of elements to gather from `array`.
+    - The dimension specified by `axis` must not exceed the number of dimensions in `array`.
+    """
+    return cast(Array, wrapper.lookup(array.arr, indices.arr, axis))
diff --git a/arrayfire/library/computer_vision.py b/arrayfire/library/computer_vision.py
index d4a0023..d230220 100644
--- a/arrayfire/library/computer_vision.py
+++ b/arrayfire/library/computer_vision.py
@@ -1,4 +1,15 @@
-__all__ = ["gloh", "orb", "sift", "dog", "fast", "harris", "susan", "hamming_matcher", "nearest_neighbour"]
+__all__ = [
+    "gloh",
+    "orb",
+    "sift",
+    "dog",
+    "fast",
+    "harris",
+    "susan",
+    "hamming_matcher",
+    "nearest_neighbour",
+    "match_template",
+]
 
 from typing import cast
 
@@ -8,6 +19,9 @@
 from arrayfire.library.constants import Match
 from arrayfire.library.features import Features
 
+# TODO
+# Add examples to docs
+
 
 def gloh(
     image: Array,
@@ -20,6 +34,43 @@ def gloh(
     intensity_scale: float = 1.0 / 255,
     feature_ratio: float = 0.05,
 ) -> tuple[Features, Array]:
+    """
+    Implements the GLOH (Gradient Location and Orientation Histogram) feature detection and descriptor extraction for
+    images.
+
+    Parameters
+    ----------
+    image : Array
+        A 2D ArrayFire array representing the input image.
+    n_layers : int, default: 3
+        The number of layers per octave. The number of octaves is calculated based on the image dimensions and
+        `initial_sigma`.
+    contrast_threshold : float, default: 0.04
+        The contrast threshold used to filter out weak features in low-contrast regions of the image.
+    edge_threshold : float, default: 10.0
+        The edge threshold used to filter out edge-like features to ensure feature points are from corners.
+    initial_sigma : float, default: 1.6
+        The initial sigma (scale) for the Gaussian blur applied to the image at the first layer.
+    dobule_input : bool, default: True
+        If True, the image size is doubled before processing to detect features at higher scales.
+    intensity_scale : float, default: 1.0 / 255
+        The scale factor applied to the image intensities, typically used to normalize the pixel values.
+    feature_ratio : float, default: 0.05
+        The ratio of the total number of pixels in the image used to limit the number of features detected.
+
+    Returns
+    -------
+    tuple[Features, Array]
+        A tuple containing two elements:
+        - `Features`: An object holding the detected features, including their locations and scales.
+        - `Array`: An ArrayFire array containing the GLOH descriptors for the detected features, with each descriptor
+          having 272 elements.
+
+    Note
+    ----
+    - The `gloh` function is particularly effective for object recognition and image matching tasks.
+    - The choice of parameters can significantly impact the number and quality of features detected and described.
+    """
     features, descriptors = wrapper.gloh(
         image.arr,
         n_layers,
@@ -42,6 +93,36 @@ def orb(
     n_levels: int = 4,
     blur_image: bool = False,
 ) -> tuple[Features, Array]:
+    """
+    Extracts ORB features and their descriptors from an image.
+
+    Parameters
+    ----------
+    image : Array
+        The input image as a 2D ArrayFire array.
+
+    fast_threshold : float, default=20.0
+        The FAST keypoint detection threshold.
+
+    max_features : int, default=400
+        The maximum number of keypoints to detect.
+
+    scale_factor : float, default=1.5
+        The scale factor between levels in the image pyramid.
+
+    n_levels : int, default=4
+        The number of levels in the image pyramid.
+
+    blur_image : bool, default=False
+        If True, the image is blurred before keypoint detection.
+
+    Returns
+    -------
+    tuple[Features, Array]
+        A tuple containing:
+        - An ArrayFire Features object with detected keypoints.
+        - An ArrayFire Array with corresponding descriptors.
+    """
     features, descriptors = wrapper.orb(image.arr, fast_threshold, max_features, scale_factor, n_levels, blur_image)
     return Features.from_affeatures(features), Array.from_afarray(descriptors)
 
@@ -57,6 +138,51 @@ def sift(
     intensity_scale: float = 1.0 / 255,
     feature_ratio: float = 0.05,
 ) -> tuple[Features, Array]:
+    """
+    Extracts SIFT features and their descriptors from an image using the ArrayFire library.
+
+    Parameters
+    ----------
+    image : Array
+        The input image as a 2D ArrayFire array on which SIFT features are to be detected.
+
+    n_layers : int, default=3
+        The number of layers per octave in the SIFT algorithm. The number of octaves is calculated based on the image
+        size.
+
+    contrast_threshold : float, default=0.04
+        The contrast threshold used to filter out weak features in low-contrast regions of the image.
+
+    edge_threshold : float, default=10.0
+        The edge threshold used to filter out edge-like features. Higher values mean fewer features rejected based on
+        edge-like characteristics.
+
+    initial_sigma : float, default=1.6
+        The initial sigma (standard deviation) for the Gaussian blur applied to the image before feature detection.
+
+    dobule_input : bool, default=True
+        If True, the input image will be upscaled by a factor of 2 before processing, which helps in detecting features
+        at larger scales.
+
+    intensity_scale : float, default=1.0 / 255
+        The scale factor applied to the image intensities. Typically used to normalize the image intensities.
+
+    feature_ratio : float, default=0.05
+        The maximum number of features to be detected, expressed as a ratio of the total number of image pixels.
+
+    Returns
+    -------
+    tuple[Features, Array]
+        A tuple containing:
+        - An ArrayFire Features object encapsulating the detected keypoints.
+        - An ArrayFire Array containing the corresponding descriptors for each keypoint. The descriptors are
+          128-dimensional vectors describing the local appearance around each keypoint.
+
+    Note
+    ----
+    The SIFT algorithm is a patented technique, and its use in commercial applications may require licensing. In
+    academic and research settings, it remains a popular choice due to its robustness and reliability.
+    """
     features, descriptors = wrapper.sift(
         image.arr,
         n_layers,
@@ -72,6 +198,36 @@ def sift(
 
 @afarray_as_array
 def dog(image: Array, radius1: int, radius2: int, /) -> Array:
+    """
+    Performs the Difference of Gaussians (DoG) operation on an image. This operation is a band-pass filter that
+    highlights regions of an image with high spatial frequency, which correspond to edges. Typically used in edge
+    detection and as a preprocessing step in feature extraction algorithms like SIFT.
+
+    Parameters
+    ----------
+    image : Array
+        The input image as a 2D ArrayFire array.
+
+    radius1 : int
+        The radius of the first Gaussian blur kernel. This parameter indirectly controls the sigma (standard deviation)
+        of the Gaussian function, with a larger radius resulting in a more significant blur.
+
+    radius2 : int
+        The radius of the second Gaussian blur kernel. As with `radius1`, this parameter controls the degree of blur,
+        but typically `radius2` > `radius1` to ensure a broader range of spatial frequencies are captured.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array containing the result of the DoG operation. The output array highlights edges and
+        transitions in the input image, with higher intensity values corresponding to stronger edges.
+
+    Note
+    ----
+    The effective sigma values for the Gaussian blurs are calculated as 0.25 * radius, where the radius is the
+    parameter passed to the function. The DoG operation is sensitive to the choice of radius parameters, which should
+    be chosen based on the specific requirements of the application and the characteristics of the input image.
+    """
     return cast(Array, wrapper.dog(image.arr, radius1, radius2))
 
 
@@ -84,6 +240,46 @@ def fast(
     feature_ratio: float = 0.05,
     edge: int = 3,
 ) -> Features:
+    """
+    Detects corners and interest points in an image using the FAST (Features from Accelerated Segment Test) algorithm.
+
+    Parameters
+    ----------
+    image : Array
+        The input image as a 2D ArrayFire array. The image should be grayscale.
+
+    fast_threshold : float, default=20.0
+        The intensity threshold for considering a pixel to be brighter or darker than the circular set of pixels
+        around the candidate pixel. This value determines the sensitivity of the feature detection: higher values
+        result in fewer features being detected.
+
+    arc_length : int, default=9
+        The minimum number of contiguous edge pixels (in the circle around the candidate pixel) required for the
+        candidate pixel to be considered as a corner. The maximum length should be 16.
+
+    non_max : bool, default=True
+        If True, non-maximal suppression is applied to the detected features, ensuring that only the strongest
+        features are retained.
+
+    feature_ratio : float, default=0.05
+        Specifies the maximum ratio of features to pixels in the image, controlling the density of features detected.
+
+    edge : int, default=3
+        The number of pixels to ignore along the edge of the image. This parameter helps in excluding features that
+        are too close to the edge of the image, which may not be reliable.
+
+    Returns
+    -------
+    Features
+        An ArrayFire Features object containing the detected points. The features include locations and scores,
+        while orientations and sizes are not computed by the FAST algorithm.
+
+    Note
+    ----
+    The FAST algorithm is particularly well-suited for real-time applications due to its computational efficiency.
+    However, it is sensitive to the choice of `fast_threshold` and `arc_length` parameters, which should be tuned
+    based on the specific requirements of the application and the characteristics of the input images.
+    """
     return Features.from_affeatures(wrapper.fast(image.arr, fast_threshold, arc_length, non_max, feature_ratio, edge))
 
 
@@ -94,8 +290,46 @@ def harris(
     min_response: float = 1e5,
     sigma: float = 1.0,
     block_size: int = 0,
-    k_threshold: float = 0,
+    k_threshold: float = 0.04,
 ) -> Features:
+    """
+    Detects corners in an image using the Harris corner detection algorithm.
+
+    Parameters
+    ----------
+    image : Array
+        The input image as a 2D ArrayFire array. The image should be grayscale for optimal results.
+
+    max_corners : int, default=500
+        The maximum number of corners to return. If there are more corners than `max_corners`, only the strongest
+        ones (as determined by the Harris response) are returned.
+
+    min_response : float, default=1e5
+        The minimum response value for a corner to be considered. This value helps to filter out weak corners.
+
+    sigma : float, default=1.0
+        The standard deviation of the Gaussian filter applied to the input image. This parameter is used only
+        when `block_size` is 0. Valid ranges are 0.5 to 5.0.
+
+    block_size : int, default=0
+        The size of the neighborhood considered for corner detection. A larger value considers a larger neighborhood.
+        If set to 0, a circular window based on `sigma` is used instead.
+
+    k_threshold : float, default=0.04
+        The Harris detector free parameter in the equation. Common values are between 0.04 and 0.06.
+
+    Returns
+    -------
+    Features
+        An ArrayFire Features object containing the detected corners' locations and their Harris response scores.
+        Orientation and size are not computed.
+
+    Note
+    ----
+    The Harris corner detector is particularly sensitive to `sigma`, `block_size`, and `k_threshold` parameters,
+    which should be chosen based on the specific requirements of the application and the characteristics of the input
+    images. It's recommended to adjust these parameters to balance detection sensitivity and computational efficiency.
+    """
     return Features.from_affeatures(
         wrapper.harris(image.arr, max_corners, min_response, sigma, block_size, k_threshold)
     )
@@ -104,18 +338,93 @@ def harris(
 def susan(
     image: Array,
     /,
-    radius: int = 500,
-    diff_threshold: float = 1e5,
-    geom_threshold: float = 1.0,
+    radius: int = 3,
+    diff_threshold: float = 32.0,
+    geom_threshold: float = 10.0,
     feature_ratio: float = 0.05,
     edge: int = 3,
 ) -> Features:
+    """
+    Detects corners and edges in an image using the SUSAN corner detection algorithm.
+
+    Parameters
+    ----------
+    image : Array
+        The input image as a 2D ArrayFire array. The image should be grayscale.
+
+    radius : int, default=3
+        The radius of the circular mask applied to each pixel to determine if it's a corner. A smaller radius
+        will detect finer features, while a larger radius will detect broader features.
+
+    diff_threshold : float, default=32.0
+        The intensity difference threshold. This value determines how much the intensity of neighboring pixels
+        can differ from the nucleus (central pixel) to be considered part of the univalue segment.
+
+    geom_threshold : float, default=10.0
+        The geometric threshold. This value determines the minimum number of contiguous pixels within the
+        circular mask that need to be similar to the nucleus for a pixel to be considered a corner.
+
+    feature_ratio : float, default=0.05
+        Specifies the maximum number of features to detect as a ratio of total image pixels. This helps to control
+        the density of features detected in the image.
+
+    edge : int, default=3
+        Specifies the number of pixels to ignore along the edge of the image. This is useful for excluding features
+        that are too close to the edges and may not be reliable.
+
+    Returns
+    -------
+    Features
+        An ArrayFire Features object containing the detected corners and edges' locations. Orientation and size are
+        not computed for SUSAN features.
+
+    Note
+    ----
+    The SUSAN algorithm is sensitive to the choice of `radius`, `diff_threshold`, and `geom_threshold` parameters.
+    These should be carefully chosen based on the specific requirements of the application and the characteristics
+    of the input image. Adjusting these parameters can help balance the sensitivity to corners and computational
+    efficiency.
+    """
     return Features.from_affeatures(
         wrapper.susan(image.arr, radius, diff_threshold, geom_threshold, feature_ratio, edge)
     )
 
 
 def hamming_matcher(query: Array, train: Array, /, axis: int = 0, n_nearest: int = 1) -> tuple[Array, Array]:
+    """
+    Finds the nearest neighbors for each descriptor in a query set from a training set, based on the Hamming distance.
+
+    Parameters
+    ----------
+    query : Array
+        The query set of feature descriptors as an ArrayFire array. Each descriptor should be a row in a 2D array
+        or along the specified `axis` in higher dimensions.
+
+    train : Array
+        The training set of feature descriptors as an ArrayFire array. This serves as the database from which the
+        closest matches to the query descriptors are found.
+
+    axis : int, default=0
+        The dimension along which the feature descriptors are aligned. Typically, descriptors are arranged as rows
+        in a 2D array (axis=0).
+
+    n_nearest : int, default=1
+        The number of nearest neighbors to find for each query descriptor. Setting `n_nearest` greater than 1 enables
+        finding multiple close matches.
+
+    Returns
+    -------
+    tuple[Array, Array]
+        A tuple containing two ArrayFire arrays:
+        - The first array contains the indices of the closest matches in the training set for each query descriptor.
+        - The second array contains the Hamming distances of these matches.
+
+    Note
+    ----
+    The Hamming matcher is particularly effective for binary feature descriptors and is widely used in computer vision
+    tasks such as object recognition and tracking. When using `n_nearest` > 1, the function returns multiple matches
+    per query descriptor, which can be useful for robust matching strategies.
+    """
     indices, distance = wrapper.hamming_matcher(query.arr, train.arr, axis, n_nearest)
     return Array.from_afarray(indices), Array.from_afarray(distance)
 
@@ -123,5 +432,49 @@ def hamming_matcher(query: Array, train: Array, /, axis: int = 0, n_nearest: int
 def nearest_neighbour(
     query: Array, train: Array, /, axis: int = 0, n_nearest: int = 1, match_type: Match = Match.SSD
 ) -> tuple[Array, Array]:
+    """
+    Finds the nearest neighbors for each descriptor in a query set from a training set based on a specified metric.
+
+    Parameters
+    ----------
+    query : Array
+        The query set of feature descriptors as an ArrayFire array. Each descriptor should be aligned along the
+        specified `axis` in a multidimensional array.
+
+    train : Array
+        The training set of feature descriptors as an ArrayFire array. This serves as the database from which the
+        closest matches to the query descriptors are found.
+
+    axis : int, default=0
+        The dimension along which the feature descriptors are aligned. For a 2D array of descriptors, this is
+        typically 0, indicating that each descriptor is a row.
+
+    n_nearest : int, default=1
+        The number of nearest neighbors to find for each query descriptor. Allows for finding multiple matches per
+        query.
+
+    match_type : Match, default=Match.SSD
+        The matching metric to use for finding nearest neighbors. `Match.SSD` uses the sum of squared differences,
+        suitable for floating-point descriptors. Other metrics can be specified if supported.
+
+    Returns
+    -------
+    tuple[Array, Array]
+        A tuple containing two ArrayFire arrays:
+        - The first array contains the indices of the closest matches in the training set for each query descriptor.
+        - The second array contains the distances of these matches according to the specified metric.
+
+    Note
+    ----
+    The `nearest_neighbour` function is versatile, supporting various descriptor types and matching metrics. It is
+    particularly useful in computer vision tasks such as object recognition, where matching feature descriptors between
+    images is essential.
+    """
     indices, distance = wrapper.nearest_neighbour(query.arr, train.arr, axis, n_nearest, match_type)
     return Array.from_afarray(indices), Array.from_afarray(distance)
+
+
+def match_template(search_image: Array, template_image: Array, /, match_type: Match = Match.SAD) -> Array:
+    """Run template matching of `template_image` over `search_image` using the `match_type` metric."""
+    template = wrapper.match_template(search_image.arr, template_image.arr, match_type)
+    return Array.from_afarray(template)
diff --git a/arrayfire/library/constants.py b/arrayfire/library/constants.py
index f6cfb43..4fd76e2 100644
--- a/arrayfire/library/constants.py
+++ b/arrayfire/library/constants.py
@@ -51,5 +51,3 @@
 import arrayfire_wrapper.lib as wrapper
 
 import arrayfire as af
-
-pi = wrapper.constant(math.pi, (1,), af.float64)
diff --git a/arrayfire/library/device.py b/arrayfire/library/device.py
index c511011..52f49c9 100644
--- a/arrayfire/library/device.py
+++ b/arrayfire/library/device.py
@@ -24,8 +24,6 @@
     "set_mem_step_size",
 ]
 
-import enum
-
 from arrayfire_wrapper.lib import (
     alloc_device,
     alloc_host,
@@ -49,14 +47,25 @@
     set_device,
     set_kernel_cache_directory,
     set_mem_step_size,
-    sync,
 )
+from arrayfire_wrapper.lib import sync as wrapper_sync
 
 
-class PointerSource(enum.Enum):
+def sync(device_id: int | None = None) -> None:
     """
-    Source of the pointer.
+    Blocks until all the functions on the specified device have completed execution.
+
+    This function is used to synchronize the program execution with the operations
+    being carried out on a GPU or other computation device, ensuring that all
+    previously submitted operations are complete before the program proceeds.
+
+    Parameters
+    ----------
+    device_id : int | None, optional
+        The ID of the device on which to wait for all operations to complete.
+        If None is provided, the current active device is used. Default is None.
     """
+    if device_id is None:
+        device_id = get_device()
 
-    device = 0  # gpu
-    host = 1  # cpu
+    wrapper_sync(device_id)
diff --git a/arrayfire/library/image_processing.py b/arrayfire/library/image_processing.py
index 18c39c8..11f9d68 100644
--- a/arrayfire/library/image_processing.py
+++ b/arrayfire/library/image_processing.py
@@ -202,7 +202,7 @@ def sobel_operator(image: Array, /, *, kernel_size: int = 3) -> tuple[Array, Arr
 
 # Gaussian kernel
 
-
+@afarray_as_array
 def gaussian_kernel(
     rows: int, columns: int, /, *, rows_sigma: None | float = None, columns_sigma: None | float = None
 ) -> Array:
diff --git a/arrayfire/library/linear_algebra.py b/arrayfire/library/linear_algebra.py
index 543062e..ee1f09e 100644
--- a/arrayfire/library/linear_algebra.py
+++ b/arrayfire/library/linear_algebra.py
@@ -15,7 +15,7 @@
     "solve",
 ]
 
-from typing import cast
+from typing import Literal, cast, overload
 
 import arrayfire_wrapper.lib as wrapper
 from arrayfire_wrapper.lib import is_lapack_available
@@ -24,6 +24,17 @@
 from arrayfire.array_object import afarray_as_array
 from arrayfire.library.constants import MatProp, Norm
 
+# TODO
+# Add missing documentation
+
+
+@overload
+def dot(lhs: Array, rhs: Array, /, *, return_scalar: Literal[True]) -> int | float | complex: ...
+
+
+@overload
+def dot(lhs: Array, rhs: Array, /, *, return_scalar: Literal[False] = False) -> Array: ...
+
 
 def dot(
     lhs: Array,
@@ -184,6 +195,14 @@ def matmul(lhs: Array, rhs: Array, /, lhs_opts: MatProp = MatProp.NONE, rhs_opts
     return cast(Array, wrapper.matmul(lhs.arr, rhs.arr, lhs_opts, rhs_opts))
 
 
+@overload
+def cholesky(array: Array, /, is_upper: bool = True, *, inplace: Literal[True]) -> int: ...
+
+
+@overload
+def cholesky(array: Array, /, is_upper: bool = True, *, inplace: Literal[False] = False) -> tuple[Array, int]: ...
+
+
 def cholesky(array: Array, /, is_upper: bool = True, *, inplace: bool = False) -> int | tuple[Array, int]:
     if inplace:
         return wrapper.cholesky_inplace(array.arr, is_upper)
@@ -192,6 +211,14 @@ def cholesky(array: Array, /, is_upper: bool = True, *, inplace: bool = False) -
     return Array.from_afarray(matrix), info
 
 
+@overload
+def lu(array: Array, /, *, inplace: Literal[True], is_lapack_pivot: bool = True) -> Array: ...
+
+
+@overload
+def lu(array: Array, /, *, inplace: Literal[False] = False, is_lapack_pivot: bool = True) -> tuple[Array, ...]: ...
+
+
 def lu(array: Array, /, *, inplace: bool = False, is_lapack_pivot: bool = True) -> Array | tuple[Array, ...]:
     if inplace:
         return Array.from_afarray(wrapper.lu_inplace(array.arr, is_lapack_pivot))
@@ -200,6 +227,14 @@ def lu(array: Array, /, *, inplace: bool = False, is_lapack_pivot: bool = True)
     return Array.from_afarray(lower), Array.from_afarray(upper), Array.from_afarray(pivot)
 
 
+@overload
+def qr(array: Array, /, *, inplace: Literal[True]) -> Array: ...
+
+
+@overload
+def qr(array: Array, /, *, inplace: Literal[False] = False) -> tuple[Array, ...]: ...
+
+
 def qr(array: Array, /, *, inplace: bool = False) -> Array | tuple[Array, ...]:
     if inplace:
         return Array.from_afarray(wrapper.qr_inplace(array.arr))
@@ -247,5 +282,5 @@ def solve(a: Array, b: Array, /, *, options: MatProp = MatProp.NONE, pivot: None
     return cast(Array, wrapper.solve(a.arr, b.arr, options))
 
 
-# TODO #good_first_issue
-# Add Sparse functions
+# TODO
+# Create issues as #good_first_issue: add Sparse functions
diff --git a/arrayfire/library/random.py b/arrayfire/library/random.py
index 5f98193..143bacc 100644
--- a/arrayfire/library/random.py
+++ b/arrayfire/library/random.py
@@ -132,14 +132,14 @@ def from_engine(cls, engine: wrapper.AFRandomEngineHandle) -> RandomEngine:
 
 
 @afarray_as_array
-def randn(shape: tuple[int, ...], /, *, dtype: Dtype = float32, engine: RandomEngine | None = None) -> Array:
+def randn(shape: int | tuple[int, ...], /, *, dtype: Dtype = float32, engine: RandomEngine | None = None) -> Array:
     """
     Create a multi-dimensional array containing values sampled from a normal distribution with mean 0
     and standard deviation of 1.
 
     Parameters
     ----------
-    shape : tuple[int, ...]
+    shape : int | tuple[int, ...]
         The shape of the resulting array. Must be a tuple with at least one element, e.g., shape=(3,).
 
     dtype : Dtype, optional, default: `float32`
@@ -166,8 +166,11 @@ def randn(shape: tuple[int, ...], /, *, dtype: Dtype = float32, engine: RandomEn
     Raises
     ------
     ValueError
-        If `shape` is not a tuple or has less than one value.
+        If `shape` is neither an int nor a tuple, or is an empty tuple.
     """
+    if isinstance(shape, int):
+        shape = (shape,)
+
     if not isinstance(shape, tuple) or not shape:
         raise ValueError("Argument shape must be a tuple with at least 1 value.")
 
@@ -178,13 +181,13 @@ def randn(shape: tuple[int, ...], /, *, dtype: Dtype = float32, engine: RandomEn
 
 
 @afarray_as_array
-def randu(shape: tuple[int, ...], /, *, dtype: Dtype = float32, engine: RandomEngine | None = None) -> Array:
+def randu(shape: int | tuple[int, ...], /, *, dtype: Dtype = float32, engine: RandomEngine | None = None) -> Array:
     """
     Create a multi-dimensional array containing values from a uniform distribution.
 
     Parameters
     ----------
-    shape : tuple[int, ...]
+    shape : int | tuple[int, ...]
         The shape of the resulting array. Must have at least 1 element, e.g., shape=(3,)
 
     dtype : Dtype, optional, default: float32
@@ -211,6 +214,9 @@ def randu(shape: tuple[int, ...], /, *, dtype: Dtype = float32, engine: RandomEn
     ValueError
         If shape is not a tuple or has less than one value.
     """
+    if isinstance(shape, int):
+        shape = (shape,)
+
     if not isinstance(shape, tuple) or not shape:
         raise ValueError("Argument shape must be a tuple with at least 1 value.")
 
diff --git a/arrayfire/library/signal_processing.py b/arrayfire/library/signal_processing.py
index 93ef03b..bd81dec 100644
--- a/arrayfire/library/signal_processing.py
+++ b/arrayfire/library/signal_processing.py
@@ -11,6 +11,7 @@
     "fft_convolve1",
     "fft_convolve2",
     "fft_convolve3",
+    "convolve2",
     "ifft",
     "ifft2",
     "ifft3",
@@ -21,6 +22,11 @@
     "approx1_uniform",
     "approx2",
     "approx2_uniform",
+    "convolve1",
+    "convolve2",
+    "convolve2_nn",
+    "convolve2_separable",
+    "convolve3",
 ]
 
 from typing import cast
diff --git a/arrayfire/library/statistics.py b/arrayfire/library/statistics.py
index 7ad70c3..953ee4c 100644
--- a/arrayfire/library/statistics.py
+++ b/arrayfire/library/statistics.py
@@ -1,6 +1,6 @@
 __all__ = ["corrcoef", "cov", "mean", "median", "stdev", "topk", "var"]
 
-from typing import cast
+from typing import cast, overload
 
 import arrayfire_wrapper.lib as wrapper
 
@@ -8,6 +8,9 @@
 from arrayfire.array_object import afarray_as_array
 from arrayfire.library.constants import TopK, VarianceBias
 
+# TODO
+# Add missing documentation
+
 
 def corrcoef(x: Array, y: Array, /) -> int | float | complex:
     return wrapper.corrcoef(x.arr, y.arr)
@@ -18,6 +21,22 @@ def cov(x: Array, y: Array, /, *, bias: VarianceBias = VarianceBias.DEFAULT) ->
     return cast(Array, wrapper.cov(x.arr, y.arr, bias))
 
 
+@overload
+def mean(x: Array, /, axis: None = None, *, weights: None = None) -> int | float | complex: ...
+
+
+@overload
+def mean(x: Array, /, axis: int, *, weights: None = None) -> Array: ...
+
+
+@overload
+def mean(x: Array, /, axis: None, *, weights: Array) -> int | float | complex: ...
+
+
+@overload
+def mean(x: Array, /, axis: int, *, weights: Array) -> Array: ...
+
+
 def mean(x: Array, /, axis: None | int = None, *, weights: None | Array = None) -> int | float | complex | Array:
     if weights:
         if axis is None:
@@ -31,6 +50,14 @@ def mean(x: Array, /, axis: None | int = None, *, weights: None | Array = None)
     return Array.from_afarray(wrapper.mean(x.arr, axis))
 
 
+@overload
+def median(x: Array, /, axis: None = None) -> int | float | complex: ...
+
+
+@overload
+def median(x: Array, /, axis: int) -> Array: ...
+
+
 def median(x: Array, /, axis: None | int = None) -> int | float | complex | Array:
     if axis is None:
         return wrapper.median_all(x.arr)
@@ -38,6 +65,14 @@ def median(x: Array, /, axis: None | int = None) -> int | float | complex | Arra
     return Array.from_afarray(wrapper.median(x.arr, axis))
 
 
+@overload
+def stdev(x: Array, /, axis: None = None, *, bias: VarianceBias = VarianceBias.DEFAULT) -> int | float | complex: ...
+
+
+@overload
+def stdev(x: Array, /, axis: int, *, bias: VarianceBias = VarianceBias.DEFAULT) -> Array: ...
+
+
 def stdev(
     x: Array, /, axis: None | int = None, *, bias: VarianceBias = VarianceBias.DEFAULT
 ) -> int | float | complex | Array:
@@ -52,6 +87,26 @@ def topk(x: Array, k: int, /, *, axis: int = 0, order: TopK = TopK.DEFAULT) -> t
     return Array.from_afarray(values), Array.from_afarray(indices)
 
 
+@overload
+def var(
+    x: Array, /, axis: None = None, *, weights: None = None, bias: VarianceBias = VarianceBias.DEFAULT
+) -> int | float | complex: ...
+
+
+@overload
+def var(x: Array, /, axis: int, *, weights: None = None, bias: VarianceBias = VarianceBias.DEFAULT) -> Array: ...
+
+
+@overload
+def var(
+    x: Array, /, axis: None, *, weights: Array, bias: VarianceBias = VarianceBias.DEFAULT
+) -> int | float | complex: ...
+
+
+@overload
+def var(x: Array, /, axis: int, *, weights: Array, bias: VarianceBias = VarianceBias.DEFAULT) -> Array: ...
+
+
 def var(
     x: Array,
     /,
diff --git a/arrayfire/library/unified_api_functions.py b/arrayfire/library/unified_api_functions.py
deleted file mode 100644
index 1894b4d..0000000
--- a/arrayfire/library/unified_api_functions.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# TODO
-# Temp solution. Remove when arrayfire-binary-python-wrapper is finalized
-
-# __all__ = [
-#     "get_active_backend",
-#     "get_available_backends",
-#     "get_backend_count",
-#     "get_backend_id",
-#     "get_device_id",
-#     "set_backend",
-# ]
-
-# from arrayfire_wrapper.lib import get_active_backend, get_available_backends, get_backend_count
-# from arrayfire_wrapper.lib import get_backend_id as wrapped_get_backend_id
-# from arrayfire_wrapper.lib import get_device_id as wrapped_get_device_id
-# from arrayfire_wrapper.lib import set_backend
-
-# from arrayfire import Array
-
-
-# def get_backend_id(array: Array) -> int:
-#     return wrapped_get_backend_id(array.arr)
-
-
-# def get_device_id(array: Array) -> int:
-#     return wrapped_get_device_id(array.arr)
diff --git a/arrayfire/library/vector_algorithms.py b/arrayfire/library/vector_algorithms.py
index a645738..cd66c06 100644
--- a/arrayfire/library/vector_algorithms.py
+++ b/arrayfire/library/vector_algorithms.py
@@ -20,7 +20,7 @@
     "sort",
 ]
 
-from typing import cast
+from typing import Literal, cast, overload
 
 from arrayfire_wrapper import lib as wrapper
 
@@ -116,6 +116,14 @@ def where(array: Array, /) -> Array:
     return cast(Array, wrapper.where(array.arr))
 
 
+@overload
+def all_true(array: Array, axis: None = None) -> bool: ...
+
+
+@overload
+def all_true(array: Array, axis: int) -> Array: ...
+
+
 def all_true(array: Array, axis: int | None = None) -> bool | Array:
     """
     Check if all the elements along a specified dimension are true.
@@ -144,6 +152,14 @@ def all_true(array: Array, axis: int | None = None) -> bool | Array:
     return Array.from_afarray(wrapper.all_true(array.arr, axis))
 
 
+@overload
+def any_true(array: Array, axis: None = None) -> bool: ...
+
+
+@overload
+def any_true(array: Array, axis: int) -> Array: ...
+
+
 def any_true(array: Array, axis: int | None = None) -> bool | Array:
     """
     Check if any of the elements along a specified dimension are true.
@@ -172,6 +188,22 @@ def any_true(array: Array, axis: int | None = None) -> bool | Array:
     return Array.from_afarray(wrapper.any_true(array.arr, axis))
 
 
+@overload
+def sum(array: Array, /, *, axis: None = None, nan_value: None = None) -> int | float | complex: ...
+
+
+@overload
+def sum(array: Array, /, *, axis: None, nan_value: float) -> int | float | complex: ...
+
+
+@overload
+def sum(array: Array, /, *, axis: int, nan_value: None = None) -> Array: ...
+
+
+@overload
+def sum(array: Array, /, *, axis: int, nan_value: float) -> Array: ...
+
+
 def sum(array: Array, /, *, axis: int | None = None, nan_value: float | None = None) -> int | float | complex | Array:
     # FIXME documentation issues
     """
@@ -206,10 +238,26 @@ def sum(array: Array, /, *, axis: int | None = None, nan_value: float | None = N
 
         return wrapper.sum_nan_all(array.arr, nan_value)
 
-    if nan_value is not None:
+    if nan_value is None:
         return Array.from_afarray(wrapper.sum(array.arr, axis))
 
-    return Array.from_afarray(wrapper.sum_nan(array.arr, axis, nan_value=nan_value))  # type: ignore[call-arg]
+    return Array.from_afarray(wrapper.sum_nan(array.arr, axis, nan_value))
+
+
+@overload
+def product(array: Array, /, *, axis: None = None, nan_value: None = None) -> int | float | complex: ...
+
+
+@overload
+def product(array: Array, /, *, axis: None, nan_value: float) -> int | float | complex: ...
+
+
+@overload
+def product(array: Array, /, *, axis: int, nan_value: None = None) -> Array: ...
+
+
+@overload
+def product(array: Array, /, *, axis: int, nan_value: float) -> Array: ...
 
 
 def product(
@@ -247,12 +295,81 @@ def product(
     if nan_value is None:
         return Array.from_afarray(wrapper.product(array.arr, axis))
 
-    return Array.from_afarray(wrapper.product_nan(array.arr, axis, nan_value=nan_value))  # type: ignore[call-arg]
+    return Array.from_afarray(wrapper.product_nan(array.arr, axis, nan_value))
+
+
+@overload
+def count(array: Array, /, *, axis: None = None, keys: None = None) -> int | float | complex: ...
+
+
+@overload
+def count(array: Array, /, *, axis: int, keys: None = None) -> Array: ...
+
+
+@overload
+def count(array: Array, /, *, axis: None, keys: Array) -> tuple[Array, Array]: ...
+
+
+@overload
+def count(array: Array, /, *, axis: int, keys: Array) -> tuple[Array, Array]: ...
 
 
 def count(
     array: Array, /, *, axis: int | None = None, keys: Array | None = None
 ) -> int | float | complex | Array | tuple[Array, Array]:
+    """
+    Count the number of non-zero elements in an ArrayFire array along a specified dimension or across the entire array.
+    Optionally, perform counting based on unique keys.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array whose non-zero elements are to be counted.
+
+    axis : int, optional, keyword-only
+        The dimension along which the non-zero elements are counted. If None, the total number of non-zero elements
+        across the entire array is returned.
+
+    keys : Array, optional, keyword-only
+        An optional one-dimensional ArrayFire array with keys for counting non-zero elements according to unique keys.
+        If provided, `axis` determines the dimension along which elements are counted per key. If `axis` is None, it
+        defaults to counting across all dimensions for each key.
+
+    Returns
+    -------
+    int | float | complex | Array | tuple[Array, Array]
+        - If `keys` is None and `axis` is None, returns a scalar (int, float, or complex) representing the total count
+          of non-zero elements in `array`.
+        - If `keys` is None and `axis` is specified, returns an ArrayFire array representing the count of non-zero
+          elements along the specified `axis`.
+        - If `keys` is provided, returns a tuple containing two ArrayFire arrays: the unique keys and their
+          corresponding counts. The counts are performed along the specified `axis` (or across all dimensions if
+          `axis` is None).
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3, 3))
+    >>> a
+    [3 3 1 1]
+        0.6010     0.2126     0.2864
+        0.0278     0.0655     0.3410
+        0.9806     0.5497     0.7509
+
+    >>> b = a > 0.5
+    >>> b
+    [3 3 1 1]
+        1          0          0
+        0          0          0
+        1          1          1
+
+    >>> af.count(b)
+    4.0
+
+    >>> af.count(b, axis=0)
+    [1 3 1 1]
+        2          1          1
+    """
     if keys:
         axis_ = -1 if axis is None else axis
         key, value = wrapper.count_by_key(keys.arr, array.arr, axis_)
@@ -264,7 +381,70 @@ def count(
     return Array.from_afarray(wrapper.count(array.arr, axis))
 
 
+@overload
+def imax(array: Array, /, *, axis: None = None) -> tuple[int | float | complex, int]: ...
+
+
+@overload
+def imax(array: Array, /, *, axis: int) -> tuple[Array, Array]: ...
+
+
 def imax(array: Array, /, *, axis: int | None = None) -> tuple[int | float | complex, int] | tuple[Array, Array]:
+    """
+    Find the maximum value and its location within an ArrayFire array along a specified dimension, or globally if no
+    dimension is specified.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array.
+
+    axis : int, optional, keyword-only
+        The dimension along which to find the maximum value. If None (default), the global maximum value and its
+        location in the array are returned.
+
+    Returns
+    -------
+    tuple[int | float | complex, int] | tuple[Array, Array]
+        - If `axis` is None, returns a tuple containing the global maximum value (int, float, or complex) and its
+          linear index in the array.
+        - If `axis` is specified, returns two ArrayFire arrays in a tuple: the first array contains the maximum values
+          along the specified dimension, and the second array contains the locations of these maximum values along the
+          same dimension.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3, 3))
+    >>> a
+    [3 3 1 1]
+        0.6010     0.2126     0.2864
+        0.0278     0.0655     0.3410
+        0.9806     0.5497     0.7509
+
+    >>> af.imax(a)
+    (0.9805505871772766, 2)
+
+    >>> af.imax(a, axis=1)
+    (
+    [3 1 1 1]
+        0.6010
+        0.3410
+        0.9806
+    ,
+
+    [3 1 1 1]
+            0
+            2
+            0
+    )
+
+    Note
+    ----
+    - When `axis` is None, the global maximum is found, and the index is returned as a linear index relative to the
+      array's storage.
+    - The maximum values and their locations are returned as separate arrays when an axis is specified.
+    """
     if axis is None:
         return wrapper.imax_all(array.arr)
 
@@ -272,9 +452,68 @@ def imax(array: Array, /, *, axis: int | None = None) -> tuple[int | float | com
     return Array.from_afarray(maximum), Array.from_afarray(location)
 
 
+@overload
+def max(array: Array, /, *, axis: int, keys: None = None, ragged_len: None = None) -> Array: ...
+
+
+@overload
+def max(
+    array: Array, /, *, axis: None = None, keys: None = None, ragged_len: None = None
+) -> int | float | complex: ...
+
+
+@overload
+def max(array: Array, /, *, axis: int, keys: Array, ragged_len: None = None) -> tuple[Array, Array]: ...
+
+
+@overload
+def max(array: Array, /, *, axis: int, keys: None = None, ragged_len: Array) -> tuple[Array, Array]: ...
+
+
 def max(
     array: Array, /, *, axis: int | None = None, keys: Array | None = None, ragged_len: Array | None = None
 ) -> int | float | complex | Array | tuple[Array, Array]:
+    """
+    Find the maximum value(s) in an ArrayFire array along a specified dimension, optionally based on unique keys or
+    with ragged array dimensions.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array to find the maximum values in.
+
+    axis : int, optional, keyword-only
+        The dimension along which to find the maximum values. If None, finds the global maximum.
+
+    keys : Array, optional, keyword-only
+        A one-dimensional ArrayFire array containing keys for grouped maximum calculations. Cannot be used
+        simultaneously with `ragged_len`.
+
+    ragged_len : Array, optional, keyword-only
+        A one-dimensional ArrayFire array containing lengths for ragged maximum calculations. Cannot be used
+        simultaneously with `keys`.
+
+    Returns
+    -------
+    int | float | complex | Array | tuple[Array, Array]
+        - If neither `keys` nor `ragged_len` is provided, returns the maximum value across the entire array or along
+          the specified `axis`.
+        - If `keys` is provided, returns a tuple containing two ArrayFire arrays: the unique keys and their
+          corresponding maximum values along the specified `axis`.
+        - If `ragged_len` is provided, returns a tuple containing two ArrayFire arrays: the maximum values and their
+          indices within each ragged segment along the specified `axis`.
+
+    Raises
+    ------
+    RuntimeError
+        If both `keys` and `ragged_len` are provided, as they cannot be used simultaneously.
+
+    Note
+    ----
+    - `axis` is ignored when finding the global maximum.
+    - `keys` and `ragged_len` cannot be used together.
+    - The `ragged_len` array determines the lengths of each segment along `axis` for ragged maximum calculations.
+    """
     if keys and ragged_len:
         raise RuntimeError("To process ragged max function, the keys value should be None and vice versa.")
 
@@ -294,7 +533,58 @@ def max(
     return Array.from_afarray(wrapper.max(array.arr, axis))
 
 
+@overload
+def imin(array: Array, /, *, axis: None = None) -> tuple[int | float | complex, int]: ...
+
+
+@overload
+def imin(array: Array, /, *, axis: int) -> tuple[Array, Array]: ...
+
+
 def imin(array: Array, /, *, axis: int | None = None) -> tuple[int | float | complex, int] | tuple[Array, Array]:
+    """
+    Find the value and location of the minimum value in an ArrayFire array along a specified dimension, or globally
+    across the entire array.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array whose minimum value and location are to be determined.
+
+    axis : int, optional, keyword-only
+        The dimension along which the minimum value and its location are to be found. If None, the global minimum value
+        and its location across the entire array are returned.
+
+    Returns
+    -------
+    tuple[int | float | complex, int] | tuple[Array, Array]
+        If `axis` is None, returns a tuple containing a scalar (the global minimum value of `array`) and an integer
+        (the linear index where the global minimum occurs).
+        If `axis` is specified, returns a tuple of two ArrayFire arrays: the first array contains the minimum values
+        along the specified `axis`, and the second array contains the indices of these minimum values along the same
+        `axis`.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3, 3))
+    >>> a
+    [3 3 1 1]
+        0.6010     0.2126     0.2864
+        0.0278     0.0655     0.3410
+        0.9806     0.5497     0.7509
+
+    >>> af.imin(a)
+    (0.027758777141571045, 1)
+
+    >>> af.imin(a, axis=0)
+    (
+    [1 3 1 1]
+        0.0278     0.0655     0.2864 ,
+
+    [1 3 1 1]
+            1          1          0 )
+    """
     if axis is None:
         return wrapper.imin_all(array.arr)
 
@@ -302,7 +592,57 @@ def imin(array: Array, /, *, axis: int | None = None) -> tuple[int | float | com
     return Array.from_afarray(minimum), Array.from_afarray(location)
 
 
+@overload
+def min(array: Array, /, *, axis: None = None) -> int | float | complex: ...
+
+
+@overload
+def min(array: Array, /, *, axis: int) -> Array: ...
+
+
 def min(array: Array, /, *, axis: int | None = None) -> int | float | complex | Array:
+    """
+    Finds the minimum value in an ArrayFire array, optionally along a specified axis.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array whose minimum value is sought.
+
+    axis : int, optional, keyword-only
+        The dimension along which to find the minimum value. If None (the default),
+        the minimum value of the entire array is returned.
+
+    Returns
+    -------
+    int | float | complex | Array
+        The minimum value found in the array. If `axis` is specified, an ArrayFire array
+        containing the minimum values along that axis is returned. If `axis` is None, a
+        single scalar value (int, float, or complex) representing the minimum value of
+        the entire array is returned.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu((3, 3))  # Generate a 3x3 array of random numbers
+    >>> a
+    [3 3 1 1]
+        0.6010     0.2126     0.2864
+        0.0278     0.0655     0.3410
+        0.9806     0.5497     0.7509
+
+    >>> af.min(a)  # Find the minimum value in the entire array
+    0.027758777141571045
+
+    >>> af.min(a, axis=0)  # Find the minimum values along the first axis (column-wise minimum)
+    [1 3 1 1]
+        0.0278     0.0655     0.2864
+
+    Note
+    ----
+    - NaN handling is decided by the ArrayFire backend: its reference documentation for `min` states that NaN values
+      are ignored by the reduction, so a NaN in the input does not by itself make the result NaN.
+    """
     if axis is None:
         return wrapper.min_all(array.arr)
 
@@ -311,34 +651,312 @@ def min(array: Array, /, *, axis: int | None = None) -> int | float | complex |
 
 @afarray_as_array
 def diff1(array: Array, /, axis: int = 0) -> Array:
+    """
+    Computes the first-order differences of an ArrayFire array along a specified dimension.
+
+    The first-order difference is calculated as `array[i+1] - array[i]` along the specified axis.
+
+    Parameters
+    ----------
+    array : Array
+        The input ArrayFire array to compute differences on.
+
+    axis : int, optional, default: 0
+        The dimension along which the first-order differences are calculated. For a 2D array,
+        `axis=0` computes the difference between consecutive rows, while `axis=1` computes the
+        difference between consecutive columns.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array of first-order differences. The size of this array along the specified
+        axis is one less than the input array, as differences are computed between consecutive
+        elements.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.Array([1, 2, 4, 7, 11])
+    >>> af.diff1(a)
+    [4 1 1 1]
+        1.0000
+        2.0000
+        3.0000
+        4.0000
+
+    Note
+    ----
+    - The differences for a 2D array along `axis=0` would be row-wise differences,
+      and along `axis=1` would be column-wise differences.
+    """
     return cast(Array, wrapper.diff1(array.arr, axis))
 
 
 @afarray_as_array
 def diff2(array: Array, /, axis: int = 0) -> Array:
+    """
+    Computes the second-order differences of an ArrayFire array along a specified dimension.
+
+    The second-order difference is calculated as `array[i+2] - 2*array[i+1] + array[i]` along the specified axis,
+    which is analogous to the second derivative in continuous functions.
+
+    Parameters
+    ----------
+    array : Array
+        The input ArrayFire array to compute second-order differences on.
+
+    axis : int, optional, default: 0
+        The dimension along which the second-order differences are calculated. For a 2D array,
+        `axis=0` computes the difference between consecutive rows (down each column), while
+        `axis=1` computes the difference between consecutive columns (across each row).
+
+    Returns
+    -------
+    Array
+        An ArrayFire array of second-order differences. The size of this array along the specified
+        axis is two less than the input array, as the operation effectively reduces the dimension
+        size by two.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.Array([1, 2, 4, 7, 11])
+    >>> af.diff2(a)
+    [3 1 1 1]
+        1.0000
+        1.0000
+        1.0000
+
+    Note
+    ----
+    The operation requires that the array has at least three elements along the specified axis to compute
+    the second-order differences. For arrays with fewer than three elements along the axis, the result will
+    be an empty array.
+    """
     return cast(Array, wrapper.diff2(array.arr, axis))
 
 
 def gradient(array: Array, /) -> tuple[Array, Array]:
+    """
+    Computes the horizontal and vertical gradients of a 2D ArrayFire array or a batch of 2D arrays.
+
+    The gradient is a vector that points in the direction of the greatest rate of increase of the function,
+    and its magnitude is the slope of the graph in that direction. For images, this operation can highlight
+    edges and changes in intensity.
+
+    Parameters
+    ----------
+    array : Array
+        The input ArrayFire array, which can be a 2D array representing a single image, or a multi-dimensional
+        array representing a batch of images. For batch processing, the gradient is computed for each image
+        in the batch independently.
+
+    Returns
+    -------
+    tuple[Array, Array]
+        A tuple containing two ArrayFire arrays:
+        - The first array (`dx`) contains the gradients along the first dimension (between consecutive rows).
+        - The second array (`dy`) contains the gradients along the second dimension (between consecutive columns).
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> image = af.randu((3, 3))  # Generate a random 3x3 "image"
+    >>> image
+    [3 3 1 1]
+        0.4105     0.3543     0.3636
+        0.1583     0.6450     0.4165
+        0.3712     0.9675     0.5814
+
+    >>> dx, dy = af.gradient(image)
+    >>> dx  # Gradients along the first dimension (between consecutive rows)
+    [3 3 1 1]
+        -0.2522    0.2907     0.0528
+        -0.0196    0.3066     0.1089
+        0.2129     0.3225     0.1650
+
+    >>> dy  # Gradients along the second dimension (between consecutive columns)
+    [3 3 1 1]
+        -0.0562    -0.0234    0.0093
+        0.4867     0.1291    -0.2286
+        0.5962     0.1051    -0.3860
+
+    Note
+    ----
+    - The gradient operation is particularly useful in the context of image processing for identifying
+      edges and textural patterns within images.
+    - For higher-dimensional arrays representing batches of images, the gradient operation is applied
+      independently to each image in the batch.
+    """
     dx, dy = wrapper.gradient(array.arr)
     return Array.from_afarray(dx), Array.from_afarray(dy)
 
 
 @afarray_as_array
 def set_intersect(x: Array, y: Array, /, *, is_unique: bool = False) -> Array:
+    """
+    Calculates the intersection of two ArrayFire arrays, returning elements common to both arrays.
+
+    Parameters
+    ----------
+    x : Array
+        The first input 1D ArrayFire array.
+    y : Array
+        The second input 1D ArrayFire array.
+    is_unique : bool, optional, keyword-only, default: False
+        Specifies whether both input arrays contain unique elements. If set to True,
+        the function assumes that the arrays do not have repeated elements, which
+        can optimize the intersection operation.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array containing the intersection of `x` and `y`. The returned array
+        includes elements that are common to both input arrays. If `is_unique` is True,
+        the function assumes no duplicates within each input array, potentially
+        enhancing performance.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.Array([1, 2, 3, 4, 5])
+    >>> b = af.Array([4, 5, 6, 7, 8])
+    >>> af.set_intersect(a, b)
+    [2 1 1 1]
+        4.0000
+        5.0000
+
+    Note
+    ----
+    - Both `x` and `y` must be 1D arrays.
+    - The `is_unique` parameter can be used to optimize the intersection calculation when both input arrays are known
+      to contain unique elements.
+    """
     return cast(Array, wrapper.set_intersect(x.arr, y.arr, is_unique))
 
 
 @afarray_as_array
 def set_union(x: Array, y: Array, /, *, is_unique: bool = False) -> Array:
+    """
+    Computes the union of two 1D ArrayFire arrays, effectively combining the elements from both arrays and removing
+    duplicates.
+
+    Parameters
+    ----------
+    x, y : Array
+        The input 1D ArrayFire arrays whose union is to be computed. These arrays can contain any numerical type.
+
+    is_unique : bool, optional, keyword-only, default: False
+        A flag that indicates whether both input arrays are guaranteed to contain unique elements. Setting this to True
+        can optimize the computation but should only be used if each element in both arrays is indeed unique.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array containing the unique elements from both `x` and `y`.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.Array([1, 2, 3, 4, 5])
+    >>> b = af.Array([4, 5, 6, 7, 8])
+    >>> af.set_union(a, b)
+    [8 1 1 1]
+        1.0000
+        2.0000
+        3.0000
+        4.0000
+        5.0000
+        6.0000
+        7.0000
+        8.0000
+
+    Note
+    ----
+    The operation is performed on 1D arrays. For inputs that are not 1D, consider reshaping or flattening them before
+    performing the operation to ensure correct results. The `is_unique` flag should be used with caution; incorrect
+    usage (i.e., setting it to True when arrays are not composed of unique elements) may lead to unexpected results.
+    """
     return cast(Array, wrapper.set_union(x.arr, y.arr, is_unique))
 
 
 @afarray_as_array
 def set_unique(array: Array, /, *, is_sorted: bool = False) -> Array:
+    """
+    Extracts unique elements from a 1D ArrayFire array.
+
+    This function returns a new array containing only the unique elements of the input array. It can operate more
+    efficiently if the input array is known to be sorted.
+
+    Parameters
+    ----------
+    array : Array
+        The input 1D ArrayFire array from which unique elements are to be extracted.
+
+    is_sorted : bool, optional, keyword-only, default: False
+        Indicates whether the input array is already sorted. If True, the function can skip the sorting step,
+        potentially improving performance. However, setting this to True for an unsorted array will lead to incorrect
+        results.
+
+    Returns
+    -------
+    Array
+        An ArrayFire array containing the unique elements extracted from the input array.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.Array([1, 2, 2, 3, 4, 4, 5, 5, 5])
+    >>> af.set_unique(a)
+    [5 1 1 1]
+        1.0000
+        2.0000
+        3.0000
+        4.0000
+        5.0000
+
+    >>> sorted_a = af.sort(a)  # Assuming 'a' is not sorted
+    >>> af.set_unique(sorted_a, is_sorted=True)
+    [5 1 1 1]
+        1.0000
+        2.0000
+        3.0000
+        4.0000
+        5.0000
+
+    Note
+    ----
+    The input array must be 1D. If you have a multi-dimensional array, consider reshaping or flattening it before
+    using this function. Ensure the `is_sorted` flag accurately reflects the state of the input array to avoid
+    incorrect results.
+    """
     return cast(Array, wrapper.set_unique(array.arr, is_sorted))
 
 
+@overload
+def sort(
+    array: Array,
+    /,
+    axis: int = 0,
+    is_ascending: bool = True,
+    *,
+    keys: None = None,
+    is_index_array: Literal[False] = False,
+) -> Array: ...
+
+
+@overload
+def sort(
+    array: Array, /, axis: int = 0, is_ascending: bool = True, *, keys: Array, is_index_array: Literal[False] = False
+) -> tuple[Array, Array]: ...
+
+
+@overload
+def sort(
+    array: Array, /, axis: int = 0, is_ascending: bool = True, *, keys: None = None, is_index_array: Literal[True]
+) -> tuple[Array, Array]: ...
+
+
 def sort(
     array: Array,
     /,
@@ -348,6 +966,105 @@ def sort(
     keys: Array | None = None,
     is_index_array: bool = False,
 ) -> Array | tuple[Array, Array]:
+    """
+    Sorts the elements of an ArrayFire array along a specified dimension. Optionally, sorting can be performed based
+    on keys, or sorted indices can be returned.
+
+    Parameters
+    ----------
+    array : Array
+        The input multi-dimensional ArrayFire array to be sorted.
+
+    axis : int, default: 0
+        The dimension along which the sorting is to be performed.
+
+    is_ascending : bool, default: True
+        Determines the direction of the sort. If True, the sorting is done in ascending order; otherwise, in descending
+        order.
+
+    keys : Array, optional
+        An optional ArrayFire array containing keys based on which the sorting should be performed. If provided, the
+        elements in `array` are sorted according to the order determined by these keys.
+
+    is_index_array : bool, default: False
+        If True, the function returns a tuple of arrays - the sorted array and an array of indices that maps the sorted
+        array back to the original array.
+
+    Returns
+    -------
+    Array | tuple[Array, Array]
+        If neither `keys` nor `is_index_array` is provided, returns the sorted array.
+        If `keys` is provided, returns a tuple (sorted_keys, sorted_values) where `sorted_keys` is the keys sorted and
+        `sorted_values` are the elements of `array` sorted according to `sorted_keys`.
+        If `is_index_array` is true, returns a tuple (sorted_array, indices) where `sorted_array` is the sorted array
+        and `indices` maps the sorted array back to the original array.
+
+    Raises
+    ------
+    RuntimeError
+        If both `keys` and `is_index_array` are provided.
+
+    Examples
+    --------
+    >>> import arrayfire as af
+    >>> a = af.randu(5)  # Create a random 1D array
+    >>> a
+    [5 1 1 1]
+        0.6010
+        0.0278
+        0.9806
+        0.2126
+        0.0655
+
+    >>> af.sort(a)  # Sort the array in ascending order
+    [5 1 1 1]
+        0.0278
+        0.0655
+        0.2126
+        0.6010
+        0.9806
+
+    >>> keys = af.Array([3, 2, 1, 5, 4])
+    >>> values = af.Array([10, 20, 30, 40, 50])
+    >>> sorted_keys, sorted_values = af.sort(values, keys=keys)
+    >>> sorted_keys
+    [5 1 1 1]
+        1.0000
+        2.0000
+        3.0000
+        4.0000
+        5.0000
+
+    >>> sorted_values
+    [5 1 1 1]
+        30.0000
+        20.0000
+        10.0000
+        50.0000
+        40.0000
+
+    >>> sorted_array, indices = af.sort(a, is_index_array=True)
+    >>> sorted_array
+    [5 1 1 1]
+        0.0278
+        0.0655
+        0.2126
+        0.6010
+        0.9806
+
+    >>> indices
+    [5 1 1 1]
+        1
+        4
+        3
+        0
+        2
+
+    Note
+    ----
+    - The sorting based on keys (`sort_by_key`) or returning sorted indices (`sort_index`) cannot be performed
+      simultaneously. Select only one option per function call.
+    """
     if keys and is_index_array:
         raise RuntimeError("Could not process sorting by keys when `is_index_array` is True. Select only one option.")
 
diff --git a/assets b/assets
new file mode 160000
index 0000000..b86526a
--- /dev/null
+++ b/assets
@@ -0,0 +1 @@
+Subproject commit b86526ac7c63bfd19cbff67af71ff4f30d736d4b
diff --git a/dev-requirements.txt b/dev-requirements.txt
old mode 100755
new mode 100644
index 2c3ee7b..31e4b98
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,24 +1,20 @@
-# Testing-related packages
-
-# Checks style, syntax, etc.
-flake8>=4.0.1
-
-# Static type checking
-mypy==1.3.0
-
-# Check import order style
-isort>=5.10.1
-
-# Automatic code formatting
-black>=23.3.0
-
-# Allows generation of coverage reports with pytest.
-pytest-cov>=3.0.0
-
-# Allows codecov to generate coverage reports
-coverage[toml]>=6.4
-codecov>=2.1.12
-
-# Package-related packages
-setuptools
-wheel
+# Testing-related packages
+
+# Checks style, syntax, etc.
+flake8~=7.0.0
+
+# Static type checking
+mypy~=1.9.0
+
+# Check import order style
+isort>=5.13.2
+
+# Automatic code formatting
+black>=24.4.0
+
+# Allows generation of coverage reports with pytest.
+pytest-cov>=5.0.0
+
+# Allows codecov to generate coverage reports
+coverage[toml]>=6.4
+codecov>=2.1.12
diff --git a/examples/benchmarks/bench_blas.py b/examples/benchmarks/bench_blas.py
new file mode 100644
index 0000000..91cf478
--- /dev/null
+++ b/examples/benchmarks/bench_blas.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import sys
+from time import time
+from typing import Any, Callable
+
+import arrayfire as af
+
+try:
+    import numpy as np
+except ImportError:
+    raise ImportError("Please install arrayfire-python[benchmarks] or numpy directly to run this example.")
+
+
+def calc_arrayfire(n: int) -> Callable:
+    A = af.randu((n, n))
+    af.sync(-1)
+
+    def run(iters: int) -> None:
+        for t in range(iters):
+            B = af.matmul(A, A)  # noqa: F841
+        af.sync(-1)
+
+    return run
+
+
+def calc_numpy(n: int) -> Callable:
+    np.random.seed(1)
+    A = np.random.rand(n, n).astype(np.float32)
+
+    def run(iters: int) -> None:
+        for t in range(iters):
+            B = np.dot(A, A)  # noqa: F841
+
+    return run
+
+
+def bench(calc: Any, iters: int = 100, upto: int = 2048) -> None:
+    _, name = calc.__name__.split("_")
+    print("Benchmark N x N matrix multiply on %s" % name)
+
+    for n in range(128, upto + 128, 128):
+        run = calc(n)
+        start = time()
+        run(iters)
+        t = (time() - start) / iters
+        gflops = 2.0 * (n**3) / (t * 1e9)
+        print("Time taken for %4d x %4d: %0.4f Gflops" % (n, n, gflops))
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        af.set_device(int(sys.argv[1]))
+
+    af.info()
+
+    bench(calc_arrayfire)
+    if np:
+        bench(calc_numpy, upto=512)
diff --git a/examples/benchmarks/bench_cg.py b/examples/benchmarks/bench_cg.py
new file mode 100644
index 0000000..52f657f
--- /dev/null
+++ b/examples/benchmarks/bench_cg.py
@@ -0,0 +1,203 @@
+#!/usr/bin/python
+
+#######################################################
+# Copyright (c) 2015, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+
+import sys
+from time import time
+
+import arrayfire as af
+
+from arrayfire.library.linear_algebra import create_sparse_array_from_dense, Storage
+# TODO: Make sure file is working after sparse functions are added into library
+try:
+    import numpy as np
+except ImportError:
+    np = None
+
+try:
+    from scipy import sparse as sp
+    from scipy.sparse import linalg
+except ImportError:
+    sp = None
+
+
+def to_numpy(A):
+    return np.asarray(A, dtype=np.float32)
+
+
+def to_sparse(A):
+    return create_sparse_array_from_dense(A, Storage.CSR)
+
+
+def to_scipy_sparse(spA, fmt="csr"):
+    # ArrayFire sparse array -> SciPy csr_matrix (assumes CSR storage). `fmt` is unused; `np.int` is gone since NumPy 1.24.
+    rows = np.asarray(af.sparse_get_row_idx(spA).to_list(), dtype=int)
+    cols = np.asarray(af.sparse_get_col_idx(spA).to_list(), dtype=int)
+    return sp.csr_matrix((np.asarray(af.sparse_get_values(spA).to_list(), dtype=np.float32), cols, rows), dtype=np.float32)
+
+
+def setup_input(n, sparsity=7):
+    T = af.randu((n, n))
+    A = af.floor(T * 1000)
+    A = A * ((A % sparsity) == 0) / 1000
+    A = A.T + A + n * af.identity((n, n))
+    x0 = af.randu((n, 1))
+    b = af.matmul(A, x0)
+    # printing
+    # nnz = af.sum((A != 0))
+    # print "Sparsity of A: %2.2f %%" %(100*nnz/n**2,)
+    return A, b, x0
+
+
+def input_info(A, Asp):  # print a short summary of the benchmark input; `Asp` is accepted but not used yet
+    m, n = A.shape
+    nnz = af.sum((A != 0))  # sum over the `A != 0` mask
+    print("    matrix size:                %i x %i" % (m, n))
+    print("    matrix sparsity:            %2.2f %%" % (100 * nnz / n**2,))  # 100 * nnz / n**2, uses n only
+    print("    dense matrix memory usage:  ")  # TODO: label only, no value is computed yet
+    print("    sparse matrix memory usage: ")  # TODO: label only, no value is computed yet
+
+
+def calc_arrayfire(A, b, x0, maxiter=10):
+    x = af.constant(0, (b.shape[0], 1), dtype=af.f32)
+    r = b - af.matmul(A, x)
+    p = r
+    for i in range(maxiter):
+        Ap = af.matmul(A, p)
+        alpha_num = af.dot(r, r)
+        alpha_den = af.dot(p, Ap)
+        alpha = alpha_num / alpha_den
+        r -= af.tile(alpha, (Ap.shape[0], 1)) * Ap
+        x += af.tile(alpha, (Ap.shape[0], 1)) * p
+        beta_num = af.dot(r, r)
+        beta = beta_num / alpha_num
+        p = r + af.tile(beta, (p.shape[0], 1)) * p
+    af.eval(x)
+    res = x0 - x
+    return x, af.dot(res, res)
+
+
+def calc_numpy(A, b, x0, maxiter=10):
+    x = np.zeros(len(b), dtype=np.float32)
+    r = b - np.dot(A, x)
+    p = r.copy()
+    for i in range(maxiter):
+        Ap = np.dot(A, p)
+        alpha_num = np.dot(r, r)
+        alpha_den = np.dot(p, Ap)
+        alpha = alpha_num / alpha_den
+        r -= alpha * Ap
+        x += alpha * p
+        beta_num = np.dot(r, r)
+        beta = beta_num / alpha_num
+        p = r + beta * p
+    res = x0 - x
+    return x, np.dot(res, res)
+
+
+def calc_scipy_sparse(A, b, x0, maxiter=10):
+    x = np.zeros(len(b), dtype=np.float32)
+    r = b - A * x
+    p = r.copy()
+    for i in range(maxiter):
+        Ap = A * p
+        alpha_num = np.dot(r, r)
+        alpha_den = np.dot(p, Ap)
+        alpha = alpha_num / alpha_den
+        r -= alpha * Ap
+        x += alpha * p
+        beta_num = np.dot(r, r)
+        beta = beta_num / alpha_num
+        p = r + beta * p
+    res = x0 - x
+    return x, np.dot(res, res)
+
+
+def calc_scipy_sparse_linalg_cg(A, b, x0, maxiter=10):
+    x = np.zeros(len(b), dtype=np.float32)  # all-zero initial guess handed to the solver
+    x, _ = linalg.cg(A, b, x, tol=0.0, maxiter=maxiter)  # NOTE(review): newer SciPy replaces `tol` with `rtol` - verify
+    res = x0 - x  # difference between the caller-supplied x0 and the computed x
+    return x, np.dot(res, res)  # computed x and the squared norm of that difference
+
+
+def timeit(calc, iters, args):
+    t0 = time()
+    for i in range(iters):
+        calc(*args)
+    dt = time() - t0
+    return 1000 * dt / iters  # ms
+
+
+def test():
+    print("\nTesting benchmark functions...")
+    A, b, x0 = setup_input(n=50, sparsity=7)  # dense A
+    Asp = to_sparse(A)
+    x1, _ = calc_arrayfire(A, b, x0)
+    x2, _ = calc_arrayfire(Asp, b, x0)
+    if af.sum(af.abs(x1 - x2) / x2 > 1e-5):
+        raise ValueError("arrayfire test failed")
+    if np:
+        An = to_numpy(A)
+        bn = to_numpy(b)
+        x0n = to_numpy(x0)
+        x3, _ = calc_numpy(An, bn, x0n)
+        if not np.allclose(x3, x1.to_list()):
+            raise ValueError("numpy test failed")
+    if sp:
+        Asc = to_scipy_sparse(Asp)
+        x4, _ = calc_scipy_sparse(Asc, bn, x0n)
+        if not np.allclose(x4, x1.to_list()):
+            raise ValueError("scipy.sparse test failed")
+        x5, _ = calc_scipy_sparse_linalg_cg(Asc, bn, x0n)
+        if not np.allclose(x5, x1.to_list()):
+            raise ValueError("scipy.sparse.linalg.cg test failed")
+    print("    all tests passed...")
+
+
+def bench(n=4 * 1024, sparsity=7, maxiter=10, iters=10):
+
+    # generate data
+    print("\nGenerating benchmark data for n = %i ..." % n)
+    A, b, x0 = setup_input(n, sparsity)  # dense A
+    Asp = to_sparse(A)  # sparse A
+    input_info(A, Asp)
+
+    # make benchmarks
+    print("Benchmarking CG solver for n = %i ..." % n)
+    t1 = timeit(calc_arrayfire, iters, args=(A, b, x0, maxiter))
+    print("    arrayfire - dense:            %f ms" % t1)
+    t2 = timeit(calc_arrayfire, iters, args=(Asp, b, x0, maxiter))
+    print("    arrayfire - sparse:           %f ms" % t2)
+    if np:
+        An = to_numpy(A)
+        bn = to_numpy(b)
+        x0n = to_numpy(x0)
+        t3 = timeit(calc_numpy, iters, args=(An, bn, x0n, maxiter))
+        print("    numpy     - dense:            %f ms" % t3)
+    if sp:
+        Asc = to_scipy_sparse(Asp)
+        t4 = timeit(calc_scipy_sparse, iters, args=(Asc, bn, x0n, maxiter))
+        print("    scipy     - sparse:           %f ms" % t4)
+        t5 = timeit(calc_scipy_sparse_linalg_cg, iters, args=(Asc, bn, x0n, maxiter))
+        print("    scipy     - sparse.linalg.cg: %f ms" % t5)
+
+
+if __name__ == "__main__":
+    # af.set_backend('cpu', unsafe=True)
+
+    if len(sys.argv) > 1:
+        af.set_device(int(sys.argv[1]))
+
+    af.info()
+    test()
+
+    for n in (128, 256, 512, 1024, 2048, 4096):
+        bench(n)
diff --git a/examples/benchmarks/bench_fft.py b/examples/benchmarks/bench_fft.py
new file mode 100644
index 0000000..826e85c
--- /dev/null
+++ b/examples/benchmarks/bench_fft.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import sys
+from time import time
+from typing import Any, Callable
+
+import arrayfire as af
+
+try:
+    import numpy as np
+except ImportError:
+    raise ImportError("Please install arrayfire-python[benchmarks] or numpy directly to run this example.")
+
+
+def calc_arrayfire(n: int) -> Callable:
+    A = af.randu((n, n))
+    af.sync(-1)
+
+    def run(iters: int) -> None:
+        for t in range(iters):
+            B = af.fft2(A)  # noqa: F841
+
+        af.sync(-1)
+
+    return run
+
+
+def calc_numpy(n: int) -> Callable:
+    np.random.seed(1)
+    A = np.random.rand(n, n).astype(np.float32)
+
+    def run(iters: int) -> None:
+        for t in range(iters):
+            B = np.fft.fft2(A)  # noqa: F841
+
+    return run
+
+
+def bench(calc: Any, iters: int = 100, upto: int = 13) -> None:
+    _, name = calc.__name__.split("_")
+    print("Benchmark N x N 2D fft on %s" % name)
+
+    for M in range(7, upto):
+        N = 1 << M
+        run = calc(N)
+        start = time()
+        run(iters)
+        t = (time() - start) / iters
+        gflops = (10.0 * N * N * M) / (t * 1e9)
+        print("Time taken for %4d x %4d: %0.4f Gflops" % (N, N, gflops))
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        af.set_device(int(sys.argv[1]))
+
+    af.info()
+
+    bench(calc_arrayfire)
+    if np:
+        bench(calc_numpy, upto=10)
diff --git a/examples/benchmarks/monte_carlo_pi.py b/examples/benchmarks/monte_carlo_pi.py
new file mode 100644
index 0000000..f7495ce
--- /dev/null
+++ b/examples/benchmarks/monte_carlo_pi.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import sys
+from random import random
+from time import time
+from typing import Any, overload
+
+try:
+    import numpy as np
+except ImportError:
+    raise ImportError("Please install arrayfire-python[benchmarks] or numpy directly to run this example.")
+
+import arrayfire as af
+
+# `xrange` only exists on Python 2, while the `X | Y` annotations further down already need Python 3.10+,
+# so the former try/except fallback could never pick `xrange`; `range` is bound directly instead.
+# The `frange` alias is kept because the benchmark helpers below call it.
+frange = range
+
+
+@overload
+def in_circle(x: af.Array, y: af.Array) -> af.Array: ...
+
+
+@overload
+def in_circle(x: np.ndarray, y: np.ndarray) -> np.ndarray: ...
+
+
+@overload
+def in_circle(x: float, y: float) -> bool: ...
+
+
+# Tests whether the point(s) (x, y) satisfy x^2 + y^2 < 1; the overload stubs above carry typing information only.
+# Having the function outside is faster than the lambda inside
+def in_circle(x: af.Array | np.ndarray | float, y: af.Array | np.ndarray | float) -> af.Array | np.ndarray | float:
+    return (x * x + y * y) < 1  # type: ignore[operator]  # NOTE no override for np.ndarray
+
+
+def calc_pi_device(samples: int) -> af.Array:
+    x = af.randu((samples,))
+    y = af.randu((samples,))
+    res = in_circle(x, y)
+    return 4 * af.sum(res) / samples  # type: ignore[return-value, operator]
+
+
+def calc_pi_numpy(samples: int) -> float:
+    """Estimate pi with NumPy from `samples` random float32 points (seed fixed to 1 for repeatability)."""
+    np.random.seed(1)
+    x = np.random.rand(samples).astype(np.float32)
+    y = np.random.rand(samples).astype(np.float32)
+    return 4.0 * np.sum(in_circle(x, y)) / samples  # type: ignore[no-any-return]
+
+
+def calc_pi_host(samples: int) -> float:
+    count = sum(1 for k in frange(samples) if in_circle(random(), random()))
+    return 4 * float(count) / samples
+
+
+def bench(calc_pi: Any, samples: int = 1000000, iters: int = 25) -> None:
+    func_name = calc_pi.__name__[8:]
+    print(
+        "Monte carlo estimate of pi on %s with %d million samples: %f" % (func_name, samples / 1e6, calc_pi(samples))
+    )
+
+    start = time()
+    for k in frange(iters):
+        calc_pi(samples)
+    end = time()
+
+    print("Average time taken: %f ms" % (1000 * (end - start) / iters))
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        af.set_device(int(sys.argv[1]))
+    af.info()
+
+    bench(calc_pi_device)
+    if np:
+        bench(calc_pi_numpy)
+    bench(calc_pi_host)
diff --git a/examples/computer_vision/fast.py b/examples/computer_vision/fast.py
new file mode 100644
index 0000000..43402e1
--- /dev/null
+++ b/examples/computer_vision/fast.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import os
+import sys
+
+import arrayfire as af
+
+
+def draw_corners(img, x, y, draw_len):
+    # Draw vertical line of (draw_len * 2 + 1) pixels centered on  the corner
+    # Set only the first channel to 1 (green lines)
+
+    # print(img)
+
+    xmin = int(max(0, x - draw_len))
+    xmax = int(min(img.shape[1], x + draw_len))
+
+    img[y, xmin:xmax, 0] = 0.0
+    img[y, xmin:xmax, 1] = 1.0
+    img[y, xmin:xmax, 2] = 0.0
+
+    # Draw vertical line of (draw_len * 2 + 1) pixels centered on  the corner
+    # Set only the first channel to 1 (green lines)
+    ymin = int(max(0, y - draw_len))
+    ymax = int(min(img.shape[0], y + draw_len))
+
+    img[ymin:ymax, x, 0] = 0.0
+    img[ymin:ymax, x, 1] = 1.0
+    img[ymin:ymax, x, 2] = 0.0
+    return img
+
+
+def fast_demo(console):
+
+    root_path = os.path.dirname(os.path.abspath(__file__))
+    file_path = root_path
+    if console:
+        file_path += "/../../assets/examples/images/square.png"
+    else:
+        file_path += "/../../assets/examples/images/man.jpg"
+    img_color = af.load_image(file_path, is_color=True)
+
+    img = af.color_space(img_color, af.CSpace.GRAY, af.CSpace.RGB)
+    img_color /= 255.0
+
+    features = af.fast(img)
+
+    # import pdb;pdb.set_trace()
+    xs = features.x.copy()
+    ys = features.y.copy()
+
+    draw_len = 3
+    num_features = features.num_features
+    for f in range(num_features):
+        x = int(xs[f].scalar())
+        y = int(ys[f].scalar())
+
+        # import pdb;pdb.set_trace()
+        img_color = draw_corners(img_color, x, y, draw_len)
+
+    print("Features found: {}".format(num_features))
+    if not console:
+        # Previews color image with green crosshairs
+        file_path = os.path.join(os.getcwd(), "fast_image.png")
+        af.save_image(img_color, file_path)
+
+    else:
+        print(xs)
+        print(ys)
+
+
+if __name__ == "__main__":
+    # Optional first argument: index of the ArrayFire device to use.
+    if len(sys.argv) > 1:
+        af.set_device(int(sys.argv[1]))
+    # Optional second argument: "-" enables console mode; anything else, or nothing, disables it.
+    console = (sys.argv[2] == "-") if len(sys.argv) > 2 else False
+
+    af.info()
+    print("** ArrayFire FAST Feature Detector Demo **\n")
+    fast_demo(console)
diff --git a/examples/computer_vision/harris.py b/examples/computer_vision/harris.py
new file mode 100644
index 0000000..4fb862b
--- /dev/null
+++ b/examples/computer_vision/harris.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2018, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+from time import time
+import arrayfire as af
+import os
+import sys
+
+
+def draw_corners(img, x, y, draw_len):
+    # Draw vertical line of (draw_len * 2 + 1) pixels centered on  the corner
+    # Set only channel 1 to 1 (green lines)
+    xmin = max(0, x - draw_len)
+    xmax = min(img.shape[1], x + draw_len)
+
+    img[y, xmin : xmax, 0] = 0.0
+    img[y, xmin : xmax, 1] = 1.0
+    img[y, xmin : xmax, 2] = 0.0
+
+    # Draw vertical line of (draw_len * 2 + 1) pixels centered on  the corner
+    # Set only the first channel to 1 (green lines)
+    ymin = max(0, y - draw_len)
+    ymax = min(img.shape[0], y + draw_len)
+
+    img[ymin : ymax, x, 0] = 0.0
+    img[ymin : ymax, x, 1] = 1.0
+    img[ymin : ymax, x, 2] = 0.0
+    return img
+
+def harris_demo(console):
+
+    root_path = os.path.dirname(os.path.abspath(__file__))
+    file_path = root_path
+    if console:
+        file_path += "/../../assets/examples/images/square.png"
+    else:
+        file_path += "/../../assets/examples/images/man.jpg"
+    img_color = af.load_image(file_path, is_color=True);
+
+    img = af.color_space(img_color, af.CSpace.GRAY, af.CSpace.RGB)
+    img_color /= 255.0
+
+    ix, iy = af.gradient(img)
+    ixx = ix * ix
+    ixy = ix * iy
+    iyy = iy * iy
+
+    # Compute a Gaussian kernel with standard deviation of 1.0 and length of 5 pixels
+    # These values can be changed to use a smaller or larger window
+    gauss_filt = af.gaussian_kernel(5, 5, rows_sigma=1.0, columns_sigma=1.0)
+
+    # Filter second order derivatives
+    ixx = af.convolve2(ixx, gauss_filt)
+    ixy = af.convolve2(ixy, gauss_filt)
+    iyy = af.convolve2(iyy, gauss_filt)
+
+    # Calculate trace
+    itr = ixx + iyy
+
+    # Calculate determinant
+    idet = ixx * iyy - ixy * ixy
+
+    # Calculate Harris response
+    response = idet - 0.04 * (itr * itr)
+
+    # Get maximum response for each 3x3 neighborhood
+    msk = af.constant(1, (3, 3))
+    max_resp = af.dilate(response, mask=msk)
+
+    # Discard responses that are not greater than threshold
+    corners = response > float(1e5)
+    corners = corners * response
+
+    # Discard responses that are not equal to maximum neighborhood response,
+    # scale them to original value
+    corners = (corners == max_resp) * corners
+
+    # Copy device array to python list on host
+    corners_list = corners.copy()
+    # import pdb; pdb.set_trace()
+
+    draw_len = 3
+    good_corners = 0
+    for x in range(img_color.shape[1] - 1):
+        for y in range(img_color.shape[0] - 1):
+            # print(f"x:{x}, y:{y}")
+            # # print(corners_list[x, y])
+            if (0 <= x < corners_list.shape[0]) and (0 <= y < corners_list.shape[1]):
+                if corners_list[x, y] > 1e5:
+                    img_color = draw_corners(img_color, x, y, draw_len)
+                    good_corners += 1
+            else:
+                continue
+
+
+    print("Corners found: {}".format(good_corners))
+    if not console:
+        # Previews color image with green crosshairs
+        file_path = os.path.join(os.getcwd(), 'harris_image.png')
+        af.save_image(img_color, file_path)
+    else:
+        idx = af.where(corners)
+
+        corners_x = idx / float(corners.dims()[0])
+        corners_y = idx % float(corners.dims()[0])
+
+        print(corners_x)
+        print(corners_y)
+
+
+if __name__ == "__main__":
+    if (len(sys.argv) > 1):
+        af.set_device(int(sys.argv[1]))
+    console = (sys.argv[2] == '-') if len(sys.argv) > 2 else False
+
+    af.info()
+    print("** ArrayFire Harris Corner Detector Demo **\n")
+
+    harris_demo(console)
\ No newline at end of file
diff --git a/examples/computer_vision/matching.py b/examples/computer_vision/matching.py
new file mode 100644
index 0000000..99bb0f9
--- /dev/null
+++ b/examples/computer_vision/matching.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2018, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+from time import time
+import arrayfire as af
+import os
+import sys
+
+from arrayfire.library.computer_vision import match_template
+
+def normalize(a):
+    max_ = float(af.max(a))
+    min_ = float(af.min(a))
+    return  (a - min_) /  (max_ - min_)
+
+def draw_rectangle(img, x, y, wx, wy):
+    print("\nMatching patch origin = ({}, {})\n".format(x, y))
+
+    # top edge
+    img[y, x : x + wx, 0] = 0.0
+    img[y, x : x + wx, 1] = 0.0
+    img[y, x : x + wx, 2] = 1.0
+
+    # bottom edge
+    img[y + wy, x : x + wx, 0] = 0.0
+    img[y + wy, x : x + wx, 1] = 0.0
+    img[y + wy, x : x + wx, 2] = 1.0
+
+    # left edge
+    img[y : y + wy, x, 0] = 0.0
+    img[y : y + wy, x, 1] = 0.0
+    img[y : y + wy, x, 2] = 1.0
+
+    # left edge
+    img[y : y + wy, x + wx, 0] = 0.0
+    img[y : y + wy, x + wx, 1] = 0.0
+    img[y : y + wy, x + wx, 2] = 1.0
+
+    return img
+
+def templateMatchingDemo(console):
+
+    root_path = os.path.dirname(os.path.abspath(__file__))
+    file_path = root_path
+    if console:
+        file_path += "/../../assets/examples/images/square.png"
+    else:
+        file_path += "/../../assets/examples/images/man.jpg"
+    img_color = af.load_image(file_path, is_color = True);
+
+    # Convert the image from RGB to gray-scale
+    img = af.color_space(img_color, af.CSpace.GRAY, af.CSpace.RGB)
+    iDims = img.shape
+    print("Input image dimensions: ", iDims)
+
+    # Extract a patch from the input image
+    patch_size = 100
+    tmp_img = img[100 : 100+patch_size, 100 : 100+patch_size]
+
+    result = match_template(img, tmp_img) # Default disparity metric is
+                                             # Sum of Absolute differences (SAD)
+                                             # Currently supported metrics are
+                                             # AF_SAD, AF_ZSAD, AF_LSAD, AF_SSD,
+                                             # AF_ZSSD, AF_LSSD
+
+    disp_img = img / 255.0
+    disp_tmp = tmp_img / 255.0
+    disp_res = normalize(result)
+
+    minval, minloc = af.imin(disp_res)
+    print("Location(linear index) of minimum disparity value = {}".format(minloc))
+
+    if not console:
+        marked_res = af.tile(disp_img, 1, 1, 3)
+        marked_res = draw_rectangle(marked_res, minloc%iDims[0], minloc/iDims[0],\
+                                    patch_size, patch_size)
+
+        print("Note: Based on the disparity metric option provided to matchTemplate function")
+        print("either minimum or maximum disparity location is the starting corner")
+        print("of our best matching patch to template image in the search image")
+
+        wnd = af.Window(512, 512, "Template Matching Demo")
+
+        while not wnd.close():
+            wnd.set_colormap(af.COLORMAP.DEFAULT)
+            wnd.grid(2, 2)
+            wnd[0, 0].image(disp_img, "Search Image" )
+            wnd[0, 1].image(disp_tmp, "Template Patch" )
+            wnd[1, 0].image(marked_res, "Best Match" )
+            wnd.set_colormap(af.COLORMAP.HEAT)
+            wnd[1, 1].image(disp_res, "Disparity Values")
+            wnd.show()
+
+
+if __name__ == "__main__":
+    if (len(sys.argv) > 1):
+        af.set_device(int(sys.argv[1]))
+    console = (sys.argv[2] == '-') if len(sys.argv) > 2 else False
+
+    af.info()
+    print("** ArrayFire template matching Demo **\n")
+    templateMatchingDemo(console)
\ No newline at end of file
diff --git a/examples/computer_vision/susan.py b/examples/computer_vision/susan.py
new file mode 100644
index 0000000..ba62ad7
--- /dev/null
+++ b/examples/computer_vision/susan.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+import os
+import sys
+
+import arrayfire as af
+
+
+def draw_corners(img, x, y, draw_len):
+    # Draw vertical line of (draw_len * 2 + 1) pixels centered on  the corner
+    # Set only the first channel to 1 (green lines)
+
+    # print(img)
+
+    xmin = int(max(0, x - draw_len))
+    xmax = int(min(img.shape[1], x + draw_len))
+
+    img[y, xmin:xmax, 0] = 0.0
+    img[y, xmin:xmax, 1] = 1.0
+    img[y, xmin:xmax, 2] = 0.0
+
+    # Draw vertical line of (draw_len * 2 + 1) pixels centered on  the corner
+    # Set only the first channel to 1 (green lines)
+    ymin = int(max(0, y - draw_len))
+    ymax = int(min(img.shape[0], y + draw_len))
+
+    img[ymin:ymax, x, 0] = 0.0
+    img[ymin:ymax, x, 1] = 1.0
+    img[ymin:ymax, x, 2] = 0.0
+    return img
+
+
+def susan_demo(console):
+
+    root_path = os.path.dirname(os.path.abspath(__file__))
+    file_path = root_path
+    if console:
+        file_path += "/../../assets/examples/images/square.png"
+    else:
+        file_path += "/../../assets/examples/images/man.jpg"
+    img_color = af.load_image(file_path, is_color=True)
+
+    img = af.color_space(img_color, af.CSpace.GRAY, af.CSpace.RGB)
+    img_color /= 255.0
+
+    features = af.susan(img)
+
+    xs = features.x.copy()
+    ys = features.y.copy()
+
+    draw_len = 3
+    num_features = features.num_features
+    for f in range(num_features):
+        x = int(xs[f].scalar())
+        y = int(ys[f].scalar())
+
+        img_color = draw_corners(img_color, y, x, draw_len)
+
+    print("Features found: {}".format(num_features))
+    if not console:
+        # Previews color image with green crosshairs
+        file_path = os.path.join(os.getcwd(), "susan_image.png")
+        af.save_image(img_color, file_path)
+    else:
+        print(xs)
+        print(ys)
+
+
+if __name__ == "__main__":
+    # Optional first argument: index of the ArrayFire device to use.
+    if len(sys.argv) > 1:
+        af.set_device(int(sys.argv[1]))
+    # Optional second argument: "-" enables console mode; anything else, or nothing, disables it.
+    console = (sys.argv[2] == "-") if len(sys.argv) > 2 else False
+
+    af.info()
+    print("** ArrayFire SUSAN Feature Detector Demo **\n")
+    susan_demo(console)
diff --git a/examples/financial/black_scholes_options.py b/examples/financial/black_scholes_options.py
new file mode 100644
index 0000000..23c0bac
--- /dev/null
+++ b/examples/financial/black_scholes_options.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import math
+import sys
+from time import time
+
+import arrayfire as af
+
+
+def initialize_device() -> None:
+    """Initialize the ArrayFire device based on command line arguments."""
+    device_id = int(sys.argv[1]) if len(sys.argv) > 1 else 0
+    af.set_device(device_id)
+    af.info()
+
+
+def cumulative_normal_distribution(x: af.Array) -> af.Array:
+    """Calculate the cumulative normal distribution using ArrayFire."""
+    sqrt2 = math.sqrt(2.0)
+    condition = x > 0
+    lhs = condition * (0.5 + af.erf(x / sqrt2) / 2)
+    rhs = (1 - condition) * (0.5 - af.erf((-x) / sqrt2) / 2)
+    return lhs + rhs
+
+
+def black_scholes(S: af.Array, X: af.Array, R: af.Array, V: af.Array, T: af.Array) -> tuple[af.Array, af.Array]:
+    """Compute call and put options prices using the Black-Scholes formula."""
+    d1 = (af.log(S / X) + (R + 0.5 * V**2) * T) / (V * af.sqrt(T))
+    d2 = d1 - V * af.sqrt(T)
+
+    cnd_d1 = cumulative_normal_distribution(d1)
+    cnd_d2 = cumulative_normal_distribution(d2)
+
+    C = S * cnd_d1 - X * af.exp(-R * T) * cnd_d2
+    P = X * af.exp(-R * T) * (1 - cnd_d2) - S * (1 - cnd_d1)
+    return C, P
+
+
+def benchmark_black_scholes(num_elements: int, num_iter: int = 100) -> None:
+    """Benchmark the Black-Scholes model over varying matrix sizes."""
+    M = 4000
+    for N in range(50, 501, 50):
+        S, X, R, V, T = (af.randu((M, N)) for _ in range(5))
+
+        print(f"Input data size: {M * N} elements")
+
+        start = time()
+        for _ in range(num_iter):
+            C, P = black_scholes(S, X, R, V, T)
+            af.eval(C, P)
+        af.sync()
+
+        sec = (time() - start) / num_iter
+        print(f"Mean GPU Time: {1000.0 * sec:.6f} ms\n")
+
+
+def main() -> None:
+    initialize_device()
+
+    # Run a small test to ensure that everything is set up correctly.
+    M = 4000
+    test_arrays = (af.randu((M, 1)) for _ in range(5))
+    C, P = black_scholes(*test_arrays)
+    af.eval(C, P)
+    af.sync()
+
+    # Benchmark Black-Scholes over varying sizes of input data.
+    benchmark_black_scholes(M)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/financial/heston_model.py b/examples/financial/heston_model.py
new file mode 100644
index 0000000..482eeb8
--- /dev/null
+++ b/examples/financial/heston_model.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+##############################################################################################
+# Copyright (c) 2015, Michael Nowotny
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation and/or other
+# materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors may be used
+# to endorse or promote products derived from this software without specific
+# prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+###############################################################################################
+
+import math
+import time
+
+import arrayfire as af
+
+
+def initialize_parameters() -> tuple[float, float, float, float, float, float, float, float]:
+    """Initialize and return model parameters."""
+    r = math.log(1.0319)  # risk-free rate
+    rho = -0.82  # instantaneous correlation between Brownian motions
+    sigmaV = 0.14  # variance of volatility
+    kappa = 3.46  # mean reversion speed
+    vBar = 0.008  # mean variance
+    k = math.log(0.95)  # strike price, converted to log space
+    x0 = 0  # initial log stock price
+    v0 = 0.087**2  # initial volatility
+    return r, rho, sigmaV, kappa, vBar, k, x0, v0
+
+
+def simulate_heston_model(
+    T: int, N: int, R: int, mu: float, kappa: float, vBar: float, sigmaV: float, rho: float, x0: float, v0: float
+) -> tuple[af.Array, af.Array]:
+    """Simulate the Heston model for given parameters and return the resulting arrays."""
+    deltaT = T / (N - 1)
+    sqrtDeltaT = math.sqrt(deltaT)
+    sqrtOneMinusRhoSquare = math.sqrt(1 - rho**2)
+
+    m = af.constant(0, (2,))
+    m[0] = rho
+    m[1] = sqrtOneMinusRhoSquare
+    zeroArray = af.constant(0, (R, 1))
+
+    x = [af.constant(x0, (R,)) for _ in range(2)]
+    v = [af.constant(v0, (R,)) for _ in range(2)]
+
+    for t in range(1, N):
+        t_previous = (t + 1) % 2
+        t_current = t % 2
+
+        dBt = af.randn((R, 2)) * sqrtDeltaT
+        vLag = af.maxof(v[t_previous], zeroArray)
+        sqrtVLag = af.sqrt(vLag)
+
+        x[t_current] = x[t_previous] + (mu - 0.5 * vLag) * deltaT + sqrtVLag * dBt[:, 0]
+        v[t_current] = vLag + kappa * (vBar - vLag) * deltaT + sigmaV * sqrtVLag * af.matmul(dBt, m)
+
+    return x[t_current], af.maxof(v[t_current], zeroArray)
+
+
+def main() -> None:
+    T = 1
+    nT = 20 * T
+    R_first = 1000
+    R = 5000000
+    r, rho, sigmaV, kappa, vBar, k, x0, v0 = initialize_parameters()
+
+    # Initial simulation
+    simulate_heston_model(T, nT, R_first, r, kappa, vBar, sigmaV, rho, x0, v0)
+
+    # Time the pricing of a vanilla call option
+    tic = time.time()
+    x, v = simulate_heston_model(T, nT, R, r, kappa, vBar, sigmaV, rho, x0, v0)
+    af.sync()
+    toc = time.time() - tic
+    K = math.exp(k)
+    C_CPU = math.exp(-r * T) * af.mean(af.maxof(af.exp(x) - K, af.constant(0, (R,))))
+    print(f"Time elapsed = {toc:.3f} secs")
+    print(f"Call price = {C_CPU:.6f}")
+    print(f"Average final variance = {af.mean(v):.6f}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/financial/monte_carlo_options.py b/examples/financial/monte_carlo_options.py
new file mode 100644
index 0000000..d5cfcb8
--- /dev/null
+++ b/examples/financial/monte_carlo_options.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import math
+import sys
+from time import time
+from typing import cast
+
+import arrayfire as af
+
+
+def monte_carlo_options(
+    N: int,
+    K: float,
+    t: float,
+    vol: float,
+    r: float,
+    strike: int,
+    steps: int,
+    use_barrier: bool = True,
+    B: float | None = None,
+    ty: af.Dtype = af.float32,
+) -> float:
+    dt = t / (steps - 1)
+    s = af.constant(strike, (N, 1), dtype=ty)
+
+    randmat = af.randn((N, steps - 1), dtype=ty)
+    randmat = af.exp((r - (vol * vol * 0.5)) * dt + vol * math.sqrt(dt) * randmat)
+
+    S = af.product(af.join(1, s, randmat), axis=1)
+
+    if use_barrier:
+        if B is None:
+            raise ValueError("Barrier value B must be provided if use_barrier is True.")
+        S = S * af.all_true(S < B, 1)
+
+    payoff = af.maxof(0, S - K)
+    mean_payoff = cast(float, af.mean(payoff)) * math.exp(-r * t)
+
+    return mean_payoff
+
+
+def monte_carlo_simulate(N: int, use_barrier: bool, num_iter: int = 10) -> float:
+    steps = 180
+    stock_price = 100.0
+    maturity = 0.5
+    volatility = 0.3
+    rate = 0.01
+    strike = 100
+    barrier = 115.0 if use_barrier else None
+
+    total_time = time()
+    for _ in range(num_iter):
+        monte_carlo_options(N, stock_price, maturity, volatility, rate, strike, steps, use_barrier, barrier)
+    average_time = (time() - total_time) / num_iter
+
+    return average_time
+
+
+def main() -> None:
+    if len(sys.argv) > 1:
+        device_id = int(sys.argv[1])
+        af.set_device(device_id)
+    af.info()
+
+    # Initial simulation calls to test without and with barrier
+    print("Simulation without barrier:", monte_carlo_simulate(1000, use_barrier=False))
+    print("Simulation with barrier:", monte_carlo_simulate(1000, use_barrier=True))
+
+    af.sync()  # Synchronize ArrayFire computations before timing analysis
+
+    # Timing analysis for different numbers of paths
+    for n in range(10000, 100001, 10000):
+        time_vanilla = 1000 * monte_carlo_simulate(n, False, 100)
+        time_barrier = 1000 * monte_carlo_simulate(n, True, 100)
+        print(
+            f"Time for {n:7d} paths - vanilla method: {time_vanilla:4.3f} ms, barrier method: {time_barrier:4.3f} ms"
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/getting_started/convolve.py b/examples/getting_started/convolve.py
new file mode 100644
index 0000000..f98f4ab
--- /dev/null
+++ b/examples/getting_started/convolve.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import sys
+from array import array
+from timeit import timeit
+
+import arrayfire as af
+
+
+def set_device_from_args() -> None:
+    """Sets the ArrayFire device based on the command line argument."""
+    if len(sys.argv) > 1:
+        af.set_device(int(sys.argv[1]))
+    af.info()
+
+
+def create_arrays() -> tuple[af.Array, ...]:
+    """Creates and returns initialized ArrayFire arrays for convolution."""
+    h_dx = array("f", (1.0 / 12, -8.0 / 12, 0, 8.0 / 12, 1.0 / 12))
+    h_spread = array("f", (1.0 / 5, 1.0 / 5, 1.0 / 5, 1.0 / 5, 1.0 / 5))
+
+    img = af.randu((640, 480))
+    dx = af.Array(h_dx, shape=(5, 1))
+    spread = af.Array(h_spread, shape=(1, 5))
+
+    return img, dx, spread
+
+
+def perform_convolution(img: af.Array, dx: af.Array, spread: af.Array) -> tuple[af.Array, af.Array]:
+    """Performs and returns the result of full and separable 2D convolution."""
+    kernel = af.matmul(dx, spread)
+    full_res = af.convolve2(img, kernel)
+    sep_res = af.convolve2_separable(dx, spread, img)
+    return full_res, sep_res
+
+
+def af_assert(left: af.Array, right: af.Array, eps: float = 1e-6) -> None:
+    """Asserts that two arrays are equal within a specified precision."""
+    max_diff = af.max(af.abs(left - right))
+    if isinstance(max_diff, complex):
+        max_diff = max_diff.real
+    if max_diff > eps:
+        raise ValueError("Arrays not within dictated precision")
+
+
+def time_convolution_operations(img: af.Array, dx: af.Array, spread: af.Array, kernel: af.Array) -> None:
+    """Times and prints the convolution operations."""
+    time_convolve2 = timeit(lambda: af.convolve2(img, kernel), number=1000)
+    time_convolve2_sep = timeit(lambda: af.convolve2_separable(dx, spread, img), number=1000)
+
+    print(f"Full 2D convolution time: {time_convolve2 * 1000:.5f} ms")
+    print(f"Full separable 2D convolution time: {time_convolve2_sep * 1000:.5f} ms")
+
+
+def main() -> None:
+    """Run the convolution demo: check that both methods agree, then time them.
+
+    Any exception is caught and printed instead of propagating.
+    """
+    try:
+        set_device_from_args()
+        img, dx, spread = create_arrays()
+        full_res, sep_res = perform_convolution(img, dx, spread)
+        # Raises ValueError when the two results differ by more than the default tolerance
+        af_assert(full_res, sep_res)
+        kernel = af.matmul(dx, spread)  # Reconstruct kernel for timing
+        time_convolution_operations(img, dx, spread, kernel)
+    except Exception as e:
+        print(f"Error: {str(e)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/getting_started/intro.py b/examples/getting_started/intro.py
new file mode 100644
index 0000000..7f99bda
--- /dev/null
+++ b/examples/getting_started/intro.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import sys
+from array import array
+
+import arrayfire as af
+
+
+def set_device_from_args() -> None:
+    """Sets the ArrayFire device based on command line argument."""
+    if len(sys.argv) > 1:
+        af.set_device(int(sys.argv[1]))
+    af.info()
+
+
+def initialize_matrices() -> tuple[af.Array, ...]:
+    """Initializes matrices for demonstration."""
+    h_A = array("i", (1, 2, 4, -1, 2, 0, 4, 2, 3))
+    h_B = array("i", (2, 3, 5, 6, 0, 10, -12, 0, 1))
+    A = af.Array(obj=h_A, shape=(3, 3), dtype=af.int32)
+    B = af.Array(obj=h_B, shape=(3, 3), dtype=af.int32)
+
+    b_A = array("I", (1, 1, 1, 0, 1, 1, 0, 0, 0))
+    b_B = array("I", (1, 0, 1, 0, 1, 0, 1, 0, 1))
+    C = af.Array(obj=b_A, shape=(3, 3), dtype=af.uint32)
+    D = af.Array(obj=b_B, shape=(3, 3), dtype=af.uint32)
+
+    return A, B, C, D
+
+
+def demonstrate_array_operations(A: af.Array, B: af.Array, C: af.Array, D: af.Array) -> None:
+    """Print a tour of indexing, assignment, bitwise, transpose, flip and reduction calls.
+
+    `A` is modified by the assignments in the first section; every later
+    section prints results computed from that modified `A`. `B` is read and
+    printed only; `C` and `D` are used for the bitwise section.
+    """
+    print("\n---- Sub referencing and sub assignment ----\n")
+    print(A)
+    print(A[0, :])
+    print(A[:, 0])
+    # Assign to a single element, then through a single index
+    A[0, 0] = 11
+    A[1] = 100
+    print(A)
+    print(B)
+    # Copy one slice of B into a slice of A
+    A[1, :] = B[2, :]
+    print(A)
+
+    print("\n---- Bitwise operations ----\n")
+    print(C)
+    print(D)
+    print(af.bitand(C, D))
+    print(af.bitor(C, D))
+
+    print("\n---- Transpose ----\n")
+    print(A)
+    print(af.transpose(A))
+
+    print("\n---- Flip Vertically / Horizontally ----\n")
+    print(A)
+    print(af.flip(A, axis=0))
+    print(af.flip(A, axis=1))
+
+    print("\n---- Sum, Min, Max along row / columns ----\n")
+    print(A)
+    print(af.min(A, axis=0))
+    print(af.max(A, axis=0))
+    print(af.min(A, axis=1))
+    print(af.max(A, axis=1))
+    print(af.sum(A, axis=0))
+    print(af.sum(A, axis=1))
+
+    print("\n---- Get minimum with index ----\n")
+    # af.imin returns a pair that is unpacked into the minimum values and their indices
+    (min_val, min_idx) = af.imin(A, axis=0)
+    print(min_val)
+    print(min_idx)
+
+
+def main() -> None:
+    """Select the device, build the demo matrices and run the demonstration.
+
+    Any exception is caught and printed instead of propagating.
+    """
+    try:
+        set_device_from_args()
+        print("\n---- Intro to ArrayFire using unsigned(s32) arrays ----\n")
+        A, B, C, D = initialize_matrices()
+        demonstrate_array_operations(A, B, C, D)
+    except Exception as e:
+        print("Error:", str(e))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/linear_algebra/cholesky.py b/examples/linear_algebra/cholesky.py
new file mode 100644
index 0000000..4c34067
--- /dev/null
+++ b/examples/linear_algebra/cholesky.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import arrayfire as af
+
+
+def generate_symmetric_positive_definite_matrix(n: int) -> af.Array:
+    """Generates a symmetric positive definite matrix of size n x n."""
+    t = af.randu((n, n))
+    return af.matmul(t, t, rhs_opts=af.MatProp.TRANS) + af.identity((n, n)) * n
+
+
+def run_cholesky_inplace(matrix: af.Array) -> None:
+    """Call af.cholesky on two copies of `matrix` (upper and lower) and print both copies."""
+    print("Running Cholesky InPlace")
+    # Work on copies so that the caller's matrix is not passed to af.cholesky directly
+    cin_upper = matrix.copy()
+    cin_lower = matrix.copy()
+
+    # NOTE(review): the return values are discarded, so the prints below only show factors if
+    # af.cholesky overwrites its argument. run_cholesky_out_of_place() uses the same call form
+    # and reads the factor from the return value -- confirm which behavior applies here.
+    af.cholesky(cin_upper, is_upper=True)
+    af.cholesky(cin_lower, is_upper=False)
+
+    print(cin_upper)
+    print(cin_lower)
+
+
+def run_cholesky_out_of_place(matrix: af.Array) -> None:
+    """Performs Cholesky decomposition out of place and prints the results if successful."""
+    print("Running Cholesky Out of place")
+
+    out_upper, upper_success = af.cholesky(matrix, is_upper=True)
+    out_lower, lower_success = af.cholesky(matrix, is_upper=False)
+
+    if upper_success == 0:
+        print("Upper triangular matrix:")
+        print(out_upper)
+    if lower_success == 0:
+        print("Lower triangular matrix:")
+        print(out_lower)
+
+
+def main() -> None:
+    """Print device info, build a 5 x 5 test matrix and run both Cholesky demonstrations.
+
+    Any exception is caught and printed instead of propagating.
+    """
+    try:
+        af.info()
+        n = 5
+        spd_matrix = generate_symmetric_positive_definite_matrix(n)
+
+        run_cholesky_inplace(spd_matrix)
+        run_cholesky_out_of_place(spd_matrix)
+
+    except Exception as e:
+        print("Error: ", str(e))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/linear_algebra/lu.py b/examples/linear_algebra/lu.py
new file mode 100644
index 0000000..8b8a42e
--- /dev/null
+++ b/examples/linear_algebra/lu.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import arrayfire as af
+
+
+def run_lu_inplace(array: af.Array) -> None:
+    """Run LU with inplace=True directly on `array` (no copy is made here), then print `array` and the pivot."""
+    print("Running LU InPlace")
+    pivot_out = af.lu(array, inplace=True)
+    for shown in (array, pivot_out):
+        print(shown)
+
+
+def run_lu_factorization(array: af.Array) -> None:
+    """Run the out-of-place LU call and print its three results in order: lower, upper, then pivot."""
+    print("Running LU with Upper Lower Factorization")
+    lower_part, upper_part, pivot_out = af.lu(array)
+    # Emit the results in the same order af.lu returned them.
+    for part in (lower_part, upper_part, pivot_out):
+        print(part)
+
+
+def main() -> None:
+    """Run both LU demos on one random 5x8 matrix, printing any exception instead of re-raising it."""
+    try:
+        af.info()  # Show ArrayFire library information
+        source_matrix = af.randu((5, 8))
+
+        # The in-place demo gets its own copy so the factorization demo still sees the original values.
+        scratch = source_matrix.copy()
+        run_lu_inplace(scratch)
+        run_lu_factorization(source_matrix)
+    except Exception as error:
+        print("Error: ", error)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/linear_algebra/qr.py b/examples/linear_algebra/qr.py
new file mode 100644
index 0000000..9a6eb3b
--- /dev/null
+++ b/examples/linear_algebra/qr.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+#######################################################
+# Copyright (c) 2024, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+import arrayfire as af
+
+
+def run_qr_inplace(array: af.Array) -> None:
+    """Run QR with inplace=True on a private copy of `array`, then print that copy and the returned tau."""
+    print("Running QR InPlace")
+    workspace = array.copy()
+    tau_out = af.qr(workspace, inplace=True)
+    for shown in (workspace, tau_out):
+        print(shown)
+
+
+def run_qr_factorization(array: af.Array) -> None:
+    """Run the out-of-place QR call and print its three results in order: q, r, then tau."""
+    print("Running QR with Q and R factorization")
+    q_part, r_part, tau_out = af.qr(array)
+    # Emit the results in the same order af.qr returned them.
+    for part in (q_part, r_part, tau_out):
+        print(part)
+
+
+def main() -> None:
+    """Run both QR demos on one random 5x8 matrix, printing any exception instead of re-raising it."""
+    try:
+        af.info()
+        random_matrix = af.randu((5, 8))
+
+        for demo in (run_qr_inplace, run_qr_factorization):
+            demo(random_matrix)
+    except Exception as error:
+        print("Error: ", error)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mypy.ini b/mypy.ini
deleted file mode 100755
index 0367984..0000000
--- a/mypy.ini
+++ /dev/null
@@ -1,10 +0,0 @@
-[mypy]
-disallow_incomplete_defs = true
-disallow_untyped_defs = true
-ignore_missing_imports = true
-show_error_codes = true
-warn_return_any = true
-
-exclude =
-    .venv
-    setup.py
diff --git a/pyproject.toml b/pyproject.toml
index ee5605a..4ea40fe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,8 +1,56 @@
-[tool.black]
-line-length = 119
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "arrayfire"
+version = "0.1.0"
+dependencies = ["arrayfire-binary-python-wrapper == 0.7.0"]
+requires-python = ">=3.10"
+description = "ArrayFire Python"
+readme = "README.md"
+license = { file = "LICENSE" }
+maintainers = [{ name = "ArrayFire", email = "technical@arrayfire.com" }]
+keywords = [
+  "arrayfire",
+  "parallel computing",
+  "high performance computing",
+  "hpc",
+  "gpu",
+  "cpu",
+  "opencl",
+  "cuda",
+  "oneapi",
+  "python",
+  "wrapper",
+]
+classifiers = [
+  "Intended Audience :: Developers",
+  "Intended Audience :: Science/Research",
+  "Development Status :: 5 - Production/Stable",
+  "License :: OSI Approved :: BSD License",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.10",
+  "Topic :: Scientific/Engineering",
+  "Topic :: Scientific/Engineering :: Artificial Intelligence",
+  "Topic :: Scientific/Engineering :: Information Analysis",
+  "Topic :: Scientific/Engineering :: Mathematics",
+  "Topic :: Software Development :: Libraries",
+]
+
+[project.urls]
+Homepage = "http://arrayfire.com"
+"General Documentation" = "https://arrayfire.org/docs/index.htm"
 
-include = '\.pyi?$'
+[project.optional-dependencies]
+benchmarks = ["numpy ~= 1.26.4"]
 
+[project.entry-points.array_api]
+array_api = "arrayfire.array_api"
+
+[tool.black]
+line-length = 119
 exclude = '''
 (
       __pycache__
@@ -18,6 +66,14 @@ exclude = '''
 )
 '''
 
-[build-system]
-requires = ["setuptools", "wheel", "scikit-build", "cmake", "ninja"]
-build-backend = "setuptools.build_meta"
+[tool.isort]
+line_length = 119
+profile = "black"
+
+[tool.mypy]
+disallow_incomplete_defs = true
+disallow_untyped_defs = true
+ignore_missing_imports = true
+show_error_codes = true
+warn_return_any = true
+exclude = ".venv"
diff --git a/requirements.in b/requirements.in
deleted file mode 100755
index 7faf48c..0000000
--- a/requirements.in
+++ /dev/null
@@ -1,2 +0,0 @@
--r dev-requirements.txt
--r requirements.txt
diff --git a/requirements.txt b/requirements.txt
index 23e82e3..d3f5294 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-arrayfire-binary-python-wrapper==0.6.0+af3.9.0
+arrayfire-binary-python-wrapper==0.7.0+af3.9.0
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 7f92de2..0000000
--- a/setup.py
+++ /dev/null
@@ -1,97 +0,0 @@
-from collections import defaultdict
-from pathlib import Path
-
-from setuptools import find_packages, setup
-
-# PEP0440 compatible formatted version, see:
-# https://www.python.org/dev/peps/pep-0440/
-#
-# release markers:
-#   X.Y
-#   X.Y.Z   # For bugfix releases
-#
-# pre-release markers:
-#   X.YaN   # Alpha release
-#   X.YbN   # Beta release
-#   X.YrcN  # Release Candidate
-#   X.Y     # Final release
-
-
-def parse_requirements_file(path: Path, allowed_extras: set = None, include_all_extra: bool = True):
-    requirements = []
-    extras = defaultdict(list)
-    with path.open("r") as requirements_file:
-        import re
-
-        def fix_url_dependencies(req: str) -> str:
-            """Pip and setuptools disagree about how URL dependencies should be handled."""
-            m = re.match(r"^(git\+)?(https|ssh)://(git@)?github\.com/([\w-]+)/(?P<name>[\w-]+)\.git", req)
-            if m is None:
-                return req
-            else:
-                return f"{m.group('name')} @ {req}"
-
-        for line in requirements_file:
-            line = line.strip()
-            if line.startswith("#") or len(line) <= 0:
-                continue
-            req, *needed_by = line.split("# needed by:")
-            req = fix_url_dependencies(req.strip())
-            if needed_by:
-                for extra in needed_by[0].strip().split(","):
-                    extra = extra.strip()
-                    if allowed_extras is not None and extra not in allowed_extras:
-                        raise ValueError(f"invalid extra '{extra}' in {path}")
-                    extras[extra].append(req)
-                if include_all_extra and req not in extras["all"]:
-                    extras["all"].append(req)
-            else:
-                requirements.append(req)
-    return requirements, extras
-
-
-ABS_PATH = Path().absolute()
-# exec is used here so we don't import arrayfire whilst setting up
-VERSION = {}  # type: ignore
-with (ABS_PATH / "arrayfire" / "version.py").open("r") as version_file:
-    exec(version_file.read(), VERSION)
-
-# Load requirements.
-install_requirements, extras = parse_requirements_file(ABS_PATH / "requirements.txt")
-dev_requirements, dev_extras = parse_requirements_file(
-    ABS_PATH / "dev-requirements.txt", allowed_extras={"examples"}, include_all_extra=False
-)
-extras["dev"] = dev_requirements
-extras.update(dev_extras)
-
-setup(
-    name="arrayfire",
-    version=VERSION["VERSION"],
-    description="ArrayFire Python Wrapper",
-    license="BSD",
-    long_description=(ABS_PATH / "README.md").open("r").read(),
-    long_description_content_type="text/markdown",
-    author="ArrayFire",
-    author_email="technical@arrayfire.com",
-    url="http://arrayfire.com",
-    classifiers=[
-        "Intended Audience :: Science/Research",
-        "Development Status :: 5 - Production/Stable",
-        "License :: OSI Approved :: BSD License",
-        "Programming Language :: Python",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.10",
-        "Topic :: Scientific/Engineering",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-        "Topic :: Scientific/Engineering :: Information Analysis",
-        "Topic :: Scientific/Engineering :: Mathematics",
-        "Topic :: Software Development :: Libraries",
-    ],
-    keywords="arrayfire parallel computing gpu cpu opencl oneapi",
-    packages=find_packages(),
-    install_requires=install_requirements,
-    extras_require=extras,
-    include_package_data=True,
-    python_requires=">=3.10.0",
-    zip_safe=False,
-)
diff --git a/tests/_helpers.py b/tests/_helpers.py
index c508455..d7bc741 100644
--- a/tests/_helpers.py
+++ b/tests/_helpers.py
@@ -1,7 +1,7 @@
 import arrayfire as af
 
 
-def round_to(list_: list[int | float | complex | bool], symbols: int = 3) -> list[int | float]:
+def round_to(list_: list[float], symbols: int = 3) -> list[int | float]:
     # HACK replace for e.g. abs(x1-x2) < 1e-6 ~ https://davidamos.dev/the-right-way-to-compare-floats-in-python/
     return [round(x, symbols) for x in list_]
 
diff --git a/tests/test_library/test_mathematical_functions.py b/tests/test_library/test_mathematical_functions.py
index de98883..37a74f5 100644
--- a/tests/test_library/test_mathematical_functions.py
+++ b/tests/test_library/test_mathematical_functions.py
@@ -28,7 +28,7 @@ def test_mul(self) -> None:
     def test_div(self) -> None:
         res = af.div(self.array1, self.array2)
         res_quotient = self.array1 / self.array2
-        assert round_to(res.to_list()) == round_to(res_quotient.to_list()) == [0.25, 0.4, 0.5]
+        assert round_to(res.to_list()) == round_to(res_quotient.to_list()) == [0.25, 0.4, 0.5]  # type: ignore
 
     def test_mod(self) -> None:
         res = af.mod(self.array1, self.array2)