pygmt.blockm*: Add 'output_type' parameter for output in pandas/numpy/file formats (#3103)

seisman · web-flow · commit 855ebddc1deb · 2024-03-15T21:24:19.000+08:00
diff --git a/pygmt/src/blockm.py b/pygmt/src/blockm.py
@@ -1,22 +1,26 @@
 """
-blockm - Block average (x, y, z) data tables by mean, median, or mode
-estimation.
+blockm - Block average (x, y, z) data tables by mean, median, or mode estimation.
 """
 
+from typing import Literal
+
+import numpy as np
 import pandas as pd
 from pygmt.clib import Session
 from pygmt.helpers import (
-    GMTTempFile,
     build_arg_string,
     fmt_docstring,
     kwargs_to_strings,
     use_alias,
+    validate_output_table_type,
 )
 
 __doctest_skip__ = ["blockmean", "blockmedian", "blockmode"]
 
 
-def _blockm(block_method, data, x, y, z, outfile, **kwargs):
+def _blockm(
+    block_method, data, x, y, z, output_type, outfile, **kwargs
+) -> pd.DataFrame | np.ndarray | None:
     r"""
     Block average (x, y, z) data tables by mean, median, or mode estimation.
 
@@ -34,38 +38,34 @@ def _blockm(block_method, data, x, y, z, outfile, **kwargs):
 
     Returns
     -------
-    output : pandas.DataFrame or None
-        Return type depends on whether the ``outfile`` parameter is set:
+    ret
+        Return type depends on ``outfile`` and ``output_type``:
 
-        - :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
-          is not set
-        - None if ``outfile`` is set (filtered output will be stored in file
-          set by ``outfile``)
+        - ``None`` if ``outfile`` is set (output will be stored in file set by
+          ``outfile``)
+        - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
+          (depends on ``output_type``)
     """
-    with GMTTempFile(suffix=".csv") as tmpfile:
-        with Session() as lib:
-            with lib.virtualfile_in(
+    output_type = validate_output_table_type(output_type, outfile=outfile)
+
+    column_names = None
+    if output_type == "pandas" and isinstance(data, pd.DataFrame):
+        column_names = data.columns.to_list()
+
+    with Session() as lib:
+        with (
+            lib.virtualfile_in(
                 check_kind="vector", data=data, x=x, y=y, z=z, required_z=True
-            ) as vintbl:
-                # Run blockm* on data table
-                if outfile is None:
-                    outfile = tmpfile.name
-                lib.call_module(
-                    module=block_method,
-                    args=build_arg_string(kwargs, infile=vintbl, outfile=outfile),
-                )
-
-        # Read temporary csv output to a pandas table
-        if outfile == tmpfile.name:  # if user did not set outfile, return pd.DataFrame
-            try:
-                column_names = data.columns.to_list()
-                result = pd.read_csv(tmpfile.name, sep="\t", names=column_names)
-            except AttributeError:  # 'str' object has no attribute 'columns'
-                result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">")
-        elif outfile != tmpfile.name:  # return None if outfile set, output in outfile
-            result = None
-
-    return result
+            ) as vintbl,
+            lib.virtualfile_out(kind="dataset", fname=outfile) as vouttbl,
+        ):
+            lib.call_module(
+                module=block_method,
+                args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl),
+            )
+            return lib.virtualfile_to_dataset(
+                output_type=output_type, vfname=vouttbl, column_names=column_names
+            )
 
 
 @fmt_docstring
@@ -86,7 +86,15 @@ def _blockm(block_method, data, x, y, z, outfile, **kwargs):
     w="wrap",
 )
 @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
-def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
+def blockmean(
+    data=None,
+    x=None,
+    y=None,
+    z=None,
+    output_type: Literal["pandas", "numpy", "file"] = "pandas",
+    outfile: str | None = None,
+    **kwargs,
+) -> pd.DataFrame | np.ndarray | None:
     r"""
     Block average (x, y, z) data tables by mean estimation.
 
@@ -111,9 +119,9 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
         {table-classes}.
     x/y/z : 1-D arrays
         Arrays of x and y coordinates and values z of the data points.
-
+    {output_type}
+    {outfile}
     {spacing}
-
     summary : str
         [**m**\|\ **n**\|\ **s**\|\ **w**].
         Type of summary values calculated by blockmean.
@@ -122,12 +130,7 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
         - **n** - report the number of input points inside each block
         - **s** - report the sum of all z-values inside a block
         - **w** - report the sum of weights
-
     {region}
-
-    outfile : str
-        The file name for the output ASCII file.
-
     {verbose}
     {aspatial}
     {binary}
@@ -142,13 +145,13 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
 
     Returns
     -------
-    output : pandas.DataFrame or None
-        Return type depends on whether the ``outfile`` parameter is set:
+    ret
+        Return type depends on ``outfile`` and ``output_type``:
 
-        - :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
-          is not set.
-        - None if ``outfile`` is set (filtered output will be stored in file
-          set by ``outfile``).
+        - ``None`` if ``outfile`` is set (output will be stored in file set by
+          ``outfile``)
+        - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
+          (depends on ``output_type``)
 
     Example
     -------
@@ -159,7 +162,14 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
     >>> data_bmean = pygmt.blockmean(data=data, region=[245, 255, 20, 30], spacing="5m")
     """
     return _blockm(
-        block_method="blockmean", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs
+        block_method="blockmean",
+        data=data,
+        x=x,
+        y=y,
+        z=z,
+        output_type=output_type,
+        outfile=outfile,
+        **kwargs,
     )
 
 
@@ -180,7 +190,15 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
     w="wrap",
 )
 @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
-def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
+def blockmedian(
+    data=None,
+    x=None,
+    y=None,
+    z=None,
+    output_type: Literal["pandas", "numpy", "file"] = "pandas",
+    outfile: str | None = None,
+    **kwargs,
+) -> pd.DataFrame | np.ndarray | None:
     r"""
     Block average (x, y, z) data tables by median estimation.
 
@@ -205,14 +223,10 @@ def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
         {table-classes}.
     x/y/z : 1-D arrays
         Arrays of x and y coordinates and values z of the data points.
-
+    {output_type}
+    {outfile}
     {spacing}
-
     {region}
-
-    outfile : str
-        The file name for the output ASCII file.
-
     {verbose}
     {aspatial}
     {binary}
@@ -227,13 +241,13 @@ def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
 
     Returns
     -------
-    output : pandas.DataFrame or None
-        Return type depends on whether the ``outfile`` parameter is set:
+    ret
+        Return type depends on ``outfile`` and ``output_type``:
 
-        - :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
-          is not set.
-        - None if ``outfile`` is set (filtered output will be stored in file
-          set by ``outfile``).
+        - ``None`` if ``outfile`` is set (output will be stored in file set by
+          ``outfile``)
+        - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
+          (depends on ``output_type``)
 
     Example
     -------
@@ -246,7 +260,14 @@ def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
     ... )
     """
     return _blockm(
-        block_method="blockmedian", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs
+        block_method="blockmedian",
+        data=data,
+        x=x,
+        y=y,
+        z=z,
+        output_type=output_type,
+        outfile=outfile,
+        **kwargs,
     )
 
 
@@ -267,7 +288,15 @@ def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
     w="wrap",
 )
 @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
-def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
+def blockmode(
+    data=None,
+    x=None,
+    y=None,
+    z=None,
+    output_type: Literal["pandas", "numpy", "file"] = "pandas",
+    outfile: str | None = None,
+    **kwargs,
+) -> pd.DataFrame | np.ndarray | None:
     r"""
     Block average (x, y, z) data tables by mode estimation.
 
@@ -292,14 +321,10 @@ def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
         {table-classes}.
     x/y/z : 1-D arrays
         Arrays of x and y coordinates and values z of the data points.
-
+    {output_type}
+    {outfile}
     {spacing}
-
     {region}
-
-    outfile : str
-        The file name for the output ASCII file.
-
     {verbose}
     {aspatial}
     {binary}
@@ -314,13 +339,13 @@ def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
 
     Returns
     -------
-    output : pandas.DataFrame or None
-        Return type depends on whether the ``outfile`` parameter is set:
+    ret
+        Return type depends on ``outfile`` and ``output_type``:
 
-        - :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
-          is not set.
-        - None if ``outfile`` is set (filtered output will be stored in file
-          set by ``outfile``).
+        - ``None`` if ``outfile`` is set (output will be stored in file set by
+          ``outfile``)
+        - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
+          (depends on ``output_type``)
 
     Example
     -------
@@ -331,5 +356,12 @@ def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
     >>> data_bmode = pygmt.blockmode(data=data, region=[245, 255, 20, 30], spacing="5m")
     """
     return _blockm(
-        block_method="blockmode", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs
+        block_method="blockmode",
+        data=data,
+        x=x,
+        y=y,
+        z=z,
+        output_type=output_type,
+        outfile=outfile,
+        **kwargs,
     )