Skip to content

Commit 40ab2d9

Browse files
willschlitzer, weiji14, seisman, and Max Jones
authored
Wrap binstats (#1652)
* Wrap gmtbinstats function
* Add test_binstats.py
* Add binstats to API index
* Add remote files "@capitals.gmt" to cached files

Co-authored-by: Wei Ji <[email protected]>
Co-authored-by: Dongdong Tian <[email protected]>
Co-authored-by: Max Jones <[email protected]>
1 parent f605b68 commit 40ab2d9

File tree

6 files changed

+175
-0
lines changed

6 files changed

+175
-0
lines changed

doc/api/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ Operations on tabular data
114114
.. autosummary::
115115
:toctree: generated
116116

117+
binstats
117118
blockmean
118119
blockmedian
119120
blockmode

pygmt/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from pygmt.session_management import begin as _begin
3030
from pygmt.session_management import end as _end
3131
from pygmt.src import (
32+
binstats,
3233
blockmean,
3334
blockmedian,
3435
blockmode,

pygmt/helpers/testing.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ def download_test_data():
171171
"@earth_age_01d_g",
172172
"@S90W180.earth_age_05m_g.nc", # Specific grid for 05m test
173173
# Other cache files
174+
"@capitals.gmt",
174175
"@earth_relief_20m_holes.grd",
175176
"@EGM96_to_36.txt",
176177
"@MaunaLoa_CO2.txt",

pygmt/src/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# pylint: disable=import-outside-toplevel
55

66
from pygmt.src.basemap import basemap
7+
from pygmt.src.binstats import binstats
78
from pygmt.src.blockm import blockmean, blockmedian, blockmode
89
from pygmt.src.coast import coast
910
from pygmt.src.colorbar import colorbar

pygmt/src/binstats.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
"""
2+
binstats - Bin spatial data and determine statistics per bin
3+
"""
4+
from pygmt.clib import Session
5+
from pygmt.helpers import (
6+
GMTTempFile,
7+
build_arg_string,
8+
fmt_docstring,
9+
kwargs_to_strings,
10+
use_alias,
11+
)
12+
from pygmt.io import load_dataarray
13+
14+
15+
@fmt_docstring
@use_alias(
    C="statistic",
    E="empty",
    G="outgrid",
    I="spacing",
    N="normalize",
    R="region",
    S="search_radius",
    V="verbose",
    W="weight",
    a="aspatial",
    b="binary",
    h="header",
    i="incols",
    r="registration",
)
@kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma")
def binstats(data, **kwargs):
    r"""
    Bin spatial data and determine statistics per bin.

    Reads arbitrarily located (x,y[,z][,w]) points
    (2-4 columns) from ``data`` and for each
    node in the specified grid layout determines which points are
    within the given radius. These points are then used in the
    calculation of the specified statistic. The results may be
    presented as is or may be normalized by the circle area to
    perhaps give density estimates.

    Full option list at :gmt-docs:`gmtbinstats.html`

    {aliases}

    Parameters
    ----------
    data : str or {table-like}
        A file name of an ASCII data table or a 2D
        {table-classes}.
    outgrid : str or None
        The name of the output netCDF file with extension .nc to store the grid
        in.
    statistic : str
        **a**\|\ **d**\|\ **g**\|\ **i**\|\ **l**\|\ **L**\|\ **m**\|\ **n**\
        \|\ **o**\|\ **p**\|\ **q**\ [*quant*]\|\ **r**\|\ **s**\|\ **u**\
        \|\ **U**\|\ **z**.
        Choose the statistic that will be computed per node based on the
        points that are within *radius* distance of the node. Select one of:

        - **a** for mean (average)
        - **d** for median absolute deviation (MAD)
        - **g** for full (max-min) range
        - **i** for 25-75% interquartile range
        - **l** for minimum (low)
        - **L** for minimum of positive values only
        - **m** for median
        - **n** the number of values
        - **o** for LMS scale
        - **p** for mode (maximum likelihood)
        - **q** for selected quantile (append desired quantile in
          0-100% range [50])
        - **r** for the r.m.s.
        - **s** for standard deviation
        - **u** for maximum (upper)
        - **U** for maximum of negative values only
        - **z** for the sum
    empty : float or int
        Set the value assigned to empty nodes [Default is NaN].
    normalize : bool
        Normalize the resulting grid values by the area represented by the
        search *radius* [no normalization].
    search_radius : float or str
        Sets the *search_radius* that determines which data points are
        considered close to a node. Append the distance unit.
        Not compatible with ``tiling``.
    weight : str
        Input data have an extra column containing observation point weight.
        If weights are given then weighted statistical quantities will be
        computed while the count will be the sum of the weights instead of
        number of points. If the weights are actually uncertainties
        (one sigma) then append **+s** and weight = 1/sigma.
    {I}
    {R}
    {V}
    {a}
    {b}
    {h}
    {i}
    {r}

    Returns
    -------
    ret: xarray.DataArray or None
        Return type depends on whether the ``outgrid`` parameter is set:

        - :class:`xarray.DataArray` if ``outgrid`` is not set
        - None if ``outgrid`` is set (grid output will be stored in file set by
          ``outgrid``)
    """
    with GMTTempFile(suffix=".nc") as tmpfile:
        with Session() as lib:
            # Expose ``data`` to GMT as a (virtual) input file.
            table_context = lib.virtualfile_from_data(check_kind="vector", data=data)
            with table_context as infile:
                outgrid = kwargs.get("G")
                if outgrid is None:
                    # No output file requested: route the grid to the temporary
                    # file so that it can be read back and returned below.
                    outgrid = tmpfile.name
                    kwargs["G"] = outgrid
                arg_str = build_arg_string(kwargs, infile=infile)
                lib.call_module(module="binstats", args=arg_str)

        # Still inside the GMTTempFile context: the grid is read back while the
        # temporary file is available (presumably it is removed on exit).
        if outgrid != tmpfile.name:
            # The caller named an output file; the grid lives there.
            return None
        return load_dataarray(outgrid)

pygmt/tests/test_binstats.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""
2+
Tests for binstats.
3+
"""
4+
import os
5+
6+
import numpy.testing as npt
7+
from pygmt import binstats
8+
from pygmt.helpers import GMTTempFile
9+
10+
11+
def test_binstats_outgrid():
12+
"""
13+
Test binstats with a set outgrid.
14+
"""
15+
with GMTTempFile(suffix=".nc") as tmpfile:
16+
result = binstats(
17+
data="@capitals.gmt",
18+
outgrid=tmpfile.name,
19+
spacing=5,
20+
statistic="z",
21+
search_radius="1000k",
22+
aspatial="2=population",
23+
region="g",
24+
)
25+
assert result is None # return value is None
26+
assert os.path.exists(path=tmpfile.name) # check that outgrid exists
27+
28+
29+
def test_binstats_no_outgrid():
30+
"""
31+
Test binstats with no set outgrid.
32+
"""
33+
temp_grid = binstats(
34+
data="@capitals.gmt",
35+
spacing=5,
36+
statistic="z",
37+
search_radius="1000k",
38+
aspatial="2=population",
39+
region="g",
40+
)
41+
assert temp_grid.dims == ("y", "x")
42+
assert temp_grid.gmt.gtype == 0 # Cartesian grid
43+
assert temp_grid.gmt.registration == 0 # Gridline registration
44+
npt.assert_allclose(temp_grid.max(), 35971536)
45+
npt.assert_allclose(temp_grid.min(), 53)
46+
npt.assert_allclose(temp_grid.median(), 1232714.5)
47+
npt.assert_allclose(temp_grid.mean(), 4227489)

0 commit comments

Comments
 (0)