Skip to content

Commit baa23e3

Browse files
committed
update PydapDataStore to use backend logic in dap4 to batch variables all together in single dap url
1 parent 5fdeab9 commit baa23e3

File tree

1 file changed

+62
-5
lines changed

1 file changed

+62
-5
lines changed

xarray/backends/pydap_.py

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import warnings
34
from collections.abc import Iterable
45
from typing import TYPE_CHECKING, Any
56

@@ -99,7 +100,7 @@ class PydapDataStore(AbstractDataStore):
99100
be useful if the netCDF4 library is not available.
100101
"""
101102

102-
def __init__(self, dataset, group=None):
103+
def __init__(self, dataset, group=None, session=None, batch=False, protocol=None):
103104
"""
104105
Parameters
105106
----------
@@ -109,6 +110,11 @@ def __init__(self, dataset, group=None):
109110
"""
110111
self.dataset = dataset
111112
self.group = group
113+
self.session = session
114+
self._batch = batch
115+
self._batch_done = False
116+
self._array_cache = {} # holds 1D dimension data
117+
self._protocol = protocol
112118

113119
@classmethod
114120
def open(
@@ -121,6 +127,7 @@ def open(
121127
timeout=None,
122128
verify=None,
123129
user_charset=None,
130+
batch=False,
124131
):
125132
from pydap.client import open_url
126133
from pydap.net import DEFAULT_TIMEOUT
@@ -135,6 +142,7 @@ def open(
135142
DeprecationWarning,
136143
)
137144
output_grid = False # new default behavior
145+
138146
kwargs = {
139147
"url": url,
140148
"application": application,
@@ -152,12 +160,26 @@ def open(
152160
dataset = url.ds
153161
args = {"dataset": dataset}
154162
if group:
155-
# only then, change the default
156163
args["group"] = group
164+
if url.startswith(("https", "dap2")):
165+
args["protocol"] = "dap2"
166+
else:
167+
args["protocol"] = "dap4"
168+
if batch:
169+
if args["protocol"] == "dap2":
170+
warnings.warn(
171+
f"`batch={batch}` is currently only compatible with the `DAP4` "
172+
"protocol. Make sue the OPeNDAP server implements the `DAP4` "
173+
"protocol and then replace the scheme of the url with `dap4` "
174+
"to make use of it. Setting `batch=False`.",
175+
stacklevel=2,
176+
)
177+
else:
178+
# only update if dap4
179+
args["batch"] = batch
157180
return cls(**args)
158181

159182
def open_store_variable(self, var):
160-
data = indexing.LazilyIndexedArray(PydapArrayWrapper(var))
161183
try:
162184
dimensions = [
163185
dim.split("/")[-1] if dim.startswith("/") else dim for dim in var.dims
@@ -166,6 +188,25 @@ def open_store_variable(self, var):
166188
# GridType does not have a dims attribute - instead get `dimensions`
167189
# see https://github.com/pydap/pydap/issues/485
168190
dimensions = var.dimensions
191+
if (
192+
self._protocol == "dap4"
193+
and var.name in dimensions
194+
and hasattr(var, "dataset") # only True for pydap>3.5.5
195+
):
196+
if not var.dataset._batch_mode:
197+
# for dap4, always batch all dimensions at once
198+
var.dataset.enable_batch_mode()
199+
data_array = self._get_data_array(var)
200+
data = indexing.LazilyIndexedArray(data_array)
201+
if not self._batch and var.dataset._batch_mode:
202+
# if `batch=False``, restore it for all other variables
203+
var.dataset.disable_batch_mode()
204+
else:
205+
# all non-dimension variables
206+
data = indexing.LazilyIndexedArray(
207+
PydapArrayWrapper(var, self._batch, self._array_cache)
208+
)
209+
169210
return Variable(dimensions, data, var.attributes)
170211

171212
def get_variables(self):
@@ -183,6 +224,7 @@ def get_variables(self):
183224
# check the key is not a BaseType or GridType
184225
if not isinstance(self.ds[var], GroupType)
185226
]
227+
186228
return FrozenDict((k, self.open_store_variable(self.ds[k])) for k in _vars)
187229

188230
def get_attrs(self):
@@ -194,9 +236,11 @@ def get_attrs(self):
194236
"libdap",
195237
"invocation",
196238
"dimensions",
239+
"path",
240+
"Maps",
197241
)
198-
attrs = self.ds.attributes
199-
list(map(attrs.pop, opendap_attrs, [None] * 6))
242+
attrs = dict(self.ds.attributes)
243+
list(map(attrs.pop, opendap_attrs, [None] * 8))
200244
return Frozen(attrs)
201245

202246
def get_dimensions(self):
@@ -206,6 +250,19 @@ def get_dimensions(self):
206250
def ds(self):
207251
return get_group(self.dataset, self.group)
208252

253+
def _get_data_array(self, var):
254+
"""gets dimension data all at once, storing the numpy
255+
arrays within a cached dictionary
256+
"""
257+
from pydap.lib import get_batch_data
258+
259+
if not self._batch_done or var.id not in self._array_cache:
260+
# store all dim data into a dict for reuse
261+
self._array_cache = get_batch_data(var.parent, self._array_cache)
262+
self._batch_done = True
263+
264+
return self._array_cache[var.id]
265+
209266

210267
class PydapBackendEntrypoint(BackendEntrypoint):
211268
"""

0 commit comments

Comments
 (0)