Skip to content

Data access #264

@Fisico2k

Description

@Fisico2k

I'm trying to test `accessing_data_example.ipynb`.
Note that in my environment every request has to go through a proxy.
The line `dsets = cat_subset.to_dataset_dict()` produces the output below.
Any help would be appreciated.

ConnectionRefusedError                    Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/aiohttp/connector.py in _wrap_create_connection(self, req, timeout, client_error, *args, **kwargs)
    985             async with ceil_timeout(timeout.sock_connect):
--> 986                 return await self._loop.create_connection(*args, **kwargs)  # type: ignore[return-value]  # noqa
    987         except cert_errors as exc:

/opt/conda/lib/python3.8/asyncio/base_events.py in create_connection(self, protocol_factory, host, port, ssl, family, proto, flags, sock, local_addr, server_hostname, ssl_handshake_timeout, happy_eyeballs_delay, interleave)
   1024                 if len(exceptions) == 1:
-> 1025                     raise exceptions[0]
   1026                 else:

/opt/conda/lib/python3.8/asyncio/base_events.py in create_connection(self, protocol_factory, host, port, ssl, family, proto, flags, sock, local_addr, server_hostname, ssl_handshake_timeout, happy_eyeballs_delay, interleave)
   1009                     try:
-> 1010                         sock = await self._connect_sock(
   1011                             exceptions, addrinfo, laddr_infos)

/opt/conda/lib/python3.8/asyncio/base_events.py in _connect_sock(self, exceptions, addr_info, local_addr_infos)
    923                     raise my_exceptions.pop()
--> 924             await self.sock_connect(sock, address)
    925             return sock

/opt/conda/lib/python3.8/asyncio/selector_events.py in sock_connect(self, sock, address)
    495         self._sock_connect(fut, sock, address)
--> 496         return await fut
    497 

/opt/conda/lib/python3.8/asyncio/selector_events.py in _sock_connect_cb(self, fut, sock, address)
    527                 # Jump to any except clause below.
--> 528                 raise OSError(err, f'Connect call failed {address}')
    529         except (BlockingIOError, InterruptedError):

ConnectionRefusedError: [Errno 111] Connect call failed ('20.150.83.132', 443)

The above exception was the direct cause of the following exception:

ClientConnectorError                      Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/zarr/storage.py in __getitem__(self, key)
   1368         try:
-> 1369             return self.map[key]
   1370         except self.exceptions as e:

/opt/conda/lib/python3.8/site-packages/fsspec/mapping.py in __getitem__(self, key, default)
    136         try:
--> 137             result = self.fs.cat(k)
    138         except self.missing_exceptions:

/opt/conda/lib/python3.8/site-packages/fsspec/asyn.py in wrapper(*args, **kwargs)
    110         self = obj or args[0]
--> 111         return sync(self.loop, func, *args, **kwargs)
    112 

/opt/conda/lib/python3.8/site-packages/fsspec/asyn.py in sync(loop, func, timeout, *args, **kwargs)
     95     elif isinstance(return_result, BaseException):
---> 96         raise return_result
     97     else:

/opt/conda/lib/python3.8/site-packages/fsspec/asyn.py in _runner(event, coro, result, timeout)
     52     try:
---> 53         result[0] = await coro
     54     except Exception as ex:

/opt/conda/lib/python3.8/site-packages/fsspec/asyn.py in _cat(self, path, recursive, on_error, batch_size, **kwargs)
    422             if ex:
--> 423                 raise ex
    424         if (

/opt/conda/lib/python3.8/asyncio/tasks.py in wait_for(fut, timeout, loop)
    454     if timeout is None:
--> 455         return await fut
    456 

/opt/conda/lib/python3.8/site-packages/fsspec/implementations/http.py in _cat_file(self, url, start, end, **kwargs)
    219         session = await self.set_session()
--> 220         async with session.get(url, **kw) as r:
    221             out = await r.read()

/opt/conda/lib/python3.8/site-packages/aiohttp/client.py in __aenter__(self)
   1137     async def __aenter__(self) -> _RetType:
-> 1138         self._resp = await self._coro
   1139         return self._resp

/opt/conda/lib/python3.8/site-packages/aiohttp/client.py in _request(self, method, str_or_url, params, data, json, cookies, headers, skip_auto_headers, auth, allow_redirects, max_redirects, compress, chunked, expect100, raise_for_status, read_until_eof, proxy, proxy_auth, timeout, verify_ssl, fingerprint, ssl_context, ssl, proxy_headers, trace_request_ctx, read_bufsize)
    534                             assert self._connector is not None
--> 535                             conn = await self._connector.connect(
    536                                 req, traces=traces, timeout=real_timeout

/opt/conda/lib/python3.8/site-packages/aiohttp/connector.py in connect(self, req, traces, timeout)
    541             try:
--> 542                 proto = await self._create_connection(req, traces, timeout)
    543                 if self._closed:

/opt/conda/lib/python3.8/site-packages/aiohttp/connector.py in _create_connection(self, req, traces, timeout)
    906         else:
--> 907             _, proto = await self._create_direct_connection(req, traces, timeout)
    908 

/opt/conda/lib/python3.8/site-packages/aiohttp/connector.py in _create_direct_connection(self, req, traces, timeout, client_error)
   1205             assert last_exc is not None
-> 1206             raise last_exc
   1207 

/opt/conda/lib/python3.8/site-packages/aiohttp/connector.py in _create_direct_connection(self, req, traces, timeout, client_error)
   1174             try:
-> 1175                 transp, proto = await self._wrap_create_connection(
   1176                     self._factory,

/opt/conda/lib/python3.8/site-packages/aiohttp/connector.py in _wrap_create_connection(self, req, timeout, client_error, *args, **kwargs)
    991         except OSError as exc:
--> 992             raise client_error(req.connection_key, exc) from exc
    993 

ClientConnectorError: Cannot connect to host cpdataeuwest.blob.core.windows.net:443 ssl:default [Connect call failed ('20.150.83.132', 443)]

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/xarray/backends/zarr.py in open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, append_dim, write_region, safe_chunks, stacklevel)
    347             try:
--> 348                 zarr_group = zarr.open_consolidated(store, **open_kwargs)
    349             except KeyError:

/opt/conda/lib/python3.8/site-packages/zarr/convenience.py in open_consolidated(store, metadata_key, mode, **kwargs)
   1303     # setup metadata store
-> 1304     meta_store = ConsolidatedStoreClass(store, metadata_key=metadata_key)
   1305 

/opt/conda/lib/python3.8/site-packages/zarr/storage.py in __init__(self, store, metadata_key)
   2852         # retrieve consolidated metadata
-> 2853         meta = json_loads(self.store[metadata_key])
   2854 

/opt/conda/lib/python3.8/site-packages/zarr/storage.py in __getitem__(self, key)
   1370         except self.exceptions as e:
-> 1371             raise KeyError(key) from e
   1372 

KeyError: '.zmetadata'

During handling of the above exception, another exception occurred:

GroupNotFoundError                        Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/intake_esm/merge_util.py in _open_asset(path, data_format, zarr_kwargs, cdf_kwargs, preprocess, varname, requested_variables)
    269         try:
--> 270             ds = xr.open_zarr(path, **zarr_kwargs)
    271         except Exception as exc:

/opt/conda/lib/python3.8/site-packages/xarray/backends/zarr.py in open_zarr(store, group, synchronizer, chunks, decode_cf, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, consolidated, overwrite_encoded_chunks, chunk_store, storage_options, decode_timedelta, use_cftime, **kwargs)
    751 
--> 752     ds = open_dataset(
    753         filename_or_obj=store,

/opt/conda/lib/python3.8/site-packages/xarray/backends/api.py in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, backend_kwargs, *args, **kwargs)
    494     overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 495     backend_ds = backend.open_dataset(
    496         filename_or_obj,

/opt/conda/lib/python3.8/site-packages/xarray/backends/zarr.py in open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, stacklevel)
    799         filename_or_obj = _normalize_path(filename_or_obj)
--> 800         store = ZarrStore.open_group(
    801             filename_or_obj,

/opt/conda/lib/python3.8/site-packages/xarray/backends/zarr.py in open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, append_dim, write_region, safe_chunks, stacklevel)
    364                 )
--> 365                 zarr_group = zarr.open_group(store, **open_kwargs)
    366         elif consolidated:

/opt/conda/lib/python3.8/site-packages/zarr/hierarchy.py in open_group(store, mode, cache_attrs, synchronizer, path, chunk_store, storage_options, zarr_version)
   1346                 raise ContainsArrayError(path)
-> 1347             raise GroupNotFoundError(path)
   1348 

GroupNotFoundError: group not found at path ''

The above exception was the direct cause of the following exception:

OSError                                   Traceback (most recent call last)
<ipython-input-16-020d1279ef81> in <module>
----> 1 dsets = cat_subset.to_dataset_dict()
      2 dsets

/opt/conda/lib/python3.8/site-packages/intake_esm/core.py in to_dataset_dict(self, zarr_kwargs, cdf_kwargs, preprocess, storage_options, progressbar, aggregate)
    920             ]
    921             for i, task in enumerate(concurrent.futures.as_completed(future_tasks)):
--> 922                 key, ds = task.result()
    923                 self._datasets[key] = ds
    924                 if self.progressbar:

/opt/conda/lib/python3.8/concurrent/futures/_base.py in result(self, timeout)
    430                 raise CancelledError()
    431             elif self._state == FINISHED:
--> 432                 return self.__get_result()
    433 
    434             self._condition.wait(timeout)

/opt/conda/lib/python3.8/concurrent/futures/_base.py in __get_result(self)
    386     def __get_result(self):
    387         if self._exception:
--> 388             raise self._exception
    389         else:
    390             return self._result

/opt/conda/lib/python3.8/concurrent/futures/thread.py in run(self)
     55 
     56         try:
---> 57             result = self.fn(*self.args, **self.kwargs)
     58         except BaseException as exc:
     59             self.future.set_exception(exc)

/opt/conda/lib/python3.8/site-packages/intake_esm/core.py in _load_source(key, source)
    906 
    907         def _load_source(key, source):
--> 908             return key, source.to_dask()
    909 
    910         sources = {key: source(**source_kwargs) for key, source in self.items()}

/opt/conda/lib/python3.8/site-packages/intake_esm/source.py in to_dask(self)
    243     def to_dask(self):
    244         """Return xarray object (which will have chunks)"""
--> 245         self._load_metadata()
    246         return self._ds
    247 

/opt/conda/lib/python3.8/site-packages/intake/source/base.py in _load_metadata(self)
    234         """load metadata only if needed"""
    235         if self._schema is None:
--> 236             self._schema = self._get_schema()
    237             self.dtype = self._schema.dtype
    238             self.shape = self._schema.shape

/opt/conda/lib/python3.8/site-packages/intake_esm/source.py in _get_schema(self)
    172 
    173         if self._ds is None:
--> 174             self._open_dataset()
    175 
    176             metadata = {

/opt/conda/lib/python3.8/site-packages/intake_esm/source.py in _open_dataset(self)
    224             for _, row in self.df.iterrows()
    225         ]
--> 226         datasets = dask.compute(*datasets)
    227         mapper_dict = dict(datasets)
    228         nd = create_nested_dict(self.df, self.path_column, self.aggregation_columns)

/opt/conda/lib/python3.8/site-packages/dask/base.py in compute(*args, **kwargs)
    565         postcomputes.append(x.__dask_postcompute__())
    566 
--> 567     results = schedule(dsk, keys, **kwargs)
    568     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    569 

/opt/conda/lib/python3.8/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
     77             pool = MultiprocessingPoolExecutor(pool)
     78 
---> 79     results = get_async(
     80         pool.submit,
     81         pool._max_workers,

/opt/conda/lib/python3.8/site-packages/dask/local.py in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
    512                             _execute_task(task, data)  # Re-execute locally
    513                         else:
--> 514                             raise_exception(exc, tb)
    515                     res, worker_id = loads(res_info)
    516                     state["cache"][key] = res

/opt/conda/lib/python3.8/site-packages/dask/local.py in reraise(exc, tb)
    323     if exc.__traceback__ is not tb:
    324         raise exc.with_traceback(tb)
--> 325     raise exc
    326 
    327 

/opt/conda/lib/python3.8/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    221     try:
    222         task, data = loads(task_info)
--> 223         result = _execute_task(task, data)
    224         id = get_id()
    225         result = dumps((result, id))

/opt/conda/lib/python3.8/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

/opt/conda/lib/python3.8/site-packages/intake_esm/source.py in read_dataset(path, data_format, storage_options, cdf_kwargs, zarr_kwargs, preprocess, varname)
    201             # replace path column with mapper (dependent on filesystem type)
    202             mapper = _path_to_mapper(path, storage_options, data_format)
--> 203             ds = _open_asset(
    204                 mapper,
    205                 data_format=data_format,

/opt/conda/lib/python3.8/site-packages/intake_esm/merge_util.py in _open_asset(path, data_format, zarr_kwargs, cdf_kwargs, preprocess, varname, requested_variables)
    286             """
    287 
--> 288             raise IOError(message) from exc
    289 
    290     else:

OSError: 
            Failed to open zarr store.

            *** Arguments passed to xarray.open_zarr() ***:

            - store: https://cpdataeuwest.blob.core.windows.net/cp-cmip/version1/data/GARD-SV/CMIP.MIROC.MIROC6.historical.r1i1p1f1.day.GARD-SV.tasmax.zarr
            - kwargs: {}

            *** fsspec options used ***:

            - root: https://cpdataeuwest.blob.core.windows.net/cp-cmip/version1/data/GARD-SV/CMIP.MIROC.MIROC6.historical.r1i1p1f1.day.GARD-SV.tasmax.zarr
            - protocol: https

            ********************************************

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions