@@ -544,35 +544,43 @@ def dataset_open(self, dataset, flatten=True, verbose=None):
 
         g["cdl_filename"] = cdl_filename
 
-        u = urisplit(dataset)
-        storage_options = self._get_storage_options(dataset, u)
-
-        if u.scheme == "s3":
+        filesystem = g.get("filesystem")
+        if filesystem is not None:
             # --------------------------------------------------------
-            # A file in an S3 object store
+            # A pre-authenticated filesystem was provided: open the
+            # dataset as a file-like object and pass it to the backend.
             # --------------------------------------------------------
-            from dask.base import tokenize
-
-            # Create an openable S3 file object
-            fs_key = tokenize(("s3", storage_options))
-            file_systems = g["file_systems"]
-            file_system = file_systems.get(fs_key)
-            if file_system is None:
-                # An S3 file system with these options does not exist,
-                # so create one.
-                from s3fs import S3FileSystem
-
-                file_system = S3FileSystem(**storage_options)
-                file_systems[fs_key] = file_system
-
-            # Reset 'dataset' to an s3fs.File object that can be
-            # passed to the netCDF backend
-            dataset = file_system.open(u.path[1:], "rb")
-
-            if is_log_level_detail(logger):
-                logger.detail(
-                    f"    S3: s3fs.S3FileSystem options: {storage_options}\n"
-                )  # pragma: no cover
+            dataset = filesystem.open(dataset, "rb")
+        else:
+            u = urisplit(dataset)
+            storage_options = self._get_storage_options(dataset, u)
+
+            if u.scheme == "s3":
+                # --------------------------------------------------------
+                # A file in an S3 object store
+                # --------------------------------------------------------
+                from dask.base import tokenize
+
+                # Create an openable S3 file object
+                fs_key = tokenize(("s3", storage_options))
+                file_systems = g["file_systems"]
+                file_system = file_systems.get(fs_key)
+                if file_system is None:
+                    # An S3 file system with these options does not exist,
+                    # so create one.
+                    from s3fs import S3FileSystem
+
+                    file_system = S3FileSystem(**storage_options)
+                    file_systems[fs_key] = file_system
+
+                # Reset 'dataset' to an s3fs.File object that can be
+                # passed to the netCDF backend
+                dataset = file_system.open(u.path[1:], "rb")
+
+                if is_log_level_detail(logger):
+                    logger.detail(
+                        f"    S3: s3fs.S3FileSystem options: {storage_options}\n"
+                    )  # pragma: no cover
 
         # Map backend names to dataset-open functions
         dataset_open_function = {
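
For context, a minimal sketch (not part of the diff) of what the new branch above amounts to, assuming an fsspec-compatible filesystem object; the bucket, path, and the choice of h5netcdf as a backend that accepts file-like objects are illustrative only.

# Illustrative sketch of the pre-authenticated filesystem path.
# All names and credentials below are assumptions, not cfdm API.
import fsspec
import h5netcdf  # one backend that can open file-like objects

# The caller authenticates once, up front
fs = fsspec.filesystem("s3", anon=True)

# `dataset_open` now does the equivalent of this: obtain a file-like
# object from the caller's filesystem and hand it to the backend,
# instead of re-deriving credentials from `storage_options`
with fs.open("some-bucket/path/to/file.nc", "rb") as f:
    nc = h5netcdf.File(f, "r")
    print(list(nc.variables))
    nc.close()
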
@@ -1015,6 +1023,7 @@ def read(
         warn_valid=False,
         domain=False,
         storage_options=None,
+        filesystem=None,
         _file_systems=None,
         netcdf_backend=None,
         cache=True,
@@ -1085,6 +1094,11 @@ def read(
 
                 .. versionadded:: (cfdm) 1.11.2.0
 
+            filesystem: optional
+                See `cfdm.read` for details.
+
+                .. versionadded:: (cfdm) NEXTVERSION
+
             netcdf_backend: `None` or `str`, optional
                 See `cfdm.read` for details.
 
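
The docstring defers to `cfdm.read`, so a hedged usage sketch, assuming the public `cfdm.read` exposes the same `filesystem` keyword added here; the credentials and path are placeholders.

# Hedged usage sketch; names and credentials are illustrative.
import fsspec
import cfdm

# A filesystem that the caller has already authenticated
fs = fsspec.filesystem("s3", key="AKIA...", secret="...")

# The file is opened through `fs`, so no `storage_options` are needed
fields = cfdm.read("my-bucket/data/temperature.nc", filesystem=fs)
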
@@ -1229,22 +1243,33 @@ def read(
         # Note that the `dataset_type` method is much faster than the
         # `dataset_open` method at returning for unrecognised types.
         # ------------------------------------------------------------
-        d_type = self.dataset_type(dataset, dataset_type)
-        if not d_type:
-            # Can't interpret the dataset as a recognised type, so
-            # either raise an exception or return an empty list.
-            if dataset_type is None:
-                raise DatasetTypeError(
-                    f"Can't interpret {dataset} as a dataset of one of the "
-                    f"valid types: {valid_dataset_types!r}"
-                )
+        if filesystem is not None:
+            # When a pre-authenticated filesystem is provided, we
+            # cannot inspect the file locally, so we trust the caller.
+            # Use the explicitly requested dataset_type if given,
+            # otherwise default to 'netCDF'.
+            if dataset_type is not None and "netCDF" not in dataset_type:
+                # The caller explicitly excluded netCDF; nothing to do.
+                return []
 
-            return []
+            d_type = "netCDF"
+        else:
+            d_type = self.dataset_type(dataset, dataset_type)
+            if not d_type:
+                # Can't interpret the dataset as a recognised type, so
+                # either raise an exception or return an empty list.
+                if dataset_type is None:
+                    raise DatasetTypeError(
+                        f"Can't interpret {dataset} as a dataset of one of the "
+                        f"valid types: {valid_dataset_types!r}"
+                    )
 
-        # Can interpret the dataset as a recognised type, but return
-        # an empty list if that type has been exlcuded.
-        if dataset_type is not None and d_type not in dataset_type:
-            return []
+                return []
+
+            # Can interpret the dataset as a recognised type, but return
+            # an empty list if that type has been excluded.
+            if dataset_type is not None and d_type not in dataset_type:
+                return []
 
         # ------------------------------------------------------------
         # Parse the 'netcdf_backend' keyword parameter
@@ -1532,6 +1557,8 @@ def read(
15321557 "file_system_storage_options" : {},
15331558 # Cached s3fs.S3FileSystem objects
15341559 "file_systems" : _file_systems ,
1560+ # Pre-authenticated filesystem object (e.g. fsspec)
1561+ "filesystem" : filesystem ,
15351562 # --------------------------------------------------------
15361563 # Array element caching
15371564 # --------------------------------------------------------
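
For the cached `g["file_systems"]` entry kept alongside the new `g["filesystem"]`, here is a standalone sketch of the caching pattern it supports (unchanged by this diff, still used on the non-filesystem code path): S3 filesystems are keyed by a dask token of their storage options, so files sharing credentials reuse one S3FileSystem instance. The helper name is illustrative, not cfdm API.

from dask.base import tokenize
from s3fs import S3FileSystem

file_systems = {}  # plays the role of g["file_systems"]

def get_s3_filesystem(storage_options):
    # Key the cache on the options, so identical options map to one object
    key = tokenize(("s3", storage_options))
    fs = file_systems.get(key)
    if fs is None:
        fs = S3FileSystem(**storage_options)
        file_systems[key] = fs
    return fs

# Two datasets read with identical options share one filesystem object
assert get_s3_filesystem({"anon": True}) is get_s3_filesystem({"anon": True})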