Skip to content

Commit 24435d9

Browse files
committed
dev
1 parent 4c586e3 commit 24435d9

File tree

4 files changed

+155
-196
lines changed

4 files changed

+155
-196
lines changed

cf/read_write/read.py

Lines changed: 81 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from urllib.parse import urlparse
77

88
import cfdm
9-
from cfdm.read_write.exceptions import UnknownFileFormatError as FileTypeError
9+
from cfdm.read_write.exceptions import FileTypeError
1010
from cfdm.read_write.netcdf import NetCDFRead
1111

1212
from ..aggregate import aggregate as cf_aggregate
@@ -199,20 +199,16 @@ class read(cfdm.read):
199199
200200
{{read warnings: `bool`, optional}}
201201
202-
{{read ignore_unknown_type: `bool`, optional}}
203-
204-
.. versionadded:: NEXTVERSION
205-
206202
{{read file_type: (sequence of) `str`, optional}}
207203
208-
Valid files types are:
204+
Valid file types are:
209205
210206
============ ============================================
211-
*file_type* Description
207+
file type Description
212208
============ ============================================
213-
``'netCDF'`` Binary netCDF-3 or netCDF-4 file
214-
``'CDL'`` Text CDL representation of a netCDF file
215-
``'UM'`` UM fields file or PP file
209+
``'netCDF'`` Binary netCDF-3 or netCDF-4 files
210+
``'CDL'`` Text CDL representations of netCDF files
211+
``'UM'`` UM fields files or PP files
216212
============ ============================================
217213
218214
.. versionadded:: NEXTVERSION
@@ -429,7 +425,7 @@ class read(cfdm.read):
429425
Use the *file_type* parameter instead.
430426
431427
ignore_read_error: deprecated at version NEXTVERSION
432-
Use the *ignore_unknown_type* parameter instead.
428+
Use the *file_type* parameter instead.
433429
434430
:Returns:
435431
@@ -486,7 +482,6 @@ def __new__(
486482
external=None,
487483
verbose=None,
488484
warnings=False,
489-
ignore_unknown_type=False,
490485
aggregate=True,
491486
nfields=None,
492487
squeeze=False,
@@ -582,48 +577,25 @@ def __new__(
582577
_DEPRECATION_ERROR_FUNCTION_KWARGS(
583578
"cf.read",
584579
{"ignore_read_error": ignore_read_error},
585-
"Use keyword 'ignore_unknown_type' instead.",
580+
"Use keyword 'file_type' instead.",
586581
version="NEXTVERSION",
587582
removed_at="5.0.0",
588583
) # pragma: no cover
589584

585+
info = cfdm.is_log_level_info(logger)
586+
590587
cls.netcdf = NetCDFRead(cls.implementation)
591588
cls.um = UMRead(cls.implementation)
592589

593-
# Parse select
590+
# ------------------------------------------------------------
591+
# Parse the 'select' keyword parameter
592+
# ------------------------------------------------------------
594593
if isinstance(select, (str, Query, Pattern)):
595594
select = (select,)
596595

597-
info = cfdm.is_log_level_info(logger)
598-
599-
# Manage input parameters where contradictions are possible:
600-
if cdl_string and file_type:
601-
if file_type == "CDL":
602-
if info:
603-
logger.info(
604-
"It is not necessary to set the cf.read fmt as "
605-
"'CDL' when cdl_string is True, since that implies "
606-
"CDL is the format."
607-
) # pragma: no cover
608-
else:
609-
raise ValueError(
610-
"cdl_string can only be True when the format is CDL, "
611-
"though fmt is ignored in that case so there is no "
612-
"need to set it."
613-
)
614-
615-
if follow_symlinks and not recursive:
616-
raise ValueError(
617-
f"Can't set follow_symlinks={follow_symlinks!r} "
618-
f"when recursive={recursive!r}"
619-
)
620-
621-
# Initialise the output list of fields/domains
622-
if domain:
623-
out = DomainList()
624-
else:
625-
out = FieldList()
626-
596+
# ------------------------------------------------------------
597+
# Parse the 'aggregate' keyword parameter
598+
# ------------------------------------------------------------
627599
if isinstance(aggregate, dict):
628600
aggregate_options = aggregate.copy()
629601
aggregate = True
@@ -632,23 +604,45 @@ def __new__(
632604

633605
aggregate_options["copy"] = False
634606

635-
## ------------------------------------------------------------
636-
## Parse the 'fmt' keyword parameter
637-
## ------------------------------------------------------------
638-
#if file_type:
639-
# if isinstance(file_type, str):
640-
# file_type = (file_type,)
641-
#
642-
# file_type = set(file_type)
643-
#else:
644-
# file_type = set(("netCDF", "CDL", "UM"))
607+
# ------------------------------------------------------------
608+
# Parse the 'file_type' keyword parameter
609+
# ------------------------------------------------------------
610+
netCDF_file_types = set(("netCDF", "CDL"))
611+
UM_file_types = set(("UM",))
612+
if file_type is not None:
613+
if isinstance(file_type, str):
614+
file_type = (file_type,)
615+
616+
file_type = set(file_type)
645617

646618
# ------------------------------------------------------------
647619
# Parse the 'um' keyword parameter
648620
# ------------------------------------------------------------
649621
if not um:
650622
um = {}
651623

624+
# ------------------------------------------------------------
625+
# Parse the 'cdl_string' keyword parameter
626+
# ------------------------------------------------------------
627+
if cdl_string and file_type is not None:
628+
raise ValueError("Can't set file_type when cdl_string=True")
629+
630+
# ------------------------------------------------------------
631+
# Parse the 'follow_symlinks' and 'recursive' keyword
632+
# parameters
633+
# ------------------------------------------------------------
634+
if follow_symlinks and not recursive:
635+
raise ValueError(
636+
f"Can't set follow_symlinks={follow_symlinks!r} "
637+
f"when recursive={recursive!r}"
638+
)
639+
640+
# Initialise the output list of fields/domains
641+
if domain:
642+
out = DomainList()
643+
else:
644+
out = FieldList()
645+
652646
# Count the number of fields (in all files) and the number of
653647
# files
654648
field_counter = -1
@@ -661,6 +655,7 @@ def __new__(
661655
files = [
662656
NetCDFRead.string_to_cdl(cdl_string) for cdl_string in files
663657
]
658+
file_type = set(("CDL",))
664659

665660
for file_glob in flat(files):
666661
# Expand variables
@@ -674,8 +669,9 @@ def __new__(
674669
# Glob files on disk
675670
files2 = glob(file_glob)
676671

677-
if not files2 and not ignore_unknown_type:
678-
open(file_glob, "rb")
672+
if not files2:
673+
# Trigger a FileNotFoundError
674+
open(file_glob)
679675

680676
files3 = []
681677
for x in files2:
@@ -694,7 +690,7 @@ def __new__(
694690

695691
files2 = files3
696692

697-
# How each file was read, as netCDF, or UM, etc.
693+
# The types of all of the input files
698694
ftypes = set()
699695

700696
for filename in files2:
@@ -704,14 +700,19 @@ def __new__(
704700
# ----------------------------------------------------
705701
# Read the file
706702
# ----------------------------------------------------
707-
file_types = file_type.copy()
703+
file_contents = []
704+
705+
# The type of this file
708706
ftype = None
709-
file_contents = None
710707

711-
# Record unknown file format errors
708+
# Record file type errors
712709
file_format_errors = []
713-
print ('---------', file_types)
714-
if file_types.intersection(("netCDF", "CDL")):
710+
711+
if ftype is None and (
712+
file_type is None
713+
or file_type.intersection(netCDF_file_types)
714+
):
715+
# Try to read as netCDF
715716
try:
716717
file_contents = super().__new__(
717718
cls,
@@ -735,28 +736,19 @@ def __new__(
735736
squeeze=squeeze,
736737
unsqueeze=unsqueeze,
737738
file_type=file_type,
738-
# ignore_unknown_type=ignore_unknown_type,
739739
)
740740
except FileTypeError as error:
741741
if file_type is None:
742742
file_format_errors.append(error)
743-
744-
file_types.difference_update(("netCDF", "CDL"))
745743
else:
746744
file_format_errors = []
747-
# if file_contents or not ignore_unknown_type:
748-
# Zero or more fields/domains were
749-
# successfully read. Set 'file_types' to
750-
# an empty set so that no other file
751-
# formats are attempted.
752-
file_types = set()
753745
ftype = "netCDF"
754-
755-
print ('here yyy',file_types, file_contents, file_format_errors)
756-
if file_types.intersection(("UM",)):
757-
print ('UM', filename)
746+
747+
if ftype is None and (
748+
file_type is None or file_type.intersection(UM_file_types)
749+
):
750+
# Try to read as UM
758751
try:
759-
print ('9999')
760752
file_contents = cls.um.read(
761753
filename,
762754
um_version=um.get("version"),
@@ -770,30 +762,16 @@ def __new__(
770762
squeeze=squeeze,
771763
unsqueeze=unsqueeze,
772764
domain=domain,
773-
# ignore_unknown_type=ignore_unknown_type,
765+
file_type=file_type,
774766
)
775767
except FileTypeError as error:
776768
if file_type is None:
777769
file_format_errors.append(error)
778-
779-
# print (1111111)
780-
file_types.difference_update(("UM",))
781-
# file_format_errors.append(error)
782770
else:
783-
print (1111155511, file_contents)
784-
# file_format_errors = []
785-
# if file_contents or not ignore_unknown_type:
786-
# print ('bon')
787-
# Zero or more fields/domains were
788-
# successfully read. Set 'file_types' to
789-
# an empty set so that no other file
790-
# formats are attempted.
791771
file_format_errors = []
792-
file_types = set()
793772
ftype = "UM"
794773

795774
if file_format_errors:
796-
print ('rrrr',file_format_errors, file_contents)
797775
error = "\n".join(map(str, file_format_errors))
798776
raise FileTypeError(f"\n{error}")
799777

@@ -805,28 +783,19 @@ def __new__(
805783
if ftype:
806784
ftypes.add(ftype)
807785

808-
# --------------------------------------------------------
809-
# Select matching fields (only from netCDF files at
810-
# this stage - we'll do UM fields later)
811-
# --------------------------------------------------------
786+
# Select matching fields (only for netCDF files at
787+
# this stage - we'll do it for other file types
788+
# later)
812789
if select and ftype == "netCDF":
813790
file_contents = file_contents.select_by_identity(*select)
814791

815-
# --------------------------------------------------------
816-
# Add this file's contents to that already read from other
817-
# files
818-
# --------------------------------------------------------
792+
# Add this file's contents to that already read from
793+
# other files
819794
out.extend(file_contents)
820795

821796
field_counter = len(out)
822797
file_counter += 1
823798

824-
if info:
825-
logger.info(
826-
f"Read {field_counter} field{cls._plural(field_counter)} "
827-
f"from {file_counter} file{cls._plural(file_counter)}"
828-
) # pragma: no cover
829-
830799
# ----------------------------------------------------------------
831800
# Aggregate the output fields/domains
832801
# ----------------------------------------------------------------
@@ -863,12 +832,18 @@ def __new__(
863832
del f._custom["standard_name"]
864833

865834
# ----------------------------------------------------------------
866-
# Select matching fields from UM/PP fields (post setting of
835+
# Select matching fields from UM files (post setting of their
867836
# standard names)
868837
# ----------------------------------------------------------------
869838
if select and "UM" in ftypes:
870839
out = out.select_by_identity(*select)
871840

841+
if info:
842+
logger.info(
843+
f"Read {field_counter} field{cls._plural(field_counter)} "
844+
f"from {file_counter} file{cls._plural(file_counter)}"
845+
) # pragma: no cover
846+
872847
if nfields is not None and len(out) != nfields:
873848
raise ValueError(
874849
f"{nfields} field{cls._plural(nfields)} requested but "

cf/read_write/um/umread.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import dask.array as da
1010
import numpy as np
1111
from cfdm import Constructs, is_log_level_info
12-
from cfdm.read_write.exceptions import UnknownFileFormatError
12+
from cfdm.read_write.exceptions import FileTypeError
1313
from dask.array.core import getter, normalize_chunks
1414
from dask.base import tokenize
1515
from netCDF4 import date2num as netCDF4_date2num
@@ -3390,6 +3390,7 @@ def read(
33903390
squeeze=False,
33913391
unsqueeze=False,
33923392
domain=False,
3393+
file_type=None,
33933394
ignore_unknown_type=False,
33943395
):
33953396
"""Read fields from a PP file or UM fields file.
@@ -3524,13 +3525,19 @@ def read(
35243525
else:
35253526
byte_ordering = None
35263527

3527-
# try:
3528+
# ------------------------------------------------------------
3529+
# Parse the 'file_type' keyword parameter
3530+
# ------------------------------------------------------------
3531+
if file_type is not None:
3532+
if isinstance(file_type, str):
3533+
file_type = (file_type,)
3534+
3535+
file_type = set(file_type)
3536+
if not file_type.intersection(("UM",)):
3537+
# Return now if there are no valid file types
3538+
return []
3539+
35283540
f = self.file_open(filename, parse=True)
3529-
# except UnknownFileFormatError:
3530-
# if not ignore_unknown_type:
3531-
# raise
3532-
#
3533-
# return []
35343541

35353542
info = is_log_level_info(logger)
35363543

@@ -3601,7 +3608,7 @@ def _open_um_file(
36013608
except Exception:
36023609
pass
36033610

3604-
raise UnknownFileFormatError(
3611+
raise FileTypeError(
36053612
f"Can't interpret {filename} as a PP or UM dataset"
36063613
)
36073614

0 commit comments

Comments
 (0)