diff --git a/doc/source/examining/loading_data.rst b/doc/source/examining/loading_data.rst index 89d623a1efc..6139b0a02e8 100644 --- a/doc/source/examining/loading_data.rst +++ b/doc/source/examining/loading_data.rst @@ -3171,12 +3171,15 @@ There are three way to make yt detect all the particle fields. For example, if y particle_metallicity, d """ - Each line should contain the name of the field and its data type (``d`` for double precision, ``f`` for single precision, ``i`` for integer and ``l`` for long integer). You can also configure the auto detected fields for fluid types by adding a section ``ramses-hydro``, ``ramses-grav`` or ``ramses-rt`` in the config file. For example, if you customized your gravity files so that they contain the potential, the potential in the previous timestep and the x, y and z accelerations, you can use : + Each line should contain the name of the field and its data type (``d`` for double precision, ``f`` for single precision, ``i`` for integer and ``l`` for long integer). You can also configure the auto detected fields for fluid types by adding a section ``ramses-hydro``, ``ramses-grav`` or ``ramses-rt`` in the config file. For example, if you customized your gravity files so that they contain the potential, the potential in the previous timestep and the x, y and z accelerations, all in single-precision, you can use : .. code-block:: none [ramses-grav] - fields = [ "Potential", "Potential-old", "x-acceleration", "y-acceleration", "z-acceleration" ] + fields = [ "Potential,f", "Potential-old,f", "x-acceleration,f", "y-acceleration,f", "z-acceleration,f" ] + + Importantly, the order of the fields should match the order in which they are written in the RAMSES output files. + yt will also assume that each entry is formed of a field name followed by its type (``f`` for single precision, ``d`` for double precision), separated by a comma. Field names containing commas are not supported, other precisions (e.g. 
integers) are not supported either. Presently, all fields in a given file type (gravity, hydro, ...) need to have the same precision (e.g. all single-precision or all double-precision), combinations are not supported. Internally, yt will convert all data into double-precision floats, but this will allow it to read the data correctly from file. 3. New RAMSES way. Recent versions of RAMSES automatically write in their output an ``hydro_file_descriptor.txt`` file that gives information about which field is where. If you wish, you can simply create such a file in the folder containing the ``info_xxxxx.txt`` file @@ -3197,6 +3200,8 @@ There are three way to make yt detect all the particle fields. For example, if y 11, metallicity, d It is important to note that this file should not end with an empty line (but in this case with ``11, metallicity, d``). + Note that, for grid fields (hydro, gravity, rt), all the fields need to have the same precision (``f`` or ``d``). + Combinations are not supported. .. note:: diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 4918d31ea2a..f3d85ba9a28 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -452,6 +452,7 @@ def _fill_no_ghostzones(self, fd, fields, selector, file_handler): fields, data, oct_handler, + file_handler.single_precision, ) return data @@ -507,6 +508,7 @@ def _fill_with_ghostzones( fields, tr, oct_handler, + file_handler.single_precision, domain_inds=domain_inds, ) return tr @@ -805,7 +807,7 @@ def __init__( if isinstance(fields, str): fields = field_aliases[fields] """ - fields: + fields: list[tuple[str, str]] | list[str] | None An array of hydro variable fields in order of position in the hydro_XXXXX.outYYYYY file. If set to None, will try a default set of fields. @@ -822,7 +824,13 @@ def __init__( This affects the fields related to cooling and the mean molecular weight. 
""" - self._fields_in_file = fields + self._fields_in_file: list[tuple[str, str]] = [] + if fields: + for field in fields: + if isinstance(field, tuple): + self._fields_in_file.append(field) + else: + self._fields_in_file.append((field, "d")) # By default, extra fields have not triggered a warning self._warned_extra_fields = defaultdict(lambda: False) self._extra_particle_fields = extra_particle_fields diff --git a/yt/frontends/ramses/field_handlers.py b/yt/frontends/ramses/field_handlers.py index fafd18b5967..01b98c43030 100644 --- a/yt/frontends/ramses/field_handlers.py +++ b/yt/frontends/ramses/field_handlers.py @@ -24,6 +24,54 @@ def register_field_handler(ph): DETECTED_FIELDS = {} # type: ignore +def validate_field_precision(fields: list[tuple[str, str]]): + """Check if all fields have the same precision. + + Parameters + ---------- + fields : list of (str, str) + List of tuples containing field names and their precision + ('f' for single, 'd' for double). + """ + if len(fields) == 0: + return + + if not all(precision == fields[0][1] for _, precision in fields): + raise RuntimeError("Mixed precision in field list. This is not supported.") + + if fields[0][1] not in ("f", "d"): + raise ValueError( + f"Unknown precision specifier '{fields[0][1]}'. " + "Only 'f' or 'd' are supported." + ) + + +def get_fields_and_single_precision( + fields: list[tuple[str, str]], +) -> tuple[list[str], bool]: + """Get the common precision of fields. + + Parameters + ---------- + fields : list of (str, str) + List of tuples containing field names and their precision + ('f' for single, 'd' for double). + + Returns + ------- + tuple of (list of str, bool) + A tuple containing a list of field names and a boolean indicating + if all fields are single precision, False if double precision. 
+ """ + field_names = [field for field, _ in fields] + validate_field_precision(fields) + if fields: + single_precision = fields[0][1] == "f" + else: + single_precision = False + return field_names, single_precision + + class HandlerMixin: """This contains all the shared methods to handle RAMSES files. @@ -156,6 +204,7 @@ class FieldFileHandler(abc.ABC, HandlerMixin): field_types = ( None # Mapping from field to the type of the data (float, integer, ...) ) + parameters: dict # Parameters read from the header def __init_subclass__(cls, *args, **kwargs): """ @@ -169,7 +218,6 @@ def __init_subclass__(cls, *args, **kwargs): cls._unique_registry = {} cls.parameters = {} cls.rt_parameters = {} - cls._detected_field_list = {} return cls def __init__(self, domain): @@ -177,7 +225,7 @@ def __init__(self, domain): @classmethod @abc.abstractmethod - def detect_fields(cls, ds): + def detect_fields(cls, ds) -> tuple[list[str], bool]: """ Called once to setup the fields of this type @@ -192,7 +240,7 @@ def detect_fields(cls, ds): pass @classmethod - def get_detected_fields(cls, ds): + def get_detected_fields(cls, ds) -> tuple[list[str], bool] | None: """ Get the detected fields from the registry. """ @@ -204,14 +252,16 @@ def get_detected_fields(cls, ds): return None @classmethod - def set_detected_fields(cls, ds, fields): + def set_detected_fields(cls, ds, fields, single_precision): """ Store the detected fields into the registry. 
""" if ds.unique_identifier not in DETECTED_FIELDS: DETECTED_FIELDS[ds.unique_identifier] = {} - DETECTED_FIELDS[ds.unique_identifier].update({cls.ftype: fields}) + DETECTED_FIELDS[ds.unique_identifier].update( + {cls.ftype: (fields, single_precision)} + ) @classmethod def purge_detected_fields(cls, ds): @@ -237,7 +287,13 @@ def level_count(self): @property def field_list(self): - return self._detected_field_list[self.ds.unique_identifier] + field_list, _single_precision = self.detect_fields(self.ds) + return [(self.ftype, f) for f in field_list] + + @property + def single_precision(self): + _field_list, single_precision = self.detect_fields(self.ds) + return single_precision @cached_property def offset(self): @@ -250,7 +306,8 @@ def offset(self): It should be generic enough for most of the cases, but if the *structure* of your fluid file is non-canonical, change this. """ - nvars = len(self._detected_field_list[self.ds.unique_identifier]) + nvars = len(self.field_list) + with FortranFile(self.fname) as fd: # Skip headers nskip = len(self.attrs) @@ -269,6 +326,7 @@ def offset(self): # # So there are 8 * nvars records each with length (nocts, ) # at each (level, cpus) + offset, level_count = read_offset( fd, min_level, @@ -276,19 +334,36 @@ def offset(self): self.parameters[self.ds.unique_identifier]["nvar"], self.domain.amr_header, Nskip=nvars * 8, + single_precision=self.single_precision, ) self._level_count = level_count return offset @classmethod - def load_fields_from_yt_config(cls) -> list[str]: + def load_fields_from_yt_config(cls) -> tuple[list[str], bool]: if cls.config_field and ytcfg.has_section(cls.config_field): cfg = ytcfg.get(cls.config_field, "fields") - fields = [_.strip() for _ in cfg if _.strip() != ""] - return fields + fields = [] + for item in cfg: + item = item.strip() + if not item: + continue + content = item.split(",") + if len(content) == 2: + field_name, precision = content + elif len(content) == 1: + field_name, precision = content[0], 
"d" + else: + raise ValueError( + f"Invalid field specification '{item}' in config section " + f"'{cls.config_field}'. Expected format: field_name[,precision]" + ) + fields.append((field_name.strip(), precision.strip())) + + return get_fields_and_single_precision(fields) - return [] + return ([], False) class HydroFieldFileHandler(FieldFileHandler): @@ -307,7 +382,7 @@ class HydroFieldFileHandler(FieldFileHandler): ) @classmethod - def detect_fields(cls, ds): + def detect_fields(cls, ds) -> tuple[list[str], bool]: # Try to get the detected fields detected_fields = cls.get_detected_fields(ds) if detected_fields: @@ -329,24 +404,31 @@ def detect_fields(cls, ds): nvar = hvals["nvar"] ok = False + single_precision = False - if ds._fields_in_file is not None: + if ds._fields_in_file: # Case 1: fields are provided by users on construction of dataset - fields = list(ds._fields_in_file) + fields, single_precision = get_fields_and_single_precision( + ds._fields_in_file + ) ok = True else: # Case 2: fields are provided by users in the config - fields = cls.load_fields_from_yt_config() + fields, single_precision = cls.load_fields_from_yt_config() + ok = len(fields) > 0 if not ok and os.path.exists(fname_desc): # Case 3: there is a file descriptor # Or there is an hydro file descriptor mylog.debug("Reading hydro file descriptor.") - # For now, we can only read double precision fields - fields = [ - e[0] for e in _read_fluid_file_descriptor(fname_desc, prefix="hydro") - ] + + field_with_precision = _read_fluid_file_descriptor( + fname_desc, prefix="hydro" + ) + fields, single_precision = get_fields_and_single_precision( + field_with_precision + ) # We get no fields for old-style hydro file descriptor ok = len(fields) > 0 @@ -453,13 +535,10 @@ def detect_fields(cls, ds): count_extra += 1 if count_extra > 0: mylog.debug("Detected %s extra fluid fields.", count_extra) - cls._detected_field_list[ds.unique_identifier] = [ - (cls.ftype, e) for e in fields - ] - 
cls.set_detected_fields(ds, fields) + cls.set_detected_fields(ds, fields, single_precision) - return fields + return (fields, single_precision) class GravFieldFileHandler(FieldFileHandler): @@ -475,7 +554,7 @@ class GravFieldFileHandler(FieldFileHandler): ) @classmethod - def detect_fields(cls, ds): + def detect_fields(cls, ds) -> tuple[list[str], bool]: # Try to get the detected fields detected_fields = cls.get_detected_fields(ds) if detected_fields: @@ -491,7 +570,7 @@ def detect_fields(cls, ds): nvar = cls.parameters[ds.unique_identifier]["nvar"] ndim = ds.dimensionality - fields = cls.load_fields_from_yt_config() + fields, single_precision = cls.load_fields_from_yt_config() if not fields: if nvar == ndim + 1: @@ -507,13 +586,9 @@ def detect_fields(cls, ds): for i in range(nvar - ndetected): fields.append(f"var{i}") - cls._detected_field_list[ds.unique_identifier] = [ - (cls.ftype, e) for e in fields - ] + cls.set_detected_fields(ds, fields, single_precision) - cls.set_detected_fields(ds, fields) - - return fields + return (fields, single_precision) class RTFieldFileHandler(FieldFileHandler): @@ -530,6 +605,7 @@ class RTFieldFileHandler(FieldFileHandler): ("nboundary", 1, "i"), ("gamma", 1, "d"), ) + rt_parameters: dict @classmethod def detect_fields(cls, ds): @@ -542,10 +618,13 @@ def detect_fields(cls, ds): rheader = {} - def read_rhs(cast): + def read_rhs(cast, group=None): line = f.readline() p, v = line.split("=") - rheader[p.strip()] = cast(v) + if group is not None: + rheader[f"Group {group} {p.strip()}"] = cast(v) + else: + rheader[p.strip()] = cast(v) with open(fname) as f: # Read nRTvar, nions, ngroups, iions @@ -567,7 +646,7 @@ def read_rhs(cast): read_rhs(float) f.readline() - # Reat unit_np, unit_pfd + # Read unit_np, unit_pf for _ in range(2): read_rhs(float) @@ -580,9 +659,22 @@ def read_rhs(cast): # Read n star, t2star, g_star for _ in range(3): read_rhs(float) + f.readline() + f.readline() - # Touchy part, we have to read the photon group 
properties - mylog.debug("Not reading photon group properties") + # Get global group properties (groupL0, groupL1, spec2group) + for _ in range(3): + read_rhs(lambda line: [float(e) for e in line.split()]) + + # get egy for each group (used to get proper energy densities) + line = f.readline().strip() + while line.startswith("---Group"): + group = int(line.split()[1]) + read_rhs(lambda line: [float(e) for e in line.split()], group) + # Skip cross sections weighted by number/energy + f.readline() + f.readline() + line = f.readline().strip() cls.rt_parameters[ds.unique_identifier] = rheader @@ -597,23 +689,20 @@ def read_rhs(cast): ok = False - if ds._fields_in_file is not None: - # Case 1: fields are provided by users on construction of dataset - fields = list(ds._fields_in_file) - ok = True - else: - # Case 2: fields are provided by users in the config - fields = cls.load_fields_from_yt_config() - ok = len(fields) > 0 + # Are fields provided by users in the config? + fields, single_precision = cls.load_fields_from_yt_config() + ok = len(fields) > 0 if not ok and os.path.exists(fname_desc): # Case 3: there is a file descriptor # Or there is an hydro file descriptor mylog.debug("Reading rt file descriptor.") - # For now, we can only read double precision fields - fields = [ - e[0] for e in _read_fluid_file_descriptor(fname_desc, prefix="rt") - ] + + field_with_precision = _read_fluid_file_descriptor(fname_desc, prefix="rt") + fields, single_precision = get_fields_and_single_precision( + field_with_precision + ) + ok = len(fields) > 0 if not ok: @@ -627,12 +716,8 @@ def read_rhs(cast): for ng in range(ngroups): fields.extend([t % (ng + 1) for t in tmp]) - cls._detected_field_list[ds.unique_identifier] = [ - (cls.ftype, e) for e in fields - ] - - cls.set_detected_fields(ds, fields) - return fields + cls.set_detected_fields(ds, fields, single_precision) + return (fields, single_precision) @classmethod def get_rt_parameters(cls, ds): diff --git 
a/yt/frontends/ramses/io_utils.pyx b/yt/frontends/ramses/io_utils.pyx index 805c6e020d4..585c8b88881 100644 --- a/yt/frontends/ramses/io_utils.pyx +++ b/yt/frontends/ramses/io_utils.pyx @@ -19,10 +19,15 @@ ctypedef np.float64_t DOUBLE_t cdef INT64_t INT32_SIZE = sizeof(np.int32_t) cdef INT64_t INT64_SIZE = sizeof(np.int64_t) cdef INT64_t DOUBLE_SIZE = sizeof(np.float64_t) +cdef INT64_t SINGLE_SIZE = sizeof(np.float32_t) -cdef inline INT64_t skip_len(INT64_t Nskip, INT64_t record_len) noexcept nogil: - return Nskip * (record_len * DOUBLE_SIZE + INT64_SIZE) +cdef inline int skip_len(int Nskip, int record_len, np.npy_bool single_precision) noexcept nogil: + # If the data is single precision, we need to skip 4 bytes + if single_precision: + return Nskip * (record_len * SINGLE_SIZE + INT64_SIZE) + else: + return Nskip * (record_len * DOUBLE_SIZE + INT64_SIZE) @cython.cpow(True) @@ -111,7 +116,15 @@ def read_amr(FortranFile f, dict headers, @cython.wraparound(False) @cython.cdivision(True) @cython.nonecheck(False) -cpdef read_offset(FortranFile f, INT64_t min_level, INT64_t domain_id, INT64_t nvar, dict headers, int Nskip): +cpdef read_offset( + FortranFile f, + INT64_t min_level, + INT64_t domain_id, + INT64_t nvar, + dict headers, + int Nskip, + np.npy_bool single_precision +): cdef np.ndarray[np.int64_t, ndim=2] offset, level_count cdef INT64_t ndim, twotondim, nlevelmax, n_levels, nboundary, ncpu, ncpu_and_bound @@ -153,7 +166,7 @@ cpdef read_offset(FortranFile f, INT64_t min_level, INT64_t domain_id, INT64_t n if ilevel >= min_level: offset_view[icpu, ilevel - min_level] = f.tell() level_count_view[icpu, ilevel - min_level] = file_ncache - f.seek(skip_len(Nskip, file_ncache), SEEK_CUR) + f.seek(skip_len(Nskip, file_ncache, single_precision), SEEK_CUR) return offset, level_count @@ -172,7 +185,9 @@ def fill_hydro(FortranFile f, INT64_t ndim, list all_fields, list fields, dict tr, RAMSESOctreeContainer oct_handler, - np.ndarray[np.int32_t, ndim=1] 
domain_inds=np.array([], dtype='int32')): + np.npy_bool single_precision, + np.ndarray[np.int32_t, ndim=1] domain_inds=np.array([], dtype='int32'), + ): cdef INT64_t offset cdef dict tmp cdef str field @@ -197,7 +212,8 @@ def fill_hydro(FortranFile f, # The ordering is very important here, as we'll write directly into the memory # address the content of the files. - cdef np.float64_t[::1, :, :] buffer + cdef np.float32_t[::1, :, :] buffer_single + cdef np.float64_t[::1, :, :] buffer_double jump_len = 0 j = 0 @@ -211,7 +227,11 @@ def fill_hydro(FortranFile f, jumps[j] = jump_len cdef int first_field_index = jumps[0] - buffer = np.empty((level_count.max(), twotondim, nfields_selected), dtype="float64", order='F') + # The buffer is different depending on the size of the data + if single_precision: + buffer_single = np.empty((level_count.max(), twotondim, nfields_selected), dtype="float32", order='F') + else: + buffer_double = np.empty((level_count.max(), twotondim, nfields_selected), dtype="float64", order='F') # Precompute which levels we need to read Ncells = len(level_inds) @@ -231,7 +251,7 @@ def fill_hydro(FortranFile f, offset = offsets[icpu, ilevel] if offset == -1: continue - f.seek(offset + skip_len(first_field_index, nc), SEEK_SET) + f.seek(offset + skip_len(first_field_index, nc, single_precision), SEEK_SET) # We have already skipped the first fields (if any) # so we "rewind" (this will cancel the first seek) @@ -241,9 +261,12 @@ def fill_hydro(FortranFile f, for j in range(nfields_selected): jump_len += jumps[j] if jump_len > 0: - f.seek(skip_len(jump_len, nc), SEEK_CUR) + f.seek(skip_len(jump_len, nc, single_precision), SEEK_CUR) jump_len = 0 - f.read_vector_inplace('d', &buffer[0, i, j]) + if single_precision: + f.read_vector_inplace('f', &buffer_single[0, i, j]) + else: + f.read_vector_inplace('d', &buffer_double[0, i, j]) jump_len += jumps[nfields_selected] @@ -251,12 +274,15 @@ def fill_hydro(FortranFile f, # but since we're doing an absolute seek at 
the beginning of # the loop on CPUs, we can spare one seek here ## if jump_len > 0: - ## f.seek(skip_len(jump_len, nc), SEEK_CUR) + ## f.seek(skip_len(jump_len, nc, single_precision), SEEK_CUR) # Alias buffer into dictionary tmp = {} for i, field in enumerate(fields): - tmp[field] = buffer[:, :, i] + if single_precision: + tmp[field] = np.asarray(buffer_single[:, :, i], dtype="float64") + else: + tmp[field] = buffer_double[:, :, i] if ncpu_selected > 1: oct_handler.fill_level_with_domain(