# Copyright 2024 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""File validation utilities for geometry file loading."""

import logging
import os
from pathlib import Path
from typing import List, Union

import numpy as np
from torax._src.geometry import geometry_errors

logger = logging.getLogger(__name__)
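
# Validation failures below are logged before the corresponding exception is
# raised, so the details also appear in log output.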


def validate_file_access(
    file_path: Union[str, Path], file_type: str, validate_format: bool = True
) -> Path:
  """Validate that the geometry file exists and is accessible.

  Args:
    file_path: Path to the geometry file.
    file_type: Type of geometry file ('chease', 'fbt', 'eqdsk').
    validate_format: Whether to validate the file extension.

  Returns:
    Path object for the validated file.

  Raises:
    GeometryFileNotFoundError: If the file doesn't exist.
    GeometryFilePermissionError: If the file can't be read.
    GeometryFileFormatError: If the file is empty or has the wrong format.
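
  Example (hypothetical path, for illustration only):
    path = validate_file_access("/path/to/equilibrium.eqdsk", "eqdsk")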
  """
  path = Path(file_path)

  # Check file exists
  if not path.exists():
    error_msg = (
        f"Geometry file not found: {file_path}\n"
        "Please check:\n"
        "  1. File path is correct\n"
        "  2. File exists at the specified location\n"
        "  3. Filename spelling is correct"
    )
    logger.error(error_msg)
    raise geometry_errors.GeometryFileNotFoundError(error_msg)

  # Check file permissions
  if not os.access(path, os.R_OK):
    error_msg = (
        f"No read permission for geometry file: {file_path}\n"
        f"Please check file permissions and run:\n"
        f"  chmod +r {file_path}"
    )
    logger.error(error_msg)
    raise geometry_errors.GeometryFilePermissionError(error_msg)

  # Check file is not empty
  if path.stat().st_size == 0:
    error_msg = f"Geometry file is empty: {file_path}"
    logger.error(error_msg)
    raise geometry_errors.GeometryFileFormatError(error_msg)

  # Validate file type
  if validate_format:
    expected_extensions = {
        "chease": [".chease", ".txt", ".mat2cols"],
        "fbt": [".fbt", ".dat", ".mat"],
        "eqdsk": [".eqdsk", ".geqdsk"],
    }

    if file_type.lower() in expected_extensions:
      valid_ext = path.suffix.lower() in expected_extensions[file_type.lower()]
      if not valid_ext:
        logger.warning(
            "File extension '%s' unusual for %s files. Expected: %s",
            path.suffix,
            file_type.upper(),
            expected_extensions[file_type.lower()],
        )

  return path


def validate_geometry_data(data: dict, file_type: str, file_path: str) -> None:
  """Validate the structure of loaded geometry data.

  This performs basic structural validation to catch corrupt files, but does
  not enforce strict physical constraints that might vary by use case.

  Args:
    data: Loaded geometry data dictionary.
    file_type: Type of geometry file ('chease', 'fbt', 'eqdsk').
    file_path: Path to the file, used in error messages.

  Raises:
    GeometryDataValidationError: If the data fails validation.
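
  Example (illustrative only, with a hypothetical loader function):
    path = validate_file_access("/path/to/chease_file.mat2cols", "chease")
    data = load_chease_file(path)  # hypothetical loader
    validate_geometry_data(data, "chease", str(path))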
  """
  issues = []

  # Format-specific validation
  if file_type.lower() == "chease":
    _validate_chease_data(data, issues)
  elif file_type.lower() == "fbt":
    _validate_fbt_data(data, issues)
  elif file_type.lower() == "eqdsk":
    _validate_eqdsk_data(data, issues)

  if issues:
    error_msg = (
        f"Geometry data validation failed for {file_type.upper()} file:"
        f" {file_path}\n"
        + "\n".join(f"  - {issue}" for issue in issues)
    )
    logger.error(error_msg)
    raise geometry_errors.GeometryDataValidationError(error_msg)


def _validate_chease_data(data: dict, issues: List[str]) -> None:
  """Validate CHEASE-specific data structure.

  Checks for required fields and data consistency, but does not enforce
  strict physical constraints, to allow for edge cases.
  """
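  # The field names below match the column headers in the CHEASE geometry
  # file format, hence the "key=expression" style.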
  required_fields = [
      "PSIchease=psi/2pi",
      "Ipprofile",
      "RHO_TOR=sqrt(Phi/pi/B0)",
      "R_INBOARD",
      "R_OUTBOARD",
      "T=RBphi",
  ]

  for field in required_fields:
    if field not in data:
      issues.append(f"Missing required CHEASE field: {field}")

  # Check data structure consistency
  if data:
    array_fields = {
        k: v for k, v in data.items() if isinstance(v, np.ndarray)
    }
    if array_fields:
      lengths = [len(v) for v in array_fields.values()]
      if len(set(lengths)) > 1:
        issues.append(
            f"CHEASE arrays have inconsistent lengths: "
            f"{dict(zip(array_fields.keys(), lengths))}"
        )

  # Basic sanity checks (only flag clearly invalid data)
  if "R_INBOARD" in data and "R_OUTBOARD" in data:
    r_in = np.asarray(data["R_INBOARD"])
    r_out = np.asarray(data["R_OUTBOARD"])

    # Only check for clearly invalid values (negative radii)
    if np.any(r_in < 0):
      issues.append("Inboard radius (R_INBOARD) contains negative values")
    if np.any(r_out < 0):
      issues.append("Outboard radius (R_OUTBOARD) contains negative values")

    # Only flag if data is consistently invalid (not just edge cases).
    # Use mean values to avoid issues with individual grid points.
    mean_r_in = np.mean(r_in[r_in > 0]) if np.any(r_in > 0) else 0
    mean_r_out = np.mean(r_out[r_out > 0]) if np.any(r_out > 0) else 0

    if mean_r_out > 0 and mean_r_in > 0 and mean_r_out < mean_r_in:
      issues.append(
          "Average outboard radius less than inboard radius "
          "(possible column swap in file)"
      )


def _validate_fbt_data(data: dict, issues: List[str]) -> None:
  """Validate FBT-specific data structure."""
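  # The LY bundle is typically loaded from a MATLAB .mat file as a numpy
  # structured array with named fields (e.g. "rBt", "aminor", "rgeom");
  # only the presence of a dtype and field names is checked here, not the
  # field values.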
  if "LY" in data:
    ly_data = data["LY"]
    # Check if it's a structured array
    if not hasattr(ly_data, "dtype"):
      issues.append("FBT LY bundle has unexpected structure")
      return

    # Check for some expected fields (not all required for compatibility)
    common_fields = ["rBt", "aminor", "rgeom"]
    if hasattr(ly_data.dtype, "names") and ly_data.dtype.names:
      available_fields = ly_data.dtype.names
      missing_common = [f for f in common_fields if f not in available_fields]
      if len(missing_common) == len(common_fields):
        issues.append(
            f"FBT LY bundle missing common fields. "
            f"Expected at least one of: {common_fields}"
        )
  else:
    # Non-LY format - just check for basic fields
    common_fields = ["rBt", "aminor", "rgeom"]
    missing = [f for f in common_fields if f not in data]
    if len(missing) == len(common_fields):
      issues.append(
          f"FBT file missing common fields. "
          f"Expected at least one of: {common_fields}"
      )


def _validate_eqdsk_data(data: dict, issues: List[str]) -> None:
  """Validate EQDSK-specific data structure."""
  required_fields = [
      "bcentre",
      "xmag",
      "zmag",
      "psimag",
      "psibdry",
      "xbdry",
      "zbdry",
      "fpol",
      "qpsi",
      "psi",
  ]

  for field in required_fields:
    if field not in data:
      issues.append(f"Missing required EQDSK field: {field}")

  # Basic structural checks
  if "xmag" in data:
    try:
      xmag_val = float(data["xmag"])
      if xmag_val <= 0:
        issues.append("Magnetic axis major radius (xmag) must be positive")
    except (ValueError, TypeError):
      issues.append("Magnetic axis major radius (xmag) has invalid format")

  if "xbdry" in data and "zbdry" in data:
    try:
      x_bdy = np.asarray(data["xbdry"])
      z_bdy = np.asarray(data["zbdry"])

      if len(x_bdy) != len(z_bdy):
        issues.append("Boundary x and z coordinates must have same length")
      elif len(x_bdy) < 3:
        issues.append("Boundary must have at least 3 points")
    except (ValueError, TypeError):
      issues.append("Boundary coordinates have invalid format")