Skip to content

Schema for config file autocompletion/linting #83

@benatouba

Description

@benatouba

Pydantic can help by validating config files at runtime or during coding.

However, users would also benefit from a schema that a language server/IDE can use for the TOML files.
I don't know of a way to get linting/autocompletion for the configuration without one.

I have no general solution, but it could theoretically be solved for users of taplo. But as taplo is a standalone LSP and I don't know how this is handled by common IDEs and/or editors, I am not sure how helpful this would be.

taplo allows for specification of a local schema file in a local .taplo.toml file. Schema file creation is not too complicated/time-consuming when all types are clearly defined and known.

A simple version for the config.toml could look like this:

{
  "title": "COSIPY Configuration Schema",
  "description": "Schema for validating the COSIPY configuration in TOML format.",
  "type": "object",
  "properties": {
    "SIMULATION_PERIOD": {
      "type": "object",
      "properties": {
        "time_start": {
          "type": "string",
          "pattern": "^[0-9]{4}-[0-1][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]$",
          "description": "The start time of the simulation."
        },
        "time_end": {
          "type": "string",
          "pattern": "^[0-9]{4}-[0-1][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]$",
          "description": "The end time of the simulation."
        }
      },
      "required": ["time_start", "time_end"]
    },
    "FILENAMES": {
      "type": "object",
      "properties": {
        "data_path": {
          "type": "string",
          "description": "Path to the data directory."
        },
        "input_netcdf": {
          "type": "string",
          "description": "Path to the input NetCDF file."
        },
        "output_prefix": {
          "type": "string",
          "description": "Prefix for output files."
        }
      },
      "required": ["data_path", "input_netcdf", "output_prefix"]
    },
    "RESTART": {
      "type": "object",
      "properties": {
        "restart": {
          "type": "boolean",
          "description": "Start from restart file if true."
        }
      },
      "required": ["restart"]
    },
    "STAKE_DATA": {
      "type": "object",
      "properties": {
        "stake_evaluation": {
          "type": "boolean",
          "description": "Perform stake evaluation if true."
        },
        "stakes_loc_file": {
          "type": "string",
          "description": "Path to the stake location file."
        },
        "stakes_data_file": {
          "type": "string",
          "description": "Path to the stake data file."
        },
        "eval_method": {
          "type": "string",
          "description": "Evaluation method (e.g., 'rmse')."
        },
        "obs_type": {
          "type": "string",
          "description": "Observation type ('mb' or 'snowheight')."
        }
      },
      "required": ["stake_evaluation", "stakes_loc_file", "stakes_data_file", "eval_method", "obs_type"]
    },
    "DIMENSIONS": {
      "type": "object",
      "properties": {
        "WRF": {
          "type": "boolean",
          "description": "Use WRF as input if true."
        },
        "WRF_X_CSPY": {
          "type": "boolean",
          "description": "Interactive simulation with WRF if true."
        },
        "northing": {
          "type": "string",
          "description": "Name of the northing coordinate."
        },
        "easting": {
          "type": "string",
          "description": "Name of the easting coordinate."
        }
      },
      "required": ["WRF", "WRF_X_CSPY", "northing", "easting"]
    },
    "COMPRESSION": {
      "type": "object",
      "properties": {
        "compression_level": {
          "type": "integer",
          "description": "Compression level for output NetCDF (1-9).",
          "minimum": 1,
          "maximum": 9
        }
      },
      "required": ["compression_level"]
    },
    "PARALLELIZATION": {
      "type": "object",
      "properties": {
        "slurm_use": {
          "type": "boolean",
          "description": "Use SLURM for parallelization if true."
        },
        "workers": {
          "type": "integer",
          "description": "Number of workers; 0 for all available cores."
        },
        "local_port": {
          "type": "integer",
          "description": "Port for the local cluster."
        }
      },
      "required": ["slurm_use", "workers", "local_port"]
    },
    "FULL_FIELDS": {
      "type": "object",
      "properties": {
        "full_field": {
          "type": "boolean",
          "description": "Write full 2D fields to file if true."
        }
      },
      "required": ["full_field"]
    },
    "FORCINGS": {
      "type": "object",
      "properties": {
        "force_use_TP": {
          "type": "boolean",
          "description": "Use total precipitation data if true."
        },
        "force_use_N": {
          "type": "boolean",
          "description": "Use cloud cover fraction if true."
        }
      },
      "required": ["force_use_TP", "force_use_N"]
    },
    "SUBSET": {
      "type": "object",
      "properties": {
        "tile": {
          "type": "boolean",
          "description": "Enable tiling if true."
        },
        "xstart": {
          "type": "integer",
          "description": "Start index for the x-dimension."
        },
        "xend": {
          "type": "integer",
          "description": "End index for the x-dimension."
        },
        "ystart": {
          "type": "integer",
          "description": "Start index for the y-dimension."
        },
        "yend": {
          "type": "integer",
          "description": "End index for the y-dimension."
        }
      },
      "required": ["tile", "xstart", "xend", "ystart", "yend"]
    },
    "OUTPUT_VARIABLES": {
      "type": "object",
      "properties": {
        "output_atm": {
          "type": "string",
          "description": "Comma-separated list of atmospheric output variables."
        },
        "output_internal": {
          "type": "string",
          "description": "Comma-separated list of internal output variables."
        },
        "output_full": {
          "type": "string",
          "description": "Comma-separated list of full output variables."
        }
      },
      "required": ["output_atm", "output_internal", "output_full"]
    }
  },
  "required": [
    "SIMULATION_PERIOD",
    "FILENAMES",
    "RESTART",
    "STAKE_DATA",
    "DIMENSIONS",
    "COMPRESSION",
    "PARALLELIZATION",
    "FULL_FIELDS",
    "FORCINGS",
    "SUBSET",
    "OUTPUT_VARIABLES"
  ]
}

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions