Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ dependencies = [
"tqdm",
"requests",
"parsl",
"jinja2"
"jinja2",
"CMIP7-data-request-api>=1.3"
]
dynamic = ["version"]

Expand Down
23 changes: 10 additions & 13 deletions src/access_moppy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,19 +421,16 @@ def ordered(ds, core=("lat", "lon", "time", "height")):
self.ds = ordered(self.ds)

def _build_drs_path(self, attrs: Dict[str, str]) -> Path:
drs_components = [
attrs.get("mip_era", "CMIP6"),
attrs["activity_id"],
attrs["institution_id"],
attrs["source_id"],
attrs["experiment_id"],
attrs["variant_label"],
attrs["table_id"],
attrs["variable_id"],
attrs["grid_label"],
f"v{self.version_date}",
]
return self.drs_root.joinpath(*drs_components)
"""
Build DRS path using the vocabulary class's controlled vocabulary specifications.
"""
if not hasattr(self.vocab, "build_drs_path"):
raise AttributeError(
f"Vocabulary class {type(self.vocab).__name__} does not implement build_drs_path() method. "
"Please ensure you are using a proper CMIP vocabulary class (CMIP6Vocabulary or CMIP7Vocabulary)."
)

return self.vocab.build_drs_path(self.drs_root, self.version_date)

def _update_latest_symlink(self, versioned_path: Path):
latest_link = versioned_path.parent / "latest"
Expand Down
1,986 changes: 1,986 additions & 0 deletions src/access_moppy/mappings/cmip6_to_cmip7_compound_name_mapping.json

Large diffs are not rendered by default.

1,985 changes: 1,985 additions & 0 deletions src/access_moppy/mappings/cmip7_to_cmip6_compound_name_mapping.json

Large diffs are not rendered by default.

324 changes: 324 additions & 0 deletions src/access_moppy/utilities.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import json
import warnings
from importlib.resources import as_file, files
from pathlib import Path
from typing import Dict, List, Optional, Union

import numpy as np
import pandas as pd
import xarray as xr
from cftime import num2date
from data_request_api.content import dump_transformation as dt
from data_request_api.query import data_request as dr

type_mapping = {
"real": np.float32,
Expand Down Expand Up @@ -1499,3 +1502,324 @@ def validate_and_resample_if_needed(
)

return ds_resampled, True


def generate_cmip7_to_cmip6_mapping(
version: str = "latest_stable",
output_path: Optional[str] = None
) -> Dict[str, str]:
"""
Generate a mapping from CMIP7 compound names to CMIP6 compound names.

This function uses the data_request_api to query the latest CMIP7 data request
and creates a mapping between CMIP7 and CMIP6 compound names. The mapping
is optionally saved to a JSON file.

Note: This function internally calls generate_both_cmip_mappings() for efficiency
and only returns the forward mapping.

Args:
version: Version of the data request to use (default: "latest_stable")
output_path: Optional path to save the JSON mapping file. If None,
saves to the vocabularies directory within the package.

Returns:
Dictionary mapping CMIP7 compound names to CMIP6 compound names

Example:
>>> mapping = generate_cmip7_to_cmip6_mapping()
>>> print(mapping["Amon.tas"]) # Should return corresponding CMIP6 name
"""
# Generate both mappings efficiently and return only the forward mapping
forward_mapping, _ = generate_both_cmip_mappings(
version=version,
forward_output_path=output_path,
reverse_output_path=None # Use default path for reverse mapping
)
return forward_mapping


def load_cmip7_to_cmip6_mapping(mapping_path: Optional[str] = None) -> Dict[str, str]:
"""
Load the CMIP7 to CMIP6 compound name mapping from a JSON file.

Args:
mapping_path: Optional path to the mapping JSON file. If None,
loads from the default location in the vocabularies directory.

Returns:
Dictionary mapping CMIP7 compound names to CMIP6 compound names

Raises:
FileNotFoundError: If the mapping file doesn't exist
json.JSONDecodeError: If the mapping file contains invalid JSON
"""
if mapping_path is None:
# Default to mappings directory within the package
import access_moppy
package_path = Path(access_moppy.__file__).parent
mappings_path = package_path / "mappings"
mapping_path = mappings_path / "cmip7_to_cmip6_compound_name_mapping.json"
else:
mapping_path = Path(mapping_path)

if not mapping_path.exists():
raise FileNotFoundError(
f"CMIP7 to CMIP6 mapping file not found: {mapping_path}. "
"Please run generate_cmip7_to_cmip6_mapping() first to create it."
)

with open(mapping_path, 'r', encoding='utf-8') as f:
data = json.load(f)

# Filter out metadata if present
mapping = {k: v for k, v in data.items() if not k.startswith('_')}

print(f"βœ“ Loaded mapping for {len(mapping)} variables from: {mapping_path}")
return mapping


def generate_cmip6_to_cmip7_mapping(
version: str = "latest_stable",
output_path: Optional[str] = None
) -> Dict[str, str]:
"""
Generate a reverse mapping from CMIP6 compound names to CMIP7 compound names.

This function uses the data_request_api to query the latest CMIP7 data request
and creates a reverse mapping between CMIP6 and CMIP7 compound names. The mapping
is optionally saved to a JSON file.

Note: This function internally calls generate_both_cmip_mappings() for efficiency
and only returns the reverse mapping.

Args:
version: Version of the data request to use (default: "latest_stable")
output_path: Optional path to save the JSON mapping file. If None,
saves to the vocabularies directory within the package.

Returns:
Dictionary mapping CMIP6 compound names to CMIP7 compound names

Example:
>>> reverse_mapping = generate_cmip6_to_cmip7_mapping()
>>> print(reverse_mapping["AERmon.abs550aer"]) # Should return corresponding CMIP7 name
"""
# Generate both mappings efficiently and return only the reverse mapping
_, reverse_mapping = generate_both_cmip_mappings(
version=version,
forward_output_path=None, # Use default path for forward mapping
reverse_output_path=output_path
)
return reverse_mapping


def load_cmip6_to_cmip7_mapping(mapping_path: Optional[str] = None) -> Dict[str, str]:
"""
Load the CMIP6 to CMIP7 compound name mapping from a JSON file.

Args:
mapping_path: Optional path to the mapping JSON file. If None,
loads from the default location in the vocabularies directory.

Returns:
Dictionary mapping CMIP6 compound names to CMIP7 compound names

Raises:
FileNotFoundError: If the mapping file doesn't exist
json.JSONDecodeError: If the mapping file contains invalid JSON
"""
if mapping_path is None:
# Default to mappings directory within the package
import access_moppy
package_path = Path(access_moppy.__file__).parent
mappings_path = package_path / "mappings"
mapping_path = mappings_path / "cmip6_to_cmip7_compound_name_mapping.json"
else:
mapping_path = Path(mapping_path)

if not mapping_path.exists():
raise FileNotFoundError(
f"CMIP6 to CMIP7 mapping file not found: {mapping_path}. "
"Please run generate_cmip6_to_cmip7_mapping() first to create it."
)

with open(mapping_path, 'r', encoding='utf-8') as f:
data = json.load(f)

# Filter out metadata if present
mapping = {k: v for k, v in data.items() if not k.startswith('_')}

print(f"βœ“ Loaded reverse mapping for {len(mapping)} variables from: {mapping_path}")
return mapping


def get_cmip_mapping_metadata(mapping_type: str = "forward") -> Dict:
"""
Get metadata about the CMIP7↔CMIP6 compound name mapping files.

Args:
mapping_type: Either "forward" (CMIP7β†’CMIP6) or "reverse" (CMIP6β†’CMIP7)

Returns:
Dictionary containing metadata about the mapping file

Raises:
ValueError: If mapping_type is not "forward" or "reverse"
FileNotFoundError: If the mapping file doesn't exist
"""
if mapping_type not in ["forward", "reverse"]:
raise ValueError("mapping_type must be 'forward' or 'reverse'")

import access_moppy
package_path = Path(access_moppy.__file__).parent
mappings_path = package_path / "mappings"

if mapping_type == "forward":
mapping_path = mappings_path / "cmip7_to_cmip6_compound_name_mapping.json"
else:
mapping_path = mappings_path / "cmip6_to_cmip7_compound_name_mapping.json"

if not mapping_path.exists():
raise FileNotFoundError(
f"Mapping file not found: {mapping_path}. "
f"Please run generate_both_cmip_mappings() first to create it."
)

with open(mapping_path, 'r', encoding='utf-8') as f:
data = json.load(f)

# Extract metadata
metadata = data.get('_metadata', {})

if not metadata:
return {"error": "No metadata found in file"}

return metadata


def generate_both_cmip_mappings(
version: str = "latest_stable",
forward_output_path: Optional[str] = None,
reverse_output_path: Optional[str] = None
) -> tuple[Dict[str, str], Dict[str, str]]:
"""
Generate both forward (CMIP7->CMIP6) and reverse (CMIP6->CMIP7) mappings efficiently.

This function queries the CMIP7 data request API once and generates both mappings
from the same data, which is more efficient than calling the individual functions separately.

Args:
version: Version of the data request to use (default: "latest_stable")
forward_output_path: Optional path for CMIP7->CMIP6 mapping JSON file
reverse_output_path: Optional path for CMIP6->CMIP7 mapping JSON file

Returns:
Tuple of (forward_mapping, reverse_mapping) dictionaries

Example:
>>> forward, reverse = generate_both_cmip_mappings()
>>> cmip6_name = forward["aerosol.abs550aer.tavg-u-hxy-u.mon.GLB"]
>>> cmip7_name = reverse[cmip6_name]
"""
print("Generating both CMIP7<->CMIP6 compound name mappings...")
print("This may take a moment as it queries the CMIP7 data request API...")

# Use the latest_stable version of the DR content (default)
content_dic = dt.get_transformed_content(
version=version,
force_variable_name="CMIP7 Compound Name"
)

# Create DataRequest object from the content
DR = dr.DataRequest.from_separated_inputs(**content_dic)

# Find all CMIP7 variables
cmip7_variables = DR.find_variables(operation='any', skip_if_missing=True)

# Create both mapping dictionaries
forward_mapping = {} # CMIP7 -> CMIP6
reverse_mapping = {} # CMIP6 -> CMIP7

for var in cmip7_variables:
if hasattr(var, 'cmip7_compound_name') and hasattr(var, 'cmip6_compound_name'):
if var.cmip7_compound_name and var.cmip6_compound_name:
cmip7_name = var.cmip7_compound_name.name
cmip6_name = var.cmip6_compound_name.name

# Forward mapping (CMIP7 -> CMIP6)
forward_mapping[cmip7_name] = cmip6_name

# Reverse mapping (CMIP6 -> CMIP7) - handle potential one-to-many
if cmip6_name in reverse_mapping:
# If CMIP6 name already exists, store as list
if isinstance(reverse_mapping[cmip6_name], str):
reverse_mapping[cmip6_name] = [reverse_mapping[cmip6_name], cmip7_name]
else:
reverse_mapping[cmip6_name].append(cmip7_name)
else:
reverse_mapping[cmip6_name] = cmip7_name

# Save forward mapping
if forward_output_path is None:
import access_moppy
package_path = Path(access_moppy.__file__).parent
mappings_path = package_path / "mappings"
forward_output_path = mappings_path / "cmip7_to_cmip6_compound_name_mapping.json"
else:
forward_output_path = Path(forward_output_path)

# Create metadata for the JSON files
from datetime import datetime
metadata = {
"_metadata": {
"description": "CMIP7 to CMIP6 compound name mapping",
"generated_by": "access_moppy.utilities.generate_both_cmip_mappings()",
"source": "CMIP7 Data Request API",
"data_request_version": version,
"generated_on": datetime.now().isoformat(),
"total_mappings": len(forward_mapping),
"usage": "Use access_moppy.utilities.load_cmip7_to_cmip6_mapping() to load this file"
}
}

reverse_metadata = {
"_metadata": {
"description": "CMIP6 to CMIP7 compound name mapping (reverse of forward mapping)",
"generated_by": "access_moppy.utilities.generate_both_cmip_mappings()",
"source": "CMIP7 Data Request API",
"data_request_version": version,
"generated_on": datetime.now().isoformat(),
"total_mappings": len(reverse_mapping),
"usage": "Use access_moppy.utilities.load_cmip6_to_cmip7_mapping() to load this file",
"note": "Some CMIP6 names may map to multiple CMIP7 names (stored as arrays)"
}
}

# Combine metadata with mappings
forward_data = {**metadata, **forward_mapping}
reverse_data = {**reverse_metadata, **reverse_mapping}

forward_output_path.parent.mkdir(parents=True, exist_ok=True)
with open(forward_output_path, 'w', encoding='utf-8') as f:
json.dump(forward_data, f, indent=2, sort_keys=True)

# Save reverse mapping
if reverse_output_path is None:
import access_moppy
package_path = Path(access_moppy.__file__).parent
mappings_path = package_path / "mappings"
reverse_output_path = mappings_path / "cmip6_to_cmip7_compound_name_mapping.json"
else:
reverse_output_path = Path(reverse_output_path)

reverse_output_path.parent.mkdir(parents=True, exist_ok=True)
with open(reverse_output_path, 'w', encoding='utf-8') as f:
json.dump(reverse_data, f, indent=2, sort_keys=True)

print(f"βœ“ Generated forward mapping for {len(forward_mapping)} variables")
print(f"βœ“ Saved forward mapping to: {forward_output_path}")
print(f"βœ“ Generated reverse mapping for {len(reverse_mapping)} variables")
print(f"βœ“ Saved reverse mapping to: {reverse_output_path}")

return forward_mapping, reverse_mapping
2 changes: 1 addition & 1 deletion src/access_moppy/vocabularies/CMIP7_CVs
Submodule CMIP7_CVs updated 473 files
Loading
Loading