Source code for improver.standardise

# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""Plugin to standardise metadata"""

import warnings
from typing import Any, Dict, List, Optional

import numpy as np
from iris.coords import CellMethod
from iris.cube import Cube
from iris.exceptions import AncillaryVariableNotFoundError, CoordinateNotFoundError
from numpy import dtype, ndarray

from improver import BasePlugin
from improver.metadata.amend import amend_attributes
from improver.metadata.check_datatypes import (
    _is_time_coord,
    check_units,
    get_required_dtype,
    get_required_units,
)
from improver.metadata.constants.time_types import TIME_COORDS
from improver.utilities.common_input_handle import as_cube
from improver.utilities.round import round_close


class StandardiseMetadata(BasePlugin):
    """Plugin to standardise cube metadata"""

    def __init__(
        self,
        new_name: Optional[str] = None,
        new_units: Optional[str] = None,
        coords_to_remove: Optional[List[str]] = None,
        coord_modification: Optional[Dict[str, float]] = None,
        coord_attribute_modification: Optional[Dict[str, Dict[str, Any]]] = None,
        attributes_dict: Optional[Dict[str, Any]] = None,
        ancillary_variables_to_remove: Optional[List[str]] = None,
    ):
        """
        Instantiate our class for standardising cube metadata.

        Args:
            new_name:
                Optional rename for output cube
            new_units:
                Optional unit conversion for output cube
            coords_to_remove:
                Optional list of scalar coordinates to remove from output cube
            coord_modification:
                Optional dictionary used to directly modify the values of
                scalar coordinates. To be used with extreme caution.
                For example this dictionary might take the form:
                {"height": 1.5} to set the height coordinate to have a value
                of 1.5m (assuming original units of m).
                This can be used to align e.g. temperatures defined at slightly
                different heights where this difference is considered small
                enough to ignore. Type is inferred, so providing a value of 2
                will result in an integer type, whilst a value of 2.0 will
                result in a float type.
            coord_attribute_modification:
                Optional dictionary used to modify attributes of scalar
                coordinates. Takes the form:
                {"coord_name": {"attr_key": "attr_value", ...}, ...}
                For example: {"height": {"positive": "up"}} to set the
                positive attribute on the height coordinate. Multiple
                attributes can be set on a single coordinate, and multiple
                coordinates can be modified. If the coordinate does not exist,
                the modification is silently skipped.
            attributes_dict:
                Optional dictionary of required attribute updates. Keys are
                attribute names, and values are the required changes.
                See improver.metadata.amend.amend_attributes for details.
            ancillary_variables_to_remove:
                Optional list of ancillary variable names to remove from the
                output cube.
        """
        self._new_name = new_name
        self._new_units = new_units
        self._coords_to_remove = coords_to_remove
        self._coord_modification = coord_modification
        self._coord_attribute_modification = coord_attribute_modification
        self._attributes_dict = attributes_dict
        self._ancillary_variables_to_remove = ancillary_variables_to_remove

    @staticmethod
    def _collapse_scalar_dimensions(cube: Cube) -> Cube:
        """
        Demote any scalar dimensions (excluding "realization") on the input
        cube to auxiliary coordinates.

        Args:
            cube:
                The cube

        Returns:
            The collapsed cube
        """
        # Collect the length-one dimension coordinates up front, because the
        # cube is rebound to a new slice on every pass of the second loop.
        coords_to_collapse = [
            coord
            for coord in cube.coords(dim_coords=True)
            if len(coord.points) == 1 and "realization" not in coord.name()
        ]
        for coord in coords_to_collapse:
            # Take the first slice over the coordinate; the coordinate has a
            # single point, so this is the only slice there is.
            cube = next(cube.slices_over(coord))
        return cube

    @staticmethod
    def _remove_scalar_coords(cube: Cube, coords_to_remove: List[str]) -> None:
        """
        Removes named coordinates from the input cube. Coordinates that are
        not present on the cube are ignored.

        Args:
            cube:
                Cube to be updated in place
            coords_to_remove:
                Names of the coordinates to remove
        """
        for coord in coords_to_remove:
            try:
                cube.remove_coord(coord)
            except CoordinateNotFoundError:
                # A missing coordinate is not an error: removal is best-effort.
                pass

    @staticmethod
    def _remove_ancillary_variables(
        cube: Cube, ancillary_variables_to_remove: List[str]
    ) -> None:
        """
        Removes named ancillary variables from the input cube.

        Args:
            cube:
                Cube to be updated in place
            ancillary_variables_to_remove:
                Names of the ancillary variables to remove

        Warns:
            UserWarning: For each named ancillary variable that is not
                present on the cube.
        """
        for var in ancillary_variables_to_remove:
            try:
                cube.remove_ancillary_variable(var)
            except AncillaryVariableNotFoundError:
                warnings.warn(
                    f"Ancillary variable '{var}' not found in cube '{cube.name()}'.",
                    UserWarning,
                )

    @staticmethod
    def _modify_scalar_coord_value(
        cube: Cube, coord_modification: Dict[str, float]
    ) -> None:
        """
        Modifies the value of each specified scalar coord (dictionary key)
        to the provided value (dictionary value). Note that data types are
        not enforced here as the subsequent enforcement step will fulfil this
        requirement. Units are assumed to be the same as the original
        coordinate value.

        Modifying multi-valued coordinates or time coordinates is specifically
        prevented as there is greater scope to harm data integrity (i.e. the
        description of the data and the data becoming misaligned).

        If the coordinate does not exist the modification request is silently
        skipped.

        Args:
            cube:
                Cube to be updated in place
            coord_modification:
                Dictionary defining the coordinates (keys) to be modified
                and the values (values) to which they should be set.

        Raises:
            ValueError: If attempting to modify a dimension coordinate, a
                multi-valued coordinate (either the supplied value or the
                target coordinate holds more than one value), or a time
                coordinate.
        """
        multi_valued_msg = (
            "Modifying multi-valued coordinates is not allowed. "
            "This functionality should be used only for very "
            "modest changes to scalar coordinates."
        )
        for coord, value in coord_modification.items():
            if not cube.coords(coord):
                # Requests for absent coordinates are skipped by design.
                continue
            if cube.coords(coord, dim_coords=True):
                raise ValueError(
                    "Modifying dimension coordinate values is not allowed "
                    "due to the risk of introducing errors."
                )
            if hasattr(value, "__len__") and len(value) > 1:
                raise ValueError(multi_valued_msg)
            target = cube.coord(coord)
            if _is_time_coord(target):
                raise ValueError("Modifying time coordinates is not allowed.")
            # Guard the coordinate itself as well as the supplied value: a
            # scalar value aimed at a multi-valued auxiliary coordinate
            # would otherwise only fail inside the points assignment below,
            # with an error that does not explain the restriction.
            if target.points.size > 1:
                raise ValueError(multi_valued_msg)
            target.points = np.array([value])

    @staticmethod
    def _modify_scalar_coord_attributes(
        cube: Cube, coord_attribute_modification: Dict[str, Dict[str, Any]]
    ) -> None:
        """
        Modifies the attributes of each specified scalar coordinate.

        Modifying attributes of dimension coordinates or time coordinates is
        specifically prevented as there is greater scope to harm data
        integrity.

        If the coordinate does not exist the modification request is silently
        skipped.

        Args:
            cube:
                Cube to be updated in place
            coord_attribute_modification:
                Dictionary defining the coordinates (keys) and attributes
                (nested dict values) to be modified.
                For example: {"height": {"positive": "up"}}

        Raises:
            ValueError: If attempting to modify attributes on a dimension
                coordinate or a time coordinate.
        """
        for coord_name, attrs_dict in coord_attribute_modification.items():
            if not cube.coords(coord_name):
                # Requests for absent coordinates are skipped by design.
                continue
            if cube.coords(coord_name, dim_coords=True):
                raise ValueError(
                    "Modifying attributes of dimension coordinate "
                    f"'{coord_name}' is not allowed due to the risk of "
                    "introducing errors."
                )
            target = cube.coord(coord_name)
            if _is_time_coord(target):
                raise ValueError(
                    "Modifying attributes of time coordinate "
                    f"'{coord_name}' is not allowed."
                )
            # Existing attributes with the same key are overwritten; all
            # other attributes on the coordinate are left as they are.
            target.attributes.update(attrs_dict)

    @staticmethod
    def _standardise_dtypes_and_units(cube: Cube) -> None:
        """
        Modify input cube in place to conform to mandatory dtype and unit
        standards.

        Args:
            cube:
                Cube to be updated in place
        """

        def as_correct_dtype(obj: ndarray, required_dtype: dtype) -> ndarray:
            """
            Returns an object updated if necessary to the required dtype

            Args:
                obj:
                    The object to be updated
                required_dtype:
                    The dtype required

            Returns:
                The updated object
            """
            if obj.dtype != required_dtype:
                return obj.astype(required_dtype)
            return obj

        cube.data = as_correct_dtype(cube.data, get_required_dtype(cube))
        for coord in cube.coords():
            if coord.name() in TIME_COORDS and not check_units(coord):
                coord.convert_units(get_required_units(coord))
            req_dtype = get_required_dtype(coord)
            # Integer targets are passed through round_close before the cast
            # (presumably so values are rounded rather than truncated by
            # astype - confirm against improver.utilities.round).
            needs_rounding = np.issubdtype(req_dtype, np.integer)
            # ensure points and bounds have the same dtype
            if needs_rounding:
                coord.points = round_close(coord.points)
            coord.points = as_correct_dtype(coord.points, req_dtype)
            if coord.has_bounds():
                if needs_rounding:
                    coord.bounds = round_close(coord.bounds)
                coord.bounds = as_correct_dtype(coord.bounds, req_dtype)

    @staticmethod
    def _discard_redundant_cell_methods(cube: Cube) -> None:
        """
        Removes cell method "point": "time" from cube if present.

        Args:
            cube:
                Cube to be updated in place
        """
        if not cube.cell_methods:
            return
        removable_cms = [CellMethod(method="point", coords="time")]
        cube.cell_methods = [
            cm for cm in cube.cell_methods if cm not in removable_cms
        ]

    @staticmethod
    def _remove_long_name_if_standard_name(cube: Cube) -> None:
        """
        Remove the long_name attribute from cubes if the cube also has a
        standard_name defined

        Args:
            cube:
                Cube to be updated in place
        """
        if cube.standard_name and cube.long_name:
            cube.long_name = None

    def process(self, cube: Cube) -> Cube:
        """
        Perform compulsory and user-configurable metadata adjustments. The
        compulsory adjustments are:

        - to collapse any scalar dimensions apart from realization (which is
          expected always to be a dimension);
        - to cast the cube data and coordinates into suitable datatypes;
        - to convert time-related metadata into the required units;
        - to remove cell method ("point": "time");
        - to remove the long_name where a standard_name is also defined.

        Args:
            cube:
                Input cube to be standardised

        Returns:
            The processed cube
        """
        cube = as_cube(cube)
        if self._coords_to_remove:
            self._remove_scalar_coords(cube, self._coords_to_remove)
        if self._ancillary_variables_to_remove:
            self._remove_ancillary_variables(cube, self._ancillary_variables_to_remove)
        cube = self._collapse_scalar_dimensions(cube)

        if self._new_name:
            cube.rename(self._new_name)
        if self._new_units:
            cube.convert_units(self._new_units)
        if self._coord_modification:
            self._modify_scalar_coord_value(cube, self._coord_modification)
        if self._coord_attribute_modification:
            self._modify_scalar_coord_attributes(
                cube, self._coord_attribute_modification
            )
        if self._attributes_dict:
            amend_attributes(cube, self._attributes_dict)
        self._discard_redundant_cell_methods(cube)
        self._remove_long_name_if_standard_name(cube)

        # this must be done after unit conversion as if the input is an integer
        # field, unit conversion outputs the new data as float64
        self._standardise_dtypes_and_units(cube)

        return cube