# Source code for improver.standardise
# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""Plugin to standardise metadata"""
import warnings
from typing import Any, Dict, List, Optional
import numpy as np
from iris.coords import CellMethod
from iris.cube import Cube
from iris.exceptions import AncillaryVariableNotFoundError, CoordinateNotFoundError
from numpy import dtype, ndarray
from improver import BasePlugin
from improver.metadata.amend import amend_attributes
from improver.metadata.check_datatypes import (
_is_time_coord,
check_units,
get_required_dtype,
get_required_units,
)
from improver.metadata.constants.time_types import TIME_COORDS
from improver.utilities.common_input_handle import as_cube
from improver.utilities.round import round_close
class StandardiseMetadata(BasePlugin):
    """Plugin that brings the metadata of a cube into a standard form."""

    def __init__(
        self,
        new_name: Optional[str] = None,
        new_units: Optional[str] = None,
        coords_to_remove: Optional[List[str]] = None,
        coord_modification: Optional[Dict[str, float]] = None,
        coord_attribute_modification: Optional[Dict[str, Dict[str, Any]]] = None,
        attributes_dict: Optional[Dict[str, Any]] = None,
        ancillary_variables_to_remove: Optional[List[str]] = None,
    ):
        """
        Store the optional, user-configurable metadata changes.

        Every argument defaults to None, in which case the corresponding
        step is skipped when the plugin is run.

        Args:
            new_name:
                Name to give the output cube.
            new_units:
                Units to which the output cube is converted.
            coords_to_remove:
                Names of scalar coordinates to strip from the output cube.
                Names that cannot be removed are ignored.
            coord_modification:
                Mapping of scalar coordinate name to the new value for that
                coordinate, e.g. {"height": 1.5}. The value is assumed to be
                in the units the coordinate already has. Use with extreme
                caution: this is intended for aligning e.g. temperatures
                defined at marginally different heights, where the
                difference is considered negligible. The points array is
                built directly from the supplied value, so its numeric type
                is inferred from that value (2 gives integer points, 2.0
                gives floating point points) ahead of the final dtype
                standardisation.
            coord_attribute_modification:
                Mapping of scalar coordinate name to a dictionary of
                attributes to set on that coordinate, in the form
                {"coord_name": {"attr_key": "attr_value", ...}, ...};
                e.g. {"height": {"positive": "up"}}. Several attributes per
                coordinate and several coordinates may be given. A
                coordinate that is not on the cube is silently skipped.
            attributes_dict:
                Mapping of cube attribute name to the change required.
                See improver.metadata.amend.amend_attributes for details.
            ancillary_variables_to_remove:
                Names of ancillary variables to strip from the output cube.
        """
        self._new_name = new_name
        self._new_units = new_units
        self._coords_to_remove = coords_to_remove
        self._coord_modification = coord_modification
        self._coord_attribute_modification = coord_attribute_modification
        self._attributes_dict = attributes_dict
        self._ancillary_variables_to_remove = ancillary_variables_to_remove

    @staticmethod
    def _collapse_scalar_dimensions(cube: Cube) -> Cube:
        """
        Drop every length-one dimension whose coordinate name does not
        contain "realization", by taking the single slice over that
        coordinate. The intent is that such coordinates end up as scalar
        (auxiliary) coordinates rather than dimensions.

        Args:
            cube: The cube

        Returns:
            The cube with its scalar dimensions collapsed
        """
        # Decide what to collapse before slicing, because slicing rebinds
        # "cube" to a new object on every pass of the second loop.
        scalar_dim_coords = [
            dim_coord
            for dim_coord in cube.coords(dim_coords=True)
            if len(dim_coord.points) == 1 and "realization" not in dim_coord.name()
        ]
        for dim_coord in scalar_dim_coords:
            cube = next(cube.slices_over(dim_coord))
        return cube

    @staticmethod
    def _remove_scalar_coords(cube: Cube, coords_to_remove: List[str]) -> None:
        """Strip the named coordinates from the cube in place; names for
        which removal raises CoordinateNotFoundError are ignored."""
        for coord_name in coords_to_remove:
            try:
                cube.remove_coord(coord_name)
            except CoordinateNotFoundError:
                # Best effort: nothing to remove for this name.
                pass

    @staticmethod
    def _remove_ancillary_variables(
        cube: Cube, ancillary_variables_to_remove: List[str]
    ) -> None:
        """Strip the named ancillary variables from the cube in place. A
        UserWarning is issued for each name that is not found."""
        for variable_name in ancillary_variables_to_remove:
            try:
                cube.remove_ancillary_variable(variable_name)
            except AncillaryVariableNotFoundError:
                message = (
                    f"Ancillary variable '{variable_name}' not found in cube "
                    f"'{cube.name()}'."
                )
                warnings.warn(message, UserWarning)

    @staticmethod
    def _modify_scalar_coord_value(
        cube: Cube, coord_modification: Dict[str, float]
    ) -> None:
        """Overwrite the points of each named scalar coordinate with the
        value supplied for it. The new value is taken to be in the
        coordinate's existing units. No dtype is enforced at this point;
        that is left to the dtype standardisation step that follows in
        process().

        Changes to dimension coordinates, values made up of more than one
        element, and time coordinates are all refused, since those offer
        greater scope for the data and its description to drift apart.

        Names that do not match a coordinate on the cube are silently
        skipped.

        Args:
            cube:
                Cube to be updated in place
            coord_modification:
                Mapping of coordinate name (key) to the value (value) that
                the coordinate should take.

        Raises:
            ValueError:
                If the target is a dimension coordinate, the supplied value
                has more than one element, or the target is a time
                coordinate.
        """
        for coord_name, new_value in coord_modification.items():
            if not cube.coords(coord_name):
                # Coordinate absent: nothing to modify.
                continue

            # The checks below are deliberately kept in this order, as it
            # determines which error is reported when several apply.
            if cube.coords(coord_name, dim_coords=True):
                raise ValueError(
                    "Modifying dimension coordinate values is not allowed "
                    "due to the risk of introducing errors."
                )
            if hasattr(new_value, "__len__") and len(new_value) > 1:
                raise ValueError(
                    "Modifying multi-valued coordinates is not allowed. "
                    "This functionality should be used only for very "
                    "modest changes to scalar coordinates."
                )

            target_coord = cube.coord(coord_name)
            if _is_time_coord(target_coord):
                raise ValueError("Modifying time coordinates is not allowed.")
            target_coord.points = np.array([new_value])

    @staticmethod
    def _modify_scalar_coord_attributes(
        cube: Cube, coord_attribute_modification: Dict[str, Dict[str, Any]]
    ) -> None:
        """Update the attributes of each named scalar coordinate with the
        key / value pairs supplied for it.

        Attribute changes on dimension coordinates and on time coordinates
        are refused, since those offer greater scope to harm data
        integrity.

        Names that do not match a coordinate on the cube are silently
        skipped.

        Args:
            cube:
                Cube to be updated in place
            coord_attribute_modification:
                Mapping of coordinate name (key) to a dictionary of the
                attributes to set on it (value). For example:
                {"height": {"positive": "up"}}

        Raises:
            ValueError:
                If the target is a dimension coordinate or a time
                coordinate.
        """
        for coord_name, new_attributes in coord_attribute_modification.items():
            if not cube.coords(coord_name):
                # Coordinate absent: nothing to modify.
                continue

            if cube.coords(coord_name, dim_coords=True):
                raise ValueError(
                    "Modifying attributes of dimension coordinate "
                    f"'{coord_name}' is not allowed due to the risk of "
                    "introducing errors."
                )

            target_coord = cube.coord(coord_name)
            if _is_time_coord(target_coord):
                raise ValueError(
                    "Modifying attributes of time coordinate "
                    f"'{coord_name}' is not allowed."
                )
            target_coord.attributes.update(new_attributes)

    @staticmethod
    def _standardise_dtypes_and_units(cube: Cube) -> None:
        """
        Bring the cube data and every coordinate into line with the
        mandatory dtype and unit standards, modifying the cube in place.

        Args:
            cube:
                Cube to be updated in place
        """

        def cast_if_needed(values: ndarray, target_dtype: dtype) -> ndarray:
            """
            Cast an array to the target dtype only when it differs.

            Args:
                values:
                    The array to check
                target_dtype:
                    The dtype required

            Returns:
                A cast copy, or the input itself when already compliant
            """
            return (
                values.astype(target_dtype)
                if values.dtype != target_dtype
                else values
            )

        cube.data = cast_if_needed(cube.data, get_required_dtype(cube))

        for coord in cube.coords():
            # Only coordinates listed in TIME_COORDS have their units changed.
            if coord.name() in TIME_COORDS and not check_units(coord):
                coord.convert_units(get_required_units(coord))

            target_dtype = get_required_dtype(coord)
            # Values are rounded ahead of any cast to an integer type; points
            # and bounds are both taken to the same dtype.
            round_first = np.issubdtype(target_dtype, np.integer)

            if round_first:
                coord.points = round_close(coord.points)
            coord.points = cast_if_needed(coord.points, target_dtype)

            if not coord.has_bounds():
                continue
            if round_first:
                coord.bounds = round_close(coord.bounds)
            coord.bounds = cast_if_needed(coord.bounds, target_dtype)

    @staticmethod
    def _discard_redundant_cell_methods(cube: Cube) -> None:
        """
        Drop the cell method "point": "time" from the cube where present,
        leaving all other cell methods in place.
        """
        if not cube.cell_methods:
            return

        redundant = [CellMethod(method="point", coords="time")]
        cube.cell_methods = [
            cell_method
            for cell_method in cube.cell_methods
            if cell_method not in redundant
        ]

    @staticmethod
    def _remove_long_name_if_standard_name(cube: Cube) -> None:
        """
        Clear the long_name of a cube that also has a standard_name set.
        """
        if not cube.standard_name or not cube.long_name:
            return
        cube.long_name = None

    def process(self, cube: Cube) -> Cube:
        """
        Apply the compulsory and the user-configured metadata adjustments.

        The compulsory adjustments are:
          - to collapse any scalar dimensions apart from realization (which
            is expected always to be a dimension);
          - to remove cell method ("point": "time");
          - to remove the long_name where a standard_name is also set;
          - to cast the cube data and coordinates into suitable datatypes;
          - to convert time-related metadata into the required units.

        The user-configured adjustments are those supplied when the plugin
        was instantiated; any left unset are skipped.

        Args:
            cube:
                Input cube to be standardised (passed through as_cube first)

        Returns:
            The processed cube
        """
        cube = as_cube(cube)

        # Removals first, then collapse whatever scalar dimensions remain.
        if self._coords_to_remove:
            self._remove_scalar_coords(cube, self._coords_to_remove)
        if self._ancillary_variables_to_remove:
            self._remove_ancillary_variables(
                cube, self._ancillary_variables_to_remove
            )
        cube = self._collapse_scalar_dimensions(cube)

        # Optional, user-requested changes.
        if self._new_name:
            cube.rename(self._new_name)
        if self._new_units:
            cube.convert_units(self._new_units)
        if self._coord_modification:
            self._modify_scalar_coord_value(cube, self._coord_modification)
        if self._coord_attribute_modification:
            self._modify_scalar_coord_attributes(
                cube, self._coord_attribute_modification
            )
        if self._attributes_dict:
            amend_attributes(cube, self._attributes_dict)

        # Compulsory clean-up.
        self._discard_redundant_cell_methods(cube)
        self._remove_long_name_if_standard_name(cube)

        # Dtype enforcement has to come after the unit conversion above:
        # converting the units of an integer field yields float64 data.
        self._standardise_dtypes_and_units(cube)

        return cube