# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""Convert NetCDF files to realizations."""
from improver import cli
[docs]
@cli.clizefy
@cli.with_output
def process(
    cube: cli.inputcube,
    raw_cube: cli.inputcube = None,
    *,
    realizations_count: int = None,
    sampling: str = "quantile",
    random_seed: int = None,
    tie_break: str = "random",
    ensure_evenly_spaced_realizations: bool = True,
    ignore_ecc_bounds_exceedance: bool = False,
    skip_ecc_bounds: bool = False,
    distribution: str = "gamma",
    nan_mask_value: float = 0.0,
    scale_percentiles_to_probability_lower_bound: bool = False,
):
    """Converts an incoming cube into one containing realizations.

    A cube that already has a realization coordinate is returned unchanged,
    before any of the other arguments are inspected. Otherwise the cube is
    first turned into percentiles (by resampling existing percentiles, or by
    converting probabilities), and those percentiles are then either reordered
    against the raw ensemble or rebadged as realizations.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed. Must contain either:

            * A realization dimension (returned unchanged), or
            * A percentile coordinate (resampled and optionally reordered), or
            * Probabilities (converted to percentiles, then resampled and
              optionally reordered).
        raw_cube (iris.cube.Cube):
            Cube of raw (not post processed) weather data.
            If this argument is given ensemble realizations will be created
            from percentiles by reshuffling them in correspondence to the rank
            order of the raw ensemble. Otherwise, the percentiles are rebadged
            as realizations. If the sampling option is set to "transformation",
            this cube will be used to fit a distribution to the data at each grid
            point, and the percentiles will be generated from this distribution.
            When realizations_count is not given, the number of points on this
            cube's realization coordinate is used instead.
        realizations_count (int):
            The number of ensemble realizations in the output. If not given,
            raw_cube must be provided so that the count can be taken from it.
        sampling (str):
            Type of sampling of the distribution to produce a set of
            percentiles e.g. quantile or random. Only used when the input cube
            contains probabilities.
            Accepted options for sampling are:

            * Quantile: A regular set of equally-spaced percentiles aimed
              at dividing a Cumulative Distribution Function into
              blocks of equal probability. This is the default option.
            * Random: A random set of ordered percentiles.
            * Transformation: Percentiles are generated by fitting a
              distribution to the data. This generates a different set of
              percentiles at each grid point, based on the local
              distribution of probabilities. Follows Schefzik et al., 2013.
        random_seed (int):
            Option to specify a value for the random seed when reordering percentiles.
            This value is for testing purposes only, to ensure reproducible outputs.
            It should not be used in real time operations as it may introduce a bias
            into the reordered forecasts. Only used when raw_cube is provided.
        tie_break (str):
            Option to specify the tie breaking method when reordering percentiles.
            The available methods are "random" (default), to tie-break randomly, and
            "realization", to tie-break by assigning values to the highest numbered
            realizations first. Only used when raw_cube is provided.
        ensure_evenly_spaced_realizations (bool):
            If True, the plugin will ensure that the output realizations are evenly
            spaced in percentile space, are centred on the 50th percentile, and
            partition the space. If False, no check is performed. True is the default.
            Only used when raw_cube is provided.
        ignore_ecc_bounds_exceedance (bool):
            If True where percentiles (calculated as an intermediate output
            before realization) exceed the ECC bounds range, raises a
            warning rather than an exception. False is the default.
        skip_ecc_bounds (bool):
            If True, ECC bounds are not included either when percentiles are resampled
            as an intermediate step prior to creating realizations or when probabilities
            are converted to percentiles as an intermediate step prior to creating
            realizations. This has the effect that percentiles outside of the range
            given by the input percentiles will be computed by nearest neighbour
            interpolation from the nearest available percentile, rather than using
            linear interpolation between the nearest available percentile and
            the ECC bound. False is the default.
        distribution (str):
            Valid if the "transformation" option is selected for sampling
            the probability distribution. Type of distribution to fit
            (currently only 'gamma' is supported).
        nan_mask_value (float):
            Valid if the "transformation" option is selected for
            sampling the probability distribution. Value to mask as NaN before
            calculating mean and std. This option might be most useful for a
            diagnostic, such as precipitation rate, where there is a high
            frequency of zero values. If None, no masking is performed.
            Default is 0.0.
        scale_percentiles_to_probability_lower_bound (bool):
            Valid if the "transformation" option is selected for sampling the
            probability distribution. If True, the minimum value of the calculated
            percentiles will be set to the minimum CDF probability implied by the
            input probabilities, rather than zero. This has the effect of restricting
            the percentiles to the non-zero part of the distribution, which is
            useful when there is a high probability of zero values (e.g., for
            precipitation). When False, percentiles are calculated over the
            full [0, 1] range, regardless of the input probabilities. Default is False.

    Returns:
        iris.cube.Cube:
            The processed cube.

    Raises:
        ValueError: If the input cube does not contain a percentile coordinate or
            probability coordinates that can then be converted to a realization
            coordinate.
        ValueError: If neither realizations_count nor raw_cube is provided.
    """
    # Imported here rather than at module level, as in the original layout of
    # this CLI module.
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        ConvertProbabilitiesToPercentiles,
        EnsembleReordering,
        RebadgePercentilesAsRealizations,
        ResamplePercentiles,
    )
    from improver.metadata.probabilistic import is_probability

    # Nothing to do if the cube already holds realizations.
    if cube.coords("realization"):
        return cube

    if not cube.coords("percentile") and not is_probability(cube):
        raise ValueError("Unable to convert to realizations:\n" + str(cube))

    if realizations_count is None:
        # Check for the missing argument explicitly. Catching AttributeError
        # around the coordinate lookup would also swallow unrelated attribute
        # errors and report them with this (then misleading) message.
        if raw_cube is None:
            msg = "Either realizations_count or raw_cube must be provided"
            raise ValueError(msg)
        realizations_count = len(raw_cube.coord("realization").points)

    # Step 1: obtain a percentile cube with one percentile per realization.
    if cube.coords("percentile"):
        percentiles = ResamplePercentiles(
            ecc_bounds_warning=ignore_ecc_bounds_exceedance,
            skip_ecc_bounds=skip_ecc_bounds,
        )(cube, no_of_percentiles=realizations_count)
    else:
        percentiles = ConvertProbabilitiesToPercentiles(
            ecc_bounds_warning=ignore_ecc_bounds_exceedance,
            skip_ecc_bounds=skip_ecc_bounds,
            distribution=distribution,
            nan_mask_value=nan_mask_value,
            scale_percentiles_to_probability_lower_bound=scale_percentiles_to_probability_lower_bound,
        )(
            cube,
            no_of_percentiles=realizations_count,
            sampling=sampling,
            intensity_cube=raw_cube,
        )

    # Step 2: turn the percentiles into realizations. With a raw ensemble the
    # percentiles are reordered against it; without one they are rebadged.
    if raw_cube is not None:
        result = EnsembleReordering(
            random_seed=random_seed,
            tie_break=tie_break,
            ensure_evenly_spaced_realizations=ensure_evenly_spaced_realizations,
        )(percentiles, raw_cube)
    else:
        result = RebadgePercentilesAsRealizations()(percentiles)
    return result