Source code for improver.cli.quantile_mapping

#!/usr/bin/env python
# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""CLI to apply quantile mapping"""

from improver import cli



[docs]
@cli.clizefy
@cli.with_output
def process(
    *cubes: cli.inputcube,
    reference_attribute: str,
    preservation_threshold: float = None,
    method: str = "step",
):
    """Map forecast values onto the statistical distribution of reference data.

    The statistical distributions are built from all data across the spatial
    domain, rather than by matching values separately at each location as
    grid-point methods do. This is particularly useful where forecasts have been
    smoothed and realistic variation in the values needs to be restored while
    the spatial patterns are preserved.

    Args:
        cubes:
            The cubes holding the forecasts and the reference data to be used
            for calibration. All cubes must share the same cube name; they are
            separated using the reference attribute. The reference cube defines
            what the "correct" distribution should look like, and the forecast
            cube holds the data to be corrected (e.g. smoothed model output).
        reference_attribute:
            An attribute and its value, written as "attribute=value", which
            must be present on a cube for it to be identified as reference
            data. Every remaining cube is treated as forecast data.
        preservation_threshold:
            Optional threshold value. Forecast values below this threshold
            (exclusive) are left unadjusted. Useful for variables such as
            precipitation, where small/zero values may need to be preserved.
        method:
            Selects how forecast values are converted into quantiles before
            being mapped onto the reference distribution. The two options are
            'step' and 'continuous', which differ in three ways:
            1. The quantile assigned to ranked data ('plotting positions').
            - 'step' uses rank/number of points (i/n). This corresponds to the
            formal ECDF definition and treats the largest value as the 1.0
            quantile (100th percentile).
            - 'continuous' uses midpoint plotting positions ((i-0.5)/n). These
            place values in the centre of their rank intervals and avoid
            probabilities of exactly 0 or 1.
            2. The way probabilities are mapped back to values.
            - 'step' uses flooring, so each probability maps to the nearest
            lower observed value in the reference distribution, which creates
            the step-function mapping.
            - 'continuous' uses interpolation, giving a smoother mapping in
            which small changes in probability lead to small changes in value.
            3. The treatment of repeated values.
            - 'step' assigns the same quantile to repeated values, so they all
            map to the same value in the reference distribution (creating flat
            steps in the mapping).
            - 'continuous' assigns different quantiles to repeated values,
            spreading them evenly across their range, so they can map to
            different values in the reference distribution.

    Returns:
        The calibrated forecast cube, with its quantiles mapped to the
        reference distribution.

    Raises:
        ValueError:
            If reference and forecast cubes have incompatible units.
    """
    # Imported inside the function rather than at module level, matching the
    # original layout of this CLI.
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.quantile_mapping import QuantileMapping

    # The helper returns three items; only the first two (forecast, reference)
    # are used here and the third is discarded.
    forecast, reference, _ = split_forecasts_and_truth(cubes, reference_attribute)

    # Note the argument order expected by the plugin: reference first, then
    # forecast.
    return QuantileMapping(
        preservation_threshold=preservation_threshold,
        method=method,
    ).process(reference, forecast)