Source code for improver.utilities.statistical

# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""Module to contain methods for handling statistical operations."""

from typing import List, Optional, Union

import numpy as np
from iris.cube import Cube, CubeList

from improver import BasePlugin
from improver.metadata.utilities import create_new_diagnostic_cube



[docs]
class DistributionalParameters(BasePlugin):
    """
    Class for estimating distributional parameters given some statistics.
    """


[docs]
    def __init__(
        self,
        distribution: str = "norm",
        truncation_points: Optional[list[float]] = None,
    ):
        """
        Initialize class for estimating distributional parameters.

        Args:
            distribution:
                The distribution for which parameters are to be estimated. The default
                is a normal distribution.
            truncation_points:
                List containing the lower and upper truncation points for a truncated
                normal distribution.
        """
        self.distribution_dict = {
            "norm": self._normal_parameters,
            "truncnorm": self._truncated_normal_parameters,
            "gamma": self._gamma_parameters,
        }

        if distribution in self.distribution_dict.keys():
            self.distribution = distribution
        else:
            raise ValueError(
                f"Distribution '{distribution}' is not supported. "
                f"Supported distributions are: {list(self.distribution_dict.keys())}"
            )

        if self.distribution != "truncnorm" and truncation_points is not None:
            raise ValueError(
                "Truncation points should not be provided for non-truncated normal "
                f"distributions. The following distribution was chosen: {distribution}."
            )
        else:
            self.truncation_points = truncation_points



[docs]
    @staticmethod
    def _normal_parameters(
        mean: np.array, sd: np.array
    ) -> tuple[None, np.array, np.array]:
        """
        Estimate parameters for a normal distribution given mean and standard deviation
        cubes.

        Args:
            mean:
                Array of mean values.
            sd:
                Array of standard deviation values.

        Returns:
            Arrays containing location and scale parameters of a normal distribution.
        """
        shape = None
        loc = mean
        scale = sd
        return shape, loc, scale



[docs]
    def _truncated_normal_parameters(
        self, mean: np.array, sd: np.array
    ) -> tuple[List[np.array], np.array, np.array]:
        """
        Estimate parameters for a truncated normal distribution given mean and standard
        deviation cubes.

        Args:
            mean:
                Array of mean values.
            sd:
                Array of standard deviation values.

        Returns:
            Arrays containing location and scale parameters of a truncated normal
            distribution.

        Raises:
            ValueError:
                If truncation points are not provided or if the number of truncation
                points is not equal to two.
        """
        if self.truncation_points is None or len(self.truncation_points) != 2:
            raise ValueError(
                "Upper and lower truncation points must be provided for truncated "
                "normal distribution. The following truncation points were provided: "
                f"{self.truncation_points}."
            )

        shape = [
            np.full_like(mean, self.truncation_points[0]),
            np.full_like(mean, self.truncation_points[1]),
        ]
        loc = mean
        scale = sd
        return shape, loc, scale



[docs]
    @staticmethod
    def _gamma_parameters(
        mean: np.array, sd: np.array
    ) -> tuple[np.array, np.array, np.array]:
        """
        Estimate parameters for a gamma distribution given mean and standard deviation
        cubes. The estimation method is based on the method of moments, as described in
        Wilks (2019).

        Args:
            mean:
                Array of mean values.
            sd:
                Array of standard deviation values.

        Returns:
            Arrays containing shape and scale parameters of a gamma distribution.

        References:
            Wilks, D. S., 2019: Statistical Methods in the Atmospheric Sciences,
            Academic Press.
        """
        shape = (mean / sd) ** 2
        loc = np.full_like(mean, 0)
        scale = sd**2 / mean
        return shape, loc, scale



[docs]
    def process(
        self,
        mean_cube: Cube,
        sd_cube: Cube,
    ) -> tuple[Union[CubeList, Cube, None], Union[Cube, None], Union[Cube, None]]:
        """
        Estimate distributional parameters given mean and variance cubes.

        Args:
            mean_cube:
                Cube containing the mean values.
            sd_cube:
                Cube containing the standard deviation values.

        Returns:
            The shape, location and scale parameter cubes. The shape parameter(s) may
            be a cubelist if multiple shape parameters are returned. Any of the
            parameters may be None if not applicable for the chosen distribution.
        """
        shape, loc, scale = self.distribution_dict.get(self.distribution)(
            mean_cube.data, sd_cube.data
        )

        shape_parameter = location_parameter = scale_parameter = None

        if shape is not None:
            shape = shape if isinstance(shape, list) else [shape]
            shape_parameter = CubeList()
            for arr in shape:
                shape_parameter.append(
                    create_new_diagnostic_cube(
                        "shape_parameter",
                        mean_cube.units,
                        mean_cube,
                        mean_cube.attributes,
                        data=arr,
                    )
                )
            shape_parameter = (
                shape_parameter[0] if len(shape_parameter) == 1 else shape_parameter
            )

        if loc is not None:
            location_parameter = create_new_diagnostic_cube(
                "location_parameter",
                mean_cube.units,
                mean_cube,
                mean_cube.attributes,
                data=loc,
            )
        if scale is not None:
            scale_parameter = create_new_diagnostic_cube(
                "scale_parameter",
                mean_cube.units,
                mean_cube,
                mean_cube.attributes,
                data=scale,
            )

        return shape_parameter, location_parameter, scale_parameter