"""Base Metric abstract class."""
from typing import Any, Dict, Final, Tuple, Union
import numpy as np
from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset
class BaseMetric:
"""Abstract class of all metrics."""
TYPE: Final[str] = "Metric"
MAXIMIZE: Final[bool] = False
metadata: Dict[str, Any] = {}
@classmethod
def get_metadata(cls) -> Dict[str, Any]:
"""
Get metadata values for the current metric.
Returns
-------
Dict[str, Any]
            Dictionary with the metric's metadata, including the ``maximize`` flag.
"""
meta: Dict[str, Any] = dict(getattr(cls, "metadata", {}) or {})
meta["maximize"] = cls.MAXIMIZE
return meta
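
# Illustrative usage (hypothetical subclass, not part of this module): a concrete
# metric typically overrides MAXIMIZE and, optionally, metadata, e.g.
#
#     class Accuracy(BaseMetric):
#         MAXIMIZE = True
#         metadata = {"needs_probabilities": True}
#
#     Accuracy.get_metadata()
#     # -> {"needs_probabilities": True, "maximize": True}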
METRICS_MAP = {
"classification": ["Accuracy", "F1", "Precision", "Recall"],
"regression": ["RMSE", "MAE"],
"translation": ["Bleu", "Ter"],
}
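
# Note: METRICS_MAP is only used as a lookup table; a metric name such as "RMSE"
# resolves to the "regression" category, which in turn decides how predictions
# are validated and prepared below.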
def validate_inputs(
true: Union[np.ndarray, list], pred: Union[np.ndarray, list], metric_category: str
) -> None:
"""Validate inputs.
Parameters
----------
true: ndarray, list
True labels.
    pred: ndarray, list
Predicted labels by the model.
metric_category: str
        The metric category used to choose the appropriate error message when
        the label lengths do not match.
"""
if len(true) != len(pred):
if metric_category in ["classification", "translation"]:
raise ValueError(
"The length of the true labels and the predicted labels must be equal, "
f"given: len(true_labels) = {len(true)} and "
f"len(pred_labels) = {len(pred)}."
)
elif metric_category in ["regression"]:
raise ValueError(
"The length of the true and the predicted values must be equal, "
f"given: len(true_values) = {len(true)} and "
f"len(pred_values) = {len(pred)}."
)
def prepare_to_metric(
y: DashAIDataset,
pre_pred: Union[np.ndarray, list],
metric_type: str,
) -> Tuple[np.ndarray, Union[np.ndarray, list]]:
"""Prepare true and prediced labels to be used later in metrics.
Parameters
----------
y : DashAIDataset
A DashAIDataset with the output columns of the data.
    pre_pred : Union[np.ndarray, list]
        The raw model predictions. For classification, a two-dimensional array
        in which each column represents a class and each row holds the
        probability that the example belongs to that class. For regression, a
        one-dimensional array of predicted values; for translation, a list of
        predicted sentences.
    metric_type : str
        The name of the metric to be used; it determines how the labels are
        prepared.
Returns
-------
    Tuple[np.ndarray, Union[np.ndarray, list]]
        A tuple with the true labels (as a NumPy array) and the prepared
        predictions.
"""
column_name = y.column_names[0]
_pred = None
_true = None
for metric_category, metrics in METRICS_MAP.items():
if metric_type in metrics:
if metric_category == "classification":
if not isinstance(pre_pred, np.ndarray):
raise TypeError(
f"Expected np.ndarray for regression, got {type(pre_pred)}"
)
_true = np.array(y[column_name])
validate_inputs(_true, pre_pred, metric_category)
_pred = np.argmax(pre_pred, axis=1)
elif metric_category == "regression":
if not isinstance(pre_pred, np.ndarray):
raise TypeError(
f"Expected np.ndarray for regression, got {type(pre_pred)}"
)
_true = np.array(y[column_name])
validate_inputs(_true, pre_pred, metric_category)
_pred = pre_pred
elif metric_category == "translation":
if not isinstance(pre_pred, list):
raise TypeError(
f"Expected list for translation, got {type(pre_pred)}"
)
_true = np.array(y[column_name])
validate_inputs(_true, pre_pred, metric_category)
_pred = pre_pred
    if _pred is None:
        raise ValueError(f"Unsupported metric type: {metric_type}")
    return _true, _pred
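
# Illustrative classification flow (hypothetical data; construction of the
# DashAIDataset `y` omitted): prepare_to_metric reduces a probability matrix to
# class indices with argmax before the metric is computed.
#
#     probs = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
#     true, pred = prepare_to_metric(y, probs, "Accuracy")
#     # pred -> array([1, 0, 1]); true -> labels from y's first output column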