Source code for DashAI.back.converters.base_converter

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, Dict, Final, Type, Union

from DashAI.back.config_object import ConfigObject
from DashAI.back.core.schema_fields.base_schema import BaseSchema
from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset
from DashAI.back.types.dashai_data_type import DashAIDataType


class BaseConverterSchema(BaseSchema):
    """
    Base schema for converters, it defines the parameters to be used in each converter.

    The schema should be assigned to the converter class to define the parameters of
    its configuration.
    """


[docs] class BaseConverter(ConfigObject, ABC): """ Base class for all converters Converters are for modifying the data in a supervised or unsupervised way (e.g. by adding, changing, or removing columns, but not by adding or removing rows) """ TYPE: Final[str] = "Converter" DISPLAY_NAME: Final[str] = "" DESCRIPTION: Final[str] = "" SHORT_DESCRIPTION: Final[str] = "" IMAGE_PREVIEW: Final[str] = "" CATEGORY: Final[str] = "Other" COLOR: Final[str] = "rgb(255, 255, 255)" SUPERVISED: bool = False SCHEMA: BaseConverterSchema @classmethod def get_metadata(cls) -> Dict[str, Any]: """ Get metadata values for the current converter. Returns ------- Dict[str, Any] Dictionary with the metadata """ meta: Dict[str, Any] = dict(getattr(cls, "metadata", {}) or {}) meta["display_name"] = cls.DISPLAY_NAME if cls.DISPLAY_NAME else cls.__name__ meta["short_description"] = ( cls.SHORT_DESCRIPTION if cls.SHORT_DESCRIPTION else "" ) meta["image_preview"] = cls.IMAGE_PREVIEW if cls.IMAGE_PREVIEW else "" meta["category"] = cls.CATEGORY if cls.CATEGORY else "Other" meta["color"] = cls.COLOR if cls.COLOR else "rgb(255, 255, 255)" meta["supervised"] = cls.SUPERVISED return meta def changes_row_count(self) -> bool: """ Indicates if the converter changes the number of rows in the dataset. Samplers typically do, while most other transformers do not. """ return False @abstractmethod def get_output_type(self, column_name: str = None) -> DashAIDataType: """ Get the output type for a specific column after transformation. This method must be implemented by each converter to specify what type of data it produces. The converter should determine the output type based on its transformation logic. Parameters ---------- column_name : str, optional The name of the column to get the output type for. Useful for converters that may produce different types per column. Returns ------- DashAIDataType The output type after transformation for the specified column. """ raise NotImplementedError @abstractmethod def fit( self, x: DashAIDataset, y: Union[DashAIDataset, None] = None ) -> Type[BaseConverter]: """Fit the converter. This method should allow to validate the converter's parameters. Parameters ---------- X : DashAIDataset Training data y: DashAIDataset Target data for supervised learning Returns ---------- self The fitted converter object. """ raise NotImplementedError @abstractmethod def transform( self, x: DashAIDataset, y: Union[DashAIDataset, None] = None ) -> DashAIDataset: """Transform the dataset. Parameters ---------- X : DashAIDataset Dataset to be converted y: DashAIDataset Target vectors Returns ------- Dataset converted """ raise NotImplementedError