from typing import List, Union
from datasets import DatasetDict
from DashAI.back.core.utils import MultilingualString
from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset
from DashAI.back.tasks.classification_task import ClassificationTask
from DashAI.back.types.categorical import Categorical
from DashAI.back.types.value_types import Text
[docs]
class TextClassificationTask(ClassificationTask):
    """Classification task that maps a text input to a categorical output.

    The class only declares task metadata and user-facing texts; dataset
    preparation is delegated to the parent ``ClassificationTask``.
    """

    # Declared input/output value types and their cardinalities (1 each).
    metadata: dict = {
        "inputs_types": [Text],
        "outputs_types": [Categorical],
        "inputs_cardinality": 1,
        "outputs_cardinality": 1,
    }

    # Task description in English ("en") and Spanish ("es").
    # The text inside the triple-quoted literals is runtime data: it is kept
    # flush-left on purpose so that no leading whitespace is added to it.
    DESCRIPTION: str = MultilingualString(
        en="""
Text classification is an essential Natural Language Processing (NLP) task that
involves automatically assigning pre-defined categories or labels to text documents
based on their content. It serves as the foundation for applications like sentiment
analysis, spam filtering, topic classification, and document categorization.
""",
        es="""
La clasificación de texto es una tarea esencial del Procesamiento de Lenguaje
Natural (PLN) que implica asignar automáticamente categorías o etiquetas
predefinidas a documentos de texto según su contenido. Sirve como base para
aplicaciones como el análisis de sentimientos, el filtrado de spam,
la clasificación de temas y la categorización de documentos.
""",
    )

    # Task name in English ("en") and Spanish ("es").
    DISPLAY_NAME: str = MultilingualString(
        en="Text Classification",
        es="Clasificación de Texto",
    )

    def prepare_for_task(
        self,
        dataset: Union[DatasetDict, DashAIDataset],
        input_columns: List[str],
        output_columns: List[str],
    ) -> DashAIDataset:
        """Prepare a dataset for this task through the parent implementation.

        No text-specific step is added here: the three arguments are handed
        unchanged to ``super().prepare_for_task`` and its result is returned
        as is.

        NOTE(review): the previous docstring stated that the dataset is
        converted to ``DashAIDataset``, its column types are checked and a
        copy is created. That work, if any, happens in the parent class,
        which is not visible from this file -- confirm there.

        Parameters
        ----------
        dataset : Union[DatasetDict, DashAIDataset]
            Dataset to be prepared.
        input_columns : List[str]
            Column names forwarded to the parent as the task inputs.
        output_columns : List[str]
            Column names forwarded to the parent as the task outputs.

        Returns
        -------
        DashAIDataset
            The value returned by the parent ``prepare_for_task``.
        """
        return super().prepare_for_task(dataset, input_columns, output_columns)