import os
import pathlib
import numpy as np
import pandas as pd
from beartype.typing import Any, Dict
from DashAI.back.core.schema_fields import bool_field, int_field, schema_field
from DashAI.back.core.utils import MultilingualString
from DashAI.back.dataloaders.classes.dashai_dataset import ( # ClassLabel, Value,
DashAIDataset,
)
from DashAI.back.dependencies.database.models import Explorer, Notebook
from DashAI.back.exploration.base_explorer import BaseExplorerSchema
from DashAI.back.exploration.preview_inspection_explorer import (
PreviewInspectionExplorer,
)
class RowExplorerSchema(BaseExplorerSchema):
    # Configuration fields for RowExplorer. The field names are the same keys
    # that RowExplorer.__init__ reads from its keyword arguments.

    # Maximum number of rows to return (integer field declared with gt=0).
    row_ammount: schema_field(
        t=int_field(gt=0),
        placeholder=50,
        alias=MultilingualString(en="Number of rows", es="Número de filas"),
        description=MultilingualString(
            en="Maximum number of rows to take.",
            es="Número máximo de filas a tomar.",
        ),
    )  # type: ignore

    # Whether the rows are shuffled before the subset is taken.
    shuffle: schema_field(
        t=bool_field(),
        placeholder=False,
        alias=MultilingualString(en="Shuffle rows", es="Barajar filas"),
        description=MultilingualString(
            en="Shuffle the rows when exploring.",
            es="Barajar las filas durante la exploración.",
        ),
    )  # type: ignore

    # True takes rows from the head of the dataset, False from the tail.
    from_top: schema_field(
        t=bool_field(),
        placeholder=True,
        alias=MultilingualString(en="From top", es="Desde el inicio"),
        description=MultilingualString(
            en=(
                "Take rows from the head of the dataset. "
                "Otherwise, take from the tail."
            ),
            es=(
                "Tomar filas desde el inicio del dataset. "
                "En caso contrario, tomarlas desde el final."
            ),
        ),
    )  # type: ignore
class RowExplorer(PreviewInspectionExplorer):
    """Explorer that shows a limited number of dataset rows in tabular form.

    Rows are taken from the top or the bottom of the dataset, optionally
    after shuffling all rows first. The result is stored as a JSON file and
    read back on demand by ``get_results``.
    """

    DISPLAY_NAME = MultilingualString(en="Show Rows", es="Mostrar Filas")
    DESCRIPTION = MultilingualString(
        en=(
            "Displays a subset of rows from the dataset in tabular form. You can "
            "take rows from the top or bottom and optionally shuffle them."
        ),
        es=(
            "Muestra un subconjunto de filas del dataset en formato tabular. "
            "Puede tomar filas desde el inicio o el final y opcionalmente "
            "barajarlas."
        ),
    )
    SHORT_DESCRIPTION = MultilingualString(
        en="Display a sample of rows from the dataset.",
        es="Muestra una muestra de filas del dataset.",
    )
    IMAGE_PREVIEW = "row_explorer.png"
    SCHEMA = RowExplorerSchema

    metadata: Dict[str, Any] = {
        "allowed_dtypes": ["*"],
        "restricted_dtypes": [],
        "input_cardinality": {"min": 1},
    }

    def __init__(self, **kwargs) -> None:
        """Store the exploration parameters and initialise the parent explorer.

        Parameters
        ----------
        **kwargs
            Explorer parameters. The keys read here are ``row_ammount``
            (default 50), ``shuffle`` (default True) and ``from_top``
            (default True). All keyword arguments are also forwarded
            unchanged to the parent constructor.
        """
        self.row_ammount = kwargs.get("row_ammount", 50)
        # NOTE(review): this fallback is True while RowExplorerSchema declares
        # a placeholder of False for ``shuffle``. Kept as-is so callers that
        # omit the key see no behaviour change; confirm which one is intended.
        self.shuffle = kwargs.get("shuffle", True)
        self.from_top = kwargs.get("from_top", True)
        super().__init__(**kwargs)

    def launch_exploration(
        self, dataset: DashAIDataset, __explorer_info__: Explorer
    ) -> pd.DataFrame:
        """Select the rows to display.

        Parameters
        ----------
        dataset : DashAIDataset
            Dataset to explore; it is converted to a pandas DataFrame.
        __explorer_info__ : Explorer
            Not used by this explorer.

        Returns
        -------
        pd.DataFrame
            At most ``row_ammount`` rows, taken from the head when
            ``from_top`` is true and from the tail otherwise.
        """
        frame = dataset.to_pandas()

        # Shuffling happens before slicing, so with shuffle enabled the
        # head/tail slice is a random subset of the rows.
        if self.shuffle:
            frame = frame.sample(frac=1)

        if self.from_top:
            return frame.head(self.row_ammount)
        return frame.tail(self.row_ammount)

    def save_notebook(
        self,
        __notebook_info__: Notebook,
        explorer_info: Explorer,
        save_path: pathlib.Path,
        result: pd.DataFrame,
    ) -> str:
        """Write the exploration result to ``<save_path>/<explorer id>.json``.

        Parameters
        ----------
        __notebook_info__ : Notebook
            Not used by this explorer.
        explorer_info : Explorer
            Explorer record; its ``id`` is used as the file name.
        save_path : pathlib.Path
            Directory in which the JSON file is written.
        result : pd.DataFrame
            DataFrame to serialise.

        Returns
        -------
        str
            POSIX-style path of the written file.
        """
        target = pathlib.Path(save_path) / f"{explorer_info.id}.json"
        result.to_json(target)
        return target.as_posix()

    def get_results(
        self, exploration_path: str, options: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Load a stored result and convert it for display.

        Parameters
        ----------
        exploration_path : str
            Path of the JSON file to read.
        options : Dict[str, Any]
            Display options. ``"orientation"`` selects the ``orient`` passed
            to ``DataFrame.to_dict`` and defaults to ``"dict"``.

        Returns
        -------
        Dict[str, Any]
            Mapping with ``"type"`` (always ``"tabular"``), ``"data"`` (the
            converted rows) and ``"config"`` (the orientation used).
        """
        result_type = "tabular"
        orientation = options.get("orientation", "dict")
        config = {"orient": orientation}

        stored = pd.read_json(pathlib.Path(exploration_path))
        # Replace NaN with None before converting to plain Python objects.
        data = stored.replace({np.nan: None}).to_dict(orient=orientation)

        return {"type": result_type, "data": data, "config": config}