# Source code for DashAI.back.models.scikit_learn.hist_gradient_boosting_classifier

from sklearn.ensemble import (
    HistGradientBoostingClassifier as _HistGradientBoostingClassifier,
)

from DashAI.back.core.schema_fields import (
    BaseSchema,
    optimizer_float_field,
    optimizer_int_field,
    schema_field,
)
from DashAI.back.models.scikit_learn.sklearn_like_classifier import (
    SklearnLikeClassifier,
)
from DashAI.back.models.tabular_classification_model import TabularClassificationModel


class HistGradientBoostingClassifierSchema(BaseSchema):
    """A gradient boosting classifier is a machine learning algorithm that combines
    multiple weak prediction models (typically decision trees) to create a strong
    predictive model by training the models sequentially, in which each new model is
    focused on correcting the errors made by the previous ones.
    """

    # Hyperparameter schema for the HistGradientBoostingClassifier wrapper.
    # Each field pairs a validated, optimizable numeric type with a placeholder
    # dict ("optimize", "fixed_value", "lower_bound", "upper_bound") and a
    # description string.  The class docstring and the description strings are
    # kept verbatim because they may be surfaced outside this module
    # (presumably in the generated schema / UI -- confirm against BaseSchema).

    # NOTE(review): scikit-learn requires learning_rate to be strictly
    # positive, while ge=0.0 still admits 0.0.  Tighten to a strict bound if
    # optimizer_float_field supports one.
    learning_rate: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.1,
            "lower_bound": 0.1,
            "upper_bound": 1,
        },
        description="The learning rate, also known as shrinkage. This is used as a "
        "multiplicative factor for the leaves values. Use 1 for no shrinkage.",
    )  # type: ignore

    # scikit-learn rejects max_iter < 1, so the lower bound is 1 (not 0):
    # an invalid value is refused by the schema instead of failing at fit time.
    max_iter: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 100,
            "lower_bound": 100,
            "upper_bound": 250,
        },
        description="The maximum number of iterations of the boosting process, i.e. "
        "the maximum number of trees for binary classification.",
    )  # type: ignore

    # scikit-learn rejects max_depth < 1, so the lower bound is 1 (not 0).
    # NOTE(review): the description is inherited from scikit-learn, where the
    # default is None (unconstrained); here the placeholder default is 1.
    max_depth: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 1,
            "lower_bound": 1,
            "upper_bound": 10,
        },
        description="The maximum depth of each tree. The depth of a tree is the "
        "number of edges to go from the root to the deepest leaf. Depth isn’t "
        "constrained by default.",
    )  # type: ignore

    # ge=2 matches the "strictly greater than 1" requirement in the description.
    # NOTE(review): the description mentions None, but the field is declared
    # as an int field -- confirm whether None is actually accepted.
    max_leaf_nodes: schema_field(
        optimizer_int_field(ge=2),
        placeholder={
            "optimize": False,
            "fixed_value": 31,
            "lower_bound": 10,
            "upper_bound": 40,
        },
        description="The maximum number of leaves for each tree. Must be strictly "
        "greater than 1. If None, there is no maximum limit.",
    )  # type: ignore

    min_samples_leaf: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 20,
            "lower_bound": 2,
            "upper_bound": 25,
        },
        description="The minimum number of samples required to be at a leaf node.",
    )  # type: ignore

    l2_regularization: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0,
            "lower_bound": 0.0,
            "upper_bound": 1.0,
        },
        description="The L2 regularization parameter. Use 0 for no regularization.",
    )  # type: ignore


class HistGradientBoostingClassifier(
    TabularClassificationModel, SklearnLikeClassifier, _HistGradientBoostingClassifier
):
    """Scikit-learn's HistGradientBoostingClassifier wrapper for DashAI."""

    # Schema class describing the hyperparameters this model accepts.
    SCHEMA = HistGradientBoostingClassifierSchema

    def __init__(self, **kwargs) -> None:
        """Initialise the model, forwarding all keyword arguments unchanged.

        Parameters
        ----------
        **kwargs
            Keyword arguments passed as-is to ``super().__init__``.
        """
        super().__init__(**kwargs)