Source code for DashAI.back.optimizers.hyperopt_optimizer

import importlib

from hyperopt import Trials, fmin, hp, rand, tpe  # noqa: F401

from DashAI.back.core.schema_fields import (
    BaseSchema,
    enum_field,
    int_field,
    schema_field,
)
from DashAI.back.optimizers.base_optimizer import BaseOptimizer


class HyperOptSchema(BaseSchema):
    # Configuration schema for the hyperopt-based optimizer.
    #
    # n_trials: number of evaluations per study; constrained to integers > 0.
    n_trials: schema_field(
        int_field(gt=0),
        placeholder=10,
        description="The parameter 'n_trials' is the quantity of trials "
        "per study. It must be of type positive integer.",
    )  # type: ignore
    # sampler: name of the hyperopt search algorithm; one of the enum values
    # listed below. The description previously documented an unrelated
    # kernel-coefficient parameter ('scale'/'auto'), which did not match the
    # accepted values of this field.
    sampler: schema_field(
        enum_field(enum=["tpe", "rand"]),
        placeholder="tpe",
        description="The parameter 'sampler' is the search algorithm used to "
        "suggest hyperparameter values. Must be in string format and can be "
        "'tpe' or 'rand'.",
    )  # type: ignore


class HyperOptOptimizer(BaseOptimizer):
    """Hyperparameter optimizer that drives a search with ``hyperopt.fmin``.

    The optimizer builds a hyperopt search space from ``[low, high]`` ranges,
    repeatedly sets the sampled values on the model, fits it on the train
    split and scores it on the validation split.
    """

    SCHEMA = HyperOptSchema

    COMPATIBLE_COMPONENTS = [
        "TabularClassificationTask",
        "TextClassificationTask",
        "TranslationTask",
    ]

    def __init__(self, n_trials=None, sampler=None):
        """
        Store the trial budget and resolve the sampling algorithm.

        Args:
            n_trials (int): value forwarded to ``fmin`` as ``max_evals``.
            sampler (str): name of the ``hyperopt`` submodule whose
                ``suggest`` function is used as search algorithm (the schema
                offers "tpe" and "rand"). ``None`` falls back to "tpe", the
                schema placeholder, instead of failing on the non-existent
                module ``hyperopt.None``.
        """
        self.n_trials = n_trials
        sampler_name = "tpe" if sampler is None else sampler
        self.sampler = importlib.import_module(f"hyperopt.{sampler_name}").suggest

    def search_space(self, hyperparams_data):
        """
        Configure the search space.

        Args:
            hyperparams_data (dict[str, any]): Dict with the range values
                for the possible search space. Each value is indexed as
                ``values[0]`` (lower bound) and ``values[1]`` (upper bound).

        Returns
        -------
            search_space: Dict mapping each hyperparameter name to a hyperopt
            expression. Integer ranges use ``hp.quniform`` with step 1, float
            ranges use ``hp.uniform``; ranges of any other type are skipped.
        """
        search_space = {}

        for hyperparameter, values in hyperparams_data.items():
            if isinstance(values[0], int):
                search_space[hyperparameter] = hp.quniform(
                    hyperparameter, values[0], values[1], 1
                )
            elif isinstance(values[0], float):
                search_space[hyperparameter] = hp.uniform(
                    hyperparameter, values[0], values[1]
                )

        return search_space

    def optimize(self, model, input_dataset, output_dataset, parameters, metric, task):
        """
        Optimization process

        Args:
            model (class): class for the model from the current experiment
            input_dataset (dict): dict with train dataset
            output_dataset (dict): dict with validation dataset
            parameters (dict): dict with the information to create the
                search space
            metric (class): mapping whose "class" entry provides ``score``
            task (string): Name of the current task. Kept for interface
                compatibility; the objective no longer depends on it.

        Returns
        -------
            None
        """
        self.model = model
        self.input_dataset = input_dataset
        self.output_dataset = output_dataset
        self.parameters = parameters
        self.metric = metric["class"]

        search_space = self.search_space(self.parameters)

        # hp.quniform yields floats (e.g. 5.0). Only hyperparameters declared
        # with an integer range are cast back to int; float hyperparameters
        # must keep their sampled value. Same predicate as in search_space.
        integer_params = {
            name
            for name, values in self.parameters.items()
            if isinstance(values[0], int)
        }

        def objective(params):
            # The same model instance is reconfigured and refitted on every
            # trial, so after the search it reflects the LAST trial evaluated.
            model_eval = self.model

            for key, value in params.items():
                if key in integer_params:
                    value = int(value)
                setattr(model_eval, key, value)

            model_eval.fit(self.input_dataset["train"], self.output_dataset["train"])
            y_pred = model_eval.predict(self.input_dataset["validation"])

            # NOTE(review): fmin minimises the returned value and the score is
            # passed through with a factor of 1. Confirm whether the metric is
            # lower-is-better or whether a factor of -1 was intended.
            score = 1 * self.metric.score(self.output_dataset["validation"], y_pred)
            return score

        trials = Trials()

        fmin(
            fn=objective,
            space=search_space,
            algo=self.sampler,
            max_evals=self.n_trials,
            trials=trials,
        )

        self.trials = trials

    def get_model(self):
        """Return the model instance that was passed to ``optimize``."""
        return self.model

    def get_trials_values(self):
        """
        Summarise the successful trials of the last ``optimize`` call.

        Returns
        -------
            list[dict]: one entry per trial whose result status is "ok", with
            the sampled "params" (first recorded value of each hyperparameter)
            and the trial loss under "value".
        """
        trials = []

        for trial in self.trials:
            if trial["result"]["status"] == "ok":
                params = {key: val[0] for key, val in trial["misc"]["vals"].items()}
                trials.append({"params": params, "value": trial["result"]["loss"]})

        return trials