import importlib
from hyperopt import Trials, fmin, hp, rand, tpe # noqa: F401
from DashAI.back.core.schema_fields import (
BaseSchema,
enum_field,
int_field,
schema_field,
)
from DashAI.back.optimizers.base_optimizer import BaseOptimizer
class HyperOptSchema(BaseSchema):
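    """Configuration schema for the HyperOpt optimizer (number of trials and sampler)."""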
n_trials: schema_field(
int_field(gt=0),
placeholder=10,
description="The parameter 'n_trials' is the quantity of trials"
"per study. It must be of type positive integer.",
) # type: ignore
sampler: schema_field(
enum_field(enum=["tpe", "rand"]),
placeholder="tpe",
description="Coefficient for 'rbf', 'poly' and 'sigmoid' kernels"
". Must be in string format and can be 'scale' or 'auto'.",
) # type: ignore
class HyperOptOptimizer(BaseOptimizer):
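    """Hyperparameter optimizer backed by the hyperopt library."""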
DISPLAY_NAME: str = "HyperOpt Optimizer"
COLOR: str = "#FF5722"
SCHEMA = HyperOptSchema
COMPATIBLE_COMPONENTS = [
"TabularClassificationTask",
"TextClassificationTask",
"TranslationTask",
]
def __init__(self, n_trials=None, sampler=None):
self.n_trials = n_trials
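        # Resolve the sampling algorithm by name: "tpe" maps to
        # hyperopt.tpe.suggest and "rand" to hyperopt.rand.suggest.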
self.sampler = importlib.import_module(f"hyperopt.{sampler}").suggest
def search_space(self, hyperparams_data):
"""
Configure the search space.
Args:
hyperparams_data (dict[str, any]): Dict with the range values
for the possible search space
Returns
-------
search_space: Dict with the information for the search space .
"""
search_space = {}
for _, hyperparameter, values, dtype in hyperparams_data:
if dtype == "integer":
search_space[hyperparameter] = hp.quniform(
hyperparameter, values[0], values[1], 1
)
elif dtype == "number":
search_space[hyperparameter] = hp.uniform(
hyperparameter, values[0], values[1]
)
return search_space
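    # Illustrative example (hypothetical component object `model`):
    #   search_space([(model, "C", (0.01, 10.0), "number"),
    #                 (model, "max_iter", (50, 500), "integer")])
    #   -> {"C": hp.uniform("C", 0.01, 10.0),
    #       "max_iter": hp.quniform("max_iter", 50, 500, 1)}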
def optimize(self, model, input_dataset, output_dataset, parameters, metric, task):
"""
Optimization process
Args:
model (class): class for the model from the current experiment
input_dataset (dict): dict with train dataset
output_dataset (dict): dict with validation dataset
parameters (dict): dict with the information to create the search space
metric (class): class for the metric to optimize
task (string): Name of the current task
Returns
-------
None
"""
self.model = model
self.input_dataset = input_dataset
self.output_dataset = output_dataset
self.parameters = parameters
self.metric = metric["class"]
param_mapping = {key: (obj, key) for obj, key, _, _ in self.parameters}
search_space = self.search_space(self.parameters)
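        # Objective evaluated on each trial: apply the sampled hyperparameters,
        # refit the model, and score it on the validation split. fmin minimizes,
        # so metrics that should be maximized are negated.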
def objective(params):
for param_name, value in params.items():
obj, key = param_mapping[param_name]
setattr(obj, key, value)
self.model.fit(self.input_dataset["train"], self.output_dataset["train"])
y_pred = self.model.predict(input_dataset["validation"])
score = self.metric.score(output_dataset["validation"], y_pred)
return -score if metric["metadata"]["maximize"] else score
trials = Trials()
fmin(
fn=objective,
space=search_space,
algo=self.sampler,
max_evals=self.n_trials,
trials=trials,
)
self.trials = trials
def get_model(self):
return self.model
def get_trials_values(self):
trials = []
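        # hyperopt stores each sampled value as a one-element list under
        # trial["misc"]["vals"]; keep only trials that finished successfully.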
for trial in self.trials:
if trial["result"]["status"] == "ok":
params = {key: val[0] for key, val in trial["misc"]["vals"].items()}
trials.append({"params": params, "value": trial["result"]["loss"]})
return trials
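# Minimal usage sketch (illustrative only; `model`, the dataset splits, and the
# metric dict below are hypothetical stand-ins for DashAI components):
#
#     optimizer = HyperOptOptimizer(n_trials=20, sampler="tpe")
#     parameters = [
#         (model, "C", (0.01, 10.0), "number"),
#         (model, "max_iter", (50, 500), "integer"),
#     ]
#     optimizer.optimize(
#         model,
#         input_dataset={"train": X_train, "validation": X_val},
#         output_dataset={"train": y_train, "validation": y_val},
#         parameters=parameters,
#         metric={"class": accuracy_metric, "metadata": {"maximize": True}},
#         task="TabularClassificationTask",
#     )
#     best_trials = optimizer.get_trials_values()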