# Source code for orion.algo.skopt.bayes

# pylint: disable = no-name-in-module
# -*- coding: utf-8 -*-
"""
:mod:`orion.algo.skopt.bayes` -- Perform bayesian optimization
==============================================================

.. module:: bayes
   :platform: Unix
   :synopsis: Use Gaussian Process regression to locally search for a minimum.

"""
import contextlib
import copy
import logging
from collections import defaultdict

import numpy as np
from orion.algo.base import BaseAlgorithm
from orion.algo.parallel_strategy import strategy_factory
from orion.core.utils import format_trials
from skopt import Optimizer, Space
from skopt.learning import GaussianProcessRegressor
from skopt.space import Real

log = logging.getLogger(__name__)


def orion_space_to_skopt_space(orion_space):
    """Build a skopt ``Space`` mirroring the dimensions of an Oríon space.

    Every dimension of ``orion_space`` becomes a skopt ``Real`` dimension with a
    uniform prior, named after its key and bounded by the values returned by
    ``dimension.interval()``.

    Parameters
    ----------
    orion_space : mapping of name -> dimension
        Each dimension must expose ``interval()`` and ``shape``.

    Returns
    -------
    skopt.Space
        Space with one ``Real`` dimension per entry, in iteration order.

    """
    skopt_dims = []
    for name, dim in orion_space.items():
        lower, upper = dim.interval()
        dim_shape = dim.shape
        # Only scalar dimensions (empty shape) or single-element ones are accepted.
        # NOTE(review): the comparison is against a *list*; a tuple shape ``(1,)``
        # does not equal ``[1]`` and would fail here -- confirm the type of
        # ``dimension.shape``.
        assert not dim_shape or dim_shape == [1]
        if dim_shape:
            # Single-element dimension: the bounds are indexable, take the first entry.
            lower, upper = lower[0], upper[0]
        skopt_dims.append(Real(name=name, prior="uniform", low=lower, high=upper))

    return Space(skopt_dims)


class BayesianOptimizer(BaseAlgorithm):
    """Wrapper around skopt's Bayesian optimizer.

    Parameters
    ----------
    space : ``orion.algo.space.Space``
       Problem's definition
    seed: int (default: None)
       Seed used for the random number generator
    n_initial_points : int (default: 10)
       Number of evaluations of trials with initialization points
       before approximating it with `base_estimator`. Points provided as
       ``x0`` count as initialization points. If ``len(x0) < n_initial_points``
       additional points are sampled at random.
    acq_func : str (default: gp_hedge)
       Function to minimize over the posterior distribution. Can be:
       ``["LCB", "EI", "PI", "gp_hedge", "EIps", "PIps"]``. Check skopt
       docs for details.
    alpha : float or array-like (default: 1e-10)
       Value added to the diagonal of the kernel matrix during fitting.
       Larger values correspond to increased noise level in the observations
       and reduce potential numerical issue during fitting. If an array is
       passed, it must have the same number of entries as the data used for
       fitting and is used as datapoint-dependent noise level. Note that this
       is equivalent to adding a WhiteKernel with c=alpha. Allowing to specify
       the noise level directly as a parameter is mainly for convenience and
       for consistency with Ridge.
    n_restarts_optimizer : int (default: 0)
       The number of restarts of the optimizer for finding the kernel's
       parameters which maximize the log-marginal likelihood. The first run
       of the optimizer is performed from the kernel's initial parameters,
       the remaining ones (if any) from thetas sampled log-uniform randomly
       from the space of allowed theta-values. If greater than 0, all bounds
       must be finite. Note that n_restarts_optimizer == 0 implies that one
       run is performed.
    noise: str (default: "gaussian")
       If set to "gaussian", then it is assumed that y is a noisy estimate of f(x) where the
       noise is gaussian.
    normalize_y : bool (default: False)
       Whether the target values y are normalized, i.e., the mean of the
       observed target values become zero. This parameter should be set to
       True if the target values' mean is expected to differ considerably from
       zero. When enabled, the normalization effectively modifies the GP's
       prior based on the data, which contradicts the likelihood principle;
       normalization is thus disabled per default.
    parallel_strategy: dict or None, optional
        The configuration of a parallel strategy to use for pending trials or broken trials.
        Default is a StatusBasedParallelStrategy using a MaxParallelStrategy for broken
        trials and a MaxParallelStrategy as the default strategy for every other status.
    convergence_duplicates: int, optional
        Number of duplicate points the algorithm may sample before considering itself as done.
        Default: 5.

    """

    requires_type = "real"
    requires_dist = "linear"
    requires_shape = "flattened"

    # pylint: disable = too-many-arguments
    def __init__(
        self,
        space,
        seed=None,
        n_initial_points=10,
        acq_func="gp_hedge",
        alpha=1e-10,
        n_restarts_optimizer=0,
        noise="gaussian",
        normalize_y=False,
        parallel_strategy=None,
        convergence_duplicates=5,
    ):
        if parallel_strategy is None:
            # A fresh dict is built on every call, so the default is never shared.
            parallel_strategy = {
                "of_type": "StatusBasedParallelStrategy",
                "strategy_configs": {
                    "broken": {
                        "of_type": "MaxParallelStrategy",
                    },
                },
                "default_strategy": {"of_type": "MaxParallelStrategy"},
            }

        self.strategy = strategy_factory.create(**parallel_strategy)

        # These attributes are created before delegating to the base class.
        self.rng = None
        self._optimizer_state = {}
        self._suggested = []

        super(BayesianOptimizer, self).__init__(
            space,
            seed=seed,
            n_initial_points=n_initial_points,
            acq_func=acq_func,
            alpha=alpha,
            n_restarts_optimizer=n_restarts_optimizer,
            noise=noise,
            normalize_y=normalize_y,
            parallel_strategy=parallel_strategy,
            convergence_duplicates=convergence_duplicates,
        )

    @property
    def space(self):
        """Return transformed space of the BO"""
        return self._space

    @space.setter
    def space(self, space):
        """Set the space of the BO, remembering the previously held space.

        The space held before the assignment is kept as ``self._original``;
        ``is_done`` reads its cardinality.
        """
        self._original = self._space
        self._space = space

    @contextlib.contextmanager
    def get_optimizer(self):
        """Context manager yielding a skopt ``Optimizer`` rebuilt from the saved state.

        A new ``Optimizer`` is created on every call. Previously saved ``gains_``
        are restored on it and every point returned by ``get_data`` is replayed
        through ``tell``. When the ``with`` body finishes without raising, the
        optimizer's ``gains_`` (if present) are stored back in
        ``self._optimizer_state``.
        """
        optimizer = Optimizer(
            base_estimator=GaussianProcessRegressor(
                alpha=self.alpha,
                n_restarts_optimizer=self.n_restarts_optimizer,
                noise=self.noise,
                normalize_y=self.normalize_y,
                random_state=self.rng.randint(0, np.iinfo(np.int32).max),
            ),
            random_state=self.rng,
            dimensions=orion_space_to_skopt_space(self.space),
            n_initial_points=self.n_initial_points,
            acq_func=self.acq_func,
            model_queue_size=1,
        )
        if "gains_" in self._optimizer_state:
            optimizer.gains_ = self._optimizer_state["gains_"]
        points, results = self.get_data()
        if points:
            optimizer.tell(points, results)

        yield optimizer

        # We keep gains_ to rebuild the Optimizer based on copy() method here:
        # https://github.com/scikit-optimize/scikit-optimize/blob/0.7.X/skopt/optimizer/optimizer.py#L272
        if hasattr(optimizer, "gains_"):
            self._optimizer_state["gains_"] = optimizer.gains_

    def seed_rng(self, seed):
        """Seed the state of the random number generator.

        The generator is created on the first call and re-seeded in place afterwards.

        :param seed: Integer seed for the random number generator.
        """
        if self.rng is None:
            self.rng = np.random.RandomState(seed)
        else:
            self.rng.seed(seed)

    @property
    def state_dict(self):
        """Return a state dict that can be used to reset the state of the algorithm.

        On top of the base class state it holds deep copies of the RNG state, the
        parallel strategy state, the list of suggested points and the saved
        optimizer state.
        """
        state_dict = copy.deepcopy(super(BayesianOptimizer, self).state_dict)

        state_dict["rng_state"] = copy.deepcopy(self.rng.get_state())
        state_dict["strategy"] = copy.deepcopy(self.strategy.state_dict)
        state_dict["_suggested"] = copy.deepcopy(self._suggested)
        state_dict["_optimizer_state"] = copy.deepcopy(self._optimizer_state)

        return state_dict

    def set_state(self, state_dict):
        """Reset the state of the algorithm based on the given state_dict

        Everything is deep-copied, so the given ``state_dict`` is not shared with
        the algorithm.

        :param state_dict: Dictionary representing state of an algorithm
        """
        super(BayesianOptimizer, self).set_state(copy.deepcopy(state_dict))

        self.strategy.set_state(copy.deepcopy(state_dict["strategy"]))
        self.rng.set_state(copy.deepcopy(state_dict["rng_state"]))
        self._suggested = copy.deepcopy(state_dict["_suggested"])
        self._optimizer_state = copy.deepcopy(state_dict["_optimizer_state"])

    def suggest(self, num=None):
        """Suggest a `num`ber of new sets of parameters.

        Parameters
        ----------
        num: int, optional
            Number of new trials requested. ``None`` is treated as 1.

        Returns
        -------
        list
            Newly registered trials. The list may hold fewer than ``num`` trials
            if the algorithm becomes done while sampling.

        """
        if num is None:
            # ``len(samples) < None`` raises TypeError on Python 3; ask for one trial.
            num = 1

        samples = []
        with self.get_optimizer() as optimizer:
            while len(samples) < num and not self.is_done:
                new_point = optimizer.ask()

                # Every asked point is recorded, duplicates included: ``is_done``
                # counts repeated points to detect convergence.
                self._suggested.append(new_point)
                optimizer.tell(new_point, self.get_y(new_point))

                trial = format_trials.tuple_to_trial(new_point, self.space)

                if not self.has_suggested(trial):
                    self.register(trial)
                    samples.append(trial)

        return samples

    def get_data(self):
        """Get points with result or fake result if not completed

        Returns a deep copy of the suggested points and, in the same order, the
        objective value given by ``get_y`` for each of them.
        """
        points = copy.deepcopy(self._suggested)
        results = [self.get_y(point) for point in points]

        return points, results

    def get_y(self, point):
        """Get result or fake result if trial not completed

        The objective is read from the registered trial when it has been observed,
        otherwise from the trial inferred by the parallel strategy.
        """
        trial = format_trials.tuple_to_trial(point, self.space)
        if self.has_observed(trial):
            return self._trials_info[self.get_id(trial)][0].objective.value

        return self.strategy.infer(trial).objective.value

    def observe(self, trials):
        """Observe evaluation `results` corresponding to list of `points` in
        space.

        The trials are passed to the parallel strategy and then registered.
        """
        self.strategy.observe(trials)
        for trial in trials:
            self.register(trial)

    @property
    def is_done(self):
        """Whether the algorithm is done and will not make further suggestions.

        Return True if any of the following holds:

        - a single point has been sampled at least ``convergence_duplicates`` times;
        - the number of suggested trials reached the cardinality of the original
          space (the space held before the ``space`` setter was last used, or the
          current space if the setter was never used);
        - the number of suggested trials reached ``max_trials``, when that
          attribute is set.
        """
        hits = defaultdict(int)
        for point in self._suggested:
            hits[self.get_id(format_trials.tuple_to_trial(point, self.space))] += 1

        if hits and max(hits.values()) >= self.convergence_duplicates:
            return True

        # ``_original`` only exists once the ``space`` setter has run; fall back to
        # the current space instead of raising AttributeError.
        original_space = getattr(self, "_original", self._space)
        if self.n_suggested >= original_space.cardinality:
            return True

        if self.n_suggested >= getattr(self, "max_trials", float("inf")):
            return True

        return False