Strategy Serialization with BoFire

Imports

from pydantic import TypeAdapter

import bofire.strategies.api as strategies
from bofire.benchmarks.multi import DTLZ2
from bofire.benchmarks.single import Himmelblau
from bofire.data_models.acquisition_functions.api import qLogNEI
from bofire.data_models.domain.api import Domain, Outputs
from bofire.data_models.kernels.api import RBFKernel, ScaleKernel
from bofire.data_models.strategies.api import AnyStrategy
from bofire.data_models.strategies.api import MoboStrategy as MoboStrategyDataModel
from bofire.data_models.strategies.api import RandomStrategy as RandomStrategyDataModel
from bofire.data_models.strategies.api import SoboStrategy as SoboStrategyDataModel
from bofire.data_models.surrogates.api import BotorchSurrogates, SingleTaskGPSurrogate
from bofire.surrogates.diagnostics import CvResults2CrossValidationValues
from bofire.surrogates.trainable import TrainableSurrogate

Single Objective Problem Setup

# Himmelblau comes from bofire.benchmarks.single; ten input points are
# sampled from its domain.
benchmark = Himmelblau()
samples = benchmark.domain.inputs.sample(n=10)

# this is the training data: benchmark.f applied to the sampled inputs
experiments = benchmark.f(samples, return_complete=True)

# these are the pending candidates: two more input samples that are handed
# to the predictive strategy via add_candidates further below
pending_candidates = benchmark.domain.inputs.sample(2)

Random Strategy

The random strategy and other strategies that inherit only from Strategy and not from PredictiveStrategy are special: they do not need output features defined in the domain, and they do not need a call to tell before the ask. Furthermore, they only provide input features in the candidates and no predictions for output features.

# set up the data model; the domain is built from the benchmark's inputs
# only, so its outputs are empty (see the dumped json)
domain = Domain(inputs=benchmark.domain.inputs)
strategy_data = RandomStrategyDataModel(domain=domain)

# we generate the json spec of the strategy data model
jspec = strategy_data.model_dump_json()

jspec
'{"type":"RandomStrategy","domain":{"type":"Domain","inputs":{"type":"Inputs","features":[{"type":"ContinuousInput","key":"x_1","unit":null,"bounds":[-6.0,6.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_2","unit":null,"bounds":[-6.0,6.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false}]},"outputs":{"type":"Outputs","features":[]},"constraints":{"type":"Constraints","constraints":[]}},"seed":null,"fallback_sampling_method":"UNIFORM","n_burnin":1000,"n_thinning":32,"num_base_samples":null,"max_iters":1000}'
# load it: validate the json spec against AnyStrategy to get the data model
# back
strategy_data = TypeAdapter(AnyStrategy).validate_json(jspec)

# map it: turn the data model into the strategy object that is asked below
strategy = strategies.map(strategy_data)

# ask it for five candidates; no tell is needed for the random strategy
df_candidates = strategy.ask(candidate_count=5)

# transform to spec: convert the asked candidates into Candidate objects
candidates = strategy.to_candidates(df_candidates)

candidates
[Candidate(inputValues={'x_1': InputValue(value='0.8651593484935756'), 'x_2': InputValue(value='3.2179862936359847')}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value='3.40303202836882'), 'x_2': InputValue(value='1.8749816544282965')}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value='-0.8215049438667723'), 'x_2': InputValue(value='4.744926393825569')}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value='3.6498191378282776'), 'x_2': InputValue(value='-0.38668342458943883')}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value='-3.410834067118959'), 'x_2': InputValue(value='-4.030675085931512')}, outputValues=None)]

SOBO Strategy

Set up the strategy's data model.

# set up the data model; unlike the random strategy, the complete benchmark
# domain (inputs and outputs) is passed, together with the qLogNEI
# acquisition function
strategy_data = SoboStrategyDataModel(
    domain=benchmark.domain,
    acquisition_function=qLogNEI(),
)

# we generate the json spec of the strategy data model
jspec = strategy_data.model_dump_json()

jspec
'{"type":"SoboStrategy","domain":{"type":"Domain","inputs":{"type":"Inputs","features":[{"type":"ContinuousInput","key":"x_1","unit":null,"bounds":[-6.0,6.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_2","unit":null,"bounds":[-6.0,6.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false}]},"outputs":{"type":"Outputs","features":[{"type":"ContinuousOutput","key":"y","unit":null,"objective":{"type":"MinimizeObjective","w":1.0,"bounds":[0.0,1.0]}}]},"constraints":{"type":"Constraints","constraints":[]}},"seed":null,"acquisition_optimizer":{"prefer_exhaustive_search_for_purely_categorical_domains":true,"type":"BotorchOptimizer","n_restarts":20,"n_raw_samples":1024,"maxiter":2000,"batch_limit":20,"sequential":false,"local_search_config":null},"surrogate_specs":{"surrogates":[{"hyperconfig":{"type":"SingleTaskGPHyperconfig","hyperstrategy":"FractionalFactorialStrategy","inputs":{"type":"Inputs","features":[{"type":"CategoricalInput","key":"kernel","categories":["rbf","matern_1.5","matern_2.5"],"allowed":[true,true,true]},{"type":"CategoricalInput","key":"prior","categories":["mbo","threesix","hvarfner"],"allowed":[true,true,true]},{"type":"CategoricalInput","key":"scalekernel","categories":["True","False"],"allowed":[true,true]},{"type":"CategoricalInput","key":"ard","categories":["True","False"],"allowed":[true,true]}]},"n_iterations":null,"target_metric":"MAE","lengthscale_constraint":null,"outputscale_constraint":null},"aggregations":null,"type":"SingleTaskGPSurrogate","inputs":{"type":"Inputs","features":[{"type":"ContinuousInput","key":"x_1","unit":null,"bounds":[-6.0,6.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_2","unit":null,"bounds":[-6.0,6.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false}]},"outputs":{"type":"Outputs","features":[{"type":"ContinuousOutput","key":"y","unit":null,"objective":{"type":"MinimizeObjective","
w":1.0,"bounds":[0.0,1.0]}}]},"input_preprocessing_specs":{},"dump":null,"categorical_encodings":{},"scaler":"NORMALIZE","output_scaler":"STANDARDIZE","kernel":{"type":"RBFKernel","features":null,"ard":true,"lengthscale_prior":{"type":"DimensionalityScaledLogNormalPrior","loc":1.4142135623730951,"loc_scaling":0.5,"scale":1.7320508075688772,"scale_scaling":0.0},"lengthscale_constraint":null},"noise_prior":{"type":"LogNormalPrior","loc":-4.0,"scale":1.0}}]},"outlier_detection_specs":null,"min_experiments_before_outlier_check":1,"frequency_check":1,"frequency_hyperopt":0,"folds":5,"include_infeasible_exps_in_acqf_calc":false,"acquisition_function":{"type":"qLogNEI","prune_baseline":true,"n_mc_samples":512}}'

As SOBO is a predictive strategy, training data has to be provided before candidates can be requested.

# load it: validate the json spec against AnyStrategy to get the data model
# back
strategy_data = TypeAdapter(AnyStrategy).validate_json(jspec)

# map it: turn the data model into the strategy object used below
strategy = strategies.map(strategy_data)

# tell it the pending candidates if present
if pending_candidates is not None:
    strategy.add_candidates(pending_candidates)

# tell it the training data; a predictive strategy needs this before ask
strategy.tell(experiments=experiments)

# ask it for two new candidates
df_candidates = strategy.ask(candidate_count=2)

# transform to spec: convert the asked candidates into Candidate objects,
# which here also carry predictions for the output
candidates = strategy.to_candidates(df_candidates)

candidates
[Candidate(inputValues={'x_1': InputValue(value='6.0'), 'x_2': InputValue(value='-6.0')}, outputValues={'y': OutputValue(predictedValue='101.39178733113579', standardDeviation=53.6608463712698, objective=-101.39178733113579)}),
 Candidate(inputValues={'x_1': InputValue(value='-6.0'), 'x_2': InputValue(value='6.0')}, outputValues={'y': OutputValue(predictedValue='116.97685531377866', standardDeviation=53.72331757127347, objective=-116.97685531377866)})]

We can also save the trained models of the strategy; for more information, see the model_serial.ipynb notebook. It is possible that the dumps command fails here. A fix for this is already in the main branch of the linear_operator package or, if it is not there yet, should be available in main soon.

# json spec of the first surrogate's data model
jsurrogate_spec = strategy_data.surrogate_specs.surrogates[0].model_dump_json()
# dump of the corresponding surrogate held by the strategy (the trained model)
dump = strategy.surrogates.surrogates[0].dumps()

MOBO Strategy

As an example of a multiobjective strategy, we use the MoboStrategy here. Related strategies are Qparego, MultiplicativeSobo, etc. To use it, we first have to generate a multiobjective domain.

# DTLZ2 comes from bofire.benchmarks.multi; dim=6 yields the six inputs
# x_0..x_5 seen in the dumped json
benchmark = DTLZ2(dim=6)
# twenty sampled input points and the benchmark evaluated on them serve as
# training data
samples = benchmark.domain.inputs.sample(n=20)
experiments = benchmark.f(samples, return_complete=True)
# two more input samples that are handed to the strategy as pending candidates
pending_candidates = benchmark.domain.inputs.sample(2)

Now the strategy spec is set up. Note that we can define there exactly which model to use.

# set up the data model; only the surrogate for the first output is given
# explicitly (a scaled RBF kernel with ard switched off). The dumped json
# also contains a surrogate for f_1 that is not passed here.
strategy_data = MoboStrategyDataModel(
    domain=benchmark.domain,
    surrogate_specs=BotorchSurrogates(
        surrogates=[
            SingleTaskGPSurrogate(
                inputs=benchmark.domain.inputs,
                outputs=Outputs(features=[benchmark.domain.outputs[0]]),
                kernel=ScaleKernel(base_kernel=RBFKernel(ard=False)),
            ),
        ],
    ),
)

# we generate the json spec of the strategy data model
jspec = strategy_data.model_dump_json()

jspec
'{"type":"MoboStrategy","domain":{"type":"Domain","inputs":{"type":"Inputs","features":[{"type":"ContinuousInput","key":"x_0","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_1","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_2","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_3","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_4","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_5","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false}]},"outputs":{"type":"Outputs","features":[{"type":"ContinuousOutput","key":"f_0","unit":null,"objective":{"type":"MinimizeObjective","w":1.0,"bounds":[0.0,1.0]}},{"type":"ContinuousOutput","key":"f_1","unit":null,"objective":{"type":"MinimizeObjective","w":1.0,"bounds":[0.0,1.0]}}]},"constraints":{"type":"Constraints","constraints":[]}},"seed":null,"acquisition_optimizer":{"prefer_exhaustive_search_for_purely_categorical_domains":true,"type":"BotorchOptimizer","n_restarts":20,"n_raw_samples":1024,"maxiter":2000,"batch_limit":20,"sequential":false,"local_search_config":null},"surrogate_specs":{"surrogates":[{"hyperconfig":{"type":"SingleTaskGPHyperconfig","hyperstrategy":"FractionalFactorialStrategy","inputs":{"type":"Inputs","features":[{"type":"CategoricalInput","key":"kernel","categories":["rbf","matern_1.5","matern_2.5"],"allowed":[true,true,true]},{"type":"CategoricalInput","key":"prior","categories":["mbo","threesix","hvarfner"],"allowed":[true,true,true]},{"type":"CategoricalInput","key":"scalekernel","categories":["True","False"],"allowed":[true,true]},{"type":"CategoricalInput","key"
:"ard","categories":["True","False"],"allowed":[true,true]}]},"n_iterations":null,"target_metric":"MAE","lengthscale_constraint":null,"outputscale_constraint":null},"aggregations":null,"type":"SingleTaskGPSurrogate","inputs":{"type":"Inputs","features":[{"type":"ContinuousInput","key":"x_0","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_1","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_2","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_3","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_4","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_5","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false}]},"outputs":{"type":"Outputs","features":[{"type":"ContinuousOutput","key":"f_0","unit":null,"objective":{"type":"MinimizeObjective","w":1.0,"bounds":[0.0,1.0]}}]},"input_preprocessing_specs":{},"dump":null,"categorical_encodings":{},"scaler":"NORMALIZE","output_scaler":"STANDARDIZE","kernel":{"type":"ScaleKernel","base_kernel":{"type":"RBFKernel","features":null,"ard":false,"lengthscale_prior":null,"lengthscale_constraint":null},"outputscale_prior":null,"outputscale_constraint":null},"noise_prior":{"type":"LogNormalPrior","loc":-4.0,"scale":1.0}},{"hyperconfig":{"type":"SingleTaskGPHyperconfig","hyperstrategy":"FractionalFactorialStrategy","inputs":{"type":"Inputs","features":[{"type":"CategoricalInput","key":"kernel","categories":["rbf","matern_1.5","matern_2.5"],"allowed":[true,true,true]},{"type":"CategoricalInput","key":"prior","categories":["mbo","threesix","hvarfner"],"allowed":[true,true,true]},{"type":"CategoricalInput"
,"key":"scalekernel","categories":["True","False"],"allowed":[true,true]},{"type":"CategoricalInput","key":"ard","categories":["True","False"],"allowed":[true,true]}]},"n_iterations":null,"target_metric":"MAE","lengthscale_constraint":null,"outputscale_constraint":null},"aggregations":null,"type":"SingleTaskGPSurrogate","inputs":{"type":"Inputs","features":[{"type":"ContinuousInput","key":"x_0","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_1","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_2","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_3","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_4","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false},{"type":"ContinuousInput","key":"x_5","unit":null,"bounds":[0.0,1.0],"local_relative_bounds":null,"stepsize":null,"allow_zero":false}]},"outputs":{"type":"Outputs","features":[{"type":"ContinuousOutput","key":"f_1","unit":null,"objective":{"type":"MinimizeObjective","w":1.0,"bounds":[0.0,1.0]}}]},"input_preprocessing_specs":{},"dump":null,"categorical_encodings":{},"scaler":"NORMALIZE","output_scaler":"STANDARDIZE","kernel":{"type":"RBFKernel","features":null,"ard":true,"lengthscale_prior":{"type":"DimensionalityScaledLogNormalPrior","loc":1.4142135623730951,"loc_scaling":0.5,"scale":1.7320508075688772,"scale_scaling":0.0},"lengthscale_constraint":null},"noise_prior":{"type":"LogNormalPrior","loc":-4.0,"scale":1.0}}]},"outlier_detection_specs":null,"min_experiments_before_outlier_check":1,"frequency_check":1,"frequency_hyperopt":0,"folds":5,"include_infeasible_exps_in_acqf_calc":false,"ref_point":{"type":"ExplicitReferencePoint","values":{"f_0":{"type":"Ab
soluteMovingReferenceValue","orient_at_best":false,"offset":0.0},"f_1":{"type":"AbsoluteMovingReferenceValue","orient_at_best":false,"offset":0.0}}},"acquisition_function":{"type":"qLogNEHVI","alpha":0.0,"prune_baseline":true,"n_mc_samples":512}}'

Generate the candidates.

# load it: validate the json spec against AnyStrategy to get the data model
# back
strategy_data = TypeAdapter(AnyStrategy).validate_json(jspec)

# map it: turn the data model into the strategy object used below
strategy = strategies.map(strategy_data)

# tell it the pending candidates if available
if pending_candidates is not None:
    strategy.add_candidates(pending_candidates)

# tell it the training data
strategy.tell(experiments=experiments)

# ask it for one new candidate
df_candidates = strategy.ask(candidate_count=1)

# transform to spec: convert the asked candidate into Candidate objects with
# predictions for both outputs
candidates = strategy.to_candidates(df_candidates)

candidates
[Candidate(inputValues={'x_0': InputValue(value='0.0'), 'x_1': InputValue(value='1.0'), 'x_2': InputValue(value='0.9625337652408273'), 'x_3': InputValue(value='0.0'), 'x_4': InputValue(value='0.0'), 'x_5': InputValue(value='0.3782193135916786')}, outputValues={'f_0': OutputValue(predictedValue='0.7203197879050609', standardDeviation=0.3681668123169044, objective=-0.7203197879050609), 'f_1': OutputValue(predictedValue='0.02293389765221776', standardDeviation=0.18016379062642934, objective=-0.02293389765221776)})]

To fill the model info section accordingly, the following snippet has to be executed for every surrogate, including saving the actual models.

from typing import Literal

from pydantic import BaseModel


class TestMethod(BaseModel):
    """Base model describing a test method, identified by its `type` string."""

    type: str


class CrossValidation(TestMethod):
    """Test-method record for a cross validation with `foldCount` folds."""

    # restricted to the literal "CrossValidation", which is also the default
    type: Literal["CrossValidation"] = "CrossValidation"
    # number of folds; the snippet below passes the same value that is used
    # for surrogate.cross_validate
    foldCount: int


# Number of cross-validation folds. It is defined once so that the value
# passed to cross_validate and the one recorded in the CrossValidation
# test-method object cannot drift apart.
n_folds = 5

# Walk the surrogate data models and the surrogates held by the strategy
# pairwise; both lists are in the same order, so entries at the same
# position belong together.
for surrogate_data, surrogate in zip(
    strategy.surrogate_specs.surrogates,
    strategy.surrogates.surrogates,
):
    # get the spec
    jsurrogate_spec = surrogate_data.model_dump_json()
    # get the dump
    dump = surrogate.dumps()
    # do the cross validation, only if we have a trainable model under the hood
    if isinstance(surrogate, TrainableSurrogate):
        cv_train, cv_test, _ = surrogate.cross_validate(
            strategy.experiments,
            folds=n_folds,
        )
        # transform the bofire objects to the backend objects
        testMethod = CrossValidation(foldCount=n_folds)
        cvResultsTrain = CvResults2CrossValidationValues(cv_train)
        cvResultsTest = CvResults2CrossValidationValues(cv_test)
        # The metrics are keyed by the surrogate's first output and reduced
        # to the "mean" row of the describe() summary.
        # NOTE(review): train metrics use combine_folds=False while test
        # metrics use combine_folds=True - confirm this asymmetry is intended.
        metricsTrain = {
            surrogate.outputs[0].key: cv_train.get_metrics(combine_folds=False)
            .describe()
            .loc["mean"]
            .to_dict(),
        }
        metricsTest = {
            surrogate.outputs[0].key: cv_test.get_metrics(combine_folds=True)
            .describe()
            .loc["mean"]
            .to_dict(),
        }
        # save to backend
        # - jsurrogate_spec
        # - dump
        # - testMethod
        # - cvResultsTrain
        # - cvResultsTest
        # - metricsTrain
        # - metricsTest