Domain

constraints

Constraints

Bases: BaseModel, Generic[C]

Source code in bofire/data_models/domain/constraints.py
class Constraints(BaseModel, Generic[C]):
    type: Literal["Constraints"] = "Constraints"
    constraints: Sequence[C] = Field(default_factory=list)

    def __iter__(self) -> Iterator[C]:
        return iter(self.constraints)

    def __len__(self):
        return len(self.constraints)

    def __getitem__(self, i) -> C:
        return self.constraints[i]

    def __add__(
        self,
        other: Union[Sequence[CIncludes], "Constraints[CIncludes]"],
    ) -> "Constraints[Union[C, CIncludes]]":
        if isinstance(other, collections.abc.Sequence):
            other_constraints = other
        else:
            other_constraints = other.constraints
        constraints = list(chain(self.constraints, other_constraints))
        return Constraints(constraints=constraints)

    def __call__(self, experiments: pd.DataFrame) -> pd.DataFrame:
        """Numerically evaluate all constraints

        Args:
            experiments (pd.DataFrame): data to evaluate the constraint on

        Returns:
            pd.DataFrame: Constraint evaluation for each of the constraints

        """
        return pd.concat([c(experiments) for c in self.constraints], axis=1)

    def jacobian(self, experiments: pd.DataFrame) -> list:
        """Numerically evaluate the jacobians of all constraints

        Args:
            experiments (pd.DataFrame): data to evaluate the constraint jacobians on

        Returns:
            list: A list containing the jacobians as pd.DataFrames

        """
        return [c.jacobian(experiments) for c in self.constraints]

    def is_fulfilled(self, experiments: pd.DataFrame, tol: float = 1e-6) -> pd.Series:
        """Check if all constraints are fulfilled on all rows of the provided dataframe

        Args:
            experiments (pd.DataFrame): Dataframe with data, the constraint validity should be tested on
            tol (float, optional): tolerance parameter. A constraint is considered as not fulfilled if
                the violation is larger than tol. Defaults to 1e-6.

        Returns:
            pd.Series: True if all constraints are fulfilled for all rows, False if not

        """
        if len(self.constraints) == 0:
            return pd.Series([True] * len(experiments), index=experiments.index)
        return (
            pd.concat(
                [c.is_fulfilled(experiments, tol) for c in self.constraints],
                axis=1,
            )
            .fillna(True)
            .all(axis=1)
        )

    def get(
        self,
        includes: Union[Type[CIncludes], Sequence[Type[CIncludes]]] = Constraint,
        excludes: Optional[Union[Type[CExcludes], List[Type[CExcludes]]]] = None,
        exact: bool = False,
    ) -> "Constraints[CIncludes]":
        """Get constraints of the domain

        Args:
            includes: Constraint class or list of specific constraint classes to be returned. Defaults to Constraint.
            excludes: Constraint class or list of specific constraint classes to be excluded from the return. Defaults to None.
            exact: Boolean to distinguish if only the exact class listed in includes and no subclasses inheriting from this class shall be returned. Defaults to False.

        Returns:
            Constraints: constraints in the domain fitting to the passed requirements.

        """
        return Constraints(
            constraints=filter_by_class(
                self.constraints,
                includes=includes,
                excludes=excludes,
                exact=exact,
            ),
        )

    def get_reps_df(self):
        """Provides a tabular overwiev of all constraints within the domain

        Returns:
            pd.DataFrame: DataFrame listing all constraints of the domain with a description

        """
        df = pd.DataFrame(
            index=range(len(self.constraints)),
            columns=["Type", "Description"],
            data={
                "Type": [feat.__class__.__name__ for feat in self.get(Constraint)],
                "Description": [
                    constraint.__str__() for constraint in self.get(Constraint)
                ],
            },
        )
        return df
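
A minimal usage sketch of the container (hedged: the import paths and the `LinearInequalityConstraint` convention `coefficients @ features <= rhs` are assumptions based on current BoFire releases, not shown on this page):

import pandas as pd

from bofire.data_models.constraints.api import LinearInequalityConstraint
from bofire.data_models.domain.api import Constraints

# x1 + x2 <= 1.0
constraints = Constraints(
    constraints=[
        LinearInequalityConstraint(
            features=["x1", "x2"], coefficients=[1.0, 1.0], rhs=1.0
        )
    ]
)

experiments = pd.DataFrame({"x1": [0.2, 0.9], "x2": [0.3, 0.8]})
print(constraints(experiments))               # one evaluation column per constraint
print(constraints.is_fulfilled(experiments))  # row 0 fulfilled, row 1 not
print(len(constraints), constraints[0])       # sequence protocol: __len__, __getitem__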

__call__(experiments)

Numerically evaluate all constraints

Parameters:

    experiments (pd.DataFrame): data to evaluate the constraint on. Required.

Returns:

    pd.DataFrame: Constraint evaluation for each of the constraints.

Source code in bofire/data_models/domain/constraints.py
def __call__(self, experiments: pd.DataFrame) -> pd.DataFrame:
    """Numerically evaluate all constraints

    Args:
        experiments (pd.DataFrame): data to evaluate the constraint on

    Returns:
        pd.DataFrame: Constraint evaluation for each of the constraints

    """
    return pd.concat([c(experiments) for c in self.constraints], axis=1)

get(includes=Constraint, excludes=None, exact=False)

Get constraints of the domain

Parameters:

    includes (Union[Type[CIncludes], Sequence[Type[CIncludes]]]): Constraint class or list of specific constraint classes to be returned. Defaults to Constraint.
    excludes (Optional[Union[Type[CExcludes], List[Type[CExcludes]]]]): Constraint class or list of specific constraint classes to be excluded from the return. Defaults to None.
    exact (bool): Boolean to distinguish if only the exact class listed in includes, and no subclasses inheriting from it, shall be returned. Defaults to False.

Returns:

    Constraints[CIncludes]: constraints in the domain fitting to the passed requirements.

Source code in bofire/data_models/domain/constraints.py
def get(
    self,
    includes: Union[Type[CIncludes], Sequence[Type[CIncludes]]] = Constraint,
    excludes: Optional[Union[Type[CExcludes], List[Type[CExcludes]]]] = None,
    exact: bool = False,
) -> "Constraints[CIncludes]":
    """Get constraints of the domain

    Args:
        includes: Constraint class or list of specific constraint classes to be returned. Defaults to Constraint.
        excludes: Constraint class or list of specific constraint classes to be excluded from the return. Defaults to None.
        exact: Boolean to distinguish if only the exact class listed in includes and no subclasses inheriting from this class shall be returned. Defaults to False.

    Returns:
        Constraints: constraints in the domain fitting to the passed requirements.

    """
    return Constraints(
        constraints=filter_by_class(
            self.constraints,
            includes=includes,
            excludes=excludes,
            exact=exact,
        ),
    )
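
Building on the class shown above, a short filtering sketch (hedged: the availability of the base class LinearConstraint in bofire.data_models.constraints.api is an assumption):

from bofire.data_models.constraints.api import (
    LinearConstraint,
    LinearEqualityConstraint,
    LinearInequalityConstraint,
    NChooseKConstraint,
)
from bofire.data_models.domain.api import Constraints

constraints = Constraints(
    constraints=[
        LinearEqualityConstraint(features=["x1", "x2"], coefficients=[1.0, 1.0], rhs=1.0),
        LinearInequalityConstraint(features=["x1", "x2"], coefficients=[1.0, -1.0], rhs=0.0),
        NChooseKConstraint(
            features=["x1", "x2"], min_count=0, max_count=1, none_also_valid=True
        ),
    ]
)

# subclasses are included by default (exact=False) ...
assert len(constraints.get(includes=LinearConstraint)) == 2
# ... and excludes trims them away again
assert len(constraints.get(includes=LinearConstraint, excludes=LinearEqualityConstraint)) == 1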

get_reps_df()

Provides a tabular overview of all constraints within the domain

Returns:

    pd.DataFrame: DataFrame listing all constraints of the domain with a description.

Source code in bofire/data_models/domain/constraints.py
def get_reps_df(self):
    """Provides a tabular overwiev of all constraints within the domain

    Returns:
        pd.DataFrame: DataFrame listing all constraints of the domain with a description

    """
    df = pd.DataFrame(
        index=range(len(self.constraints)),
        columns=["Type", "Description"],
        data={
            "Type": [feat.__class__.__name__ for feat in self.get(Constraint)],
            "Description": [
                constraint.__str__() for constraint in self.get(Constraint)
            ],
        },
    )
    return df

is_fulfilled(experiments, tol=1e-06)

Check if all constraints are fulfilled on all rows of the provided dataframe

Parameters:

    experiments (pd.DataFrame): Dataframe with data the constraint validity should be tested on. Required.
    tol (float, optional): tolerance parameter. A constraint is considered as not fulfilled if the violation is larger than tol. Defaults to 1e-6.

Returns:

    pd.Series: Boolean series, True if all constraints are fulfilled for a row, False if not.

Source code in bofire/data_models/domain/constraints.py
def is_fulfilled(self, experiments: pd.DataFrame, tol: float = 1e-6) -> pd.Series:
    """Check if all constraints are fulfilled on all rows of the provided dataframe

    Args:
        experiments (pd.DataFrame): Dataframe with data, the constraint validity should be tested on
        tol (float, optional): tolerance parameter. A constraint is considered as not fulfilled if
            the violation is larger than tol. Defaults to 1e-6.

    Returns:
        pd.Series: True if all constraints are fulfilled for all rows, False if not

    """
    if len(self.constraints) == 0:
        return pd.Series([True] * len(experiments), index=experiments.index)
    return (
        pd.concat(
            [c.is_fulfilled(experiments, tol) for c in self.constraints],
            axis=1,
        )
        .fillna(True)
        .all(axis=1)
    )
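
A short sketch of the tolerance behaviour (hedged: the LinearEqualityConstraint convention `coefficients @ features == rhs` is assumed; some constraint types normalize the violation before comparing it to tol):

import pandas as pd

from bofire.data_models.constraints.api import LinearEqualityConstraint
from bofire.data_models.domain.api import Constraints

# x1 + x2 == 1.0
constraints = Constraints(
    constraints=[
        LinearEqualityConstraint(features=["x1", "x2"], coefficients=[1.0, 1.0], rhs=1.0)
    ]
)
experiments = pd.DataFrame({"x1": [0.5, 0.5], "x2": [0.5, 0.7]})

print(constraints.is_fulfilled(experiments).tolist())           # [True, False]
print(constraints.is_fulfilled(experiments, tol=0.5).tolist())  # [True, True]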

jacobian(experiments)

Numerically evaluate the jacobians of all constraints

Parameters:

    experiments (pd.DataFrame): data to evaluate the constraint jacobians on. Required.

Returns:

    list: A list containing the jacobians as pd.DataFrames.

Source code in bofire/data_models/domain/constraints.py
def jacobian(self, experiments: pd.DataFrame) -> list:
    """Numerically evaluate the jacobians of all constraints

    Args:
        experiments (pd.DataFrame): data to evaluate the constraint jacobians on

    Returns:
        list: A list containing the jacobians as pd.DataFrames

    """
    return [c.jacobian(experiments) for c in self.constraints]
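
A sketch for completeness (hedged: linear constraints implement `jacobian`; other constraint types may not, in which case the call raises):

import pandas as pd

from bofire.data_models.constraints.api import LinearInequalityConstraint
from bofire.data_models.domain.api import Constraints

constraints = Constraints(
    constraints=[
        LinearInequalityConstraint(features=["x1", "x2"], coefficients=[2.0, 1.0], rhs=1.0)
    ]
)
experiments = pd.DataFrame({"x1": [0.1, 0.4], "x2": [0.2, 0.3]})

# one pd.DataFrame per constraint; for a linear constraint the entries are
# constant across rows (proportional to the coefficients)
jacobians = constraints.jacobian(experiments)
print(jacobians[0])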

domain

Domain

Bases: BaseModel

Source code in bofire/data_models/domain/domain.py
class Domain(BaseModel):
    type: Literal["Domain"] = "Domain"

    inputs: Inputs = Field(default_factory=lambda: Inputs())
    outputs: Outputs = Field(default_factory=lambda: Outputs())
    constraints: Constraints = Field(default_factory=lambda: Constraints())

    """Representation of the optimization problem/domain

    Attributes:
        inputs (List[Input], optional): List of input features. Defaults to [].
        outputs (List[Output], optional): List of output features. Defaults to [].
        constraints (List[Constraint], optional): List of constraints. Defaults to [].
    """

    @classmethod
    def from_lists(
        cls,
        inputs: Optional[Sequence[AnyInput]] = None,
        outputs: Optional[Sequence[AnyOutput]] = None,
        constraints: Optional[Sequence[AnyConstraint]] = None,
    ):
        inputs = [] if inputs is None else inputs
        outputs = [] if outputs is None else outputs
        constraints = [] if constraints is None else constraints
        return cls(
            inputs=Inputs(features=inputs),
            outputs=Outputs(features=outputs),
            constraints=Constraints(constraints=constraints),
        )

    @field_validator("inputs", mode="before")
    @classmethod
    def validate_inputs_list(cls, v):
        if isinstance(v, collections.abc.Sequence):
            v = Inputs(features=v)
            return v
        if isinstance_or_union(v, AnyInput):
            return Inputs(features=[v])
        return v

    @field_validator("outputs", mode="before")
    @classmethod
    def validate_outputs_list(cls, v):
        if isinstance(v, collections.abc.Sequence):
            return Outputs(features=v)
        if isinstance_or_union(v, AnyOutput):
            return Outputs(features=[v])
        return v

    @field_validator("constraints", mode="before")
    @classmethod
    def validate_constraints_list(cls, v):
        if isinstance(v, list):
            return Constraints(constraints=v)
        if isinstance_or_union(v, AnyConstraint):
            return Constraints(constraints=[v])
        return v

    @model_validator(mode="after")
    def validate_unique_feature_keys(self):
        """Validates if provided input and output feature keys are unique

        Args:
            v (Outputs): List of all output features of the domain.
            value (Dict[str, Inputs]): Dict containing a list of input features as single entry.

        Raises:
            ValueError: Feature keys are not unique.

        Returns:
            Outputs: Keeps output features as given.

        """
        keys = self.outputs.get_keys() + self.inputs.get_keys()
        if len(set(keys)) != len(keys):
            raise ValueError("Feature keys are not unique")
        return self

    @model_validator(mode="after")
    def validate_constraints(self):
        """Validate that the constraints defined in the domain fit to the input features.

        Args:
            v (List[Constraint]): List of constraints or empty if no constraints are defined
            values (List[Input]): List of input features of the domain

        Raises:
            ValueError: Feature key in constraint is unknown.

        Returns:
            List[Constraint]: List of constraints defined for the domain

        """
        for c in self.constraints.get():
            c.validate_inputs(self.inputs)
        return self

    # TODO: tidy this up
    def get_nchoosek_combinations(self, exhaustive: bool = False):
        """Get all possible NChooseK combinations

        Args:
            exhaustive (bool, optional): if True all combinations are returned. Defaults to False.

        Returns:
            Tuple(used_features_list, unused_features_list): used_features_list is a list of lists containing features used in each NChooseK combination.
                unused_features_list is a list of lists containing features unused in each NChooseK combination.

        """
        if len(self.constraints.get(NChooseKConstraint)) == 0:
            used_continuous_features = self.inputs.get_keys(ContinuousInput)
            return used_continuous_features, []

        used_features_list_all = []

        # loops through each NChooseK constraint
        for con in self.constraints.get(NChooseKConstraint):
            assert isinstance(con, NChooseKConstraint)
            used_features_list = []

            if exhaustive:
                for n in range(con.min_count, con.max_count + 1):
                    used_features_list.extend(itertools.combinations(con.features, n))

                if con.none_also_valid:
                    used_features_list.append(())
            else:
                used_features_list.extend(
                    itertools.combinations(con.features, con.max_count),
                )

            used_features_list_all.append(used_features_list)

        used_features_list_all = list(
            itertools.product(*used_features_list_all),
        )  # product between NChooseK constraints

        # format into a list of used features
        used_features_list_formatted = []
        for used_features_list in used_features_list_all:
            used_features_list_flattened = [
                item for sublist in used_features_list for item in sublist
            ]
            used_features_list_formatted.append(list(set(used_features_list_flattened)))

        # sort lists
        used_features_list_sorted = []
        for used_features in used_features_list_formatted:
            used_features_list_sorted.append(sorted(used_features))

        # drop duplicates
        used_features_list_no_dup = []
        for used_features in used_features_list_sorted:
            if used_features not in used_features_list_no_dup:
                used_features_list_no_dup.append(used_features)

        # print(f"duplicates dropped: {len(used_features_list_sorted)-len(used_features_list_no_dup)}")

        # remove combinations not fulfilling constraints
        used_features_list_final = []
        for combo in used_features_list_no_dup:
            fulfil_constraints = []  # list of bools tracking if constraints are fulfilled
            for con in self.constraints.get(NChooseKConstraint):
                assert isinstance(con, NChooseKConstraint)
                count = 0  # count of features in combo that are in con.features
                for f in combo:
                    if f in con.features:
                        count += 1
                if (
                    count >= con.min_count
                    and count <= con.max_count
                    or count == 0
                    and con.none_also_valid
                ):
                    fulfil_constraints.append(True)
                else:
                    fulfil_constraints.append(False)
            if np.all(fulfil_constraints):
                used_features_list_final.append(combo)

        # print(f"violators dropped: {len(used_features_list_no_dup)-len(used_features_list_final)}")

        # features unused
        features_in_cc = []
        for con in self.constraints.get(NChooseKConstraint):
            assert isinstance(con, NChooseKConstraint)
            features_in_cc.extend(con.features)
        features_in_cc = list(set(features_in_cc))
        features_in_cc.sort()
        unused_features_list = []
        for used_features in used_features_list_final:
            unused_features_list.append(
                [f_key for f_key in features_in_cc if f_key not in used_features],
            )

        # postprocess
        # used_features_list_final2 = []
        # unused_features_list2 = []
        # for used, unused in zip(used_features_list_final,unused_features_list):
        #     if len(used) == 3:
        #         used_features_list_final2.append(used), unused_features_list2.append(unused)

        return used_features_list_final, unused_features_list

    def coerce_invalids(self, experiments: pd.DataFrame) -> pd.DataFrame:
        """Coerces all invalid output measurements to np.nan

        Args:
            experiments (pd.DataFrame): Dataframe containing experimental data

        Returns:
            pd.DataFrame: coerced dataframe

        """
        # coerce invalid to nan
        for feat in self.outputs.get_keys(Output):
            experiments.loc[experiments[f"valid_{feat}"] == 0, feat] = np.nan
        return experiments

    def aggregate_by_duplicates(
        self,
        experiments: pd.DataFrame,
        prec: int,
        delimiter: str = "-",
        method: Literal["mean", "median"] = "mean",
    ) -> Tuple[pd.DataFrame, list]:
        """Aggregate the dataframe by duplicate experiments

        Duplicates are identified based on the experiments with the same input
        features. Continuous input features are rounded before identifying the
        duplicates. Aggregation is performed by taking the average of the
        involved output features.

        Args:
            experiments (pd.DataFrame): Dataframe containing experimental data
            prec (int): Precision of the rounding of the continuous input features
            delimiter (str, optional): Delimiter used when combining the orig.
                labcodes to a new one. Defaults to "-".
            method (Literal["mean", "median"], optional): Which aggregation
                method to use. Defaults to "mean".

        Returns:
            Tuple[pd.DataFrame, list]: Dataframe holding the aggregated
                experiments, list of lists holding the labcodes of the duplicates

        """
        # prepare the parent frame
        if method not in ["mean", "median"]:
            raise ValueError(f"Unknown aggregation type provided: {method}")

        preprocessed = self.outputs.preprocess_experiments_any_valid_output(experiments)
        assert preprocessed is not None
        experiments = preprocessed.copy()
        if "labcode" not in experiments.columns:
            experiments["labcode"] = [
                str(i + 1).zfill(int(np.ceil(np.log10(experiments.shape[0]))))
                for i in range(experiments.shape[0])
            ]

        # round it if continuous inputs are present
        if len(self.inputs.get(ContinuousInput)) > 0:
            experiments[self.inputs.get_keys(ContinuousInput)] = experiments[
                self.inputs.get_keys(ContinuousInput)
            ].round(prec)

        # coerce invalid to nan
        experiments = self.coerce_invalids(experiments)

        # group and aggregate
        agg: Dict[str, Any] = {
            feat: method for feat in self.outputs.get_keys(ContinuousOutput)
        }
        agg["labcode"] = lambda x: delimiter.join(sorted(x.tolist()))
        for feat in self.outputs.get_keys(Output):
            agg[f"valid_{feat}"] = lambda x: 1

        grouped = experiments.groupby(self.inputs.get_keys(Input))
        duplicated_labcodes = [
            sorted(group.labcode.to_numpy().tolist())
            for _, group in grouped
            if group.shape[0] > 1
        ]

        experiments = grouped.aggregate(agg).reset_index(drop=False)
        for feat in self.outputs.get_keys(Output):
            experiments.loc[experiments[feat].isna(), f"valid_{feat}"] = 0

        experiments = experiments.sort_values(by="labcode")
        experiments = experiments.reset_index(drop=True)
        return experiments, sorted(duplicated_labcodes)

    def validate_experiments(
        self,
        experiments: pd.DataFrame,
        strict: bool = False,
    ) -> pd.DataFrame:
        """Checks the experimental data on validity

        Args:
            experiments (pd.DataFrame): Dataframe with experimental data
            strict (bool, optional): Boolean to distinguish if the occurrence of
                fixed features in the dataset should be considered or not.
                Defaults to False.

        Raises:
            ValueError: empty dataframe
            ValueError: the column for a specific feature is missing in the provided data
            ValueError: there are labcodes with null value
            ValueError: there are labcodes with nan value
            ValueError: labcodes are not unique
            ValueError: the provided columns do not match the defined domain
            ValueError: Input with null values
            ValueError: Input with nan values

        Returns:
            pd.DataFrame: The provided dataframe with experimental data

        """
        if len(experiments) == 0:
            raise ValueError("no experiments provided (empty dataframe)")

        # we allow here for a column named labcode used to identify experiments
        if "labcode" in experiments.columns:
            # test that labcodes are not na
            if experiments.labcode.isnull().to_numpy().any():
                raise ValueError("there are labcodes with null value")
            if experiments.labcode.isna().to_numpy().any():
                raise ValueError("there are labcodes with nan value")
            # test that labcodes are distinct
            if (
                len(set(experiments.labcode.to_numpy().tolist()))
                != experiments.shape[0]
            ):
                raise ValueError("labcodes are not unique")

        # run the individual validators
        experiments = self.inputs.validate_experiments(
            experiments=experiments,
            strict=strict,
        )
        experiments = self.outputs.validate_experiments(experiments=experiments)
        return experiments

    def describe_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
        """Method to get a tabular overview of how many measurements and how many valid entries are included in the input data for each output feature

        Args:
            experiments (pd.DataFrame): Dataframe with experimental data

        Returns:
            pd.DataFrame: Dataframe with counts of how many measurements and how many valid entries are included in the input data for each output feature

        """
        data = {}
        for feat in self.outputs.get_keys(Output):
            data[feat] = [
                experiments.loc[experiments[feat].notna()].shape[0],
                experiments.loc[experiments[feat].notna(), "valid_%s" % feat].sum(),
            ]
        preprocessed = self.outputs.preprocess_experiments_all_valid_outputs(
            experiments,
        )
        assert preprocessed is not None
        data["all"] = [
            experiments.shape[0],
            preprocessed.shape[0],
        ]
        return pd.DataFrame.from_dict(
            data,
            orient="index",
            columns=["measured", "valid"],
        )

    def validate_candidates(
        self,
        candidates: pd.DataFrame,
        only_inputs: bool = False,
        tol: float = 1e-5,
        raise_validation_error: bool = True,
    ) -> pd.DataFrame:
        """Method to check the validty of proposed candidates

        Args:
            candidates (pd.DataFrame): Dataframe with suggested new experiments (candidates)
            only_inputs (bool,optional): If True, only the input columns are validated. Defaults to False.
            tol (float,optional): tolerance parameter for constraints. A constraint is considered as not fulfilled if the violation
                is larger than tol. Defaults to 1e-5.
            raise_validation_error (bool, optional): If true an error will be raised if candidates violate constraints,
                otherwise only a warning will be displayed. Defaults to True.

        Raises:
            ValueError: when a column is missing for a defined input feature
            ValueError: when a column is missing for a defined output feature
            ValueError: when a non-numerical value is proposed
            ValueError: when an additional column is found
            ConstraintNotFulfilledError: when the constraints are not fulfilled and `raise_validation_error = True`

        Returns:
            pd.DataFrame: dataframe with suggested experiments (candidates)

        """
        # check that each input feature has a col and is valid in itself
        assert isinstance(self.inputs, Inputs)
        candidates = self.inputs.validate_candidates(candidates)
        # check if all constraints are fulfilled
        if not self.constraints.is_fulfilled(candidates, tol=tol).all():
            if raise_validation_error:
                raise ConstraintNotFulfilledError(
                    f"Constraints not fulfilled: {candidates}",
                )
            warnings.warn("Not all constraints are fulfilled.")
        # for each continuous output feature with an attached objective object
        if not only_inputs:
            assert isinstance(self.outputs, Outputs)
            candidates = self.outputs.validate_candidates(candidates=candidates)
        return candidates

    def is_fulfilled(
        self,
        experiments: pd.DataFrame,
        tol: float = 1e-6,
        exlude_interpoint: bool = True,
    ) -> pd.Series:
        """Check if all constraints are fulfilled on all rows of the provided dataframe
        both constraints and inputs are checked.

        Args:
            experiments: Dataframe with data, the constraint validity should be tested on
            tol: Tolerance for checking the constraints. Defaults to 1e-6.
            exlude_interpoint: If True, InterpointConstraints are excluded from the check. Defaults to True.

        Returns:
            Boolean series indicating if all constraints are fulfilled for all rows.
        """
        constraints = (
            self.constraints.get(excludes=[InterpointConstraint])
            if exlude_interpoint
            else self.constraints.get()
        )
        return constraints.is_fulfilled(experiments, tol) & self.inputs.is_fulfilled(
            experiments
        )

    @property
    def experiment_column_names(self):
        """The columns in the experimental dataframe

        Returns:
            List[str]: List of columns in the experiment dataframe (input and output feature keys + valid_{output feature key} columns)

        """
        return (self.inputs + self.outputs).get_keys() + [
            f"valid_{output_feature_key}"
            for output_feature_key in self.outputs.get_keys(Output)
        ]

    @property
    def candidate_column_names(self):
        """The columns in the candidate dataframe

        Returns:
            List[str]: List of columns in the candidate dataframe (input feature keys + output feature keys suffixed with _pred, _sd and _des)

        """
        assert isinstance(self.outputs, Outputs)
        return (
            self.inputs.get_keys(Input)
            + [
                f"{output_feature_key}_pred"
                for output_feature_key in self.outputs.get_keys_by_objective(Objective)
            ]
            + [
                f"{output_feature_key}_sd"
                for output_feature_key in self.outputs.get_keys_by_objective(Objective)
            ]
            + [
                f"{output_feature_key}_des"
                for output_feature_key in self.outputs.get_keys_by_objective(Objective)
            ]
        )
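
A minimal construction sketch (hedged: the feature and constraint import paths are assumptions based on current BoFire releases):

from bofire.data_models.constraints.api import LinearInequalityConstraint
from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput

domain = Domain.from_lists(
    inputs=[
        ContinuousInput(key="x1", bounds=(0, 1)),
        ContinuousInput(key="x2", bounds=(0, 1)),
    ],
    outputs=[ContinuousOutput(key="y")],
    constraints=[
        LinearInequalityConstraint(
            features=["x1", "x2"], coefficients=[1.0, 1.0], rhs=1.0
        )
    ],
)

# the mode="before" field validators also accept plain lists directly,
# so Domain(inputs=[...], outputs=[...], constraints=[...]) is equivalent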

candidate_column_names property

The columns in the candidate dataframe

Returns:

    List[str]: List of columns in the candidate dataframe (input feature keys + output feature keys suffixed with _pred, _sd and _des).

constraints = Field(default_factory=lambda: Constraints()) class-attribute instance-attribute

Representation of the optimization problem/domain

Attributes:

    inputs (List[Input]): List of input features. Defaults to [].
    outputs (List[Output]): List of output features. Defaults to [].
    constraints (List[Constraint]): List of constraints. Defaults to [].

experiment_column_names property

The columns in the experimental dataframe

Returns:

    List[str]: List of columns in the experiment dataframe (input and output feature keys + valid_{output feature key} columns).

aggregate_by_duplicates(experiments, prec, delimiter='-', method='mean')

Aggregate the dataframe by duplicate experiments

Duplicates are identified based on the experiments with the same input features. Continuous input features are rounded before identifying the duplicates. Aggregation is performed by taking the average of the involved output features.

Parameters:

    experiments (pd.DataFrame): Dataframe containing experimental data. Required.
    prec (int): Precision of the rounding of the continuous input features. Required.
    delimiter (str, optional): Delimiter used when combining the orig. labcodes to a new one. Defaults to "-".
    method (Literal["mean", "median"], optional): Which aggregation method to use. Defaults to "mean".

Returns:

    Tuple[pd.DataFrame, list]: Dataframe holding the aggregated experiments, list of lists holding the labcodes of the duplicates.

Source code in bofire/data_models/domain/domain.py
def aggregate_by_duplicates(
    self,
    experiments: pd.DataFrame,
    prec: int,
    delimiter: str = "-",
    method: Literal["mean", "median"] = "mean",
) -> Tuple[pd.DataFrame, list]:
    """Aggregate the dataframe by duplicate experiments

    Duplicates are identified based on the experiments with the same input
    features. Continuous input features are rounded before identifying the
    duplicates. Aggregation is performed by taking the average of the
    involved output features.

    Args:
        experiments (pd.DataFrame): Dataframe containing experimental data
        prec (int): Precision of the rounding of the continuous input features
        delimiter (str, optional): Delimiter used when combining the orig.
            labcodes to a new one. Defaults to "-".
        method (Literal["mean", "median"], optional): Which aggregation
            method to use. Defaults to "mean".

    Returns:
        Tuple[pd.DataFrame, list]: Dataframe holding the aggregated
            experiments, list of lists holding the labcodes of the duplicates

    """
    # prepare the parent frame
    if method not in ["mean", "median"]:
        raise ValueError(f"Unknown aggregation type provided: {method}")

    preprocessed = self.outputs.preprocess_experiments_any_valid_output(experiments)
    assert preprocessed is not None
    experiments = preprocessed.copy()
    if "labcode" not in experiments.columns:
        experiments["labcode"] = [
            str(i + 1).zfill(int(np.ceil(np.log10(experiments.shape[0]))))
            for i in range(experiments.shape[0])
        ]

    # round it if continuous inputs are present
    if len(self.inputs.get(ContinuousInput)) > 0:
        experiments[self.inputs.get_keys(ContinuousInput)] = experiments[
            self.inputs.get_keys(ContinuousInput)
        ].round(prec)

    # coerce invalid to nan
    experiments = self.coerce_invalids(experiments)

    # group and aggregate
    agg: Dict[str, Any] = {
        feat: method for feat in self.outputs.get_keys(ContinuousOutput)
    }
    agg["labcode"] = lambda x: delimiter.join(sorted(x.tolist()))
    for feat in self.outputs.get_keys(Output):
        agg[f"valid_{feat}"] = lambda x: 1

    grouped = experiments.groupby(self.inputs.get_keys(Input))
    duplicated_labcodes = [
        sorted(group.labcode.to_numpy().tolist())
        for _, group in grouped
        if group.shape[0] > 1
    ]

    experiments = grouped.aggregate(agg).reset_index(drop=False)
    for feat in self.outputs.get_keys(Output):
        experiments.loc[experiments[feat].isna(), f"valid_{feat}"] = 0

    experiments = experiments.sort_values(by="labcode")
    experiments = experiments.reset_index(drop=True)
    return experiments, sorted(duplicated_labcodes)
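
A small sketch of the aggregation (the labcode values in the comment assume the automatic numbering shown in the source above):

import pandas as pd

from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput

domain = Domain.from_lists(
    inputs=[ContinuousInput(key=k, bounds=(0, 1)) for k in ["x1", "x2"]],
    outputs=[ContinuousOutput(key="y")],
)
experiments = pd.DataFrame(
    {
        "x1": [0.1001, 0.1002, 0.5],
        "x2": [0.2, 0.2, 0.5],
        "y": [1.0, 2.0, 3.0],
        "valid_y": [1, 1, 1],
    }
)

# rows 0 and 1 collide after rounding to prec=2 and their outputs are averaged
aggregated, duplicates = domain.aggregate_by_duplicates(experiments, prec=2)
print(duplicates)  # [['1', '2']] -- labcodes were generated automatically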

coerce_invalids(experiments)

Coerces all invalid output measurements to np.nan

Parameters:

    experiments (pd.DataFrame): Dataframe containing experimental data. Required.

Returns:

    pd.DataFrame: coerced dataframe.

Source code in bofire/data_models/domain/domain.py
def coerce_invalids(self, experiments: pd.DataFrame) -> pd.DataFrame:
    """Coerces all invalid output measurements to np.nan

    Args:
        experiments (pd.DataFrame): Dataframe containing experimental data

    Returns:
        pd.DataFrame: coerced dataframe

    """
    # coerce invalid to nan
    for feat in self.outputs.get_keys(Output):
        experiments.loc[experiments[f"valid_{feat}"] == 0, feat] = np.nan
    return experiments
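
A short sketch (the valid_y column follows the valid_{key} convention used throughout this page):

import numpy as np
import pandas as pd

from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput

domain = Domain.from_lists(
    inputs=[ContinuousInput(key=k, bounds=(0, 1)) for k in ["x1", "x2"]],
    outputs=[ContinuousOutput(key="y")],
)
experiments = pd.DataFrame(
    {"x1": [0.1, 0.2], "x2": [0.3, 0.4], "y": [1.0, 2.0], "valid_y": [1, 0]}
)

# the second measurement is flagged invalid (valid_y == 0) and is blanked to NaN
coerced = domain.coerce_invalids(experiments)
assert np.isnan(coerced.loc[1, "y"])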

describe_experiments(experiments)

Method to get a tabular overview of how many measurements and how many valid entries are included in the input data for each output feature

Parameters:

    experiments (pd.DataFrame): Dataframe with experimental data. Required.

Returns:

    pd.DataFrame: Dataframe with counts of how many measurements and how many valid entries are included in the input data for each output feature.

Source code in bofire/data_models/domain/domain.py
def describe_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
    """Method to get a tabular overview of how many measurements and how many valid entries are included in the input data for each output feature

    Args:
        experiments (pd.DataFrame): Dataframe with experimental data

    Returns:
        pd.DataFrame: Dataframe with counts of how many measurements and how many valid entries are included in the input data for each output feature

    """
    data = {}
    for feat in self.outputs.get_keys(Output):
        data[feat] = [
            experiments.loc[experiments[feat].notna()].shape[0],
            experiments.loc[experiments[feat].notna(), "valid_%s" % feat].sum(),
        ]
    preprocessed = self.outputs.preprocess_experiments_all_valid_outputs(
        experiments,
    )
    assert preprocessed is not None
    data["all"] = [
        experiments.shape[0],
        preprocessed.shape[0],
    ]
    return pd.DataFrame.from_dict(
        data,
        orient="index",
        columns=["measured", "valid"],
    )
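
Continuing the sketch above with a fresh dataframe:

experiments = pd.DataFrame(
    {"x1": [0.1, 0.2], "x2": [0.3, 0.4], "y": [1.0, 2.0], "valid_y": [1, 0]}
)
print(domain.describe_experiments(experiments))
# one row per output feature plus an "all" row; here y has 2 measured
# entries of which 1 is valid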

get_nchoosek_combinations(exhaustive=False)

Get all possible NChooseK combinations

Parameters:

    exhaustive (bool, optional): if True all combinations are returned. Defaults to False.

Returns:

    Tuple(used_features_list, unused_features_list): used_features_list is a list of lists containing features used in each NChooseK combination; unused_features_list is a list of lists containing features unused in each NChooseK combination.

Source code in bofire/data_models/domain/domain.py
def get_nchoosek_combinations(self, exhaustive: bool = False):
    """Get all possible NChooseK combinations

    Args:
        exhaustive (bool, optional): if True all combinations are returned. Defaults to False.

    Returns:
        Tuple(used_features_list, unused_features_list): used_features_list is a list of lists containing features used in each NChooseK combination.
            unused_features_list is a list of lists containing features unused in each NChooseK combination.

    """
    if len(self.constraints.get(NChooseKConstraint)) == 0:
        used_continuous_features = self.inputs.get_keys(ContinuousInput)
        return used_continuous_features, []

    used_features_list_all = []

    # loops through each NChooseK constraint
    for con in self.constraints.get(NChooseKConstraint):
        assert isinstance(con, NChooseKConstraint)
        used_features_list = []

        if exhaustive:
            for n in range(con.min_count, con.max_count + 1):
                used_features_list.extend(itertools.combinations(con.features, n))

            if con.none_also_valid:
                used_features_list.append(())
        else:
            used_features_list.extend(
                itertools.combinations(con.features, con.max_count),
            )

        used_features_list_all.append(used_features_list)

    used_features_list_all = list(
        itertools.product(*used_features_list_all),
    )  # product between NChooseK constraints

    # format into a list of used features
    used_features_list_formatted = []
    for used_features_list in used_features_list_all:
        used_features_list_flattened = [
            item for sublist in used_features_list for item in sublist
        ]
        used_features_list_formatted.append(list(set(used_features_list_flattened)))

    # sort lists
    used_features_list_sorted = []
    for used_features in used_features_list_formatted:
        used_features_list_sorted.append(sorted(used_features))

    # drop duplicates
    used_features_list_no_dup = []
    for used_features in used_features_list_sorted:
        if used_features not in used_features_list_no_dup:
            used_features_list_no_dup.append(used_features)

    # print(f"duplicates dropped: {len(used_features_list_sorted)-len(used_features_list_no_dup)}")

    # remove combinations not fulfilling constraints
    used_features_list_final = []
    for combo in used_features_list_no_dup:
        fulfil_constraints = []  # list of bools tracking if constraints are fulfilled
        for con in self.constraints.get(NChooseKConstraint):
            assert isinstance(con, NChooseKConstraint)
            count = 0  # count of features in combo that are in con.features
            for f in combo:
                if f in con.features:
                    count += 1
            if (
                count >= con.min_count
                and count <= con.max_count
                or count == 0
                and con.none_also_valid
            ):
                fulfil_constraints.append(True)
            else:
                fulfil_constraints.append(False)
        if np.all(fulfil_constraints):
            used_features_list_final.append(combo)

    # print(f"violators dropped: {len(used_features_list_no_dup)-len(used_features_list_final)}")

    # features unused
    features_in_cc = []
    for con in self.constraints.get(NChooseKConstraint):
        assert isinstance(con, NChooseKConstraint)
        features_in_cc.extend(con.features)
    features_in_cc = list(set(features_in_cc))
    features_in_cc.sort()
    unused_features_list = []
    for used_features in used_features_list_final:
        unused_features_list.append(
            [f_key for f_key in features_in_cc if f_key not in used_features],
        )

    # postprocess
    # used_features_list_final2 = []
    # unused_features_list2 = []
    # for used, unused in zip(used_features_list_final,unused_features_list):
    #     if len(used) == 3:
    #         used_features_list_final2.append(used), unused_features_list2.append(unused)

    return used_features_list_final, unused_features_list
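
A concrete sketch with a single NChooseK constraint (exhaustive mode enumerates all sizes from min_count to max_count):

from bofire.data_models.constraints.api import NChooseKConstraint
from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput

domain = Domain.from_lists(
    inputs=[ContinuousInput(key=k, bounds=(0, 1)) for k in ["a", "b", "c"]],
    outputs=[ContinuousOutput(key="y")],
    constraints=[
        NChooseKConstraint(
            features=["a", "b", "c"], min_count=1, max_count=2, none_also_valid=False
        )
    ],
)

used, unused = domain.get_nchoosek_combinations(exhaustive=True)
# used:   [['a'], ['b'], ['c'], ['a', 'b'], ['a', 'c'], ['b', 'c']]
# unused: the complementary feature lists, e.g. ['b', 'c'] for ['a']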

is_fulfilled(experiments, tol=1e-06, exlude_interpoint=True)

Check if all constraints are fulfilled on all rows of the provided dataframe; both constraints and inputs are checked.

Parameters:

    experiments (pd.DataFrame): Dataframe with data the constraint validity should be tested on. Required.
    tol (float, optional): Tolerance for checking the constraints. Defaults to 1e-6.
    exlude_interpoint (bool, optional): If True, InterpointConstraints are excluded from the check. Defaults to True.

Returns:

    pd.Series: Boolean series indicating if all constraints are fulfilled for all rows.

Source code in bofire/data_models/domain/domain.py
def is_fulfilled(
    self,
    experiments: pd.DataFrame,
    tol: float = 1e-6,
    exlude_interpoint: bool = True,
) -> pd.Series:
    """Check if all constraints are fulfilled on all rows of the provided dataframe
    both constraints and inputs are checked.

    Args:
        experiments: Dataframe with data, the constraint validity should be tested on
        tol: Tolerance for checking the constraints. Defaults to 1e-6.
        exlude_interpoint: If True, InterpointConstraints are excluded from the check. Defaults to True.

    Returns:
        Boolean series indicating if all constraints are fulfilled for all rows.
    """
    constraints = (
        self.constraints.get(excludes=[InterpointConstraint])
        if exlude_interpoint
        else self.constraints.get()
    )
    return constraints.is_fulfilled(experiments, tol) & self.inputs.is_fulfilled(
        experiments
    )

validate_candidates(candidates, only_inputs=False, tol=1e-05, raise_validation_error=True)

Method to check the validity of proposed candidates

Parameters:

    candidates (pd.DataFrame): Dataframe with suggested new experiments (candidates). Required.
    only_inputs (bool, optional): If True, only the input columns are validated. Defaults to False.
    tol (float, optional): tolerance parameter for constraints. A constraint is considered as not fulfilled if the violation is larger than tol. Defaults to 1e-5.
    raise_validation_error (bool, optional): If True, an error will be raised if candidates violate constraints; otherwise only a warning will be displayed. Defaults to True.

Raises:

    ValueError: when a column is missing for a defined input feature
    ValueError: when a column is missing for a defined output feature
    ValueError: when a non-numerical value is proposed
    ValueError: when an additional column is found
    ConstraintNotFulfilledError: when the constraints are not fulfilled and raise_validation_error = True

Returns:

    pd.DataFrame: dataframe with suggested experiments (candidates).

Source code in bofire/data_models/domain/domain.py
def validate_candidates(
    self,
    candidates: pd.DataFrame,
    only_inputs: bool = False,
    tol: float = 1e-5,
    raise_validation_error: bool = True,
) -> pd.DataFrame:
    """Method to check the validty of proposed candidates

    Args:
        candidates (pd.DataFrame): Dataframe with suggested new experiments (candidates)
        only_inputs (bool,optional): If True, only the input columns are validated. Defaults to False.
        tol (float,optional): tolerance parameter for constraints. A constraint is considered as not fulfilled if the violation
            is larger than tol. Defaults to 1e-5.
        raise_validation_error (bool, optional): If true an error will be raised if candidates violate constraints,
            otherwise only a warning will be displayed. Defaults to True.

    Raises:
        ValueError: when a column is missing for a defined input feature
        ValueError: when a column is missing for a defined output feature
        ValueError: when a non-numerical value is proposed
        ValueError: when an additional column is found
        ConstraintNotFulfilledError: when the constraints are not fulfilled and `raise_validation_error = True`

    Returns:
        pd.DataFrame: dataframe with suggested experiments (candidates)

    """
    # check that each input feature has a col and is valid in itself
    assert isinstance(self.inputs, Inputs)
    candidates = self.inputs.validate_candidates(candidates)
    # check if all constraints are fulfilled
    if not self.constraints.is_fulfilled(candidates, tol=tol).all():
        if raise_validation_error:
            raise ConstraintNotFulfilledError(
                f"Constraints not fulfilled: {candidates}",
            )
        warnings.warn("Not all constraints are fulfilled.")
    # for each continuous output feature with an attached objective object
    if not only_inputs:
        assert isinstance(self.outputs, Outputs)
        candidates = self.outputs.validate_candidates(candidates=candidates)
    return candidates
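
A sketch of the soft-validation path (the y_pred/y_sd/y_des columns follow the candidate_column_names convention described earlier on this page):

import pandas as pd

from bofire.data_models.constraints.api import LinearInequalityConstraint
from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput

domain = Domain.from_lists(
    inputs=[ContinuousInput(key=k, bounds=(0, 1)) for k in ["x1", "x2"]],
    outputs=[ContinuousOutput(key="y")],
    constraints=[
        LinearInequalityConstraint(
            features=["x1", "x2"], coefficients=[1.0, 1.0], rhs=1.0
        )
    ],
)

# x1 + x2 = 1.8 violates the constraint; with raise_validation_error=False
# this emits a warning instead of raising ConstraintNotFulfilledError
candidates = pd.DataFrame(
    {"x1": [0.9], "x2": [0.9], "y_pred": [1.0], "y_sd": [0.1], "y_des": [0.5]}
)
domain.validate_candidates(candidates, raise_validation_error=False)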

validate_constraints()

Validate that the constraints defined in the domain fit to the input features.

Parameters:

    v (List[Constraint]): List of constraints, or empty if no constraints are defined. Required.
    values (List[Input]): List of input features of the domain. Required.

Raises:

    ValueError: Feature key in constraint is unknown.

Returns:

    List[Constraint]: List of constraints defined for the domain.

Source code in bofire/data_models/domain/domain.py
@model_validator(mode="after")
def validate_constraints(self):
    """Validate that the constraints defined in the domain fit to the input features.

    Args:
        v (List[Constraint]): List of constraints or empty if no constraints are defined
        values (List[Input]): List of input features of the domain

    Raises:
        ValueError: Feature key in constraint is unknown.

    Returns:
        List[Constraint]: List of constraints defined for the domain

    """
    for c in self.constraints.get():
        c.validate_inputs(self.inputs)
    return self

validate_experiments(experiments, strict=False)

Checks the experimental data on validity

Parameters:

    experiments (pd.DataFrame): Dataframe with experimental data. Required.
    strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

Raises:

    ValueError: empty dataframe
    ValueError: the column for a specific feature is missing in the provided data
    ValueError: there are labcodes with null value
    ValueError: there are labcodes with nan value
    ValueError: labcodes are not unique
    ValueError: the provided columns do not match the defined domain
    ValueError: Input with null values
    ValueError: Input with nan values

Returns:

    pd.DataFrame: The provided dataframe with experimental data.

Source code in bofire/data_models/domain/domain.py
def validate_experiments(
    self,
    experiments: pd.DataFrame,
    strict: bool = False,
) -> pd.DataFrame:
    """Checks the experimental data on validity

    Args:
        experiments (pd.DataFrame): Dataframe with experimental data
        strict (bool, optional): Boolean to distinguish if the occurrence of
            fixed features in the dataset should be considered or not.
            Defaults to False.

    Raises:
        ValueError: empty dataframe
        ValueError: the column for a specific feature is missing in the provided data
        ValueError: there are labcodes with null value
        ValueError: there are labcodes with nan value
        ValueError: labcodes are not unique
        ValueError: the provided columns do not match the defined domain
        ValueError: Input with null values
        ValueError: Input with nan values

    Returns:
        pd.DataFrame: The provided dataframe with experimental data

    """
    if len(experiments) == 0:
        raise ValueError("no experiments provided (empty dataframe)")

    # we allow here for a column named labcode used to identify experiments
    if "labcode" in experiments.columns:
        # test that labcodes are not na
        if experiments.labcode.isnull().to_numpy().any():
            raise ValueError("there are labcodes with null value")
        if experiments.labcode.isna().to_numpy().any():
            raise ValueError("there are labcodes with nan value")
        # test that labcodes are distinct
        if (
            len(set(experiments.labcode.to_numpy().tolist()))
            != experiments.shape[0]
        ):
            raise ValueError("labcodes are not unique")

    # run the individual validators
    experiments = self.inputs.validate_experiments(
        experiments=experiments,
        strict=strict,
    )
    experiments = self.outputs.validate_experiments(experiments=experiments)
    return experiments
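
A sketch of the labcode checks (the valid_y column matches the experiment_column_names convention shown above):

import pandas as pd

from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput

domain = Domain.from_lists(
    inputs=[ContinuousInput(key=k, bounds=(0, 1)) for k in ["x1", "x2"]],
    outputs=[ContinuousOutput(key="y")],
)
experiments = pd.DataFrame(
    {
        "labcode": ["A1", "A2"],
        "x1": [0.1, 0.2],
        "x2": [0.3, 0.4],
        "y": [1.0, 2.0],
        "valid_y": [1, 1],
    }
)
domain.validate_experiments(experiments)  # passes

try:
    domain.validate_experiments(experiments.assign(labcode=["A1", "A1"]))
except ValueError as err:
    print(err)  # labcodes are not unique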

validate_unique_feature_keys()

Validates if provided input and output feature keys are unique

Parameters:

    v (Outputs): List of all output features of the domain. Required.
    value (Dict[str, Inputs]): Dict containing a list of input features as single entry. Required.

Raises:

    ValueError: Feature keys are not unique.

Returns:

    Outputs: Keeps output features as given.

Source code in bofire/data_models/domain/domain.py
@model_validator(mode="after")
def validate_unique_feature_keys(self):
    """Validates if provided input and output feature keys are unique

    Args:
        v (Outputs): List of all output features of the domain.
        value (Dict[str, Inputs]): Dict containing a list of input features as single entry.

    Raises:
        ValueError: Feature keys are not unique.

    Returns:
        Outputs: Keeps output features as given.

    """
    keys = self.outputs.get_keys() + self.inputs.get_keys()
    if len(set(keys)) != len(keys):
        raise ValueError("Feature keys are not unique")
    return self
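
A sketch of the failure mode (pydantic v2 surfaces the validator's ValueError as a ValidationError, which is itself a ValueError subclass):

from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput

try:
    Domain.from_lists(
        inputs=[ContinuousInput(key="y", bounds=(0, 1))],
        outputs=[ContinuousOutput(key="y")],  # key clashes with the input
    )
except ValueError as err:
    print(err)  # message contains "Feature keys are not unique"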

features

Inputs

Bases: _BaseFeatures[AnyInput]

Container of input features, only input features are allowed.

Attributes:

    features (List[AnyInput]): list of the features.

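A minimal sampling sketch for this container (hedged: the SamplingMethodEnum import path is an assumption based on current BoFire releases):

from bofire.data_models.domain.api import Inputs
from bofire.data_models.enum import SamplingMethodEnum
from bofire.data_models.features.api import ContinuousInput

inputs = Inputs(
    features=[ContinuousInput(key=k, bounds=(0, 1)) for k in ["x1", "x2"]]
)
samples = inputs.sample(n=4, method=SamplingMethodEnum.LHS, seed=42)
print(samples)  # 4 rows, one column per input, values inside the bounds
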
Source code in bofire/data_models/domain/features.py
class Inputs(_BaseFeatures[AnyInput]):
    """Container of input features, only input features are allowed.

    Attributes:
        features (List[AnyInput]): List of the input features.

    """

    type: Literal["Inputs"] = "Inputs"  # type: ignore

    @field_validator("features")
    @classmethod
    def validate_only_one_task_input(cls, features: Sequence[AnyInput]):
        filtered = filter_by_class(
            features,
            includes=TaskInput,
            excludes=None,
            exact=False,
        )
        if len(filtered) > 1:
            raise ValueError(f"Only one `TaskInput` is allowed, got {len(filtered)}.")
        return features

    def get_fixed(self) -> Inputs:
        """Gets all features in `self` that are fixed and returns them as new
        `Inputs` object.

        Returns:
            Inputs: Input features object containing only fixed features.

        """
        return Inputs(features=[feat for feat in self if feat.is_fixed()])

    def get_free(self) -> Inputs:
        """Gets all features in `self` that are not fixed and returns them as
        new `Inputs` object.

        Returns:
            Inputs: Input features object containing only non-fixed features.

        """
        return Inputs(features=[feat for feat in self if not feat.is_fixed()])

    @validate_call
    def sample(
        self,
        n: int = 1,
        method: SamplingMethodEnum = SamplingMethodEnum.UNIFORM,
        seed: Optional[int] = None,
    ) -> pd.DataFrame:
        """Draw sobol samples

        Args:
            n (int, optional): Number of samples, has to be larger than 0.
                Defaults to 1.
            method (SamplingMethodEnum, optional): Method to use, implemented
                methods are `UNIFORM`, `SOBOL` and `LHS`. Defaults to `UNIFORM`.
            reference_value
            seed (int, optional): random seed. Defaults to None.

        Returns:
            pd.DataFrame: Dataframe containing the samples.

        """
        if len(self) == 0:
            return pd.DataFrame()

        if method == SamplingMethodEnum.UNIFORM:
            # we cannot just propagate the provided seed to the sample methods
            # as they would then sample always the same value if the bounds
            # are the same for a feature.
            rng = np.random.default_rng(seed=seed)
            return self.validate_candidates(
                pd.concat(
                    [
                        feat.sample(n, seed=int(rng.integers(1, 1000000)))
                        for feat in self.get(Input)
                    ],
                    axis=1,
                ),
            )

        free_features = self.get_free()
        if method == SamplingMethodEnum.SOBOL:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                X = Sobol(len(free_features), seed=seed).random(n)
        else:
            X = LatinHypercube(len(free_features), seed=seed).random(n)

        res = []
        for i, feat in enumerate(free_features):
            if isinstance(feat, ContinuousInput):
                x = feat.from_unit_range(X[:, i])
            elif isinstance(feat, (DiscreteInput, CategoricalInput)):
                levels = (
                    feat.values
                    if isinstance(feat, DiscreteInput)
                    else feat.get_allowed_categories()
                )
                bins = np.linspace(0, 1, len(levels) + 1)
                idx = np.digitize(X[:, i], bins) - 1
                x = np.array(levels)[idx]
            else:
                raise ValueError(
                    f"Unknown input feature with key {feat.key} of type {feat.type}",
                )
            res.append(pd.Series(x, name=feat.key))

        samples = pd.concat(res, axis=1)

        for feat in self.get_fixed():
            samples[feat.key] = feat.fixed_value()[0]  # type: ignore

        return self.validate_candidates(samples)[self.get_keys(Input)]

    def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
        """Validate a pandas dataframe with input feature values.

        Args:
            candidates (pd.DataFrame): Inputs to validate.

        Raises:
            ValueError: If a feature-based validation raises an exception.

        Returns:
            pd.DataFrame: Validated dataframe

        """
        for feature in self:
            if feature.key not in candidates:
                raise ValueError(f"no col for input feature `{feature.key}`")
            candidates[feature.key] = feature.validate_candidental(
                candidates[feature.key],
            )
        if candidates[self.get_keys()].isnull().to_numpy().any():
            raise ValueError("there are null values")
        if candidates[self.get_keys()].isna().to_numpy().any():
            raise ValueError("there are na values")
        return candidates

    def validate_experiments(
        self,
        experiments: pd.DataFrame,
        strict=False,
    ) -> pd.DataFrame:
        for feature in self:
            if feature.key not in experiments:
                raise ValueError(f"no col for input feature `{feature.key}`")
            experiments[feature.key] = feature.validate_experimental(
                experiments[feature.key],
                strict=strict,
            )
        if experiments[self.get_keys()].isnull().to_numpy().any():
            raise ValueError("there are null values")
        if experiments[self.get_keys()].isna().to_numpy().any():
            raise ValueError("there are na values")
        return experiments

    def get_categorical_combinations(
        self,
        include: Union[Type, List[Type]] = Input,
        exclude: Union[Type, List[Type]] = None,  # type: ignore
    ):
        """Get a list of tuples pairing the feature keys with a list of valid categories

        Args:
            include (Feature, optional): Features to be included. Defaults to Input.
            exclude (Feature, optional): Features to be excluded, e.g. subclasses
                of the included features. Defaults to None.

        Returns:
            List[Tuple]: A list with one tuple per combination; each tuple
                contains one (feature key, value) pair per non-fixed
                categorical or discrete feature.

        """
        features = [
            f
            for f in self.get(includes=include, excludes=exclude)
            if (isinstance(f, CategoricalInput) and not f.is_fixed())
        ]
        list_of_lists = [
            [(f.key, cat) for cat in f.get_allowed_categories()] for f in features
        ]

        discretes = [
            f
            for f in self.get(includes=include, excludes=exclude)
            if (isinstance(f, DiscreteInput) and not f.is_fixed())
        ]

        list_of_lists_2 = [[(d.key, v) for v in d.values] for d in discretes]

        list_of_lists = list_of_lists + list_of_lists_2

        return list(itertools.product(*list_of_lists))

    # transformation related methods
    def _get_transform_info(
        self,
        specs: InputTransformSpecs,
    ) -> Tuple[Dict[str, Tuple[int]], Dict[str, Tuple[str]]]:
        """Generates two dictionaries. The first one specifies which key is mapped to
        which column indices when applying `transform`. The second one specifies
        which key is mapped to which transformed keys.

        Args:
            specs (InputTransformSpecs): Dictionary specifying which
                input feature is transformed by which encoder.

        Returns:
            Dict[str, Tuple[int]]: Dictionary mapping feature keys to column indices.
            Dict[str, Tuple[str]]: Dictionary mapping feature keys to transformed feature
                keys.

        """
        self._validate_transform_specs(specs)
        features2idx = {}
        features2names = {}
        counter = 0
        for _, feat in enumerate(self.get()):
            if feat.key not in specs.keys():
                features2idx[feat.key] = (counter,)
                features2names[feat.key] = (feat.key,)
                counter += 1
            elif specs[feat.key] == CategoricalEncodingEnum.ONE_HOT:
                assert isinstance(feat, CategoricalInput)
                features2idx[feat.key] = tuple(
                    (np.array(range(len(feat.categories))) + counter).tolist(),
                )
                features2names[feat.key] = tuple(
                    [get_encoded_name(feat.key, c) for c in feat.categories],
                )
                counter += len(feat.categories)
            elif specs[feat.key] == CategoricalEncodingEnum.ORDINAL:
                features2idx[feat.key] = (counter,)
                features2names[feat.key] = (feat.key,)
                counter += 1
            elif specs[feat.key] == CategoricalEncodingEnum.DUMMY:
                assert isinstance(feat, CategoricalInput)
                features2idx[feat.key] = tuple(
                    (np.array(range(len(feat.categories) - 1)) + counter).tolist(),
                )
                features2names[feat.key] = tuple(
                    [get_encoded_name(feat.key, c) for c in feat.categories[1:]],
                )
                counter += len(feat.categories) - 1
            elif specs[feat.key] == CategoricalEncodingEnum.DESCRIPTOR:
                assert isinstance(feat, CategoricalDescriptorInput)
                features2idx[feat.key] = tuple(
                    (np.array(range(len(feat.descriptors))) + counter).tolist(),
                )
                features2names[feat.key] = tuple(
                    [get_encoded_name(feat.key, d) for d in feat.descriptors],
                )
                counter += len(feat.descriptors)
            elif isinstance(specs[feat.key], MolFeatures):
                assert isinstance(feat, MolecularInput)
                descriptor_names = specs[feat.key].get_descriptor_names()  # type: ignore
                features2idx[feat.key] = tuple(
                    (np.array(range(len(descriptor_names))) + counter).tolist(),
                )
                features2names[feat.key] = tuple(
                    [get_encoded_name(feat.key, d) for d in descriptor_names],
                )
                counter += len(descriptor_names)
        return features2idx, features2names

    def transform(
        self,
        experiments: pd.DataFrame,
        specs: InputTransformSpecs,
    ) -> pd.DataFrame:
        """Transform a dataframe to the representation specified in `specs`.

        Currently only input categoricals are supported.

        Args:
            experiments (pd.DataFrame): Data dataframe to be transformed.
            specs (InputTransformSpecs): Dictionary specifying which
                input feature is transformed by which encoder.

        Returns:
            pd.DataFrame: Transformed dataframe. Only input features are included.

        """
        # TODO: clean this up and move it into the individual classes
        specs = self._validate_transform_specs(specs)
        transformed = []
        for feat in self.get():
            s = experiments[feat.key]
            if feat.key not in specs.keys():
                transformed.append(s)
            elif specs[feat.key] == CategoricalEncodingEnum.ONE_HOT:
                assert isinstance(feat, CategoricalInput)
                transformed.append(feat.to_onehot_encoding(s))
            elif specs[feat.key] == CategoricalEncodingEnum.ORDINAL:
                assert isinstance(feat, CategoricalInput)
                transformed.append(feat.to_ordinal_encoding(s))
            elif specs[feat.key] == CategoricalEncodingEnum.DUMMY:
                assert isinstance(feat, CategoricalInput)
                transformed.append(feat.to_dummy_encoding(s))
            elif specs[feat.key] == CategoricalEncodingEnum.DESCRIPTOR:
                assert isinstance(feat, CategoricalDescriptorInput)
                transformed.append(feat.to_descriptor_encoding(s))
            elif isinstance(specs[feat.key], MolFeatures):
                assert isinstance(feat, MolecularInput)
                transformed.append(feat.to_descriptor_encoding(specs[feat.key], s))  # type: ignore
        return pd.concat(transformed, axis=1)

    def inverse_transform(
        self,
        experiments: pd.DataFrame,
        specs: InputTransformSpecs,
    ) -> pd.DataFrame:
        """Transform a dataframe back to the original representations.

        The original applied transformation has to be provided via the specs dictionary.
        Currently only input categoricals are supported.

        Args:
            experiments (pd.DataFrame): Transformed data dataframe.
            specs (InputTransformSpecs): Dictionary specifying which
                input feature is transformed by which encoder.

        Returns:
            pd.DataFrame: Back transformed dataframe. Only input features are included.

        """
        # TODO: clean this up and move it into the individual classes
        self._validate_transform_specs(specs=specs)
        transformed = []
        for feat in self.get():
            if isinstance(feat, DiscreteInput):
                transformed.append(feat.from_continuous(experiments))
            elif feat.key not in specs.keys():
                transformed.append(experiments[feat.key])
            elif specs[feat.key] == CategoricalEncodingEnum.ONE_HOT:
                assert isinstance(feat, CategoricalInput)
                transformed.append(feat.from_onehot_encoding(experiments))
            elif specs[feat.key] == CategoricalEncodingEnum.ORDINAL:
                assert isinstance(feat, CategoricalInput)
                transformed.append(
                    feat.from_ordinal_encoding(experiments[feat.key].astype(int)),
                )
            elif specs[feat.key] == CategoricalEncodingEnum.DUMMY:
                assert isinstance(feat, CategoricalInput)
                transformed.append(feat.from_dummy_encoding(experiments))
            elif specs[feat.key] == CategoricalEncodingEnum.DESCRIPTOR:
                assert isinstance(feat, CategoricalDescriptorInput)
                transformed.append(feat.from_descriptor_encoding(experiments))
            elif isinstance(specs[feat.key], MolFeatures):
                assert isinstance(feat, CategoricalMolecularInput)
                transformed.append(
                    feat.from_descriptor_encoding(specs[feat.key], experiments),  # type: ignore
                )

        return pd.concat(transformed, axis=1)

    def _validate_transform_specs(
        self,
        specs: InputTransformSpecs,
    ) -> InputTransformSpecs:
        """Checks the validity of the transform specs .

        Args:
            specs (InputTransformSpecs): Transform specs to be validated.

        """
        # first check that the keys in the specs dict are valid feature keys,
        # then check that all values are of type CategoricalEncodingEnum or MolFeatures
        for key, value in specs.items():
            try:
                feat = self.get_by_key(key)
            except KeyError:
                raise ValueError(
                    f"Unknown feature with key {key} specified in transform specs.",
                )
            # TODO
            # this is ugly, on the long run we have to get rid of the transform enums
            # and replace them with classes, then the following lines collapse into just two
            assert isinstance(feat, Input)
            enums = [t for t in feat.valid_transform_types() if isinstance(t, Enum)]
            no_enums = [
                t for t in feat.valid_transform_types() if not isinstance(t, Enum)
            ]
            if isinstance(value, Enum):
                if value not in enums:
                    raise ValueError(
                        f"Forbidden transform type for feature with key {key}",
                    )
            else:
                if len(no_enums) == 0:
                    raise ValueError(
                        f"Forbidden transform type for feature with key {key}",
                    )
                if not isinstance(value, tuple(no_enums)):  # type: ignore
                    raise ValueError(
                        f"Forbidden transform type for feature with key {key}",
                    )

        return specs

    def get_bounds(
        self,
        specs: InputTransformSpecs,
        experiments: Optional[pd.DataFrame] = None,
        reference_experiment: Optional[pd.Series] = None,
    ) -> Tuple[List[float], List[float]]:
        """Returns the boundaries of the optimization problem based on the transformations
        defined in the  `specs` dictionary.

        Args:
            specs (InputTransformSpecs): Dictionary specifying which
                input feature is transformed by which encoder.
            experiments (Optional[pd.DataFrame], optional): Dataframe with input features.
                If provided, the real feature bounds are returned based on both the opt.
                feature bounds and the extreme points in the dataframe. Defaults to None.
            reference_experiment (Optional[pd.Series], optional): If a reference experiment
                is provided, the local bounds of a local search region around the
                reference experiment are returned. Currently only supported for
                continuous inputs. For more details, see
                https://www.merl.com/publications/docs/TR2023-057.pdf. Defaults to None.

        Raises:
            ValueError: If a feature type is not known.
            ValueError: If no transformation is provided for a categorical feature.

        Returns:
            Tuple[List[float], List[float]]: list with lower bounds, list with upper bounds.

        """
        if reference_experiment is not None and experiments is not None:
            raise ValueError(
                "Only one of `reference_experiment` and `experiments` can be used.",
            )

        self._validate_transform_specs(specs=specs)

        lower = []
        upper = []

        for feat in self.get():
            assert isinstance(feat, Input)
            lo, up = feat.get_bounds(
                transform_type=specs.get(feat.key),  # type: ignore
                values=experiments[feat.key] if experiments is not None else None,  # type: ignore
                reference_value=(
                    reference_experiment[feat.key]
                    if reference_experiment is not None
                    else None
                ),
            )
            lower += lo
            upper += up
        return lower, upper

    def get_feature_indices(
        self,
        specs: InputTransformSpecs,
        feature_keys: List[str],
    ) -> List[int]:
        """Returns a list of indices of the given feature key list.

        Args:
            specs (InputTransformSpecs): Dictionary specifying which
                input feature is transformed by which encoder.
            feature_keys (List[str]): List of feature keys.

        Returns:
            List[int]: The list of indices.

        """
        features2idx, _ = self._get_transform_info(specs)
        return sorted(
            itertools.chain.from_iterable(
                [features2idx[feat] for feat in feature_keys]
            ),
        )

    def is_fulfilled(self, experiments: pd.DataFrame) -> pd.Series:
        """Check if the provided experiments fulfill all constraints defined on the
        input features itself like the bounds or the allowed categories.

        Args:
            experiments: Dataframe with input features.

        Returns:
            Series with boolean values indicating if the experiments fulfill the
                constraints on the input features.

        """
        return (
            pd.concat(
                [feat.is_fulfilled(experiments[feat.key]) for feat in self.get()],
                axis=1,
            )
            .fillna(True)
            .all(axis=1)
        )
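
As a quick illustration (a sketch; the keys are made up), an Inputs container can mix feature types, and fixed features (for instance a continuous input whose lower and upper bound coincide) can be separated from the free ones:

from bofire.data_models.domain.api import Inputs
from bofire.data_models.features.api import CategoricalInput, ContinuousInput

inputs = Inputs(
    features=[
        ContinuousInput(key="x1", bounds=(0.0, 1.0)),
        ContinuousInput(key="x2", bounds=(5.0, 5.0)),  # fixed: lower == upper
        CategoricalInput(key="cat", categories=["a", "b"]),
    ],
)

print(inputs.get_fixed().get_keys())  # only "x2"
print(inputs.get_free().get_keys())   # "x1" and "cat"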

get_bounds(specs, experiments=None, reference_experiment=None)

Returns the boundaries of the optimization problem based on the transformations defined in the specs dictionary.

Parameters:

Name Type Description Default
specs InputTransformSpecs

Dictionary specifying which input feature is transformed by which encoder.

required
experiments Optional[DataFrame]

Dataframe with input features. If provided, the real feature bounds are returned based on both the opt. feature bounds and the extreme points in the dataframe. Defaults to None.

None
reference_experiment Optional[Series]

If a reference experiment is provided, the local bounds of a local search region around the reference experiment are returned. Currently only supported for continuous inputs. For more details, see https://www.merl.com/publications/docs/TR2023-057.pdf. Defaults to None.

None

Raises:

Type Description
ValueError

If a feature type is not known.

ValueError

If no transformation is provided for a categorical feature.

Returns:

Type Description
Tuple[List[float], List[float]]

Tuple[List[float], List[float]]: list with lower bounds, list with upper bounds.

Source code in bofire/data_models/domain/features.py
def get_bounds(
    self,
    specs: InputTransformSpecs,
    experiments: Optional[pd.DataFrame] = None,
    reference_experiment: Optional[pd.Series] = None,
) -> Tuple[List[float], List[float]]:
    """Returns the boundaries of the optimization problem based on the transformations
    defined in the  `specs` dictionary.

    Args:
        specs (InputTransformSpecs): Dictionary specifying which
            input feature is transformed by which encoder.
        experiments (Optional[pd.DataFrame], optional): Dataframe with input features.
            If provided, the real feature bounds are returned based on both the opt.
            feature bounds and the extreme points in the dataframe. Defaults to None.
        reference_experiment (Optional[pd.Series], optional): If a reference experiment
            is provided, the local bounds of a local search region around the
            reference experiment are returned. Currently only supported for
            continuous inputs. For more details, see
            https://www.merl.com/publications/docs/TR2023-057.pdf. Defaults to None.

    Raises:
        ValueError: If a feature type is not known.
        ValueError: If no transformation is provided for a categorical feature.

    Returns:
        Tuple[List[float], List[float]]: list with lower bounds, list with upper bounds.

    """
    if reference_experiment is not None and experiments is not None:
        raise ValueError(
            "Only one of `reference_experiment` and `experiments` can be used.",
        )

    self._validate_transform_specs(specs=specs)

    lower = []
    upper = []

    for feat in self.get():
        assert isinstance(feat, Input)
        lo, up = feat.get_bounds(
            transform_type=specs.get(feat.key),  # type: ignore
            values=experiments[feat.key] if experiments is not None else None,  # type: ignore
            reference_value=(
                reference_experiment[feat.key]
                if reference_experiment is not None
                else None
            ),
        )
        lower += lo
        upper += up
    return lower, upper
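
For example (a sketch with illustrative keys), a one-hot encoded categorical contributes one [0, 1] dimension per category to the returned bounds:

from bofire.data_models.domain.api import Inputs
from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.api import CategoricalInput, ContinuousInput

inputs = Inputs(
    features=[
        ContinuousInput(key="x1", bounds=(0.0, 10.0)),
        CategoricalInput(key="cat", categories=["a", "b"]),
    ],
)

# three dimensions in total: one per one-hot category of "cat" plus one for "x1"
lower, upper = inputs.get_bounds(specs={"cat": CategoricalEncodingEnum.ONE_HOT})
print(lower, upper)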

get_categorical_combinations(include=Input, exclude=None)

Get all combinations of the allowed categories and values of the non-fixed categorical and discrete features

Parameters:

Name Type Description Default
include Feature

Features to be included. Defaults to Input.

Input
exclude Feature

Features to be excluded, e.g. subclasses of the included features. Defaults to None.

None

Returns:

Type Description

List[Tuple]: A list with one tuple per combination; each tuple contains one (feature key, value) pair per non-fixed categorical or discrete feature.

Source code in bofire/data_models/domain/features.py
def get_categorical_combinations(
    self,
    include: Union[Type, List[Type]] = Input,
    exclude: Union[Type, List[Type]] = None,  # type: ignore
):
    """Get a list of tuples pairing the feature keys with a list of valid categories

    Args:
        include (Feature, optional): Features to be included. Defaults to Input.
        exclude (Feature, optional): Features to be excluded, e.g. subclasses
            of the included features. Defaults to None.

    Returns:
        List[Tuple]: A list with one tuple per combination; each tuple
            contains one (feature key, value) pair per non-fixed
            categorical or discrete feature.

    """
    features = [
        f
        for f in self.get(includes=include, excludes=exclude)
        if (isinstance(f, CategoricalInput) and not f.is_fixed())
    ]
    list_of_lists = [
        [(f.key, cat) for cat in f.get_allowed_categories()] for f in features
    ]

    discretes = [
        f
        for f in self.get(includes=include, excludes=exclude)
        if (isinstance(f, DiscreteInput) and not f.is_fixed())
    ]

    list_of_lists_2 = [[(d.key, v) for v in d.values] for d in discretes]

    list_of_lists = list_of_lists + list_of_lists_2

    return list(itertools.product(*list_of_lists))
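
A minimal sketch (keys are illustrative): with one free categorical and one free discrete feature, the method enumerates the full Cartesian product of their levels.

from bofire.data_models.domain.api import Inputs
from bofire.data_models.features.api import CategoricalInput, DiscreteInput

inputs = Inputs(
    features=[
        CategoricalInput(key="cat", categories=["a", "b"]),
        DiscreteInput(key="n", values=[1.0, 2.0]),
    ],
)

# four combinations; each one is a tuple of (feature key, value) pairs
for combination in inputs.get_categorical_combinations():
    print(combination)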

get_feature_indices(specs, feature_keys)

Returns a list of indices of the given feature key list.

Parameters:

Name Type Description Default
specs InputTransformSpecs

Dictionary specifying which input feature is transformed by which encoder.

required
feature_keys List[str]

List of feature keys.

required

Returns:

Type Description
List[int]

List[int]: The list of indices.

Source code in bofire/data_models/domain/features.py
def get_feature_indices(
    self,
    specs: InputTransformSpecs,
    feature_keys: List[str],
) -> List[int]:
    """Returns a list of indices of the given feature key list.

    Args:
        specs (InputTransformSpecs): Dictionary specifying which
            input feature is transformed by which encoder.
        feature_keys (List[str]): List of feature keys.

    Returns:
        List[int]: The list of indices.

    """
    features2idx, _ = self._get_transform_info(specs)
    return sorted(
        itertools.chain.from_iterable(
            [features2idx[feat] for feat in feature_keys]
        ),
    )
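
For instance (a sketch; keys and spec are illustrative), a one-hot encoded categorical occupies one column per category in the transformed representation, and its indices reflect that:

from bofire.data_models.domain.api import Inputs
from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.api import CategoricalInput, ContinuousInput

inputs = Inputs(
    features=[
        CategoricalInput(key="cat", categories=["a", "b", "c"]),
        ContinuousInput(key="x1", bounds=(0.0, 1.0)),
    ],
)

specs = {"cat": CategoricalEncodingEnum.ONE_HOT}
# "cat" spans three columns in the transformed representation
print(inputs.get_feature_indices(specs, ["cat"]))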

get_fixed()

Gets all features in self that are fixed and returns them as new Inputs object.

Returns:

Name Type Description
Inputs Inputs

Input features object containing only fixed features.

Source code in bofire/data_models/domain/features.py
def get_fixed(self) -> Inputs:
    """Gets all features in `self` that are fixed and returns them as new
    `Inputs` object.

    Returns:
        Inputs: Input features object containing only fixed features.

    """
    return Inputs(features=[feat for feat in self if feat.is_fixed()])

get_free()

Gets all features in self that are not fixed and returns them as new Inputs object.

Returns:

Name Type Description
Inputs Inputs

Input features object containing only non-fixed features.

Source code in bofire/data_models/domain/features.py
def get_free(self) -> Inputs:
    """Gets all features in `self` that are not fixed and returns them as
    new `Inputs` object.

    Returns:
        Inputs: Input features object containing only non-fixed features.

    """
    return Inputs(features=[feat for feat in self if not feat.is_fixed()])

inverse_transform(experiments, specs)

Transform a dataframe back to the original representations.

The original applied transformation has to be provided via the specs dictionary. Currently only input categoricals are supported.

Parameters:

Name Type Description Default
experiments DataFrame

Transformed data dataframe.

required
specs InputTransformSpecs

Dictionary specifying which input feature is transformed by which encoder.

required

Returns:

Type Description
DataFrame

pd.DataFrame: Back transformed dataframe. Only input features are included.

Source code in bofire/data_models/domain/features.py
def inverse_transform(
    self,
    experiments: pd.DataFrame,
    specs: InputTransformSpecs,
) -> pd.DataFrame:
    """Transform a dataframe back to the original representations.

    The original applied transformation has to be provided via the specs dictionary.
    Currently only input categoricals are supported.

    Args:
        experiments (pd.DataFrame): Transformed data dataframe.
        specs (InputTransformSpecs): Dictionary specifying which
            input feature is transformed by which encoder.

    Returns:
        pd.DataFrame: Back transformed dataframe. Only input features are included.

    """
    # TODO: clean this up and move it into the individual classes
    self._validate_transform_specs(specs=specs)
    transformed = []
    for feat in self.get():
        if isinstance(feat, DiscreteInput):
            transformed.append(feat.from_continuous(experiments))
        elif feat.key not in specs.keys():
            transformed.append(experiments[feat.key])
        elif specs[feat.key] == CategoricalEncodingEnum.ONE_HOT:
            assert isinstance(feat, CategoricalInput)
            transformed.append(feat.from_onehot_encoding(experiments))
        elif specs[feat.key] == CategoricalEncodingEnum.ORDINAL:
            assert isinstance(feat, CategoricalInput)
            transformed.append(
                feat.from_ordinal_encoding(experiments[feat.key].astype(int)),
            )
        elif specs[feat.key] == CategoricalEncodingEnum.DUMMY:
            assert isinstance(feat, CategoricalInput)
            transformed.append(feat.from_dummy_encoding(experiments))
        elif specs[feat.key] == CategoricalEncodingEnum.DESCRIPTOR:
            assert isinstance(feat, CategoricalDescriptorInput)
            transformed.append(feat.from_descriptor_encoding(experiments))
        elif isinstance(specs[feat.key], MolFeatures):
            assert isinstance(feat, CategoricalMolecularInput)
            transformed.append(
                feat.from_descriptor_encoding(specs[feat.key], experiments),  # type: ignore
            )

    return pd.concat(transformed, axis=1)
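
A round-trip sketch (illustrative keys): encoding with transform and decoding with inverse_transform under the same specs recovers the original categories.

import pandas as pd

from bofire.data_models.domain.api import Inputs
from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.api import CategoricalInput, ContinuousInput

inputs = Inputs(
    features=[
        ContinuousInput(key="x1", bounds=(0.0, 1.0)),
        CategoricalInput(key="cat", categories=["a", "b"]),
    ],
)
specs = {"cat": CategoricalEncodingEnum.ONE_HOT}

experiments = pd.DataFrame({"x1": [0.1, 0.9], "cat": ["a", "b"]})
encoded = inputs.transform(experiments, specs=specs)
# the one-hot columns are collapsed back into the original "cat" column
recovered = inputs.inverse_transform(encoded, specs=specs)
print(recovered)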

is_fulfilled(experiments)

Check if the provided experiments fulfill all constraints defined on the input features itself like the bounds or the allowed categories.

Parameters:

Name Type Description Default
experiments DataFrame

Dataframe with input features.

required

Returns:

Type Description
Series

Series with boolean values indicating if the experiments fulfill the constraints on the input features.

Source code in bofire/data_models/domain/features.py
def is_fulfilled(self, experiments: pd.DataFrame) -> pd.Series:
    """Check if the provided experiments fulfill all constraints defined on the
    input features itself like the bounds or the allowed categories.

    Args:
        experiments: Dataframe with input features.

    Returns:
        Series with boolean values indicating if the experiments fulfill the
            constraints on the input features.

    """
    return (
        pd.concat(
            [feat.is_fulfilled(experiments[feat.key]) for feat in self.get()],
            axis=1,
        )
        .fillna(True)
        .all(axis=1)
    )
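
A minimal sketch (illustrative key and values): one row lies inside the feature bounds, one outside.

import pandas as pd

from bofire.data_models.domain.api import Inputs
from bofire.data_models.features.api import ContinuousInput

inputs = Inputs(features=[ContinuousInput(key="x1", bounds=(0.0, 1.0))])

# the first row respects the bounds of x1, the second violates the upper bound,
# so the expected result is True for row 0 and False for row 1
experiments = pd.DataFrame({"x1": [0.5, 2.0]})
print(inputs.is_fulfilled(experiments))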

sample(n=1, method=SamplingMethodEnum.UNIFORM, seed=None)

Draw samples from the input features

Parameters:

Name Type Description Default
n int

Number of samples, has to be larger than 0. Defaults to 1.

1
method SamplingMethodEnum

Method to use, implemented methods are UNIFORM, SOBOL and LHS. Defaults to UNIFORM.

UNIFORM
seed int

random seed. Defaults to None.

None

Returns:

Type Description
DataFrame

pd.DataFrame: Dataframe containing the samples.

Source code in bofire/data_models/domain/features.py
@validate_call
def sample(
    self,
    n: int = 1,
    method: SamplingMethodEnum = SamplingMethodEnum.UNIFORM,
    seed: Optional[int] = None,
) -> pd.DataFrame:
    """Draw sobol samples

    Args:
        n (int, optional): Number of samples, has to be larger than 0.
            Defaults to 1.
        method (SamplingMethodEnum, optional): Method to use, implemented
            methods are `UNIFORM`, `SOBOL` and `LHS`. Defaults to `UNIFORM`.
        reference_value
        seed (int, optional): random seed. Defaults to None.

    Returns:
        pd.DataFrame: Dataframe containing the samples.

    """
    if len(self) == 0:
        return pd.DataFrame()

    if method == SamplingMethodEnum.UNIFORM:
        # we cannot just propagate the provided seed to the sample methods
        # as they would then sample always the same value if the bounds
        # are the same for a feature.
        rng = np.random.default_rng(seed=seed)
        return self.validate_candidates(
            pd.concat(
                [
                    feat.sample(n, seed=int(rng.integers(1, 1000000)))
                    for feat in self.get(Input)
                ],
                axis=1,
            ),
        )

    free_features = self.get_free()
    if method == SamplingMethodEnum.SOBOL:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            X = Sobol(len(free_features), seed=seed).random(n)
    else:
        X = LatinHypercube(len(free_features), seed=seed).random(n)

    res = []
    for i, feat in enumerate(free_features):
        if isinstance(feat, ContinuousInput):
            x = feat.from_unit_range(X[:, i])
        elif isinstance(feat, (DiscreteInput, CategoricalInput)):
            levels = (
                feat.values
                if isinstance(feat, DiscreteInput)
                else feat.get_allowed_categories()
            )
            bins = np.linspace(0, 1, len(levels) + 1)
            idx = np.digitize(X[:, i], bins) - 1
            x = np.array(levels)[idx]
        else:
            raise ValueError(
                f"Unknown input feature with key {feat.key} of type {feat.type}",
            )
        res.append(pd.Series(x, name=feat.key))

    samples = pd.concat(res, axis=1)

    for feat in self.get_fixed():
        samples[feat.key] = feat.fixed_value()[0]  # type: ignore

    return self.validate_candidates(samples)[self.get_keys(Input)]
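
A usage sketch (keys are illustrative): drawing five reproducible Latin hypercube samples over a mixed input space.

from bofire.data_models.domain.api import Inputs
from bofire.data_models.enum import SamplingMethodEnum
from bofire.data_models.features.api import CategoricalInput, ContinuousInput

inputs = Inputs(
    features=[
        ContinuousInput(key="x1", bounds=(0.0, 1.0)),
        CategoricalInput(key="cat", categories=["a", "b"]),
    ],
)

# five space-filling samples; the seed makes the draw reproducible
samples = inputs.sample(n=5, method=SamplingMethodEnum.LHS, seed=42)
print(samples)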

transform(experiments, specs)

Transform a dataframe to the representation specified in specs.

Currently only input categoricals are supported.

Parameters:

Name Type Description Default
experiments DataFrame

Data dataframe to be transformed.

required
specs InputTransformSpecs

Dictionary specifying which input feature is transformed by which encoder.

required

Returns:

Type Description
DataFrame

pd.DataFrame: Transformed dataframe. Only input features are included.

Source code in bofire/data_models/domain/features.py
def transform(
    self,
    experiments: pd.DataFrame,
    specs: InputTransformSpecs,
) -> pd.DataFrame:
    """Transform a dataframe to the representation specified in `specs`.

    Currently only input categoricals are supported.

    Args:
        experiments (pd.DataFrame): Data dataframe to be transformed.
        specs (InputTransformSpecs): Dictionary specifying which
            input feature is transformed by which encoder.

    Returns:
        pd.DataFrame: Transformed dataframe. Only input features are included.

    """
    # TODO: clean this up and move it into the individual classes
    specs = self._validate_transform_specs(specs)
    transformed = []
    for feat in self.get():
        s = experiments[feat.key]
        if feat.key not in specs.keys():
            transformed.append(s)
        elif specs[feat.key] == CategoricalEncodingEnum.ONE_HOT:
            assert isinstance(feat, CategoricalInput)
            transformed.append(feat.to_onehot_encoding(s))
        elif specs[feat.key] == CategoricalEncodingEnum.ORDINAL:
            assert isinstance(feat, CategoricalInput)
            transformed.append(feat.to_ordinal_encoding(s))
        elif specs[feat.key] == CategoricalEncodingEnum.DUMMY:
            assert isinstance(feat, CategoricalInput)
            transformed.append(feat.to_dummy_encoding(s))
        elif specs[feat.key] == CategoricalEncodingEnum.DESCRIPTOR:
            assert isinstance(feat, CategoricalDescriptorInput)
            transformed.append(feat.to_descriptor_encoding(s))
        elif isinstance(specs[feat.key], MolFeatures):
            assert isinstance(feat, MolecularInput)
            transformed.append(feat.to_descriptor_encoding(specs[feat.key], s))  # type: ignore
    return pd.concat(transformed, axis=1)
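
For example (a sketch; keys and spec are illustrative), a one-hot spec expands the categorical into one indicator column per category while the continuous input passes through unchanged:

import pandas as pd

from bofire.data_models.domain.api import Inputs
from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.api import CategoricalInput, ContinuousInput

inputs = Inputs(
    features=[
        ContinuousInput(key="x1", bounds=(0.0, 1.0)),
        CategoricalInput(key="cat", categories=["a", "b"]),
    ],
)

experiments = pd.DataFrame({"x1": [0.1, 0.9], "cat": ["a", "b"]})
# "cat" becomes one indicator column per category; "x1" is kept as-is
encoded = inputs.transform(experiments, specs={"cat": CategoricalEncodingEnum.ONE_HOT})
print(encoded.columns.tolist())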

validate_candidates(candidates)

Validate a pandas dataframe with input feature values.

Parameters:

Name Type Description Default
candidates DataFrame

Inputs to validate.

required

Raises:

Type Description
ValueError

Raised if a feature-based validation fails.

Returns:

Type Description
DataFrame

pd.DataFrame: Validated dataframe

Source code in bofire/data_models/domain/features.py
def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
    """Validate a pandas dataframe with input feature values.

    Args:
        candidates (pd.DataFrame): Inputs to validate.

    Raises:
        ValueError: If a feature-based validation raises an exception.

    Returns:
        pd.DataFrame: Validated dataframe

    """
    for feature in self:
        if feature.key not in candidates:
            raise ValueError(f"no col for input feature `{feature.key}`")
        candidates[feature.key] = feature.validate_candidental(
            candidates[feature.key],
        )
    if candidates[self.get_keys()].isnull().to_numpy().any():
        raise ValueError("there are null values")
    if candidates[self.get_keys()].isna().to_numpy().any():
        raise ValueError("there are na values")
    return candidates
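
A minimal sketch of the most common failure mode (illustrative keys): a missing feature column raises immediately.

import pandas as pd

from bofire.data_models.domain.api import Inputs
from bofire.data_models.features.api import ContinuousInput

inputs = Inputs(features=[ContinuousInput(key="x1", bounds=(0.0, 1.0))])

# the candidates frame lacks the "x1" column, so validation fails
try:
    inputs.validate_candidates(pd.DataFrame({"y": [0.5]}))
except ValueError as err:
    print(err)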

Outputs

Bases: _BaseFeatures[AnyOutput]

Container of output features, only output features are allowed.

Attributes:

Name Type Description
features List[AnyOutput]

List of the output features.

Source code in bofire/data_models/domain/features.py
class Outputs(_BaseFeatures[AnyOutput]):
    """Container of output features, only output features are allowed.

    Attributes:
        features (List[AnyOutput]): List of the output features.

    """

    type: Literal["Outputs"] = "Outputs"  # type: ignore

    def get_by_objective(
        self,
        includes: Union[
            List[Type[AbstractObjective]],
            Type[AbstractObjective],
            Type[Objective],
        ] = Objective,
        excludes: Union[
            List[Type[AbstractObjective]],
            Type[AbstractObjective],
            None,
        ] = None,
        exact: bool = False,
    ) -> Outputs:
        """Get output features filtered by the type of the attached objective.

        Args:
            includes (Union[List[TObjective], TObjective], optional): Objective class or list of objective classes
                to be returned. Defaults to Objective.
            excludes (Union[List[TObjective], TObjective, None], optional): Objective class or list of specific objective classes to be excluded from the return. Defaults to None.
            exact (bool, optional): If True, only the exact classes listed in includes are returned, excluding their subclasses. Defaults to False.

        Returns:
            Outputs: Output features fitting the passed requirements.

        """
        if len(self.features) == 0:
            return Outputs(features=[])
        return Outputs(
            features=sorted(
                filter_by_attribute(
                    self.get([ContinuousOutput, CategoricalOutput]).features,
                    lambda of: of.objective,
                    includes,
                    excludes,
                    exact,
                ),
            ),
        )

    def get_keys_by_objective(
        self,
        includes: Union[
            List[Type[AbstractObjective]],
            Type[AbstractObjective],
            Type[Objective],
        ] = Objective,
        excludes: Union[
            List[Type[AbstractObjective]],
            Type[AbstractObjective],
            None,
        ] = None,
        exact: bool = False,
    ) -> List[str]:
        """Get keys of output features filtered by the type of the attached objective.

        Args:
            includes (Union[List[TObjective], TObjective], optional): Objective class or list of objective classes
                to be returned. Defaults to Objective.
            excludes (Union[List[TObjective], TObjective, None], optional): Objective class or list of specific objective classes to be excluded from the return. Defaults to None.
            exact (bool, optional): If True, only the exact classes listed in includes are returned, excluding their subclasses. Defaults to False.

        Returns:
            List[str]: List of output feature keys fitting the passed requirements.

        """
        return [f.key for f in self.get_by_objective(includes, excludes, exact)]

    def __call__(
        self,
        experiments: pd.DataFrame,
        experiments_adapt: Optional[pd.DataFrame] = None,
        predictions: bool = False,
    ) -> pd.DataFrame:
        """Evaluate the objective for every feature.

        Args:
            experiments (pd.DataFrame): Experiments for which the objectives
                should be evaluated.
            experiments_adapt (pd.DataFrame, optional): Experimental values
                which are used to update the objective parameters on the fly.
                This is for example needed when a `MovingMaximizeSigmoidObjective`
                is used as this depends on the best experimental value achieved
                so far. For this reason `experiments_adapt` has to be provided
                if `predictions=True`, i.e. when the objectives of candidates
                are evaluated. Defaults to None.
            predictions (bool, optional): If True, use the prediction columns
                (`{feat.key}_pred`) in the dataframe to calculate the desirabilities;
                in this case `experiments_adapt` has to be provided. Defaults to False.

        Returns:
            pd.DataFrame: Objective values for the experiments of interest.

        """
        if predictions and experiments_adapt is None:
            raise ValueError(
                "If predictions are used, `experiments_adapt` has to be provided.",
            )
        else:
            experiments_adapt = (
                experiments if experiments_adapt is None else experiments_adapt
            )

        desis = pd.concat(
            [
                feat(
                    experiments[f"{feat.key}_pred" if predictions else feat.key],
                    experiments_adapt[feat.key].dropna(),  # type: ignore
                )
                for feat in self.features
                if feat.objective is not None
                and not isinstance(feat, CategoricalOutput)
            ]
            + [
                (
                    pd.Series(  # type: ignore
                        data=feat(
                            experiments.filter(regex=f"{feat.key}(.*)_prob"),  # type: ignore
                            experiments.filter(regex=f"{feat.key}(.*)_prob"),  # type: ignore
                        ),
                        name=f"{feat.key}_pred",
                    )
                    if predictions
                    else experiments[feat.key]
                )
                for feat in self.features
                if feat.objective is not None and isinstance(feat, CategoricalOutput)
            ],
            axis=1,
        )
        return desis.rename(
            {
                f"{feat.key}_pred" if predictions else feat.key: f"{feat.key}_des"
                for feat in self.features
                if feat.objective is not None
            },
            axis=1,
        )

    def add_valid_columns(self, experiments: pd.DataFrame) -> pd.DataFrame:
        """Add the `valid_{feature.key}` columns to the experiments dataframe,
        in case that they are not present.

        Args:
            experiments (pd.DataFrame): Dataframe holding the experiments.

        Returns:
            pd.DataFrame: Dataframe holding the experiments.

        """
        valid_keys = [
            f"valid_{output_feature_key}" for output_feature_key in self.get_keys()
        ]
        for valid_key in valid_keys:
            if valid_key not in experiments:
                experiments[valid_key] = True
            else:
                try:
                    experiments[valid_key] = (
                        experiments[valid_key].astype(int).astype(bool)
                    )
                except ValueError:
                    raise ValueError(f"Column {valid_key} cannot casted to dtype bool.")
        return experiments

    def validate_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
        for feat in self.get():
            if feat.key not in experiments:
                raise ValueError(f"no col for input feature `{feat.key}`")
            experiments[feat.key] = feat.validate_experimental(experiments[feat.key])
        experiments = self.add_valid_columns(experiments=experiments)
        return experiments

    def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
        # for each continuous output feature with an attached objective object
        continuous_cols = list(
            itertools.chain.from_iterable(
                [
                    [f"{feat.key}_pred", f"{feat.key}_sd", f"{feat.key}_des"]
                    for feat in self.get_by_objective(
                        includes=Objective,
                        excludes=ConstrainedCategoricalObjective,
                    )
                ]
                + [
                    [f"{key}_pred", f"{key}_sd"]
                    for key in self.get_keys_by_objective(
                        excludes=Objective,
                        includes=None,  # type: ignore
                    )
                ],
            ),
        )
        # check that pred, sd, and des cols are specified and numerical
        for col in continuous_cols:
            if col not in candidates:
                raise ValueError(f"missing column {col}")
            try:
                candidates[col] = pd.to_numeric(candidates[col], errors="raise").astype(
                    "float64",
                )
            except ValueError:
                raise ValueError(f"Not all values of column `{col}` are numerical.")
            if candidates[col].isnull().to_numpy().any():
                raise ValueError(f"Nan values are present in {col}.")
        # Looping over features allows to check categories objective wise
        for feat in self.get(CategoricalOutput):
            cols = [f"{feat.key}_pred", f"{feat.key}_des"]
            for col in cols:
                if col not in candidates:
                    raise ValueError(f"missing column {col}")
                if col == f"{feat.key}_pred":
                    feat.validate_experimental(candidates[col])
                # Check sd and desirability
                elif candidates[col].isnull().to_numpy().any():
                    raise ValueError(f"Nan values are present in {col}.")
        return candidates

    def preprocess_experiments_one_valid_output(
        self,
        output_feature_key: str,
        experiments: pd.DataFrame,
    ) -> pd.DataFrame:
        """Method to get a dataframe where non-valid entries of the provided output feature are removed

        Args:
            output_feature_key (str): The feature based on which rows with non-valid entries are removed
            experiments (pd.DataFrame): Dataframe with experimental data

        Returns:
            pd.DataFrame: Dataframe with all experiments where only valid entries of the specific feature are included

        """
        clean_exp = experiments.loc[
            (experiments["valid_%s" % output_feature_key] == 1)
            & (experiments[output_feature_key].notna())
        ]

        return clean_exp

    def preprocess_experiments_all_valid_outputs(
        self,
        experiments: pd.DataFrame,
        output_feature_keys: Optional[List] = None,
    ) -> pd.DataFrame:
        """Method to get a dataframe where non-valid entries of all output feature are removed

        Args:
            experiments (pd.DataFrame): Dataframe with experimental data
            output_feature_keys (Optional[List], optional): List of output feature keys which should be considered for removal of invalid values. Defaults to None.

        Returns:
            pd.DataFrame: Dataframe with all experiments where only valid entries of the selected features are included

        """
        if (output_feature_keys is None) or (len(output_feature_keys) == 0):
            output_feature_keys = self.get_keys(Output)

        clean_exp = experiments.query(
            " & ".join(["(`valid_%s` > 0)" % key for key in output_feature_keys]),
        )
        clean_exp = clean_exp.dropna(subset=output_feature_keys)

        return clean_exp

    def preprocess_experiments_any_valid_output(
        self,
        experiments: pd.DataFrame,
    ) -> pd.DataFrame:
        """Method to get a dataframe where at least one output feature has a valid entry

        Args:
            experiments (pd.DataFrame): Dataframe with experimental data

        Returns:
            pd.DataFrame: Dataframe with all experiments where at least one output feature has a valid entry

        """
        output_feature_keys = self.get_keys(Output)

        # clean_exp = experiments.query(" or ".join(["(valid_%s > 0)" % key for key in output_feature_keys]))
        # clean_exp = clean_exp.query(" or ".join(["%s.notna()" % key for key in output_feature_keys]))

        assert experiments is not None
        clean_exp = experiments.query(
            " or ".join(
                [
                    "((`valid_%s` >0) & `%s`.notna())" % (key, key)
                    for key in output_feature_keys
                ],
            ),
        )
        return clean_exp
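
A preprocessing sketch (illustrative data): only rows that are flagged valid and carry a non-missing value for every requested output survive.

import numpy as np
import pandas as pd

from bofire.data_models.domain.api import Outputs
from bofire.data_models.features.api import ContinuousOutput

outputs = Outputs(features=[ContinuousOutput(key="y")])

experiments = pd.DataFrame(
    {"y": [1.0, np.nan, 3.0], "valid_y": [1, 1, 0]},
)
# row 0 is kept; row 1 has a missing value, row 2 is flagged invalid
clean = outputs.preprocess_experiments_all_valid_outputs(experiments)
print(clean)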

__call__(experiments, experiments_adapt=None, predictions=False)

Evaluate the objective for every feature.

Parameters:

Name Type Description Default
experiments DataFrame

Experiments for which the objectives should be evaluated.

required
experiments_adapt DataFrame

Experimental values which are used to update the objective parameters on the fly. This is for example needed when a MovingMaximizeSigmoidObjective is used as this depends on the best experimental value achieved so far. For this reason experiments_adapt has to be provided if predictions=True, i.e. when the objectives of candidates are evaluated. Defaults to None.

None
predictions bool

If True, use the prediction columns ({feat.key}_pred) in the dataframe to calculate the desirabilities; in this case experiments_adapt has to be provided. Defaults to False.

False

Returns:

Type Description
DataFrame

pd.DataFrame: Objective values for the experiments of interest.

Source code in bofire/data_models/domain/features.py
def __call__(
    self,
    experiments: pd.DataFrame,
    experiments_adapt: Optional[pd.DataFrame] = None,
    predictions: bool = False,
) -> pd.DataFrame:
    """Evaluate the objective for every feature.

    Args:
        experiments (pd.DataFrame): Experiments for which the objectives
            should be evaluated.
        experiments_adapt (pd.DataFrame, optional): Experimental values
            which are used to update the objective parameters on the fly.
            This is for example needed when a `MovingMaximizeSigmoidObjective`
            is used as this depends on the best experimental value achieved
            so far. For this reason `experiments_adapt` has to be provided
            if `predictions=True`, i.e. when the objectives of candidates
            are evaluated. Defaults to None.
        predictions (bool, optional): If True, use the prediction columns
            (`{feat.key}_pred`) in the dataframe to calculate the desirabilities;
            in this case `experiments_adapt` has to be provided. Defaults to False.

    Returns:
        pd.DataFrame: Objective values for the experiments of interest.

    """
    if predictions and experiments_adapt is None:
        raise ValueError(
            "If predictions are used, `experiments_adapt` has to be provided.",
        )
    else:
        experiments_adapt = (
            experiments if experiments_adapt is None else experiments_adapt
        )

    desis = pd.concat(
        [
            feat(
                experiments[f"{feat.key}_pred" if predictions else feat.key],
                experiments_adapt[feat.key].dropna(),  # type: ignore
            )
            for feat in self.features
            if feat.objective is not None
            and not isinstance(feat, CategoricalOutput)
        ]
        + [
            (
                pd.Series(  # type: ignore
                    data=feat(
                        experiments.filter(regex=f"{feat.key}(.*)_prob"),  # type: ignore
                        experiments.filter(regex=f"{feat.key}(.*)_prob"),  # type: ignore
                    ),
                    name=f"{feat.key}_pred",
                )
                if predictions
                else experiments[feat.key]
            )
            for feat in self.features
            if feat.objective is not None and isinstance(feat, CategoricalOutput)
        ],
        axis=1,
    )
    return desis.rename(
        {
            f"{feat.key}_pred" if predictions else feat.key: f"{feat.key}_des"
            for feat in self.features
            if feat.objective is not None
        },
        axis=1,
    )
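
A minimal usage sketch (import paths follow BoFire's public api modules; the feature keys and data are illustrative assumptions, not part of the documented API above):

import pandas as pd

from bofire.data_models.domain.api import Outputs
from bofire.data_models.features.api import ContinuousOutput
from bofire.data_models.objectives.api import MaximizeObjective

# two continuous outputs, each with an attached maximization objective
outputs = Outputs(
    features=[
        ContinuousOutput(key="yield", objective=MaximizeObjective(w=1.0)),
        ContinuousOutput(key="purity", objective=MaximizeObjective(w=1.0)),
    ]
)

experiments = pd.DataFrame({"yield": [0.2, 0.8], "purity": [0.9, 0.5]})

# evaluate the objectives on the raw experimental values; the result
# holds one {key}_des column per output feature with an objective
desirabilities = outputs(experiments)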

add_valid_columns(experiments)

Add the valid_{feature.key} columns to the experiments dataframe in case they are not already present.

Parameters:

Name Type Description Default
experiments DataFrame

Dataframe holding the experiments.

required

Returns:

Type Description
DataFrame

pd.DataFrame: Dataframe holding the experiments, including the valid_{feature.key} columns.

Source code in bofire/data_models/domain/features.py
def add_valid_columns(self, experiments: pd.DataFrame) -> pd.DataFrame:
    """Add the `valid_{feature.key}` columns to the experiments dataframe,
    in case that they are not present.

    Args:
        experiments (pd.DataFrame): Dataframe holding the experiments.

    Returns:
        pd.DataFrame: Dataframe holding the experiments, including the `valid_{feature.key}` columns.

    """
    valid_keys = [
        f"valid_{output_feature_key}" for output_feature_key in self.get_keys()
    ]
    for valid_key in valid_keys:
        if valid_key not in experiments:
            experiments[valid_key] = True
        else:
            try:
                experiments[valid_key] = (
                    experiments[valid_key].astype(int).astype(bool)
                )
            except ValueError:
                raise ValueError(f"Column {valid_key} cannot be cast to dtype bool.")
    return experiments
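
A short behavioral sketch, continuing the hypothetical outputs object from the sketch above:

experiments = pd.DataFrame({"yield": [0.2, 0.8], "purity": [0.9, 0.5]})
# adds valid_yield and valid_purity columns, both initialized to True
experiments = outputs.add_valid_columns(experiments)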

get_by_objective(includes=Objective, excludes=None, exact=False)

Get output features filtered by the type of the attached objective.

Parameters:

Name Type Description Default
includes Union[List[TObjective], TObjective]

Objective class or list of objective classes to be returned. Defaults to Objective.

Objective
excludes Union[List[TObjective], TObjective, None]

Objective class or list of specific objective classes to be excluded from the return. Defaults to None.

None
exact bool

Boolean indicating whether only the exact classes listed in includes, and no subclasses inheriting from them, shall be returned. Defaults to False.

False

Returns:

Type Description
Outputs

List[AnyOutput]: List of output features matching the passed requirements.

Source code in bofire/data_models/domain/features.py
def get_by_objective(
    self,
    includes: Union[
        List[Type[AbstractObjective]],
        Type[AbstractObjective],
        Type[Objective],
    ] = Objective,
    excludes: Union[
        List[Type[AbstractObjective]],
        Type[AbstractObjective],
        None,
    ] = None,
    exact: bool = False,
) -> Outputs:
    """Get output features filtered by the type of the attached objective.

    Args:
        includes (Union[List[TObjective], TObjective], optional): Objective class or list of objective classes
            to be returned. Defaults to Objective.
        excludes (Union[List[TObjective], TObjective, None], optional): Objective class or list of specific objective classes to be excluded from the return. Defaults to None.
        exact (bool, optional): Boolean indicating whether only the exact classes listed in includes, and no subclasses inheriting from them, shall be returned. Defaults to False.

    Returns:
        List[AnyOutput]: List of output features matching the passed requirements.

    """
    if len(self.features) == 0:
        return Outputs(features=[])
    return Outputs(
        features=sorted(
            filter_by_attribute(
                self.get([ContinuousOutput, CategoricalOutput]).features,
                lambda of: of.objective,
                includes,
                excludes,
                exact,
            ),
        ),
    )
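
A minimal filtering sketch; MinimizeObjective and the outputs object carry over as assumptions from the sketches above:

from bofire.data_models.objectives.api import MaximizeObjective, MinimizeObjective

# keep only outputs whose objective is (a subclass of) MaximizeObjective
maximized = outputs.get_by_objective(includes=MaximizeObjective)

# or exclude outputs carrying a minimization objective instead
not_minimized = outputs.get_by_objective(excludes=MinimizeObjective)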

get_keys_by_objective(includes=Objective, excludes=None, exact=False)

Get keys of output features filtered by the type of the attached objective.

Parameters:

Name Type Description Default
includes Union[List[TObjective], TObjective]

Objective class or list of objective classes to be returned. Defaults to Objective.

Objective
excludes Union[List[TObjective], TObjective, None]

Objective class or list of specific objective classes to be excluded from the return. Defaults to None.

None
exact bool

Boolean indicating whether only the exact classes listed in includes, and no subclasses inheriting from them, shall be returned. Defaults to False.

False

Returns:

Type Description
List[str]

List[str]: List of output feature keys matching the passed requirements.

Source code in bofire/data_models/domain/features.py
def get_keys_by_objective(
    self,
    includes: Union[
        List[Type[AbstractObjective]],
        Type[AbstractObjective],
        Type[Objective],
    ] = Objective,
    excludes: Union[
        List[Type[AbstractObjective]],
        Type[AbstractObjective],
        None,
    ] = None,
    exact: bool = False,
) -> List[str]:
    """Get keys of output features filtered by the type of the attached objective.

    Args:
        includes (Union[List[TObjective], TObjective], optional): Objective class or list of objective classes
            to be returned. Defaults to Objective.
        excludes (Union[List[TObjective], TObjective, None], optional): Objective class or list of specific objective classes to be excluded from the return. Defaults to None.
        exact (bool, optional): Boolean indicating whether only the exact classes listed in includes, and no subclasses inheriting from them, shall be returned. Defaults to False.

    Returns:
        List[str]: List of output feature keys matching the passed requirements.

    """
    return [f.key for f in self.get_by_objective(includes, excludes, exact)]
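
The key-based variant mirrors get_by_objective; a one-line sketch under the same assumptions:

# e.g. ["purity", "yield"] for the outputs object sketched above
keys = outputs.get_keys_by_objective(includes=MaximizeObjective)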

preprocess_experiments_all_valid_outputs(experiments, output_feature_keys=None)

Method to get a dataframe from which rows with non-valid entries in any of the selected output features are removed

Parameters:

Name Type Description Default
experiments DataFrame

Dataframe with experimental data

required
output_feature_keys Optional[List]

List of output feature keys which should be considered for removal of invalid values. Defaults to None.

None

Returns:

Type Description
DataFrame

pd.DataFrame: Dataframe containing only those experiments with valid entries for all selected features

Source code in bofire/data_models/domain/features.py
def preprocess_experiments_all_valid_outputs(
    self,
    experiments: pd.DataFrame,
    output_feature_keys: Optional[List] = None,
) -> pd.DataFrame:
    """Method to get a dataframe where non-valid entries of all output feature are removed

    Args:
        experiments (pd.DataFrame): Dataframe with experimental data
        output_feature_keys (Optional[List], optional): List of output feature keys which should be considered for removal of invalid values. Defaults to None.

    Returns:
        pd.DataFrame: Dataframe containing only those experiments with valid entries for all selected features

    """
    if (output_feature_keys is None) or (len(output_feature_keys) == 0):
        output_feature_keys = self.get_keys(Output)

    clean_exp = experiments.query(
        " & ".join(["(`valid_%s` > 0)" % key for key in output_feature_keys]),
    )
    clean_exp = clean_exp.dropna(subset=output_feature_keys)

    return clean_exp
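
A sketch with one invalid row and one missing value (column names continue the assumptions above):

import numpy as np

experiments = pd.DataFrame(
    {
        "yield": [0.2, np.nan, 0.8],
        "purity": [0.9, 0.5, 0.7],
        "valid_yield": [1, 1, 0],
        "valid_purity": [1, 1, 1],
    }
)

# a row must be valid and non-NaN for all selected outputs,
# so only the first row survives here
clean = outputs.preprocess_experiments_all_valid_outputs(experiments)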

preprocess_experiments_any_valid_output(experiments)

Method to get a dataframe where at least one output feature has a valid entry

Parameters:

Name Type Description Default
experiments DataFrame

Dataframe with experimental data

required

Returns:

Type Description
DataFrame

pd.DataFrame: Dataframe with all experiments where at least one output feature has a valid entry

Source code in bofire/data_models/domain/features.py
def preprocess_experiments_any_valid_output(
    self,
    experiments: pd.DataFrame,
) -> pd.DataFrame:
    """Method to get a dataframe where at least one output feature has a valid entry

    Args:
        experiments (pd.DataFrame): Dataframe with experimental data

    Returns:
        pd.DataFrame: Dataframe with all experiments where at least one output feature has a valid entry

    """
    output_feature_keys = self.get_keys(Output)

    assert experiments is not None
    clean_exp = experiments.query(
        " or ".join(
            [
                "((`valid_%s` > 0) & `%s`.notna())" % (key, key)
                for key in output_feature_keys
            ],
        ),
    )
    return clean_exp
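
Continuing the frame from the previous sketch: every row has at least a valid, non-NaN purity entry, so no row is dropped here.

clean_any = outputs.preprocess_experiments_any_valid_output(experiments)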

preprocess_experiments_one_valid_output(output_feature_key, experiments)

Method to get a dataframe from which rows with non-valid entries for the provided output feature are removed

Parameters:

Name Type Description Default
experiments DataFrame

Dataframe with experimental data

required
output_feature_key str

The feature based on which rows with non-valid entries are removed

required

Returns:

Type Description
DataFrame

pd.DataFrame: Dataframe containing only those experiments with valid entries for the specified feature

Source code in bofire/data_models/domain/features.py
def preprocess_experiments_one_valid_output(
    self,
    output_feature_key: str,
    experiments: pd.DataFrame,
) -> pd.DataFrame:
    """Method to get a dataframe where non-valid entries of the provided output feature are removed

    Args:
        experiments (pd.DataFrame): Dataframe with experimental data
        output_feature_key (str): The feature based on which rows with non-valid entries are removed

    Returns:
        pd.DataFrame: Dataframe containing only those experiments with valid entries for the specified feature

    """
    clean_exp = experiments.loc[
        (experiments["valid_%s" % output_feature_key] == 1)
        & (experiments[output_feature_key].notna())
    ]

    return clean_exp
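
A final sketch filtering on a single output key (same assumed frame as above; note that the key is the first argument):

# keeps only rows where yield is marked valid and is non-NaN
clean_yield = outputs.preprocess_experiments_one_valid_output("yield", experiments)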