Domain

Bases: BaseModel

Source code in bofire/data_models/domain/domain.py
class Domain(BaseModel):
    """Representation of the optimization problem/domain.

    Attributes:
        inputs (Inputs, optional): Input features. Defaults to an empty Inputs container.
        outputs (Outputs, optional): Output features. Defaults to an empty Outputs container.
        constraints (Constraints, optional): Constraints. Defaults to an empty Constraints container.
    """

    type: Literal["Domain"] = "Domain"

    inputs: Inputs = Field(default_factory=lambda: Inputs())
    outputs: Outputs = Field(default_factory=lambda: Outputs())
    constraints: Constraints = Field(default_factory=lambda: Constraints())

    @classmethod
    def from_lists(
        cls,
        inputs: Optional[Sequence[AnyInput]] = None,
        outputs: Optional[Sequence[AnyOutput]] = None,
        constraints: Optional[Sequence[AnyConstraint]] = None,
    ):
        inputs = [] if inputs is None else inputs
        outputs = [] if outputs is None else outputs
        constraints = [] if constraints is None else constraints
        return cls(
            inputs=Inputs(features=inputs),
            outputs=Outputs(features=outputs),
            constraints=Constraints(constraints=constraints),
        )

    @field_validator("inputs", mode="before")
    @classmethod
    def validate_inputs_list(cls, v):
        if isinstance(v, collections.abc.Sequence):
            v = Inputs(features=v)
            return v
        if isinstance_or_union(v, AnyInput):
            return Inputs(features=[v])
        return v

    @field_validator("outputs", mode="before")
    @classmethod
    def validate_outputs_list(cls, v):
        if isinstance(v, collections.abc.Sequence):
            return Outputs(features=v)
        if isinstance_or_union(v, AnyOutput):
            return Outputs(features=[v])
        return v

    @field_validator("constraints", mode="before")
    @classmethod
    def validate_constraints_list(cls, v):
        if isinstance(v, list):
            return Constraints(constraints=v)
        if isinstance_or_union(v, AnyConstraint):
            return Constraints(constraints=[v])
        return v

    @model_validator(mode="after")
    def validate_unique_feature_keys(self):
        """Validates if provided input and output feature keys are unique

        Args:
            v (Outputs): List of all output features of the domain.
            value (Dict[str, Inputs]): Dict containing a list of input features as single entry.

        Raises:
            ValueError: Feature keys are not unique.

        Returns:
            Outputs: Keeps output features as given.

        """
        keys = self.outputs.get_keys() + self.inputs.get_keys()
        if len(set(keys)) != len(keys):
            raise ValueError("Feature keys are not unique")
        return self

    @model_validator(mode="after")
    def validate_constraints(self):
        """Validate that the constraints defined in the domain fit to the input features.

        Args:
            v (List[Constraint]): List of constraints or empty if no constraints are defined
            values (List[Input]): List of input features of the domain

        Raises:
            ValueError: Feature key in constraint is unknown.

        Returns:
            List[Constraint]: List of constraints defined for the domain

        """
        for c in self.constraints.get():
            c.validate_inputs(self.inputs)
        return self

    # TODO: tidy this up
    def get_nchoosek_combinations(self, exhaustive: bool = False):
        """Get all possible NChooseK combinations

        Args:
            exhaustive (bool, optional): if True all combinations are returned. Defaults to False.

        Returns:
            Tuple(used_features_list, unused_features_list): used_features_list is a list of lists
                containing the features used in each NChooseK combination; unused_features_list is
                a list of lists containing the features unused in each NChooseK combination. If no
                NChooseK constraints are defined, all continuous input keys are returned as used.

        """
        if len(self.constraints.get(NChooseKConstraint)) == 0:
            used_continuous_features = self.inputs.get_keys(ContinuousInput)
            return used_continuous_features, []

        used_features_list_all = []

        # loops through each NChooseK constraint
        for con in self.constraints.get(NChooseKConstraint):
            assert isinstance(con, NChooseKConstraint)
            used_features_list = []

            if exhaustive:
                for n in range(con.min_count, con.max_count + 1):
                    used_features_list.extend(itertools.combinations(con.features, n))

                if con.none_also_valid:
                    used_features_list.append(())
            else:
                used_features_list.extend(
                    itertools.combinations(con.features, con.max_count),
                )

            used_features_list_all.append(used_features_list)

        used_features_list_all = list(
            itertools.product(*used_features_list_all),
        )  # product between NChooseK constraints

        # format into a list of used features
        used_features_list_formatted = []
        for used_features_list in used_features_list_all:
            used_features_list_flattened = [
                item for sublist in used_features_list for item in sublist
            ]
            used_features_list_formatted.append(list(set(used_features_list_flattened)))

        # sort lists
        used_features_list_sorted = []
        for used_features in used_features_list_formatted:
            used_features_list_sorted.append(sorted(used_features))

        # drop duplicates
        used_features_list_no_dup = []
        for used_features in used_features_list_sorted:
            if used_features not in used_features_list_no_dup:
                used_features_list_no_dup.append(used_features)

        # print(f"duplicates dropped: {len(used_features_list_sorted)-len(used_features_list_no_dup)}")

        # remove combinations not fulfilling constraints
        used_features_list_final = []
        for combo in used_features_list_no_dup:
            fulfil_constraints = []  # list of bools tracking if constraints are fulfilled
            for con in self.constraints.get(NChooseKConstraint):
                assert isinstance(con, NChooseKConstraint)
                count = 0  # count of features in combo that are in con.features
                for f in combo:
                    if f in con.features:
                        count += 1
                if (
                    con.min_count <= count <= con.max_count
                    or (count == 0 and con.none_also_valid)
                ):
                    fulfil_constraints.append(True)
                else:
                    fulfil_constraints.append(False)
            if np.all(fulfil_constraints):
                used_features_list_final.append(combo)

        # print(f"violators dropped: {len(used_features_list_no_dup)-len(used_features_list_final)}")

        # features unused
        features_in_cc = []
        for con in self.constraints.get(NChooseKConstraint):
            assert isinstance(con, NChooseKConstraint)
            features_in_cc.extend(con.features)
        features_in_cc = list(set(features_in_cc))
        features_in_cc.sort()
        unused_features_list = []
        for used_features in used_features_list_final:
            unused_features_list.append(
                [f_key for f_key in features_in_cc if f_key not in used_features],
            )

        return used_features_list_final, unused_features_list

    def coerce_invalids(self, experiments: pd.DataFrame) -> pd.DataFrame:
        """Coerces all invalid output measurements to np.nan

        Args:
            experiments (pd.DataFrame): Dataframe containing experimental data

        Returns:
            pd.DataFrame: coerced dataframe

        """
        # coerce invalid to nan
        for feat in self.outputs.get_keys(Output):
            experiments.loc[experiments[f"valid_{feat}"] == 0, feat] = np.nan
        return experiments

    def aggregate_by_duplicates(
        self,
        experiments: pd.DataFrame,
        prec: int,
        delimiter: str = "-",
        method: Literal["mean", "median"] = "mean",
    ) -> Tuple[pd.DataFrame, list]:
        """Aggregate the dataframe by duplicate experiments

        Duplicates are identified based on the experiments with the same input
        features. Continuous input features are rounded before identifying the
        duplicates. Aggregation is performed by taking the mean or median of the
        involved output features.

        Args:
            experiments (pd.DataFrame): Dataframe containing experimental data
            prec (int): Precision of the rounding of the continuous input features
            delimiter (str, optional): Delimiter used when combining the orig.
                labcodes to a new one. Defaults to "-".
            method (Literal["mean", "median"], optional): Which aggregation
                method to use. Defaults to "mean".

        Returns:
            Tuple[pd.DataFrame, list]: Dataframe holding the aggregated
                experiments, list of lists holding the labcodes of the duplicates

        """
        # prepare the parent frame
        if method not in ["mean", "median"]:
            raise ValueError(f"Unknown aggregation type provided: {method}")

        preprocessed = self.outputs.preprocess_experiments_any_valid_output(experiments)
        assert preprocessed is not None
        experiments = preprocessed.copy()
        if "labcode" not in experiments.columns:
            experiments["labcode"] = [
                str(i + 1).zfill(int(np.ceil(np.log10(experiments.shape[0]))))
                for i in range(experiments.shape[0])
            ]

        # round it if continuous inputs are present
        if len(self.inputs.get(ContinuousInput)) > 0:
            experiments[self.inputs.get_keys(ContinuousInput)] = experiments[
                self.inputs.get_keys(ContinuousInput)
            ].round(prec)

        # coerce invalid to nan
        experiments = self.coerce_invalids(experiments)

        # group and aggregate
        agg: Dict[str, Any] = {
            feat: method for feat in self.outputs.get_keys(ContinuousOutput)
        }
        agg["labcode"] = lambda x: delimiter.join(sorted(x.tolist()))
        for feat in self.outputs.get_keys(Output):
            agg[f"valid_{feat}"] = lambda x: 1

        grouped = experiments.groupby(self.inputs.get_keys(Input))
        duplicated_labcodes = [
            sorted(group.labcode.to_numpy().tolist())
            for _, group in grouped
            if group.shape[0] > 1
        ]

        experiments = grouped.aggregate(agg).reset_index(drop=False)
        for feat in self.outputs.get_keys(Output):
            experiments.loc[experiments[feat].isna(), f"valid_{feat}"] = 0

        experiments = experiments.sort_values(by="labcode")
        experiments = experiments.reset_index(drop=True)
        return experiments, sorted(duplicated_labcodes)

    def validate_experiments(
        self,
        experiments: pd.DataFrame,
        strict: bool = False,
    ) -> pd.DataFrame:
        """Checks the experimental data on validity

        Args:
            experiments (pd.DataFrame): Dataframe with experimental data
            strict (bool, optional): Boolean to distinguish if the occurrence of
                fixed features in the dataset should be considered or not.
                Defaults to False.

        Raises:
            ValueError: empty dataframe
            ValueError: the column for a specific feature is missing in the provided data
            ValueError: there are labcodes with null value
            ValueError: there are labcodes with nan value
            ValueError: labcodes are not unique
            ValueError: the provided columns do not match the defined domain
            ValueError: Input with null values
            ValueError: Input with nan values

        Returns:
            pd.DataFrame: The provided dataframe with experimental data

        """
        if len(experiments) == 0:
            raise ValueError("no experiments provided (empty dataframe)")

        # we allow here for a column named labcode used to identify experiments
        if "labcode" in experiments.columns:
            # test that labcodes are not na
            if experiments.labcode.isnull().to_numpy().any():
                raise ValueError("there are labcodes with null value")
            if experiments.labcode.isna().to_numpy().any():
                raise ValueError("there are labcodes with nan value")
            # test that labcodes are distinct
            if (
                len(set(experiments.labcode.to_numpy().tolist()))
                != experiments.shape[0]
            ):
                raise ValueError("labcodes are not unique")

        # run the individual validators
        experiments = self.inputs.validate_experiments(
            experiments=experiments,
            strict=strict,
        )
        experiments = self.outputs.validate_experiments(experiments=experiments)
        return experiments

    def describe_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
        """Method to get a tabular overview of how many measurements and how many valid entries are included in the input data for each output feature

        Args:
            experiments (pd.DataFrame): Dataframe with experimental data

        Returns:
            pd.DataFrame: Dataframe with counts how many measurements and how many valid entries are included in the input data for each output feature

        """
        data = {}
        for feat in self.outputs.get_keys(Output):
            data[feat] = [
                experiments.loc[experiments[feat].notna()].shape[0],
                experiments.loc[experiments[feat].notna(), f"valid_{feat}"].sum(),
            ]
        preprocessed = self.outputs.preprocess_experiments_all_valid_outputs(
            experiments,
        )
        assert preprocessed is not None
        data["all"] = [
            experiments.shape[0],
            preprocessed.shape[0],
        ]
        return pd.DataFrame.from_dict(
            data,
            orient="index",
            columns=["measured", "valid"],
        )

    def validate_candidates(
        self,
        candidates: pd.DataFrame,
        only_inputs: bool = False,
        tol: float = 1e-5,
        raise_validation_error: bool = True,
    ) -> pd.DataFrame:
        """Method to check the validty of proposed candidates

        Args:
            candidates (pd.DataFrame): Dataframe with suggested new experiments (candidates)
            only_inputs (bool,optional): If True, only the input columns are validated. Defaults to False.
            tol (float,optional): tolerance parameter for constraints. A constraint is considered as not fulfilled if the violation
                is larger than tol. Defaults to 1e-6.
            raise_validation_error (bool, optional): If true an error will be raised if candidates violate constraints,
                otherwise only a warning will be displayed. Defaults to True.

        Raises:
            ValueError: when a column is missing for a defined input feature
            ValueError: when a column is missing for a defined output feature
            ValueError: when a non-numerical value is proposed
            ValueError: when an additional column is found
            ConstraintNotFulfilledError: when the constraints are not fulfilled and `raise_validation_error = True`

        Returns:
            pd.DataFrame: dataframe with suggested experiments (candidates)

        """
        # check that each input feature has a col and is valid in itself
        assert isinstance(self.inputs, Inputs)
        candidates = self.inputs.validate_candidates(candidates)
        # check if all constraints are fulfilled
        if not self.constraints.is_fulfilled(candidates, tol=tol).all():
            if raise_validation_error:
                raise ConstraintNotFulfilledError(
                    f"Constraints not fulfilled: {candidates}",
                )
            warnings.warn("Not all constraints are fulfilled.")
        # for each continuous output feature with an attached objective object
        if not only_inputs:
            assert isinstance(self.outputs, Outputs)
            candidates = self.outputs.validate_candidates(candidates=candidates)
        return candidates

    @property
    def experiment_column_names(self):
        """The columns in the experimental dataframe

        Returns:
            List[str]: List of columns in the experiment dataframe (input feature keys + output feature keys + valid_<output feature key> columns)

        """
        return (self.inputs + self.outputs).get_keys() + [
            f"valid_{output_feature_key}"
            for output_feature_key in self.outputs.get_keys(Output)
        ]

    @property
    def candidate_column_names(self):
        """The columns in the candidate dataframe

        Returns:
            List[str]: List of columns in the candidate dataframe (input feature keys plus the _pred, _sd, and _des columns for each output feature with an objective)

        """
        assert isinstance(self.outputs, Outputs)
        return (
            self.inputs.get_keys(Input)
            + [
                f"{output_feature_key}_pred"
                for output_feature_key in self.outputs.get_keys_by_objective(Objective)
            ]
            + [
                f"{output_feature_key}_sd"
                for output_feature_key in self.outputs.get_keys_by_objective(Objective)
            ]
            + [
                f"{output_feature_key}_des"
                for output_feature_key in self.outputs.get_keys_by_objective(Objective)
            ]
        )
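
For orientation, here is a minimal construction sketch. It is a hedged example, not part of the source: the feature keys, bounds, and constraint values are made up, and the import paths assume the usual bofire.data_models.*.api modules.

from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput
from bofire.data_models.constraints.api import LinearEqualityConstraint

# Two continuous inputs that must sum to one, and a single continuous output.
domain = Domain.from_lists(
    inputs=[
        ContinuousInput(key="x1", bounds=(0, 1)),
        ContinuousInput(key="x2", bounds=(0, 1)),
    ],
    outputs=[ContinuousOutput(key="y")],
    constraints=[
        LinearEqualityConstraint(features=["x1", "x2"], coefficients=[1.0, 1.0], rhs=1.0),
    ],
)

The member-level examples below reuse this domain object.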

candidate_column_names property

The columns in the candidate dataframe.

Returns:
    List[str]: List of columns in the candidate dataframe (input feature keys plus the _pred, _sd, and _des columns for each output feature with an objective).
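
Continuing the construction sketch above (assuming y carries its default objective):

domain.candidate_column_names
# -> ["x1", "x2", "y_pred", "y_sd", "y_des"]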

constraints = Field(default_factory=lambda: Constraints()) class-attribute instance-attribute

Representation of the optimization problem/domain.

Attributes:
    inputs (Inputs): Input features. Defaults to an empty Inputs container.
    outputs (Outputs): Output features. Defaults to an empty Outputs container.
    constraints (Constraints): Constraints. Defaults to an empty Constraints container.

experiment_column_names property

The columns in the experimental dataframe.

Returns:
    List[str]: List of columns in the experiment dataframe (input feature keys + output feature keys + valid_<output feature key> columns).
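
Continuing the construction sketch above:

domain.experiment_column_names
# -> ["x1", "x2", "y", "valid_y"]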

aggregate_by_duplicates(experiments, prec, delimiter='-', method='mean')

Aggregate the dataframe by duplicate experiments

Duplicates are identified based on the experiments with the same input features. Continuous input features are rounded before identifying the duplicates. Aggregation is performed by taking the mean or median of the involved output features.

Parameters:
    experiments (DataFrame): Dataframe containing experimental data. Required.
    prec (int): Precision of the rounding of the continuous input features. Required.
    delimiter (str, optional): Delimiter used when combining the original labcodes to a new one. Defaults to '-'.
    method (Literal['mean', 'median'], optional): Which aggregation method to use. Defaults to 'mean'.

Returns:
    Tuple[pd.DataFrame, list]: Dataframe holding the aggregated experiments, and a list of lists holding the labcodes of the duplicates.
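
A usage sketch with the example domain from above (the values are made up; rounding to two digits merges the first two rows):

import pandas as pd

experiments = pd.DataFrame(
    {
        "x1": [0.1001, 0.1002, 0.5],
        "x2": [0.8999, 0.8998, 0.5],
        "y": [1.0, 2.0, 3.0],
        "valid_y": [1, 1, 1],
    }
)
aggregated, duplicates = domain.aggregate_by_duplicates(experiments, prec=2)
# duplicates -> [["1", "2"]]; the merged row holds the mean of y (1.5) and labcode "1-2"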

coerce_invalids(experiments)

Coerces all invalid output measurements to np.nan

Parameters:
    experiments (DataFrame): Dataframe containing experimental data. Required.

Returns:
    pd.DataFrame: coerced dataframe.
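
A small sketch using the example domain from above (the columns follow the valid_<key> convention used throughout this class):

import pandas as pd

experiments = pd.DataFrame(
    {"x1": [0.2, 0.8], "x2": [0.8, 0.2], "y": [1.0, 2.0], "valid_y": [1, 0]}
)
coerced = domain.coerce_invalids(experiments)
# coerced["y"] -> [1.0, nan]; the measurement flagged invalid was set to np.nan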

describe_experiments(experiments)

Method to get a tabular overview of how many measurements and how many valid entries are included in the input data for each output feature

Parameters:
    experiments (DataFrame): Dataframe with experimental data. Required.

Returns:
    pd.DataFrame: Dataframe with counts of how many measurements and how many valid entries are included in the input data for each output feature.
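
Continuing the sketch above:

domain.describe_experiments(coerced)
# returns a frame indexed by output key plus an "all" row, with columns ["measured", "valid"]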

get_nchoosek_combinations(exhaustive=False)

Get all possible NChooseK combinations

Parameters:
    exhaustive (bool, optional): If True, all combinations are returned. Defaults to False.

Returns:
    Tuple(used_features_list, unused_features_list): used_features_list is a list of lists containing the features used in each NChooseK combination; unused_features_list is a list of lists containing the features unused in each NChooseK combination.
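
A sketch with a single NChooseK constraint (the keys are made up; the constraint attributes mirror those used in the source above):

from bofire.data_models.constraints.api import NChooseKConstraint
from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput

nck_domain = Domain.from_lists(
    inputs=[ContinuousInput(key=k, bounds=(0, 1)) for k in ["x1", "x2", "x3"]],
    outputs=[ContinuousOutput(key="y")],
    constraints=[
        NChooseKConstraint(
            features=["x1", "x2", "x3"], min_count=1, max_count=2, none_also_valid=False
        ),
    ],
)
used, unused = nck_domain.get_nchoosek_combinations(exhaustive=True)
# used   -> [["x1"], ["x2"], ["x3"], ["x1", "x2"], ["x1", "x3"], ["x2", "x3"]]
# unused -> the complement within the constrained features, e.g. ["x2", "x3"] for ["x1"]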

validate_candidates(candidates, only_inputs=False, tol=1e-05, raise_validation_error=True)

Method to check the validity of proposed candidates

Parameters:
    candidates (DataFrame): Dataframe with suggested new experiments (candidates). Required.
    only_inputs (bool, optional): If True, only the input columns are validated. Defaults to False.
    tol (float, optional): Tolerance parameter for constraints. A constraint is considered as not fulfilled if the violation is larger than tol. Defaults to 1e-5.
    raise_validation_error (bool, optional): If True, an error will be raised if candidates violate constraints; otherwise only a warning will be displayed. Defaults to True.

Raises:
    ValueError: when a column is missing for a defined input feature
    ValueError: when a column is missing for a defined output feature
    ValueError: when a non-numerical value is proposed
    ValueError: when an additional column is found
    ConstraintNotFulfilledError: when the constraints are not fulfilled and raise_validation_error = True

Returns:
    pd.DataFrame: dataframe with suggested experiments (candidates).
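
A sketch using the example domain from above (with only_inputs=True the _pred/_sd/_des columns are not required):

import pandas as pd

good = pd.DataFrame({"x1": [0.7], "x2": [0.3]})  # satisfies x1 + x2 == 1
validated = domain.validate_candidates(good, only_inputs=True)

bad = pd.DataFrame({"x1": [0.9], "x2": [0.9]})  # violates x1 + x2 == 1
# raise_validation_error=False downgrades the ConstraintNotFulfilledError to a warning:
domain.validate_candidates(bad, only_inputs=True, raise_validation_error=False)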

validate_constraints()

Validate that the constraints defined in the domain fit to the input features.

Raises:
    ValueError: Feature key in constraint is unknown.

Returns:
    Domain: The validated domain.

validate_experiments(experiments, strict=False)

Checks the experimental data on validity

Parameters:
    experiments (DataFrame): Dataframe with experimental data. Required.
    strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

Raises:
    ValueError: empty dataframe
    ValueError: the column for a specific feature is missing in the provided data
    ValueError: there are labcodes with null value
    ValueError: there are labcodes with nan value
    ValueError: labcodes are not unique
    ValueError: the provided columns do not match the defined domain
    ValueError: Input with null values
    ValueError: Input with nan values

Returns:
    pd.DataFrame: The provided dataframe with experimental data.
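
A sketch of a dataframe that passes validation for the example domain above (the labcode values are illustrative):

import pandas as pd

experiments = pd.DataFrame(
    {
        "labcode": ["A1", "A2"],  # optional identifier column; must be non-null and unique
        "x1": [0.2, 0.8],
        "x2": [0.8, 0.2],
        "y": [1.0, 2.0],
        "valid_y": [1, 1],
    }
)
experiments = domain.validate_experiments(experiments)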

validate_unique_feature_keys()

Validates that the provided input and output feature keys are unique.

Raises:
    ValueError: Feature keys are not unique.

Returns:
    Domain: The validated domain.
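
This validator runs automatically on construction; a sketch of the failure mode:

from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput

Domain.from_lists(
    inputs=[ContinuousInput(key="y", bounds=(0, 1))],
    outputs=[ContinuousOutput(key="y")],
)  # raises a validation error: "Feature keys are not unique"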
