Skip to content

Domain

Domain (BaseModel)

Source code in bofire/data_models/domain/domain.py
class Domain(BaseModel):
    """Representation of the optimization problem/domain.

    Attributes:
        inputs (Inputs, optional): Input features of the domain. A sequence of
            input features or a single input feature is wrapped into an
            `Inputs` object before validation. Defaults to `Inputs()`.
        outputs (Outputs, optional): Output features of the domain. A sequence
            of output features or a single output feature is wrapped into an
            `Outputs` object before validation. Defaults to `Outputs()`.
        constraints (Constraints, optional): Constraints of the domain. A list
            of constraints or a single constraint is wrapped into a
            `Constraints` object before validation. Defaults to `Constraints()`.
    """

    type: Literal["Domain"] = "Domain"

    inputs: Inputs = Field(default_factory=lambda: Inputs())
    outputs: Outputs = Field(default_factory=lambda: Outputs())
    constraints: Constraints = Field(default_factory=lambda: Constraints())

    @classmethod
    def from_lists(
        cls,
        inputs: Optional[Sequence[AnyInput]] = None,
        outputs: Optional[Sequence[AnyOutput]] = None,
        constraints: Optional[Sequence[AnyConstraint]] = None,
    ):
        """Build a domain from plain sequences of features and constraints.

        Args:
            inputs (Sequence[AnyInput], optional): Input features. `None` is
                treated as an empty list. Defaults to None.
            outputs (Sequence[AnyOutput], optional): Output features. `None` is
                treated as an empty list. Defaults to None.
            constraints (Sequence[AnyConstraint], optional): Constraints. `None`
                is treated as an empty list. Defaults to None.

        Returns:
            Domain: The domain holding the wrapped features and constraints.

        """
        inputs = [] if inputs is None else inputs
        outputs = [] if outputs is None else outputs
        constraints = [] if constraints is None else constraints
        return cls(
            inputs=Inputs(features=inputs),
            outputs=Outputs(features=outputs),
            constraints=Constraints(constraints=constraints),
        )

    @field_validator("inputs", mode="before")
    @classmethod
    def validate_inputs_list(cls, v):
        """Wrap a sequence of inputs or a single input into `Inputs`."""
        if isinstance(v, collections.abc.Sequence):
            return Inputs(features=v)
        if isinstance_or_union(v, AnyInput):
            return Inputs(features=[v])
        return v

    @field_validator("outputs", mode="before")
    @classmethod
    def validate_outputs_list(cls, v):
        """Wrap a sequence of outputs or a single output into `Outputs`."""
        if isinstance(v, collections.abc.Sequence):
            return Outputs(features=v)
        if isinstance_or_union(v, AnyOutput):
            return Outputs(features=[v])
        return v

    @field_validator("constraints", mode="before")
    @classmethod
    def validate_constraints_list(cls, v):
        """Wrap a list of constraints or a single constraint into `Constraints`.

        Note that only plain `list` objects are wrapped here, whereas the
        inputs/outputs validators accept any `collections.abc.Sequence`.
        """
        if isinstance(v, list):
            return Constraints(constraints=v)
        if isinstance_or_union(v, AnyConstraint):
            return Constraints(constraints=[v])
        return v

    @model_validator(mode="after")
    def validate_unique_feature_keys(self):
        """Validates if provided input and output feature keys are unique

        Raises:
            ValueError: Feature keys are not unique.

        Returns:
            Domain: The validated domain itself.

        """
        keys = self.outputs.get_keys() + self.inputs.get_keys()
        if len(set(keys)) != len(keys):
            raise ValueError("Feature keys are not unique")
        return self

    @model_validator(mode="after")
    def validate_constraints(self):
        """Validate that the constraints defined in the domain fit to the input features.

        Every constraint is asked to validate itself against the inputs of the
        domain; any error raised there propagates to the caller.

        Returns:
            Domain: The validated domain itself.

        """
        for c in self.constraints.get():
            c.validate_inputs(self.inputs)
        return self

    def get_nchoosek_combinations(self, exhaustive: bool = False):
        """Get all possible NChooseK combinations

        Args:
            exhaustive (bool, optional): if True all combinations are returned. Defaults to False.

        Returns:
            Tuple(used_features_list, unused_features_list): used_features_list is a list of lists containing features used in each NChooseK combination.
                unused_features_list is a list of lists containing features unused in each NChooseK combination.
                If the domain has no NChooseK constraint, the flat list of
                continuous input keys and an empty list are returned instead.

        """
        nchoosek_constraints = self.constraints.get(NChooseKConstraint)
        if len(nchoosek_constraints) == 0:
            return self.inputs.get_keys(ContinuousInput), []

        # feature selections allowed by each single NChooseK constraint
        choices_per_constraint = []
        for con in nchoosek_constraints:
            assert isinstance(con, NChooseKConstraint)
            if exhaustive:
                choices = [
                    combo
                    for n in range(con.min_count, con.max_count + 1)
                    for combo in itertools.combinations(con.features, n)
                ]
                if con.none_also_valid:
                    choices.append(())
            else:
                choices = list(itertools.combinations(con.features, con.max_count))
            choices_per_constraint.append(choices)

        # product between the NChooseK constraints; each joint selection is
        # flattened to a sorted list of distinct keys and duplicates are dropped
        # while keeping the order of first occurrence
        seen = set()
        candidates = []
        for selection in itertools.product(*choices_per_constraint):
            used_features = sorted(set(itertools.chain.from_iterable(selection)))
            key = tuple(used_features)
            if key not in seen:
                seen.add(key)
                candidates.append(used_features)

        def _fulfils(con, used_features):
            # constraints may share features, so a joint selection can violate
            # the bounds of a single constraint and has to be re-checked
            count = sum(1 for f in used_features if f in con.features)
            return (con.min_count <= count <= con.max_count) or (
                count == 0 and con.none_also_valid
            )

        used_features_list = [
            used_features
            for used_features in candidates
            if all(_fulfils(con, used_features) for con in nchoosek_constraints)
        ]

        # features that are part of any NChooseK constraint but unused in a combination
        features_in_cc = sorted(
            {f_key for con in nchoosek_constraints for f_key in con.features},
        )
        unused_features_list = [
            [f_key for f_key in features_in_cc if f_key not in used_features]
            for used_features in used_features_list
        ]

        return used_features_list, unused_features_list

    def coerce_invalids(self, experiments: pd.DataFrame) -> pd.DataFrame:
        """Coerces all invalid output measurements to np.nan

        The passed dataframe is modified in place and returned.

        Args:
            experiments (pd.DataFrame): Dataframe containing experimental data

        Returns:
            pd.DataFrame: coerced dataframe

        """
        # an output value counts as invalid if its `valid_<key>` column equals 0
        for feat in self.outputs.get_keys(Output):
            experiments.loc[experiments[f"valid_{feat}"] == 0, feat] = np.nan
        return experiments

    def aggregate_by_duplicates(
        self,
        experiments: pd.DataFrame,
        prec: int,
        delimiter: str = "-",
        method: Literal["mean", "median"] = "mean",
    ) -> Tuple[pd.DataFrame, list]:
        """Aggregate the dataframe by duplicate experiments

        Duplicates are identified based on the experiments with the same input
        features. Continuous input features are rounded before identifying the
        duplicates. Aggregation is performed by applying `method` (mean or
        median) to the continuous output features.

        Args:
            experiments (pd.DataFrame): Dataframe containing experimental data
            prec (int): Precision of the rounding of the continuous input features
            delimiter (str, optional): Delimiter used when combining the orig.
                labcodes to a new one. Defaults to "-".
            method (Literal["mean", "median"], optional): Which aggregation
                method to use. Defaults to "mean".

        Raises:
            ValueError: Unknown aggregation method provided.

        Returns:
            Tuple[pd.DataFrame, list]: Dataframe holding the aggregated
                experiments, list of lists holding the labcodes of the duplicates

        """
        if method not in ["mean", "median"]:
            raise ValueError(f"Unknown aggregation type provided: {method}")

        # prepare the parent frame; work on a copy so the caller's data stays untouched
        preprocessed = self.outputs.preprocess_experiments_any_valid_output(experiments)
        assert preprocessed is not None
        experiments = preprocessed.copy()
        if "labcode" not in experiments.columns:
            # generate 1-based labcodes, zero-padded to the number of digits of
            # the largest one so that all labcodes have the same width
            n_experiments = experiments.shape[0]
            width = len(str(n_experiments))
            experiments["labcode"] = [
                str(i + 1).zfill(width) for i in range(n_experiments)
            ]

        # round it if continuous inputs are present
        if len(self.inputs.get(ContinuousInput)) > 0:
            continuous_keys = self.inputs.get_keys(ContinuousInput)
            experiments[continuous_keys] = experiments[continuous_keys].round(prec)

        # coerce invalid to nan
        experiments = self.coerce_invalids(experiments)

        # group and aggregate
        # NOTE(review): only ContinuousOutput keys are aggregated while the
        # validity flags below are handled for all Output keys - confirm that
        # non-continuous outputs are not expected here.
        agg: Dict[str, Any] = {
            feat: method for feat in self.outputs.get_keys(ContinuousOutput)
        }
        agg["labcode"] = lambda x: delimiter.join(sorted(x.tolist()))
        for feat in self.outputs.get_keys(Output):
            # start out as valid; reset below where the aggregate is nan
            agg[f"valid_{feat}"] = lambda x: 1

        grouped = experiments.groupby(self.inputs.get_keys(Input))
        duplicated_labcodes = [
            sorted(group.labcode.to_numpy().tolist())
            for _, group in grouped
            if group.shape[0] > 1
        ]

        experiments = grouped.aggregate(agg).reset_index(drop=False)
        for feat in self.outputs.get_keys(Output):
            experiments.loc[experiments[feat].isna(), f"valid_{feat}"] = 0

        experiments = experiments.sort_values(by="labcode")
        experiments = experiments.reset_index(drop=True)
        return experiments, sorted(duplicated_labcodes)

    def validate_experiments(
        self,
        experiments: pd.DataFrame,
        strict: bool = False,
    ) -> pd.DataFrame:
        """Checks the experimental data on validity

        Args:
            experiments (pd.DataFrame): Dataframe with experimental data
            strict (bool, optional): Boolean to distinguish if the occurrence of
                fixed features in the dataset should be considered or not.
                Passed on to the validation of the inputs. Defaults to False.

        Raises:
            ValueError: empty dataframe
            ValueError: there are labcodes with null/nan value
            ValueError: labcodes are not unique
            ValueError: presumably also raised by the input and output
                validators which are called at the end (e.g. for missing
                columns or null values in input columns).

        Returns:
            pd.DataFrame: The validated dataframe with experimental data

        """
        if len(experiments) == 0:
            raise ValueError("no experiments provided (empty dataframe)")

        # we allow here for a column named labcode used to identify experiments
        if "labcode" in experiments.columns:
            # test that labcodes are not na; `isnull` is an alias of `isna` in
            # pandas, so this single check covers both None and nan
            if experiments.labcode.isnull().to_numpy().any():
                raise ValueError("there are labcodes with null value")
            # test that labcodes are distinct
            if (
                len(set(experiments.labcode.to_numpy().tolist()))
                != experiments.shape[0]
            ):
                raise ValueError("labcodes are not unique")

        # run the individual validators
        experiments = self.inputs.validate_experiments(
            experiments=experiments,
            strict=strict,
        )
        experiments = self.outputs.validate_experiments(experiments=experiments)
        return experiments

    def describe_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
        """Method to get a tabular overview of how many measurements and how many valid entries are included in the input data for each output feature

        Args:
            experiments (pd.DataFrame): Dataframe with experimental data

        Returns:
            pd.DataFrame: Dataframe with counts how many measurements and how many valid entries are included in the input data for each output feature.
                The additional row "all" holds the total number of rows and the
                number of rows remaining after `preprocess_experiments_all_valid_outputs`.

        """
        data = {}
        for feat in self.outputs.get_keys(Output):
            measured = experiments[feat].notna()
            data[feat] = [
                experiments.loc[measured].shape[0],
                experiments.loc[measured, f"valid_{feat}"].sum(),
            ]
        preprocessed = self.outputs.preprocess_experiments_all_valid_outputs(
            experiments,
        )
        assert preprocessed is not None
        data["all"] = [
            experiments.shape[0],
            preprocessed.shape[0],
        ]
        return pd.DataFrame.from_dict(
            data,
            orient="index",
            columns=["measured", "valid"],
        )

    def validate_candidates(
        self,
        candidates: pd.DataFrame,
        only_inputs: bool = False,
        tol: float = 1e-5,
        raise_validation_error: bool = True,
    ) -> pd.DataFrame:
        """Method to check the validity of proposed candidates

        Args:
            candidates (pd.DataFrame): Dataframe with suggested new experiments (candidates)
            only_inputs (bool,optional): If True, only the input columns are validated. Defaults to False.
            tol (float,optional): tolerance parameter for constraints. A constraint is considered as not fulfilled if the violation
                is larger than tol. Defaults to 1e-5.
            raise_validation_error (bool, optional): If true an error will be raised if candidates violate constraints,
                otherwise only a warning will be displayed. Defaults to True.

        Raises:
            ValueError: presumably raised by the input/output validators, e.g.
                when a column is missing for a defined input or output feature,
                when a non-numerical value is proposed or when an additional
                column is found.
            ConstraintNotFulfilledError: when the constraints are not fulfilled and `raise_validation_error = True`

        Returns:
            pd.DataFrame: dataframe with suggested experiments (candidates)

        """
        # check that each input feature has a col and is valid in itself
        assert isinstance(self.inputs, Inputs)
        candidates = self.inputs.validate_candidates(candidates)
        # check if all constraints are fulfilled
        if not self.constraints.is_fulfilled(candidates, tol=tol).all():
            if raise_validation_error:
                raise ConstraintNotFulfilledError(
                    f"Constraints not fulfilled: {candidates}",
                )
            warnings.warn("Not all constraints are fulfilled.")
        # validate the output related columns unless only inputs are requested
        if not only_inputs:
            assert isinstance(self.outputs, Outputs)
            candidates = self.outputs.validate_candidates(candidates=candidates)
        return candidates

    @property
    def experiment_column_names(self):
        """The columns in the experimental dataframe

        Returns:
            List[str]: List of columns in the experiment dataframe (input feature keys + output feature keys + valid_<output feature key> for each output feature)

        """
        return (self.inputs + self.outputs).get_keys() + [
            f"valid_{output_feature_key}"
            for output_feature_key in self.outputs.get_keys(Output)
        ]

    @property
    def candidate_column_names(self):
        """The columns in the candidate dataframe

        Returns:
            List[str]: List of columns in the candidate dataframe (input feature keys, followed by the `_pred`, `_sd` and `_des`
                columns of every output feature key returned by `get_keys_by_objective(Objective)`)

        """
        assert isinstance(self.outputs, Outputs)
        objective_keys = self.outputs.get_keys_by_objective(Objective)
        return (
            self.inputs.get_keys(Input)
            + [f"{output_feature_key}_pred" for output_feature_key in objective_keys]
            + [f"{output_feature_key}_sd" for output_feature_key in objective_keys]
            + [f"{output_feature_key}_des" for output_feature_key in objective_keys]
        )

__class_vars__ special

The names of the class variables defined on the model.

__private_attributes__ special

Metadata about the private attributes of the model.

__pydantic_complete__ special

Whether model building is completed, or if there are still undefined fields.

__pydantic_computed_fields__ special

A dictionary of computed field names and their corresponding [ComputedFieldInfo][pydantic.fields.ComputedFieldInfo] objects.

__pydantic_custom_init__ special

Whether the model has a custom __init__ method.

__pydantic_decorators__ special

Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.

__pydantic_fields__ special

A dictionary of field names and their corresponding [FieldInfo][pydantic.fields.FieldInfo] objects. This replaces Model.__fields__ from Pydantic V1.

__pydantic_generic_metadata__ special

Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.

__pydantic_parent_namespace__ special

Parent namespace of the model, used for automatic rebuilding of models.

__pydantic_post_init__ special

The name of the post-init method for the model, if defined.

__signature__ special

The synthesized __init__ [Signature][inspect.Signature] of the model.

candidate_column_names property readonly

The columns in the candidate dataframe

Returns:

Type Description
List[str]

List of columns in the candidate dataframe (input feature keys, followed by the _pred, _sd and _des columns of every output feature key that has an objective)

experiment_column_names property readonly

The columns in the experimental dataframe

Returns:

Type Description
List[str]

List of columns in the experiment dataframe (input feature keys + output feature keys + valid_<output feature key> for each output feature)

model_config

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

aggregate_by_duplicates(self, experiments, prec, delimiter='-', method='mean')

Aggregate the dataframe by duplicate experiments

Duplicates are identified based on the experiments with the same input features. Continuous input features are rounded before identifying the duplicates. Aggregation is performed by applying the chosen method (mean or median) to the continuous output features.

Parameters:

Name Type Description Default
experiments pd.DataFrame

Dataframe containing experimental data

required
prec int

Precision of the rounding of the continuous input features

required
delimiter str

Delimiter used when combining the orig. labcodes to a new one. Defaults to "-".

'-'
method Literal["mean", "median"]

Which aggregation method to use. Defaults to "mean".

'mean'

Returns:

Type Description
Tuple[pd.DataFrame, list]

Dataframe holding the aggregated experiments, list of lists holding the labcodes of the duplicates

Source code in bofire/data_models/domain/domain.py
def aggregate_by_duplicates(
    self,
    experiments: pd.DataFrame,
    prec: int,
    delimiter: str = "-",
    method: Literal["mean", "median"] = "mean",
) -> Tuple[pd.DataFrame, list]:
    """Aggregate the dataframe by duplicate experiments

    Duplicates are identified based on the experiments with the same input
    features. Continuous input features are rounded before identifying the
    duplicates. Aggregation is performed by applying `method` (mean or
    median) to the continuous output features.

    Args:
        experiments (pd.DataFrame): Dataframe containing experimental data
        prec (int): Precision of the rounding of the continuous input features
        delimiter (str, optional): Delimiter used when combining the orig.
            labcodes to a new one. Defaults to "-".
        method (Literal["mean", "median"], optional): Which aggregation
            method to use. Defaults to "mean".

    Raises:
        ValueError: Unknown aggregation method provided.

    Returns:
        Tuple[pd.DataFrame, list]: Dataframe holding the aggregated
            experiments, list of lists holding the labcodes of the duplicates

    """
    if method not in ["mean", "median"]:
        raise ValueError(f"Unknown aggregation type provided: {method}")

    # prepare the parent frame; work on a copy so the caller's data stays untouched
    preprocessed = self.outputs.preprocess_experiments_any_valid_output(experiments)
    assert preprocessed is not None
    experiments = preprocessed.copy()
    if "labcode" not in experiments.columns:
        # generate 1-based labcodes, zero-padded to the number of digits of
        # the largest one so that all labcodes have the same width
        n_experiments = experiments.shape[0]
        width = len(str(n_experiments))
        experiments["labcode"] = [
            str(i + 1).zfill(width) for i in range(n_experiments)
        ]

    # round it if continuous inputs are present
    if len(self.inputs.get(ContinuousInput)) > 0:
        continuous_keys = self.inputs.get_keys(ContinuousInput)
        experiments[continuous_keys] = experiments[continuous_keys].round(prec)

    # coerce invalid to nan
    experiments = self.coerce_invalids(experiments)

    # group and aggregate
    # NOTE(review): only ContinuousOutput keys are aggregated while the
    # validity flags below are handled for all Output keys - confirm that
    # non-continuous outputs are not expected here.
    agg: Dict[str, Any] = {
        feat: method for feat in self.outputs.get_keys(ContinuousOutput)
    }
    agg["labcode"] = lambda x: delimiter.join(sorted(x.tolist()))
    for feat in self.outputs.get_keys(Output):
        # start out as valid; reset below where the aggregate is nan
        agg[f"valid_{feat}"] = lambda x: 1

    grouped = experiments.groupby(self.inputs.get_keys(Input))
    duplicated_labcodes = [
        sorted(group.labcode.to_numpy().tolist())
        for _, group in grouped
        if group.shape[0] > 1
    ]

    experiments = grouped.aggregate(agg).reset_index(drop=False)
    for feat in self.outputs.get_keys(Output):
        experiments.loc[experiments[feat].isna(), f"valid_{feat}"] = 0

    experiments = experiments.sort_values(by="labcode")
    experiments = experiments.reset_index(drop=True)
    return experiments, sorted(duplicated_labcodes)

coerce_invalids(self, experiments)

Coerces all invalid output measurements to np.nan

Parameters:

Name Type Description Default
experiments pd.DataFrame

Dataframe containing experimental data

required

Returns:

Type Description
pd.DataFrame

coerced dataframe

Source code in bofire/data_models/domain/domain.py
def coerce_invalids(self, experiments: pd.DataFrame) -> pd.DataFrame:
    """Replace every invalid output measurement by np.nan.

    An output value is treated as invalid when the accompanying
    `valid_<key>` column equals 0. The dataframe is modified in place.

    Args:
        experiments (pd.DataFrame): Dataframe containing experimental data

    Returns:
        pd.DataFrame: the same dataframe with invalid outputs set to np.nan

    """
    for output_key in self.outputs.get_keys(Output):
        is_invalid = experiments[f"valid_{output_key}"] == 0
        experiments.loc[is_invalid, output_key] = np.nan
    return experiments

describe_experiments(self, experiments)

Method to get a tabular overview of how many measurements and how many valid entries are included in the input data for each output feature

Parameters:

Name Type Description Default
experiments pd.DataFrame

Dataframe with experimental data

required

Returns:

Type Description
pd.DataFrame

Dataframe with counts how many measurements and how many valid entries are included in the input data for each output feature

Source code in bofire/data_models/domain/domain.py
def describe_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
    """Summarize per output feature how many measurements and valid entries exist.

    Args:
        experiments (pd.DataFrame): Dataframe with experimental data

    Returns:
        pd.DataFrame: One row per output feature plus a row "all", with the
            columns "measured" (number of non-nan entries, or the total
            number of rows for "all") and "valid" (sum of the validity flags
            of the measured entries, or the number of rows kept by
            `preprocess_experiments_all_valid_outputs` for "all").

    """
    summary = {}
    for output_key in self.outputs.get_keys(Output):
        n_measured = experiments.loc[experiments[output_key].notna()].shape[0]
        n_valid = experiments.loc[
            experiments[output_key].notna(),
            "valid_%s" % output_key,
        ].sum()
        summary[output_key] = [n_measured, n_valid]

    all_valid = self.outputs.preprocess_experiments_all_valid_outputs(experiments)
    assert all_valid is not None
    summary["all"] = [experiments.shape[0], all_valid.shape[0]]

    column_names = ["measured", "valid"]
    return pd.DataFrame.from_dict(summary, orient="index", columns=column_names)

get_nchoosek_combinations(self, exhaustive=False)

Get all possible NChooseK combinations

Parameters:

Name Type Description Default
exhaustive bool

if True all combinations are returned. Defaults to False.

False

Returns:

Type Description
Tuple(used_features_list, unused_features_list)

used_features_list is a list of lists containing features used in each NChooseK combination. unused_features_list is a list of lists containing features unused in each NChooseK combination.

Source code in bofire/data_models/domain/domain.py
def get_nchoosek_combinations(self, exhaustive: bool = False):
    """Enumerate the feature combinations allowed by the NChooseK constraints.

    Args:
        exhaustive (bool, optional): If True, every count between `min_count`
            and `max_count` is enumerated per constraint (plus the empty
            selection if `none_also_valid`); otherwise only selections of
            size `max_count`. Defaults to False.

    Returns:
        Tuple(used_features_list, unused_features_list): used_features_list is
            a list of sorted lists with the features used in each combination,
            unused_features_list holds for each combination the constrained
            features that are not used. If the domain has no NChooseK
            constraint, the flat list of continuous input keys and an empty
            list are returned instead.

    """
    nchoosek_constraints = self.constraints.get(NChooseKConstraint)
    if len(nchoosek_constraints) == 0:
        return self.inputs.get_keys(ContinuousInput), []

    # feature selections allowed by each single constraint
    choices_per_constraint = []
    for con in nchoosek_constraints:
        assert isinstance(con, NChooseKConstraint)
        if exhaustive:
            choices = [
                combo
                for n in range(con.min_count, con.max_count + 1)
                for combo in itertools.combinations(con.features, n)
            ]
            if con.none_also_valid:
                choices.append(())
        else:
            choices = list(itertools.combinations(con.features, con.max_count))
        choices_per_constraint.append(choices)

    # product between the constraints; every joint selection becomes a sorted
    # list of distinct keys, duplicates are dropped keeping the first occurrence
    seen = set()
    candidates = []
    for selection in itertools.product(*choices_per_constraint):
        used_features = sorted({key for combo in selection for key in combo})
        fingerprint = tuple(used_features)
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        candidates.append(used_features)

    def _fulfils(con, used_features):
        # constraints may share features, so the joint selection is re-checked
        # against the bounds of every single constraint
        count = len([f for f in used_features if f in con.features])
        if con.min_count <= count <= con.max_count:
            return True
        return count == 0 and con.none_also_valid

    used_features_list_final = []
    for used_features in candidates:
        checks = []
        for con in nchoosek_constraints:
            assert isinstance(con, NChooseKConstraint)
            checks.append(_fulfils(con, used_features))
        if all(checks):
            used_features_list_final.append(used_features)

    # all features taking part in any NChooseK constraint, sorted
    constrained_features = set()
    for con in nchoosek_constraints:
        assert isinstance(con, NChooseKConstraint)
        constrained_features.update(con.features)
    features_in_cc = sorted(constrained_features)

    unused_features_list = [
        [f_key for f_key in features_in_cc if f_key not in used_features]
        for used_features in used_features_list_final
    ]

    return used_features_list_final, unused_features_list

validate_candidates(self, candidates, only_inputs=False, tol=1e-05, raise_validation_error=True)

Method to check the validity of proposed candidates

Parameters:

Name Type Description Default
candidates pd.DataFrame

Dataframe with suggested new experiments (candidates)

required
only_inputs bool,optional

If True, only the input columns are validated. Defaults to False.

False
tol float,optional

tolerance parameter for constraints. A constraint is considered as not fulfilled if the violation is larger than tol. Defaults to 1e-5.

1e-05
raise_validation_error bool

If true an error will be raised if candidates violate constraints, otherwise only a warning will be displayed. Defaults to True.

True

Exceptions:

Type Description
ValueError

when a column is missing for a defined input feature

ValueError

when a column is missing for a defined output feature

ValueError

when a non-numerical value is proposed

ValueError

when an additional column is found

ConstraintNotFulfilledError

when the constraints are not fulfilled and raise_validation_error = True

Returns:

Type Description
pd.DataFrame

dataframe with suggested experiments (candidates)

Source code in bofire/data_models/domain/domain.py
def validate_candidates(
    self,
    candidates: pd.DataFrame,
    only_inputs: bool = False,
    tol: float = 1e-5,
    raise_validation_error: bool = True,
) -> pd.DataFrame:
    """Check the validity of proposed candidates.

    The candidates are passed through the validation of the input features,
    then tested against the constraints of the domain and, unless
    `only_inputs` is set, passed through the validation of the output
    features.

    Args:
        candidates (pd.DataFrame): Dataframe with suggested new experiments
            (candidates).
        only_inputs (bool, optional): If True, the validation of the output
            features is skipped. Defaults to False.
        tol (float, optional): Tolerance forwarded to the constraint check.
            A constraint is considered as not fulfilled if the violation is
            larger than tol. Defaults to 1e-5.
        raise_validation_error (bool, optional): If True, an error is raised
            when candidates violate constraints, otherwise only a warning is
            issued. Defaults to True.

    Raises:
        ConstraintNotFulfilledError: when the constraints are not fulfilled
            and `raise_validation_error = True`.
        ValueError: propagated from the input/output validators, which are
            not defined in this method; per the existing documentation this
            covers missing input or output columns, non-numerical values
            and additional columns.

    Returns:
        pd.DataFrame: the candidates as returned by the feature validators.

    """
    # every input feature validates its own column first
    input_features = self.inputs
    assert isinstance(input_features, Inputs)
    validated = input_features.validate_candidates(candidates)

    # the constraints are evaluated on the input-validated candidates
    all_fulfilled = self.constraints.is_fulfilled(validated, tol=tol).all()
    if not all_fulfilled:
        if raise_validation_error:
            raise ConstraintNotFulfilledError(
                f"Constraints not fulfilled: {validated}",
            )
        warnings.warn("Not all constraints are fulfilled.")

    if only_inputs:
        return validated

    # finally hand over to the validation of the output features
    output_features = self.outputs
    assert isinstance(output_features, Outputs)
    return output_features.validate_candidates(candidates=validated)

validate_constraints(self)

Validate that the constraints defined in the domain fit to the input features.

Parameters:

Name Type Description Default
v List[Constraint]

List of constraints or empty if no constraints are defined

required
values List[Input]

List of input features of the domain

required

Exceptions:

Type Description
ValueError

Feature key in constraint is unknown.

Returns:

Type Description
List[Constraint]

List of constraints defined for the domain

Source code in bofire/data_models/domain/domain.py
@model_validator(mode="after")
def validate_constraints(self):
    """Validate that the constraints of the domain fit to its input features.

    Runs after model construction: every constraint returned by
    `self.constraints.get()` is asked to validate itself against
    `self.inputs`.

    Raises:
        ValueError: propagated from the `validate_inputs` method of a
            constraint (not defined here); per the existing documentation it
            is raised when a feature key used in a constraint is unknown.

    Returns:
        The domain instance itself, unchanged.

    """
    for constraint in self.constraints.get():
        constraint.validate_inputs(self.inputs)
    return self

validate_experiments(self, experiments, strict=False)

Checks the experimental data on validity

Parameters:

Name Type Description Default
experiments pd.DataFrame

Dataframe with experimental data

required
strict bool

Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

False

Exceptions:

Type Description
ValueError

empty dataframe

ValueError

the column for a specific feature is missing from the provided data

ValueError

there are labcodes with null value

ValueError

there are labcodes with nan value

ValueError

labcodes are not unique

ValueError

the provided columns do not match the defined domain

ValueError

the provided columns do not match the defined domain

ValueError

Input with null values

ValueError

Input with nan values

Returns:

Type Description
pd.DataFrame

The provided dataframe with experimental data

Source code in bofire/data_models/domain/domain.py
def validate_experiments(
    self,
    experiments: pd.DataFrame,
    strict: bool = False,
) -> pd.DataFrame:
    """Check the experimental data for validity.

    The dataframe must not be empty. If it contains a column named
    `labcode`, the entries of that column have to be non-missing and
    unique. Afterwards the dataframe is passed through the experiment
    validation of the input features and of the output features.

    Args:
        experiments (pd.DataFrame): Dataframe with experimental data.
        strict (bool, optional): Forwarded to the validation of the input
            features. Boolean to distinguish if the occurrence of fixed
            features in the dataset should be considered or not.
            Defaults to False.

    Raises:
        ValueError: empty dataframe.
        ValueError: there are labcodes with a missing (None/NaN) value.
        ValueError: labcodes are not unique.
        ValueError: propagated from the input/output validators, which are
            not defined in this method; per the existing documentation this
            covers missing feature columns, columns that do not match the
            defined domain and inputs with null/nan values.

    Returns:
        pd.DataFrame: the dataframe as returned by the feature validators.

    """
    if len(experiments) == 0:
        raise ValueError("no experiments provided (empty dataframe)")

    # we allow here for a column named labcode used to identify experiments
    if "labcode" in experiments.columns:
        labcodes = experiments["labcode"]
        # `isnull` is an alias of `isna` in pandas, so a single check covers
        # both None and NaN; a second check on the alias could never fire.
        if labcodes.isna().to_numpy().any():
            raise ValueError("there are labcodes with null value")
        # test that labcodes are distinct
        if len(set(labcodes.to_numpy().tolist())) != experiments.shape[0]:
            raise ValueError("labcodes are not unique")

    # run the individual validators, inputs first
    experiments = self.inputs.validate_experiments(
        experiments=experiments,
        strict=strict,
    )
    experiments = self.outputs.validate_experiments(experiments=experiments)
    return experiments

validate_unique_feature_keys(self)

Validates if provided input and output feature keys are unique

Parameters:

Name Type Description Default
v Outputs

List of all output features of the domain.

required
value Dict[str, Inputs]

Dict containing a list of input features as single entry.

required

Exceptions:

Type Description
ValueError

Feature keys are not unique.

Returns:

Type Description
Outputs

Keeps output features as given.

Source code in bofire/data_models/domain/domain.py
@model_validator(mode="after")
def validate_unique_feature_keys(self):
    """Validate that the input and output feature keys are unique.

    Runs after model construction: the keys of the output features and of
    the input features are collected into one list, which must not contain
    duplicates.

    Raises:
        ValueError: Feature keys are not unique.

    Returns:
        The domain instance itself, unchanged.

    """
    output_keys = self.outputs.get_keys()
    input_keys = self.inputs.get_keys()
    all_keys = output_keys + input_keys
    # a set drops duplicates, so a shorter set means a key occurs twice
    if len(all_keys) != len(set(all_keys)):
        raise ValueError("Feature keys are not unique")
    return self