Domain

categorical

CategoricalInput (Input)

Base class for all categorical input features.

Attributes:

Name Type Description
categories List[str]

Names of the categories.

allowed List[bool]

List of bools indicating if a category is allowed within the optimization.

Source code in bofire/data_models/features/categorical.py
class CategoricalInput(Input):
    """Base class for all categorical input features.

    Attributes:
        categories (List[str]): Names of the categories.
        allowed (List[bool]): List of bools indicating if a category is allowed within the optimization.
    """

    type: Literal["CategoricalInput"] = "CategoricalInput"
    # order_id: ClassVar[int] = 5
    order_id: ClassVar[int] = 7

    categories: CategoryVals
    allowed: Optional[Annotated[List[bool], Field(min_length=2)]] = Field(
        default=None, validate_default=True
    )

    @field_validator("allowed")
    @classmethod
    def generate_allowed(cls, allowed, info):
        """Generates the list of allowed categories if not provided."""
        if allowed is None and "categories" in info.data.keys():
            return [True for _ in range(len(info.data["categories"]))]
        return allowed

    @model_validator(mode="after")
    def validate_categories_fitting_allowed(self):
        if len(self.allowed) != len(self.categories):  # type: ignore
            raise ValueError("allowed must have same length as categories")
        if sum(self.allowed) == 0:  # type: ignore
            raise ValueError("no category is allowed")
        return self

    @staticmethod
    def valid_transform_types() -> List[CategoricalEncodingEnum]:
        return [
            CategoricalEncodingEnum.ONE_HOT,
            CategoricalEncodingEnum.DUMMY,
            CategoricalEncodingEnum.ORDINAL,
        ]

    def is_fixed(self) -> bool:
        """Returns True if there is only one allowed category.

        Returns:
            [bool]: True if there is only one allowed category
        """
        if self.allowed is None:
            return False
        return sum(self.allowed) == 1

    def fixed_value(
        self, transform_type: Optional[TTransform] = None
    ) -> Union[List[str], List[float], None]:
        """Returns the categories to which the feature is fixed, None if the feature is not fixed

        Returns:
            List[str]: List of categories or None
        """
        if self.is_fixed():
            val = self.get_allowed_categories()[0]
            if transform_type is None:
                return [val]
            elif transform_type == CategoricalEncodingEnum.ONE_HOT:
                return self.to_onehot_encoding(pd.Series([val])).values[0].tolist()
            elif transform_type == CategoricalEncodingEnum.DUMMY:
                return self.to_dummy_encoding(pd.Series([val])).values[0].tolist()
            elif transform_type == CategoricalEncodingEnum.ORDINAL:
                return self.to_ordinal_encoding(pd.Series([val])).tolist()
            else:
                raise ValueError(
                    f"Unkwon transform type {transform_type} for categorical input {self.key}"
                )
        else:
            return None

    def get_allowed_categories(self):
        """Returns the allowed categories.

        Returns:
            list of str: The allowed categories
        """
        if self.allowed is None:
            return []
        return [c for c, a in zip(self.categories, self.allowed) if a]

    def validate_experimental(
        self, values: pd.Series, strict: bool = False
    ) -> pd.Series:
        """Method to validate the experimental dataFrame

        Args:
            values (pd.Series): A dataFrame with experiments
            strict (bool, optional): Boolean to distinguish if the occurence of fixed features in the dataset should be considered or not. Defaults to False.

        Raises:
            ValueError: when an entry is not in the list of allowed categories
            ValueError: when there is no variation in a feature provided by the experimental data

        Returns:
            pd.Series: A dataFrame with experiments
        """
        values = values.map(str)
        if sum(values.isin(self.categories)) != len(values):
            raise ValueError(
                f"invalid values for `{self.key}`, allowed are: `{self.categories}`"
            )
        if strict:
            possible_categories = self.get_possible_categories(values)
            if len(possible_categories) != len(self.categories):
                raise ValueError(
                    f"Categories {list(set(self.categories)-set(possible_categories))} of feature {self.key} not used. Remove them."
                )
        return values

    def validate_candidental(self, values: pd.Series) -> pd.Series:
        """Method to validate the suggested candidates

        Args:
            values (pd.Series): A dataFrame with candidates

        Raises:
            ValueError: when not all values for a feature are one of the allowed categories

        Returns:
            pd.Series: The passed dataFrame with candidates
        """
        values = values.map(str)
        if sum(values.isin(self.get_allowed_categories())) != len(values):
            raise ValueError(
                f"not all values of input feature `{self.key}` are a valid allowed category from {self.get_allowed_categories()}"
            )
        return values

    def get_forbidden_categories(self):
        """Returns the non-allowed categories

        Returns:
            List[str]: List of the non-allowed categories
        """
        return list(set(self.categories) - set(self.get_allowed_categories()))

    def get_possible_categories(self, values: pd.Series) -> list:
        """Return the superset of categories that have been used in the experimental dataset and
        that can be used in the optimization

        Args:
            values (pd.Series): Series with the values for this feature

        Returns:
            list: list of possible categories
        """
        return sorted(set(list(set(values.tolist())) + self.get_allowed_categories()))

    def to_onehot_encoding(self, values: pd.Series) -> pd.DataFrame:
        """Converts values to a one-hot encoding.

        Args:
            values (pd.Series): Series to be transformed.

        Returns:
            pd.DataFrame: One-hot transformed data frame.
        """
        return pd.DataFrame(
            {get_encoded_name(self.key, c): values == c for c in self.categories},
            dtype=float,
            index=values.index,
        )

    def from_onehot_encoding(self, values: pd.DataFrame) -> pd.Series:
        """Converts values back from one-hot encoding.

        Args:
            values (pd.DataFrame): One-hot encoded values.

        Raises:
            ValueError: If one-hot columns not present in `values`.

        Returns:
            pd.Series: Series with categorical values.
        """
        cat_cols = [get_encoded_name(self.key, c) for c in self.categories]
        # we explicitly allow the dataframe to have more columns than needed,
        # to make the backtransform easier.
        if np.any([c not in values.columns for c in cat_cols]):
            raise ValueError(
                f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}."
            )
        s = values[cat_cols].idxmax(1).str[(len(self.key) + 1) :]
        s.name = self.key
        return s

    def to_dummy_encoding(self, values: pd.Series) -> pd.DataFrame:
        """Converts values to a dummy-hot encoding, dropping the first categorical level.

        Args:
            values (pd.Series): Series to be transformed.

        Returns:
            pd.DataFrame: Dummy-hot transformed data frame.
        """
        return pd.DataFrame(
            {get_encoded_name(self.key, c): values == c for c in self.categories[1:]},
            dtype=float,
            index=values.index,
        )

    def from_dummy_encoding(self, values: pd.DataFrame) -> pd.Series:
        """Convert points back from dummy encoding.

        Args:
            values (pd.DataFrame): Dummy-hot encoded values.

        Raises:
            ValueError: If one-hot columns not present in `values`.

        Returns:
            pd.Series: Series with categorical values.
        """
        cat_cols = [get_encoded_name(self.key, c) for c in self.categories]
        # we explicitly allow the dataframe to have more columns than needed,
        # to make the backtransform easier.
        if np.any([c not in values.columns for c in cat_cols[1:]]):
            raise ValueError(
                f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols[1:]}."
            )
        values = values.copy()
        values[cat_cols[0]] = 1 - values[cat_cols[1:]].sum(axis=1)
        s = values[cat_cols].idxmax(1).str[(len(self.key) + 1) :]
        s.name = self.key
        return s

    def to_ordinal_encoding(self, values: pd.Series) -> pd.Series:
        """Converts values to an ordinal integer based encoding.

        Args:
            values (pd.Series): Series to be transformed.

        Returns:
            pd.Series: Ordinal encoded values.
        """
        enc = pd.Series(range(len(self.categories)), index=list(self.categories))
        s = enc[values]
        s.index = values.index
        s.name = self.key
        return s

    def from_ordinal_encoding(self, values: pd.Series) -> pd.Series:
        """Convertes values back from ordinal encoding.

        Args:
            values (pd.Series): Ordinal encoded series.

        Returns:
            pd.Series: Series with categorical values.
        """
        enc = np.array(self.categories)
        return pd.Series(enc[values], index=values.index, name=self.key)

    def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
        """Draw random samples from the feature.

        Args:
            n (int): number of samples.

        Returns:
            pd.Series: drawn samples.
        """
        return pd.Series(
            name=self.key,
            data=np.random.default_rng(seed=seed).choice(
                self.get_allowed_categories(), n
            ),
        )

    def get_bounds(
        self,
        transform_type: TTransform,
        values: Optional[pd.Series] = None,
        reference_value: Optional[str] = None,
    ) -> Tuple[List[float], List[float]]:
        assert isinstance(transform_type, CategoricalEncodingEnum)
        if transform_type == CategoricalEncodingEnum.ORDINAL:
            return [0], [len(self.categories) - 1]
        if transform_type == CategoricalEncodingEnum.ONE_HOT:
            # in the case that values are None, we return the bounds
            # based on the optimization bounds, else we return the true
            # bounds as this is for model fitting.
            if values is None:
                lower = [0.0 for _ in self.categories]
                upper = [
                    1.0 if self.allowed[i] is True else 0.0  # type: ignore
                    for i, _ in enumerate(self.categories)
                ]
            else:
                lower = [0.0 for _ in self.categories]
                upper = [1.0 for _ in self.categories]
            return lower, upper
        if transform_type == CategoricalEncodingEnum.DUMMY:
            lower = [0.0 for _ in range(len(self.categories) - 1)]
            upper = [1.0 for _ in range(len(self.categories) - 1)]
            return lower, upper
        if transform_type == CategoricalEncodingEnum.DESCRIPTOR:
            raise ValueError(
                f"Invalid descriptor transform for categorical {self.key}."
            )
        else:
            raise ValueError(
                f"Invalid transform_type {transform_type} provided for categorical {self.key}."
            )

    def __str__(self) -> str:
        """Returns the number of categories as str

        Returns:
            str: Number of categories
        """
        return f"{len(self.categories)} categories"

__str__(self) special

Returns the number of categories as str

Returns:

Type Description
str

Number of categories

Source code in bofire/data_models/features/categorical.py
def __str__(self) -> str:
    """Returns the number of categories as str

    Returns:
        str: Number of categories
    """
    return f"{len(self.categories)} categories"

fixed_value(self, transform_type=None)

Returns the categories to which the feature is fixed, or None if the feature is not fixed

Returns:

Type Description
List[str]

List of categories or None

Source code in bofire/data_models/features/categorical.py
def fixed_value(
    self, transform_type: Optional[TTransform] = None
) -> Union[List[str], List[float], None]:
    """Returns the categories to which the feature is fixed, None if the feature is not fixed

    Returns:
        List[str]: List of categories or None
    """
    if self.is_fixed():
        val = self.get_allowed_categories()[0]
        if transform_type is None:
            return [val]
        elif transform_type == CategoricalEncodingEnum.ONE_HOT:
            return self.to_onehot_encoding(pd.Series([val])).values[0].tolist()
        elif transform_type == CategoricalEncodingEnum.DUMMY:
            return self.to_dummy_encoding(pd.Series([val])).values[0].tolist()
        elif transform_type == CategoricalEncodingEnum.ORDINAL:
            return self.to_ordinal_encoding(pd.Series([val])).tolist()
        else:
            raise ValueError(
                f"Unkwon transform type {transform_type} for categorical input {self.key}"
            )
    else:
        return None
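
For illustration, a sketch (same import assumptions as above): a feature with exactly one allowed category is fixed, and fixed_value reports that category in the requested encoding. CategoricalEncodingEnum is assumed to live in bofire.data_models.enum:

from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.categorical import CategoricalInput

feat = CategoricalInput(
    key="solvent",
    categories=["water", "ethanol", "acetone"],
    allowed=[False, True, False],
)
assert feat.is_fixed()  # only one category is allowed
assert feat.fixed_value() == ["ethanol"]
assert feat.fixed_value(CategoricalEncodingEnum.ONE_HOT) == [0.0, 1.0, 0.0]
assert feat.fixed_value(CategoricalEncodingEnum.ORDINAL) == [1]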

from_dummy_encoding(self, values)

Convert points back from dummy encoding.

Parameters:

Name Type Description Default
values pd.DataFrame

Dummy-hot encoded values.

required

Exceptions:

Type Description
ValueError

If one-hot columns not present in values.

Returns:

Type Description
pd.Series

Series with categorical values.

Source code in bofire/data_models/features/categorical.py
def from_dummy_encoding(self, values: pd.DataFrame) -> pd.Series:
    """Convert points back from dummy encoding.

    Args:
        values (pd.DataFrame): Dummy-hot encoded values.

    Raises:
        ValueError: If one-hot columns not present in `values`.

    Returns:
        pd.Series: Series with categorical values.
    """
    cat_cols = [get_encoded_name(self.key, c) for c in self.categories]
    # we explicitly allow the dataframe to have more columns than needed,
    # to make the backtransform easier.
    if np.any([c not in values.columns for c in cat_cols[1:]]):
        raise ValueError(
            f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols[1:]}."
        )
    values = values.copy()
    values[cat_cols[0]] = 1 - values[cat_cols[1:]].sum(axis=1)
    s = values[cat_cols].idxmax(1).str[(len(self.key) + 1) :]
    s.name = self.key
    return s

from_onehot_encoding(self, values)

Converts values back from one-hot encoding.

Parameters:

Name Type Description Default
values pd.DataFrame

One-hot encoded values.

required

Exceptions:

Type Description
ValueError

If one-hot columns not present in values.

Returns:

Type Description
pd.Series

Series with categorical values.

Source code in bofire/data_models/features/categorical.py
def from_onehot_encoding(self, values: pd.DataFrame) -> pd.Series:
    """Converts values back from one-hot encoding.

    Args:
        values (pd.DataFrame): One-hot encoded values.

    Raises:
        ValueError: If one-hot columns not present in `values`.

    Returns:
        pd.Series: Series with categorical values.
    """
    cat_cols = [get_encoded_name(self.key, c) for c in self.categories]
    # we explicitly allow the dataframe to have more columns than needed,
    # to make the backtransform easier.
    if np.any([c not in values.columns for c in cat_cols]):
        raise ValueError(
            f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}."
        )
    s = values[cat_cols].idxmax(1).str[(len(self.key) + 1) :]
    s.name = self.key
    return s
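
A round-trip sketch (import assumptions as above). The encoded column names come from get_encoded_name, which, judging from the `len(self.key) + 1` slice in the decoder, joins the feature key and the category with an underscore:

import pandas as pd
from bofire.data_models.features.categorical import CategoricalInput

feat = CategoricalInput(key="solvent", categories=["water", "ethanol"])
encoded = feat.to_onehot_encoding(pd.Series(["ethanol", "water"]))
# encoded has float columns solvent_water and solvent_ethanol
decoded = feat.from_onehot_encoding(encoded)
assert decoded.tolist() == ["ethanol", "water"]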

from_ordinal_encoding(self, values)

Converts values back from ordinal encoding.

Parameters:

Name Type Description Default
values pd.Series

Ordinal encoded series.

required

Returns:

Type Description
pd.Series

Series with categorical values.

Source code in bofire/data_models/features/categorical.py
def from_ordinal_encoding(self, values: pd.Series) -> pd.Series:
    """Convertes values back from ordinal encoding.

    Args:
        values (pd.Series): Ordinal encoded series.

    Returns:
        pd.Series: Series with categorical values.
    """
    enc = np.array(self.categories)
    return pd.Series(enc[values], index=values.index, name=self.key)
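
The ordinal encoding maps each category to its position in `categories`; a quick round-trip sketch (same import assumptions):

import pandas as pd
from bofire.data_models.features.categorical import CategoricalInput

feat = CategoricalInput(key="solvent", categories=["water", "ethanol", "acetone"])
ordinal = feat.to_ordinal_encoding(pd.Series(["acetone", "water"]))
assert ordinal.tolist() == [2, 0]
assert feat.from_ordinal_encoding(ordinal).tolist() == ["acetone", "water"]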

generate_allowed(allowed, info) classmethod

Generates the list of allowed categories if not provided.

Source code in bofire/data_models/features/categorical.py
@field_validator("allowed")
@classmethod
def generate_allowed(cls, allowed, info):
    """Generates the list of allowed categories if not provided."""
    if allowed is None and "categories" in info.data.keys():
        return [True for _ in range(len(info.data["categories"]))]
    return allowed

get_allowed_categories(self)

Returns the allowed categories.

Returns:

Type Description
list of str

The allowed categories

Source code in bofire/data_models/features/categorical.py
def get_allowed_categories(self):
    """Returns the allowed categories.

    Returns:
        list of str: The allowed categories
    """
    if self.allowed is None:
        return []
    return [c for c, a in zip(self.categories, self.allowed) if a]

get_bounds(self, transform_type, values=None, reference_value=None)

Returns the bounds of an input feature depending on the requested transform type.

Parameters:

Name Type Description Default
transform_type TTransform

The requested transform type.

required
values Optional[pd.Series]

If values are provided the bounds are returned taking the most extreme values for the feature into account. Defaults to None.

None
reference_value Optional[str]

If a reference value is provided, then the local bounds based on a local search region are provided. Currently only supported for continuous inputs. For more details, see https://www.merl.com/publications/docs/TR2023-057.pdf.

None

Returns:

Type Description
Tuple[List[float], List[float]]

List of lower bound values, list of upper bound values.

Source code in bofire/data_models/features/categorical.py
def get_bounds(
    self,
    transform_type: TTransform,
    values: Optional[pd.Series] = None,
    reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
    assert isinstance(transform_type, CategoricalEncodingEnum)
    if transform_type == CategoricalEncodingEnum.ORDINAL:
        return [0], [len(self.categories) - 1]
    if transform_type == CategoricalEncodingEnum.ONE_HOT:
        # in the case that values are None, we return the bounds
        # based on the optimization bounds, else we return the true
        # bounds as this is for model fitting.
        if values is None:
            lower = [0.0 for _ in self.categories]
            upper = [
                1.0 if self.allowed[i] is True else 0.0  # type: ignore
                for i, _ in enumerate(self.categories)
            ]
        else:
            lower = [0.0 for _ in self.categories]
            upper = [1.0 for _ in self.categories]
        return lower, upper
    if transform_type == CategoricalEncodingEnum.DUMMY:
        lower = [0.0 for _ in range(len(self.categories) - 1)]
        upper = [1.0 for _ in range(len(self.categories) - 1)]
        return lower, upper
    if transform_type == CategoricalEncodingEnum.DESCRIPTOR:
        raise ValueError(
            f"Invalid descriptor transform for categorical {self.key}."
        )
    else:
        raise ValueError(
            f"Invalid transform_type {transform_type} provided for categorical {self.key}."
        )
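
A sketch of the one-hot bounds logic (import assumptions as above): without `values`, the upper bound of each non-allowed category is clamped to 0; with experimental `values`, the full [0, 1] box is returned for model fitting:

import pandas as pd
from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.categorical import CategoricalInput

feat = CategoricalInput(
    key="solvent",
    categories=["water", "ethanol", "acetone"],
    allowed=[True, True, False],
)
lower, upper = feat.get_bounds(CategoricalEncodingEnum.ONE_HOT)
assert upper == [1.0, 1.0, 0.0]  # acetone is not allowed

lower, upper = feat.get_bounds(
    CategoricalEncodingEnum.ONE_HOT, values=pd.Series(["acetone"])
)
assert upper == [1.0, 1.0, 1.0]  # true bounds for model fitting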

get_forbidden_categories(self)

Returns the non-allowed categories

Returns:

Type Description
List[str]

List of the non-allowed categories

Source code in bofire/data_models/features/categorical.py
def get_forbidden_categories(self):
    """Returns the non-allowed categories

    Returns:
        List[str]: List of the non-allowed categories
    """
    return list(set(self.categories) - set(self.get_allowed_categories()))

get_possible_categories(self, values)

Return the superset of categories that have been used in the experimental dataset and that can be used in the optimization

Parameters:

Name Type Description Default
values pd.Series

Series with the values for this feature

required

Returns:

Type Description
list

list of possible categories

Source code in bofire/data_models/features/categorical.py
def get_possible_categories(self, values: pd.Series) -> list:
    """Return the superset of categories that have been used in the experimental dataset and
    that can be used in the optimization

    Args:
        values (pd.Series): Series with the values for this feature

    Returns:
        list: list of possible categories
    """
    return sorted(set(list(set(values.tolist())) + self.get_allowed_categories()))

is_fixed(self)

Returns True if there is only one allowed category.

Returns:

Type Description
[bool]

True if there is only one allowed category

Source code in bofire/data_models/features/categorical.py
def is_fixed(self) -> bool:
    """Returns True if there is only one allowed category.

    Returns:
        [bool]: True if there is only one allowed category
    """
    if self.allowed is None:
        return False
    return sum(self.allowed) == 1

sample(self, n, seed=None)

Draw random samples from the feature.

Parameters:

Name Type Description Default
n int

number of samples.

required

Returns:

Type Description
pd.Series

drawn samples.

Source code in bofire/data_models/features/categorical.py
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
    """Draw random samples from the feature.

    Args:
        n (int): number of samples.

    Returns:
        pd.Series: drawn samples.
    """
    return pd.Series(
        name=self.key,
        data=np.random.default_rng(seed=seed).choice(
            self.get_allowed_categories(), n
        ),
    )
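
Sampling only draws from the allowed categories; a short sketch (import assumptions as above):

from bofire.data_models.features.categorical import CategoricalInput

feat = CategoricalInput(
    key="solvent",
    categories=["water", "ethanol", "acetone"],
    allowed=[True, True, False],
)
samples = feat.sample(5, seed=42)
assert samples.isin(["water", "ethanol"]).all()  # the forbidden category is never drawn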

to_dummy_encoding(self, values)

Converts values to a dummy-hot encoding, dropping the first categorical level.

Parameters:

Name Type Description Default
values pd.Series

Series to be transformed.

required

Returns:

Type Description
pd.DataFrame

Dummy-hot transformed data frame.

Source code in bofire/data_models/features/categorical.py
def to_dummy_encoding(self, values: pd.Series) -> pd.DataFrame:
    """Converts values to a dummy-hot encoding, dropping the first categorical level.

    Args:
        values (pd.Series): Series to be transformed.

    Returns:
        pd.DataFrame: Dummy-hot transformed data frame.
    """
    return pd.DataFrame(
        {get_encoded_name(self.key, c): values == c for c in self.categories[1:]},
        dtype=float,
        index=values.index,
    )

to_onehot_encoding(self, values)

Converts values to a one-hot encoding.

Parameters:

Name Type Description Default
values pd.Series

Series to be transformed.

required

Returns:

Type Description
pd.DataFrame

One-hot transformed data frame.

Source code in bofire/data_models/features/categorical.py
def to_onehot_encoding(self, values: pd.Series) -> pd.DataFrame:
    """Converts values to a one-hot encoding.

    Args:
        values (pd.Series): Series to be transformed.

    Returns:
        pd.DataFrame: One-hot transformed data frame.
    """
    return pd.DataFrame(
        {get_encoded_name(self.key, c): values == c for c in self.categories},
        dtype=float,
        index=values.index,
    )

to_ordinal_encoding(self, values)

Converts values to an integer-based ordinal encoding.

Parameters:

Name Type Description Default
values pd.Series

Series to be transformed.

required

Returns:

Type Description
pd.Series

Ordinal encoded values.

Source code in bofire/data_models/features/categorical.py
def to_ordinal_encoding(self, values: pd.Series) -> pd.Series:
    """Converts values to an ordinal integer based encoding.

    Args:
        values (pd.Series): Series to be transformed.

    Returns:
        pd.Series: Ordinal encoded values.
    """
    enc = pd.Series(range(len(self.categories)), index=list(self.categories))
    s = enc[values]
    s.index = values.index
    s.name = self.key
    return s

validate_candidental(self, values)

Method to validate the suggested candidates

Parameters:

Name Type Description Default
values pd.Series

A series with candidate values

required

Exceptions:

Type Description
ValueError

when not all values for a feature are one of the allowed categories

Returns:

Type Description
pd.Series

The validated candidate values

Source code in bofire/data_models/features/categorical.py
def validate_candidental(self, values: pd.Series) -> pd.Series:
    """Method to validate the suggested candidates

    Args:
        values (pd.Series): A dataFrame with candidates

    Raises:
        ValueError: when not all values for a feature are one of the allowed categories

    Returns:
        pd.Series: The passed dataFrame with candidates
    """
    values = values.map(str)
    if sum(values.isin(self.get_allowed_categories())) != len(values):
        raise ValueError(
            f"not all values of input feature `{self.key}` are a valid allowed category from {self.get_allowed_categories()}"
        )
    return values

validate_experimental(self, values, strict=False)

Method to validate the experimental data

Parameters:

Name Type Description Default
values pd.Series

A series with experimental values

required
strict bool

Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

False

Exceptions:

Type Description
ValueError

when an entry is not in the list of allowed categories

ValueError

when there is no variation in a feature provided by the experimental data

Returns:

Type Description
pd.Series

The validated experimental values

Source code in bofire/data_models/features/categorical.py
def validate_experimental(
    self, values: pd.Series, strict: bool = False
) -> pd.Series:
    """Method to validate the experimental dataFrame

    Args:
        values (pd.Series): A dataFrame with experiments
        strict (bool, optional): Boolean to distinguish if the occurence of fixed features in the dataset should be considered or not. Defaults to False.

    Raises:
        ValueError: when an entry is not in the list of allowed categories
        ValueError: when there is no variation in a feature provided by the experimental data

    Returns:
        pd.Series: A dataFrame with experiments
    """
    values = values.map(str)
    if sum(values.isin(self.categories)) != len(values):
        raise ValueError(
            f"invalid values for `{self.key}`, allowed are: `{self.categories}`"
        )
    if strict:
        possible_categories = self.get_possible_categories(values)
        if len(possible_categories) != len(self.categories):
            raise ValueError(
                f"Categories {list(set(self.categories)-set(possible_categories))} of feature {self.key} not used. Remove them."
            )
    return values
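
A sketch of the two validation paths (import assumptions as above): experimental data may contain any known category, candidates must use only allowed ones, and strict=True additionally requires every category to occur in the data:

import pandas as pd
from bofire.data_models.features.categorical import CategoricalInput

feat = CategoricalInput(
    key="solvent",
    categories=["water", "ethanol", "acetone"],
    allowed=[True, False, False],
)
feat.validate_experimental(pd.Series(["water", "ethanol"]))  # ok, both are known
feat.validate_candidental(pd.Series(["water"]))  # ok, water is allowed
try:
    feat.validate_candidental(pd.Series(["ethanol"]))  # ethanol is not allowed
except ValueError as err:
    print(err)
try:
    # strict: ethanol and acetone never occur in the data and are not allowed
    feat.validate_experimental(pd.Series(["water"]), strict=True)
except ValueError as err:
    print(err)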

CategoricalOutput (Output)

Source code in bofire/data_models/features/categorical.py
class CategoricalOutput(Output):
    type: Literal["CategoricalOutput"] = "CategoricalOutput"
    order_id: ClassVar[int] = 10

    categories: CategoryVals
    objective: AnyCategoricalObjective

    @model_validator(mode="after")
    def validate_objective_categories(self):
        """validates that objective categories match the output categories

        Raises:
            ValueError: when categories do not match objective categories

        Returns:
            self
        """
        if self.objective.categories != self.categories:  # type: ignore
            raise ValueError("categories must match to objective categories")
        return self

    def __call__(self, values: pd.Series) -> pd.Series:
        if self.objective is None:
            return pd.Series(
                data=[np.nan for _ in range(len(values))],
                index=values.index,
                name=values.name,
            )
        return self.objective(values)  # type: ignore

    def validate_experimental(self, values: pd.Series) -> pd.Series:
        values = values.map(str)
        if sum(values.isin(self.categories)) != len(values):
            raise ValueError(
                f"invalid values for `{self.key}`, allowed are: `{self.categories}`"
            )
        return values

    def __str__(self) -> str:
        return "CategoricalOutputFeature"

validate_experimental(self, values)

Method to validate the experimental Series

Parameters:

Name Type Description Default
values pd.Series

A series with values for the outcome

required

Returns:

Type Description
pd.Series

The validated experimental values

Source code in bofire/data_models/features/categorical.py
def validate_experimental(self, values: pd.Series) -> pd.Series:
    values = values.map(str)
    if sum(values.isin(self.categories)) != len(values):
        raise ValueError(
            f"invalid values for `{self.key}`, allowed are: `{self.categories}`"
        )
    return values

validate_objective_categories(self)

Validates that the objective categories match the output categories

Exceptions:

Type Description
ValueError

when categories do not match objective categories

Returns:

Type Description

self

Source code in bofire/data_models/features/categorical.py
@model_validator(mode="after")
def validate_objective_categories(self):
    """validates that objective categories match the output categories

    Raises:
        ValueError: when categories do not match objective categories

    Returns:
        self
    """
    if self.objective.categories != self.categories:  # type: ignore
        raise ValueError("categories must match to objective categories")
    return self

continuous

ContinuousInput (NumericalInput)

Base class for all continuous input features.

Attributes:

Name Type Description
bounds Tuple[float, float]

A tuple that stores the lower and upper bound of the feature.

stepsize float

Float indicating the allowed stepsize between lower and upper. Defaults to None.

local_relative_bounds Tuple[float, float]

A tuple that stores the lower and upper bounds relative to a reference value. Defaults to None.

Source code in bofire/data_models/features/continuous.py
class ContinuousInput(NumericalInput):
    """Base class for all continuous input features.

    Attributes:
        bounds (Tuple[float, float]): A tuple that stores the lower and upper bound of the feature.
        stepsize (float, optional): Float indicating the allowed stepsize between lower and upper. Defaults to None.
        local_relative_bounds (Tuple[float, float], optional): A tuple that stores the lower and upper bounds relative to a reference value.
            Defaults to None.
    """

    type: Literal["ContinuousInput"] = "ContinuousInput"
    order_id: ClassVar[int] = 1

    bounds: Tuple[float, float]
    local_relative_bounds: Optional[
        Tuple[Annotated[float, Field(gt=0)], Annotated[float, Field(gt=0)]]
    ] = None
    stepsize: Optional[float] = None

    @property
    def lower_bound(self) -> float:
        return self.bounds[0]

    @property
    def upper_bound(self) -> float:
        return self.bounds[1]

    @model_validator(mode="after")
    def validate_step_size(self):
        if self.stepsize is None:
            return self
        lower, upper = self.bounds
        if lower == upper and self.stepsize is not None:
            raise ValueError(
                "Stepsize cannot be provided for a fixed continuous input."
            )
        range = upper - lower
        if np.arange(lower, upper + self.stepsize, self.stepsize)[-1] != upper:
            raise ValueError(
                f"Stepsize of {self.stepsize} does not match the provided interval [{lower},{upper}]."
            )
        if range // self.stepsize == 1:
            raise ValueError("Stepsize is too big, only one value allowed.")
        return self

    def round(self, values: pd.Series) -> pd.Series:
        """Round values to the stepsize of the feature. If no stepsize is provided return the
        provided values.

        Args:
            values (pd.Series): The values that should be rounded.

        Returns:
            pd.Series: The rounded values
        """
        if self.stepsize is None:
            return values
        self.validate_candidental(values=values)
        allowed_values = np.arange(
            self.lower_bound, self.upper_bound + self.stepsize, self.stepsize
        )
        idx = abs(values.values.reshape([len(values), 1]) - allowed_values).argmin(  # type: ignore
            axis=1
        )
        return pd.Series(
            data=self.lower_bound + idx * self.stepsize, index=values.index
        )

    @field_validator("bounds")
    @classmethod
    def validate_lower_upper(cls, bounds):
        """Validates that the lower bound is lower than the upper bound

        Args:
            bounds (Tuple[float, float]): Tuple with the lower and upper bound

        Raises:
            ValueError: when the lower bound is higher than the upper bound

        Returns:
            Tuple[float, float]: The validated bounds
        """
        if bounds[0] > bounds[1]:
            raise ValueError(
                f"lower bound must be <= upper bound, got {bounds[0]} > {bounds[1]}"
            )
        return bounds

    def validate_candidental(self, values: pd.Series) -> pd.Series:
        """Method to validate the suggested candidates

        Args:
            values (pd.Series): A dataFrame with candidates

        Raises:
            ValueError: when non numerical values are passed
            ValueError: when values are larger than the upper bound of the feature
            ValueError: when values are lower than the lower bound of the feature

        Returns:
            pd.Series: The passed dataFrame with candidates
        """

        noise = 10e-6
        values = super().validate_candidental(values)
        if (values < self.lower_bound - noise).any():
            raise ValueError(
                f"not all values of input feature `{self.key}`are larger than lower bound `{self.lower_bound}` "
            )
        if (values > self.upper_bound + noise).any():
            raise ValueError(
                f"not all values of input feature `{self.key}`are smaller than upper bound `{self.upper_bound}` "
            )
        return values

    def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
        """Draw random samples from the feature.

        Args:
            n (int): number of samples.

        Returns:
            pd.Series: drawn samples.
        """
        return pd.Series(
            name=self.key,
            data=np.random.default_rng(seed=seed).uniform(
                self.lower_bound, self.upper_bound, n
            ),
        )

    def get_bounds(
        self,
        transform_type: Optional[TTransform] = None,
        values: Optional[pd.Series] = None,
        reference_value: Optional[float] = None,
    ) -> Tuple[List[float], List[float]]:
        assert transform_type is None
        if reference_value is not None and values is not None:
            raise ValueError("Only one can be used, `local_value` or `values`.")
        if values is None:
            if reference_value is None or self.is_fixed():
                return [self.lower_bound], [self.upper_bound]
            else:
                local_relative_bounds = self.local_relative_bounds or (
                    math.inf,
                    math.inf,
                )
                return [
                    max(
                        reference_value - local_relative_bounds[0],
                        self.lower_bound,
                    )
                ], [
                    min(
                        reference_value + local_relative_bounds[1],
                        self.upper_bound,
                    )
                ]
        lower = min(self.lower_bound, values.min())  # type: ignore
        upper = max(self.upper_bound, values.max())  # type: ignore
        return [lower], [upper]

    def __str__(self) -> str:
        """Method to return a string of lower and upper bound

        Returns:
            str: String of a list with lower and upper bound
        """
        return f"[{self.lower_bound},{self.upper_bound}]"

__str__(self) special

Method to return a string of lower and upper bound

Returns:

Type Description
str

String of a list with lower and upper bound

Source code in bofire/data_models/features/continuous.py
def __str__(self) -> str:
    """Method to return a string of lower and upper bound

    Returns:
        str: String of a list with lower and upper bound
    """
    return f"[{self.lower_bound},{self.upper_bound}]"

get_bounds(self, transform_type=None, values=None, reference_value=None)

Returns the bounds of an input feature depending on the requested transform type.

Parameters:

Name Type Description Default
transform_type Optional[TTransform]

The requested transform type. Defaults to None.

None
values Optional[pd.Series]

If values are provided the bounds are returned taking the most extreme values for the feature into account. Defaults to None.

None
reference_value Optional[float]

If a reference value is provided, then the local bounds based on a local search region are provided. Currently only supported for continuous inputs. For more details, see https://www.merl.com/publications/docs/TR2023-057.pdf.

None

Returns:

Type Description
Tuple[List[float], List[float]]

List of lower bound values, list of upper bound values.

Source code in bofire/data_models/features/continuous.py
def get_bounds(
    self,
    transform_type: Optional[TTransform] = None,
    values: Optional[pd.Series] = None,
    reference_value: Optional[float] = None,
) -> Tuple[List[float], List[float]]:
    assert transform_type is None
    if reference_value is not None and values is not None:
        raise ValueError("Only one can be used, `local_value` or `values`.")
    if values is None:
        if reference_value is None or self.is_fixed():
            return [self.lower_bound], [self.upper_bound]
        else:
            local_relative_bounds = self.local_relative_bounds or (
                math.inf,
                math.inf,
            )
            return [
                max(
                    reference_value - local_relative_bounds[0],
                    self.lower_bound,
                )
            ], [
                min(
                    reference_value + local_relative_bounds[1],
                    self.upper_bound,
                )
            ]
    lower = min(self.lower_bound, values.min())  # type: ignore
    upper = max(self.upper_bound, values.max())  # type: ignore
    return [lower], [upper]
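
A sketch of the local search region behaviour (import assumptions as above): with local_relative_bounds set, a reference_value yields a window around it that is clipped to the feature bounds:

from bofire.data_models.features.continuous import ContinuousInput

x = ContinuousInput(key="x", bounds=(0, 10), local_relative_bounds=(1.0, 2.0))
assert x.get_bounds() == ([0.0], [10.0])
assert x.get_bounds(reference_value=9.5) == ([8.5], [10.0])  # upper end clipped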

round(self, values)

Round values to the stepsize of the feature. If no stepsize is provided, the values are returned unchanged.

Parameters:

Name Type Description Default
values pd.Series

The values that should be rounded.

required

Returns:

Type Description
pd.Series

The rounded values

Source code in bofire/data_models/features/continuous.py
def round(self, values: pd.Series) -> pd.Series:
    """Round values to the stepsize of the feature. If no stepsize is provided return the
    provided values.

    Args:
        values (pd.Series): The values that should be rounded.

    Returns:
        pd.Series: The rounded values
    """
    if self.stepsize is None:
        return values
    self.validate_candidental(values=values)
    allowed_values = np.arange(
        self.lower_bound, self.upper_bound + self.stepsize, self.stepsize
    )
    idx = abs(values.values.reshape([len(values), 1]) - allowed_values).argmin(  # type: ignore
        axis=1
    )
    return pd.Series(
        data=self.lower_bound + idx * self.stepsize, index=values.index
    )
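
A rounding sketch (import assumptions as above): values are snapped to the nearest point of the stepsize grid spanned between the bounds:

import pandas as pd
from bofire.data_models.features.continuous import ContinuousInput

x = ContinuousInput(key="x", bounds=(0, 1), stepsize=0.25)
rounded = x.round(pd.Series([0.1, 0.3, 0.7]))
assert rounded.tolist() == [0.0, 0.25, 0.75]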

sample(self, n, seed=None)

Draw random samples from the feature.

Parameters:

Name Type Description Default
n int

number of samples.

required

Returns:

Type Description
pd.Series

drawn samples.

Source code in bofire/data_models/features/continuous.py
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
    """Draw random samples from the feature.

    Args:
        n (int): number of samples.

    Returns:
        pd.Series: drawn samples.
    """
    return pd.Series(
        name=self.key,
        data=np.random.default_rng(seed=seed).uniform(
            self.lower_bound, self.upper_bound, n
        ),
    )

validate_candidental(self, values)

Method to validate the suggested candidates

Parameters:

Name Type Description Default
values pd.Series

A series with candidate values

required

Exceptions:

Type Description
ValueError

when non numerical values are passed

ValueError

when values are larger than the upper bound of the feature

ValueError

when values are lower than the lower bound of the feature

Returns:

Type Description
pd.Series

The validated candidate values

Source code in bofire/data_models/features/continuous.py
def validate_candidental(self, values: pd.Series) -> pd.Series:
    """Method to validate the suggested candidates

    Args:
        values (pd.Series): A dataFrame with candidates

    Raises:
        ValueError: when non numerical values are passed
        ValueError: when values are larger than the upper bound of the feature
        ValueError: when values are lower than the lower bound of the feature

    Returns:
        pd.Series: The passed dataFrame with candidates
    """

    noise = 10e-6
    values = super().validate_candidental(values)
    if (values < self.lower_bound - noise).any():
        raise ValueError(
            f"not all values of input feature `{self.key}`are larger than lower bound `{self.lower_bound}` "
        )
    if (values > self.upper_bound + noise).any():
        raise ValueError(
            f"not all values of input feature `{self.key}`are smaller than upper bound `{self.upper_bound}` "
        )
    return values

validate_lower_upper(bounds) classmethod

Validates that the lower bound is lower than the upper bound

Parameters:

Name Type Description Default
bounds Tuple[float, float]

Tuple with the lower and upper bound

required

Exceptions:

Type Description
ValueError

when the lower bound is higher than the upper bound

Returns:

Type Description
Tuple[float, float]

The validated bounds

Source code in bofire/data_models/features/continuous.py
@field_validator("bounds")
@classmethod
def validate_lower_upper(cls, bounds):
    """Validates that the lower bound is lower than the upper bound

    Args:
        bounds (Tuple[float, float]): Tuple with the lower and upper bound

    Raises:
        ValueError: when the lower bound is higher than the upper bound

    Returns:
        Tuple[float, float]: The validated bounds
    """
    if bounds[0] > bounds[1]:
        raise ValueError(
            f"lower bound must be <= upper bound, got {bounds[0]} > {bounds[1]}"
        )
    return bounds

ContinuousOutput (Output)

The base class for a continuous output feature

Attributes:

Name Type Description
objective objective

objective of the feature indicating in which direction it should be optimized. Defaults to MaximizeObjective.

Source code in bofire/data_models/features/continuous.py
class ContinuousOutput(Output):
    """The base class for a continuous output feature

    Attributes:
        objective (objective, optional): objective of the feature indicating in which direction it should be optimized. Defaults to `MaximizeObjective`.
    """

    type: Literal["ContinuousOutput"] = "ContinuousOutput"
    order_id: ClassVar[int] = 9
    unit: Optional[str] = None

    objective: Optional[AnyObjective] = Field(
        default_factory=lambda: MaximizeObjective(w=1.0)
    )

    def __call__(self, values: pd.Series) -> pd.Series:
        if self.objective is None:
            return pd.Series(
                data=[np.nan for _ in range(len(values))],
                index=values.index,
                name=values.name,
            )
        return self.objective(values)  # type: ignore

    def validate_experimental(self, values: pd.Series) -> pd.Series:
        try:
            values = pd.to_numeric(values, errors="raise").astype("float64")
        except ValueError:
            raise ValueError(
                f"not all values of input feature `{self.key}` are numerical"
            )
        return values

    def __str__(self) -> str:
        return "ContinuousOutputFeature"

validate_experimental(self, values)

Method to validate the experimental Series

Parameters:

Name Type Description Default
values pd.Series

A series with values for the outcome

required

Returns:

Type Description
pd.Series

The validated experimental values

Source code in bofire/data_models/features/continuous.py
def validate_experimental(self, values: pd.Series) -> pd.Series:
    try:
        values = pd.to_numeric(values, errors="raise").astype("float64")
    except ValueError:
        raise ValueError(
            f"not all values of input feature `{self.key}` are numerical"
        )
    return values

descriptor

CategoricalDescriptorInput (CategoricalInput)

Class for categorical input features with descriptors

Attributes:

Name Type Description
categories List[str]

Names of the categories.

allowed List[bool]

List of bools indicating if a category is allowed within the optimization.

descriptors List[str]

List of strings representing the names of the descriptors.

values List[List[float]]

List of lists representing the descriptor values.

Source code in bofire/data_models/features/descriptor.py
class CategoricalDescriptorInput(CategoricalInput):
    """Class for categorical input features with descriptors

    Attributes:
        categories (List[str]): Names of the categories.
        allowed (List[bool]): List of bools indicating if a category is allowed within the optimization.
        descriptors (List[str]): List of strings representing the names of the descriptors.
        values (List[List[float]]): List of lists representing the descriptor values.
    """

    type: Literal["CategoricalDescriptorInput"] = "CategoricalDescriptorInput"
    order_id: ClassVar[int] = 6

    descriptors: Descriptors
    values: Annotated[
        List[List[float]],
        Field(min_length=1),
    ]

    @field_validator("values")
    @classmethod
    def validate_values(cls, v, info):
        """validates the compatability of passed values for the descriptors and the defined categories

        Args:
            v (List[List[float]]): Nested list with descriptor values
            values (Dict): Dictionary with attributes

        Raises:
            ValueError: when values have different length than categories
            ValueError: when rows in values have different length than descriptors
            ValueError: when a descriptor shows no variance in the data

        Returns:
            List[List[float]]: Nested list with descriptor values
        """
        if len(v) != len(info.data["categories"]):
            raise ValueError("values must have same length as categories")
        for row in v:
            if len(row) != len(info.data["descriptors"]):
                raise ValueError("rows in values must have same length as descriptors")
        a = np.array(v)
        for i, d in enumerate(info.data["descriptors"]):
            if len(set(a[:, i])) == 1:
                raise ValueError(f"No variation for descriptor {d}.")
        return v

    @staticmethod
    def valid_transform_types() -> List[CategoricalEncodingEnum]:
        return [
            CategoricalEncodingEnum.ONE_HOT,
            CategoricalEncodingEnum.DUMMY,
            CategoricalEncodingEnum.ORDINAL,
            CategoricalEncodingEnum.DESCRIPTOR,
        ]

    def to_df(self):
        """tabular overview of the feature as DataFrame

        Returns:
            pd.DataFrame: tabular overview of the feature as DataFrame
        """
        data = dict(zip(self.categories, self.values))
        return pd.DataFrame.from_dict(data, orient="index", columns=self.descriptors)

    def fixed_value(
        self, transform_type: Optional[TTransform] = None
    ) -> Union[List[str], List[float], None]:
        """Returns the categories to which the feature is fixed, None if the feature is not fixed

        Returns:
            List[str]: List of categories or None
        """
        if transform_type != CategoricalEncodingEnum.DESCRIPTOR:
            return super().fixed_value(transform_type)
        else:
            val = self.get_allowed_categories()[0]
            return self.to_descriptor_encoding(pd.Series([val])).values[0].tolist()

    def get_bounds(
        self,
        transform_type: TTransform,
        values: Optional[pd.Series] = None,
        reference_value: Optional[str] = None,
    ) -> Tuple[List[float], List[float]]:
        if transform_type != CategoricalEncodingEnum.DESCRIPTOR:
            return super().get_bounds(transform_type, values)
        else:
            # in case that values is None, we return the optimization bounds
            # else we return the complete bounds
            if values is None:
                df = self.to_df().loc[self.get_allowed_categories()]
            else:
                df = self.to_df()
            lower = df.min().values.tolist()  # type: ignore
            upper = df.max().values.tolist()  # type: ignore
            return lower, upper

    def validate_experimental(
        self, values: pd.Series, strict: bool = False
    ) -> pd.Series:
        """Method to validate the experimental dataFrame

        Args:
            values (pd.Series): A dataFrame with experiments
            strict (bool, optional): Boolean to distinguish if the occurence of fixed features in the dataset should be considered or not. Defaults to False.

        Raises:
            ValueError: when an entry is not in the list of allowed categories
            ValueError: when there is no variation in a feature provided by the experimental data
            ValueError: when no variation is present or planed for a given descriptor

        Returns:
            pd.Series: A dataFrame with experiments
        """
        values = super().validate_experimental(values, strict)
        if strict:
            lower, upper = self.get_bounds(
                transform_type=CategoricalEncodingEnum.DESCRIPTOR, values=values
            )
            for i, desc in enumerate(self.descriptors):
                if lower[i] == upper[i]:
                    raise ValueError(
                        f"No variation present or planned for descriptor {desc} for feature {self.key}. Remove the descriptor."
                    )
        return values

    @classmethod
    def from_df(cls, key: str, df: pd.DataFrame):
        """Creates a feature from a dataframe

        Args:
            key (str): The name of the feature
            df (pd.DataFrame): Categories as rows and descriptors as columns

        Returns:
            CategoricalDescriptorInput: The feature created from the dataframe
        """
        return cls(
            key=key,
            categories=list(df.index),
            allowed=[True for _ in range(len(df))],
            descriptors=list(df.columns),
            values=df.values.tolist(),
        )

    def to_descriptor_encoding(self, values: pd.Series) -> pd.DataFrame:
        """Converts values to descriptor encoding.

        Args:
            values (pd.Series): Values to transform.

        Returns:
            pd.DataFrame: Descriptor encoded dataframe.
        """
        return pd.DataFrame(
            data=values.map(dict(zip(self.categories, self.values))).values.tolist(),  # type: ignore
            columns=[get_encoded_name(self.key, d) for d in self.descriptors],
            index=values.index,
        )

    def from_descriptor_encoding(self, values: pd.DataFrame) -> pd.Series:
        """Converts values back from descriptor encoding.

        Args:
            values (pd.DataFrame): Descriptor encoded dataframe.

        Raises:
            ValueError: If descriptor columns not found in the dataframe.

        Returns:
            pd.Series: Series with categorical values.
        """
        cat_cols = [get_encoded_name(self.key, d) for d in self.descriptors]
        # we explicitly allow the dataframe to have more columns than needed,
        # to make the backtransform easier.
        if np.any([c not in values.columns for c in cat_cols]):
            raise ValueError(
                f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}."
            )
        s = pd.DataFrame(
            data=np.sqrt(
                np.sum(
                    (
                        values[cat_cols].to_numpy()[:, np.newaxis, :]
                        - self.to_df().iloc[self.allowed].to_numpy()
                    )
                    ** 2,
                    axis=2,
                )
            ),
            columns=self.get_allowed_categories(),
            index=values.index,
        ).idxmin(1)
        s.name = self.key
        return s
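
A usage sketch (import path per the source location above): each category carries a row of descriptor values, to_descriptor_encoding maps categories to those rows, and from_descriptor_encoding picks the nearest allowed category in descriptor space:

import pandas as pd
from bofire.data_models.features.descriptor import CategoricalDescriptorInput

catalyst = CategoricalDescriptorInput(
    key="catalyst",
    categories=["Pd", "Pt"],
    descriptors=["price", "activity"],
    values=[[1.0, 0.5], [3.0, 0.9]],
)
encoded = catalyst.to_descriptor_encoding(pd.Series(["Pt", "Pd"]))
# encoded has columns catalyst_price and catalyst_activity
assert catalyst.from_descriptor_encoding(encoded).tolist() == ["Pt", "Pd"]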

fixed_value(self, transform_type=None)

Returns the categories to which the feature is fixed, or None if the feature is not fixed

Returns:

Type Description
List[str]

List of categories or None

Source code in bofire/data_models/features/descriptor.py
def fixed_value(
    self, transform_type: Optional[TTransform] = None
) -> Union[List[str], List[float], None]:
    """Returns the categories to which the feature is fixed, None if the feature is not fixed

    Returns:
        List[str]: List of categories or None
    """
    if transform_type != CategoricalEncodingEnum.DESCRIPTOR:
        return super().fixed_value(transform_type)
    else:
        val = self.get_allowed_categories()[0]
        return self.to_descriptor_encoding(pd.Series([val])).values[0].tolist()

from_descriptor_encoding(self, values)

Converts values back from descriptor encoding.

Parameters:

Name Type Description Default
values pd.DataFrame

Descriptor encoded dataframe.

required

Exceptions:

Type Description
ValueError

If descriptor columns not found in the dataframe.

Returns:

Type Description
pd.Series

Series with categorical values.

Source code in bofire/data_models/features/descriptor.py
def from_descriptor_encoding(self, values: pd.DataFrame) -> pd.Series:
    """Converts values back from descriptor encoding.

    Args:
        values (pd.DataFrame): Descriptor encoded dataframe.

    Raises:
        ValueError: If descriptor columns not found in the dataframe.

    Returns:
        pd.Series: Series with categorical values.
    """
    cat_cols = [get_encoded_name(self.key, d) for d in self.descriptors]
    # we explicitly allow the dataframe to have more columns than needed,
    # to make the backtransform easier.
    if np.any([c not in values.columns for c in cat_cols]):
        raise ValueError(
            f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}."
        )
    s = pd.DataFrame(
        data=np.sqrt(
            np.sum(
                (
                    values[cat_cols].to_numpy()[:, np.newaxis, :]
                    - self.to_df().iloc[self.allowed].to_numpy()
                )
                ** 2,
                axis=2,
            )
        ),
        columns=self.get_allowed_categories(),
        index=values.index,
    ).idxmin(1)
    s.name = self.key
    return s

from_df(key, df) classmethod

Creates a feature from a dataframe

Parameters:

Name Type Description Default
key str

The name of the feature

required
df pd.DataFrame

Categories as rows and descriptors as columns

required

Returns:

Type Description
CategoricalDescriptorInput

The feature created from the dataframe

Source code in bofire/data_models/features/descriptor.py
@classmethod
def from_df(cls, key: str, df: pd.DataFrame):
    """Creates a feature from a dataframe

    Args:
        key (str): The name of the feature
        df (pd.DataFrame): Categories as rows and descriptors as columns

    Returns:
        CategoricalDescriptorInput: The created feature.
    """
    return cls(
        key=key,
        categories=list(df.index),
        allowed=[True for _ in range(len(df))],
        descriptors=list(df.columns),
        values=df.values.tolist(),
    )
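
Example (a minimal sketch):

import pandas as pd
from bofire.data_models.features.descriptor import CategoricalDescriptorInput

df = pd.DataFrame(
    {"price": [1.0, 2.0], "activity": [10.0, 5.0]},
    index=["A", "B"],
)
feat = CategoricalDescriptorInput.from_df("catalyst", df)
# categories=["A", "B"], descriptors=["price", "activity"],
# with all categories allowed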

get_bounds(self, transform_type, values=None, reference_value=None)

Returns the bounds of an input feature depending on the requested transform type.

Parameters:

Name Type Description Default
transform_type TTransform

The requested transform type.

required
values Optional[pd.Series]

If values are provided the bounds are returned taking the most extreme values for the feature into account. Defaults to None.

None
reference_value Optional[str]

If a reference value is provided, the local bounds based on a local search region are returned. Currently only supported for continuous inputs. For more details, see https://www.merl.com/publications/docs/TR2023-057.pdf.

None

Returns:

Type Description
Tuple[List[float], List[float]]

List of lower bound values, list of upper bound values.

Source code in bofire/data_models/features/descriptor.py
def get_bounds(
    self,
    transform_type: TTransform,
    values: Optional[pd.Series] = None,
    reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
    if transform_type != CategoricalEncodingEnum.DESCRIPTOR:
        return super().get_bounds(transform_type, values)
    else:
        # in case that values is None, we return the optimization bounds
        # else we return the complete bounds
        if values is None:
            df = self.to_df().loc[self.get_allowed_categories()]
        else:
            df = self.to_df()
        lower = df.min().values.tolist()  # type: ignore
        upper = df.max().values.tolist()  # type: ignore
        return lower, upper
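
Example (a minimal sketch; it assumes CategoricalEncodingEnum can be imported from bofire.data_models.enum):

from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.descriptor import CategoricalDescriptorInput

feat = CategoricalDescriptorInput(
    key="catalyst",
    categories=["A", "B"],
    descriptors=["price", "activity"],
    values=[[1.0, 10.0], [2.0, 5.0]],
)
lower, upper = feat.get_bounds(transform_type=CategoricalEncodingEnum.DESCRIPTOR)
# lower == [1.0, 5.0], upper == [2.0, 10.0]: the per-descriptor minimum and
# maximum over the allowed categories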

to_descriptor_encoding(self, values)

Converts values to descriptor encoding.

Parameters:

Name Type Description Default
values pd.Series

Values to transform.

required

Returns:

Type Description
pd.DataFrame

Descriptor encoded dataframe.

Source code in bofire/data_models/features/descriptor.py
def to_descriptor_encoding(self, values: pd.Series) -> pd.DataFrame:
    """Converts values to descriptor encoding.

    Args:
        values (pd.Series): Values to transform.

    Returns:
        pd.DataFrame: Descriptor encoded dataframe.
    """
    return pd.DataFrame(
        data=values.map(dict(zip(self.categories, self.values))).values.tolist(),  # type: ignore
        columns=[get_encoded_name(self.key, d) for d in self.descriptors],
        index=values.index,
    )

to_df(self)

tabular overview of the feature as DataFrame

Returns:

Type Description
pd.DataFrame

tabular overview of the feature as DataFrame

Source code in bofire/data_models/features/descriptor.py
def to_df(self):
    """tabular overview of the feature as DataFrame

    Returns:
        pd.DataFrame: tabular overview of the feature as DataFrame
    """
    data = dict(zip(self.categories, self.values))
    return pd.DataFrame.from_dict(data, orient="index", columns=self.descriptors)

validate_experimental(self, values, strict=False)

Method to validate the experimental dataFrame

Parameters:

Name Type Description Default
values pd.Series

A dataFrame with experiments

required
strict bool

Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

False

Exceptions:

Type Description
ValueError

when an entry is not in the list of allowed categories

ValueError

when there is no variation in a feature provided by the experimental data

ValueError

when no variation is present or planned for a given descriptor

Returns:

Type Description
pd.Series

A dataFrame with experiments

Source code in bofire/data_models/features/descriptor.py
def validate_experimental(
    self, values: pd.Series, strict: bool = False
) -> pd.Series:
    """Method to validate the experimental dataFrame

    Args:
        values (pd.Series): A dataFrame with experiments
        strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

    Raises:
        ValueError: when an entry is not in the list of allowed categories
        ValueError: when there is no variation in a feature provided by the experimental data
        ValueError: when no variation is present or planned for a given descriptor

    Returns:
        pd.Series: A dataFrame with experiments
    """
    values = super().validate_experimental(values, strict)
    if strict:
        lower, upper = self.get_bounds(
            transform_type=CategoricalEncodingEnum.DESCRIPTOR, values=values
        )
        for i, desc in enumerate(self.descriptors):
            if lower[i] == upper[i]:
                raise ValueError(
                    f"No variation present or planned for descriptor {desc} for feature {self.key}. Remove the descriptor."
                )
    return values

validate_values(v, info) classmethod

validates the compatibility of the passed values with the defined descriptors and categories

Parameters:

Name Type Description Default
v List[List[float]]

Nested list with descriptor values

required
info ValidationInfo

Validation info carrying the previously validated fields (categories, descriptors)

required

Exceptions:

Type Description
ValueError

when values have different length than categories

ValueError

when rows in values have different length than descriptors

ValueError

when a descriptor shows no variance in the data

Returns:

Type Description
List[List[float]]

Nested list with descriptor values

Source code in bofire/data_models/features/descriptor.py
@field_validator("values")
@classmethod
def validate_values(cls, v, info):
    """validates the compatability of passed values for the descriptors and the defined categories

    Args:
        v (List[List[float]]): Nested list with descriptor values
        info (ValidationInfo): Validation info carrying the previously validated fields

    Raises:
        ValueError: when values have different length than categories
        ValueError: when rows in values have different length than descriptors
        ValueError: when a descriptor shows no variance in the data

    Returns:
        List[List[float]]: Nested list with descriptor values
    """
    if len(v) != len(info.data["categories"]):
        raise ValueError("values must have same length as categories")
    for row in v:
        if len(row) != len(info.data["descriptors"]):
            raise ValueError("rows in values must have same length as descriptors")
    a = np.array(v)
    for i, d in enumerate(info.data["descriptors"]):
        if len(set(a[:, i])) == 1:
            raise ValueError(f"No variation for descriptor {d}.")
    return v
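
Example (a minimal sketch of a construction that fails this validator):

from bofire.data_models.features.descriptor import CategoricalDescriptorInput

CategoricalDescriptorInput(
    key="catalyst",
    categories=["A", "B"],
    descriptors=["price"],
    values=[[1.0], [1.0]],
)
# raises ValueError: No variation for descriptor price.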

ContinuousDescriptorInput (ContinuousInput)

Class for continuous input features with descriptors

Attributes:

Name Type Description
lower_bound float

Lower bound of the feature in the optimization.

upper_bound float

Upper bound of the feature in the optimization.

descriptors List[str]

Names of the descriptors.

values List[float]

Values of the descriptors.

Source code in bofire/data_models/features/descriptor.py
class ContinuousDescriptorInput(ContinuousInput):
    """Class for continuous input features with descriptors

    Attributes:
        lower_bound (float): Lower bound of the feature in the optimization.
        upper_bound (float): Upper bound of the feature in the optimization.
        descriptors (List[str]): Names of the descriptors.
        values (List[float]): Values of the descriptors.
    """

    type: Literal["ContinuousDescriptorInput"] = "ContinuousDescriptorInput"
    order_id: ClassVar[int] = 2

    descriptors: Descriptors
    values: DiscreteVals

    @model_validator(mode="after")
    def validate_list_lengths(self):
        """compares the length of the defined descriptors list with the provided values

        Args:
            values (Dict): Dictionary with all attribues

        Raises:
            ValueError: when the number of descriptors does not math the number of provided values

        Returns:
            Dict: Dict with the attributes
        """
        if len(self.descriptors) != len(self.values):
            raise ValueError(
                'must provide same number of descriptors and values, got {len(values["descriptors"])} != {len(values["values"])}'
            )
        return self

    def to_df(self) -> pd.DataFrame:
        """tabular overview of the feature as DataFrame

        Returns:
            pd.DataFrame: tabular overview of the feature as DataFrame
        """
        return pd.DataFrame(
            data=[self.values], index=[self.key], columns=self.descriptors
        )
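
Example (a minimal sketch; it assumes the ContinuousInput constructor accepts bounds=(lower, upper), as in recent BoFire releases):

from bofire.data_models.features.descriptor import ContinuousDescriptorInput

temp = ContinuousDescriptorInput(
    key="temperature",
    bounds=(20.0, 80.0),  # assumed ContinuousInput constructor argument
    descriptors=["boiling_point"],
    values=[100.0],
)
temp.to_df()  # one row ("temperature") with one column ("boiling_point")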

to_df(self)

tabular overview of the feature as DataFrame

Returns:

Type Description
pd.DataFrame

tabular overview of the feature as DataFrame

Source code in bofire/data_models/features/descriptor.py
def to_df(self) -> pd.DataFrame:
    """tabular overview of the feature as DataFrame

    Returns:
        pd.DataFrame: tabular overview of the feature as DataFrame
    """
    return pd.DataFrame(
        data=[self.values], index=[self.key], columns=self.descriptors
    )

validate_list_lengths(self)

Compares the length of the defined descriptors list with the provided values.

Exceptions:

Type Description
ValueError

when the number of descriptors does not match the number of provided values

Returns:

Type Description
ContinuousDescriptorInput

the validated feature

Source code in bofire/data_models/features/descriptor.py
@model_validator(mode="after")
def validate_list_lengths(self):
    """compares the length of the defined descriptors list with the provided values

    Args:
        values (Dict): Dictionary with all attribues

    Raises:
        ValueError: when the number of descriptors does not math the number of provided values

    Returns:
        Dict: Dict with the attributes
    """
    if len(self.descriptors) != len(self.values):
        raise ValueError(
            'must provide same number of descriptors and values, got {len(values["descriptors"])} != {len(values["values"])}'
        )
    return self

discrete

DiscreteInput (NumericalInput)

Feature with discretized ordinal values allowed in the optimization.

Attributes:

Name Type Description
key str

key of the feature.

values List[float]

the discretized allowed values during the optimization.

Source code in bofire/data_models/features/discrete.py
class DiscreteInput(NumericalInput):
    """Feature with discretized ordinal values allowed in the optimization.

    Attributes:
        key(str): key of the feature.
        values(List[float]): the discretized allowed values during the optimization.
    """

    type: Literal["DiscreteInput"] = "DiscreteInput"
    order_id: ClassVar[int] = 3

    values: DiscreteVals

    @field_validator("values")
    @classmethod
    def validate_values_unique(cls, values):
        """Validates that provided values are unique.

        Args:
            values (List[float]): List of values

        Raises:
            ValueError: when values are non-unique.
            ValueError: when values contains only one entry.
            ValueError: when values is empty.

        Returns:
            List[values]: Sorted list of values
        """
        if len(values) != len(set(values)):
            raise ValueError("Discrete values must be unique")
        if len(values) == 1:
            raise ValueError(
                "Fixed discrete inputs are not supported. Please use a fixed continuous input."
            )
        if len(values) == 0:
            raise ValueError("No values defined.")
        return sorted(values)

    @property
    def lower_bound(self) -> float:
        """Lower bound of the set of allowed values"""
        return min(self.values)

    @property
    def upper_bound(self) -> float:
        """Upper bound of the set of allowed values"""
        return max(self.values)

    def validate_candidental(self, values: pd.Series) -> pd.Series:
        """Method to validate the provided candidates.

        Args:
            values (pd.Series): suggested candidates for the feature

        Raises:
            ValueError: Raises error when one of the provided values is not contained in the list of allowed values.

        Returns:
            pd.Series: suggested candidates for the feature
        """
        values = super().validate_candidental(values)
        if not np.isin(values.to_numpy(), np.array(self.values)).all():
            raise ValueError(
                f"Not allowed values in candidates for feature {self.key}."
            )
        return values

    def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
        """Draw random samples from the feature.

        Args:
            n (int): number of samples.
            seed (int, optional): random seed. Defaults to None.

        Returns:
            pd.Series: drawn samples.
        """
        return pd.Series(
            name=self.key, data=np.random.default_rng(seed=seed).choice(self.values, n)
        )

    def from_continuous(self, values: pd.DataFrame) -> pd.Series:
        """Rounds continuous values to the closest discrete ones.

        Args:
            values (pd.DataFrame): Dataframe with continuous entries.

        Returns:
            pd.Series: Series with discrete values.
        """

        s = pd.DataFrame(
            data=np.abs(
                (values[self.key].to_numpy()[:, np.newaxis] - np.array(self.values))
            ),
            columns=self.values,
            index=values.index,
        ).idxmin(1)
        s.name = self.key
        return s

    def get_bounds(
        self,
        transform_type: Optional[TTransform] = None,
        values: Optional[pd.Series] = None,
        reference_value: Optional[float] = None,
    ) -> Tuple[List[float], List[float]]:
        assert transform_type is None
        if values is None:
            return [self.lower_bound], [self.upper_bound]  # type: ignore
        lower = min(self.lower_bound, values.min())  # type: ignore
        upper = max(self.upper_bound, values.max())  # type: ignore
        return [lower], [upper]  # type: ignore
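
Example (a minimal sketch):

import pandas as pd
from bofire.data_models.features.discrete import DiscreteInput

conc = DiscreteInput(key="conc", values=[0.1, 0.2, 0.5])
conc.sample(3, seed=42)  # draws three of the allowed values at random
conc.from_continuous(pd.DataFrame({"conc": [0.12, 0.4]}))
# pd.Series [0.1, 0.5]: each entry is rounded to the closest allowed value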

lower_bound: float property readonly

Lower bound of the set of allowed values

upper_bound: float property readonly

Upper bound of the set of allowed values

from_continuous(self, values)

Rounds continuous values to the closest discrete ones.

Parameters:

Name Type Description Default
values pd.DataFrame

Dataframe with continuous entries.

required

Returns:

Type Description
pd.Series

Series with discrete values.

Source code in bofire/data_models/features/discrete.py
def from_continuous(self, values: pd.DataFrame) -> pd.Series:
    """Rounds continuous values to the closest discrete ones.

    Args:
        values (pd.DataFrame): Dataframe with continuous entries.

    Returns:
        pd.Series: Series with discrete values.
    """

    s = pd.DataFrame(
        data=np.abs(
            (values[self.key].to_numpy()[:, np.newaxis] - np.array(self.values))
        ),
        columns=self.values,
        index=values.index,
    ).idxmin(1)
    s.name = self.key
    return s

get_bounds(self, transform_type=None, values=None, reference_value=None)

Returns the bounds of an input feature depending on the requested transform type.

Parameters:

Name Type Description Default
transform_type Optional[TTransform]

The requested transform type. Defaults to None.

None
values Optional[pd.Series]

If values are provided the bounds are returned taking the most extreme values for the feature into account. Defaults to None.

None
reference_value Optional[float]

If a reference value is provided, the local bounds based on a local search region are returned. Currently only supported for continuous inputs. For more details, see https://www.merl.com/publications/docs/TR2023-057.pdf.

None

Returns:

Type Description
Tuple[List[float], List[float]]

List of lower bound values, list of upper bound values.

Source code in bofire/data_models/features/discrete.py
def get_bounds(
    self,
    transform_type: Optional[TTransform] = None,
    values: Optional[pd.Series] = None,
    reference_value: Optional[float] = None,
) -> Tuple[List[float], List[float]]:
    assert transform_type is None
    if values is None:
        return [self.lower_bound], [self.upper_bound]  # type: ignore
    lower = min(self.lower_bound, values.min())  # type: ignore
    upper = max(self.upper_bound, values.max())  # type: ignore
    return [lower], [upper]  # type: ignore
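
Example (a minimal sketch):

import pandas as pd
from bofire.data_models.features.discrete import DiscreteInput

conc = DiscreteInput(key="conc", values=[0.1, 0.2, 0.5])
conc.get_bounds()  # ([0.1], [0.5])
conc.get_bounds(values=pd.Series([0.05, 0.7]))  # ([0.05], [0.7])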

sample(self, n, seed=None)

Draw random samples from the feature.

Parameters:

Name Type Description Default
n int

number of samples.

required
seed Optional[int]

random seed. Defaults to None.

None

Returns:

Type Description
pd.Series

drawn samples.

Source code in bofire/data_models/features/discrete.py
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
    """Draw random samples from the feature.

    Args:
        n (int): number of samples.
        seed (int, optional): random seed. Defaults to None.

    Returns:
        pd.Series: drawn samples.
    """
    return pd.Series(
        name=self.key, data=np.random.default_rng(seed=seed).choice(self.values, n)
    )

validate_candidental(self, values)

Method to validate the provided candidates.

Parameters:

Name Type Description Default
values pd.Series

suggested candidates for the feature

required

Exceptions:

Type Description
ValueError

Raises error when one of the provided values is not contained in the list of allowed values.

Returns:

Type Description
pd.Series

suggested candidates for the feature

Source code in bofire/data_models/features/discrete.py
def validate_candidental(self, values: pd.Series) -> pd.Series:
    """Method to validate the provided candidates.

    Args:
        values (pd.Series): suggested candidates for the feature

    Raises:
        ValueError: Raises error when one of the provided values is not contained in the list of allowed values.

    Returns:
        pd.Series: suggested candidates for the feature
    """
    values = super().validate_candidental(values)
    if not np.isin(values.to_numpy(), np.array(self.values)).all():
        raise ValueError(
            f"Not allowed values in candidates for feature {self.key}."
        )
    return values

validate_values_unique(values) classmethod

Validates that provided values are unique.

Parameters:

Name Type Description Default
values List[float]

List of values

required

Exceptions:

Type Description
ValueError

when values are non-unique.

ValueError

when values contains only one entry.

ValueError

when values is empty.

Returns:

Type Description
List[values]

Sorted list of values

Source code in bofire/data_models/features/discrete.py
@field_validator("values")
@classmethod
def validate_values_unique(cls, values):
    """Validates that provided values are unique.

    Args:
        values (List[float]): List of values

    Raises:
        ValueError: when values are non-unique.
        ValueError: when values contains only one entry.
        ValueError: when values is empty.

    Returns:
        List[values]: Sorted list of values
    """
    if len(values) != len(set(values)):
        raise ValueError("Discrete values must be unique")
    if len(values) == 1:
        raise ValueError(
            "Fixed discrete inputs are not supported. Please use a fixed continuous input."
        )
    if len(values) == 0:
        raise ValueError("No values defined.")
    return sorted(values)

feature

Feature (BaseModel)

The base class for all features.

Source code in bofire/data_models/features/feature.py
class Feature(BaseModel):
    """The base class for all features."""

    type: str
    key: str
    order_id: ClassVar[int] = -1

    def __lt__(self, other) -> bool:
        """
        Method to compare two models to get them in the desired order.
        Return True if other is larger than self, else False. (see FEATURE_ORDER)

        Args:
            other: The other class to compare to self

        Returns:
            bool: True if the other class is larger than self, else False
        """
        order_self = self.order_id
        order_other = other.order_id
        if order_self == order_other:
            return self.key < other.key
        else:
            return order_self < order_other

__lt__(self, other) special

Method to compare two models to get them in the desired order. Return True if other is larger than self, else False. (see FEATURE_ORDER)

Parameters:

Name Type Description Default
other

The other class to compare to self

required

Returns:

Type Description
bool

True if the other class is larger than self, else False

Source code in bofire/data_models/features/feature.py
def __lt__(self, other) -> bool:
    """
    Method to compare two models to get them in the desired order.
    Return True if other is larger than self, else False. (see FEATURE_ORDER)

    Args:
        other: The other class to compare to self

    Returns:
        bool: True if the other class is larger than self, else False
    """
    order_self = self.order_id
    order_other = other.order_id
    if order_self == order_other:
        return self.key < other.key
    else:
        return order_self < order_other
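
Example (a minimal sketch of the resulting sort order):

from bofire.data_models.features.categorical import CategoricalInput
from bofire.data_models.features.discrete import DiscreteInput

feats = [
    CategoricalInput(key="cat", categories=["a", "b"]),
    DiscreteInput(key="x", values=[0.0, 1.0]),
]
sorted(feats)
# DiscreteInput (order_id=3) sorts before CategoricalInput (order_id=7);
# features with equal order_id are sorted by key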

Input (Feature)

Base class for all input features.

Source code in bofire/data_models/features/feature.py
class Input(Feature):
    """Base class for all input features."""

    @staticmethod
    @abstractmethod
    def valid_transform_types() -> List[Union[CategoricalEncodingEnum, AnyMolFeatures]]:
        pass

    @abstractmethod
    def is_fixed(self) -> bool:
        """Indicates if a variable is set to a fixed value.

        Returns:
            bool: True if fixed, else False.
        """
        pass

    @abstractmethod
    def fixed_value(
        self, transform_type: Optional[TTransform] = None
    ) -> Union[None, List[str], List[float]]:
        """Method to return the fixed value in case of a fixed feature.

        Returns:
            Union[None, List[str], List[float]]: None in case the feature is not fixed, else the fixed value.
        """
        pass

    @abstractmethod
    def validate_experimental(
        self, values: pd.Series, strict: bool = False
    ) -> pd.Series:
        """Abstract method to validate the experimental dataFrame

        Args:
            values (pd.Series): A dataFrame with experiments
            strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

        Returns:
            pd.Series: The passed dataFrame with experiments
        """
        pass

    @abstractmethod
    def validate_candidental(self, values: pd.Series) -> pd.Series:
        """Abstract method to validate the suggested candidates

        Args:
            values (pd.Series): A dataFrame with candidates

        Returns:
            pd.Series: The passed dataFrame with candidates
        """
        pass

    @abstractmethod
    def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
        """Sample a series of allowed values.

        Args:
            n (int): Number of samples

        Returns:
            pd.Series: Sampled values.
        """
        pass

    @abstractmethod
    def get_bounds(
        self,
        transform_type: Optional[TTransform] = None,
        values: Optional[pd.Series] = None,
        reference_value: Optional[Union[float, str]] = None,
    ) -> Tuple[List[float], List[float]]:
        """Returns the bounds of an input feature depending on the requested transform type.

        Args:
            transform_type (Optional[TTransform], optional): The requested transform type. Defaults to None.
            values (Optional[pd.Series], optional): If values are provided the bounds are returned taking
                the most extreme values for the feature into account. Defaults to None.
            reference_value (Optional[Union[float, str]], optional): If a reference value is provided, the local bounds based
                on a local search region are returned. Currently only supported for continuous inputs. For more
                details, see https://www.merl.com/publications/docs/TR2023-057.pdf.
        Returns:
            Tuple[List[float], List[float]]: List of lower bound values, list of upper bound values.
        """
        pass

fixed_value(self, transform_type=None)

Method to return the fixed value in case of a fixed feature.

Returns:

Type Description
Union[None, List[str], List[float]]

None in case the feature is not fixed, else the fixed value.

Source code in bofire/data_models/features/feature.py
@abstractmethod
def fixed_value(
    self, transform_type: Optional[TTransform] = None
) -> Union[None, List[str], List[float]]:
    """Method to return the fixed value in case of a fixed feature.

    Returns:
        Union[None, List[str], List[float]]: None in case the feature is not fixed, else the fixed value.
    """
    pass

get_bounds(self, transform_type=None, values=None, reference_value=None)

Returns the bounds of an input feature depending on the requested transform type.

Parameters:

Name Type Description Default
transform_type Optional[TTransform]

The requested transform type. Defaults to None.

None
values Optional[pd.Series]

If values are provided the bounds are returned taking the most extreme values for the feature into account. Defaults to None.

None
reference_value Optional[Union[float, str]]

If a reference value is provided, the local bounds based on a local search region are returned. Currently only supported for continuous inputs. For more details, see https://www.merl.com/publications/docs/TR2023-057.pdf.

None

Returns:

Type Description
Tuple[List[float], List[float]]

List of lower bound values, list of upper bound values.

Source code in bofire/data_models/features/feature.py
@abstractmethod
def get_bounds(
    self,
    transform_type: Optional[TTransform] = None,
    values: Optional[pd.Series] = None,
    reference_value: Optional[Union[float, str]] = None,
) -> Tuple[List[float], List[float]]:
    """Returns the bounds of an input feature depending on the requested transform type.

    Args:
        transform_type (Optional[TTransform], optional): The requested transform type. Defaults to None.
        values (Optional[pd.Series], optional): If values are provided the bounds are returned taking
            the most extreme values for the feature into account. Defaults to None.
        reference_value (Optional[Union[float, str]], optional): If a reference value is provided, the local bounds based
            on a local search region are returned. Currently only supported for continuous inputs. For more
            details, see https://www.merl.com/publications/docs/TR2023-057.pdf.
    Returns:
        Tuple[List[float], List[float]]: List of lower bound values, list of upper bound values.
    """
    pass

is_fixed(self)

Indicates if a variable is set to a fixed value.

Returns:

Type Description
bool

True if fixed, else False.

Source code in bofire/data_models/features/feature.py
@abstractmethod
def is_fixed(self) -> bool:
    """Indicates if a variable is set to a fixed value.

    Returns:
        bool: True if fixed, else False.
    """
    pass

sample(self, n, seed=None)

Sample a series of allowed values.

Parameters:

Name Type Description Default
n int

Number of samples

required

Returns:

Type Description
pd.Series

Sampled values.

Source code in bofire/data_models/features/feature.py
@abstractmethod
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
    """Sample a series of allowed values.

    Args:
        n (int): Number of samples

    Returns:
        pd.Series: Sampled values.
    """
    pass

validate_candidental(self, values)

Abstract method to validate the suggested candidates

Parameters:

Name Type Description Default
values pd.Series

A dataFrame with candidates

required

Returns:

Type Description
pd.Series

The passed dataFrame with candidates

Source code in bofire/data_models/features/feature.py
@abstractmethod
def validate_candidental(self, values: pd.Series) -> pd.Series:
    """Abstract method to validate the suggested candidates

    Args:
        values (pd.Series): A dataFrame with candidates

    Returns:
        pd.Series: The passed dataFrame with candidates
    """
    pass

validate_experimental(self, values, strict=False)

Abstract method to validate the experimental dataFrame

Parameters:

Name Type Description Default
values pd.Series

A dataFrame with experiments

required
strict bool

Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

False

Returns:

Type Description
pd.Series

The passed dataFrame with experiments

Source code in bofire/data_models/features/feature.py
@abstractmethod
def validate_experimental(
    self, values: pd.Series, strict: bool = False
) -> pd.Series:
    """Abstract method to validate the experimental dataFrame

    Args:
        values (pd.Series): A dataFrame with experiments
        strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

    Returns:
        pd.Series: The passed dataFrame with experiments
    """
    pass

Output (Feature)

Base class for all output features.

Attributes:

Name Type Description
key str

Key of the Feature.

Source code in bofire/data_models/features/feature.py
class Output(Feature):
    """Base class for all output features.

    Attributes:
        key(str): Key of the Feature.
    """

    @abstractmethod
    def __call__(self, values: pd.Series) -> pd.Series:
        pass

    @abstractmethod
    def validate_experimental(self, values: pd.Series) -> pd.Series:
        """Abstract method to validate the experimental Series

        Args:
            values (pd.Series): A dataFrame with values for the outcome

        Returns:
            pd.Series: The passed dataFrame with experiments
        """
        pass

validate_experimental(self, values)

Abstract method to validate the experimental Series

Parameters:

Name Type Description Default
values pd.Series

A dataFrame with values for the outcome

required

Returns:

Type Description
pd.Series

The passed dataFrame with experiments

Source code in bofire/data_models/features/feature.py
@abstractmethod
def validate_experimental(self, values: pd.Series) -> pd.Series:
    """Abstract method to validate the experimental Series

    Args:
        values (pd.Series): A dataFrame with values for the outcome

    Returns:
        pd.Series: The passed dataFrame with experiments
    """
    pass

get_encoded_name(feature_key, option_name)

Get the name of the encoded column. Option could be the category or the descriptor name.

Source code in bofire/data_models/features/feature.py
def get_encoded_name(feature_key: str, option_name: str) -> str:
    """Get the name of the encoded column. Option could be the category or the descriptor name."""
    return f"{feature_key}_{option_name}"

molecular

CategoricalMolecularInput (CategoricalInput, MolecularInput)

Source code in bofire/data_models/features/molecular.py
class CategoricalMolecularInput(CategoricalInput, MolecularInput):
    type: Literal["CategoricalMolecularInput"] = "CategoricalMolecularInput"
    # order_id: ClassVar[int] = 7
    order_id: ClassVar[int] = 5

    @field_validator("categories")
    @classmethod
    def validate_smiles(cls, categories: Sequence[str]):
        """validates that categories are valid smiles. Note that this check can only
        be executed when rdkit is available.

        Args:
            categories (List[str]): List of smiles

        Raises:
            ValueError: when string is not a smiles

        Returns:
            List[str]: List of the smiles
        """
        # check on rdkit availability:
        try:
            smiles2mol(categories[0])
        except NameError:
            warnings.warn("rdkit not installed, categories cannot be validated.")
            return categories

        for cat in categories:
            smiles2mol(cat)
        return categories

    @staticmethod
    def valid_transform_types() -> List[Union[AnyMolFeatures, CategoricalEncodingEnum]]:
        return CategoricalInput.valid_transform_types() + [
            Fingerprints,
            FingerprintsFragments,
            Fragments,
            MordredDescriptors,  # type: ignore
        ]

    def get_bounds(
        self,
        transform_type: Union[CategoricalEncodingEnum, AnyMolFeatures],
        values: Optional[pd.Series] = None,
        reference_value: Optional[str] = None,
    ) -> Tuple[List[float], List[float]]:
        if isinstance(transform_type, CategoricalEncodingEnum):
            # we are just using the standard categorical transformations
            return super().get_bounds(
                transform_type=transform_type,
                values=values,
                reference_value=reference_value,
            )
        else:
            # in case that values is None, we return the optimization bounds
            # else we return the complete bounds
            data = self.to_descriptor_encoding(
                transform_type=transform_type,
                values=(
                    pd.Series(self.get_allowed_categories())
                    if values is None
                    else pd.Series(self.categories)
                ),
            )
        lower = data.min(axis=0).values.tolist()
        upper = data.max(axis=0).values.tolist()
        return lower, upper

    def from_descriptor_encoding(
        self, transform_type: AnyMolFeatures, values: pd.DataFrame
    ) -> pd.Series:
        """Converts values back from descriptor encoding.

        Args:
            values (pd.DataFrame): Descriptor encoded dataframe.

        Raises:
            ValueError: If descriptor columns not found in the dataframe.

        Returns:
            pd.Series: Series with categorical values.
        """

        # This method is modified based on the categorical descriptor feature
        # TODO: move it to more central place
        cat_cols = [
            get_encoded_name(self.key, d) for d in transform_type.get_descriptor_names()
        ]
        # We explicitly allow the dataframe to have more columns than needed,
        # to keep the backtransform simple.
        if np.any([c not in values.columns for c in cat_cols]):
            raise ValueError(
                f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}."
            )
        s = pd.DataFrame(
            data=np.sqrt(
                np.sum(
                    (
                        values[cat_cols].to_numpy()[:, np.newaxis, :]
                        - self.to_descriptor_encoding(
                            transform_type=transform_type,
                            values=pd.Series(self.get_allowed_categories()),
                        ).to_numpy()
                    )
                    ** 2,
                    axis=2,
                )
            ),
            columns=self.get_allowed_categories(),
            index=values.index,
        ).idxmin(1)
        s.name = self.key
        return s

from_descriptor_encoding(self, transform_type, values)

Converts values back from descriptor encoding.

Parameters:

Name Type Description Default
values pd.DataFrame

Descriptor encoded dataframe.

required

Exceptions:

Type Description
ValueError

If descriptor columns not found in the dataframe.

Returns:

Type Description
pd.Series

Series with categorical values.

Source code in bofire/data_models/features/molecular.py
def from_descriptor_encoding(
    self, transform_type: AnyMolFeatures, values: pd.DataFrame
) -> pd.Series:
    """Converts values back from descriptor encoding.

    Args:
        values (pd.DataFrame): Descriptor encoded dataframe.

    Raises:
        ValueError: If descriptor columns not found in the dataframe.

    Returns:
        pd.Series: Series with categorical values.
    """

    # This method is modified based on the categorical descriptor feature
    # TODO: move it to more central place
    cat_cols = [
        get_encoded_name(self.key, d) for d in transform_type.get_descriptor_names()
    ]
    # We explicitly allow the dataframe to have more columns than needed,
    # to keep the backtransform simple.
    if np.any([c not in values.columns for c in cat_cols]):
        raise ValueError(
            f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}."
        )
    s = pd.DataFrame(
        data=np.sqrt(
            np.sum(
                (
                    values[cat_cols].to_numpy()[:, np.newaxis, :]
                    - self.to_descriptor_encoding(
                        transform_type=transform_type,
                        values=pd.Series(self.get_allowed_categories()),
                    ).to_numpy()
                )
                ** 2,
                axis=2,
            )
        ),
        columns=self.get_allowed_categories(),
        index=values.index,
    ).idxmin(1)
    s.name = self.key
    return s

get_bounds(self, transform_type, values=None, reference_value=None)

Calculates the lower and upper bounds for the feature based on the given transform type and values.

Parameters:

Name Type Description Default
transform_type Union[CategoricalEncodingEnum, AnyMolFeatures]

The type of transformation to apply to the data.

required
values Optional[pd.Series]

The actual data over which the lower and upper bounds are calculated.

None
reference_value Optional[str]

The reference value for the transformation. Not used here. Defaults to None.

None

Returns:

Type Description
Tuple[List[float], List[float]]

A tuple containing the lower and upper bounds of the transformed data.

Exceptions:

Type Description
NotImplementedError

Raised when values is None, as it is currently required for MolecularInput.

Source code in bofire/data_models/features/molecular.py
def get_bounds(
    self,
    transform_type: Union[CategoricalEncodingEnum, AnyMolFeatures],
    values: Optional[pd.Series] = None,
    reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
    if isinstance(transform_type, CategoricalEncodingEnum):
        # we are just using the standard categorical transformations
        return super().get_bounds(
            transform_type=transform_type,
            values=values,
            reference_value=reference_value,
        )
    else:
        # in case that values is None, we return the optimization bounds
        # else we return the complete bounds
        data = self.to_descriptor_encoding(
            transform_type=transform_type,
            values=(
                pd.Series(self.get_allowed_categories())
                if values is None
                else pd.Series(self.categories)
            ),
        )
    lower = data.min(axis=0).values.tolist()
    upper = data.max(axis=0).values.tolist()
    return lower, upper

validate_smiles(categories) classmethod

validates that categories are valid smiles. Note that this check can only be executed when rdkit is available.

Parameters:

Name Type Description Default
categories List[str]

List of smiles

required

Exceptions:

Type Description
ValueError

when string is not a smiles

Returns:

Type Description
List[str]

List of the smiles

Source code in bofire/data_models/features/molecular.py
@field_validator("categories")
@classmethod
def validate_smiles(cls, categories: Sequence[str]):
    """validates that categories are valid smiles. Note that this check can only
    be executed when rdkit is available.

    Args:
        categories (List[str]): List of smiles

    Raises:
        ValueError: when string is not a smiles

    Returns:
        List[str]: List of the smiles
    """
    # check on rdkit availability:
    try:
        smiles2mol(categories[0])
    except NameError:
        warnings.warn("rdkit not installed, categories cannot be validated.")
        return categories

    for cat in categories:
        smiles2mol(cat)
    return categories

MolecularInput (Input)

Source code in bofire/data_models/features/molecular.py
class MolecularInput(Input):
    type: Literal["MolecularInput"] = "MolecularInput"
    # order_id: ClassVar[int] = 6
    order_id: ClassVar[int] = 4

    @staticmethod
    def valid_transform_types() -> List[AnyMolFeatures]:
        return [Fingerprints, FingerprintsFragments, Fragments, MordredDescriptors]  # type: ignore

    def validate_experimental(
        self, values: pd.Series, strict: bool = False
    ) -> pd.Series:
        values = values.map(str)
        for smi in values:
            smiles2mol(smi)

        return values

    def validate_candidental(self, values: pd.Series) -> pd.Series:
        values = values.map(str)
        for smi in values:
            smiles2mol(smi)
        return values

    def is_fixed(self) -> bool:
        return False

    def fixed_value(self, transform_type: Optional[AnyMolFeatures] = None) -> None:
        return None

    def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
        raise ValueError("Sampling not supported for `MolecularInput`")

    def get_bounds(
        self,
        transform_type: AnyMolFeatures,
        values: pd.Series,
        reference_value: Optional[str] = None,
    ) -> Tuple[List[float], List[float]]:
        """
        Calculates the lower and upper bounds for the feature based on the given transform type and values.

        Args:
            transform_type (AnyMolFeatures): The type of transformation to apply to the data.
            values (pd.Series): The actual data over which the lower and upper bounds are calculated.
            reference_value (Optional[str], optional): The reference value for the transformation. Not used here.
                Defaults to None.

        Returns:
            Tuple[List[float], List[float]]: A tuple containing the lower and upper bounds of the transformed data.

        Raises:
            NotImplementedError: Raised when `values` is None, as it is currently required for `MolecularInput`.
        """
        if values is None:
            raise NotImplementedError(
                "`values` is currently required for `MolecularInput`"
            )
        else:
            data = self.to_descriptor_encoding(transform_type, values)

        lower = data.min(axis=0).values.tolist()
        upper = data.max(axis=0).values.tolist()

        return lower, upper

    def to_descriptor_encoding(
        self, transform_type: AnyMolFeatures, values: pd.Series
    ) -> pd.DataFrame:
        """Converts values to descriptor encoding.

        Args:
            values (pd.Series): Values to transform.

        Returns:
            pd.DataFrame: Descriptor encoded dataframe.
        """
        descriptor_values = transform_type.get_descriptor_values(values)

        descriptor_values.columns = [
            get_encoded_name(self.key, d) for d in transform_type.get_descriptor_names()
        ]
        descriptor_values.index = values.index

        return descriptor_values
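
Example (a minimal sketch; it assumes an rdkit installation and that Fingerprints can be imported from bofire.data_models.molfeatures.api):

import pandas as pd
from bofire.data_models.features.molecular import MolecularInput
from bofire.data_models.molfeatures.api import Fingerprints  # assumed import path

feat = MolecularInput(key="solvent")
smiles = pd.Series(["CO", "CCO", "CCCO"])
encoded = feat.to_descriptor_encoding(Fingerprints(n_bits=32), smiles)
# one row per molecule; columns are named via get_encoded_name(key, descriptor),
# with the descriptor names supplied by the molfeatures object
lower, upper = feat.get_bounds(Fingerprints(n_bits=32), values=smiles)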

fixed_value(self, transform_type=None)

Method to return the fixed value in case of a fixed feature.

Returns:

Type Description
Union[None, List[str], List[float]]

None in case the feature is not fixed, else the fixed value.

Source code in bofire/data_models/features/molecular.py
def fixed_value(self, transform_type: Optional[AnyMolFeatures] = None) -> None:
    return None

get_bounds(self, transform_type, values, reference_value=None)

Calculates the lower and upper bounds for the feature based on the given transform type and values.

Parameters:

Name Type Description Default
transform_type AnyMolFeatures

The type of transformation to apply to the data.

required
values pd.Series

The actual data over which the lower and upper bounds are calculated.

required
reference_value Optional[str]

The reference value for the transformation. Not used here. Defaults to None.

None

Returns:

Type Description
Tuple[List[float], List[float]]

A tuple containing the lower and upper bounds of the transformed data.

Exceptions:

Type Description
NotImplementedError

Raised when values is None, as it is currently required for MolecularInput.

Source code in bofire/data_models/features/molecular.py
def get_bounds(
    self,
    transform_type: AnyMolFeatures,
    values: pd.Series,
    reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
    """
    Calculates the lower and upper bounds for the feature based on the given transform type and values.

    Args:
        transform_type (AnyMolFeatures): The type of transformation to apply to the data.
        values (pd.Series): The actual data over which the lower and upper bounds are calculated.
        reference_value (Optional[str], optional): The reference value for the transformation. Not used here.
            Defaults to None.

    Returns:
        Tuple[List[float], List[float]]: A tuple containing the lower and upper bounds of the transformed data.

    Raises:
        NotImplementedError: Raised when `values` is None, as it is currently required for `MolecularInput`.
    """
    if values is None:
        raise NotImplementedError(
            "`values` is currently required for `MolecularInput`"
        )
    else:
        data = self.to_descriptor_encoding(transform_type, values)

    lower = data.min(axis=0).values.tolist()
    upper = data.max(axis=0).values.tolist()

    return lower, upper

is_fixed(self)

Indicates if a variable is set to a fixed value.

Returns:

Type Description
bool

True if fixed, else False.

Source code in bofire/data_models/features/molecular.py
def is_fixed(self) -> bool:
    return False

sample(self, n, seed=None)

Sample a series of allowed values.

Parameters:

Name Type Description Default
n int

Number of samples

required

Returns:

Type Description
pd.Series

Sampled values.

Source code in bofire/data_models/features/molecular.py
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
    raise ValueError("Sampling not supported for `MolecularInput`")

to_descriptor_encoding(self, transform_type, values)

Converts values to descriptor encoding.

Parameters:

Name Type Description Default
values pd.Series

Values to transform.

required

Returns:

Type Description
pd.DataFrame

Descriptor encoded dataframe.

Source code in bofire/data_models/features/molecular.py
def to_descriptor_encoding(
    self, transform_type: AnyMolFeatures, values: pd.Series
) -> pd.DataFrame:
    """Converts values to descriptor encoding.

    Args:
        values (pd.Series): Values to transform.

    Returns:
        pd.DataFrame: Descriptor encoded dataframe.
    """
    descriptor_values = transform_type.get_descriptor_values(values)

    descriptor_values.columns = [
        get_encoded_name(self.key, d) for d in transform_type.get_descriptor_names()
    ]
    descriptor_values.index = values.index

    return descriptor_values

validate_candidental(self, values)

Abstract method to validate the suggested candidates

Parameters:

Name Type Description Default
values pd.Series

A dataFrame with candidates

required

Returns:

Type Description
pd.Series

The passed dataFrame with candidates

Source code in bofire/data_models/features/molecular.py
def validate_candidental(self, values: pd.Series) -> pd.Series:
    values = values.map(str)
    for smi in values:
        smiles2mol(smi)
    return values

validate_experimental(self, values, strict=False)

Abstract method to validate the experimental dataFrame

Parameters:

Name Type Description Default
values pd.Series

A dataFrame with experiments

required
strict bool

Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

False

Returns:

Type Description
pd.Series

The passed dataFrame with experiments

Source code in bofire/data_models/features/molecular.py
def validate_experimental(
    self, values: pd.Series, strict: bool = False
) -> pd.Series:
    values = values.map(str)
    for smi in values:
        smiles2mol(smi)

    return values

numerical

NumericalInput (Input)

Abstract base class for all numerical (ordinal) input features.

Source code in bofire/data_models/features/numerical.py
class NumericalInput(Input):
    """Abstract base class for all numerical (ordinal) input features."""

    unit: Optional[str] = None

    @staticmethod
    def valid_transform_types() -> List:
        return []

    def to_unit_range(
        self, values: Union[pd.Series, np.ndarray], use_real_bounds: bool = False
    ) -> Union[pd.Series, np.ndarray]:
        """Convert to the unit range between 0 and 1.

        Args:
            values (pd.Series): values to be transformed
            use_real_bounds (bool, optional): if True, use the bounds from the actual values else the bounds from the feature.
                Defaults to False.

        Raises:
            ValueError: If lower_bound == upper_bound an error is raised

        Returns:
            pd.Series: transformed values.
        """
        if use_real_bounds:
            lower, upper = self.get_bounds(transform_type=None, values=values)
            lower = lower[0]
            upper = upper[0]
        else:
            lower, upper = self.lower_bound, self.upper_bound  # type: ignore
        if lower == upper:
            raise ValueError("Fixed feature cannot be transformed to unit range.")
        valrange = upper - lower
        return (values - lower) / valrange

    def from_unit_range(
        self, values: Union[pd.Series, np.ndarray]
    ) -> Union[pd.Series, np.ndarray]:
        """Convert from unit range.

        Args:
            values (pd.Series): values to transform from.

        Raises:
            ValueError: if the feature is fixed raise a value error.

        Returns:
            pd.Series: the back-transformed values.
        """
        if self.is_fixed():
            raise ValueError("Fixed feature cannot be transformed from unit range.")
        valrange = self.upper_bound - self.lower_bound  # type: ignore
        return (values * valrange) + self.lower_bound  # type: ignore

    def is_fixed(self):
        """Method to check if the feature is fixed

        Returns:
            Boolean: True when the feature is fixed, false otherwise.
        """
        return self.lower_bound == self.upper_bound  # type: ignore

    def fixed_value(
        self, transform_type: Optional[TTransform] = None
    ) -> Union[None, List[float]]:
        """Method to get the value to which the feature is fixed

        Returns:
            Float: Return the feature value or None if the feature is not fixed.
        """
        assert transform_type is None
        if self.is_fixed():
            return [self.lower_bound]  # type: ignore
        else:
            return None

    def validate_experimental(self, values: pd.Series, strict=False) -> pd.Series:
        """Method to validate the experimental dataFrame

        Args:
            values (pd.Series): A dataFrame with experiments
            strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not.
                Defaults to False.

        Raises:
            ValueError: when a value is not numerical
            ValueError: when there is no variation in a feature provided by the experimental data

        Returns:
            pd.Series: A dataFrame with experiments
        """
        try:
            values = pd.to_numeric(values, errors="raise").astype("float64")
        except ValueError:
            raise ValueError(
                f"not all values of input feature `{self.key}` are numerical"
            )
        values = values.astype("float64")
        if strict:
            lower, upper = self.get_bounds(transform_type=None, values=values)
            if lower == upper:
                raise ValueError(
                    f"No variation present or planned for feature {self.key}. Remove it."
                )
        return values

    def validate_candidental(self, values: pd.Series) -> pd.Series:
        """Validate the suggested candidates for the feature.

        Args:
            values (pd.Series): suggested candidates for the feature

        Raises:
            ValueError: Error is raised when one of the values is not numerical.

        Returns:
            pd.Series: the original provided candidates
        """
        try:
            values = pd.to_numeric(values, errors="raise").astype("float64")
        except ValueError:
            raise ValueError(
                f"not all values of input feature `{self.key}` are numerical"
            )
        return values

fixed_value(self, transform_type=None)

Method to get the value to which the feature is fixed

Returns:

Type Description
Float

Return the feature value or None if the feature is not fixed.

Source code in bofire/data_models/features/numerical.py
def fixed_value(
    self, transform_type: Optional[TTransform] = None
) -> Union[None, List[float]]:
    """Method to get the value to which the feature is fixed

    Returns:
        Float: Return the feature value or None if the feature is not fixed.
    """
    assert transform_type is None
    if self.is_fixed():
        return [self.lower_bound]  # type: ignore
    else:
        return None

from_unit_range(self, values)

Convert from unit range.

Parameters:

Name Type Description Default
values pd.Series

values to transform from.

required

Exceptions:

Type Description
ValueError

if the feature is fixed raise a value error.

Returns:

Type Description
pd.Series

the back-transformed values.

Source code in bofire/data_models/features/numerical.py
def from_unit_range(
    self, values: Union[pd.Series, np.ndarray]
) -> Union[pd.Series, np.ndarray]:
    """Convert from unit range.

    Args:
        values (pd.Series): values to transform from.

    Raises:
        ValueError: if the feature is fixed raise a value error.

    Returns:
        pd.Series: the back-transformed values.
    """
    if self.is_fixed():
        raise ValueError("Fixed feature cannot be transformed from unit range.")
    valrange = self.upper_bound - self.lower_bound  # type: ignore
    return (values * valrange) + self.lower_bound  # type: ignore

is_fixed(self)

Method to check if the feature is fixed

Returns:

Type Description
Boolean

True when the feature is fixed, false otherwise.

Source code in bofire/data_models/features/numerical.py
def is_fixed(self):
    """Method to check if the feature is fixed

    Returns:
        Boolean: True when the feature is fixed, false otherwise.
    """
    return self.lower_bound == self.upper_bound  # type: ignore

to_unit_range(self, values, use_real_bounds=False)

Convert to the unit range between 0 and 1.

Parameters:

Name Type Description Default
values pd.Series

values to be transformed

required
use_real_bounds bool

if True, use the bounds from the actual values else the bounds from the feature. Defaults to False.

False

Exceptions:

Type Description
ValueError

If lower_bound == upper_bound an error is raised

Returns:

Type Description
pd.Series

transformed values.

Source code in bofire/data_models/features/numerical.py
def to_unit_range(
    self, values: Union[pd.Series, np.ndarray], use_real_bounds: bool = False
) -> Union[pd.Series, np.ndarray]:
    """Convert to the unit range between 0 and 1.

    Args:
        values (pd.Series): values to be transformed
        use_real_bounds (bool, optional): if True, use the bounds from the actual values else the bounds from the feature.
            Defaults to False.

    Raises:
        ValueError: If lower_bound == upper_bound an error is raised

    Returns:
        pd.Series: transformed values.
    """
    if use_real_bounds:
        lower, upper = self.get_bounds(transform_type=None, values=values)
        lower = lower[0]
        upper = upper[0]
    else:
        lower, upper = self.lower_bound, self.upper_bound  # type: ignore
    if lower == upper:
        raise ValueError("Fixed feature cannot be transformed to unit range.")
    valrange = upper - lower
    return (values - lower) / valrange
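
Example (a minimal sketch; a DiscreteInput stands in for any NumericalInput subclass, since its bounds derive from the allowed values):

import pandas as pd
from bofire.data_models.features.discrete import DiscreteInput

x = DiscreteInput(key="x", values=[0.0, 5.0, 10.0])
scaled = x.to_unit_range(pd.Series([0.0, 5.0, 10.0]))  # 0.0, 0.5, 1.0
x.from_unit_range(scaled)  # back to 0.0, 5.0, 10.0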

validate_candidental(self, values)

Validate the suggested candidates for the feature.

Parameters:

Name Type Description Default
values pd.Series

suggested candidates for the feature

required

Exceptions:

Type Description
ValueError

Error is raised when one of the values is not numerical.

Returns:

Type Description
pd.Series

the original provided candidates

Source code in bofire/data_models/features/numerical.py
def validate_candidental(self, values: pd.Series) -> pd.Series:
    """Validate the suggested candidates for the feature.

    Args:
        values (pd.Series): suggested candidates for the feature

    Raises:
        ValueError: Error is raised when one of the values is not numerical.

    Returns:
        pd.Series: the original provided candidates
    """
    try:
        values = pd.to_numeric(values, errors="raise").astype("float64")
    except ValueError:
        raise ValueError(
            f"not all values of input feature `{self.key}` are numerical"
        )
    return values

validate_experimental(self, values, strict=False)

Method to validate the experimental dataFrame

Parameters:

Name Type Description Default
values pd.Series

A dataFrame with experiments

required
strict bool

Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.

False

Exceptions:

Type Description
ValueError

when a value is not numerical

ValueError

when there is no variation in a feature provided by the experimental data

Returns:

Type Description
pd.Series

A dataFrame with experiments

Source code in bofire/data_models/features/numerical.py
def validate_experimental(self, values: pd.Series, strict=False) -> pd.Series:
    """Method to validate the experimental dataFrame

    Args:
        values (pd.Series): A dataFrame with experiments
        strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not.
            Defaults to False.

    Raises:
        ValueError: when a value is not numerical
        ValueError: when there is no variation in a feature provided by the experimental data

    Returns:
        pd.Series: A dataFrame with experiments
    """
    try:
        values = pd.to_numeric(values, errors="raise").astype("float64")
    except ValueError:
        raise ValueError(
            f"not all values of input feature `{self.key}` are numerical"
        )
    values = values.astype("float64")
    if strict:
        lower, upper = self.get_bounds(transform_type=None, values=values)
        if lower == upper:
            raise ValueError(
                f"No variation present or planned for feature {self.key}. Remove it."
            )
    return values
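
Example (a minimal sketch of the numeric coercion):

import pandas as pd
from bofire.data_models.features.discrete import DiscreteInput

x = DiscreteInput(key="x", values=[0.0, 1.0])
x.validate_experimental(pd.Series(["0", "1.0"]))  # coerced to float64
x.validate_experimental(pd.Series(["a", "b"]))
# raises ValueError: not all values of input feature `x` are numerical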