Domain
categorical
CategoricalInput (Input)
Base class for all categorical input features.
Attributes:
Name | Type | Description |
---|---|---|
categories | List[str] | Names of the categories. |
allowed | List[bool] | List of bools indicating if a category is allowed within the optimization. |
Source code in bofire/data_models/features/categorical.py
class CategoricalInput(Input):
"""Base class for all categorical input features.
Attributes:
categories (List[str]): Names of the categories.
allowed (List[bool]): List of bools indicating if a category is allowed within the optimization.
"""
type: Literal["CategoricalInput"] = "CategoricalInput" # type: ignore
# order_id: ClassVar[int] = 5
order_id: ClassVar[int] = 7
categories: CategoryVals
allowed: Optional[Annotated[List[bool], Field(min_length=2)]] = Field(
default=None,
validate_default=True,
)
@field_validator("allowed")
@classmethod
def generate_allowed(cls, allowed, info):
"""Generates the list of allowed categories if not provided."""
if allowed is None and "categories" in info.data.keys():
return [True for _ in range(len(info.data["categories"]))]
return allowed
@model_validator(mode="after")
def validate_categories_fitting_allowed(self):
if len(self.allowed) != len(self.categories): # type: ignore
raise ValueError("allowed must have same length as categories")
if sum(self.allowed) == 0: # type: ignore
raise ValueError("no category is allowed")
return self
@staticmethod
def valid_transform_types() -> List[CategoricalEncodingEnum]: # type: ignore
return [
CategoricalEncodingEnum.ONE_HOT,
CategoricalEncodingEnum.DUMMY,
CategoricalEncodingEnum.ORDINAL,
]
def is_fixed(self) -> bool:
"""Returns True if there is only one allowed category.
Returns:
[bool]: True if there is only one allowed category
"""
if self.allowed is None:
return False
return sum(self.allowed) == 1
def fixed_value(
self,
transform_type: Optional[TTransform] = None,
) -> Union[List[str], List[float], None]:
"""Returns the categories to which the feature is fixed, None if the feature is not fixed
Returns:
List[str]: List of categories or None
"""
if self.is_fixed():
val = self.get_allowed_categories()[0]
if transform_type is None:
return [val]
if transform_type == CategoricalEncodingEnum.ONE_HOT:
return self.to_onehot_encoding(pd.Series([val])).values[0].tolist()
if transform_type == CategoricalEncodingEnum.DUMMY:
return self.to_dummy_encoding(pd.Series([val])).values[0].tolist()
if transform_type == CategoricalEncodingEnum.ORDINAL:
return self.to_ordinal_encoding(pd.Series([val])).tolist()
raise ValueError(
f"Unkwon transform type {transform_type} for categorical input {self.key}",
)
return None
def get_allowed_categories(self):
"""Returns the allowed categories.
Returns:
list of str: The allowed categories
"""
if self.allowed is None:
return []
return [c for c, a in zip(self.categories, self.allowed) if a]
def validate_experimental(
self,
values: pd.Series,
strict: bool = False,
) -> pd.Series:
"""Method to validate the experimental dataFrame
Args:
values (pd.Series): A dataFrame with experiments
strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.
Raises:
ValueError: when an entry is not in the list of allowed categories
ValueError: when there is no variation in a feature provided by the experimental data
Returns:
pd.Series: A dataFrame with experiments
"""
values = values.map(str)
if sum(values.isin(self.categories)) != len(values):
raise ValueError(
f"invalid values for `{self.key}`, allowed are: `{self.categories}`",
)
if strict:
possible_categories = self.get_possible_categories(values)
if len(possible_categories) != len(self.categories):
raise ValueError(
f"Categories {list(set(self.categories)-set(possible_categories))} of feature {self.key} not used. Remove them.",
)
return values
def validate_candidental(self, values: pd.Series) -> pd.Series:
"""Method to validate the suggested candidates
Args:
values (pd.Series): A dataFrame with candidates
Raises:
ValueError: when not all values for a feature are one of the allowed categories
Returns:
pd.Series: The passed dataFrame with candidates
"""
values = values.map(str)
if sum(values.isin(self.get_allowed_categories())) != len(values):
raise ValueError(
f"not all values of input feature `{self.key}` are a valid allowed category from {self.get_allowed_categories()}",
)
return values
def get_forbidden_categories(self):
"""Returns the non-allowed categories
Returns:
List[str]: List of the non-allowed categories
"""
return list(set(self.categories) - set(self.get_allowed_categories()))
def get_possible_categories(self, values: pd.Series) -> list:
"""Return the superset of categories that have been used in the experimental dataset and
that can be used in the optimization
Args:
values (pd.Series): Series with the values for this feature
Returns:
list: list of possible categories
"""
return sorted(set(list(set(values.tolist())) + self.get_allowed_categories()))
def to_onehot_encoding(self, values: pd.Series) -> pd.DataFrame:
"""Converts values to a one-hot encoding.
Args:
values (pd.Series): Series to be transformed.
Returns:
pd.DataFrame: One-hot transformed data frame.
"""
return pd.DataFrame(
{get_encoded_name(self.key, c): values == c for c in self.categories},
dtype=float,
index=values.index,
)
def from_onehot_encoding(self, values: pd.DataFrame) -> pd.Series:
"""Converts values back from one-hot encoding.
Args:
values (pd.DataFrame): One-hot encoded values.
Raises:
ValueError: If one-hot columns not present in `values`.
Returns:
pd.Series: Series with categorical values.
"""
cat_cols = [get_encoded_name(self.key, c) for c in self.categories]
# we allow here explicitly that the dataframe can have more columns than needed to have it
# easier in the backtransform.
if np.any([c not in values.columns for c in cat_cols]):
raise ValueError(
f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}.",
)
s = values[cat_cols].idxmax(1).str[(len(self.key) + 1) :]
s.name = self.key
return s
def to_dummy_encoding(self, values: pd.Series) -> pd.DataFrame:
"""Converts values to a dummy-hot encoding, dropping the first categorical level.
Args:
values (pd.Series): Series to be transformed.
Returns:
pd.DataFrame: Dummy-hot transformed data frame.
"""
return pd.DataFrame(
{get_encoded_name(self.key, c): values == c for c in self.categories[1:]},
dtype=float,
index=values.index,
)
def from_dummy_encoding(self, values: pd.DataFrame) -> pd.Series:
"""Convert points back from dummy encoding.
Args:
values (pd.DataFrame): Dummy-hot encoded values.
Raises:
ValueError: If one-hot columns not present in `values`.
Returns:
pd.Series: Series with categorical values.
"""
cat_cols = [get_encoded_name(self.key, c) for c in self.categories]
# we allow here explicitly that the dataframe can have more columns than needed to have it
# easier in the backtransform.
if np.any([c not in values.columns for c in cat_cols[1:]]):
raise ValueError(
f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols[1:]}.",
)
values = values.copy()
values[cat_cols[0]] = 1 - values[cat_cols[1:]].sum(axis=1)
s = values[cat_cols].idxmax(1).str[(len(self.key) + 1) :]
s.name = self.key
return s
def to_ordinal_encoding(self, values: pd.Series) -> pd.Series:
"""Converts values to an ordinal integer based encoding.
Args:
values (pd.Series): Series to be transformed.
Returns:
pd.Series: Ordinal encoded values.
"""
enc = pd.Series(range(len(self.categories)), index=list(self.categories))
s = enc[values]
s.index = values.index
s.name = self.key
return s
def from_ordinal_encoding(self, values: pd.Series) -> pd.Series:
"""Convertes values back from ordinal encoding.
Args:
values (pd.Series): Ordinal encoded series.
Returns:
pd.Series: Series with categorical values.
"""
enc = np.array(self.categories)
return pd.Series(enc[values], index=values.index, name=self.key)
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Draw random samples from the feature.
Args:
n (int): number of samples.
seed (int, optional): random seed. Defaults to None.
Returns:
pd.Series: drawn samples.
"""
return pd.Series(
name=self.key,
data=np.random.default_rng(seed=seed).choice(
self.get_allowed_categories(),
n,
),
)
def get_bounds( # type: ignore
self,
transform_type: TTransform,
values: Optional[pd.Series] = None,
reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
assert isinstance(transform_type, CategoricalEncodingEnum)
if transform_type == CategoricalEncodingEnum.ORDINAL:
return [0], [len(self.categories) - 1]
if transform_type == CategoricalEncodingEnum.ONE_HOT:
# in the case that values are None, we return the bounds
# based on the optimization bounds, else we return the true
# bounds as this is for model fitting.
if values is None:
lower = [0.0 for _ in self.categories]
upper = [
1.0 if self.allowed[i] is True else 0.0 # type: ignore
for i, _ in enumerate(self.categories)
]
else:
lower = [0.0 for _ in self.categories]
upper = [1.0 for _ in self.categories]
return lower, upper
if transform_type == CategoricalEncodingEnum.DUMMY:
lower = [0.0 for _ in range(len(self.categories) - 1)]
upper = [1.0 for _ in range(len(self.categories) - 1)]
return lower, upper
if transform_type == CategoricalEncodingEnum.DESCRIPTOR:
raise ValueError(
f"Invalid descriptor transform for categorical {self.key}.",
)
raise ValueError(
f"Invalid transform_type {transform_type} provided for categorical {self.key}.",
)
def __str__(self) -> str:
"""Returns the number of categories as str
Returns:
str: Number of categories
"""
return f"{len(self.categories)} categories"
__str__(self)
special
Returns the number of categories as str
Returns:
Type | Description |
---|---|
str | Number of categories |
Source code in bofire/data_models/features/categorical.py
def __str__(self) -> str:
"""Returns the number of categories as str
Returns:
str: Number of categories
"""
return f"{len(self.categories)} categories"
fixed_value(self, transform_type=None)
Returns the categories to which the feature is fixed, None if the feature is not fixed
Returns:
Type | Description |
---|---|
List[str] | List of categories or None |
Source code in bofire/data_models/features/categorical.py
def fixed_value(
self,
transform_type: Optional[TTransform] = None,
) -> Union[List[str], List[float], None]:
"""Returns the categories to which the feature is fixed, None if the feature is not fixed
Returns:
List[str]: List of categories or None
"""
if self.is_fixed():
val = self.get_allowed_categories()[0]
if transform_type is None:
return [val]
if transform_type == CategoricalEncodingEnum.ONE_HOT:
return self.to_onehot_encoding(pd.Series([val])).values[0].tolist()
if transform_type == CategoricalEncodingEnum.DUMMY:
return self.to_dummy_encoding(pd.Series([val])).values[0].tolist()
if transform_type == CategoricalEncodingEnum.ORDINAL:
return self.to_ordinal_encoding(pd.Series([val])).tolist()
raise ValueError(
f"Unkwon transform type {transform_type} for categorical input {self.key}",
)
return None
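A short sketch of how `fixed_value` behaves when only one category remains allowed. The import paths (`bofire.data_models.features.api`, `bofire.data_models.enum`) are assumptions:

```python
# Sketch: a feature with a single allowed category is fixed, and fixed_value
# returns that category in the requested encoding.
from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.api import CategoricalInput

solvent = CategoricalInput(
    key="solvent",
    categories=["water", "ethanol", "toluene"],
    allowed=[False, True, False],
)
assert solvent.is_fixed()
solvent.fixed_value()                                 # ['ethanol']
solvent.fixed_value(CategoricalEncodingEnum.ONE_HOT)  # [0.0, 1.0, 0.0]
solvent.fixed_value(CategoricalEncodingEnum.ORDINAL)  # [1]
```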
from_dummy_encoding(self, values)
Convert points back from dummy encoding.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.DataFrame | Dummy-hot encoded values. | required |
Exceptions:
Type | Description |
---|---|
ValueError | If one-hot columns not present in `values`. |
Returns:
Type | Description |
---|---|
pd.Series | Series with categorical values. |
Source code in bofire/data_models/features/categorical.py
def from_dummy_encoding(self, values: pd.DataFrame) -> pd.Series:
"""Convert points back from dummy encoding.
Args:
values (pd.DataFrame): Dummy-hot encoded values.
Raises:
ValueError: If one-hot columns not present in `values`.
Returns:
pd.Series: Series with categorical values.
"""
cat_cols = [get_encoded_name(self.key, c) for c in self.categories]
# we allow here explicitly that the dataframe can have more columns than needed to have it
# easier in the backtransform.
if np.any([c not in values.columns for c in cat_cols[1:]]):
raise ValueError(
f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols[1:]}.",
)
values = values.copy()
values[cat_cols[0]] = 1 - values[cat_cols[1:]].sum(axis=1)
s = values[cat_cols].idxmax(1).str[(len(self.key) + 1) :]
s.name = self.key
return s
from_onehot_encoding(self, values)
Converts values back from one-hot encoding.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.DataFrame | One-hot encoded values. | required |
Exceptions:
Type | Description |
---|---|
ValueError | If one-hot columns not present in `values`. |
Returns:
Type | Description |
---|---|
pd.Series | Series with categorical values. |
Source code in bofire/data_models/features/categorical.py
def from_onehot_encoding(self, values: pd.DataFrame) -> pd.Series:
"""Converts values back from one-hot encoding.
Args:
values (pd.DataFrame): One-hot encoded values.
Raises:
ValueError: If one-hot columns not present in `values`.
Returns:
pd.Series: Series with categorical values.
"""
cat_cols = [get_encoded_name(self.key, c) for c in self.categories]
# we allow here explicitly that the dataframe can have more columns than needed to have it
# easier in the backtransform.
if np.any([c not in values.columns for c in cat_cols]):
raise ValueError(
f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}.",
)
s = values[cat_cols].idxmax(1).str[(len(self.key) + 1) :]
s.name = self.key
return s
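A round-trip sketch for the one-hot encoding (assumed import path; the feature `cat` is hypothetical):

```python
import pandas as pd
from bofire.data_models.features.api import CategoricalInput

cat = CategoricalInput(key="cat", categories=["a", "b", "c"])
encoded = cat.to_onehot_encoding(pd.Series(["a", "c", "b"]))
# columns are the encoded names per category, e.g. "cat_a", "cat_b", "cat_c"
decoded = cat.from_onehot_encoding(encoded)
# decoded holds "a", "c", "b" again, with the series named after the feature key
```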
from_ordinal_encoding(self, values)
Converts values back from ordinal encoding.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | Ordinal encoded series. | required |
Returns:
Type | Description |
---|---|
pd.Series | Series with categorical values. |
Source code in bofire/data_models/features/categorical.py
def from_ordinal_encoding(self, values: pd.Series) -> pd.Series:
"""Convertes values back from ordinal encoding.
Args:
values (pd.Series): Ordinal encoded series.
Returns:
pd.Series: Series with categorical values.
"""
enc = np.array(self.categories)
return pd.Series(enc[values], index=values.index, name=self.key)
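The analogous round trip for the ordinal encoding, a sketch under the same import assumptions:

```python
import pandas as pd
from bofire.data_models.features.api import CategoricalInput

cat = CategoricalInput(key="cat", categories=["low", "mid", "high"])
codes = cat.to_ordinal_encoding(pd.Series(["high", "low"]))  # 2, 0
back = cat.from_ordinal_encoding(codes)                      # "high", "low"
```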
generate_allowed(allowed, info)
classmethod
Generates the list of allowed categories if not provided.
Source code in bofire/data_models/features/categorical.py
@field_validator("allowed")
@classmethod
def generate_allowed(cls, allowed, info):
"""Generates the list of allowed categories if not provided."""
if allowed is None and "categories" in info.data.keys():
return [True for _ in range(len(info.data["categories"]))]
return allowed
get_allowed_categories(self)
Returns the allowed categories.
Returns:
Type | Description |
---|---|
list of str | The allowed categories |
Source code in bofire/data_models/features/categorical.py
def get_allowed_categories(self):
"""Returns the allowed categories.
Returns:
list of str: The allowed categories
"""
if self.allowed is None:
return []
return [c for c, a in zip(self.categories, self.allowed) if a]
get_bounds(self, transform_type, values=None, reference_value=None)
Returns the bounds of an input feature depending on the requested transform type.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
transform_type | Optional[TTransform] | The requested transform type. Defaults to None. | required |
values | Optional[pd.Series] | If values are provided the bounds are returned taking the most extreme values for the feature into account. Defaults to None. | None |
reference_value | Optional[float] | If a reference value is provided, then the local bounds based on a local search region are provided. Currently only supported for continuous inputs. For more details, see https://www.merl.com/publications/docs/TR2023-057.pdf. | None |
Returns:
Type | Description |
---|---|
Tuple[List[float], List[float]] | List of lower bound values, list of upper bound values. |
Source code in bofire/data_models/features/categorical.py
def get_bounds( # type: ignore
self,
transform_type: TTransform,
values: Optional[pd.Series] = None,
reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
assert isinstance(transform_type, CategoricalEncodingEnum)
if transform_type == CategoricalEncodingEnum.ORDINAL:
return [0], [len(self.categories) - 1]
if transform_type == CategoricalEncodingEnum.ONE_HOT:
# in the case that values are None, we return the bounds
# based on the optimization bounds, else we return the true
# bounds as this is for model fitting.
if values is None:
lower = [0.0 for _ in self.categories]
upper = [
1.0 if self.allowed[i] is True else 0.0 # type: ignore
for i, _ in enumerate(self.categories)
]
else:
lower = [0.0 for _ in self.categories]
upper = [1.0 for _ in self.categories]
return lower, upper
if transform_type == CategoricalEncodingEnum.DUMMY:
lower = [0.0 for _ in range(len(self.categories) - 1)]
upper = [1.0 for _ in range(len(self.categories) - 1)]
return lower, upper
if transform_type == CategoricalEncodingEnum.DESCRIPTOR:
raise ValueError(
f"Invalid descriptor transform for categorical {self.key}.",
)
raise ValueError(
f"Invalid transform_type {transform_type} provided for categorical {self.key}.",
)
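A sketch of the resulting one-hot bounds when one category is disallowed (assumed imports as above):

```python
import pandas as pd
from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.api import CategoricalInput

cat = CategoricalInput(key="cat", categories=["a", "b", "c"], allowed=[True, True, False])

# Optimization bounds: the disallowed level "c" gets an upper bound of 0.
lower, upper = cat.get_bounds(CategoricalEncodingEnum.ONE_HOT)
# lower == [0.0, 0.0, 0.0], upper == [1.0, 1.0, 0.0]

# With experimental values the full [0, 1] range is returned (used for model fitting).
lower, upper = cat.get_bounds(CategoricalEncodingEnum.ONE_HOT, values=pd.Series(["a", "c"]))
# upper == [1.0, 1.0, 1.0]
```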
get_forbidden_categories(self)
Returns the non-allowed categories
Returns:
Type | Description |
---|---|
List[str] | List of the non-allowed categories |
Source code in bofire/data_models/features/categorical.py
def get_forbidden_categories(self):
"""Returns the non-allowed categories
Returns:
List[str]: List of the non-allowed categories
"""
return list(set(self.categories) - set(self.get_allowed_categories()))
get_possible_categories(self, values)
Return the superset of categories that have been used in the experimental dataset and that can be used in the optimization
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | Series with the values for this feature | required |
Returns:
Type | Description |
---|---|
list | list of possible categories |
Source code in bofire/data_models/features/categorical.py
def get_possible_categories(self, values: pd.Series) -> list:
"""Return the superset of categories that have been used in the experimental dataset and
that can be used in the optimization
Args:
values (pd.Series): Series with the values for this feature
Returns:
list: list of possible categories
"""
return sorted(set(list(set(values.tolist())) + self.get_allowed_categories()))
is_fixed(self)
Returns True if there is only one allowed category.
Returns:
Type | Description |
---|---|
bool | True if there is only one allowed category |
Source code in bofire/data_models/features/categorical.py
def is_fixed(self) -> bool:
"""Returns True if there is only one allowed category.
Returns:
[bool]: True if there is only one allowed category
"""
if self.allowed is None:
return False
return sum(self.allowed) == 1
sample(self, n, seed=None)
Draw random samples from the feature.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n | int | number of samples. | required |
seed | int | random seed. Defaults to None. | None |
Returns:
Type | Description |
---|---|
pd.Series | drawn samples. |
Source code in bofire/data_models/features/categorical.py
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Draw random samples from the feature.
Args:
n (int): number of samples.
seed (int, optional): random seed. Defaults to None.
Returns:
pd.Series: drawn samples.
"""
return pd.Series(
name=self.key,
data=np.random.default_rng(seed=seed).choice(
self.get_allowed_categories(),
n,
),
)
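A sampling sketch; only allowed categories are drawn, and a seed makes the draw reproducible (assumed imports as above):

```python
from bofire.data_models.features.api import CategoricalInput

cat = CategoricalInput(key="cat", categories=["a", "b", "c"], allowed=[True, True, False])
samples = cat.sample(5, seed=42)
# a pd.Series named "cat" with five entries drawn from {"a", "b"} only
```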
to_dummy_encoding(self, values)
Converts values to a dummy-hot encoding, dropping the first categorical level.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | Series to be transformed. | required |
Returns:
Type | Description |
---|---|
pd.DataFrame | Dummy-hot transformed data frame. |
Source code in bofire/data_models/features/categorical.py
def to_dummy_encoding(self, values: pd.Series) -> pd.DataFrame:
"""Converts values to a dummy-hot encoding, dropping the first categorical level.
Args:
values (pd.Series): Series to be transformed.
Returns:
pd.DataFrame: Dummy-hot transformed data frame.
"""
return pd.DataFrame(
{get_encoded_name(self.key, c): values == c for c in self.categories[1:]},
dtype=float,
index=values.index,
)
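A dummy-encoding sketch; the first category acts as the reference level and is represented by an all-zero row (assumed imports as above):

```python
import pandas as pd
from bofire.data_models.features.api import CategoricalInput

cat = CategoricalInput(key="cat", categories=["a", "b", "c"])
dummy = cat.to_dummy_encoding(pd.Series(["a", "b", "c"]))
# columns "cat_b" and "cat_c"; the row for "a" is all zeros (reference level)
cat.from_dummy_encoding(dummy)  # recovers "a", "b", "c"
```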
to_onehot_encoding(self, values)
Converts values to a one-hot encoding.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | Series to be transformed. | required |
Returns:
Type | Description |
---|---|
pd.DataFrame | One-hot transformed data frame. |
Source code in bofire/data_models/features/categorical.py
def to_onehot_encoding(self, values: pd.Series) -> pd.DataFrame:
"""Converts values to a one-hot encoding.
Args:
values (pd.Series): Series to be transformed.
Returns:
pd.DataFrame: One-hot transformed data frame.
"""
return pd.DataFrame(
{get_encoded_name(self.key, c): values == c for c in self.categories},
dtype=float,
index=values.index,
)
to_ordinal_encoding(self, values)
Converts values to an ordinal integer based encoding.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | Series to be transformed. | required |
Returns:
Type | Description |
---|---|
pd.Series | Ordinal encoded values. |
Source code in bofire/data_models/features/categorical.py
def to_ordinal_encoding(self, values: pd.Series) -> pd.Series:
"""Converts values to an ordinal integer based encoding.
Args:
values (pd.Series): Series to be transformed.
Returns:
pd.Series: Ordinal encoded values.
"""
enc = pd.Series(range(len(self.categories)), index=list(self.categories))
s = enc[values]
s.index = values.index
s.name = self.key
return s
validate_candidental(self, values)
Method to validate the suggested candidates
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | A dataFrame with candidates | required |
Exceptions:
Type | Description |
---|---|
ValueError | when not all values for a feature are one of the allowed categories |
Returns:
Type | Description |
---|---|
pd.Series | The passed dataFrame with candidates |
Source code in bofire/data_models/features/categorical.py
def validate_candidental(self, values: pd.Series) -> pd.Series:
"""Method to validate the suggested candidates
Args:
values (pd.Series): A dataFrame with candidates
Raises:
ValueError: when not all values for a feature are one of the allowed categories
Returns:
pd.Series: The passed dataFrame with candidates
"""
values = values.map(str)
if sum(values.isin(self.get_allowed_categories())) != len(values):
raise ValueError(
f"not all values of input feature `{self.key}` are a valid allowed category from {self.get_allowed_categories()}",
)
return values
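Candidate validation only accepts allowed categories, as in this sketch (assumed imports as above):

```python
import pandas as pd
from bofire.data_models.features.api import CategoricalInput

cat = CategoricalInput(key="cat", categories=["a", "b", "c"], allowed=[True, True, False])
cat.validate_candidental(pd.Series(["a", "b"]))  # passes and returns the series
cat.validate_candidental(pd.Series(["c"]))       # raises ValueError: "c" is not an allowed category
```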
validate_experimental(self, values, strict=False)
Method to validate the experimental dataFrame
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | A dataFrame with experiments | required |
strict | bool | Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False. | False |
Exceptions:
Type | Description |
---|---|
ValueError | when an entry is not in the list of allowed categories |
ValueError | when there is no variation in a feature provided by the experimental data |
Returns:
Type | Description |
---|---|
pd.Series | A dataFrame with experiments |
Source code in bofire/data_models/features/categorical.py
def validate_experimental(
self,
values: pd.Series,
strict: bool = False,
) -> pd.Series:
"""Method to validate the experimental dataFrame
Args:
values (pd.Series): A dataFrame with experiments
strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.
Raises:
ValueError: when an entry is not in the list of allowed categories
ValueError: when there is no variation in a feature provided by the experimental data
Returns:
pd.Series: A dataFrame with experiments
"""
values = values.map(str)
if sum(values.isin(self.categories)) != len(values):
raise ValueError(
f"invalid values for `{self.key}`, allowed are: `{self.categories}`",
)
if strict:
possible_categories = self.get_possible_categories(values)
if len(possible_categories) != len(self.categories):
raise ValueError(
f"Categories {list(set(self.categories)-set(possible_categories))} of feature {self.key} not used. Remove them.",
)
return values
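Experimental validation is less strict than candidate validation: any known category is accepted, but with `strict=True` every category must either be allowed or occur in the data. A sketch (assumed imports as above):

```python
import pandas as pd
from bofire.data_models.features.api import CategoricalInput

cat = CategoricalInput(key="cat", categories=["a", "b", "c"], allowed=[True, True, False])
data = pd.Series(["a", "b", "a"])
cat.validate_experimental(data)               # ok: all entries are known categories
cat.validate_experimental(data, strict=True)  # raises: "c" is neither allowed nor present in the data
```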
CategoricalOutput (Output)
Source code in bofire/data_models/features/categorical.py
class CategoricalOutput(Output):
type: Literal["CategoricalOutput"] = "CategoricalOutput" # type: ignore
order_id: ClassVar[int] = 10
categories: CategoryVals
objective: AnyCategoricalObjective
@model_validator(mode="after")
def validate_objective_categories(self):
"""Validates that objective categories match the output categories
Raises:
ValueError: when categories do not match objective categories
Returns:
self
"""
if self.objective.categories != self.categories:
raise ValueError("categories must match to objective categories")
return self
def __call__(self, values: pd.Series, values_adapt: pd.Series) -> pd.Series: # type: ignore
if self.objective is None:
return pd.Series(
data=[np.nan for _ in range(len(values))],
index=values.index,
name=values.name,
)
return self.objective(values, values_adapt) # type: ignore
def validate_experimental(self, values: pd.Series) -> pd.Series:
values = values.map(str)
if sum(values.isin(self.categories)) != len(values):
raise ValueError(
f"invalid values for `{self.key}`, allowed are: `{self.categories}`",
)
return values
def __str__(self) -> str:
return "CategoricalOutputFeature"
validate_experimental(self, values)
Abstract method to validate the experimental Series
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | A dataFrame with values for the outcome | required |
Returns:
Type | Description |
---|---|
pd.Series | The passed dataFrame with experiments |
Source code in bofire/data_models/features/categorical.py
def validate_experimental(self, values: pd.Series) -> pd.Series:
values = values.map(str)
if sum(values.isin(self.categories)) != len(values):
raise ValueError(
f"invalid values for `{self.key}`, allowed are: `{self.categories}`",
)
return values
validate_objective_categories(self)
Validates that objective categories match the output categories
Exceptions:
Type | Description |
---|---|
ValueError | when categories do not match objective categories |
Returns:
Type | Description |
---|---|
self | The validated feature. |
Source code in bofire/data_models/features/categorical.py
@model_validator(mode="after")
def validate_objective_categories(self):
"""Validates that objective categories match the output categories
Raises:
ValueError: when categories do not match objective categories
Returns:
self
"""
if self.objective.categories != self.categories:
raise ValueError("categories must match to objective categories")
return self
continuous
ContinuousInput (NumericalInput)
Base class for all continuous input features.
Attributes:
Name | Type | Description |
---|---|---|
bounds | Tuple[float, float] | A tuple that stores the lower and upper bound of the feature. |
stepsize | float | Float indicating the allowed stepsize between lower and upper. Defaults to None. |
local_relative_bounds | Tuple[float, float] | A tuple that stores the lower and upper bounds relative to a reference value. Defaults to None. |
Source code in bofire/data_models/features/continuous.py
class ContinuousInput(NumericalInput):
"""Base class for all continuous input features.
Attributes:
bounds (Tuple[float, float]): A tuple that stores the lower and upper bound of the feature.
stepsize (float, optional): Float indicating the allowed stepsize between lower and upper. Defaults to None.
local_relative_bounds (Tuple[float, float], optional): A tuple that stores the lower and upper bounds relative to a reference value.
Defaults to None.
"""
type: Literal["ContinuousInput"] = "ContinuousInput" # type: ignore
order_id: ClassVar[int] = 1
bounds: Bounds
local_relative_bounds: Optional[
Tuple[Annotated[float, Field(gt=0)], Annotated[float, Field(gt=0)]]
] = None
stepsize: Optional[float] = None
@property
def lower_bound(self) -> float:
return self.bounds[0]
@property
def upper_bound(self) -> float:
return self.bounds[1]
@model_validator(mode="after")
def validate_step_size(self):
if self.stepsize is None:
return self
lower, upper = self.bounds
if lower == upper and self.stepsize is not None:
raise ValueError(
"Stepsize cannot be provided for a fixed continuous input.",
)
range = upper - lower
if np.arange(lower, upper + self.stepsize, self.stepsize)[-1] != upper:
raise ValueError(
f"Stepsize of {self.stepsize} does not match the provided interval [{lower},{upper}].",
)
if range // self.stepsize == 1:
raise ValueError("Stepsize is too big, only one value allowed.")
return self
def round(self, values: pd.Series) -> pd.Series:
"""Round values to the stepsize of the feature. If no stepsize is provided return the
provided values.
Args:
values (pd.Series): The values that should be rounded.
Returns:
pd.Series: The rounded values
"""
if self.stepsize is None:
return values
self.validate_candidental(values=values)
allowed_values = np.arange(
self.lower_bound,
self.upper_bound + self.stepsize,
self.stepsize,
)
idx = abs(values.values.reshape([len(values), 1]) - allowed_values).argmin( # type: ignore
axis=1,
)
return pd.Series(
data=self.lower_bound + idx * self.stepsize,
index=values.index,
)
def validate_candidental(self, values: pd.Series) -> pd.Series:
"""Method to validate the suggested candidates
Args:
values (pd.Series): A dataFrame with candidates
Raises:
ValueError: when non numerical values are passed
ValueError: when values are larger than the upper bound of the feature
ValueError: when values are lower than the lower bound of the feature
Returns:
pd.Series: The passed dataFrame with candidates
"""
noise = 10e-6
values = super().validate_candidental(values)
if (values < self.lower_bound - noise).any():
raise ValueError(
f"not all values of input feature `{self.key}`are larger than lower bound `{self.lower_bound}` ",
)
if (values > self.upper_bound + noise).any():
raise ValueError(
f"not all values of input feature `{self.key}`are smaller than upper bound `{self.upper_bound}` ",
)
return values
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Draw random samples from the feature.
Args:
n (int): number of samples.
seed (int, optional): random seed. Defaults to None.
Returns:
pd.Series: drawn samples.
"""
return pd.Series(
name=self.key,
data=np.random.default_rng(seed=seed).uniform(
self.lower_bound,
self.upper_bound,
n,
),
)
def get_bounds( # type: ignore
self,
transform_type: Optional[TTransform] = None,
values: Optional[pd.Series] = None,
reference_value: Optional[float] = None,
) -> Tuple[List[float], List[float]]:
assert transform_type is None
if reference_value is not None and values is not None:
raise ValueError("Only one can be used, `local_value` or `values`.")
if values is None:
if reference_value is None or self.is_fixed():
return [self.lower_bound], [self.upper_bound]
local_relative_bounds = self.local_relative_bounds or (
math.inf,
math.inf,
)
return [
max(
reference_value - local_relative_bounds[0],
self.lower_bound,
),
], [
min(
reference_value + local_relative_bounds[1],
self.upper_bound,
),
]
lower = min(self.lower_bound, values.min())
upper = max(self.upper_bound, values.max())
return [lower], [upper]
def __str__(self) -> str:
"""Method to return a string of lower and upper bound
Returns:
str: String of a list with lower and upper bound
"""
return f"[{self.lower_bound},{self.upper_bound}]"
__str__(self)
special
Method to return a string of lower and upper bound
Returns:
Type | Description |
---|---|
str | String of a list with lower and upper bound |
Source code in bofire/data_models/features/continuous.py
def __str__(self) -> str:
"""Method to return a string of lower and upper bound
Returns:
str: String of a list with lower and upper bound
"""
return f"[{self.lower_bound},{self.upper_bound}]"
get_bounds(self, transform_type=None, values=None, reference_value=None)
Returns the bounds of an input feature depending on the requested transform type.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
transform_type | Optional[TTransform] | The requested transform type. Defaults to None. | None |
values | Optional[pd.Series] | If values are provided the bounds are returned taking the most extreme values for the feature into account. Defaults to None. | None |
reference_value | Optional[float] | If a reference value is provided, then the local bounds based on a local search region are provided. Currently only supported for continuous inputs. For more details, see https://www.merl.com/publications/docs/TR2023-057.pdf. | None |
Returns:
Type | Description |
---|---|
Tuple[List[float], List[float]] | List of lower bound values, list of upper bound values. |
Source code in bofire/data_models/features/continuous.py
def get_bounds( # type: ignore
self,
transform_type: Optional[TTransform] = None,
values: Optional[pd.Series] = None,
reference_value: Optional[float] = None,
) -> Tuple[List[float], List[float]]:
assert transform_type is None
if reference_value is not None and values is not None:
raise ValueError("Only one can be used, `local_value` or `values`.")
if values is None:
if reference_value is None or self.is_fixed():
return [self.lower_bound], [self.upper_bound]
local_relative_bounds = self.local_relative_bounds or (
math.inf,
math.inf,
)
return [
max(
reference_value - local_relative_bounds[0],
self.lower_bound,
),
], [
min(
reference_value + local_relative_bounds[1],
self.upper_bound,
),
]
lower = min(self.lower_bound, values.min())
upper = max(self.upper_bound, values.max())
return [lower], [upper]
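A sketch of global versus local bounds; `local_relative_bounds` defines the offsets around a reference value (assumed imports as above):

```python
from bofire.data_models.features.api import ContinuousInput

x = ContinuousInput(key="x", bounds=(0.0, 10.0), local_relative_bounds=(1.0, 2.0))
x.get_bounds()                     # ([0.0], [10.0]) -> global optimization bounds
x.get_bounds(reference_value=5.0)  # ([4.0], [7.0]) -> [5 - 1, 5 + 2], clipped to the global bounds
```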
round(self, values)
Round values to the stepsize of the feature. If no stepsize is provided return the provided values.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | The values that should be rounded. | required |
Returns:
Type | Description |
---|---|
pd.Series | The rounded values |
Source code in bofire/data_models/features/continuous.py
def round(self, values: pd.Series) -> pd.Series:
"""Round values to the stepsize of the feature. If no stepsize is provided return the
provided values.
Args:
values (pd.Series): The values that should be rounded.
Returns:
pd.Series: The rounded values
"""
if self.stepsize is None:
return values
self.validate_candidental(values=values)
allowed_values = np.arange(
self.lower_bound,
self.upper_bound + self.stepsize,
self.stepsize,
)
idx = abs(values.values.reshape([len(values), 1]) - allowed_values).argmin( # type: ignore
axis=1,
)
return pd.Series(
data=self.lower_bound + idx * self.stepsize,
index=values.index,
)
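A rounding sketch: values are snapped to the nearest point of the stepsize grid (assumed imports as above):

```python
import pandas as pd
from bofire.data_models.features.api import ContinuousInput

x = ContinuousInput(key="x", bounds=(0.0, 1.0), stepsize=0.25)
x.round(pd.Series([0.1, 0.62, 0.9]))  # -> 0.0, 0.5, 1.0 (nearest grid points)
```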
sample(self, n, seed=None)
Draw random samples from the feature.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n | int | number of samples. | required |
seed | int | random seed. Defaults to None. | None |
Returns:
Type | Description |
---|---|
pd.Series | drawn samples. |
drawn samples. |
Source code in bofire/data_models/features/continuous.py
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Draw random samples from the feature.
Args:
n (int): number of samples.
seed (int, optional): random seed. Defaults to None.
Returns:
pd.Series: drawn samples.
"""
return pd.Series(
name=self.key,
data=np.random.default_rng(seed=seed).uniform(
self.lower_bound,
self.upper_bound,
n,
),
)
validate_candidental(self, values)
Method to validate the suggested candidates
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | A dataFrame with candidates | required |
Exceptions:
Type | Description |
---|---|
ValueError | when non numerical values are passed |
ValueError | when values are larger than the upper bound of the feature |
ValueError | when values are lower than the lower bound of the feature |
Returns:
Type | Description |
---|---|
pd.Series | The passed dataFrame with candidates |
Source code in bofire/data_models/features/continuous.py
def validate_candidental(self, values: pd.Series) -> pd.Series:
"""Method to validate the suggested candidates
Args:
values (pd.Series): A dataFrame with candidates
Raises:
ValueError: when non numerical values are passed
ValueError: when values are larger than the upper bound of the feature
ValueError: when values are lower than the lower bound of the feature
Returns:
pd.Series: The passed dataFrame with candidates
"""
noise = 10e-6
values = super().validate_candidental(values)
if (values < self.lower_bound - noise).any():
raise ValueError(
f"not all values of input feature `{self.key}`are larger than lower bound `{self.lower_bound}` ",
)
if (values > self.upper_bound + noise).any():
raise ValueError(
f"not all values of input feature `{self.key}`are smaller than upper bound `{self.upper_bound}` ",
)
return values
ContinuousOutput (Output)
The base class for a continuous output feature
Attributes:
Name | Type | Description |
---|---|---|
objective | objective | objective of the feature indicating in which direction it should be optimized. Defaults to `MaximizeObjective`. |
Source code in bofire/data_models/features/continuous.py
class ContinuousOutput(Output):
"""The base class for a continuous output feature
Attributes:
objective (objective, optional): objective of the feature indicating in which direction it should be optimized. Defaults to `MaximizeObjective`.
"""
type: Literal["ContinuousOutput"] = "ContinuousOutput" # type: ignore
order_id: ClassVar[int] = 9
unit: Optional[str] = None
objective: Optional[AnyObjective] = Field(
default_factory=lambda: MaximizeObjective(w=1.0),
)
def __call__(self, values: pd.Series, values_adapt: pd.Series) -> pd.Series: # type: ignore
if self.objective is None:
return pd.Series(
data=[np.nan for _ in range(len(values))],
index=values.index,
name=values.name,
)
return self.objective(values, values_adapt) # type: ignore
def validate_experimental(self, values: pd.Series) -> pd.Series:
try:
values = pd.to_numeric(values, errors="raise").astype("float64")
except ValueError:
raise ValueError(
f"not all values of input feature `{self.key}` are numerical",
)
return values
def __str__(self) -> str:
return "ContinuousOutputFeature"
validate_experimental(self, values)
Abstract method to validate the experimental Series
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | A dataFrame with values for the outcome | required |
Returns:
Type | Description |
---|---|
pd.Series | The passed dataFrame with experiments |
Source code in bofire/data_models/features/continuous.py
def validate_experimental(self, values: pd.Series) -> pd.Series:
try:
values = pd.to_numeric(values, errors="raise").astype("float64")
except ValueError:
raise ValueError(
f"not all values of input feature `{self.key}` are numerical",
)
return values
descriptor
CategoricalDescriptorInput (CategoricalInput)
Class for categorical input features with descriptors
Attributes:
Name | Type | Description |
---|---|---|
categories | List[str] | Names of the categories. |
allowed | List[bool] | List of bools indicating if a category is allowed within the optimization. |
descriptors | List[str] | List of strings representing the names of the descriptors. |
values | List[List[float]] | List of lists representing the descriptor values. |
Source code in bofire/data_models/features/descriptor.py
class CategoricalDescriptorInput(CategoricalInput):
"""Class for categorical input features with descriptors
Attributes:
categories (List[str]): Names of the categories.
allowed (List[bool]): List of bools indicating if a category is allowed within the optimization.
descriptors (List[str]): List of strings representing the names of the descriptors.
values (List[List[float]]): List of lists representing the descriptor values.
"""
type: Literal["CategoricalDescriptorInput"] = "CategoricalDescriptorInput"
order_id: ClassVar[int] = 6
descriptors: Descriptors
values: Annotated[
List[List[float]],
Field(min_length=1),
]
@field_validator("values")
@classmethod
def validate_values(cls, v, info):
"""Validates the compatibility of passed values for the descriptors and the defined categories
Args:
v (List[List[float]]): Nested list with descriptor values
values (Dict): Dictionary with attributes
Raises:
ValueError: when values have different length than categories
ValueError: when rows in values have different length than descriptors
ValueError: when a descriptor shows no variance in the data
Returns:
List[List[float]]: Nested list with descriptor values
"""
if len(v) != len(info.data["categories"]):
raise ValueError("values must have same length as categories")
for row in v:
if len(row) != len(info.data["descriptors"]):
raise ValueError("rows in values must have same length as descriptors")
a = np.array(v)
for i, d in enumerate(info.data["descriptors"]):
if len(set(a[:, i])) == 1:
raise ValueError(f"No variation for descriptor {d}.")
return v
@staticmethod
def valid_transform_types() -> List[CategoricalEncodingEnum]:
return [
CategoricalEncodingEnum.ONE_HOT,
CategoricalEncodingEnum.DUMMY,
CategoricalEncodingEnum.ORDINAL,
CategoricalEncodingEnum.DESCRIPTOR,
]
def to_df(self):
"""Tabular overview of the feature as DataFrame
Returns:
pd.DataFrame: tabular overview of the feature as DataFrame
"""
data = dict(zip(self.categories, self.values))
return pd.DataFrame.from_dict(data, orient="index", columns=self.descriptors)
def fixed_value(
self,
transform_type: Optional[TTransform] = None,
) -> Union[List[str], List[float], None]:
"""Returns the categories to which the feature is fixed, None if the feature is not fixed
Returns:
List[str]: List of categories or None
"""
if transform_type != CategoricalEncodingEnum.DESCRIPTOR:
return super().fixed_value(transform_type)
val = self.get_allowed_categories()[0]
return self.to_descriptor_encoding(pd.Series([val])).values[0].tolist()
def get_bounds(
self,
transform_type: TTransform,
values: Optional[pd.Series] = None,
reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
if transform_type != CategoricalEncodingEnum.DESCRIPTOR:
return super().get_bounds(transform_type, values)
# in case that values is None, we return the optimization bounds
# else we return the complete bounds
if values is None:
df = self.to_df().loc[self.get_allowed_categories()]
else:
df = self.to_df()
lower = df.min().values.tolist()
upper = df.max().values.tolist()
return lower, upper
def validate_experimental(
self,
values: pd.Series,
strict: bool = False,
) -> pd.Series:
"""Method to validate the experimental dataFrame
Args:
values (pd.Series): A dataFrame with experiments
strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.
Raises:
ValueError: when an entry is not in the list of allowed categories
ValueError: when there is no variation in a feature provided by the experimental data
ValueError: when no variation is present or planned for a given descriptor
Returns:
pd.Series: A dataFrame with experiments
"""
values = super().validate_experimental(values, strict)
if strict:
lower, upper = self.get_bounds(
transform_type=CategoricalEncodingEnum.DESCRIPTOR,
values=values,
)
for i, desc in enumerate(self.descriptors):
if lower[i] == upper[i]:
raise ValueError(
f"No variation present or planned for descriptor {desc} for feature {self.key}. Remove the descriptor.",
)
return values
@classmethod
def from_df(cls, key: str, df: pd.DataFrame):
"""Creates a feature from a dataframe
Args:
key (str): The name of the feature
df (pd.DataFrame): Categories as rows and descriptors as columns
Returns:
_type_: _description_
"""
return cls(
key=key,
categories=list(df.index),
allowed=[True for _ in range(len(df))],
descriptors=list(df.columns),
values=df.values.tolist(),
)
def to_descriptor_encoding(self, values: pd.Series) -> pd.DataFrame:
"""Converts values to descriptor encoding.
Args:
values (pd.Series): Values to transform.
Returns:
pd.DataFrame: Descriptor encoded dataframe.
"""
return pd.DataFrame(
data=values.map(dict(zip(self.categories, self.values))).values.tolist(),
columns=[get_encoded_name(self.key, d) for d in self.descriptors],
index=values.index,
)
def from_descriptor_encoding(self, values: pd.DataFrame) -> pd.Series:
"""Converts values back from descriptor encoding.
Args:
values (pd.DataFrame): Descriptor encoded dataframe.
Raises:
ValueError: If descriptor columns not found in the dataframe.
Returns:
pd.Series: Series with categorical values.
"""
cat_cols = [get_encoded_name(self.key, d) for d in self.descriptors]
# we allow here explicitly that the dataframe can have more columns than needed to have it
# easier in the backtransform.
if np.any([c not in values.columns for c in cat_cols]):
raise ValueError(
f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}.",
)
s = pd.DataFrame(
data=np.sqrt(
np.sum(
(
values[cat_cols].to_numpy()[:, np.newaxis, :]
- self.to_df().iloc[self.allowed].to_numpy()
)
** 2,
axis=2,
),
),
columns=self.get_allowed_categories(),
index=values.index,
).idxmin(1)
s.name = self.key
return s
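A usage sketch for `CategoricalDescriptorInput`; each category carries a numeric descriptor vector, and decoding maps a descriptor row back to the closest allowed category. The import path and the solvent data are illustrative assumptions:

```python
import pandas as pd
from bofire.data_models.features.api import CategoricalDescriptorInput

solvent = CategoricalDescriptorInput(
    key="solvent",
    categories=["water", "ethanol", "toluene"],
    descriptors=["polarity", "boiling_point"],
    values=[[1.00, 100.0], [0.65, 78.0], [0.10, 111.0]],
)
solvent.to_df()  # categories as rows, descriptors as columns

enc = solvent.to_descriptor_encoding(pd.Series(["ethanol", "water"]))
# columns are the encoded descriptor names, e.g. "solvent_polarity", "solvent_boiling_point"
solvent.from_descriptor_encoding(enc)  # maps each row back to the closest allowed category
```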
fixed_value(self, transform_type=None)
Returns the categories to which the feature is fixed, None if the feature is not fixed
Returns:
Type | Description |
---|---|
List[str] | List of categories or None |
Source code in bofire/data_models/features/descriptor.py
def fixed_value(
self,
transform_type: Optional[TTransform] = None,
) -> Union[List[str], List[float], None]:
"""Returns the categories to which the feature is fixed, None if the feature is not fixed
Returns:
List[str]: List of categories or None
"""
if transform_type != CategoricalEncodingEnum.DESCRIPTOR:
return super().fixed_value(transform_type)
val = self.get_allowed_categories()[0]
return self.to_descriptor_encoding(pd.Series([val])).values[0].tolist()
from_descriptor_encoding(self, values)
Converts values back from descriptor encoding.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.DataFrame | Descriptor encoded dataframe. | required |
Exceptions:
Type | Description |
---|---|
ValueError | If descriptor columns not found in the dataframe. |
Returns:
Type | Description |
---|---|
pd.Series | Series with categorical values. |
Source code in bofire/data_models/features/descriptor.py
def from_descriptor_encoding(self, values: pd.DataFrame) -> pd.Series:
"""Converts values back from descriptor encoding.
Args:
values (pd.DataFrame): Descriptor encoded dataframe.
Raises:
ValueError: If descriptor columns not found in the dataframe.
Returns:
pd.Series: Series with categorical values.
"""
cat_cols = [get_encoded_name(self.key, d) for d in self.descriptors]
# we allow here explicitly that the dataframe can have more columns than needed to have it
# easier in the backtransform.
if np.any([c not in values.columns for c in cat_cols]):
raise ValueError(
f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}.",
)
s = pd.DataFrame(
data=np.sqrt(
np.sum(
(
values[cat_cols].to_numpy()[:, np.newaxis, :]
- self.to_df().iloc[self.allowed].to_numpy()
)
** 2,
axis=2,
),
),
columns=self.get_allowed_categories(),
index=values.index,
).idxmin(1)
s.name = self.key
return s
from_df(key, df)
classmethod
Creates a feature from a dataframe
Parameters:
Name | Type | Description | Default |
---|---|---|---|
key | str | The name of the feature | required |
df | pd.DataFrame | Categories as rows and descriptors as columns | required |
Returns:
Type | Description |
---|---|
CategoricalDescriptorInput | The created feature. |
Source code in bofire/data_models/features/descriptor.py
@classmethod
def from_df(cls, key: str, df: pd.DataFrame):
"""Creates a feature from a dataframe
Args:
key (str): The name of the feature
df (pd.DataFrame): Categories as rows and descriptors as columns
Returns:
_type_: _description_
"""
return cls(
key=key,
categories=list(df.index),
allowed=[True for _ in range(len(df))],
descriptors=list(df.columns),
values=df.values.tolist(),
)
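A sketch for building the feature from a descriptor table with `from_df`; all categories of the resulting feature are allowed (assumed imports as above):

```python
import pandas as pd
from bofire.data_models.features.api import CategoricalDescriptorInput

df = pd.DataFrame(
    {"polarity": [1.00, 0.65], "boiling_point": [100.0, 78.0]},
    index=["water", "ethanol"],
)
solvent = CategoricalDescriptorInput.from_df("solvent", df)
# solvent.categories == ["water", "ethanol"]; both categories are allowed
```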
get_bounds(self, transform_type, values=None, reference_value=None)
Returns the bounds of an input feature depending on the requested transform type.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
transform_type | Optional[TTransform] | The requested transform type. Defaults to None. | required |
values | Optional[pd.Series] | If values are provided the bounds are returned taking the most extreme values for the feature into account. Defaults to None. | None |
reference_value | Optional[float] | If a reference value is provided, then the local bounds based on a local search region are provided. Currently only supported for continuous inputs. For more details, see https://www.merl.com/publications/docs/TR2023-057.pdf. | None |
Returns:
Type | Description |
---|---|
Tuple[List[float], List[float]] | List of lower bound values, list of upper bound values. |
Source code in bofire/data_models/features/descriptor.py
def get_bounds(
self,
transform_type: TTransform,
values: Optional[pd.Series] = None,
reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
if transform_type != CategoricalEncodingEnum.DESCRIPTOR:
return super().get_bounds(transform_type, values)
# in case that values is None, we return the optimization bounds
# else we return the complete bounds
if values is None:
df = self.to_df().loc[self.get_allowed_categories()]
else:
df = self.to_df()
lower = df.min().values.tolist()
upper = df.max().values.tolist()
return lower, upper
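A short sketch of the two bounds modes (hypothetical data; `CategoricalEncodingEnum` is assumed to be importable from `bofire.data_models.enum`):

```python
import pandas as pd

from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.descriptor import CategoricalDescriptorInput

feat = CategoricalDescriptorInput(
    key="solvent",
    categories=["water", "ethanol", "acetone"],
    descriptors=["polarity", "molar_mass"],
    values=[[1.0, 18.0], [0.65, 46.1], [0.36, 58.1]],
    allowed=[True, True, False],
)

# optimization bounds: only the allowed categories (water, ethanol) are considered
lower, upper = feat.get_bounds(transform_type=CategoricalEncodingEnum.DESCRIPTOR)
# lower == [0.65, 18.0], upper == [1.0, 46.1]

# with experimental values, all categories define the bounds
lower, upper = feat.get_bounds(
    transform_type=CategoricalEncodingEnum.DESCRIPTOR,
    values=pd.Series(["water", "acetone"]),
)
# lower == [0.36, 18.0], upper == [1.0, 58.1]
```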
to_descriptor_encoding(self, values)
Converts values to descriptor encoding.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.Series |
Values to transform. |
required |
Returns:
Type | Description |
---|---|
pd.DataFrame |
Descriptor encoded dataframe. |
Source code in bofire/data_models/features/descriptor.py
def to_descriptor_encoding(self, values: pd.Series) -> pd.DataFrame:
"""Converts values to descriptor encoding.
Args:
values (pd.Series): Values to transform.
Returns:
pd.DataFrame: Descriptor encoded dataframe.
"""
return pd.DataFrame(
data=values.map(dict(zip(self.categories, self.values))).values.tolist(),
columns=[get_encoded_name(self.key, d) for d in self.descriptors],
index=values.index,
)
to_df(self)
Tabular overview of the feature as DataFrame
Returns:
Type | Description |
---|---|
pd.DataFrame |
tabular overview of the feature as DataFrame |
Source code in bofire/data_models/features/descriptor.py
def to_df(self):
"""Tabular overview of the feature as DataFrame
Returns:
pd.DataFrame: tabular overview of the feature as DataFrame
"""
data = dict(zip(self.categories, self.values))
return pd.DataFrame.from_dict(data, orient="index", columns=self.descriptors)
validate_experimental(self, values, strict=False)
Method to validate the experimental dataFrame
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.Series |
A dataFrame with experiments |
required |
strict |
bool |
Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False. |
False |
Exceptions:
Type | Description |
---|---|
ValueError |
when an entry is not in the list of allowed categories |
ValueError |
when there is no variation in a feature provided by the experimental data |
ValueError |
when no variation is present or planned for a given descriptor |
Returns:
Type | Description |
---|---|
pd.Series |
A dataFrame with experiments |
Source code in bofire/data_models/features/descriptor.py
def validate_experimental(
self,
values: pd.Series,
strict: bool = False,
) -> pd.Series:
"""Method to validate the experimental dataFrame
Args:
values (pd.Series): A dataFrame with experiments
strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.
Raises:
ValueError: when an entry is not in the list of allowed categories
ValueError: when there is no variation in a feature provided by the experimental data
ValueError: when no variation is present or planned for a given descriptor
Returns:
pd.Series: A dataFrame with experiments
"""
values = super().validate_experimental(values, strict)
if strict:
lower, upper = self.get_bounds(
transform_type=CategoricalEncodingEnum.DESCRIPTOR,
values=values,
)
for i, desc in enumerate(self.descriptors):
if lower[i] == upper[i]:
raise ValueError(
f"No variation present or planned for descriptor {desc} for feature {self.key}. Remove the descriptor.",
)
return values
validate_values(v, info)
classmethod
Validates the compatibility of passed values for the descriptors and the defined categories
Parameters:
Name | Type | Description | Default |
---|---|---|---|
v |
List[List[float]] |
Nested list with descriptor values |
required |
values |
Dict |
Dictionary with attributes |
required |
Exceptions:
Type | Description |
---|---|
ValueError |
when values have different length than categories |
ValueError |
when rows in values have different length than descriptors |
ValueError |
when a descriptor shows no variance in the data |
Returns:
Type | Description |
---|---|
List[List[float]] |
Nested list with descriptor values |
Source code in bofire/data_models/features/descriptor.py
@field_validator("values")
@classmethod
def validate_values(cls, v, info):
"""Validates the compatibility of passed values for the descriptors and the defined categories
Args:
v (List[List[float]]): Nested list with descriptor values
values (Dict): Dictionary with attributes
Raises:
ValueError: when values have different length than categories
ValueError: when rows in values have different length than descriptors
ValueError: when a descriptor shows no variance in the data
Returns:
List[List[float]]: Nested list with descriptor values
"""
if len(v) != len(info.data["categories"]):
raise ValueError("values must have same length as categories")
for row in v:
if len(row) != len(info.data["descriptors"]):
raise ValueError("rows in values must have same length as descriptors")
a = np.array(v)
for i, d in enumerate(info.data["descriptors"]):
if len(set(a[:, i])) == 1:
raise ValueError(f"No variation for descriptor {d}.")
return v
ContinuousDescriptorInput (ContinuousInput)
Class for continuous input features with descriptors
Attributes:
Name | Type | Description |
---|---|---|
lower_bound |
float |
Lower bound of the feature in the optimization. |
upper_bound |
float |
Upper bound of the feature in the optimization. |
descriptors |
List[str] |
Names of the descriptors. |
values |
List[float] |
Values of the descriptors. |
Source code in bofire/data_models/features/descriptor.py
class ContinuousDescriptorInput(ContinuousInput):
"""Class for continuous input features with descriptors
Attributes:
lower_bound (float): Lower bound of the feature in the optimization.
upper_bound (float): Upper bound of the feature in the optimization.
descriptors (List[str]): Names of the descriptors.
values (List[float]): Values of the descriptors.
"""
type: Literal["ContinuousDescriptorInput"] = "ContinuousDescriptorInput"
order_id: ClassVar[int] = 2
descriptors: Descriptors
values: DiscreteVals
@model_validator(mode="after")
def validate_list_lengths(self):
"""Compares the length of the defined descriptors list with the provided values
Args:
values (Dict): Dictionary with all attributes
Raises:
ValueError: when the number of descriptors does not match the number of provided values
Returns:
Dict: Dict with the attributes
"""
if len(self.descriptors) != len(self.values):
raise ValueError(
f"must provide same number of descriptors and values, got {len(self.descriptors)} != {len(self.values)}",
)
return self
def to_df(self) -> pd.DataFrame:
"""Tabular overview of the feature as DataFrame
Returns:
pd.DataFrame: tabular overview of the feature as DataFrame
"""
return pd.DataFrame(
data=[self.values],
index=[self.key],
columns=self.descriptors,
)
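A minimal construction sketch; it assumes the `ContinuousInput` base accepts a `bounds=(lower, upper)` tuple, and the descriptor values are invented:

```python
from bofire.data_models.features.descriptor import ContinuousDescriptorInput

# hypothetical continuous feature annotated with two descriptors
feat = ContinuousDescriptorInput(
    key="temperature",
    bounds=(20.0, 80.0),  # assumed ContinuousInput constructor argument
    descriptors=["molar_mass", "polarity"],
    values=[18.0, 1.0],
)
print(feat.to_df())
#               molar_mass  polarity
# temperature         18.0       1.0
```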
to_df(self)
Tabular overview of the feature as DataFrame
Returns:
Type | Description |
---|---|
pd.DataFrame |
tabular overview of the feature as DataFrame |
Source code in bofire/data_models/features/descriptor.py
def to_df(self) -> pd.DataFrame:
"""Tabular overview of the feature as DataFrame
Returns:
pd.DataFrame: tabular overview of the feature as DataFrame
"""
return pd.DataFrame(
data=[self.values],
index=[self.key],
columns=self.descriptors,
)
validate_list_lengths(self)
Compares the length of the defined descriptors list with the provided values
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
Dict |
Dictionary with all attributes |
required |
Exceptions:
Type | Description |
---|---|
ValueError |
when the number of descriptors does not match the number of provided values |
Returns:
Type | Description |
---|---|
Dict |
Dict with the attributes |
Source code in bofire/data_models/features/descriptor.py
@model_validator(mode="after")
def validate_list_lengths(self):
"""Compares the length of the defined descriptors list with the provided values
Args:
values (Dict): Dictionary with all attributes
Raises:
ValueError: when the number of descriptors does not match the number of provided values
Returns:
Dict: Dict with the attributes
"""
if len(self.descriptors) != len(self.values):
raise ValueError(
f"must provide same number of descriptors and values, got {len(self.descriptors)} != {len(self.values)}",
)
return self
discrete
DiscreteInput (NumericalInput)
Feature with discretized ordinal values allowed in the optimization.
Attributes:
Name | Type | Description |
---|---|---|
key |
str |
Key of the feature. |
values |
List[float] |
The discretized allowed values during the optimization. |
Source code in bofire/data_models/features/discrete.py
class DiscreteInput(NumericalInput):
"""Feature with discretized ordinal values allowed in the optimization.
Attributes:
key(str): key of the feature.
values(List[float]): the discretized allowed values during the optimization.
"""
type: Literal["DiscreteInput"] = "DiscreteInput"
order_id: ClassVar[int] = 3
values: DiscreteVals
@field_validator("values")
@classmethod
def validate_values_unique(cls, values):
"""Validates that provided values are unique.
Args:
values (List[float]): List of values
Raises:
ValueError: when values are non-unique.
ValueError: when values contains only one entry.
ValueError: when values is empty.
Returns:
List[values]: Sorted list of values
"""
if len(values) != len(set(values)):
raise ValueError("Discrete values must be unique")
if len(values) == 1:
raise ValueError(
"Fixed discrete inputs are not supported. Please use a fixed continuous input.",
)
if len(values) == 0:
raise ValueError("No values defined.")
return sorted(values)
@property
def lower_bound(self) -> float:
"""Lower bound of the set of allowed values"""
return min(self.values)
@property
def upper_bound(self) -> float:
"""Upper bound of the set of allowed values"""
return max(self.values)
def validate_candidental(self, values: pd.Series) -> pd.Series:
"""Method to validate the provided candidates.
Args:
values (pd.Series): suggested candidates for the feature
Raises:
ValueError: Raises error when one of the provided values is not contained in the list of allowed values.
Returns:
pd.Series: Suggested candidates for the feature
"""
values = super().validate_candidental(values)
if not np.isin(values.to_numpy(), np.array(self.values)).all():
raise ValueError(
f"Not allowed values in candidates for feature {self.key}.",
)
return values
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Draw random samples from the feature.
Args:
n (int): number of samples.
seed (int, optional): random seed. Defaults to None.
Returns:
pd.Series: drawn samples.
"""
return pd.Series(
name=self.key,
data=np.random.default_rng(seed=seed).choice(self.values, n),
)
def from_continuous(self, values: pd.DataFrame) -> pd.Series:
"""Rounds continuous values to the closest discrete ones.
Args:
values (pd.DataFrame): Dataframe with continuous entries.
Returns:
pd.Series: Series with discrete values.
"""
s = pd.DataFrame(
data=np.abs(
values[self.key].to_numpy()[:, np.newaxis] - np.array(self.values),
),
columns=self.values,
index=values.index,
).idxmin(1)
s.name = self.key
return s
def get_bounds(
self,
transform_type: Optional[TTransform] = None,
values: Optional[pd.Series] = None,
reference_value: Optional[float] = None,
) -> Tuple[List[float], List[float]]:
assert transform_type is None
if values is None:
return [self.lower_bound], [self.upper_bound]
lower = min(self.lower_bound, values.min())
upper = max(self.upper_bound, values.max())
return [lower], [upper]
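A minimal sketch of the class above; values are sorted during validation and the bounds are derived from them:

```python
from bofire.data_models.features.discrete import DiscreteInput

feat = DiscreteInput(key="dosage", values=[5.0, 1.0, 2.5])
assert feat.values == [1.0, 2.5, 5.0]  # sorted by the validator
assert feat.lower_bound == 1.0
assert feat.upper_bound == 5.0
```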
lower_bound: float
property
readonly
Lower bound of the set of allowed values
upper_bound: float
property
readonly
Upper bound of the set of allowed values
from_continuous(self, values)
Rounds continuous values to the closest discrete ones.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.DataFrame |
Dataframe with continuous entries. |
required |
Returns:
Type | Description |
---|---|
pd.Series |
Series with discrete values. |
Source code in bofire/data_models/features/discrete.py
def from_continuous(self, values: pd.DataFrame) -> pd.Series:
"""Rounds continuous values to the closest discrete ones.
Args:
values (pd.DataFrame): Dataframe with continuous entries.
Returns:
pd.Series: Series with discrete values.
"""
s = pd.DataFrame(
data=np.abs(
values[self.key].to_numpy()[:, np.newaxis] - np.array(self.values),
),
columns=self.values,
index=values.index,
).idxmin(1)
s.name = self.key
return s
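For illustration, rounding relaxed candidates back onto the discrete grid (hypothetical values):

```python
import pandas as pd

from bofire.data_models.features.discrete import DiscreteInput

feat = DiscreteInput(key="dosage", values=[1.0, 2.5, 5.0])
candidates = pd.DataFrame({"dosage": [1.2, 4.9, 3.6]})
rounded = feat.from_continuous(candidates)
assert rounded.tolist() == [1.0, 5.0, 2.5]  # each entry snaps to the closest allowed value
```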
get_bounds(self, transform_type=None, values=None, reference_value=None)
Returns the bounds of an input feature depending on the requested transform type.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
transform_type |
Optional[TTransform] |
The requested transform type. Defaults to None. |
None |
values |
Optional[pd.Series] |
If values are provided the bounds are returned taking the most extreme values for the feature into account. Defaults to None. |
None |
reference_value |
Optional[float] |
If a reference value is provided, then the local bounds based on a local search region are provided. Currently only supported for continuous inputs. For more details, it is referred to https://www.merl.com/publications/docs/TR2023-057.pdf. |
None |
Returns:
Type | Description |
---|---|
Tuple[List[float], List[float]] |
List of lower bound values, list of upper bound values. |
Source code in bofire/data_models/features/discrete.py
def get_bounds(
self,
transform_type: Optional[TTransform] = None,
values: Optional[pd.Series] = None,
reference_value: Optional[float] = None,
) -> Tuple[List[float], List[float]]:
assert transform_type is None
if values is None:
return [self.lower_bound], [self.upper_bound]
lower = min(self.lower_bound, values.min())
upper = max(self.upper_bound, values.max())
return [lower], [upper]
sample(self, n, seed=None)
Draw random samples from the feature.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n |
int |
number of samples. |
required |
seed |
int |
random seed. Defaults to None. |
None |
Returns:
Type | Description |
---|---|
pd.Series |
drawn samples. |
Source code in bofire/data_models/features/discrete.py
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Draw random samples from the feature.
Args:
n (int): number of samples.
seed (int, optional): random seed. Defaults to None.
Returns:
pd.Series: drawn samples.
"""
return pd.Series(
name=self.key,
data=np.random.default_rng(seed=seed).choice(self.values, n),
)
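A small usage sketch; the seed makes the draw reproducible, but the concrete values depend on the random number generator:

```python
from bofire.data_models.features.discrete import DiscreteInput

feat = DiscreteInput(key="dosage", values=[1.0, 2.5, 5.0])
samples = feat.sample(4, seed=42)
assert samples.name == "dosage"
assert samples.isin(feat.values).all()  # every draw comes from the allowed values
```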
validate_candidental(self, values)
Method to validate the provided candidates.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.Series |
suggested candidates for the feature |
required |
Exceptions:
Type | Description |
---|---|
ValueError |
Raises error when one of the provided values is not contained in the list of allowed values. |
Returns:
Type | Description |
---|---|
pd.Series |
Suggested candidates for the feature |
Source code in bofire/data_models/features/discrete.py
def validate_candidental(self, values: pd.Series) -> pd.Series:
"""Method to validate the provided candidates.
Args:
values (pd.Series): suggested candidates for the feature
Raises:
ValueError: Raises error when one of the provided values is not contained in the list of allowed values.
Returns:
pd.Series: Suggested candidates for the feature
"""
values = super().validate_candidental(values)
if not np.isin(values.to_numpy(), np.array(self.values)).all():
raise ValueError(
f"Not allowed values in candidates for feature {self.key}.",
)
return values
validate_values_unique(values)
classmethod
Validates that provided values are unique.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
List[float] |
List of values |
required |
Exceptions:
Type | Description |
---|---|
ValueError |
when values are non-unique. |
ValueError |
when values contains only one entry. |
ValueError |
when values is empty. |
Returns:
Type | Description |
---|---|
List[values] |
Sorted list of values |
Source code in bofire/data_models/features/discrete.py
@field_validator("values")
@classmethod
def validate_values_unique(cls, values):
"""Validates that provided values are unique.
Args:
values (List[float]): List of values
Raises:
ValueError: when values are non-unique.
ValueError: when values contains only one entry.
ValueError: when values is empty.
Returns:
List[values]: Sorted list of values
"""
if len(values) != len(set(values)):
raise ValueError("Discrete values must be unique")
if len(values) == 1:
raise ValueError(
"Fixed discrete inputs are not supported. Please use a fixed continuous input.",
)
if len(values) == 0:
raise ValueError("No values defined.")
return sorted(values)
feature
Feature (BaseModel)
The base class for all features.
Source code in bofire/data_models/features/feature.py
class Feature(BaseModel):
"""The base class for all features."""
type: str
key: str
order_id: ClassVar[int] = -1
def __lt__(self, other) -> bool:
"""Method to compare two models to get them in the desired order.
Return True if other is larger than self, else False. (see FEATURE_ORDER)
Args:
other: The other class to compare to self
Returns:
bool: True if the other class is larger than self, else False
"""
order_self = self.order_id
order_other = other.order_id
if order_self == order_other:
return self.key < other.key
return order_self < order_other
__lt__(self, other)
special
Method to compare two models to get them in the desired order. Return True if other is larger than self, else False. (see FEATURE_ORDER)
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other |
The other class to compare to self |
required |
Returns:
Type | Description |
---|---|
bool |
True if the other class is larger than self, else False |
Source code in bofire/data_models/features/feature.py
def __lt__(self, other) -> bool:
"""Method to compare two models to get them in the desired order.
Return True if other is larger than self, else False. (see FEATURE_ORDER)
Args:
other: The other class to compare to self
Returns:
bool: True if the other class is larger than self, else False
"""
order_self = self.order_id
order_other = other.order_id
if order_self == order_other:
return self.key < other.key
return order_self < order_other
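A minimal sketch of the ordering: with equal `order_id` (same feature class), the comparison falls back to the key:

```python
from bofire.data_models.features.discrete import DiscreteInput

features = [
    DiscreteInput(key="b_dosage", values=[1.0, 2.5]),
    DiscreteInput(key="a_dosage", values=[1.0, 2.5]),
]
assert [f.key for f in sorted(features)] == ["a_dosage", "b_dosage"]
```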
Input (Feature)
Base class for all input features.
Source code in bofire/data_models/features/feature.py
class Input(Feature):
"""Base class for all input features."""
@staticmethod
@abstractmethod
def valid_transform_types() -> List[Union[CategoricalEncodingEnum, AnyMolFeatures]]:
pass
@abstractmethod
def is_fixed(self) -> bool:
"""Indicates if a variable is set to a fixed value.
Returns:
bool: True if fixed, else False.
"""
@abstractmethod
def fixed_value(
self,
transform_type: Optional[TTransform] = None,
) -> Union[None, List[str], List[float]]:
"""Method to return the fixed value in case of a fixed feature.
Returns:
Union[None,str,float]: None in case the feature is not fixed, else the fixed value.
"""
@abstractmethod
def validate_experimental(
self,
values: pd.Series,
strict: bool = False,
) -> pd.Series:
"""Abstract method to validate the experimental dataFrame
Args:
values (pd.Series): A dataFrame with experiments
strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.
Returns:
pd.Series: The passed dataFrame with experiments
"""
@abstractmethod
def validate_candidental(self, values: pd.Series) -> pd.Series:
"""Abstract method to validate the suggested candidates
Args:
values (pd.Series): A dataFrame with candidates
Returns:
pd.Series: The passed dataFrame with candidates
"""
@abstractmethod
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Sample a series of allowed values.
Args:
n (int): Number of samples
seed (int, optional): random seed. Defaults to None.
Returns:
pd.Series: Sampled values.
"""
@abstractmethod
def get_bounds(
self,
transform_type: Optional[TTransform] = None,
values: Optional[pd.Series] = None,
reference_value: Optional[Union[float, str]] = None,
) -> Tuple[List[float], List[float]]:
"""Returns the bounds of an input feature depending on the requested transform type.
Args:
transform_type (Optional[TTransform], optional): The requested transform type. Defaults to None.
values (Optional[pd.Series], optional): If values are provided the bounds are returned taking
the most extreme values for the feature into account. Defaults to None.
reference_value (Optional[float], optional): If a reference value is provided, then the local bounds based
on a local search region are provided. Currently only supported for continuous inputs. For more
details, it is referred to https://www.merl.com/publications/docs/TR2023-057.pdf.
Returns:
Tuple[List[float], List[float]]: List of lower bound values, list of upper bound values.
"""
fixed_value(self, transform_type=None)
Method to return the fixed value in case of a fixed feature.
Returns:
Type | Description |
---|---|
Union[None,str,float] |
None in case the feature is not fixed, else the fixed value. |
Source code in bofire/data_models/features/feature.py
@abstractmethod
def fixed_value(
self,
transform_type: Optional[TTransform] = None,
) -> Union[None, List[str], List[float]]:
"""Method to return the fixed value in case of a fixed feature.
Returns:
Union[None,str,float]: None in case the feature is not fixed, else the fixed value.
"""
get_bounds(self, transform_type=None, values=None, reference_value=None)
Returns the bounds of an input feature depending on the requested transform type.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
transform_type |
Optional[TTransform] |
The requested transform type. Defaults to None. |
None |
values |
Optional[pd.Series] |
If values are provided the bounds are returned taking the most extreme values for the feature into account. Defaults to None. |
None |
reference_value |
Optional[float] |
If a reference value is provided, then the local bounds based on a local search region are provided. Currently only supported for continuous inputs. For more details, it is referred to https://www.merl.com/publications/docs/TR2023-057.pdf. |
None |
Returns:
Type | Description |
---|---|
Tuple[List[float], List[float]] |
List of lower bound values, list of upper bound values. |
Source code in bofire/data_models/features/feature.py
@abstractmethod
def get_bounds(
self,
transform_type: Optional[TTransform] = None,
values: Optional[pd.Series] = None,
reference_value: Optional[Union[float, str]] = None,
) -> Tuple[List[float], List[float]]:
"""Returns the bounds of an input feature depending on the requested transform type.
Args:
transform_type (Optional[TTransform], optional): The requested transform type. Defaults to None.
values (Optional[pd.Series], optional): If values are provided the bounds are returned taking
the most extreme values for the feature into account. Defaults to None.
reference_value (Optional[float], optional): If a reference value is provided, then the local bounds based
on a local search region are provided. Currently only supported for continuous inputs. For more
details, it is referred to https://www.merl.com/publications/docs/TR2023-057.pdf.
Returns:
Tuple[List[float], List[float]]: List of lower bound values, list of upper bound values.
"""
is_fixed(self)
Indicates if a variable is set to a fixed value.
Returns:
Type | Description |
---|---|
bool |
True if fixed, else False. |
Source code in bofire/data_models/features/feature.py
@abstractmethod
def is_fixed(self) -> bool:
"""Indicates if a variable is set to a fixed value.
Returns:
bool: True if fixed, else False.
"""
sample(self, n, seed=None)
Sample a series of allowed values.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n |
int |
Number of samples |
required |
seed |
int |
random seed. Defaults to None. |
None |
Returns:
Type | Description |
---|---|
pd.Series |
Sampled values. |
Source code in bofire/data_models/features/feature.py
@abstractmethod
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Sample a series of allowed values.
Args:
n (int): Number of samples
seed (int, optional): random seed. Defaults to None.
Returns:
pd.Series: Sampled values.
"""
validate_candidental(self, values)
Abstract method to validate the suggested candidates
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.Series |
A dataFrame with candidates |
required |
Returns:
Type | Description |
---|---|
pd.Series |
The passed dataFrame with candidates |
Source code in bofire/data_models/features/feature.py
@abstractmethod
def validate_candidental(self, values: pd.Series) -> pd.Series:
"""Abstract method to validate the suggested candidates
Args:
values (pd.Series): A dataFrame with candidates
Returns:
pd.Series: The passed dataFrame with candidates
"""
validate_experimental(self, values, strict=False)
Abstract method to validate the experimental dataFrame
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.Series |
A dataFrame with experiments |
required |
strict |
bool |
Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False. |
False |
Returns:
Type | Description |
---|---|
pd.Series |
The passed dataFrame with experiments |
Source code in bofire/data_models/features/feature.py
@abstractmethod
def validate_experimental(
self,
values: pd.Series,
strict: bool = False,
) -> pd.Series:
"""Abstract method to validate the experimental dataFrame
Args:
values (pd.Series): A dataFrame with experiments
strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False.
Returns:
pd.Series: The passed dataFrame with experiments
"""
Output (Feature)
Base class for all output features.
Attributes:
Name | Type | Description |
---|---|---|
key |
str |
Key of the Feature. |
Source code in bofire/data_models/features/feature.py
class Output(Feature):
"""Base class for all output features.
Attributes:
key(str): Key of the Feature.
"""
@abstractmethod
def __call__(self, values: pd.Series) -> pd.Series:
pass
@abstractmethod
def validate_experimental(self, values: pd.Series) -> pd.Series:
"""Abstract method to validate the experimental Series
Args:
values (pd.Series): A dataFrame with values for the outcome
Returns:
pd.Series: The passed dataFrame with experiments
"""
validate_experimental(self, values)
Abstract method to validate the experimental Series
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.Series |
A dataFrame with values for the outcome |
required |
Returns:
Type | Description |
---|---|
pd.Series |
The passed dataFrame with experiments |
Source code in bofire/data_models/features/feature.py
@abstractmethod
def validate_experimental(self, values: pd.Series) -> pd.Series:
"""Abstract method to validate the experimental Series
Args:
values (pd.Series): A dataFrame with values for the outcome
Returns:
pd.Series: The passed dataFrame with experiments
"""
get_encoded_name(feature_key, option_name)
Get the name of the encoded column. Option could be the category or the descriptor name.
Source code in bofire/data_models/features/feature.py
def get_encoded_name(feature_key: str, option_name: str) -> str:
"""Get the name of the encoded column. Option could be the category or the descriptor name."""
return f"{feature_key}_{option_name}"
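For illustration:

```python
from bofire.data_models.features.feature import get_encoded_name

assert get_encoded_name("solvent", "polarity") == "solvent_polarity"  # descriptor column
assert get_encoded_name("solvent", "water") == "solvent_water"        # one-hot category column
```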
molecular
CategoricalMolecularInput (CategoricalInput, MolecularInput)
Source code in bofire/data_models/features/molecular.py
class CategoricalMolecularInput(CategoricalInput, MolecularInput): # type: ignore
type: Literal["CategoricalMolecularInput"] = "CategoricalMolecularInput" # type: ignore
# order_id: ClassVar[int] = 7
order_id: ClassVar[int] = 5
@field_validator("categories")
@classmethod
def validate_smiles(cls, categories: Sequence[str]):
"""Validates that categories are valid smiles. Note that this check can only
be executed when rdkit is available.
Args:
categories (List[str]): List of smiles
Raises:
ValueError: when string is not a smiles
Returns:
List[str]: List of the smiles
"""
# check on rdkit availability:
try:
smiles2mol(categories[0])
except NameError:
warnings.warn("rdkit not installed, categories cannot be validated.")
return categories
for cat in categories:
smiles2mol(cat)
return categories
@staticmethod
def valid_transform_types() -> List[Union[AnyMolFeatures, CategoricalEncodingEnum]]: # type: ignore
return CategoricalInput.valid_transform_types() + [ # type: ignore
Fingerprints,
FingerprintsFragments,
Fragments,
MordredDescriptors,
]
def get_bounds( # type: ignore
self,
transform_type: Union[CategoricalEncodingEnum, AnyMolFeatures],
values: Optional[pd.Series] = None,
reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
if isinstance(transform_type, CategoricalEncodingEnum):
# we are just using the standard categorical transformations
return super().get_bounds(
transform_type=transform_type,
values=values,
reference_value=reference_value,
)
# in case that values is None, we return the optimization bounds
# else we return the complete bounds
data = self.to_descriptor_encoding(
transform_type=transform_type,
values=(
pd.Series(self.get_allowed_categories())
if values is None
else pd.Series(self.categories)
),
)
lower = data.min(axis=0).values.tolist()
upper = data.max(axis=0).values.tolist()
return lower, upper
def from_descriptor_encoding(
self,
transform_type: AnyMolFeatures,
values: pd.DataFrame,
) -> pd.Series:
"""Converts values back from descriptor encoding.
Args:
values (pd.DataFrame): Descriptor encoded dataframe.
Raises:
ValueError: If descriptor columns not found in the dataframe.
Returns:
pd.Series: Series with categorical values.
"""
# This method is modified based on the categorical descriptor feature
# TODO: move it to more central place
cat_cols = [
get_encoded_name(self.key, d) for d in transform_type.get_descriptor_names()
]
# we allow here explicitly that the dataframe can have more columns than needed to have it
# easier in the backtransform.
if np.any([c not in values.columns for c in cat_cols]):
raise ValueError(
f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}.",
)
s = pd.DataFrame(
data=np.sqrt(
np.sum(
(
values[cat_cols].to_numpy()[:, np.newaxis, :]
- self.to_descriptor_encoding(
transform_type=transform_type,
values=pd.Series(self.get_allowed_categories()),
).to_numpy()
)
** 2,
axis=2,
),
),
columns=self.get_allowed_categories(),
index=values.index,
).idxmin(1)
s.name = self.key
return s
from_descriptor_encoding(self, transform_type, values)
Converts values back from descriptor encoding.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.DataFrame |
Descriptor encoded dataframe. |
required |
Exceptions:
Type | Description |
---|---|
ValueError |
If descriptor columns not found in the dataframe. |
Returns:
Type | Description |
---|---|
pd.Series |
Series with categorical values. |
Source code in bofire/data_models/features/molecular.py
def from_descriptor_encoding(
self,
transform_type: AnyMolFeatures,
values: pd.DataFrame,
) -> pd.Series:
"""Converts values back from descriptor encoding.
Args:
values (pd.DataFrame): Descriptor encoded dataframe.
Raises:
ValueError: If descriptor columns not found in the dataframe.
Returns:
pd.Series: Series with categorical values.
"""
# This method is modified based on the categorical descriptor feature
# TODO: move it to more central place
cat_cols = [
get_encoded_name(self.key, d) for d in transform_type.get_descriptor_names()
]
# we allow here explicitly that the dataframe can have more columns than needed to have it
# easier in the backtransform.
if np.any([c not in values.columns for c in cat_cols]):
raise ValueError(
f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}.",
)
s = pd.DataFrame(
data=np.sqrt(
np.sum(
(
values[cat_cols].to_numpy()[:, np.newaxis, :]
- self.to_descriptor_encoding(
transform_type=transform_type,
values=pd.Series(self.get_allowed_categories()),
).to_numpy()
)
** 2,
axis=2,
),
),
columns=self.get_allowed_categories(),
index=values.index,
).idxmin(1)
s.name = self.key
return s
get_bounds(self, transform_type, values=None, reference_value=None)
Calculates the lower and upper bounds for the feature based on the given transform type and values.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
transform_type |
AnyMolFeatures |
The type of transformation to apply to the data. |
required |
values |
pd.Series |
The actual data over which the lower and upper bounds are calculated. |
None |
reference_value |
Optional[str] |
The reference value for the transformation. Not used here. Defaults to None. |
None |
Returns:
Type | Description |
---|---|
Tuple[List[float], List[float]] |
A tuple containing the lower and upper bounds of the transformed data. |
Exceptions:
Type | Description |
---|---|
NotImplementedError |
Raised when values is None, as it is currently required for MolecularInput. |
Source code in bofire/data_models/features/molecular.py
def get_bounds( # type: ignore
self,
transform_type: Union[CategoricalEncodingEnum, AnyMolFeatures],
values: Optional[pd.Series] = None,
reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
if isinstance(transform_type, CategoricalEncodingEnum):
# we are just using the standard categorical transformations
return super().get_bounds(
transform_type=transform_type,
values=values,
reference_value=reference_value,
)
# in case that values is None, we return the optimization bounds
# else we return the complete bounds
data = self.to_descriptor_encoding(
transform_type=transform_type,
values=(
pd.Series(self.get_allowed_categories())
if values is None
else pd.Series(self.categories)
),
)
lower = data.min(axis=0).values.tolist()
upper = data.max(axis=0).values.tolist()
return lower, upper
validate_smiles(categories)
classmethod
Validates that categories are valid smiles. Note that this check can only be executed when rdkit is available.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
categories |
List[str] |
List of smiles |
required |
Exceptions:
Type | Description |
---|---|
ValueError |
when string is not a smiles |
Returns:
Type | Description |
---|---|
List[str] |
List of the smiles |
Source code in bofire/data_models/features/molecular.py
@field_validator("categories")
@classmethod
def validate_smiles(cls, categories: Sequence[str]):
"""Validates that categories are valid smiles. Note that this check can only
be executed when rdkit is available.
Args:
categories (List[str]): List of smiles
Raises:
ValueError: when string is not a smiles
Returns:
List[str]: List of the smiles
"""
# check on rdkit availability:
try:
smiles2mol(categories[0])
except NameError:
warnings.warn("rdkit not installed, categories cannot be validated.")
return categories
for cat in categories:
smiles2mol(cat)
return categories
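A minimal construction sketch, assuming rdkit is installed so the SMILES strings can be validated; the molecules are arbitrary examples:

```python
from bofire.data_models.features.molecular import CategoricalMolecularInput

feat = CategoricalMolecularInput(
    key="solvent",
    categories=["O", "CCO", "CC(=O)C"],  # water, ethanol, acetone as SMILES
)
assert feat.get_allowed_categories() == ["O", "CCO", "CC(=O)C"]  # all allowed by default
```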
MolecularInput (Input)
Source code in bofire/data_models/features/molecular.py
class MolecularInput(Input):
type: Literal["MolecularInput"] = "MolecularInput" # type: ignore
# order_id: ClassVar[int] = 6
order_id: ClassVar[int] = 4
@staticmethod
def valid_transform_types() -> List[AnyMolFeatures]: # type: ignore
return [Fingerprints, FingerprintsFragments, Fragments, MordredDescriptors] # type: ignore
def validate_experimental(
self,
values: pd.Series,
strict: bool = False,
) -> pd.Series:
values = values.map(str)
for smi in values:
smiles2mol(smi)
return values
def validate_candidental(self, values: pd.Series) -> pd.Series:
values = values.map(str)
for smi in values:
smiles2mol(smi)
return values
def is_fixed(self) -> bool:
return False
def fixed_value(self, transform_type: Optional[AnyMolFeatures] = None) -> None: # type: ignore
return None
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
raise ValueError("Sampling not supported for `MolecularInput`")
def get_bounds( # type: ignore
self,
transform_type: AnyMolFeatures,
values: pd.Series,
reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
"""Calculates the lower and upper bounds for the feature based on the given transform type and values.
Args:
transform_type (AnyMolFeatures): The type of transformation to apply to the data.
values (pd.Series): The actual data over which the lower and upper bounds are calculated.
reference_value (Optional[str], optional): The reference value for the transformation. Not used here.
Defaults to None.
Returns:
Tuple[List[float], List[float]]: A tuple containing the lower and upper bounds of the transformed data.
Raises:
NotImplementedError: Raised when `values` is None, as it is currently required for `MolecularInput`.
"""
if values is None:
raise NotImplementedError(
"`values` is currently required for `MolecularInput`",
)
data = self.to_descriptor_encoding(transform_type, values)
lower = data.min(axis=0).values.tolist()
upper = data.max(axis=0).values.tolist()
return lower, upper
def to_descriptor_encoding(
self,
transform_type: AnyMolFeatures,
values: pd.Series,
) -> pd.DataFrame:
"""Converts values to descriptor encoding.
Args:
values (pd.Series): Values to transform.
Returns:
pd.DataFrame: Descriptor encoded dataframe.
"""
descriptor_values = transform_type.get_descriptor_values(values)
descriptor_values.columns = [
get_encoded_name(self.key, d) for d in transform_type.get_descriptor_names()
]
descriptor_values.index = values.index
return descriptor_values
fixed_value(self, transform_type=None)
Method to return the fixed value in case of a fixed feature.
Returns:
Type | Description |
---|---|
Union[None,str,float] |
None in case the feature is not fixed, else the fixed value. |
Source code in bofire/data_models/features/molecular.py
def fixed_value(self, transform_type: Optional[AnyMolFeatures] = None) -> None: # type: ignore
return None
get_bounds(self, transform_type, values, reference_value=None)
Calculates the lower and upper bounds for the feature based on the given transform type and values.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
transform_type |
AnyMolFeatures |
The type of transformation to apply to the data. |
required |
values |
pd.Series |
The actual data over which the lower and upper bounds are calculated. |
required |
reference_value |
Optional[str] |
The reference value for the transformation. Not used here. Defaults to None. |
None |
Returns:
Type | Description |
---|---|
Tuple[List[float], List[float]] |
A tuple containing the lower and upper bounds of the transformed data. |
Exceptions:
Type | Description |
---|---|
NotImplementedError |
Raised when values is None, as it is currently required for MolecularInput. |
Source code in bofire/data_models/features/molecular.py
def get_bounds( # type: ignore
self,
transform_type: AnyMolFeatures,
values: pd.Series,
reference_value: Optional[str] = None,
) -> Tuple[List[float], List[float]]:
"""Calculates the lower and upper bounds for the feature based on the given transform type and values.
Args:
transform_type (AnyMolFeatures): The type of transformation to apply to the data.
values (pd.Series): The actual data over which the lower and upper bounds are calculated.
reference_value (Optional[str], optional): The reference value for the transformation. Not used here.
Defaults to None.
Returns:
Tuple[List[float], List[float]]: A tuple containing the lower and upper bounds of the transformed data.
Raises:
NotImplementedError: Raised when `values` is None, as it is currently required for `MolecularInput`.
"""
if values is None:
raise NotImplementedError(
"`values` is currently required for `MolecularInput`",
)
data = self.to_descriptor_encoding(transform_type, values)
lower = data.min(axis=0).values.tolist()
upper = data.max(axis=0).values.tolist()
return lower, upper
is_fixed(self)
Indicates if a variable is set to a fixed value.
Returns:
Type | Description |
---|---|
bool |
True if fixed, else False. |
Source code in bofire/data_models/features/molecular.py
def is_fixed(self) -> bool:
return False
sample(self, n, seed=None)
Sample a series of allowed values.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n |
int |
Number of samples |
required |
seed |
int |
random seed. Defaults to None. |
None |
Returns:
Type | Description |
---|---|
pd.Series |
Sampled values. |
Source code in bofire/data_models/features/molecular.py
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
raise ValueError("Sampling not supported for `MolecularInput`")
to_descriptor_encoding(self, transform_type, values)
Converts values to descriptor encoding.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.Series |
Values to transform. |
required |
Returns:
Type | Description |
---|---|
pd.DataFrame |
Descriptor encoded dataframe. |
Source code in bofire/data_models/features/molecular.py
def to_descriptor_encoding(
self,
transform_type: AnyMolFeatures,
values: pd.Series,
) -> pd.DataFrame:
"""Converts values to descriptor encoding.
Args:
values (pd.Series): Values to transform.
Returns:
pd.DataFrame: Descriptor encoded dataframe.
"""
descriptor_values = transform_type.get_descriptor_values(values)
descriptor_values.columns = [
get_encoded_name(self.key, d) for d in transform_type.get_descriptor_names()
]
descriptor_values.index = values.index
return descriptor_values
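A sketch of encoding SMILES with a molecular featurizer; it assumes the `Fingerprints` transform from `bofire.data_models.molfeatures.api` with an `n_bits` argument, and requires rdkit:

```python
import pandas as pd

from bofire.data_models.features.molecular import MolecularInput
from bofire.data_models.molfeatures.api import Fingerprints  # assumed import path

feat = MolecularInput(key="molecule")
transform = Fingerprints(n_bits=32)  # assumed keyword; bit count kept small for readability
encoded = feat.to_descriptor_encoding(
    transform_type=transform,
    values=pd.Series(["CCO", "CC(=O)C"]),
)
# one column per fingerprint bit, each prefixed with the feature key ("molecule_...")
print(encoded.shape)  # expected (2, 32): one row per molecule, one column per bit
```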
validate_candidental(self, values)
Abstract method to validate the suggested candidates
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.Series |
A dataFrame with candidates |
required |
Returns:
Type | Description |
---|---|
pd.Series |
The passed dataFrame with candidates |
Source code in bofire/data_models/features/molecular.py
def validate_candidental(self, values: pd.Series) -> pd.Series:
values = values.map(str)
for smi in values:
smiles2mol(smi)
return values
validate_experimental(self, values, strict=False)
Abstract method to validate the experimental dataFrame
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values |
pd.Series |
A dataFrame with experiments |
required |
strict |
bool |
Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False. |
False |
Returns:
Type | Description |
---|---|
pd.Series |
The passed dataFrame with experiments |
Source code in bofire/data_models/features/molecular.py
def validate_experimental(
self,
values: pd.Series,
strict: bool = False,
) -> pd.Series:
values = values.map(str)
for smi in values:
smiles2mol(smi)
return values
numerical
NumericalInput (Input)
Abstract base class for all numerical (ordinal) input features.
Source code in bofire/data_models/features/numerical.py
class NumericalInput(Input):
"""Abstract base class for all numerical (ordinal) input features."""
unit: Optional[str] = None
@staticmethod
def valid_transform_types() -> List:
return []
@property
@abstractmethod
def lower_bound(self) -> float:
pass
@property
@abstractmethod
def upper_bound(self) -> float:
pass
def to_unit_range(
self,
values: Union[pd.Series, np.ndarray],
use_real_bounds: bool = False,
) -> Union[pd.Series, np.ndarray]:
"""Convert to the unit range between 0 and 1.
Args:
values (pd.Series): values to be transformed
use_real_bounds (bool, optional): if True, use the bounds from the
actual values else the bounds from the feature. Defaults to False.
Raises:
ValueError: If lower_bound == upper_bound, an error is raised
Returns:
pd.Series: transformed values.
"""
if use_real_bounds:
lower, upper = self.get_bounds(
transform_type=None,
values=values, # type: ignore
)
lower = lower[0]
upper = upper[0]
else:
lower, upper = self.lower_bound, self.upper_bound
if lower == upper:
raise ValueError("Fixed feature cannot be transformed to unit range.")
allowed_range = upper - lower
return (values - lower) / allowed_range
def from_unit_range(
self,
values: Union[pd.Series, np.ndarray],
) -> Union[pd.Series, np.ndarray]:
"""Convert from unit range.
Args:
values (pd.Series): values to transform from.
Raises:
ValueError: if the feature is fixed raise a value error.
Returns:
pd.Series: The back-transformed values.
"""
if self.is_fixed():
raise ValueError("Fixed feature cannot be transformed from unit range.")
allowed_range = self.upper_bound - self.lower_bound
return (values * allowed_range) + self.lower_bound
def is_fixed(self):
"""Method to check if the feature is fixed
Returns:
Boolean: True when the feature is fixed, false otherwise.
"""
return self.lower_bound == self.upper_bound
def fixed_value(
self,
transform_type: Optional[TTransform] = None,
) -> Union[None, List[float]]:
"""Method to get the value to which the feature is fixed
Returns:
Float: Return the feature value or None if the feature is not fixed.
"""
assert transform_type is None
if self.is_fixed():
return [self.lower_bound]
return None
def validate_experimental(self, values: pd.Series, strict=False) -> pd.Series:
"""Method to validate the experimental dataFrame
Args:
values (pd.Series): A dataFrame with experiments
strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not.
Defaults to False.
Raises:
ValueError: when a value is not numerical
ValueError: when there is no variation in a feature provided by the experimental data
Returns:
pd.Series: A dataFrame with experiments
"""
try:
values = pd.to_numeric(values, errors="raise").astype("float64")
except ValueError:
raise ValueError(
f"not all values of input feature `{self.key}` are numerical",
)
values = values.astype("float64")
if strict:
lower, upper = self.get_bounds(transform_type=None, values=values)
if lower == upper:
raise ValueError(
f"No variation present or planned for feature {self.key}. Remove it.",
)
return values
def validate_candidental(self, values: pd.Series) -> pd.Series:
"""Validate the suggested candidates for the feature.
Args:
values (pd.Series): suggested candidates for the feature
Raises:
ValueError: Error is raised when one of the values is not numerical.
Returns:
pd.Series: the original provided candidates
"""
try:
return pd.to_numeric(values, errors="raise").astype("float64")
except ValueError:
raise ValueError(
f"not all values of input feature `{self.key}` are numerical",
)
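A minimal sketch using `ContinuousInput` (a `NumericalInput` subclass from `bofire/data_models/features/continuous.py`); the `bounds=(lower, upper)` constructor argument is assumed:

```python
import pandas as pd

from bofire.data_models.features.continuous import ContinuousInput

feat = ContinuousInput(key="temperature", bounds=(20.0, 80.0))  # assumed constructor argument
scaled = feat.to_unit_range(pd.Series([20.0, 50.0, 80.0]))
assert scaled.tolist() == [0.0, 0.5, 1.0]
restored = feat.from_unit_range(scaled)
assert restored.tolist() == [20.0, 50.0, 80.0]
```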
fixed_value(self, transform_type=None)
Method to get the value to which the feature is fixed
Returns:
Type | Description |
---|---|
Float | Return the feature value or None if the feature is not fixed. |
Source code in bofire/data_models/features/numerical.py
def fixed_value(
self,
transform_type: Optional[TTransform] = None,
) -> Union[None, List[float]]:
"""Method to get the value to which the feature is fixed
Returns:
Float: Return the feature value or None if the feature is not fixed.
"""
assert transform_type is None
if self.is_fixed():
return [self.lower_bound]
return None
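Example (a minimal sketch, not taken from the library reference; the ContinuousInput constructor with key and bounds=(low, high) is assumed here and may differ between BoFire versions):
# Hypothetical usage: a feature whose bounds coincide is treated as fixed.
from bofire.data_models.features.api import ContinuousInput  # assumed import path

free = ContinuousInput(key="temperature", bounds=(20.0, 80.0))
fixed = ContinuousInput(key="pressure", bounds=(1.0, 1.0))  # lower == upper -> fixed

print(free.is_fixed(), free.fixed_value())    # False None
print(fixed.is_fixed(), fixed.fixed_value())  # True [1.0]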
from_unit_range(self, values)
Convert from unit range.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | values to transform from. | required |
Exceptions:
Type | Description |
---|---|
ValueError | raised if the feature is fixed. |
Returns:
Type | Description |
---|---|
pd.Series | the values mapped back to the feature's original range. |
Source code in bofire/data_models/features/numerical.py
def from_unit_range(
self,
values: Union[pd.Series, np.ndarray],
) -> Union[pd.Series, np.ndarray]:
"""Convert from unit range.
Args:
values (pd.Series): values to transform from.
Raises:
ValueError: if the feature is fixed raise a value error.
Returns:
            pd.Series: the values mapped back to the feature's original range.
"""
if self.is_fixed():
raise ValueError("Fixed feature cannot be transformed from unit range.")
allowed_range = self.upper_bound - self.lower_bound
return (values * allowed_range) + self.lower_bound
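Example (a minimal sketch under the same assumed ContinuousInput constructor as above):
# Hypothetical usage: values in [0, 1] are mapped back onto the feature bounds.
import pandas as pd
from bofire.data_models.features.api import ContinuousInput  # assumed import path

feat = ContinuousInput(key="temperature", bounds=(20.0, 80.0))
print(feat.from_unit_range(pd.Series([0.0, 0.5, 1.0])).tolist())  # [20.0, 50.0, 80.0]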
is_fixed(self)
Method to check if the feature is fixed
Returns:
Type | Description |
---|---|
Boolean | True when the feature is fixed, false otherwise. |
Source code in bofire/data_models/features/numerical.py
def is_fixed(self):
"""Method to check if the feature is fixed
Returns:
Boolean: True when the feature is fixed, false otherwise.
"""
return self.lower_bound == self.upper_bound
to_unit_range(self, values, use_real_bounds=False)
Convert to the unit range between 0 and 1.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | values to be transformed | required |
use_real_bounds | bool | if True, use the bounds from the actual values, else the bounds from the feature. Defaults to False. | False |
Exceptions:
Type | Description |
---|---|
ValueError | If lower_bound == upper_bound an error is raised |
Returns:
Type | Description |
---|---|
pd.Series | transformed values. |
Source code in bofire/data_models/features/numerical.py
def to_unit_range(
self,
values: Union[pd.Series, np.ndarray],
use_real_bounds: bool = False,
) -> Union[pd.Series, np.ndarray]:
"""Convert to the unit range between 0 and 1.
Args:
values (pd.Series): values to be transformed
use_real_bounds (bool, optional): if True, use the bounds from the
actual values else the bounds from the feature. Defaults to False.
Raises:
            ValueError: If lower_bound == upper_bound an error is raised
Returns:
pd.Series: transformed values.
"""
if use_real_bounds:
lower, upper = self.get_bounds(
transform_type=None,
values=values, # type: ignore
)
lower = lower[0]
upper = upper[0]
else:
lower, upper = self.lower_bound, self.upper_bound
if lower == upper:
raise ValueError("Fixed feature cannot be transformed to unit range.")
allowed_range = upper - lower
return (values - lower) / allowed_range
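Example (a minimal sketch under the same assumed constructor; use_real_bounds delegates to get_bounds, whose exact behaviour is defined elsewhere in this module):
# Hypothetical usage: scaling onto [0, 1] with the feature bounds and a round trip.
import pandas as pd
from bofire.data_models.features.api import ContinuousInput  # assumed import path

feat = ContinuousInput(key="temperature", bounds=(20.0, 80.0))
values = pd.Series([20.0, 50.0, 65.0])

unit = feat.to_unit_range(values)           # (x - 20) / 60
print(unit.tolist())                        # [0.0, 0.5, 0.75]
print(feat.from_unit_range(unit).tolist())  # [20.0, 50.0, 65.0]
# to_unit_range(values, use_real_bounds=True) would derive lower/upper from
# get_bounds(values=...) instead of the fixed feature bounds.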
validate_candidental(self, values)
Validate the suggested candidates for the feature.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | suggested candidates for the feature | required |
Exceptions:
Type | Description |
---|---|
ValueError | Error is raised when one of the values is not numerical. |
Returns:
Type | Description |
---|---|
pd.Series | the original provided candidates |
Source code in bofire/data_models/features/numerical.py
def validate_candidental(self, values: pd.Series) -> pd.Series:
"""Validate the suggested candidates for the feature.
Args:
values (pd.Series): suggested candidates for the feature
Raises:
ValueError: Error is raised when one of the values is not numerical.
Returns:
pd.Series: the original provided candidates
"""
try:
return pd.to_numeric(values, errors="raise").astype("float64")
except ValueError:
raise ValueError(
f"not all values of input feature `{self.key}` are numerical",
)
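Example (a minimal sketch under the same assumed constructor; concrete subclasses may add further checks, e.g. that candidates lie within the bounds):
# Hypothetical usage: candidates are coerced to float64; non-numeric entries raise.
import pandas as pd
from bofire.data_models.features.api import ContinuousInput  # assumed import path

feat = ContinuousInput(key="temperature", bounds=(20.0, 80.0))
print(feat.validate_candidental(pd.Series(["21", 30, 42.5])).tolist())  # [21.0, 30.0, 42.5]

try:
    feat.validate_candidental(pd.Series(["hot", 30]))
except ValueError as err:
    print(err)  # not all values of input feature `temperature` are numerical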
validate_experimental(self, values, strict=False)
Method to validate the experimental dataFrame
Parameters:
Name | Type | Description | Default |
---|---|---|---|
values | pd.Series | A dataFrame with experiments | required |
strict | bool | Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False. | False |
Exceptions:
Type | Description |
---|---|
ValueError | when a value is not numerical |
ValueError | when there is no variation in a feature provided by the experimental data |
Returns:
Type | Description |
---|---|
pd.Series | A dataFrame with experiments |
Source code in bofire/data_models/features/numerical.py
def validate_experimental(self, values: pd.Series, strict=False) -> pd.Series:
"""Method to validate the experimental dataFrame
Args:
values (pd.Series): A dataFrame with experiments
strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not.
Defaults to False.
Raises:
ValueError: when a value is not numerical
ValueError: when there is no variation in a feature provided by the experimental data
Returns:
pd.Series: A dataFrame with experiments
"""
try:
values = pd.to_numeric(values, errors="raise").astype("float64")
except ValueError:
raise ValueError(
f"not all values of input feature `{self.key}` are numerical",
)
values = values.astype("float64")
if strict:
lower, upper = self.get_bounds(transform_type=None, values=values)
if lower == upper:
raise ValueError(
f"No variation present or planned for feature {self.key}. Remove it.",
)
return values
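Example (a minimal sketch under the same assumed constructor):
# Hypothetical usage: experimental values are coerced to float64; with
# strict=True the column is additionally checked for variation via get_bounds.
import pandas as pd
from bofire.data_models.features.api import ContinuousInput  # assumed import path

feat = ContinuousInput(key="temperature", bounds=(20.0, 80.0))
print(feat.validate_experimental(pd.Series(["25", 40, 60.0])).tolist())  # [25.0, 40.0, 60.0]

try:
    feat.validate_experimental(pd.Series(["hot", 40, 60.0]))
except ValueError as err:
    print(err)  # not all values of input feature `temperature` are numerical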
task
TaskInput (CategoricalInput)
Source code in bofire/data_models/features/task.py
class TaskInput(CategoricalInput):
order_id: ClassVar[int] = 8
type: Literal["TaskInput"] = "TaskInput"
fidelities: List[int] = []
@model_validator(mode="after")
def validate_fidelities(self):
n_tasks = len(self.categories)
if self.fidelities == []:
for _ in range(n_tasks):
self.fidelities.append(0)
if len(self.fidelities) != n_tasks:
raise ValueError(
"Length of fidelity lists must be equal to the number of tasks",
)
if list(set(self.fidelities)) != list(range(np.max(self.fidelities) + 1)):
raise ValueError(
"Fidelities must be a list containing integers, starting from 0 and increasing by 1",
)
return self
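Example (a minimal sketch; the key field comes from the Input base class, and the import path is assumed):
# Hypothetical usage: two tasks with explicit fidelities. Fidelities must be
# integers starting at 0 without gaps; if omitted, they default to all zeros.
from bofire.data_models.features.api import TaskInput  # assumed import path

task = TaskInput(key="task", categories=["simulation", "experiment"], fidelities=[1, 0])
print(task.fidelities)  # [1, 0]

task_default = TaskInput(key="task", categories=["simulation", "experiment"])
print(task_default.fidelities)  # [0, 0]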