import logging
import os
import random
from collections.abc import Callable
from pathlib import Path
from typing import Any, Optional
from cyrxnopt.NestedVenv import NestedVenv
from cyrxnopt.OptimizerABC import OptimizerABC
from cyrxnopt.utilities.config.transforms import use_subkeys
logger = logging.getLogger(__name__)
[docs]
class OptimizerEDBOp(OptimizerABC):
# Private static data member to list dependency packages required
# by this class
_packages = ["benchmarking", "edboplus", "pandas"]
def __init__(self, venv: NestedVenv) -> None:
"""Optimizer class for the EDBO+ algorithm.
:param venv: Virtual environment to install the optimizer
:type venv: NestedVenv
"""
super().__init__(venv)
self._edbop_filename = "my_optimization.csv"
self._reaction_order_filename = "reaction_order.csv"
[docs]
def get_config(self) -> list[dict[str, Any]]:
"""Get the configuration options available for this optimizer.
See :py:meth:`OptimizerABC.get_config` for more information about the
config descriptions returned by this method and for general usage
information.
:return: List of configuration options with option name, data type,
and information about which values are allowed/defaulted.
:rtype: list[dict[str, Any]]
"""
config: list[dict[str, Any]] = [
{
"name": "continuous_feature_names",
"type": "list[str]",
"value": [],
},
{
"name": "continuous_feature_bounds",
"type": "list[list[float]]",
"value": [],
},
{
"name": "continuous_feature_resolutions",
"type": "list[float]",
"value": [],
},
{
"name": "categorical_feature_names",
"type": "list[str]",
"value": [],
},
{
"name": "categorical_feature_values",
"type": "list[list[str]]",
"value": [],
},
{
"name": "budget",
"type": "int",
"value": 100,
},
{
"name": "objectives",
"type": "list[str]",
"value": ["yield"],
},
{
"name": "direction",
"type": "list[str]",
"value": ["min"],
"range": ["min", "max"],
},
]
return config
[docs]
def set_config(self, experiment_dir: str, config: dict[str, Any]) -> None:
"""Generate all the necessary data files based on the given configuration.
See :py:meth:`OptimizerABC.set_config` for more information about how
to form the config dictionary and for general usage information.
:param experiment_dir: Output directory for the configuration file
:type experiment_dir: str
:param config: CyRxnOpt-level config for the optimizer
:type config: dict[str, Any]
"""
if not os.path.exists(experiment_dir):
os.makedirs(experiment_dir)
# Get reaction scope configurations from general config
config = self._config_translate(config)
# generate reaction scope for EDBO+
self._imports["EDBOplus"]().generate_reaction_scope(
components=config["reaction_components"],
directory=experiment_dir,
filename=self._edbop_filename,
check_overwrite=False,
)
# Initialize the EDBO+ file to be used for prediction
self._imports["EDBOplus"]().run(
directory=experiment_dir,
# Previously generated scope
filename=self._edbop_filename,
# Objectives to be optimized
# For example, maximize yield and ee but minimize side_product:
# objectives=['yield', 'ee', 'side_product'],
# objective_mode=['max', 'max', 'min'],
objectives=config["objectives"],
objective_mode=config["direction"],
# Number of experiments in parallel to perform in this round
batch=1,
# Features to be included in the model
columns_features="all",
# Initialization method
init_sampling_method="seed",
seed=random.randint(0, 2**32 - 1),
)
# Create file for preserving reaction order
# TODO: Rework this when we switch to multi-objective!
with open(
Path(experiment_dir) / self._reaction_order_filename, "w"
) as fout:
feature_names = config["continuous"]["feature_names"]
# If categorical feature names is an empty list, list.extend leaves
# the list unchanged
feature_names.extend(config["categorical"]["feature_names"])
objectives = config["objectives"]
# Collect the feature names and objective names as headers
headers = feature_names
headers.extend(objectives)
fout.write(",".join(headers) + "\n")
[docs]
def train(
self,
prev_param: list[Any],
yield_value: float,
experiment_dir: str,
config: dict[str, Any],
obj_func: Optional[Callable[..., float]] = None,
) -> list[Any]:
"""No training step for this algorithm.
:returns: List will always be empty.
:rtype: list[Any]
"""
return []
[docs]
def predict(
self,
prev_param: list[Any],
yield_value: float,
experiment_dir: str,
config: dict[str, Any],
obj_func: Optional[Callable[..., float]] = None,
) -> list[Any]:
"""Searches for the best parameters and records results from prior steps.
:py:meth:`OptimizerEDBOp.set_config` must be called prior to this method
to generate the necessary files.
:param prev_param: Parameters provided from the previous prediction,
provide an empty list for the first call
:type prev_param: list[Any]
:param yield_value: Experimental yield
:type yield_value: float
:param experiment_dir: Output directory for any generated files
:type experiment_dir: str
:param config: CyRxnOpt-level config for the optimizer
:type config: dict[str, Any]
:param obj_func: Ignored for this optimizer, defaults to None
:type obj_func: Optional[Callable[..., float]], optional
:returns: The next suggested reaction to perform
:rtype: list[Any]
"""
# Get reaction scope configurations from general config file
config = self._config_translate(config)
# Read optimization file with reaction conditions
df_edbo = self._imports["pd"].read_csv(
os.path.join(experiment_dir, self._edbop_filename)
)
# TODO: Writing the entire dataframe of shape (2085136, 6),
# 12,510,816 elements: 8.674756252000407 sec. This can probably
# be optimized quite a bit
if len(prev_param) != 0:
# [df_edbo.loc[0,config['objectives'][i]] =
# yield_value[i] for i in range(len(yield_value))]
df_edbo.loc[0, config["objectives"][0]] = yield_value
df_edbo.to_csv(
os.path.join(experiment_dir, self._edbop_filename), index=False
)
# Write the reaction parameters and results to the file preserving
# reaction order
# TODO: Rework this when we switch to multi-objective!
with open(
Path(experiment_dir) / self._reaction_order_filename, "a"
) as fout:
line = prev_param
line.extend([yield_value])
line = [str(element) for element in line]
fout.write(",".join(line))
fout.write("\n")
# Run one EDBO+ prediction
self._imports["EDBOplus"]().run(
directory=experiment_dir,
filename=self._edbop_filename,
objectives=config["objectives"],
objective_mode=config["direction"],
batch=1,
columns_features="all",
init_sampling_method="seed",
seed=random.randint(0, 2**32 - 1),
write_extra_data=False,
)
# After one cycle of prediction, read the reaction condition file to
# get the next reaction condition
df_edbo = self._imports["pd"].read_csv(
os.path.join(experiment_dir, self._edbop_filename)
)
next_combo = df_edbo.iloc[:1].values.tolist()
next_combo = next_combo[0][:-2]
return next_combo
def _config_translate(self, config: dict[str, Any]) -> dict[str, Any]:
"""Convers general config into EDBO+ reaction scope config format.
:param config: General configuration dictionary
:type config: dict[str, Any]
:return: Translated configuration dictionary
:rtype: dict[str, Any]
"""
self._import_deps()
reaction_components = {}
config = use_subkeys(config)
for i in range(len(config["continuous"]["feature_names"])):
low_bound = config["continuous"]["bounds"][i][0]
upper_bound = config["continuous"]["bounds"][i][1]
increment = config["continuous"]["resolutions"][i]
values = self._imports["np"].arange(
low_bound, upper_bound + increment, increment
)
reaction_components[config["continuous"]["feature_names"][i]] = (
values
)
if bool(config["categorical"]["feature_names"]):
for i in range(len(config["categorical"]["feature_names"])):
reaction_components[
config["categorical"]["feature_names"][i]
] = config["categorical"]["values"][i]
# EDBO+ supports multi-objective optimization, of which single-
# objective optimization is a subset. When providing arguments
# for single-objective optimization, only one objective and one
# corresponding direction must be given. This catches when the user
# does not provide single-element lists for the objectives and
# their directions, which could be an easy mistake.
if type(config["objectives"]) is str:
config["objectives"] = [config["objectives"]]
if type(config["direction"]) is str:
config["direction"] = [config["direction"]]
edbo_config = {
"reaction_components": reaction_components,
"objectives": config["objectives"],
"direction": config["direction"],
}
edbo_config = config | edbo_config
return edbo_config
def _import_deps(self) -> None:
"""Import packages needed to run the optimizer."""
import numpy as np # type: ignore
import pandas as pd # type: ignore
from edbo.plus.optimizer_botorch import EDBOplus # type: ignore
self._imports = {"EDBOplus": EDBOplus, "np": np, "pd": pd}