Source code for cyrxnopt.OptimizerEDBOp

import logging
import os
import random
from collections.abc import Callable
from pathlib import Path
from typing import Any, Optional

from cyrxnopt.NestedVenv import NestedVenv
from cyrxnopt.OptimizerABC import OptimizerABC
from cyrxnopt.utilities.config.transforms import use_subkeys

logger = logging.getLogger(__name__)



[docs]
class OptimizerEDBOp(OptimizerABC):
    """Optimizer class for the EDBO+ algorithm."""

    # Private static data member to list dependency packages required
    # by this class
    _packages = ["benchmarking", "edboplus", "pandas"]

    def __init__(self, venv: NestedVenv) -> None:
        """Set up an optimizer wrapping the EDBO+ algorithm.

        Besides delegating to the parent initializer, this records the names
        of the two CSV files this optimizer keeps inside an experiment
        directory.

        :param venv: Virtual environment to install the optimizer
        :type venv: NestedVenv
        """

        super().__init__(venv)

        # CSV holding the EDBO+ reaction scope and optimization state
        self._edbop_filename = "my_optimization.csv"
        # CSV logging the reactions in the order they were reported
        self._reaction_order_filename = "reaction_order.csv"


[docs]
    def get_config(self) -> list[dict[str, Any]]:
        """Describe the configuration options this optimizer understands.

        See :py:meth:`OptimizerABC.get_config` for more information about the
        config descriptions returned by this method and for general usage
        information.

        :return: One description per option, each holding the option name,
                 its data type, its default value and, where applicable,
                 the range of allowed values.
        :rtype: list[dict[str, Any]]
        """

        def describe(
            name: str, dtype: str, default: Any, **extra: Any
        ) -> dict[str, Any]:
            # Key order is name, type, value, then any extra entries
            return {"name": name, "type": dtype, "value": default, **extra}

        # Fresh default objects are created on every call, so callers may
        # modify the returned descriptions freely
        return [
            describe("continuous_feature_names", "list[str]", []),
            describe("continuous_feature_bounds", "list[list[float]]", []),
            describe("continuous_feature_resolutions", "list[float]", []),
            describe("categorical_feature_names", "list[str]", []),
            describe("categorical_feature_values", "list[list[str]]", []),
            describe("budget", "int", 100),
            describe("objectives", "list[str]", ["yield"]),
            describe(
                "direction", "list[str]", ["min"], range=["min", "max"]
            ),
        ]



[docs]
    def set_config(self, experiment_dir: str, config: dict[str, Any]) -> None:
        """Generate all the necessary data files based on the given configuration.

        See :py:meth:`OptimizerABC.set_config` for more information about how
        to form the config dictionary and for general usage information.

        The experiment directory is created if needed, the EDBO+ reaction
        scope file is generated and initialized with one EDBO+ run, and the
        header line of the reaction-order file is written.

        :param experiment_dir: Output directory for the configuration file
        :type experiment_dir: str
        :param config: CyRxnOpt-level config for the optimizer
        :type config: dict[str, Any]
        """

        # exist_ok avoids the race between checking for the directory and
        # creating it
        os.makedirs(experiment_dir, exist_ok=True)

        # Get reaction scope configurations from general config
        # (this also imports the dependencies into self._imports)
        config = self._config_translate(config)

        # generate reaction scope for EDBO+
        self._imports["EDBOplus"]().generate_reaction_scope(
            components=config["reaction_components"],
            directory=experiment_dir,
            filename=self._edbop_filename,
            check_overwrite=False,
        )

        # Initialize the EDBO+ file to be used for prediction
        self._imports["EDBOplus"]().run(
            directory=experiment_dir,
            # Previously generated scope
            filename=self._edbop_filename,
            # Objectives to be optimized
            # For example, maximize yield and ee but minimize side_product:
            # objectives=['yield', 'ee', 'side_product'],
            # objective_mode=['max', 'max', 'min'],
            objectives=config["objectives"],
            objective_mode=config["direction"],
            # Number of experiments in parallel to perform in this round
            batch=1,
            # Features to be included in the model
            columns_features="all",
            # Initialization method
            init_sampling_method="seed",
            seed=random.randint(0, 2**32 - 1),
        )

        # Collect the feature names and objective names as headers. A new
        # list is built on purpose: extending the config's own lists in place
        # would append the categorical and objective names to the continuous
        # feature names, corrupting the config if those lists are shared with
        # the caller.
        headers = [
            *config["continuous"]["feature_names"],
            *config["categorical"]["feature_names"],
            *config["objectives"],
        ]

        # Create file for preserving reaction order
        # TODO: Rework this when we switch to multi-objective!
        with open(
            Path(experiment_dir) / self._reaction_order_filename, "w"
        ) as fout:
            fout.write(",".join(headers) + "\n")



[docs]
    def train(
        self,
        prev_param: list[Any],
        yield_value: float,
        experiment_dir: str,
        config: dict[str, Any],
        obj_func: Optional[Callable[..., float]] = None,
    ) -> list[Any]:
        """Do nothing; this algorithm has no training step.

        All arguments are accepted but ignored.

        :returns: Always an empty list.
        :rtype: list[Any]
        """

        nothing_trained: list[Any] = []
        return nothing_trained



[docs]
    def predict(
        self,
        prev_param: list[Any],
        yield_value: float,
        experiment_dir: str,
        config: dict[str, Any],
        obj_func: Optional[Callable[..., float]] = None,
    ) -> list[Any]:
        """Searches for the best parameters and records results from prior steps.

        :py:meth:`OptimizerEDBOp.set_config` must be called prior to this method
        to generate the necessary files.

        When ``prev_param`` is not empty, ``yield_value`` is stored in the
        first row of the EDBO+ file under the first objective and the
        reaction is appended to the reaction-order file. ``prev_param``
        itself is left unmodified.

        :param prev_param: Parameters provided from the previous prediction,
                           provide an empty list for the first call
        :type prev_param: list[Any]
        :param yield_value: Experimental yield
        :type yield_value: float
        :param experiment_dir: Output directory for any generated files
        :type experiment_dir: str
        :param config: CyRxnOpt-level config for the optimizer
        :type config: dict[str, Any]
        :param obj_func: Ignored for this optimizer, defaults to None
        :type obj_func: Optional[Callable[..., float]], optional

        :returns: The next suggested reaction to perform
        :rtype: list[Any]
        """

        # Get reaction scope configurations from general config file
        # (this also imports the dependencies into self._imports)
        config = self._config_translate(config)

        edbop_path = os.path.join(experiment_dir, self._edbop_filename)

        # TODO: Writing the entire dataframe of shape (2085136, 6),
        #       12,510,816 elements: 8.674756252000407 sec. This can probably
        #       be optimized quite a bit
        if len(prev_param) != 0:
            # The optimization file is only loaded when there is a result to
            # record; on the first call it would be read for nothing
            df_edbo = self._imports["pd"].read_csv(edbop_path)

            # Only the first objective is recorded for now
            df_edbo.loc[0, config["objectives"][0]] = yield_value
            df_edbo.to_csv(edbop_path, index=False)

            # Write the reaction parameters and results to the file preserving
            # reaction order. The record is a new list so that the caller's
            # prev_param is not extended with the yield value.
            # TODO: Rework this when we switch to multi-objective!
            record = [*prev_param, yield_value]
            with open(
                Path(experiment_dir) / self._reaction_order_filename, "a"
            ) as fout:
                fout.write(",".join(str(element) for element in record))
                fout.write("\n")

        # Run one EDBO+ prediction
        self._imports["EDBOplus"]().run(
            directory=experiment_dir,
            filename=self._edbop_filename,
            objectives=config["objectives"],
            objective_mode=config["direction"],
            batch=1,
            columns_features="all",
            init_sampling_method="seed",
            seed=random.randint(0, 2**32 - 1),
            write_extra_data=False,
        )

        # After one cycle of prediction, read the reaction condition file to
        # get the next reaction condition
        df_edbo = self._imports["pd"].read_csv(edbop_path)

        # The suggestion is the first row without its last two columns
        # NOTE(review): presumably those are the objective and EDBO+
        # priority columns, which only holds for a single objective - confirm
        next_combo = df_edbo.iloc[:1].values.tolist()
        next_combo = next_combo[0][:-2]

        return next_combo


    def _config_translate(self, config: dict[str, Any]) -> dict[str, Any]:
        """Converts general config into EDBO+ reaction scope config format.

        The optimizer dependencies are imported as a side effect, so
        ``self._imports`` is populated once this method has been called.

        :param config: General configuration dictionary
        :type config: dict[str, Any]

        :return: Translated configuration dictionary: the sub-keyed config
                 extended with ``reaction_components`` and with
                 ``objectives`` and ``direction`` given as lists
        :rtype: dict[str, Any]
        """

        self._import_deps()
        reaction_components = {}

        config = use_subkeys(config)

        continuous = config["continuous"]
        categorical = config["categorical"]

        # Discretize every continuous feature into a grid of candidate values
        for i, feature_name in enumerate(continuous["feature_names"]):
            low_bound = continuous["bounds"][i][0]
            upper_bound = continuous["bounds"][i][1]
            increment = continuous["resolutions"][i]

            # The stop value is pushed one increment past the upper bound so
            # that the upper bound itself is part of the grid
            # NOTE(review): with non-integer increments, floating-point
            # rounding in arange may yield one extra value above upper_bound
            # - confirm whether the grid should be clipped
            reaction_components[feature_name] = self._imports["np"].arange(
                low_bound, upper_bound + increment, increment
            )

        # Categorical features are passed through with their allowed values
        if categorical["feature_names"]:
            for i, feature_name in enumerate(categorical["feature_names"]):
                reaction_components[feature_name] = categorical["values"][i]

        # EDBO+ supports multi-objective optimization, of which single-
        # objective optimization is a subset. When providing arguments
        # for single-objective optimization, only one objective and one
        # corresponding direction must be given. This catches when the user
        # does not provide single-element lists for the objectives and
        # their directions, which could be an easy mistake.
        if isinstance(config["objectives"], str):
            config["objectives"] = [config["objectives"]]
        if isinstance(config["direction"], str):
            config["direction"] = [config["direction"]]

        edbo_config = {
            "reaction_components": reaction_components,
            "objectives": config["objectives"],
            "direction": config["direction"],
        }

        # Keep every other key of the sub-keyed config alongside the
        # EDBO+ specific entries
        return config | edbo_config

    def _import_deps(self) -> None:
        """Load the optimizer's dependencies and store them on the instance.

        The imports happen here rather than at module level; afterwards the
        imported objects are available through ``self._imports`` under the
        keys ``"EDBOplus"``, ``"np"`` and ``"pd"``.
        """

        import numpy  # type: ignore
        import pandas  # type: ignore
        from edbo.plus.optimizer_botorch import EDBOplus  # type: ignore

        self._imports = {
            "EDBOplus": EDBOplus,
            "np": numpy,
            "pd": pandas,
        }