From e0e317bfb1bef6dbaa267ffd6c37f4ecd4f502b1 Mon Sep 17 00:00:00 2001
From: Xueqing Liu
Date: Fri, 3 Jun 2022 15:19:22 -0400
Subject: [PATCH] fixing trainable and update function, completing NOTE (#566)

* fix checkpoint naming + trial id for non-ray mode, fix the bug in running
  test mode, delete all the checkpoints in non-ray mode

* finished testing for checkpoint naming, delete checkpoint, ray, max iter = 1
---
 flaml/automl.py                 | 121 +++++++++++++++++---------------
 flaml/ml.py                     |   1 -
 test/automl/test_constraints.py |   2 +-
 3 files changed, 64 insertions(+), 60 deletions(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index eb4e19619..564470795 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -237,7 +237,7 @@ class SearchState:

         if self.base_eci is None:
             self.base_eci = time_used
-        if (obj is not None) and (self.best_loss is None or obj < self.best_loss):
+        if (obj is not None) and (obj < self.best_loss):
             self.best_loss_old = self.best_loss if self.best_loss < np.inf else 2 * obj
             self.best_loss = obj
             self.best_result = result
@@ -286,7 +286,7 @@ class AutoMLState:
             sampled_y_train = self.y_train[:sample_size]
             weight = self.fit_kwargs.get(
                 "sample_weight"
-            )  # NOTE: _prepare_sample_train_data is before
+            )  # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator
             if weight is not None:
                 sampled_weight = weight[:sample_size]
             if self.groups is not None:
@@ -296,7 +296,7 @@ class AutoMLState:
             sampled_y_train = self.y_train_all
             if (
                 "sample_weight" in self.fit_kwargs
-            ):  # NOTE: _prepare_sample_train_data is before
+            ):  # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator
                 sampled_weight = self.sample_weight_all
             if self.groups is not None:
                 groups = self.groups_all
@@ -311,7 +311,7 @@ class AutoMLState:

         this_estimator_kwargs = state.fit_kwargs_by_estimator.get(
             estimator
-        ).copy()  # NOTE: _compute_with_config_base is after
+        ).copy()  # NOTE: _compute_with_config_base is after kwargs is updated to fit_kwargs_by_estimator
         (
             sampled_X_train,
             sampled_y_train,
@@ -398,7 +398,7 @@ class AutoMLState:

         this_estimator_kwargs = self.fit_kwargs_by_estimator.get(
             estimator
-        ).copy()  # NOTE: _train_with_config is after
+        ).copy()  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
         (
             sampled_X_train,
             sampled_y_train,
@@ -408,14 +408,14 @@ class AutoMLState:
         if sampled_weight is not None:
             weight = this_estimator_kwargs[
                 "sample_weight"
-            ]  # NOTE: _train_with_config is after
+            ]  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
             this_estimator_kwargs[
                 "sample_weight"
-            ] = sampled_weight  # NOTE: _train_with_config is after
+            ] = sampled_weight  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
         if groups is not None:
             this_estimator_kwargs[
                 "groups"
-            ] = groups  # NOTE: _train_with_config is after
+            ] = groups  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator

         budget = (
             None
@@ -432,14 +432,14 @@ class AutoMLState:
             n_jobs=self.n_jobs,
             estimator_class=self.learner_classes.get(estimator),
             budget=budget,
-            fit_kwargs=this_estimator_kwargs,  # NOTE: _train_with_config is after
+            fit_kwargs=this_estimator_kwargs,  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
             eval_metric=self.metric if hasattr(self, "metric") else "train_time",
         )

         if sampled_weight is not None:
             this_estimator_kwargs[
                 "sample_weight"
-            ] = weight  # NOTE: _train_with_config is after
+            ] = weight  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator

         return estimator, train_time
@@ -626,10 +626,12 @@ class AutoML(BaseEstimator):
                 augment rare classes.
             min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample
                 size when sample=True.
-            use_ray: boolean, default=False | Whether to use ray to run the training
+            use_ray: boolean or dict.
+                If boolean: default=False | Whether to use ray to run the training
                 in separate processes. This can be used to prevent OOM for large
-                datasets, but will incur more overhead in time. Only use it if
-                you run into OOM failures.
+                datasets, but will incur more overhead in time.
+                If dict: the dict contains the keyword arguments to be passed to
+                [ray.tune.run](https://docs.ray.io/en/latest/tune/api_docs/execution.html).
             metric_constraints: list, default=[] | The list of metric constraints.
                 Each element in this list is a 3-tuple, which shall be expressed
                 in the following format: the first element of the 3-tuple is the name of the
@@ -1103,7 +1105,7 @@ class AutoML(BaseEstimator):

         self._sample_weight_full = self._state.fit_kwargs.get(
             "sample_weight"
-        )  # NOTE: _validate_data is before,
+        )  # NOTE: _validate_data is before kwargs is updated to fit_kwargs_by_estimator
         if X_val is not None and y_val is not None:
             assert (
                 isinstance(X_val, np.ndarray)
@@ -1164,7 +1166,7 @@ class AutoML(BaseEstimator):
             self._state.task in CLASSIFICATION
             and self._auto_augment
             and self._state.fit_kwargs.get("sample_weight")
-            is None  # NOTE: _prepare_data is before
+            is None  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
             and self._split_type in ["stratified", "uniform"]
             and self._state.task != TOKENCLASSIFICATION
         ):
@@ -1208,7 +1210,9 @@ class AutoML(BaseEstimator):
                 )
                 self._state.fit_kwargs[
                     "sample_weight"
-                ] = self._state.sample_weight_all  # NOTE: _prepare_data is before
+                ] = (
+                    self._state.sample_weight_all
+                )  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
             else:
                 X_train_all, y_train_all = shuffle(
                     X_train_all, y_train_all, random_state=RANDOM_SEED
@@ -1227,7 +1231,7 @@ class AutoML(BaseEstimator):
             num_samples = X_train_all.shape[0]
             period = self._state.fit_kwargs[
                 "period"
-            ]  # NOTE: _prepare_data is before
+            ]  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
             assert (
                 period < num_samples
             ), f"period={period}>#examples={num_samples}"
@@ -1239,7 +1243,7 @@ class AutoML(BaseEstimator):
         else:
             if (
                 "sample_weight" in self._state.fit_kwargs
-            ):  # NOTE: _prepare_data is before
+            ):  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                 (
                     X_train,
                     X_val,
@@ -1247,14 +1251,14 @@ class AutoML(BaseEstimator):
                     y_val,
                     self._state.fit_kwargs[
                         "sample_weight"
-                    ],  # NOTE: _prepare_data is before
+                    ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     self._state.weight_val,
                 ) = train_test_split(
                     X_train_all,
                     y_train_all,
                     self._state.fit_kwargs[
                         "sample_weight"
-                    ],  # NOTE: _prepare_data is before
+                    ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     test_size=split_ratio,
                     shuffle=False,
                 )
@@ -1297,7 +1301,7 @@ class AutoML(BaseEstimator):
                 stratify = y_rest if self._split_type == "stratified" else None
                 if (
                     "sample_weight" in self._state.fit_kwargs
-                ):  # NOTE: _prepare_data is before
+                ):  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     (
                         X_train,
                         X_val,
@@ -1310,17 +1314,17 @@ class AutoML(BaseEstimator):
                         y_rest,
                         self._state.fit_kwargs["sample_weight"][
                             rest
-                        ],  # NOTE: _prepare_data is before
+                        ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                         test_size=split_ratio,
                         random_state=RANDOM_SEED,
                     )
                     weight1 = self._state.fit_kwargs["sample_weight"][
                         first
-                    ]  # NOTE: _prepare_data is before
+                    ]  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     self._state.weight_val = concat(weight1, weight_val)
                     self._state.fit_kwargs[
                         "sample_weight"
-                    ] = concat(  # NOTE: _prepare_data is before
+                    ] = concat(  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                         weight1, weight_train
                     )
                 else:
@@ -1346,7 +1350,7 @@ class AutoML(BaseEstimator):
         elif self._state.task in REGRESSION:
             if (
                 "sample_weight" in self._state.fit_kwargs
-            ):  # NOTE: _prepare_data is before
+            ):  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                 (
                     X_train,
                     X_val,
@@ -1354,14 +1358,14 @@ class AutoML(BaseEstimator):
                     y_val,
                     self._state.fit_kwargs[
                         "sample_weight"
-                    ],  # NOTE: _prepare_data is before
+                    ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     self._state.weight_val,
                 ) = train_test_split(
                     X_train_all,
                     y_train_all,
                     self._state.fit_kwargs[
                         "sample_weight"
-                    ],  # NOTE: _prepare_data is before
+                    ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     test_size=split_ratio,
                     random_state=RANDOM_SEED,
                 )
@@ -1409,7 +1413,7 @@ class AutoML(BaseEstimator):
         if self._state.task in TS_FORECAST:
             period = self._state.fit_kwargs[
                 "period"
-            ]  # NOTE: _prepare_data is before
+            ]  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
             if period * (n_splits + 1) > y_train_all.size:
                 n_splits = int(y_train_all.size / period - 1)
                 assert n_splits >= 2, (
@@ -1716,7 +1720,7 @@ class AutoML(BaseEstimator):

             assert isinstance(
                 self._state.fit_kwargs.get("period"),
-                int,  # NOTE: _decide_split_type is before
+                int,  # NOTE: _decide_split_type is before kwargs is updated to fit_kwargs_by_estimator
             ), f"missing a required integer 'period' for '{TS_FORECAST}' task."
         elif self._state.task == "rank":
             assert (
@@ -1897,32 +1901,14 @@ class AutoML(BaseEstimator):
     @property
     def trainable(self) -> Callable[[dict], Optional[float]]:
         """Training function.
-
         Returns:
             A function that evaluates each config and returns the loss.
""" self._state.time_from_start = 0 - for estimator in self.estimator_list: - search_state = self._search_states[estimator] - if not hasattr(search_state, "training_function"): - if self._use_ray is not False: - from ray.tune import with_parameters - - search_state.training_function = with_parameters( - AutoMLState._compute_with_config_base, - state=self._state, - estimator=estimator, - ) - else: - search_state.training_function = partial( - AutoMLState._compute_with_config_base, - state=self._state, - estimator=estimator, - ) states = self._search_states mem_res = self._mem_thres - def train(config: dict): + def train(config: dict, state): sample_size = config.get("FLAML_sample_size") config = config.get("ml", config).copy() @@ -1932,18 +1918,33 @@ class AutoML(BaseEstimator): # check memory constraints before training if states[estimator].learner_class.size(config) <= mem_res: del config["learner"] - result = states[estimator].training_function(config) - return result + result = AutoMLState._compute_with_config_base( + config, state=state, estimator=estimator + ) else: - return { + # If search algorithm is not in flaml, it does not handle the config constraint, should also tune.report before return + result = { "pred_time": 0, "wall_clock_time": None, "metric_for_logging": np.inf, "val_loss": np.inf, "trained_estimator": None, } + tune.report(**result) + return result - return train + if self._use_ray is not False: + from ray.tune import with_parameters + + return with_parameters( + train, + state=self._state, + ) + else: + return partial( + train, + state=self._state, + ) @property def metric_constraints(self) -> list: @@ -2180,10 +2181,12 @@ class AutoML(BaseEstimator): augment rare classes. min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample size when sample=True. - use_ray: boolean or dict + use_ray: boolean or dict. If boolean: default=False | Whether to use ray to run the training in separate processes. This can be used to prevent OOM for large datasets, but will incur more overhead in time. + If dict: the dict contains the keywords arguments to be passed to + [ray.tune.run](https://docs.ray.io/en/latest/tune/api_docs/execution.html). metric_constraints: list, default=[] | The list of metric constraints. 
                 Each element in this list is a 3-tuple, which shall be expressed
                 in the following format: the first element of the 3-tuple is the name of the
@@ -2565,7 +2568,7 @@ class AutoML(BaseEstimator):
                 this_estimator_kwargs = this_estimator_kwargs.copy()
                 this_estimator_kwargs.update(
                     self._state.fit_kwargs
-                )  # update the shallow copy
+                )  # update the shallow copy of fit_kwargs to fit_kwargs_by_estimator
                 self._state.fit_kwargs_by_estimator[
                     estimator_name
                 ] = this_estimator_kwargs  # set self._state.fit_kwargs_by_estimator[estimator_name] to the update, so only self._state.fit_kwargs_by_estimator will be updated
@@ -2579,7 +2582,9 @@ class AutoML(BaseEstimator):
                 data_size=self._state.data_size,
                 task=self._state.task,
                 starting_point=starting_points.get(estimator_name),
-                period=self._state.fit_kwargs.get("period"),  # NOTE: this is after
+                period=self._state.fit_kwargs.get(
+                    "period"
+                ),  # NOTE: this is after kwargs is updated to fit_kwargs_by_estimator
                 custom_hp=custom_hp and custom_hp.get(estimator_name),
                 max_iter=max_iter,
             )
@@ -2643,7 +2648,7 @@ class AutoML(BaseEstimator):
                     self._sample_weight_full,
                     self._state.fit_kwargs_by_estimator,
                     self._state.fit_kwargs,
-                )  # NOTE: this is after
+                )  # NOTE: this is after kwargs is updated to fit_kwargs_by_estimator
                 del self._state.groups, self._state.groups_all, self._state.groups_val
             logger.setLevel(old_level)
@@ -3257,7 +3262,7 @@ class AutoML(BaseEstimator):
                 stacker.fit(
                     self._X_train_all,
                     self._y_train_all,
-                    **sample_weight_dict,  # NOTE: _search is after
+                    **sample_weight_dict,  # NOTE: _search is after kwargs is updated to fit_kwargs_by_estimator
                 )
                 logger.info(f"ensemble: {stacker}")
                 self._trained_estimator = stacker
@@ -3276,7 +3281,7 @@ class AutoML(BaseEstimator):
                 stacker.fit(
                     self._X_train_all,
                     self._y_train_all,
-                    **sample_weight_dict,  # NOTE: _search is after
+                    **sample_weight_dict,  # NOTE: _search is after kwargs is updated to fit_kwargs_by_estimator
                 )
                 logger.info(f"ensemble: {stacker}")
                 self._trained_estimator = stacker
diff --git a/flaml/ml.py b/flaml/ml.py
index 092a02565..81301db88 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -420,7 +420,6 @@ def get_val_loss(
     #     fit_kwargs['groups_val'] = groups_val
     #     fit_kwargs['X_val'] = X_val
     #     fit_kwargs['y_val'] = y_val
-
     estimator.fit(X_train, y_train, budget, **fit_kwargs)
     val_loss, metric_for_logging, pred_time, _ = _eval_estimator(
         config,
diff --git a/test/automl/test_constraints.py b/test/automl/test_constraints.py
index 53ce52127..a47be3078 100644
--- a/test/automl/test_constraints.py
+++ b/test/automl/test_constraints.py
@@ -21,7 +21,7 @@ def test_metric_constraints():
         "log_type": "all",
         "retrain_full": "budget",
         "keep_search_state": True,
-        "time_budget": 1,
+        "time_budget": 2,
         "pred_time_limit": 5.1e-05,
     }
     from sklearn.externals._arff import ArffException
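
Usage sketch (illustrative, not part of the patch): the snippet below shows how the `use_ray` dict and the metric-constraint format documented in the docstrings above might be exercised together. The dataset, time budget, per-trial resources, and threshold are assumptions chosen for illustration; the `pred_time` bound simply mirrors the `pred_time_limit` value used in test_constraints.py.

    # Minimal sketch, assuming flaml, ray[tune], and scikit-learn are installed.
    # All concrete values here are illustrative, not taken from this patch.
    from flaml import AutoML
    from sklearn.datasets import load_iris

    X, y = load_iris(return_X_y=True)
    automl = AutoML()
    automl.fit(
        X,
        y,
        task="classification",
        time_budget=10,
        # As a dict, use_ray is forwarded as keyword arguments to ray.tune.run,
        # e.g. to run each trial in a separate Ray process with 2 CPUs.
        use_ray={"resources_per_trial": {"cpu": 2}},
        # Each metric constraint is a 3-tuple (metric name, inequality sign,
        # threshold); this one bounds prediction time per example.
        metric_constraints=[("pred_time", "<=", 5.1e-05)],
    )
    print(automl.best_estimator)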