set converge flag when no trial can be sampled (#217)

* set converge flag when no trial can be sampled

* require custom_metric to return dict for logging
close #218

* estimate time budget needed

* log info per iteration
Chi Wang
2021-09-23 10:49:02 -07:00
committed by GitHub
parent b1115d5347
commit 16a97bec76
3 changed files with 19 additions and 16 deletions
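
For illustration, a minimal sketch of a custom metric satisfying the second bullet above. Only the return convention comes from this change (a float to minimize plus a dict of metrics to log); the argument list and the metric computations shown here are assumptions, not part of the commit.

    # Hypothetical custom metric: argument names are assumptions;
    # only the (float, dict) return convention is required by this change.
    from sklearn.metrics import mean_squared_error

    def custom_metric(
        X_val, y_val, estimator, labels, X_train, y_train, *args, **kwargs
    ):
        val_loss = mean_squared_error(y_val, estimator.predict(X_val))
        train_loss = mean_squared_error(y_train, estimator.predict(X_train))
        # metric_to_minimize is a float; metrics_to_log must now be a dict
        return val_loss, {"val_loss": val_loss, "train_loss": train_loss}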


@@ -1312,7 +1312,7 @@ class AutoML:
                     return metric_to_minimize, metrics_to_log
                 which returns a float number as the minimization objective,
-                and a tuple of floats or a dictionary as the metrics to log.
+                and a dictionary as the metrics to log.
             task: A string of the task type, e.g.,
                 'classification', 'regression', 'forecast', 'rank'.
             n_jobs: An integer of the number of threads for training.
@@ -1853,6 +1853,16 @@ class AutoML:
                         self._search_states[e].init_eci / eci_base * self._eci[0]
                     )
                 self._estimator_index = 0
+                min_budget = max(10 * self._eci[0], sum(self._eci))
+                max_budget = 10000 * self._eci[0]
+                if search_state.sample_size:
+                    ratio = search_state.data_size / search_state.sample_size
+                    min_budget *= ratio
+                    max_budget *= ratio
+                logger.info(
+                    f"Estimated sufficient time budget={max_budget:.0f}s."
+                    f" Estimated necessary time budget={min_budget:.0f}s."
+                )
             if result["wall_clock_time"] is not None:
                 self._state.time_from_start = result["wall_clock_time"]
             # logger.info(f"{self._search_states[estimator].sample_size}, {data_size}")
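
As a concrete reading of the new estimate, a small sketch with made-up ECI values and sample sizes (all numbers below are illustrative, not from the commit):

    # Illustrative only: invented expected-cost-for-improvement (ECI) values.
    eci = [2.0, 5.0, 3.0]                    # first entry = current estimator
    data_size, sample_size = 100_000, 10_000

    min_budget = max(10 * eci[0], sum(eci))  # 20.0
    max_budget = 10000 * eci[0]              # 20000.0
    ratio = data_size / sample_size          # 10.0
    min_budget *= ratio                      # 200 -> "necessary" budget (s)
    max_budget *= ratio                      # 200000 -> "sufficient" budget (s)

With these numbers, the new log line would read: Estimated sufficient time budget=200000s. Estimated necessary time budget=200s.
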
@@ -1923,7 +1933,7 @@ class AutoML:
mlflow.log_param("best_config", search_state.best_config)
mlflow.log_param("best_learner", self._best_estimator)
logger.info(
" at {:.1f}s,\tbest {}'s error={:.4f},\tbest {}'s error={:.4f}".format(
" at {:.1f}s,\testimator {}'s best error={:.4f},\tbest estimator {}'s best error={:.4f}".format(
self._state.time_from_start,
estimator,
search_state.best_loss,
@@ -1949,18 +1959,16 @@ class AutoML:
f"exceeds {self._warn_threshold} times the time taken "
"to find the best model."
)
self._warn_threshold *= 10
if self._early_stop:
logger.warning("Stopping search as early_stop is set to True.")
break
self._warn_threshold *= 10
else:
logger.info(f"stop trying learner {estimator}")
if self._estimator_index is not None:
self._active_estimators.remove(estimator)
self._estimator_index -= 1
self._state.time_from_start = time.time() - self._start_time_flag
if self._state.time_budget > self._state.time_from_start:
search_state.search_alg.searcher._is_ls_ever_converged = True
search_state.search_alg.searcher._is_ls_ever_converged = True
if (
self._retrain_in_budget
and best_config_sig
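
A minimal usage sketch of the behavior this hunk targets, assuming the early_stop and time_budget fit arguments available in this FLAML version (dataset and budget are placeholders): with early_stop=True, the search can now also stop once a learner's local search is marked converged because no further trial can be sampled.

    from sklearn.datasets import fetch_california_housing
    from flaml import AutoML

    X, y = fetch_california_housing(return_X_y=True)
    automl = AutoML()
    # placeholder budget; early_stop lets the converge flag end the search early
    automl.fit(X, y, task="regression", time_budget=60, early_stop=True)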


@@ -232,8 +232,7 @@ def _eval_estimator(
             groups_test,
             fit_kwargs.get("groups"),
         )
-        if isinstance(metric_for_logging, dict):
-            pred_time = metric_for_logging.get("pred_time", 0)
+        pred_time = metric_for_logging.get("pred_time", 0)
         test_pred_y = None
         # eval_metric may return test_pred_y but not necessarily. Setting None for now.
     return test_loss, metric_for_logging, pred_time, test_pred_y
@@ -373,9 +372,7 @@ def evaluate_model_CV(
         total_fold_num += 1
         total_val_loss += val_loss_i
         if log_training_metric or not isinstance(eval_metric, str):
-            if isinstance(total_metric, list):
-                total_metric = [total_metric[i] + v for i, v in enumerate(metric_i)]
-            elif isinstance(total_metric, dict):
+            if isinstance(total_metric, dict):
                 total_metric = {k: total_metric[k] + v for k, v in metric_i.items()}
             elif total_metric is not None:
                 total_metric += metric_i
@@ -392,9 +389,7 @@ def evaluate_model_CV(
     val_loss = np.max(val_loss_list)
     n = total_fold_num
     if log_training_metric or not isinstance(eval_metric, str):
-        if isinstance(total_metric, list):
-            metric = [v / n for v in total_metric]
-        elif isinstance(total_metric, dict):
+        if isinstance(total_metric, dict):
             metric = {k: v / n for k, v in total_metric.items()}
         else:
             metric = total_metric / n
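
To show what the two simplified branches now compute, a self-contained sketch of accumulating and averaging a metrics dict across folds (the fold values are made up; the numeric-metric branch is kept only to mirror the diff):

    fold_metrics = [
        {"val_loss": 0.30, "train_loss": 0.20},
        {"val_loss": 0.40, "train_loss": 0.25},
    ]
    total_metric = None
    for metric_i in fold_metrics:
        if isinstance(total_metric, dict):
            total_metric = {k: total_metric[k] + v for k, v in metric_i.items()}
        elif total_metric is not None:
            total_metric += metric_i          # numeric-metric path from the diff
        else:
            total_metric = metric_i           # first fold
    n = len(fold_metrics)
    metric = {k: v / n for k, v in total_metric.items()}
    # metric -> {"val_loss": 0.35, "train_loss": 0.225} (up to float rounding)
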
@@ -428,7 +423,7 @@ def compute_estimator(
 ):
     estimator_class = estimator_class or get_estimator_class(task, estimator_name)
     estimator = estimator_class(**config_dic, task=task, n_jobs=n_jobs)
-    if "holdout" in eval_method:
+    if "holdout" == eval_method:
         val_loss, metric_for_logging, train_time, pred_time = get_test_loss(
             config_dic,
             estimator,


@@ -1 +1 @@
__version__ = "0.6.4"
__version__ = "0.6.5"