set converge flag when no trial can be sampled (#217)

* set converge flag when no trial can be sampled

* require custom_metric to return dict for logging
close #218

* estimate time budget needed

* log info per iteration
Chi Wang
2021-09-23 10:49:02 -07:00
committed by GitHub
parent b1115d5347
commit 16a97bec76
3 changed files with 19 additions and 16 deletions
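
For illustration, a minimal sketch of a custom metric satisfying the second bullet above. Only the return convention comes from this change (a float to minimize plus a dict of metrics to log); the argument list and the metric computations shown here are assumptions, not part of the commit.

    # Hypothetical custom metric: argument names are assumptions;
    # only the (float, dict) return convention is required by this change.
    from sklearn.metrics import mean_squared_error

    def custom_metric(
        X_val, y_val, estimator, labels, X_train, y_train, *args, **kwargs
    ):
        val_loss = mean_squared_error(y_val, estimator.predict(X_val))
        train_loss = mean_squared_error(y_train, estimator.predict(X_train))
        # metric_to_minimize is a float; metrics_to_log must now be a dict
        return val_loss, {"val_loss": val_loss, "train_loss": train_loss}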


@@ -1312,7 +1312,7 @@ class AutoML:
                     return metric_to_minimize, metrics_to_log
                 which returns a float number as the minimization objective,
-                and a tuple of floats or a dictionary as the metrics to log.
+                and a dictionary as the metrics to log.
             task: A string of the task type, e.g.,
                 'classification', 'regression', 'forecast', 'rank'.
             n_jobs: An integer of the number of threads for training.
@@ -1853,6 +1853,16 @@ class AutoML:
                         self._search_states[e].init_eci / eci_base * self._eci[0]
                     )
                 self._estimator_index = 0
+                min_budget = max(10 * self._eci[0], sum(self._eci))
+                max_budget = 10000 * self._eci[0]
+                if search_state.sample_size:
+                    ratio = search_state.data_size / search_state.sample_size
+                    min_budget *= ratio
+                    max_budget *= ratio
+                logger.info(
+                    f"Estimated sufficient time budget={max_budget:.0f}s."
+                    f" Estimated necessary time budget={min_budget:.0f}s."
+                )
             if result["wall_clock_time"] is not None:
                 self._state.time_from_start = result["wall_clock_time"]
             # logger.info(f"{self._search_states[estimator].sample_size}, {data_size}")
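
As a concrete reading of the new estimate, a small sketch with made-up ECI values and sample sizes (all numbers below are illustrative, not from the commit):

    # Illustrative only: invented expected-cost-for-improvement (ECI) values.
    eci = [2.0, 5.0, 3.0]                    # first entry = current estimator
    data_size, sample_size = 100_000, 10_000

    min_budget = max(10 * eci[0], sum(eci))  # 20.0
    max_budget = 10000 * eci[0]              # 20000.0
    ratio = data_size / sample_size          # 10.0
    min_budget *= ratio                      # 200 -> "necessary" budget (s)
    max_budget *= ratio                      # 200000 -> "sufficient" budget (s)

With these numbers, the new log line would read: Estimated sufficient time budget=200000s. Estimated necessary time budget=200s.
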
@@ -1923,7 +1933,7 @@ class AutoML:
mlflow.log_param("best_config", search_state.best_config)
mlflow.log_param("best_learner", self._best_estimator)
logger.info(
" at {:.1f}s,\tbest {}'s error={:.4f},\tbest {}'s error={:.4f}".format(
" at {:.1f}s,\testimator {}'s best error={:.4f},\tbest estimator {}'s best error={:.4f}".format(
self._state.time_from_start,
estimator,
search_state.best_loss,
@@ -1949,18 +1959,16 @@ class AutoML:
f"exceeds {self._warn_threshold} times the time taken "
"to find the best model."
)
self._warn_threshold *= 10
if self._early_stop:
logger.warning("Stopping search as early_stop is set to True.")
break
self._warn_threshold *= 10
else:
logger.info(f"stop trying learner {estimator}")
if self._estimator_index is not None:
self._active_estimators.remove(estimator)
self._estimator_index -= 1
self._state.time_from_start = time.time() - self._start_time_flag
if self._state.time_budget > self._state.time_from_start:
search_state.search_alg.searcher._is_ls_ever_converged = True
search_state.search_alg.searcher._is_ls_ever_converged = True
if (
self._retrain_in_budget
and best_config_sig
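
A minimal usage sketch of the behavior this hunk targets, assuming the early_stop and time_budget fit arguments available in this FLAML version (dataset and budget are placeholders): with early_stop=True, the search can now also stop once a learner's local search is marked converged because no further trial can be sampled.

    from sklearn.datasets import fetch_california_housing
    from flaml import AutoML

    X, y = fetch_california_housing(return_X_y=True)
    automl = AutoML()
    # placeholder budget; early_stop lets the converge flag end the search early
    automl.fit(X, y, task="regression", time_budget=60, early_stop=True)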


@@ -232,8 +232,7 @@ def _eval_estimator(
             groups_test,
             fit_kwargs.get("groups"),
         )
-        if isinstance(metric_for_logging, dict):
-            pred_time = metric_for_logging.get("pred_time", 0)
+        pred_time = metric_for_logging.get("pred_time", 0)
         test_pred_y = None
         # eval_metric may return test_pred_y but not necessarily. Setting None for now.
     return test_loss, metric_for_logging, pred_time, test_pred_y
@@ -373,9 +372,7 @@ def evaluate_model_CV(
         total_fold_num += 1
         total_val_loss += val_loss_i
         if log_training_metric or not isinstance(eval_metric, str):
-            if isinstance(total_metric, list):
-                total_metric = [total_metric[i] + v for i, v in enumerate(metric_i)]
-            elif isinstance(total_metric, dict):
+            if isinstance(total_metric, dict):
                 total_metric = {k: total_metric[k] + v for k, v in metric_i.items()}
             elif total_metric is not None:
                 total_metric += metric_i
@@ -392,9 +389,7 @@ def evaluate_model_CV(
     val_loss = np.max(val_loss_list)
     n = total_fold_num
     if log_training_metric or not isinstance(eval_metric, str):
-        if isinstance(total_metric, list):
-            metric = [v / n for v in total_metric]
-        elif isinstance(total_metric, dict):
+        if isinstance(total_metric, dict):
             metric = {k: v / n for k, v in total_metric.items()}
         else:
             metric = total_metric / n
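
To show what the two simplified branches now compute, a self-contained sketch of accumulating and averaging a metrics dict across folds (the fold values are made up; the numeric-metric branch is kept only to mirror the diff):

    fold_metrics = [
        {"val_loss": 0.30, "train_loss": 0.20},
        {"val_loss": 0.40, "train_loss": 0.25},
    ]
    total_metric = None
    for metric_i in fold_metrics:
        if isinstance(total_metric, dict):
            total_metric = {k: total_metric[k] + v for k, v in metric_i.items()}
        elif total_metric is not None:
            total_metric += metric_i          # numeric-metric path from the diff
        else:
            total_metric = metric_i           # first fold
    n = len(fold_metrics)
    metric = {k: v / n for k, v in total_metric.items()}
    # metric -> {"val_loss": 0.35, "train_loss": 0.225} (up to float rounding)
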
@@ -428,7 +423,7 @@ def compute_estimator(
 ):
     estimator_class = estimator_class or get_estimator_class(task, estimator_name)
     estimator = estimator_class(**config_dic, task=task, n_jobs=n_jobs)
-    if "holdout" in eval_method:
+    if "holdout" == eval_method:
         val_loss, metric_for_logging, train_time, pred_time = get_test_loss(
             config_dic,
             estimator,


@@ -1 +1 @@
__version__ = "0.6.4"
__version__ = "0.6.5"