Logging multiple checkpoints (#394)

Xueqing Liu
2022-01-12 22:50:39 -05:00
committed by GitHub
parent 303d40c76c
commit f41f1c2198
6 changed files with 46 additions and 16 deletions

View File

@@ -2366,7 +2366,17 @@ class AutoML(BaseEstimator):
if mlflow is not None and mlflow.active_run():
with mlflow.start_run(nested=True):
mlflow.log_metric("iter_counter", self._iter_per_learner[estimator])
mlflow.log_param("metric_for_logging", search_state.metric_for_logging)
if "intermediate_results" in search_state.metric_for_logging:
for each_entry in search_state.metric_for_logging[
"intermediate_results"
]:
with mlflow.start_run(nested=True):
mlflow.log_metrics(each_entry)
mlflow.log_metric(
"iter_counter", self._iter_per_learner[estimator]
)
del search_state.metric_for_logging["intermediate_results"]
mlflow.log_metrics(search_state.metric_for_logging)
mlflow.log_metric("trial_time", search_state.trial_time)
mlflow.log_metric("wall_clock_time", self._state.time_from_start)
mlflow.log_metric("validation_loss", search_state.val_loss)

View File

@@ -94,11 +94,19 @@ huggingface_submetric_to_metric = {"rouge1": "rouge", "rouge2": "rouge"}
def get_estimator_class(task, estimator_name):
# when adding a new learner, need to add an elif branch
if "xgboost" == estimator_name:
estimator_class = XGBoost_TS_Regressor if TS_FORECAST == task else XGBoostSklearnEstimator
estimator_class = (
XGBoost_TS_Regressor if TS_FORECAST == task else XGBoostSklearnEstimator
)
elif "xgb_limitdepth" == estimator_name:
estimator_class = XGBoostLimitDepth_TS_Regressor if TS_FORECAST == task else XGBoostLimitDepthEstimator
estimator_class = (
XGBoostLimitDepth_TS_Regressor
if TS_FORECAST == task
else XGBoostLimitDepthEstimator
)
elif "rf" == estimator_name:
estimator_class = RF_TS_Regressor if TS_FORECAST == task else RandomForestEstimator
estimator_class = (
RF_TS_Regressor if TS_FORECAST == task else RandomForestEstimator
)
elif "lgbm" == estimator_name:
estimator_class = LGBM_TS_Regressor if TS_FORECAST == task else LGBMEstimator
elif "lrl1" == estimator_name:
@@ -108,7 +116,9 @@ def get_estimator_class(task, estimator_name):
elif "catboost" == estimator_name:
estimator_class = CatBoostEstimator
elif "extra_tree" == estimator_name:
estimator_class = ExtraTrees_TS_Regressor if TS_FORECAST == task else ExtraTreesEstimator
estimator_class = (
ExtraTrees_TS_Regressor if TS_FORECAST == task else ExtraTreesEstimator
)
elif "kneighbor" == estimator_name:
estimator_class = KNeighborsEstimator
elif "prophet" in estimator_name:
@@ -207,8 +217,10 @@ def metric_loss_score(
+ ", ".join(huggingface_metric_to_mode.keys())
+ ". Please pass a customized metric function to AutoML.fit(metric=func)"
)
multiplier = -1 if metric_mode == "max" else 1
return score * multiplier
if metric_mode == "max":
return 1 - score
else:
return score
def is_in_sklearn_metric_name_set(metric_name):
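
The updated branch in metric_loss_score returns 1 - score for metrics that are maximized, so a metric bounded by 1 (accuracy, F1, ROUGE, ...) maps to a non-negative loss instead of a negated score. A tiny illustration of the same convention, using an illustrative helper name rather than FLAML's API:

def to_loss(score, metric_mode):
    # "max" metrics become 1 - score; "min" metrics are already losses
    return 1 - score if metric_mode == "max" else score

print(to_loss(0.92, "max"))   # accuracy 0.92 -> loss ~0.08
print(to_loss(0.35, "min"))   # already a loss, returned unchanged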
@@ -409,6 +421,8 @@ def get_val_loss(
log_training_metric,
fit_kwargs,
)
if hasattr(estimator, "intermediate_results"):
metric_for_logging["intermediate_results"] = estimator.intermediate_results
train_time = time.time() - start
return val_loss, metric_for_logging, train_time, pred_time
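
The hasattr check added to get_val_loss is a duck-typing hook: any estimator that exposes an intermediate_results attribute has those per-checkpoint metrics copied into metric_for_logging, while estimators without the attribute are left alone. A small sketch with made-up classes:

def collect(estimator):
    # same duck-typing check as above: forward the attribute only if it exists
    metric_for_logging = {}
    if hasattr(estimator, "intermediate_results"):
        metric_for_logging["intermediate_results"] = estimator.intermediate_results
    return metric_for_logging

class WithCheckpoints:                        # hypothetical estimator
    intermediate_results = [{"eval_loss": 0.62}, {"eval_loss": 0.41}]

print(collect(WithCheckpoints()))             # per-checkpoint dicts forwarded
print(collect(object()))                      # {} for estimators without the attribute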

View File

@@ -675,6 +675,9 @@ class TransformersEstimator(BaseEstimator):
X_train=self._X_train,
y_train=self._y_train,
)
if not hasattr(self, "intermediate_results"):
self.intermediate_results = []
self.intermediate_results.append(metric_dict)
return metric_dict
def _init_model_for_predict(self, X_test):
@@ -702,6 +705,7 @@ class TransformersEstimator(BaseEstimator):
)
if self._task == MULTICHOICECLASSIFICATION
else None,
compute_metrics=self._compute_metrics_by_dataset_name,
)
return test_dataset, training_args
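
On the producer side, the hunks above make TransformersEstimator accumulate every metric dict its evaluation callback computes, creating the intermediate_results list lazily on the first call, and pass the same callback (compute_metrics=self._compute_metrics_by_dataset_name) when the model is set up for prediction. A stripped-down sketch of the accumulation pattern; the class and metric names are illustrative, not FLAML's:

class SketchEstimator:
    def _compute_metrics(self, predictions, labels):
        correct = sum(int(p == y) for p, y in zip(predictions, labels))
        metric_dict = {"automl_metric": 1 - correct / len(labels)}
        if not hasattr(self, "intermediate_results"):
            self.intermediate_results = []               # created lazily on first eval
        self.intermediate_results.append(metric_dict)    # one entry per checkpoint
        return metric_dict                               # still returned to the caller

est = SketchEstimator()
est._compute_metrics([1, 0, 1], [1, 1, 1])   # evaluation at checkpoint 1
est._compute_metrics([1, 1, 1], [1, 1, 1])   # evaluation at checkpoint 2
print(est.intermediate_results)              # [{'automl_metric': 0.33...}, {'automl_metric': 0.0}]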

View File

@@ -11,9 +11,9 @@ def _test_hf_data():
from datasets import load_dataset
try:
train_dataset = load_dataset("glue", "mrpc", split="train").to_pandas()
dev_dataset = load_dataset("glue", "mrpc", split="validation").to_pandas()
test_dataset = load_dataset("glue", "mrpc", split="test").to_pandas()
train_dataset = load_dataset("glue", "mrpc", split="train[:1%]").to_pandas()
dev_dataset = load_dataset("glue", "mrpc", split="validation[:1%]").to_pandas()
test_dataset = load_dataset("glue", "mrpc", split="test[:1%]").to_pandas()
except requests.exceptions.ConnectionError:
return
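
The test above now loads only 1% of each GLUE/MRPC split. The "train[:1%]" syntax is standard Hugging Face datasets split slicing, which keeps the test data small and fast to fetch; a minimal sketch (network access needed on the first call):

from datasets import load_dataset

# Slice notation inside the split string loads only part of the split.
tiny_train = load_dataset("glue", "mrpc", split="train[:1%]").to_pandas()
print(len(tiny_train))   # a few dozen rows instead of the full training split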
@@ -32,7 +32,7 @@ def _test_hf_data():
automl_settings = {
"gpu_per_trial": 1,
"max_iter": 5,
"max_iter": 2,
"time_budget": 5000,
"task": "seq-classification",
"metric": "accuracy",

View File

@@ -17,6 +17,7 @@ def custom_metric(
):
from datasets import Dataset
from flaml.model import TransformersEstimator
from flaml.nlp.utils import load_default_huggingface_metric_for_task
if estimator._trainer is None:
estimator._init_model_for_predict(X_test)
@@ -31,12 +32,13 @@ def custom_metric(
X_test, _ = estimator._preprocess(X_test)
eval_dataset = Dataset.from_pandas(X_test)
trainer_compute_metrics_cache = trainer.compute_metrics
trainer.compute_metrics = None
estimator_metric_cache = estimator._metric
estimator._metric = load_default_huggingface_metric_for_task(estimator._task)
metrics = trainer.evaluate(eval_dataset)
trainer.compute_metrics = trainer_compute_metrics_cache
return metrics["eval_loss"], metrics
estimator._metric = estimator_metric_cache
return metrics["eval_val_loss"], metrics
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
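
The custom_metric hunk above uses a cache/swap/restore pattern around evaluate(): an attribute (estimator._metric, trainer.compute_metrics) is remembered, temporarily replaced for a single evaluation, and then restored so later trials are unaffected, with the result read from the "eval_val_loss" entry. A generic sketch of that pattern with a made-up trainer object:

class FakeTrainer:                                   # stand-in, not the transformers Trainer
    def __init__(self):
        self.compute_metrics = lambda preds: {"accuracy": 1.0}
    def evaluate(self):
        return {"eval_val_loss": 0.42}               # key mirrors the test's return value

trainer = FakeTrainer()
cached_compute_metrics = trainer.compute_metrics     # 1. remember the original callback
trainer.compute_metrics = None                       # 2. swap in a temporary value
metrics = trainer.evaluate()                         # 3. run the one-off evaluation
trainer.compute_metrics = cached_compute_metrics     # 4. restore the original
print(metrics["eval_val_loss"])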

View File

@@ -38,7 +38,7 @@ def test_regression():
"max_iter": 2,
"time_budget": 5,
"task": "seq-regression",
"metric": "rmse",
"metric": "pearsonr",
"starting_points": {"transformer": {"num_train_epochs": 1}},
"use_ray": True,
}