diff --git a/flaml/automl.py b/flaml/automl.py
index db5f9d1c1..47b9169e9 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -2366,7 +2366,17 @@ class AutoML(BaseEstimator):
         if mlflow is not None and mlflow.active_run():
             with mlflow.start_run(nested=True):
                 mlflow.log_metric("iter_counter", self._iter_per_learner[estimator])
-                mlflow.log_param("metric_for_logging", search_state.metric_for_logging)
+                if "intermediate_results" in search_state.metric_for_logging:
+                    for each_entry in search_state.metric_for_logging[
+                        "intermediate_results"
+                    ]:
+                        with mlflow.start_run(nested=True):
+                            mlflow.log_metrics(each_entry)
+                            mlflow.log_metric(
+                                "iter_counter", self._iter_per_learner[estimator]
+                            )
+                    del search_state.metric_for_logging["intermediate_results"]
+                mlflow.log_metrics(search_state.metric_for_logging)
                 mlflow.log_metric("trial_time", search_state.trial_time)
                 mlflow.log_metric("wall_clock_time", self._state.time_from_start)
                 mlflow.log_metric("validation_loss", search_state.val_loss)
diff --git a/flaml/ml.py b/flaml/ml.py
index 0655d5cc8..5564c5d79 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -94,11 +94,19 @@ huggingface_submetric_to_metric = {"rouge1": "rouge", "rouge2": "rouge"}
 def get_estimator_class(task, estimator_name):
     # when adding a new learner, need to add an elif branch
     if "xgboost" == estimator_name:
-        estimator_class = XGBoost_TS_Regressor if TS_FORECAST == task else XGBoostSklearnEstimator
+        estimator_class = (
+            XGBoost_TS_Regressor if TS_FORECAST == task else XGBoostSklearnEstimator
+        )
     elif "xgb_limitdepth" == estimator_name:
-        estimator_class = XGBoostLimitDepth_TS_Regressor if TS_FORECAST == task else XGBoostLimitDepthEstimator
+        estimator_class = (
+            XGBoostLimitDepth_TS_Regressor
+            if TS_FORECAST == task
+            else XGBoostLimitDepthEstimator
+        )
     elif "rf" == estimator_name:
-        estimator_class = RF_TS_Regressor if TS_FORECAST == task else RandomForestEstimator
+        estimator_class = (
+            RF_TS_Regressor if TS_FORECAST == task else RandomForestEstimator
+        )
     elif "lgbm" == estimator_name:
         estimator_class = LGBM_TS_Regressor if TS_FORECAST == task else LGBMEstimator
     elif "lrl1" == estimator_name:
@@ -108,7 +116,9 @@ def get_estimator_class(task, estimator_name):
     elif "catboost" == estimator_name:
         estimator_class = CatBoostEstimator
     elif "extra_tree" == estimator_name:
-        estimator_class = ExtraTrees_TS_Regressor if TS_FORECAST == task else ExtraTreesEstimator
+        estimator_class = (
+            ExtraTrees_TS_Regressor if TS_FORECAST == task else ExtraTreesEstimator
+        )
     elif "kneighbor" == estimator_name:
         estimator_class = KNeighborsEstimator
     elif "prophet" in estimator_name:
@@ -207,8 +217,10 @@ def metric_loss_score(
                 + ", ".join(huggingface_metric_to_mode.keys())
                 + ". Please pass a customized metric function to AutoML.fit(metric=func)"
             )
-        multiplier = -1 if metric_mode == "max" else 1
-        return score * multiplier
+        if metric_mode == "max":
+            return 1 - score
+        else:
+            return score
 
 
 def is_in_sklearn_metric_name_set(metric_name):
@@ -409,6 +421,8 @@ def get_val_loss(
         log_training_metric,
         fit_kwargs,
     )
+    if hasattr(estimator, "intermediate_results"):
+        metric_for_logging["intermediate_results"] = estimator.intermediate_results
     train_time = time.time() - start
     return val_loss, metric_for_logging, train_time, pred_time
 
diff --git a/flaml/model.py b/flaml/model.py
index 71896f11a..045ba11f8 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -675,6 +675,9 @@ class TransformersEstimator(BaseEstimator):
             X_train=self._X_train,
             y_train=self._y_train,
         )
+        if not hasattr(self, "intermediate_results"):
+            self.intermediate_results = []
+        self.intermediate_results.append(metric_dict)
         return metric_dict
 
     def _init_model_for_predict(self, X_test):
@@ -702,6 +705,7 @@ class TransformersEstimator(BaseEstimator):
             )
             if self._task == MULTICHOICECLASSIFICATION
             else None,
+            compute_metrics=self._compute_metrics_by_dataset_name,
         )
         return test_dataset, training_args
 
diff --git a/test/nlp/run_gpu.py b/test/nlp/run_gpu.py
index 8551dd70b..1574c7fc9 100644
--- a/test/nlp/run_gpu.py
+++ b/test/nlp/run_gpu.py
@@ -11,9 +11,9 @@ def _test_hf_data():
     from datasets import load_dataset
 
     try:
-        train_dataset = load_dataset("glue", "mrpc", split="train").to_pandas()
-        dev_dataset = load_dataset("glue", "mrpc", split="validation").to_pandas()
-        test_dataset = load_dataset("glue", "mrpc", split="test").to_pandas()
+        train_dataset = load_dataset("glue", "mrpc", split="train[:1%]").to_pandas()
+        dev_dataset = load_dataset("glue", "mrpc", split="validation[:1%]").to_pandas()
+        test_dataset = load_dataset("glue", "mrpc", split="test[:1%]").to_pandas()
     except requests.exceptions.ConnectionError:
         return
 
@@ -32,7 +32,7 @@ def _test_hf_data():
 
     automl_settings = {
         "gpu_per_trial": 1,
-        "max_iter": 5,
+        "max_iter": 2,
         "time_budget": 5000,
         "task": "seq-classification",
         "metric": "accuracy",
diff --git a/test/nlp/test_autohf_custom_metric.py b/test/nlp/test_autohf_custom_metric.py
index 6df95b943..63461ea87 100644
--- a/test/nlp/test_autohf_custom_metric.py
+++ b/test/nlp/test_autohf_custom_metric.py
@@ -17,6 +17,7 @@ def custom_metric(
 ):
     from datasets import Dataset
     from flaml.model import TransformersEstimator
+    from flaml.nlp.utils import load_default_huggingface_metric_for_task
 
     if estimator._trainer is None:
         estimator._init_model_for_predict(X_test)
@@ -31,12 +32,13 @@ def custom_metric(
 
     X_test, _ = estimator._preprocess(X_test)
     eval_dataset = Dataset.from_pandas(X_test)
 
-    trainer_compute_metrics_cache = trainer.compute_metrics
-    trainer.compute_metrics = None
+    estimator_metric_cache = estimator._metric
+    estimator._metric = load_default_huggingface_metric_for_task(estimator._task)
     metrics = trainer.evaluate(eval_dataset)
-    trainer.compute_metrics = trainer_compute_metrics_cache
-    return metrics["eval_loss"], metrics
+    estimator._metric = estimator_metric_cache
+
+    return metrics["eval_val_loss"], metrics
 
 
 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
diff --git a/test/nlp/test_autohf_regression.py b/test/nlp/test_autohf_regression.py
index a4b4877db..7edc01751 100644
--- a/test/nlp/test_autohf_regression.py
+++ b/test/nlp/test_autohf_regression.py
@@ -38,7 +38,7 @@ def test_regression():
         "max_iter": 2,
         "time_budget": 5,
         "task": "seq-regression",
-        "metric": "rmse",
+        "metric": "pearsonr",
         "starting_points": {"transformer": {"num_train_epochs": 1}},
         "use_ray": True,
     }