Mirror of https://github.com/microsoft/FLAML.git, synced 2026-02-09 02:09:16 +08:00
Logging multiple checkpoints (#394)
flaml/automl.py

@@ -2366,7 +2366,17 @@ class AutoML(BaseEstimator):
         if mlflow is not None and mlflow.active_run():
             with mlflow.start_run(nested=True):
                 mlflow.log_metric("iter_counter", self._iter_per_learner[estimator])
-                mlflow.log_param("metric_for_logging", search_state.metric_for_logging)
+                if "intermediate_results" in search_state.metric_for_logging:
+                    for each_entry in search_state.metric_for_logging[
+                        "intermediate_results"
+                    ]:
+                        with mlflow.start_run(nested=True):
+                            mlflow.log_metrics(each_entry)
+                            mlflow.log_metric(
+                                "iter_counter", self._iter_per_learner[estimator]
+                            )
+                    del search_state.metric_for_logging["intermediate_results"]
+                mlflow.log_metrics(search_state.metric_for_logging)
                 mlflow.log_metric("trial_time", search_state.trial_time)
                 mlflow.log_metric("wall_clock_time", self._state.time_from_start)
                 mlflow.log_metric("validation_loss", search_state.val_loss)
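Taken on its own, the pattern this hunk introduces is a parent MLflow run per trial with one nested run per intermediate checkpoint, which is what the commit title refers to. A minimal self-contained sketch of that pattern, with made-up stand-ins for the real FLAML state:

import mlflow

# Stand-ins for the real FLAML state (values made up for illustration):
iter_counter = 7  # plays the role of self._iter_per_learner[estimator]
metric_for_logging = {
    "intermediate_results": [{"eval_loss": 0.52}, {"eval_loss": 0.41}],
    "pred_time": 0.03,
}

with mlflow.start_run():  # parent run for the trial
    if "intermediate_results" in metric_for_logging:
        for each_entry in metric_for_logging["intermediate_results"]:
            with mlflow.start_run(nested=True):  # one child run per checkpoint
                mlflow.log_metrics(each_entry)
                mlflow.log_metric("iter_counter", iter_counter)
        del metric_for_logging["intermediate_results"]
    mlflow.log_metrics(metric_for_logging)  # remaining scalar metrics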
flaml/ml.py (26 lines changed)
@@ -94,11 +94,19 @@ huggingface_submetric_to_metric = {"rouge1": "rouge", "rouge2": "rouge"}
 def get_estimator_class(task, estimator_name):
     # when adding a new learner, need to add an elif branch
     if "xgboost" == estimator_name:
-        estimator_class = XGBoost_TS_Regressor if TS_FORECAST == task else XGBoostSklearnEstimator
+        estimator_class = (
+            XGBoost_TS_Regressor if TS_FORECAST == task else XGBoostSklearnEstimator
+        )
     elif "xgb_limitdepth" == estimator_name:
-        estimator_class = XGBoostLimitDepth_TS_Regressor if TS_FORECAST == task else XGBoostLimitDepthEstimator
+        estimator_class = (
+            XGBoostLimitDepth_TS_Regressor
+            if TS_FORECAST == task
+            else XGBoostLimitDepthEstimator
+        )
     elif "rf" == estimator_name:
-        estimator_class = RF_TS_Regressor if TS_FORECAST == task else RandomForestEstimator
+        estimator_class = (
+            RF_TS_Regressor if TS_FORECAST == task else RandomForestEstimator
+        )
     elif "lgbm" == estimator_name:
         estimator_class = LGBM_TS_Regressor if TS_FORECAST == task else LGBMEstimator
     elif "lrl1" == estimator_name:
@@ -108,7 +116,9 @@ def get_estimator_class(task, estimator_name):
     elif "catboost" == estimator_name:
         estimator_class = CatBoostEstimator
     elif "extra_tree" == estimator_name:
-        estimator_class = ExtraTrees_TS_Regressor if TS_FORECAST == task else ExtraTreesEstimator
+        estimator_class = (
+            ExtraTrees_TS_Regressor if TS_FORECAST == task else ExtraTreesEstimator
+        )
     elif "kneighbor" == estimator_name:
         estimator_class = KNeighborsEstimator
     elif "prophet" in estimator_name:
@@ -207,8 +217,10 @@ def metric_loss_score(
             + ", ".join(huggingface_metric_to_mode.keys())
             + ". Please pass a customized metric function to AutoML.fit(metric=func)"
         )
-        multiplier = -1 if metric_mode == "max" else 1
-        return score * multiplier
+        if metric_mode == "max":
+            return 1 - score
+        else:
+            return score


 def is_in_sklearn_metric_name_set(metric_name):
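Behaviorally, this hunk changes how a maximized metric is turned into the loss FLAML minimizes: the old code returned score * -1, the new code returns 1 - score. Both orderings agree, so the search itself is unaffected; presumably the offset keeps reported losses non-negative for score-type metrics bounded by 1. The new conversion in isolation:

def metric_to_loss(score: float, metric_mode: str) -> float:
    # New behavior: a maximized score s becomes loss 1 - s;
    # a minimized metric is already a loss and passes through.
    if metric_mode == "max":
        return 1 - score
    return score

assert abs(metric_to_loss(0.9, "max") - 0.1) < 1e-12  # accuracy 0.9 -> loss 0.1
assert metric_to_loss(0.25, "min") == 0.25            # e.g. a loss-type metric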
@@ -409,6 +421,8 @@ def get_val_loss(
         log_training_metric,
         fit_kwargs,
     )
+    if hasattr(estimator, "intermediate_results"):
+        metric_for_logging["intermediate_results"] = estimator.intermediate_results
     train_time = time.time() - start
     return val_loss, metric_for_logging, train_time, pred_time

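These two added lines are the bridge between the estimator and the MLflow loop in automl.py: whatever an estimator stored in an intermediate_results attribute is forwarded inside metric_for_logging. A minimal sketch with a hypothetical estimator:

import time

class DummyEstimator:
    """Hypothetical estimator that records per-checkpoint metrics."""

    def fit(self):
        self.intermediate_results = [{"eval_loss": 0.6}, {"eval_loss": 0.5}]

start = time.time()
estimator = DummyEstimator()
estimator.fit()
metric_for_logging = {}
# Mirrors the two lines added to get_val_loss:
if hasattr(estimator, "intermediate_results"):
    metric_for_logging["intermediate_results"] = estimator.intermediate_results
train_time = time.time() - start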
flaml/model.py

@@ -675,6 +675,9 @@ class TransformersEstimator(BaseEstimator):
                 X_train=self._X_train,
                 y_train=self._y_train,
             )
+            if not hasattr(self, "intermediate_results"):
+                self.intermediate_results = []
+            self.intermediate_results.append(metric_dict)
             return metric_dict

     def _init_model_for_predict(self, X_test):
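And this hunk is where those per-checkpoint entries originate: the metrics hook runs once per evaluation, and each metric_dict is appended to a lazily created list. The accumulation idiom in isolation (class and method names hypothetical):

class CheckpointRecorder:
    """Hypothetical stand-in for TransformersEstimator's metrics hook."""

    def on_evaluate(self, metric_dict):
        # Lazily create the list on first evaluation, then append.
        if not hasattr(self, "intermediate_results"):
            self.intermediate_results = []
        self.intermediate_results.append(metric_dict)
        return metric_dict

recorder = CheckpointRecorder()
for metric_dict in ({"eval_loss": 0.6}, {"eval_loss": 0.4}):
    recorder.on_evaluate(metric_dict)
assert len(recorder.intermediate_results) == 2  # one entry per checkpoint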
@@ -702,6 +705,7 @@ class TransformersEstimator(BaseEstimator):
             )
             if self._task == MULTICHOICECLASSIFICATION
             else None,
+            compute_metrics=self._compute_metrics_by_dataset_name,
         )
         return test_dataset, training_args

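For context on the one-line addition: in the transformers API, a Trainer built with a compute_metrics callable merges that callable's outputs, with an eval_ prefix, into the dict returned by trainer.evaluate(). That is what lets the custom-metric test below read metrics["eval_val_loss"]. A hedged sketch of the callback shape (model and dataset wiring elided):

import numpy as np

def compute_metrics(eval_pred):
    # Signature expected by transformers.Trainer: an EvalPrediction with
    # .predictions (logits) and .label_ids.
    predictions = np.argmax(eval_pred.predictions, axis=-1)
    accuracy = float((predictions == eval_pred.label_ids).mean())
    return {"val_loss": 1 - accuracy}  # surfaces as "eval_val_loss"

# trainer = Trainer(model=model, args=training_args,
#                   compute_metrics=compute_metrics)
# trainer.evaluate(ds)  # -> {"eval_val_loss": ..., "eval_loss": ..., ...}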
@@ -11,9 +11,9 @@ def _test_hf_data():
     from datasets import load_dataset

     try:
-        train_dataset = load_dataset("glue", "mrpc", split="train").to_pandas()
-        dev_dataset = load_dataset("glue", "mrpc", split="validation").to_pandas()
-        test_dataset = load_dataset("glue", "mrpc", split="test").to_pandas()
+        train_dataset = load_dataset("glue", "mrpc", split="train[:1%]").to_pandas()
+        dev_dataset = load_dataset("glue", "mrpc", split="validation[:1%]").to_pandas()
+        test_dataset = load_dataset("glue", "mrpc", split="test[:1%]").to_pandas()
     except requests.exceptions.ConnectionError:
         return

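The only change in this hunk is the split spec: "train[:1%]" is the datasets library's slicing syntax, loading just the first 1% of each split so the test runs quickly. Other slice forms work the same way:

from datasets import load_dataset

tiny = load_dataset("glue", "mrpc", split="train[:1%]")  # first 1% of rows
head = load_dataset("glue", "mrpc", split="train[:50]")  # first 50 rows
print(len(tiny), len(head))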
@@ -32,7 +32,7 @@ def _test_hf_data():

     automl_settings = {
         "gpu_per_trial": 1,
-        "max_iter": 5,
+        "max_iter": 2,
         "time_budget": 5000,
         "task": "seq-classification",
         "metric": "accuracy",
@@ -17,6 +17,7 @@ def custom_metric(
 ):
     from datasets import Dataset
     from flaml.model import TransformersEstimator
+    from flaml.nlp.utils import load_default_huggingface_metric_for_task

     if estimator._trainer is None:
         estimator._init_model_for_predict(X_test)
@@ -31,12 +32,13 @@ def custom_metric(
     X_test, _ = estimator._preprocess(X_test)
     eval_dataset = Dataset.from_pandas(X_test)

-    trainer_compute_metrics_cache = trainer.compute_metrics
-    trainer.compute_metrics = None
+    estimator_metric_cache = estimator._metric
+    estimator._metric = load_default_huggingface_metric_for_task(estimator._task)

     metrics = trainer.evaluate(eval_dataset)
-    trainer.compute_metrics = trainer_compute_metrics_cache
-    return metrics["eval_loss"], metrics
+    estimator._metric = estimator_metric_cache
+
+    return metrics["eval_val_loss"], metrics


 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")

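The revised test swaps estimator._metric to the task's default metric for the duration of trainer.evaluate() and restores it afterwards, rather than nulling out trainer.compute_metrics. A generic sketch of that swap-and-restore idiom; note the try/finally is an addition of this sketch (the test restores unconditionally but without exception safety):

def evaluate_with_metric(estimator, metric, evaluate):
    """Temporarily swap estimator._metric, run evaluate, then restore."""
    cached = estimator._metric
    estimator._metric = metric
    try:
        return evaluate()
    finally:
        estimator._metric = cached  # restore even if evaluation raises

# Hypothetical stand-ins for the real estimator and trainer:
class Est:
    _metric = "custom"

est = Est()
result = evaluate_with_metric(est, "accuracy", lambda: {"eval_val_loss": 0.1})
assert est._metric == "custom" and result["eval_val_loss"] == 0.1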
@@ -38,7 +38,7 @@ def test_regression():
         "max_iter": 2,
         "time_budget": 5,
         "task": "seq-regression",
-        "metric": "rmse",
+        "metric": "pearsonr",
         "starting_points": {"transformer": {"num_train_epochs": 1}},
         "use_ray": True,
     }
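One note on the metric swap in this last hunk: rmse is a minimized metric while pearsonr is maximized, so under the metric_loss_score change earlier in this commit the search presumably minimizes 1 - pearsonr. For reference, scipy computes the statistic as:

from scipy.stats import pearsonr

preds = [1.0, 2.0, 3.0, 4.0]
labels = [1.1, 1.9, 3.2, 3.9]
score, _pvalue = pearsonr(preds, labels)
loss = 1 - score  # what the search minimizes for a "max"-mode metric
print(round(loss, 4))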