FLAML/test/automl/test_regression.py

import unittest
from test.conftest import evaluate_cv_folds_with_underlying_model

import numpy as np
import pytest
import scipy.sparse
from sklearn.datasets import (
    fetch_california_housing,
    make_regression,
)

from flaml import AutoML
from flaml.automl.data import get_output_from_log
from flaml.automl.model import XGBoostEstimator
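

# Custom objective used by MyXGB1 below: with p = sigmoid(raw score), the gradient of the
# logistic loss w.r.t. the raw score is p - y and the Hessian is p * (1 - p), which is the
# (grad, hess) pair that xgboost expects from a custom objective callable.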
def logregobj(preds, dtrain):
labels = dtrain.get_label()
preds = 1.0 / (1.0 + np.exp(-preds)) # transform raw leaf weight
grad = preds - labels
hess = preds * (1.0 - preds)
return grad, hess


class MyXGB1(XGBoostEstimator):
    """XGBoostEstimator with logregobj as the objective function"""

    def __init__(self, **config):
        super().__init__(objective=logregobj, **config)


class MyXGB2(XGBoostEstimator):
    """XGBoostEstimator with 'reg:squarederror' as the objective function"""

    def __init__(self, **config):
        super().__init__(objective="reg:squarederror", **config)
class TestRegression(unittest.TestCase):
def test_regression(self):
automl = AutoML()
automl_settings = {
"time_budget": 2,
"task": "regression",
"log_file_name": "test/california.log",
"log_training_metric": True,
"n_jobs": 1,
"model_history": True,
}
X_train, y_train = fetch_california_housing(return_X_y=True, data_home="test")
n = int(len(y_train) * 9 // 10)
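        # Passing an explicit validation set should make FLAML use holdout evaluation
        # instead of cross-validation; the assert below verifies this.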
automl.fit(X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings)
assert automl._state.eval_method == "holdout"
y_pred = automl.predict(X_train)
print(y_pred)
print(automl.model.estimator)
        n_iter = automl.model.estimator.get_params().get("n_estimators")
print(automl.config_history)
print(automl.best_model_for_estimator("xgboost"))
print(automl.best_iteration)
print(automl.best_estimator)
print(get_output_from_log(automl_settings["log_file_name"], 1))
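        # retrain_from_log replays the best configuration recorded in the log file and
        # retrains it on the given data (train_full=True uses the full training set).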
automl.retrain_from_log(
task="regression",
log_file_name=automl_settings["log_file_name"],
X_train=X_train,
y_train=y_train,
train_full=True,
time_budget=1,
)
automl.retrain_from_log(
task="regression",
log_file_name=automl_settings["log_file_name"],
X_train=X_train,
y_train=y_train,
time_budget=0,
)
automl = AutoML()
automl.retrain_from_log(
task="regression",
log_file_name=automl_settings["log_file_name"],
X_train=X_train[:n],
y_train=y_train[:n],
train_full=True,
)
print(automl.model.estimator)
y_pred2 = automl.predict(X_train)
        # In some rare cases, the last config is early-stopped yet is still the best config,
        # but the logged config's n_estimators is not reduced accordingly.
        assert n_iter != automl.model.estimator.get_params().get("n_estimators") or (y_pred == y_pred2).all()

def test_sparse_matrix_regression(self):
X_train = scipy.sparse.random(300, 900, density=0.0001)
y_train = np.random.uniform(size=300)
X_val = scipy.sparse.random(100, 900, density=0.0001)
y_val = np.random.uniform(size=100)
automl = AutoML()
settings = {
"time_budget": 2,
"metric": "mae",
"task": "regression",
"log_file_name": "test/sparse_regression.log",
"n_jobs": 1,
"model_history": True,
"keep_search_state": True,
"verbose": 0,
"early_stop": True,
}
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings)
assert automl._state.X_val.shape == X_val.shape
print(automl.predict(X_train))
print(automl.model)
print(automl.config_history)
print(automl.best_model_for_estimator("rf"))
print(automl.best_iteration)
print(automl.best_estimator)
print(automl.best_config)
print(automl.best_loss)
print(automl.best_config_train_time)
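        # Second run: restrict the search to catboost for at most 2 iterations and pin its
        # n_estimators to 100 via the custom_hp override.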
settings.update(
{
"estimator_list": ["catboost"],
"keep_search_state": False,
"model_history": False,
"use_best_model": False,
"time_budget": None,
"max_iter": 2,
"custom_hp": {"catboost": {"n_estimators": {"domain": 100}}},
}
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings)
def test_parallel(self, hpo_method=None):
automl_experiment = AutoML()
automl_settings = {
"time_budget": 10,
"task": "regression",
"log_file_name": "test/california.log",
"log_type": "all",
"n_jobs": 1,
"n_concurrent_trials": 10,
"hpo_method": hpo_method,
}
X_train, y_train = fetch_california_housing(return_X_y=True, data_home="test")
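        # n_concurrent_trials > 1 requires a distributed backend (e.g. ray); if it is not
        # installed, fit raises ImportError and the test returns without failing.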
try:
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.best_model_for_estimator("xgboost"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
except ImportError:
return
def test_sparse_matrix_regression_holdout(self):
X_train = scipy.sparse.random(8, 100)
y_train = np.random.uniform(size=8)
automl_experiment = AutoML()
automl_settings = {
"time_budget": 1,
"eval_method": "holdout",
"task": "regression",
"log_file_name": "test/sparse_regression.log",
"n_jobs": 1,
"model_history": True,
"metric": "mse",
"sample_weight": np.ones(len(y_train)),
"early_stop": True,
}
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.best_model_for_estimator("rf"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
def test_regression_xgboost(self):
X_train = scipy.sparse.random(300, 900, density=0.0001)
y_train = np.random.uniform(size=300)
X_val = scipy.sparse.random(100, 900, density=0.0001)
y_val = np.random.uniform(size=100)
automl_experiment = AutoML()
automl_experiment.add_learner(learner_name="my_xgb1", learner_class=MyXGB1)
automl_experiment.add_learner(learner_name="my_xgb2", learner_class=MyXGB2)
automl_settings = {
"time_budget": 2,
"estimator_list": ["my_xgb1", "my_xgb2"],
"task": "regression",
"log_file_name": "test/regression_xgboost.log",
"n_jobs": 1,
"model_history": True,
"keep_search_state": True,
"early_stop": True,
}
automl_experiment.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
assert automl_experiment._state.X_val.shape == X_val.shape
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.best_model_for_estimator("my_xgb2"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
print(automl_experiment.best_config)
print(automl_experiment.best_loss)
print(automl_experiment.best_config_train_time)
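

# AutoML follows the scikit-learn estimator interface, so it can be wrapped by sklearn
# meta-estimators such as MultiOutputRegressor and RegressorChain, each of which fits one
# AutoML instance per target.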
def test_multioutput():
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor, RegressorChain
# create regression data
X, y = make_regression(n_targets=3)
# split into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
# train the model
model = MultiOutputRegressor(AutoML(task="regression", time_budget=1))
model.fit(X_train, y_train)
# predict
print(model.predict(X_test))
# train the model
model = RegressorChain(AutoML(task="regression", time_budget=1))
model.fit(X_train, y_train)
# predict
print(model.predict(X_test))
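

# The names below are FLAML's built-in regression learners; each parametrized run checks
# that the best model found for that learner can be re-fitted on the same CV folds and
# reproduce the logged validation loss (see the docstring below).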
@pytest.mark.parametrize(
"estimator",
[
"catboost",
"enet",
"extra_tree",
"histgb",
"kneighbor",
"lgbm",
"rf",
"xgboost",
"xgb_limitdepth",
],
)
def test_reproducibility_of_regression_models(estimator: str):
"""FLAML finds the best model for a given dataset, which it then provides to users.
However, there are reported issues where FLAML was providing an incorrect model - see here:
https://github.com/microsoft/FLAML/issues/1317
In this test we take the best regression model which FLAML provided us, and then retrain and test it on the
same folds, to verify that the result is reproducible.
"""
automl = AutoML()
automl_settings = {
"max_iter": 2,
"time_budget": -1,
"task": "regression",
"n_jobs": 1,
"estimator_list": [estimator],
"eval_method": "cv",
"n_splits": 3,
"metric": "r2",
"keep_search_state": True,
"skip_transform": True,
"retrain_full": True,
}
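    # keep_search_state=True retains the training data and the CV splitter on automl._state,
    # so the best configuration can be re-evaluated on exactly the same folds below.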
X, y = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test")
automl.fit(X_train=X, y_train=y, **automl_settings)
best_model = automl.model
assert best_model is not None
config = best_model.get_params()
val_loss_flaml = automl.best_result["val_loss"]
# Take the best model, and see if we can reproduce the best result
reproduced_val_loss, metric_for_logging, train_time, pred_time = automl._state.task.evaluate_model_CV(
config=config,
estimator=best_model,
X_train_all=automl._state.X_train_all,
y_train_all=automl._state.y_train_all,
budget=None,
kf=automl._state.kf,
eval_metric="r2",
best_val_loss=None,
cv_score_agg_func=None,
log_training_metric=False,
fit_kwargs=None,
free_mem_ratio=0,
)
assert pytest.approx(val_loss_flaml) == reproduced_val_loss
def test_reproducibility_of_catboost_regression_model():
"""FLAML finds the best model for a given dataset, which it then provides to users.
However, there are reported issues around the catboost model - see here:
https://github.com/microsoft/FLAML/issues/1317
In this test we take the best catboost regression model which FLAML provided us, and then retrain and test it on the
same folds, to verify that the result is reproducible.
"""
automl = AutoML()
automl_settings = {
"time_budget": 7,
"task": "regression",
"n_jobs": 1,
"estimator_list": ["catboost"],
"eval_method": "cv",
"n_splits": 10,
"metric": "r2",
"keep_search_state": True,
"skip_transform": True,
"retrain_full": True,
}
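    # Same re-evaluation procedure as test_reproducibility_of_regression_models, but targeting
    # catboost with a real time budget and 10-fold CV.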
X, y = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test")
automl.fit(X_train=X, y_train=y, **automl_settings)
best_model = automl.model
assert best_model is not None
config = best_model.get_params()
val_loss_flaml = automl.best_result["val_loss"]
# Take the best model, and see if we can reproduce the best result
reproduced_val_loss, metric_for_logging, train_time, pred_time = automl._state.task.evaluate_model_CV(
config=config,
estimator=best_model,
X_train_all=automl._state.X_train_all,
y_train_all=automl._state.y_train_all,
budget=None,
kf=automl._state.kf,
eval_metric="r2",
best_val_loss=None,
cv_score_agg_func=None,
log_training_metric=False,
fit_kwargs=None,
free_mem_ratio=0,
)
assert pytest.approx(val_loss_flaml) == reproduced_val_loss
def test_reproducibility_of_lgbm_regression_model():
"""FLAML finds the best model for a given dataset, which it then provides to users.
However, there are reported issues around LGBMs - see here:
https://github.com/microsoft/FLAML/issues/1368
In this test we take the best LGBM regression model which FLAML provided us, and then retrain and test it on the
same folds, to verify that the result is reproducible.
"""
automl = AutoML()
automl_settings = {
"time_budget": 3,
"task": "regression",
"n_jobs": 1,
"estimator_list": ["lgbm"],
"eval_method": "cv",
"n_splits": 9,
"metric": "r2",
"keep_search_state": True,
"skip_transform": True,
"retrain_full": True,
}
X, y = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test")
automl.fit(X_train=X, y_train=y, **automl_settings)
best_model = automl.model
assert best_model is not None
config = best_model.get_params()
val_loss_flaml = automl.best_result["val_loss"]
# Take the best model, and see if we can reproduce the best result
reproduced_val_loss, metric_for_logging, train_time, pred_time = automl._state.task.evaluate_model_CV(
config=config,
estimator=best_model,
X_train_all=automl._state.X_train_all,
y_train_all=automl._state.y_train_all,
budget=None,
kf=automl._state.kf,
eval_metric="r2",
best_val_loss=None,
cv_score_agg_func=None,
log_training_metric=False,
fit_kwargs=None,
free_mem_ratio=0,
)
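    # The assertion below is one-sided: it fails only if the re-evaluated loss is strictly
    # worse than the logged one; a logged loss worse than the reproduction still passes.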
assert pytest.approx(val_loss_flaml) == reproduced_val_loss or val_loss_flaml > reproduced_val_loss
@pytest.mark.parametrize(
"estimator",
[
"catboost",
"enet",
"extra_tree",
"histgb",
"kneighbor",
"lgbm",
"rf",
"xgboost",
"xgb_limitdepth",
],
)
def test_reproducibility_of_underlying_regression_models(estimator: str):
"""FLAML finds the best model for a given dataset, which it then provides to users.
However, there are reported issues where FLAML was providing an incorrect model - see here:
https://github.com/microsoft/FLAML/issues/1317
FLAML defines FLAMLised models, which wrap around the underlying (SKLearn/XGBoost/CatBoost) model.
Ideally, FLAMLised models should perform identically to the underlying model, when fitted
to the same data, with no budget. This verifies that this is the case for regression models.
In this test we take the best model which FLAML provided us, extract the underlying model,
before retraining and testing it on the same folds - to verify that the result is reproducible.
"""
automl = AutoML()
automl_settings = {
"max_iter": 5,
"time_budget": -1,
"task": "regression",
"n_jobs": 1,
"estimator_list": [estimator],
"eval_method": "cv",
"n_splits": 10,
"metric": "r2",
"keep_search_state": True,
"skip_transform": True,
"retrain_full": False,
}
X, y = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test")
automl.fit(X_train=X, y_train=y, **automl_settings)
best_model = automl.model
assert best_model is not None
val_loss_flaml = automl.best_result["val_loss"]
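    # Re-fit the raw, unwrapped model (best_model.model) on the same folds via the shared
    # conftest helper and compare the mean fold loss with the value FLAML recorded.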
reproduced_val_loss_underlying_model = np.mean(
evaluate_cv_folds_with_underlying_model(
automl._state.X_train_all, automl._state.y_train_all, automl._state.kf, best_model.model, "regression"
)
)
assert pytest.approx(val_loss_flaml) == reproduced_val_loss_underlying_model


if __name__ == "__main__":
    unittest.main()