Files
FLAML/test/tune_example.py
Li Jiang 1285700d7a Update readme, bump version to 2.4.0, fix CI errors (#1466)
* Update gitignore

* Bump version to 2.4.0

* Update readme

* Pre-download california housing data

* Use pre-downloaded california housing data

* Pin lightning<=2.5.6

* Fix typo in find and replace

* Fix estimators has no attribute __sklearn_tags__

* Pin torch to 2.2.2 in tests

* Fix conflict

* Update pytorch-forecasting

* Update pytorch-forecasting

* Update pytorch-forecasting

* Use numpy<2 for testing

* Update scikit-learn

* Run Build and UT every other day

* Pin pip<24.1

* Pin pip<24.1 in pipeline

* Loosen pip, install pytorch_forecasting only in py311

* Add support to new versions of nlp dependecies

* Fix formats

* Remove redefinition

* Update mlflow versions

* Fix mlflow version syntax

* Update gitignore

* Clean up cache to free space

* Remove clean up action cache

* Fix blendsearch

* Update test workflow

* Update setup.py

* Fix catboost version

* Update workflow

* Prepare for python 3.14

* Support no catboost

* Fix tests

* Fix python_requires

* Update test workflow

* Fix vw tests

* Remove python 3.9

* Fix nlp tests

* Fix prophet

* Print pip freeze for better debugging

* Fix Optuna search does not support parameters of type Float with samplers of type Quantized

* Save dependencies for later inspection

* Fix coverage.xml not exists

* Fix github action permission

* Handle python 3.13

* Address openml is not installed

* Check dependencies before run tests

* Update dependencies

* Fix syntax error

* Use bash

* Update dependencies

* Fix git error

* Loose mlflow constraints

* Add rerun, use mlflow-skinny

* Fix git error

* Remove ray tests

* Update xgboost versions

* Fix automl pickle error

* Don't test python 3.10 on macos as it's stuck

* Rebase before push

* Reduce number of branches
2026-01-09 13:40:52 +08:00

66 lines
2.6 KiB
Python

import lightgbm
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from flaml import tune
from flaml.automl.model import LGBMEstimator
data = fetch_california_housing(return_X_y=False, as_frame=True, data_home="test")
df, X, y = data.frame, data.data, data.target
df_train, _, X_train, X_test, _, y_test = train_test_split(df, X, y, test_size=0.33, random_state=42)
csv_file_name = "test/housing.csv"
df_train.to_csv(csv_file_name, index=False)
# X, y = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test")
# X_train, X_test, y_train, y_test = train_test_split(
# X, y, test_size=0.33, random_state=42
# )
def train_lgbm(config: dict) -> dict:
# convert config dict to lgbm params
params = LGBMEstimator(**config).params
# train the model
# train_set = lightgbm.Dataset(X_train, y_train)
# LightGBM only accepts the csv with valid number format, if even these string columns are set to ignore.
train_set = lightgbm.Dataset(csv_file_name, params={"label_column": "name:MedHouseVal", "header": True})
model = lightgbm.train(params, train_set)
# evaluate the model
pred = model.predict(X_test)
mse = mean_squared_error(y_test, pred)
# return eval results as a dictionary
return {"mse": mse}
def test_tune_lgbm_csv():
# load a built-in search space from flaml
flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
# specify the search space as a dict from hp name to domain; you can define your own search space same way
config_search_space = {hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()}
# give guidance about hp values corresponding to low training cost, i.e., {"n_estimators": 4, "num_leaves": 4}
low_cost_partial_config = {
hp: space["low_cost_init_value"]
for hp, space in flaml_lgbm_search_space.items()
if "low_cost_init_value" in space
}
# initial points to evaluate
points_to_evaluate = [
{hp: space["init_value"] for hp, space in flaml_lgbm_search_space.items() if "init_value" in space}
]
# run the tuning, minimizing mse, with total time budget 3 seconds
analysis = tune.run(
train_lgbm,
metric="mse",
mode="min",
config=config_search_space,
low_cost_partial_config=low_cost_partial_config,
points_to_evaluate=points_to_evaluate,
time_budget_s=3,
num_samples=-1,
)
print(analysis.best_result)
if __name__ == "__main__":
test_tune_lgbm_csv()