Files
FLAML/test/automl/test_python_log.py
Li Jiang 1285700d7a Update readme, bump version to 2.4.0, fix CI errors (#1466)
* Update gitignore

* Bump version to 2.4.0

* Update readme

* Pre-download california housing data

* Use pre-downloaded california housing data

* Pin lightning<=2.5.6

* Fix typo in find and replace

* Fix estimators has no attribute __sklearn_tags__

* Pin torch to 2.2.2 in tests

* Fix conflict

* Update pytorch-forecasting

* Update pytorch-forecasting

* Update pytorch-forecasting

* Use numpy<2 for testing

* Update scikit-learn

* Run Build and UT every other day

* Pin pip<24.1

* Pin pip<24.1 in pipeline

* Loosen pip, install pytorch_forecasting only in py311

* Add support for new versions of nlp dependencies

* Fix formats

* Remove redefinition

* Update mlflow versions

* Fix mlflow version syntax

* Update gitignore

* Clean up cache to free space

* Remove clean up action cache

* Fix blendsearch

* Update test workflow

* Update setup.py

* Fix catboost version

* Update workflow

* Prepare for python 3.14

* Support no catboost

* Fix tests

* Fix python_requires

* Update test workflow

* Fix vw tests

* Remove python 3.9

* Fix nlp tests

* Fix prophet

* Print pip freeze for better debugging

* Fix Optuna search does not support parameters of type Float with samplers of type Quantized

* Save dependencies for later inspection

* Fix coverage.xml not exists

* Fix github action permission

* Handle python 3.13

* Address openml is not installed

* Check dependencies before run tests

* Update dependencies

* Fix syntax error

* Use bash

* Update dependencies

* Fix git error

* Loosen mlflow constraints

* Add rerun, use mlflow-skinny

* Fix git error

* Remove ray tests

* Update xgboost versions

* Fix automl pickle error

* Don't test python 3.10 on macos as it's stuck

* Rebase before push

* Reduce number of branches
2026-01-09 13:40:52 +08:00

122 lines
4.6 KiB
Python

import io
import logging
import os
import tempfile
import unittest
from sklearn.datasets import fetch_california_housing
from flaml import AutoML
from flaml.tune.space import unflatten_hierarchical
class TestLogging(unittest.TestCase):
    """Integration test for FLAML's logging pipeline and tuning internals.

    Runs a short (1-second budget) regression AutoML job while a
    ``StringIO``-backed handler is attached to the FLAML logger, then
    exercises the search-space helpers, a manual BlendSearch/tune run,
    and a pickle round-trip of the fitted ``AutoML`` object.
    """

    def test_logging_level(self):
        from flaml import logger, logger_formatter

        with tempfile.TemporaryDirectory() as d:
            training_log = os.path.join(d, "training.log")
            # Configure logging for the FLAML logger
            # and add a handler that outputs to a buffer.
            logger.setLevel(logging.INFO)
            buf = io.StringIO()
            ch = logging.StreamHandler(buf)
            ch.setFormatter(logger_formatter)
            logger.addHandler(ch)

            # Run a simple job.
            automl = AutoML()
            automl_settings = {
                "time_budget": 1,  # seconds; keeps the test fast
                "metric": "rmse",
                "task": "regression",
                "log_file_name": training_log,
                "log_training_metric": True,
                "n_jobs": 1,
                "model_history": True,
                # keep_search_state is required for the later access to
                # automl._state / automl.trainable below.
                "keep_search_state": True,
                "learner_selector": "roundrobin",
            }
            # data_home="test" points at a pre-downloaded cache dir
            # (see commit log: "Use pre-downloaded california housing data").
            X_train, y_train = fetch_california_housing(return_X_y=True, data_home="test")
            n = len(y_train) >> 1  # half the rows for train, half for validation
            # NOTE(review): called BEFORE fit — presumably exercises the
            # not-yet-fitted code path (model/classes_/predict on an
            # unfitted AutoML); confirm this prints placeholders rather
            # than raising.
            print(automl.model, automl.classes_, automl.predict(X_train))
            automl.fit(
                X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings
            )
            # Log the tuning artifacts produced by fit(); these go through
            # the buffered handler installed above.
            logger.info(automl.search_space)
            logger.info(automl.low_cost_partial_config)
            logger.info(automl.points_to_evaluate)
            logger.info(automl.cat_hp_cost)
            import optuna as ot

            study = ot.create_study()
            from flaml.tune.space import add_cost_to_space, define_by_run_func

            # Sample a config from the FLAML search space via Optuna's
            # define-by-run interface, then map it back to the
            # hierarchical form.
            sample = define_by_run_func(study.ask(), automl.search_space)
            logger.info(sample)
            logger.info(unflatten_hierarchical(sample, automl.search_space))
            add_cost_to_space(automl.search_space, automl.low_cost_partial_config, automl.cat_hp_cost)
            logger.info(automl.search_space["ml"].categories)
            if automl.best_config:
                # Re-evaluate the best found config through the raw
                # trainable (available because keep_search_state=True).
                config = automl.best_config.copy()
                config["learner"] = automl.best_estimator
                automl.trainable({"ml": config})
            from functools import partial
            from flaml import BlendSearch, tune
            from flaml.automl import size

            # Drive a second, manual tuning round with BlendSearch over the
            # same search space, constrained by the model-size/memory
            # threshold recorded on the fitted AutoML instance.
            low_cost_partial_config = automl.low_cost_partial_config
            search_alg = BlendSearch(
                metric="val_loss",
                mode="min",
                space=automl.search_space,
                low_cost_partial_config=low_cost_partial_config,
                points_to_evaluate=automl.points_to_evaluate,
                cat_hp_cost=automl.cat_hp_cost,
                resource_attr=automl.resource_attr,
                min_resource=automl.min_resource,
                max_resource=automl.max_resource,
                config_constraints=[
                    (
                        # NOTE(review): private attrs (_state, _mem_thres) —
                        # intentional white-box access for this test.
                        partial(size, automl._state.learner_classes),
                        "<=",
                        automl._mem_thres,
                    )
                ],
                metric_constraints=automl.metric_constraints,
            )
            analysis = tune.run(
                automl.trainable,
                search_alg=search_alg,  # verbose=2,
                time_budget_s=1,
                num_samples=-1,  # unlimited samples; bounded by time budget
            )
            print(min(trial.last_result["val_loss"] for trial in analysis.trials))
            # Retrain once with the last trial's config via the internal
            # state object; pop("learner") separates the estimator name
            # from its hyperparameters.
            config = analysis.trials[-1].last_result["config"]["ml"]
            automl._state._train_with_config(config.pop("learner"), config)
            # complete_config fills a partial config within the local
            # search bounds; repeated to exercise its sampling.
            for _ in range(3):
                print(
                    search_alg._ls.complete_config(
                        low_cost_partial_config,
                        search_alg._ls_bound_min,
                        search_alg._ls_bound_max,
                    )
                )
            # Check if the log buffer is populated.
            self.assertTrue(len(buf.getvalue()) > 0)

        # Outside the tempdir: pickle round-trip of the fitted AutoML
        # object, verified by exact prediction equality.
        # NOTE(review): writes automl.pkl to the CWD and is not cleaned
        # up here — presumably relied on by CI workspace cleanup; confirm.
        import pickle

        with open("automl.pkl", "wb") as f:
            pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
        print(automl.__version__)
        pred1 = automl.predict(X_train)
        with open("automl.pkl", "rb") as f:
            automl = pickle.load(f)
        pred2 = automl.predict(X_train)
        # Predictions before and after unpickling must match exactly.
        delta = pred1 - pred2
        assert max(delta) == 0 and min(delta) == 0
        automl.save_best_config("test/housing.json")