remove big objects after fit (#176)

* remove big objects after fit

* xgboost>1.3.3 has a weird auc score on:
kr-vs-kp, fold 5, 1h1c

* keep_search_state
This commit is contained in:
Chi Wang
2021-08-26 13:45:13 -07:00
committed by GitHub
parent a229a6112a
commit 1bc8786dcb
7 changed files with 22 additions and 6 deletions

View File

@@ -129,7 +129,7 @@ More examples can be found in [notebooks](https://github.com/microsoft/FLAML/tre
Please find the API documentation [here](https://microsoft.github.io/FLAML/).
Please find demo and tutorials of FLAML [here](https://www.youtube.com/channel/UCfU0zfFXHXdAd5x-WvFBk5A)
Please find demo and tutorials of FLAML [here](https://www.youtube.com/channel/UCfU0zfFXHXdAd5x-WvFBk5A).
For more technical details, please check our papers.
@@ -192,7 +192,6 @@ If all the tests are passed, please also test run notebook/flaml_automl to make
Contributors (alphabetical order): Amir Aghaei, Vijay Aski, Sebastien Bubeck, Surajit Chaudhuri, Nadiia Chepurko, Ofer Dekel, Alex Deng, Anshuman Dutt, Nicolo Fusi, Jianfeng Gao, Johannes Gehrke, Niklas Gustafsson, Silu Huang, Dongwoo Kim, Christian Konig, John Langford, Menghao Li, Mingqin Li, Zhe Liu, Naveen Gaur, Paul Mineiro, Vivek Narasayya, Jake Radzikowski, Marco Rossi, Amin Saied, Neil Tenenholtz, Olga Vrousgou, Markus Weimer, Yue Wang, Qingyun Wu, Qiufeng Yin, Haozhe Zhang, Minjia Zhang, XiaoYun Zhang, Eric Zhu, and open-source contributors.
## License
[MIT License](LICENSE)

View File

@@ -1074,6 +1074,7 @@ class AutoML:
starting_points={},
seed=None,
n_concurrent_trials=1,
keep_search_state=False,
**fit_kwargs):
'''Find a model for a given task
@@ -1169,6 +1170,9 @@ class AutoML:
n_concurrent_trials: [Experimental] int, default=1 | The number of
concurrent trials. For n_concurrent_trials > 1, installation of
ray is required: `pip install flaml[ray]`.
keep_search_state: boolean, default=False | Whether to keep search
state after fit(). By default the state is deleted for space
saving.
**fit_kwargs: Other key word arguments to pass to fit() function of
the searched learners, such as sample_weight. Include period as
a key word argument for 'forecast' task.
@@ -1306,6 +1310,15 @@ class AutoML:
"search converged. Consider increasing the time budget.".format(
self._time_taken_best_iter / time_budget * 100))
if not keep_search_state:
# release space
del self._X_train_all, self._y_train_all, self._state.kf
del self._state.X_train, self._state.X_train_all, self._state.X_val
del self._state.y_train, self._state.y_train_all, self._state.y_val
del self._sample_weight_full, self._state.fit_kwargs, self._state.groups
for state in self._search_states.values():
if state.trained_estimator:
del state.trained_estimator
if verbose == 0:
logger.setLevel(old_level)

View File

@@ -1 +1 @@
__version__ = "0.6.0"
__version__ = "0.6.1"

View File

@@ -16,10 +16,10 @@ __version__ = version["__version__"]
install_requires = [
"NumPy>=1.16.2",
"lightgbm>=2.3.1",
"xgboost>=0.90",
"xgboost>=0.90,<=1.3.3",
"scipy>=1.4.1",
"catboost>=0.23",
"scikit-learn>=0.23.2",
"scikit-learn>=0.24",
],

View File

@@ -457,6 +457,7 @@ class TestAutoML(unittest.TestCase):
"log_file_name": "test/sparse_regression.log",
"n_jobs": 1,
"model_history": True,
"keep_search_state": True,
"verbose": 0,
}
automl_experiment.fit(X_train=X_train, y_train=y_train,
@@ -615,6 +616,7 @@ class TestAutoML(unittest.TestCase):
"log_file_name": 'test/regression_xgboost.log',
"n_jobs": 1,
"model_history": True,
"keep_search_state": True,
}
automl_experiment.fit(X_train=X_train, y_train=y_train,
X_val=X_val, y_val=y_val,

View File

@@ -36,6 +36,7 @@ class TestLogging(unittest.TestCase):
"log_training_metric": True,
"n_jobs": 1,
"model_history": True,
"keep_search_state": True,
"learner_selector": "roundrobin",
}
X_train, y_train = load_boston(return_X_y=True)
@@ -78,7 +79,7 @@ class TestLogging(unittest.TestCase):
analysis = tune.run(
automl.trainable, search_alg=search_alg, # verbose=2,
time_budget_s=1, num_samples=-1)
print(min((trial.last_result["val_loss"], trial.last_result)
print(min(trial.last_result["val_loss"]
for trial in analysis.trials))
# Check if the log buffer is populated.
self.assertTrue(len(buf.getvalue()) > 0)

View File

@@ -41,6 +41,7 @@ def test_simple(method=None):
"hpo_method": method,
"log_type": "all",
"retrain_full": "budget",
"keep_search_state": True,
"time_budget": 1
}
from sklearn.externals._arff import ArffException