remove big objects after fit (#176)

* remove big objects after fit

* xgboost>1.3.3 has a weird auc score on:
kr-vs-kp, fold 5, 1h1c

* keep_search_state
This commit is contained in:
Chi Wang
2021-08-26 13:45:13 -07:00
committed by GitHub
parent a229a6112a
commit 1bc8786dcb
7 changed files with 22 additions and 6 deletions

View File

@@ -129,7 +129,7 @@ More examples can be found in [notebooks](https://github.com/microsoft/FLAML/tre
Please find the API documentation [here](https://microsoft.github.io/FLAML/).
Please find demo and tutorials of FLAML [here](https://www.youtube.com/channel/UCfU0zfFXHXdAd5x-WvFBk5A)
Please find demo and tutorials of FLAML [here](https://www.youtube.com/channel/UCfU0zfFXHXdAd5x-WvFBk5A).
For more technical details, please check our papers.
@@ -192,7 +192,6 @@ If all the tests are passed, please also test run notebook/flaml_automl to make
Contributors (alphabetical order): Amir Aghaei, Vijay Aski, Sebastien Bubeck, Surajit Chaudhuri, Nadiia Chepurko, Ofer Dekel, Alex Deng, Anshuman Dutt, Nicolo Fusi, Jianfeng Gao, Johannes Gehrke, Niklas Gustafsson, Silu Huang, Dongwoo Kim, Christian Konig, John Langford, Menghao Li, Mingqin Li, Zhe Liu, Naveen Gaur, Paul Mineiro, Vivek Narasayya, Jake Radzikowski, Marco Rossi, Amin Saied, Neil Tenenholtz, Olga Vrousgou, Markus Weimer, Yue Wang, Qingyun Wu, Qiufeng Yin, Haozhe Zhang, Minjia Zhang, XiaoYun Zhang, Eric Zhu, and open-source contributors.
## License
[MIT License](LICENSE)

View File

@@ -1074,6 +1074,7 @@ class AutoML:
starting_points={},
seed=None,
n_concurrent_trials=1,
keep_search_state=False,
**fit_kwargs):
'''Find a model for a given task
@@ -1169,6 +1170,9 @@ class AutoML:
n_concurrent_trials: [Experimental] int, default=1 | The number of
concurrent trials. For n_concurrent_trials > 1, installation of
ray is required: `pip install flaml[ray]`.
keep_search_state: boolean, default=False | Whether to keep search
state after fit(). By default the state is deleted for space
saving.
**fit_kwargs: Other key word arguments to pass to fit() function of
the searched learners, such as sample_weight. Include period as
a key word argument for 'forecast' task.
@@ -1306,6 +1310,15 @@ class AutoML:
"search converged. Consider increasing the time budget.".format(
self._time_taken_best_iter / time_budget * 100))
if not keep_search_state:
# release space
del self._X_train_all, self._y_train_all, self._state.kf
del self._state.X_train, self._state.X_train_all, self._state.X_val
del self._state.y_train, self._state.y_train_all, self._state.y_val
del self._sample_weight_full, self._state.fit_kwargs, self._state.groups
for state in self._search_states.values():
if state.trained_estimator:
del state.trained_estimator
if verbose == 0:
logger.setLevel(old_level)

View File

@@ -1 +1 @@
__version__ = "0.6.0"
__version__ = "0.6.1"

View File

@@ -16,10 +16,10 @@ __version__ = version["__version__"]
install_requires = [
"NumPy>=1.16.2",
"lightgbm>=2.3.1",
"xgboost>=0.90",
"xgboost>=0.90,<=1.3.3",
"scipy>=1.4.1",
"catboost>=0.23",
"scikit-learn>=0.23.2",
"scikit-learn>=0.24",
],

View File

@@ -457,6 +457,7 @@ class TestAutoML(unittest.TestCase):
"log_file_name": "test/sparse_regression.log",
"n_jobs": 1,
"model_history": True,
"keep_search_state": True,
"verbose": 0,
}
automl_experiment.fit(X_train=X_train, y_train=y_train,
@@ -615,6 +616,7 @@ class TestAutoML(unittest.TestCase):
"log_file_name": 'test/regression_xgboost.log',
"n_jobs": 1,
"model_history": True,
"keep_search_state": True,
}
automl_experiment.fit(X_train=X_train, y_train=y_train,
X_val=X_val, y_val=y_val,

View File

@@ -36,6 +36,7 @@ class TestLogging(unittest.TestCase):
"log_training_metric": True,
"n_jobs": 1,
"model_history": True,
"keep_search_state": True,
"learner_selector": "roundrobin",
}
X_train, y_train = load_boston(return_X_y=True)
@@ -78,7 +79,7 @@ class TestLogging(unittest.TestCase):
analysis = tune.run(
automl.trainable, search_alg=search_alg, # verbose=2,
time_budget_s=1, num_samples=-1)
print(min((trial.last_result["val_loss"], trial.last_result)
print(min(trial.last_result["val_loss"]
for trial in analysis.trials))
# Check if the log buffer is populated.
self.assertTrue(len(buf.getvalue()) > 0)

View File

@@ -41,6 +41,7 @@ def test_simple(method=None):
"hpo_method": method,
"log_type": "all",
"retrain_full": "budget",
"keep_search_state": True,
"time_budget": 1
}
from sklearn.externals._arff import ArffException