mirror of
https://github.com/microsoft/FLAML.git
synced 2026-02-09 02:09:16 +08:00
update image url (#71)
* update image url * ArffException * OpenMLError is ValueError * CatBoostError * reduce build on push Co-authored-by: Chi Wang (MSR) <wang.chi@microsoft.com>
This commit is contained in:
2
.github/workflows/python-package.yml
vendored
2
.github/workflows/python-package.yml
vendored
@@ -5,7 +5,7 @@ name: Build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ['*']
|
||||
branches: ['main']
|
||||
pull_request:
|
||||
branches: ['*']
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# FLAML - Fast and Lightweight AutoML
|
||||
|
||||
<p align="center">
|
||||
<img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/FLAML.png" width=200>
|
||||
<img src="https://github.com/microsoft/FLAML/blob/main/docs/images/FLAML.png" width=200>
|
||||
<br>
|
||||
</p>
|
||||
|
||||
|
||||
113
flaml/model.py
113
flaml/model.py
@@ -103,8 +103,11 @@ class BaseEstimator:
|
||||
A numpy array of shape n*1.
|
||||
Each element is the label for an instance
|
||||
'''
|
||||
X_test = self._preprocess(X_test)
|
||||
return self._model.predict(X_test)
|
||||
if self._model is not None:
|
||||
X_test = self._preprocess(X_test)
|
||||
return self._model.predict(X_test)
|
||||
else:
|
||||
return np.ones(X_test.shape[0])
|
||||
|
||||
def predict_proba(self, X_test):
|
||||
'''Predict the probability of each class from features
|
||||
@@ -663,59 +666,63 @@ class CatBoostEstimator(BaseEstimator):
|
||||
include='category').columns)
|
||||
else:
|
||||
cat_features = []
|
||||
if (not CatBoostEstimator._time_per_iter or abs(
|
||||
CatBoostEstimator._train_size - len(y_train)) > 4) and budget:
|
||||
# measure the time per iteration
|
||||
self.params["n_estimators"] = 1
|
||||
CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
|
||||
CatBoostEstimator._smallmodel.fit(
|
||||
X_train, y_train, cat_features=cat_features, **kwargs)
|
||||
CatBoostEstimator._t1 = time.time() - start_time
|
||||
if CatBoostEstimator._t1 >= budget:
|
||||
self.params["n_estimators"] = n_iter
|
||||
from catboost import CatBoostError
|
||||
try:
|
||||
if (not CatBoostEstimator._time_per_iter or abs(
|
||||
CatBoostEstimator._train_size - len(y_train)) > 4) and budget:
|
||||
# measure the time per iteration
|
||||
self.params["n_estimators"] = 1
|
||||
CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
|
||||
CatBoostEstimator._smallmodel.fit(
|
||||
X_train, y_train, cat_features=cat_features, **kwargs)
|
||||
CatBoostEstimator._t1 = time.time() - start_time
|
||||
if CatBoostEstimator._t1 >= budget:
|
||||
self.params["n_estimators"] = n_iter
|
||||
self._model = CatBoostEstimator._smallmodel
|
||||
return CatBoostEstimator._t1
|
||||
self.params["n_estimators"] = 4
|
||||
CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
|
||||
CatBoostEstimator._smallmodel.fit(
|
||||
X_train, y_train, cat_features=cat_features, **kwargs)
|
||||
CatBoostEstimator._time_per_iter = (
|
||||
time.time() - start_time - CatBoostEstimator._t1) / (
|
||||
self.params["n_estimators"] - 1)
|
||||
if CatBoostEstimator._time_per_iter <= 0:
|
||||
CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
|
||||
CatBoostEstimator._train_size = len(y_train)
|
||||
if time.time() - start_time >= budget or n_iter == self.params[
|
||||
"n_estimators"]:
|
||||
self.params["n_estimators"] = n_iter
|
||||
self._model = CatBoostEstimator._smallmodel
|
||||
return time.time() - start_time
|
||||
if budget:
|
||||
train_times = 1
|
||||
self.params["n_estimators"] = min(n_iter, int(
|
||||
(budget - time.time() + start_time - CatBoostEstimator._t1)
|
||||
/ train_times / CatBoostEstimator._time_per_iter + 1))
|
||||
self._model = CatBoostEstimator._smallmodel
|
||||
return CatBoostEstimator._t1
|
||||
self.params["n_estimators"] = 4
|
||||
CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
|
||||
CatBoostEstimator._smallmodel.fit(
|
||||
X_train, y_train, cat_features=cat_features, **kwargs)
|
||||
CatBoostEstimator._time_per_iter = (
|
||||
time.time() - start_time - CatBoostEstimator._t1) / (
|
||||
self.params["n_estimators"] - 1)
|
||||
if CatBoostEstimator._time_per_iter <= 0:
|
||||
CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
|
||||
CatBoostEstimator._train_size = len(y_train)
|
||||
if time.time() - start_time >= budget or n_iter == self.params[
|
||||
"n_estimators"]:
|
||||
self.params["n_estimators"] = n_iter
|
||||
self._model = CatBoostEstimator._smallmodel
|
||||
return time.time() - start_time
|
||||
if budget:
|
||||
train_times = 1
|
||||
self.params["n_estimators"] = min(n_iter, int(
|
||||
(budget - time.time() + start_time - CatBoostEstimator._t1)
|
||||
/ train_times / CatBoostEstimator._time_per_iter + 1))
|
||||
self._model = CatBoostEstimator._smallmodel
|
||||
if self.params["n_estimators"] > 0:
|
||||
n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
|
||||
X_tr, y_tr = X_train[:n], y_train[:n]
|
||||
if 'sample_weight' in kwargs:
|
||||
weight = kwargs['sample_weight']
|
||||
if self.params["n_estimators"] > 0:
|
||||
n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
|
||||
X_tr, y_tr = X_train[:n], y_train[:n]
|
||||
if 'sample_weight' in kwargs:
|
||||
weight = kwargs['sample_weight']
|
||||
if weight is not None:
|
||||
kwargs['sample_weight'] = weight[:n]
|
||||
else:
|
||||
weight = None
|
||||
from catboost import Pool
|
||||
model = self.estimator_class(**self.params)
|
||||
model.fit(
|
||||
X_tr, y_tr, cat_features=cat_features,
|
||||
eval_set=Pool(
|
||||
data=X_train[n:], label=y_train[n:],
|
||||
cat_features=cat_features),
|
||||
**kwargs) # model.get_best_iteration()
|
||||
if weight is not None:
|
||||
kwargs['sample_weight'] = weight[:n]
|
||||
else:
|
||||
weight = None
|
||||
from catboost import Pool
|
||||
model = self.estimator_class(**self.params)
|
||||
model.fit(
|
||||
X_tr, y_tr, cat_features=cat_features,
|
||||
eval_set=Pool(
|
||||
data=X_train[n:], label=y_train[n:],
|
||||
cat_features=cat_features),
|
||||
**kwargs) # model.get_best_iteration()
|
||||
if weight is not None:
|
||||
kwargs['sample_weight'] = weight
|
||||
self._model = model
|
||||
kwargs['sample_weight'] = weight
|
||||
self._model = model
|
||||
except CatBoostError:
|
||||
self._model = None
|
||||
self.params["n_estimators"] = n_iter
|
||||
train_time = time.time() - start_time
|
||||
return train_time
|
||||
|
||||
@@ -94,7 +94,7 @@ $nnictl create --config ./config.yml
|
||||
## CFO: Frugal Optimization for Cost-related Hyperparameters
|
||||
|
||||
<p align="center">
|
||||
<img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/CFO.png" width=200>
|
||||
<img src="https://github.com/microsoft/FLAML/blob/main/docs/images/CFO.png" width=200>
|
||||
<br>
|
||||
</p>
|
||||
|
||||
@@ -113,7 +113,7 @@ FLOW<sup>2</sup> only requires pairwise comparisons between function values to p
|
||||
The GIFs attached below demonstrate an example search trajectory of FLOW<sup>2</sup> shown in the loss and evaluation cost (i.e., the training time) space respectively. From the demonstration, we can see that (1) FLOW<sup>2</sup> can quickly move toward the low-loss region, showing good convergence property and (2) FLOW<sup>2</sup> tends to avoid exploring the high-cost region until necessary.
|
||||
|
||||
<p align="center">
|
||||
<img align="center", src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/heatmap_loss_cfo_12s.gif" width=360> <img align="center", src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/heatmap_cost_cfo_12s.gif" width=360>
|
||||
<img align="center", src="https://github.com/microsoft/FLAML/blob/main/docs/images/heatmap_loss_cfo_12s.gif" width=360> <img align="center", src="https://github.com/microsoft/FLAML/blob/main/docs/images/heatmap_cost_cfo_12s.gif" width=360>
|
||||
<br>
|
||||
<figcaption>Figure 1. FLOW<sup>2</sup> in tuning the # of leaves and the # of trees for XGBoost. The two background heatmaps show the loss and cost distribution of all configurations. The black dots are the points evaluated in FLOW<sup>2</sup>. Black dots connected by lines are points that yield better loss performance when evaluated.</figcaption>
|
||||
</p>
|
||||
@@ -136,7 +136,7 @@ using BlendSearch.
|
||||
## BlendSearch: Economical Hyperparameter Optimization With Blended Search Strategy
|
||||
|
||||
<p align="center">
|
||||
<img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/BlendSearch.png" width=200>
|
||||
<img src="https://github.com/microsoft/FLAML/blob/main/docs/images/BlendSearch.png" width=200>
|
||||
<br>
|
||||
</p>
|
||||
|
||||
|
||||
@@ -42,9 +42,10 @@ def test_simple(method=None):
|
||||
"log_type": "all",
|
||||
"time_budget": 3
|
||||
}
|
||||
from sklearn.externals._arff import ArffException
|
||||
try:
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
except ValueError:
|
||||
except (ArffException, ValueError):
|
||||
from sklearn.datasets import load_wine
|
||||
X, y = load_wine(return_X_y=True)
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
|
||||
Reference in New Issue
Block a user