update image url (#71)

* update image url

* ArffException

* OpenMLError is ValueError

* CatBoostError

* reduce build on push

Co-authored-by: Chi Wang (MSR) <wang.chi@microsoft.com>
Qingyun Wu authored 2021-04-21 04:36:06 -04:00, committed by GitHub
parent d08bb15475
commit f4f3f4f17b
5 changed files with 67 additions and 59 deletions
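
A note on the "OpenMLError is ValueError" bullet above: scikit-learn's OpenML fetcher raises OpenMLError, which subclasses ValueError, so an existing `except ValueError` clause already covers it. A quick illustrative check, assuming scikit-learn's private `sklearn.datasets._openml` module (its location may change between releases):

# Illustrative check only; sklearn.datasets._openml is a private module and
# OpenMLError's location may vary across scikit-learn versions.
from sklearn.datasets._openml import OpenMLError

assert issubclass(OpenMLError, ValueError)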


@@ -5,7 +5,7 @@ name: Build
 on:
   push:
-    branches: ['*']
+    branches: ['main']
   pull_request:
     branches: ['*']


@@ -7,7 +7,7 @@
 # FLAML - Fast and Lightweight AutoML
 <p align="center">
-    <img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/FLAML.png" width=200>
+    <img src="https://github.com/microsoft/FLAML/blob/main/docs/images/FLAML.png" width=200>
     <br>
 </p>


@@ -103,8 +103,11 @@ class BaseEstimator:
             A numpy array of shape n*1.
             Each element is the label for an instance
         '''
-        X_test = self._preprocess(X_test)
-        return self._model.predict(X_test)
+        if self._model is not None:
+            X_test = self._preprocess(X_test)
+            return self._model.predict(X_test)
+        else:
+            return np.ones(X_test.shape[0])

     def predict_proba(self, X_test):
         '''Predict the probability of each class from features
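
The change above makes predict robust to a failed training run: when `self._model` is None, it returns a constant prediction instead of raising. A minimal self-contained sketch of the pattern (GuardedEstimator and its bodies are illustrative names, not FLAML's full class):

import numpy as np

class GuardedEstimator:
    # Minimal sketch of the guarded-predict pattern; not FLAML's actual class.
    def __init__(self, model=None):
        self._model = model  # stays None if training failed

    def _preprocess(self, X_test):
        return X_test  # placeholder for the real preprocessing step

    def predict(self, X_test):
        if self._model is not None:
            X_test = self._preprocess(X_test)
            return self._model.predict(X_test)
        # training failed earlier: return a constant label per instance
        return np.ones(X_test.shape[0])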
@@ -663,59 +666,63 @@ class CatBoostEstimator(BaseEstimator):
                 include='category').columns)
         else:
             cat_features = []
-        if (not CatBoostEstimator._time_per_iter or abs(
-                CatBoostEstimator._train_size - len(y_train)) > 4) and budget:
-            # measure the time per iteration
-            self.params["n_estimators"] = 1
-            CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
-            CatBoostEstimator._smallmodel.fit(
-                X_train, y_train, cat_features=cat_features, **kwargs)
-            CatBoostEstimator._t1 = time.time() - start_time
-            if CatBoostEstimator._t1 >= budget:
-                self.params["n_estimators"] = n_iter
-                self._model = CatBoostEstimator._smallmodel
-                return CatBoostEstimator._t1
-            self.params["n_estimators"] = 4
-            CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
-            CatBoostEstimator._smallmodel.fit(
-                X_train, y_train, cat_features=cat_features, **kwargs)
-            CatBoostEstimator._time_per_iter = (
-                time.time() - start_time - CatBoostEstimator._t1) / (
-                self.params["n_estimators"] - 1)
-            if CatBoostEstimator._time_per_iter <= 0:
-                CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
-            CatBoostEstimator._train_size = len(y_train)
-            if time.time() - start_time >= budget or n_iter == self.params[
-                    "n_estimators"]:
-                self.params["n_estimators"] = n_iter
-                self._model = CatBoostEstimator._smallmodel
-                return time.time() - start_time
-        if budget:
-            train_times = 1
-            self.params["n_estimators"] = min(n_iter, int(
-                (budget - time.time() + start_time - CatBoostEstimator._t1)
-                / train_times / CatBoostEstimator._time_per_iter + 1))
-            self._model = CatBoostEstimator._smallmodel
-        if self.params["n_estimators"] > 0:
-            n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
-            X_tr, y_tr = X_train[:n], y_train[:n]
-            if 'sample_weight' in kwargs:
-                weight = kwargs['sample_weight']
-                if weight is not None:
-                    kwargs['sample_weight'] = weight[:n]
-            else:
-                weight = None
-            from catboost import Pool
-            model = self.estimator_class(**self.params)
-            model.fit(
-                X_tr, y_tr, cat_features=cat_features,
-                eval_set=Pool(
-                    data=X_train[n:], label=y_train[n:],
-                    cat_features=cat_features),
-                **kwargs)  # model.get_best_iteration()
-            if weight is not None:
-                kwargs['sample_weight'] = weight
-            self._model = model
+        from catboost import CatBoostError
+        try:
+            if (not CatBoostEstimator._time_per_iter or abs(
+                    CatBoostEstimator._train_size - len(y_train)) > 4) and budget:
+                # measure the time per iteration
+                self.params["n_estimators"] = 1
+                CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
+                CatBoostEstimator._smallmodel.fit(
+                    X_train, y_train, cat_features=cat_features, **kwargs)
+                CatBoostEstimator._t1 = time.time() - start_time
+                if CatBoostEstimator._t1 >= budget:
+                    self.params["n_estimators"] = n_iter
+                    self._model = CatBoostEstimator._smallmodel
+                    return CatBoostEstimator._t1
+                self.params["n_estimators"] = 4
+                CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
+                CatBoostEstimator._smallmodel.fit(
+                    X_train, y_train, cat_features=cat_features, **kwargs)
+                CatBoostEstimator._time_per_iter = (
+                    time.time() - start_time - CatBoostEstimator._t1) / (
+                    self.params["n_estimators"] - 1)
+                if CatBoostEstimator._time_per_iter <= 0:
+                    CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
+                CatBoostEstimator._train_size = len(y_train)
+                if time.time() - start_time >= budget or n_iter == self.params[
+                        "n_estimators"]:
+                    self.params["n_estimators"] = n_iter
+                    self._model = CatBoostEstimator._smallmodel
+                    return time.time() - start_time
+            if budget:
+                train_times = 1
+                self.params["n_estimators"] = min(n_iter, int(
+                    (budget - time.time() + start_time - CatBoostEstimator._t1)
+                    / train_times / CatBoostEstimator._time_per_iter + 1))
+                self._model = CatBoostEstimator._smallmodel
+            if self.params["n_estimators"] > 0:
+                n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
+                X_tr, y_tr = X_train[:n], y_train[:n]
+                if 'sample_weight' in kwargs:
+                    weight = kwargs['sample_weight']
+                    if weight is not None:
+                        kwargs['sample_weight'] = weight[:n]
+                else:
+                    weight = None
+                from catboost import Pool
+                model = self.estimator_class(**self.params)
+                model.fit(
+                    X_tr, y_tr, cat_features=cat_features,
+                    eval_set=Pool(
+                        data=X_train[n:], label=y_train[n:],
+                        cat_features=cat_features),
+                    **kwargs)  # model.get_best_iteration()
+                if weight is not None:
+                    kwargs['sample_weight'] = weight
+                self._model = model
+        except CatBoostError:
+            self._model = None
         self.params["n_estimators"] = n_iter
         train_time = time.time() - start_time
         return train_time
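
The entire budget-aware training path is now wrapped in one try/except so that any CatBoostError marks the model as failed instead of crashing the AutoML loop; predict then falls back to the constant output shown earlier. A condensed sketch of the control flow, assuming a CatBoost classifier (fit_catboost_safely is a hypothetical helper, not FLAML's actual method):

import time
from catboost import CatBoostClassifier, CatBoostError

def fit_catboost_safely(X_train, y_train, n_estimators=100):
    # Hypothetical helper mirroring the commit's error handling.
    start_time = time.time()
    model = CatBoostClassifier(n_estimators=n_estimators, verbose=False)
    try:
        model.fit(X_train, y_train)
    except CatBoostError:
        model = None  # signal failure; caller's predict() degrades gracefully
    return model, time.time() - start_time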


@@ -94,7 +94,7 @@ $nnictl create --config ./config.yml
 ## CFO: Frugal Optimization for Cost-related Hyperparameters
 <p align="center">
-    <img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/CFO.png" width=200>
+    <img src="https://github.com/microsoft/FLAML/blob/main/docs/images/CFO.png" width=200>
     <br>
 </p>
@@ -113,7 +113,7 @@ FLOW<sup>2</sup> only requires pairwise comparisons between function values to p
 The GIFs attached below demonstrate an example search trajectory of FLOW<sup>2</sup> in the loss and evaluation-cost (i.e., training time) spaces respectively. From the demonstration, we can see that (1) FLOW<sup>2</sup> can quickly move toward the low-loss region, showing good convergence properties, and (2) FLOW<sup>2</sup> tends to avoid exploring the high-cost region until necessary.
 <p align="center">
-    <img align="center", src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/heatmap_loss_cfo_12s.gif" width=360> <img align="center", src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/heatmap_cost_cfo_12s.gif" width=360>
+    <img align="center", src="https://github.com/microsoft/FLAML/blob/main/docs/images/heatmap_loss_cfo_12s.gif" width=360> <img align="center", src="https://github.com/microsoft/FLAML/blob/main/docs/images/heatmap_cost_cfo_12s.gif" width=360>
 <br>
 <figcaption>Figure 1. FLOW<sup>2</sup> in tuning the # of leaves and the # of trees for XGBoost. The two background heatmaps show the loss and cost distribution of all configurations. The black dots are the points evaluated in FLOW<sup>2</sup>. Black dots connected by lines are points that yield better loss performance when evaluated.</figcaption>
 </p>
@@ -136,7 +136,7 @@ using BlendSearch.
 ## BlendSearch: Economical Hyperparameter Optimization With Blended Search Strategy
 <p align="center">
-    <img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/BlendSearch.png" width=200>
+    <img src="https://github.com/microsoft/FLAML/blob/main/docs/images/BlendSearch.png" width=200>
     <br>
 </p>


@@ -42,9 +42,10 @@ def test_simple(method=None):
         "log_type": "all",
         "time_budget": 3
     }
+    from sklearn.externals._arff import ArffException
     try:
         X, y = fetch_openml(name=dataset, return_X_y=True)
-    except ValueError:
+    except (ArffException, ValueError):
         from sklearn.datasets import load_wine
         X, y = load_wine(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(
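
For context, the resulting fallback logic, extracted as a standalone sketch. Note that `sklearn.externals._arff` is a private scikit-learn module, so the import is version-sensitive; load_dataset and the 'credit-g' default are illustrative, not from the diff:

from sklearn.datasets import fetch_openml, load_wine
from sklearn.externals._arff import ArffException  # private; may move between versions

def load_dataset(dataset='credit-g'):
    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # OpenMLError subclasses ValueError, so OpenML failures land here too
        X, y = load_wine(return_X_y=True)
    return X, y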