update image url (#71)

* update image url

* ArffException

* OpenMLError is ValueError

* CatBoostError

* reduce build on push

Co-authored-by: Chi Wang (MSR) <wang.chi@microsoft.com>
Qingyun Wu authored 2021-04-21 04:36:06 -04:00, committed by GitHub
parent d08bb15475
commit f4f3f4f17b
5 changed files with 67 additions and 59 deletions
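
A note on the "OpenMLError is ValueError" bullet above: scikit-learn's OpenML fetcher raises OpenMLError, which subclasses ValueError, so an existing `except ValueError` clause already covers it. A quick illustrative check, assuming scikit-learn's private `sklearn.datasets._openml` module (its location may change between releases):

# Illustrative check only; sklearn.datasets._openml is a private module and
# OpenMLError's location may vary across scikit-learn versions.
from sklearn.datasets._openml import OpenMLError

assert issubclass(OpenMLError, ValueError)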


@@ -5,7 +5,7 @@ name: Build
 on:
   push:
-    branches: ['*']
+    branches: ['main']
   pull_request:
     branches: ['*']


@@ -7,7 +7,7 @@
 # FLAML - Fast and Lightweight AutoML
 <p align="center">
-    <img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/FLAML.png" width=200>
+    <img src="https://github.com/microsoft/FLAML/blob/main/docs/images/FLAML.png" width=200>
     <br>
 </p>


@@ -103,8 +103,11 @@ class BaseEstimator:
             A numpy array of shape n*1.
             Each element is the label for an instance
         '''
-        X_test = self._preprocess(X_test)
-        return self._model.predict(X_test)
+        if self._model is not None:
+            X_test = self._preprocess(X_test)
+            return self._model.predict(X_test)
+        else:
+            return np.ones(X_test.shape[0])

     def predict_proba(self, X_test):
         '''Predict the probability of each class from features
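
The change above makes predict robust to a failed training run: when `self._model` is None, it returns a constant prediction instead of raising. A minimal self-contained sketch of the pattern (GuardedEstimator and its bodies are illustrative names, not FLAML's full class):

import numpy as np

class GuardedEstimator:
    # Minimal sketch of the guarded-predict pattern; not FLAML's actual class.
    def __init__(self, model=None):
        self._model = model  # stays None if training failed

    def _preprocess(self, X_test):
        return X_test  # placeholder for the real preprocessing step

    def predict(self, X_test):
        if self._model is not None:
            X_test = self._preprocess(X_test)
            return self._model.predict(X_test)
        # training failed earlier: return a constant label per instance
        return np.ones(X_test.shape[0])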
@@ -663,59 +666,63 @@ class CatBoostEstimator(BaseEstimator):
                 include='category').columns)
         else:
             cat_features = []
-        if (not CatBoostEstimator._time_per_iter or abs(
-                CatBoostEstimator._train_size - len(y_train)) > 4) and budget:
-            # measure the time per iteration
-            self.params["n_estimators"] = 1
-            CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
-            CatBoostEstimator._smallmodel.fit(
-                X_train, y_train, cat_features=cat_features, **kwargs)
-            CatBoostEstimator._t1 = time.time() - start_time
-            if CatBoostEstimator._t1 >= budget:
-                self.params["n_estimators"] = n_iter
-                self._model = CatBoostEstimator._smallmodel
-                return CatBoostEstimator._t1
-            self.params["n_estimators"] = 4
-            CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
-            CatBoostEstimator._smallmodel.fit(
-                X_train, y_train, cat_features=cat_features, **kwargs)
-            CatBoostEstimator._time_per_iter = (
-                time.time() - start_time - CatBoostEstimator._t1) / (
-                self.params["n_estimators"] - 1)
-            if CatBoostEstimator._time_per_iter <= 0:
-                CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
-            CatBoostEstimator._train_size = len(y_train)
-            if time.time() - start_time >= budget or n_iter == self.params[
-                    "n_estimators"]:
-                self.params["n_estimators"] = n_iter
-                self._model = CatBoostEstimator._smallmodel
-                return time.time() - start_time
-        if budget:
-            train_times = 1
-            self.params["n_estimators"] = min(n_iter, int(
-                (budget - time.time() + start_time - CatBoostEstimator._t1)
-                / train_times / CatBoostEstimator._time_per_iter + 1))
-            self._model = CatBoostEstimator._smallmodel
-        if self.params["n_estimators"] > 0:
-            n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
-            X_tr, y_tr = X_train[:n], y_train[:n]
-            if 'sample_weight' in kwargs:
-                weight = kwargs['sample_weight']
-                if weight is not None:
-                    kwargs['sample_weight'] = weight[:n]
-            else:
-                weight = None
-            from catboost import Pool
-            model = self.estimator_class(**self.params)
-            model.fit(
-                X_tr, y_tr, cat_features=cat_features,
-                eval_set=Pool(
-                    data=X_train[n:], label=y_train[n:],
-                    cat_features=cat_features),
-                **kwargs)  # model.get_best_iteration()
-            if weight is not None:
-                kwargs['sample_weight'] = weight
-            self._model = model
+        from catboost import CatBoostError
+        try:
+            if (not CatBoostEstimator._time_per_iter or abs(
+                    CatBoostEstimator._train_size - len(y_train)) > 4) and budget:
+                # measure the time per iteration
+                self.params["n_estimators"] = 1
+                CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
+                CatBoostEstimator._smallmodel.fit(
+                    X_train, y_train, cat_features=cat_features, **kwargs)
+                CatBoostEstimator._t1 = time.time() - start_time
+                if CatBoostEstimator._t1 >= budget:
+                    self.params["n_estimators"] = n_iter
+                    self._model = CatBoostEstimator._smallmodel
+                    return CatBoostEstimator._t1
+                self.params["n_estimators"] = 4
+                CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
+                CatBoostEstimator._smallmodel.fit(
+                    X_train, y_train, cat_features=cat_features, **kwargs)
+                CatBoostEstimator._time_per_iter = (
+                    time.time() - start_time - CatBoostEstimator._t1) / (
+                    self.params["n_estimators"] - 1)
+                if CatBoostEstimator._time_per_iter <= 0:
+                    CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
+                CatBoostEstimator._train_size = len(y_train)
+                if time.time() - start_time >= budget or n_iter == self.params[
+                        "n_estimators"]:
+                    self.params["n_estimators"] = n_iter
+                    self._model = CatBoostEstimator._smallmodel
+                    return time.time() - start_time
+            if budget:
+                train_times = 1
+                self.params["n_estimators"] = min(n_iter, int(
+                    (budget - time.time() + start_time - CatBoostEstimator._t1)
+                    / train_times / CatBoostEstimator._time_per_iter + 1))
+                self._model = CatBoostEstimator._smallmodel
+            if self.params["n_estimators"] > 0:
+                n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
+                X_tr, y_tr = X_train[:n], y_train[:n]
+                if 'sample_weight' in kwargs:
+                    weight = kwargs['sample_weight']
+                    if weight is not None:
+                        kwargs['sample_weight'] = weight[:n]
+                else:
+                    weight = None
+                from catboost import Pool
+                model = self.estimator_class(**self.params)
+                model.fit(
+                    X_tr, y_tr, cat_features=cat_features,
+                    eval_set=Pool(
+                        data=X_train[n:], label=y_train[n:],
+                        cat_features=cat_features),
+                    **kwargs)  # model.get_best_iteration()
+                if weight is not None:
+                    kwargs['sample_weight'] = weight
+                self._model = model
+        except CatBoostError:
+            self._model = None
         self.params["n_estimators"] = n_iter
         train_time = time.time() - start_time
         return train_time
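
The entire budget-aware training path is now wrapped in one try/except so that any CatBoostError marks the model as failed instead of crashing the AutoML loop; predict then falls back to the constant output shown earlier. A condensed sketch of the control flow, assuming a CatBoost classifier (fit_catboost_safely is a hypothetical helper, not FLAML's actual method):

import time
from catboost import CatBoostClassifier, CatBoostError

def fit_catboost_safely(X_train, y_train, n_estimators=100):
    # Hypothetical helper mirroring the commit's error handling.
    start_time = time.time()
    model = CatBoostClassifier(n_estimators=n_estimators, verbose=False)
    try:
        model.fit(X_train, y_train)
    except CatBoostError:
        model = None  # signal failure; caller's predict() degrades gracefully
    return model, time.time() - start_time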


@@ -94,7 +94,7 @@ $nnictl create --config ./config.yml
 ## CFO: Frugal Optimization for Cost-related Hyperparameters
 <p align="center">
-    <img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/CFO.png" width=200>
+    <img src="https://github.com/microsoft/FLAML/blob/main/docs/images/CFO.png" width=200>
     <br>
 </p>
@@ -113,7 +113,7 @@ FLOW<sup>2</sup> only requires pairwise comparisons between function values to p
 The GIFs attached below demonstrate an example search trajectory of FLOW<sup>2</sup> in the loss and evaluation-cost (i.e., training time) spaces respectively. From the demonstration, we can see that (1) FLOW<sup>2</sup> can quickly move toward the low-loss region, showing good convergence properties, and (2) FLOW<sup>2</sup> tends to avoid exploring the high-cost region until necessary.
 <p align="center">
-    <img align="center", src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/heatmap_loss_cfo_12s.gif" width=360> <img align="center", src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/heatmap_cost_cfo_12s.gif" width=360>
+    <img align="center", src="https://github.com/microsoft/FLAML/blob/main/docs/images/heatmap_loss_cfo_12s.gif" width=360> <img align="center", src="https://github.com/microsoft/FLAML/blob/main/docs/images/heatmap_cost_cfo_12s.gif" width=360>
 <br>
 <figcaption>Figure 1. FLOW<sup>2</sup> in tuning the # of leaves and the # of trees for XGBoost. The two background heatmaps show the loss and cost distribution of all configurations. The black dots are the points evaluated in FLOW<sup>2</sup>. Black dots connected by lines are points that yield better loss performance when evaluated.</figcaption>
 </p>
@@ -136,7 +136,7 @@ using BlendSearch.
 ## BlendSearch: Economical Hyperparameter Optimization With Blended Search Strategy
 <p align="center">
-    <img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/BlendSearch.png" width=200>
+    <img src="https://github.com/microsoft/FLAML/blob/main/docs/images/BlendSearch.png" width=200>
     <br>
 </p>


@@ -42,9 +42,10 @@ def test_simple(method=None):
         "log_type": "all",
         "time_budget": 3
     }
+    from sklearn.externals._arff import ArffException
     try:
         X, y = fetch_openml(name=dataset, return_X_y=True)
-    except ValueError:
+    except (ArffException, ValueError):
         from sklearn.datasets import load_wine
         X, y = load_wine(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(
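
For context, the resulting fallback logic, extracted as a standalone sketch. Note that `sklearn.externals._arff` is a private scikit-learn module, so the import is version-sensitive; load_dataset and the 'credit-g' default are illustrative, not from the diff:

from sklearn.datasets import fetch_openml, load_wine
from sklearn.externals._arff import ArffException  # private; may move between versions

def load_dataset(dataset='credit-g'):
    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # OpenMLError subclasses ValueError, so OpenML failures land here too
        X, y = load_wine(return_X_y=True)
    return X, y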