Mirror of https://github.com/microsoft/FLAML.git, synced 2026-02-09 02:09:16 +08:00.
api doc for chacha (#105)

* api doc for chacha
* update params
* link to paper
* update dataset id

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: Qingyun Wu <qiw@microsoft.com>
@@ -87,7 +87,7 @@ automl_settings = {
 X_train, y_train = load_iris(return_X_y=True)
 # Train with labeled input data
 automl.fit(X_train=X_train, y_train=y_train,
-           **automl_settings)
+           **automl_settings)
 # Predict
 print(automl.predict_proba(X_train))
 # Export the best model
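For context, the hunk above touches the README's classification quickstart (the change itself appears to be whitespace-only, since the stripped old and new lines are identical). A minimal runnable version of that example, assuming `flaml` and `scikit-learn` are installed; the settings values are illustrative, not taken from the diff:

```python
from flaml import AutoML
from sklearn.datasets import load_iris

automl = AutoML()
automl_settings = {
    "time_budget": 10,  # tuning budget in seconds
    "metric": "accuracy",
    "task": "classification",
    "log_file_name": "test/iris.log",
}
X_train, y_train = load_iris(return_X_y=True)
# Train with labeled input data
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
# Predict
print(automl.predict_proba(X_train))
# Export the best model
print(automl.model)
```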
@@ -111,7 +111,7 @@ automl_settings = {
 X_train, y_train = load_boston(return_X_y=True)
 # Train with labeled input data
 automl.fit(X_train=X_train, y_train=y_train,
-           **automl_settings)
+           **automl_settings)
 # Predict
 print(automl.predict(X_train))
 # Export the best model
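The regression example follows the same shape. A minimal sketch, reusing the `'mse'` metric and `test/boston.log` file name that appear later in this commit's test changes; note that `load_boston` has since been removed from scikit-learn (1.2+), so this only runs on older versions:

```python
from flaml import AutoML
from sklearn.datasets import load_boston  # removed in scikit-learn >= 1.2

automl = AutoML()
automl_settings = {
    "time_budget": 10,
    "metric": "mse",
    "task": "regression",
    "log_file_name": "test/boston.log",
}
X_train, y_train = load_boston(return_X_y=True)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
print(automl.predict(X_train))
```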
@@ -140,7 +140,7 @@ For more technical details, please check our papers.
 ```
 * [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. AAAI 2021.
 * [Economical Hyperparameter Optimization With Blended Search Strategy](https://www.microsoft.com/en-us/research/publication/economical-hyperparameter-optimization-with-blended-search-strategy/). Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. ICLR 2021.
-* ChaCha for online AutoML. Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. To appear in ICML 2021.
+* [ChaCha for Online AutoML](https://arxiv.org/pdf/2106.04815.pdf). Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. To appear in ICML 2021.
 
 ## Contributing
 
@@ -180,7 +180,7 @@ If all the tests are passed, please also test run notebook/flaml_automl to make
 * Chi Wang
 * Qingyun Wu
 
-Contributors (alphabetical order): Sebastien Bubeck, Surajit Chaudhuri, Nadiia Chepurko, Ofer Dekel, Alex Deng, Anshuman Dutt, Nicolo Fusi, Jianfeng Gao, Johannes Gehrke, Silu Huang, Dongwoo Kim, Christian Konig, John Langford, Amin Saied, Neil Tenenholtz, Markus Weimer, Haozhe Zhang, Erkang Zhu.
+Contributors (alphabetical order): Sebastien Bubeck, Surajit Chaudhuri, Nadiia Chepurko, Ofer Dekel, Alex Deng, Anshuman Dutt, Nicolo Fusi, Jianfeng Gao, Johannes Gehrke, Silu Huang, Dongwoo Kim, Christian Konig, John Langford, Xueqing Liu, Paul Mineiro, Amin Saied, Neil Tenenholtz, Olga Vrousgou, Markus Weimer, Haozhe Zhang, Erkang Zhu.
 
 ## License
 
@@ -22,7 +22,7 @@ AutoML
 
 
 Tune
------
+----
 
 .. autofunction:: flaml.tune.run
 
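Since the hunk above fixes the RST underline for the section documenting `flaml.tune.run` ("Tune" is four characters, so the underline must be `----`), a hedged usage sketch may help; the objective function and search space here are made up for illustration, and the keyword names follow FLAML's tune API:

```python
from flaml import tune

def evaluate_config(config):
    # toy objective: minimize (x - 1)^2 + y
    tune.report(score=(config["x"] - 1) ** 2 + config["y"])

analysis = tune.run(
    evaluate_config,
    config={
        "x": tune.loguniform(lower=1e-3, upper=1e3),
        "y": tune.uniform(lower=0, upper=1),
    },
    metric="score",
    mode="min",
    num_samples=20,
    time_budget_s=10,
)
print(analysis.best_config)
```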
@@ -38,8 +38,15 @@ Tune
    :members:
 
 
+Online AutoML
+-------------
+
+.. autoclass:: flaml.AutoVW
+   :members:
+
+
 NLP
------
+---
 
 .. autoclass:: flaml.nlp.AutoTransformers
    :members:
 
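The new `Online AutoML` section exposes `flaml.AutoVW`, the ChaCha-based online learner this commit documents. A hedged sketch of its use, based on FLAML's online AutoML docs; the VW-format examples are fabricated, and `AutoVW.AUTOMATIC` plus the predict-then-learn loop are assumptions taken from those docs:

```python
from flaml import AutoVW

# Tune namespace interactions automatically, keeping 5 live models.
autovw = AutoVW(
    max_live_model_num=5,
    search_space={"interactions": AutoVW.AUTOMATIC},
)

# Vowpal Wabbit text-format examples: "<label> |<namespace> <features>"
stream = ["0.5 |a x:1 y:0.4", "0.9 |a x:0.2 y:0.7"]
for example in stream:
    prediction = autovw.predict(example)  # progressive validation: predict first,
    autovw.learn(example)                 # then learn from the revealed label
```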
@@ -171,6 +171,9 @@ class BaseEstimator:
 
 class SKLearnEstimator(BaseEstimator):
 
+    def __init__(self, task='binary:logistic', **params):
+        super().__init__(task, **params)
+
     def _preprocess(self, X):
         if isinstance(X, pd.DataFrame):
             X = X.copy()
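The hunk above gives `SKLearnEstimator` a passthrough constructor, which is the hook custom learners build on. A hypothetical subclass in the same style, registered via `AutoML.add_learner`; the class and learner names are invented, and the `search_space` classmethod shape follows FLAML's custom-learner docs:

```python
from flaml import AutoML, tune
from flaml.model import SKLearnEstimator
from sklearn.ensemble import ExtraTreesClassifier

class MyExtraTrees(SKLearnEstimator):
    def __init__(self, task='binary:logistic', n_jobs=1, n_estimators=4, **params):
        super().__init__(task, **params)
        # coerce tuned values and write them on top of pass-through params
        self.params.update({
            'n_estimators': int(round(n_estimators)),
            'n_jobs': n_jobs,
        })
        self.estimator_class = ExtraTreesClassifier

    @classmethod
    def search_space(cls, data_size, task):
        return {
            'n_estimators': {
                'domain': tune.qloguniform(lower=4, upper=data_size, q=1),
                'init_value': 4,
            },
        }

automl = AutoML()
automl.add_learner(learner_name='my_et', learner_class=MyExtraTrees)
```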
@@ -231,12 +234,7 @@ class LGBMEstimator(BaseEstimator):
         n_estimators = int(round(config['n_estimators']))
         return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
 
-    def __init__(
-        self, task='binary:logistic', n_jobs=1,
-        n_estimators=2, num_leaves=2, min_child_samples=20, learning_rate=0.1,
-        subsample=1.0, reg_lambda=1.0, reg_alpha=0.0,
-        colsample_bytree=1.0, log_max_bin=8, **params
-    ):
+    def __init__(self, task='binary:logistic', log_max_bin=8, **params):
         super().__init__(task, **params)
         # Default: 'regression' for LGBMRegressor,
         # 'binary' or 'multiclass' for LGBMClassifier
@@ -248,20 +246,16 @@ class LGBMEstimator(BaseEstimator):
             objective = 'multiclass'
         else:
             objective = 'regression'
-        self.params = {
-            "n_estimators": int(round(n_estimators)),
-            "num_leaves": int(round(num_leaves)),
-            'objective': params.get("objective", objective),
-            'n_jobs': n_jobs,
-            'learning_rate': float(learning_rate),
-            'reg_alpha': float(reg_alpha),
-            'reg_lambda': float(reg_lambda),
-            'min_child_samples': int(round(min_child_samples)),
-            'colsample_bytree': float(colsample_bytree),
-            'subsample': float(subsample),
-        }
-        self.params['max_bin'] = params['max_bin'] if 'max_bin' in params else (
-            1 << int(round(log_max_bin))) - 1
+        if "n_estimators" in self.params:
+            self.params["n_estimators"] = int(round(self.params["n_estimators"]))
+        if "num_leaves" in self.params:
+            self.params["num_leaves"] = int(round(self.params["num_leaves"]))
+        if "min_child_samples" in self.params:
+            self.params["min_child_samples"] = int(round(self.params["min_child_samples"]))
+        if "objective" not in self.params:
+            self.params["objective"] = objective
+        if "max_bin" not in self.params:
+            self.params['max_bin'] = 1 << int(round(log_max_bin)) - 1
         if 'regression' in task:
            self.estimator_class = LGBMRegressor
         else:
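One detail worth flagging in the replacement code: in Python, binary `-` binds tighter than `<<`, so the new `max_bin` default evaluates as `1 << (log_max_bin - 1)`, whereas the removed code computed `(1 << log_max_bin) - 1`. If the intent was unchanged, the new line needs parentheses; a two-line check:

```python
log_max_bin = 8
print(1 << int(round(log_max_bin)) - 1)    # 128: `-` binds before `<<`
print((1 << int(round(log_max_bin))) - 1)  # 255: the removed code's default
```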
@@ -369,7 +363,7 @@ class XGBoostEstimator(SKLearnEstimator):
     ):
         super().__init__(task, **params)
         self._n_estimators = int(round(n_estimators))
-        self.params = {
+        self.params.update({
             'max_leaves': int(round(max_leaves)),
             'max_depth': params.get('max_depth', 0),
             'grow_policy': params.get("grow_policy", 'lossguide'),
@@ -385,7 +379,7 @@ class XGBoostEstimator(SKLearnEstimator):
             'colsample_bylevel': float(colsample_bylevel),
             'colsample_bytree': float(colsample_bytree),
             'objective': params.get("objective")
-        }
+        })
         if all_thread:
             del self.params['nthread']
 
@@ -445,7 +439,8 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
         **params
     ):
         super().__init__(task, **params)
-        self.params = {
+        self.params = params
+        self.params.update({
             "n_estimators": int(round(n_estimators)),
             'max_leaves': int(round(max_leaves)),
             'max_depth': 0,
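The `self.params = params` followed by `self.params.update({...})` idiom recurs through the rest of this file's hunks: unrecognized keyword arguments pass through untouched, while the named hyperparameters are coerced (`int(round(...))`, since search spaces may propose floats) and written on top. A standalone illustration of the dict flow, with an invented function name:

```python
def build_params(n_estimators=4, max_leaves=4, **params):
    merged = params                      # pass-through for extra user params
    merged.update({
        "n_estimators": int(round(n_estimators)),  # coerce float proposals
        "max_leaves": int(round(max_leaves)),
    })
    return merged

print(build_params(n_estimators=10.4, tree_method="hist"))
# {'tree_method': 'hist', 'n_estimators': 10, 'max_leaves': 4}
```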
@@ -462,7 +457,7 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
             'colsample_bylevel': float(colsample_bylevel),
             'colsample_bytree': float(colsample_bytree),
             'use_label_encoder': params.get('use_label_encoder', False),
-        }
+        })
 
         if 'regression' in task:
             self.estimator_class = xgb.XGBRegressor
@@ -513,11 +508,12 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
         n_estimators=4, max_features=1.0, criterion='gini', **params
     ):
         super().__init__(task, **params)
-        self.params = {
+        self.params = params
+        self.params.update({
             "n_estimators": int(round(n_estimators)),
             "n_jobs": n_jobs,
             'max_features': float(max_features),
-        }
+        })
         if 'regression' in task:
             self.estimator_class = RandomForestRegressor
         else:
@@ -565,13 +561,13 @@ class LRL1Classifier(SKLearnEstimator):
         **params
     ):
         super().__init__(task, **params)
-        self.params = {
+        self.params.update({
             'penalty': params.get("penalty", 'l1'),
             'tol': float(tol),
             'C': float(C),
             'solver': params.get("solver", 'saga'),
             'n_jobs': n_jobs,
-        }
+        })
         if 'regression' in task:
             self.estimator_class = None
             raise NotImplementedError('LR does not support regression task')
@@ -594,13 +590,13 @@ class LRL2Classifier(SKLearnEstimator):
         **params
     ):
         super().__init__(task, **params)
-        self.params = {
+        self.params.update({
             'penalty': params.get("penalty", 'l2'),
             'tol': float(tol),
             'C': float(C),
             'solver': params.get("solver", 'lbfgs'),
             'n_jobs': n_jobs,
-        }
+        })
         if 'regression' in task:
             self.estimator_class = None
             raise NotImplementedError('LR does not support regression task')
@@ -648,14 +644,14 @@ class CatBoostEstimator(BaseEstimator):
         n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params
     ):
         super().__init__(task, **params)
-        self.params = {
+        self.params.update({
             "early_stopping_rounds": int(round(early_stopping_rounds)),
             "n_estimators": n_estimators,
             'learning_rate': learning_rate,
             'thread_count': n_jobs,
             'verbose': params.get('verbose', False),
             'random_seed': params.get("random_seed", 10242048),
-        }
+        })
         if 'regression' in task:
             from catboost import CatBoostRegressor
             self.estimator_class = CatBoostRegressor
@@ -759,11 +755,12 @@ class KNeighborsEstimator(BaseEstimator):
         self, task='binary:logistic', n_jobs=1, n_neighbors=5, **params
     ):
         super().__init__(task, **params)
-        self.params = {
+        self.params = params
+        self.params.update({
             'n_neighbors': int(round(n_neighbors)),
             'weights': params.get('weights', 'distance'),
             'n_jobs': n_jobs,
-        }
+        })
         if 'regression' in task:
             from sklearn.neighbors import KNeighborsRegressor
             self.estimator_class = KNeighborsRegressor
 
@@ -4,7 +4,15 @@ FLAML includes *ChaCha* which is an automatic hyperparameter tuning solution for
 
 For more technical details about *ChaCha*, please check our paper.
 
-* ChaCha for online AutoML. Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. To appear in ICML 2021.
+* [ChaCha for Online AutoML](https://arxiv.org/pdf/2106.04815.pdf). Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. To appear in ICML 2021.
+```
+@inproceedings{wu2021chacha,
+    title={ChaCha for online AutoML},
+    author={Qingyun Wu and Chi Wang and John Langford and Paul Mineiro and Marco Rossi},
+    year={2021},
+    booktitle={ICML},
+}
+```
 
 ## `AutoVW`
 
@@ -48,7 +48,7 @@
     "## 2. Online regression with AutoVW\n",
     "### Load data from openml and preprocess\n",
     "\n",
-    "Download [dataset_sales](https://www.openml.org/d/42183) from OpenML."
+    "Download [NewFuelCar](https://www.openml.org/d/41506) from OpenML."
    ]
   },
   {
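The notebook swap above points at OpenML dataset 41506 (NewFuelCar). A hedged sketch of fetching it with the `openml` package, whose import appears in the test hunk further down:

```python
import openml

dataset = openml.datasets.get_dataset(41506)  # NewFuelCar
X, y, categorical_indicator, attribute_names = dataset.get_data(
    target=dataset.default_target_attribute, dataset_format="dataframe"
)
print(X.shape, y.shape)
```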
@@ -412,4 +412,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 2
-}
+}
@@ -273,7 +273,6 @@ class TestAutoML(unittest.TestCase):
         automl_experiment = AutoML()
         automl_settings = {
             "time_budget": 2,
             "metric": 'mse',
             "task": 'regression',
             "log_file_name": "test/boston.log",
             "log_training_metric": True,
@@ -5,7 +5,6 @@ import scipy.sparse
 
 import pandas as pd
 from sklearn.metrics import mean_squared_error, mean_absolute_error
 import time
 import logging
 from flaml.tune import loguniform, polynomial_expansion_set
 from vowpalwabbit import pyvw
@@ -13,6 +12,7 @@ from flaml import AutoVW
 import string
 import os
+import openml
 
 VW_DS_DIR = 'test/data/'
 NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)
 logger = logging.getLogger(__name__)
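A hedged sketch of how the imports and constants above are typically combined in this test: `polynomial_expansion_set` builds a search space over VW namespace interactions, and `loguniform` tunes the learning rate. The exact keyword names are assumptions drawn from FLAML's online AutoML docs:

```python
import string
from flaml import AutoVW
from flaml.tune import loguniform, polynomial_expansion_set

NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)

search_space = {
    # candidate feature interactions over VW namespaces
    "interactions": polynomial_expansion_set(
        init_monomials=set(NS_LIST), highest_poly_order=2, allow_self_inter=False
    ),
    "learning_rate": loguniform(lower=2e-10, upper=1.0),
}
autovw = AutoVW(max_live_model_num=5, search_space=search_space)
```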