Compare commits

...

7 Commits

Author SHA1 Message Date
Qingyun Wu
2d3bd84038 Merge pull request #28 from microsoft/v0.2.4
v0.2.4
2021-02-17 18:14:24 -05:00
Chi Wang (MSR)
79a851e408 step curve 2021-02-17 14:03:19 -08:00
Chi Wang (MSR)
a1b0b303ed grid search check 2021-02-16 17:13:05 -08:00
Chi Wang (MSR)
3328157f31 requirements in example 2021-02-13 14:33:15 -08:00
Chi Wang (MSR)
da88aa77e3 None check 2021-02-13 10:58:49 -08:00
Chi Wang (MSR)
bd16eeee69 sample_weight; dependency; notebook 2021-02-13 10:43:11 -08:00
Qingyun Wu
d18d292081 Fix phasing in README.md 2021-02-11 14:40:29 -05:00
12 changed files with 967 additions and 433 deletions

View File

@@ -5,8 +5,8 @@
<br>
</p>
-FLAML is a Python library designed to automatically produce accurate machine
-learning models with low computational cost. It frees users from selecting
+FLAML is a lightweight Python library that finds accurate machine
+learning models automatically, efficiently and economically. It frees users from selecting
learners and hyperparameters for each learner. It is fast and cheap.
The simple and lightweight design makes it easy to extend, such as
adding customized learners or metrics. FLAML is powered by a new, [cost-effective

View File

@@ -402,7 +402,7 @@ class AutoML:
self._X_train_all, self._y_train_all = \
self._transformer.fit_transform(X, y, self._state.task)
self._label_transformer = self._transformer.label_transformer
self._sample_weight_full = self._state.fit_kwargs.get('sample_weight')
if X_val is not None and y_val is not None:
if not (isinstance(X_val, np.ndarray) or
issparse(X_val) or
@@ -446,7 +446,8 @@ class AutoML:
self._X_train_all, self._y_train_all
if issparse(X_train_all):
X_train_all = X_train_all.tocsr()
if self._state.task != 'regression':
if self._state.task != 'regression' and self._state.fit_kwargs.get(
'sample_weight') is None:
# logger.info(f"label {pd.unique(y_train_all)}")
label_set, counts = np.unique(y_train_all, return_counts=True)
# augment rare classes
@@ -1093,8 +1094,9 @@ class AutoML:
self._state.best_loss))
else:
logger.info(f"no enough budget for learner {estimator}")
self.estimator_list.remove(estimator)
self._estimator_index -= 1
if self._estimator_index is not None:
self.estimator_list.remove(estimator)
self._estimator_index -= 1
if self._retrain_full and best_config_sig and not better and (
self._search_states[self._best_estimator].sample_size ==
self._state.data_size) and (est_retrain_time <=
@@ -1151,7 +1153,11 @@ class AutoML:
stacker = Stacker(estimators, best_m,
n_jobs=self._state.n_jobs,
passthrough=True)
stacker.fit(self._X_train_all, self._y_train_all)
if self._sample_weight_full is not None:
self._state.fit_kwargs[
'sample_weight'] = self._sample_weight_full
stacker.fit(self._X_train_all, self._y_train_all,
**self._state.fit_kwargs)
logger.info(f'ensemble: {stacker}')
self._trained_estimator = stacker
self._trained_estimator.model = stacker

View File

@@ -121,8 +121,8 @@ class FLOW2(Searcher):
self._unordered_cat_hp = {}
self._cat_hp_cost = {}
for key, domain in self.space.items():
assert not isinstance(domain, dict), \
key+"'s domain is grid search which is not supported in FLOW2."
assert not (isinstance(domain, dict) and 'grid_search' in domain
), key+"'s domain is grid search which is not supported in FLOW2."
if callable(getattr(domain, 'get_sampler', None)):
self._tunable_keys.append(key)
sampler = domain.get_sampler()

View File

@@ -6,6 +6,7 @@ The API is compatible with ray tune.
Example:
```python
# require: pip install flaml[blendsearch]
from flaml import tune
import time
@@ -42,6 +43,7 @@ print(analysis.best_config) # the best config
Or, using ray tune's API:
```python
# require: pip install flaml[blendsearch] ray[tune]
from ray import tune as raytune
from flaml import CFO, BlendSearch
import time
@@ -146,6 +148,7 @@ based on optimism in face of uncertainty.
Example:
```python
# require: pip install flaml[blendsearch]
from flaml import BlendSearch
tune.run(...
search_alg = BlendSearch(points_to_evaluate=[init_config]),

View File

@@ -1 +1 @@
__version__ = "0.2.3"
__version__ = "0.2.4"

View File

@@ -6,11 +6,16 @@
"source": [
"This notebook uses the Huggingface transformers library to finetune a transformer model.\n",
"\n",
"**Requirements.** This notebook has additional requirements:\n",
"\n",
"```bash\n",
"pip install -r transformers_requirements.txt\n",
"```"
"**Requirements.** This notebook has additional requirements:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install torch transformers datasets ipywidgets"
]
},
{
@@ -699,7 +704,7 @@
"source": [
"### Step 3. Launch with `flaml.tune.run`\n",
"\n",
"We are now ready to laungh the tuning using `flaml.tune.run`:"
"We are now ready to launch the tuning using `flaml.tune.run`:"
],
"cell_type": "markdown",
"metadata": {}
@@ -766,9 +771,13 @@
],
"metadata": {
"kernelspec": {
"display_name": "flaml",
"language": "python",
"name": "flaml"
"name": "python3",
"display_name": "Python 3.7.7 64-bit ('flaml': conda)",
"metadata": {
"interpreter": {
"hash": "bfcd9a6a9254a5e160761a1fd7a9e444f011592c6770d9f4180dde058a9df5dd"
}
}
},
"language_info": {
"codemirror_mode": {
@@ -780,7 +789,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.7.7-final"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,4 +0,0 @@
torch
transformers
datasets
ipywidgets

View File

@@ -20,7 +20,6 @@ install_requires = [
"scipy>=1.4.1",
"catboost>=0.23",
"scikit-learn>=0.23.2",
"optuna==2.3.0"
],
@@ -48,6 +47,10 @@ setuptools.setup(
"coverage>=5.3",
"xgboost<1.3",
"rgf-python",
"optuna==2.3.0",
],
"blendsearch": [
"optuna==2.3.0"
],
"ray": [
"ray[tune]==1.1.0",

View File

@@ -1,3 +1,5 @@
'''Require: pip install torchvision ray
'''
import unittest
import os
import time

View File

@@ -1,5 +1,6 @@
'''Require: pip install flaml[test,ray]
'''
import unittest
import os
import time
from sklearn.model_selection import train_test_split
import sklearn.metrics
@@ -138,6 +139,7 @@ def _test_xgboost(method='BlendSearch'):
scheduler=scheduler, search_alg=algo)
ray.shutdown()
# # Load the best model checkpoint
# import os
# best_bst = xgb.Booster()
# best_bst.load_model(os.path.join(analysis.best_checkpoint,
# "model.xgb"))