Compare commits
39 Commits
| Author | SHA1 | Date |
|---|---|---|
|  | b206363c9a |  |
|  | 0925e2b308 |  |
|  | 3083229e40 |  |
|  | 0b23c3a028 |  |
|  | 363197cef8 |  |
|  | e5123f5595 |  |
|  | 3e991e4352 |  |
|  | 730fd14ef6 |  |
|  | 068fb9f5c2 |  |
|  | b6f57894ef |  |
|  | ad42889a3b |  |
|  | f4f3f4f17b |  |
|  | d08bb15475 |  |
|  | 9ff4ae0cb2 |  |
|  | 06045703bf |  |
|  | 72d17b37c2 |  |
|  | 97a7c114ee |  |
|  | b7a91e0385 |  |
|  | 37d7518a4c |  |
|  | f28d093522 |  |
|  | 8bcdb2a0c2 |  |
|  | b058e0e041 |  |
|  | ae5f8e5426 |  |
|  | bf95d7c455 |  |
|  | 0f99526b63 |  |
|  | b8736bc600 |  |
|  | 4a8110c87b |  |
|  | ec37ae8f8f |  |
|  | 840e3fc104 |  |
|  | 1560a6e52a |  |
|  | 7bd231e497 |  |
|  | 6ff0ed434b |  |
|  | 2d3bd84038 |  |
|  | 79a851e408 |  |
|  | a1b0b303ed |  |
|  | 3328157f31 |  |
|  | da88aa77e3 |  |
|  | bd16eeee69 |  |
|  | d18d292081 |  |
@@ -2,4 +2,4 @@
 branch = True
 source = flaml
 omit =
-    *tests*
+    *test*
.github/workflows/python-package.yml (12 changed lines, vendored)
@@ -1,13 +1,13 @@
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 
-name: Python package
+name: Build
 
 on:
   push:
-    branches: ['*']
+    branches: ['main']
   pull_request:
-    branches: ['*']
+    branches: ['main']
 
 jobs:
   build:
@@ -38,7 +38,7 @@ jobs:
       run: |
        python -m pip install --upgrade pip
        pip install -e .[test]
-    - name: If linux or max, install ray
+    - name: If linux or mac, install ray
      if: matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest'
      run: |
       pip install -e .[ray]
@@ -49,13 +49,17 @@ jobs:
       # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
       flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
     - name: Test with pytest
+      if: ${{ matrix.python-version != '3.7' || matrix.os == 'macos-latest' }}
      run: |
       pytest test
+    - name: Coverage
+      if: ${{ matrix.python-version == '3.7' && matrix.os != 'macos-latest' }}
+      run: |
+        pip install coverage
+        coverage run -a -m pytest test
+        coverage xml
     - name: Upload coverage to Codecov
+      if: ${{ matrix.python-version == '3.7' && matrix.os != 'macos-latest' }}
      uses: codecov/codecov-action@v1
      with:
       file: ./coverage.xml
.gitignore (6 changed lines, vendored)
@@ -146,6 +146,10 @@ dmypy.json
 
 # Cython debug symbols
 cython_debug/
-/catboost_info
-
+catboost_info
+notebook/*.pkl
+notebook/.azureml
+mlruns
+logs
+automl.pkl
README.md (76 changed lines)

@@ -1,18 +1,42 @@
 [PyPI version](https://badge.fury.io/py/FLAML)
 [Build](https://github.com/microsoft/FLAML/actions/workflows/python-package.yml)
 [Downloads](https://pepy.tech/project/flaml)
 [Gitter chat](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 
 # FLAML - Fast and Lightweight AutoML
 
 <p align="center">
-    <img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/FLAML.png" width=200>
+    <img src="https://github.com/microsoft/FLAML/blob/main/docs/images/FLAML.png" width=200>
     <br>
 </p>
 
-FLAML is a Python library designed to automatically produce accurate machine
-learning models with low computational cost. It frees users from selecting
-learners and hyperparameters for each learner. It is fast and cheap.
+FLAML is a lightweight Python library that finds accurate machine
+learning models automatically, efficiently and economically. It frees users from selecting
+learners and hyperparameters for each learner. It is fast and economical.
 The simple and lightweight design makes it easy to extend, such as
 adding customized learners or metrics. FLAML is powered by a new, [cost-effective
 hyperparameter optimization](https://github.com/microsoft/FLAML/tree/main/flaml/tune)
 and learner selection method invented by Microsoft Research.
-FLAML is easy to use:
+FLAML leverages the structure of the search space to choose a search order optimized for both cost and error. For example, the system tends to propose cheap configurations at the beginning stage of the search,
+but quickly moves to configurations with high model complexity and large sample size when needed in the later stage of the search. For another example, it favors cheap learners in the beginning but penalizes them later if the error improvement is slow. The cost-bounded search and cost-based prioritization make a big difference in the search efficiency under budget constraints.
+
+## Installation
+
+FLAML requires **Python version >= 3.6**. It can be installed from pip:
+
+```bash
+pip install flaml
+```
+
+To run the [`notebook example`](https://github.com/microsoft/FLAML/tree/main/notebook),
+install flaml with the [notebook] option:
+
+```bash
+pip install flaml[notebook]
+```
+
+## Quickstart
 
 * With three lines of code, you can start using this economical and fast
   AutoML engine as a scikit-learn style estimator.
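To make the quickstart concrete, here is a minimal runnable sketch of the scikit-learn style API referenced above; the iris dataset and the 60-second budget are illustrative additions, not part of this PR:

```python
# Minimal sketch of the three-line quickstart (dataset and budget are
# illustrative; `estimator_list` mirrors the README example).
from flaml import AutoML
from sklearn.datasets import load_iris

X_train, y_train = load_iris(return_X_y=True)
automl = AutoML()
automl.fit(X_train, y_train, task="classification",
           estimator_list=["lgbm"], time_budget=60)
print(automl.best_estimator)
```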
@@ -31,23 +55,14 @@ automl.fit(X_train, y_train, task="classification", estimator_list=["lgbm"])
 * You can also run generic ray-tune style hyperparameter tuning for a custom function.
 ```python
 from flaml import tune
-tune.run(train_with_config, config={…}, init_config={…}, time_budget_s=3600)
+tune.run(train_with_config, config={…}, low_cost_partial_config={…}, time_budget_s=3600)
 ```
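The only API change here is the renamed argument: `init_config` becomes `low_cost_partial_config`. A hedged, self-contained sketch of how a call might look with a toy objective (the search space, metric name, and reporting below are illustrative, not from this PR):

```python
from flaml import tune

def train_with_config(config):
    # toy objective: distance of x from 2, reported as the metric
    tune.report(metric=(config["x"] - 2) ** 2)

analysis = tune.run(
    train_with_config,
    config={"x": tune.uniform(0, 10)},
    low_cost_partial_config={"x": 0},  # the cheap starting point
    metric="metric", mode="min",
    num_samples=-1, time_budget_s=10)
print(analysis.best_config)
```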
 
-## Installation
+## Advantages
 
-FLAML requires **Python version >= 3.6**. It can be installed from pip:
-
-```bash
-pip install flaml
-```
-
-To run the [`notebook example`](https://github.com/microsoft/FLAML/tree/main/notebook),
-install flaml with the [notebook] option:
-
-```bash
-pip install flaml[notebook]
-```
+* For classification and regression tasks, find quality models with lower computational resources.
+* Users can choose their desired customizability: minimal customization (computational resource budget), medium customization (e.g., scikit-style learner, search space and metric), full customization (arbitrary training and evaluation code).
+* Allow human guidance in hyperparameter tuning to respect prior on certain subspaces but also able to explore other subspaces.
 
 ## Examples
 
@@ -112,7 +127,7 @@ And they can be used in distributed HPO frameworks such as ray tune or nni.
 
 For more technical details, please check our papers.
 
-* [FLAML: A Fast and Lightweight AutoML Library](https://arxiv.org/abs/1911.04706). Chi Wang, Qingyun Wu, Markus Weimer, Erkang Zhu. To appear in MLSys, 2021.
+* [FLAML: A Fast and Lightweight AutoML Library](https://www.microsoft.com/en-us/research/publication/flaml-a-fast-and-lightweight-automl-library/). Chi Wang, Qingyun Wu, Markus Weimer, Erkang Zhu. MLSys, 2021.
 ```
 @inproceedings{wang2021flaml,
     title={FLAML: A Fast and Lightweight AutoML Library},
@@ -122,7 +137,7 @@ For more technical details, please check our papers.
 }
 ```
 * [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. AAAI 2021.
-* Economical Hyperparameter Optimization With Blended Search Strategy. Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. To appear in ICLR 2021.
+* [Economical Hyperparameter Optimization With Blended Search Strategy](https://www.microsoft.com/en-us/research/publication/economical-hyperparameter-optimization-with-blended-search-strategy/). Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. ICLR 2021.
 
 ## Contributing
 
@@ -130,6 +145,8 @@ This project welcomes contributions and suggestions. Most contributions require
 Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
 the rights to use your contribution. For details, visit <https://cla.opensource.microsoft.com>.
 
+If you are new to GitHub [here](https://help.github.com/categories/collaborating-with-issues-and-pull-requests/) is a detailed help source on getting involved with development on GitHub.
+
 When you submit a pull request, a CLA bot will automatically determine whether you need to provide
 a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
 provided by the bot. You will only need to do this once across all repos using our CLA.
@@ -138,6 +155,23 @@ This project has adopted the [Microsoft Open Source Code of Conduct](https://ope
 For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
 contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
 
+## Developing
+
+### Setup:
+
+```
+git clone https://github.com/microsoft/FLAML.git
+pip install -e .[test,notebook]
+```
+
+### Coverage
+Any code you commit should generally not significantly impact coverage. To run all unit tests:
+```
+coverage run -m pytest test
+```
+
+If all the tests are passed, please also test run notebook/flaml_automl to make sure your commit does not break the notebook example.
+
 ## Authors
 
 * Chi Wang
Binary image changes under docs/images/ (file names for the first three were not preserved in this capture):
- image updated: 13 KiB → 23 KiB
- image updated: 2.8 KiB → 9.6 KiB
- image updated: 36 KiB → 36 KiB
- docs/images/FLOW2.png: new file, 35 KiB
flaml/__init__.py (file header lost in this capture; name inferred from the imports)

@@ -1,16 +1,8 @@
-from flaml.searcher import CFO, BlendSearch, FLOW2
-from flaml.automl import AutoML
+from flaml.searcher import CFO, BlendSearch, FLOW2, BlendSearchTuner
+from flaml.automl import AutoML, logger_formatter
 from flaml.version import __version__
 import logging
 
 # Set the root logger.
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
-
-# Add the console handler.
-_ch = logging.StreamHandler()
-logger_formatter = logging.Formatter(
-    '[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
-    '%m-%d %H:%M:%S')
-_ch.setFormatter(logger_formatter)
-logger.addHandler(_ch)
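Since `logger_formatter` is now re-exported instead of being used to install a console handler in `__init__.py`, an application has to attach its own handler if it wants FLAML's log output. A sketch (the file destination is illustrative):

```python
import logging
from flaml import logger_formatter

logger = logging.getLogger("flaml")
handler = logging.FileHandler("flaml.log")  # illustrative destination
handler.setFormatter(logger_formatter)       # reuse FLAML's format
logger.addHandler(handler)
```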
flaml/automl.py (573 changed lines)

@@ -1,12 +1,12 @@
 '''!
  * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License.
 '''
 
 N_SPLITS = 5
 RANDOM_SEED = 1
 SPLIT_RATIO = 0.1
-MEM_THRES = 4*(1024**3)
+MEM_THRES = 4 * (1024 ** 3)
 SMALL_LARGE_THRES = 10000000
 MIN_SAMPLE_TRAIN = 10000
 CV_HOLDOUT_THRESHOLD = 100000
flaml/data.py (file header lost in this capture; name inferred from the functions below)

@@ -1,6 +1,6 @@
 '''!
  * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License.
 '''
 
 import numpy as np
@@ -8,9 +8,11 @@ from scipy.sparse import vstack, issparse
 import pandas as pd
 from .training_log import training_log_reader
 
+from datetime import datetime
+
 
 def load_openml_dataset(dataset_id, data_dir=None, random_state=0):
     '''Load dataset from open ML.
 
     If the file is not cached locally, download it from open ML.
 
@@ -23,7 +25,7 @@ def load_openml_dataset(dataset_id, data_dir=None, random_state=0):
         X_train: A 2d numpy array of training data
         X_test: A 2d numpy array of test data
         y_train: A 1d numpy arrya of labels for training data
         y_test: A 1d numpy arrya of labels for test data
     '''
     import os
     import openml
@@ -58,9 +60,9 @@ def load_openml_dataset(dataset_id, data_dir=None, random_state=0):
 
 
 def load_openml_task(task_id, data_dir):
     '''Load task from open ML.
 
     Use the first fold of the task.
     If the file is not cached locally, download it from open ML.
 
     Args:
@@ -71,7 +73,7 @@ def load_openml_task(task_id, data_dir):
         X_train: A 2d numpy array of training data
         X_test: A 2d numpy array of test data
         y_train: A 1d numpy arrya of labels for training data
         y_test: A 1d numpy arrya of labels for test data
     '''
     import os
     import openml
@@ -115,12 +117,12 @@ def get_output_from_log(filename, time_budget):
 
     Returns:
         training_time_list: A list of the finished time of each logged iter
         best_error_list:
             A list of the best validation error after each logged iter
         error_list: A list of the validation error of each logged iter
         config_list:
             A list of the estimator, sample size and config of each logged iter
         logged_metric_list: A list of the logged metric of each logged iter
     '''
 
     best_config = None
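For reference, a hedged sketch of consuming `get_output_from_log` as the docstring above describes; `automl.log` stands for a log produced earlier via `AutoML.fit(log_file_name='automl.log')` and is illustrative:

```python
from flaml.data import get_output_from_log

# unpack the five lists described in the docstring above
time_list, best_error_list, error_list, config_list, metric_list = \
    get_output_from_log(filename='automl.log', time_budget=60)
for t, err in zip(time_list, best_error_list):
    print(f"{t:.1f}s  best validation error so far: {err:.4f}")
```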
@@ -186,17 +188,22 @@ class DataTransformer:
     '''transform X, y
     '''
 
     def fit_transform(self, X, y, task):
         if isinstance(X, pd.DataFrame):
             X = X.copy()
             n = X.shape[0]
-            cat_columns, num_columns = [], []
+            cat_columns, num_columns, datetime_columns = [], [], []
             drop = False
             for column in X.columns:
                 # sklearn\utils\validation.py needs int/float values
+                if X[column].dtype.name == 'datetime64[ns]':
+                    X[column] = X[column].map(datetime.toordinal)
+                    datetime_columns.append(column)
                 if X[column].dtype.name in ('object', 'category'):
                     if X[column].nunique() == 1 or X[column].nunique(
                             dropna=True) == n - X[column].isnull().sum():
                         X.drop(columns=column, inplace=True)
                         drop = True
                     elif X[column].dtype.name == 'category':
                         current_categories = X[column].cat.categories
                         if '__NAN__' not in current_categories:
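The new branch above maps `datetime64[ns]` columns to ordinal day numbers so that downstream sklearn validation sees numeric values. A tiny standalone illustration (the data is made up):

```python
import pandas as pd
from datetime import datetime

X = pd.DataFrame({"when": pd.to_datetime(["2021-03-01", "2021-03-02"])})
X["when"] = X["when"].map(datetime.toordinal)  # same call as in the diff
print(X["when"].tolist())  # [737850, 737851]
```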
@@ -204,28 +211,36 @@ class DataTransformer:
                             '__NAN__').fillna('__NAN__')
                     cat_columns.append(column)
                 else:
-                    X[column].fillna('__NAN__', inplace=True)
+                    X[column] = X[column].fillna('__NAN__')
                     cat_columns.append(column)
             else:
                 # print(X[column].dtype.name)
                 if X[column].nunique(dropna=True) < 2:
                     X.drop(columns=column, inplace=True)
                     drop = True
                 else:
-                    X[column].fillna(np.nan, inplace=True)
+                    X[column] = X[column].fillna(np.nan)
                     num_columns.append(column)
         X = X[cat_columns + num_columns]
         if cat_columns:
             X[cat_columns] = X[cat_columns].astype('category')
         if num_columns:
+            X_num = X[num_columns]
+            if drop and np.issubdtype(X_num.columns.dtype, np.integer):
+                X_num.columns = range(X_num.shape[1])
+            else:
+                drop = False
             from sklearn.impute import SimpleImputer
             from sklearn.compose import ColumnTransformer
             self.transformer = ColumnTransformer([(
                 'continuous',
                 SimpleImputer(missing_values=np.nan, strategy='median'),
-                num_columns)])
-            X[num_columns] = self.transformer.fit_transform(X)
-        self._cat_columns, self._num_columns = cat_columns, num_columns
+                X_num.columns)])
+            X[num_columns] = self.transformer.fit_transform(X_num)
+        self._cat_columns, self._num_columns, self._datetime_columns = \
+            cat_columns, num_columns, datetime_columns
+        self._drop = drop
 
         if task == 'regression':
             self.label_transformer = None
         else:
@@ -235,13 +250,18 @@ class DataTransformer:
         return X, y
 
     def transform(self, X):
         X = X.copy()
         if isinstance(X, pd.DataFrame):
-            cat_columns, num_columns = self._cat_columns, self._num_columns
+            cat_columns, num_columns, datetime_columns = self._cat_columns, \
+                self._num_columns, self._datetime_columns
             X = X[cat_columns + num_columns].copy()
+            if datetime_columns:
+                for dt_column in datetime_columns:
+                    X[dt_column] = X[dt_column].map(datetime.toordinal)
             for column in cat_columns:
                 # print(column, X[column].dtype.name)
                 if X[column].dtype.name == 'object':
-                    X[column].fillna('__NAN__', inplace=True)
+                    X[column] = X[column].fillna('__NAN__')
                 elif X[column].dtype.name == 'category':
                     current_categories = X[column].cat.categories
                     if '__NAN__' not in current_categories:
@@ -250,6 +270,8 @@ class DataTransformer:
             if cat_columns:
                 X[cat_columns] = X[cat_columns].astype('category')
             if num_columns:
-                X[num_columns].fillna(np.nan, inplace=True)
-                X[num_columns] = self.transformer.transform(X)
+                X_num = X[num_columns].fillna(np.nan)
+                if self._drop:
+                    X_num.columns = range(X_num.shape[1])
+                X[num_columns] = self.transformer.transform(X_num)
         return X
flaml/ml.py (189 changed lines)
@@ -1,15 +1,19 @@
 '''!
  * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License.
 '''
 
-from .model import *
-
 import time
+import numpy as np
 import pandas as pd
 from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score, \
     accuracy_score, mean_absolute_error, log_loss, average_precision_score, \
     f1_score
-import numpy as np
 from sklearn.model_selection import RepeatedStratifiedKFold
+from .model import (
+    XGBoostEstimator, XGBoostSklearnEstimator, RandomForestEstimator,
+    LGBMEstimator, LRL1Classifier, LRL2Classifier, CatBoostEstimator,
+    ExtraTreeEstimator, KNeighborsEstimator)
 
 import logging
 logger = logging.getLogger(__name__)
@@ -18,7 +22,6 @@ logger = logging.getLogger(__name__)
 def get_estimator_class(task, estimator_name):
     ''' when adding a new learner, need to add an elif branch '''
 
     if 'xgboost' in estimator_name:
         if 'regression' in task:
             estimator_class = XGBoostEstimator
@@ -31,7 +34,7 @@ def get_estimator_class(task, estimator_name):
     elif 'lrl1' in estimator_name:
         estimator_class = LRL1Classifier
     elif 'lrl2' in estimator_name:
         estimator_class = LRL2Classifier
     elif 'catboost' in estimator_name:
         estimator_class = CatBoostEstimator
     elif 'extra_tree' in estimator_name:
@@ -39,22 +42,24 @@ def get_estimator_class(task, estimator_name):
     elif 'kneighbor' in estimator_name:
         estimator_class = KNeighborsEstimator
     else:
-        raise ValueError(estimator_name + ' is not a built-in learner. '
+        raise ValueError(
+            estimator_name + ' is not a built-in learner. '
             'Please use AutoML.add_learner() to add a customized learner.')
     return estimator_class
 
 
-def sklearn_metric_loss_score(metric_name, y_predict, y_true, labels=None,
-        sample_weight=None):
+def sklearn_metric_loss_score(
+    metric_name, y_predict, y_true, labels=None, sample_weight=None
+):
     '''Loss using the specified metric
 
     Args:
-        metric_name: A string of the mtric name, one of
-            'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'log_loss',
-            'f1', 'ap'
+        metric_name: A string of the metric name, one of
+            'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'log_loss',
+            'f1', 'ap', 'micro_f1', 'macro_f1'
         y_predict: A 1d or 2d numpy array of the predictions which can be
             used to calculate the metric. E.g., 2d for log_loss and 1d
            for others.
         y_true: A 1d numpy array of the true labels
         labels: A 1d numpy array of the unique labels
         sample_weight: A 1d numpy array of the sample weight
@@ -66,69 +71,75 @@ def sklearn_metric_loss_score(metric_name, y_predict, y_true, labels=None,
     if 'r2' in metric_name:
         score = 1.0 - r2_score(y_true, y_predict, sample_weight=sample_weight)
     elif metric_name == 'rmse':
-        score = np.sqrt(mean_squared_error(y_true, y_predict,
-            sample_weight=sample_weight))
+        score = np.sqrt(mean_squared_error(
+            y_true, y_predict, sample_weight=sample_weight))
     elif metric_name == 'mae':
-        score = mean_absolute_error(y_true, y_predict,
-            sample_weight=sample_weight)
+        score = mean_absolute_error(
+            y_true, y_predict, sample_weight=sample_weight)
     elif metric_name == 'mse':
-        score = mean_squared_error(y_true, y_predict,
-            sample_weight=sample_weight)
+        score = mean_squared_error(
+            y_true, y_predict, sample_weight=sample_weight)
     elif metric_name == 'accuracy':
-        score = 1.0 - accuracy_score(y_true, y_predict,
-            sample_weight=sample_weight)
+        score = 1.0 - accuracy_score(
+            y_true, y_predict, sample_weight=sample_weight)
     elif 'roc_auc' in metric_name:
-        score = 1.0 - roc_auc_score(y_true, y_predict,
-            sample_weight=sample_weight)
+        score = 1.0 - roc_auc_score(
+            y_true, y_predict, sample_weight=sample_weight)
     elif 'log_loss' in metric_name:
-        score = log_loss(y_true, y_predict, labels=labels,
-            sample_weight=sample_weight)
+        score = log_loss(
+            y_true, y_predict, labels=labels, sample_weight=sample_weight)
+    elif 'micro_f1' in metric_name:
+        score = 1 - f1_score(
+            y_true, y_predict, sample_weight=sample_weight, average='micro')
+    elif 'macro_f1' in metric_name:
+        score = 1 - f1_score(
+            y_true, y_predict, sample_weight=sample_weight, average='macro')
     elif 'f1' in metric_name:
         score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight)
     elif 'ap' in metric_name:
-        score = 1 - average_precision_score(y_true, y_predict,
-            sample_weight=sample_weight)
+        score = 1 - average_precision_score(
+            y_true, y_predict, sample_weight=sample_weight)
     else:
-        raise ValueError(metric_name+' is not a built-in metric, '
-            'currently built-in metrics are: '
-            'r2, rmse, mae, mse, accuracy, roc_auc, log_loss, f1, ap. '
-            'please pass a customized metric function to AutoML.fit(metric=func)')
+        raise ValueError(
+            metric_name + ' is not a built-in metric, '
+            'currently built-in metrics are: '
+            'r2, rmse, mae, mse, accuracy, roc_auc, log_loss, f1, ap. '
+            'please pass a customized metric function to AutoML.fit(metric=func)')
     return score
 
 
 def get_y_pred(estimator, X, eval_metric, obj):
     if eval_metric in ['roc_auc', 'ap'] and 'binary' in obj:
         y_pred_classes = estimator.predict_proba(X)
-        y_pred = y_pred_classes[:,
-            1] if y_pred_classes.ndim>1 else y_pred_classes
+        y_pred = y_pred_classes[
+            :, 1] if y_pred_classes.ndim > 1 else y_pred_classes
     elif eval_metric in ['log_loss', 'roc_auc']:
         y_pred = estimator.predict_proba(X)
     else:
-        try:
-            y_pred = estimator.predict(X)
-        except:
-            logger.debug("prediction failed. Using a constant predictor.")
-            y_pred = np.ones(X.shape[0])
+        y_pred = estimator.predict(X)
     return y_pred
 
 
-def get_test_loss(estimator, X_train, y_train, X_test, y_test, weight_test,
-        eval_metric, obj, labels=None, budget=None, train_loss=False, fit_kwargs={}):
+def get_test_loss(
+    estimator, X_train, y_train, X_test, y_test, weight_test,
+    eval_metric, obj, labels=None, budget=None, train_loss=False, fit_kwargs={}
+):
     start = time.time()
     train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
     if isinstance(eval_metric, str):
         test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
         test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y, y_test,
-            labels, weight_test)
-        if train_loss != False:
+                                              labels, weight_test)
+        if train_loss is not False:
             test_pred_y = get_y_pred(estimator, X_train, eval_metric, obj)
-            train_loss = sklearn_metric_loss_score(eval_metric, test_pred_y,
-                y_train, labels, fit_kwargs.get('sample_weight'))
+            train_loss = sklearn_metric_loss_score(
+                eval_metric, test_pred_y,
+                y_train, labels, fit_kwargs.get('sample_weight'))
     else:  # customized metric function
         test_loss, train_loss = eval_metric(
             X_test, y_test, estimator, labels, X_train, y_train,
             weight_test, fit_kwargs.get('sample_weight'))
-    train_time = time.time()-start
+    train_time = time.time() - start
     return test_loss, train_time, train_loss
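A quick sanity check of the newly added `'micro_f1'` and `'macro_f1'` options; note the function returns a loss, i.e. 1 minus the score, and the toy labels below are illustrative:

```python
import numpy as np
from flaml.ml import sklearn_metric_loss_score

y_true = np.array([0, 1, 1, 2])
y_pred = np.array([0, 1, 2, 2])
print(sklearn_metric_loss_score('micro_f1', y_pred, y_true))  # 0.25 = 1 - micro F1
print(sklearn_metric_loss_score('macro_f1', y_pred, y_true))
```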
@@ -137,9 +148,11 @@ def train_model(estimator, X_train, y_train, budget, fit_kwargs={}):
     return train_time
 
 
-def evaluate_model(estimator, X_train, y_train, X_val, y_val, weight_val,
-        budget, kf, task, eval_method, eval_metric, best_val_loss, train_loss=False,
-        fit_kwargs={}):
+def evaluate_model(
+    estimator, X_train, y_train, X_val, y_val, weight_val,
+    budget, kf, task, eval_method, eval_metric, best_val_loss, train_loss=False,
+    fit_kwargs={}
+):
     if 'holdout' in eval_method:
         val_loss, train_loss, train_time = evaluate_model_holdout(
             estimator, X_train, y_train, X_val, y_val, weight_val, budget,
@@ -147,33 +160,37 @@ def evaluate_model(estimator, X_train, y_train, X_val, y_val, weight_val,
             fit_kwargs=fit_kwargs)
     else:
         val_loss, train_loss, train_time = evaluate_model_CV(
             estimator, X_train, y_train, budget, kf, task,
             eval_metric, best_val_loss, train_loss=train_loss,
             fit_kwargs=fit_kwargs)
     return val_loss, train_loss, train_time
 
 
-def evaluate_model_holdout(estimator, X_train, y_train, X_val, y_val,
-        weight_val, budget, task, eval_metric, best_val_loss, train_loss=False,
-        fit_kwargs={}):
+def evaluate_model_holdout(
+    estimator, X_train, y_train, X_val, y_val,
+    weight_val, budget, task, eval_metric, best_val_loss, train_loss=False,
+    fit_kwargs={}
+):
     val_loss, train_time, train_loss = get_test_loss(
         estimator, X_train, y_train, X_val, y_val, weight_val, eval_metric,
-        task, budget = budget, train_loss=train_loss, fit_kwargs=fit_kwargs)
-    return val_loss, train_loss, train_time
+        task, budget=budget, train_loss=train_loss, fit_kwargs=fit_kwargs)
+    return val_loss, train_loss, train_time
 
 
-def evaluate_model_CV(estimator, X_train_all, y_train_all, budget, kf,
-        task, eval_metric, best_val_loss, train_loss=False, fit_kwargs={}):
+def evaluate_model_CV(
+    estimator, X_train_all, y_train_all, budget, kf,
+    task, eval_metric, best_val_loss, train_loss=False, fit_kwargs={}
+):
     start_time = time.time()
     total_val_loss = total_train_loss = 0
     train_time = 0
     valid_fold_num = 0
     n = kf.get_n_splits()
     X_train_split, y_train_split = X_train_all, y_train_all
-    if task=='regression':
+    if task == 'regression':
         labels = None
     else:
         labels = np.unique(y_train_all)
 
     if isinstance(kf, RepeatedStratifiedKFold):
         kf = kf.split(X_train_split, y_train_split)
@@ -181,7 +198,7 @@ def evaluate_model_CV(estimator, X_train_all, y_train_all, budget, kf,
         kf = kf.split(X_train_split)
     rng = np.random.RandomState(2020)
     val_loss_list = []
-    budget_per_train = budget / (n+1)
+    budget_per_train = budget / (n + 1)
     if 'sample_weight' in fit_kwargs:
         weight = fit_kwargs['sample_weight']
         weight_val = None
@@ -207,24 +224,27 @@ def evaluate_model_CV(estimator, X_train_all, y_train_all, budget, kf,
                 train_index], weight[val_index]
         val_loss_i, train_time_i, train_loss_i = get_test_loss(
             estimator, X_train, y_train, X_val, y_val, weight_val,
             eval_metric, task, labels, budget_per_train,
             train_loss=train_loss, fit_kwargs=fit_kwargs)
         if weight is not None:
             fit_kwargs['sample_weight'] = weight
         valid_fold_num += 1
         total_val_loss += val_loss_i
-        if train_loss != False:
-            if total_train_loss != 0: total_train_loss += train_loss_i
-            else: total_train_loss = train_loss_i
+        if train_loss is not False:
+            if total_train_loss != 0:
+                total_train_loss += train_loss_i
+            else:
+                total_train_loss = train_loss_i
         train_time += train_time_i
         if valid_fold_num == n:
-            val_loss_list.append(total_val_loss/valid_fold_num)
+            val_loss_list.append(total_val_loss / valid_fold_num)
             total_val_loss = valid_fold_num = 0
         elif time.time() - start_time >= budget:
-            val_loss_list.append(total_val_loss/valid_fold_num)
+            val_loss_list.append(total_val_loss / valid_fold_num)
             break
     val_loss = np.max(val_loss_list)
-    if train_loss != False: train_loss = total_train_loss/n
+    if train_loss is not False:
+        train_loss = total_train_loss / n
     budget -= time.time() - start_time
     if val_loss < best_val_loss and budget > budget_per_train:
         estimator.cleanup()
@@ -232,15 +252,17 @@ def evaluate_model_CV(estimator, X_train_all, y_train_all, budget, kf,
     return val_loss, train_loss, train_time
 
 
-def compute_estimator(X_train, y_train, X_val, y_val, weight_val, budget, kf,
-        config_dic, task, estimator_name, eval_method, eval_metric,
-        best_val_loss = np.Inf, n_jobs=1, estimator_class=None, train_loss=False,
-        fit_kwargs = {}):
+def compute_estimator(
+    X_train, y_train, X_val, y_val, weight_val, budget, kf,
+    config_dic, task, estimator_name, eval_method, eval_metric,
+    best_val_loss=np.Inf, n_jobs=1, estimator_class=None, train_loss=False,
+    fit_kwargs={}
+):
     start_time = time.time()
     estimator_class = estimator_class or get_estimator_class(
         task, estimator_name)
     estimator = estimator_class(
-        **config_dic, task = task, n_jobs=n_jobs)
+        **config_dic, task=task, n_jobs=n_jobs)
     val_loss, train_loss, train_time = evaluate_model(
         estimator, X_train, y_train, X_val, y_val, weight_val, budget, kf, task,
         eval_method, eval_metric, best_val_loss, train_loss=train_loss,
@@ -249,16 +271,17 @@ def compute_estimator(X_train, y_train, X_val, y_val, weight_val, budget, kf,
     return estimator, val_loss, train_loss, train_time, all_time
 
 
-def train_estimator(X_train, y_train, config_dic, task,
-        estimator_name, n_jobs=1, estimator_class=None, budget=None, fit_kwargs={}):
+def train_estimator(
+    X_train, y_train, config_dic, task,
+    estimator_name, n_jobs=1, estimator_class=None, budget=None, fit_kwargs={}
+):
     start_time = time.time()
-    estimator_class = estimator_class or get_estimator_class(task,
-        estimator_name)
-    estimator = estimator_class(**config_dic, task = task,
-        n_jobs=n_jobs)
+    estimator_class = estimator_class or get_estimator_class(
+        task, estimator_name)
+    estimator = estimator_class(**config_dic, task=task, n_jobs=n_jobs)
     if X_train is not None:
-        train_time = train_model(estimator, X_train, y_train, budget,
-            fit_kwargs)
+        train_time = train_model(
+            estimator, X_train, y_train, budget, fit_kwargs)
     else:
         estimator = estimator.estimator_class(**estimator.params)
     train_time = time.time() - start_time
flaml/model.py (494 changed lines)
@@ -1,6 +1,6 @@
 '''!
  * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License.
 '''
 
 import numpy as np
@@ -24,13 +24,13 @@ class BaseEstimator:
     Typical example:
         XGBoostEstimator: for regression
         XGBoostSklearnEstimator: for classification
         LGBMEstimator, RandomForestEstimator, LRL1Classifier, LRL2Classifier:
             for both regression and classification
     '''
 
-    def __init__(self, task = 'binary:logistic', **params):
+    def __init__(self, task='binary:logistic', **params):
         '''Constructor
 
         Args:
             task: A string of the task type, one of
                 'binary:logistic', 'multi:softmax', 'regression'
@@ -43,8 +43,8 @@ class BaseEstimator:
         if '_estimator_type' in params:
             self._estimator_type = params['_estimator_type']
         else:
-            self._estimator_type = "regressor" if task=='regression' \
+            self._estimator_type = "regressor" if task == 'regression' \
                 else "classifier"
 
     def get_params(self, deep=False):
         params = self.params.copy()
@@ -58,7 +58,7 @@ class BaseEstimator:
         return self._model.classes_
 
     @property
     def n_features_in_(self):
         return self.model.n_features_in_
 
     @property
@@ -70,19 +70,19 @@ class BaseEstimator:
     def _preprocess(self, X):
         return X
 
     def _fit(self, X_train, y_train, **kwargs):
 
-        curent_time = time.time()
+        current_time = time.time()
         X_train = self._preprocess(X_train)
         model = self.estimator_class(**self.params)
         model.fit(X_train, y_train, **kwargs)
-        train_time = time.time() - curent_time
+        train_time = time.time() - current_time
         self._model = model
         return train_time
 
     def fit(self, X_train, y_train, budget=None, **kwargs):
         '''Train the model from given training data
 
         Args:
             X_train: A numpy array of training data in shape n*m
             y_train: A numpy array of labels in shape n*1
@@ -95,16 +95,19 @@ class BaseEstimator:
     def predict(self, X_test):
         '''Predict label from features
 
         Args:
             X_test: A numpy array of featurized instances, shape n*m
 
         Returns:
             A numpy array of shape n*1.
             Each element is the label for a instance
         '''
-        X_test = self._preprocess(X_test)
-        return self._model.predict(X_test)
+        if self._model is not None:
+            X_test = self._preprocess(X_test)
+            return self._model.predict(X_test)
+        else:
+            return np.ones(X_test.shape[0])
 
     def predict_proba(self, X_test):
         '''Predict the probability of each class from features
@@ -121,31 +124,31 @@ class BaseEstimator:
             class j
         '''
         if 'regression' in self._task:
-            print('Regression tasks do not support predict_prob')
-            raise ValueError
+            raise ValueError('Regression tasks do not support predict_prob')
         else:
             X_test = self._preprocess(X_test)
             return self._model.predict_proba(X_test)
 
-    def cleanup(self): pass
+    def cleanup(self):
+        pass
 
     @classmethod
     def search_space(cls, **params):
         '''[required method] search space
 
         Returns:
             A dictionary of the search space.
             Each key is the name of a hyperparameter, and value is a dict with
                 its domain and init_value (optional), cat_hp_cost (optional)
                 e.g.,
                 {'domain': tune.randint(lower=1, upper=10), 'init_value': 1}
         '''
         return {}
 
     @classmethod
     def size(cls, config):
         '''[optional method] memory size of the estimator in bytes
 
         Args:
             config - the dict of the hyperparameter config
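The docstring above defines the contract for custom learners. A skeleton of what an implementation might look like (the class and its single hyperparameter are hypothetical, shown only to illustrate the domain / init_value / low_cost_init_value shape):

```python
from flaml import tune
from flaml.model import SKLearnEstimator

class MyEstimator(SKLearnEstimator):
    @classmethod
    def search_space(cls, data_size, **params):
        # one hyperparameter with a domain, an initial value, and the
        # optional low-cost hint, as described in the docstring above
        return {
            'n_estimators': {
                'domain': tune.qloguniform(lower=4, upper=data_size, q=1),
                'init_value': 4,
                'low_cost_init_value': 4,
            },
        }
```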
@@ -160,10 +163,14 @@ class BaseEstimator:
         '''[optional method] relative cost compared to lightgbm'''
         return 1.0
 
+    @classmethod
+    def init(cls):
+        '''[optional method] initialize the class'''
+        pass
+
 
 class SKLearnEstimator(BaseEstimator):
 
     def _preprocess(self, X):
         if isinstance(X, pd.DataFrame):
             X = X.copy()
@@ -174,61 +181,64 @@ class SKLearnEstimator(BaseEstimator):
 
 class LGBMEstimator(BaseEstimator):
 
     @classmethod
     def search_space(cls, data_size, **params):
-        upper = min(32768,int(data_size))
+        upper = min(32768, int(data_size))
         return {
             'n_estimators': {
                 'domain': tune.qloguniform(lower=4, upper=upper, q=1),
                 'init_value': 4,
+                'low_cost_init_value': 4,
             },
-            'max_leaves': {
+            'num_leaves': {
                 'domain': tune.qloguniform(lower=4, upper=upper, q=1),
                 'init_value': 4,
+                'low_cost_init_value': 4,
             },
-            'min_child_weight': {
-                'domain': tune.loguniform(lower=0.001, upper=20.0),
-                'init_value': 20.0,
+            'min_child_samples': {
+                'domain': tune.qloguniform(lower=2, upper=2**7, q=1),
+                'init_value': 20,
             },
             'learning_rate': {
-                'domain': tune.loguniform(lower=0.01, upper=1.0),
+                'domain': tune.loguniform(lower=1 / 1024, upper=1.0),
                 'init_value': 0.1,
             },
             'subsample': {
-                'domain': tune.uniform(lower=0.6, upper=1.0),
+                'domain': tune.uniform(lower=0.1, upper=1.0),
                 'init_value': 1.0,
             },
             'log_max_bin': {
                 'domain': tune.qloguniform(lower=3, upper=10, q=1),
                 'init_value': 8,
             },
             'colsample_bytree': {
-                'domain': tune.uniform(lower=0.7, upper=1.0),
+                'domain': tune.uniform(lower=0.01, upper=1.0),
                 'init_value': 1.0,
             },
             'reg_alpha': {
-                'domain': tune.loguniform(lower=1e-10, upper=1.0),
-                'init_value': 1e-10,
+                'domain': tune.loguniform(lower=1 / 1024, upper=1024),
+                'init_value': 1 / 1024,
             },
             'reg_lambda': {
-                'domain': tune.loguniform(lower=1e-10, upper=1.0),
+                'domain': tune.loguniform(lower=1 / 1024, upper=1024),
                 'init_value': 1.0,
             },
         }
 
     @classmethod
     def size(cls, config):
-        max_leaves = int(round(config['max_leaves']))
+        num_leaves = int(round(config.get('num_leaves') or config['max_leaves']))
         n_estimators = int(round(config['n_estimators']))
-        return (max_leaves*3 + (max_leaves-1)*4 + 1.0)*n_estimators*8
+        return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
 
-    def __init__(self, task='binary:logistic', n_jobs=1,
-        n_estimators=2, max_leaves=2, min_child_weight=1e-3, learning_rate=0.1,
-        subsample=1.0, reg_lambda=1.0, reg_alpha=0.0, colsample_bylevel=1.0,
-        colsample_bytree=1.0, log_max_bin=8, **params):
+    def __init__(
+        self, task='binary:logistic', n_jobs=1,
+        n_estimators=2, num_leaves=2, min_child_samples=20, learning_rate=0.1,
+        subsample=1.0, reg_lambda=1.0, reg_alpha=0.0,
+        colsample_bytree=1.0, log_max_bin=8, **params
+    ):
         super().__init__(task, **params)
         # Default: ‘regression’ for LGBMRegressor,
         # ‘binary’ or ‘multiclass’ for LGBMClassifier
         if 'regression' in task:
             objective = 'regression'
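The net effect of this hunk is a wider LGBM search space keyed on `num_leaves` and `min_child_samples` instead of `max_leaves` and `min_child_weight`, plus `low_cost_init_value` hints. A small sketch for inspecting it (the `data_size` value is arbitrary):

```python
from flaml.model import LGBMEstimator

space = LGBMEstimator.search_space(data_size=10000)
print(sorted(space))          # now includes 'num_leaves', 'min_child_samples'
print(space['n_estimators'])  # {'domain': ..., 'init_value': 4, 'low_cost_init_value': 4}
```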
@@ -236,24 +246,22 @@ class LGBMEstimator(BaseEstimator):
         elif 'binary' in task:
             objective = 'binary'
         elif 'multi' in task:
             objective = 'multiclass'
-        else: objective = 'regression'
+        else:
+            objective = 'regression'
         self.params = {
             "n_estimators": int(round(n_estimators)),
-            "num_leaves": params[
-                'num_leaves'] if 'num_leaves' in params else int(
-                    round(max_leaves)),
-            'objective': params[
-                "objective"] if "objective" in params else objective,
+            "num_leaves": int(round(num_leaves)),
+            'objective': params.get("objective", objective),
             'n_jobs': n_jobs,
             'learning_rate': float(learning_rate),
             'reg_alpha': float(reg_alpha),
             'reg_lambda': float(reg_lambda),
-            'min_child_weight': float(min_child_weight),
-            'colsample_bytree':float(colsample_bytree),
+            'min_child_samples': int(round(min_child_samples)),
+            'colsample_bytree': float(colsample_bytree),
             'subsample': float(subsample),
         }
         self.params['max_bin'] = params['max_bin'] if 'max_bin' in params else (
-            1<<int(round(log_max_bin)))-1
+            1 << int(round(log_max_bin))) - 1
         if 'regression' in task:
             self.estimator_class = LGBMRegressor
         else:
@@ -262,33 +270,35 @@ class LGBMEstimator(BaseEstimator):
         self._train_size = 0
 
     def _preprocess(self, X):
-        if not isinstance(X, pd.DataFrame) and issparse(
-                X) and np.issubdtype(X.dtype, np.integer):
+        if not isinstance(X, pd.DataFrame) and issparse(X) and np.issubdtype(
+                X.dtype, np.integer):
             X = X.astype(float)
         return X
 
     def fit(self, X_train, y_train, budget=None, **kwargs):
         start_time = time.time()
         n_iter = self.params["n_estimators"]
-        if (not self._time_per_iter or
-                abs(self._train_size-X_train.shape[0])>4) and budget is not None:
+        if (not self._time_per_iter or abs(
+                self._train_size - X_train.shape[0]) > 4) and budget is not None:
             self.params["n_estimators"] = 1
             self._t1 = self._fit(X_train, y_train, **kwargs)
             if self._t1 >= budget:
                 self.params["n_estimators"] = n_iter
                 return self._t1
             self.params["n_estimators"] = 4
             self._t2 = self._fit(X_train, y_train, **kwargs)
-            self._time_per_iter = (self._t2 - self._t1)/(
-                self.params["n_estimators"]-1) if self._t2 > self._t1 \
+            self._time_per_iter = (self._t2 - self._t1) / (
+                self.params["n_estimators"] - 1) if self._t2 > self._t1 \
                 else self._t1 if self._t1 else 0.001
             self._train_size = X_train.shape[0]
-            if self._t1+self._t2>=budget or n_iter==self.params["n_estimators"]:
+            if self._t1 + self._t2 >= budget or n_iter == self.params[
+                    "n_estimators"]:
                 self.params["n_estimators"] = n_iter
                 return time.time() - start_time
         if budget is not None:
-            self.params["n_estimators"] = min(n_iter, int((budget-time.time()+
-                start_time-self._t1)/self._time_per_iter+1))
+            self.params["n_estimators"] = min(n_iter, int(
+                (budget - time.time() + start_time - self._t1)
+                / self._time_per_iter + 1))
         if self.params["n_estimators"] > 0:
             self._fit(X_train, y_train, **kwargs)
         self.params["n_estimators"] = n_iter
@@ -299,49 +309,50 @@ class LGBMEstimator(BaseEstimator):
 class XGBoostEstimator(SKLearnEstimator):
     ''' not using sklearn API, used for regression '''
 
     @classmethod
     def search_space(cls, data_size, **params):
-        upper = min(32768,int(data_size))
+        upper = min(32768, int(data_size))
         return {
             'n_estimators': {
                 'domain': tune.qloguniform(lower=4, upper=upper, q=1),
                 'init_value': 4,
+                'low_cost_init_value': 4,
             },
             'max_leaves': {
                 'domain': tune.qloguniform(lower=4, upper=upper, q=1),
                 'init_value': 4,
+                'low_cost_init_value': 4,
             },
             'min_child_weight': {
-                'domain': tune.loguniform(lower=0.001, upper=20.0),
-                'init_value': 20.0,
+                'domain': tune.loguniform(lower=0.001, upper=128),
+                'init_value': 1,
             },
             'learning_rate': {
-                'domain': tune.loguniform(lower=0.01, upper=1.0),
+                'domain': tune.loguniform(lower=1 / 1024, upper=1.0),
                 'init_value': 0.1,
             },
             'subsample': {
-                'domain': tune.uniform(lower=0.6, upper=1.0),
+                'domain': tune.uniform(lower=0.1, upper=1.0),
                 'init_value': 1.0,
             },
             'colsample_bylevel': {
-                'domain': tune.uniform(lower=0.6, upper=1.0),
+                'domain': tune.uniform(lower=0.01, upper=1.0),
                 'init_value': 1.0,
             },
             'colsample_bytree': {
-                'domain': tune.uniform(lower=0.7, upper=1.0),
+                'domain': tune.uniform(lower=0.01, upper=1.0),
                 'init_value': 1.0,
             },
             'reg_alpha': {
-                'domain': tune.loguniform(lower=1e-10, upper=1.0),
-                'init_value': 1e-10,
+                'domain': tune.loguniform(lower=1 / 1024, upper=1024),
+                'init_value': 1 / 1024,
             },
             'reg_lambda': {
-                'domain': tune.loguniform(lower=1e-10, upper=1.0),
+                'domain': tune.loguniform(lower=1 / 1024, upper=1024),
                 'init_value': 1.0,
             },
         }
 
     @classmethod
     def size(cls, config):
         return LGBMEstimator.size(config)
@@ -350,30 +361,31 @@ class XGBoostEstimator(SKLearnEstimator):
     def cost_relative2lgbm(cls):
         return 1.6
 
-    def __init__(self, task='regression', all_thread=False, n_jobs=1,
-        n_estimators=4, max_leaves=4, subsample=1.0, min_child_weight=1,
+    def __init__(
+        self, task='regression', all_thread=False, n_jobs=1,
+        n_estimators=4, max_leaves=4, subsample=1.0, min_child_weight=1,
         learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0, colsample_bylevel=1.0,
-        colsample_bytree=1.0, tree_method='auto', **params):
+        colsample_bytree=1.0, tree_method='auto', **params
+    ):
         super().__init__(task, **params)
         self._n_estimators = int(round(n_estimators))
         self._max_leaves = int(round(max_leaves))
         self.params = {
             'max_leaves': int(round(max_leaves)),
-            'max_depth': 0,
-            'grow_policy': params[
-                "grow_policy"] if "grow_policy" in params else 'lossguide',
-            'tree_method':tree_method,
-            'verbosity': 0,
-            'nthread':n_jobs,
+            'max_depth': params.get('max_depth', 0),
+            'grow_policy': params.get("grow_policy", 'lossguide'),
+            'tree_method': tree_method,
+            'verbosity': params.get('verbosity', 0),
+            'nthread': n_jobs,
             'learning_rate': float(learning_rate),
             'subsample': float(subsample),
             'reg_alpha': float(reg_alpha),
             'reg_lambda': float(reg_lambda),
             'min_child_weight': float(min_child_weight),
-            'booster': params['booster'] if 'booster' in params else 'gbtree',
+            'booster': params.get('booster', 'gbtree'),
             'colsample_bylevel': float(colsample_bylevel),
-            'colsample_bytree':float(colsample_bytree),
-        }
+            'colsample_bytree': float(colsample_bytree),
+            'objective': params.get("objective")
+        }
         if all_thread:
             del self.params['nthread']
@@ -383,22 +395,29 @@ class XGBoostEstimator(SKLearnEstimator):
         return params
 
     def fit(self, X_train, y_train, budget=None, **kwargs):
         start_time = time.time()
         if not issparse(X_train):
             self.params['tree_method'] = 'hist'
             X_train = self._preprocess(X_train)
-        dtrain = xgb.DMatrix(X_train, label=y_train)
-        if self._max_leaves>0:
-            if 'sample_weight' in kwargs:
-                self._model = xgb.train(self.params, dtrain,
-                    self._n_estimators, weight=kwargs['sample_weight'])
-            else:
-                self._model = xgb.train(self.params, dtrain, self._n_estimators)
-            del dtrain
-            train_time = time.time() - start_time
-            return train_time
-        else:
-            return None
+        if 'sample_weight' in kwargs:
+            dtrain = xgb.DMatrix(X_train, label=y_train, weight=kwargs[
+                'sample_weight'])
+        else:
+            dtrain = xgb.DMatrix(X_train, label=y_train)
+
+        objective = self.params.get('objective')
+        if isinstance(objective, str):
+            obj = None
+        else:
+            obj = objective
+            if 'objective' in self.params:
+                del self.params['objective']
+        self._model = xgb.train(self.params, dtrain, self._n_estimators,
+                                obj=obj)
+        self.params['objective'] = objective
+        del dtrain
+        train_time = time.time() - start_time
+        return train_time
 
     def predict(self, X_test):
         if not issparse(X_test):
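The rewritten `fit` threads a callable objective from `self.params['objective']` into `xgb.train(..., obj=...)`, while a string objective stays in the parameter dict. A hedged end-to-end sketch with a hand-rolled squared-error objective (the data is synthetic):

```python
import numpy as np
from flaml.model import XGBoostEstimator

def my_squared_error(preds, dtrain):
    labels = dtrain.get_label()
    return preds - labels, np.ones_like(preds)  # gradient, hessian

X = np.random.rand(100, 5)
y = X @ np.arange(5)
# the callable is picked up via **params and forwarded as obj=...
est = XGBoostEstimator(task='regression', objective=my_squared_error)
est.fit(X, y)
print(est.predict(X)[:3])
```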
@@ -410,38 +429,38 @@ class XGBoostEstimator(SKLearnEstimator):
|
||||
class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
|
||||
''' using sklearn API, used for classification '''
|
||||
|
||||
|
||||
@classmethod
|
||||
def search_space(cls, data_size, **params):
|
||||
def search_space(cls, data_size, **params):
|
||||
return XGBoostEstimator.search_space(data_size)
|
||||
|
||||
@classmethod
|
||||
def cost_relative2lgbm(cls):
|
||||
return XGBoostEstimator.cost_relative2lgbm()
|
||||
|
||||
def __init__(self, task='binary:logistic', n_jobs=1,
|
||||
n_estimators=4, max_leaves=4, subsample=1.0,
|
||||
def __init__(
|
||||
self, task='binary:logistic', n_jobs=1,
|
||||
n_estimators=4, max_leaves=4, subsample=1.0,
|
||||
min_child_weight=1, learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0,
|
||||
colsample_bylevel=1.0, colsample_bytree=1.0, tree_method='hist',
|
||||
**params):
|
||||
colsample_bylevel=1.0, colsample_bytree=1.0, tree_method='hist',
|
||||
**params
|
||||
):
|
||||
super().__init__(task, **params)
|
||||
self.params = {
|
||||
"n_estimators": int(round(n_estimators)),
|
||||
'max_leaves': int(round(max_leaves)),
|
||||
'max_depth': 0,
|
||||
'grow_policy': params[
|
||||
"grow_policy"] if "grow_policy" in params else 'lossguide',
|
||||
'tree_method':tree_method,
|
||||
'verbosity': 0,
|
||||
'n_jobs': n_jobs,
|
||||
'learning_rate': float(learning_rate),
|
||||
'subsample': float(subsample),
|
||||
'reg_alpha': float(reg_alpha),
|
||||
'reg_lambda': float(reg_lambda),
|
||||
'min_child_weight': float(min_child_weight),
|
||||
'booster': params['booster'] if 'booster' in params else 'gbtree',
|
||||
'colsample_bylevel': float(colsample_bylevel),
|
||||
'colsample_bytree': float(colsample_bytree),
|
||||
"n_estimators": int(round(n_estimators)),
|
||||
'max_leaves': int(round(max_leaves)),
|
||||
'max_depth': 0,
|
||||
'grow_policy': params.get("grow_policy", 'lossguide'),
|
||||
'tree_method': tree_method,
|
||||
'verbosity': 0,
|
||||
'n_jobs': n_jobs,
|
||||
'learning_rate': float(learning_rate),
|
||||
'subsample': float(subsample),
|
||||
'reg_alpha': float(reg_alpha),
|
||||
'reg_lambda': float(reg_lambda),
|
||||
'min_child_weight': float(min_child_weight),
|
||||
'booster': params.get('booster', 'gbtree'),
|
||||
'colsample_bylevel': float(colsample_bylevel),
|
||||
'colsample_bytree': float(colsample_bytree),
|
||||
}
|
||||
|
||||
if 'regression' in task:
|
||||
@@ -455,18 +474,18 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
|
||||
if issparse(X_train):
|
||||
self.params['tree_method'] = 'auto'
|
||||
return super().fit(X_train, y_train, budget, **kwargs)
|
||||
|
||||
|
||||
|
||||
class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
|
||||
|
||||
|
||||
@classmethod
|
||||
def search_space(cls, data_size, task, **params):
|
||||
def search_space(cls, data_size, task, **params):
|
||||
upper = min(2048, int(data_size))
|
||||
space = {
|
||||
'n_estimators': {
|
||||
'domain': tune.qloguniform(lower=4, upper=upper, q=1),
|
||||
'init_value': 4,
|
||||
'low_cost_init_value': 4,
|
||||
},
|
||||
'max_features': {
|
||||
'domain': tune.loguniform(lower=0.1, upper=1.0),
|
||||
@@ -488,13 +507,15 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
|
||||
def cost_relative2lgbm(cls):
|
||||
        return 2.0

    def __init__(self, task = 'binary:logistic', n_jobs = 1,
            n_estimators = 4, max_features = 1.0, criterion = 'gini', **params):
    def __init__(
        self, task='binary:logistic', n_jobs=1,
        n_estimators=4, max_features=1.0, criterion='gini', **params
    ):
        super().__init__(task, **params)
        self.params = {
            "n_estimators": int(round(n_estimators)),
            "n_jobs": n_jobs,
            'max_features': float(max_features),
            "n_estimators": int(round(n_estimators)),
            "n_jobs": n_jobs,
            'max_features': float(max_features),
        }
        if 'regression' in task:
            self.estimator_class = RandomForestRegressor
@@ -511,12 +532,11 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):

class ExtraTreeEstimator(RandomForestEstimator):

    @classmethod
    def cost_relative2lgbm(cls):
        return 1.9

    def __init__(self, task = 'binary:logistic', **params):
    def __init__(self, task='binary:logistic', **params):
        super().__init__(task, **params)
        if 'regression' in task:
            self.estimator_class = ExtraTreesRegressor
@@ -526,9 +546,8 @@ class ExtraTreeEstimator(RandomForestEstimator):

class LRL1Classifier(SKLearnEstimator):

    @classmethod
    def search_space(cls, **params):
    def search_space(cls, **params):
        return {
            'C': {
                'domain': tune.loguniform(lower=0.03125, upper=32768.0),
@@ -540,66 +559,67 @@ class LRL1Classifier(SKLearnEstimator):
    def cost_relative2lgbm(cls):
        return 160

    def __init__(self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0,
            **params):
    def __init__(
        self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0,
        **params
    ):
        super().__init__(task, **params)
        self.params = {
            'penalty': 'l1',
            'penalty': params.get("penalty", 'l1'),
            'tol': float(tol),
            'C': float(C),
            'solver': 'saga',
            'solver': params.get("solver", 'saga'),
            'n_jobs': n_jobs,
        }
        if 'regression' in task:
            self.estimator_class = None
            print('LR does not support regression task')
            raise NotImplementedError
            raise NotImplementedError('LR does not support regression task')
        else:
            self.estimator_class = LogisticRegression


class LRL2Classifier(SKLearnEstimator):

    @classmethod
    def search_space(cls, **params):
    def search_space(cls, **params):
        return LRL1Classifier.search_space(**params)

    @classmethod
    def cost_relative2lgbm(cls):
        return 25

    def __init__(self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0,
            **params):
    def __init__(
        self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0,
        **params
    ):
        super().__init__(task, **params)
        self.params = {
            'penalty': 'l2',
            'penalty': params.get("penalty", 'l2'),
            'tol': float(tol),
            'C': float(C),
            'solver': 'lbfgs',
            'solver': params.get("solver", 'lbfgs'),
            'n_jobs': n_jobs,
        }
        if 'regression' in task:
            self.estimator_class = None
            print('LR does not support regression task')
            raise NotImplementedError
            raise NotImplementedError('LR does not support regression task')
        else:
            self.estimator_class = LogisticRegression

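The switch from hard-coded `'penalty'`/`'solver'` values to `params.get(...)` lets callers override those settings through `**params` while keeping the old defaults. A minimal sketch of the pattern, using a hypothetical `MyEstimator` stand-in rather than FLAML's actual class hierarchy:

import logging

class MyEstimator:
    # hypothetical class illustrating the params.get(...) override pattern
    def __init__(self, task='binary:logistic', n_jobs=1, **params):
        self.params = {
            # defaults are kept unless the caller passes a value in **params
            'penalty': params.get('penalty', 'l1'),
            'solver': params.get('solver', 'saga'),
            'n_jobs': n_jobs,
        }

est = MyEstimator(solver='liblinear')
assert est.params['solver'] == 'liblinear'  # override respected
assert est.params['penalty'] == 'l1'        # default kept
logging.getLogger(__name__).debug(est.params)
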
class CatBoostEstimator(BaseEstimator):

    _time_per_iter = None
    _train_size = 0

    @classmethod
    def search_space(cls, data_size, **params):
        upper = max(min(round(1500000/data_size),150), 11)
    def search_space(cls, data_size, **params):
        upper = max(min(round(1500000 / data_size), 150), 11)
        return {
            'early_stopping_rounds': {
                'domain': tune.qloguniform(lower=10, upper=upper, q=1),
                'init_value': 10,
                'low_cost_init_value': 10,
            },
            'learning_rate': {
                'domain': tune.loguniform(lower=.005, upper=.2),
@@ -611,23 +631,29 @@ class CatBoostEstimator(BaseEstimator):
    def size(cls, config):
        n_estimators = 8192
        max_leaves = 64
        return (max_leaves*3 + (max_leaves-1)*4 + 1.0)*n_estimators*8
        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8

    @classmethod
    def cost_relative2lgbm(cls):
        return 15

    def __init__(self, task = 'binary:logistic', n_jobs=1,
            n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params):
    @classmethod
    def init(cls):
        CatBoostEstimator._time_per_iter = None
        CatBoostEstimator._train_size = 0

    def __init__(
        self, task='binary:logistic', n_jobs=1,
        n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params
    ):
        super().__init__(task, **params)
        self.params = {
            "early_stopping_rounds": int(round(early_stopping_rounds)),
            "n_estimators": n_estimators,
            "n_estimators": n_estimators,
            'learning_rate': learning_rate,
            'thread_count': n_jobs,
            'verbose': False,
            'random_seed': params[
                "random_seed"] if "random_seed" in params else 10242048,
            'verbose': params.get('verbose', False),
            'random_seed': params.get("random_seed", 10242048),
        }
        if 'regression' in task:
            from catboost import CatBoostRegressor
@@ -649,69 +675,78 @@ class CatBoostEstimator(BaseEstimator):
                include='category').columns)
        else:
            cat_features = []
        if (not CatBoostEstimator._time_per_iter or
                abs(CatBoostEstimator._train_size-len(y_train))>4) and budget:
            # measure the time per iteration
            self.params["n_estimators"] = 1
            CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
            CatBoostEstimator._smallmodel.fit(X_train, y_train,
                cat_features=cat_features, **kwargs)
            CatBoostEstimator._t1 = time.time() - start_time
            if CatBoostEstimator._t1 >= budget:
                self.params["n_estimators"] = n_iter
        from catboost import CatBoostError
        try:
            if (not CatBoostEstimator._time_per_iter or abs(
                    CatBoostEstimator._train_size - len(y_train)) > 4) and budget:
                # measure the time per iteration
                self.params["n_estimators"] = 1
                CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
                CatBoostEstimator._smallmodel.fit(
                    X_train, y_train, cat_features=cat_features, **kwargs)
                CatBoostEstimator._t1 = time.time() - start_time
                if CatBoostEstimator._t1 >= budget:
                    self.params["n_estimators"] = n_iter
                    self._model = CatBoostEstimator._smallmodel
                    return CatBoostEstimator._t1
                self.params["n_estimators"] = 4
                CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
                CatBoostEstimator._smallmodel.fit(
                    X_train, y_train, cat_features=cat_features, **kwargs)
                CatBoostEstimator._time_per_iter = (
                    time.time() - start_time - CatBoostEstimator._t1) / (
                    self.params["n_estimators"] - 1)
                if CatBoostEstimator._time_per_iter <= 0:
                    CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
                CatBoostEstimator._train_size = len(y_train)
                if time.time() - start_time >= budget or n_iter == self.params[
                        "n_estimators"]:
                    self.params["n_estimators"] = n_iter
                    self._model = CatBoostEstimator._smallmodel
                    return time.time() - start_time
            if budget:
                train_times = 1
                self.params["n_estimators"] = min(n_iter, int(
                    (budget - time.time() + start_time - CatBoostEstimator._t1)
                    / train_times / CatBoostEstimator._time_per_iter + 1))
                self._model = CatBoostEstimator._smallmodel
                return CatBoostEstimator._t1
            self.params["n_estimators"] = 4
            CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
            CatBoostEstimator._smallmodel.fit(X_train, y_train,
                cat_features=cat_features, **kwargs)
            CatBoostEstimator._time_per_iter = (time.time() - start_time -
                CatBoostEstimator._t1)/(self.params["n_estimators"]-1)
            if CatBoostEstimator._time_per_iter <= 0:
                CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
            CatBoostEstimator._train_size = len(y_train)
            if time.time()-start_time>=budget or n_iter==self.params[
                "n_estimators"]:
                self.params["n_estimators"] = n_iter
                self._model = CatBoostEstimator._smallmodel
                return time.time()-start_time
        if budget:
            train_times = 1
            self.params["n_estimators"] = min(n_iter, int((budget-time.time()+
                start_time-CatBoostEstimator._t1)/train_times/
                CatBoostEstimator._time_per_iter+1))
            self._model = CatBoostEstimator._smallmodel
        if self.params["n_estimators"] > 0:
            l = max(int(len(y_train)*0.9), len(y_train)-1000)
            X_tr, y_tr = X_train[:l], y_train[:l]
            if 'sample_weight' in kwargs:
                weight = kwargs['sample_weight']
                if weight is not None: kwargs['sample_weight'] = weight[:l]
            else: weight = None
            from catboost import Pool
            model = self.estimator_class(**self.params)
            model.fit(X_tr, y_tr, cat_features=cat_features, eval_set=Pool(
                data=X_train[l:], label=y_train[l:], cat_features=cat_features),
                **kwargs)
            if weight is not None: kwargs['sample_weight'] = weight
            # print(self.params["n_estimators"], model.get_best_iteration())
            self._model = model
            if self.params["n_estimators"] > 0:
                n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
                X_tr, y_tr = X_train[:n], y_train[:n]
                if 'sample_weight' in kwargs:
                    weight = kwargs['sample_weight']
                    if weight is not None:
                        kwargs['sample_weight'] = weight[:n]
                else:
                    weight = None
                from catboost import Pool
                model = self.estimator_class(**self.params)
                model.fit(
                    X_tr, y_tr, cat_features=cat_features,
                    eval_set=Pool(
                        data=X_train[n:], label=y_train[n:],
                        cat_features=cat_features),
                    **kwargs)  # model.get_best_iteration()
                if weight is not None:
                    kwargs['sample_weight'] = weight
                self._model = model
        except CatBoostError:
            self._model = None
            self.params["n_estimators"] = n_iter
        train_time = time.time() - start_time
        # print(budget, train_time)
        return train_time

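The budget handling above first runs one- and four-iteration probe fits to estimate the per-iteration training cost, then caps `n_estimators` so the full fit stays within the remaining time budget. A simplified sketch of that arithmetic (standalone; the timings are illustrative, not FLAML's API):

import time

def cap_n_estimators(n_iter, budget, start_time, t1, time_per_iter):
    # t1: measured wall time of a 1-iteration probe fit;
    # time_per_iter: marginal cost of each extra iteration,
    # estimated from the difference between the 4- and 1-iteration probes
    remaining = budget - (time.time() - start_time) - t1
    return min(n_iter, int(remaining / time_per_iter + 1))

# e.g., with ~60s left and 0.05s per iteration, at most 1201 trees are trained
print(cap_n_estimators(8192, 60.5, time.time(), 0.5, 0.05))
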
class KNeighborsEstimator(BaseEstimator):

    @classmethod
    def search_space(cls, data_size, **params):
        upper = min(512, int(data_size/2))
    def search_space(cls, data_size, **params):
        upper = min(512, int(data_size / 2))
        return {
            'n_neighbors': {
                'domain': tune.qloguniform(lower=1, upper=upper, q=1),
                'init_value': 5,
                'low_cost_init_value': 1,
            },
        }

@@ -719,12 +754,13 @@ class KNeighborsEstimator(BaseEstimator):
    def cost_relative2lgbm(cls):
        return 30

    def __init__(self, task='binary:logistic', n_jobs=1,
            n_neighbors=5, **params):
    def __init__(
        self, task='binary:logistic', n_jobs=1, n_neighbors=5, **params
    ):
        super().__init__(task, **params)
        self.params= {
        self.params = {
            'n_neighbors': int(round(n_neighbors)),
            'weights': 'distance',
            'weights': params.get('weights', 'distance'),
            'n_jobs': n_jobs,
        }
        if 'regression' in task:
@@ -737,10 +773,8 @@ class KNeighborsEstimator(BaseEstimator):
    def _preprocess(self, X):
        if isinstance(X, pd.DataFrame):
            cat_columns = X.select_dtypes(['category']).columns
            # print(X.dtypes)
            # print(cat_columns)
            if X.shape[1] == len(cat_columns):
                raise ValueError(
                    "kneighbor requires at least one numeric feature")
            X = X.drop(cat_columns, axis=1)
                    "kneighbor requires at least one numeric feature")
            X = X.drop(cat_columns, axis=1)
        return X

@@ -1,2 +1,2 @@
from .blendsearch import CFO, BlendSearch
from .flow2 import FLOW2
from .blendsearch import CFO, BlendSearch, BlendSearchTuner
from .flow2 import FLOW2

@@ -3,10 +3,11 @@
 * Licensed under the MIT License. See LICENSE file in the
 * project root for license information.
'''
from typing import Dict, Optional, List, Tuple
from typing import Dict, Optional, List, Tuple, Callable
import numpy as np
import time
import pickle

try:
    from ray.tune.suggest import Searcher
    from ray.tune.suggest.optuna import OptunaSearch as GlobalSearch
@@ -25,19 +26,27 @@ class BlendSearch(Searcher):
    '''class for BlendSearch algorithm
    '''

    cost_attr = "time_total_s"  # cost attribute in result
    lagrange = '_lagrange'  # suffix for lagrange-modified metric
    penalty = 1e+10  # penalty term for constraints

    def __init__(self,
                 metric: Optional[str] = None,
                 mode: Optional[str] = None,
                 space: Optional[dict] = None,
                 points_to_evaluate: Optional[List[Dict]] = None,
                 points_to_evaluate: Optional[List[dict]] = None,
                 low_cost_partial_config: Optional[dict] = None,
                 cat_hp_cost: Optional[dict] = None,
                 prune_attr: Optional[str] = None,
                 min_resource: Optional[float] = None,
                 max_resource: Optional[float] = None,
                 reduction_factor: Optional[float] = None,
                 resources_per_trial: Optional[dict] = None,
                 global_search_alg: Optional[Searcher] = None,
                 mem_size = None):
                 config_constraints: Optional[
                     List[Tuple[Callable[[dict], float], str, float]]] = None,
                 metric_constraints: Optional[
                     List[Tuple[str, str, float]]] = None,
                 seed: Optional[int] = 20):
        '''Constructor

        Args:
@@ -45,65 +54,76 @@ class BlendSearch(Searcher):
                minimization or maximization.
            mode: A string in ['min', 'max'] to specify the objective as
            space: A dictionary to specify the search space.
            points_to_evaluate: Initial parameter suggestions to be run first.
                The first element needs to be a dictionary from a subset of
                controlled dimensions to the initial low-cost values.
            points_to_evaluate: Initial parameter suggestions to be run first.
            low_cost_partial_config: A dictionary from a subset of
                controlled dimensions to the initial low-cost values.
                e.g.,

                .. code-block:: python

                    [{'epochs': 1}]

                    {'n_estimators': 4, 'max_leaves': 4}

            cat_hp_cost: A dictionary from a subset of categorical dimensions
                to the relative cost of each choice.
                to the relative cost of each choice.
                e.g.,

                .. code-block:: python

                    {'tree_method': [1, 1, 2]}

                i.e., the relative cost of the

                i.e., the relative cost of the
                three choices of 'tree_method' is 1, 1 and 2 respectively.
            prune_attr: A string of the attribute used for pruning.
            prune_attr: A string of the attribute used for pruning.
                Not necessarily in space.
                When prune_attr is in space, it is a hyperparameter, e.g.,
                When prune_attr is in space, it is a hyperparameter, e.g.,
                'n_iters', and the best value is unknown.
                When prune_attr is not in space, it is a resource dimension,
                When prune_attr is not in space, it is a resource dimension,
                e.g., 'sample_size', and the peak performance is assumed
                to be at the max_resource.
            min_resource: A float of the minimal resource to use for the
            min_resource: A float of the minimal resource to use for the
                prune_attr; only valid if prune_attr is not in space.
            max_resource: A float of the maximal resource to use for the
            max_resource: A float of the maximal resource to use for the
                prune_attr; only valid if prune_attr is not in space.
            reduction_factor: A float of the reduction factor used for
                incremental pruning.
            resources_per_trial: A dictionary of the resources permitted per
                trial, such as 'mem'.
            global_search_alg: A Searcher instance as the global search
                instance. If omitted, Optuna is used. The following algos have
                known issues when used as global_search_alg:
                - HyperOptSearch raises exception sometimes
                - TuneBOHB has its own scheduler
            mem_size: A function to estimate the memory size for a given config.
            config_constraints: A list of config constraints to be satisfied.
                e.g.,

                .. code-block:: python

                    config_constraints = [(mem_size, '<=', 1024**3)]

                mem_size is a function which produces a float number for the bytes
                needed for a config.
                It is used to skip configs which do not fit in memory.
            metric_constraints: A list of metric constraints to be satisfied.
                e.g., `[('precision', '>=', 0.9)]`
            seed: An integer of the random seed.
        '''
        self._metric, self._mode = metric, mode
        if points_to_evaluate: init_config = points_to_evaluate[0]
        else: init_config = {}
        self._points_to_evaluate = points_to_evaluate
        init_config = low_cost_partial_config or {}
        self._points_to_evaluate = points_to_evaluate or []
        self._config_constraints = config_constraints
        self._metric_constraints = metric_constraints
        if self._metric_constraints:
            # metric modified by lagrange
            metric += self.lagrange
        if global_search_alg is not None:
            self._gs = global_search_alg
        elif getattr(self, '__name__', None) != 'CFO':
            self._gs = GlobalSearch(space=space, metric=metric, mode=mode)
        else:
            self._gs = None
        self._ls = LocalSearch(init_config, metric, mode, cat_hp_cost, space,
            prune_attr, min_resource, max_resource, reduction_factor)
        self._resources_per_trial = resources_per_trial
        self._mem_size = mem_size
        self._mem_threshold = resources_per_trial.get(
            'mem') if resources_per_trial else None
        self._ls = LocalSearch(
            init_config, metric, mode, cat_hp_cost, space,
            prune_attr, min_resource, max_resource, reduction_factor, seed)
        self._init_search()

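With the new `config_constraints` and `metric_constraints` arguments, a searcher can be told to skip configs whose estimated resource use exceeds a threshold and to penalize trials that violate a metric threshold. A hedged usage sketch (the `mem_size` estimator and search space below are made up for illustration):

from flaml.searcher.blendsearch import BlendSearch
from flaml import tune

def mem_size(config):
    # hypothetical estimator: bytes needed grow with n_estimators
    return config['n_estimators'] * 4000

searcher = BlendSearch(
    metric='val_loss', mode='min',
    space={'n_estimators': tune.qloguniform(lower=4, upper=32768, q=1)},
    low_cost_partial_config={'n_estimators': 4},
    config_constraints=[(mem_size, '<=', 1024 ** 3)],
    metric_constraints=[('precision', '>=', 0.9)],
)
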
    def set_search_properties(self,
                              metric: Optional[str] = None,
                              mode: Optional[str] = None,
@@ -113,8 +133,16 @@ class BlendSearch(Searcher):
                self._deadline = config.get('time_budget_s') + time.time()
            if 'metric_target' in config:
                self._metric_target = config.get('metric_target')
        else:
            self._metric, self._mode = metric, mode
        else:
            if metric:
                self._metric = metric
                if self._metric_constraints:
                    # metric modified by lagrange
                    metric += self.lagrange
                    # TODO: don't change metric for global search methods that
                    # can handle constraints already
            if mode:
                self._mode = mode
        self._ls.set_search_properties(metric, mode, config)
        if self._gs is not None:
            self._gs.set_search_properties(metric, mode, config)
@@ -128,30 +156,51 @@ class BlendSearch(Searcher):
        self._search_thread_pool = {
            # id: int -> thread: SearchThread
            0: SearchThread(self._ls.mode, self._gs)
        }
        self._thread_count = 1  # total # threads created
        }
        self._thread_count = 1  # total # threads created
        self._init_used = self._ls.init_config is None
        self._trial_proposed_by = {}  # trial_id: str -> thread_id: int
        self._admissible_min = self._ls.normalize(self._ls.init_config)
        self._admissible_max = self._admissible_min.copy()
        self._result = {}  # config_signature: tuple -> result: Dict
        self._trial_proposed_by = {}  # trial_id: str -> thread_id: int
        self._ls_bound_min = self._ls.normalize(self._ls.init_config)
        self._ls_bound_max = self._ls_bound_min.copy()
        self._gs_admissible_min = self._ls_bound_min.copy()
        self._gs_admissible_max = self._ls_bound_max.copy()
        self._result = {}  # config_signature: tuple -> result: Dict
        self._deadline = np.inf
        if self._metric_constraints:
            self._metric_constraint_satisfied = False
            self._metric_constraint_penalty = [
                self.penalty for _ in self._metric_constraints]
        else:
            self._metric_constraint_satisfied = True
            self._metric_constraint_penalty = None

    def save(self, checkpoint_path: str):
        save_object = (self._metric_target, self._search_thread_pool,
            self._thread_count, self._init_used, self._trial_proposed_by,
            self._admissible_min, self._admissible_max, self._result,
            self._deadline)
        save_object = self
        with open(checkpoint_path, "wb") as outputFile:
            pickle.dump(save_object, outputFile)

    def restore(self, checkpoint_path: str):
        with open(checkpoint_path, "rb") as inputFile:
            save_object = pickle.load(inputFile)
        self._metric_target, self._search_thread_pool, \
            self._thread_count, self._init_used, self._trial_proposed_by, \
            self._admissible_min, self._admissible_max, self._result, \
            self._deadline = save_object
            state = pickle.load(inputFile)
        self._metric_target = state._metric_target
        self._search_thread_pool = state._search_thread_pool
        self._thread_count = state._thread_count
        self._init_used = state._init_used
        self._trial_proposed_by = state._trial_proposed_by
        self._ls_bound_min = state._ls_bound_min
        self._ls_bound_max = state._ls_bound_max
        self._gs_admissible_min = state._gs_admissible_min
        self._gs_admissible_max = state._gs_admissible_max
        self._result = state._result
        self._deadline = state._deadline
        self._metric, self._mode = state._metric, state._mode
        self._points_to_evaluate = state._points_to_evaluate
        self._gs = state._gs
        self._ls = state._ls
        self._config_constraints = state._config_constraints
        self._metric_constraints = state._metric_constraints
        self._metric_constraint_satisfied = state._metric_constraint_satisfied
        self._metric_constraint_penalty = state._metric_constraint_penalty

    def restore_from_dir(self, checkpoint_dir: str):
        super.restore_from_dir(checkpoint_dir)
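Pickling the whole searcher object (rather than a hand-maintained tuple of selected fields) keeps `save`/`restore` automatically in sync as new attributes are added. A minimal self-contained round-trip sketch of the same pattern, outside FLAML:

import pickle

class Checkpointable:
    # toy stand-in for the searcher: pickling the whole object means
    # newly added attributes are saved without touching save()/restore()
    def __init__(self):
        self.state = {'best': 0.5}

    def save(self, path):
        with open(path, 'wb') as f:
            pickle.dump(self, f)

    def restore(self, path):
        with open(path, 'rb') as f:
            other = pickle.load(f)
        self.__dict__.update(other.__dict__)

s = Checkpointable()
s.save('/tmp/searcher.pkl')
t = Checkpointable()
t.restore('/tmp/searcher.pkl')
assert t.state == {'best': 0.5}
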
@@ -160,56 +209,90 @@ class BlendSearch(Searcher):
                          error: bool = False):
        ''' search thread updater and cleaner
        '''
        metric_constraint_satisfied = True
        if result and not error and self._metric_constraints:
            # account for metric constraints if any
            objective = result[self._metric]
            for i, constraint in enumerate(self._metric_constraints):
                metric_constraint, sign, threshold = constraint
                value = result.get(metric_constraint)
                if value:
                    # sign is <= or >=
                    sign_op = 1 if sign == '<=' else -1
                    violation = (value - threshold) * sign_op
                    if violation > 0:
                        # add penalty term to the metric
                        objective += self._metric_constraint_penalty[
                            i] * violation * self._ls.metric_op
                        metric_constraint_satisfied = False
                        if self._metric_constraint_penalty[i] < self.penalty:
                            self._metric_constraint_penalty[i] += violation
            result[self._metric + self.lagrange] = objective
            if metric_constraint_satisfied and not self._metric_constraint_satisfied:
                # found a feasible point
                self._metric_constraint_penalty = [1 for _ in self._metric_constraints]
            self._metric_constraint_satisfied |= metric_constraint_satisfied
        thread_id = self._trial_proposed_by.get(trial_id)
        if thread_id in self._search_thread_pool:
        if thread_id in self._search_thread_pool:
            self._search_thread_pool[thread_id].on_trial_complete(
                trial_id, result, error)
                trial_id, result, error)
            del self._trial_proposed_by[trial_id]
            # if not thread_id: logger.info(f"result {result}")
        if result:
            config = {}
            for key, value in result.items():
                if key.startswith('config/'):
                    config[key[7:]] = value
            if error:  # remove from result cache
            if error:  # remove from result cache
                del self._result[self._ls.config_signature(config)]
            else:  # add to result cache
            else:  # add to result cache
                self._result[self._ls.config_signature(config)] = result
                # update target metric if improved
                if (result[self._metric]-self._metric_target)*self._ls.metric_op<0:
                    self._metric_target = result[self._metric]
                if thread_id:  # from local search
                    # update admissible region
                    normalized_config = self._ls.normalize(config)
                    for key in self._admissible_min:
                        value = normalized_config[key]
                        if value > self._admissible_max[key]:
                            self._admissible_max[key] = value
                        elif value < self._admissible_min[key]:
                            self._admissible_min[key] = value
                elif self._create_condition(result):
                    # thread creator
                    self._search_thread_pool[self._thread_count] = SearchThread(
                        self._ls.mode,
                        self._ls.create(config, result[self._metric], cost=result[
                            "time_total_s"])
                    )
                    thread_id = self._thread_count
                    self._thread_count += 1

                # update target metric if improved
                objective = result[
                    self._metric + self.lagrange] if self._metric_constraints \
                    else result[self._metric]
                if (objective - self._metric_target) * self._ls.metric_op < 0:
                    self._metric_target = objective
                if not thread_id and metric_constraint_satisfied \
                        and self._create_condition(result):
                    # thread creator
                    self._search_thread_pool[self._thread_count] = SearchThread(
                        self._ls.mode,
                        self._ls.create(
                            config, objective, cost=result[self.cost_attr])
                    )
                    thread_id = self._thread_count
                    self._thread_count += 1
                    self._update_admissible_region(
                        config, self._ls_bound_min, self._ls_bound_max)
                elif thread_id and not self._metric_constraint_satisfied:
                    # no point has been found to satisfy metric constraint
                    self._expand_admissible_region()
                # reset admissible region to ls bounding box
                self._gs_admissible_min.update(self._ls_bound_min)
                self._gs_admissible_max.update(self._ls_bound_max)
        # cleaner
        # logger.info(f"thread {thread_id} in search thread pool="
        #     f"{thread_id in self._search_thread_pool}")
        if thread_id and thread_id in self._search_thread_pool:
            # local search thread
            self._clean(thread_id)

    def _update_admissible_region(self, config, admissible_min, admissible_max):
        # update admissible region
        normalized_config = self._ls.normalize(config)
        for key in admissible_min:
            value = normalized_config[key]
            if value > admissible_max[key]:
                admissible_max[key] = value
            elif value < admissible_min[key]:
                admissible_min[key] = value

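The metric-constraint handling above adds `penalty * violation` to the objective (in the minimizing direction) and grows the penalty until a feasible point is found. A small worked sketch of the same arithmetic, outside the class:

def penalized_objective(result, metric, constraints, penalties, metric_op=1):
    # metric_op is 1 when minimizing, -1 when maximizing
    objective = result[metric]
    for i, (name, sign, threshold) in enumerate(constraints):
        sign_op = 1 if sign == '<=' else -1
        violation = (result[name] - threshold) * sign_op
        if violation > 0:
            objective += penalties[i] * violation * metric_op
    return objective

# precision 0.8 misses the >= 0.9 constraint by 0.1, so the loss is inflated
print(penalized_objective(
    {'val_loss': 0.30, 'precision': 0.8},
    'val_loss', [('precision', '>=', 0.9)], [100]))  # 0.30 + 100 * 0.1 = 10.3
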
    def _create_condition(self, result: Dict) -> bool:
        ''' create thread condition
        '''
        if len(self._search_thread_pool) < 2: return True
        obj_median = np.median([thread.obj_best1 for id, thread in
            self._search_thread_pool.items() if id])
        if len(self._search_thread_pool) < 2:
            return True
        obj_median = np.median(
            [thread.obj_best1 for id, thread in self._search_thread_pool.items()
             if id])
        return result[self._metric] * self._ls.metric_op < obj_median

    def _clean(self, thread_id: int):
@@ -219,38 +302,46 @@ class BlendSearch(Searcher):
        assert thread_id
        todelete = set()
        for id in self._search_thread_pool:
            if id and id!=thread_id:
            if id and id != thread_id:
                if self._inferior(id, thread_id):
                    todelete.add(id)
        for id in self._search_thread_pool:
            if id and id!=thread_id:
            if id and id != thread_id:
                if self._inferior(thread_id, id):
                    todelete.add(thread_id)
                    break
        # logger.info(f"thread {thread_id}.converged="
        #     f"{self._search_thread_pool[thread_id].converged}")
                    break
        if self._search_thread_pool[thread_id].converged:
            todelete.add(thread_id)
            for key in self._admissible_min:
                self._admissible_max[key] += self._ls.STEPSIZE
                self._admissible_min[key] -= self._ls.STEPSIZE
            self._expand_admissible_region()
        for id in todelete:
            del self._search_thread_pool[id]

    def _expand_admissible_region(self):
        for key in self._ls_bound_max:
            self._ls_bound_max[key] += self._ls.STEPSIZE
            self._ls_bound_min[key] -= self._ls.STEPSIZE

    def _inferior(self, id1: int, id2: int) -> bool:
        ''' whether thread id1 is inferior to id2
        '''
        t1 = self._search_thread_pool[id1]
        t2 = self._search_thread_pool[id2]
        if t1.obj_best1 < t2.obj_best2: return False
        elif t1.resource and t1.resource < t2.resource: return False
        elif t2.reach(t1): return True
        else: return False
        if t1.obj_best1 < t2.obj_best2:
            return False
        elif t1.resource and t1.resource < t2.resource:
            return False
        elif t2.reach(t1):
            return True
        return False

    def on_trial_result(self, trial_id: str, result: Dict):
        if trial_id not in self._trial_proposed_by: return
        if trial_id not in self._trial_proposed_by:
            return
        thread_id = self._trial_proposed_by[trial_id]
        if not thread_id in self._search_thread_pool: return
        if thread_id not in self._search_thread_pool:
            return
        if result and self._metric_constraints:
            result[self._metric + self.lagrange] = result[self._metric]
        self._search_thread_pool[thread_id].on_trial_result(trial_id, result)

    def suggest(self, trial_id: str) -> Optional[Dict]:
@@ -258,79 +349,98 @@ class BlendSearch(Searcher):
        '''
        if self._init_used and not self._points_to_evaluate:
            choice, backup = self._select_thread()
            # logger.debug(f"choice={choice}, backup={backup}")
            if choice < 0: return None  # timeout
            if choice < 0:  # timeout
                return None
            self._use_rs = False
            config = self._search_thread_pool[choice].suggest(trial_id)
            if choice and config is None:
                # local search thread finishes
                if self._search_thread_pool[choice].converged:
                    self._expand_admissible_region()
                    del self._search_thread_pool[choice]
                return None
            # preliminary check; not checking config validation
            skip = self._should_skip(choice, trial_id, config)
            if skip:
                if choice:
                    # logger.info(f"skipping choice={choice}, config={config}")
                if choice:
                    return None
                # use rs
                # use rs when BO fails to suggest a config
                self._use_rs = True
                for _, generated in generate_variants(
                        {'config': self._ls.space}):
                for _, generated in generate_variants({'config': self._ls.space}):
                    config = generated['config']
                    break
                # logger.debug(f"random config {config}")
                    break  # get one random config
                skip = self._should_skip(choice, trial_id, config)
                if skip: return None
            # if not choice: logger.info(config)
            if choice or backup == choice or self._valid(config):
                if skip:
                    return None
            if choice or self._valid(config):
                # LS or valid or no backup choice
                self._trial_proposed_by[trial_id] = choice
            else:  # invalid config proposed by GS
                if not self._use_rs:
                    self._search_thread_pool[choice].on_trial_complete(
                        trial_id, {}, error=True)  # tell GS there is an error
            else:  # invalid config proposed by GS
                self._use_rs = False
                config = self._search_thread_pool[backup].suggest(trial_id)
                skip = self._should_skip(backup, trial_id, config)
                if skip:
                    return None
                self._trial_proposed_by[trial_id] = backup
                choice = backup
            # if choice: self._pending.add(choice)  # local search thread pending
            if not choice:
                if self._ls._resource:
                    # TODO: add resource to config proposed by GS, min or median?
                if choice == backup:
                    # use CFO's init point
                    init_config = self._ls.init_config
                    config = self._ls.complete_config(
                        init_config, self._ls_bound_min, self._ls_bound_max)
                    self._trial_proposed_by[trial_id] = choice
                else:
                    config = self._search_thread_pool[backup].suggest(trial_id)
                    skip = self._should_skip(backup, trial_id, config)
                    if skip:
                        return None
                    self._trial_proposed_by[trial_id] = backup
                    choice = backup
            if not choice:  # global search
                if self._ls._resource:
                    # TODO: min or median?
                    config[self._ls.prune_attr] = self._ls.min_resource
                # temporarily relax admissible region for parallel proposals
                self._update_admissible_region(
                    config, self._gs_admissible_min, self._gs_admissible_max)
            else:
                self._update_admissible_region(
                    config, self._ls_bound_min, self._ls_bound_max)
                self._gs_admissible_min.update(self._ls_bound_min)
                self._gs_admissible_max.update(self._ls_bound_max)
            self._result[self._ls.config_signature(config)] = {}
        else:  # use init config
        else:  # use init config
            init_config = self._points_to_evaluate.pop(
                0) if self._points_to_evaluate else self._ls.init_config
            if init_config==self._ls.init_config:
                config = self._ls.complete_config(init_config,
                    self._admissible_min, self._admissible_max)
                # logger.info(f"reset config to {config}")
            else: config = init_config
            config = self._ls.complete_config(
                init_config, self._ls_bound_min, self._ls_bound_max)
            config_signature = self._ls.config_signature(config)
            result = self._result.get(config_signature)
            if result:  # tried before
                # self.on_trial_complete(trial_id, result)
            if result:  # tried before
                return None
            elif result is None:  # not tried before
            elif result is None:  # not tried before
                self._result[config_signature] = {}
            else: return None  # running but no result yet
            else:  # running but no result yet
                return None
            self._init_used = True
            self._trial_proposed_by[trial_id] = 0
        # logger.info(f"config={config}")
        return config

    def _should_skip(self, choice, trial_id, config) -> bool:
        ''' if config is None or config's result is known or above mem threshold
        ''' if config is None or config's result is known or constraints are violated
            return True; o.w. return False
        '''
        if config is None: return True
        if config is None:
            return True
        config_signature = self._ls.config_signature(config)
        exists = config_signature in self._result
        # check mem constraint
        if not exists and self._mem_threshold and self._mem_size(
                config)>self._mem_threshold:
            self._result[config_signature] = {
                self._metric:np.inf*self._ls.metric_op, 'time_total_s':1}
            exists = True
        # check constraints
        if not exists and self._config_constraints:
            for constraint in self._config_constraints:
                func, sign, threshold = constraint
                value = func(config)
                if (sign == '<=' and value > threshold
                        or sign == '>=' and value < threshold):
                    self._result[config_signature] = {
                        self._metric: np.inf * self._ls.metric_op,
                        'time_total_s': 1,
                    }
                    exists = True
                    break
        if exists:
            if not self._use_rs:
                result = self._result.get(config_signature)
@@ -340,10 +450,10 @@ class BlendSearch(Searcher):
                    if choice:
                        # local search thread
                        self._clean(choice)
                else:
                    # tell the thread there is an error
                    self._search_thread_pool[choice].on_trial_complete(
                        trial_id, {}, error=True)
                # else:
                #     # tell the thread there is an error
                #     self._search_thread_pool[choice].on_trial_complete(
                #         trial_id, {}, error=True)
            return True
        return False

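Config constraints are checked before training: a violating config is cached with an infinite objective so it is never proposed again. A standalone sketch of that check:

def violates(config, config_constraints):
    # each constraint is (func, sign, threshold), as in BlendSearch
    for func, sign, threshold in config_constraints:
        value = func(config)
        if (sign == '<=' and value > threshold
                or sign == '>=' and value < threshold):
            return True
    return False

constraints = [(lambda c: c['n_estimators'] * 4000, '<=', 1024 ** 3)]
print(violates({'n_estimators': 10}, constraints))       # False
print(violates({'n_estimators': 10 ** 6}, constraints))  # True
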
@@ -352,28 +462,30 @@ class BlendSearch(Searcher):
        '''
        # update priority
        min_eci = self._deadline - time.time()
        if min_eci <= 0: return -1, -1
        if min_eci <= 0:
            return -1, -1
        max_speed = 0
        for thread in self._search_thread_pool.values():
            if thread.speed > max_speed: max_speed = thread.speed
        for thread in self._search_thread_pool.values():
        for thread in self._search_thread_pool.values():
            if thread.speed > max_speed:
                max_speed = thread.speed
        for thread in self._search_thread_pool.values():
            thread.update_eci(self._metric_target, max_speed)
            if thread.eci < min_eci: min_eci = thread.eci
            if thread.eci < min_eci:
                min_eci = thread.eci
        for thread in self._search_thread_pool.values():
            thread.update_priority(min_eci)

        top_thread_id = backup_thread_id = 0
        priority1 = priority2 = self._search_thread_pool[0].priority
        # logger.debug(f"priority of thread 0={priority1}")
        for thread_id, thread in self._search_thread_pool.items():
            # if thread_id:
            #     logger.debug(
            #     print(
            #         f"priority of thread {thread_id}={thread.priority}")
            # logger.debug(
            #     f"thread {thread_id}.can_suggest={thread.can_suggest}")
            if thread_id and thread.can_suggest:
                priority = thread.priority
                if priority > priority1:
                if priority > priority1:
                    priority1 = priority
                    top_thread_id = thread_id
                if priority > priority2 or backup_thread_id == 0:
@@ -384,18 +496,101 @@ class BlendSearch(Searcher):
    def _valid(self, config: Dict) -> bool:
        ''' config validator
        '''
        for key in self._admissible_min:
        normalized_config = self._ls.normalize(config)
        for key in self._gs_admissible_min:
            if key in config:
                value = config[key]
                # logger.info(
                #     f"{key},{value},{self._admissible_min[key]},{self._admissible_max[key]}")
                if value<self._admissible_min[
                    key] or value>self._admissible_max[key]:
                value = normalized_config[key]
                if value + self._ls.STEPSIZE < self._gs_admissible_min[key] \
                        or value > self._gs_admissible_max[key] + self._ls.STEPSIZE:
                    return False
        return True


class CFO(BlendSearch):
try:
    from ray.tune import (uniform, quniform, choice, randint, qrandint, randn,
                          qrandn, loguniform, qloguniform)
except ImportError:
    from ..tune.sample import (uniform, quniform, choice, randint, qrandint, randn,
                               qrandn, loguniform, qloguniform)

try:
    from nni.tuner import Tuner as NNITuner
    from nni.utils import extract_scalar_reward

    class BlendSearchTuner(BlendSearch, NNITuner):
        '''Tuner class for NNI
        '''

        def receive_trial_result(self, parameter_id, parameters, value,
                                 **kwargs):
            '''
            Receive trial's final result.
            parameter_id: int
            parameters: object created by 'generate_parameters()'
            value: final metrics of the trial, including default metric
            '''
            result = {}
            for key, value in parameters.items():
                result['config/' + key] = value
            reward = extract_scalar_reward(value)
            result[self._metric] = reward
            # if nni does not report training cost,
            # using sequence as an approximation.
            # if no sequence, using a constant 1
            result[self.cost_attr] = value.get(self.cost_attr, value.get(
                'sequence', 1))
            self.on_trial_complete(str(parameter_id), result)
        ...

        def generate_parameters(self, parameter_id, **kwargs) -> Dict:
            '''
            Returns a set of trial (hyper-)parameters, as a serializable object
            parameter_id: int
            '''
            return self.suggest(str(parameter_id))
        ...

        def update_search_space(self, search_space):
            '''
            Tuners are advised to support updating search space at run-time.
            If a tuner can only set search space once before generating first hyper-parameters,
            it should explicitly document this behaviour.
            search_space: JSON object created by experiment owner
            '''
            config = {}
            for key, value in search_space.items():
                v = value.get("_value")
                _type = value['_type']
                if _type == 'choice':
                    config[key] = choice(v)
                elif _type == 'randint':
                    config[key] = randint(v[0], v[1] - 1)
                elif _type == 'uniform':
                    config[key] = uniform(v[0], v[1])
                elif _type == 'quniform':
                    config[key] = quniform(v[0], v[1], v[2])
                elif _type == 'loguniform':
                    config[key] = loguniform(v[0], v[1])
                elif _type == 'qloguniform':
                    config[key] = qloguniform(v[0], v[1], v[2])
                elif _type == 'normal':
                    config[key] = randn(v[1], v[2])
                elif _type == 'qnormal':
                    config[key] = qrandn(v[1], v[2], v[3])
                else:
                    raise ValueError(
                        f'unsupported type in search_space {_type}')
            self._ls.set_search_properties(None, None, config)
            if self._gs is not None:
                self._gs.set_search_properties(None, None, config)
            self._init_search()

except ImportError:
    class BlendSearchTuner(BlendSearch):
        pass

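`update_search_space` converts an NNI search-space JSON into tune domains. A hedged example of the input it expects, following NNI's documented `_type`/`_value` convention (the hyperparameter names here are illustrative):

# An NNI-style search space and the tune domains it would map to:
search_space = {
    'learning_rate': {'_type': 'loguniform', '_value': [1e-5, 1e-1]},
    'batch_size': {'_type': 'choice', '_value': [16, 32, 64]},
    'num_layers': {'_type': 'randint', '_value': [2, 10]},
}
# -> loguniform(1e-5, 1e-1), choice([16, 32, 64]), randint(2, 9)
# (the randint branch subtracts 1, which suggests the target domain's
#  upper bound is treated as inclusive while NNI's is exclusive)
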
class CFO(BlendSearchTuner):
    ''' class for CFO algorithm
    '''

@@ -403,7 +598,7 @@ class CFO(BlendSearch):

    def suggest(self, trial_id: str) -> Optional[Dict]:
        # Number of threads is 1 or 2. Thread 0 is a vacuous thread
        assert len(self._search_thread_pool)<3, len(self._search_thread_pool)
        assert len(self._search_thread_pool) < 3, len(self._search_thread_pool)
        if len(self._search_thread_pool) < 2:
            # When a local converges, the number of threads is 1
            # Need to restart
@@ -412,9 +607,92 @@ class CFO(BlendSearch):

    def _select_thread(self) -> Tuple:
        for key in self._search_thread_pool:
            if key: return key, key
            if key:
                return key, key

    def _create_condition(self, result: Dict) -> bool:
        ''' create thread condition
        '''
        return len(self._search_thread_pool) < 2


def create_next(client):
    ''' functional API for HPO
    '''
    state = client.get_state()
    setting = client.get_settings_dict()
    if state is None:
        # first time call
        try:
            from ray.tune.trial import Trial
        except ImportError:
            from ..tune.trial import Trial
        method = setting.get('method', 'BlendSearch')
        mode = client.get_optimization_mode()
        if mode == 'minimize':
            mode = 'min'
        elif mode == 'maximize':
            mode = 'max'
        metric = client.get_primary_metric()
        hp_space = client.get_hyperparameter_space_dict()
        space = {}
        for key, value in hp_space.items():
            t = value["type"]
            if t == 'continuous':
                space[key] = uniform(value["min_val"], value["max_val"])
            elif t == 'discrete':
                space[key] = choice(value["values"])
            elif t == 'integral':
                space[key] = randint(value["min_val"], value["max_val"])
            elif t == 'quantized_continuous':
                space[key] = quniform(value["min_val"], value["max_val"],
                                      value["step"])
        init_config = setting.get('init_config', None)
        if init_config:
            points_to_evaluate = [init_config]
        else:
            points_to_evaluate = None
        cat_hp_cost = setting.get('cat_hp_cost', None)

        if method == 'BlendSearch':
            Algo = BlendSearch
        elif method == 'CFO':
            Algo = CFO
        algo = Algo(
            mode=mode,
            metric=metric,
            space=space,
            points_to_evaluate=points_to_evaluate,
            cat_hp_cost=cat_hp_cost,
        )
        time_budget_s = setting.get('time_budget_s', None)
        if time_budget_s:
            algo._deadline = time_budget_s + time.time()
        config2trialid = {}
    else:
        algo = state['algo']
        config2trialid = state['config2trialid']
    # update finished trials
    trials_completed = []
    for trial in client.get_trials():
        if trial.end_time is not None:
            signature = algo._ls.config_signature(trial.hp_sample)
            if not algo._result[signature]:
                trials_completed.append((trial.end_time, trial))
    trials_completed.sort()
    for t in trials_completed:
        end_time, trial = t
        trial_id = config2trialid[trial.hp_sample]
        result = {}
        result[algo.metric] = trial.metrics[algo.metric].values[-1]
        result[algo.cost_attr] = (end_time - trial.start_time).total_seconds()
        for key, value in trial.hp_sample.items():
            result['config/' + key] = value
        algo.on_trial_complete(trial_id, result=result)
    # propose new trial
    trial_id = Trial.generate_id()
    config = algo.suggest(trial_id)
    if config:
        config2trialid[config] = trial_id
        client.launch_trial(config)
    client.update_state({'algo': algo, 'config2trialid': config2trialid})

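`create_next` is written against a stateful client object: each call replays finished trials into the searcher and launches at most one new trial. A hedged sketch of the polling loop a caller might run around it (the client is whatever object implements `get_state`, `get_trials`, `launch_trial`, and the other methods used above; none of this loop is FLAML's own API):

import time

def tune_with_client(client, rounds=100, interval=1.0):
    # repeatedly let the searcher consume finished trials and propose new ones
    for _ in range(rounds):
        create_next(client)   # defined above
        time.sleep(interval)  # wait for running trials to make progress
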
@@ -9,9 +9,10 @@ try:
    from ray.tune.suggest import Searcher
    from ray.tune.suggest.variant_generator import generate_variants
    from ray.tune import sample
    from ray.tune.utils.util import flatten_dict, unflatten_dict
except ImportError:
    from .suggestion import Searcher
    from .variant_generator import generate_variants
    from .variant_generator import generate_variants, flatten_dict, unflatten_dict
    from ..tune import sample


@@ -41,32 +42,34 @@ class FLOW2(Searcher):
        '''Constructor

        Args:
            init_config: a dictionary from a subset of controlled dimensions
                to the initial low-cost values. e.g. {'epochs':1}
            init_config: a dictionary of a partial or full initial config,
                e.g. from a subset of controlled dimensions
                to the initial low-cost values.
                e.g. {'epochs': 1}
            metric: A string of the metric name to optimize for.
                minimization or maximization.
            mode: A string in ['min', 'max'] to specify the objective as
            cat_hp_cost: A dictionary from a subset of categorical dimensions
                to the relative cost of each choice.
                to the relative cost of each choice.
                e.g.,

                .. code-block:: python

                    {'tree_method': [1, 1, 2]}

                i.e., the relative cost of the

                i.e., the relative cost of the
                three choices of 'tree_method' is 1, 1 and 2 respectively.
            space: A dictionary to specify the search space.
            prune_attr: A string of the attribute used for pruning.
            prune_attr: A string of the attribute used for pruning.
                Not necessarily in space.
                When prune_attr is in space, it is a hyperparameter, e.g.,
                When prune_attr is in space, it is a hyperparameter, e.g.,
                'n_iters', and the best value is unknown.
                When prune_attr is not in space, it is a resource dimension,
                When prune_attr is not in space, it is a resource dimension,
                e.g., 'sample_size', and the peak performance is assumed
                to be at the max_resource.
            min_resource: A float of the minimal resource to use for the
            min_resource: A float of the minimal resource to use for the
                prune_attr; only valid if prune_attr is not in space.
            max_resource: A float of the maximal resource to use for the
            max_resource: A float of the maximal resource to use for the
                prune_attr; only valid if prune_attr is not in space.
            resource_multiple_factor: A float of the multiplicative factor
                used for increasing resource.
@@ -86,6 +89,7 @@ class FLOW2(Searcher):
        elif mode == "min":
            self.metric_op = 1.
        self.space = space or {}
        self.space = flatten_dict(self.space, prevent_delimiter=True)
        self._random = np.random.RandomState(seed)
        self._seed = seed
        if not init_config:
@@ -94,8 +98,9 @@ class FLOW2(Searcher):
                "For cost-frugal search, "
                "consider providing init values for cost-related hps via "
                "'init_config'."
            )
        self.init_config = self.best_config = init_config
            )
        self.init_config = init_config
        self.best_config = flatten_dict(init_config)
        self.cat_hp_cost = cat_hp_cost
        self.prune_attr = prune_attr
        self.min_resource = min_resource
@@ -109,11 +114,11 @@ class FLOW2(Searcher):
    def _init_search(self):
        self._tunable_keys = []
        self._bounded_keys = []
        # choices of numeric values. integer encoding.
        # choices of numeric values. integer encoding.
        # value: (ordered list of choices,
        #  dict from choice to index in the ordered list)
        self._ordered_choice_hp = {}
        # choices with given cost. integer encoding.
        self._ordered_choice_hp = {}
        # choices with given cost. integer encoding.
        # value: (array of choices ordered by cost,
        #  dict from choice to index in the ordered array)
        self._ordered_cat_hp = {}
@@ -121,97 +126,102 @@ class FLOW2(Searcher):
        self._unordered_cat_hp = {}
        self._cat_hp_cost = {}
        for key, domain in self.space.items():
            assert not isinstance(domain, dict), \
                key+"'s domain is grid search which is not supported in FLOW2."
            assert not (isinstance(domain, dict) and 'grid_search' in domain), \
                f"{key}'s domain is grid search, not supported in FLOW^2."
            if callable(getattr(domain, 'get_sampler', None)):
                self._tunable_keys.append(key)
                sampler = domain.get_sampler()
                if isinstance(sampler, sample.Quantized):
                    sampler_inner = sampler.get_sampler()
                    if str(sampler_inner) == 'Uniform':
                        self._step_lb = min(
                            self._step_lb, sampler.q/(domain.upper-domain.lower))
                elif isinstance(domain, sample.Integer) and str(
                        sampler) == 'Uniform':
                    self._step_lb = min(
                        self._step_lb, 1.0/(domain.upper-domain.lower))
                elif isinstance(domain, sample.Categorical):
                # if isinstance(sampler, sample.Quantized):
                #     sampler_inner = sampler.get_sampler()
                #     if str(sampler_inner) == 'Uniform':
                #         self._step_lb = min(
                #             self._step_lb, sampler.q/(domain.upper-domain.lower))
                # elif isinstance(domain, sample.Integer) and str(
                #         sampler) == 'Uniform':
                #     self._step_lb = min(
                #         self._step_lb, 1.0/(domain.upper-domain.lower))
                if isinstance(domain, sample.Categorical):
                    cat_hp_cost = self.cat_hp_cost
                    if cat_hp_cost and key in cat_hp_cost:
                        cost = np.array(cat_hp_cost[key])
                        ind = np.argsort(cost)
                        l = np.array(domain.categories)[ind]
                        ordered = np.array(domain.categories)[ind]
                        cost = self._cat_hp_cost[key] = cost[ind]
                        d = {}
                        for i, choice in enumerate(l):
                        for i, choice in enumerate(ordered):
                            d[choice] = i
                        self._ordered_cat_hp[key] = (l, d)
                        self._step_lb = min(self._step_lb, 1.0/len(l))
                        self._ordered_cat_hp[key] = (ordered, d)
                    elif all(isinstance(x, int) or isinstance(x, float)
                             for x in domain.categories):
                        l = sorted(domain.categories)
                             for x in domain.categories):
                        ordered = sorted(domain.categories)
                        d = {}
                        for i, choice in enumerate(l):
                        for i, choice in enumerate(ordered):
                            d[choice] = i
                        self._ordered_choice_hp[key] = (l, d)
                        self._step_lb = min(self._step_lb, 1.0/len(l))
                        self._ordered_choice_hp[key] = (ordered, d)
                    else:
                        self._unordered_cat_hp[key] = l = len(domain.categories)
                        self._step_lb = min(self._step_lb, 1.0/l)
                        self._unordered_cat_hp[key] = len(domain.categories)
                if str(sampler) != 'Normal':
                    self._bounded_keys.append(key)
        self._space_keys = list(self.space.keys())
        if (self.prune_attr and self.prune_attr not in self.space and
                self.max_resource):
        if (self.prune_attr and self.prune_attr not in self.space
                and self.max_resource):
            self._space_keys.append(self.prune_attr)
            self.min_resource = self.min_resource or self._min_resource()
            self._resource = self._round(self.min_resource)
            # logger.info(min_resource)
            # logger.info(max_resource)
            # logger.info(self._resource)
        else: self._resource = None
        else:
            self._resource = None
        self.incumbent = {}
        self.incumbent = self.normalize(self.init_config)
        self.incumbent = self.normalize(self.best_config)  # flattened
        self.best_obj = self.cost_incumbent = None
        self.dim = len(self._tunable_keys)  # total # tunable dimensions
        self._direction_tried = None
        self._direction_tried = None
        self._num_complete4incumbent = self._cost_complete4incumbent = 0
        self._num_allowed4incumbent = 2 * self.dim
        self._proposed_by = {}  # trial_id: int -> incumbent: Dict
        self.step = self.STEPSIZE * np.sqrt(self.dim)
        lb = self.step_lower_bound
        if lb > self.step: self.step = lb * 2
        if lb > self.step:
            self.step = lb * 2
        # upper bound
        self.step_ub = np.sqrt(self.dim)
        if self.step > self.step_ub: self.step = self.step_ub
        if self.step > self.step_ub:
            self.step = self.step_ub
        # maximal # consecutive no improvements
        self.dir = 2**(self.dim)
        self._configs = {}  # dict from trial_id to config
        self._configs = {}  # dict from trial_id to (config, stepsize)
        self._K = 0
        self._iter_best_config = self.trial_count = 1
        self._iter_best_config = self.trial_count_proposed = self.trial_count_complete = 1
        self._num_proposedby_incumbent = 0
        self._reset_times = 0
        # record intermediate trial cost
        self._trial_cost = {}
        self._same = False  # whether the proposed config is the same as best_config
        self._init_phrase = True  # initial phase to increase initial stepsize

    @property
    def step_lower_bound(self) -> float:
        step_lb = self._step_lb
        for key in self._tunable_keys:
            if key not in self.best_config:
                continue
            domain = self.space[key]
            sampler = domain.get_sampler()
            if isinstance(sampler, sample.Quantized):
                sampler_inner = sampler.get_sampler()
                if str(sampler_inner) == 'LogUniform':
                    step_lb = min(step_lb,
                        np.log(1.0+sampler.q/self.best_config[key])/
                        np.log(domain.upper/domain.lower))
            elif isinstance(domain, sample.Integer) and str(
                    sampler) == 'LogUniform':
                step_lb = min(step_lb,
                    np.log(1.0+1.0/self.best_config[key])/
                    np.log(domain.upper/domain.lower))
        if np.isinf(step_lb): step_lb = self.STEP_LOWER_BOUND
        else: step_lb *= np.sqrt(self.dim)
                    step_lb = min(
                        step_lb, np.log(1.0 + sampler.q / self.best_config[key])
                        / np.log(domain.upper / domain.lower))
            elif isinstance(domain, sample.Integer) and str(sampler) == 'LogUniform':
                step_lb = min(
                    step_lb, np.log(1.0 + 1.0 / self.best_config[key])
                    / np.log(domain.upper / domain.lower))
        if np.isinf(step_lb):
            step_lb = self.STEP_LOWER_BOUND
        else:
            step_lb *= np.sqrt(self.dim)
        return step_lb

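For a log-scale dimension, the smallest meaningful step in the normalized [0, 1] space is the quantization step relative to the incumbent value, measured in log units: log(1 + q / best) / log(upper / lower). A worked sketch of that formula:

import numpy as np

def log_step_lb(q, best, lower, upper):
    # smallest normalized move that changes a quantized log-uniform value
    return np.log(1.0 + q / best) / np.log(upper / lower)

# with domain [4, 32768], q=1 and incumbent 4, one unit is ~2.5% of the range
print(log_step_lb(1, 4, 4, 32768))  # ~0.0248
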
    @property
    def resource(self) -> float:
        return self._resource
@@ -228,58 +238,61 @@ class FLOW2(Searcher):
            return self.max_resource
        return resource

    def rand_vector_gaussian(self, dim, std = 1.0):
    def rand_vector_gaussian(self, dim, std=1.0):
        vec = self._random.normal(0, std, dim)
        return vec

    def complete_config(self, partial_config: Dict,
            lower: Optional[Dict] = None, upper: Optional[Dict] = None) -> Dict:

    def complete_config(
        self, partial_config: Dict,
        lower: Optional[Dict] = None, upper: Optional[Dict] = None
    ) -> Dict:
        ''' generate a complete config from the partial config input
        add minimal resource to config if available
        '''
        if self._reset_times:  # not the first time, use random gaussian
        if self._reset_times and partial_config == self.init_config:
            # not the first time to complete init_config, use random gaussian
            normalized = self.normalize(partial_config)
            for key in normalized:
                # don't change unordered cat choice
                # don't change unordered cat choice
                if key not in self._unordered_cat_hp:
                    if upper and lower:
                        u, l = upper[key], lower[key]
                        gauss_std = u-l
                        up, low = upper[key], lower[key]
                        gauss_std = up - low or self.STEPSIZE
                        # allowed bound
                        u += self.STEPSIZE
                        l -= self.STEPSIZE
                        up += self.STEPSIZE
                        low -= self.STEPSIZE
                    elif key in self._bounded_keys:
                        u, l, gauss_std = 1, 0, 1.0
                    else: u, l, gauss_std = np.Inf, -np.Inf, 1.0
                        up, low, gauss_std = 1, 0, 1.0
                    else:
                        up, low, gauss_std = np.Inf, -np.Inf, 1.0
                    if key in self._bounded_keys:
                        u = min(u, 1)
                        l = max(l, 0)
                        up = min(up, 1)
                        low = max(low, 0)
                    delta = self.rand_vector_gaussian(1, gauss_std)[0]
                    normalized[key] = max(l, min(u, normalized[key] + delta))
                    normalized[key] = max(low, min(up, normalized[key] + delta))
            # use best config for unordered cat choice
            config = self.denormalize(normalized)
        else:
            # first time init_config, or other configs, take as is
            config = partial_config.copy()

        if partial_config == self.init_config:
            self._reset_times += 1
        config = flatten_dict(config)
        for key, value in self.space.items():
            if key not in config:
                config[key] = value
        logger.debug(f'before random {config}')
        for _, generated in generate_variants({'config': config}):
            config = generated['config']
            break
        logger.debug(f'after random {config}')

        if self._resource:
            config[self.prune_attr] = self.min_resource
        self._reset_times += 1
        return config
        return unflatten_dict(config)

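When the same `init_config` is completed more than once, the new code perturbs the normalized values with Gaussian noise scaled to the current admissible range, then clips to slightly widened bounds. A standalone sketch of that perturb-and-clip step (the constants here are illustrative stand-ins for FLOW2's attributes):

import numpy as np

STEPSIZE = 0.1  # plays the role of FLOW2.STEPSIZE; value is illustrative
rng = np.random.RandomState(20)

def perturb(value, low, up):
    # scale the noise by the admissible range; widen the clip bounds slightly
    gauss_std = (up - low) or STEPSIZE
    delta = rng.normal(0, gauss_std)
    return max(low - STEPSIZE, min(up + STEPSIZE, value + delta))

print(perturb(0.5, 0.4, 0.6))  # stays within [0.3, 0.7]
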
    def create(self, init_config: Dict, obj: float, cost: float) -> Searcher:
        flow2 = FLOW2(init_config, self.metric, self.mode, self._cat_hp_cost,
                      self.space, self.prune_attr, self.min_resource,
                      self.max_resource, self.resource_multiple_factor,
                      self._seed+1)
                      unflatten_dict(self.space), self.prune_attr,
                      self.min_resource, self.max_resource,
                      self.resource_multiple_factor, self._seed + 1)
        flow2.best_obj = obj * self.metric_op  # minimize internally
        flow2.cost_incumbent = cost
        return flow2
@@ -288,7 +301,7 @@ class FLOW2(Searcher):
        ''' normalize each dimension in config to [0,1]
        '''
        config_norm = {}
        for key, value in config.items():
        for key, value in flatten_dict(config).items():
            if key in self.space:
                # domain: sample.Categorical/Integer/Float/Function
                domain = self.space[key]
@@ -299,16 +312,17 @@ class FLOW2(Searcher):
                # normalize categorical
                if key in self._ordered_cat_hp:
                    l, d = self._ordered_cat_hp[key]
                    config_norm[key] = d[value]/len(l)
                    config_norm[key] = (d[value] + 0.5) / len(l)
                elif key in self._ordered_choice_hp:
                    l, d = self._ordered_choice_hp[key]
                    config_norm[key] = d[value]/len(l)
                    config_norm[key] = (d[value] + 0.5) / len(l)
                elif key in self.incumbent:
                    config_norm[key] = self.incumbent[
                        key] if value == self.best_config[
                        key] else (self.incumbent[
                        key]+1)%self._unordered_cat_hp[key]
                else: config_norm[key] = 0
                        key] + 1) % self._unordered_cat_hp[key]
                else:
                    config_norm[key] = 0
                continue
                # Uniform/LogUniform/Normal/Base
                sampler = domain.get_sampler()
@@ -316,11 +330,11 @@ class FLOW2(Searcher):
                    # sampler is sample.Quantized
                    sampler = sampler.get_sampler()
                if str(sampler) == 'LogUniform':
                    config_norm[key] = np.log(
                        value/domain.lower)/np.log(domain.upper/domain.lower)
                    config_norm[key] = np.log(value / domain.lower) / np.log(
                        domain.upper / domain.lower)
                elif str(sampler) == 'Uniform':
                    config_norm[key] = (
                        value-domain.lower)/(domain.upper-domain.lower)
                        value - domain.lower) / (domain.upper - domain.lower)
                elif str(sampler) == 'Normal':
                    # N(mean, sd) -> N(0,1)
                    config_norm[key] = (value - sampler.mean) / sampler.sd
@@ -328,7 +342,6 @@ class FLOW2(Searcher):
                    # TODO? elif str(sampler) == 'Base':  # sample.Function._CallSampler
                    # e.g., {test: sample_from(lambda spec: randn(10, 2).sample() * 0.01)}
                    config_norm[key] = value
                    # print(key+"'s value is not normalized")
            else:  # prune_attr
                config_norm[key] = value
        return config_norm
|
||||
if key in self._ordered_cat_hp:
|
||||
l, _ = self._ordered_cat_hp[key]
|
||||
n = len(l)
|
||||
config_denorm[key] = l[min(n-1,int(np.floor(value*n)))]
|
||||
config_denorm[key] = l[min(n - 1, int(np.floor(value * n)))]
|
||||
elif key in self._ordered_choice_hp:
|
||||
l, _ = self._ordered_choice_hp[key]
|
||||
n = len(l)
|
||||
config_denorm[key] = l[min(n-1,int(np.floor(value*n)))]
|
||||
config_denorm[key] = l[min(n - 1, int(np.floor(value * n)))]
|
||||
else:
|
||||
assert key in self.incumbent
|
||||
if round(value) == self.incumbent[key]:
|
||||
config_denorm[key] = self.best_config[key]
|
||||
else: # ****random value each time!****
|
||||
config_denorm[key] = self._random.choice([x
|
||||
for x in domain.categories
|
||||
if x!=self.best_config[key]])
|
||||
else: # ****random value each time!****
|
||||
config_denorm[key] = self._random.choice(
|
||||
[x for x in domain.categories
|
||||
if x != self.best_config[key]])
|
||||
continue
|
||||
# Uniform/LogUniform/Normal/Base
|
||||
sampler = domain.get_sampler()
|
||||
@@ -371,10 +384,10 @@ class FLOW2(Searcher):
|
||||
# Handle Log/Uniform
|
||||
if str(sampler) == 'LogUniform':
|
||||
config_denorm[key] = (
|
||||
domain.upper/domain.lower)**value*domain.lower
|
||||
domain.upper / domain.lower) ** value * domain.lower
|
||||
elif str(sampler) == 'Uniform':
|
||||
config_denorm[key] = value * (
|
||||
domain.upper-domain.lower) + domain.lower
|
||||
domain.upper - domain.lower) + domain.lower
|
||||
elif str(sampler) == 'Normal':
|
||||
# denormalization for 'Normal'
|
||||
config_denorm[key] = value * sampler.sd + sampler.mean
|
||||
@@ -388,8 +401,6 @@ class FLOW2(Searcher):
|
||||
# Handle int (4.6 -> 5)
|
||||
if isinstance(domain, sample.Integer):
|
||||
config_denorm[key] = int(round(config_denorm[key]))
|
||||
# Handle int (4.6 -> 4)
|
||||
# config_denorm[key] = domain.cast(config_denorm[key])
|
||||
else: # prune_attr
|
||||
config_denorm[key] = value
|
||||
return config_denorm
|
||||
@@ -402,6 +413,7 @@ class FLOW2(Searcher):
|
||||
self._metric = metric
|
||||
if mode:
|
||||
assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
|
||||
self._mode = mode
|
||||
if mode == "max":
|
||||
self.metric_op = -1.
|
||||
elif mode == "min":
|
||||
@@ -417,61 +429,59 @@ class FLOW2(Searcher):
|
||||
'''
|
||||
# if better, move, reset num_complete and num_proposed
|
||||
# if not better and num_complete >= 2*dim, num_allowed += 2
|
||||
self.trial_count += 1
|
||||
self.trial_count_complete += 1
|
||||
if not error and result:
|
||||
obj = result.get(self._metric)
|
||||
if obj:
|
||||
if obj:
|
||||
obj *= self.metric_op
|
||||
if obj < self.best_obj:
|
||||
self.best_obj, self.best_config = obj, self._configs[
|
||||
trial_id]
|
||||
if self.best_obj is None or obj < self.best_obj:
|
||||
self.best_obj = obj
|
||||
self.best_config, self.step = self._configs[trial_id]
|
||||
self.incumbent = self.normalize(self.best_config)
|
||||
self.cost_incumbent = result.get(self.cost_attr)
|
||||
if self._resource:
|
||||
self._resource = self.best_config[self.prune_attr]
|
||||
self._num_complete4incumbent = 0
|
||||
self._cost_complete4incumbent = 0
|
||||
self._num_proposedby_incumbent = 0
|
||||
self._num_allowed4incumbent = 2 * self.dim
|
||||
self._proposed_by.clear()
|
||||
if self._K > 0:
|
||||
self.step *= np.sqrt(self._K/self._oldK)
|
||||
if self.step > self.step_ub: self.step = self.step_ub
|
||||
self._iter_best_config = self.trial_count
|
||||
# self._oldK must have been set when self._K>0
|
||||
self.step *= np.sqrt(self._K / self._oldK)
|
||||
if self.step > self.step_ub:
|
||||
self.step = self.step_ub
|
||||
self._iter_best_config = self.trial_count_complete
|
||||
return
|
||||
proposed_by = self._proposed_by.get(trial_id)
|
||||
if proposed_by == self.incumbent:
|
||||
# proposed by current incumbent and no better
|
||||
self._num_complete4incumbent += 1
|
||||
cost = result.get(self.cost_attr)
|
||||
if cost: self._cost_complete4incumbent += cost
|
||||
if self._num_complete4incumbent >= 2*self.dim and \
|
||||
self._num_allowed4incumbent == 0:
|
||||
cost = result.get(
|
||||
self.cost_attr) if result else self._trial_cost.get(trial_id)
|
||||
if cost:
|
||||
self._cost_complete4incumbent += cost
|
||||
if self._num_complete4incumbent >= 2 * self.dim and \
|
||||
self._num_allowed4incumbent == 0:
|
||||
self._num_allowed4incumbent = 2
|
||||
if self._num_complete4incumbent == self.dir and (not self._resource
|
||||
or self._resource == self.max_resource):
|
||||
# check stuck condition if using max resource
|
||||
if self.step >= self.step_lower_bound:
|
||||
# decrease step size
|
||||
self._oldK = self._K if self._K else self._iter_best_config
|
||||
self._K = self.trial_count+1
|
||||
self.step *= np.sqrt(self._oldK/self._K)
|
||||
# logger.info(f"step={self.step}, lb={self.step_lower_bound}")
|
||||
if self._num_complete4incumbent == self.dir and (
|
||||
not self._resource or self._resource == self.max_resource):
|
||||
# check stuck condition if using max resource
|
||||
self._num_complete4incumbent -= 2
|
||||
if self._num_allowed4incumbent < 2:
|
||||
self._num_allowed4incumbent = 2
|
||||
# elif proposed_by: # proposed by older incumbent
|
||||
# del self._proposed_by[trial_id]
|
||||
|
||||
# elif proposed_by: del self._proposed_by[trial_id]
|
||||
|
||||
def on_trial_result(self, trial_id: str, result: Dict):
|
||||
''' early update of incumbent
|
||||
'''
|
||||
if result:
|
||||
obj = result.get(self._metric)
|
||||
if obj:
|
||||
if obj:
|
||||
obj *= self.metric_op
|
||||
if obj < self.best_obj:
|
||||
if self.best_obj is None or obj < self.best_obj:
|
||||
self.best_obj = obj
|
||||
config = self._configs[trial_id]
|
||||
config = self._configs[trial_id][0]
|
||||
if self.best_config != config:
|
||||
self.best_config = config
|
||||
if self._resource:
|
||||
@@ -480,14 +490,18 @@ class FLOW2(Searcher):
|
||||
self.cost_incumbent = result.get(self.cost_attr)
|
||||
self._cost_complete4incumbent = 0
|
||||
self._num_complete4incumbent = 0
|
||||
self._num_proposedby_incumbent = 0
|
||||
self._num_allowed4incumbent = 2 * self.dim
|
||||
self._proposed_by.clear()
|
||||
self._iter_best_config = self.trial_count
|
||||
self._iter_best_config = self.trial_count_complete
|
||||
cost = result.get(self.cost_attr)
|
||||
# record the cost in case it is pruned and cost info is lost
|
||||
self._trial_cost[trial_id] = cost
|
||||
|
||||
def rand_vector_unit_sphere(self, dim) -> np.ndarray:
|
||||
vec = self._random.normal(0, 1, dim)
|
||||
mag = np.linalg.norm(vec)
|
||||
return vec/mag
|
||||
mag = np.linalg.norm(vec)
|
||||
return vec / mag
|
||||
|
||||
def suggest(self, trial_id: str) -> Optional[Dict]:
|
||||
''' suggest a new config, one of the following cases:
|
||||
@@ -495,37 +509,75 @@ class FLOW2(Searcher):
|
||||
2. same resource, move from the incumbent to a random direction
|
||||
3. same resource, move from the incumbent to the opposite direction
|
||||
'''
|
||||
self.trial_count_proposed += 1
|
||||
if self._num_complete4incumbent > 0 and self.cost_incumbent and \
|
||||
self._resource and self._resource < self.max_resource and (
|
||||
self._cost_complete4incumbent >=
|
||||
self.cost_incumbent * self.resource_multiple_factor):
|
||||
self._cost_complete4incumbent
|
||||
>= self.cost_incumbent * self.resource_multiple_factor):
|
||||
# consider increasing resource using sum eval cost of complete
|
||||
# configs
|
||||
old_resource = self._resource
|
||||
self._resource = self._round(
|
||||
self._resource * self.resource_multiple_factor)
|
||||
self.cost_incumbent *= self._resource / old_resource
|
||||
config = self.best_config.copy()
|
||||
config[self.prune_attr] = self._resource
|
||||
# self.incumbent[self.prune_attr] = self._resource
|
||||
self._direction_tried = None
|
||||
self._configs[trial_id] = config
|
||||
self._configs[trial_id] = (config, self.step)
|
||||
return config
|
||||
self._num_allowed4incumbent -= 1
|
||||
move = self.incumbent.copy()
|
||||
if self._direction_tried is not None:
|
||||
# return negative direction
|
||||
for i, key in enumerate(self._tunable_keys):
|
||||
move[key] -= self._direction_tried[i]
|
||||
move[key] -= self._direction_tried[i]
|
||||
self._direction_tried = None
|
||||
# propose a new direction
|
||||
self._direction_tried = self.rand_vector_unit_sphere(
|
||||
self.dim) * self.step
|
||||
for i, key in enumerate(self._tunable_keys):
|
||||
move[key] += self._direction_tried[i]
|
||||
else:
|
||||
# propose a new direction
|
||||
self._direction_tried = self.rand_vector_unit_sphere(
|
||||
self.dim) * self.step
|
||||
for i, key in enumerate(self._tunable_keys):
|
||||
move[key] += self._direction_tried[i]
|
||||
self._project(move)
|
||||
config = self.denormalize(move)
|
||||
self._proposed_by[trial_id] = self.incumbent
|
||||
self._configs[trial_id] = config
|
||||
return config
|
||||
self._configs[trial_id] = (config, self.step)
|
||||
self._num_proposedby_incumbent += 1
|
||||
if self._init_phrase:
|
||||
if self._direction_tried is None:
|
||||
if self._same:
|
||||
# check if the new config is different from self.best_config
|
||||
same = True
|
||||
for key, value in config.items():
|
||||
if key not in self.best_config or value != self.best_config[key]:
|
||||
same = False
|
||||
break
|
||||
if same:
|
||||
# increase step size
|
||||
self.step += self.STEPSIZE
|
||||
if self.step > self.step_ub:
|
||||
self.step = self.step_ub
|
||||
else:
|
||||
# check if the new config is different from self.best_config
|
||||
same = True
|
||||
for key, value in config.items():
|
||||
if key not in self.best_config or value != self.best_config[key]:
|
||||
same = False
|
||||
break
|
||||
self._same = same
|
||||
if self._num_proposedby_incumbent == self.dir and (
|
||||
not self._resource or self._resource == self.max_resource):
|
||||
# check stuck condition if using max resource
|
||||
self._num_proposedby_incumbent -= 2
|
||||
self._init_phrase = False
|
||||
if self.step >= self.step_lower_bound:
|
||||
# decrease step size
|
||||
self._oldK = self._K if self._K else self._iter_best_config
|
||||
self._K = self.trial_count_proposed + 1
|
||||
self.step *= np.sqrt(self._oldK / self._K)
|
||||
else:
|
||||
return None
|
||||
return unflatten_dict(config)
|
||||
|
||||
def _project(self, config):
|
||||
''' project normalized config in the feasible region and set prune_attr
|
||||
@@ -533,7 +585,8 @@ class FLOW2(Searcher):
|
||||
for key in self._bounded_keys:
|
||||
value = config[key]
|
||||
config[key] = max(0, min(1, value))
|
||||
if self._resource: config[self.prune_attr] = self._resource
|
||||
if self._resource:
|
||||
config[self.prune_attr] = self._resource
|
||||
|
||||
@property
|
||||
def can_suggest(self) -> bool:
|
||||
@@ -545,6 +598,7 @@ class FLOW2(Searcher):
|
||||
def config_signature(self, config) -> tuple:
|
||||
''' return the signature tuple of a config
|
||||
'''
|
||||
config = flatten_dict(config)
|
||||
value_list = []
|
||||
for key in self._space_keys:
|
||||
if key in config:
|
||||
@@ -567,22 +621,23 @@ class FLOW2(Searcher):
|
||||
def converged(self) -> bool:
|
||||
''' return whether the local search has converged
|
||||
'''
|
||||
if self._num_complete4incumbent < self.dir-2: return False
|
||||
if self._num_complete4incumbent < self.dir - 2:
|
||||
return False
|
||||
# check stepsize after enough configs are completed
|
||||
return self.step < self.step_lower_bound
|
||||
|
||||
def reach(self, other: Searcher) -> bool:
|
||||
''' whether the incumbent can reach the incumbent of other
|
||||
'''
|
||||
config1, config2 = self.best_config, other.best_config
|
||||
config1, config2 = self.best_config, other.best_config
|
||||
incumbent1, incumbent2 = self.incumbent, other.incumbent
|
||||
if self._resource and config1[self.prune_attr]>config2[self.prune_attr]:
|
||||
if self._resource and config1[self.prune_attr] > config2[self.prune_attr]:
|
||||
# resource will not decrease
|
||||
return False
|
||||
for key in self._unordered_cat_hp:
|
||||
# unordered cat choice is hard to reach by chance
|
||||
if config1[key] != config2[key]: return False
|
||||
delta = np.array([incumbent1[key]-incumbent2[key]
|
||||
for key in self._tunable_keys])
|
||||
if config1[key] != config2[key]:
|
||||
return False
|
||||
delta = np.array(
|
||||
[incumbent1[key] - incumbent2[key] for key in self._tunable_keys])
|
||||
return np.linalg.norm(delta) <= self.step
|
||||
|
||||
|
||||
@@ -20,23 +20,26 @@ class SearchThread:
'''

cost_attr = 'time_total_s'
eps = 1e-10

def __init__(self, mode: str = "min",
search_alg: Optional[Searcher] = None):
''' When search_alg is omitted, use local search FLOW2
'''
self._search_alg = search_alg
self._is_ls = isinstance(search_alg, FLOW2)
self._mode = mode
self._metric_op = 1 if mode=='min' else -1
self._metric_op = 1 if mode == 'min' else -1
self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = \
getattr(search_alg, 'cost_incumbent', 0)
self.cost_best2 = 0
self.obj_best1 = self.obj_best2 = getattr(
search_alg, 'best_obj', np.inf) # inherently minimize
# eci: expected cost for improvement
search_alg, 'best_obj', np.inf) # inherently minimize
# eci: estimated cost for improvement
self.eci = self.cost_best
self.priority = self.speed = 0

self._init_config = True

def suggest(self, trial_id: str) -> Optional[Dict]:
''' use the suggest() of the underlying search algorithm
'''
@@ -45,9 +48,9 @@ class SearchThread:
else:
try:
config = self._search_alg.suggest(trial_id)
except:
except FloatingPointError:
logger.warning(
f'The global search method raises error. '
'The global search method raises FloatingPointError. '
'Ignoring for this iteration.')
config = None
return config
@@ -57,37 +60,43 @@ class SearchThread:
self.priority = eci * self.speed - self.obj_best1

def update_eci(self, metric_target: float,
max_speed: Optional[float] = np.inf):
# calculate eci: expected cost for improvement over metric_target;
max_speed: Optional[float] = np.inf):
# calculate eci: estimated cost for improvement over metric_target
best_obj = metric_target * self._metric_op
if not self.speed: self.speed = max_speed
if not self.speed:
self.speed = max_speed
self.eci = max(self.cost_total - self.cost_best1,
self.cost_best1 - self.cost_best2)
if self.obj_best1 > best_obj and self.speed > 0:
self.eci = max(self.eci, 2*(self.obj_best1-best_obj)/self.speed)
self.eci = max(self.eci, 2 * (self.obj_best1 - best_obj) / self.speed)

def _update_speed(self):
# calculate speed; use 0 for invalid speed temporarily
if self.obj_best2 > self.obj_best1:
self.speed = (self.obj_best2 - self.obj_best1) / (
self.cost_total - self.cost_best2)
else: self.speed = 0
self.cost_total - self.cost_best2 + self.eps)
else:
self.speed = 0

def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None,
error: bool = False):
''' update the statistics of the thread
'''
if not self._search_alg: return
if not hasattr(self._search_alg, '_ot_trials') or (not error and
trial_id in self._search_alg._ot_trials):
if not self._search_alg:
return
if not hasattr(self._search_alg, '_ot_trials') or (
not error and trial_id in self._search_alg._ot_trials):
# optuna doesn't handle error
self._search_alg.on_trial_complete(trial_id, result, error)
if self._is_ls or not self._init_config:
self._search_alg.on_trial_complete(trial_id, result, error)
else:
# init config is not proposed by self._search_alg
# under this thread
self._init_config = False
if result:
if self.cost_attr in result:
self.cost_last = result[self.cost_attr]
self.cost_total += self.cost_last
# if not isinstance(self._search_alg, FLOW2):
# logger.info(f"result.metric{result[self._search_alg.metric]}")
if self._search_alg.metric in result:
obj = result[self._search_alg.metric] * self._metric_op
if obj < self.obj_best1:
@@ -98,14 +107,14 @@ class SearchThread:
self.obj_best1 = obj
self.cost_best = self.cost_last
self._update_speed()

def on_trial_result(self, trial_id: str, result: Dict):
''' TODO update the statistics of the thread with partial result?
'''
# print('[SearchThread] on trial result')
if not self._search_alg: return
if not self._search_alg:
return
if not hasattr(self._search_alg, '_ot_trials') or (
trial_id in self._search_alg._ot_trials):
self._search_alg.on_trial_result(trial_id, result)
if self.cost_attr in result and self.cost_last < result[self.cost_attr]:
self.cost_last = result[self.cost_attr]
@@ -129,4 +138,3 @@ class SearchThread:
''' whether the thread can suggest new configs
'''
return self._search_alg.can_suggest

@@ -21,6 +21,11 @@ import logging
import os
import time
from typing import Dict, Optional, Union, List, Tuple
import pickle
from .variant_generator import parse_spec_vars
from ..tune.sample import Categorical, Domain, Float, Integer, LogUniform, \
Quantized, Uniform
from ..tune.trial import flatten_dict, unflatten_dict

logger = logging.getLogger(__name__)

@@ -72,7 +77,7 @@ def log_once(key):
return False
else:
return False

class Searcher:
"""Abstract class for wrapping suggesting algorithms.
@@ -407,12 +412,6 @@ class ConcurrencyLimiter(Searcher):
return self.searcher.set_search_properties(metric, mode, config)

import pickle
from .variant_generator import parse_spec_vars
from ..tune.sample import Categorical, Domain, Float, Integer, LogUniform, \
Quantized, Uniform
from ..tune.trial import flatten_dict, unflatten_dict

try:
import optuna as ot
from optuna.samplers import BaseSampler
@@ -689,4 +688,4 @@ class OptunaSearch(Searcher):
for path, domain in domain_vars
]

return values

@@ -28,6 +28,46 @@ from ..tune.sample import Categorical, Domain, Function
logger = logging.getLogger(__name__)

def flatten_dict(dt, delimiter="/", prevent_delimiter=False):
dt = copy.deepcopy(dt)
if prevent_delimiter and any(delimiter in key for key in dt):
# Raise if delimiter is any of the keys
raise ValueError(
"Found delimiter `{}` in key when trying to flatten array."
"Please avoid using the delimiter in your specification.")
while any(isinstance(v, dict) for v in dt.values()):
remove = []
add = {}
for key, value in dt.items():
if isinstance(value, dict):
for subkey, v in value.items():
if prevent_delimiter and delimiter in subkey:
# Raise if delimiter is in any of the subkeys
raise ValueError(
"Found delimiter `{}` in key when trying to "
"flatten array. Please avoid using the delimiter "
"in your specification.")
add[delimiter.join([key, str(subkey)])] = v
remove.append(key)
dt.update(add)
for k in remove:
del dt[k]
return dt

def unflatten_dict(dt, delimiter="/"):
"""Unflatten dict. Does not support unflattening lists."""
dict_type = type(dt)
out = dict_type()
for key, val in dt.items():
path = key.split(delimiter)
item = out
for k in path[:-1]:
item = item.setdefault(k, dict_type())
item[path[-1]] = val
return out

class TuneError(Exception):
"""General error class raised by ray.tune."""
pass
@@ -393,4 +433,4 @@ class _UnresolvedAccessGuard(dict):

class RecursiveDependencyError(Exception):
def __init__(self, msg: str):
Exception.__init__(self, msg)

@@ -1,6 +1,6 @@
'''!
* Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License.
'''

import json
@@ -118,6 +118,7 @@ class TrainingLogWriter(object):

def close(self):
self.file.close()
self.file = None # for pickle

class TrainingLogReader(object):
@@ -141,6 +142,7 @@ class TrainingLogReader(object):

def close(self):
self.file.close()
self.file = None # for pickle

def get_record(self, record_id) -> TrainingLogRecord:
if self.file is None:

@@ -1,11 +1,12 @@
# Economical Hyperparameter Optimization

`flaml.tune` is a module for economical hyperparameter tuning. It frees users from manually tuning many hyperparameters for a software, such as machine learning training procedures.
The API is compatible with ray tune.
It can be used standalone, or together with ray tune or nni.

Example:
* Example for sequential tuning (recommended when compute resource is limited and each trial can consume all the resources):

```python
# require: pip install flaml[blendsearch]
from flaml import tune
import time

@@ -26,7 +27,7 @@ analysis = tune.run(
'x': tune.qloguniform(lower=1, upper=100000, q=1),
'y': tune.randint(lower=1, upper=100000)
}, # the search space
init_config={'x':1}, # an initial (partial) config with low cost
low_cost_partial_config={'x':1}, # an initial (partial) config with low cost
metric='metric', # the name of the metric used for optimization
mode='min', # the optimization mode, 'min' or 'max'
num_samples=-1, # the maximal number of configs to try, -1 means infinite
@@ -40,8 +41,10 @@ print(analysis.best_trial.last_result) # the best trial's result
print(analysis.best_config) # the best config
```

Or, using ray tune's API:
* Example for using ray tune's API:

```python
# require: pip install flaml[blendsearch] ray[tune]
from ray import tune as raytune
from flaml import CFO, BlendSearch
import time
@@ -68,15 +71,20 @@ analysis = raytune.run(
num_samples=-1, # the maximal number of configs to try, -1 means infinite
time_budget_s=60, # the time budget in seconds
local_dir='logs/', # the local directory to store logs
search_alg=CFO(points_to_evaluate=[{'x':1}]) # or BlendSearch
# other algo example: raytune.create_searcher('optuna'),
search_alg=CFO(low_cost_partial_config={'x':1}) # or BlendSearch
)

print(analysis.best_trial.last_result) # the best trial's result
print(analysis.best_config) # the best config
```

For more examples, please check out
* Example for using NNI: An example of using BlendSearch with NNI can be seen in [test](https://github.com/microsoft/FLAML/tree/main/test/nni). CFO can be used as well in a similar manner. To run the example, first make sure you have [NNI](https://nni.readthedocs.io/en/stable/) installed, then run:

```shell
$nnictl create --config ./config.yml
```

* For more examples, please check out
[notebooks](https://github.com/microsoft/FLAML/tree/main/notebook/).

@@ -86,7 +94,7 @@ For more examples, please check out
## CFO: Frugal Optimization for Cost-related Hyperparameters

<p align="center">
<img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/CFO.png" width=200>
<img src="https://github.com/microsoft/FLAML/blob/main/docs/images/CFO.png" width=200>
<br>
</p>

@@ -105,7 +113,7 @@ FLOW<sup>2</sup> only requires pairwise comparisons between function values to p
The GIFs attached below demonstrate an example search trajectory of FLOW<sup>2</sup> shown in the loss and evaluation cost (i.e., the training time) space respectively. From the demonstration, we can see that (1) FLOW<sup>2</sup> can quickly move toward the low-loss region, showing good convergence property, and (2) FLOW<sup>2</sup> tends to avoid exploring the high-cost region until necessary.

<p align="center">
<img align="center", src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/heatmap_loss_cfo_12s.gif" width=360> <img align="center", src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/heatmap_cost_cfo_12s.gif" width=360>
<img align="center", src="https://github.com/microsoft/FLAML/blob/main/docs/images/heatmap_loss_cfo_12s.gif" width=360> <img align="center", src="https://github.com/microsoft/FLAML/blob/main/docs/images/heatmap_cost_cfo_12s.gif" width=360>
<br>
<figcaption>Figure 1. FLOW<sup>2</sup> in tuning the # of leaves and the # of trees for XGBoost. The two background heatmaps show the loss and cost distribution of all configurations. The black dots are the points evaluated in FLOW<sup>2</sup>. Black dots connected by lines are points that yield better loss performance when evaluated.</figcaption>
</p>
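
To make the idea concrete, here is a minimal sketch of the pairwise-comparison update that FLOW<sup>2</sup> is built on (an illustrative toy, not FLAML's implementation: the objective `f` is a stand-in, and the adaptive step-size schedule and cost-awareness of the real algorithm are omitted):

```python
import numpy as np

def flow2_sketch(f, x0, step=0.1, iters=100, seed=0):
    """Toy local search in the spirit of FLOW2: sample a random unit
    direction, compare f at the new point against the incumbent, and
    move only on improvement (trying the opposite direction otherwise)."""
    rng = np.random.default_rng(seed)
    x = np.asarray(x0, dtype=float)
    fx = f(x)
    for _ in range(iters):
        u = rng.normal(size=x.size)
        u /= np.linalg.norm(u)  # random direction on the unit sphere
        for cand in (x + step * u, x - step * u):
            fc = f(cand)
            if fc < fx:  # pairwise comparison only; no gradient needed
                x, fx = cand, fc
                break
    return x, fx

# e.g., minimize a shifted quadratic:
# flow2_sketch(lambda v: float(((v - 3.0) ** 2).sum()), [0.0, 0.0])
```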

@@ -116,7 +124,7 @@ Example:

```python
from flaml import CFO
tune.run(...
search_alg = CFO(points_to_evaluate=[init_config]),
search_alg = CFO(low_cost_partial_config=low_cost_partial_config),
)
```

@@ -128,7 +136,7 @@ using BlendSearch.
## BlendSearch: Economical Hyperparameter Optimization With Blended Search Strategy

<p align="center">
<img src="https://github.com/microsoft/FLAML/raw/v0.2.2/docs/images/BlendSearch.png" width=200>
<img src="https://github.com/microsoft/FLAML/blob/main/docs/images/BlendSearch.png" width=200>
<br>
</p>

@@ -146,16 +154,21 @@ based on optimism in face of uncertainty.
Example:

```python
# require: pip install flaml[blendsearch]
from flaml import BlendSearch
tune.run(...
search_alg = BlendSearch(points_to_evaluate=[init_config]),
search_alg = BlendSearch(low_cost_partial_config=low_cost_partial_config),
)
```

Recommended scenario: cost-related hyperparameters exist, a low-cost
- Recommended scenario: cost-related hyperparameters exist, a low-cost
initial point is known, and the search space is complex such that local search
is prone to be stuck at local optima.

- Suggestion about using a larger search space in BlendSearch:
In hyperparameter optimization, a larger search space is desirable because it is more likely to include the optimal configuration (or one of the optimal configurations) in hindsight. However, the performance (especially the anytime performance) of most existing HPO methods is undesirable if the cost of the configurations in the search space has a large variation. Thus, hand-crafted small search spaces (with relatively homogeneous cost) are often used in practice for these methods, which makes the outcome subject to the practitioner's idiosyncrasy. BlendSearch combines the benefits of local search and global search, which enables a smart (economical) way of deciding where to explore in the search space even though it is larger than necessary. This allows users to specify a larger search space in BlendSearch, which is often easier and a better practice than narrowing down the search space by hand.
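
As a hypothetical illustration of this point (the space, budget, and objective below are made up for the example; note that BlendSearch is the default search algorithm when `tune.run` is called without `search_alg`):

```python
# require: pip install flaml[blendsearch]
from flaml import tune

def train_and_eval(config):
    # stand-in objective for illustration: larger models score better here,
    # but would cost far more to train in a real workload
    loss = 1.0 / (config['n_estimators'] * config['max_leaves']) \
        + config['learning_rate']
    tune.report(val_loss=loss)

analysis = tune.run(
    train_and_eval,
    config={
        # deliberately wide, cost-heterogeneous space
        'n_estimators': tune.qloguniform(lower=4, upper=32768, q=1),
        'max_leaves': tune.qloguniform(lower=4, upper=32768, q=1),
        'learning_rate': tune.loguniform(lower=1e-4, upper=1.0),
    },
    low_cost_partial_config={'n_estimators': 4, 'max_leaves': 4},
    metric='val_loss', mode='min',
    num_samples=-1, time_budget_s=60, use_ray=False)
```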

For more technical details, please check our papers.

* [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. AAAI 2021.
@@ -169,7 +182,7 @@ For more technical details, please check our papers.
}
```

* Economical Hyperparameter Optimization With Blended Search Strategy. Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. To appear in ICLR 2021.
* [Economical Hyperparameter Optimization With Blended Search Strategy](https://www.microsoft.com/en-us/research/publication/economical-hyperparameter-optimization-with-blended-search-strategy/). Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. ICLR 2021.

```
@inproceedings{wang2021blendsearch,
@@ -178,4 +191,4 @@ For more technical details, please check our papers.
year={2021},
booktitle={ICLR'21},
}
```
@@ -1,7 +1,7 @@
try:
from ray.tune import (uniform, quniform, choice, randint, qrandint, randn,
qrandn, loguniform, qloguniform)
except:
qrandn, loguniform, qloguniform)
except ImportError:
from .sample import (uniform, quniform, choice, randint, qrandint, randn,
qrandn, loguniform, qloguniform)
from .tune import run, report
qrandn, loguniform, qloguniform)
from .tune import run, report

@@ -532,4 +532,4 @@ def qrandn(mean: float, sd: float, q: float):
q (float): Quantization number. The result will be rounded to an
integer increment of this value.
"""
return Float(None, None).normal(mean, sd).quantized(q)

@@ -121,8 +121,7 @@ class Trial:
self.metric_analysis[metric]["min"] = min(
value, self.metric_analysis[metric]["min"])
self.metric_analysis[metric]["avg"] = 1 / step * (
value +
(step - 1) * self.metric_analysis[metric]["avg"])
value + (step - 1) * self.metric_analysis[metric]["avg"])
self.metric_analysis[metric]["last"] = value

for n in self.n_steps:

@@ -6,7 +6,7 @@
from typing import Optional
try:
from ray.tune.trial import Trial
except:
except ImportError:
from .trial import Trial
import logging
logger = logging.getLogger(__name__)
@@ -16,14 +16,15 @@ class Nologger():
'''Logger without logging
'''

def on_result(self, result): pass
def on_result(self, result):
pass

class SimpleTrial(Trial):
'''A simple trial class
'''

def __init__(self, config, trial_id = None):
def __init__(self, config, trial_id=None):
self.trial_id = Trial.generate_id() if trial_id is None else trial_id
self.config = config or {}
self.status = Trial.PENDING
@@ -46,17 +47,16 @@ class BaseTrialRunner:
Note that the caller usually should not mutate trial state directly.
"""

def __init__(self,
search_alg = None,
scheduler = None,
metric: Optional[str] = None,
mode: Optional[str] = 'min'):
def __init__(self,
search_alg=None, scheduler=None,
metric: Optional[str] = None,
mode: Optional[str] = 'min'):
self._search_alg = search_alg
self._scheduler_alg = scheduler
self._trials = []
self._metric = metric
self._mode = mode

def get_trials(self):
"""Returns the list of trials managed by this TrialRunner.

@@ -81,22 +81,22 @@ class BaseTrialRunner:
self._search_alg.on_trial_result(trial.trial_id, result)
if self._scheduler_alg:
decision = self._scheduler_alg.on_trial_result(self, trial, result)
if decision == "STOP": trial.set_status(Trial.TERMINATED)
elif decision == "PAUSE": trial.set_status(Trial.PAUSED)
if decision == "STOP":
trial.set_status(Trial.TERMINATED)
elif decision == "PAUSE":
trial.set_status(Trial.PAUSED)

def stop_trial(self, trial):
"""Stops trial.
"""
if not trial.status in [Trial.ERROR, Trial.TERMINATED]:
if trial.status not in [Trial.ERROR, Trial.TERMINATED]:
if self._scheduler_alg:
self._scheduler_alg.on_trial_complete(self,
trial.trial_id, trial.last_result)
self._search_alg.on_trial_complete(
trial.trial_id, trial.last_result)
self._scheduler_alg.on_trial_complete(
self, trial.trial_id, trial.last_result)
self._search_alg.on_trial_complete(trial.trial_id, trial.last_result)
trial.set_status(Trial.TERMINATED)
else:
if self._scheduler_alg:
self._scheduler_alg.on_trial_remove(self, trial)
elif self._scheduler_alg:
self._scheduler_alg.on_trial_remove(self, trial)

class SequentialTrialRunner(BaseTrialRunner):
@@ -112,10 +112,11 @@ class SequentialTrialRunner(BaseTrialRunner):
"""
trial_id = Trial.generate_id()
config = self._search_alg.suggest(trial_id)
if config:
trial = SimpleTrial(config, trial_id)
self.add_trial(trial)
trial.set_status(Trial.RUNNING)
else: trial = None
else:
trial = None
self.running_trial = trial
return trial

@@ -3,13 +3,13 @@
* Licensed under the MIT License. See LICENSE file in the
* project root for license information.
'''
from typing import Optional, Union
import datetime, time
from typing import Optional, Union, List, Callable, Tuple
import datetime
import time
try:
from ray.tune.analysis import ExperimentAnalysis as EA
except:
except ImportError:
from .analysis import ExperimentAnalysis as EA

import logging
logger = logging.getLogger(__name__)

@@ -17,6 +17,8 @@ logger = logging.getLogger(__name__)
_use_ray = True
_runner = None
_verbose = 0
_running_trial = None
_training_iteration = 0

class ExperimentAnalysis(EA):
@@ -26,7 +28,7 @@ class ExperimentAnalysis(EA):
def __init__(self, trials, metric, mode):
try:
super().__init__(self, None, trials, metric, mode)
except:
except (TypeError, ValueError):
self.trials = trials
self.default_metric = metric
self.default_mode = mode
@@ -51,14 +53,13 @@ def report(_metric=None, **kwargs):

analysis = tune.run(
compute_with_config,
init_config={},
config={
'x': tune.qloguniform(lower=1, upper=1000000, q=1),
'y': tune.randint(lower=1, upper=1000000)
},
metric='metric2minimize', mode='min',
num_samples=1000000, time_budget_s=60, use_ray=False)

print(analysis.trials[-1].last_result)

Args:
@@ -68,6 +69,8 @@ def report(_metric=None, **kwargs):
'''
global _use_ray
global _verbose
global _running_trial
global _training_iteration
if _use_ray:
from ray import tune
return tune.report(_metric, **kwargs)
@@ -75,23 +78,32 @@ def report(_metric=None, **kwargs):
result = kwargs
if _verbose == 2:
logger.info(f"result: {kwargs}")
if _metric: result['_default_anonymous_metric'] = _metric
if _metric:
result['_default_anonymous_metric'] = _metric
trial = _runner.running_trial
if _running_trial == trial:
_training_iteration += 1
else:
_training_iteration = 0
_running_trial = trial
result["training_iteration"] = _training_iteration
result['config'] = trial.config
for key, value in trial.config.items():
result['config/'+key] = value
result['config/' + key] = value
_runner.process_trial_result(_runner.running_trial, result)
result['time_total_s'] = trial.last_update_time - trial.start_time
if _verbose > 2:
logger.info(f"result: {result}")
if _runner.running_trial.is_finished():
return None
else: return True
else:
return True

def run(training_function,
init_config: dict,
config: Optional[dict] = None,
points_to_evaluate: Optional[List[dict]] = None,
low_cost_partial_config: Optional[dict] = None,
cat_hp_cost: Optional[dict] = None,
metric: Optional[str] = None,
mode: Optional[str] = None,
@@ -101,14 +113,16 @@ def run(training_function,
max_resource: Optional[float] = None,
reduction_factor: Optional[float] = None,
report_intermediate_result: Optional[bool] = False,
search_alg = None,
verbose: Optional[int] = 2,
search_alg=None,
verbose: Optional[int] = 2,
local_dir: Optional[str] = None,
num_samples: Optional[int] = 1,
resources_per_trial: Optional[dict] = None,
mem_size = None,
use_ray: Optional[bool] = False,
):
config_constraints: Optional[
List[Tuple[Callable[[dict], float], str, float]]] = None,
metric_constraints: Optional[
List[Tuple[str, str, float]]] = None,
use_ray: Optional[bool] = False):
'''The trigger for HPO.

Example:
@@ -126,58 +140,59 @@ def run(training_function,

analysis = tune.run(
compute_with_config,
init_config={},
config={
'x': tune.qloguniform(lower=1, upper=1000000, q=1),
'y': tune.randint(lower=1, upper=1000000)
},
metric='metric2minimize', mode='min',
num_samples=-1, time_budget_s=60, use_ray=False)

print(analysis.trials[-1].last_result)

Args:
training_function: A user-defined training function.
init_config: A dictionary from a subset of controlled dimensions
to the initial low-cost values. e.g.,
training_function: A user-defined training function.
config: A dictionary to specify the search space.
points_to_evaluate: A list of initial hyperparameter
configurations to run first.
low_cost_partial_config: A dictionary from a subset of
controlled dimensions to the initial low-cost values.
e.g.,

.. code-block:: python

{'epochs': 1}
{'n_estimators': 4, 'max_leaves': 4}

If no such dimension, pass an empty dict {}.
config: A dictionary to specify the search space.
cat_hp_cost: A dictionary from a subset of categorical dimensions
to the relative cost of each choice.
e.g.,

.. code-block:: python

{'tree_method': [1, 1, 2]}

i.e., the relative cost of the
three choices of 'tree_method' is 1, 1 and 2 respectively
metric: A string of the metric name to optimize for.
mode: A string in ['min', 'max'] to specify the objective as
minimization or maximization.
time_budget_s: A float of the time budget in seconds.
prune_attr: A string of the attribute used for pruning.
Not necessarily in space.
When prune_attr is in space, it is a hyperparameter, e.g.,
'n_iters', and the best value is unknown.
When prune_attr is not in space, it is a resource dimension,
e.g., 'sample_size', and the peak performance is assumed
to be at the max_resource.
min_resource: A float of the minimal resource to use for the
prune_attr; only valid if prune_attr is not in space.
max_resource: A float of the maximal resource to use for the
prune_attr; only valid if prune_attr is not in space.
reduction_factor: A float of the reduction factor used for incremental
pruning.
report_intermediate_result: A boolean of whether intermediate results
are reported. If so, early stopping and pruning can be used.
search_alg: An instance of BlendSearch as the search algorithm
to be used. The same instance can be used for iterative tuning.
e.g.,

.. code-block:: python
@@ -185,36 +200,53 @@ def run(training_function,
from flaml import BlendSearch
algo = BlendSearch(metric='val_loss', mode='min',
space=search_space,
points_to_evaluate=points_to_evaluate)
low_cost_partial_config=low_cost_partial_config)
for i in range(10):
analysis = tune.run(compute_with_config, init_config=None,
analysis = tune.run(compute_with_config,
search_alg=algo, use_ray=False)
print(analysis.trials[-1].last_result)

verbose: 0, 1, 2, or 3. Verbosity mode for ray if ray backend is used.
0 = silent, 1 = only status updates, 2 = status and brief trial
results, 3 = status and detailed trial results. Defaults to 2.
local_dir: A string of the local dir to save ray logs if ray backend is
used.
used; or a local dir to save the tuning log.
num_samples: An integer of the number of configs to try. Defaults to 1.
resources_per_trial: A dictionary of the hardware resources to allocate
per trial, e.g., `{'mem': 1024**3}`. When not using ray backend,
only 'mem' is used as approximate resource constraints
(in conjunction with mem_size).
mem_size: A function to estimate the memory size for a given config.
per trial, e.g., `{'cpu': 1}`. Only valid when using ray backend.
config_constraints: A list of config constraints to be satisfied.
e.g.,

.. code-block: python

config_constraints = [(mem_size, '<=', 1024**3)]

mem_size is a function which produces a float number for the bytes
needed for a config.
It is used to skip configs which do not fit in memory.
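For instance, a hypothetical estimator (illustrative only, not
part of FLAML) could compute the bytes implied by a config:

.. code-block: python

def mem_size(config):
    # assumption: ~8 bytes per tree leaf; purely illustrative
    return config['n_estimators'] * config['max_leaves'] * 8

config_constraints = [(mem_size, '<=', 1024**3)]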
metric_constraints: A list of metric constraints to be satisfied.
e.g., `[('precision', '>=', 0.9)]`
use_ray: A boolean of whether to use ray as the backend.
'''
global _use_ray
global _verbose
if not use_ray:
_verbose = verbose
if verbose > 0:
import os
os.makedirs(local_dir, exist_ok=True)
logger.addHandler(logging.FileHandler(local_dir+'/tune_'+str(
datetime.datetime.now())+'.log'))
if verbose<=2:
if local_dir:
os.makedirs(local_dir, exist_ok=True)
logger.addHandler(logging.FileHandler(local_dir + '/tune_' + str(
datetime.datetime.now()).replace(':', '-') + '.log'))
elif not logger.handlers:
# Add the console handler.
_ch = logging.StreamHandler()
logger_formatter = logging.Formatter(
'[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
'%m-%d %H:%M:%S')
_ch.setFormatter(logger_formatter)
logger.addHandler(_ch)
if verbose <= 2:
logger.setLevel(logging.INFO)
else:
logger.setLevel(logging.DEBUG)
@@ -223,54 +255,54 @@

if search_alg is None:
from ..searcher.blendsearch import BlendSearch
search_alg = BlendSearch(points_to_evaluate=[init_config],
metric=metric, mode=mode,
cat_hp_cost=cat_hp_cost,
space=config, prune_attr=prune_attr,
min_resource=min_resource,
max_resource=max_resource,
reduction_factor=reduction_factor,
resources_per_trial=resources_per_trial,
mem_size=mem_size)
search_alg = BlendSearch(
metric=metric, mode=mode, space=config,
points_to_evaluate=points_to_evaluate,
low_cost_partial_config=low_cost_partial_config,
cat_hp_cost=cat_hp_cost,
prune_attr=prune_attr,
min_resource=min_resource, max_resource=max_resource,
reduction_factor=reduction_factor,
config_constraints=config_constraints,
metric_constraints=metric_constraints)
if time_budget_s:
search_alg.set_search_properties(metric, mode, config={
'time_budget_s':time_budget_s})
'time_budget_s': time_budget_s})
scheduler = None
if report_intermediate_result:
params = {}
# scheduler resource_dimension=prune_attr
if prune_attr: params['time_attr'] = prune_attr
if max_resource: params['max_t'] = max_resource
if min_resource: params['grace_period'] = min_resource
if reduction_factor: params['reduction_factor'] = reduction_factor
if prune_attr:
params['time_attr'] = prune_attr
if max_resource:
params['max_t'] = max_resource
if min_resource:
params['grace_period'] = min_resource
if reduction_factor:
params['reduction_factor'] = reduction_factor
try:
from ray.tune.schedulers import ASHAScheduler
scheduler = ASHAScheduler(**params)
except:
scheduler = None
else:
scheduler = None

except ImportError:
pass
if use_ray:
try:
from ray import tune
except:
except ImportError:
raise ImportError("Failed to import ray tune. "
"Please install ray[tune] or set use_ray=False")
_use_ray = True
return tune.run(training_function,
metric=metric,
mode=mode,
search_alg=search_alg,
scheduler=scheduler,
time_budget_s=time_budget_s,
verbose=verbose,
local_dir=local_dir,
num_samples=num_samples,
resources_per_trial=resources_per_trial
)
metric=metric, mode=mode,
search_alg=search_alg,
scheduler=scheduler,
time_budget_s=time_budget_s,
verbose=verbose, local_dir=local_dir,
num_samples=num_samples,
resources_per_trial=resources_per_trial)

# simple sequential run without using tune.run() from ray
time_start = time.time()
_use_ray = False
if scheduler:
scheduler.set_search_properties(metric=metric, mode=mode)
@@ -281,10 +313,10 @@ def run(training_function,
scheduler=scheduler,
metric=metric,
mode=mode,
)
num_trials = 0
while time.time()-time_start<time_budget_s and (
num_samples<0 or num_trials<num_samples):
while time.time() - time_start < time_budget_s and (
num_samples < 0 or num_trials < num_samples):
trial_to_run = _runner.step()
if trial_to_run:
num_trials += 1
@@ -292,4 +324,4 @@ def run(training_function,
logger.info(f'trial {num_trials} config: {trial_to_run.config}')
training_function(trial_to_run.config)
_runner.stop_trial(trial_to_run)
return ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode)

@@ -1 +1 @@
__version__ = "0.2.3"
__version__ = "0.4.0"

@@ -1,788 +0,0 @@
[deleted file: a 788-line Jupyter notebook that used the Huggingface transformers library to finetune a transformer model — it tokenized the GLUE "cola" dataset with a fast `distilbert-base-uncased` tokenizer (`datasets.load_dataset("glue", "cola")`) and loaded `AutoModelForSequenceClassification` with 2 labels; the raw .ipynb JSON, including progress-bar widget output and a model printout, is truncated in this view and omitted here]
|
||||
" (lin1): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||||
" (lin2): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||||
" )\n",
|
||||
" (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||||
" )\n",
|
||||
" (2): TransformerBlock(\n",
|
||||
" (attention): MultiHeadSelfAttention(\n",
|
||||
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||||
" (q_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (k_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (v_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (out_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" )\n",
|
||||
" (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||||
" (ffn): FFN(\n",
|
||||
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||||
" (lin1): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||||
" (lin2): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||||
" )\n",
|
||||
" (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||||
" )\n",
|
||||
" (3): TransformerBlock(\n",
|
||||
" (attention): MultiHeadSelfAttention(\n",
|
||||
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||||
" (q_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (k_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (v_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (out_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" )\n",
|
||||
" (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||||
" (ffn): FFN(\n",
|
||||
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||||
" (lin1): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||||
" (lin2): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||||
" )\n",
|
||||
" (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||||
" )\n",
|
||||
" (4): TransformerBlock(\n",
|
||||
" (attention): MultiHeadSelfAttention(\n",
|
||||
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||||
" (q_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (k_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (v_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (out_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" )\n",
|
||||
" (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||||
" (ffn): FFN(\n",
|
||||
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||||
" (lin1): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||||
" (lin2): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||||
" )\n",
|
||||
" (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||||
" )\n",
|
||||
" (5): TransformerBlock(\n",
|
||||
" (attention): MultiHeadSelfAttention(\n",
|
||||
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||||
" (q_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (k_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (v_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (out_lin): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" )\n",
|
||||
" (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||||
" (ffn): FFN(\n",
|
||||
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||||
" (lin1): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||||
" (lin2): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||||
" )\n",
|
||||
" (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" (pre_classifier): Linear(in_features=768, out_features=768, bias=True)\n",
|
||||
" (classifier): Linear(in_features=768, out_features=2, bias=True)\n",
|
||||
" (dropout): Dropout(p=0.2, inplace=False)\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Metric"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metric = datasets.load_metric(\"glue\", TASK)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Metric(name: \"glue\", features: {'predictions': Value(dtype='int64', id=None), 'references': Value(dtype='int64', id=None)}, usage: \"\"\"\n",
|
||||
"Compute GLUE evaluation metric associated to each GLUE dataset.\n",
|
||||
"Args:\n",
|
||||
" predictions: list of translations to score.\n",
|
||||
" Each translation should be tokenized into a list of tokens.\n",
|
||||
" references: list of lists of references for each translation.\n",
|
||||
" Each reference should be tokenized into a list of tokens.\n",
|
||||
"Returns: depending on the GLUE subset, one or several of:\n",
|
||||
" \"accuracy\": Accuracy\n",
|
||||
" \"f1\": F1\n",
|
||||
" \"pearson\": Pearson Correlation\n",
|
||||
" \"spearmanr\": Spearman Correlation\n",
|
||||
" \"matthews_correlation\": Matthew Correlation\n",
|
||||
"\"\"\", stored examples: 0)"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"metric"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compute_metrics(eval_pred):\n",
|
||||
" predictions, labels = eval_pred\n",
|
||||
" predictions = np.argmax(predictions, axis=1)\n",
|
||||
" return metric.compute(predictions=predictions, references=labels)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training (aka Finetuning)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from transformers import Trainer\n",
|
||||
"from transformers import TrainingArguments"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"args = TrainingArguments(\n",
|
||||
" output_dir='output',\n",
|
||||
" do_eval=True,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"trainer = Trainer(\n",
|
||||
" model=model,\n",
|
||||
" args=args,\n",
|
||||
" train_dataset=encoded_dataset[\"train\"],\n",
|
||||
" eval_dataset=encoded_dataset[\"validation\"],\n",
|
||||
" tokenizer=tokenizer,\n",
|
||||
" compute_metrics=compute_metrics,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"\n",
|
||||
" <div>\n",
|
||||
" <style>\n",
|
||||
" /* Turns off some styling */\n",
|
||||
" progress {\n",
|
||||
" /* gets rid of default border in Firefox and Opera. */\n",
|
||||
" border: none;\n",
|
||||
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
|
||||
" background-size: auto;\n",
|
||||
" }\n",
|
||||
" </style>\n",
|
||||
" \n",
|
||||
" <progress value='322' max='3207' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
||||
" [ 322/3207 02:51 < 25:41, 1.87 it/s, Epoch 0.30/3]\n",
|
||||
" </div>\n",
|
||||
" <table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: left;\">\n",
|
||||
" <th>Step</th>\n",
|
||||
" <th>Training Loss</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" </tbody>\n",
|
||||
"</table><p>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"trainer.train()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"source": [
|
||||
"## Hyperparameter Optimization\n",
|
||||
"\n",
|
||||
"`flaml.tune` is a module for economical hyperparameter tuning. It frees users from manually tuning many hyperparameters for a software, such as machine learning training procedures. \n",
|
||||
"The API is compatible with ray tune.\n",
|
||||
"\n",
|
||||
"### Step 1. Define training method\n",
|
||||
"\n",
|
||||
"We define a function `train_distilbert(config: dict)` that accepts a hyperparameter configuration dict `config`. The specific configs will be generated by flaml's search algorithm in a given search space.\n"
|
||||
],
|
||||
"cell_type": "markdown",
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import flaml\n",
|
||||
"\n",
|
||||
"def train_distilbert(config: dict):\n",
|
||||
"\n",
|
||||
" # Define tokenize method\n",
|
||||
" tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)\n",
|
||||
" def tokenize(examples):\n",
|
||||
" return tokenizer(examples[COLUMN_NAME], truncation=True)\n",
|
||||
" # Load CoLA dataset and apply tokenizer\n",
|
||||
" cola_raw = load_dataset(\"glue\", TASK)\n",
|
||||
" cola_encoded = cola_raw.map(tokenize, batched=True)\n",
|
||||
" # QUESTION: Write processed data to disk?\n",
|
||||
" train_dataset, eval_dataset = cola_encoded[\"train\"], cola_encoded[\"validation\"]\n",
|
||||
"\n",
|
||||
" model = AutoModelForSequenceClassification.from_pretrained(\n",
|
||||
" MODEL_CHECKPOINT, num_labels=NUM_LABELS\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" metric = load_metric(\"glue\", TASK)\n",
|
||||
"\n",
|
||||
" training_args = TrainingArguments(\n",
|
||||
" output_dir='.',\n",
|
||||
" do_eval=False,\n",
|
||||
" **config,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" trainer = Trainer(\n",
|
||||
" model,\n",
|
||||
" training_args,\n",
|
||||
" train_dataset=train_dataset,\n",
|
||||
" eval_dataset=eval_dataset,\n",
|
||||
" tokenizer=tokenizer,\n",
|
||||
" compute_metrics=compute_metrics,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # train model\n",
|
||||
" trainer.train()\n",
|
||||
"\n",
|
||||
" # evaluate model\n",
|
||||
" eval_output = trainer.evaluate()\n",
|
||||
"\n",
|
||||
" # report the metric to optimize\n",
|
||||
" flaml.tune.report(\n",
|
||||
" loss=eval_output[\"eval_loss\"],\n",
|
||||
" matthews_correlation=eval_output[\"eval_matthews_correlation\"],\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"source": [
|
||||
"### Step 2. Define the search\n",
|
||||
"\n",
|
||||
"We are now ready to define our search. This includes:\n",
|
||||
"\n",
|
||||
"- The `search_space` for our hyperparameters\n",
|
||||
"- The metric and the mode ('max' or 'min') for optimization\n",
|
||||
"- The constraints (`n_cpus`, `n_gpus`, `num_samples`, and `time_budget_s`)"
|
||||
],
|
||||
"cell_type": "markdown",
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"max_num_epoch = 4\n",
|
||||
"search_space = {\n",
|
||||
" # You can mix constants with search space objects.\n",
|
||||
" \"num_train_epochs\": flaml.tune.loguniform(1, max_num_epoch),\n",
|
||||
" \"learning_rate\": flaml.tune.loguniform(1e-6, 1e-4),\n",
|
||||
" \"adam_epsilon\": flaml.tune.loguniform(1e-9, 1e-7),\n",
|
||||
" }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# optimization objective\n",
|
||||
"HP_METRIC, MODE = \"matthews_correlation\", \"max\"\n",
|
||||
"\n",
|
||||
"# resources\n",
|
||||
"num_cpus = 2\n",
|
||||
"num_gpus = 2\n",
|
||||
"\n",
|
||||
"# constraints\n",
|
||||
"num_samples = -1 # number of trials, -1 means unlimited\n",
|
||||
"time_budget_s = 3600 # time budget in seconds"
|
||||
]
|
||||
},
|
||||
{
|
||||
"source": [
|
||||
"### Step 3. Launch with `flaml.tune.run`\n",
|
||||
"\n",
|
||||
"We are now ready to laungh the tuning using `flaml.tune.run`:"
|
||||
],
|
||||
"cell_type": "markdown",
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"import ray\n",
|
||||
"start_time = time.time()\n",
|
||||
"ray.init(num_cpus=num_cpus, num_gpus=num_gpus)\n",
|
||||
"\n",
|
||||
"print(\"Tuning started...\")\n",
|
||||
"analysis = flaml.tune.run(\n",
|
||||
" train_distilbert,\n",
|
||||
" config=search_space,\n",
|
||||
" init_config={\n",
|
||||
" \"num_train_epochs\": 1,\n",
|
||||
" },\n",
|
||||
" metric=HP_METRIC,\n",
|
||||
" mode=MODE,\n",
|
||||
" report_intermediate_result=False,\n",
|
||||
" # uncomment the following if report_intermediate_result = True\n",
|
||||
" # max_resource=max_num_epoch, min_resource=1,\n",
|
||||
" resources_per_trial={\"gpu\": 1},\n",
|
||||
" local_dir='logs/',\n",
|
||||
" num_samples=num_samples,\n",
|
||||
" time_budget_s=time_budget_s,\n",
|
||||
" use_ray=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"ray.shutdown()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_trial = analysis.get_best_trial(HP_METRIC, MODE, \"all\")\n",
|
||||
"metric = best_trial.metric_analysis[HP_METRIC][MODE]\n",
|
||||
"print(f\"n_trials={len(analysis.trials)}\")\n",
|
||||
"print(f\"time={time.time()-start_time}\")\n",
|
||||
"print(f\"Best model eval {HP_METRIC}: {metric:.4f}\")\n",
|
||||
"print(f\"Best model parameters: {best_trial.config}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"source": [
|
||||
"## Next Steps\n",
|
||||
"\n",
|
||||
"Notice that we only reported the metric with `flaml.tune.report` at the end of full training loop. It is possible to enable reporting of intermediate performance - allowing early stopping - as follows:\n",
|
||||
"\n",
|
||||
"- Huggingface provides _Callbacks_ which can be used to insert the `flaml.tune.report` call inside the training loop\n",
|
||||
"- Make sure to set `do_eval=True` in the `TrainingArguments` provided to `Trainer` and adjust theevaluation frequency accordingly"
|
||||
],
|
||||
"cell_type": "markdown",
|
||||
"metadata": {}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "flaml",
|
||||
"language": "python",
|
||||
"name": "flaml"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
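Below is a minimal sketch of the intermediate-reporting idea from the "Next Steps" cell above. It assumes the `transformers` `TrainerCallback` API; the `TuneReportCallback` class name is ours, and the exact evaluation flags in `TrainingArguments` vary across `transformers` versions.

import flaml  # model, training_args, datasets, and tokenizer come from the notebook cells above
from transformers import TrainerCallback

class TuneReportCallback(TrainerCallback):
    # Hypothetical helper: forward each evaluation result to flaml.tune
    # so the search algorithm can stop unpromising trials early.
    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        if metrics:
            flaml.tune.report(
                loss=metrics["eval_loss"],
                matthews_correlation=metrics["eval_matthews_correlation"],
            )

# Pass the callback to Trainer and enable periodic evaluation; then set
# report_intermediate_result=True (with max_resource/min_resource) in
# flaml.tune.run, as hinted by the commented-out arguments above.
trainer = Trainer(
    model,
    training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[TuneReportCallback()],
)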
1025
notebook/flaml_automl.ipynb
Normal file
1601
notebook/flaml_finetune_transformer.ipynb
Normal file
883
notebook/flaml_lightgbm.ipynb
Normal file
769
notebook/flaml_xgboost.ipynb
Normal file
@@ -1,4 +0,0 @@
|
||||
torch
|
||||
transformers
|
||||
datasets
|
||||
ipywidgets
|
||||
16
setup.py
@@ -20,7 +20,6 @@ install_requires = [
|
||||
"scipy>=1.4.1",
|
||||
"catboost>=0.23",
|
||||
"scikit-learn>=0.23.2",
|
||||
"optuna==2.3.0"
|
||||
],
|
||||
|
||||
|
||||
@@ -48,19 +47,26 @@ setuptools.setup(
|
||||
"coverage>=5.3",
|
||||
"xgboost<1.3",
|
||||
"rgf-python",
|
||||
"optuna==2.3.0",
|
||||
],
|
||||
"blendsearch": [
|
||||
"optuna==2.3.0"
|
||||
],
|
||||
"ray": [
|
||||
"ray[tune]==1.1.0",
|
||||
"pyyaml<5.3.1",
|
||||
"ray[tune]==1.2.0",
|
||||
"pyyaml<5.3.1",
|
||||
],
|
||||
"azureml": [
|
||||
"azureml-mlflow"
|
||||
"azureml-mlflow",
|
||||
],
|
||||
"nni": [
|
||||
"nni",
|
||||
],
|
||||
},
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Operating System :: OS Independent",
|
||||
],
|
||||
python_requires=">=3.6",
|
||||
)
|
||||
|
||||
251
test/hf/test_deberta.py
Normal file
@@ -0,0 +1,251 @@
|
||||
'''Require: pip install torch transformers datasets flaml[blendsearch,ray]
|
||||
'''
|
||||
import time
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
try:
|
||||
import ray
|
||||
from datasets import (
|
||||
load_dataset,
|
||||
load_metric,
|
||||
)
|
||||
from transformers import (
|
||||
AutoModelForSequenceClassification,
|
||||
AutoTokenizer,
|
||||
Trainer,
|
||||
TrainingArguments,
|
||||
)
|
||||
import flaml
|
||||
MODEL_CHECKPOINT = "microsoft/deberta-base"
|
||||
task_to_keys = {
|
||||
"cola": ("sentence", None),
|
||||
"mnli": ("premise", "hypothesis"),
|
||||
"mrpc": ("sentence1", "sentence2"),
|
||||
"qnli": ("question", "sentence"),
|
||||
"qqp": ("question1", "question2"),
|
||||
"rte": ("sentence1", "sentence2"),
|
||||
"sst2": ("sentence", None),
|
||||
"stsb": ("sentence1", "sentence2"),
|
||||
"wnli": ("sentence1", "sentence2"),
|
||||
}
|
||||
max_seq_length = 128
|
||||
overwrite_cache = False
|
||||
pad_to_max_length = True
|
||||
padding = "max_length"
|
||||
|
||||
TASK = "qnli"
|
||||
# HP_METRIC, MODE = "loss", "min"
|
||||
HP_METRIC, MODE = "accuracy", "max"
|
||||
|
||||
sentence1_key, sentence2_key = task_to_keys[TASK]
|
||||
# Define tokenize method
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
|
||||
|
||||
def tokenize(examples):
|
||||
args = (
|
||||
(examples[sentence1_key],) if sentence2_key is None else (
|
||||
examples[sentence1_key], examples[sentence2_key])
|
||||
)
|
||||
return tokenizer(*args, padding=padding, max_length=max_seq_length,
|
||||
truncation=True)
|
||||
|
||||
except ImportError:
|
||||
print("pip install torch transformers datasets flaml[blendsearch,ray]")
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
os.makedirs('logs', exist_ok=True)
|
||||
logger.addHandler(logging.FileHandler('logs/tune_deberta.log'))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def train_deberta(config: dict):
|
||||
|
||||
# Load dataset and apply tokenizer
|
||||
data_raw = load_dataset("glue", TASK)
|
||||
data_encoded = data_raw.map(tokenize, batched=True)
|
||||
train_dataset, eval_dataset = data_encoded["train"], data_encoded["validation"]
|
||||
|
||||
NUM_LABELS = len(train_dataset.features["label"].names)
|
||||
|
||||
metric = load_metric("glue", TASK)
|
||||
|
||||
def compute_metrics(eval_pred):
|
||||
predictions, labels = eval_pred
|
||||
predictions = np.argmax(predictions, axis=1)
|
||||
return metric.compute(predictions=predictions, references=labels)
|
||||
|
||||
model = AutoModelForSequenceClassification.from_pretrained(
|
||||
MODEL_CHECKPOINT, num_labels=NUM_LABELS
|
||||
)
|
||||
|
||||
training_args = TrainingArguments(
|
||||
output_dir='.',
|
||||
do_eval=False,
|
||||
disable_tqdm=True,
|
||||
logging_steps=20000,
|
||||
save_total_limit=0,
|
||||
fp16=True,
|
||||
**config,
|
||||
)
|
||||
|
||||
trainer = Trainer(
|
||||
model,
|
||||
training_args,
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset=eval_dataset,
|
||||
tokenizer=tokenizer,
|
||||
compute_metrics=compute_metrics,
|
||||
)
|
||||
|
||||
# train model
|
||||
trainer.train()
|
||||
|
||||
# evaluate model
|
||||
eval_output = trainer.evaluate()
|
||||
|
||||
flaml.tune.report(
|
||||
loss=eval_output["eval_loss"],
|
||||
accuracy=eval_output["eval_accuracy"],
|
||||
)
|
||||
|
||||
try:
|
||||
from azureml.core import Run
|
||||
run = Run.get_context()
|
||||
run.log('accuracy', eval_output["eval_accuracy"])
|
||||
run.log('loss', eval_output["eval_loss"])
|
||||
run.log('config', config)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def _test_deberta(method='BlendSearch'):
|
||||
|
||||
max_num_epoch = 100
|
||||
num_samples = -1
|
||||
time_budget_s = 3600
|
||||
|
||||
search_space = {
|
||||
# You can mix constants with search space objects.
|
||||
"num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
|
||||
"learning_rate": flaml.tune.loguniform(3e-5, 1.5e-4),
|
||||
"weight_decay": flaml.tune.uniform(0, 0.3),
|
||||
"per_device_train_batch_size": flaml.tune.choice([16, 32, 64, 128]),
|
||||
"seed": flaml.tune.choice([12, 22, 33, 42]),
|
||||
}
|
||||
|
||||
start_time = time.time()
|
||||
ray.init(num_cpus=4, num_gpus=4)
|
||||
if 'ASHA' == method:
|
||||
algo = None
|
||||
elif 'BOHB' == method:
|
||||
from ray.tune.schedulers import HyperBandForBOHB
|
||||
from ray.tune.suggest.bohb import tuneBOHB
|
||||
algo = tuneBOHB(max_concurrent=4)
|
||||
scheduler = HyperBandForBOHB(max_t=max_num_epoch)
|
||||
elif 'Optuna' == method:
|
||||
from ray.tune.suggest.optuna import OptunaSearch
|
||||
algo = OptunaSearch()
|
||||
elif 'CFO' == method:
|
||||
from flaml import CFO
|
||||
algo = CFO(low_cost_partial_config={
|
||||
"num_train_epochs": 1,
|
||||
"per_device_train_batch_size": 128,
|
||||
})
|
||||
elif 'BlendSearch' == method:
|
||||
from flaml import BlendSearch
|
||||
algo = BlendSearch(low_cost_partial_config={
|
||||
"num_train_epochs": 1,
|
||||
"per_device_train_batch_size": 128,
|
||||
})
|
||||
elif 'Dragonfly' == method:
|
||||
from ray.tune.suggest.dragonfly import DragonflySearch
|
||||
algo = DragonflySearch()
|
||||
elif 'SkOpt' == method:
|
||||
from ray.tune.suggest.skopt import SkOptSearch
|
||||
algo = SkOptSearch()
|
||||
elif 'Nevergrad' == method:
|
||||
from ray.tune.suggest.nevergrad import NevergradSearch
|
||||
import nevergrad as ng
|
||||
algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
|
||||
elif 'ZOOpt' == method:
|
||||
from ray.tune.suggest.zoopt import ZOOptSearch
|
||||
algo = ZOOptSearch(budget=num_samples)
|
||||
elif 'Ax' == method:
|
||||
from ray.tune.suggest.ax import AxSearch
|
||||
algo = AxSearch(max_concurrent=3)
|
||||
elif 'HyperOpt' == method:
|
||||
from ray.tune.suggest.hyperopt import HyperOptSearch
|
||||
algo = HyperOptSearch()
|
||||
scheduler = None
|
||||
if method != 'BOHB':
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
scheduler = ASHAScheduler(
|
||||
max_t=max_num_epoch,
|
||||
grace_period=1)
|
||||
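# NOTE: resetting scheduler to None here means the ASHAScheduler constructed above is not used.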
scheduler = None
|
||||
analysis = ray.tune.run(
|
||||
train_deberta,
|
||||
metric=HP_METRIC,
|
||||
mode=MODE,
|
||||
resources_per_trial={"gpu": 4, "cpu": 4},
|
||||
config=search_space, local_dir='logs/',
|
||||
num_samples=num_samples, time_budget_s=time_budget_s,
|
||||
keep_checkpoints_num=1, checkpoint_score_attr=HP_METRIC,
|
||||
scheduler=scheduler, search_alg=algo)
|
||||
|
||||
ray.shutdown()
|
||||
|
||||
best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
|
||||
metric = best_trial.metric_analysis[HP_METRIC][MODE]
|
||||
|
||||
logger.info(f"method={method}")
|
||||
logger.info(f"n_trials={len(analysis.trials)}")
|
||||
logger.info(f"time={time.time()-start_time}")
|
||||
logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
|
||||
logger.info(f"Best model parameters: {best_trial.config}")
|
||||
|
||||
|
||||
def _test_deberta_cfo():
|
||||
_test_deberta('CFO')
|
||||
|
||||
|
||||
def _test_deberta_dragonfly():
|
||||
_test_deberta('Dragonfly')
|
||||
|
||||
|
||||
def _test_deberta_skopt():
|
||||
_test_deberta('SkOpt')
|
||||
|
||||
|
||||
def _test_deberta_nevergrad():
|
||||
_test_deberta('Nevergrad')
|
||||
|
||||
|
||||
def _test_deberta_zoopt():
|
||||
_test_deberta('ZOOpt')
|
||||
|
||||
|
||||
def _test_deberta_ax():
|
||||
_test_deberta('Ax')
|
||||
|
||||
|
||||
def __test_deberta_hyperopt():
|
||||
_test_deberta('HyperOpt')
|
||||
|
||||
|
||||
def _test_deberta_optuna():
|
||||
_test_deberta('Optuna')
|
||||
|
||||
|
||||
def _test_deberta_asha():
|
||||
_test_deberta('ASHA')
|
||||
|
||||
|
||||
def _test_deberta_bohb():
|
||||
_test_deberta('BOHB')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test_deberta()
|
||||
220
test/hf/test_distillbert.py
Normal file
@@ -0,0 +1,220 @@
|
||||
'''Require: pip install torch transformers datasets flaml[blendsearch,ray]
|
||||
'''
|
||||
import time
|
||||
import numpy as np
|
||||
import logging
|
||||
import os
|
||||
|
||||
try:
|
||||
import ray
|
||||
from datasets import (
|
||||
load_dataset,
|
||||
load_metric,
|
||||
)
|
||||
from transformers import (
|
||||
AutoModelForSequenceClassification,
|
||||
AutoTokenizer,
|
||||
Trainer,
|
||||
TrainingArguments,
|
||||
)
|
||||
import flaml
|
||||
MODEL_CHECKPOINT = "distilbert-base-uncased"
|
||||
TASK = "cola"
|
||||
NUM_LABELS = 2
|
||||
COLUMN_NAME = "sentence"
|
||||
METRIC_NAME = "matthews_correlation"
|
||||
|
||||
# HP_METRIC, MODE = "loss", "min"
|
||||
HP_METRIC, MODE = "matthews_correlation", "max"
|
||||
|
||||
# Define tokenize method
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
|
||||
|
||||
def tokenize(examples):
|
||||
return tokenizer(examples[COLUMN_NAME], truncation=True)
|
||||
|
||||
except ImportError:
|
||||
print("pip install torch transformers datasets flaml[blendsearch,ray]")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
os.makedirs('logs', exist_ok=True)
|
||||
logger.addHandler(logging.FileHandler('logs/tune_distilbert.log'))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def train_distilbert(config: dict):
|
||||
|
||||
metric = load_metric("glue", TASK)
|
||||
|
||||
def compute_metrics(eval_pred):
|
||||
predictions, labels = eval_pred
|
||||
predictions = np.argmax(predictions, axis=1)
|
||||
return metric.compute(predictions=predictions, references=labels)
|
||||
|
||||
# Load CoLA dataset and apply tokenizer
|
||||
cola_raw = load_dataset("glue", TASK)
|
||||
|
||||
cola_encoded = cola_raw.map(tokenize, batched=True)
|
||||
train_dataset, eval_dataset = cola_encoded["train"], cola_encoded["validation"]
|
||||
|
||||
model = AutoModelForSequenceClassification.from_pretrained(
|
||||
MODEL_CHECKPOINT, num_labels=NUM_LABELS
|
||||
)
|
||||
|
||||
training_args = TrainingArguments(
|
||||
output_dir='.',
|
||||
do_eval=False,
|
||||
disable_tqdm=True,
|
||||
logging_steps=20000,
|
||||
save_total_limit=0,
|
||||
**config,
|
||||
)
|
||||
|
||||
trainer = Trainer(
|
||||
model,
|
||||
training_args,
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset=eval_dataset,
|
||||
tokenizer=tokenizer,
|
||||
compute_metrics=compute_metrics,
|
||||
)
|
||||
|
||||
# train model
|
||||
trainer.train()
|
||||
|
||||
# evaluate model
|
||||
eval_output = trainer.evaluate()
|
||||
|
||||
flaml.tune.report(
|
||||
loss=eval_output["eval_loss"],
|
||||
matthews_correlation=eval_output["eval_matthews_correlation"],
|
||||
)
|
||||
|
||||
|
||||
def _test_distillbert(method='BlendSearch'):
|
||||
|
||||
max_num_epoch = 64
|
||||
num_samples = -1
|
||||
time_budget_s = 3600
|
||||
|
||||
search_space = {
|
||||
# You can mix constants with search space objects.
|
||||
"num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
|
||||
"learning_rate": flaml.tune.loguniform(1e-6, 1e-4),
|
||||
"adam_beta1": flaml.tune.uniform(0.8, 0.99),
|
||||
"adam_beta2": flaml.tune.loguniform(98e-2, 9999e-4),
|
||||
"adam_epsilon": flaml.tune.loguniform(1e-9, 1e-7),
|
||||
}
|
||||
|
||||
start_time = time.time()
|
||||
ray.init(num_cpus=4, num_gpus=4)
|
||||
if 'ASHA' == method:
|
||||
algo = None
|
||||
elif 'BOHB' == method:
|
||||
from ray.tune.schedulers import HyperBandForBOHB
|
||||
from ray.tune.suggest.bohb import tuneBOHB
|
||||
algo = tuneBOHB(max_concurrent=4)
|
||||
scheduler = HyperBandForBOHB(max_t=max_num_epoch)
|
||||
elif 'Optuna' == method:
|
||||
from ray.tune.suggest.optuna import OptunaSearch
|
||||
algo = OptunaSearch()
|
||||
elif 'CFO' == method:
|
||||
from flaml import CFO
|
||||
algo = CFO(low_cost_partial_config={
|
||||
"num_train_epochs": 1,
|
||||
})
|
||||
elif 'BlendSearch' == method:
|
||||
from flaml import BlendSearch
|
||||
algo = BlendSearch(low_cost_partial_config={
|
||||
"num_train_epochs": 1,
|
||||
})
|
||||
elif 'Dragonfly' == method:
|
||||
from ray.tune.suggest.dragonfly import DragonflySearch
|
||||
algo = DragonflySearch()
|
||||
elif 'SkOpt' == method:
|
||||
from ray.tune.suggest.skopt import SkOptSearch
|
||||
algo = SkOptSearch()
|
||||
elif 'Nevergrad' == method:
|
||||
from ray.tune.suggest.nevergrad import NevergradSearch
|
||||
import nevergrad as ng
|
||||
algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
|
||||
elif 'ZOOpt' == method:
|
||||
from ray.tune.suggest.zoopt import ZOOptSearch
|
||||
algo = ZOOptSearch(budget=num_samples)
|
||||
elif 'Ax' == method:
|
||||
from ray.tune.suggest.ax import AxSearch
|
||||
algo = AxSearch(max_concurrent=3)
|
||||
elif 'HyperOpt' == method:
|
||||
from ray.tune.suggest.hyperopt import HyperOptSearch
|
||||
algo = HyperOptSearch()
|
||||
scheduler = None
|
||||
if method != 'BOHB':
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
scheduler = ASHAScheduler(
|
||||
max_t=max_num_epoch,
|
||||
grace_period=1)
|
||||
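# NOTE: resetting scheduler to None here means the ASHAScheduler constructed above is not used.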
scheduler = None
|
||||
analysis = ray.tune.run(
|
||||
train_distilbert,
|
||||
metric=HP_METRIC,
|
||||
mode=MODE,
|
||||
resources_per_trial={"gpu": 4, "cpu": 4},
|
||||
config=search_space, local_dir='logs/',
|
||||
num_samples=num_samples, time_budget_s=time_budget_s,
|
||||
keep_checkpoints_num=1, checkpoint_score_attr=HP_METRIC,
|
||||
scheduler=scheduler, search_alg=algo)
|
||||
|
||||
ray.shutdown()
|
||||
|
||||
best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
|
||||
metric = best_trial.metric_analysis[HP_METRIC][MODE]
|
||||
|
||||
logger.info(f"method={method}")
|
||||
logger.info(f"n_trials={len(analysis.trials)}")
|
||||
logger.info(f"time={time.time()-start_time}")
|
||||
logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
|
||||
logger.info(f"Best model parameters: {best_trial.config}")
|
||||
|
||||
|
||||
def _test_distillbert_cfo():
|
||||
_test_distillbert('CFO')
|
||||
|
||||
|
||||
def _test_distillbert_dragonfly():
|
||||
_test_distillbert('Dragonfly')
|
||||
|
||||
|
||||
def _test_distillbert_skopt():
|
||||
_test_distillbert('SkOpt')
|
||||
|
||||
|
||||
def _test_distillbert_nevergrad():
|
||||
_test_distillbert('Nevergrad')
|
||||
|
||||
|
||||
def _test_distillbert_zoopt():
|
||||
_test_distillbert('ZOOpt')
|
||||
|
||||
|
||||
def _test_distillbert_ax():
|
||||
_test_distillbert('Ax')
|
||||
|
||||
|
||||
def __test_distillbert_hyperopt():
|
||||
_test_distillbert('HyperOpt')
|
||||
|
||||
|
||||
def _test_distillbert_optuna():
|
||||
_test_distillbert('Optuna')
|
||||
|
||||
|
||||
def _test_distillbert_asha():
|
||||
_test_distillbert('ASHA')
|
||||
|
||||
|
||||
def _test_distillbert_bohb():
|
||||
_test_distillbert('BOHB')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test_distillbert()
|
||||
251
test/hf/test_electra.py
Normal file
@@ -0,0 +1,251 @@
|
||||
'''Require: pip install torch transformers datasets flaml[blendsearch,ray]
|
||||
'''
|
||||
import time
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
try:
|
||||
import ray
|
||||
from datasets import (
|
||||
load_dataset,
|
||||
load_metric,
|
||||
)
|
||||
from transformers import (
|
||||
AutoModelForSequenceClassification,
|
||||
AutoTokenizer,
|
||||
Trainer,
|
||||
TrainingArguments,
|
||||
)
|
||||
import flaml
|
||||
MODEL_CHECKPOINT = "google/electra-base-discriminator"
|
||||
task_to_keys = {
|
||||
"cola": ("sentence", None),
|
||||
"mnli": ("premise", "hypothesis"),
|
||||
"mrpc": ("sentence1", "sentence2"),
|
||||
"qnli": ("question", "sentence"),
|
||||
"qqp": ("question1", "question2"),
|
||||
"rte": ("sentence1", "sentence2"),
|
||||
"sst2": ("sentence", None),
|
||||
"stsb": ("sentence1", "sentence2"),
|
||||
"wnli": ("sentence1", "sentence2"),
|
||||
}
|
||||
max_seq_length = 128
|
||||
overwrite_cache = False
|
||||
pad_to_max_length = True
|
||||
padding = "max_length"
|
||||
|
||||
TASK = "qnli"
|
||||
# HP_METRIC, MODE = "loss", "min"
|
||||
HP_METRIC, MODE = "accuracy", "max"
|
||||
|
||||
sentence1_key, sentence2_key = task_to_keys[TASK]
|
||||
# Define tokenize method
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
|
||||
|
||||
def tokenize(examples):
|
||||
args = (
|
||||
(examples[sentence1_key],) if sentence2_key is None else (
|
||||
examples[sentence1_key], examples[sentence2_key])
|
||||
)
|
||||
return tokenizer(*args, padding=padding, max_length=max_seq_length,
|
||||
truncation=True)
|
||||
|
||||
except ImportError:
|
||||
print("pip install torch transformers datasets flaml[blendsearch,ray]")
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
os.makedirs('logs', exist_ok=True)
|
||||
logger.addHandler(logging.FileHandler('logs/tune_electra.log'))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def train_electra(config: dict):
|
||||
|
||||
# Load dataset and apply tokenizer
|
||||
data_raw = load_dataset("glue", TASK)
|
||||
data_encoded = data_raw.map(tokenize, batched=True)
|
||||
train_dataset, eval_dataset = data_encoded["train"], data_encoded["validation"]
|
||||
|
||||
NUM_LABELS = len(train_dataset.features["label"].names)
|
||||
|
||||
metric = load_metric("glue", TASK)
|
||||
|
||||
def compute_metrics(eval_pred):
|
||||
predictions, labels = eval_pred
|
||||
predictions = np.argmax(predictions, axis=1)
|
||||
return metric.compute(predictions=predictions, references=labels)
|
||||
|
||||
model = AutoModelForSequenceClassification.from_pretrained(
|
||||
MODEL_CHECKPOINT, num_labels=NUM_LABELS
|
||||
)
|
||||
|
||||
training_args = TrainingArguments(
|
||||
output_dir='.',
|
||||
do_eval=False,
|
||||
disable_tqdm=True,
|
||||
logging_steps=20000,
|
||||
save_total_limit=0,
|
||||
fp16=True,
|
||||
**config,
|
||||
)
|
||||
|
||||
trainer = Trainer(
|
||||
model,
|
||||
training_args,
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset=eval_dataset,
|
||||
tokenizer=tokenizer,
|
||||
compute_metrics=compute_metrics,
|
||||
)
|
||||
|
||||
# train model
|
||||
trainer.train()
|
||||
|
||||
# evaluate model
|
||||
eval_output = trainer.evaluate()
|
||||
|
||||
flaml.tune.report(
|
||||
loss=eval_output["eval_loss"],
|
||||
accuracy=eval_output["eval_accuracy"],
|
||||
)
|
||||
|
||||
try:
|
||||
from azureml.core import Run
|
||||
run = Run.get_context()
|
||||
run.log('accuracy', eval_output["eval_accuracy"])
|
||||
run.log('loss', eval_output["eval_loss"])
|
||||
run.log('config', config)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def _test_electra(method='BlendSearch'):
|
||||
|
||||
max_num_epoch = 9
|
||||
num_samples = -1
|
||||
time_budget_s = 3600
|
||||
|
||||
search_space = {
|
||||
# You can mix constants with search space objects.
|
||||
"num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
|
||||
"learning_rate": flaml.tune.loguniform(3e-5, 1.5e-4),
|
||||
"weight_decay": flaml.tune.uniform(0, 0.3),
|
||||
"per_device_train_batch_size": flaml.tune.choice([16, 32, 64, 128]),
|
||||
"seed": flaml.tune.choice([12, 22, 33, 42]),
|
||||
}
|
||||
|
||||
start_time = time.time()
|
||||
ray.init(num_cpus=4, num_gpus=4)
|
||||
if 'ASHA' == method:
|
||||
algo = None
|
||||
elif 'BOHB' == method:
|
||||
from ray.tune.schedulers import HyperBandForBOHB
|
||||
from ray.tune.suggest.bohb import tuneBOHB
|
||||
algo = tuneBOHB(max_concurrent=4)
|
||||
scheduler = HyperBandForBOHB(max_t=max_num_epoch)
|
||||
elif 'Optuna' == method:
|
||||
from ray.tune.suggest.optuna import OptunaSearch
|
||||
algo = OptunaSearch()
|
||||
elif 'CFO' == method:
|
||||
from flaml import CFO
|
||||
algo = CFO(low_cost_partial_config={
|
||||
"num_train_epochs": 1,
|
||||
"per_device_train_batch_size": 128,
|
||||
})
|
||||
elif 'BlendSearch' == method:
|
||||
from flaml import BlendSearch
|
||||
algo = BlendSearch(low_cost_partial_config={
|
||||
"num_train_epochs": 1,
|
||||
"per_device_train_batch_size": 128,
|
||||
})
|
||||
elif 'Dragonfly' == method:
|
||||
from ray.tune.suggest.dragonfly import DragonflySearch
|
||||
algo = DragonflySearch()
|
||||
elif 'SkOpt' == method:
|
||||
from ray.tune.suggest.skopt import SkOptSearch
|
||||
algo = SkOptSearch()
|
||||
elif 'Nevergrad' == method:
|
||||
from ray.tune.suggest.nevergrad import NevergradSearch
|
||||
import nevergrad as ng
|
||||
algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
|
||||
elif 'ZOOpt' == method:
|
||||
from ray.tune.suggest.zoopt import ZOOptSearch
|
||||
algo = ZOOptSearch(budget=num_samples)
|
||||
elif 'Ax' == method:
|
||||
from ray.tune.suggest.ax import AxSearch
|
||||
algo = AxSearch(max_concurrent=3)
|
||||
elif 'HyperOpt' == method:
|
||||
from ray.tune.suggest.hyperopt import HyperOptSearch
|
||||
algo = HyperOptSearch()
|
||||
scheduler = None
|
||||
if method != 'BOHB':
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
scheduler = ASHAScheduler(
|
||||
max_t=max_num_epoch,
|
||||
grace_period=1)
|
||||
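# NOTE: resetting scheduler to None here means the ASHAScheduler constructed above is not used.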
scheduler = None
|
||||
analysis = ray.tune.run(
|
||||
train_electra,
|
||||
metric=HP_METRIC,
|
||||
mode=MODE,
|
||||
resources_per_trial={"gpu": 4, "cpu": 4},
|
||||
config=search_space, local_dir='logs/',
|
||||
num_samples=num_samples, time_budget_s=time_budget_s,
|
||||
keep_checkpoints_num=1, checkpoint_score_attr=HP_METRIC,
|
||||
scheduler=scheduler, search_alg=algo)
|
||||
|
||||
ray.shutdown()
|
||||
|
||||
best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
|
||||
metric = best_trial.metric_analysis[HP_METRIC][MODE]
|
||||
|
||||
logger.info(f"method={method}")
|
||||
logger.info(f"n_trials={len(analysis.trials)}")
|
||||
logger.info(f"time={time.time()-start_time}")
|
||||
logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
|
||||
logger.info(f"Best model parameters: {best_trial.config}")
|
||||
|
||||
|
||||
def _test_electra_cfo():
|
||||
_test_electra('CFO')
|
||||
|
||||
|
||||
def _test_electra_dragonfly():
|
||||
_test_electra('Dragonfly')
|
||||
|
||||
|
||||
def _test_electra_skopt():
|
||||
_test_electra('SkOpt')
|
||||
|
||||
|
||||
def _test_electra_nevergrad():
|
||||
_test_electra('Nevergrad')
|
||||
|
||||
|
||||
def _test_electra_zoopt():
|
||||
_test_electra('ZOOpt')
|
||||
|
||||
|
||||
def _test_electra_ax():
|
||||
_test_electra('Ax')
|
||||
|
||||
|
||||
def __test_electra_hyperopt():
|
||||
_test_electra('HyperOpt')
|
||||
|
||||
|
||||
def _test_electra_optuna():
|
||||
_test_electra('Optuna')
|
||||
|
||||
|
||||
def _test_electra_asha():
|
||||
_test_electra('ASHA')
|
||||
|
||||
|
||||
def _test_electra_bohb():
|
||||
_test_electra('BOHB')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test_electra()
|
||||
251
test/hf/test_roberta.py
Normal file
@@ -0,0 +1,251 @@
|
||||
'''Require: pip install torch transformers datasets flaml[blendsearch,ray]
|
||||
'''
|
||||
import time
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
try:
|
||||
import ray
|
||||
from datasets import (
|
||||
load_dataset,
|
||||
load_metric,
|
||||
)
|
||||
from transformers import (
|
||||
AutoModelForSequenceClassification,
|
||||
AutoTokenizer,
|
||||
Trainer,
|
||||
TrainingArguments,
|
||||
)
|
||||
import flaml
|
||||
MODEL_CHECKPOINT = "roberta-base"
|
||||
task_to_keys = {
|
||||
"cola": ("sentence", None),
|
||||
"mnli": ("premise", "hypothesis"),
|
||||
"mrpc": ("sentence1", "sentence2"),
|
||||
"qnli": ("question", "sentence"),
|
||||
"qqp": ("question1", "question2"),
|
||||
"rte": ("sentence1", "sentence2"),
|
||||
"sst2": ("sentence", None),
|
||||
"stsb": ("sentence1", "sentence2"),
|
||||
"wnli": ("sentence1", "sentence2"),
|
||||
}
|
||||
max_seq_length = 128
|
||||
overwrite_cache = False
|
||||
pad_to_max_length = True
|
||||
padding = "max_length"
|
||||
|
||||
TASK = "qnli"
|
||||
# HP_METRIC, MODE = "loss", "min"
|
||||
HP_METRIC, MODE = "accuracy", "max"
|
||||
|
||||
sentence1_key, sentence2_key = task_to_keys[TASK]
|
||||
# Define tokenize method
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
|
||||
|
||||
def tokenize(examples):
|
||||
args = (
|
||||
(examples[sentence1_key],) if sentence2_key is None else (
|
||||
examples[sentence1_key], examples[sentence2_key])
|
||||
)
|
||||
return tokenizer(*args, padding=padding, max_length=max_seq_length,
|
||||
truncation=True)
|
||||
|
||||
except ImportError:
|
||||
print("pip install torch transformers datasets flaml[blendsearch,ray]")
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
os.makedirs('logs', exist_ok=True)
|
||||
logger.addHandler(logging.FileHandler('logs/tune_roberta.log'))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def train_roberta(config: dict):
|
||||
|
||||
# Load dataset and apply tokenizer
|
||||
data_raw = load_dataset("glue", TASK)
|
||||
data_encoded = data_raw.map(tokenize, batched=True)
|
||||
train_dataset, eval_dataset = data_encoded["train"], data_encoded["validation"]
|
||||
|
||||
NUM_LABELS = len(train_dataset.features["label"].names)
|
||||
|
||||
metric = load_metric("glue", TASK)
|
||||
|
||||
def compute_metrics(eval_pred):
|
||||
predictions, labels = eval_pred
|
||||
predictions = np.argmax(predictions, axis=1)
|
||||
return metric.compute(predictions=predictions, references=labels)
|
||||
|
||||
model = AutoModelForSequenceClassification.from_pretrained(
|
||||
MODEL_CHECKPOINT, num_labels=NUM_LABELS
|
||||
)
|
||||
|
||||
training_args = TrainingArguments(
|
||||
output_dir='.',
|
||||
do_eval=False,
|
||||
disable_tqdm=True,
|
||||
logging_steps=20000,
|
||||
save_total_limit=0,
|
||||
fp16=True,
|
||||
**config,
|
||||
)
|
||||
|
||||
trainer = Trainer(
|
||||
model,
|
||||
training_args,
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset=eval_dataset,
|
||||
tokenizer=tokenizer,
|
||||
compute_metrics=compute_metrics,
|
||||
)
|
||||
|
||||
# train model
|
||||
trainer.train()
|
||||
|
||||
# evaluate model
|
||||
eval_output = trainer.evaluate()
|
||||
|
||||
flaml.tune.report(
|
||||
loss=eval_output["eval_loss"],
|
||||
accuracy=eval_output["eval_accuracy"],
|
||||
)
|
||||
|
||||
try:
|
||||
from azureml.core import Run
|
||||
run = Run.get_context()
|
||||
run.log('accuracy', eval_output["eval_accuracy"])
|
||||
run.log('loss', eval_output["eval_loss"])
|
||||
run.log('config', config)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def _test_roberta(method='BlendSearch'):
|
||||
|
||||
max_num_epoch = 100
|
||||
num_samples = -1
|
||||
time_budget_s = 3600
|
||||
|
||||
search_space = {
|
||||
# You can mix constants with search space objects.
|
||||
"num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
|
||||
"learning_rate": flaml.tune.loguniform(1e-5, 3e-5),
|
||||
"weight_decay": flaml.tune.uniform(0, 0.3),
|
||||
"per_device_train_batch_size": flaml.tune.choice([16, 32, 64, 128]),
|
||||
"seed": flaml.tune.choice([12, 22, 33, 42]),
|
||||
}
|
||||
|
||||
start_time = time.time()
|
||||
ray.init(num_cpus=4, num_gpus=4)
|
||||
if 'ASHA' == method:
|
||||
algo = None
|
||||
elif 'BOHB' == method:
|
||||
from ray.tune.schedulers import HyperBandForBOHB
|
||||
from ray.tune.suggest.bohb import tuneBOHB
|
||||
algo = tuneBOHB(max_concurrent=4)
|
||||
scheduler = HyperBandForBOHB(max_t=max_num_epoch)
|
||||
elif 'Optuna' == method:
|
||||
from ray.tune.suggest.optuna import OptunaSearch
|
||||
algo = OptunaSearch()
|
||||
elif 'CFO' == method:
|
||||
from flaml import CFO
|
||||
algo = CFO(low_cost_partial_config={
|
||||
"num_train_epochs": 1,
|
||||
"per_device_train_batch_size": 128,
|
||||
})
|
||||
elif 'BlendSearch' == method:
|
||||
from flaml import BlendSearch
|
||||
algo = BlendSearch(low_cost_partial_config={
|
||||
"num_train_epochs": 1,
|
||||
"per_device_train_batch_size": 128,
|
||||
})
|
||||
elif 'Dragonfly' == method:
|
||||
from ray.tune.suggest.dragonfly import DragonflySearch
|
||||
algo = DragonflySearch()
|
||||
elif 'SkOpt' == method:
|
||||
from ray.tune.suggest.skopt import SkOptSearch
|
||||
algo = SkOptSearch()
|
||||
elif 'Nevergrad' == method:
|
||||
from ray.tune.suggest.nevergrad import NevergradSearch
|
||||
import nevergrad as ng
|
||||
algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
|
||||
elif 'ZOOpt' == method:
|
||||
from ray.tune.suggest.zoopt import ZOOptSearch
|
||||
algo = ZOOptSearch(budget=num_samples)
|
||||
elif 'Ax' == method:
|
||||
from ray.tune.suggest.ax import AxSearch
|
||||
algo = AxSearch(max_concurrent=3)
|
||||
elif 'HyperOpt' == method:
|
||||
from ray.tune.suggest.hyperopt import HyperOptSearch
|
||||
algo = HyperOptSearch()
|
||||
scheduler = None
|
||||
if method != 'BOHB':
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
scheduler = ASHAScheduler(
|
||||
max_t=max_num_epoch,
|
||||
grace_period=1)
|
||||
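# NOTE: resetting scheduler to None here means the ASHAScheduler constructed above is not used.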
scheduler = None
|
||||
analysis = ray.tune.run(
|
||||
train_roberta,
|
||||
metric=HP_METRIC,
|
||||
mode=MODE,
|
||||
resources_per_trial={"gpu": 4, "cpu": 4},
|
||||
config=search_space, local_dir='logs/',
|
||||
num_samples=num_samples, time_budget_s=time_budget_s,
|
||||
keep_checkpoints_num=1, checkpoint_score_attr=HP_METRIC,
|
||||
scheduler=scheduler, search_alg=algo)
|
||||
|
||||
ray.shutdown()
|
||||
|
||||
best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
|
||||
metric = best_trial.metric_analysis[HP_METRIC][MODE]
|
||||
|
||||
logger.info(f"method={method}")
|
||||
logger.info(f"n_trials={len(analysis.trials)}")
|
||||
logger.info(f"time={time.time()-start_time}")
|
||||
logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
|
||||
logger.info(f"Best model parameters: {best_trial.config}")
|
||||
|
||||
|
||||
def _test_roberta_cfo():
|
||||
_test_roberta('CFO')
|
||||
|
||||
|
||||
def _test_roberta_dragonfly():
|
||||
_test_roberta('Dragonfly')
|
||||
|
||||
|
||||
def _test_roberta_skopt():
|
||||
_test_roberta('SkOpt')
|
||||
|
||||
|
||||
def _test_roberta_nevergrad():
|
||||
_test_roberta('Nevergrad')
|
||||
|
||||
|
||||
def _test_roberta_zoopt():
|
||||
_test_roberta('ZOOpt')
|
||||
|
||||
|
||||
def _test_roberta_ax():
|
||||
_test_roberta('Ax')
|
||||
|
||||
|
||||
def __test_roberta_hyperopt():
|
||||
_test_roberta('HyperOpt')
|
||||
|
||||
|
||||
def _test_roberta_optuna():
|
||||
_test_roberta('Optuna')
|
||||
|
||||
|
||||
def _test_roberta_asha():
|
||||
_test_roberta('ASHA')
|
||||
|
||||
|
||||
def _test_roberta_bohb():
|
||||
_test_roberta('BOHB')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test_roberta()
|
||||
19
test/nni/config.yml
Normal file
@@ -0,0 +1,19 @@
|
||||
# usage: nnictl create --config ./config.yml
|
||||
authorName: default
|
||||
experimentName: example_mnist
|
||||
trialConcurrency: 1
|
||||
maxExecDuration: 1h
|
||||
maxTrialNum: 10
|
||||
trainingServicePlatform: local
|
||||
# The path to Search Space
|
||||
searchSpacePath: search_space.json
|
||||
useAnnotation: false
|
||||
tuner:
|
||||
codeDir: ./
|
||||
classFileName: flaml_nni_wrap.py
|
||||
className: BlendSearchTuner
|
||||
# The path and the running command of trial
|
||||
trial:
|
||||
command: python3 mnist.py
|
||||
codeDir: .
|
||||
gpuNum: 0
|
||||
7
test/nni/flaml_nni_wrap.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from flaml.searcher.blendsearch import BlendSearchTuner as BST
|
||||
|
||||
|
||||
class BlendSearchTuner(BST):
|
||||
# for best performance pass low cost initial parameters here
|
||||
def __init__(self, low_cost_partial_config={"hidden_size": 128}):
|
||||
super().__init__(low_cost_partial_config=low_cost_partial_config)
|
||||
168
test/nni/mnist.py
Normal file
@@ -0,0 +1,168 @@
|
||||
# This file is copied from NNI project
|
||||
# https://github.com/microsoft/nni/blob/master/examples/trials/mnist-tfv1/mnist.py
|
||||
|
||||
"""
|
||||
A deep MNIST classifier using convolutional layers.
|
||||
|
||||
This file is a modification of the official pytorch mnist example:
|
||||
https://github.com/pytorch/examples/blob/master/mnist/main.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import logging
|
||||
import nni
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
from nni.utils import merge_parameter
|
||||
from torchvision import datasets, transforms
|
||||
|
||||
logger = logging.getLogger('mnist_AutoML')
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self, hidden_size):
|
||||
super(Net, self).__init__()
|
||||
self.conv1 = nn.Conv2d(1, 20, 5, 1)
|
||||
self.conv2 = nn.Conv2d(20, 50, 5, 1)
|
||||
self.fc1 = nn.Linear(4 * 4 * 50, hidden_size)
|
||||
self.fc2 = nn.Linear(hidden_size, 10)
|
||||
|
||||
def forward(self, x):
|
||||
x = F.relu(self.conv1(x))
|
||||
x = F.max_pool2d(x, 2, 2)
|
||||
x = F.relu(self.conv2(x))
|
||||
x = F.max_pool2d(x, 2, 2)
|
||||
x = x.view(-1, 4 * 4 * 50)
|
||||
x = F.relu(self.fc1(x))
|
||||
x = self.fc2(x)
|
||||
return F.log_softmax(x, dim=1)
|
||||
|
||||
|
||||
def train(args, model, device, train_loader, optimizer, epoch):
|
||||
model.train()
|
||||
for batch_idx, (data, target) in enumerate(train_loader):
|
||||
if (args['batch_num'] is not None) and batch_idx >= args['batch_num']:
|
||||
break
|
||||
data, target = data.to(device), target.to(device)
|
||||
optimizer.zero_grad()
|
||||
output = model(data)
|
||||
loss = F.nll_loss(output, target)
|
||||
loss.backward()
|
||||
        optimizer.step()
        if batch_idx % args['log_interval'] == 0:
            logger.info('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    accuracy = 100. * correct / len(test_loader.dataset)

    logger.info('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), accuracy))

    return accuracy


def main(args):
    use_cuda = not args['no_cuda'] and torch.cuda.is_available()

    torch.manual_seed(args['seed'])

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    data_dir = args['data_dir']

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_dir, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args['batch_size'], shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_dir, train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=1000, shuffle=True, **kwargs)

    hidden_size = args['hidden_size']

    model = Net(hidden_size=hidden_size).to(device)
    optimizer = optim.SGD(model.parameters(), lr=args['lr'],
                          momentum=args['momentum'])

    for epoch in range(1, args['epochs'] + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test_acc = test(args, model, device, test_loader)

        # report intermediate result
        nni.report_intermediate_result(test_acc)
        logger.debug('test accuracy %g', test_acc)
        logger.debug('Pipe send intermediate result done.')

    # report final result
    nni.report_final_result(test_acc)
    logger.debug('Final result is %g', test_acc)
    logger.debug('Send final result done.')


def get_params():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument("--data_dir", type=str,
                        default='./data', help="data directory")
    parser.add_argument('--batch_size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument("--batch_num", type=int, default=None)
    parser.add_argument("--hidden_size", type=int, default=512, metavar='N',
                        help='hidden layer size (default: 512)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--log_interval', type=int, default=1000, metavar='N',
                        help='how many batches to wait before logging training status')

    args, _ = parser.parse_known_args()
    return args


if __name__ == '__main__':
    try:
        # get parameters from tuner
        tuner_params = nni.get_next_parameter()
        logger.debug(tuner_params)
        params = vars(merge_parameter(get_params(), tuner_params))
        print(params)
        main(params)
    except Exception as exception:
        logger.exception(exception)
        raise
6
test/nni/search_space.json
Normal file
@@ -0,0 +1,6 @@
{
    "batch_size": {"_type": "choice", "_value": [16, 32, 64, 128]},
    "hidden_size": {"_type": "choice", "_value": [128, 256, 512, 1024]},
    "lr": {"_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1]},
    "momentum": {"_type": "uniform", "_value": [0, 1]}
}
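This JSON file is the NNI search space that the MNIST script above consumes: each trial draws one value per key, `nni.get_next_parameter()` returns the drawn dictionary, and `merge_parameter` (from `nni.utils` in the standard NNI example) overlays it on the argparse defaults. A minimal sketch of that flow, with hypothetical sampled values:

# Hypothetical values NNI might sample from search_space.json for one trial.
tuner_params = {'batch_size': 64, 'hidden_size': 256, 'lr': 0.01, 'momentum': 0.42}
# merge_parameter overwrites matching argparse defaults,
# so main(params) sees the tuner's choices.
params = vars(merge_parameter(get_params(), tuner_params))
assert params['hidden_size'] == 256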
19
test/run_electra.py
Normal file
@@ -0,0 +1,19 @@
from azureml.core import Workspace, Experiment, ScriptRunConfig

ws = Workspace.from_config()

compute_target = ws.compute_targets['V100-4']
# compute_target = ws.compute_targets['K80']
command = [
    "pip install torch transformers datasets flaml[blendsearch,ray] && ",
    "python test_electra.py"]

config = ScriptRunConfig(
    source_directory='hf/',
    command=command,
    compute_target=compute_target,
)

exp = Experiment(ws, 'test-electra')
run = exp.submit(config)
print(run.get_portal_url())  # link to ml.azure.com
run.wait_for_completion(show_output=True)
@@ -4,20 +4,22 @@ import numpy as np
import scipy.sparse
from sklearn.datasets import load_boston, load_iris, load_wine

import pandas as pd
from datetime import datetime

from flaml import AutoML
from flaml.data import get_output_from_log

from flaml.model import SKLearnEstimator
from flaml.model import SKLearnEstimator, XGBoostEstimator
from rgf.sklearn import RGFClassifier, RGFRegressor
from flaml import tune


class MyRegularizedGreedyForest(SKLearnEstimator):

    def __init__(self, task = 'binary:logistic', n_jobs = 1, max_leaf = 4,
        n_iter = 1, n_tree_search = 1, opt_interval = 1, learning_rate = 1.0,
        min_samples_leaf = 1, **params):
    def __init__(self, task='binary:logistic', n_jobs=1, max_leaf=4,
                 n_iter=1, n_tree_search=1, opt_interval=1, learning_rate=1.0,
                 min_samples_leaf=1, **params):

        super().__init__(task, **params)

@@ -34,24 +36,24 @@ class MyRegularizedGreedyForest(SKLearnEstimator):
            'n_tree_search': int(round(n_tree_search)),
            'opt_interval': int(round(opt_interval)),
            'learning_rate': learning_rate,
            'min_samples_leaf':int(round(min_samples_leaf))
        }
            'min_samples_leaf': int(round(min_samples_leaf))
        }

    @classmethod
    def search_space(cls, data_size, task):
        space = {
            'max_leaf': {'domain': tune.qloguniform(
                lower = 4, upper = data_size, q = 1), 'init_value': 4},
            'n_iter': {'domain': tune.qloguniform(
                lower = 1, upper = data_size, q = 1), 'init_value': 1},
            'n_tree_search': {'domain': tune.qloguniform(
                lower = 1, upper = 32768, q = 1), 'init_value': 1},
            'opt_interval': {'domain': tune.qloguniform(
                lower = 1, upper = 10000, q = 1), 'init_value': 100},
            'learning_rate': {'domain': tune.loguniform(
                lower = 0.01, upper = 20.0)},
            'min_samples_leaf': {'domain': tune.qloguniform(
                lower = 1, upper = 20, q = 1), 'init_value': 20},
            'max_leaf': {'domain': tune.qloguniform(
                lower=4, upper=data_size, q=1), 'init_value': 4},
            'n_iter': {'domain': tune.qloguniform(
                lower=1, upper=data_size, q=1), 'init_value': 1},
            'n_tree_search': {'domain': tune.qloguniform(
                lower=1, upper=32768, q=1), 'init_value': 1},
            'opt_interval': {'domain': tune.qloguniform(
                lower=1, upper=10000, q=1), 'init_value': 100},
            'learning_rate': {'domain': tune.loguniform(
                lower=0.01, upper=20.0)},
            'min_samples_leaf': {'domain': tune.qloguniform(
                lower=1, upper=20, q=1), 'init_value': 20},
        }
        return space

@@ -59,22 +61,46 @@ class MyRegularizedGreedyForest(SKLearnEstimator):
    def size(cls, config):
        max_leaves = int(round(config['max_leaf']))
        n_estimators = int(round(config['n_iter']))
        return (max_leaves*3 + (max_leaves-1)*4 + 1.0)*n_estimators*8
        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8

    @classmethod
    def cost_relative2lgbm(cls):
        return 1.0
        return 1.0


def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess


class MyXGB1(XGBoostEstimator):
    '''XGBoostEstimator with logregobj as the objective function
    '''

    def __init__(self, **params):
        super().__init__(objective=logregobj, **params)


class MyXGB2(XGBoostEstimator):
    '''XGBoostEstimator with 'reg:squarederror' as the objective function
    '''

    def __init__(self, **params):
        super().__init__(objective='reg:squarederror', **params)


def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
        weight_test=None, weight_train=None):
                  weight_test=None, weight_train=None):
    from sklearn.metrics import log_loss
    y_pred = estimator.predict_proba(X_test)
    test_loss = log_loss(y_test, y_pred, labels=labels,
        sample_weight=weight_test)
                         sample_weight=weight_test)
    y_pred = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_pred, labels=labels,
        sample_weight=weight_train)
                          sample_weight=weight_train)
    alpha = 0.5
    return test_loss * (1 + alpha) - alpha * train_loss, [test_loss, train_loss]

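The block above adds two custom XGBoost learners: `logregobj` returns the gradient and Hessian of the logistic loss with respect to the raw margin (the pair XGBoost requires from a custom objective), and `MyXGB1`/`MyXGB2` simply forward an `objective` to `XGBoostEstimator`. A minimal sketch of how they plug into AutoML, mirroring `test_regression_xgboost` further down (the two-second budget is illustrative only):

automl = AutoML()
automl.add_learner(learner_name='my_xgb1', learner_class=MyXGB1)  # custom logistic objective
automl.add_learner(learner_name='my_xgb2', learner_class=MyXGB2)  # built-in squared error
automl.fit(X_train=X_train, y_train=y_train, task='regression',
           estimator_list=['my_xgb1', 'my_xgb2'], time_budget=2)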
@@ -83,41 +109,42 @@ class TestAutoML(unittest.TestCase):

    def test_custom_learner(self):
        automl = AutoML()
        automl.add_learner(learner_name = 'RGF',
            learner_class = MyRegularizedGreedyForest)
        automl.add_learner(learner_name='RGF',
                           learner_class=MyRegularizedGreedyForest)
        X_train, y_train = load_wine(return_X_y=True)
        settings = {
            "time_budget": 10,  # total running time in seconds
            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
            "task": 'classification',  # task type
            "sample": True,  # whether to subsample training data
            "time_budget": 10,  # total running time in seconds
            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
            "task": 'classification',  # task type
            "sample": True,  # whether to subsample training data
            "log_file_name": "test/wine.log",
            "log_training_metric": True,  # whether to log training metric
            "log_training_metric": True,  # whether to log training metric
            "n_jobs": 1,
        }

        '''The main flaml automl API'''
        automl.fit(X_train = X_train, y_train = y_train, **settings)
        automl.fit(X_train=X_train, y_train=y_train, **settings)
        # print the best model found for RGF
        print(automl.best_model_for_estimator("RGF"))

    def test_ensemble(self):
        automl = AutoML()
        automl.add_learner(learner_name = 'RGF',
            learner_class = MyRegularizedGreedyForest)
        automl.add_learner(learner_name='RGF',
                           learner_class=MyRegularizedGreedyForest)
        X_train, y_train = load_wine(return_X_y=True)
        settings = {
            "time_budget": 10,  # total running time in seconds
            # "estimator_list": ['lgbm', 'xgboost'],
            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
            "task": 'classification',  # task type
            "sample": True,  # whether to subsample training data
            "time_budget": 10,  # total running time in seconds
            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
            "task": 'classification',  # task type
            "sample": True,  # whether to subsample training data
            "log_file_name": "test/wine.log",
            "log_training_metric": True,  # whether to log training metric
            "log_training_metric": True,  # whether to log training metric
            "ensemble": True,
            "n_jobs": 1,
        }

        '''The main flaml automl API'''
        automl.fit(X_train = X_train, y_train = y_train, **settings)
        automl.fit(X_train=X_train, y_train=y_train, **settings)

    def test_dataframe(self):
        self.test_classification(True)
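Both tests above log their search trajectory to test/wine.log, and the module imports `get_output_from_log` for exactly this kind of replay. A hedged sketch of reading the log back, assuming the flaml 0.x helper signature with `filename` and `time_budget` arguments:

time_history, best_valid_loss_history, valid_loss_history, \
    config_history, metric_history = get_output_from_log(
        filename="test/wine.log", time_budget=10)
# Print the anytime learning curve recorded during the search.
for t, loss in zip(time_history, best_valid_loss_history):
    print(f"{t:.1f}s: best validation loss so far {loss:.4f}")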
@@ -170,6 +197,10 @@ class TestAutoML(unittest.TestCase):
            "model_history": True
        }
        X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
        if as_frame:
            # test drop column
            X_train.columns = range(X_train.shape[1])
            X_train[X_train.shape[1]] = np.zeros(len(y_train))
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.classes_)
@@ -191,6 +222,46 @@ class TestAutoML(unittest.TestCase):
        print(automl_experiment.model)
        print(automl_experiment.predict_proba(X_train)[:5])

    def test_datetime_columns(self):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": 'mse',
            "task": 'regression',
            "log_file_name": "test/datetime_columns.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True
        }

        fake_df = pd.DataFrame({'A': [datetime(1900, 2, 3), datetime(1900, 3, 4)]})
        y = np.array([0, 1])
        automl_experiment.fit(
            X_train=fake_df, X_val=fake_df, y_train=y, y_val=y, **automl_settings)

        y_pred = automl_experiment.predict(fake_df)
        print(y_pred)

    def test_micro_macro_f1(self):
        automl_experiment = AutoML()
        automl_experiment_macro = AutoML()

        automl_settings = {
            "time_budget": 2,
            "task": 'classification',
            "log_file_name": "test/micro_macro_f1.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True
        }

        X_train, y_train = load_iris(return_X_y=True)
        automl_experiment.fit(
            X_train=X_train, y_train=y_train, metric='micro_f1', **automl_settings)
        automl_experiment_macro.fit(
            X_train=X_train, y_train=y_train, metric='macro_f1', **automl_settings)

    def test_regression(self):

        automl_experiment = AutoML()
@@ -204,7 +275,7 @@ class TestAutoML(unittest.TestCase):
            "model_history": True
        }
        X_train, y_train = load_boston(return_X_y=True)
        n = int(len(y_train)*9//10)
        n = int(len(y_train) * 9 // 10)
        automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
                              X_val=X_train[n:], y_val=y_train[n:],
                              **automl_settings)
@@ -243,6 +314,10 @@ class TestAutoML(unittest.TestCase):

    def test_sparse_matrix_regression(self):

        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
@@ -250,12 +325,9 @@ class TestAutoML(unittest.TestCase):
            "task": 'regression',
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True
            "model_history": True,
            "verbose": 0,
        }
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              X_val=X_val, y_val=y_val,
                              **automl_settings)
@@ -274,7 +346,7 @@ class TestAutoML(unittest.TestCase):

        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "time_budget": 3,
            "metric": 'ap',
            "task": 'classification',
            "log_file_name": "test/sparse_classification.log",
@@ -318,6 +390,8 @@ class TestAutoML(unittest.TestCase):

    def test_sparse_matrix_regression_cv(self):

        X_train = scipy.sparse.random(8, 100)
        y_train = np.random.uniform(size=8)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
@@ -325,10 +399,10 @@ class TestAutoML(unittest.TestCase):
            "task": 'regression',
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True
            "model_history": True,
            "metric": "mse",
            "sample_weight": np.ones(len(y_train)),
        }
        X_train = scipy.sparse.random(100, 100)
        y_train = np.random.uniform(size=100)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.predict(X_train))
@@ -338,6 +412,36 @@ class TestAutoML(unittest.TestCase):
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_regression_xgboost(self):
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_experiment.add_learner(learner_name='my_xgb1', learner_class=MyXGB1)
        automl_experiment.add_learner(learner_name='my_xgb2', learner_class=MyXGB2)
        automl_settings = {
            "time_budget": 2,
            "estimator_list": ['my_xgb1', 'my_xgb2'],
            "task": 'regression',
            "log_file_name": 'test/regression_xgboost.log',
            "n_jobs": 1,
            "model_history": True,
        }
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              X_val=X_val, y_val=y_val,
                              **automl_settings)
        assert automl_experiment._state.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(automl_experiment.best_config)
        print(automl_experiment.best_loss)
        print(automl_experiment.best_config_train_time)


if __name__ == "__main__":
    unittest.main()
@@ -26,7 +26,7 @@ class TestLogging(unittest.TestCase):
        logger.addHandler(ch)

        # Run a simple job.
        automl_experiment = AutoML()
        automl = AutoML()
        automl_settings = {
            "time_budget": 1,
            "metric": 'mse',
@@ -34,13 +34,18 @@ class TestLogging(unittest.TestCase):
            "log_file_name": training_log,
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True
            "model_history": True,
        }
        X_train, y_train = load_boston(return_X_y=True)
        n = len(y_train) >> 1
        automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
                              X_val=X_train[n:], y_val=y_train[n:],
                              **automl_settings)
        automl.fit(X_train=X_train[:n], y_train=y_train[:n],
                   X_val=X_train[n:], y_val=y_train[n:],
                   **automl_settings)

        # Check if the log buffer is populated.
        self.assertTrue(len(buf.getvalue()) > 0)

        import pickle
        with open('automl.pkl', 'wb') as f:
            pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
        print(automl.__version__)
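The pickling step above only verifies that the fitted AutoML object serializes. The natural counterpart, a sketch of loading it back and predicting on the same held-out slice, would be:

import pickle

with open('automl.pkl', 'rb') as f:
    automl_restored = pickle.load(f)
# Reuse the X_val slice from the test above.
print(automl_restored.predict(X_train[n:])[:5])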
@@ -1,3 +1,5 @@
'''Require: pip install torchvision ray flaml[blendsearch]
'''
import unittest
import os
import time
@@ -7,24 +9,6 @@ logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('test/tune_pytorch_cifar10.log'))


# __load_data_begin__
def load_data(data_dir="./data"):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = torchvision.datasets.CIFAR10(
        root=data_dir, train=True, download=True, transform=transform)

    testset = torchvision.datasets.CIFAR10(
        root=data_dir, train=False, download=True, transform=transform)

    return trainset, testset
# __load_data_end__


import numpy as np
try:
    import torch
    import torch.nn as nn
@@ -34,9 +18,9 @@ try:
    import torchvision
    import torchvision.transforms as transforms

    # __net_begin__
    class Net(nn.Module):

        def __init__(self, l1=120, l2=84):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(3, 6, 5)
@@ -78,7 +62,7 @@ def load_data(data_dir="test/data"):

# __train_begin__
def train_cifar(config, checkpoint_dir=None, data_dir=None):
    if not "l1" in config:
    if "l1" not in config:
        logger.warning(config)
    net = Net(2 ** config["l1"], 2 ** config["l2"])

@@ -199,8 +183,9 @@ def _test_accuracy(net, device="cpu"):


# __main_begin__
def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
                 gpus_per_trial=2):
def cifar10_main(
    method='BlendSearch', num_samples=10, max_num_epochs=100, gpus_per_trial=2
):
    data_dir = os.path.abspath("test/data")
    load_data(data_dir)  # Download data for all trials before starting the run
    if method == 'BlendSearch':
@@ -213,15 +198,15 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
            "l2": tune.randint(2, 8),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs, q=1),
            "batch_size": tune.randint(1, 4)  # tune.choice([2, 4, 8, 16])
            "batch_size": tune.randint(1, 4)
        }
    else:
        config = {
            "l1": tune.randint(2, 9),
            "l2": tune.randint(2, 9),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs+1, q=1),
            "batch_size": tune.randint(1, 5)  # tune.choice([2, 4, 8, 16])
            "num_epochs": tune.qloguniform(1, max_num_epochs + 1, q=1),
            "batch_size": tune.randint(1, 5)
        }
    import ray
    time_budget_s = 3600
@@ -229,7 +214,8 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
    if method == 'BlendSearch':
        result = tune.run(
            ray.tune.with_parameters(train_cifar, data_dir=data_dir),
            init_config={
            config=config,
            low_cost_partial_config={
                "l1": 2,
                "l2": 2,
                "num_epochs": 1,
@@ -241,7 +227,6 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
            min_resource=1,
            report_intermediate_result=True,
            resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
            config=config,
            local_dir='logs/',
            num_samples=num_samples,
            time_budget_s=time_budget_s,
@@ -259,12 +244,12 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
        algo = OptunaSearch()
    elif 'CFO' == method:
        from flaml import CFO
        algo = CFO(points_to_evaluate=[{
        algo = CFO(low_cost_partial_config={
            "l1": 2,
            "l2": 2,
            "num_epochs": 1,
            "batch_size": 4,
        }])
        })
    elif 'Nevergrad' == method:
        from ray.tune.suggest.nevergrad import NevergradSearch
        import nevergrad as ng
@@ -273,7 +258,7 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
        from ray.tune.schedulers import ASHAScheduler
        scheduler = ASHAScheduler(
            max_t=max_num_epochs,
                grace_period=1)
            grace_period=1)
        result = tune.run(
            tune.with_parameters(train_cifar, data_dir=data_dir),
            resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
@@ -296,7 +281,7 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
        best_trial.metric_analysis["accuracy"]["max"]))

    best_trained_model = Net(2**best_trial.config["l1"],
        2**best_trial.config["l2"])
                             2**best_trial.config["l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
@@ -314,8 +299,8 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
# __main_end__


gpus_per_trial=0  # .5
num_samples=500
gpus_per_trial = 0  # 0.5 on GPU server
num_samples = 500


def _test_cifar10_bs():
@@ -324,27 +309,27 @@ def _test_cifar10_bs():

def _test_cifar10_cfo():
    cifar10_main('CFO',
        num_samples=num_samples, gpus_per_trial=gpus_per_trial)
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_optuna():
    cifar10_main('Optuna',
        num_samples=num_samples, gpus_per_trial=gpus_per_trial)
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_asha():
    cifar10_main('ASHA',
        num_samples=num_samples, gpus_per_trial=gpus_per_trial)
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_bohb():
    cifar10_main('BOHB',
        num_samples=num_samples, gpus_per_trial=gpus_per_trial)
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_nevergrad():
    cifar10_main('Nevergrad',
        num_samples=num_samples, gpus_per_trial=gpus_per_trial)
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


if __name__ == "__main__":
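The recurring change in this file is the tune API: the full search space now goes in `config=` while the cheap starting point moves to `low_cost_partial_config=`, both in `tune.run` and in the `CFO` searcher. A stripped-down sketch of the new calling convention (toy objective and one-second budget, for illustration only):

from flaml import tune

def toy_objective(config):
    # A bowl with its minimum at l1 == 3.
    tune.report(loss=(config["l1"] - 3) ** 2)

analysis = tune.run(
    toy_objective,
    config={"l1": tune.randint(2, 9)},    # search space
    low_cost_partial_config={"l1": 2},    # cheapest initial point
    metric="loss", mode="min",
    num_samples=-1, time_budget_s=1)
print(analysis.get_best_trial().config)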
@@ -24,7 +24,7 @@ def _test(split_type):

    X, y = fetch_openml(name=dataset, return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
        random_state=42)
                                                        random_state=42)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

    pred = automl.predict(X_test)
@@ -32,6 +32,7 @@ def _test(split_type):

    print(acc)


def _test_uniform():
    _test(split_type="uniform")
@@ -23,11 +23,12 @@ class TestTrainingLog(unittest.TestCase):
            "task": 'regression',
            "log_file_name": filename,
            "log_training_metric": True,
            "mem_thres": 1024*1024,
            "mem_thres": 1024 * 1024,
            "n_jobs": 1,
            "model_history": True
            "model_history": True,
            "verbose": 2,
        }
        X_train, y_train = load_boston(return_X_y=True)
        X_train, y_train = load_boston(return_X_y=True)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
@@ -4,7 +4,6 @@ import flaml

class TestVersion(unittest.TestCase):

    def test_version(self):
        self.assertTrue(hasattr(flaml, '__version__'))
        self.assertTrue(len(flaml.__version__) > 0)
@@ -8,18 +8,14 @@ from flaml.model import XGBoostSklearnEstimator
from flaml import tune


# dataset = "blood-transfusion-service-center"
# dataset = "Australian"
dataset = "credit-g"
# dataset = "phoneme"
# dataset = "kc1"


class XGBoost2D(XGBoostSklearnEstimator):

    @classmethod
    def search_space(cls, data_size, task):
        upper = min(32768,int(data_size))
        upper = min(32768, int(data_size))
        return {
            'n_estimators': {
                'domain': tune.qloguniform(lower=4, upper=upper, q=1),
@@ -34,26 +30,26 @@ class XGBoost2D(XGBoostSklearnEstimator):

def test_simple(method=None):
    automl = AutoML()
    automl.add_learner(learner_name = 'XGBoost2D',
        learner_class = XGBoost2D)
    automl.add_learner(learner_name='XGBoost2D',
                       learner_class=XGBoost2D)

    automl_settings = {
        "estimator_list": ['XGBoost2D'],
        # "metric": 'accuracy',
        "task": 'classification',
        "log_file_name": f"test/xgboost2d_{dataset}_{method}.log",
        # "model_history": True,
        # "log_training_metric": True,
        # "split_type": split_type,
        "n_jobs": 1,
        "hpo_method": method,
        "log_type": "all",
        "time_budget": 3  # 6000,
        "time_budget": 3
    }

    X, y = fetch_openml(name=dataset, return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
        random_state=42)
    from sklearn.externals._arff import ArffException
    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        from sklearn.datasets import load_wine
        X, y = load_wine(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

76
test/test_xgboost2d_sample_size.py
Normal file
@@ -0,0 +1,76 @@
import unittest

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import numpy as np
from flaml.automl import AutoML
from flaml.model import XGBoostSklearnEstimator
from flaml import tune


dataset = "credit-g"


class XGBoost2D(XGBoostSklearnEstimator):

    @classmethod
    def search_space(cls, data_size, task):
        upper = min(32768, int(data_size))
        return {
            'n_estimators': {
                'domain': tune.qloguniform(lower=4, upper=upper, q=1),
                'init_value': 4,
            },
            'max_leaves': {
                'domain': tune.qloguniform(lower=4, upper=upper, q=1),
                'init_value': 4,
            },
        }


def _test_simple(method=None, size_ratio=1.0):
    automl = AutoML()
    automl.add_learner(learner_name='XGBoost2D',
                       learner_class=XGBoost2D)

    X, y = fetch_openml(name=dataset, return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=42)

    final_size = int(len(y_train) * size_ratio)
    X_train = X_train[:final_size]
    y_train = y_train[:final_size]
    automl_settings = {
        "estimator_list": ['XGBoost2D'],
        # "metric": 'accuracy',
        "task": 'classification',
        "log_file_name": f"test/xgboost2d_{dataset}_{method}_{final_size}.log",
        # "model_history": True,
        # "log_training_metric": True,
        # "split_type": split_type,
        "n_jobs": 1,
        "hpo_method": method,
        "log_type": "all",
        "time_budget": 3600,
    }
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)


def _test_grid_1():
    _test_simple(method="grid", size_ratio=1.0 / 3.0)


def _test_grid_2():
    _test_simple(method="grid", size_ratio=2.0 / 3.0)


def _test_grid_4():
    _test_simple(method="grid", size_ratio=0.5)


def _test_grid_3():
    _test_simple(method="grid", size_ratio=1.0)


if __name__ == "__main__":
    unittest.main()
0
test/tune/__init__.py
Normal file
@@ -1,18 +1,21 @@
import unittest
import os
'''Require: pip install flaml[test,ray]
'''
import time
import os
from sklearn.model_selection import train_test_split
import sklearn.metrics
import sklearn.datasets
try:
    from ray.tune.integration.xgboost import TuneReportCheckpointCallback
except ImportError:
    print("skip test_tune because ray tune cannot be imported.")
    print("skip test_xgboost because ray tune cannot be imported.")
import xgboost as xgb

import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('test/tune_xgboost.log'))
os.makedirs('logs', exist_ok=True)
logger.addHandler(logging.FileHandler('logs/tune_xgboost.log'))
logger.setLevel(logging.INFO)


def train_breast_cancer(config: dict):
@@ -48,7 +51,6 @@ def _test_xgboost(method='BlendSearch'):
    else:
        from ray import tune
    search_space = {
        # You can mix constants with search space objects.
        "max_depth": tune.randint(1, 8) if method in [
            "BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
@@ -56,17 +58,18 @@ def _test_xgboost(method='BlendSearch'):
        "eta": tune.loguniform(1e-4, 1e-1)
    }
    max_iter = 10
    for num_samples in [256]:
        time_budget_s = 60  # None
    for num_samples in [128]:
        time_budget_s = 60
        for n_cpu in [8]:
            start_time = time.time()
            ray.init(num_cpus=n_cpu, num_gpus=0)
            # ray.init(address='auto')
            if method == 'BlendSearch':
                analysis = tune.run(
                    train_breast_cancer,
                    init_config={
                    config=search_space,
                    low_cost_partial_config={
                        "max_depth": 1,
                        "min_child_weight": 3,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
@@ -78,9 +81,8 @@ def _test_xgboost(method='BlendSearch'):
                    report_intermediate_result=True,
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir='logs/',
                    num_samples=num_samples*n_cpu,
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True)
            else:
@@ -96,10 +98,9 @@ def _test_xgboost(method='BlendSearch'):
                    algo = OptunaSearch()
                elif 'CFO' == method:
                    from flaml import CFO
                    algo = CFO(points_to_evaluate=[{
                    algo = CFO(low_cost_partial_config={
                        "max_depth": 1,
                        "min_child_weight": 3,
                    }], cat_hp_cost={
                    }, cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    })
                elif 'Dragonfly' == method:
@@ -114,7 +115,7 @@ def _test_xgboost(method='BlendSearch'):
                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif 'ZOOpt' == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch
                    algo = ZOOptSearch(budget=num_samples*n_cpu)
                    algo = ZOOptSearch(budget=num_samples * n_cpu)
                elif 'Ax' == method:
                    from ray.tune.suggest.ax import AxSearch
                    algo = AxSearch()
@@ -134,14 +135,16 @@ def _test_xgboost(method='BlendSearch'):
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space, local_dir='logs/',
                    num_samples=num_samples*n_cpu, time_budget_s=time_budget_s,
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    scheduler=scheduler, search_alg=algo)
            ray.shutdown()
            # # Load the best model checkpoint
            # import os
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #                                  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss","min","all")
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1. - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
@@ -152,6 +155,40 @@ def _test_xgboost(method='BlendSearch'):
            logger.info(f"Best model parameters: {best_trial.config}")


def test_nested():
    from flaml import tune
    search_space = {
        # test nested search space
        "cost_related": {
            "a": tune.randint(1, 8),
        },
        "b": tune.uniform(0.5, 1.0),
    }

    def simple_func(config):
        obj = (config["cost_related"]["a"] - 4)**2 \
            + (config["b"] - config["cost_related"]["a"])**2
        tune.report(obj=obj)
        tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])

    analysis = tune.run(
        simple_func,
        config=search_space,
        low_cost_partial_config={
            "cost_related": {"a": 1}
        },
        metric="obj",
        mode="min",
        metric_constraints=[("ab", "<=", 4)],
        local_dir='logs/',
        num_samples=-1,
        time_budget_s=1)

    best_trial = analysis.get_best_trial()
    logger.info(f"Best config: {best_trial.config}")
    logger.info(f"Best result: {best_trial.last_result}")
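For intuition, `simple_func` is a smooth bowl over the nested config, and the second `tune.report` line (the diff's replacement for the first) additionally exposes the product a * b so that `metric_constraints` can act on it. A worked instance, by hand rather than from a real run:

# config = {"cost_related": {"a": 3}, "b": 1.0}
# obj = (3 - 4)**2 + (1.0 - 3)**2 = 1 + 4 = 5
# ab  = 3 * 1.0 = 3, which satisfies the constraint ("ab", "<=", 4);
# a = 8 with b = 1.0 would give ab = 8 and violate it.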
def test_xgboost_bs():
    _test_xgboost()

@@ -197,4 +234,4 @@ def _test_xgboost_bohb():


if __name__ == "__main__":
    unittest.main()
    test_xgboost_bs()