mirror of
https://github.com/microsoft/FLAML.git
synced 2026-02-15 05:09:16 +08:00
Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
840e3fc104 | ||
|
|
1560a6e52a | ||
|
|
7bd231e497 | ||
|
|
6ff0ed434b | ||
|
|
2d3bd84038 | ||
|
|
79a851e408 | ||
|
|
a1b0b303ed | ||
|
|
3328157f31 | ||
|
|
da88aa77e3 | ||
|
|
bd16eeee69 | ||
|
|
d18d292081 | ||
|
|
80d3b14097 | ||
|
|
f757a55097 | ||
|
|
20ce01b33d | ||
|
|
9d661759b4 | ||
|
|
6393cc81e9 | ||
|
|
38775b16c0 | ||
|
|
d659079a5d |
2
.github/workflows/python-package.yml
vendored
2
.github/workflows/python-package.yml
vendored
@@ -1,7 +1,7 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: Python package
|
||||
name: Build
|
||||
|
||||
on:
|
||||
push:
|
||||
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -146,6 +146,9 @@ dmypy.json
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
/catboost_info
|
||||
|
||||
catboost_info
|
||||
notebook/*.pkl
|
||||
notebook/.azureml
|
||||
mlruns
|
||||
logs
|
||||
13
README.md
13
README.md
@@ -1,3 +1,8 @@
|
||||
[PyPI version](https://badge.fury.io/py/FLAML)
|
||||
[Build](https://github.com/microsoft/FLAML/actions/workflows/python-package.yml)
|
||||

|
||||
[Downloads](https://pepy.tech/project/flaml)
|
||||
|
||||
# FLAML - Fast and Lightweight AutoML
|
||||
|
||||
<p align="center">
|
||||
@@ -5,8 +10,8 @@
|
||||
<br>
|
||||
</p>
|
||||
|
||||
FLAML is a Python library designed to automatically produce accurate machine
|
||||
learning models with low computational cost. It frees users from selecting
|
||||
FLAML is a lightweight Python library that finds accurate machine
|
||||
learning models automatically, efficiently and economically. It frees users from selecting
|
||||
learners and hyperparameters for each learner. It is fast and cheap.
|
||||
The simple and lightweight design makes it easy to extend, such as
|
||||
adding customized learners or metrics. FLAML is powered by a new, [cost-effective
|
||||
@@ -115,7 +120,7 @@ For more technical details, please check our papers.
|
||||
* [FLAML: A Fast and Lightweight AutoML Library](https://arxiv.org/abs/1911.04706). Chi Wang, Qingyun Wu, Markus Weimer, Erkang Zhu. To appear in MLSys, 2021.
|
||||
```
|
||||
@inproceedings{wang2021flaml,
|
||||
title={Frugal Optimization for Cost-related Hyperparameters},
|
||||
title={FLAML: A Fast and Lightweight AutoML Library},
|
||||
author={Chi Wang and Qingyun Wu and Markus Weimer and Erkang Zhu},
|
||||
year={2021},
|
||||
booktitle={MLSys},
|
||||
@@ -143,7 +148,7 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio
|
||||
* Chi Wang
|
||||
* Qingyun Wu
|
||||
|
||||
Contributors (alphabetical order): Alex Deng, Silu Huang, John Langford, Amin Saied, Markus Weimer, Haozhe Zhang, Erkang Zhu.
|
||||
Contributors (alphabetical order): Sebastien Bubeck, Surajit Chaudhuri, Nadiia Chepurko, Ofer Dekel, Alex Deng, Anshuman Dutt, Nicolo Fusi, Jianfeng Gao, Johannes Gehrke, Silu Huang, Dongwoo Kim, Christian Konig, John Langford, Amin Saied, Neil Tenenholtz, Markus Weimer, Haozhe Zhang, Erkang Zhu.
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from flaml.searcher import CFO, BlendSearch, FLOW2
|
||||
from flaml.automl import AutoML
|
||||
from flaml.automl import AutoML, logger_formatter
|
||||
from flaml.version import __version__
|
||||
import logging
|
||||
|
||||
@@ -7,10 +7,3 @@ import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
# Add the console handler.
|
||||
_ch = logging.StreamHandler()
|
||||
logger_formatter = logging.Formatter(
|
||||
'[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
|
||||
'%m-%d %H:%M:%S')
|
||||
_ch.setFormatter(logger_formatter)
|
||||
logger.addHandler(_ch)
|
||||
@@ -25,6 +25,10 @@ from .training_log import training_log_reader, training_log_writer
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
logger_formatter = logging.Formatter(
|
||||
'[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
|
||||
'%m-%d %H:%M:%S')
|
||||
|
||||
try:
|
||||
import mlflow
|
||||
except:
|
||||
@@ -326,6 +330,10 @@ class AutoML:
|
||||
A numpy array of shape n * 1 - - each element is a predicted class
|
||||
label for an instance.
|
||||
'''
|
||||
if self._trained_estimator is None:
|
||||
warnings.warn(
|
||||
"No estimator is trained. Please run fit with enough budget.")
|
||||
return None
|
||||
X_test = self._preprocess(X_test)
|
||||
y_pred = self._trained_estimator.predict(X_test)
|
||||
if y_pred.ndim > 1: y_pred = y_pred.flatten()
|
||||
@@ -402,7 +410,7 @@ class AutoML:
|
||||
self._X_train_all, self._y_train_all = \
|
||||
self._transformer.fit_transform(X, y, self._state.task)
|
||||
self._label_transformer = self._transformer.label_transformer
|
||||
|
||||
self._sample_weight_full = self._state.fit_kwargs.get('sample_weight')
|
||||
if X_val is not None and y_val is not None:
|
||||
if not (isinstance(X_val, np.ndarray) or
|
||||
issparse(X_val) or
|
||||
@@ -446,7 +454,8 @@ class AutoML:
|
||||
self._X_train_all, self._y_train_all
|
||||
if issparse(X_train_all):
|
||||
X_train_all = X_train_all.tocsr()
|
||||
if self._state.task != 'regression':
|
||||
if self._state.task != 'regression' and self._state.fit_kwargs.get(
|
||||
'sample_weight') is None:
|
||||
# logger.info(f"label {pd.unique(y_train_all)}")
|
||||
label_set, counts = np.unique(y_train_all, return_counts=True)
|
||||
# augment rare classes
|
||||
@@ -836,6 +845,11 @@ class AutoML:
|
||||
if eval_method == 'auto' or self._state.X_val is not None:
|
||||
eval_method = self._decide_eval_method(time_budget)
|
||||
self._state.eval_method = eval_method
|
||||
if (not mlflow or not mlflow.active_run()) and not logger.handlers:
|
||||
# Add the console handler.
|
||||
_ch = logging.StreamHandler()
|
||||
_ch.setFormatter(logger_formatter)
|
||||
logger.addHandler(_ch)
|
||||
logger.info("Evaluation method: {}".format(eval_method))
|
||||
|
||||
self._retrain_full = retrain_full and (eval_method == 'holdout' and
|
||||
@@ -1060,7 +1074,7 @@ class AutoML:
|
||||
search_state.best_config,
|
||||
estimator,
|
||||
search_state.sample_size)
|
||||
if mlflow is not None:
|
||||
if mlflow is not None and mlflow.active_run():
|
||||
with mlflow.start_run(nested=True) as run:
|
||||
mlflow.log_metric('iter_counter',
|
||||
self._iter_per_learner[estimator])
|
||||
@@ -1093,8 +1107,9 @@ class AutoML:
|
||||
self._state.best_loss))
|
||||
else:
|
||||
logger.info(f"no enough budget for learner {estimator}")
|
||||
self.estimator_list.remove(estimator)
|
||||
self._estimator_index -= 1
|
||||
if self._estimator_index is not None:
|
||||
self.estimator_list.remove(estimator)
|
||||
self._estimator_index -= 1
|
||||
if self._retrain_full and best_config_sig and not better and (
|
||||
self._search_states[self._best_estimator].sample_size ==
|
||||
self._state.data_size) and (est_retrain_time <=
|
||||
@@ -1151,7 +1166,11 @@ class AutoML:
|
||||
stacker = Stacker(estimators, best_m,
|
||||
n_jobs=self._state.n_jobs,
|
||||
passthrough=True)
|
||||
stacker.fit(self._X_train_all, self._y_train_all)
|
||||
if self._sample_weight_full is not None:
|
||||
self._state.fit_kwargs[
|
||||
'sample_weight'] = self._sample_weight_full
|
||||
stacker.fit(self._X_train_all, self._y_train_all,
|
||||
**self._state.fit_kwargs)
|
||||
logger.info(f'ensemble: {stacker}')
|
||||
self._trained_estimator = stacker
|
||||
self._trained_estimator.model = stacker
|
||||
|
||||
@@ -506,7 +506,6 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
|
||||
|
||||
def get_params(self, deep=False):
|
||||
params = super().get_params()
|
||||
params["criterion"] = 1 if params["criterion"]=='gini' else 2
|
||||
return params
|
||||
|
||||
|
||||
|
||||
@@ -25,6 +25,8 @@ class BlendSearch(Searcher):
|
||||
'''class for BlendSearch algorithm
|
||||
'''
|
||||
|
||||
cost_attr = "time_total_s" # cost attribute in result
|
||||
|
||||
def __init__(self,
|
||||
metric: Optional[str] = None,
|
||||
mode: Optional[str] = None,
|
||||
@@ -113,8 +115,9 @@ class BlendSearch(Searcher):
|
||||
self._deadline = config.get('time_budget_s') + time.time()
|
||||
if 'metric_target' in config:
|
||||
self._metric_target = config.get('metric_target')
|
||||
else:
|
||||
self._metric, self._mode = metric, mode
|
||||
else:
|
||||
if metric: self._metric = metric
|
||||
if mode: self._mode = mode
|
||||
self._ls.set_search_properties(metric, mode, config)
|
||||
if self._gs is not None:
|
||||
self._gs.set_search_properties(metric, mode, config)
|
||||
@@ -132,15 +135,17 @@ class BlendSearch(Searcher):
|
||||
self._thread_count = 1 # total # threads created
|
||||
self._init_used = self._ls.init_config is None
|
||||
self._trial_proposed_by = {} # trial_id: str -> thread_id: int
|
||||
self._admissible_min = self._ls.normalize(self._ls.init_config)
|
||||
self._admissible_max = self._admissible_min.copy()
|
||||
self._ls_bound_min = self._ls.normalize(self._ls.init_config)
|
||||
self._ls_bound_max = self._ls_bound_min.copy()
|
||||
self._gs_admissible_min = self._ls_bound_min.copy()
|
||||
self._gs_admissible_max = self._ls_bound_max.copy()
|
||||
self._result = {} # config_signature: tuple -> result: Dict
|
||||
self._deadline = np.inf
|
||||
|
||||
def save(self, checkpoint_path: str):
|
||||
save_object = (self._metric_target, self._search_thread_pool,
|
||||
self._thread_count, self._init_used, self._trial_proposed_by,
|
||||
self._admissible_min, self._admissible_max, self._result,
|
||||
self._ls_bound_min, self._ls_bound_max, self._result,
|
||||
self._deadline)
|
||||
with open(checkpoint_path, "wb") as outputFile:
|
||||
pickle.dump(save_object, outputFile)
|
||||
@@ -150,7 +155,7 @@ class BlendSearch(Searcher):
|
||||
save_object = pickle.load(inputFile)
|
||||
self._metric_target, self._search_thread_pool, \
|
||||
self._thread_count, self._init_used, self._trial_proposed_by, \
|
||||
self._admissible_min, self._admissible_max, self._result, \
|
||||
self._ls_bound_min, self._ls_bound_max, self._result, \
|
||||
self._deadline = save_object
|
||||
|
||||
def restore_from_dir(self, checkpoint_dir: str):
|
||||
@@ -178,25 +183,20 @@ class BlendSearch(Searcher):
|
||||
# update target metric if improved
|
||||
if (result[self._metric]-self._metric_target)*self._ls.metric_op<0:
|
||||
self._metric_target = result[self._metric]
|
||||
if thread_id: # from local search
|
||||
# update admissible region
|
||||
normalized_config = self._ls.normalize(config)
|
||||
for key in self._admissible_min:
|
||||
value = normalized_config[key]
|
||||
if value > self._admissible_max[key]:
|
||||
self._admissible_max[key] = value
|
||||
elif value < self._admissible_min[key]:
|
||||
self._admissible_min[key] = value
|
||||
elif self._create_condition(result):
|
||||
if not thread_id and self._create_condition(result):
|
||||
# thread creator
|
||||
self._search_thread_pool[self._thread_count] = SearchThread(
|
||||
self._ls.mode,
|
||||
self._ls.create(config, result[self._metric], cost=result[
|
||||
"time_total_s"])
|
||||
self.cost_attr])
|
||||
)
|
||||
thread_id = self._thread_count
|
||||
self._thread_count += 1
|
||||
|
||||
self._update_admissible_region(config, self._ls_bound_min,
|
||||
self._ls_bound_max)
|
||||
# reset admissible region to ls bounding box
|
||||
self._gs_admissible_min.update(self._ls_bound_min)
|
||||
self._gs_admissible_max.update(self._ls_bound_max)
|
||||
# cleaner
|
||||
# logger.info(f"thread {thread_id} in search thread pool="
|
||||
# f"{thread_id in self._search_thread_pool}")
|
||||
@@ -204,6 +204,16 @@ class BlendSearch(Searcher):
|
||||
# local search thread
|
||||
self._clean(thread_id)
|
||||
|
||||
def _update_admissible_region(self, config, admissible_min, admissible_max):
|
||||
# update admissible region
|
||||
normalized_config = self._ls.normalize(config)
|
||||
for key in admissible_min:
|
||||
value = normalized_config[key]
|
||||
if value > admissible_max[key]:
|
||||
admissible_max[key] = value
|
||||
elif value < admissible_min[key]:
|
||||
admissible_min[key] = value
|
||||
|
||||
def _create_condition(self, result: Dict) -> bool:
|
||||
''' create thread condition
|
||||
'''
|
||||
@@ -231,9 +241,9 @@ class BlendSearch(Searcher):
|
||||
# f"{self._search_thread_pool[thread_id].converged}")
|
||||
if self._search_thread_pool[thread_id].converged:
|
||||
todelete.add(thread_id)
|
||||
for key in self._admissible_min:
|
||||
self._admissible_max[key] += self._ls.STEPSIZE
|
||||
self._admissible_min[key] -= self._ls.STEPSIZE
|
||||
for key in self._ls_bound_max:
|
||||
self._ls_bound_max[key] += self._ls.STEPSIZE
|
||||
self._ls_bound_min[key] -= self._ls.STEPSIZE
|
||||
for id in todelete:
|
||||
del self._search_thread_pool[id]
|
||||
|
||||
@@ -258,53 +268,67 @@ class BlendSearch(Searcher):
|
||||
'''
|
||||
if self._init_used and not self._points_to_evaluate:
|
||||
choice, backup = self._select_thread()
|
||||
# logger.debug(f"choice={choice}, backup={backup}")
|
||||
# print(f"choice={choice}, backup={backup}")
|
||||
if choice < 0: return None # timeout
|
||||
self._use_rs = False
|
||||
config = self._search_thread_pool[choice].suggest(trial_id)
|
||||
# preliminary check; not checking config validation
|
||||
skip = self._should_skip(choice, trial_id, config)
|
||||
if skip:
|
||||
if choice:
|
||||
# logger.info(f"skipping choice={choice}, config={config}")
|
||||
# print(f"skipping choice={choice}, config={config}")
|
||||
return None
|
||||
# use rs
|
||||
# use rs when BO fails to suggest a config
|
||||
self._use_rs = True
|
||||
for _, generated in generate_variants(
|
||||
{'config': self._ls.space}):
|
||||
config = generated['config']
|
||||
break
|
||||
break # get one random config
|
||||
# logger.debug(f"random config {config}")
|
||||
skip = self._should_skip(choice, trial_id, config)
|
||||
if skip: return None
|
||||
# if not choice: logger.info(config)
|
||||
if choice or backup == choice or self._valid(config):
|
||||
# if not choice: print(config)
|
||||
if choice or self._valid(config):
|
||||
# LS or valid or no backup choice
|
||||
self._trial_proposed_by[trial_id] = choice
|
||||
else: # invalid config proposed by GS
|
||||
if not self._use_rs:
|
||||
self._search_thread_pool[choice].on_trial_complete(
|
||||
trial_id, {}, error=True) # tell GS there is an error
|
||||
# if not self._use_rs:
|
||||
# self._search_thread_pool[choice].on_trial_complete(
|
||||
# trial_id, {}, error=True) # tell GS there is an error
|
||||
self._use_rs = False
|
||||
config = self._search_thread_pool[backup].suggest(trial_id)
|
||||
skip = self._should_skip(backup, trial_id, config)
|
||||
if skip:
|
||||
return None
|
||||
self._trial_proposed_by[trial_id] = backup
|
||||
choice = backup
|
||||
# if choice: self._pending.add(choice) # local search thread pending
|
||||
if not choice:
|
||||
if choice == backup:
|
||||
# use CFO's init point
|
||||
init_config = self._ls.init_config
|
||||
config = self._ls.complete_config(init_config,
|
||||
self._ls_bound_min, self._ls_bound_max)
|
||||
self._trial_proposed_by[trial_id] = choice
|
||||
else:
|
||||
config = self._search_thread_pool[backup].suggest(trial_id)
|
||||
skip = self._should_skip(backup, trial_id, config)
|
||||
if skip:
|
||||
return None
|
||||
self._trial_proposed_by[trial_id] = backup
|
||||
choice = backup
|
||||
if not choice: # global search
|
||||
if self._ls._resource:
|
||||
# TODO: add resource to config proposed by GS, min or median?
|
||||
config[self._ls.prune_attr] = self._ls.min_resource
|
||||
# temporarily relax admissible region for parallel proposals
|
||||
self._update_admissible_region(config, self._gs_admissible_min,
|
||||
self._gs_admissible_max)
|
||||
else:
|
||||
self._update_admissible_region(config, self._ls_bound_min,
|
||||
self._ls_bound_max)
|
||||
self._gs_admissible_min.update(self._ls_bound_min)
|
||||
self._gs_admissible_max.update(self._ls_bound_max)
|
||||
self._result[self._ls.config_signature(config)] = {}
|
||||
else: # use init config
|
||||
# print("use init config")
|
||||
init_config = self._points_to_evaluate.pop(
|
||||
0) if self._points_to_evaluate else self._ls.init_config
|
||||
if init_config==self._ls.init_config:
|
||||
config = self._ls.complete_config(init_config,
|
||||
self._admissible_min, self._admissible_max)
|
||||
config = self._ls.complete_config(init_config,
|
||||
self._ls_bound_min, self._ls_bound_max)
|
||||
# logger.info(f"reset config to {config}")
|
||||
else: config = init_config
|
||||
config_signature = self._ls.config_signature(config)
|
||||
result = self._result.get(config_signature)
|
||||
if result: # tried before
|
||||
@@ -340,10 +364,10 @@ class BlendSearch(Searcher):
|
||||
if choice:
|
||||
# local search thread
|
||||
self._clean(choice)
|
||||
else:
|
||||
# tell the thread there is an error
|
||||
self._search_thread_pool[choice].on_trial_complete(
|
||||
trial_id, {}, error=True)
|
||||
# else:
|
||||
# # tell the thread there is an error
|
||||
# self._search_thread_pool[choice].on_trial_complete(
|
||||
# trial_id, {}, error=True)
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -364,10 +388,10 @@ class BlendSearch(Searcher):
|
||||
|
||||
top_thread_id = backup_thread_id = 0
|
||||
priority1 = priority2 = self._search_thread_pool[0].priority
|
||||
# logger.debug(f"priority of thread 0={priority1}")
|
||||
# print(f"priority of thread 0={priority1}, obj_best1={self._search_thread_pool[0].obj_best1}")
|
||||
for thread_id, thread in self._search_thread_pool.items():
|
||||
# if thread_id:
|
||||
# logger.debug(
|
||||
# print(
|
||||
# f"priority of thread {thread_id}={thread.priority}")
|
||||
# logger.debug(
|
||||
# f"thread {thread_id}.can_suggest={thread.can_suggest}")
|
||||
@@ -384,18 +408,100 @@ class BlendSearch(Searcher):
|
||||
def _valid(self, config: Dict) -> bool:
|
||||
''' config validator
|
||||
'''
|
||||
for key in self._admissible_min:
|
||||
for key in self._gs_admissible_min:
|
||||
if key in config:
|
||||
value = config[key]
|
||||
# logger.info(
|
||||
# f"{key},{value},{self._admissible_min[key]},{self._admissible_max[key]}")
|
||||
if value<self._admissible_min[
|
||||
key] or value>self._admissible_max[key]:
|
||||
if value+self._ls.STEPSIZE<self._gs_admissible_min[
|
||||
key] or value>self._gs_admissible_max[key]+self._ls.STEPSIZE:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class CFO(BlendSearch):
|
||||
try:
|
||||
from nni.tuner import Tuner as NNITuner
|
||||
from nni.utils import extract_scalar_reward
|
||||
try:
|
||||
from ray.tune import (uniform, quniform, choice, randint, qrandint, randn,
|
||||
qrandn, loguniform, qloguniform)
|
||||
except:
|
||||
from ..tune.sample import (uniform, quniform, choice, randint, qrandint, randn,
|
||||
qrandn, loguniform, qloguniform)
|
||||
|
||||
class BlendSearchTuner(BlendSearch, NNITuner):
|
||||
'''Tuner class for NNI
|
||||
'''
|
||||
|
||||
def receive_trial_result(self, parameter_id, parameters, value,
|
||||
**kwargs):
|
||||
'''
|
||||
Receive trial's final result.
|
||||
parameter_id: int
|
||||
parameters: object created by 'generate_parameters()'
|
||||
value: final metrics of the trial, including default metric
|
||||
'''
|
||||
result = {}
|
||||
for key, value in parameters:
|
||||
result['config/'+key] = value
|
||||
reward = extract_scalar_reward(value)
|
||||
result[self._metric] = reward
|
||||
# if nni does not report training cost,
|
||||
# using sequence as an approximation.
|
||||
# if no sequence, using a constant 1
|
||||
result[self.cost_attr] = value.get(self.cost_attr, value.get(
|
||||
'sequence', 1))
|
||||
self.on_trial_complete(str(parameter_id), result)
|
||||
...
|
||||
|
||||
def generate_parameters(self, parameter_id, **kwargs) -> Dict:
|
||||
'''
|
||||
Returns a set of trial (hyper-)parameters, as a serializable object
|
||||
parameter_id: int
|
||||
'''
|
||||
return self.suggest(str(parameter_id))
|
||||
...
|
||||
|
||||
def update_search_space(self, search_space):
|
||||
'''
|
||||
Tuners are advised to support updating search space at run-time.
|
||||
If a tuner can only set search space once before generating first hyper-parameters,
|
||||
it should explicitly document this behaviour.
|
||||
search_space: JSON object created by experiment owner
|
||||
'''
|
||||
config = {}
|
||||
for key, value in search_space.items():
|
||||
v = value.get("_value")
|
||||
_type = value['_type']
|
||||
if _type == 'choice':
|
||||
config[key] = choice(v)
|
||||
elif _type == 'randint':
|
||||
config[key] = randint(v[0], v[1]-1)
|
||||
elif _type == 'uniform':
|
||||
config[key] = uniform(v[0], v[1])
|
||||
elif _type == 'quniform':
|
||||
config[key] = quniform(v[0], v[1], v[2])
|
||||
elif _type == 'loguniform':
|
||||
config[key] = loguniform(v[0], v[1])
|
||||
elif _type == 'qloguniform':
|
||||
config[key] = qloguniform(v[0], v[1], v[2])
|
||||
elif _type == 'normal':
|
||||
config[key] = randn(v[1], v[2])
|
||||
elif _type == 'qnormal':
|
||||
config[key] = qrandn(v[1], v[2], v[3])
|
||||
else:
|
||||
raise ValueError(
|
||||
f'unsupported type in search_space {_type}')
|
||||
self._ls.set_search_properties(None, None, config)
|
||||
if self._gs is not None:
|
||||
self._gs.set_search_properties(None, None, config)
|
||||
self._init_search()
|
||||
|
||||
except:
|
||||
class BlendSearchTuner(BlendSearch): pass
|
||||
|
||||
|
||||
class CFO(BlendSearchTuner):
|
||||
''' class for CFO algorithm
|
||||
'''
|
||||
|
||||
@@ -418,3 +524,5 @@ class CFO(BlendSearch):
|
||||
''' create thread condition
|
||||
'''
|
||||
return len(self._search_thread_pool) < 2
|
||||
|
||||
|
||||
|
||||
@@ -9,9 +9,10 @@ try:
|
||||
from ray.tune.suggest import Searcher
|
||||
from ray.tune.suggest.variant_generator import generate_variants
|
||||
from ray.tune import sample
|
||||
from ray.tune.utils.util import flatten_dict, unflatten_dict
|
||||
except ImportError:
|
||||
from .suggestion import Searcher
|
||||
from .variant_generator import generate_variants
|
||||
from .variant_generator import generate_variants, flatten_dict, unflatten_dict
|
||||
from ..tune import sample
|
||||
|
||||
|
||||
@@ -86,6 +87,7 @@ class FLOW2(Searcher):
|
||||
elif mode == "min":
|
||||
self.metric_op = 1.
|
||||
self.space = space or {}
|
||||
self.space = flatten_dict(self.space, prevent_delimiter=True)
|
||||
self._random = np.random.RandomState(seed)
|
||||
self._seed = seed
|
||||
if not init_config:
|
||||
@@ -95,7 +97,8 @@ class FLOW2(Searcher):
|
||||
"consider providing init values for cost-related hps via "
|
||||
"'init_config'."
|
||||
)
|
||||
self.init_config = self.best_config = init_config
|
||||
self.init_config = init_config
|
||||
self.best_config = flatten_dict(init_config)
|
||||
self.cat_hp_cost = cat_hp_cost
|
||||
self.prune_attr = prune_attr
|
||||
self.min_resource = min_resource
|
||||
@@ -121,21 +124,21 @@ class FLOW2(Searcher):
|
||||
self._unordered_cat_hp = {}
|
||||
self._cat_hp_cost = {}
|
||||
for key, domain in self.space.items():
|
||||
assert not isinstance(domain, dict), \
|
||||
key+"'s domain is grid search which is not supported in FLOW2."
|
||||
assert not (isinstance(domain, dict) and 'grid_search' in domain
|
||||
), key+"'s domain is grid search which is not supported in FLOW2."
|
||||
if callable(getattr(domain, 'get_sampler', None)):
|
||||
self._tunable_keys.append(key)
|
||||
sampler = domain.get_sampler()
|
||||
if isinstance(sampler, sample.Quantized):
|
||||
sampler_inner = sampler.get_sampler()
|
||||
if str(sampler_inner) == 'Uniform':
|
||||
self._step_lb = min(
|
||||
self._step_lb, sampler.q/(domain.upper-domain.lower))
|
||||
elif isinstance(domain, sample.Integer) and str(
|
||||
sampler) == 'Uniform':
|
||||
self._step_lb = min(
|
||||
self._step_lb, 1.0/(domain.upper-domain.lower))
|
||||
elif isinstance(domain, sample.Categorical):
|
||||
# if isinstance(sampler, sample.Quantized):
|
||||
# sampler_inner = sampler.get_sampler()
|
||||
# if str(sampler_inner) == 'Uniform':
|
||||
# self._step_lb = min(
|
||||
# self._step_lb, sampler.q/(domain.upper-domain.lower))
|
||||
# elif isinstance(domain, sample.Integer) and str(
|
||||
# sampler) == 'Uniform':
|
||||
# self._step_lb = min(
|
||||
# self._step_lb, 1.0/(domain.upper-domain.lower))
|
||||
if isinstance(domain, sample.Categorical):
|
||||
cat_hp_cost = self.cat_hp_cost
|
||||
if cat_hp_cost and key in cat_hp_cost:
|
||||
cost = np.array(cat_hp_cost[key])
|
||||
@@ -146,7 +149,7 @@ class FLOW2(Searcher):
|
||||
for i, choice in enumerate(l):
|
||||
d[choice] = i
|
||||
self._ordered_cat_hp[key] = (l, d)
|
||||
self._step_lb = min(self._step_lb, 1.0/len(l))
|
||||
# self._step_lb = min(self._step_lb, 1.0/len(l))
|
||||
elif all(isinstance(x, int) or isinstance(x, float)
|
||||
for x in domain.categories):
|
||||
l = sorted(domain.categories)
|
||||
@@ -154,10 +157,10 @@ class FLOW2(Searcher):
|
||||
for i, choice in enumerate(l):
|
||||
d[choice] = i
|
||||
self._ordered_choice_hp[key] = (l, d)
|
||||
self._step_lb = min(self._step_lb, 1.0/len(l))
|
||||
# self._step_lb = min(self._step_lb, 1.0/len(l))
|
||||
else:
|
||||
self._unordered_cat_hp[key] = l = len(domain.categories)
|
||||
self._step_lb = min(self._step_lb, 1.0/l)
|
||||
# self._step_lb = min(self._step_lb, 1.0/l)
|
||||
if str(sampler) != 'Normal':
|
||||
self._bounded_keys.append(key)
|
||||
self._space_keys = list(self.space.keys())
|
||||
@@ -171,7 +174,7 @@ class FLOW2(Searcher):
|
||||
# logger.info(self._resource)
|
||||
else: self._resource = None
|
||||
self.incumbent = {}
|
||||
self.incumbent = self.normalize(self.init_config)
|
||||
self.incumbent = self.normalize(self.best_config) # flattened
|
||||
self.best_obj = self.cost_incumbent = None
|
||||
self.dim = len(self._tunable_keys) # total # tunable dimensions
|
||||
self._direction_tried = None
|
||||
@@ -190,6 +193,8 @@ class FLOW2(Searcher):
|
||||
self._K = 0
|
||||
self._iter_best_config = self.trial_count = 1
|
||||
self._reset_times = 0
|
||||
# record intermediate trial cost
|
||||
self._trial_cost = {}
|
||||
|
||||
@property
|
||||
def step_lower_bound(self) -> float:
|
||||
@@ -237,14 +242,15 @@ class FLOW2(Searcher):
|
||||
''' generate a complete config from the partial config input
|
||||
add minimal resource to config if available
|
||||
'''
|
||||
if self._reset_times: # not the first time, use random gaussian
|
||||
if self._reset_times and partial_config==self.init_config:
|
||||
# not the first time to complete init_config, use random gaussian
|
||||
normalized = self.normalize(partial_config)
|
||||
for key in normalized:
|
||||
# don't change unordered cat choice
|
||||
if key not in self._unordered_cat_hp:
|
||||
if upper and lower:
|
||||
u, l = upper[key], lower[key]
|
||||
gauss_std = u-l
|
||||
gauss_std = u-l or self.STEPSIZE
|
||||
# allowed bound
|
||||
u += self.STEPSIZE
|
||||
l -= self.STEPSIZE
|
||||
@@ -259,27 +265,28 @@ class FLOW2(Searcher):
|
||||
# use best config for unordered cat choice
|
||||
config = self.denormalize(normalized)
|
||||
else:
|
||||
# first time init_config, or other configs, take as is
|
||||
config = partial_config.copy()
|
||||
|
||||
if partial_config == self.init_config: self._reset_times += 1
|
||||
config = flatten_dict(config)
|
||||
for key, value in self.space.items():
|
||||
if key not in config:
|
||||
config[key] = value
|
||||
logger.debug(f'before random {config}')
|
||||
# logger.debug(f'before random {config}')
|
||||
for _, generated in generate_variants({'config': config}):
|
||||
config = generated['config']
|
||||
break
|
||||
logger.debug(f'after random {config}')
|
||||
# logger.debug(f'after random {config}')
|
||||
|
||||
if self._resource:
|
||||
config[self.prune_attr] = self.min_resource
|
||||
self._reset_times += 1
|
||||
return config
|
||||
return unflatten_dict(config)
|
||||
|
||||
def create(self, init_config: Dict, obj: float, cost: float) -> Searcher:
|
||||
flow2 = FLOW2(init_config, self.metric, self.mode, self._cat_hp_cost,
|
||||
self.space, self.prune_attr, self.min_resource,
|
||||
self.max_resource, self.resource_multiple_factor,
|
||||
self._seed+1)
|
||||
unflatten_dict(self.space), self.prune_attr,
|
||||
self.min_resource, self.max_resource,
|
||||
self.resource_multiple_factor, self._seed+1)
|
||||
flow2.best_obj = obj * self.metric_op # minimize internally
|
||||
flow2.cost_incumbent = cost
|
||||
return flow2
|
||||
@@ -288,7 +295,7 @@ class FLOW2(Searcher):
|
||||
''' normalize each dimension in config to [0,1]
|
||||
'''
|
||||
config_norm = {}
|
||||
for key, value in config.items():
|
||||
for key, value in flatten_dict(config).items():
|
||||
if key in self.space:
|
||||
# domain: sample.Categorical/Integer/Float/Function
|
||||
domain = self.space[key]
|
||||
@@ -299,10 +306,10 @@ class FLOW2(Searcher):
|
||||
# normalize categorical
|
||||
if key in self._ordered_cat_hp:
|
||||
l, d = self._ordered_cat_hp[key]
|
||||
config_norm[key] = d[value]/len(l)
|
||||
config_norm[key] = (d[value]+0.5)/len(l) # center
|
||||
elif key in self._ordered_choice_hp:
|
||||
l, d = self._ordered_choice_hp[key]
|
||||
config_norm[key] = d[value]/len(l)
|
||||
config_norm[key] = (d[value]+0.5)/len(l) # center
|
||||
elif key in self.incumbent:
|
||||
config_norm[key] = self.incumbent[
|
||||
key] if value == self.best_config[
|
||||
@@ -402,6 +409,7 @@ class FLOW2(Searcher):
|
||||
self._metric = metric
|
||||
if mode:
|
||||
assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
|
||||
self._mode = mode
|
||||
if mode == "max":
|
||||
self.metric_op = -1.
|
||||
elif mode == "min":
|
||||
@@ -422,7 +430,7 @@ class FLOW2(Searcher):
|
||||
obj = result.get(self._metric)
|
||||
if obj:
|
||||
obj *= self.metric_op
|
||||
if obj < self.best_obj:
|
||||
if self.best_obj is None or obj < self.best_obj:
|
||||
self.best_obj, self.best_config = obj, self._configs[
|
||||
trial_id]
|
||||
self.incumbent = self.normalize(self.best_config)
|
||||
@@ -433,7 +441,8 @@ class FLOW2(Searcher):
|
||||
self._cost_complete4incumbent = 0
|
||||
self._num_allowed4incumbent = 2 * self.dim
|
||||
self._proposed_by.clear()
|
||||
if self._K > 0:
|
||||
if self._K > 0:
|
||||
# self._oldK must have been set when self._K>0
|
||||
self.step *= np.sqrt(self._K/self._oldK)
|
||||
if self.step > self.step_ub: self.step = self.step_ub
|
||||
self._iter_best_config = self.trial_count
|
||||
@@ -442,7 +451,8 @@ class FLOW2(Searcher):
|
||||
if proposed_by == self.incumbent:
|
||||
# proposed by current incumbent and no better
|
||||
self._num_complete4incumbent += 1
|
||||
cost = result.get(self.cost_attr)
|
||||
cost = result.get(
|
||||
self.cost_attr) if result else self._trial_cost.get(trial_id)
|
||||
if cost: self._cost_complete4incumbent += cost
|
||||
if self._num_complete4incumbent >= 2*self.dim and \
|
||||
self._num_allowed4incumbent == 0:
|
||||
@@ -469,7 +479,7 @@ class FLOW2(Searcher):
|
||||
obj = result.get(self._metric)
|
||||
if obj:
|
||||
obj *= self.metric_op
|
||||
if obj < self.best_obj:
|
||||
if self.best_obj is None or obj < self.best_obj:
|
||||
self.best_obj = obj
|
||||
config = self._configs[trial_id]
|
||||
if self.best_config != config:
|
||||
@@ -483,6 +493,9 @@ class FLOW2(Searcher):
|
||||
self._num_allowed4incumbent = 2 * self.dim
|
||||
self._proposed_by.clear()
|
||||
self._iter_best_config = self.trial_count
|
||||
cost = result.get(self.cost_attr)
|
||||
# record the cost in case it is pruned and cost info is lost
|
||||
self._trial_cost[trial_id] = cost
|
||||
|
||||
def rand_vector_unit_sphere(self, dim) -> np.ndarray:
|
||||
vec = self._random.normal(0, 1, dim)
|
||||
@@ -520,12 +533,12 @@ class FLOW2(Searcher):
|
||||
self._direction_tried = self.rand_vector_unit_sphere(
|
||||
self.dim) * self.step
|
||||
for i, key in enumerate(self._tunable_keys):
|
||||
move[key] += self._direction_tried[i]
|
||||
move[key] += self._direction_tried[i]
|
||||
self._project(move)
|
||||
config = self.denormalize(move)
|
||||
self._proposed_by[trial_id] = self.incumbent
|
||||
self._configs[trial_id] = config
|
||||
return config
|
||||
return unflatten_dict(config)
|
||||
|
||||
def _project(self, config):
|
||||
''' project normalized config in the feasible region and set prune_attr
|
||||
@@ -545,6 +558,7 @@ class FLOW2(Searcher):
|
||||
def config_signature(self, config) -> tuple:
|
||||
''' return the signature tuple of a config
|
||||
'''
|
||||
config = flatten_dict(config)
|
||||
value_list = []
|
||||
for key in self._space_keys:
|
||||
if key in config:
|
||||
|
||||
@@ -20,12 +20,14 @@ class SearchThread:
|
||||
'''
|
||||
|
||||
cost_attr = 'time_total_s'
|
||||
eps = 1e-10
|
||||
|
||||
def __init__(self, mode: str = "min",
|
||||
search_alg: Optional[Searcher] = None):
|
||||
''' When search_alg is omitted, use local search FLOW2
|
||||
'''
|
||||
self._search_alg = search_alg
|
||||
self._is_ls = isinstance(search_alg, FLOW2)
|
||||
self._mode = mode
|
||||
self._metric_op = 1 if mode=='min' else -1
|
||||
self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = \
|
||||
@@ -36,6 +38,7 @@ class SearchThread:
|
||||
# eci: expected cost for improvement
|
||||
self.eci = self.cost_best
|
||||
self.priority = self.speed = 0
|
||||
self._init_config = True
|
||||
|
||||
def suggest(self, trial_id: str) -> Optional[Dict]:
|
||||
''' use the suggest() of the underlying search algorithm
|
||||
@@ -70,7 +73,7 @@ class SearchThread:
|
||||
# calculate speed; use 0 for invalid speed temporarily
|
||||
if self.obj_best2 > self.obj_best1:
|
||||
self.speed = (self.obj_best2 - self.obj_best1) / (
|
||||
self.cost_total - self.cost_best2)
|
||||
self.cost_total - self.cost_best2 + self.eps)
|
||||
else: self.speed = 0
|
||||
|
||||
def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None,
|
||||
@@ -81,7 +84,12 @@ class SearchThread:
|
||||
if not hasattr(self._search_alg, '_ot_trials') or (not error and
|
||||
trial_id in self._search_alg._ot_trials):
|
||||
# optuna doesn't handle error
|
||||
self._search_alg.on_trial_complete(trial_id, result, error)
|
||||
if self._is_ls or not self._init_config:
|
||||
self._search_alg.on_trial_complete(trial_id, result, error)
|
||||
else:
|
||||
# init config is not proposed by self._search_alg
|
||||
# under this thread
|
||||
self._init_config = False
|
||||
if result:
|
||||
if self.cost_attr in result:
|
||||
self.cost_last = result[self.cost_attr]
|
||||
|
||||
@@ -28,6 +28,46 @@ from ..tune.sample import Categorical, Domain, Function
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def flatten_dict(dt, delimiter="/", prevent_delimiter=False):
    """Return a flattened deep copy of a (possibly nested) dict.

    Nested dicts are collapsed level by level; the key of a nested entry is
    the parent key and the child key joined with ``delimiter``. Keys are
    converted with ``str`` when they are joined, so non-string keys are
    accepted. A key whose value is an empty dict disappears from the result.

    Args:
        dt: The dict to flatten. It is deep-copied, not modified.
        delimiter: String placed between the joined key components.
        prevent_delimiter: If True, refuse keys that already contain
            ``delimiter``.

    Returns:
        A new dict none of whose values is a dict.

    Raises:
        ValueError: If ``prevent_delimiter`` is True and a key contains
            ``delimiter``.
    """
    def _check_key(key):
        # Compare against the string form so that non-string keys are
        # checked instead of raising TypeError.
        if prevent_delimiter and delimiter in str(key):
            raise ValueError(
                "Found delimiter `{}` in key when trying to flatten array. "
                "Please avoid using the delimiter in your "
                "specification.".format(delimiter))

    dt = copy.deepcopy(dt)
    for key in dt:
        _check_key(key)
    # Each pass removes one level of nesting; repeat until no dict is left.
    while any(isinstance(v, dict) for v in dt.values()):
        remove = []
        add = {}
        for key, value in dt.items():
            if isinstance(value, dict):
                for subkey, v in value.items():
                    _check_key(subkey)
                    add[delimiter.join([str(key), str(subkey)])] = v
                remove.append(key)
        # Mutate only after the iteration over dt has finished.
        dt.update(add)
        for k in remove:
            del dt[k]
    return dt
|
||||
|
||||
|
||||
def unflatten_dict(dt, delimiter="/"):
    """Rebuild a nested dict from a flat one. Lists are not unflattened.

    Every string key is split on ``delimiter``; all components but the last
    name nested dicts (created on demand), the last one names the entry that
    receives the value. Keys that are not strings cannot be split and are
    kept as a single top-level entry.

    Args:
        dt: The flat mapping. It is not modified.
        delimiter: Separator between the components of a key.

    Returns:
        A new mapping of the same type as ``dt``; nested levels use that
        type as well.
    """
    dict_type = type(dt)
    out = dict_type()
    for key, val in dt.items():
        # Only strings can carry a path; anything else is a single component.
        path = key.split(delimiter) if isinstance(key, str) else [key]
        item = out
        for k in path[:-1]:
            item = item.setdefault(k, dict_type())
        item[path[-1]] = val
    return out
|
||||
|
||||
|
||||
class TuneError(Exception):
|
||||
"""General error class raised by ray.tune."""
|
||||
pass
|
||||
|
||||
@@ -6,6 +6,7 @@ The API is compatible with ray tune.
|
||||
Example:
|
||||
|
||||
```python
|
||||
# require: pip install flaml[blendsearch]
|
||||
from flaml import tune
|
||||
import time
|
||||
|
||||
@@ -42,15 +43,16 @@ print(analysis.best_config) # the best config
|
||||
|
||||
Or, using ray tune's API:
|
||||
```python
|
||||
# require: pip install flaml[blendsearch] ray[tune]
|
||||
from ray import tune as raytune
|
||||
from flaml import CFO, BlendSearch
|
||||
import time
|
||||
|
||||
def evaluate_config(config):
|
||||
'''evaluate a hyperparameter configuration'''
|
||||
# we uss a toy example with 2 hyperparameters
|
||||
# we use a toy example with 2 hyperparameters
|
||||
metric = (round(config['x'])-85000)**2 - config['x']/config['y']
|
||||
# usually the evaluation takes an non-neglible cost
|
||||
# usually the evaluation takes a non-negligible cost
|
||||
# and the cost could be related to certain hyperparameters
|
||||
# in this example, we assume it's proportional to x
|
||||
time.sleep(config['x']/100000)
|
||||
@@ -146,6 +148,7 @@ based on optimism in face of uncertainty.
|
||||
Example:
|
||||
|
||||
```python
|
||||
# require: pip install flaml[blendsearch]
|
||||
from flaml import BlendSearch
|
||||
tune.run(...
|
||||
search_alg = BlendSearch(points_to_evaluate=[init_config]),
|
||||
@@ -178,4 +181,4 @@ For more technical details, please check our papers.
|
||||
year={2021},
|
||||
booktitle={ICLR'21},
|
||||
}
|
||||
```
|
||||
```
|
||||
|
||||
@@ -17,6 +17,8 @@ logger = logging.getLogger(__name__)
|
||||
_use_ray = True
|
||||
_runner = None
|
||||
_verbose = 0
|
||||
_running_trial = None
|
||||
_training_iteration = 0
|
||||
|
||||
|
||||
class ExperimentAnalysis(EA):
|
||||
@@ -68,6 +70,8 @@ def report(_metric=None, **kwargs):
|
||||
'''
|
||||
global _use_ray
|
||||
global _verbose
|
||||
global _running_trial
|
||||
global _training_iteration
|
||||
if _use_ray:
|
||||
from ray import tune
|
||||
return tune.report(_metric, **kwargs)
|
||||
@@ -77,6 +81,12 @@ def report(_metric=None, **kwargs):
|
||||
logger.info(f"result: {kwargs}")
|
||||
if _metric: result['_default_anonymous_metric'] = _metric
|
||||
trial = _runner.running_trial
|
||||
if _running_trial == trial:
|
||||
_training_iteration += 1
|
||||
else:
|
||||
_training_iteration = 0
|
||||
_running_trial = trial
|
||||
result["training_iteration"] = _training_iteration
|
||||
result['config'] = trial.config
|
||||
for key, value in trial.config.items():
|
||||
result['config/'+key] = value
|
||||
@@ -213,7 +223,7 @@ def run(training_function,
|
||||
import os
|
||||
os.makedirs(local_dir, exist_ok=True)
|
||||
logger.addHandler(logging.FileHandler(local_dir+'/tune_'+str(
|
||||
datetime.datetime.now())+'.log'))
|
||||
datetime.datetime.now()).replace(':', '-')+'.log'))
|
||||
if verbose<=2:
|
||||
logger.setLevel(logging.INFO)
|
||||
else:
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "0.2.2"
|
||||
__version__ = "0.2.8"
|
||||
|
||||
969
notebook/flaml_automl.ipynb
Normal file
969
notebook/flaml_automl.ipynb
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1216
notebook/flaml_finetune_transformer.ipynb
Normal file
1216
notebook/flaml_finetune_transformer.ipynb
Normal file
File diff suppressed because one or more lines are too long
649
notebook/flaml_lightgbm.ipynb
Normal file
649
notebook/flaml_lightgbm.ipynb
Normal file
File diff suppressed because one or more lines are too long
556
notebook/flaml_xgboost.ipynb
Normal file
556
notebook/flaml_xgboost.ipynb
Normal file
File diff suppressed because one or more lines are too long
10
setup.py
10
setup.py
@@ -20,7 +20,6 @@ install_requires = [
|
||||
"scipy>=1.4.1",
|
||||
"catboost>=0.23",
|
||||
"scikit-learn>=0.23.2",
|
||||
"optuna==2.3.0"
|
||||
],
|
||||
|
||||
|
||||
@@ -48,14 +47,21 @@ setuptools.setup(
|
||||
"coverage>=5.3",
|
||||
"xgboost<1.3",
|
||||
"rgf-python",
|
||||
"optuna==2.3.0",
|
||||
],
|
||||
"blendsearch": [
|
||||
"optuna==2.3.0"
|
||||
],
|
||||
"ray": [
|
||||
"ray[tune]==1.1.0",
|
||||
"pyyaml<5.3.1",
|
||||
],
|
||||
"azureml": [
|
||||
"azureml-mlflow"
|
||||
"azureml-mlflow",
|
||||
],
|
||||
"nni": [
|
||||
"nni",
|
||||
]
|
||||
},
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
|
||||
219
test/hf/test_distillbert.py
Normal file
219
test/hf/test_distillbert.py
Normal file
@@ -0,0 +1,219 @@
|
||||
'''Require: pip install torch transformers datasets flaml[blendsearch,ray]
|
||||
'''
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
# Optional dependencies and everything derived from them live in one guarded
# section, so that the module can still be imported when they are unavailable.
try:
    import ray
    from datasets import (
        load_dataset,
        load_metric,
    )
    from transformers import (
        AutoModelForSequenceClassification,
        AutoTokenizer,
        Trainer,
        TrainingArguments,
    )
    MODEL_CHECKPOINT = "distilbert-base-uncased"
    TASK = "cola"
    NUM_LABELS = 2
    COLUMN_NAME = "sentence"
    METRIC_NAME = "matthews_correlation"

    # HP_METRIC, MODE = "loss", "min"
    HP_METRIC, MODE = "matthews_correlation", "max"

    # Define tokenize method
    tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)

    def tokenize(examples):
        """Tokenize the COLUMN_NAME field of ``examples`` with truncation."""
        return tokenizer(examples[COLUMN_NAME], truncation=True)

except Exception:
    # Still best-effort, but no longer a bare ``except:``, so Ctrl-C and
    # SystemExit are not swallowed while the tokenizer is being loaded.
    print("pip install torch transformers datasets flaml[blendsearch,ray]")
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import os
|
||||
os.makedirs('logs', exist_ok=True)
|
||||
logger.addHandler(logging.FileHandler('logs/tune_distilbert.log'))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
import flaml
|
||||
|
||||
def train_distilbert(config: dict):
    """Fine-tune MODEL_CHECKPOINT on the GLUE task TASK and report the result.

    Args:
        config: Keyword arguments forwarded to ``TrainingArguments`` on top
            of the fixed settings below.

    The evaluation loss and Matthews correlation are passed to
    ``flaml.tune.report``; nothing is returned.
    """
    glue_metric = load_metric("glue", TASK)

    def compute_metrics(eval_pred):
        # eval_pred unpacks into the model outputs and the reference labels;
        # the predicted class is the arg-max along axis 1.
        outputs, references = eval_pred
        predicted = np.argmax(outputs, axis=1)
        return glue_metric.compute(predictions=predicted,
                                   references=references)

    # Load the dataset and tokenize it in batches.
    encoded = load_dataset("glue", TASK).map(tokenize, batched=True)
    train_split = encoded["train"]
    eval_split = encoded["validation"]

    classifier = AutoModelForSequenceClassification.from_pretrained(
        MODEL_CHECKPOINT, num_labels=NUM_LABELS)

    hf_args = TrainingArguments(
        output_dir='.',
        do_eval=False,
        disable_tqdm=True,
        logging_steps=20000,
        save_total_limit=0,
        **config,
    )

    hf_trainer = Trainer(
        classifier,
        hf_args,
        train_dataset=train_split,
        eval_dataset=eval_split,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    # Train first, then evaluate once on the validation split.
    hf_trainer.train()
    scores = hf_trainer.evaluate()

    flaml.tune.report(
        loss=scores["eval_loss"],
        matthews_correlation=scores["eval_matthews_correlation"],
    )
|
||||
|
||||
|
||||
def _test_distillbert(method='BlendSearch'):
    """Tune the DistilBERT fine-tuning hyperparameters with one search method.

    Runs ``ray.tune.run`` on ``train_distilbert`` for up to ``time_budget_s``
    seconds and logs the method, number of trials, elapsed time, best metric
    value and best configuration.

    Args:
        method: Name of the search method: 'ASHA', 'BOHB', 'Optuna', 'CFO',
            'BlendSearch', 'Dragonfly', 'SkOpt', 'Nevergrad', 'ZOOpt', 'Ax'
            or 'HyperOpt'.

    Raises:
        ValueError: If ``method`` is not one of the names above.
    """
    supported_methods = (
        'ASHA', 'BOHB', 'Optuna', 'CFO', 'BlendSearch', 'Dragonfly',
        'SkOpt', 'Nevergrad', 'ZOOpt', 'Ax', 'HyperOpt')
    if method not in supported_methods:
        # Fail before ray.init(): an unknown name used to surface only later,
        # as an unbound ``algo`` variable at the tune.run call.
        raise ValueError(
            f"unknown method {method!r}; expected one of {supported_methods}")

    max_num_epoch = 64
    num_samples = -1
    time_budget_s = 3600

    search_space = {
        # You can mix constants with search space objects.
        "num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
        "learning_rate": flaml.tune.loguniform(1e-6, 1e-4),
        "adam_beta1": flaml.tune.uniform(0.8, 0.99),
        "adam_beta2": flaml.tune.loguniform(98e-2, 9999e-4),
        "adam_epsilon": flaml.tune.loguniform(1e-9, 1e-7),
    }

    start_time = time.time()
    ray.init(num_cpus=4, num_gpus=4)
    if 'ASHA' == method:
        algo = None
    elif 'BOHB' == method:
        from ray.tune.schedulers import HyperBandForBOHB
        # NOTE(review): ray's BOHB searcher is usually spelled ``TuneBOHB``;
        # confirm that this lower-case name resolves.
        from ray.tune.suggest.bohb import tuneBOHB
        algo = tuneBOHB(max_concurrent=4)
        scheduler = HyperBandForBOHB(max_t=max_num_epoch)
    elif 'Optuna' == method:
        from ray.tune.suggest.optuna import OptunaSearch
        algo = OptunaSearch()
    elif 'CFO' == method:
        from flaml import CFO
        algo = CFO(points_to_evaluate=[{
            "num_train_epochs": 1,
        }])
    elif 'BlendSearch' == method:
        from flaml import BlendSearch
        algo = BlendSearch(points_to_evaluate=[{
            "num_train_epochs": 1,
        }])
    elif 'Dragonfly' == method:
        from ray.tune.suggest.dragonfly import DragonflySearch
        algo = DragonflySearch()
    elif 'SkOpt' == method:
        from ray.tune.suggest.skopt import SkOptSearch
        algo = SkOptSearch()
    elif 'Nevergrad' == method:
        from ray.tune.suggest.nevergrad import NevergradSearch
        import nevergrad as ng
        algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
    elif 'ZOOpt' == method:
        from ray.tune.suggest.zoopt import ZOOptSearch
        algo = ZOOptSearch(budget=num_samples)
    elif 'Ax' == method:
        from ray.tune.suggest.ax import AxSearch
        algo = AxSearch(max_concurrent=3)
    elif 'HyperOpt' == method:
        from ray.tune.suggest.hyperopt import HyperOptSearch
        algo = HyperOptSearch()
    # NOTE(review): the scheduler is reset to None both before and after the
    # ASHA branch, so tune.run always receives scheduler=None and neither
    # HyperBandForBOHB nor ASHAScheduler takes effect. Kept as in the
    # original; presumably deliberate - confirm.
    scheduler = None
    if method != 'BOHB':
        from ray.tune.schedulers import ASHAScheduler
        scheduler = ASHAScheduler(
            max_t=max_num_epoch,
            grace_period=1)
    scheduler = None
    analysis = ray.tune.run(
        train_distilbert,
        metric=HP_METRIC,
        mode=MODE,
        resources_per_trial={"gpu": 4, "cpu": 4},
        config=search_space, local_dir='logs/',
        num_samples=num_samples, time_budget_s=time_budget_s,
        keep_checkpoints_num=1, checkpoint_score_attr=HP_METRIC,
        scheduler=scheduler, search_alg=algo)

    ray.shutdown()

    best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
    metric = best_trial.metric_analysis[HP_METRIC][MODE]

    logger.info(f"method={method}")
    logger.info(f"n_trials={len(analysis.trials)}")
    logger.info(f"time={time.time()-start_time}")
    logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
    logger.info(f"Best model parameters: {best_trial.config}")
|
||||
|
||||
|
||||
def _test_distillbert_cfo():
    """Run the DistilBERT tuning experiment with the 'CFO' method."""
    _test_distillbert(method='CFO')
|
||||
|
||||
|
||||
def _test_distillbert_dragonfly():
    """Run the DistilBERT tuning experiment with the 'Dragonfly' method."""
    _test_distillbert(method='Dragonfly')
|
||||
|
||||
|
||||
def _test_distillbert_skopt():
    """Run the DistilBERT tuning experiment with the 'SkOpt' method."""
    _test_distillbert(method='SkOpt')
|
||||
|
||||
|
||||
def _test_distillbert_nevergrad():
    """Run the DistilBERT tuning experiment with the 'Nevergrad' method."""
    _test_distillbert(method='Nevergrad')
|
||||
|
||||
|
||||
def _test_distillbert_zoopt():
    """Run the DistilBERT tuning experiment with the 'ZOOpt' method."""
    _test_distillbert(method='ZOOpt')
|
||||
|
||||
|
||||
def _test_distillbert_ax():
    """Run the DistilBERT tuning experiment with the 'Ax' method."""
    _test_distillbert(method='Ax')
|
||||
|
||||
|
||||
def __test_distillbert_hyperopt():
    """Run the DistilBERT tuning experiment with the 'HyperOpt' method."""
    _test_distillbert(method='HyperOpt')
|
||||
|
||||
|
||||
def _test_distillbert_optuna():
    """Run the DistilBERT tuning experiment with the 'Optuna' method."""
    _test_distillbert(method='Optuna')
|
||||
|
||||
|
||||
def _test_distillbert_asha():
    """Run the DistilBERT tuning experiment with the 'ASHA' method."""
    _test_distillbert(method='ASHA')
|
||||
|
||||
|
||||
def _test_distillbert_bohb():
    """Run the DistilBERT tuning experiment with the 'BOHB' method."""
    _test_distillbert(method='BOHB')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test_distillbert()
|
||||
256
test/hf/test_electra.py
Normal file
256
test/hf/test_electra.py
Normal file
@@ -0,0 +1,256 @@
|
||||
'''Require: pip install torch transformers datasets flaml[blendsearch,ray]
|
||||
'''
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
# Optional dependencies and everything derived from them live in one guarded
# section, so that the module can still be imported when they are unavailable.
try:
    import ray
    from datasets import (
        load_dataset,
        load_metric,
    )
    from transformers import (
        AutoModelForSequenceClassification,
        AutoTokenizer,
        Trainer,
        TrainingArguments,
    )
    MODEL_CHECKPOINT = "google/electra-base-discriminator"
    # Maps a task name to the one or two text columns fed to the tokenizer.
    task_to_keys = {
        "cola": ("sentence", None),
        "mnli": ("premise", "hypothesis"),
        "mrpc": ("sentence1", "sentence2"),
        "qnli": ("question", "sentence"),
        "qqp": ("question1", "question2"),
        "rte": ("sentence1", "sentence2"),
        "sst2": ("sentence", None),
        "stsb": ("sentence1", "sentence2"),
        "wnli": ("sentence1", "sentence2"),
    }
    max_seq_length = 128
    overwrite_cache = False
    pad_to_max_length = True
    padding = "max_length"

    TASK = "qnli"
    # HP_METRIC, MODE = "loss", "min"
    HP_METRIC, MODE = "accuracy", "max"

    sentence1_key, sentence2_key = task_to_keys[TASK]
    # Define tokenize method
    tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)

    def tokenize(examples):
        """Tokenize the task's text column(s) of ``examples``.

        Passes one column, or two when the task defines a second key, to
        the tokenizer with the module-level padding and max_length settings
        and truncation enabled.
        """
        args = (
            (examples[sentence1_key],) if sentence2_key is None else (
                examples[sentence1_key], examples[sentence2_key])
        )
        return tokenizer(*args, padding=padding, max_length=max_seq_length,
                         truncation=True)

except Exception:
    # Still best-effort, but no longer a bare ``except:``, so Ctrl-C and
    # SystemExit are not swallowed while the tokenizer is being loaded.
    print("pip install torch transformers datasets flaml[blendsearch,ray]")
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import os
|
||||
os.makedirs('logs', exist_ok=True)
|
||||
logger.addHandler(logging.FileHandler('logs/tune_electra.log'))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
import flaml
|
||||
|
||||
def train_electra(config: dict):
    """Fine-tune MODEL_CHECKPOINT on the GLUE task TASK and report the result.

    Args:
        config: Keyword arguments forwarded to ``TrainingArguments`` on top
            of the fixed settings below.

    The evaluation loss and accuracy are passed to ``flaml.tune.report``
    and, on a best-effort basis, logged to the current AzureML run. Nothing
    is returned.
    """
    # Load dataset and apply tokenizer
    data_raw = load_dataset("glue", TASK)
    data_encoded = data_raw.map(tokenize, batched=True)
    train_dataset, eval_dataset = data_encoded["train"], data_encoded["validation"]

    # The number of classes is taken from the dataset's label feature.
    NUM_LABELS = len(train_dataset.features["label"].names)

    metric = load_metric("glue", TASK)

    def compute_metrics(eval_pred):
        # The predicted class is the arg-max along axis 1.
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=1)
        return metric.compute(predictions=predictions, references=labels)

    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_CHECKPOINT, num_labels=NUM_LABELS
    )

    training_args = TrainingArguments(
        output_dir='.',
        do_eval=False,
        disable_tqdm=True,
        logging_steps=20000,
        save_total_limit=0,
        fp16=True,
        **config,
    )

    trainer = Trainer(
        model,
        training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    # train model
    trainer.train()

    # evaluate model
    eval_output = trainer.evaluate()

    flaml.tune.report(
        loss=eval_output["eval_loss"],
        accuracy=eval_output["eval_accuracy"],
    )

    try:
        from azureml.core import Run
        run = Run.get_context()
        run.log('accuracy', eval_output["eval_accuracy"])
        run.log('loss', eval_output["eval_loss"])
        run.log('config', config)
    except Exception:
        # AzureML logging stays optional, but Ctrl-C/SystemExit now
        # propagate and the reason for skipping is visible at debug level.
        logger.debug("skipping AzureML logging", exc_info=True)
|
||||
|
||||
def _test_electra(method='BlendSearch'):
    """Tune the ELECTRA fine-tuning hyperparameters with one search method.

    Runs ``ray.tune.run`` on ``train_electra`` for up to ``time_budget_s``
    seconds and logs the method, number of trials, elapsed time, best metric
    value and best configuration.

    Args:
        method: Name of the search method: 'ASHA', 'BOHB', 'Optuna', 'CFO',
            'BlendSearch', 'Dragonfly', 'SkOpt', 'Nevergrad', 'ZOOpt', 'Ax'
            or 'HyperOpt'.

    Raises:
        ValueError: If ``method`` is not one of the names above.
    """
    supported_methods = (
        'ASHA', 'BOHB', 'Optuna', 'CFO', 'BlendSearch', 'Dragonfly',
        'SkOpt', 'Nevergrad', 'ZOOpt', 'Ax', 'HyperOpt')
    if method not in supported_methods:
        # Fail before ray.init(): an unknown name used to surface only later,
        # as an unbound ``algo`` variable at the tune.run call.
        raise ValueError(
            f"unknown method {method!r}; expected one of {supported_methods}")

    max_num_epoch = 9
    num_samples = -1
    time_budget_s = 3600

    search_space = {
        # You can mix constants with search space objects.
        "num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
        "learning_rate": flaml.tune.loguniform(3e-5, 1.5e-4),
        "weight_decay": flaml.tune.uniform(0, 0.3),
        # "warmup_ratio": flaml.tune.uniform(0, 0.2),
        # "hidden_dropout_prob": flaml.tune.uniform(0, 0.2),
        # "attention_probs_dropout_prob": flaml.tune.uniform(0, 0.2),
        "per_device_train_batch_size": flaml.tune.choice([16, 32, 64, 128]),
        "seed": flaml.tune.choice([12, 22, 33, 42]),
        # "adam_beta1": flaml.tune.uniform(0.8, 0.99),
        # "adam_beta2": flaml.tune.loguniform(98e-2, 9999e-4),
        # "adam_epsilon": flaml.tune.loguniform(1e-9, 1e-7),
    }

    start_time = time.time()
    ray.init(num_cpus=4, num_gpus=4)
    if 'ASHA' == method:
        algo = None
    elif 'BOHB' == method:
        from ray.tune.schedulers import HyperBandForBOHB
        # NOTE(review): ray's BOHB searcher is usually spelled ``TuneBOHB``;
        # confirm that this lower-case name resolves.
        from ray.tune.suggest.bohb import tuneBOHB
        algo = tuneBOHB(max_concurrent=4)
        scheduler = HyperBandForBOHB(max_t=max_num_epoch)
    elif 'Optuna' == method:
        from ray.tune.suggest.optuna import OptunaSearch
        algo = OptunaSearch()
    elif 'CFO' == method:
        from flaml import CFO
        algo = CFO(points_to_evaluate=[{
            "num_train_epochs": 1,
            "per_device_train_batch_size": 128,
        }])
    elif 'BlendSearch' == method:
        from flaml import BlendSearch
        algo = BlendSearch(points_to_evaluate=[{
            "num_train_epochs": 1,
            "per_device_train_batch_size": 128,
        }])
    elif 'Dragonfly' == method:
        from ray.tune.suggest.dragonfly import DragonflySearch
        algo = DragonflySearch()
    elif 'SkOpt' == method:
        from ray.tune.suggest.skopt import SkOptSearch
        algo = SkOptSearch()
    elif 'Nevergrad' == method:
        from ray.tune.suggest.nevergrad import NevergradSearch
        import nevergrad as ng
        algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
    elif 'ZOOpt' == method:
        from ray.tune.suggest.zoopt import ZOOptSearch
        algo = ZOOptSearch(budget=num_samples)
    elif 'Ax' == method:
        from ray.tune.suggest.ax import AxSearch
        algo = AxSearch(max_concurrent=3)
    elif 'HyperOpt' == method:
        from ray.tune.suggest.hyperopt import HyperOptSearch
        algo = HyperOptSearch()
    # NOTE(review): the scheduler is reset to None both before and after the
    # ASHA branch, so tune.run always receives scheduler=None and neither
    # HyperBandForBOHB nor ASHAScheduler takes effect. Kept as in the
    # original; presumably deliberate - confirm.
    scheduler = None
    if method != 'BOHB':
        from ray.tune.schedulers import ASHAScheduler
        scheduler = ASHAScheduler(
            max_t=max_num_epoch,
            grace_period=1)
    scheduler = None
    analysis = ray.tune.run(
        train_electra,
        metric=HP_METRIC,
        mode=MODE,
        resources_per_trial={"gpu": 4, "cpu": 4},
        config=search_space, local_dir='logs/',
        num_samples=num_samples, time_budget_s=time_budget_s,
        keep_checkpoints_num=1, checkpoint_score_attr=HP_METRIC,
        scheduler=scheduler, search_alg=algo)

    ray.shutdown()

    best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
    metric = best_trial.metric_analysis[HP_METRIC][MODE]

    logger.info(f"method={method}")
    logger.info(f"n_trials={len(analysis.trials)}")
    logger.info(f"time={time.time()-start_time}")
    logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
    logger.info(f"Best model parameters: {best_trial.config}")
|
||||
|
||||
|
||||
def _test_electra_cfo():
    """Run the ELECTRA tuning experiment with the 'CFO' method."""
    _test_electra(method='CFO')
|
||||
|
||||
|
||||
def _test_electra_dragonfly():
    """Run the ELECTRA tuning experiment with the 'Dragonfly' method."""
    _test_electra(method='Dragonfly')
|
||||
|
||||
|
||||
def _test_electra_skopt():
    """Run the ELECTRA tuning experiment with the 'SkOpt' method."""
    _test_electra(method='SkOpt')
|
||||
|
||||
|
||||
def _test_electra_nevergrad():
    """Run the ELECTRA tuning experiment with the 'Nevergrad' method."""
    _test_electra(method='Nevergrad')
|
||||
|
||||
|
||||
def _test_electra_zoopt():
    """Run the ELECTRA tuning experiment with the 'ZOOpt' method."""
    _test_electra(method='ZOOpt')
|
||||
|
||||
|
||||
def _test_electra_ax():
    """Run the ELECTRA tuning experiment with the 'Ax' method."""
    _test_electra(method='Ax')
|
||||
|
||||
|
||||
def __test_electra_hyperopt():
    """Run the ELECTRA tuning experiment with the 'HyperOpt' method."""
    _test_electra(method='HyperOpt')
|
||||
|
||||
|
||||
def _test_electra_optuna():
    """Run the ELECTRA tuning experiment with the 'Optuna' method."""
    _test_electra(method='Optuna')
|
||||
|
||||
|
||||
def _test_electra_asha():
    """Run the ELECTRA tuning experiment with the 'ASHA' method."""
    _test_electra(method='ASHA')
|
||||
|
||||
|
||||
def _test_electra_bohb():
    """Run the ELECTRA tuning experiment with the 'BOHB' method."""
    _test_electra(method='BOHB')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test_electra()
|
||||
19
test/run_electra.py
Normal file
19
test/run_electra.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from azureml.core import Workspace, Experiment, ScriptRunConfig
|
||||
# Submit test_electra.py as an AzureML experiment run and wait for it.
ws = Workspace.from_config()

# Compute target the run is submitted to, looked up by name in the workspace.
compute_target = ws.compute_targets['V100-4']
# compute_target = ws.compute_targets['K80']
# Two-part command: install the dependencies, then start the tuning script.
command = [
    "pip install torch transformers datasets flaml[blendsearch,ray] ax-platform sqlalchemy && ",
    "python test_electra.py"]

# NOTE(review): 'hf/' is a relative path, so this presumably has to be
# launched from the test/ directory - confirm.
config = ScriptRunConfig(
    source_directory='hf/',
    command=command,
    compute_target=compute_target,
)

exp = Experiment(ws, 'test-electra')
run = exp.submit(config)
print(run.get_portal_url())  # link to ml.azure.com
# Wait for the submitted run; show_output=True asks for its output.
run.wait_for_completion(show_output=True)
|
||||
@@ -274,7 +274,7 @@ class TestAutoML(unittest.TestCase):
|
||||
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"time_budget": 3,
|
||||
"metric": 'ap',
|
||||
"task": 'classification',
|
||||
"log_file_name": "test/sparse_classification.log",
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
'''Require: pip install torchvision ray flaml[blendsearch]
|
||||
'''
|
||||
import unittest
|
||||
import os
|
||||
import time
|
||||
@@ -24,7 +26,6 @@ def load_data(data_dir="./data"):
|
||||
# __load_data_end__
|
||||
|
||||
|
||||
import numpy as np
|
||||
try:
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
'''Require: pip install flaml[test,ray]
|
||||
'''
|
||||
import unittest
|
||||
import os
|
||||
import time
|
||||
from sklearn.model_selection import train_test_split
|
||||
import sklearn.metrics
|
||||
@@ -48,7 +49,6 @@ def _test_xgboost(method='BlendSearch'):
|
||||
else:
|
||||
from ray import tune
|
||||
search_space = {
|
||||
# You can mix constants with search space objects.
|
||||
"max_depth": tune.randint(1, 8) if method in [
|
||||
"BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
|
||||
"min_child_weight": tune.choice([1, 2, 3]),
|
||||
@@ -56,7 +56,7 @@ def _test_xgboost(method='BlendSearch'):
|
||||
"eta": tune.loguniform(1e-4, 1e-1)
|
||||
}
|
||||
max_iter = 10
|
||||
for num_samples in [256]:
|
||||
for num_samples in [128]:
|
||||
time_budget_s = 60 #None
|
||||
for n_cpu in [8]:
|
||||
start_time = time.time()
|
||||
@@ -138,6 +138,7 @@ def _test_xgboost(method='BlendSearch'):
|
||||
scheduler=scheduler, search_alg=algo)
|
||||
ray.shutdown()
|
||||
# # Load the best model checkpoint
|
||||
# import os
|
||||
# best_bst = xgb.Booster()
|
||||
# best_bst.load_model(os.path.join(analysis.best_checkpoint,
|
||||
# "model.xgb"))
|
||||
@@ -152,6 +153,33 @@ def _test_xgboost(method='BlendSearch'):
|
||||
logger.info(f"Best model parameters: {best_trial.config}")
|
||||
|
||||
|
||||
def test_nested():
    """Smoke-test ``flaml.tune.run`` on a search space with a nested dict.

    The trainable reads ``config["cost_related"]["a"]`` and ``config["b"]``
    and reports a single ``metric`` value; the run gets a one-second time
    budget and an initial config that only sets the nested key.
    """
    from flaml import tune

    def simple_func(config):
        a_term = (config["cost_related"]["a"]-4)**2
        b_term = (config["b"]-0.7)**2
        tune.report(metric=a_term * b_term)

    nested_space = {
        # test nested search space
        "cost_related": {
            "a": tune.randint(1, 8),
        },
        "b": tune.uniform(0.5, 1.0),
    }
    start_point = {"cost_related": {"a": 1}}

    analysis = tune.run(
        simple_func,
        init_config=start_point,
        metric="metric",
        mode="min",
        config=nested_space,
        local_dir='logs/',
        num_samples=-1,
        time_budget_s=1)
|
||||
|
||||
|
||||
def test_xgboost_bs():
|
||||
_test_xgboost()
|
||||
|
||||
|
||||
@@ -8,11 +8,7 @@ from flaml.model import XGBoostSklearnEstimator
|
||||
from flaml import tune
|
||||
|
||||
|
||||
# dataset = "blood-transfusion-service-center"
|
||||
# dataset = "Australian"
|
||||
dataset = "credit-g"
|
||||
# dataset = "phoneme"
|
||||
# dataset = "kc1"
|
||||
|
||||
|
||||
class XGBoost2D(XGBoostSklearnEstimator):
|
||||
@@ -50,8 +46,11 @@ def test_simple(method=None):
|
||||
"log_type": "all",
|
||||
"time_budget": 3#6000,
|
||||
}
|
||||
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
try:
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
except:
|
||||
from sklearn.datasets import load_wine
|
||||
X, y = load_wine(return_X_y=True)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
|
||||
random_state=42)
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
Reference in New Issue
Block a user