mirror of
https://github.com/microsoft/FLAML.git
synced 2026-02-18 06:32:25 +08:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
840e3fc104 | ||
|
|
1560a6e52a |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -151,3 +151,4 @@ catboost_info
|
||||
notebook/*.pkl
|
||||
notebook/.azureml
|
||||
mlruns
|
||||
logs
|
||||
@@ -135,15 +135,17 @@ class BlendSearch(Searcher):
|
||||
self._thread_count = 1 # total # threads created
|
||||
self._init_used = self._ls.init_config is None
|
||||
self._trial_proposed_by = {} # trial_id: str -> thread_id: int
|
||||
self._admissible_min = self._ls.normalize(self._ls.init_config)
|
||||
self._admissible_max = self._admissible_min.copy()
|
||||
self._ls_bound_min = self._ls.normalize(self._ls.init_config)
|
||||
self._ls_bound_max = self._ls_bound_min.copy()
|
||||
self._gs_admissible_min = self._ls_bound_min.copy()
|
||||
self._gs_admissible_max = self._ls_bound_max.copy()
|
||||
self._result = {} # config_signature: tuple -> result: Dict
|
||||
self._deadline = np.inf
|
||||
|
||||
def save(self, checkpoint_path: str):
|
||||
save_object = (self._metric_target, self._search_thread_pool,
|
||||
self._thread_count, self._init_used, self._trial_proposed_by,
|
||||
self._admissible_min, self._admissible_max, self._result,
|
||||
self._ls_bound_min, self._ls_bound_max, self._result,
|
||||
self._deadline)
|
||||
with open(checkpoint_path, "wb") as outputFile:
|
||||
pickle.dump(save_object, outputFile)
|
||||
@@ -153,7 +155,7 @@ class BlendSearch(Searcher):
|
||||
save_object = pickle.load(inputFile)
|
||||
self._metric_target, self._search_thread_pool, \
|
||||
self._thread_count, self._init_used, self._trial_proposed_by, \
|
||||
self._admissible_min, self._admissible_max, self._result, \
|
||||
self._ls_bound_min, self._ls_bound_max, self._result, \
|
||||
self._deadline = save_object
|
||||
|
||||
def restore_from_dir(self, checkpoint_dir: str):
|
||||
@@ -181,16 +183,7 @@ class BlendSearch(Searcher):
|
||||
# update target metric if improved
|
||||
if (result[self._metric]-self._metric_target)*self._ls.metric_op<0:
|
||||
self._metric_target = result[self._metric]
|
||||
if thread_id: # from local search
|
||||
# update admissible region
|
||||
normalized_config = self._ls.normalize(config)
|
||||
for key in self._admissible_min:
|
||||
value = normalized_config[key]
|
||||
if value > self._admissible_max[key]:
|
||||
self._admissible_max[key] = value
|
||||
elif value < self._admissible_min[key]:
|
||||
self._admissible_min[key] = value
|
||||
elif self._create_condition(result):
|
||||
if not thread_id and self._create_condition(result):
|
||||
# thread creator
|
||||
self._search_thread_pool[self._thread_count] = SearchThread(
|
||||
self._ls.mode,
|
||||
@@ -199,7 +192,11 @@ class BlendSearch(Searcher):
|
||||
)
|
||||
thread_id = self._thread_count
|
||||
self._thread_count += 1
|
||||
|
||||
self._update_admissible_region(config, self._ls_bound_min,
|
||||
self._ls_bound_max)
|
||||
# reset admissible region to ls bounding box
|
||||
self._gs_admissible_min.update(self._ls_bound_min)
|
||||
self._gs_admissible_max.update(self._ls_bound_max)
|
||||
# cleaner
|
||||
# logger.info(f"thread {thread_id} in search thread pool="
|
||||
# f"{thread_id in self._search_thread_pool}")
|
||||
@@ -207,6 +204,16 @@ class BlendSearch(Searcher):
|
||||
# local search thread
|
||||
self._clean(thread_id)
|
||||
|
||||
def _update_admissible_region(self, config, admissible_min, admissible_max):
|
||||
# update admissible region
|
||||
normalized_config = self._ls.normalize(config)
|
||||
for key in admissible_min:
|
||||
value = normalized_config[key]
|
||||
if value > admissible_max[key]:
|
||||
admissible_max[key] = value
|
||||
elif value < admissible_min[key]:
|
||||
admissible_min[key] = value
|
||||
|
||||
def _create_condition(self, result: Dict) -> bool:
|
||||
''' create thread condition
|
||||
'''
|
||||
@@ -234,9 +241,9 @@ class BlendSearch(Searcher):
|
||||
# f"{self._search_thread_pool[thread_id].converged}")
|
||||
if self._search_thread_pool[thread_id].converged:
|
||||
todelete.add(thread_id)
|
||||
for key in self._admissible_min:
|
||||
self._admissible_max[key] += self._ls.STEPSIZE
|
||||
self._admissible_min[key] -= self._ls.STEPSIZE
|
||||
for key in self._ls_bound_max:
|
||||
self._ls_bound_max[key] += self._ls.STEPSIZE
|
||||
self._ls_bound_min[key] -= self._ls.STEPSIZE
|
||||
for id in todelete:
|
||||
del self._search_thread_pool[id]
|
||||
|
||||
@@ -261,50 +268,66 @@ class BlendSearch(Searcher):
|
||||
'''
|
||||
if self._init_used and not self._points_to_evaluate:
|
||||
choice, backup = self._select_thread()
|
||||
# logger.debug(f"choice={choice}, backup={backup}")
|
||||
# print(f"choice={choice}, backup={backup}")
|
||||
if choice < 0: return None # timeout
|
||||
self._use_rs = False
|
||||
config = self._search_thread_pool[choice].suggest(trial_id)
|
||||
# preliminary check; not checking config validation
|
||||
skip = self._should_skip(choice, trial_id, config)
|
||||
if skip:
|
||||
if choice:
|
||||
# logger.info(f"skipping choice={choice}, config={config}")
|
||||
# print(f"skipping choice={choice}, config={config}")
|
||||
return None
|
||||
# use rs
|
||||
# use rs when BO fails to suggest a config
|
||||
self._use_rs = True
|
||||
for _, generated in generate_variants(
|
||||
{'config': self._ls.space}):
|
||||
config = generated['config']
|
||||
break
|
||||
break # get one random config
|
||||
# logger.debug(f"random config {config}")
|
||||
skip = self._should_skip(choice, trial_id, config)
|
||||
if skip: return None
|
||||
# if not choice: logger.info(config)
|
||||
if choice or backup == choice or self._valid(config):
|
||||
# if not choice: print(config)
|
||||
if choice or self._valid(config):
|
||||
# LS or valid or no backup choice
|
||||
self._trial_proposed_by[trial_id] = choice
|
||||
else: # invalid config proposed by GS
|
||||
if not self._use_rs:
|
||||
self._search_thread_pool[choice].on_trial_complete(
|
||||
trial_id, {}, error=True) # tell GS there is an error
|
||||
# if not self._use_rs:
|
||||
# self._search_thread_pool[choice].on_trial_complete(
|
||||
# trial_id, {}, error=True) # tell GS there is an error
|
||||
self._use_rs = False
|
||||
config = self._search_thread_pool[backup].suggest(trial_id)
|
||||
skip = self._should_skip(backup, trial_id, config)
|
||||
if skip:
|
||||
return None
|
||||
self._trial_proposed_by[trial_id] = backup
|
||||
choice = backup
|
||||
# if choice: self._pending.add(choice) # local search thread pending
|
||||
if not choice:
|
||||
if choice == backup:
|
||||
# use CFO's init point
|
||||
init_config = self._ls.init_config
|
||||
config = self._ls.complete_config(init_config,
|
||||
self._ls_bound_min, self._ls_bound_max)
|
||||
self._trial_proposed_by[trial_id] = choice
|
||||
else:
|
||||
config = self._search_thread_pool[backup].suggest(trial_id)
|
||||
skip = self._should_skip(backup, trial_id, config)
|
||||
if skip:
|
||||
return None
|
||||
self._trial_proposed_by[trial_id] = backup
|
||||
choice = backup
|
||||
if not choice: # global search
|
||||
if self._ls._resource:
|
||||
# TODO: add resource to config proposed by GS, min or median?
|
||||
config[self._ls.prune_attr] = self._ls.min_resource
|
||||
# temporarily relax admissible region for parallel proposals
|
||||
self._update_admissible_region(config, self._gs_admissible_min,
|
||||
self._gs_admissible_max)
|
||||
else:
|
||||
self._update_admissible_region(config, self._ls_bound_min,
|
||||
self._ls_bound_max)
|
||||
self._gs_admissible_min.update(self._ls_bound_min)
|
||||
self._gs_admissible_max.update(self._ls_bound_max)
|
||||
self._result[self._ls.config_signature(config)] = {}
|
||||
else: # use init config
|
||||
# print("use init config")
|
||||
init_config = self._points_to_evaluate.pop(
|
||||
0) if self._points_to_evaluate else self._ls.init_config
|
||||
config = self._ls.complete_config(init_config,
|
||||
self._admissible_min, self._admissible_max)
|
||||
self._ls_bound_min, self._ls_bound_max)
|
||||
# logger.info(f"reset config to {config}")
|
||||
config_signature = self._ls.config_signature(config)
|
||||
result = self._result.get(config_signature)
|
||||
@@ -315,6 +338,7 @@ class BlendSearch(Searcher):
|
||||
self._result[config_signature] = {}
|
||||
else: return None # running but no result yet
|
||||
self._init_used = True
|
||||
self._trial_proposed_by[trial_id] = 0
|
||||
# logger.info(f"config={config}")
|
||||
return config
|
||||
|
||||
@@ -340,10 +364,10 @@ class BlendSearch(Searcher):
|
||||
if choice:
|
||||
# local search thread
|
||||
self._clean(choice)
|
||||
else:
|
||||
# tell the thread there is an error
|
||||
self._search_thread_pool[choice].on_trial_complete(
|
||||
trial_id, {}, error=True)
|
||||
# else:
|
||||
# # tell the thread there is an error
|
||||
# self._search_thread_pool[choice].on_trial_complete(
|
||||
# trial_id, {}, error=True)
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -364,10 +388,10 @@ class BlendSearch(Searcher):
|
||||
|
||||
top_thread_id = backup_thread_id = 0
|
||||
priority1 = priority2 = self._search_thread_pool[0].priority
|
||||
# logger.debug(f"priority of thread 0={priority1}")
|
||||
# print(f"priority of thread 0={priority1}, obj_best1={self._search_thread_pool[0].obj_best1}")
|
||||
for thread_id, thread in self._search_thread_pool.items():
|
||||
# if thread_id:
|
||||
# logger.debug(
|
||||
# print(
|
||||
# f"priority of thread {thread_id}={thread.priority}")
|
||||
# logger.debug(
|
||||
# f"thread {thread_id}.can_suggest={thread.can_suggest}")
|
||||
@@ -384,13 +408,13 @@ class BlendSearch(Searcher):
|
||||
def _valid(self, config: Dict) -> bool:
|
||||
''' config validator
|
||||
'''
|
||||
for key in self._admissible_min:
|
||||
for key in self._gs_admissible_min:
|
||||
if key in config:
|
||||
value = config[key]
|
||||
# logger.info(
|
||||
# f"{key},{value},{self._admissible_min[key]},{self._admissible_max[key]}")
|
||||
if value<self._admissible_min[
|
||||
key] or value>self._admissible_max[key]:
|
||||
if value+self._ls.STEPSIZE<self._gs_admissible_min[
|
||||
key] or value>self._gs_admissible_max[key]+self._ls.STEPSIZE:
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -402,7 +426,7 @@ try:
|
||||
from ray.tune import (uniform, quniform, choice, randint, qrandint, randn,
|
||||
qrandn, loguniform, qloguniform)
|
||||
except:
|
||||
from .sample import (uniform, quniform, choice, randint, qrandint, randn,
|
||||
from ..tune.sample import (uniform, quniform, choice, randint, qrandint, randn,
|
||||
qrandn, loguniform, qloguniform)
|
||||
|
||||
class BlendSearchTuner(BlendSearch, NNITuner):
|
||||
@@ -446,7 +470,7 @@ try:
|
||||
search_space: JSON object created by experiment owner
|
||||
'''
|
||||
config = {}
|
||||
for key, value in search_space:
|
||||
for key, value in search_space.items():
|
||||
v = value.get("_value")
|
||||
_type = value['_type']
|
||||
if _type == 'choice':
|
||||
|
||||
@@ -129,16 +129,16 @@ class FLOW2(Searcher):
|
||||
if callable(getattr(domain, 'get_sampler', None)):
|
||||
self._tunable_keys.append(key)
|
||||
sampler = domain.get_sampler()
|
||||
if isinstance(sampler, sample.Quantized):
|
||||
sampler_inner = sampler.get_sampler()
|
||||
if str(sampler_inner) == 'Uniform':
|
||||
self._step_lb = min(
|
||||
self._step_lb, sampler.q/(domain.upper-domain.lower))
|
||||
elif isinstance(domain, sample.Integer) and str(
|
||||
sampler) == 'Uniform':
|
||||
self._step_lb = min(
|
||||
self._step_lb, 1.0/(domain.upper-domain.lower))
|
||||
elif isinstance(domain, sample.Categorical):
|
||||
# if isinstance(sampler, sample.Quantized):
|
||||
# sampler_inner = sampler.get_sampler()
|
||||
# if str(sampler_inner) == 'Uniform':
|
||||
# self._step_lb = min(
|
||||
# self._step_lb, sampler.q/(domain.upper-domain.lower))
|
||||
# elif isinstance(domain, sample.Integer) and str(
|
||||
# sampler) == 'Uniform':
|
||||
# self._step_lb = min(
|
||||
# self._step_lb, 1.0/(domain.upper-domain.lower))
|
||||
if isinstance(domain, sample.Categorical):
|
||||
cat_hp_cost = self.cat_hp_cost
|
||||
if cat_hp_cost and key in cat_hp_cost:
|
||||
cost = np.array(cat_hp_cost[key])
|
||||
@@ -149,7 +149,7 @@ class FLOW2(Searcher):
|
||||
for i, choice in enumerate(l):
|
||||
d[choice] = i
|
||||
self._ordered_cat_hp[key] = (l, d)
|
||||
self._step_lb = min(self._step_lb, 1.0/len(l))
|
||||
# self._step_lb = min(self._step_lb, 1.0/len(l))
|
||||
elif all(isinstance(x, int) or isinstance(x, float)
|
||||
for x in domain.categories):
|
||||
l = sorted(domain.categories)
|
||||
@@ -157,10 +157,10 @@ class FLOW2(Searcher):
|
||||
for i, choice in enumerate(l):
|
||||
d[choice] = i
|
||||
self._ordered_choice_hp[key] = (l, d)
|
||||
self._step_lb = min(self._step_lb, 1.0/len(l))
|
||||
# self._step_lb = min(self._step_lb, 1.0/len(l))
|
||||
else:
|
||||
self._unordered_cat_hp[key] = l = len(domain.categories)
|
||||
self._step_lb = min(self._step_lb, 1.0/l)
|
||||
# self._step_lb = min(self._step_lb, 1.0/l)
|
||||
if str(sampler) != 'Normal':
|
||||
self._bounded_keys.append(key)
|
||||
self._space_keys = list(self.space.keys())
|
||||
@@ -306,10 +306,10 @@ class FLOW2(Searcher):
|
||||
# normalize categorical
|
||||
if key in self._ordered_cat_hp:
|
||||
l, d = self._ordered_cat_hp[key]
|
||||
config_norm[key] = d[value]/len(l)
|
||||
config_norm[key] = (d[value]+0.5)/len(l) # center
|
||||
elif key in self._ordered_choice_hp:
|
||||
l, d = self._ordered_choice_hp[key]
|
||||
config_norm[key] = d[value]/len(l)
|
||||
config_norm[key] = (d[value]+0.5)/len(l) # center
|
||||
elif key in self.incumbent:
|
||||
config_norm[key] = self.incumbent[
|
||||
key] if value == self.best_config[
|
||||
@@ -409,6 +409,7 @@ class FLOW2(Searcher):
|
||||
self._metric = metric
|
||||
if mode:
|
||||
assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
|
||||
self._mode = mode
|
||||
if mode == "max":
|
||||
self.metric_op = -1.
|
||||
elif mode == "min":
|
||||
@@ -532,7 +533,7 @@ class FLOW2(Searcher):
|
||||
self._direction_tried = self.rand_vector_unit_sphere(
|
||||
self.dim) * self.step
|
||||
for i, key in enumerate(self._tunable_keys):
|
||||
move[key] += self._direction_tried[i]
|
||||
move[key] += self._direction_tried[i]
|
||||
self._project(move)
|
||||
config = self.denormalize(move)
|
||||
self._proposed_by[trial_id] = self.incumbent
|
||||
|
||||
@@ -27,6 +27,7 @@ class SearchThread:
|
||||
''' When search_alg is omitted, use local search FLOW2
|
||||
'''
|
||||
self._search_alg = search_alg
|
||||
self._is_ls = isinstance(search_alg, FLOW2)
|
||||
self._mode = mode
|
||||
self._metric_op = 1 if mode=='min' else -1
|
||||
self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = \
|
||||
@@ -37,6 +38,7 @@ class SearchThread:
|
||||
# eci: expected cost for improvement
|
||||
self.eci = self.cost_best
|
||||
self.priority = self.speed = 0
|
||||
self._init_config = True
|
||||
|
||||
def suggest(self, trial_id: str) -> Optional[Dict]:
|
||||
''' use the suggest() of the underlying search algorithm
|
||||
@@ -82,7 +84,12 @@ class SearchThread:
|
||||
if not hasattr(self._search_alg, '_ot_trials') or (not error and
|
||||
trial_id in self._search_alg._ot_trials):
|
||||
# optuna doesn't handle error
|
||||
self._search_alg.on_trial_complete(trial_id, result, error)
|
||||
if self._is_ls or not self._init_config:
|
||||
self._search_alg.on_trial_complete(trial_id, result, error)
|
||||
else:
|
||||
# init config is not proposed by self._search_alg
|
||||
# under this thread
|
||||
self._init_config = False
|
||||
if result:
|
||||
if self.cost_attr in result:
|
||||
self.cost_last = result[self.cost_attr]
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "0.2.6"
|
||||
__version__ = "0.2.8"
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install torch transformers datasets ipywidgets"
|
||||
"!pip install torch transformers datasets ipywidgets flaml[blendsearch,ray];"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -526,15 +526,9 @@
|
||||
"\n",
|
||||
"def train_distilbert(config: dict):\n",
|
||||
"\n",
|
||||
" # Define tokenize method\n",
|
||||
" tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)\n",
|
||||
" def tokenize(examples):\n",
|
||||
" return tokenizer(examples[COLUMN_NAME], truncation=True)\n",
|
||||
"\n",
|
||||
" # Load CoLA dataset and apply tokenizer\n",
|
||||
" cola_raw = datasets.load_dataset(\"glue\", TASK)\n",
|
||||
" cola_encoded = cola_raw.map(tokenize, batched=True)\n",
|
||||
" # QUESTION: Write processed data to disk?\n",
|
||||
" train_dataset, eval_dataset = cola_encoded[\"train\"], cola_encoded[\"validation\"]\n",
|
||||
"\n",
|
||||
" model = AutoModelForSequenceClassification.from_pretrained(\n",
|
||||
|
||||
@@ -26,12 +26,17 @@ try:
|
||||
|
||||
# Define tokenize method
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
|
||||
def tokenize(examples):
|
||||
return tokenizer(examples[COLUMN_NAME], truncation=True)
|
||||
|
||||
except:
|
||||
print("pip install torch transformers datasets flaml[blendsearch,ray]")
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.addHandler(logging.FileHandler('test/tune_distilbert.log'))
|
||||
import os
|
||||
os.makedirs('logs', exist_ok=True)
|
||||
logger.addHandler(logging.FileHandler('logs/tune_distilbert.log'))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
import flaml
|
||||
@@ -40,9 +45,6 @@ def train_distilbert(config: dict):
|
||||
|
||||
metric = load_metric("glue", TASK)
|
||||
|
||||
def tokenize(examples):
|
||||
return tokenizer(examples[COLUMN_NAME], truncation=True)
|
||||
|
||||
def compute_metrics(eval_pred):
|
||||
predictions, labels = eval_pred
|
||||
predictions = np.argmax(predictions, axis=1)
|
||||
@@ -156,7 +158,7 @@ def _test_distillbert(method='BlendSearch'):
|
||||
metric=HP_METRIC,
|
||||
mode=MODE,
|
||||
resources_per_trial={"gpu": 4, "cpu": 4},
|
||||
config=search_space, local_dir='test/logs/',
|
||||
config=search_space, local_dir='logs/',
|
||||
num_samples=num_samples, time_budget_s=time_budget_s,
|
||||
keep_checkpoints_num=1, checkpoint_score_attr=HP_METRIC,
|
||||
scheduler=scheduler, search_alg=algo)
|
||||
@@ -214,4 +216,4 @@ def _test_distillbert_bohb():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test_distillbert()
|
||||
_test_distillbert()
|
||||
256
test/hf/test_electra.py
Normal file
256
test/hf/test_electra.py
Normal file
@@ -0,0 +1,256 @@
|
||||
'''Require: pip install torch transformers datasets flaml[blendsearch,ray]
|
||||
'''
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import ray
|
||||
from datasets import (
|
||||
load_dataset,
|
||||
load_metric,
|
||||
)
|
||||
from transformers import (
|
||||
AutoModelForSequenceClassification,
|
||||
AutoTokenizer,
|
||||
Trainer,
|
||||
TrainingArguments,
|
||||
)
|
||||
MODEL_CHECKPOINT = "google/electra-base-discriminator"
|
||||
task_to_keys = {
|
||||
"cola": ("sentence", None),
|
||||
"mnli": ("premise", "hypothesis"),
|
||||
"mrpc": ("sentence1", "sentence2"),
|
||||
"qnli": ("question", "sentence"),
|
||||
"qqp": ("question1", "question2"),
|
||||
"rte": ("sentence1", "sentence2"),
|
||||
"sst2": ("sentence", None),
|
||||
"stsb": ("sentence1", "sentence2"),
|
||||
"wnli": ("sentence1", "sentence2"),
|
||||
}
|
||||
max_seq_length=128
|
||||
overwrite_cache=False
|
||||
pad_to_max_length=True
|
||||
padding = "max_length"
|
||||
|
||||
TASK = "qnli"
|
||||
# HP_METRIC, MODE = "loss", "min"
|
||||
HP_METRIC, MODE = "accuracy", "max"
|
||||
|
||||
sentence1_key, sentence2_key = task_to_keys[TASK]
|
||||
# Define tokenize method
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
|
||||
|
||||
def tokenize(examples):
|
||||
args = (
|
||||
(examples[sentence1_key],) if sentence2_key is None else (
|
||||
examples[sentence1_key], examples[sentence2_key])
|
||||
)
|
||||
return tokenizer(*args, padding=padding, max_length=max_seq_length,
|
||||
truncation=True)
|
||||
|
||||
except:
|
||||
print("pip install torch transformers datasets flaml[blendsearch,ray]")
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import os
|
||||
os.makedirs('logs', exist_ok=True)
|
||||
logger.addHandler(logging.FileHandler('logs/tune_electra.log'))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
import flaml
|
||||
|
||||
def train_electra(config: dict):
|
||||
|
||||
# Load dataset and apply tokenizer
|
||||
data_raw = load_dataset("glue", TASK)
|
||||
data_encoded = data_raw.map(tokenize, batched=True)
|
||||
train_dataset, eval_dataset = data_encoded["train"], data_encoded["validation"]
|
||||
|
||||
NUM_LABELS = len(train_dataset.features["label"].names)
|
||||
|
||||
metric = load_metric("glue", TASK)
|
||||
|
||||
def compute_metrics(eval_pred):
|
||||
predictions, labels = eval_pred
|
||||
predictions = np.argmax(predictions, axis=1)
|
||||
return metric.compute(predictions=predictions, references=labels)
|
||||
|
||||
|
||||
model = AutoModelForSequenceClassification.from_pretrained(
|
||||
MODEL_CHECKPOINT, num_labels=NUM_LABELS
|
||||
)
|
||||
|
||||
training_args = TrainingArguments(
|
||||
output_dir='.',
|
||||
do_eval=False,
|
||||
disable_tqdm=True,
|
||||
logging_steps=20000,
|
||||
save_total_limit=0,
|
||||
fp16=True,
|
||||
**config,
|
||||
)
|
||||
|
||||
trainer = Trainer(
|
||||
model,
|
||||
training_args,
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset=eval_dataset,
|
||||
tokenizer=tokenizer,
|
||||
compute_metrics=compute_metrics,
|
||||
)
|
||||
|
||||
# train model
|
||||
trainer.train()
|
||||
|
||||
# evaluate model
|
||||
eval_output = trainer.evaluate()
|
||||
|
||||
flaml.tune.report(
|
||||
loss=eval_output["eval_loss"],
|
||||
accuracy=eval_output["eval_accuracy"],
|
||||
)
|
||||
|
||||
try:
|
||||
from azureml.core import Run
|
||||
run = Run.get_context()
|
||||
run.log('accuracy', eval_output["eval_accuracy"])
|
||||
run.log('loss', eval_output["eval_loss"])
|
||||
run.log('config', config)
|
||||
except: pass
|
||||
|
||||
def _test_electra(method='BlendSearch'):
|
||||
|
||||
max_num_epoch = 9
|
||||
num_samples = -1
|
||||
time_budget_s = 3600
|
||||
|
||||
search_space = {
|
||||
# You can mix constants with search space objects.
|
||||
"num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
|
||||
"learning_rate": flaml.tune.loguniform(3e-5, 1.5e-4),
|
||||
"weight_decay": flaml.tune.uniform(0, 0.3),
|
||||
# "warmup_ratio": flaml.tune.uniform(0, 0.2),
|
||||
# "hidden_dropout_prob": flaml.tune.uniform(0, 0.2),
|
||||
# "attention_probs_dropout_prob": flaml.tune.uniform(0, 0.2),
|
||||
"per_device_train_batch_size": flaml.tune.choice([16, 32, 64, 128]),
|
||||
"seed": flaml.tune.choice([12, 22, 33, 42]),
|
||||
# "adam_beta1": flaml.tune.uniform(0.8, 0.99),
|
||||
# "adam_beta2": flaml.tune.loguniform(98e-2, 9999e-4),
|
||||
# "adam_epsilon": flaml.tune.loguniform(1e-9, 1e-7),
|
||||
}
|
||||
|
||||
start_time = time.time()
|
||||
ray.init(num_cpus=4, num_gpus=4)
|
||||
if 'ASHA' == method:
|
||||
algo = None
|
||||
elif 'BOHB' == method:
|
||||
from ray.tune.schedulers import HyperBandForBOHB
|
||||
from ray.tune.suggest.bohb import tuneBOHB
|
||||
algo = tuneBOHB(max_concurrent=4)
|
||||
scheduler = HyperBandForBOHB(max_t=max_num_epoch)
|
||||
elif 'Optuna' == method:
|
||||
from ray.tune.suggest.optuna import OptunaSearch
|
||||
algo = OptunaSearch()
|
||||
elif 'CFO' == method:
|
||||
from flaml import CFO
|
||||
algo = CFO(points_to_evaluate=[{
|
||||
"num_train_epochs": 1,
|
||||
"per_device_train_batch_size": 128,
|
||||
}])
|
||||
elif 'BlendSearch' == method:
|
||||
from flaml import BlendSearch
|
||||
algo = BlendSearch(points_to_evaluate=[{
|
||||
"num_train_epochs": 1,
|
||||
"per_device_train_batch_size": 128,
|
||||
}])
|
||||
elif 'Dragonfly' == method:
|
||||
from ray.tune.suggest.dragonfly import DragonflySearch
|
||||
algo = DragonflySearch()
|
||||
elif 'SkOpt' == method:
|
||||
from ray.tune.suggest.skopt import SkOptSearch
|
||||
algo = SkOptSearch()
|
||||
elif 'Nevergrad' == method:
|
||||
from ray.tune.suggest.nevergrad import NevergradSearch
|
||||
import nevergrad as ng
|
||||
algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
|
||||
elif 'ZOOpt' == method:
|
||||
from ray.tune.suggest.zoopt import ZOOptSearch
|
||||
algo = ZOOptSearch(budget=num_samples)
|
||||
elif 'Ax' == method:
|
||||
from ray.tune.suggest.ax import AxSearch
|
||||
algo = AxSearch(max_concurrent=3)
|
||||
elif 'HyperOpt' == method:
|
||||
from ray.tune.suggest.hyperopt import HyperOptSearch
|
||||
algo = HyperOptSearch()
|
||||
scheduler = None
|
||||
if method != 'BOHB':
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
scheduler = ASHAScheduler(
|
||||
max_t=max_num_epoch,
|
||||
grace_period=1)
|
||||
scheduler = None
|
||||
analysis = ray.tune.run(
|
||||
train_electra,
|
||||
metric=HP_METRIC,
|
||||
mode=MODE,
|
||||
resources_per_trial={"gpu": 4, "cpu": 4},
|
||||
config=search_space, local_dir='logs/',
|
||||
num_samples=num_samples, time_budget_s=time_budget_s,
|
||||
keep_checkpoints_num=1, checkpoint_score_attr=HP_METRIC,
|
||||
scheduler=scheduler, search_alg=algo)
|
||||
|
||||
ray.shutdown()
|
||||
|
||||
best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
|
||||
metric = best_trial.metric_analysis[HP_METRIC][MODE]
|
||||
|
||||
logger.info(f"method={method}")
|
||||
logger.info(f"n_trials={len(analysis.trials)}")
|
||||
logger.info(f"time={time.time()-start_time}")
|
||||
logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
|
||||
logger.info(f"Best model parameters: {best_trial.config}")
|
||||
|
||||
|
||||
def _test_electra_cfo():
|
||||
_test_electra('CFO')
|
||||
|
||||
|
||||
def _test_electra_dragonfly():
|
||||
_test_electra('Dragonfly')
|
||||
|
||||
|
||||
def _test_electra_skopt():
|
||||
_test_electra('SkOpt')
|
||||
|
||||
|
||||
def _test_electra_nevergrad():
|
||||
_test_electra('Nevergrad')
|
||||
|
||||
|
||||
def _test_electra_zoopt():
|
||||
_test_electra('ZOOpt')
|
||||
|
||||
|
||||
def _test_electra_ax():
|
||||
_test_electra('Ax')
|
||||
|
||||
|
||||
def __test_electra_hyperopt():
|
||||
_test_electra('HyperOpt')
|
||||
|
||||
|
||||
def _test_electra_optuna():
|
||||
_test_electra('Optuna')
|
||||
|
||||
|
||||
def _test_electra_asha():
|
||||
_test_electra('ASHA')
|
||||
|
||||
|
||||
def _test_electra_bohb():
|
||||
_test_electra('BOHB')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test_electra()
|
||||
19
test/run_electra.py
Normal file
19
test/run_electra.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from azureml.core import Workspace, Experiment, ScriptRunConfig
|
||||
ws = Workspace.from_config()
|
||||
|
||||
compute_target = ws.compute_targets['V100-4']
|
||||
# compute_target = ws.compute_targets['K80']
|
||||
command = [
|
||||
"pip install torch transformers datasets flaml[blendsearch,ray] ax-platform sqlalchemy && ",
|
||||
"python test_electra.py"]
|
||||
|
||||
config = ScriptRunConfig(
|
||||
source_directory='hf/',
|
||||
command=command,
|
||||
compute_target=compute_target,
|
||||
)
|
||||
|
||||
exp = Experiment(ws, 'test-electra')
|
||||
run = exp.submit(config)
|
||||
print(run.get_portal_url()) # link to ml.azure.com
|
||||
run.wait_for_completion(show_output=True)
|
||||
@@ -56,7 +56,7 @@ def _test_xgboost(method='BlendSearch'):
|
||||
"eta": tune.loguniform(1e-4, 1e-1)
|
||||
}
|
||||
max_iter = 10
|
||||
for num_samples in [256]:
|
||||
for num_samples in [128]:
|
||||
time_budget_s = 60 #None
|
||||
for n_cpu in [8]:
|
||||
start_time = time.time()
|
||||
|
||||
Reference in New Issue
Block a user