Fix bug in NNI tuner (#34)

* fix bug in nni tuner
* Update version.py

Co-authored-by: liuzhe <zhe.liu@microsoft.com>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
V0.2.7 (#35)
2026-02-18 06:32:25 +08:00 · 2021-03-06 10:38:33 -08:00 · 2021-03-05 23:39:14 -08:00
10 changed files with 383 additions and 79 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -151,3 +151,4 @@ catboost_info
 notebook/*.pkl
 notebook/.azureml
 mlruns
+logs
--- a/flaml/searcher/blendsearch.py
+++ b/flaml/searcher/blendsearch.py
@@ -135,15 +135,17 @@ class BlendSearch(Searcher):
        self._thread_count = 1 # total # threads created
        self._init_used = self._ls.init_config is None
        self._trial_proposed_by = {} # trial_id: str -> thread_id: int
-        self._admissible_min = self._ls.normalize(self._ls.init_config)
-        self._admissible_max = self._admissible_min.copy()
+        self._ls_bound_min = self._ls.normalize(self._ls.init_config)
+        self._ls_bound_max = self._ls_bound_min.copy()
+        self._gs_admissible_min = self._ls_bound_min.copy()
+        self._gs_admissible_max = self._ls_bound_max.copy()
        self._result = {} # config_signature: tuple -> result: Dict
        self._deadline = np.inf

    def save(self, checkpoint_path: str):
        save_object = (self._metric_target, self._search_thread_pool,
            self._thread_count, self._init_used, self._trial_proposed_by,
-            self._admissible_min, self._admissible_max, self._result,
+            self._ls_bound_min, self._ls_bound_max, self._result,
            self._deadline)
        with open(checkpoint_path, "wb") as outputFile:
            pickle.dump(save_object, outputFile)
@@ -153,7 +155,7 @@ class BlendSearch(Searcher):
            save_object = pickle.load(inputFile)
        self._metric_target, self._search_thread_pool, \
            self._thread_count, self._init_used, self._trial_proposed_by, \
-            self._admissible_min, self._admissible_max, self._result, \
+            self._ls_bound_min, self._ls_bound_max, self._result, \
            self._deadline = save_object

    def restore_from_dir(self, checkpoint_dir: str):
@@ -181,16 +183,7 @@ class BlendSearch(Searcher):
            # update target metric if improved
            if (result[self._metric]-self._metric_target)*self._ls.metric_op<0:
                self._metric_target = result[self._metric]
-            if thread_id: # from local search
-                # update admissible region
-                normalized_config = self._ls.normalize(config)
-                for key in self._admissible_min:
-                    value = normalized_config[key]
-                    if value > self._admissible_max[key]:
-                        self._admissible_max[key] = value
-                    elif value < self._admissible_min[key]:
-                        self._admissible_min[key] = value
-            elif self._create_condition(result):
+            if not thread_id and self._create_condition(result): 
                # thread creator
                self._search_thread_pool[self._thread_count] = SearchThread(
                    self._ls.mode,
@@ -199,7 +192,11 @@ class BlendSearch(Searcher):
                )
                thread_id = self._thread_count
                self._thread_count += 1
-                
+                self._update_admissible_region(config, self._ls_bound_min,
+                    self._ls_bound_max)
+            # reset admissible region to ls bounding box
+            self._gs_admissible_min.update(self._ls_bound_min)
+            self._gs_admissible_max.update(self._ls_bound_max)
        # cleaner
        # logger.info(f"thread {thread_id} in search thread pool="
        #     f"{thread_id in self._search_thread_pool}")
@@ -207,6 +204,16 @@ class BlendSearch(Searcher):
            # local search thread
            self._clean(thread_id)

+    def _update_admissible_region(self, config, admissible_min, admissible_max):
+        # update admissible region
+        normalized_config = self._ls.normalize(config)
+        for key in admissible_min:
+            value = normalized_config[key]
+            if value > admissible_max[key]:
+                admissible_max[key] = value
+            elif value < admissible_min[key]:
+                admissible_min[key] = value
+
    def _create_condition(self, result: Dict) -> bool:
        ''' create thread condition
        '''
@@ -234,9 +241,9 @@ class BlendSearch(Searcher):
        #     f"{self._search_thread_pool[thread_id].converged}")
        if self._search_thread_pool[thread_id].converged:
            todelete.add(thread_id)
-            for key in self._admissible_min:
-                self._admissible_max[key] += self._ls.STEPSIZE
-                self._admissible_min[key] -= self._ls.STEPSIZE            
+            for key in self._ls_bound_max:
+                self._ls_bound_max[key] += self._ls.STEPSIZE
+                self._ls_bound_min[key] -= self._ls.STEPSIZE            
        for id in todelete:
            del self._search_thread_pool[id]

@@ -261,50 +268,66 @@ class BlendSearch(Searcher):
        '''
        if self._init_used and not self._points_to_evaluate:
            choice, backup = self._select_thread()
-            # logger.debug(f"choice={choice}, backup={backup}")
+            # print(f"choice={choice}, backup={backup}")
            if choice < 0: return None # timeout
            self._use_rs = False
            config = self._search_thread_pool[choice].suggest(trial_id)
+            # preliminary check; not checking config validation
            skip = self._should_skip(choice, trial_id, config)
            if skip:
                if choice: 
-                    # logger.info(f"skipping choice={choice}, config={config}")
+                    # print(f"skipping choice={choice}, config={config}")
                    return None
-                # use rs
+                # use rs when BO fails to suggest a config
                self._use_rs = True
                for _, generated in generate_variants(
                    {'config': self._ls.space}):
                    config = generated['config']
-                    break
+                    break # get one random config
                # logger.debug(f"random config {config}")
                skip = self._should_skip(choice, trial_id, config)
                if skip: return None
-            # if not choice: logger.info(config)
-            if choice or backup == choice or self._valid(config): 
+            # if not choice: print(config)
+            if choice or self._valid(config): 
                # LS or valid or no backup choice
                self._trial_proposed_by[trial_id] = choice
            else: # invalid config proposed by GS
-                if not self._use_rs:
-                    self._search_thread_pool[choice].on_trial_complete(
-                        trial_id, {}, error=True) # tell GS there is an error
+                # if not self._use_rs:
+                #     self._search_thread_pool[choice].on_trial_complete(
+                #         trial_id, {}, error=True) # tell GS there is an error
                self._use_rs = False
-                config = self._search_thread_pool[backup].suggest(trial_id)
-                skip = self._should_skip(backup, trial_id, config)
-                if skip: 
-                    return None
-                self._trial_proposed_by[trial_id] = backup
-                choice = backup
-            # if choice: self._pending.add(choice) # local search thread pending
-            if not choice:
+                if choice == backup:
+                    # use CFO's init point
+                    init_config = self._ls.init_config
+                    config = self._ls.complete_config(init_config,
+                        self._ls_bound_min, self._ls_bound_max)
+                    self._trial_proposed_by[trial_id] = choice
+                else:
+                    config = self._search_thread_pool[backup].suggest(trial_id)
+                    skip = self._should_skip(backup, trial_id, config)
+                    if skip: 
+                        return None
+                    self._trial_proposed_by[trial_id] = backup
+                    choice = backup
+            if not choice: # global search
                if self._ls._resource: 
                # TODO: add resource to config proposed by GS, min or median?
                    config[self._ls.prune_attr] = self._ls.min_resource
+                # temporarily relax admissible region for parallel proposals
+                self._update_admissible_region(config, self._gs_admissible_min,
+                    self._gs_admissible_max)
+            else:
+                self._update_admissible_region(config, self._ls_bound_min,
+                    self._ls_bound_max)
+                self._gs_admissible_min.update(self._ls_bound_min)
+                self._gs_admissible_max.update(self._ls_bound_max)
            self._result[self._ls.config_signature(config)] = {}
        else: # use init config
+            # print("use init config")
            init_config = self._points_to_evaluate.pop(
                0) if self._points_to_evaluate else self._ls.init_config
            config = self._ls.complete_config(init_config,
-             self._admissible_min, self._admissible_max)
+             self._ls_bound_min, self._ls_bound_max)
                # logger.info(f"reset config to {config}")
            config_signature = self._ls.config_signature(config)
            result = self._result.get(config_signature)
@@ -315,6 +338,7 @@ class BlendSearch(Searcher):
                self._result[config_signature] = {}
            else: return None # running but no result yet
            self._init_used = True
+            self._trial_proposed_by[trial_id] = 0
        # logger.info(f"config={config}")
        return config

@@ -340,10 +364,10 @@ class BlendSearch(Searcher):
                    if choice:
                        # local search thread
                        self._clean(choice)
-                else:
-                    # tell the thread there is an error
-                    self._search_thread_pool[choice].on_trial_complete(
-                        trial_id, {}, error=True) 
+                # else:
+                #     # tell the thread there is an error
+                #     self._search_thread_pool[choice].on_trial_complete(
+                #         trial_id, {}, error=True) 
            return True
        return False

@@ -364,10 +388,10 @@ class BlendSearch(Searcher):

        top_thread_id = backup_thread_id = 0
        priority1 = priority2 = self._search_thread_pool[0].priority
-        # logger.debug(f"priority of thread 0={priority1}")
+        # print(f"priority of thread 0={priority1}, obj_best1={self._search_thread_pool[0].obj_best1}")
        for thread_id, thread in self._search_thread_pool.items():
            # if thread_id:
-            #     logger.debug(
+            #     print(
            #         f"priority of thread {thread_id}={thread.priority}")
            #     logger.debug(
            #         f"thread {thread_id}.can_suggest={thread.can_suggest}")
@@ -384,13 +408,13 @@ class BlendSearch(Searcher):
    def _valid(self, config: Dict) -> bool:
        ''' config validator
        '''
-        for key in self._admissible_min:
+        for key in self._gs_admissible_min:
            if key in config:
                value = config[key]
                # logger.info(
                #     f"{key},{value},{self._admissible_min[key]},{self._admissible_max[key]}")
-                if value<self._admissible_min[
-                    key] or value>self._admissible_max[key]:
+                if value+self._ls.STEPSIZE<self._gs_admissible_min[
+                    key] or value>self._gs_admissible_max[key]+self._ls.STEPSIZE:
                    return False
        return True

@@ -402,7 +426,7 @@ try:
        from ray.tune import (uniform, quniform, choice, randint, qrandint, randn,
    qrandn, loguniform, qloguniform)
    except:
-        from .sample import (uniform, quniform, choice, randint, qrandint, randn,
+        from ..tune.sample import (uniform, quniform, choice, randint, qrandint, randn,
    qrandn, loguniform, qloguniform)

    class BlendSearchTuner(BlendSearch, NNITuner):
@@ -446,7 +470,7 @@ try:
            search_space: JSON object created by experiment owner
            '''
            config = {}
-            for key, value in search_space:
+            for key, value in search_space.items():
                v = value.get("_value")
                _type = value['_type']
                if _type == 'choice':
--- a/flaml/searcher/flow2.py
+++ b/flaml/searcher/flow2.py
@@ -129,16 +129,16 @@ class FLOW2(Searcher):
            if callable(getattr(domain, 'get_sampler', None)):
                self._tunable_keys.append(key)
                sampler = domain.get_sampler()
-                if isinstance(sampler, sample.Quantized):
-                    sampler_inner = sampler.get_sampler()
-                    if str(sampler_inner) == 'Uniform':
-                        self._step_lb = min(
-                            self._step_lb, sampler.q/(domain.upper-domain.lower))
-                elif isinstance(domain, sample.Integer) and str(
-                    sampler) == 'Uniform':
-                    self._step_lb = min(
-                        self._step_lb, 1.0/(domain.upper-domain.lower))
-                elif isinstance(domain, sample.Categorical):
+                # if isinstance(sampler, sample.Quantized):
+                #     sampler_inner = sampler.get_sampler()
+                #     if str(sampler_inner) == 'Uniform':
+                #         self._step_lb = min(
+                #             self._step_lb, sampler.q/(domain.upper-domain.lower))
+                # elif isinstance(domain, sample.Integer) and str(
+                #     sampler) == 'Uniform':
+                #     self._step_lb = min(
+                #         self._step_lb, 1.0/(domain.upper-domain.lower))
+                if isinstance(domain, sample.Categorical):
                    cat_hp_cost = self.cat_hp_cost
                    if cat_hp_cost and key in cat_hp_cost:
                        cost = np.array(cat_hp_cost[key])
@@ -149,7 +149,7 @@ class FLOW2(Searcher):
                        for i, choice in enumerate(l):
                            d[choice] = i
                        self._ordered_cat_hp[key] = (l, d)
-                        self._step_lb = min(self._step_lb, 1.0/len(l))
+                        # self._step_lb = min(self._step_lb, 1.0/len(l))
                    elif all(isinstance(x, int) or isinstance(x, float)
                     for x in domain.categories):
                        l = sorted(domain.categories)
@@ -157,10 +157,10 @@ class FLOW2(Searcher):
                        for i, choice in enumerate(l):
                            d[choice] = i
                        self._ordered_choice_hp[key] = (l, d) 
-                        self._step_lb = min(self._step_lb, 1.0/len(l))
+                        # self._step_lb = min(self._step_lb, 1.0/len(l))
                    else:
                        self._unordered_cat_hp[key] = l = len(domain.categories)
-                        self._step_lb = min(self._step_lb, 1.0/l)
+                        # self._step_lb = min(self._step_lb, 1.0/l)
                if str(sampler) != 'Normal':
                    self._bounded_keys.append(key)
        self._space_keys = list(self.space.keys())
@@ -306,10 +306,10 @@ class FLOW2(Searcher):
                        # normalize categorical
                        if key in self._ordered_cat_hp:
                            l, d = self._ordered_cat_hp[key]
-                            config_norm[key] = d[value]/len(l)
+                            config_norm[key] = (d[value]+0.5)/len(l) # center
                        elif key in self._ordered_choice_hp:
                            l, d = self._ordered_choice_hp[key]
-                            config_norm[key] = d[value]/len(l)
+                            config_norm[key] = (d[value]+0.5)/len(l) # center
                        elif key in self.incumbent:
                            config_norm[key] = self.incumbent[
                                key] if value == self.best_config[
@@ -409,6 +409,7 @@ class FLOW2(Searcher):
            self._metric = metric
        if mode:
            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
+            self._mode = mode
            if mode == "max":
                self.metric_op = -1.
            elif mode == "min":
@@ -532,7 +533,7 @@ class FLOW2(Searcher):
        self._direction_tried = self.rand_vector_unit_sphere(
            self.dim) * self.step
        for i, key in enumerate(self._tunable_keys):
-            move[key] += self._direction_tried[i]            
+            move[key] += self._direction_tried[i]
        self._project(move)
        config = self.denormalize(move)
        self._proposed_by[trial_id] = self.incumbent
--- a/flaml/searcher/search_thread.py
+++ b/flaml/searcher/search_thread.py
@@ -27,6 +27,7 @@ class SearchThread:
        ''' When search_alg is omitted, use local search FLOW2
        '''
        self._search_alg = search_alg
+        self._is_ls = isinstance(search_alg, FLOW2)
        self._mode = mode
        self._metric_op = 1 if mode=='min' else -1
        self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = \
@@ -37,6 +38,7 @@ class SearchThread:
        # eci: expected cost for improvement
        self.eci = self.cost_best
        self.priority = self.speed = 0
+        self._init_config = True 
        
    def suggest(self, trial_id: str) -> Optional[Dict]:
        ''' use the suggest() of the underlying search algorithm
@@ -82,7 +84,12 @@ class SearchThread:
        if not hasattr(self._search_alg, '_ot_trials') or (not error and
            trial_id in self._search_alg._ot_trials):
            # optuna doesn't handle error
-            self._search_alg.on_trial_complete(trial_id, result, error)
+            if self._is_ls or not self._init_config:
+                self._search_alg.on_trial_complete(trial_id, result, error)
+            else: 
+                # init config is not proposed by self._search_alg
+                # under this thread
+                self._init_config = False
        if result:
            if self.cost_attr in result:
                self.cost_last = result[self.cost_attr]
--- a/flaml/version.py
+++ b/flaml/version.py
@@ -1 +1 @@
-__version__ = "0.2.6"
+__version__ = "0.2.8"
--- a/notebook/flaml_finetune_transformer.ipynb
+++ b/notebook/flaml_finetune_transformer.ipynb
@@ -15,7 +15,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "!pip install torch transformers datasets ipywidgets"
+    "!pip install torch transformers datasets ipywidgets flaml[blendsearch,ray];"
   ]
  },
  {
@@ -526,15 +526,9 @@
    "\n",
    "def train_distilbert(config: dict):\n",
    "\n",
-    "    # Define tokenize method\n",
-    "    tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)\n",
-    "    def tokenize(examples):\n",
-    "        return tokenizer(examples[COLUMN_NAME], truncation=True)\n",
-    "\n",
    "    # Load CoLA dataset and apply tokenizer\n",
    "    cola_raw = datasets.load_dataset(\"glue\", TASK)\n",
    "    cola_encoded = cola_raw.map(tokenize, batched=True)\n",
-    "    # QUESTION: Write processed data to disk?\n",
    "    train_dataset, eval_dataset = cola_encoded[\"train\"], cola_encoded[\"validation\"]\n",
    "\n",
    "    model = AutoModelForSequenceClassification.from_pretrained(\n",
--- a/test/hf/test_distillbert.py
+++ b/test/hf/test_distillbert.py
@@ -26,12 +26,17 @@ try:

    # Define tokenize method
    tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
+    def tokenize(examples):
+        return tokenizer(examples[COLUMN_NAME], truncation=True)
+
 except:
    print("pip install torch transformers datasets flaml[blendsearch,ray]")
    
 import logging
 logger = logging.getLogger(__name__)
-logger.addHandler(logging.FileHandler('test/tune_distilbert.log'))
+import os
+os.makedirs('logs', exist_ok=True)
+logger.addHandler(logging.FileHandler('logs/tune_distilbert.log'))
 logger.setLevel(logging.INFO)

 import flaml
@@ -40,9 +45,6 @@ def train_distilbert(config: dict):

    metric = load_metric("glue", TASK)

-    def tokenize(examples):
-        return tokenizer(examples[COLUMN_NAME], truncation=True)
-
    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=1)
@@ -156,7 +158,7 @@ def _test_distillbert(method='BlendSearch'):
        metric=HP_METRIC,
        mode=MODE,
        resources_per_trial={"gpu": 4, "cpu": 4},
-        config=search_space, local_dir='test/logs/',
+        config=search_space, local_dir='logs/',
        num_samples=num_samples, time_budget_s=time_budget_s,
        keep_checkpoints_num=1, checkpoint_score_attr=HP_METRIC,
        scheduler=scheduler, search_alg=algo)
@@ -214,4 +216,4 @@ def _test_distillbert_bohb():


 if __name__ == "__main__":
-    _test_distillbert()
+    _test_distillbert()
--- a/test/hf/test_electra.py
+++ b/test/hf/test_electra.py
@@ -0,0 +1,256 @@
+'''Require: pip install torch transformers datasets flaml[blendsearch,ray]
+'''
+import time
+import numpy as np
+
+try:
+    import ray
+    from datasets import (
+        load_dataset,
+        load_metric,
+    )
+    from transformers import (
+        AutoModelForSequenceClassification,
+        AutoTokenizer,
+        Trainer,
+        TrainingArguments,
+    )
+    MODEL_CHECKPOINT = "google/electra-base-discriminator"
+    task_to_keys = {
+        "cola": ("sentence", None),
+        "mnli": ("premise", "hypothesis"),
+        "mrpc": ("sentence1", "sentence2"),
+        "qnli": ("question", "sentence"),
+        "qqp": ("question1", "question2"),
+        "rte": ("sentence1", "sentence2"),
+        "sst2": ("sentence", None),
+        "stsb": ("sentence1", "sentence2"),
+        "wnli": ("sentence1", "sentence2"),
+    }
+    max_seq_length=128
+    overwrite_cache=False
+    pad_to_max_length=True
+    padding = "max_length"
+
+    TASK = "qnli"
+    # HP_METRIC, MODE = "loss", "min"
+    HP_METRIC, MODE = "accuracy", "max"
+
+    sentence1_key, sentence2_key = task_to_keys[TASK]
+    # Define tokenize method
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
+
+    def tokenize(examples):
+        args = (
+            (examples[sentence1_key],) if sentence2_key is None else (
+                examples[sentence1_key], examples[sentence2_key])
+        )
+        return tokenizer(*args, padding=padding, max_length=max_seq_length,
+         truncation=True)
+
+except:
+    print("pip install torch transformers datasets flaml[blendsearch,ray]")
+    
+import logging
+logger = logging.getLogger(__name__)
+import os
+os.makedirs('logs', exist_ok=True)
+logger.addHandler(logging.FileHandler('logs/tune_electra.log'))
+logger.setLevel(logging.INFO)
+
+import flaml
+
+def train_electra(config: dict):
+
+    # Load dataset and apply tokenizer
+    data_raw = load_dataset("glue", TASK)
+    data_encoded = data_raw.map(tokenize, batched=True)
+    train_dataset, eval_dataset = data_encoded["train"], data_encoded["validation"]
+
+    NUM_LABELS = len(train_dataset.features["label"].names)
+
+    metric = load_metric("glue", TASK)
+
+    def compute_metrics(eval_pred):
+        predictions, labels = eval_pred
+        predictions = np.argmax(predictions, axis=1)
+        return metric.compute(predictions=predictions, references=labels)
+
+
+    model = AutoModelForSequenceClassification.from_pretrained(
+        MODEL_CHECKPOINT, num_labels=NUM_LABELS
+    )
+
+    training_args = TrainingArguments(
+        output_dir='.',
+        do_eval=False,
+        disable_tqdm=True,
+        logging_steps=20000,
+        save_total_limit=0,
+        fp16=True,
+        **config,
+    )
+
+    trainer = Trainer(
+        model,
+        training_args,
+        train_dataset=train_dataset,
+        eval_dataset=eval_dataset,
+        tokenizer=tokenizer,
+        compute_metrics=compute_metrics,
+    )
+
+    # train model
+    trainer.train()
+
+    # evaluate model
+    eval_output = trainer.evaluate()
+
+    flaml.tune.report(
+        loss=eval_output["eval_loss"],
+        accuracy=eval_output["eval_accuracy"],
+        )
+
+    try:
+        from azureml.core import Run
+        run = Run.get_context()
+        run.log('accuracy', eval_output["eval_accuracy"])
+        run.log('loss', eval_output["eval_loss"])
+        run.log('config', config)
+    except: pass
+
+def _test_electra(method='BlendSearch'):
+ 
+    max_num_epoch = 9
+    num_samples = -1
+    time_budget_s = 3600
+
+    search_space = {
+        # You can mix constants with search space objects.
+        "num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
+        "learning_rate": flaml.tune.loguniform(3e-5, 1.5e-4),
+        "weight_decay": flaml.tune.uniform(0, 0.3),
+        # "warmup_ratio": flaml.tune.uniform(0, 0.2),
+        # "hidden_dropout_prob": flaml.tune.uniform(0, 0.2),
+        # "attention_probs_dropout_prob": flaml.tune.uniform(0, 0.2),
+        "per_device_train_batch_size": flaml.tune.choice([16, 32, 64, 128]),
+        "seed": flaml.tune.choice([12, 22, 33, 42]),
+        # "adam_beta1": flaml.tune.uniform(0.8, 0.99),
+        # "adam_beta2": flaml.tune.loguniform(98e-2, 9999e-4),
+        # "adam_epsilon": flaml.tune.loguniform(1e-9, 1e-7),
+    }
+
+    start_time = time.time()
+    ray.init(num_cpus=4, num_gpus=4)
+    if 'ASHA' == method:
+        algo = None
+    elif 'BOHB' == method:
+        from ray.tune.schedulers import HyperBandForBOHB
+        from ray.tune.suggest.bohb import tuneBOHB
+        algo = tuneBOHB(max_concurrent=4)
+        scheduler = HyperBandForBOHB(max_t=max_num_epoch)
+    elif 'Optuna' == method:
+        from ray.tune.suggest.optuna import OptunaSearch
+        algo = OptunaSearch()
+    elif 'CFO' == method:
+        from flaml import CFO
+        algo = CFO(points_to_evaluate=[{
+            "num_train_epochs": 1,
+            "per_device_train_batch_size": 128,
+        }])
+    elif 'BlendSearch' == method:
+        from flaml import BlendSearch
+        algo = BlendSearch(points_to_evaluate=[{
+            "num_train_epochs": 1,
+            "per_device_train_batch_size": 128,
+        }])
+    elif 'Dragonfly' == method:
+        from ray.tune.suggest.dragonfly import DragonflySearch
+        algo = DragonflySearch()
+    elif 'SkOpt' == method:
+        from ray.tune.suggest.skopt import SkOptSearch
+        algo = SkOptSearch()
+    elif 'Nevergrad' == method:
+        from ray.tune.suggest.nevergrad import NevergradSearch
+        import nevergrad as ng
+        algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
+    elif 'ZOOpt' == method:
+        from ray.tune.suggest.zoopt import ZOOptSearch
+        algo = ZOOptSearch(budget=num_samples)
+    elif 'Ax' == method:
+        from ray.tune.suggest.ax import AxSearch
+        algo = AxSearch(max_concurrent=3)
+    elif 'HyperOpt' == method:
+        from ray.tune.suggest.hyperopt import HyperOptSearch
+        algo = HyperOptSearch()
+        scheduler = None
+    if method != 'BOHB':
+        from ray.tune.schedulers import ASHAScheduler
+        scheduler = ASHAScheduler(
+            max_t=max_num_epoch,
+            grace_period=1)
+    scheduler = None
+    analysis = ray.tune.run(
+        train_electra,
+        metric=HP_METRIC,
+        mode=MODE,
+        resources_per_trial={"gpu": 4, "cpu": 4},
+        config=search_space, local_dir='logs/',
+        num_samples=num_samples, time_budget_s=time_budget_s,
+        keep_checkpoints_num=1, checkpoint_score_attr=HP_METRIC,
+        scheduler=scheduler, search_alg=algo)
+
+    ray.shutdown()
+
+    best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
+    metric = best_trial.metric_analysis[HP_METRIC][MODE]
+
+    logger.info(f"method={method}")
+    logger.info(f"n_trials={len(analysis.trials)}")
+    logger.info(f"time={time.time()-start_time}")
+    logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
+    logger.info(f"Best model parameters: {best_trial.config}")
+
+
+def _test_electra_cfo():
+    _test_electra('CFO')
+
+
+def _test_electra_dragonfly():
+    _test_electra('Dragonfly')
+
+
+def _test_electra_skopt():
+    _test_electra('SkOpt')
+
+
+def _test_electra_nevergrad():
+    _test_electra('Nevergrad')
+
+
+def _test_electra_zoopt():
+    _test_electra('ZOOpt')
+
+
+def _test_electra_ax():
+    _test_electra('Ax')
+
+
+def __test_electra_hyperopt():
+    _test_electra('HyperOpt')
+
+
+def _test_electra_optuna():
+    _test_electra('Optuna')
+
+
+def _test_electra_asha():
+    _test_electra('ASHA')
+
+
+def _test_electra_bohb():
+    _test_electra('BOHB')
+
+
+if __name__ == "__main__":
+    _test_electra()
--- a/test/run_electra.py
+++ b/test/run_electra.py
@@ -0,0 +1,19 @@
+from azureml.core import Workspace, Experiment, ScriptRunConfig
+ws = Workspace.from_config()
+
+compute_target = ws.compute_targets['V100-4']
+# compute_target = ws.compute_targets['K80']
+command = [
+    "pip install torch transformers datasets flaml[blendsearch,ray] ax-platform sqlalchemy && ",
+    "python test_electra.py"]
+
+config = ScriptRunConfig(
+    source_directory='hf/',
+    command=command,
+    compute_target=compute_target,
+)
+
+exp = Experiment(ws, 'test-electra')
+run = exp.submit(config)
+print(run.get_portal_url()) # link to ml.azure.com
+run.wait_for_completion(show_output=True)
--- a/test/test_tune.py
+++ b/test/test_tune.py
@@ -56,7 +56,7 @@ def _test_xgboost(method='BlendSearch'):
        "eta": tune.loguniform(1e-4, 1e-1)
    }
    max_iter = 10
-    for num_samples in [256]:
+    for num_samples in [128]:
        time_budget_s = 60 #None
        for n_cpu in [8]:
            start_time = time.time()