From 926589bddaf2c61605c511e71fd475aef3c18540 Mon Sep 17 00:00:00 2001 From: Xueqing Liu Date: Mon, 14 Jun 2021 17:11:40 -0400 Subject: [PATCH] exception, coverage for autohf (#106) * increase coverage * fixing exception messages * fixing import --- .gitignore | 1 + flaml/nlp/README.md | 2 +- flaml/nlp/__init__.py | 5 +- flaml/nlp/autotransformers.py | 339 ++++----- flaml/nlp/dataset/dataprocess_auto.py | 10 +- flaml/nlp/dataset/metric_auto.py | 5 +- flaml/nlp/dataset/submission_auto.py | 19 +- flaml/nlp/dataset/task_auto.py | 4 +- flaml/nlp/hpo/get_grid_search_space.py | 49 +- flaml/nlp/hpo/grid_searchspace_auto.py | 16 +- flaml/nlp/hpo/hpo_searchspace.py | 124 ++-- flaml/nlp/hpo/scheduler_auto.py | 10 +- flaml/nlp/hpo/searchalgo_auto.py | 62 +- flaml/nlp/huggingface/__init__.py | 0 flaml/nlp/huggingface/switch_head_auto.py | 4 +- flaml/nlp/huggingface/trainer.py | 29 +- flaml/nlp/result_analysis/azure_utils.py | 643 ++++++++---------- .../generate_result_summary.py | 357 ---------- flaml/nlp/result_analysis/wandb_utils.py | 66 +- flaml/nlp/utils.py | 35 +- notebook/flaml_autohf.ipynb | 43 -- setup.py | 6 + test/hf/run_analysis.py | 75 -- test/hf/run_autohf.py | 285 -------- test/hf/test_cover_azure.py | 126 ++++ test/hf/test_cover_other.py | 214 ++++++ test/hf/test_mobilebert.py | 37 +- 27 files changed, 1066 insertions(+), 1500 deletions(-) create mode 100644 flaml/nlp/huggingface/__init__.py delete mode 100644 flaml/nlp/result_analysis/generate_result_summary.py delete mode 100644 notebook/flaml_autohf.ipynb delete mode 100644 test/hf/run_analysis.py delete mode 100644 test/hf/run_autohf.py create mode 100644 test/hf/test_cover_azure.py create mode 100644 test/hf/test_cover_other.py diff --git a/.gitignore b/.gitignore index 83d5baf49..c3452c202 100644 --- a/.gitignore +++ b/.gitignore @@ -155,3 +155,4 @@ logs automl.pkl .idea/* +.DS_Store diff --git a/flaml/nlp/README.md b/flaml/nlp/README.md index 217f91727..284cef273 100644 --- a/flaml/nlp/README.md +++ b/flaml/nlp/README.md @@ -5,7 +5,7 @@ from flaml.nlp.autotransformers import AutoTransformers autohf = AutoTransformers() preparedata_setting = { - "dataset_subdataset_name": "glue:rte", + "dataset_subdataset_name": "glue:mrpc", "pretrained_model_size": "electra-base-discriminator:base", "data_root_path": "data/", "max_seq_length": 128, diff --git a/flaml/nlp/__init__.py b/flaml/nlp/__init__.py index 34444752e..5648f0cf4 100644 --- a/flaml/nlp/__init__.py +++ b/flaml/nlp/__init__.py @@ -1,2 +1,3 @@ -from flaml.nlp.autotransformers import AutoTransformers -from flaml.nlp.result_analysis.azure_utils import AzureUtils, JobID +from .hpo.hpo_searchspace import AutoHPOSearchSpace +from .autotransformers import AutoTransformers +from .result_analysis.azure_utils import AzureUtils, JobID diff --git a/flaml/nlp/autotransformers.py b/flaml/nlp/autotransformers.py index cb76ebe32..d369810fe 100644 --- a/flaml/nlp/autotransformers.py +++ b/flaml/nlp/autotransformers.py @@ -1,41 +1,21 @@ import json import os - -import torch -import transformers -import wandb - -from .dataset.dataprocess_auto import AutoEncodeText import numpy as np - -from ray.tune import CLIReporter - import time -import ray -import datasets -from datasets import load_dataset -from transformers.trainer_utils import IntervalStrategy, HPSearchBackend - -from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig, TrainingArguments - -from .dataset.metric_auto import get_default_and_alternative_metric -from .dataset.submission_auto import 
auto_output_prediction -from .dataset.task_auto import get_default_task -from .hpo.grid_searchspace_auto import AutoGridSearchSpace -from .hpo.hpo_searchspace import AutoHPOSearchSpace -from .huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING -from .utils import PathUtils, _variable_override_default_alternative -from .hpo.searchalgo_auto import AutoSearchAlgorithm -from .hpo.scheduler_auto import AutoScheduler -from .result_analysis.wandb_utils import WandbUtils -from .result_analysis.azure_utils import JobID -from .utils import load_console_args - -from .huggingface.trainer import TrainerForAutoTransformers - import logging -transformers.logging.set_verbosity_error() +try: + import ray + from transformers import TrainingArguments + import datasets + import torch +except ImportError: + print("To use the nlp component in flaml, run pip install flaml[nlp]") + +from .dataset.task_auto import get_default_task +from .result_analysis.azure_utils import JobID +from .huggingface.trainer import TrainerForAutoTransformers + logger = logging.getLogger(__name__) logger_formatter = logging.Formatter( '[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s', @@ -99,55 +79,25 @@ class AutoTransformers: def _set_search_space(self, **custom_hpo_args): - search_space_dict_hpo = search_space_dict_grid = None - if self.jobid_config.mod == "grid": - search_space_grid_json = AutoGridSearchSpace.from_model_and_dataset_name(self.jobid_config.pre, - self.jobid_config.presz, - self.get_full_data_name(), - self.jobid_config.subdat, "grid") - search_space_dict_grid \ - = AutoTransformers._convert_dict_to_ray_tune_space(search_space_grid_json, mode="grid") - search_space_dict_hpo = search_space_dict_grid - if self.jobid_config.mod != "grid" and self.jobid_config.mod != "gridbert": - search_space_hpo_json \ - = AutoHPOSearchSpace.from_model_and_dataset_name(logger, - self.jobid_config.spa, - self.jobid_config.pre, - self.jobid_config.presz, - self.get_full_data_name(), - self.jobid_config.subdat, - **custom_hpo_args) - search_space_dict_hpo = AutoTransformers._convert_dict_to_ray_tune_space(search_space_hpo_json, mode="hpo") - elif self.jobid_config.mod == "gridbert": - search_space_hpo_json = AutoGridSearchSpace.from_model_and_dataset_name( - "bert", - "base", - self.get_full_data_name(), - self.jobid_config.subdat, "grid") - search_space_dict_hpo = AutoTransformers._convert_dict_to_ray_tune_space(search_space_hpo_json, mode="grid") + from .hpo.hpo_searchspace import AutoHPOSearchSpace - """ - resolve the conflict in search_space_dict_hpo: only one of "max_steps" and "num_train_epochs" can exist - in the search space. If both exists, num_train_epochs is removed. 
Similarly, if "warmup_steps" and - "warmup_ratio" both exist, warmup_ratio is removed - """ - search_space_dict_hpo = TrainerForAutoTransformers.resolve_hp_conflict(search_space_dict_hpo) - self._search_space_hpo = search_space_dict_hpo - if self.jobid_config.mod == "grid": - search_space_dict_grid = TrainerForAutoTransformers.resolve_hp_conflict(search_space_dict_grid) - self._search_space_grid = search_space_dict_grid - else: - self._search_space_grid = None + search_space_hpo_json \ + = AutoHPOSearchSpace.from_model_and_dataset_name(self.jobid_config.spa, + self.jobid_config.pre, + self.jobid_config.presz, + self.jobid_config.dat, + self.jobid_config.subdat, + **custom_hpo_args) + self._search_space_hpo = AutoTransformers._convert_dict_to_ray_tune_space( + search_space_hpo_json, + mode=self.jobid_config.mod) - try: - self.ds_config = custom_hpo_args["ds_config"] - except KeyError: - self.ds_config = None - - def _wrapper(self, func, *args): # with star + @staticmethod + def _wrapper(func, *args): # with star return func(*args) - def _get_split_name(self, data_raw, fold_name=None): + @staticmethod + def _get_split_name(data_raw, fold_name=None): if fold_name: return fold_name fold_keys = data_raw.keys() @@ -157,7 +107,7 @@ class AutoTransformers: for each_split_name in {"train", "validation", "test"}: assert not (each_key.startswith(each_split_name) and each_key != each_split_name), \ "Dataset split must be within {}, must be explicitly specified in dataset_config, e.g.," \ - "'fold_name': ['train', 'validation_matched', 'test_matched']. Please refer to the example in the " \ + "'fold_name': ['train','validation_matched','test_matched']. Please refer to the example in the " \ "documentation of AutoTransformers.prepare_data()".format(",".join(fold_keys)) return "train", "validation", "test" @@ -187,28 +137,47 @@ class AutoTransformers: Args: server_name: - a string variable, which can be tmdev or azureml + A string variable, which can be tmdev or azureml data_root_path: - the root path for storing the checkpoints and output results, e.g., "data/" + The root path for storing the checkpoints and output results, e.g., "data/" jobid_config: - a JobID object describing the profile of job + A JobID object describing the profile of job wandb_utils: - a WandbUtils object for wandb operations + A WandbUtils object for wandb operations max_seq_length (optional): - max_seq_lckpt_per_epochength for the huggingface, this hyperparameter must be specified + Max_seq_lckpt_per_epochength for the huggingface, this hyperparameter must be specified at the data processing step resplit_portion: - the proportion for resplitting the train and dev data when split_mode="resplit". + The proportion for resplitting the train and dev data when split_mode="resplit". 
-        console_args = load_console_args(**custom_data_args)
+        from .dataset.dataprocess_auto import AutoEncodeText
+        from transformers import AutoTokenizer
+        from datasets import load_dataset
+        from .utils import PathUtils
+        from .utils import load_dft_args
+
         self._max_seq_length = max_seq_length
         self._server_name = server_name if server_name is not None else "tmdev"
-        self.jobid_config = jobid_config if jobid_config is not None else JobID(console_args)
-        self.wandb_utils = WandbUtils(is_wandb_on=is_wandb_on,
-                                      console_args=console_args,
-                                      jobid_config=self.jobid_config)
-        self.wandb_utils.set_wandb_per_run()
+        """
+            loading the jobid config from console args
+        """
+        console_args = load_dft_args()
+        self.jobid_config = JobID(console_args)
+        if jobid_config:
+            self.jobid_config = jobid_config
+        if len(custom_data_args) > 0:
+            self.jobid_config.set_jobid_from_console_args(console_args=custom_data_args)
+        if is_wandb_on:
+            from .result_analysis.wandb_utils import WandbUtils
+            self.wandb_utils = WandbUtils(is_wandb_on=is_wandb_on,
+                                          console_args=console_args,
+                                          jobid_config=self.jobid_config)
+            self.wandb_utils.set_wandb_per_run()
+        else:
+            self.wandb_utils = None
 
         self.path_utils = PathUtils(self.jobid_config, hpo_data_root_path=data_root_path)
 
@@ -216,11 +185,14 @@ class AutoTransformers:
             assert resplit_portion, "If split mode is 'rspt', the resplit_portion must be provided. Please " \
                                     "refer to the example in the documentation of AutoTransformers.prepare_data()"
         if self.jobid_config.subdat:
-            data_raw = load_dataset(self.get_full_data_name(), self.jobid_config.subdat)
+            data_raw = load_dataset(JobID.dataset_list_to_str(self.jobid_config.dat),
+                                    self.jobid_config.subdat)
         else:
-            data_raw = self._wrapper(load_dataset, *self.jobid_config.dat)
+            data_raw = AutoTransformers._wrapper(load_dataset, *self.jobid_config.dat)
 
-        self._train_name, self._dev_name, self._test_name = self._get_split_name(data_raw, fold_name=fold_name)
+        self._train_name, self._dev_name, self._test_name = AutoTransformers._get_split_name(
+            data_raw,
+            fold_name=fold_name)
 
         auto_tokentoids_config = {"max_seq_length": self._max_seq_length}
         self._tokenizer = AutoTokenizer.from_pretrained(self.jobid_config.pre_full, use_fast=True)
@@ -228,7 +200,7 @@
         return AutoEncodeText.from_model_and_dataset_name(
             data_raw,
             self.jobid_config.pre_full,
-            self.get_full_data_name(),
+            self.jobid_config.dat,
             self.jobid_config.subdat,
             **auto_tokentoids_config)
 
@@ -247,7 +219,8 @@
         if self.jobid_config.spt == "rspt":
             all_folds_from_source = []
             assert "source" in resplit_portion.keys(), "Must specify the source for resplitting the dataset in" \
-                "resplit_portion, which is a list of folder names, e.g., resplit_portion = {'source': ['train']}"
+                                                       " resplit_portion, which is a list of folder names, e.g., " \
+                                                       "resplit_portion = {'source': ['train']}"
 
             source_fold_names = resplit_portion['source']
             for each_fold_name in source_fold_names:
@@ -279,8 +252,11 @@
 
     def _load_model(self,
                     checkpoint_path=None,
                     per_model_config=None):
+        from transformers import AutoConfig
+        from .huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING
 
-        this_task = get_default_task(self.get_full_data_name(), self.jobid_config.subdat)
+        this_task = get_default_task(self.jobid_config.dat,
+                                     self.jobid_config.subdat)
         if this_task == 
"seq-classification": self._num_labels = len(self.train_dataset.features["label"].names) elif this_task == "regression": @@ -290,6 +266,7 @@ class AutoTransformers: checkpoint_path = self.jobid_config.pre_full def get_this_model(): + from transformers import AutoModelForSequenceClassification return AutoModelForSequenceClassification.from_pretrained(checkpoint_path, config=model_config) def is_pretrained_model_in_classification_head_list(): @@ -331,15 +308,17 @@ class AutoTransformers: this_model.resize_token_embeddings(len(self._tokenizer)) return this_model elif this_task == "regression": - model_config = self._set_model_config(checkpoint_path, per_model_config, 1) + model_config_num_labels = 1 + model_config = _set_model_config() this_model = get_this_model() return this_model def _get_metric_func(self): - if self.get_full_data_name() in ("glue", "super_glue"): - metric = datasets.load.load_metric(self.get_full_data_name(), self.jobid_config.subdat) - elif self.get_full_data_name() in ("squad", "squad_v2"): - metric = datasets.load.load_metric(self.get_full_data_name()) + data_name = JobID.dataset_list_to_str(self.jobid_config.dat) + if data_name in ("glue", "super_glue"): + metric = datasets.load.load_metric(data_name, self.jobid_config.subdat) + elif data_name in ("squad", "squad_v2"): + metric = datasets.load.load_metric(data_name) else: metric = datasets.load.load_metric(self.metric_name) return metric @@ -366,6 +345,7 @@ class AutoTransformers: @staticmethod def _separate_config(config): + training_args_config = {} per_model_config = {} @@ -378,10 +358,12 @@ class AutoTransformers: return training_args_config, per_model_config def _objective(self, config, reporter, checkpoint_dir=None): - def model_init(): - return self._load_model() + from transformers import IntervalStrategy from transformers.trainer_utils import set_seed + + def model_init(): + return self._load_model() set_seed(config["seed"]) training_args_config, per_model_config = AutoTransformers._separate_config(config) @@ -404,7 +386,6 @@ class AutoTransformers: save_steps=ckpt_freq, save_total_limit=0, fp16=self._fp16, - deepspeed=self.ds_config, **training_args_config, ) @@ -423,10 +404,15 @@ class AutoTransformers: """ create a wandb run. 
If os.environ["WANDB_MODE"] == "offline", run = None
        """
-        run = self.wandb_utils.set_wandb_per_trial()
-        if os.environ["WANDB_MODE"] == "online":
+
+        if self.wandb_utils:
+            run = self.wandb_utils.set_wandb_per_trial()
+            import wandb
             for each_hp in config:
                 wandb.log({each_hp: config[each_hp]})
+        else:
+            run = None
+
         trainer.train()
         trainer.evaluate(self.eval_dataset)
         """
@@ -466,6 +452,8 @@
                          search_algo_name,
                          search_algo_args_mode,
                          **custom_hpo_args):
+        from .hpo.searchalgo_auto import AutoSearchAlgorithm
+
         if search_algo_name in ("bs", "cfo"):
             self._verify_init_config(**custom_hpo_args)
         search_algo = AutoSearchAlgorithm.from_method_name(
@@ -488,9 +476,6 @@
         assert len(subdirs) == 1, subdirs
         return subdirs[0]
 
-    def get_full_data_name(self):
-        return JobID.dataset_list_to_str(self.jobid_config.dat, "dat")
-
     def _save_ckpt_json(self,
                         best_ckpt):
         json.dump({"best_ckpt": best_ckpt},
@@ -517,11 +502,15 @@
             raise err
 
     def _set_metric(self, custom_metric_name=None, custom_metric_mode_name=None):
-        default_metric, default_mode, all_metrics, all_modes = get_default_and_alternative_metric(
-            self.get_full_data_name(),
-            subdataset_name=self.jobid_config.subdat,
-            custom_metric_name=custom_metric_name,
-            custom_metric_mode_name=custom_metric_mode_name)
+        from .dataset.metric_auto import get_default_and_alternative_metric
+        from .utils import _variable_override_default_alternative
+
+        default_metric, default_mode, all_metrics, all_modes = \
+            get_default_and_alternative_metric(
+                dataset_name_list=self.jobid_config.dat,
+                subdataset_name=self.jobid_config.subdat,
+                custom_metric_name=custom_metric_name,
+                custom_metric_mode_name=custom_metric_mode_name)
         _variable_override_default_alternative(logger,
                                                self,
                                                "metric_name",
@@ -538,7 +527,8 @@
         self._all_modes = all_modes
 
     def _set_task(self):
-        self.task_name = get_default_task(self.get_full_data_name(), self.jobid_config.subdat)
+        self.task_name = get_default_task(self.jobid_config.dat,
+                                          self.jobid_config.subdat)
 
     def fit_hf(self,
                resources_per_trial,
@@ -549,47 +539,46 @@
                _fp16=True,
                **custom_hpo_args
                ):
+        from transformers.trainer_utils import HPSearchBackend
+
         '''Fine-tuning the huggingface model using HF's API Transformers.hyperparameter_search (for comparative purposes).
-           Transformers.hyperparameter_search has the following disadvantages:
-           (1) it does not return tune.analysis.Analysis result, what is analysis used for
-           (2) it is inconvenient to develop on top of Transformers.hyperparameter_search, whose trainable function,
-           search space, etc. are defined inside of Transformers.hyperparameter_search. 
-
-        An example:
-            autohf_settings = {"resources_per_trial": {"cpu": 1},
-                               "num_samples": 1,
-                               "time_budget": 100000,
-                               "ckpt_per_epoch": 1,
-                               "fp16": False,
-                              }
-            validation_metric, analysis = autohf.fit(**autohf_settings,)
-
-        Args:
-            resources_per_trial:
-                A dict showing the resources used by each trial,
-                e.g., {"gpu": 4, "cpu": 4}
-            num_samples:
-                An int variable of the maximum number of trials
-            time_budget:
-                An int variable of the maximum time budget
-            custom_metric_name:
-                A string of the dataset name or a function,
-                e.g., 'accuracy', 'f1', 'loss',
-            custom_metric_mode_name:
-                A string of the mode name,
-                e.g., "max", "min", "last", "all"
-            fp16:
-                boolean, default = True | whether to use fp16
-            custom_hpo_args:
-                The additional keyword arguments, e.g.,
-                custom_hpo_args = {"points_to_evaluate": [{
-                "num_train_epochs": 1,
-                "per_device_train_batch_size": 128, }]}
-
-        Returns:
-            validation_metric:
-                a dict storing the validation score
-        '''
+        Transformers.hyperparameter_search has the following disadvantages:
+        (1) it does not return the tune.analysis.Analysis result, which is needed for result analysis
+        (2) it is inconvenient to develop on top of Transformers.hyperparameter_search, whose trainable function,
+        search space, etc. are defined inside of Transformers.hyperparameter_search.
+        An example:
+            autohf_settings = {"resources_per_trial": {"cpu": 1},
+                               "num_samples": 1,
+                               "time_budget": 100000,
+                               "ckpt_per_epoch": 1,
+                               "fp16": False,
+                              }
+            validation_metric, analysis = autohf.fit(**autohf_settings,)
+        Args:
+            resources_per_trial:
+                A dict showing the resources used by each trial,
+                e.g., {"gpu": 4, "cpu": 4}
+            num_samples:
+                An int variable of the maximum number of trials
+            time_budget:
+                An int variable of the maximum time budget
+            custom_metric_name:
+                A string of the dataset name or a function,
+                e.g., 'accuracy', 'f1', 'loss',
+            custom_metric_mode_name:
+                A string of the mode name,
+                e.g., "max", "min", "last", "all"
+            fp16:
+                boolean, default = True | whether to use fp16
+            custom_hpo_args:
+                The additional keyword arguments, e.g.,
+                custom_hpo_args = {"points_to_evaluate": [{
+                "num_train_epochs": 1,
+                "per_device_train_batch_size": 128, }]}
+        Returns:
+            validation_metric:
+                a dict storing the validation score
+        '''
 
         def model_init():
             return self._load_model()
 
@@ -626,7 +615,7 @@
         best_run = trainer.hyperparameter_search(
             n_trials=num_samples,
             time_budget_s=time_budget,
-            hp_space=ray_hp_space,
+            # hp_space=ray_hp_space,
             backend=HPSearchBackend.RAY,
             resources_per_trial=resources_per_trial)
         duration = time.time() - start_time
@@ -669,7 +658,8 @@
                ckpt_per_epoch=1,
                fp16=True,
                verbose=1,
-               resources_per_trial={"gpu": 1, "cpu": 1},
+               resources_per_trial=None,
+               ray_local_mode=False,
                **custom_hpo_args):
         '''Fine-tuning the huggingface model using the HPO setting
 
@@ -703,6 +693,8 @@
                 messages
             fp16:
                 boolean, default = True | whether to use fp16
+            ray_local_mode:
+                boolean, default = False | whether to use the local mode (debugging mode) for ray tune.run
             custom_hpo_args:
                 The additional keyword arguments, e.g.,
                 custom_hpo_args = {"points_to_evaluate": [{
@@ -716,13 +708,22 @@
                 a ray.tune.analysis.Analysis object storing the analysis results from tune.run
         '''
+        from .hpo.scheduler_auto import AutoScheduler
+
+        """
+        Specify the other part of the jobid configs from custom_hpo_args, e.g., if the search algorithm was not
+        specified previously, it can be specified here
+        """
+        if len(custom_hpo_args) > 0:
+            
self.jobid_config.set_jobid_from_console_args(console_args=custom_hpo_args) + self._resources_per_trial = resources_per_trial self._set_metric(custom_metric_name, custom_metric_mode_name) self._set_task() self._fp16 = fp16 - ray.init(local_mode=True) - + ray.init(local_mode=ray_local_mode) self._set_search_space(**custom_hpo_args) + search_algo = self._get_search_algo(self.jobid_config.alg, self.jobid_config.arg, **custom_hpo_args) scheduler = AutoScheduler.from_scheduler_name(self.jobid_config.pru) self.ckpt_per_epoch = ckpt_per_epoch @@ -802,17 +803,16 @@ class AutoTransformers: test_trainer = TrainerForAutoTransformers(best_model, training_args) if self.jobid_config.spt == "ori": - try: + if "label" in self.test_dataset.keys(): self.test_dataset.remove_columns_("label") - except ValueError: - pass + print("Cleaning the existing label column from test data") test_dataloader = test_trainer.get_test_dataloader(self.test_dataset) predictions, labels, _ = test_trainer.prediction_loop(test_dataloader, description="Prediction") predictions = np.squeeze(predictions) \ - if get_default_task(self.get_full_data_name(), self.jobid_config.subdat) == "regression" \ + if get_default_task(self.jobid_config.dat, + self.jobid_config.subdat) == "regression" \ else np.argmax(predictions, axis=1) - torch.cuda.empty_cache() if self.jobid_config.spt == "rspt": assert labels is not None @@ -847,6 +847,11 @@ class AutoTransformers: Returns: the path of the output .zip file """ - return auto_output_prediction(self.get_full_data_name(), output_prediction_path, - output_zip_file_name, predictions, self.train_dataset, - self._dev_name, self.jobid_config.subdat) + from .dataset.submission_auto import auto_output_prediction + return auto_output_prediction(self.jobid_config.dat, + output_prediction_path, + output_zip_file_name, + predictions, + self.train_dataset, + self._dev_name, + self.jobid_config.subdat) diff --git a/flaml/nlp/dataset/dataprocess_auto.py b/flaml/nlp/dataset/dataprocess_auto.py index ab3c49e3a..058610963 100644 --- a/flaml/nlp/dataset/dataprocess_auto.py +++ b/flaml/nlp/dataset/dataprocess_auto.py @@ -178,7 +178,7 @@ class AutoEncodeText: def from_model_and_dataset_name(cls, data_raw, model_checkpoint_path, - dataset_name, + dataset_name_list: list = None, subdataset_name=None, **kwargs): """ @@ -193,8 +193,8 @@ class AutoEncodeText: model_checkpoint_path: A string variable which specifies the model path, e.g., "google/electra-base-discriminator" - dataset_name: - A string variable which is the dataset name, e.g., "glue" + dataset_name_list: + A list which is the dataset name, e.g., ["glue"] subdataset_name: A string variable which is the sub dataset name,e.g., "rte" @@ -208,6 +208,8 @@ class AutoEncodeText: >>> AutoEncodeText.from_model_and_dataset_name(data_raw, "google/electra-base-discriminator", ["glue"], "rte") """ + from ..result_analysis.azure_utils import JobID + dataset_name = JobID.dataset_list_to_str(dataset_name_list) if (dataset_name, subdataset_name) in TOKENIZER_MAPPING.keys(): this_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_path, use_fast=True) token_func = TOKENIZER_MAPPING[(dataset_name, subdataset_name)] @@ -220,6 +222,6 @@ class AutoEncodeText: raise ValueError( "Unrecognized method {},{} for this kind of AutoGridSearchSpace: {}.\n" "Method name should be one of {}.".format( - dataset_name, subdataset_name, cls.__name__, ", ".join(c.__name__ for c in TOKENIZER_MAPPING.keys()) + dataset_name, subdataset_name, cls.__name__, ", ".join(c[0] for c in 
TOKENIZER_MAPPING.keys()) ) ) diff --git a/flaml/nlp/dataset/metric_auto.py b/flaml/nlp/dataset/metric_auto.py index 6dbb35524..c0210c32b 100644 --- a/flaml/nlp/dataset/metric_auto.py +++ b/flaml/nlp/dataset/metric_auto.py @@ -1,5 +1,6 @@ # https://github.com/huggingface/datasets/blob/master/metrics/glue/glue.py from collections import OrderedDict +import typing metric_mode_mapping_glue = { "cola": [("matthews_correlation", "max")], @@ -42,10 +43,12 @@ METRIC_MAPPING = OrderedDict( ) -def get_default_and_alternative_metric(dataset_name, +def get_default_and_alternative_metric(dataset_name_list: typing.List, subdataset_name=None, custom_metric_name=None, custom_metric_mode_name=None): + from ..result_analysis.azure_utils import JobID + dataset_name = JobID.dataset_list_to_str(dataset_name_list) if dataset_name not in METRIC_MAPPING.keys(): assert custom_metric_name and custom_metric_mode_name, \ "The dataset is not in {}, you must explicitly specify " \ diff --git a/flaml/nlp/dataset/submission_auto.py b/flaml/nlp/dataset/submission_auto.py index 667419e35..2c63337b0 100644 --- a/flaml/nlp/dataset/submission_auto.py +++ b/flaml/nlp/dataset/submission_auto.py @@ -42,12 +42,13 @@ test_size_glue = { } -def output_prediction_glue(output_path, output_dir_name, predictions, train_data, dev_name, subdataset_name): - output_dir = os.path.join(output_path, output_dir_name) +def output_prediction_glue(output_path, zip_file_name, predictions, train_data, dev_name, subdataset_name): + output_dir = os.path.join(output_path, zip_file_name) if os.path.exists(output_dir): assert os.path.isdir(output_dir) else: - os.mkdir(output_dir) + import pathlib + pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) if subdataset_name != "stsb": label_list = train_data.features["label"].names @@ -81,8 +82,8 @@ def output_prediction_glue(output_path, output_dir_name, predictions, train_data else: writer.write(f"{index}\t{item:3.3f}\n") - shutil.make_archive(os.path.join(output_path, output_dir_name), 'zip', output_dir) - return os.path.join(output_path, output_dir_name + ".zip") + shutil.make_archive(os.path.join(output_path, zip_file_name), 'zip', output_dir) + return os.path.join(output_path, zip_file_name + ".zip") OUTPUT_PREDICTION_MAPPING = OrderedDict( @@ -92,16 +93,18 @@ OUTPUT_PREDICTION_MAPPING = OrderedDict( ) -def auto_output_prediction(dataset_name, +def auto_output_prediction(dataset_name_list: list, output_path, - output_dir_name, + zip_file_name, predictions, train_data, dev_name, subset_name): + from ..result_analysis.azure_utils import JobID + dataset_name = JobID.dataset_list_to_str(dataset_name_list) if dataset_name in OUTPUT_PREDICTION_MAPPING.keys(): return OUTPUT_PREDICTION_MAPPING[dataset_name](output_path, - output_dir_name, + zip_file_name, predictions, train_data, dev_name, diff --git a/flaml/nlp/dataset/task_auto.py b/flaml/nlp/dataset/task_auto.py index 71419a463..bdc3335f6 100644 --- a/flaml/nlp/dataset/task_auto.py +++ b/flaml/nlp/dataset/task_auto.py @@ -30,7 +30,9 @@ TASK_MAPPING = OrderedDict( ) -def get_default_task(dataset_name, subdataset_name=None): +def get_default_task(dataset_name_list: list, subdataset_name=None): + from ..result_analysis.azure_utils import JobID + dataset_name = JobID.dataset_list_to_str(dataset_name_list) assert dataset_name in TASK_MAPPING.keys(), "The dataset is not in {}, you must explicitly specify " \ "the custom_metric_name and custom_metric_mode_name".format( ",".join(TASK_MAPPING.keys())) diff --git a/flaml/nlp/hpo/get_grid_search_space.py 
b/flaml/nlp/hpo/get_grid_search_space.py index c6b1f0d1d..1fef1e616 100644 --- a/flaml/nlp/hpo/get_grid_search_space.py +++ b/flaml/nlp/hpo/get_grid_search_space.py @@ -1,8 +1,9 @@ # lookup table for the grid configs in each pre-trained language huggingface for different tasks -import copy -def get_space_union_and_unique(search_space_common, search_space_unique, this_case_tags: list): +def get_space_union_and_unique(search_space_common, + search_space_unique, + this_case_tags: list): """ get the recommended search configs for each pre-trained language models @@ -37,7 +38,7 @@ def get_space_union_and_unique(search_space_common, search_space_unique, this_ca def get_deberta_space(model_size_type=None, - dataset_name=None, + dataset_name_list: list = None, subdataset_name=None, algo_mode=None): """ @@ -64,18 +65,17 @@ def get_deberta_space(model_size_type=None, def get_longformer_space(model_size_type=None, - dataset_name=None, + dataset_name_list: list = None, subdataset_name=None, algo_mode=None): """ TODO: Longformer: The Long-Document Transformer """ - if dataset_name == "glue": - return + return def get_funnel_space(model_size_type=None, - dataset_name=None, + dataset_name_list: list = None, subdataset_name=None, algo_mode=None): """ @@ -154,11 +154,13 @@ def get_funnel_space(model_size_type=None, } from ..result_analysis.azure_utils import JobID return get_space_union_and_unique(search_space_common, search_space_unique, - [JobID.get_full_data_name(dataset_name, subdataset_name)]) + [JobID.get_full_data_name( + dataset_name_list, + subdataset_name)]) def get_bert_space(model_size_type=None, - dataset_name=None, + dataset_name_list: list = None, subdataset_name=None, algo_mode=None): """ @@ -203,11 +205,13 @@ def get_bert_space(model_size_type=None, "num_train_epochs": [2, 3, 4], } } - return get_space_union_and_unique(search_space_common, search_space_unique, [dataset_name]) + return get_space_union_and_unique(search_space_common, + search_space_unique, + dataset_name_list) def get_roberta_space(model_size_type=None, - dataset_name=None, + dataset_name_list: list = None, subdataset_name=None, algo_mode=None): # RoBERTa: A Robustly Optimized BERT Pretraining Approach @@ -241,11 +245,13 @@ def get_roberta_space(model_size_type=None, "num_train_epochs": [2], } } - return get_space_union_and_unique(search_space_common, search_space_unique, [dataset_name]) + return get_space_union_and_unique(search_space_common, + search_space_unique, + dataset_name_list) def get_electra_space(model_size_type=None, - dataset_name=None, + dataset_name_list: list = None, subdataset_name=None, algo_mode=None): """ @@ -255,8 +261,7 @@ def get_electra_space(model_size_type=None, assert model_size_type in ("small", "base", "large", "intermediate", "xlarge"), \ "Electra paper has only provided hyperparameter for the small and base huggingface" search_space_common = { - "learning_rate": [3e-5, 5e-5, 1e-4, 1.5e-4] if algo_mode == "grid" - else [3e-5, 5e-5, 1e-4, 1.5e-4, 2e-4, 3e-4, 5e-3], + "learning_rate": [3e-5, 5e-5, 1e-4, 1.5e-4], "weight_decay": [0.0], "adam_epsilon": [1e-6], "warmup_ratio": [0.1], @@ -282,7 +287,7 @@ def get_electra_space(model_size_type=None, "num_train_epochs": [3], }, "glue_mrpc": { - "num_train_epochs": [3], + "num_train_epochs": [0.2], }, "glue_cola": { "num_train_epochs": [3], @@ -302,11 +307,13 @@ def get_electra_space(model_size_type=None, } from ..result_analysis.azure_utils import JobID return get_space_union_and_unique(search_space_common, search_space_unique, - 
[JobID.get_full_data_name(dataset_name, subdataset_name), model_size_type]) + [JobID.get_full_data_name( + dataset_name_list, + subdataset_name), model_size_type]) def get_mobilebert_space(model_size_type=None, - dataset_name=None, + dataset_name_list: list = None, subdataset_name=None, algo_mode=None): """ @@ -326,7 +333,7 @@ def get_mobilebert_space(model_size_type=None, def get_albert_space(model_size_type=None, - dataset_name=None, + dataset_name_list: list = None, subdataset_name=None, algo_mode=None): """ @@ -453,4 +460,6 @@ def get_albert_space(model_size_type=None, # rates ((1-10) * e-5), and the number of epochs (2-10) from ..result_analysis.azure_utils import JobID return get_space_union_and_unique(search_space_common, search_space_unique, - [JobID.get_full_data_name(dataset_name, subdataset_name)]) + [JobID.get_full_data_name( + dataset_name_list, + subdataset_name)]) diff --git a/flaml/nlp/hpo/grid_searchspace_auto.py b/flaml/nlp/hpo/grid_searchspace_auto.py index 654f569f2..3e420bd6f 100644 --- a/flaml/nlp/hpo/grid_searchspace_auto.py +++ b/flaml/nlp/hpo/grid_searchspace_auto.py @@ -6,7 +6,9 @@ from .get_grid_search_space import \ get_roberta_space, get_funnel_space, get_deberta_space, - get_albert_space + get_albert_space, + get_longformer_space, + get_mobilebert_space ) GRID_SEARCH_SPACE_MAPPING = OrderedDict( @@ -17,6 +19,8 @@ GRID_SEARCH_SPACE_MAPPING = OrderedDict( ("funnel", get_funnel_space), ("deberta", get_deberta_space), ("albert", get_albert_space), + ("mobilebert", get_mobilebert_space), + ("longformer", get_longformer_space) ] ) @@ -53,7 +57,7 @@ class AutoGridSearchSpace: def from_model_and_dataset_name(cls, model_type, model_size_type, - dataset_name, + dataset_name_list: list = None, subdataset_name=None, algo_mode=None): """ @@ -67,7 +71,7 @@ class AutoGridSearchSpace: model_size_type: A string variable which is the size of the model, e.g., "small" - dataset_name: + dataset_name_list: A string variable which is the dataset name, e.g., "glue" subdataset_name: @@ -77,17 +81,17 @@ class AutoGridSearchSpace: A string variable which is the algorithm mode for grid search, e.g., "gridbert" Example: - >>> AutoGridSearchSpace.from_model_and_dataset_name("electra", "small", "glue", "rte", "grid") + >>> AutoGridSearchSpace.from_model_and_dataset_name("electra", "small", ["glue"], "rte", "grid") """ if model_type in GRID_SEARCH_SPACE_MAPPING.keys(): this_model_recommended_space = GRID_SEARCH_SPACE_MAPPING[model_type]( - model_size_type, dataset_name, subdataset_name, algo_mode) + model_size_type, dataset_name_list, subdataset_name, algo_mode) return this_model_recommended_space raise ValueError( "Unrecognized method {},{} for this kind of AutoGridSearchSpace: {}.\n" "Method name should be one of {}.".format( - model_type, dataset_name, cls.__name__, ", ".join(c.__name__ for c in GRID_SEARCH_SPACE_MAPPING.keys()) + model_type, dataset_name_list, cls.__name__, ", ".join(GRID_SEARCH_SPACE_MAPPING.keys()) ) ) diff --git a/flaml/nlp/hpo/hpo_searchspace.py b/flaml/nlp/hpo/hpo_searchspace.py index b19dac369..8b8d107e8 100644 --- a/flaml/nlp/hpo/hpo_searchspace.py +++ b/flaml/nlp/hpo/hpo_searchspace.py @@ -1,29 +1,32 @@ from collections import OrderedDict - -from ..huggingface.trainer import TrainerForAutoTransformers -from ray import tune -from transformers import TrainingArguments - from .grid_searchspace_auto import AutoGridSearchSpace -def hpo_space_custom(**custom_hpo_args): +def hpo_space_custom(model_type=None, + model_size_type=None, + dataset_name_list: list = None, + 
subdataset_name=None,
+                     algo_mode=None,
+                     **custom_hpo_args):
+    """
+    The 5 arguments here cannot be deleted; they need to be kept consistent with
+    the other functions in HPO_SEARCH_SPACE_MAPPING
+    """
     assert "hpo_space" in custom_hpo_args
     custom_search_space = custom_hpo_args["hpo_space"]
     return custom_search_space
 
 
-def bounded_gridunion(logger=None,
-                      model_type=None,
+def bounded_gridunion(model_type=None,
                       model_size_type=None,
-                      dataset_name=None,
+                      dataset_name_list: list = None,
                       subdataset_name=None,
+                      algo_mode=None,
                       **custom_hpo_args):
     assert "bound" in custom_hpo_args
-    gridunion_space = HPO_SEARCH_SPACE_MAPPING["uni"](logger,
-                                                      model_type,
+    gridunion_space = HPO_SEARCH_SPACE_MAPPING["uni"](model_type,
                                                       model_size_type,
-                                                      dataset_name,
+                                                      dataset_name_list,
                                                       subdataset_name,
                                                       **custom_hpo_args)
     for each_key in custom_hpo_args["bound"].keys():
@@ -50,29 +53,30 @@
     return gridunion_space
 
 
-def hpo_space_gridunion(logger=None,
-                        model_type=None,
+def hpo_space_gridunion(model_type=None,
                         model_size_type=None,
-                        dataset_name=None,
+                        dataset_name_list: list = None,
                         subdataset_name=None,
+                        algo_mode=None,
                         **custom_hpo_args):
     output_config = {}
-    for each_model_type in {"electra", "roberta", "bert"}:
+    for each_model_type in ["bert", "roberta", "electra"]:
         # if each_model_type == model_type: continue
         this_config = AutoGridSearchSpace.from_model_and_dataset_name(
-            each_model_type, model_size_type, dataset_name, subdataset_name, "hpo")
+            each_model_type, model_size_type, dataset_name_list, subdataset_name, "hpo")
         from ..utils import merge_dicts
         output_config = merge_dicts(output_config, this_config)
 
     default_values = {}
     """
     adding the default configuration from transformers/training_args.py into hpo space
     """
+    from transformers import TrainingArguments
     training_args = TrainingArguments(output_dir=".")
     for each_hp in output_config.keys():
         try:
             default_values[each_hp] = [getattr(training_args, each_hp)]
         except AttributeError:
-            pass
+            print("training args do not contain {}, skipped".format(each_hp))
 
     output_config = merge_dicts(output_config, default_values)
 
@@ -80,27 +84,26 @@
 def hpo_space_gridunion_smoke_test(
-        logger=None,
         model_type=None,
         model_size_type=None,
-        dataset_name=None,
+        dataset_name_list: list = None,
         subdataset_name=None,
+        algo_mode=None,
         **custom_hpo_args):
-    return {'learning_rate': [1e-5],
-            'weight_decay': [0.0],
-            'adam_epsilon': [1e-08],
-            'warmup_ratio': [0.1],
-            'per_device_train_batch_size': [2],
-            'hidden_dropout_prob': [0.1],
-            'attention_probs_dropout_prob': [0.1],
-            'num_train_epochs': [0.1]}
+    return {
+        "learning_rate": {"l": 1e-6, "u": 1e-3, "space": "log"},
+        "num_train_epochs": [0.01],
+        "per_device_train_batch_size": [2],
+        "warmup_ratio": {"l": 0.0, "u": 0.3, "space": "linear"},
+        "weight_decay": {"l": 0.0, "u": 0.3, "space": "linear"}
+    }
 
 
-def hpo_space_generic(logger=None,
-                      model_type=None,
+def hpo_space_generic(model_type=None,
                       model_size_type=None,
-                      dataset_name=None,
+                      dataset_name_list: list = None,
                       subdataset_name=None,
+                      algo_mode=None,
                       **custom_hpo_args):
     output_config = {
         "learning_rate": {"l": 1e-6, "u": 1e-3, "space": "log"},
@@ -112,11 +115,11 @@
     return output_config
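Throughout this module the search-space dicts follow one convention: a plain list denotes a discrete set of candidate values, and a dict with "l", "u" and "space" keys denotes a continuous range. A rough sketch of how a single entry is expected to map onto Ray Tune samplers; the helper below is hypothetical, the actual conversion lives in AutoTransformers._convert_dict_to_ray_tune_space:

    from ray import tune

    def to_tune_sample(entry):
        # hypothetical converter mirroring the convention above
        if isinstance(entry, dict):
            # {"l": 1e-6, "u": 1e-3, "space": "log"} -> tune.loguniform(1e-6, 1e-3)
            if entry.get("space") == "log":
                return tune.loguniform(entry["l"], entry["u"])
            # {"l": 0.0, "u": 0.3, "space": "linear"} -> tune.uniform(0.0, 0.3)
            return tune.uniform(entry["l"], entry["u"])
        # a list such as [2e-5, 3e-5] becomes a categorical choice
        # (grid mode would use tune.grid_search instead)
        return tune.choice(entry)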

-def hpo_space_generic_grid(logger=None,
-                           model_type=None,
+def hpo_space_generic_grid(model_type=None,
                            model_size_type=None,
-                           dataset_name=None,
+                           dataset_name_list: list = None,
                            subdataset_name=None,
+                           algo_mode=None,
                            **custom_hpo_args):
     output_config = {
         "learning_rate": [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 1e-4, 1.5e-4],
@@ -128,14 +131,14 @@
     return output_config
 
 
-def hpo_space_small(logger=None,
-                    model_type=None,
+def hpo_space_small(model_type=None,
                     model_size_type=None,
-                    dataset_name=None,
+                    dataset_name_list: list = None,
                     subdataset_name=None,
+                    algo_mode=None,
                     **custom_hpo_args):
     config_json = AutoGridSearchSpace.from_model_and_dataset_name(
-        model_type, model_size_type, dataset_name, subdataset_name, "hpo")
+        model_type, model_size_type, dataset_name_list, subdataset_name, "hpo")
 
     output_config = {}
     for each_hp in config_json.keys():
@@ -158,13 +161,28 @@
     return output_config
 
 
+def hpo_space_grid(model_type=None,
+                   model_size_type=None,
+                   dataset_name_list: list = None,
+                   subdataset_name=None,
+                   algo_mode=None,
+                   **custom_hpo_args):
+    return AutoGridSearchSpace.from_model_and_dataset_name(model_type,
+                                                           model_size_type,
+                                                           dataset_name_list,
+                                                           subdataset_name,
+                                                           algo_mode
+                                                           )
+
+
 HPO_SEARCH_SPACE_MAPPING = OrderedDict(
     [
+        ("grid", hpo_space_grid),
         ("uni", hpo_space_gridunion),
         ("gnr", hpo_space_generic),
         ("uni_test", hpo_space_gridunion_smoke_test),
         ("cus", hpo_space_custom),
-        ("buni", bounded_gridunion)
+        ("buni", bounded_gridunion),
     ]
 )
 
@@ -181,29 +199,33 @@ class AutoHPOSearchSpace:
     def __init__(self):
         raise EnvironmentError(
             "AutoHPOSearchSpace is designed to be instantiated "
-            "using the `AutoHPOSearchSpace.from_config_and_method_name(cls, logger,hpo_searchspace_name,"
-            "model_type,model_size_type,dataset_name,subdataset_name = None,**custom_hpo_args)` methods."
+            "using the `AutoHPOSearchSpace.from_config_and_method_name(cls, hpo_searchspace_name,"
+            "model_type,model_size_type,dataset_name,subdataset_name=None,**custom_hpo_args)` methods."
) @classmethod def from_model_and_dataset_name(cls, - logger, hpo_searchspace_mode, model_type, model_size_type, - dataset_name, + dataset_name_list: list = None, subdataset_name=None, + algo_mode=None, **custom_hpo_args): """ Instantiate one of the classes for getting the hpo search space from the search space name, model type, model size type, dataset name and sub dataset name Args: - logger: - Reference to the logger hpo_searchspace_mode: - A string variable which is name of the hpo search space, e.g., "uni" + A string variable which is the mode of the hpo search space, it must be chosen from the following options: + - uni: the union of BERT, RoBERTa and Electra's grid configs + - grid: the recommended grid config of the LM specified in jobconfig.pre + - gnr: the generic continuous search space + - uni_test: the search space for smoke test + - cus: user customized search space, specified in the "hpo_space" argument in AutoTransformers.fit + - buni: bounded grid union search space model_type: A string variable which is the type of the model, e.g., "electra" @@ -221,22 +243,22 @@ class AutoHPOSearchSpace: Any additional keyword argument to be used for the function for the HPO search space Example: - >>> AutoHPOSearchSpace.from_model_and_dataset_name(logger, "uni", "electra", "small", "glue", "rte") + >>> AutoHPOSearchSpace.from_model_and_dataset_name("uni", "electra", "small", ["glue"], "rte", "hpo") """ if hpo_searchspace_mode in HPO_SEARCH_SPACE_MAPPING.keys(): hpo_space = HPO_SEARCH_SPACE_MAPPING[hpo_searchspace_mode]( - logger, model_type, model_size_type, - dataset_name, + dataset_name_list, subdataset_name, + algo_mode, **custom_hpo_args) return hpo_space raise ValueError( "Unrecognized method {},{} for this kind of AutoHPOSearchSpace: {}.\n" "Method name should be one of {}.".format( - hpo_searchspace_mode, dataset_name, cls.__name__, - ", ".join(c.__name__ for c in HPO_SEARCH_SPACE_MAPPING.keys()) + hpo_searchspace_mode, dataset_name_list, cls.__name__, + ", ".join(HPO_SEARCH_SPACE_MAPPING.keys()) ) ) diff --git a/flaml/nlp/hpo/scheduler_auto.py b/flaml/nlp/hpo/scheduler_auto.py index 7e54afb65..eca680de1 100644 --- a/flaml/nlp/hpo/scheduler_auto.py +++ b/flaml/nlp/hpo/scheduler_auto.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from ray.tune.schedulers import ASHAScheduler, HyperBandScheduler, MedianStoppingRule +from ray.tune.schedulers import ASHAScheduler, HyperBandScheduler SCHEDULER_MAPPING = OrderedDict( [ @@ -37,15 +37,13 @@ class AutoScheduler: Example: >>> AutoScheduler.from_scheduler_name("asha") """ - if scheduler_name in SCHEDULER_MAPPING.keys(): - try: - return SCHEDULER_MAPPING[scheduler_name](**kwargs) - except TypeError: + if SCHEDULER_MAPPING[scheduler_name] is None: return None + return SCHEDULER_MAPPING[scheduler_name](**kwargs) raise ValueError( "Unrecognized scheduler {} for this kind of AutoScheduler: {}.\n" "Scheduler name should be one of {}.".format( - scheduler_name, cls.__name__, ", ".join(c.__name__ for c in SCHEDULER_MAPPING.keys()) + scheduler_name, cls.__name__, ", ".join(SCHEDULER_MAPPING.keys()) ) ) diff --git a/flaml/nlp/hpo/searchalgo_auto.py b/flaml/nlp/hpo/searchalgo_auto.py index eec7d8e3a..5cdf97888 100644 --- a/flaml/nlp/hpo/searchalgo_auto.py +++ b/flaml/nlp/hpo/searchalgo_auto.py @@ -3,6 +3,7 @@ from collections import OrderedDict import ray from ray.tune.suggest.optuna import OptunaSearch + from flaml import CFO, BlendSearch SEARCH_ALGO_MAPPING = OrderedDict( @@ -55,7 +56,7 @@ class AutoSearchAlgorithm: Example: >>> from 
flaml.nlp.hpo.hpo_searchspace import AutoHPOSearchSpace
-            >>> search_space_hpo=AutoHPOSearchSpace.from_model_and_dataset_name(logger, "uni", "electra", "small", "glue", "rte")
+            >>> search_space_hpo=AutoHPOSearchSpace.from_model_and_dataset_name("uni", "electra", "small", ["glue"], "rte")
             >>> search_algo = AutoSearchAlgorithm.from_method_name("bs", "cus", search_space_hpo,
              {"points_to_evaluate": [{"learning_rate": 1e-5, "num_train_epochs": 10}]})
         """
@@ -64,40 +65,39 @@
         if not search_algo_name:
             search_algo_name = "grid"
         if search_algo_name in SEARCH_ALGO_MAPPING.keys():
-            try:
-                """
-                filtering the customized args for hpo from custom_hpo_args, keep those
-                which are in the input variable name list of the constructor of
-                the algorithm, remove those which does not appear in the input variables
-                of the constructor function
-                """
-                this_search_algo_kwargs = None
-                allowed_arguments = SEARCH_ALGO_MAPPING[search_algo_name].__init__.__code__.co_varnames
-                allowed_custom_args = {key: custom_hpo_args[key] for key in custom_hpo_args.keys() if
-                                       key in allowed_arguments}
-
-                """
-                If the search_algo_args_mode is "dft", set the args to the default args, e.g.,the default args for
-                BlendSearch is "low_cost_partial_config": {"num_train_epochs": min_epoch,"per_device_train_batch_size"
-                : max(hpo_search_space["per_device_train_batch_size"].categories)},
-                """
-                if search_algo_args_mode == "dft":
-                    this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
-                        "dft", hpo_search_space=hpo_search_space, **allowed_custom_args)
-                elif search_algo_args_mode == "cus":
-                    this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
-                        "cus", hpo_search_space=hpo_search_space, **allowed_custom_args)
-
-                """
-                returning the hpo algorithm with the arguments
-                """
-                return SEARCH_ALGO_MAPPING[search_algo_name](**this_search_algo_kwargs)
-            except KeyError:
+            if search_algo_name == "grid":
                 return None
+            """
+            filtering the customized args for hpo from custom_hpo_args, keep those
+            which are in the input variable name list of the constructor of
+            the algorithm, remove those which does not appear in the input variables
+            of the constructor function
+            """
+            this_search_algo_kwargs = None
+            allowed_arguments = SEARCH_ALGO_MAPPING[search_algo_name].__init__.__code__.co_varnames
+            allowed_custom_args = {key: custom_hpo_args[key] for key in custom_hpo_args.keys() if
+                                   key in allowed_arguments}
+
+            """
+            If the search_algo_args_mode is "dft", set the args to the default args, e.g.,the default args for
+            BlendSearch is "low_cost_partial_config": {"num_train_epochs": min_epoch,"per_device_train_batch_size"
+            : max(hpo_search_space["per_device_train_batch_size"].categories)},
+            """
+            if search_algo_args_mode == "dft":
+                this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
+                    "dft", hpo_search_space=hpo_search_space, **allowed_custom_args)
+            elif search_algo_args_mode == "cus":
+                this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
+                    "cus", hpo_search_space=hpo_search_space, **allowed_custom_args)
+
+            """
+            returning the hpo algorithm with the arguments
+            """
+            return SEARCH_ALGO_MAPPING[search_algo_name](**this_search_algo_kwargs)
         raise ValueError(
             "Unrecognized method {} for this kind of AutoSearchAlgorithm: {}.\n"
             "Method name should be one of {}.".format(
-                search_algo_name, cls.__name__, ", ".join(c.__name__ for c in SEARCH_ALGO_MAPPING.keys())
+                search_algo_name, cls.__name__, ", ".join(SEARCH_ALGO_MAPPING.keys())
            )
) diff --git a/flaml/nlp/huggingface/__init__.py b/flaml/nlp/huggingface/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/flaml/nlp/huggingface/switch_head_auto.py b/flaml/nlp/huggingface/switch_head_auto.py index d92f12fc3..60bf2e8fd 100644 --- a/flaml/nlp/huggingface/switch_head_auto.py +++ b/flaml/nlp/huggingface/switch_head_auto.py @@ -45,8 +45,8 @@ class AutoSeqClassificationHead: if model_type in MODEL_CLASSIFICATION_HEAD_MAPPING.keys(): return MODEL_CLASSIFICATION_HEAD_MAPPING[model_type](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" + "Unrecognized configuration class {} for class {}.\n" "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_CLASSIFICATION_HEAD_MAPPING.keys()) + config.__class__, cls.__name__, ", ".join(MODEL_CLASSIFICATION_HEAD_MAPPING.keys()) ) ) diff --git a/flaml/nlp/huggingface/trainer.py b/flaml/nlp/huggingface/trainer.py index 80a25d2ad..ce2c2c438 100644 --- a/flaml/nlp/huggingface/trainer.py +++ b/flaml/nlp/huggingface/trainer.py @@ -1,14 +1,6 @@ -import copy import os - import transformers -from ray import tune -import torch -from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR - -transformers.logging.set_verbosity_error() - class TrainerForAutoTransformers(transformers.Trainer): """ @@ -18,12 +10,6 @@ class TrainerForAutoTransformers(transformers.Trainer): huggingface (:class:`~transformers.PreTrainedModel` or :obj:`torch.nn.Module`, `optional`): """ - def get_optimizers( - self, num_training_steps - ): - self.current_optimizer, self.current_scheduler = super().get_optimizers(num_training_steps) - return (self.current_optimizer, self.current_scheduler) - def evaluate(self, eval_dataset=None): """ @@ -33,7 +19,8 @@ class TrainerForAutoTransformers(transformers.Trainer): eval_dataset: the dataset to be evaluated """ - import wandb + from ray import tune + eval_dataloader = self.get_eval_dataloader(eval_dataset) output = self.prediction_loop( eval_dataloader, description="Evaluation") @@ -53,6 +40,10 @@ class TrainerForAutoTransformers(transformers.Trainer): Overriding transformers.Trainer.save_state. It is only through saving the states can best_trial.get_best_checkpoint return a non-empty value. """ + import torch + from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR + from ray import tune + with tune.checkpoint_dir(step=self.state.global_step) as checkpoint_dir: self.args.output_dir = checkpoint_dir # This is the directory name that Huggingface requires. 
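Because save_state above routes checkpoints through tune.checkpoint_dir, Ray Tune can associate each checkpoint with its trial, and the caller can later recover the best one from the analysis object. A rough sketch, assuming `analysis` is the ExperimentAnalysis returned by tune.run and `metric_name`/`metric_mode` match the values reported in evaluate() (Ray Tune 1.x API):

    # `analysis`, `metric_name` and `metric_mode` are assumptions, not part of this diff
    best_trial = analysis.get_best_trial(metric=metric_name, mode=metric_mode, scope="all")
    best_ckpt_dir = analysis.get_best_checkpoint(best_trial, metric=metric_name, mode=metric_mode)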
@@ -111,11 +102,3 @@ class TrainerForAutoTransformers(transformers.Trainer): per_device_train_batch_size, device_count) return float(warmup_steps / max_steps) - - @staticmethod - def resolve_hp_conflict(search_space_dict): - if "max_steps" in search_space_dict and "num_train_epochs" in search_space_dict: - del search_space_dict["num_train_epochs"] - if "warmup_ratio" in search_space_dict and "warmup_steps" in search_space_dict: - del search_space_dict["warmup_ratio"] - return search_space_dict diff --git a/flaml/nlp/result_analysis/azure_utils.py b/flaml/nlp/result_analysis/azure_utils.py index 7be3e5fd4..6315a1327 100644 --- a/flaml/nlp/result_analysis/azure_utils.py +++ b/flaml/nlp/result_analysis/azure_utils.py @@ -1,14 +1,60 @@ import re import pathlib import os -from azure.storage.blob import BlobServiceClient, ContainerClient -from transformers import AutoConfig - -from ..utils import get_wandb_azure_key from datetime import datetime from dataclasses import dataclass, field -from ..hpo.grid_searchspace_auto import HF_MODEL_LIST import json +from typing import Tuple, List, Union, Optional +import argparse + + +class ConfigScore: + trial_id: str = field(default=None) + start_time: float = field(default=None) + last_update_time: float = field(default=None) + config: dict = field(default=None) + metric_score: dict = field(default=None) + time_stamp: float = field(default=None) + + def __init__(self, + trial_id: str = None, + start_time: float = None, + last_update_time: float = None, + config: dict = None, + metric_score: dict = None, + time_stamp: float = None + ): + self.trial_id = trial_id + self.start_time = start_time + self.last_update_time = last_update_time + self.config = config + self.metric_score = metric_score + self.time_stamp = time_stamp + + +class ConfigScoreList: + + def __init__(self, + config_score_list: List[ConfigScore], + jobid_config=None, + blob_file=None, + ): + self._config_score_list = config_score_list + self._blob_file = blob_file + self._jobid_config = jobid_config + + def sorted(self, sort_method="unsorted", metric_mode="max"): + if sort_method == "unsorted": + self._config_score_list = self._config_score_list + elif sort_method == "sort_time": + self._config_score_list = sorted(self._config_score_list, key=lambda x: x.start_time, reverse=False) + else: + self._config_score_list = sorted(self._config_score_list, + key=lambda x: getattr(x, "metric_score")[metric_mode], reverse=True) + + def get_best_config(self, + metric_mode="max"): + return max(self._config_score_list, key=lambda x: getattr(x, "metric_score")[metric_mode]) @dataclass @@ -27,6 +73,8 @@ class JobID: rep: int = field(default=0) sddt: int = field(default=None) sdhf: int = field(default=None) + var1: Optional[float] = field(default=None) + var2: Optional[float] = field(default=None) def __init__(self, console_args=None): @@ -41,7 +89,7 @@ class JobID: self.subdat = "mrpc" self.mod = "hpo" self.spa = "uni_test" - self.arg = "dft" + self.arg = "cus" self.alg = "bs" self.pru = "None" self.pre_full = "google/mobilebert-uncased" @@ -51,6 +99,8 @@ class JobID: self.rep = 0 self.sddt = 43 self.sdhf = 42 + self.var1 = None + self.var2 = None def is_match(self, partial_jobid): """ @@ -90,7 +140,7 @@ class JobID: preparing for the job ID for wandb """ field_dict = self.__dict__ - keytoval_str = "_".join([JobID.dataset_list_to_str(field_dict[key], key) + keytoval_str = "_".join([JobID.dataset_list_to_str(field_dict[key]) if type(field_dict[key]) == list else str(field_dict[key]) for key in 
field_dict.keys() if not key.endswith("_full")]) @@ -102,7 +152,7 @@ class JobID: """ list_keys = list(JobID.__dataclass_fields__.keys()) field_dict = self.__dict__ - keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key], key) + keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key]) if type(field_dict[key]) == list else key + "=" + str(field_dict[key]) for key in list_keys if not key.endswith("_full")]) @@ -114,7 +164,7 @@ class JobID: """ list_keys = list(JobID.__dataclass_fields__.keys()) field_dict = self.__dict__ # field_dict contains fields whose values are not None - keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key], key) + keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key]) if type(field_dict[key]) == list else key + "=" + str(field_dict[key]) for key in list_keys if key in field_dict.keys()]) @@ -131,9 +181,20 @@ class JobID: pre = 'funnel', presz = 'xlarge', spt = 'rspt', rep = 0, sddt = 43, sdhf = 42) """ - field_keys = [key for key in - list(JobID.__dataclass_fields__.keys()) if not key.endswith("_full")] - regex_expression = ".*" + "_".join([key + "=(?P<" + key + ">.*)" for key in field_keys]) + ".(json|zip)" + field_keys = [key for key in list(JobID.__dataclass_fields__.keys()) if not key.endswith("_full")] + regex_expression = ".*" + is_first = True + for key in field_keys: + if is_first: + prefix = "" + is_first = False + else: + prefix = "_" + if key.startswith("sd") or key.startswith("var"): + regex_expression += "(" + prefix + key + "=(?P<" + key + ">[^_]*))?" + else: + regex_expression += prefix + key + "=(?P<" + key + ">[^_]*)" + regex_expression += ".(json|zip)" result = re.search(regex_expression, keytoval_str) if result: result_dict = {} @@ -142,8 +203,13 @@ class JobID: result_dict[key] = [result.group(key)] elif key == "rep": try: - result_dict[key] = int(result.group(key)) - except IndexError: + try: + result_dict[key] = int(result.group(key)) + except IndexError: + print("No group {} in the regex result".format(key)) + result_dict[key] = -1 + except ValueError: + print("Cannot parse integer {}".format(result.group(key))) result_dict[key] = -1 else: result_dict[key] = result.group(key) @@ -152,24 +218,23 @@ class JobID: return None @staticmethod - def dataset_list_to_str(dataset_name, key): - if key == "dat": - assert isinstance(dataset_name, list) + def dataset_list_to_str(dataset_name, key="dat"): + if isinstance(dataset_name, list): return "-".join(dataset_name) else: return dataset_name - @staticmethod def set_jobid_from_arg_list(self, **jobid_list ): """ set the jobid from a dict object """ - for key in jobid_list.keys(): assert key in JobID.__dataclass_fields__.keys() setattr(self, key, jobid_list[key]) + if self.mod == "grid": + self.alg = "grid" @staticmethod def convert_blobname_to_jobid(blobname): @@ -185,11 +250,14 @@ class JobID: return None @staticmethod - def get_full_data_name(dataset_name, subdataset_name=None): + def get_full_data_name(dataset_name: Union[list, str], subdataset_name=None): """ convert a dataset name and sub dataset name to a full dataset name """ - full_dataset_name = dataset_name + if isinstance(dataset_name, list): + full_dataset_name = JobID.dataset_list_to_str(dataset_name) + else: + full_dataset_name = dataset_name if subdataset_name: full_dataset_name = full_dataset_name + "_" + subdataset_name return full_dataset_name @@ -198,10 +266,11 @@ class JobID: """ get the full dataset name of the current JobID object """ - 
return JobID.get_full_data_name(JobID.dataset_list_to_str(self.dat, "dat"), self.subdat) + return JobID.get_full_data_name(JobID.dataset_list_to_str(self.dat), self.subdat) @staticmethod def _extract_model_type_with_keywords_match(pre_full): + from ..hpo.grid_searchspace_auto import HF_MODEL_LIST matched_model_type = [] for each_model_type in HF_MODEL_LIST: if each_model_type in pre_full: @@ -211,109 +280,64 @@ class JobID: @staticmethod def extract_model_type(full_model_name): + from transformers import AutoConfig model_config = AutoConfig.from_pretrained(full_model_name) config_json_file = model_config.get_config_dict(full_model_name)[0] try: model_type = config_json_file["model_type"] except KeyError: - model_type = JobID._extract_model_type_with_keywords_match() + print("config_json_file does not contain model_type, re-extracting with keywords matching") + model_type = JobID._extract_model_type_with_keywords_match(full_model_name) return model_type - def set_jobid_from_console_args(self, console_args): - self.dat = console_args.dataset_subdataset_name.split(":")[0].split(",") - self.subdat = console_args.dataset_subdataset_name.split(":")[1] - self.mod = console_args.algo_mode - self.spa = console_args.space_mode - self.arg = console_args.search_alg_args_mode - self.alg = console_args.algo_name - self.pru = console_args.pruner - self.pre_full = console_args.pretrained_model_size.split(":")[0] - self.pre = JobID.extract_model_type(self.pre_full) - self.presz = console_args.pretrained_model_size.split(":")[1] - self.spt = console_args.resplit_mode - self.rep = console_args.rep_id - self.sddt = console_args.seed_data - self.sdhf = console_args.seed_transformers - @staticmethod - def legacy_old_blobname_to_new_blobname(self, - old_blobname): - spa_id2val = { - 0: "gnr", - 1: "uni" + def get_attrval_from_arg_or_dict(console_args: Union[argparse.ArgumentParser, dict], each_key): + if type(console_args) == argparse.Namespace: + return getattr(console_args, each_key) + else: + return console_args[each_key] + + def set_jobid_from_console_args(self, console_args: Union[argparse.ArgumentParser, dict]): + from ..utils import pretrained_model_size_format_check, dataset_subdataset_name_format_check + console_to_jobid_key_mapping = { + "pretrained_model_size": "pre", + "dataset_subdataset_name": "dat", + "algo_mode": "mod", + "space_mode": "spa", + "search_alg_args_mode": "arg", + "algo_name": "alg", + "pruner": "pru", + "resplit_mode": "spt", + "rep_id": "rep", + "seed_data": "sddt", + "seed_transformers": "sdhf", + "optarg1": "var1", + "optarg2": "var2" } - alg_id2val = { - 0: "bs", - 1: "optuna", - 2: "cfo" - } - pre_id2val = { - 0: "xlnet-base-cased", - 1: "albert-large-v1", - 2: "distilbert-base-uncased", - 3: "microsoft/deberta-base", - 4: "funnel-transformer/small-base", - 5: "microsoft/deberta-large", - 6: "funnel-transformer/large-base", - 7: "funnel-transformer/intermediate-base", - 8: "funnel-transformer/xlarge-base" - } - presz_id2val = { - 0: "base", - 1: "small", - 2: "base", - 3: "base", - 4: "base", - 5: "large", - 6: "large", - 7: "intermediate", - 8: "xlarge" - } - spt_id2val = { - 0: "rspt", - 1: "ori" - } - result_grid = re.search(r".*_mod(el)?(?P\d+)_None_None(_spt(?P\d+))?_rep(?P\d+).log", - old_blobname) - result = re.search( - r".*_mod(el)?(?P\d+)_(alg)?(?P\d+)_(spa)?" 
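# Usage sketch for the rewritten set_jobid_from_console_args in this hunk: it
# accepts either an argparse.Namespace or a plain dict (via
# get_attrval_from_arg_or_dict) and maps the long console argument names onto
# the short JobID fields. Note the two format-checked keys still read their
# value with getattr, so pass a Namespace when setting dat/pre:

from argparse import Namespace
from flaml.nlp.result_analysis.azure_utils import JobID

jobid_config = JobID()
jobid_config.set_jobid_from_console_args(Namespace(
    dataset_subdataset_name="glue:mrpc",
    pretrained_model_size="google/mobilebert-uncased:small",
    algo_mode="hpo", space_mode="uni", search_alg_args_mode="dft",
    algo_name="bs", pruner="None", resplit_mode="rspt",
    rep_id=0, seed_data=43, seed_transformers=42, optarg1=None, optarg2=None))
# extract_model_type downloads the model config from huggingface.co to fill in pre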
- r"(?P\d+)(_spt(?P\d+))?_rep(?P\d+).log", - old_blobname) - if result_grid: - dat = [old_blobname.split("/")[1].split("_")[0]] - subdat = old_blobname.split("/")[1].split("_")[1] - mod = "hpo" - spa = None - arg = None - alg = None - pru = None - pre = pre_id2val[int(result_grid.group("model_id"))] - presz = presz_id2val[int(result_grid.group("model_id"))] + for each_key in console_to_jobid_key_mapping.keys(): try: - spt = spt_id2val[int(result_grid.group("split_id"))] + try: + if each_key == "dataset_subdataset_name": + dataset_subdataset_name_format_check(getattr(console_args, each_key)) + self.dat = JobID.get_attrval_from_arg_or_dict(console_args, each_key).split(":")[0].split(",") + self.subdat = JobID.get_attrval_from_arg_or_dict(console_args, each_key).split(":")[1] + elif each_key == "pretrained_model_size": + pretrained_model_size_format_check(getattr(console_args, each_key)) + self.pre_full = JobID.get_attrval_from_arg_or_dict(console_args, each_key).split(":")[0] + self.pre = JobID.extract_model_type(self.pre_full) + self.presz = JobID.get_attrval_from_arg_or_dict(console_args, each_key).split(":")[1] + else: + jobid_key = console_to_jobid_key_mapping[each_key] + attrval = JobID.get_attrval_from_arg_or_dict(console_args, each_key) + setattr(self, jobid_key, attrval) + except AttributeError: + print("console_args has no attribute {}, continue".format(each_key)) + continue except KeyError: - spt = spt_id2val[0] - rep = None - self.set_jobid_from_arg_list(dat, subdat, mod, spa, arg, alg, pru, pre, presz, spt, rep) - return self.to_jobid_string() - if result: - dat = [old_blobname.split("/")[1].split("_")[0]] - subdat = old_blobname.split("/")[1].split("_")[1] - mod = "hpo" - spa = spa_id2val[int(result.group("space_id"))] - arg = "dft" - alg = alg_id2val[int(result.group("algo_id"))] - pru = "None" - pre = pre_id2val[int(result_grid.group("model_id"))] - presz = presz_id2val[int(result_grid.group("model_id"))] - try: - spt = spt_id2val[int(result_grid.group("split_id"))] - except KeyError: - spt = spt_id2val[0] - rep = int(result.group("rep_id")) - self.set_jobid_from_arg_list(dat, subdat, mod, spa, arg, alg, pru, pre, presz, spt, rep) - return self.to_jobid_string() - return None + print("console_args has no attribute {}, continue".format(each_key)) + continue + if self.mod == "grid": + self.alg = "grid" class AzureUtils: @@ -321,62 +345,89 @@ class AzureUtils: def __init__(self, root_log_path=None, console_args=None, - jobid=None, autohf=None): + from ..utils import get_wandb_azure_key if root_log_path: self.root_log_path = root_log_path else: self.root_log_path = "logs_azure" - self.jobid = jobid + self.jobid = autohf.jobid_config self.console_args = console_args self.autohf = autohf if console_args: wandb_key, azure_key, container_name = get_wandb_azure_key(console_args.key_path) self._container_name = container_name self._azure_key = azure_key + else: + self._container_name = self._azure_key = "" def _get_complete_connection_string(self): - return "DefaultEndpointsProtocol=https;AccountName=docws5141197765;AccountKey=" \ - + self._azure_key + ";EndpointSuffix=core.windows.net" + try: + return "DefaultEndpointsProtocol=https;AccountName=docws5141197765;AccountKey=" \ + + self._azure_key + ";EndpointSuffix=core.windows.net" + except AttributeError: + return "DefaultEndpointsProtocol=https;AccountName=docws5141197765;AccountKey=" \ + ";EndpointSuffix=core.windows.net" def _init_azure_clients(self): - connection_string = self._get_complete_connection_string() - container_client = 
ContainerClient.from_connection_string(conn_str=connection_string, - container_name=self._container_name) - return container_client + try: + from azure.storage.blob import ContainerClient + connection_string = self._get_complete_connection_string() + try: + container_client = ContainerClient.from_connection_string(conn_str=connection_string, + container_name=self._container_name) + return container_client + except ValueError: + print("AzureUtils._container_name is specified as: {}, " + "please correctly specify AzureUtils._container_name".format(self._container_name)) + return None + except ImportError: + print("To use the azure storage component in flaml.nlp, run pip install azure-storage-blob") def _init_blob_client(self, local_file_path): - connection_string = self._get_complete_connection_string() - blob_service_client = BlobServiceClient.from_connection_string(connection_string) - blob_client = blob_service_client.get_blob_client(container=self._container_name, blob=local_file_path) - return blob_client + try: + from azure.storage.blob import BlobServiceClient + + connection_string = self._get_complete_connection_string() + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + try: + blob_client = blob_service_client.get_blob_client(container=self._container_name, blob=local_file_path) + return blob_client + except ValueError: + print("_container_name is unspecified or wrongly specified, please specify _container_name in AzureUtils") + return None + except ImportError: + print("To use the azure storage component in flaml.nlp, run pip install azure-storage-blob") def upload_local_file_to_azure(self, local_file_path): - blob_client = self._init_blob_client(local_file_path) - with open(local_file_path, "rb") as fin: - blob_client.upload_blob(fin, overwrite=True) + try: + from azure.core.exceptions import HttpResponseError + try: + blob_client = self._init_blob_client(local_file_path) + if blob_client: + with open(local_file_path, "rb") as fin: + blob_client.upload_blob(fin, overwrite=True) + except HttpResponseError as err: + print("Cannot upload blob due to {}: {}".format("azure.core.exceptions.HttpResponseError", + err)) + except ImportError: + print("To use the azure storage component in flaml.nlp, run pip install azure-storage-blob") def download_azure_blob(self, blobname): blob_client = self._init_blob_client(blobname) - pathlib.Path(re.search("(?P^.*)/[^/]+$", blobname).group("parent_path")).mkdir( - parents=True, exist_ok=True) - with open(blobname, "wb") as fout: - fout.write(blob_client.download_blob().readall()) + if blob_client: + pathlib.Path(re.search("(?P^.*)/[^/]+$", blobname).group("parent_path")).mkdir( + parents=True, exist_ok=True) + with open(blobname, "wb") as fout: + fout.write(blob_client.download_blob().readall()) - def write_exception(self): - result_json = { - "timestamp": datetime.now(), - } - local_file_path = self.generate_local_json_path() - self.create_local_json_and_upload(result_json, local_file_path) - - def extract_log_from_analysis(self, - analysis): + def extract_configscore_list_from_analysis(self, + analysis): """ Extracting a json object for storing the key information returned from tune.run """ - json_log = [] + configscore_list = [] for each_trial in analysis.trials: trial_id = each_trial.trial_id start_time = each_trial.start_time @@ -385,18 +436,23 @@ class AzureUtils: try: metric_score = each_trial.metric_analysis["eval_" + analysis.default_metric] time_stamp = each_trial.metric_analysis['timestamp'] - 
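# Each ray.tune trial is flattened into one ConfigScore record in this hunk.
# Constructing one by hand, as the new test_cover_azure.py does, shows the
# expected field types (metric_score is keyed by metric mode, e.g. "max"):

from flaml.nlp.result_analysis.azure_utils import ConfigScore

each_configscore = ConfigScore(trial_id="test", start_time=0.0, last_update_time=0.0,
                               config={}, metric_score={"max": 0.0}, time_stamp=0.0)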
json_log.append({"trial_id": trial_id, - "start_time": start_time, - "last_update_time": last_update_time, - "config": config, - "metric_score": metric_score, - "time_stamp": time_stamp}) except KeyError: - pass - return json_log + print("KeyError, {} does not contain the key {} or {}".format("each_trial.metric_analysis", + "eval_" + analysis.default_metric, + "timestamp")) + metric_score = 0 + time_stamp = 0 + configscore_list.append(ConfigScore( + trial_id=trial_id, + start_time=start_time, + last_update_time=last_update_time, + config=config, + metric_score=metric_score, + time_stamp=time_stamp)) + return configscore_list def write_autohf_output(self, - json_log=None, + configscore_list=None, valid_metric=None, predictions=None, duration=None): @@ -405,8 +461,8 @@ class AzureUtils: """ local_file_path = self.generate_local_json_path() output_json = {} - if json_log: - output_json["val_log"] = json_log + if configscore_list: + output_json["val_log"] = [configscore.__dict__ for configscore in configscore_list] if valid_metric: output_json["valid_metric"] = valid_metric if duration: @@ -432,47 +488,6 @@ class AzureUtils: fout.flush() self.upload_local_file_to_azure(local_file_path) - def legacy_to_json(self): - container_client = self._init_azure_clients() - for old_blob in container_client.list_blobs(): - new_jobid_str = self.jobid.legacy_old_blobname_to_new_blobname(old_blob.name) - if new_jobid_str: - self.download_azure_blob(old_blob.name) - with open(old_blob.name, "r") as fin: - alllines = fin.readlines() - wandb_group_name = alllines[0].rstrip("\n:") - timestamp = re.search( - r"timestamp:(?P.*):", - alllines[1].strip("\n")).group("timestamp") - duration = re.search( - r"duration:(?P.*)$", - alllines[3].strip("\n")).group("duration") - sample_num = int(re.search( - r"sample_num: (?P\d+)$", - alllines[4].strip("\n")).group("sample_num")) - validation = {"accuracy": float(re.search( - "validation accuracy: (?P.*)$", - alllines[2].strip("\n")).group("validation"))} - test = None - if len(alllines) > 6: - result_test = re.search("test accuracy:(?P.*)$", alllines[6].strip("\n")) - if result_test: - test = json.loads(result_test.group("test")) - yml_file = None - if len(alllines) > 8: - if alllines[8].startswith("aml"): - yml_file = alllines[8].strip("\n") - new_json = {"wandb_group_name": wandb_group_name, - "validation": validation, - "test": test, - "timestamp": timestamp, - "duration": duration, - "sample_num": sample_num, - "yml_file": yml_file} - full_dataset_name = self.jobid.get_jobid_full_data_name() - new_blobname = os.path.join("logs_azure/", full_dataset_name, new_jobid_str + ".json") - self.create_local_json_and_upload(new_json, new_blobname) - def create_local_prediction_and_upload(self, local_json_file, predictions): @@ -480,198 +495,104 @@ class AzureUtils: store predictions (a .zip file) locally and upload """ azure_save_file_name = local_json_file.split("/")[-1][:-5] + try: + output_dir = self.console_args.data_root_dir + except AttributeError: + print("console_args does not contain data_root_dir, loading the default value") + from ..utils import load_dft_args + console_args = load_dft_args() + output_dir = getattr(console_args, "data_root_dir") local_archive_path = self.autohf.output_prediction(predictions, - output_prediction_path=self.console_args.data_root_dir + "result/", + output_prediction_path=output_dir + "result/", output_zip_file_name=azure_save_file_name) self.upload_local_file_to_azure(local_archive_path) - def get_ranked_configs(self, metric_mode): - """ - 
extract the configs (ranked in descebding order by the score) for the azure file of the current object - (defined by self.jobid_config) - """ - azure_file_path = self.generate_local_json_path() - self.download_azure_blob(azure_file_path) - - json_log = json.load(open(azure_file_path, "r")) - assert "val_log" in json_log - - trialid_to_score = {} - trialid_to_config = {} - - for each_entry in json_log["val_log"]: - trial_id = each_entry["trial_id"] - config = each_entry["config"] - this_score = each_entry["metric_score"][metric_mode] - trialid_to_config[trial_id] = config - trialid_to_score[trial_id] = this_score - - sorted_trialid_to_score = sorted(trialid_to_score.items(), key=lambda x: x[1], reverse=True) - return [trialid_to_config[entry[0]] for entry in sorted_trialid_to_score] - @staticmethod - def is_after_earliest_time(this_blob, earliest_time): + def is_after_earliest_time(this_blob, earliest_time: Tuple[int, int, int]): import pytz utc = pytz.UTC if this_blob.last_modified >= utc.localize(datetime(earliest_time[0], earliest_time[1], earliest_time[2])): return True return False - def get_blob_list_matching_partial_jobid(self, root_log_path, partial_jobid, earliest_time=None): + def get_configblob_from_partial_jobid(self, + root_log_path, + partial_jobid, + earliest_time: Tuple[int, int, int] = None): """ get all blobs whose jobid configs match the partial_jobid """ blob_list = [] container_client = self._init_azure_clients() - jobid_config = JobID() - for each_blob in container_client.list_blobs(): - if each_blob.name.startswith(root_log_path): - each_jobconfig = jobid_config.convert_blobname_to_jobid(each_blob.name) - is_append = False - if each_jobconfig: - if each_jobconfig.is_match(partial_jobid): - is_append = True - if earliest_time and not AzureUtils.is_after_earliest_time(each_blob, earliest_time): - is_append = False - if is_append: - blob_list.append((each_jobconfig, each_blob)) + if container_client: + for each_blob in container_client.list_blobs(): + if each_blob.name.startswith(root_log_path): + each_jobconfig = JobID.convert_blobname_to_jobid(each_blob.name) + is_append = False + if each_jobconfig: + if each_jobconfig.is_match(partial_jobid): + is_append = True + if earliest_time and not AzureUtils.is_after_earliest_time(each_blob, earliest_time): + is_append = False + if is_append: + blob_list.append((each_jobconfig, each_blob)) return blob_list - @staticmethod - def extract_config_and_score(blobname): - data_json = json.load(open(blobname, "r")) - return [(x['config'], x['metric_score']["max"], x['start_time']) for x in data_json['val_log']] - def get_config_and_score_from_partial_jobid(self, - root_log_path, - partial_jobid, - group_attrs, - method, - earliest_time=None): + root_log_path: str, + partial_jobid: JobID, + earliest_time: Tuple[int, int, int] = None): """ - get the best config and best score for each job matching the partial_jobid - """ - matched_blob_list = self.get_blob_list_matching_partial_jobid( + Extract the config and score list from a partial config id + + Args: + root_log_path: + The root log path in azure blob storage, e.g., "logs_seed/" + + partial_jobid: + The partial jobid for matching the blob list + + earliest_time (optional): + The earliest starting time for any matched blob, for filtering out out-dated jobs, + format: (YYYY, MM, DD) + + Return: + a ConfigScore list object which stores the config and scores list for each matched blob lists + + """ + assert isinstance(root_log_path, str), "root_log_path must be of type str" + assert 
isinstance(partial_jobid, JobID), "partial_jobid must be of type JobID" + if earliest_time: + assert isinstance(earliest_time, tuple), "earliest_time must be a tuple of (YYYY, MM, DD)" + + matched_blob_list = self.get_configblob_from_partial_jobid( root_log_path, partial_jobid, earliest_time=earliest_time) - group_dict = {} - for (each_jobconfig, each_blob) in matched_blob_list: - self.download_azure_blob(each_blob.name) - config_and_score = AzureUtils.extract_config_and_score(each_blob.name) - if method == "unsorted": - sorted_config_and_score = config_and_score - elif method == "sort_time": - sorted_config_and_score = sorted(config_and_score, key=lambda x: x[2], reverse=False) - else: - sorted_config_and_score = sorted(config_and_score, key=lambda x: x[1], reverse=True) - group_attr_list = [] - for each_attr in group_attrs: - group_val = getattr(each_jobconfig, each_attr) - if isinstance(group_val, list): - group_attr_list.append(JobID.dataset_list_to_str(group_val, each_attr)) - else: - group_attr_list.append(group_val) - group_attr_tuple = tuple(group_attr_list) - group_dict.setdefault(group_attr_tuple, []) - group_dict[group_attr_tuple].append([(config, score, each_blob.name) - for (config, score, ts) in sorted_config_and_score]) - return group_dict + return self.get_config_and_score_from_matched_blob_list(matched_blob_list, + earliest_time) - def get_validation_perf(self, console_args=None, partial_jobid_config=None): + def get_config_and_score_from_matched_blob_list(self, + matched_blob_list, + earliest_time: Tuple[int, int, int] = None): """ - get the validation score for all blobs matching the partial_jobid_config - """ - if partial_jobid_config.pre == "electra": - dataset_namelist = ["wnli", "rte", "mrpc", "cola", "stsb", "sst2", "qnli", "mnli"] - else: - dataset_namelist = ["wnli", "rte", "mrpc", "cola", "stsb", "sst2"] - dataset_vallist1 = [0] * len(dataset_namelist) - dataset_vallist2 = [0] * len(dataset_namelist) + Extract the config and score list of one or multiple blobs - matched_blob_list = self.get_blob_list_matching_partial_jobid(console_args.azure_root_log_path, - partial_jobid_config) + Args: + matched_blob_list: + matched blob list + + Return: + a ConfigScore list object which stores the config and scores list for each matched blob lists + + """ + matched_config_score_lists = [] for (each_jobconfig, each_blob) in matched_blob_list: - subdat_name = each_jobconfig.subdat self.download_azure_blob(each_blob.name) data_json = json.load(open(each_blob.name, "r")) - print(len(data_json["val_log"])) - validation_metric = data_json['valid_metric'] - try: - dataset_idx = dataset_namelist.index(subdat_name) - dataset_vallist1[dataset_idx], dataset_vallist2[dataset_idx] \ - = self.get_validation_metricstr(validation_metric) - except ValueError: - pass - # print(" & ".join(dataset_vallist1)) - # print(", ,".join(dataset_vallist2)) - - def get_validation_metricstr(self, validation_metric): - """ - get a string representing validations for pasting to Google spreadsheet - """ - validation_str1 = validation_str2 = "" - is_first = True - for key in ["f1", "accuracy", "pearson", "spearmanr", "matthews_correlation"]: - if "eval_" + key in validation_metric.keys(): - if is_first: - validation_str1 += str("%.1f" % (validation_metric["eval_" + key] * 100)) - validation_str2 += str(validation_metric["eval_" + key] * 100) - is_first = False - else: - validation_str1 += "/" + str("%.1f" % (validation_metric["eval_" + key] * 100)) - validation_str2 += "," + str(validation_metric["eval_" + key] 
* 100) - return validation_str1, validation_str2 - - def get_test_perf(self, partial_jobid_config=None, result_root_dir=None): - """ - get the test scores for all blobs matching the partial_jobid_config - """ - import shutil - from flaml.nlp.dataset.submission_auto import file_name_mapping_glue, output_blank_tsv - matched_blob_list = self.get_blob_list_matching_partial_jobid("data/", partial_jobid_config) - partial_jobid_str = partial_jobid_config.to_partial_jobid_string() - output_dir = os.path.join(result_root_dir, partial_jobid_str) - if os.path.exists(output_dir): - assert os.path.isdir(output_dir) - else: - os.mkdir(output_dir) - output_blank_tsv(output_dir) - - for (each_jobconfig, each_blob) in matched_blob_list: - subdat_name = each_jobconfig.subdat - self.download_azure_blob(each_blob.name) - import zipfile - if os.path.exists(each_blob.name[:-4]): - assert os.path.isdir(each_blob.name[:-4]) - else: - os.mkdir(each_blob.name[:-4]) - with zipfile.ZipFile(each_blob.name, 'r') as zip_ref: - zip_ref.extractall(each_blob.name[:-4]) - src = os.path.join(each_blob.name[:-4], file_name_mapping_glue[subdat_name][0]) - dst = os.path.join(output_dir, file_name_mapping_glue[subdat_name][0]) - shutil.copy(src, dst) - shutil.make_archive(os.path.join(output_dir), 'zip', output_dir) - - def get_best_perf_config(self, console_args, jobid_config): - """ - get the config of the best performed trial - """ - matched_blob_list = self.get_blob_list_matching_partial_jobid(console_args.azure_root_log_path, jobid_config) - try: - assert len(matched_blob_list) == 1 - except AssertionError: - import pdb - pdb.set_trace() - - each_jobconfig, each_blob = matched_blob_list[0] - self.download_azure_blob(each_blob.name) - data_json = json.load(open(each_blob.name, "r")) - - sorted_entries = sorted(data_json['val_log'], key=lambda x: x['metric_score']['max'], reverse=True) - best_config = sorted_entries[0]['config'] - if jobid_config.subdat != "mrpc": - best_score = sorted_entries[0]['metric_score']['max'] - else: - best_score = (data_json["valid_metric"]["eval_f1"], data_json["valid_metric"]["eval_accuracy"]) - return best_config, best_score + each_config_and_score_list = ConfigScoreList( + jobid_config=each_jobconfig, + blob_file=each_blob, + config_score_list=[ConfigScore(**each_dict) for each_dict in data_json['val_log']]) + matched_config_score_lists.append(each_config_and_score_list) + return matched_config_score_lists diff --git a/flaml/nlp/result_analysis/generate_result_summary.py b/flaml/nlp/result_analysis/generate_result_summary.py deleted file mode 100644 index efea1641a..000000000 --- a/flaml/nlp/result_analysis/generate_result_summary.py +++ /dev/null @@ -1,357 +0,0 @@ -def extract_ranked_config_score(console_args, partial_config_dict): - from .azure_utils import AzureUtils - azure_utils = AzureUtils(console_args=console_args) - - for method, each_partial_config in partial_config_dict.items(): - dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(each_partial_config, - ["dat", "subdat"], method) - for each_dataset, configscorelist in dataset2configscorelist.items(): - for config_idx in range(len(configscorelist)): - avg_scores = configscorelist[config_idx][0][1] - top_config = configscorelist[config_idx][0][0] - print(avg_scores) - print(top_config) - # print(method + "," + str(each_dataset) + ",rep=" + str(config_idx)) - # print("avg score :" + str(avg_scores)) - # print(''.join(['{0}={1}\n'.format(key, top_config[key]) for key in sorted(top_config.keys())])) - - -def 
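# Retrieval sketch combining get_config_and_score_from_partial_jobid and
# get_config_and_score_from_matched_blob_list above (illustrative values;
# assumes azure_utils is an AzureUtils instance with valid credentials and
# matching blobs under the given log path):

from flaml.nlp.result_analysis.azure_utils import JobID

partial_jobid = JobID()
partial_jobid.pre = "deberta"
partial_jobid.presz = "large"
matched_lists = azure_utils.get_config_and_score_from_partial_jobid(
    "logs_azure/", partial_jobid, earliest_time=(2021, 5, 1))
for each_list in matched_lists:            # one ConfigScoreList per matched blob
    each_list.sorted("sort_time")
    best = each_list.get_best_config(metric_mode="max")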
extract_sorted_config_list(dataset2configscorelist, topk): - dataset2topkconfigs = {} - for dataset, configscorelist in dataset2configscorelist.items(): - all_configscorelist = [] - for scorelist in configscorelist: - for item in scorelist: - if item[0] not in [x[0] for x in all_configscorelist]: - all_configscorelist.append(item) - sorted_all_configscorelist = sorted(all_configscorelist, key=lambda x: x[1], reverse=True) - topk_configs = [] - - for each_hp in ("learning_rate", "num_train_epochs", "per_device_train_batch_size", "warmup_ratio", - "weight_decay", "adam_epsilon"): - topk_configs.append((each_hp, [sorted_all_configscorelist[x][0][each_hp] for x in range(topk)])) - topk_configs.append(("perf", [sorted_all_configscorelist[x][1] for x in range(topk)])) - - dataset2topkconfigs[dataset] = topk_configs - return dataset2topkconfigs - - -def dict2tuple(this_dict): - tuple_list = [] - for key in sorted(this_dict.keys()): - tuple_list.append(this_dict[key]) - return tuple(tuple_list) - - -def merge_configscore_list(small_dataset2configscorelist): - dataset2merged_configscorelist = {} - for (dataset, each_configscore_list) in small_dataset2configscorelist.items(): - merged_configscore_list = {} - for rep_id in range(len(each_configscore_list)): - for each_configscore_entry in each_configscore_list[rep_id]: - is_exist = False - for configscore in merged_configscore_list.keys(): - if configscore[0] == each_configscore_entry[0]: - is_exist = True - break - if is_exist is False: - merged_configscore_list[dict2tuple(each_configscore_entry[0])] = each_configscore_entry[1] - dataset2merged_configscorelist[dataset] = merged_configscore_list - return dataset2merged_configscorelist - - -def get_result(console_args, partial_jobid_config): - from .azure_utils import AzureUtils, JobID - azure_utils = AzureUtils(console_args=console_args) - dataset2configscorelist = \ - azure_utils.get_config_and_score_from_partial_config( - console_args.azure_root_log_path, - partial_jobid_config, - ["dat", "subdat"], - "hpo") - for dataset, configscore_list in dataset2configscorelist.items(): - for rep_id in range(len(configscore_list)): - config_dict = configscore_list[rep_id][0][0] - score = configscore_list[rep_id][0][1] - print(dataset, rep_id) - print_config(config_dict) - print(score) - print() - - -def print_config(config_dict): - for key in sorted(config_dict.keys()): - if key in ("attention_probs_dropout_prob", "hidden_dropout_prob", "seed"): - continue - if key == "per_device_train_batch_size": - short_key = "batch_size" - elif key == "num_train_epochs": - short_key = "epochs" - else: - short_key = key - print(short_key, config_dict[key]) - - -def compare_small_vs_large(console_args): - from .azure_utils import AzureUtils, JobID - azure_utils = AzureUtils(console_args=console_args) - - partial_jobid_config = JobID() - partial_jobid_config.pre = "deberta" - partial_jobid_config.mod = "hpo" - partial_jobid_config.spa = "uni" - partial_jobid_config.presz = "base" - - small_dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(partial_jobid_config, - ["dat", "subdat"], "list") - - small_mergedconfiglist = merge_configscore_list(small_dataset2configscorelist) - - partial_jobid_config = JobID() - partial_jobid_config.pre = "deberta" - partial_jobid_config.mod = "hpo" - partial_jobid_config.spa = "uni" - partial_jobid_config.presz = "large" - - large_dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(partial_jobid_config, - ["dat", "subdat"], "hpo") - - 
large_mergedconfiglist = merge_configscore_list(large_dataset2configscorelist) - - for (each_dataset, merged_small_configlist) in small_mergedconfiglist.items(): - merged_large_configlist = large_mergedconfiglist[each_dataset] - print(each_dataset) - print() - for (each_tuple, large_score) in sorted(merged_large_configlist.items(), key=lambda x: x[1], reverse=True): - # small_score = merged_small_configlist[each_tuple] - is_in_onlysmall = each_tuple in small_mergedconfiglist[each_dataset] - for each_val in each_tuple: - print(each_val, end=", ") - print(large_score, is_in_onlysmall, sep=",") - print() - for (each_tuple, small_score) in \ - sorted(small_mergedconfiglist[each_dataset].items(), key=lambda x: x[1], reverse=True): - is_in_large = each_tuple in large_mergedconfiglist[each_dataset] - for each_val in each_tuple: - print(each_val, end=", ") - print(small_score, is_in_large, sep=",") - - -def check_conflict(console_args, partial_jobid_config_list): - from .azure_utils import AzureUtils, JobID - azure_utils = AzureUtils(console_args=console_args) - for each_partial_config in partial_jobid_config_list: - dataset2configscorelist = \ - azure_utils.get_config_and_score_from_partial_config( - console_args.azure_root_log_path, - each_partial_config, - ["dat", "subdat"], - "unsorted") - for (dataset, configscorelists) in dataset2configscorelist.items(): - config2score = {} - for each_configscorelist in configscorelists: - for (config, score, blobname) in each_configscorelist: - config_dict = dict2tuple(config) - try: - config2score[config_dict].append((score, blobname)) - except KeyError: - config2score.setdefault(config_dict, []) - config2score[config_dict].append((score, blobname)) - dup_keys = [config for config in config2score.keys() if len(config2score[config]) > 1] - dupkey_count = [len(set([y[0] for y in config2score[x]])) for x in dup_keys] - print(dataset) - print(len(config2score)) - print(len(dupkey_count)) - print(dupkey_count) - - -def print_cfo(console_args): - from .azure_utils import JobID, AzureUtils - jobid_config = JobID() - jobid_config.mod = "bestnn" - jobid_config.spa = "buni" - jobid_config.alg = "bs" - jobid_config.pre = "funnel" - jobid_config.presz = "xlarge" - - for each_rep in range(3): - jobid_config.rep = each_rep - azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config) - - dataset2configscorelist = \ - azure_utils.get_config_and_score_from_partial_config( - console_args.azure_root_log_path, - jobid_config, - ["dat", "subdat"], - "sort_time") - dataset = ('glue', 'mrpc') - configscorelist = dataset2configscorelist[dataset] - count = 0 - print(dataset) - for (config, score, blobname) in sorted(configscorelist[0], key=lambda x: x[1], reverse=True)[0:1]: - print(count) - print(score) - print_config(config) - print() - count += 1 - - -def download_validation(console_args, result_root_dir): - from .azure_utils import JobID, AzureUtils - partial_jobid_config = JobID() - partial_jobid_config.mod = "grid" - partial_jobid_config.pre = "roberta" - partial_jobid_config.presz = "base" - # partial_jobid_config.alg = "optuna" - # partial_jobid_config.pru = "asha" - partial_jobid_config.rep = 0 - - azure_utils = AzureUtils(console_args=console_args, jobid=partial_jobid_config) - azure_utils.get_validation_perf(console_args=console_args, partial_jobid_config=partial_jobid_config) - azure_utils.get_test_perf(partial_jobid_config, result_root_dir) - - -def get_result_str(jobid_config, val_score, test_score, best_config, subdat2config=None, mode="grid"): - 
result_str = jobid_config.subdat.upper() + "," - if jobid_config.alg: - result_str += jobid_config.alg.upper().replace("OPTUNA", "Optuna") - if jobid_config.pru is not None and jobid_config.pru != "None": - result_str += "+" + jobid_config.pru.upper() - if jobid_config.subdat != "mrpc": - result_str += ",rep " + str(jobid_config.rep) + " & " + str( - "%.1f" % (val_score * 100)) + " & " + str(test_score) - else: - result_str += ",rep " + str(jobid_config.rep) + " & " + str( - "%.1f" % (val_score[0] * 100)) + "/" + str( - "%.1f" % (val_score[1] * 100)) + " & " + str(test_score) - for hp in ["learning_rate", "warmup_ratio", "per_device_train_batch_size", "hidden_dropout", "attention_dropout", - "weight_decay"]: - if hp not in best_config: - result_str += " & " - else: - if mode == "hpo": - if best_config[hp] > 1.2 * subdat2config[jobid_config.subdat][hp]: - wrap_left = "\\cellcolor{green!85}{" - elif best_config[hp] > subdat2config[jobid_config.subdat][hp]: - wrap_left = "\\cellcolor{green!15}{" - elif best_config[hp] < subdat2config[jobid_config.subdat][hp] / 1.2: - wrap_left = "\\cellcolor{red!85}{" - else: - wrap_left = "\\cellcolor{red!15}{" - wrap_right = "}" - else: - wrap_left = wrap_right = "" - if hp == "per_device_train_batch_size" or hp == "learning_rate": - wrap_left = wrap_right = "" - if hp == "learning_rate": - result_str += " & " + wrap_left + "{:.1e}".format(best_config[hp]) + wrap_right - elif hp == "per_device_train_batch_size": - result_str += " & " + wrap_left + str(best_config[hp]) + wrap_right - else: - result_str += " & " + wrap_left + str("%.3f" % best_config[hp]) + wrap_right - return result_str + "\\\\" - - -def extract_grid(console_args, jobid_config, overfitting_subdat, test_scores): - from .azure_utils import JobID, AzureUtils - key2printstr = {} - subdat2config = {} - for idx in range(len(overfitting_subdat)): - jobid_config.subdat = overfitting_subdat[idx] - jobid_config.mod = "grid" - jobid_config.rep = 0 - azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config) - best_config, val_score = azure_utils.get_best_perf_config(console_args, jobid_config) - best_config["hidden_dropout"] = 0.1 - best_config["attention_dropout"] = 0.1 - test_score = test_scores[idx] - key2printstr[jobid_config.subdat.upper() + ", grid"] = get_result_str(jobid_config, val_score, - test_score, best_config) - subdat2config[jobid_config.subdat] = best_config - print() - for key, printstr in sorted(key2printstr.items(), key=lambda x: x[0]): - print(printstr) - return subdat2config - - -def extract_hpo( - console_args, - jobid_config, - overfitting_subdat, - overfitting_alg, - overfitting_pru, - overfitting_rep, - subdat2config, - test_scores): - from .azure_utils import AzureUtils - key2printstr = {} - for idx in range(len(overfitting_subdat)): - jobid_config.subdat = overfitting_subdat[idx] - jobid_config.alg = overfitting_alg[idx] - jobid_config.pru = overfitting_pru[idx] - jobid_config.rep = overfitting_rep[idx] - azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config) - best_config, val_score = azure_utils.get_best_perf_config(console_args, jobid_config) - test_score = test_scores[idx] - key2printstr[jobid_config.subdat.upper() + "," + jobid_config.alg.upper() + "," - + jobid_config.pru + ",rep " + str(jobid_config.rep)] \ - = get_result_str(jobid_config, val_score, test_score, best_config, subdat2config, mode="hpo") - - for key, printstr in sorted(key2printstr.items(), key=lambda x: x[0]): - print(printstr) - - -def 
extract_roberta_overfitting_configs(console_args): - from .azure_utils import JobID, AzureUtils - jobid_config = JobID() - jobid_config.pre = "roberta" - jobid_config.presz = "base" - - overfitting_subdat = ["rte", "mrpc", "cola", "sst2", "stsb"] - test_scores = ["73.1", "91.4/88.5", "61.4", "96", "89.5/88.7"] - subdat2config = extract_grid(console_args, jobid_config, overfitting_subdat, test_scores) - - jobid_config = JobID() - jobid_config.pre = "roberta" - jobid_config.presz = "base" - - overfitting_subdat = ["rte", "rte", "rte", "mrpc", "mrpc", "mrpc", "sst2", - "rte", "mrpc", "mrpc", "stsb", "sst2", "sst2", - "rte", "rte", "mrpc", "mrpc", "sst2", "sst2"] - overfitting_alg = ["rs", "rs", "rs", "rs", "rs", "rs", "rs", - "rs", "rs", "rs", "rs", "rs", "rs", - "optuna", "optuna", "optuna", "optuna", "optuna", "optuna"] - overfitting_pru = ["None", "None", "None", "None", "None", "None", "None", - "asha", "asha", "asha", "asha", "asha", "asha", - "asha", "asha", "asha", "asha", "asha", "asha"] - overfitting_rep = [0, 1, 2, 0, 1, 2, 0, - 1, 0, 2, 2, 1, 2, - 1, 2, 0, 1, 1, 2] - test_scores = ["71.5", "72.3", "72.2", "90.5/87.1", "90.5/87.4", "90.5/87.2", "95.6", - "72.4", "90.7/87.4", "91.0/87.9", "89.4/88.8", "95.2", "95.7", - "72.4", "72.4", "90.8/87.4", "90.3/86.5", "95.1", "95.8"] - extract_hpo(console_args, jobid_config, overfitting_subdat, overfitting_alg, overfitting_pru, overfitting_rep, - subdat2config, test_scores) - - -def extract_electra_overfitting_configs(console_args): - from .azure_utils import JobID, AzureUtils - jobid_config = JobID() - jobid_config.pre = "electra" - jobid_config.presz = "base" - - overfitting_subdat = ["rte", "qnli", "cola"] - test_scores = ["74.4", "93.2", "64.8"] - subdat2config = extract_grid(console_args, jobid_config, overfitting_subdat, test_scores) - - jobid_config = JobID() - jobid_config.pre = "electra" - jobid_config.presz = "base" - - overfitting_subdat = ["rte", "rte", "qnli", "cola", "qnli", "cola"] - overfitting_alg = ["rs", "rs", "rs", "rs", "rs", "optuna"] - overfitting_pru = ["None", "None", "None", "asha", "asha", "asha"] - overfitting_rep = [0, 1, 0, 2, 0, 0] - test_scores = ["73.8", "74.3", "92.8", "64.7", "92.9", "63.6"] - extract_hpo(console_args, jobid_config, overfitting_subdat, overfitting_alg, overfitting_pru, overfitting_rep, - subdat2config, test_scores) diff --git a/flaml/nlp/result_analysis/wandb_utils.py b/flaml/nlp/result_analysis/wandb_utils.py index 84535dc3d..ce67325d3 100644 --- a/flaml/nlp/result_analysis/wandb_utils.py +++ b/flaml/nlp/result_analysis/wandb_utils.py @@ -1,7 +1,5 @@ import os -from ..utils import get_wandb_azure_key import subprocess -import wandb import hashlib from time import time @@ -26,12 +24,14 @@ class WandbUtils: # https://docs.ray.io/en/master/tune/tutorials/tune-wandb.html def __init__(self, - is_wandb_on=None, + is_wandb_on=False, console_args=None, jobid_config=None): if is_wandb_on: + from ..utils import get_wandb_azure_key wandb_key, azure_key, container_name = get_wandb_azure_key(console_args.key_path) - subprocess.run(["wandb", "login", "--relogin", wandb_key]) + if wandb_key != "": + subprocess.run(["wandb", "login", "--relogin", wandb_key]) os.environ["WANDB_API_KEY"] = wandb_key os.environ["WANDB_MODE"] = "online" else: @@ -40,16 +40,24 @@ class WandbUtils: def set_wandb_per_trial(self): print("before wandb.init\n\n\n") - if os.environ["WANDB_MODE"] == "online": - os.environ["WANDB_SILENT"] = "false" - return wandb.init(project=self.jobid_config.get_jobid_full_data_name(), - 
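# The wandb integration is now optional end to end: is_wandb_on defaults to
# False, "wandb login" is only attempted when a non-empty key is found, and the
# per-run/per-trial methods import wandb lazily, degrading to a None run on
# UsageError or ImportError. A sketch of the intended call sequence, assuming
# console_args.key_path points at a directory containing key.json:
#
#   wandb_utils = WandbUtils(is_wandb_on=True, console_args=console_args,
#                            jobid_config=jobid_config)
#   run = wandb_utils.set_wandb_per_run()          # one group per HPO run
#   trial_run = wandb_utils.set_wandb_per_trial()  # one wandb run per trial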
group=self.wandb_group_name, - name=str(WandbUtils._get_next_trial_ids()), - settings=wandb.Settings( - _disable_stats=True), - reinit=False) - else: - return None + try: + import wandb + try: + if os.environ["WANDB_MODE"] == "online": + os.environ["WANDB_SILENT"] = "false" + return wandb.init(project=self.jobid_config.get_jobid_full_data_name(), + group=self.wandb_group_name, + name=str(WandbUtils._get_next_trial_ids()), + settings=wandb.Settings( + _disable_stats=True), + reinit=False) + else: + return None + except wandb.errors.UsageError as err: + print(err) + return None + except ImportError: + print("To use the wandb component in flaml.nlp, run pip install wandb==0.10.26") @staticmethod def _get_next_trial_ids(): @@ -58,14 +66,22 @@ class WandbUtils: return "trial_" + hash.hexdigest()[:3] def set_wandb_per_run(self): - os.environ["WANDB_RUN_GROUP"] = self.jobid_config.to_wandb_string() + wandb.util.generate_id() - self.wandb_group_name = os.environ["WANDB_RUN_GROUP"] - if os.environ["WANDB_MODE"] == "online": - os.environ["WANDB_SILENT"] = "false" - return wandb.init(project=self.jobid_config.get_jobid_full_data_name(), - group=os.environ["WANDB_RUN_GROUP"], - settings=wandb.Settings( - _disable_stats=True), - reinit=False) - else: - return None + try: + import wandb + os.environ["WANDB_RUN_GROUP"] = self.jobid_config.to_wandb_string() + wandb.util.generate_id() + self.wandb_group_name = os.environ["WANDB_RUN_GROUP"] + try: + if os.environ["WANDB_MODE"] == "online": + os.environ["WANDB_SILENT"] = "false" + return wandb.init(project=self.jobid_config.get_jobid_full_data_name(), + group=os.environ["WANDB_RUN_GROUP"], + settings=wandb.Settings( + _disable_stats=True), + reinit=False) + else: + return None + except wandb.errors.UsageError as err: + print(err) + return None + except ImportError: + print("To use the wandb component in flaml.nlp, run pip install wandb==0.10.26") diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index c6f11f787..ee298ff21 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -8,31 +8,31 @@ from dataclasses import dataclass, field def dataset_subdataset_name_format_check(val_str): regex = re.compile(r"^[^:]*:[^:]*$") - if not regex.match(val_str): + if (val_str is not None) and (not regex.match(val_str)): raise argparse.ArgumentTypeError("dataset_subdataset_name must be in the format {data_name}:{subdata_name}") return val_str def pretrained_model_size_format_check(val_str): regex = re.compile(r"^[^:]*:(small|base|large|xlarge)") - if not regex.match(val_str): + if (val_str is not None) and (not regex.search(val_str)): raise argparse.ArgumentTypeError("pretrained_model_size must be in the format {model_name}:{model_size}," "where {model_name} is the name from huggingface.co/models, {model_size}" "is chosen from small, base, large, xlarge") return val_str -def load_console_args(**custom_data_args): +def load_dft_args(): arg_parser = argparse.ArgumentParser() arg_parser.add_argument('--server_name', type=str, help='server name', required=False, choices=["tmdev", "dgx", "azureml"], default="tmdev") arg_parser.add_argument('--algo_mode', type=str, help='hpo or grid search', required=False, - choices=["grid", "gridbert", "hpo", "hfhpo", "list_s", "list", "bestnn"], default="hpo") + choices=["grid", "hpo", "hfhpo"], default="hpo") arg_parser.add_argument('--data_root_dir', type=str, help='data dir', required=False, default="data/") arg_parser.add_argument('--dataset_subdataset_name', type=dataset_subdataset_name_format_check, help='dataset and subdataset 
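# Behavior sketch for the relaxed format checks above: None now passes through,
# so the corresponding console arguments can default to None, while a malformed
# value still raises:
#
#   dataset_subdataset_name_format_check("glue:mrpc")  # ok -> "glue:mrpc"
#   dataset_subdataset_name_format_check(None)         # ok -> None
#   dataset_subdataset_name_format_check("glue")       # raises ArgumentTypeError
#   pretrained_model_size_format_check("google/electra-base-discriminator:base")  # ok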
name', required=False, default=None) arg_parser.add_argument('--space_mode', type=str, help='space mode', required=False, - choices=["gnr", "uni", "uni_test", "cus", "buni"], default="uni") + choices=["grid", "gnr", "uni", "uni_test", "cus", "buni"], default="uni") arg_parser.add_argument('--search_alg_args_mode', type=str, help='search algorithm args mode', required=False, choices=["dft", "exp", "cus"], default="dft") arg_parser.add_argument('--algo_name', type=str, help='algorithm', required=False, @@ -56,25 +56,22 @@ def load_console_args(**custom_data_args): arg_parser.add_argument('--round_idx', type=int, help='round idx for acl experiments', required=False, default=0) arg_parser.add_argument('--seed_data', type=int, help='seed of data shuffling', required=False, default=43) arg_parser.add_argument('--seed_transformers', type=int, help='seed of transformers', required=False, default=42) + arg_parser.add_argument('--optarg1', type=float, help='place holder for optional arg', required=False) + arg_parser.add_argument('--optarg2', type=float, help='place holder for optional arg', required=False) args, unknown = arg_parser.parse_known_args() - - for each_key in custom_data_args.keys(): - if args.__contains__(each_key): - try: - check_key_format_func = globals()[each_key + "_format_check"] - check_key_format_func(custom_data_args[each_key]) - except KeyError: - pass - setattr(args, each_key, custom_data_args[each_key]) return args def get_wandb_azure_key(key_path): - key_json = json.load(open(os.path.join(key_path, "key.json"), "r")) - wandb_key = key_json["wandb_key"] - azure_key = key_json["azure_key"] - azure_container_name = key_json["container_name"] - return wandb_key, azure_key, azure_container_name + try: + key_json = json.load(open(os.path.join(key_path, "key.json"), "r")) + wandb_key = key_json["wandb_key"] + azure_key = key_json["azure_key"] + azure_container_name = key_json["container_name"] + return wandb_key, azure_key, azure_container_name + except FileNotFoundError: + print("File not found for key.json under", key_path) + return "", "", "" def merge_dicts(dict1, dict2): diff --git a/notebook/flaml_autohf.ipynb b/notebook/flaml_autohf.ipynb deleted file mode 100644 index 7985500aa..000000000 --- a/notebook/flaml_autohf.ipynb +++ /dev/null @@ -1,43 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "1. 
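# get_wandb_azure_key above expects key_path to contain a key.json of roughly
# this shape, and now returns three empty strings (with a message) instead of
# crashing when the file is absent:
#
#   {"wandb_key": "<your wandb api key>",
#    "azure_key": "<your azure storage account key>",
#    "container_name": "<your azure blob container name>"}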
Electra Example" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/setup.py b/setup.py index 167839653..e7e80458a 100644 --- a/setup.py +++ b/setup.py @@ -72,6 +72,12 @@ setuptools.setup( ], "vw": [ "vowpalwabbit", + ], + "nlp": [ + "ray[tune]>=1.2.0", + "transformers", + "datasets==1.4", + "torch" ] }, classifiers=[ diff --git a/test/hf/run_analysis.py b/test/hf/run_analysis.py deleted file mode 100644 index 655780a5a..000000000 --- a/test/hf/run_analysis.py +++ /dev/null @@ -1,75 +0,0 @@ -'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray] -''' -# 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 -import argparse -from flaml.nlp.result_analysis.azure_utils import JobID - - -def create_partial_config_bestnn(): - jobid_config = JobID() - # funnel xlarge - # jobid_config.mod = "bestnn" - jobid_config.spa = "uni" - # jobid_config.arg = "cus" - # jobid_config.alg = "cfo" - jobid_config.pre = "funnel" - jobid_config.presz = "xlarge" - # funnel small - # jobid_config.mod = "list" - # jobid_config.pre = "funnel" - # jobid_config.presz = "small" - # jobid_config.rep = 0 - - # # deberta large - # jobid_config.mod = "bestnn" - # jobid_config.spa = "uni" - # jobid_config.arg = "cus" - # jobid_config.alg = "cfo" - # jobid_config.pre = "deberta" - # jobid_config.presz = "large" - - # # deberta base - # jobid_config.mod = "hpo" - # jobid_config.pre = "deberta" - # jobid_config.presz = "base" - # jobid_config.rep = 0 - - # # deberta large - # jobid_config.mod = "hpo" - # jobid_config.pre = "deberta" - # jobid_config.presz = "large" - - return jobid_config - - -def create_partial_config_list(): - jobid_config = JobID() - jobid_config.mod = "list" - jobid_config.spa = "uni" - jobid_config.presz = "xlarge" - return jobid_config - - -def create_partial_config_hpo(): - jobid_config = JobID() - jobid_config.mod = "hpo" - jobid_config.spa = "uni" - return jobid_config - - -if __name__ == "__main__": - arg_parser = argparse.ArgumentParser() - arg_parser.add_argument('--key_path', type=str, help='key path', required=False, default="../../") - arg_parser.add_argument('--azure_root_log_path', type=str, - help='root log path of blob storage', required=True, default="logs_azure/") - args = arg_parser.parse_args() - - partial_config_large = create_partial_config_bestnn() - from flaml.nlp.result_analysis.generate_result_summary import compare_small_vs_large, get_result, check_conflict, \ - print_cfo, download_validation, extract_roberta_overfitting_configs, extract_electra_overfitting_configs - - # get_result(args, partial_config_large) - # check_conflict(args, [partial_config_large]) - download_validation(args, "/data/xliu127/projects/hyperopt/data/result/") - - # extract_roberta_overfitting_configs(args) diff --git a/test/hf/run_autohf.py b/test/hf/run_autohf.py deleted file mode 100644 index 7d0256949..000000000 --- a/test/hf/run_autohf.py +++ /dev/null @@ -1,285 +0,0 @@ -'''Require: pip install torch transformers 
datasets wandb flaml[blendsearch,ray] -''' -# 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 -import os -import shutil - -from flaml.nlp import AutoTransformers -from flaml.nlp import AzureUtils, JobID -from flaml.nlp.utils import load_console_args - -global azure_log_path -global azure_key - - -def get_resplit_portion(jobid_config): - if jobid_config.dat == ["glue"] and jobid_config.subdat in {"mnli"}: - return {"source": ["train", "validation_matched"], "train": [0, 0.8], "validation": [0.8, 0.9], - "test": [0.9, 1.0]} - else: - return {"source": ["train", "validation"], "train": [0, 0.8], "validation": [0.8, 0.9], "test": [0.9, 1.0]} - - -def get_preparedata_setting(args, jobid_config): - preparedata_setting = { - "server_name": args.server_name, - "data_root_path": args.data_root_dir, - "max_seq_length": 128, - "jobid_config": jobid_config, - "is_wandb_on": True - } - if jobid_config.spt == 'rspt': - preparedata_setting["resplit_portion"] = get_resplit_portion(jobid_config) - if ("albert" == jobid_config.pre and jobid_config.dat == ["squad"]) or \ - ("funnel" in jobid_config.pre and jobid_config.dat[0] in {"imdb", "yelp_review_full", "yelp_polarity", - "amazon_polarity", "amazon_review_multi"}): - preparedata_setting["max_seq_length"] = 512 - if jobid_config.dat[0] == "glue" and jobid_config.subdat == "mnli": - preparedata_setting["fold_name"] = ['train', 'validation_matched', 'test_matched'] - return preparedata_setting - - -def get_autohf_settings(args, **custom_args): - autohf_settings = {"resources_per_trial": {"gpu": 1, "cpu": 1}, - "num_samples": args.sample_num, - "time_budget": args.time_budget, - "ckpt_per_epoch": 1, - } - for other_attr in ["ds_config", "rep_id"]: - if hasattr(args, other_attr): - autohf_settings[other_attr] = getattr(args, other_attr) - else: - autohf_settings[other_attr] = None - if len(custom_args) > 0: - autohf_settings.update(custom_args) - return autohf_settings - - -def rm_home_result(): - from os.path import expanduser - home = expanduser("~") - if os.path.exists(home + "/ray_results/"): - shutil.rmtree(home + "/ray_results/") - - -def get_best_base_config(args, jobid_config, autohf): - import copy - import re - args_small = copy.deepcopy(args) - args_small.algo_name = "optuna" - args_small.search_alg_args_mode = "dft" - args_small.algo_mode = "hpo" - args_small.space_mode = "uni" - args_small.pruner = "None" - - if "funnel" not in args_small.pretrained_model_size: - args_small.algo_mode = "hpo" - else: - args_small.algo_mode = "list" - args_small.sample_num = 10000 - args_small.time_budget = 3600 - args_small.rep_id = 0 - jobid_config_small = JobID(args_small) - if jobid_config_small.pre == "deberta": - jobid_config_small.presz = "base" - else: - jobid_config_small.presz = "small" - jobid_config_small.pre_full = re.sub("(xlarge|large|intermediate)", jobid_config_small.presz, - jobid_config_small.pre_full) - azure_utils_small = AzureUtils( - console_args=args_small, - jobid=jobid_config_small, - autohf=autohf) - preparedata_setting = get_preparedata_setting(args, jobid_config) - autohf.prepare_data(**preparedata_setting) - autohf.set_metric() - - best_config = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)[0] - return best_config - - -def search_base_and_search_lower_lr(args, jobid_config, autohf): - best_config = get_best_base_config(args, jobid_config, autohf) - - import copy - args_large = copy.deepcopy(args) - args_large.time_budget = args.time_budget - 3600 - args_large.sample_num = 100000 - args_large.algo_name = args.algo_name - 
args_large.search_alg_args_mode = "cus" - args_large.space_mode = "buni" - args_large.pruner = "None" - jobid_config_large = JobID(args_large) - jobid_config_large.presz = jobid_config.presz - jobid_config_large.pre_full = jobid_config.pre_full - azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf) - - _test_hpo(args_large, - jobid_config_large, - autohf, - azure_utils_large, - autohf_settings=get_autohf_settings(args_large, **{"points_to_evaluate": [best_config], - "bound": {"learning_rate": { - "u": best_config["learning_rate"]}}})) - - -def search_base_and_search_around_best(args, jobid_config, autohf): - args.algo_name = "bs" - args.search_alg_args_mode = "dft" - args.spa = "uni" - args.pru = "None" - best_config = get_best_base_config(args, jobid_config, autohf) - - import copy - args_large = copy.deepcopy(args) - args_large.time_budget = args.time_budget - 3600 - args_large.sample_num = 100000 - args_large.algo_name = "cfo" - args_large.search_alg_args_mode = "cus" - args_large.space_mode = "uni" - jobid_config_large = JobID(args_large) - jobid_config_large.presz = jobid_config.presz - jobid_config_large.pre_full = jobid_config.pre_full - azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf) - - _test_hpo(args_large, - jobid_config_large, - autohf, - azure_utils_large, - autohf_settings=get_autohf_settings(args_large, **{"points_to_evaluate": [best_config]})) - - -def evaluate_configs(autohf, args, ranked_all_configs): - import copy - this_args = copy.deepcopy(args) - this_args.time_budget = 100000 - this_args.sample_num = int(len(ranked_all_configs)) - this_args.search_alg_args_mode = "cus" - jobid_config = JobID(this_args) - azure_utils_large = AzureUtils(console_args=this_args, jobid=jobid_config, autohf=autohf) - _test_hpo(this_args, - jobid_config, - autohf, - azure_utils_large, - autohf_settings=get_autohf_settings(this_args, **{"points_to_evaluate": ranked_all_configs})) - - -def convert_config_to_different_size(origin_config, mode): - import re - import copy - if mode == "small": - new_config = copy.deepcopy(origin_config) - if new_config.pre == "funnel": - new_config.mod = "list" - else: - new_config.mod = "hpo" - if new_config.pre == "funnel": - new_config.presz = "small" - else: - new_config.presz = "base" - new_config.pre_full = re.sub("(xlarge|large|intermediate)", new_config.presz, origin_config.pre_full) - elif mode == "large": - new_config = copy.deepcopy(origin_config) - new_config.mod = "hpo" - if new_config.pre == "funnel": - new_config.presz = "xlarge" - new_config.pre_full = re.sub("(small)", "xlarge", origin_config.pre_full) - else: - new_config.presz = "large" - new_config.pre_full = re.sub("(small)", "large", origin_config.pre_full) - - return new_config - - -def evaluate_small_best_configs_on_large(large_args, autohf): - jobid_config_small = convert_config_to_different_size(JobID(large_args), mode="small") - jobid_config_small.rep = 0 - azure_utils_small = AzureUtils(console_args=None, jobid=jobid_config_small, autohf=autohf) - ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name) - evaluate_configs(large_args, ranked_all_small_configs[:int(len(ranked_all_small_configs) / 2)]) - - -def add_dict_item_to_list(this_list, this_dict): - is_exist = len([x for x in this_list if x == this_dict]) > 0 - if not is_exist: - this_list.append(this_dict) - return this_list - - -def evaluate_large_best_configs_on_small(small_args, autohf): - 
jobid_config_large = convert_config_to_different_size(JobID(small_args), mode="large") - autohf.jobid_config = jobid_config_large - autohf.set_metric() - all_configs_from_large = [] - for rep_id in range(3): - jobid_config_large.rep = rep_id - azure_utils_large = AzureUtils(console_args=small_args, jobid=jobid_config_large, autohf=autohf) - ranked_all_large_configs = azure_utils_large.get_ranked_configs(autohf.metric_mode_name) - for each_config in ranked_all_large_configs: - all_configs_from_large = add_dict_item_to_list(all_configs_from_large, each_config) - jobid_config_small = convert_config_to_different_size(JobID(small_args), mode="small") - jobid_config_small.rep = 0 - azure_utils_small = AzureUtils(console_args=small_args, jobid=jobid_config_small, autohf=autohf) - ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name) - for each_config in ranked_all_small_configs: - all_configs_from_large = add_dict_item_to_list(all_configs_from_large, each_config) - - evaluate_configs(autohf, small_args, list(all_configs_from_large)) - - -def _test_hpo(args, - jobid_config, - autohf, - azure_utils=None, - autohf_settings=None, - ): - try: - if not azure_utils: - azure_utils = AzureUtils(console_args=args, jobid=jobid_config, autohf=autohf) - preparedata_setting = get_preparedata_setting(args, jobid_config) - autohf.prepare_data(**preparedata_setting) - - analysis = validation_metric = test_metric = None - if not autohf_settings: - autohf_settings = get_autohf_settings(args) - if args.algo_mode != "hfhpo": - validation_metric, analysis = autohf.fit(**autohf_settings, ) - else: - autohf.fit_hf(**autohf_settings) - - if jobid_config.spt == "ori": - predictions, test_metric = autohf.predict() - if validation_metric: - test_metric.update({"validation": validation_metric}) - else: - predictions = None - if test_metric: - validation_metric.update({"test": test_metric}) - - if analysis is not None: - json_log = azure_utils.extract_log_from_analysis(analysis) - else: - json_log = None - azure_utils.write_autohf_output(json_log=json_log, - valid_metric=validation_metric, - predictions=predictions, - duration=autohf.last_run_duration) - - except AssertionError: - azure_utils.write_exception() - rm_home_result() - - -if __name__ == "__main__": - autohf = AutoTransformers() - args = load_console_args() - jobid_config = JobID(args) - - if args.algo_mode in ("hpo", "hfhpo", "grid", "gridbert"): - _test_hpo(args, jobid_config, autohf) - elif args.algo_mode == "bestnn": - search_base_and_search_lower_lr(args, jobid_config, autohf) - elif args.algo_mode == "list": - evaluate_small_best_configs_on_large(args, autohf) - elif args.algo_mode == "list_s": - evaluate_large_best_configs_on_small(args, autohf) diff --git a/test/hf/test_cover_azure.py b/test/hf/test_cover_azure.py new file mode 100644 index 000000000..612b2f21a --- /dev/null +++ b/test/hf/test_cover_azure.py @@ -0,0 +1,126 @@ +""" + test suites for covering azure_utils.py +""" + + +def get_preparedata_setting(jobid_config): + preparedata_setting = { + "server_name": "tmdev", + "data_root_path": "data/", + "max_seq_length": 128, + "jobid_config": jobid_config, + "resplit_portion": {"source": ["train", "validation"], + "train": [0, 0.8], + "validation": [0.8, 0.9], + "test": [0.9, 1.0]} + } + return preparedata_setting + + +def get_console_args(): + from flaml.nlp.utils import load_dft_args + args = load_dft_args() + args.dataset_subdataset_name = "glue:mrpc" + args.algo_mode = "hpo" + args.space_mode = "uni" + 
+    args.search_alg_args_mode = "dft"
+    args.algo_name = "bs"
+    args.pruner = "None"
+    args.pretrained_model_size = "google/electra-base-discriminator:base"
+    args.resplit_mode = "rspt"
+    args.rep_id = 0
+    args.seed_data = 43
+    args.seed_transformers = 42
+    return args
+
+
+def test_get_configblob_from_partial_jobid():
+    try:
+        import ray
+    except ImportError:
+        return
+
+    from flaml.nlp.result_analysis.azure_utils import JobID
+    each_blob_name = "dat=glue_subdat=cola_mod=grid_spa=cus_arg=dft_alg=grid" \
+                     "_pru=None_pre=deberta_presz=large_spt=rspt_rep=0_sddt=43" \
+                     "_sdhf=42_var1=1e-05_var2=0.0.json"
+    partial_jobid = JobID()
+    partial_jobid.pre = "deberta"
+    partial_jobid.mod = "grid"
+    partial_jobid.spa = "cus"
+    partial_jobid.presz = "large"
+
+    each_jobconfig = JobID.convert_blobname_to_jobid(each_blob_name)
+    each_jobconfig.is_match(partial_jobid)
+
+    partial_jobid = JobID()
+    partial_jobid.pre = "deberta"
+    partial_jobid.mod = "hpo"
+    partial_jobid.spa = "cus"
+    partial_jobid.presz = "large"
+    partial_jobid.sddt = None
+
+    each_jobconfig = JobID.convert_blobname_to_jobid(each_blob_name)
+    each_jobconfig.is_match(partial_jobid)
+
+
+def test_jobid():
+    try:
+        import ray
+    except ImportError:
+        return
+
+    from flaml.nlp.result_analysis.azure_utils import JobID
+    args = get_console_args()
+
+    jobid_config = JobID(args)
+    jobid_config.to_partial_jobid_string()
+    JobID.convert_blobname_to_jobid("test")
+    JobID.dataset_list_to_str("glue")
+    JobID.get_full_data_name(["glue"], "mrpc")
+    JobID._extract_model_type_with_keywords_match("google/electra-base-discriminator:base")
+
+    jobid_config.to_wandb_string()
+
+
+def test_azureutils():
+    try:
+        import ray
+    except ImportError:
+        return
+
+    from flaml.nlp.result_analysis.azure_utils import AzureUtils, ConfigScore, JobID, ConfigScoreList
+    from flaml.nlp import AutoTransformers
+
+    args = get_console_args()
+    args.key_path = "."
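+    # key_path is set to the repo root as a stand-in: the real Azure key file
+    # is assumed to be absent in CI, so AzureUtils should fall back to its
+    # no-credentials code path and skip the actual upload.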
+    jobid_config = JobID(args)
+    autohf = AutoTransformers()
+    autohf.jobid_config = jobid_config
+
+    preparedata_setting = get_preparedata_setting(jobid_config)
+    autohf.prepare_data(**preparedata_setting)
+
+    each_configscore = ConfigScore(trial_id="test", start_time=0.0, last_update_time=0.0,
+                                   config={}, metric_score={"max": 0.0}, time_stamp=0.0)
+    configscore_list = ConfigScoreList([each_configscore])
+    for each_method in ["unsorted", "sort_time", "sort_accuracy"]:
+        configscore_list.sorted(each_method)
+    configscore_list.get_best_config()
+
+    azureutils = AzureUtils(console_args=args, autohf=autohf)
+    azureutils.autohf = autohf
+    azureutils.root_log_path = "logs_azure/"
+
+    azureutils.write_autohf_output(configscore_list=[each_configscore],
+                                   valid_metric={},
+                                   predictions=[],
+                                   duration=0)
+
+    azureutils.get_config_and_score_from_partial_jobid(root_log_path="data/", partial_jobid=jobid_config)
+
+
+if __name__ == "__main__":
+    test_get_configblob_from_partial_jobid()
+    test_jobid()
+    test_azureutils()
diff --git a/test/hf/test_cover_other.py b/test/hf/test_cover_other.py
new file mode 100644
index 000000000..90efabbd9
--- /dev/null
+++ b/test/hf/test_cover_other.py
@@ -0,0 +1,214 @@
+"""
+    Test suite covering the remaining flaml.nlp modules: data processing,
+    search spaces, the trainer, classification heads, and wandb utilities.
+"""
+
+from transformers import AutoConfig
+from flaml.nlp.huggingface.trainer import TrainerForAutoTransformers
+
+
+def get_console_args():
+    from flaml.nlp.utils import load_dft_args
+    args = load_dft_args()
+    args.dataset_subdataset_name = "glue:mrpc"
+    args.algo_mode = "hpo"
+    args.space_mode = "uni"
+    args.search_alg_args_mode = "dft"
+    args.algo_name = "bs"
+    args.pruner = "None"
+    args.pretrained_model_size = "google/electra-base-discriminator:base"
+    args.resplit_mode = "rspt"
+    args.rep_id = 0
+    args.seed_data = 43
+    args.seed_transformers = 42
+    return args
+
+
+def model_init():
+    from flaml.nlp.result_analysis.azure_utils import JobID
+    jobid_config = JobID()
+    jobid_config.set_unittest_config()
+    from flaml.nlp import AutoTransformers
+    autohf = AutoTransformers()
+
+    preparedata_setting = get_preparedata_setting(jobid_config)
+    autohf.prepare_data(**preparedata_setting)
+    return autohf._load_model()
+
+
+def get_preparedata_setting(jobid_config):
+    preparedata_setting = {
+        "server_name": "tmdev",
+        "data_root_path": "data/",
+        "max_seq_length": 128,
+        "jobid_config": jobid_config,
+        "resplit_portion": {"source": ["train", "validation"],
+                            "train": [0, 0.8],
+                            "validation": [0.8, 0.9],
+                            "test": [0.9, 1.0]}
+    }
+    return preparedata_setting
+
+
+def test_dataprocess():
+    """
+    Test to increase coverage of flaml.nlp.dataset.dataprocess_auto.
+    """
+    try:
+        import ray
+    except ImportError:
+        return
+
+    from flaml.nlp import AutoTransformers
+    from flaml.nlp import JobID
+    from flaml.nlp.dataset.dataprocess_auto import TOKENIZER_MAPPING
+
+    jobid_config = JobID()
+    jobid_config.set_unittest_config()
+    autohf = AutoTransformers()
+
+    dataset_name = JobID.dataset_list_to_str(jobid_config.dat)
+    default_func = TOKENIZER_MAPPING[(dataset_name, jobid_config.subdat)]
+
+    funcs_to_eval = set([(dat, subdat) for (dat, subdat) in TOKENIZER_MAPPING.keys()
+                         if TOKENIZER_MAPPING[(dat, subdat)] != default_func])
+
+    for (dat, subdat) in funcs_to_eval:
+        print("loading dataset for {}, {}".format(dat, subdat))
+        jobid_config.dat = dat.split(",")
+        jobid_config.subdat = subdat
+
+        preparedata_setting = get_preparedata_setting(jobid_config)
+        autohf.prepare_data(**preparedata_setting)
+
+
+def test_gridsearch_space():
+    try:
+        import ray
+    except ImportError:
+        return
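+    # The block below instantiates the grid search space of every model type
+    # registered in GRID_SEARCH_SPACE_MAPPING, so a broken mapping entry makes
+    # this test fail; the "base" model size is assumed to be defined for each.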
+
+    from flaml.nlp.hpo.grid_searchspace_auto import GRID_SEARCH_SPACE_MAPPING, AutoGridSearchSpace
+    from flaml.nlp.result_analysis.azure_utils import JobID
+    jobid_config = JobID()
+    jobid_config.set_unittest_config()
+
+    for each_model_type in GRID_SEARCH_SPACE_MAPPING.keys():
+        AutoGridSearchSpace.from_model_and_dataset_name(
+            each_model_type,
+            "base",
+            jobid_config.dat,
+            jobid_config.subdat, "hpo")
+
+
+def test_hpo_space():
+    try:
+        import ray
+    except ImportError:
+        return
+
+    from flaml.nlp.hpo.hpo_searchspace import AutoHPOSearchSpace, HPO_SEARCH_SPACE_MAPPING
+    from flaml.nlp.result_analysis.azure_utils import JobID
+    jobid_config = JobID()
+    jobid_config.set_unittest_config()
+
+    for spa in HPO_SEARCH_SPACE_MAPPING.keys():
+        jobid_config.spa = spa
+        if jobid_config.spa == "cus":
+            custom_hpo_args = {"hpo_space": {"learning_rate": [1e-5]}}
+        elif jobid_config.spa == "buni":
+            best_config = {"learning_rate": 1e-5}
+            custom_hpo_args = {"points_to_evaluate": [best_config],
+                               "bound": {"learning_rate": {"u": best_config["learning_rate"]}}}
+        else:
+            custom_hpo_args = {}
+
+        AutoHPOSearchSpace.from_model_and_dataset_name(jobid_config.spa, jobid_config.pre, jobid_config.presz,
+                                                       jobid_config.dat, jobid_config.subdat, **custom_hpo_args)
+
+
+def test_trainer():
+    try:
+        import ray
+    except ImportError:
+        return
+
+    num_train_epochs = 3
+    num_train_examples = 100
+    per_device_train_batch_size = 32
+    device_count = 1
+    max_steps = 1000
+    warmup_steps = 100
+    warmup_ratio = 0.1
+    trainer = TrainerForAutoTransformers(model_init=model_init)
+    trainer.convert_num_train_epochs_to_max_steps(num_train_epochs,
+                                                  num_train_examples,
+                                                  per_device_train_batch_size,
+                                                  device_count)
+    trainer.convert_max_steps_to_num_train_epochs(max_steps,
+                                                  num_train_examples,
+                                                  per_device_train_batch_size,
+                                                  device_count)
+    trainer.convert_warmup_ratio_to_warmup_steps(warmup_ratio,
+                                                 max_steps=max_steps,
+                                                 num_train_epochs=num_train_epochs,
+                                                 num_train_examples=num_train_examples,
+                                                 per_device_train_batch_size=per_device_train_batch_size,
+                                                 device_count=device_count)
+    trainer.convert_warmup_steps_to_warmup_ratio(warmup_steps,
+                                                 num_train_epochs,
+                                                 num_train_examples,
+                                                 per_device_train_batch_size,
+                                                 device_count)
+
+
+def test_switch_head():
+    try:
+        import ray
+    except ImportError:
+        return
+
+    from flaml.nlp.huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING
+    from flaml.nlp.result_analysis.azure_utils import JobID
+    jobid_config = JobID()
+    jobid_config.set_unittest_config()
+    checkpoint_path = jobid_config.pre_full
+
+    model_config = AutoConfig.from_pretrained(
+        checkpoint_path,
+        num_labels=AutoConfig.from_pretrained(checkpoint_path).num_labels)
+
+    for model in list(MODEL_CLASSIFICATION_HEAD_MAPPING.keys()):
+        jobid_config.pre = model
+        AutoSeqClassificationHead \
+            .from_model_type_and_config(jobid_config.pre,
+                                        model_config)
+
+
+def test_wandb_utils():
+    try:
+        import ray
+    except ImportError:
+        return
+
+    from flaml.nlp.result_analysis.wandb_utils import WandbUtils
+    from flaml.nlp.result_analysis.azure_utils import JobID
+    import os
+
+    args = get_console_args()
+    args.key_path = "."
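+    # As in test_cover_azure.py, key_path points at the repo root; the wandb
+    # API key file is assumed absent, so WandbUtils is expected to degrade
+    # gracefully instead of authenticating.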
+    jobid_config = JobID(args)
+
+    wandb_utils = WandbUtils(is_wandb_on=True, console_args=args, jobid_config=jobid_config)
+    os.environ["WANDB_MODE"] = "online"
+    wandb_utils.wandb_group_name = "test"
+    wandb_utils._get_next_trial_ids()
+    wandb_utils.set_wandb_per_run()
+
+
+if __name__ == "__main__":
+    test_wandb_utils()
+    test_dataprocess()
+    test_gridsearch_space()
+    test_hpo_space()
+    test_trainer()
+    test_switch_head()
diff --git a/test/hf/test_mobilebert.py b/test/hf/test_mobilebert.py
index dba768a76..35a843e0a 100644
--- a/test/hf/test_mobilebert.py
+++ b/test/hf/test_mobilebert.py
@@ -1,7 +1,5 @@
 '''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
 '''
-# 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
-
 global azure_log_path
 global azure_key
 
@@ -26,6 +24,7 @@ def get_autohf_settings():
         "time_budget": 100000,
         "ckpt_per_epoch": 1,
         "fp16": False,
+        "ray_local_mode": True
     }
     return autohf_settings
 
@@ -38,24 +37,38 @@ def test_hpo():
 
     from flaml.nlp import AutoTransformers
     from flaml.nlp import JobID
+    from flaml.nlp import AzureUtils
 
     jobid_config = JobID()
     jobid_config.set_unittest_config()
     autohf = AutoTransformers()
 
-    try:
-        preparedata_setting = get_preparedata_setting(jobid_config)
-        autohf.prepare_data(**preparedata_setting)
+    preparedata_setting = get_preparedata_setting(jobid_config)
+    autohf.prepare_data(**preparedata_setting)
 
-        autohf_settings = get_autohf_settings()
-        validation_metric, analysis = autohf.fit(**autohf_settings, )
+    autohf_settings = get_autohf_settings()
+    autohf_settings["points_to_evaluate"] = [{"learning_rate": 2e-5}]
+    validation_metric, analysis = autohf.fit(**autohf_settings)
 
-        predictions, test_metric = autohf.predict()
-        if test_metric:
-            validation_metric.update({"test": test_metric})
+    predictions, test_metric = autohf.predict()
+    if test_metric:
+        validation_metric.update({"test": test_metric})
 
-    except AssertionError:
-        pass
+    azure_utils = AzureUtils(root_log_path="logs_test/", autohf=autohf)
+    azure_utils._azure_key = "test"
+    azure_utils._container_name = "test"
+
+    configscore_list = azure_utils.extract_configscore_list_from_analysis(analysis)
+    azure_utils.write_autohf_output(configscore_list=configscore_list,
+                                    valid_metric=validation_metric,
+                                    predictions=predictions,
+                                    duration=autohf.last_run_duration)
+
+    jobid_config.mod = "grid"
+    autohf = AutoTransformers()
+
+    preparedata_setting = get_preparedata_setting(jobid_config)
+    autohf.prepare_data(**preparedata_setting)
 
 
 if __name__ == "__main__":