From e0e317bfb1bef6dbaa267ffd6c37f4ecd4f502b1 Mon Sep 17 00:00:00 2001
From: Xueqing Liu
Date: Fri, 3 Jun 2022 15:19:22 -0400
Subject: [PATCH] fixing trainable and update function, completing NOTE (#566)

* fix checkpoint naming + trial id for non-ray mode, fix the bug in running
  test mode, delete all the checkpoints in non-ray mode

* finished testing for checkpoint naming, delete checkpoint, ray, max iter = 1
---
 flaml/automl.py                 | 121 +++++++++++++++++---------------
 flaml/ml.py                     |   1 -
 test/automl/test_constraints.py |   2 +-
 3 files changed, 64 insertions(+), 60 deletions(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index eb4e19619..564470795 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -237,7 +237,7 @@ class SearchState:

         if self.base_eci is None:
             self.base_eci = time_used
-        if (obj is not None) and (self.best_loss is None or obj < self.best_loss):
+        if (obj is not None) and (obj < self.best_loss):
             self.best_loss_old = self.best_loss if self.best_loss < np.inf else 2 * obj
             self.best_loss = obj
             self.best_result = result
@@ -286,7 +286,7 @@ class AutoMLState:
             sampled_y_train = self.y_train[:sample_size]
             weight = self.fit_kwargs.get(
                 "sample_weight"
-            )  # NOTE: _prepare_sample_train_data is before
+            )  # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator
             if weight is not None:
                 sampled_weight = weight[:sample_size]
             if self.groups is not None:
@@ -296,7 +296,7 @@ class AutoMLState:
             sampled_y_train = self.y_train_all
             if (
                 "sample_weight" in self.fit_kwargs
-            ):  # NOTE: _prepare_sample_train_data is before
+            ):  # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator
                 sampled_weight = self.sample_weight_all
             if self.groups is not None:
                 groups = self.groups_all
@@ -311,7 +311,7 @@ class AutoMLState:

         this_estimator_kwargs = state.fit_kwargs_by_estimator.get(
             estimator
-        ).copy()  # NOTE: _compute_with_config_base is after
+        ).copy()  # NOTE: _compute_with_config_base is after kwargs is updated to fit_kwargs_by_estimator
         (
             sampled_X_train,
             sampled_y_train,
@@ -398,7 +398,7 @@ class AutoMLState:

         this_estimator_kwargs = self.fit_kwargs_by_estimator.get(
             estimator
-        ).copy()  # NOTE: _train_with_config is after
+        ).copy()  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
         (
             sampled_X_train,
             sampled_y_train,
@@ -408,14 +408,14 @@ class AutoMLState:
         if sampled_weight is not None:
             weight = this_estimator_kwargs[
                 "sample_weight"
-            ]  # NOTE: _train_with_config is after
+            ]  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
             this_estimator_kwargs[
                 "sample_weight"
-            ] = sampled_weight  # NOTE: _train_with_config is after
+            ] = sampled_weight  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
         if groups is not None:
             this_estimator_kwargs[
                 "groups"
-            ] = groups  # NOTE: _train_with_config is after
+            ] = groups  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator

         budget = (
             None
@@ -432,14 +432,14 @@ class AutoMLState:
             n_jobs=self.n_jobs,
             estimator_class=self.learner_classes.get(estimator),
             budget=budget,
-            fit_kwargs=this_estimator_kwargs,  # NOTE: _train_with_config is after
+            fit_kwargs=this_estimator_kwargs,  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
             eval_metric=self.metric if hasattr(self, "metric") else "train_time",
         )

         if sampled_weight is not None:
             this_estimator_kwargs[
                 "sample_weight"
-            ] = weight  # NOTE: _train_with_config is after
+            ] = weight  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator

         return estimator, train_time
@@ -626,10 +626,12 @@ class AutoML(BaseEstimator):
                 augment rare classes.
             min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample
                 size when sample=True.
-            use_ray: boolean, default=False | Whether to use ray to run the training
+            use_ray: boolean or dict.
+                If boolean: default=False | Whether to use ray to run the training
                 in separate processes. This can be used to prevent OOM for large
-                datasets, but will incur more overhead in time. Only use it if
-                you run into OOM failures.
+                datasets, but will incur more overhead in time.
+                If dict: the dict contains the keyword arguments to be passed to
+                [ray.tune.run](https://docs.ray.io/en/latest/tune/api_docs/execution.html).
             metric_constraints: list, default=[] | The list of metric constraints.
                 Each element in this list is a 3-tuple, which shall be expressed
                 in the following format: the first element of the 3-tuple is the name of the
@@ -1103,7 +1105,7 @@ class AutoML(BaseEstimator):

         self._sample_weight_full = self._state.fit_kwargs.get(
             "sample_weight"
-        )  # NOTE: _validate_data is before,
+        )  # NOTE: _validate_data is before kwargs is updated to fit_kwargs_by_estimator
         if X_val is not None and y_val is not None:
             assert (
                 isinstance(X_val, np.ndarray)
@@ -1164,7 +1166,7 @@ class AutoML(BaseEstimator):
             self._state.task in CLASSIFICATION
             and self._auto_augment
             and self._state.fit_kwargs.get("sample_weight")
-            is None  # NOTE: _prepare_data is before
+            is None  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
             and self._split_type in ["stratified", "uniform"]
             and self._state.task != TOKENCLASSIFICATION
         ):
@@ -1208,7 +1210,9 @@ class AutoML(BaseEstimator):
                 )
                 self._state.fit_kwargs[
                     "sample_weight"
-                ] = self._state.sample_weight_all  # NOTE: _prepare_data is before
+                ] = (
+                    self._state.sample_weight_all
+                )  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
             else:
                 X_train_all, y_train_all = shuffle(
                     X_train_all, y_train_all, random_state=RANDOM_SEED
@@ -1227,7 +1231,7 @@ class AutoML(BaseEstimator):
             num_samples = X_train_all.shape[0]
             period = self._state.fit_kwargs[
                 "period"
-            ]  # NOTE: _prepare_data is before
+            ]  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
             assert (
                 period < num_samples
             ), f"period={period}>#examples={num_samples}"
@@ -1239,7 +1243,7 @@ class AutoML(BaseEstimator):
         else:
             if (
                 "sample_weight" in self._state.fit_kwargs
-            ):  # NOTE: _prepare_data is before
+            ):  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                 (
                     X_train,
                     X_val,
@@ -1247,14 +1251,14 @@ class AutoML(BaseEstimator):
                     y_val,
                     self._state.fit_kwargs[
                         "sample_weight"
-                    ],  # NOTE: _prepare_data is before
+                    ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     self._state.weight_val,
                 ) = train_test_split(
                     X_train_all,
                     y_train_all,
                     self._state.fit_kwargs[
                         "sample_weight"
-                    ],  # NOTE: _prepare_data is before
+                    ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     test_size=split_ratio,
                     shuffle=False,
                 )
@@ -1297,7 +1301,7 @@ class AutoML(BaseEstimator):
                 stratify = y_rest if self._split_type == "stratified" else None
                 if (
                     "sample_weight" in self._state.fit_kwargs
-                ):  # NOTE: _prepare_data is before
+                ):  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     (
                         X_train,
                         X_val,
@@ -1310,17 +1314,17 @@ class AutoML(BaseEstimator):
                         y_rest,
                         self._state.fit_kwargs["sample_weight"][
                             rest
-                        ],  # NOTE: _prepare_data is before
+                        ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                         test_size=split_ratio,
                         random_state=RANDOM_SEED,
                     )
                     weight1 = self._state.fit_kwargs["sample_weight"][
                         first
-                    ]  # NOTE: _prepare_data is before
+                    ]  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     self._state.weight_val = concat(weight1, weight_val)
                     self._state.fit_kwargs[
                         "sample_weight"
-                    ] = concat(  # NOTE: _prepare_data is before
+                    ] = concat(  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                         weight1, weight_train
                     )
                 else:
@@ -1346,7 +1350,7 @@ class AutoML(BaseEstimator):
         elif self._state.task in REGRESSION:
             if (
                 "sample_weight" in self._state.fit_kwargs
-            ):  # NOTE: _prepare_data is before
+            ):  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                 (
                     X_train,
                     X_val,
@@ -1354,14 +1358,14 @@ class AutoML(BaseEstimator):
                     y_val,
                     self._state.fit_kwargs[
                         "sample_weight"
-                    ],  # NOTE: _prepare_data is before
+                    ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     self._state.weight_val,
                 ) = train_test_split(
                     X_train_all,
                     y_train_all,
                     self._state.fit_kwargs[
                         "sample_weight"
-                    ],  # NOTE: _prepare_data is before
+                    ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                     test_size=split_ratio,
                     random_state=RANDOM_SEED,
                 )
@@ -1409,7 +1413,7 @@ class AutoML(BaseEstimator):
         if self._state.task in TS_FORECAST:
             period = self._state.fit_kwargs[
                 "period"
-            ]  # NOTE: _prepare_data is before
+            ]  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
             if period * (n_splits + 1) > y_train_all.size:
                 n_splits = int(y_train_all.size / period - 1)
                 assert n_splits >= 2, (
@@ -1716,7 +1720,7 @@ class AutoML(BaseEstimator):

             assert isinstance(
                 self._state.fit_kwargs.get("period"),
-                int,  # NOTE: _decide_split_type is before
+                int,  # NOTE: _decide_split_type is before kwargs is updated to fit_kwargs_by_estimator
             ), f"missing a required integer 'period' for '{TS_FORECAST}' task."
         elif self._state.task == "rank":
             assert (
@@ -1897,32 +1901,14 @@ class AutoML(BaseEstimator):
     @property
     def trainable(self) -> Callable[[dict], Optional[float]]:
         """Training function.
-
         Returns:
             A function that evaluates each config and returns the loss.
""" self._state.time_from_start = 0 - for estimator in self.estimator_list: - search_state = self._search_states[estimator] - if not hasattr(search_state, "training_function"): - if self._use_ray is not False: - from ray.tune import with_parameters - - search_state.training_function = with_parameters( - AutoMLState._compute_with_config_base, - state=self._state, - estimator=estimator, - ) - else: - search_state.training_function = partial( - AutoMLState._compute_with_config_base, - state=self._state, - estimator=estimator, - ) states = self._search_states mem_res = self._mem_thres - def train(config: dict): + def train(config: dict, state): sample_size = config.get("FLAML_sample_size") config = config.get("ml", config).copy() @@ -1932,18 +1918,33 @@ class AutoML(BaseEstimator): # check memory constraints before training if states[estimator].learner_class.size(config) <= mem_res: del config["learner"] - result = states[estimator].training_function(config) - return result + result = AutoMLState._compute_with_config_base( + config, state=state, estimator=estimator + ) else: - return { + # If search algorithm is not in flaml, it does not handle the config constraint, should also tune.report before return + result = { "pred_time": 0, "wall_clock_time": None, "metric_for_logging": np.inf, "val_loss": np.inf, "trained_estimator": None, } + tune.report(**result) + return result - return train + if self._use_ray is not False: + from ray.tune import with_parameters + + return with_parameters( + train, + state=self._state, + ) + else: + return partial( + train, + state=self._state, + ) @property def metric_constraints(self) -> list: @@ -2180,10 +2181,12 @@ class AutoML(BaseEstimator): augment rare classes. min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample size when sample=True. - use_ray: boolean or dict + use_ray: boolean or dict. If boolean: default=False | Whether to use ray to run the training in separate processes. This can be used to prevent OOM for large datasets, but will incur more overhead in time. + If dict: the dict contains the keywords arguments to be passed to + [ray.tune.run](https://docs.ray.io/en/latest/tune/api_docs/execution.html). metric_constraints: list, default=[] | The list of metric constraints. 
                 Each element in this list is a 3-tuple, which shall be expressed
                 in the following format: the first element of the 3-tuple is the name of the
@@ -2565,7 +2568,7 @@ class AutoML(BaseEstimator):
                 this_estimator_kwargs = this_estimator_kwargs.copy()
                 this_estimator_kwargs.update(
                     self._state.fit_kwargs
-                )  # update the shallow copy
+                )  # update the shallow copy of fit_kwargs to fit_kwargs_by_estimator
                 self._state.fit_kwargs_by_estimator[
                     estimator_name
                 ] = this_estimator_kwargs  # set self._state.fit_kwargs_by_estimator[estimator_name] to the update, so only self._state.fit_kwargs_by_estimator will be updated
@@ -2579,7 +2582,9 @@ class AutoML(BaseEstimator):
                 data_size=self._state.data_size,
                 task=self._state.task,
                 starting_point=starting_points.get(estimator_name),
-                period=self._state.fit_kwargs.get("period"),  # NOTE: this is after
+                period=self._state.fit_kwargs.get(
+                    "period"
+                ),  # NOTE: this is after kwargs is updated to fit_kwargs_by_estimator
                 custom_hp=custom_hp and custom_hp.get(estimator_name),
                 max_iter=max_iter,
             )
@@ -2643,7 +2648,7 @@ class AutoML(BaseEstimator):
                     self._sample_weight_full,
                     self._state.fit_kwargs_by_estimator,
                     self._state.fit_kwargs,
-                )  # NOTE: this is after
+                )  # NOTE: this is after kwargs is updated to fit_kwargs_by_estimator
                 del self._state.groups, self._state.groups_all, self._state.groups_val
             logger.setLevel(old_level)
@@ -3257,7 +3262,7 @@ class AutoML(BaseEstimator):
                 stacker.fit(
                     self._X_train_all,
                     self._y_train_all,
-                    **sample_weight_dict,  # NOTE: _search is after
+                    **sample_weight_dict,  # NOTE: _search is after kwargs is updated to fit_kwargs_by_estimator
                 )
                 logger.info(f"ensemble: {stacker}")
                 self._trained_estimator = stacker
@@ -3276,7 +3281,7 @@ class AutoML(BaseEstimator):
                 stacker.fit(
                     self._X_train_all,
                     self._y_train_all,
-                    **sample_weight_dict,  # NOTE: _search is after
+                    **sample_weight_dict,  # NOTE: _search is after kwargs is updated to fit_kwargs_by_estimator
                 )
                 logger.info(f"ensemble: {stacker}")
                 self._trained_estimator = stacker
diff --git a/flaml/ml.py b/flaml/ml.py
index 092a02565..81301db88 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -420,7 +420,6 @@ def get_val_loss(
     #     fit_kwargs['groups_val'] = groups_val
     #     fit_kwargs['X_val'] = X_val
     #     fit_kwargs['y_val'] = y_val
-
     estimator.fit(X_train, y_train, budget, **fit_kwargs)
     val_loss, metric_for_logging, pred_time, _ = _eval_estimator(
         config,
diff --git a/test/automl/test_constraints.py b/test/automl/test_constraints.py
index 53ce52127..a47be3078 100644
--- a/test/automl/test_constraints.py
+++ b/test/automl/test_constraints.py
@@ -21,7 +21,7 @@ def test_metric_constraints():
         "log_type": "all",
         "retrain_full": "budget",
         "keep_search_state": True,
-        "time_budget": 1,
+        "time_budget": 2,
         "pred_time_limit": 5.1e-05,
     }
     from sklearn.externals._arff import ArffException
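
Usage sketch (illustrative, not part of the patch): the snippet below shows how the `use_ray` dict and the metric-constraint format documented in the docstrings above might be exercised together. The dataset, time budget, per-trial resources, and threshold are assumptions chosen for illustration; the `pred_time` bound simply mirrors the `pred_time_limit` value used in test_constraints.py.

    # Minimal sketch, assuming flaml, ray[tune], and scikit-learn are installed.
    # All concrete values here are illustrative, not taken from this patch.
    from flaml import AutoML
    from sklearn.datasets import load_iris

    X, y = load_iris(return_X_y=True)
    automl = AutoML()
    automl.fit(
        X,
        y,
        task="classification",
        time_budget=10,
        # As a dict, use_ray is forwarded as keyword arguments to ray.tune.run,
        # e.g. to run each trial in a separate Ray process with 2 CPUs.
        use_ray={"resources_per_trial": {"cpu": 2}},
        # Each metric constraint is a 3-tuple (metric name, inequality sign,
        # threshold); this one bounds prediction time per example.
        metric_constraints=[("pred_time", "<=", 5.1e-05)],
    )
    print(automl.best_estimator)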