Mirror of https://github.com/microsoft/FLAML.git, synced 2026-02-18 06:32:25 +08:00

Compare commits (4 commits)
| Author | SHA1 | Date |
|---|---|---|
| | b206363c9a | |
| | 0925e2b308 | |
| | 3083229e40 | |
| | 0b23c3a028 | |
@@ -922,6 +922,7 @@ class AutoML:
         # set up learner search space
         for estimator_name in estimator_list:
             estimator_class = self._state.learner_classes[estimator_name]
+            estimator_class.init()
             self._search_states[estimator_name] = SearchState(
                 learner_class=estimator_class,
                 data_size=self._state.data_size, task=self._state.task,
@@ -1036,9 +1037,8 @@ class AutoML:
                 prune_attr=prune_attr,
                 min_resource=min_resource,
                 max_resource=max_resource,
-                resources_per_trial={"cpu": self._state.n_jobs,
-                                     "mem": self._mem_thres},
-                mem_size=learner_class.size)
+                config_constraints=[(learner_class.size, '<=', self._mem_thres)]
+            )
         else:
             algo = SearchAlgo(
                 metric='val_loss', mode='min', space=search_space,
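This hunk replaces the implicit memory budget (resources_per_trial['mem'] plus mem_size) with an explicit config constraint. Each constraint is a (function, operator, threshold) triple; a minimal sketch of how such a triple can be evaluated (the violates helper is hypothetical), mirroring the _should_skip logic later in this compare:

    def violates(constraint, config):
        # constraint is a (func, sign, threshold) triple,
        # e.g. (learner_class.size, '<=', mem_thres)
        func, sign, threshold = constraint
        value = func(config)
        return value > threshold if sign == '<=' else value < threshold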
@@ -237,8 +237,8 @@ class DataTransformer:
                         SimpleImputer(missing_values=np.nan, strategy='median'),
                         X_num.columns)])
                 X[num_columns] = self.transformer.fit_transform(X_num)
-            self._cat_columns, self._num_columns, self._datetime_columns = cat_columns, \
-                num_columns, datetime_columns
+            self._cat_columns, self._num_columns, self._datetime_columns = \
+                cat_columns, num_columns, datetime_columns
             self._drop = drop

         if task == 'regression':
@@ -275,4 +275,3 @@ class DataTransformer:
                 X_num.columns = range(X_num.shape[1])
                 X[num_columns] = self.transformer.transform(X_num)
         return X
-
@@ -89,9 +89,11 @@ def sklearn_metric_loss_score(
         score = log_loss(
             y_true, y_predict, labels=labels, sample_weight=sample_weight)
     elif 'micro_f1' in metric_name:
-        score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight, average='micro')
+        score = 1 - f1_score(
+            y_true, y_predict, sample_weight=sample_weight, average='micro')
     elif 'macro_f1' in metric_name:
-        score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight, average='macro')
+        score = 1 - f1_score(
+            y_true, y_predict, sample_weight=sample_weight, average='macro')
     elif 'f1' in metric_name:
         score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight)
     elif 'ap' in metric_name:
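These hunks only rewrap long lines, but they show the convention in sklearn_metric_loss_score: FLAML minimizes losses, so F1 variants are reported as 1 - f1_score. A small illustration with assumed data:

    from sklearn.metrics import f1_score

    y_true = [0, 1, 1, 2]
    y_pred = [0, 1, 2, 2]
    # micro-averaged F1 equals accuracy for single-label multiclass data:
    # 3 of 4 predictions are correct, so the loss is 1 - 0.75 = 0.25
    loss = 1 - f1_score(y_true, y_pred, average='micro')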
@@ -124,8 +124,7 @@ class BaseEstimator:
             class j
         '''
         if 'regression' in self._task:
-            print('Regression tasks do not support predict_prob')
-            raise ValueError
+            raise ValueError('Regression tasks do not support predict_prob')
        else:
            X_test = self._preprocess(X_test)
            return self._model.predict_proba(X_test)
@@ -164,6 +163,11 @@ class BaseEstimator:
         '''[optional method] relative cost compared to lightgbm'''
         return 1.0

+    @classmethod
+    def init(cls):
+        '''[optional method] initialize the class'''
+        pass
+

 class SKLearnEstimator(BaseEstimator):

@@ -633,6 +637,11 @@ class CatBoostEstimator(BaseEstimator):
     def cost_relative2lgbm(cls):
         return 15

+    @classmethod
+    def init(cls):
+        CatBoostEstimator._time_per_iter = None
+        CatBoostEstimator._train_size = 0
+
     def __init__(
             self, task='binary:logistic', n_jobs=1,
             n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params
@@ -27,6 +27,8 @@ class BlendSearch(Searcher):
     '''

     cost_attr = "time_total_s"  # cost attribute in result
+    lagrange = '_lagrange'  # suffix for lagrange-modified metric
+    penalty = 1e+10  # penalty term for constraints

     def __init__(self,
                  metric: Optional[str] = None,
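The two new class attributes drive the constraint handling added below: when metric constraints are present, the penalized objective is tracked under a lagrange-suffixed key, and penalty caps the multiplier applied to violations. A one-line sketch of the naming convention (values assumed):

    metric = 'val_loss'            # assumed metric name
    lagrange = '_lagrange'         # BlendSearch.lagrange
    modified = metric + lagrange   # 'val_loss_lagrange': the key under which
                                   # the penalized objective is stored in results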
@@ -39,9 +41,11 @@ class BlendSearch(Searcher):
                  min_resource: Optional[float] = None,
                  max_resource: Optional[float] = None,
                  reduction_factor: Optional[float] = None,
-                 resources_per_trial: Optional[dict] = None,
                  global_search_alg: Optional[Searcher] = None,
-                 mem_size: Callable[[dict], float] = None,
+                 config_constraints: Optional[
+                     List[Tuple[Callable[[dict], float], str, float]]] = None,
+                 metric_constraints: Optional[
+                     List[Tuple[str, str, float]]] = None,
                  seed: Optional[int] = 20):
         '''Constructor
@@ -82,19 +86,33 @@ class BlendSearch(Searcher):
                 prune_attr; only valid if prune_attr is not in space.
             reduction_factor: A float of the reduction factor used for
                 incremental pruning.
-            resources_per_trial: A dictionary of the resources permitted per
-                trial, such as 'mem'.
             global_search_alg: A Searcher instance as the global search
                 instance. If omitted, Optuna is used. The following algos have
                 known issues when used as global_search_alg:
                 - HyperOptSearch raises exception sometimes
                 - TuneBOHB has its own scheduler
-            mem_size: A function to estimate the memory size for a given config.
+            config_constraints: A list of config constraints to be satisfied.
+                e.g.,
+
+                .. code-block: python
+
+                    config_constraints = [(mem_size, '<=', 1024**3)]
+
+                mem_size is a function which produces a float number for the bytes
+                needed for a config.
+                It is used to skip configs which do not fit in memory.
+            metric_constraints: A list of metric constraints to be satisfied.
+                e.g., `['precision', '>=', 0.9]`
             seed: An integer of the random seed.
         '''
         self._metric, self._mode = metric, mode
         init_config = low_cost_partial_config or {}
         self._points_to_evaluate = points_to_evaluate or []
+        self._config_constraints = config_constraints
+        self._metric_constraints = metric_constraints
+        if self._metric_constraints:
+            # metric modified by lagrange
+            metric += self.lagrange
         if global_search_alg is not None:
             self._gs = global_search_alg
         elif getattr(self, '__name__', None) != 'CFO':
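Taken together, the new constructor arguments can be used roughly as follows; a hedged sketch, not part of this diff (the import path and the mem_size estimator are assumptions):

    from flaml import BlendSearch  # assumed import path for this version

    def mem_size(config):
        # hypothetical estimate of the bytes a config needs
        return config.get("n_estimators", 1) * 8 * 1024

    search_alg = BlendSearch(
        metric='val_loss', mode='min',
        config_constraints=[(mem_size, '<=', 1024**3)],   # skip configs over 1 GB
        metric_constraints=[('precision', '>=', 0.9)])    # penalize low precision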
@@ -104,10 +122,6 @@ class BlendSearch(Searcher):
         self._ls = LocalSearch(
             init_config, metric, mode, cat_hp_cost, space,
             prune_attr, min_resource, max_resource, reduction_factor, seed)
-        self._resources_per_trial = resources_per_trial
-        self._mem_size = mem_size
-        self._mem_threshold = resources_per_trial.get(
-            'mem') if resources_per_trial else None
         self._init_search()

     def set_search_properties(self,
@@ -122,6 +136,11 @@ class BlendSearch(Searcher):
         else:
             if metric:
                 self._metric = metric
+                if self._metric_constraints:
+                    # metric modified by lagrange
+                    metric += self.lagrange
+                    # TODO: don't change metric for global search methods that
+                    # can handle constraints already
             if mode:
                 self._mode = mode
             self._ls.set_search_properties(metric, mode, config)
@@ -147,6 +166,13 @@ class BlendSearch(Searcher):
         self._gs_admissible_max = self._ls_bound_max.copy()
         self._result = {}  # config_signature: tuple -> result: Dict
         self._deadline = np.inf
+        if self._metric_constraints:
+            self._metric_constraint_satisfied = False
+            self._metric_constraint_penalty = [
+                self.penalty for _ in self._metric_constraints]
+        else:
+            self._metric_constraint_satisfied = True
+            self._metric_constraint_penalty = None

     def save(self, checkpoint_path: str):
         save_object = self
@@ -171,9 +197,10 @@ class BlendSearch(Searcher):
         self._points_to_evaluate = state._points_to_evaluate
         self._gs = state._gs
         self._ls = state._ls
-        self._resources_per_trial = state._resources_per_trial
-        self._mem_size = state._mem_size
-        self._mem_threshold = state._mem_threshold
+        self._config_constraints = state._config_constraints
+        self._metric_constraints = state._metric_constraints
+        self._metric_constraint_satisfied = state._metric_constraint_satisfied
+        self._metric_constraint_penalty = state._metric_constraint_penalty

     def restore_from_dir(self, checkpoint_dir: str):
         super.restore_from_dir(checkpoint_dir)
@@ -182,6 +209,29 @@ class BlendSearch(Searcher):
                           error: bool = False):
         ''' search thread updater and cleaner
         '''
+        metric_constraint_satisfied = True
+        if result and not error and self._metric_constraints:
+            # account for metric constraints if any
+            objective = result[self._metric]
+            for i, constraint in enumerate(self._metric_constraints):
+                metric_constraint, sign, threshold = constraint
+                value = result.get(metric_constraint)
+                if value:
+                    # sign is <= or >=
+                    sign_op = 1 if sign == '<=' else -1
+                    violation = (value - threshold) * sign_op
+                    if violation > 0:
+                        # add penalty term to the metric
+                        objective += self._metric_constraint_penalty[
+                            i] * violation * self._ls.metric_op
+                        metric_constraint_satisfied = False
+                        if self._metric_constraint_penalty[i] < self.penalty:
+                            self._metric_constraint_penalty[i] += violation
+            result[self._metric + self.lagrange] = objective
+            if metric_constraint_satisfied and not self._metric_constraint_satisfied:
+                # found a feasible point
+                self._metric_constraint_penalty = [1 for _ in self._metric_constraints]
+            self._metric_constraint_satisfied |= metric_constraint_satisfied
         thread_id = self._trial_proposed_by.get(trial_id)
         if thread_id in self._search_thread_pool:
             self._search_thread_pool[thread_id].on_trial_complete(
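To see how this penalty update behaves, a worked example with assumed numbers (a post-reset penalty of 1, and minimization so metric_op is 1):

    penalty_i, metric_op = 1, 1                # penalty after a feasible-point reset
    value, threshold, sign = 0.85, 0.9, '>='   # e.g. precision must be >= 0.9
    sign_op = 1 if sign == '<=' else -1        # -1 for '>='
    violation = (value - threshold) * sign_op  # (0.85 - 0.9) * -1 = 0.05 > 0: infeasible
    objective_bump = penalty_i * violation * metric_op  # 0.05 added to the objective
    penalty_i += violation  # grows to 1.05, so repeated violations cost more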
@@ -196,23 +246,30 @@ class BlendSearch(Searcher):
                 del self._result[self._ls.config_signature(config)]
             else:  # add to result cache
                 self._result[self._ls.config_signature(config)] = result
-            # update target metric if improved
-            if (result[self._metric] - self._metric_target) * self._ls.metric_op < 0:
-                self._metric_target = result[self._metric]
-            if not thread_id and self._create_condition(result):
-                # thread creator
-                self._search_thread_pool[self._thread_count] = SearchThread(
-                    self._ls.mode,
-                    self._ls.create(config, result[self._metric], cost=result[
-                        self.cost_attr])
-                )
-                thread_id = self._thread_count
-                self._thread_count += 1
-                self._update_admissible_region(
-                    config, self._ls_bound_min, self._ls_bound_max)
-            # reset admissible region to ls bounding box
-            self._gs_admissible_min.update(self._ls_bound_min)
-            self._gs_admissible_max.update(self._ls_bound_max)
+            # update target metric if improved
+            objective = result[
+                self._metric + self.lagrange] if self._metric_constraints \
+                else result[self._metric]
+            if (objective - self._metric_target) * self._ls.metric_op < 0:
+                self._metric_target = objective
+            if not thread_id and metric_constraint_satisfied \
+                    and self._create_condition(result):
+                # thread creator
+                self._search_thread_pool[self._thread_count] = SearchThread(
+                    self._ls.mode,
+                    self._ls.create(
+                        config, objective, cost=result[self.cost_attr])
+                )
+                thread_id = self._thread_count
+                self._thread_count += 1
+                self._update_admissible_region(
+                    config, self._ls_bound_min, self._ls_bound_max)
+            elif thread_id and not self._metric_constraint_satisfied:
+                # no point has been found to satisfy metric constraint
+                self._expand_admissible_region()
+            # reset admissible region to ls bounding box
+            self._gs_admissible_min.update(self._ls_bound_min)
+            self._gs_admissible_max.update(self._ls_bound_max)
         # cleaner
         if thread_id and thread_id in self._search_thread_pool:
             # local search thread
@@ -255,12 +312,15 @@ class BlendSearch(Searcher):
                     break
         if self._search_thread_pool[thread_id].converged:
             todelete.add(thread_id)
-            for key in self._ls_bound_max:
-                self._ls_bound_max[key] += self._ls.STEPSIZE
-                self._ls_bound_min[key] -= self._ls.STEPSIZE
+            self._expand_admissible_region()
         for id in todelete:
             del self._search_thread_pool[id]

+    def _expand_admissible_region(self):
+        for key in self._ls_bound_max:
+            self._ls_bound_max[key] += self._ls.STEPSIZE
+            self._ls_bound_min[key] -= self._ls.STEPSIZE
+
     def _inferior(self, id1: int, id2: int) -> bool:
         ''' whether thread id1 is inferior to id2
         '''
@@ -280,6 +340,8 @@ class BlendSearch(Searcher):
             thread_id = self._trial_proposed_by[trial_id]
             if thread_id not in self._search_thread_pool:
                 return
+        if result and self._metric_constraints:
+            result[self._metric + self.lagrange] = result[self._metric]
         self._search_thread_pool[thread_id].on_trial_result(trial_id, result)

     def suggest(self, trial_id: str) -> Optional[Dict]:
@@ -291,6 +353,12 @@ class BlendSearch(Searcher):
                 return None
             self._use_rs = False
         config = self._search_thread_pool[choice].suggest(trial_id)
+        if choice and config is None:
+            # local search thread finishes
+            if self._search_thread_pool[choice].converged:
+                self._expand_admissible_region()
+                del self._search_thread_pool[choice]
+            return None
         # preliminary check; not checking config validation
         skip = self._should_skip(choice, trial_id, config)
         if skip:
@@ -353,20 +421,26 @@ class BlendSearch(Searcher):
         return config

     def _should_skip(self, choice, trial_id, config) -> bool:
-        ''' if config is None or config's result is known or above mem threshold
+        ''' if config is None or config's result is known or constraints are violated
             return True; o.w. return False
         '''
         if config is None:
             return True
         config_signature = self._ls.config_signature(config)
         exists = config_signature in self._result
-        # check mem constraint
-        if not exists and self._mem_threshold and self._mem_size(
-                config) > self._mem_threshold:
-            self._result[config_signature] = {
-                self._metric: np.inf * self._ls.metric_op, 'time_total_s': 1
-            }
-            exists = True
+        # check constraints
+        if not exists and self._config_constraints:
+            for constraint in self._config_constraints:
+                func, sign, threshold = constraint
+                value = func(config)
+                if (sign == '<=' and value > threshold
+                        or sign == '>=' and value < threshold):
+                    self._result[config_signature] = {
+                        self._metric: np.inf * self._ls.metric_op,
+                        'time_total_s': 1,
+                    }
+                    exists = True
+                    break
         if exists:
             if not self._use_rs:
                 result = self._result.get(config_signature)
@@ -188,12 +188,15 @@ class FLOW2(Searcher):
             self.step = self.step_ub
         # maximal # consecutive no improvements
         self.dir = 2**(self.dim)
-        self._configs = {}  # dict from trial_id to config
+        self._configs = {}  # dict from trial_id to (config, stepsize)
         self._K = 0
-        self._iter_best_config = self.trial_count = 1
+        self._iter_best_config = self.trial_count_proposed = self.trial_count_complete = 1
+        self._num_proposedby_incumbent = 0
         self._reset_times = 0
         # record intermediate trial cost
         self._trial_cost = {}
+        self._same = False  # whether the proposed config is the same as best_config
+        self._init_phrase = True  # initial phase to increase initial stepsize

     @property
     def step_lower_bound(self) -> float:
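The switch from storing config to storing (config, stepsize) lets on_trial_complete restore the step size that generated the winning trial. A toy illustration of the new bookkeeping (names and values assumed):

    configs = {}                               # stands in for self._configs
    configs["trial_1"] = ({"x": 0.3}, 0.5)     # (config, stepsize) at proposal time
    best_config, step = configs["trial_1"]     # unpacked when the trial completes best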
@@ -426,20 +429,21 @@ class FLOW2(Searcher):
         '''
         # if better, move, reset num_complete and num_proposed
         # if not better and num_complete >= 2*dim, num_allowed += 2
-        self.trial_count += 1
+        self.trial_count_complete += 1
         if not error and result:
             obj = result.get(self._metric)
             if obj:
                 obj *= self.metric_op
                 if self.best_obj is None or obj < self.best_obj:
-                    self.best_obj, self.best_config = obj, self._configs[
-                        trial_id]
+                    self.best_obj = obj
+                    self.best_config, self.step = self._configs[trial_id]
                     self.incumbent = self.normalize(self.best_config)
                     self.cost_incumbent = result.get(self.cost_attr)
                     if self._resource:
                         self._resource = self.best_config[self.prune_attr]
                     self._num_complete4incumbent = 0
                     self._cost_complete4incumbent = 0
+                    self._num_proposedby_incumbent = 0
                     self._num_allowed4incumbent = 2 * self.dim
                     self._proposed_by.clear()
                     if self._K > 0:
@@ -447,7 +451,7 @@ class FLOW2(Searcher):
                         self.step *= np.sqrt(self._K / self._oldK)
                         if self.step > self.step_ub:
                             self.step = self.step_ub
-                    self._iter_best_config = self.trial_count
+                    self._iter_best_config = self.trial_count_complete
                     return
         proposed_by = self._proposed_by.get(trial_id)
         if proposed_by == self.incumbent:
@@ -463,11 +467,6 @@ class FLOW2(Searcher):
             if self._num_complete4incumbent == self.dir and (
                     not self._resource or self._resource == self.max_resource):
                 # check stuck condition if using max resource
-                if self.step >= self.step_lower_bound:
-                    # decrease step size
-                    self._oldK = self._K if self._K else self._iter_best_config
-                    self._K = self.trial_count + 1
-                    self.step *= np.sqrt(self._oldK / self._K)
                 self._num_complete4incumbent -= 2
                 if self._num_allowed4incumbent < 2:
                     self._num_allowed4incumbent = 2
@@ -482,7 +481,7 @@ class FLOW2(Searcher):
                 obj *= self.metric_op
                 if self.best_obj is None or obj < self.best_obj:
                     self.best_obj = obj
-                    config = self._configs[trial_id]
+                    config = self._configs[trial_id][0]
                     if self.best_config != config:
                         self.best_config = config
                         if self._resource:
@@ -491,9 +490,10 @@ class FLOW2(Searcher):
                         self.cost_incumbent = result.get(self.cost_attr)
                         self._cost_complete4incumbent = 0
                         self._num_complete4incumbent = 0
+                        self._num_proposedby_incumbent = 0
                         self._num_allowed4incumbent = 2 * self.dim
                         self._proposed_by.clear()
-                        self._iter_best_config = self.trial_count
+                        self._iter_best_config = self.trial_count_complete
         cost = result.get(self.cost_attr)
         # record the cost in case it is pruned and cost info is lost
         self._trial_cost[trial_id] = cost
@@ -509,18 +509,21 @@ class FLOW2(Searcher):
         2. same resource, move from the incumbent to a random direction
         3. same resource, move from the incumbent to the opposite direction
         '''
+        self.trial_count_proposed += 1
         if self._num_complete4incumbent > 0 and self.cost_incumbent and \
                 self._resource and self._resource < self.max_resource and (
                     self._cost_complete4incumbent
                     >= self.cost_incumbent * self.resource_multiple_factor):
             # consider increasing resource using sum eval cost of complete
             # configs
             old_resource = self._resource
             self._resource = self._round(
                 self._resource * self.resource_multiple_factor)
             self.cost_incumbent *= self._resource / old_resource
             config = self.best_config.copy()
             config[self.prune_attr] = self._resource
             self._direction_tried = None
-            self._configs[trial_id] = config
+            self._configs[trial_id] = (config, self.step)
             return config
         self._num_allowed4incumbent -= 1
         move = self.incumbent.copy()
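The resource-increasing branch rescales the incumbent's cost by the actual resource ratio. A short walk-through with assumed numbers:

    resource, factor, cost_incumbent = 1000, 2, 10.0   # assumed values
    old_resource = resource
    resource = resource * factor                # 2000; the diff rounds via self._round
    cost_incumbent *= resource / old_resource   # 20.0: cost scales with the resource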
@@ -538,7 +541,42 @@ class FLOW2(Searcher):
         self._project(move)
         config = self.denormalize(move)
         self._proposed_by[trial_id] = self.incumbent
-        self._configs[trial_id] = config
+        self._configs[trial_id] = (config, self.step)
+        self._num_proposedby_incumbent += 1
+        if self._init_phrase:
+            if self._direction_tried is None:
+                if self._same:
+                    # check if the new config is different from self.best_config
+                    same = True
+                    for key, value in config.items():
+                        if key not in self.best_config or value != self.best_config[key]:
+                            same = False
+                            break
+                    if same:
+                        # increase step size
+                        self.step += self.STEPSIZE
+                        if self.step > self.step_ub:
+                            self.step = self.step_ub
+            else:
+                # check if the new config is different from self.best_config
+                same = True
+                for key, value in config.items():
+                    if key not in self.best_config or value != self.best_config[key]:
+                        same = False
+                        break
+                self._same = same
+        if self._num_proposedby_incumbent == self.dir and (
+                not self._resource or self._resource == self.max_resource):
+            # check stuck condition if using max resource
+            self._num_proposedby_incumbent -= 2
+            self._init_phrase = False
+            if self.step >= self.step_lower_bound:
+                # decrease step size
+                self._oldK = self._K if self._K else self._iter_best_config
+                self._K = self.trial_count_proposed + 1
+                self.step *= np.sqrt(self._oldK / self._K)
+            else:
+                return None
         return unflatten_dict(config)

     def _project(self, config):
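The step-size schedule above shrinks by sqrt(oldK/K) when proposals stop improving and is restored by sqrt(K/oldK) when a new best config is found. A numeric sketch with assumed counters:

    import numpy as np

    step, oldK, K = 0.5, 1, 5          # assumed values
    step *= np.sqrt(oldK / K)          # stuck: 0.5 * sqrt(1/5) ~= 0.224
    step *= np.sqrt(K / oldK)          # new best found: restored to 0.5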
@@ -3,7 +3,7 @@
  * Licensed under the MIT License. See LICENSE file in the
  * project root for license information.
 '''
-from typing import Optional, Union, List, Callable
+from typing import Optional, Union, List, Callable, Tuple
 import datetime
 import time
 try:
@@ -118,7 +118,10 @@ def run(training_function,
         local_dir: Optional[str] = None,
         num_samples: Optional[int] = 1,
         resources_per_trial: Optional[dict] = None,
-        mem_size: Callable[[dict], float] = None,
+        config_constraints: Optional[
+            List[Tuple[Callable[[dict], float], str, float]]] = None,
+        metric_constraints: Optional[
+            List[Tuple[str, str, float]]] = None,
         use_ray: Optional[bool] = False):
     '''The trigger for HPO.
@@ -210,11 +213,19 @@ def run(training_function,
             used; or a local dir to save the tuning log.
         num_samples: An integer of the number of configs to try. Defaults to 1.
         resources_per_trial: A dictionary of the hardware resources to allocate
-            per trial, e.g., `{'mem': 1024**3}`. When not using ray backend,
-            only 'mem' is used as approximate resource constraints
-            (in conjunction with mem_size).
-        mem_size: A function to estimate the memory size for a given config.
+            per trial, e.g., `{'cpu': 1}`. Only valid when using ray backend.
+        config_constraints: A list of config constraints to be satisfied.
+            e.g.,
+
+            .. code-block: python
+
+                config_constraints = [(mem_size, '<=', 1024**3)]
+
+            mem_size is a function which produces a float number for the bytes
+            needed for a config.
+            It is used to skip configs which do not fit in memory.
+        metric_constraints: A list of metric constraints to be satisfied.
+            e.g., `['precision', '>=', 0.9]`
         use_ray: A boolean of whether to use ray as the backend
     '''
     global _use_ray
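End to end, the new run() arguments mirror the nested test near the bottom of this compare; a hedged usage sketch (assuming flaml.tune exposes uniform and report as shown elsewhere in this diff):

    from flaml import tune

    def evaluate(config):
        # report the objective plus an auxiliary metric to be constrained
        tune.report(obj=(config["x"] - 2)**2, ab=config["x"])

    analysis = tune.run(
        evaluate,
        config={"x": tune.uniform(0, 10)},
        metric="obj", mode="min",
        metric_constraints=[("ab", "<=", 4)],  # keep reported 'ab' at most 4
        num_samples=-1, time_budget_s=1)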
@@ -252,8 +263,8 @@ def run(training_function,
             prune_attr=prune_attr,
             min_resource=min_resource, max_resource=max_resource,
             reduction_factor=reduction_factor,
-            resources_per_trial=resources_per_trial,
-            mem_size=mem_size)
+            config_constraints=config_constraints,
+            metric_constraints=metric_constraints)
     if time_budget_s:
         search_alg.set_search_properties(metric, mode, config={
             'time_budget_s': time_budget_s})
@@ -1 +1 @@
-__version__ = "0.3.5"
+__version__ = "0.4.0"
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -70,7 +70,7 @@ class MyRegularizedGreedyForest(SKLearnEstimator):

 def logregobj(preds, dtrain):
     labels = dtrain.get_label()
-    preds = 1.0 / (1.0 + np.exp(-preds)) # transform raw leaf weight
+    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
     grad = preds - labels
     hess = preds * (1.0 - preds)
     return grad, hess
@@ -81,7 +81,7 @@ class MyXGB1(XGBoostEstimator):
     '''

     def __init__(self, **params):
-        super().__init__(objective=logregobj,**params)
+        super().__init__(objective=logregobj, **params)


 class MyXGB2(XGBoostEstimator):
@@ -226,37 +226,41 @@ class TestAutoML(unittest.TestCase):

         automl_experiment = AutoML()
         automl_settings = {
-            "time_budget": 2,
-            "metric": 'mse',
-            "task": 'regression',
-            "log_file_name": "test/datetime_columns.log",
+            "time_budget": 2,
+            "metric": 'mse',
+            "task": 'regression',
+            "log_file_name": "test/datetime_columns.log",
             "log_training_metric": True,
-            "n_jobs": 1,
-            "model_history": True
+            "n_jobs": 1,
+            "model_history": True
         }

         fake_df = pd.DataFrame({'A': [datetime(1900, 2, 3), datetime(1900, 3, 4)]})
         y = np.array([0, 1])
-        automl_experiment.fit(X_train=fake_df, X_val=fake_df, y_train=y, y_val=y, **automl_settings)
+        automl_experiment.fit(
+            X_train=fake_df, X_val=fake_df, y_train=y, y_val=y, **automl_settings)

         y_pred = automl_experiment.predict(fake_df)
         print(y_pred)

     def test_micro_macro_f1(self):
         automl_experiment = AutoML()
         automl_experiment_macro = AutoML()

         automl_settings = {
-            "time_budget": 2,
-            "task": 'classification',
-            "log_file_name": "test/micro_macro_f1.log",
+            "time_budget": 2,
+            "task": 'classification',
+            "log_file_name": "test/micro_macro_f1.log",
             "log_training_metric": True,
-            "n_jobs": 1,
-            "model_history": True
+            "n_jobs": 1,
+            "model_history": True
         }

         X_train, y_train = load_iris(return_X_y=True)
-        automl_experiment.fit(X_train=X_train, y_train=y_train, metric='micro_f1', **automl_settings)
-        automl_experiment_macro.fit(X_train=X_train, y_train=y_train, metric='macro_f1', **automl_settings)
+        automl_experiment.fit(
+            X_train=X_train, y_train=y_train, metric='micro_f1', **automl_settings)
+        automl_experiment_macro.fit(
+            X_train=X_train, y_train=y_train, metric='macro_f1', **automl_settings)

     def test_regression(self):
test/tune/__init__.py (new file, 0 lines)
@@ -1,19 +1,21 @@
 '''Require: pip install flaml[test,ray]
 '''
 import unittest
 import time
+import os
 from sklearn.model_selection import train_test_split
 import sklearn.metrics
 import sklearn.datasets
 try:
     from ray.tune.integration.xgboost import TuneReportCheckpointCallback
 except ImportError:
-    print("skip test_tune because ray tune cannot be imported.")
+    print("skip test_xgboost because ray tune cannot be imported.")
 import xgboost as xgb

 import logging
 logger = logging.getLogger(__name__)
-logger.addHandler(logging.FileHandler('test/tune_xgboost.log'))
+os.makedirs('logs', exist_ok=True)
+logger.addHandler(logging.FileHandler('logs/tune_xgboost.log'))
 logger.setLevel(logging.INFO)


 def train_breast_cancer(config: dict):
@@ -61,6 +63,7 @@ def _test_xgboost(method='BlendSearch'):
     for n_cpu in [8]:
         start_time = time.time()
         ray.init(num_cpus=n_cpu, num_gpus=0)
+        # ray.init(address='auto')
         if method == 'BlendSearch':
             analysis = tune.run(
                 train_breast_cancer,
@@ -163,21 +166,28 @@ def test_nested():
     }

     def simple_func(config):
-        tune.report(metric=(config["cost_related"]["a"] - 4)**2
-                    * (config["b"] - 0.7)**2)
+        obj = (config["cost_related"]["a"] - 4)**2 \
+            + (config["b"] - config["cost_related"]["a"])**2
-        tune.report(obj=obj)
+        tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])

-    tune.run(
+    analysis = tune.run(
         simple_func,
         config=search_space,
         low_cost_partial_config={
             "cost_related": {"a": 1}
         },
-        metric="metric",
+        metric="obj",
         mode="min",
+        metric_constraints=[("ab", "<=", 4)],
         local_dir='logs/',
         num_samples=-1,
         time_budget_s=1)

+    best_trial = analysis.get_best_trial()
+    logger.info(f"Best config: {best_trial.config}")
+    logger.info(f"Best result: {best_trial.last_result}")


 def test_xgboost_bs():
     _test_xgboost()
@@ -224,4 +234,4 @@ def _test_xgboost_bohb():


 if __name__ == "__main__":
-    unittest.main()
+    test_xgboost_bs()