Mirror of https://github.com/microsoft/FLAML.git, synced 2026-02-18 06:32:25 +08:00

Compare commits (4 commits)
| Author | SHA1 | Date |
|---|---|---|
| | b206363c9a | |
| | 0925e2b308 | |
| | 3083229e40 | |
| | 0b23c3a028 | |
@@ -922,6 +922,7 @@ class AutoML:
         # set up learner search space
         for estimator_name in estimator_list:
             estimator_class = self._state.learner_classes[estimator_name]
+            estimator_class.init()
             self._search_states[estimator_name] = SearchState(
                 learner_class=estimator_class,
                 data_size=self._state.data_size, task=self._state.task,
@@ -1036,9 +1037,8 @@ class AutoML:
                 prune_attr=prune_attr,
                 min_resource=min_resource,
                 max_resource=max_resource,
-                resources_per_trial={"cpu": self._state.n_jobs,
-                                     "mem": self._mem_thres},
-                mem_size=learner_class.size)
+                config_constraints=[(learner_class.size, '<=', self._mem_thres)]
+            )
         else:
             algo = SearchAlgo(
                 metric='val_loss', mode='min', space=search_space,
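This hunk replaces the implicit memory budget (resources_per_trial['mem'] plus mem_size) with an explicit config constraint. Each constraint is a (function, operator, threshold) triple; a minimal sketch of how such a triple can be evaluated (the violates helper is hypothetical), mirroring the _should_skip logic later in this compare:

    def violates(constraint, config):
        # constraint is a (func, sign, threshold) triple,
        # e.g. (learner_class.size, '<=', mem_thres)
        func, sign, threshold = constraint
        value = func(config)
        return value > threshold if sign == '<=' else value < threshold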
@@ -237,8 +237,8 @@ class DataTransformer:
                         SimpleImputer(missing_values=np.nan, strategy='median'),
                         X_num.columns)])
                 X[num_columns] = self.transformer.fit_transform(X_num)
-            self._cat_columns, self._num_columns, self._datetime_columns = cat_columns, \
-                num_columns, datetime_columns
+            self._cat_columns, self._num_columns, self._datetime_columns = \
+                cat_columns, num_columns, datetime_columns
             self._drop = drop

         if task == 'regression':
@@ -275,4 +275,3 @@ class DataTransformer:
                 X_num.columns = range(X_num.shape[1])
                 X[num_columns] = self.transformer.transform(X_num)
         return X
-
@@ -89,9 +89,11 @@ def sklearn_metric_loss_score(
         score = log_loss(
             y_true, y_predict, labels=labels, sample_weight=sample_weight)
     elif 'micro_f1' in metric_name:
-        score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight, average='micro')
+        score = 1 - f1_score(
+            y_true, y_predict, sample_weight=sample_weight, average='micro')
     elif 'macro_f1' in metric_name:
-        score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight, average='macro')
+        score = 1 - f1_score(
+            y_true, y_predict, sample_weight=sample_weight, average='macro')
     elif 'f1' in metric_name:
         score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight)
     elif 'ap' in metric_name:
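These hunks only rewrap long lines, but they show the convention in sklearn_metric_loss_score: FLAML minimizes losses, so F1 variants are reported as 1 - f1_score. A small illustration with assumed data:

    from sklearn.metrics import f1_score

    y_true = [0, 1, 1, 2]
    y_pred = [0, 1, 2, 2]
    # micro-averaged F1 equals accuracy for single-label multiclass data:
    # 3 of 4 predictions are correct, so the loss is 1 - 0.75 = 0.25
    loss = 1 - f1_score(y_true, y_pred, average='micro')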
@@ -124,8 +124,7 @@ class BaseEstimator:
             class j
         '''
         if 'regression' in self._task:
-            print('Regression tasks do not support predict_prob')
-            raise ValueError
+            raise ValueError('Regression tasks do not support predict_prob')
        else:
            X_test = self._preprocess(X_test)
            return self._model.predict_proba(X_test)
@@ -164,6 +163,11 @@ class BaseEstimator:
         '''[optional method] relative cost compared to lightgbm'''
         return 1.0

+    @classmethod
+    def init(cls):
+        '''[optional method] initialize the class'''
+        pass
+

 class SKLearnEstimator(BaseEstimator):

@@ -633,6 +637,11 @@ class CatBoostEstimator(BaseEstimator):
     def cost_relative2lgbm(cls):
         return 15

+    @classmethod
+    def init(cls):
+        CatBoostEstimator._time_per_iter = None
+        CatBoostEstimator._train_size = 0
+
     def __init__(
             self, task='binary:logistic', n_jobs=1,
             n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params
@@ -27,6 +27,8 @@ class BlendSearch(Searcher):
     '''

     cost_attr = "time_total_s"  # cost attribute in result
+    lagrange = '_lagrange'  # suffix for lagrange-modified metric
+    penalty = 1e+10  # penalty term for constraints

     def __init__(self,
                  metric: Optional[str] = None,
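The two new class attributes drive the constraint handling added below: when metric constraints are present, the penalized objective is tracked under a lagrange-suffixed key, and penalty caps the multiplier applied to violations. A one-line sketch of the naming convention (values assumed):

    metric = 'val_loss'            # assumed metric name
    lagrange = '_lagrange'         # BlendSearch.lagrange
    modified = metric + lagrange   # 'val_loss_lagrange': the key under which
                                   # the penalized objective is stored in results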
@@ -39,9 +41,11 @@ class BlendSearch(Searcher):
                  min_resource: Optional[float] = None,
                  max_resource: Optional[float] = None,
                  reduction_factor: Optional[float] = None,
-                 resources_per_trial: Optional[dict] = None,
                  global_search_alg: Optional[Searcher] = None,
-                 mem_size: Callable[[dict], float] = None,
+                 config_constraints: Optional[
+                     List[Tuple[Callable[[dict], float], str, float]]] = None,
+                 metric_constraints: Optional[
+                     List[Tuple[str, str, float]]] = None,
                  seed: Optional[int] = 20):
         '''Constructor
@@ -82,19 +86,33 @@ class BlendSearch(Searcher):
                 prune_attr; only valid if prune_attr is not in space.
             reduction_factor: A float of the reduction factor used for
                 incremental pruning.
-            resources_per_trial: A dictionary of the resources permitted per
-                trial, such as 'mem'.
             global_search_alg: A Searcher instance as the global search
                 instance. If omitted, Optuna is used. The following algos have
                 known issues when used as global_search_alg:
                 - HyperOptSearch raises exception sometimes
                 - TuneBOHB has its own scheduler
-            mem_size: A function to estimate the memory size for a given config.
+            config_constraints: A list of config constraints to be satisfied.
+                e.g.,
+
+                .. code-block: python
+
+                    config_constraints = [(mem_size, '<=', 1024**3)]
+
+                mem_size is a function which produces a float number for the bytes
+                needed for a config.
+                It is used to skip configs which do not fit in memory.
+            metric_constraints: A list of metric constraints to be satisfied.
+                e.g., `['precision', '>=', 0.9]`
             seed: An integer of the random seed.
         '''
         self._metric, self._mode = metric, mode
         init_config = low_cost_partial_config or {}
         self._points_to_evaluate = points_to_evaluate or []
+        self._config_constraints = config_constraints
+        self._metric_constraints = metric_constraints
+        if self._metric_constraints:
+            # metric modified by lagrange
+            metric += self.lagrange
         if global_search_alg is not None:
             self._gs = global_search_alg
         elif getattr(self, '__name__', None) != 'CFO':
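Taken together, the new constructor arguments can be used roughly as follows; a hedged sketch, not part of this diff (the import path and the mem_size estimator are assumptions):

    from flaml import BlendSearch  # assumed import path for this version

    def mem_size(config):
        # hypothetical estimate of the bytes a config needs
        return config.get("n_estimators", 1) * 8 * 1024

    search_alg = BlendSearch(
        metric='val_loss', mode='min',
        config_constraints=[(mem_size, '<=', 1024**3)],   # skip configs over 1 GB
        metric_constraints=[('precision', '>=', 0.9)])    # penalize low precision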
@@ -104,10 +122,6 @@ class BlendSearch(Searcher):
         self._ls = LocalSearch(
             init_config, metric, mode, cat_hp_cost, space,
             prune_attr, min_resource, max_resource, reduction_factor, seed)
-        self._resources_per_trial = resources_per_trial
-        self._mem_size = mem_size
-        self._mem_threshold = resources_per_trial.get(
-            'mem') if resources_per_trial else None
         self._init_search()

     def set_search_properties(self,
@@ -122,6 +136,11 @@ class BlendSearch(Searcher):
         else:
             if metric:
                 self._metric = metric
+                if self._metric_constraints:
+                    # metric modified by lagrange
+                    metric += self.lagrange
+                    # TODO: don't change metric for global search methods that
+                    # can handle constraints already
             if mode:
                 self._mode = mode
             self._ls.set_search_properties(metric, mode, config)
@@ -147,6 +166,13 @@ class BlendSearch(Searcher):
         self._gs_admissible_max = self._ls_bound_max.copy()
         self._result = {}  # config_signature: tuple -> result: Dict
         self._deadline = np.inf
+        if self._metric_constraints:
+            self._metric_constraint_satisfied = False
+            self._metric_constraint_penalty = [
+                self.penalty for _ in self._metric_constraints]
+        else:
+            self._metric_constraint_satisfied = True
+            self._metric_constraint_penalty = None

     def save(self, checkpoint_path: str):
         save_object = self
@@ -171,9 +197,10 @@ class BlendSearch(Searcher):
         self._points_to_evaluate = state._points_to_evaluate
         self._gs = state._gs
         self._ls = state._ls
-        self._resources_per_trial = state._resources_per_trial
-        self._mem_size = state._mem_size
-        self._mem_threshold = state._mem_threshold
+        self._config_constraints = state._config_constraints
+        self._metric_constraints = state._metric_constraints
+        self._metric_constraint_satisfied = state._metric_constraint_satisfied
+        self._metric_constraint_penalty = state._metric_constraint_penalty

     def restore_from_dir(self, checkpoint_dir: str):
         super.restore_from_dir(checkpoint_dir)
@@ -182,6 +209,29 @@ class BlendSearch(Searcher):
                           error: bool = False):
         ''' search thread updater and cleaner
         '''
+        metric_constraint_satisfied = True
+        if result and not error and self._metric_constraints:
+            # account for metric constraints if any
+            objective = result[self._metric]
+            for i, constraint in enumerate(self._metric_constraints):
+                metric_constraint, sign, threshold = constraint
+                value = result.get(metric_constraint)
+                if value:
+                    # sign is <= or >=
+                    sign_op = 1 if sign == '<=' else -1
+                    violation = (value - threshold) * sign_op
+                    if violation > 0:
+                        # add penalty term to the metric
+                        objective += self._metric_constraint_penalty[
+                            i] * violation * self._ls.metric_op
+                        metric_constraint_satisfied = False
+                        if self._metric_constraint_penalty[i] < self.penalty:
+                            self._metric_constraint_penalty[i] += violation
+            result[self._metric + self.lagrange] = objective
+            if metric_constraint_satisfied and not self._metric_constraint_satisfied:
+                # found a feasible point
+                self._metric_constraint_penalty = [1 for _ in self._metric_constraints]
+            self._metric_constraint_satisfied |= metric_constraint_satisfied
         thread_id = self._trial_proposed_by.get(trial_id)
         if thread_id in self._search_thread_pool:
             self._search_thread_pool[thread_id].on_trial_complete(
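To see how this penalty update behaves, a worked example with assumed numbers (a post-reset penalty of 1, and minimization so metric_op is 1):

    penalty_i, metric_op = 1, 1                # penalty after a feasible-point reset
    value, threshold, sign = 0.85, 0.9, '>='   # e.g. precision must be >= 0.9
    sign_op = 1 if sign == '<=' else -1        # -1 for '>='
    violation = (value - threshold) * sign_op  # (0.85 - 0.9) * -1 = 0.05 > 0: infeasible
    objective_bump = penalty_i * violation * metric_op  # 0.05 added to the objective
    penalty_i += violation  # grows to 1.05, so repeated violations cost more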
@@ -196,23 +246,30 @@ class BlendSearch(Searcher):
                 del self._result[self._ls.config_signature(config)]
             else:  # add to result cache
                 self._result[self._ls.config_signature(config)] = result
-            # update target metric if improved
-            if (result[self._metric] - self._metric_target) * self._ls.metric_op < 0:
-                self._metric_target = result[self._metric]
-            if not thread_id and self._create_condition(result):
-                # thread creator
-                self._search_thread_pool[self._thread_count] = SearchThread(
-                    self._ls.mode,
-                    self._ls.create(config, result[self._metric], cost=result[
-                        self.cost_attr])
-                )
-                thread_id = self._thread_count
-                self._thread_count += 1
-                self._update_admissible_region(
-                    config, self._ls_bound_min, self._ls_bound_max)
-            # reset admissible region to ls bounding box
-            self._gs_admissible_min.update(self._ls_bound_min)
-            self._gs_admissible_max.update(self._ls_bound_max)
+            # update target metric if improved
+            objective = result[
+                self._metric + self.lagrange] if self._metric_constraints \
+                else result[self._metric]
+            if (objective - self._metric_target) * self._ls.metric_op < 0:
+                self._metric_target = objective
+            if not thread_id and metric_constraint_satisfied \
+                    and self._create_condition(result):
+                # thread creator
+                self._search_thread_pool[self._thread_count] = SearchThread(
+                    self._ls.mode,
+                    self._ls.create(
+                        config, objective, cost=result[self.cost_attr])
+                )
+                thread_id = self._thread_count
+                self._thread_count += 1
+                self._update_admissible_region(
+                    config, self._ls_bound_min, self._ls_bound_max)
+            elif thread_id and not self._metric_constraint_satisfied:
+                # no point has been found to satisfy metric constraint
+                self._expand_admissible_region()
+            # reset admissible region to ls bounding box
+            self._gs_admissible_min.update(self._ls_bound_min)
+            self._gs_admissible_max.update(self._ls_bound_max)
         # cleaner
         if thread_id and thread_id in self._search_thread_pool:
             # local search thread
@@ -255,12 +312,15 @@ class BlendSearch(Searcher):
                     break
         if self._search_thread_pool[thread_id].converged:
             todelete.add(thread_id)
-            for key in self._ls_bound_max:
-                self._ls_bound_max[key] += self._ls.STEPSIZE
-                self._ls_bound_min[key] -= self._ls.STEPSIZE
+            self._expand_admissible_region()
         for id in todelete:
             del self._search_thread_pool[id]

+    def _expand_admissible_region(self):
+        for key in self._ls_bound_max:
+            self._ls_bound_max[key] += self._ls.STEPSIZE
+            self._ls_bound_min[key] -= self._ls.STEPSIZE
+
     def _inferior(self, id1: int, id2: int) -> bool:
         ''' whether thread id1 is inferior to id2
         '''
@@ -280,6 +340,8 @@ class BlendSearch(Searcher):
             thread_id = self._trial_proposed_by[trial_id]
             if thread_id not in self._search_thread_pool:
                 return
+        if result and self._metric_constraints:
+            result[self._metric + self.lagrange] = result[self._metric]
         self._search_thread_pool[thread_id].on_trial_result(trial_id, result)

     def suggest(self, trial_id: str) -> Optional[Dict]:
@@ -291,6 +353,12 @@ class BlendSearch(Searcher):
                 return None
             self._use_rs = False
         config = self._search_thread_pool[choice].suggest(trial_id)
+        if choice and config is None:
+            # local search thread finishes
+            if self._search_thread_pool[choice].converged:
+                self._expand_admissible_region()
+                del self._search_thread_pool[choice]
+            return None
         # preliminary check; not checking config validation
         skip = self._should_skip(choice, trial_id, config)
         if skip:
@@ -353,20 +421,26 @@ class BlendSearch(Searcher):
         return config

     def _should_skip(self, choice, trial_id, config) -> bool:
-        ''' if config is None or config's result is known or above mem threshold
+        ''' if config is None or config's result is known or constraints are violated
             return True; o.w. return False
         '''
         if config is None:
             return True
         config_signature = self._ls.config_signature(config)
         exists = config_signature in self._result
-        # check mem constraint
-        if not exists and self._mem_threshold and self._mem_size(
-                config) > self._mem_threshold:
-            self._result[config_signature] = {
-                self._metric: np.inf * self._ls.metric_op, 'time_total_s': 1
-            }
-            exists = True
+        # check constraints
+        if not exists and self._config_constraints:
+            for constraint in self._config_constraints:
+                func, sign, threshold = constraint
+                value = func(config)
+                if (sign == '<=' and value > threshold
+                        or sign == '>=' and value < threshold):
+                    self._result[config_signature] = {
+                        self._metric: np.inf * self._ls.metric_op,
+                        'time_total_s': 1,
+                    }
+                    exists = True
+                    break
         if exists:
             if not self._use_rs:
                 result = self._result.get(config_signature)
@@ -188,12 +188,15 @@ class FLOW2(Searcher):
             self.step = self.step_ub
         # maximal # consecutive no improvements
         self.dir = 2**(self.dim)
-        self._configs = {}  # dict from trial_id to config
+        self._configs = {}  # dict from trial_id to (config, stepsize)
         self._K = 0
-        self._iter_best_config = self.trial_count = 1
+        self._iter_best_config = self.trial_count_proposed = self.trial_count_complete = 1
+        self._num_proposedby_incumbent = 0
         self._reset_times = 0
         # record intermediate trial cost
         self._trial_cost = {}
+        self._same = False  # whether the proposed config is the same as best_config
+        self._init_phrase = True  # initial phase to increase initial stepsize

     @property
     def step_lower_bound(self) -> float:
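The switch from storing config to storing (config, stepsize) lets on_trial_complete restore the step size that generated the winning trial. A toy illustration of the new bookkeeping (names and values assumed):

    configs = {}                               # stands in for self._configs
    configs["trial_1"] = ({"x": 0.3}, 0.5)     # (config, stepsize) at proposal time
    best_config, step = configs["trial_1"]     # unpacked when the trial completes best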
@@ -426,20 +429,21 @@ class FLOW2(Searcher):
         '''
         # if better, move, reset num_complete and num_proposed
         # if not better and num_complete >= 2*dim, num_allowed += 2
-        self.trial_count += 1
+        self.trial_count_complete += 1
         if not error and result:
             obj = result.get(self._metric)
             if obj:
                 obj *= self.metric_op
                 if self.best_obj is None or obj < self.best_obj:
-                    self.best_obj, self.best_config = obj, self._configs[
-                        trial_id]
+                    self.best_obj = obj
+                    self.best_config, self.step = self._configs[trial_id]
                     self.incumbent = self.normalize(self.best_config)
                     self.cost_incumbent = result.get(self.cost_attr)
                     if self._resource:
                         self._resource = self.best_config[self.prune_attr]
                     self._num_complete4incumbent = 0
                     self._cost_complete4incumbent = 0
+                    self._num_proposedby_incumbent = 0
                     self._num_allowed4incumbent = 2 * self.dim
                     self._proposed_by.clear()
                     if self._K > 0:
@@ -447,7 +451,7 @@ class FLOW2(Searcher):
                         self.step *= np.sqrt(self._K / self._oldK)
                         if self.step > self.step_ub:
                             self.step = self.step_ub
-                    self._iter_best_config = self.trial_count
+                    self._iter_best_config = self.trial_count_complete
                     return
         proposed_by = self._proposed_by.get(trial_id)
         if proposed_by == self.incumbent:
@@ -463,11 +467,6 @@ class FLOW2(Searcher):
             if self._num_complete4incumbent == self.dir and (
                     not self._resource or self._resource == self.max_resource):
                 # check stuck condition if using max resource
-                if self.step >= self.step_lower_bound:
-                    # decrease step size
-                    self._oldK = self._K if self._K else self._iter_best_config
-                    self._K = self.trial_count + 1
-                    self.step *= np.sqrt(self._oldK / self._K)
                 self._num_complete4incumbent -= 2
                 if self._num_allowed4incumbent < 2:
                     self._num_allowed4incumbent = 2
@@ -482,7 +481,7 @@ class FLOW2(Searcher):
                 obj *= self.metric_op
                 if self.best_obj is None or obj < self.best_obj:
                     self.best_obj = obj
-                    config = self._configs[trial_id]
+                    config = self._configs[trial_id][0]
                     if self.best_config != config:
                         self.best_config = config
                         if self._resource:
@@ -491,9 +490,10 @@ class FLOW2(Searcher):
                         self.cost_incumbent = result.get(self.cost_attr)
                         self._cost_complete4incumbent = 0
                         self._num_complete4incumbent = 0
+                        self._num_proposedby_incumbent = 0
                         self._num_allowed4incumbent = 2 * self.dim
                         self._proposed_by.clear()
-                        self._iter_best_config = self.trial_count
+                        self._iter_best_config = self.trial_count_complete
         cost = result.get(self.cost_attr)
         # record the cost in case it is pruned and cost info is lost
         self._trial_cost[trial_id] = cost
@@ -509,18 +509,21 @@ class FLOW2(Searcher):
         2. same resource, move from the incumbent to a random direction
         3. same resource, move from the incumbent to the opposite direction
         '''
+        self.trial_count_proposed += 1
         if self._num_complete4incumbent > 0 and self.cost_incumbent and \
                 self._resource and self._resource < self.max_resource and (
                     self._cost_complete4incumbent
                     >= self.cost_incumbent * self.resource_multiple_factor):
             # consider increasing resource using sum eval cost of complete
             # configs
             old_resource = self._resource
             self._resource = self._round(
                 self._resource * self.resource_multiple_factor)
             self.cost_incumbent *= self._resource / old_resource
             config = self.best_config.copy()
             config[self.prune_attr] = self._resource
             self._direction_tried = None
-            self._configs[trial_id] = config
+            self._configs[trial_id] = (config, self.step)
             return config
         self._num_allowed4incumbent -= 1
         move = self.incumbent.copy()
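The resource-increasing branch rescales the incumbent's cost by the actual resource ratio. A short walk-through with assumed numbers:

    resource, factor, cost_incumbent = 1000, 2, 10.0   # assumed values
    old_resource = resource
    resource = resource * factor                # 2000; the diff rounds via self._round
    cost_incumbent *= resource / old_resource   # 20.0: cost scales with the resource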
@@ -538,7 +541,42 @@ class FLOW2(Searcher):
         self._project(move)
         config = self.denormalize(move)
         self._proposed_by[trial_id] = self.incumbent
-        self._configs[trial_id] = config
+        self._configs[trial_id] = (config, self.step)
+        self._num_proposedby_incumbent += 1
+        if self._init_phrase:
+            if self._direction_tried is None:
+                if self._same:
+                    # check if the new config is different from self.best_config
+                    same = True
+                    for key, value in config.items():
+                        if key not in self.best_config or value != self.best_config[key]:
+                            same = False
+                            break
+                    if same:
+                        # increase step size
+                        self.step += self.STEPSIZE
+                        if self.step > self.step_ub:
+                            self.step = self.step_ub
+            else:
+                # check if the new config is different from self.best_config
+                same = True
+                for key, value in config.items():
+                    if key not in self.best_config or value != self.best_config[key]:
+                        same = False
+                        break
+                self._same = same
+        if self._num_proposedby_incumbent == self.dir and (
+                not self._resource or self._resource == self.max_resource):
+            # check stuck condition if using max resource
+            self._num_proposedby_incumbent -= 2
+            self._init_phrase = False
+            if self.step >= self.step_lower_bound:
+                # decrease step size
+                self._oldK = self._K if self._K else self._iter_best_config
+                self._K = self.trial_count_proposed + 1
+                self.step *= np.sqrt(self._oldK / self._K)
+            else:
+                return None
         return unflatten_dict(config)

     def _project(self, config):
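The step-size schedule above shrinks by sqrt(oldK/K) when proposals stop improving and is restored by sqrt(K/oldK) when a new best config is found. A numeric sketch with assumed counters:

    import numpy as np

    step, oldK, K = 0.5, 1, 5          # assumed values
    step *= np.sqrt(oldK / K)          # stuck: 0.5 * sqrt(1/5) ~= 0.224
    step *= np.sqrt(K / oldK)          # new best found: restored to 0.5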
@@ -3,7 +3,7 @@
  * Licensed under the MIT License. See LICENSE file in the
  * project root for license information.
 '''
-from typing import Optional, Union, List, Callable
+from typing import Optional, Union, List, Callable, Tuple
 import datetime
 import time
 try:
@@ -118,7 +118,10 @@ def run(training_function,
         local_dir: Optional[str] = None,
         num_samples: Optional[int] = 1,
         resources_per_trial: Optional[dict] = None,
-        mem_size: Callable[[dict], float] = None,
+        config_constraints: Optional[
+            List[Tuple[Callable[[dict], float], str, float]]] = None,
+        metric_constraints: Optional[
+            List[Tuple[str, str, float]]] = None,
         use_ray: Optional[bool] = False):
     '''The trigger for HPO.
@@ -210,11 +213,19 @@ def run(training_function,
             used; or a local dir to save the tuning log.
         num_samples: An integer of the number of configs to try. Defaults to 1.
         resources_per_trial: A dictionary of the hardware resources to allocate
-            per trial, e.g., `{'mem': 1024**3}`. When not using ray backend,
-            only 'mem' is used as approximate resource constraints
-            (in conjunction with mem_size).
-        mem_size: A function to estimate the memory size for a given config.
+            per trial, e.g., `{'cpu': 1}`. Only valid when using ray backend.
+        config_constraints: A list of config constraints to be satisfied.
+            e.g.,
+
+            .. code-block: python
+
+                config_constraints = [(mem_size, '<=', 1024**3)]
+
+            mem_size is a function which produces a float number for the bytes
+            needed for a config.
+            It is used to skip configs which do not fit in memory.
+        metric_constraints: A list of metric constraints to be satisfied.
+            e.g., `['precision', '>=', 0.9]`
         use_ray: A boolean of whether to use ray as the backend
     '''
     global _use_ray
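End to end, the new run() arguments mirror the nested test near the bottom of this compare; a hedged usage sketch (assuming flaml.tune exposes uniform and report as shown elsewhere in this diff):

    from flaml import tune

    def evaluate(config):
        # report the objective plus an auxiliary metric to be constrained
        tune.report(obj=(config["x"] - 2)**2, ab=config["x"])

    analysis = tune.run(
        evaluate,
        config={"x": tune.uniform(0, 10)},
        metric="obj", mode="min",
        metric_constraints=[("ab", "<=", 4)],  # keep reported 'ab' at most 4
        num_samples=-1, time_budget_s=1)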
@@ -252,8 +263,8 @@ def run(training_function,
             prune_attr=prune_attr,
             min_resource=min_resource, max_resource=max_resource,
             reduction_factor=reduction_factor,
-            resources_per_trial=resources_per_trial,
-            mem_size=mem_size)
+            config_constraints=config_constraints,
+            metric_constraints=metric_constraints)
     if time_budget_s:
         search_alg.set_search_properties(metric, mode, config={
             'time_budget_s': time_budget_s})
@@ -1 +1 @@
-__version__ = "0.3.5"
+__version__ = "0.4.0"
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -70,7 +70,7 @@ class MyRegularizedGreedyForest(SKLearnEstimator):

 def logregobj(preds, dtrain):
     labels = dtrain.get_label()
-    preds = 1.0 / (1.0 + np.exp(-preds)) # transform raw leaf weight
+    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
     grad = preds - labels
     hess = preds * (1.0 - preds)
     return grad, hess
@@ -81,7 +81,7 @@ class MyXGB1(XGBoostEstimator):
     '''

     def __init__(self, **params):
-        super().__init__(objective=logregobj,**params)
+        super().__init__(objective=logregobj, **params)


 class MyXGB2(XGBoostEstimator):
@@ -226,37 +226,41 @@ class TestAutoML(unittest.TestCase):

         automl_experiment = AutoML()
         automl_settings = {
-            "time_budget": 2,
-            "metric": 'mse',
-            "task": 'regression',
-            "log_file_name": "test/datetime_columns.log",
+            "time_budget": 2,
+            "metric": 'mse',
+            "task": 'regression',
+            "log_file_name": "test/datetime_columns.log",
             "log_training_metric": True,
-            "n_jobs": 1,
-            "model_history": True
+            "n_jobs": 1,
+            "model_history": True
         }

         fake_df = pd.DataFrame({'A': [datetime(1900, 2, 3), datetime(1900, 3, 4)]})
         y = np.array([0, 1])
-        automl_experiment.fit(X_train=fake_df, X_val=fake_df, y_train=y, y_val=y, **automl_settings)
+        automl_experiment.fit(
+            X_train=fake_df, X_val=fake_df, y_train=y, y_val=y, **automl_settings)

         y_pred = automl_experiment.predict(fake_df)
         print(y_pred)

     def test_micro_macro_f1(self):
         automl_experiment = AutoML()
         automl_experiment_macro = AutoML()

         automl_settings = {
-            "time_budget": 2,
-            "task": 'classification',
-            "log_file_name": "test/micro_macro_f1.log",
+            "time_budget": 2,
+            "task": 'classification',
+            "log_file_name": "test/micro_macro_f1.log",
             "log_training_metric": True,
-            "n_jobs": 1,
-            "model_history": True
+            "n_jobs": 1,
+            "model_history": True
         }

         X_train, y_train = load_iris(return_X_y=True)
-        automl_experiment.fit(X_train=X_train, y_train=y_train, metric='micro_f1', **automl_settings)
-        automl_experiment_macro.fit(X_train=X_train, y_train=y_train, metric='macro_f1', **automl_settings)
+        automl_experiment.fit(
+            X_train=X_train, y_train=y_train, metric='micro_f1', **automl_settings)
+        automl_experiment_macro.fit(
+            X_train=X_train, y_train=y_train, metric='macro_f1', **automl_settings)

     def test_regression(self):
test/tune/__init__.py (new file, 0 lines)
@@ -1,19 +1,21 @@
 '''Require: pip install flaml[test,ray]
 '''
 import unittest
 import time
+import os
 from sklearn.model_selection import train_test_split
 import sklearn.metrics
 import sklearn.datasets
 try:
     from ray.tune.integration.xgboost import TuneReportCheckpointCallback
 except ImportError:
-    print("skip test_tune because ray tune cannot be imported.")
+    print("skip test_xgboost because ray tune cannot be imported.")
 import xgboost as xgb

 import logging
 logger = logging.getLogger(__name__)
-logger.addHandler(logging.FileHandler('test/tune_xgboost.log'))
+os.makedirs('logs', exist_ok=True)
+logger.addHandler(logging.FileHandler('logs/tune_xgboost.log'))
 logger.setLevel(logging.INFO)


 def train_breast_cancer(config: dict):
@@ -61,6 +63,7 @@ def _test_xgboost(method='BlendSearch'):
     for n_cpu in [8]:
         start_time = time.time()
         ray.init(num_cpus=n_cpu, num_gpus=0)
+        # ray.init(address='auto')
         if method == 'BlendSearch':
             analysis = tune.run(
                 train_breast_cancer,
@@ -163,21 +166,28 @@ def test_nested():
     }

     def simple_func(config):
-        tune.report(metric=(config["cost_related"]["a"] - 4)**2
-                    * (config["b"] - 0.7)**2)
+        obj = (config["cost_related"]["a"] - 4)**2 \
+            + (config["b"] - config["cost_related"]["a"])**2
-        tune.report(obj=obj)
+        tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])

-    tune.run(
+    analysis = tune.run(
         simple_func,
         config=search_space,
         low_cost_partial_config={
             "cost_related": {"a": 1}
         },
-        metric="metric",
+        metric="obj",
         mode="min",
+        metric_constraints=[("ab", "<=", 4)],
         local_dir='logs/',
         num_samples=-1,
         time_budget_s=1)

+    best_trial = analysis.get_best_trial()
+    logger.info(f"Best config: {best_trial.config}")
+    logger.info(f"Best result: {best_trial.last_result}")


 def test_xgboost_bs():
     _test_xgboost()
@@ -224,4 +234,4 @@ def _test_xgboost_bohb():


 if __name__ == "__main__":
-    unittest.main()
+    test_xgboost_bs()