mirror of
https://github.com/microsoft/FLAML.git
synced 2026-02-17 22:22:26 +08:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d747800509 | ||
|
|
c35b844030 | ||
|
|
49e8f7f028 | ||
|
|
7748e0ff49 | ||
|
|
f8babac21b | ||
|
|
dcc640c1a3 | ||
|
|
515a77ac71 | ||
|
|
d402c63312 |
407
flaml/automl.py
407
flaml/automl.py
@@ -164,6 +164,9 @@ class SearchState:
|
||||
assert (
|
||||
"domain" in space
|
||||
), f"{name}'s domain is missing in the search space spec {space}"
|
||||
if space["domain"] is None:
|
||||
# don't search this hp
|
||||
continue
|
||||
self._search_space_domain[name] = space["domain"]
|
||||
|
||||
if "low_cost_init_value" in space:
|
||||
@@ -475,176 +478,176 @@ class AutoML(BaseEstimator):
|
||||
def __init__(self, **settings):
|
||||
"""Constructor.
|
||||
|
||||
Many settings in fit() can be passed to the constructor too.
|
||||
If an argument in fit() is provided, it will override the setting passed to the constructor.
|
||||
If an argument in fit() is not provided but provided in the constructor, the value passed to the constructor will be used.
|
||||
Many settings in fit() can be passed to the constructor too.
|
||||
If an argument in fit() is provided, it will override the setting passed to the constructor.
|
||||
If an argument in fit() is not provided but provided in the constructor, the value passed to the constructor will be used.
|
||||
|
||||
Args:
|
||||
metric: A string of the metric name or a function,
|
||||
e.g., 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo',
|
||||
'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2',
|
||||
'mape'. Default is 'auto'.
|
||||
If passing a customized metric function, the function needs to
|
||||
have the following signature:
|
||||
Args:
|
||||
metric: A string of the metric name or a function,
|
||||
e.g., 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo',
|
||||
'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2',
|
||||
'mape'. Default is 'auto'.
|
||||
If passing a customized metric function, the function needs to
|
||||
have the following signature:
|
||||
|
||||
```python
|
||||
def custom_metric(
|
||||
X_test, y_test, estimator, labels,
|
||||
X_train, y_train, weight_test=None, weight_train=None,
|
||||
config=None, groups_test=None, groups_train=None,
|
||||
):
|
||||
return metric_to_minimize, metrics_to_log
|
||||
```
|
||||
which returns a float number as the minimization objective,
|
||||
and a dictionary as the metrics to log. E.g.,
|
||||
```python
|
||||
def custom_metric(
|
||||
X_test, y_test, estimator, labels,
|
||||
X_train, y_train, weight_test=None, weight_train=None,
|
||||
config=None, groups_test=None, groups_train=None,
|
||||
):
|
||||
return metric_to_minimize, metrics_to_log
|
||||
```
|
||||
which returns a float number as the minimization objective,
|
||||
and a dictionary as the metrics to log. E.g.,
|
||||
|
||||
```python
|
||||
def custom_metric(
|
||||
X_val, y_val, estimator, labels,
|
||||
X_train, y_train, weight_val=None, weight_train=None,
|
||||
*args,
|
||||
):
|
||||
from sklearn.metrics import log_loss
|
||||
import time
|
||||
```python
|
||||
def custom_metric(
|
||||
X_val, y_val, estimator, labels,
|
||||
X_train, y_train, weight_val=None, weight_train=None,
|
||||
*args,
|
||||
):
|
||||
from sklearn.metrics import log_loss
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
y_pred = estimator.predict_proba(X_val)
|
||||
pred_time = (time.time() - start) / len(X_val)
|
||||
val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
|
||||
y_pred = estimator.predict_proba(X_train)
|
||||
train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
|
||||
alpha = 0.5
|
||||
return val_loss * (1 + alpha) - alpha * train_loss, {
|
||||
"val_loss": val_loss,
|
||||
"train_loss": train_loss,
|
||||
"pred_time": pred_time,
|
||||
}
|
||||
```
|
||||
task: A string of the task type, e.g.,
|
||||
'classification', 'regression', 'ts_forecast', 'rank',
|
||||
'seq-classification', 'seq-regression', 'summarization'.
|
||||
n_jobs: An integer of the number of threads for training | default=-1.
|
||||
Use all available resources when n_jobs == -1.
|
||||
log_file_name: A string of the log file name | default="". To disable logging,
|
||||
set it to be an empty string "".
|
||||
estimator_list: A list of strings for estimator names, or 'auto'
|
||||
e.g., ```['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']```
|
||||
time_budget: A float number of the time budget in seconds.
|
||||
Use -1 if no time limit.
|
||||
max_iter: An integer of the maximal number of iterations.
|
||||
sample: A boolean of whether to sample the training data during
|
||||
search.
|
||||
start = time.time()
|
||||
y_pred = estimator.predict_proba(X_val)
|
||||
pred_time = (time.time() - start) / len(X_val)
|
||||
val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
|
||||
y_pred = estimator.predict_proba(X_train)
|
||||
train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
|
||||
alpha = 0.5
|
||||
return val_loss * (1 + alpha) - alpha * train_loss, {
|
||||
"val_loss": val_loss,
|
||||
"train_loss": train_loss,
|
||||
"pred_time": pred_time,
|
||||
}
|
||||
```
|
||||
task: A string of the task type, e.g.,
|
||||
'classification', 'regression', 'ts_forecast', 'rank',
|
||||
'seq-classification', 'seq-regression', 'summarization'.
|
||||
n_jobs: An integer of the number of threads for training | default=-1.
|
||||
Use all available resources when n_jobs == -1.
|
||||
log_file_name: A string of the log file name | default="". To disable logging,
|
||||
set it to be an empty string "".
|
||||
estimator_list: A list of strings for estimator names, or 'auto'.
|
||||
e.g., ```['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']```.
|
||||
time_budget: A float number of the time budget in seconds.
|
||||
Use -1 if no time limit.
|
||||
max_iter: An integer of the maximal number of iterations.
|
||||
sample: A boolean of whether to sample the training data during
|
||||
search.
|
||||
ensemble: boolean or dict | default=False. Whether to perform
|
||||
ensemble after search. Can be a dict with keys 'passthrough'
|
||||
and 'final_estimator' to specify the passthrough and
|
||||
final_estimator in the stacker. The dict can also contain
|
||||
'n_jobs' as the key to specify the number of jobs for the stacker.
|
||||
eval_method: A string of resampling strategy, one of
|
||||
['auto', 'cv', 'holdout'].
|
||||
split_ratio: A float of the validation data percentage for holdout.
|
||||
n_splits: An integer of the number of folds for cross-validation.
|
||||
log_type: A string of the log type, one of
|
||||
['better', 'all'].
|
||||
'better' only logs configs with better loss than previous iters
|
||||
'all' logs all the tried configs.
|
||||
model_history: A boolean of whether to keep the best
|
||||
model per estimator. Make sure memory is large enough if setting to True.
|
||||
log_training_metric: A boolean of whether to log the training
|
||||
metric for each model.
|
||||
mem_thres: A float of the memory size constraint in bytes.
|
||||
pred_time_limit: A float of the prediction latency constraint in seconds.
|
||||
It refers to the average prediction time per row in validation data.
|
||||
train_time_limit: A float of the training time constraint in seconds.
|
||||
verbose: int, default=3 | Controls the verbosity, higher means more
|
||||
messages.
|
||||
retrain_full: bool or str, default=True | whether to retrain the
|
||||
selected model on the full training data when using holdout.
|
||||
True - retrain only after search finishes; False - no retraining;
|
||||
'budget' - do best effort to retrain without violating the time
|
||||
budget.
|
||||
split_type: str or splitter object, default="auto" | the data split type.
|
||||
* A valid splitter object is an instance of a derived class of scikit-learn
|
||||
[KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
|
||||
and have ``split`` and ``get_n_splits`` methods with the same signatures.
|
||||
Set eval_method to "cv" to use the splitter object.
|
||||
* Valid str options depend on different tasks.
|
||||
For classification tasks, valid choices are
|
||||
["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
|
||||
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
|
||||
"auto" -> uniform.
|
||||
For ts_forecast tasks, must be "auto" or 'time'.
|
||||
For ranking task, must be "auto" or 'group'.
|
||||
hpo_method: str, default="auto" | The hyperparameter
|
||||
optimization method. By default, CFO is used for sequential
|
||||
search and BlendSearch is used for parallel search.
|
||||
No need to set when using flaml's default search space or using
|
||||
a simple customized search space. When set to 'bs', BlendSearch
|
||||
is used. BlendSearch can be tried when the search space is
|
||||
complex, for example, containing multiple disjoint, discontinuous
|
||||
subspaces. When set to 'random', random search is used.
|
||||
starting_points: A dictionary or a str to specify the starting hyperparameter
|
||||
config for the estimators | default="static".
|
||||
If str:
|
||||
- if "data", use data-dependent defaults;
|
||||
- if "data:path" use data-dependent defaults which are stored at path;
|
||||
- if "static", use data-independent defaults.
|
||||
If dict, keys are the name of the estimators, and values are the starting
|
||||
hyperparameter configurations for the corresponding estimators.
|
||||
The value can be a single hyperparameter configuration dict or a list
|
||||
of hyperparameter configuration dicts.
|
||||
In the following code example, we get starting_points from the
|
||||
`automl` object and use them in the `new_automl` object.
|
||||
e.g.,
|
||||
eval_method: A string of resampling strategy, one of
|
||||
['auto', 'cv', 'holdout'].
|
||||
split_ratio: A float of the validation data percentage for holdout.
|
||||
n_splits: An integer of the number of folds for cross-validation.
|
||||
log_type: A string of the log type, one of
|
||||
['better', 'all'].
|
||||
'better' only logs configs with better loss than previous iters
|
||||
'all' logs all the tried configs.
|
||||
model_history: A boolean of whether to keep the best
|
||||
model per estimator. Make sure memory is large enough if setting to True.
|
||||
log_training_metric: A boolean of whether to log the training
|
||||
metric for each model.
|
||||
mem_thres: A float of the memory size constraint in bytes.
|
||||
pred_time_limit: A float of the prediction latency constraint in seconds.
|
||||
It refers to the average prediction time per row in validation data.
|
||||
train_time_limit: A float of the training time constraint in seconds.
|
||||
verbose: int, default=3 | Controls the verbosity, higher means more
|
||||
messages.
|
||||
retrain_full: bool or str, default=True | whether to retrain the
|
||||
selected model on the full training data when using holdout.
|
||||
True - retrain only after search finishes; False - no retraining;
|
||||
'budget' - do best effort to retrain without violating the time
|
||||
budget.
|
||||
split_type: str or splitter object, default="auto" | the data split type.
|
||||
* A valid splitter object is an instance of a derived class of scikit-learn
|
||||
[KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
|
||||
and have ``split`` and ``get_n_splits`` methods with the same signatures.
|
||||
Set eval_method to "cv" to use the splitter object.
|
||||
* Valid str options depend on different tasks.
|
||||
For classification tasks, valid choices are
|
||||
["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
|
||||
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
|
||||
"auto" -> uniform.
|
||||
For ts_forecast tasks, must be "auto" or 'time'.
|
||||
For ranking task, must be "auto" or 'group'.
|
||||
hpo_method: str, default="auto" | The hyperparameter
|
||||
optimization method. By default, CFO is used for sequential
|
||||
search and BlendSearch is used for parallel search.
|
||||
No need to set when using flaml's default search space or using
|
||||
a simple customized search space. When set to 'bs', BlendSearch
|
||||
is used. BlendSearch can be tried when the search space is
|
||||
complex, for example, containing multiple disjoint, discontinuous
|
||||
subspaces. When set to 'random', random search is used.
|
||||
starting_points: A dictionary or a str to specify the starting hyperparameter
|
||||
config for the estimators | default="static".
|
||||
If str:
|
||||
- if "data", use data-dependent defaults;
|
||||
- if "data:path" use data-dependent defaults which are stored at path;
|
||||
- if "static", use data-independent defaults.
|
||||
If dict, keys are the name of the estimators, and values are the starting
|
||||
hyperparameter configurations for the corresponding estimators.
|
||||
The value can be a single hyperparameter configuration dict or a list
|
||||
of hyperparameter configuration dicts.
|
||||
In the following code example, we get starting_points from the
|
||||
`automl` object and use them in the `new_automl` object.
|
||||
e.g.,
|
||||
|
||||
```python
|
||||
from flaml import AutoML
|
||||
automl = AutoML()
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl.fit(X_train, y_train)
|
||||
starting_points = automl.best_config_per_estimator
|
||||
```python
|
||||
from flaml import AutoML
|
||||
automl = AutoML()
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl.fit(X_train, y_train)
|
||||
starting_points = automl.best_config_per_estimator
|
||||
|
||||
new_automl = AutoML()
|
||||
new_automl.fit(X_train, y_train, starting_points=starting_points)
|
||||
```
|
||||
new_automl = AutoML()
|
||||
new_automl.fit(X_train, y_train, starting_points=starting_points)
|
||||
```
|
||||
|
||||
seed: int or None, default=None | The random seed for hpo.
|
||||
n_concurrent_trials: [Experimental] int, default=1 | The number of
|
||||
concurrent trials. When n_concurrent_trials > 1, flaml performs
|
||||
[parallel tuning](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#parallel-tuning)
|
||||
and installation of ray is required: `pip install flaml[ray]`.
|
||||
keep_search_state: boolean, default=False | Whether to keep data needed
|
||||
for model search after fit(). By default the state is deleted for
|
||||
space saving.
|
||||
early_stop: boolean, default=False | Whether to stop early if the
|
||||
search is considered to converge.
|
||||
append_log: boolean, default=False | Whether to directly append the log
|
||||
records to the input log file if it exists.
|
||||
auto_augment: boolean, default=True | Whether to automatically
|
||||
augment rare classes.
|
||||
min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample
|
||||
size when sample=True.
|
||||
use_ray: boolean, default=False | Whether to use ray to run the training
|
||||
in separate processes. This can be used to prevent OOM for large
|
||||
datasets, but will incur more overhead in time. Only use it if
|
||||
you run into OOM failures.
|
||||
metric_constraints: list, default=[] | The list of metric constraints.
|
||||
Each element in this list is a 3-tuple, which shall be expressed
|
||||
in the following format: the first element of the 3-tuple is the name of the
|
||||
metric, the second element is the inequality sign chosen from ">=" and "<=",
|
||||
and the third element is the constraint value. E.g., `('val_loss', '<=', 0.1)`.
|
||||
Note that all the metric names in metric_constraints need to be reported via
|
||||
the metrics_to_log dictionary returned by a customized metric function.
|
||||
The customized metric function shall be provided via the `metric` key word
|
||||
argument of the fit() function or the automl constructor.
|
||||
Find an example in the 4th constraint type in this [doc](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#constraint).
|
||||
If `pred_time_limit` is provided as one of keyword arguments to fit() function or
|
||||
the automl constructor, flaml will automatically (and under the hood)
|
||||
add it as an additional element in the metric_constraints. Essentially 'pred_time_limit'
|
||||
specifies a constraint about the prediction latency constraint in seconds.
|
||||
custom_hp: dict, default=None | The custom search space specified by user
|
||||
Each key is the estimator name, each value is a dict of the custom search space for that estimator. Notice the
|
||||
domain of the custom search space can either be a value or a sample.Domain object.
|
||||
e.g.,
|
||||
seed: int or None, default=None | The random seed for hpo.
|
||||
n_concurrent_trials: [Experimental] int, default=1 | The number of
|
||||
concurrent trials. When n_concurrent_trials > 1, flaml performs
|
||||
[parallel tuning](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#parallel-tuning)
|
||||
and installation of ray is required: `pip install flaml[ray]`.
|
||||
keep_search_state: boolean, default=False | Whether to keep data needed
|
||||
for model search after fit(). By default the state is deleted for
|
||||
space saving.
|
||||
early_stop: boolean, default=False | Whether to stop early if the
|
||||
search is considered to converge.
|
||||
append_log: boolean, default=False | Whether to directly append the log
|
||||
records to the input log file if it exists.
|
||||
auto_augment: boolean, default=True | Whether to automatically
|
||||
augment rare classes.
|
||||
min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample
|
||||
size when sample=True.
|
||||
use_ray: boolean, default=False | Whether to use ray to run the training
|
||||
in separate processes. This can be used to prevent OOM for large
|
||||
datasets, but will incur more overhead in time. Only use it if
|
||||
you run into OOM failures.
|
||||
metric_constraints: list, default=[] | The list of metric constraints.
|
||||
Each element in this list is a 3-tuple, which shall be expressed
|
||||
in the following format: the first element of the 3-tuple is the name of the
|
||||
metric, the second element is the inequality sign chosen from ">=" and "<=",
|
||||
and the third element is the constraint value. E.g., `('val_loss', '<=', 0.1)`.
|
||||
Note that all the metric names in metric_constraints need to be reported via
|
||||
the metrics_to_log dictionary returned by a customized metric function.
|
||||
The customized metric function shall be provided via the `metric` key word
|
||||
argument of the fit() function or the automl constructor.
|
||||
Find an example in the 4th constraint type in this [doc](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#constraint).
|
||||
If `pred_time_limit` is provided as one of keyword arguments to fit() function or
|
||||
the automl constructor, flaml will automatically (and under the hood)
|
||||
add it as an additional element in the metric_constraints. Essentially 'pred_time_limit'
|
||||
specifies a constraint about the prediction latency constraint in seconds.
|
||||
custom_hp: dict, default=None | The custom search space specified by user
|
||||
Each key is the estimator name, each value is a dict of the custom search space for that estimator. Notice the
|
||||
domain of the custom search space can either be a value or a sample.Domain object.
|
||||
e.g.,
|
||||
|
||||
```python
|
||||
custom_hp = {
|
||||
@@ -657,26 +660,27 @@ class AutoML(BaseEstimator):
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name.
|
||||
e.g.,
|
||||
```
|
||||
fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name.
|
||||
e.g.,
|
||||
|
||||
```python
|
||||
fit_kwargs_by_estimator = {
|
||||
"transformer": {
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
"fp16": False,
|
||||
}
|
||||
}
|
||||
```
|
||||
```python
|
||||
fit_kwargs_by_estimator = {
|
||||
"transformer": {
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
"fp16": False,
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
"""
|
||||
self._track_iter = 0
|
||||
self._state = AutoMLState()
|
||||
self._state.learner_classes = {}
|
||||
self._settings = settings
|
||||
settings["time_budget"] = settings.get("time_budget", 60)
|
||||
# no budget by default
|
||||
settings["time_budget"] = settings.get("time_budget", -1)
|
||||
settings["task"] = settings.get("task", "classification")
|
||||
settings["n_jobs"] = settings.get("n_jobs", -1)
|
||||
settings["eval_method"] = settings.get("eval_method", "auto")
|
||||
@@ -686,7 +690,7 @@ class AutoML(BaseEstimator):
|
||||
settings["metric"] = settings.get("metric", "auto")
|
||||
settings["estimator_list"] = settings.get("estimator_list", "auto")
|
||||
settings["log_file_name"] = settings.get("log_file_name", "")
|
||||
settings["max_iter"] = settings.get("max_iter", 1000000)
|
||||
settings["max_iter"] = settings.get("max_iter") # no budget by default
|
||||
settings["sample"] = settings.get("sample", True)
|
||||
settings["ensemble"] = settings.get("ensemble", False)
|
||||
settings["log_type"] = settings.get("log_type", "better")
|
||||
@@ -1558,6 +1562,7 @@ class AutoML(BaseEstimator):
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name.
|
||||
e.g.,
|
||||
|
||||
@@ -2060,17 +2065,18 @@ class AutoML(BaseEstimator):
|
||||
task: A string of the task type, e.g.,
|
||||
'classification', 'regression', 'ts_forecast_regression',
|
||||
'ts_forecast_classification', 'rank', 'seq-classification',
|
||||
'seq-regression', 'summarization'
|
||||
'seq-regression', 'summarization'.
|
||||
n_jobs: An integer of the number of threads for training | default=-1.
|
||||
Use all available resources when n_jobs == -1.
|
||||
log_file_name: A string of the log file name | default="". To disable logging,
|
||||
set it to be an empty string "".
|
||||
estimator_list: A list of strings for estimator names, or 'auto'
|
||||
e.g., ```['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']```
|
||||
|
||||
estimator_list: A list of strings for estimator names, or 'auto'.
|
||||
e.g., ```['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']```.
|
||||
time_budget: A float number of the time budget in seconds.
|
||||
Use -1 if no time limit.
|
||||
max_iter: An integer of the maximal number of iterations.
|
||||
NOTE: when both time_budget and max_iter are unspecified,
|
||||
only one model will be trained per estimator.
|
||||
sample: A boolean of whether to sample the training data during
|
||||
search.
|
||||
ensemble: boolean or dict | default=False. Whether to perform
|
||||
@@ -2251,7 +2257,9 @@ class AutoML(BaseEstimator):
|
||||
else log_file_name
|
||||
)
|
||||
max_iter = self._settings.get("max_iter") if max_iter is None else max_iter
|
||||
sample = self._settings.get("sample") if sample is None else sample
|
||||
sample_is_none = sample is None
|
||||
if sample_is_none:
|
||||
sample = self._settings.get("sample")
|
||||
ensemble = self._settings.get("ensemble") if ensemble is None else ensemble
|
||||
log_type = log_type or self._settings.get("log_type")
|
||||
model_history = (
|
||||
@@ -2279,11 +2287,9 @@ class AutoML(BaseEstimator):
|
||||
split_type = split_type or self._settings.get("split_type")
|
||||
hpo_method = hpo_method or self._settings.get("hpo_method")
|
||||
learner_selector = learner_selector or self._settings.get("learner_selector")
|
||||
starting_points = (
|
||||
self._settings.get("starting_points")
|
||||
if starting_points is None
|
||||
else starting_points
|
||||
)
|
||||
no_starting_points = starting_points is None
|
||||
if no_starting_points:
|
||||
starting_points = self._settings.get("starting_points")
|
||||
n_concurrent_trials = n_concurrent_trials or self._settings.get(
|
||||
"n_concurrent_trials"
|
||||
)
|
||||
@@ -2295,6 +2301,8 @@ class AutoML(BaseEstimator):
|
||||
early_stop = (
|
||||
self._settings.get("early_stop") if early_stop is None else early_stop
|
||||
)
|
||||
# no search budget is provided?
|
||||
no_budget = time_budget == -1 and max_iter is None and not early_stop
|
||||
append_log = (
|
||||
self._settings.get("append_log") if append_log is None else append_log
|
||||
)
|
||||
@@ -2373,14 +2381,6 @@ class AutoML(BaseEstimator):
|
||||
self._retrain_in_budget = retrain_full == "budget" and (
|
||||
eval_method == "holdout" and self._state.X_val is None
|
||||
)
|
||||
self._state.retrain_final = (
|
||||
retrain_full is True
|
||||
and eval_method == "holdout"
|
||||
and (self._state.X_val is None or self._use_ray is not False)
|
||||
or eval_method == "cv"
|
||||
and (max_iter > 0 or retrain_full is True)
|
||||
or max_iter == 1
|
||||
)
|
||||
self._auto_augment = auto_augment
|
||||
self._min_sample_size = min_sample_size
|
||||
self._prepare_data(eval_method, split_ratio, n_splits)
|
||||
@@ -2485,7 +2485,32 @@ class AutoML(BaseEstimator):
|
||||
estimator_list += ["arima", "sarimax"]
|
||||
elif "regression" != self._state.task:
|
||||
estimator_list += ["lrl1"]
|
||||
|
||||
# When no search budget is specified
|
||||
if no_budget:
|
||||
max_iter = len(estimator_list)
|
||||
self._learner_selector = "roundrobin"
|
||||
if sample_is_none:
|
||||
self._sample = False
|
||||
if no_starting_points:
|
||||
starting_points = "data"
|
||||
logger.warning(
|
||||
"No search budget is provided via time_budget or max_iter."
|
||||
" Training only one model per estimator."
|
||||
" To tune hyperparameters for each estimator,"
|
||||
" please provide budget either via time_budget or max_iter."
|
||||
)
|
||||
elif max_iter is None:
|
||||
# set to a large number
|
||||
max_iter = 1000000
|
||||
self._state.retrain_final = (
|
||||
retrain_full is True
|
||||
and eval_method == "holdout"
|
||||
and (X_val is None or self._use_ray is not False)
|
||||
or eval_method == "cv"
|
||||
and (max_iter > 0 or retrain_full is True)
|
||||
or max_iter == 1
|
||||
)
|
||||
# add custom learner
|
||||
for estimator_name in estimator_list:
|
||||
if estimator_name not in self._state.learner_classes:
|
||||
self.add_learner(
|
||||
|
||||
@@ -25,9 +25,14 @@ def meta_feature(task, X_train, y_train, meta_feature_names):
|
||||
elif each_feature_name == "NumberOfClasses":
|
||||
this_feature.append(len(np.unique(y_train)) if is_classification else 0)
|
||||
elif each_feature_name == "PercentageOfNumericFeatures":
|
||||
this_feature.append(
|
||||
X_train.select_dtypes(include=np.number).shape[1] / n_feat
|
||||
)
|
||||
try:
|
||||
# this feature is only supported for dataframe
|
||||
this_feature.append(
|
||||
X_train.select_dtypes(include=np.number).shape[1] / n_feat
|
||||
)
|
||||
except AttributeError:
|
||||
# 'numpy.ndarray' object has no attribute 'select_dtypes'
|
||||
this_feature.append(1) # all features are numeric
|
||||
else:
|
||||
raise ValueError("Feature {} not implemented. ".format(each_feature_name))
|
||||
|
||||
|
||||
@@ -547,7 +547,6 @@ class TransformersEstimator(BaseEstimator):
|
||||
add_prefix_space=True
|
||||
if "roberta" in self._training_args.model_path
|
||||
else False, # If roberta model, must set add_prefix_space to True to avoid the assertion error at
|
||||
|
||||
# https://github.com/huggingface/transformers/blob/main/src/transformers/models/roberta/tokenization_roberta_fast.py#L249
|
||||
)
|
||||
|
||||
@@ -956,10 +955,6 @@ class LGBMEstimator(BaseEstimator):
|
||||
"domain": tune.loguniform(lower=1 / 1024, upper=1.0),
|
||||
"init_value": 0.1,
|
||||
},
|
||||
# 'subsample': {
|
||||
# 'domain': tune.uniform(lower=0.1, upper=1.0),
|
||||
# 'init_value': 1.0,
|
||||
# },
|
||||
"log_max_bin": { # log transformed with base 2
|
||||
"domain": tune.lograndint(lower=3, upper=11),
|
||||
"init_value": 8,
|
||||
|
||||
@@ -311,25 +311,6 @@ class BlendSearch(Searcher):
|
||||
)
|
||||
self._gs_admissible_min = self._ls_bound_min.copy()
|
||||
self._gs_admissible_max = self._ls_bound_max.copy()
|
||||
# config_signature: tuple -> result: Dict
|
||||
self._result = (
|
||||
{
|
||||
self._ls.config_signature(
|
||||
*self._ls.complete_config(
|
||||
self._evaluated_points[i],
|
||||
self._ls_bound_min,
|
||||
self._ls_bound_max,
|
||||
)
|
||||
): {
|
||||
self._metric: r,
|
||||
self.cost_attr: 1,
|
||||
"config": self._evaluated_points[i],
|
||||
}
|
||||
for i, r in enumerate(self._all_rewards)
|
||||
}
|
||||
if self._evaluated_rewards # store all the evaluated rewards
|
||||
else {}
|
||||
)
|
||||
|
||||
if self._metric_constraints:
|
||||
self._metric_constraint_satisfied = False
|
||||
@@ -340,6 +321,14 @@ class BlendSearch(Searcher):
|
||||
self._metric_constraint_satisfied = True
|
||||
self._metric_constraint_penalty = None
|
||||
self.best_resource = self._ls.min_resource
|
||||
i = 0
|
||||
# config_signature: tuple -> result: Dict
|
||||
self._result = {}
|
||||
while self._evaluated_rewards:
|
||||
# go over the evaluated rewards
|
||||
trial_id = f"trial_for_evaluated_{i}"
|
||||
self.suggest(trial_id)
|
||||
i += 1
|
||||
|
||||
def save(self, checkpoint_path: str):
|
||||
"""save states to a checkpoint path."""
|
||||
@@ -747,8 +736,8 @@ class BlendSearch(Searcher):
|
||||
self._search_thread_pool[0].running += 1
|
||||
self._subspace[trial_id] = space
|
||||
if reward is not None:
|
||||
# result = {self._metric: reward, self.cost_attr: 1, "config": config}
|
||||
result = self._result[config_signature]
|
||||
result = {self._metric: reward, self.cost_attr: 1, "config": config}
|
||||
# result = self._result[config_signature]
|
||||
self.on_trial_complete(trial_id, result)
|
||||
return None
|
||||
if self._use_incumbent_result_in_evaluation:
|
||||
|
||||
@@ -58,10 +58,6 @@ class FLOW2(Searcher):
|
||||
metric: A string of the metric name to optimize for.
|
||||
mode: A string in ['min', 'max'] to specify the objective as
|
||||
minimization or maximization.
|
||||
cat_hp_cost: A dictionary from a subset of categorical dimensions
|
||||
to the relative cost of each choice.
|
||||
E.g., ```{'tree_method': [1, 1, 2]}```. I.e., the relative cost
|
||||
of the three choices of 'tree_method' is 1, 1 and 2 respectively.
|
||||
space: A dictionary to specify the search space.
|
||||
resource_attr: A string to specify the resource dimension and the best
|
||||
performance is assumed to be at the max_resource.
|
||||
|
||||
@@ -371,7 +371,7 @@ def run(
|
||||
)
|
||||
else:
|
||||
if metric is None or mode is None:
|
||||
metric = metric or search_alg.metric
|
||||
metric = metric or search_alg.metric or DEFAULT_METRIC
|
||||
mode = mode or search_alg.mode
|
||||
if ray_import:
|
||||
from ray.tune.suggest import ConcurrencyLimiter
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "1.0.2"
|
||||
__version__ = "1.0.3"
|
||||
|
||||
7
setup.py
7
setup.py
@@ -14,7 +14,7 @@ with open(os.path.join(here, "flaml/version.py")) as fp:
|
||||
__version__ = version["__version__"]
|
||||
|
||||
install_requires = [
|
||||
"NumPy>=1.16.2",
|
||||
"NumPy>=1.17.0rc1",
|
||||
"lightgbm>=2.3.1",
|
||||
"xgboost>=0.90,<=1.3.3",
|
||||
"scipy>=1.4.1",
|
||||
@@ -33,6 +33,10 @@ setuptools.setup(
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/microsoft/FLAML",
|
||||
packages=setuptools.find_packages(include=["flaml*"]),
|
||||
package_data={
|
||||
"flaml.default": ["*/*.json"],
|
||||
},
|
||||
include_package_data=True,
|
||||
install_requires=install_requires,
|
||||
extras_require={
|
||||
"notebook": [
|
||||
@@ -62,6 +66,7 @@ setuptools.setup(
|
||||
"rouge_score",
|
||||
"hcrystalball==0.1.10",
|
||||
"seqeval",
|
||||
"protobuf<4", # to prevent TypeError in ray
|
||||
],
|
||||
"catboost": ["catboost>=0.26"],
|
||||
"blendsearch": ["optuna==2.8.0"],
|
||||
|
||||
66
test/automl/test_custom_hp.py
Normal file
66
test/automl/test_custom_hp.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import sys
|
||||
import pytest
|
||||
from flaml import AutoML, tune
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
|
||||
def test_custom_hp_nlp():
|
||||
from test.nlp.utils import get_toy_data_seqclassification, get_automl_settings
|
||||
|
||||
X_train, y_train, X_val, y_val, X_test = get_toy_data_seqclassification()
|
||||
|
||||
automl = AutoML()
|
||||
|
||||
automl_settings = get_automl_settings()
|
||||
automl_settings["custom_hp"] = None
|
||||
automl_settings["custom_hp"] = {
|
||||
"transformer": {
|
||||
"model_path": {
|
||||
"domain": tune.choice(["google/electra-small-discriminator"]),
|
||||
},
|
||||
"num_train_epochs": {"domain": 3},
|
||||
}
|
||||
}
|
||||
automl_settings["fit_kwargs_by_estimator"] = {
|
||||
"transformer": {
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
"fp16": False,
|
||||
}
|
||||
}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
|
||||
def test_custom_hp():
|
||||
from sklearn.datasets import load_iris
|
||||
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl = AutoML()
|
||||
custom_hp = {
|
||||
"xgboost": {
|
||||
"n_estimators": {
|
||||
"domain": tune.lograndint(lower=1, upper=100),
|
||||
"low_cost_init_value": 1,
|
||||
},
|
||||
},
|
||||
"rf": {
|
||||
"max_leaves": {
|
||||
"domain": None, # disable search
|
||||
},
|
||||
},
|
||||
"lgbm": {
|
||||
"subsample": {
|
||||
"domain": tune.uniform(lower=0.1, upper=1.0),
|
||||
"init_value": 1.0,
|
||||
},
|
||||
"subsample_freq": {
|
||||
"domain": 1, # subsample_freq must > 0 to enable subsample
|
||||
},
|
||||
},
|
||||
}
|
||||
automl.fit(X_train, y_train, custom_hp=custom_hp, time_budget=2)
|
||||
print(automl.best_config_per_estimator)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_custom_hp()
|
||||
@@ -167,9 +167,8 @@ def test_multivariate_forecast_num(budget=5):
|
||||
split_idx = num_samples - time_horizon
|
||||
train_df = df[:split_idx]
|
||||
test_df = df[split_idx:]
|
||||
X_test = test_df[
|
||||
["timeStamp", "temp", "precip"]
|
||||
] # test dataframe must contain values for the regressors / multivariate variables
|
||||
# test dataframe must contain values for the regressors / multivariate variables
|
||||
X_test = test_df[["timeStamp", "temp", "precip"]]
|
||||
y_test = test_df["demand"]
|
||||
# return
|
||||
automl = AutoML()
|
||||
|
||||
@@ -48,6 +48,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
|
||||
"Training duration of best run: {0:.4g} s".format(automl.best_config_train_time)
|
||||
)
|
||||
print(automl.model.estimator)
|
||||
print(automl.best_config_per_estimator)
|
||||
print("time taken to find best model:", automl.time_to_find_best_model)
|
||||
""" pickle and save the automl object """
|
||||
import pickle
|
||||
@@ -92,6 +93,11 @@ def test_automl_array():
|
||||
test_automl(5, "array", "bs")
|
||||
|
||||
|
||||
def _test_nobudget():
|
||||
# needs large RAM to run this test
|
||||
test_automl(-1)
|
||||
|
||||
|
||||
def test_mlflow():
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
@@ -8,7 +8,7 @@ from flaml import tune
|
||||
|
||||
class TestWarmStart(unittest.TestCase):
|
||||
def test_fit_w_freezinghp_starting_point(self, as_frame=True):
|
||||
automl_experiment = AutoML()
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "accuracy",
|
||||
@@ -24,20 +24,20 @@ class TestWarmStart(unittest.TestCase):
|
||||
# test drop column
|
||||
X_train.columns = range(X_train.shape[1])
|
||||
X_train[X_train.shape[1]] = np.zeros(len(y_train))
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
automl_val_accuracy = 1.0 - automl_experiment.best_loss
|
||||
print("Best ML leaner:", automl_experiment.best_estimator)
|
||||
print("Best hyperparmeter config:", automl_experiment.best_config)
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
automl_val_accuracy = 1.0 - automl.best_loss
|
||||
print("Best ML leaner:", automl.best_estimator)
|
||||
print("Best hyperparmeter config:", automl.best_config)
|
||||
print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
|
||||
print(
|
||||
"Training duration of best run: {0:.4g} s".format(
|
||||
automl_experiment.best_config_train_time
|
||||
automl.best_config_train_time
|
||||
)
|
||||
)
|
||||
# 1. Get starting points from previous experiments.
|
||||
starting_points = automl_experiment.best_config_per_estimator
|
||||
starting_points = automl.best_config_per_estimator
|
||||
print("starting_points", starting_points)
|
||||
print("loss of the starting_points", automl_experiment.best_loss_per_estimator)
|
||||
print("loss of the starting_points", automl.best_loss_per_estimator)
|
||||
starting_point = starting_points["lgbm"]
|
||||
hps_to_freeze = ["colsample_bytree", "reg_alpha", "reg_lambda", "log_max_bin"]
|
||||
|
||||
@@ -85,8 +85,8 @@ class TestWarmStart(unittest.TestCase):
|
||||
return space
|
||||
|
||||
new_estimator_name = "large_lgbm"
|
||||
new_automl_experiment = AutoML()
|
||||
new_automl_experiment.add_learner(
|
||||
new_automl = AutoML()
|
||||
new_automl.add_learner(
|
||||
learner_name=new_estimator_name, learner_class=MyPartiallyFreezedLargeLGBM
|
||||
)
|
||||
|
||||
@@ -103,22 +103,26 @@ class TestWarmStart(unittest.TestCase):
|
||||
"starting_points": {new_estimator_name: starting_point},
|
||||
}
|
||||
|
||||
new_automl_experiment.fit(
|
||||
X_train=X_train, y_train=y_train, **automl_settings_resume
|
||||
)
|
||||
new_automl.fit(X_train=X_train, y_train=y_train, **automl_settings_resume)
|
||||
|
||||
new_automl_val_accuracy = 1.0 - new_automl_experiment.best_loss
|
||||
print("Best ML leaner:", new_automl_experiment.best_estimator)
|
||||
print("Best hyperparmeter config:", new_automl_experiment.best_config)
|
||||
new_automl_val_accuracy = 1.0 - new_automl.best_loss
|
||||
print("Best ML leaner:", new_automl.best_estimator)
|
||||
print("Best hyperparmeter config:", new_automl.best_config)
|
||||
print(
|
||||
"Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
|
||||
)
|
||||
print(
|
||||
"Training duration of best run: {0:.4g} s".format(
|
||||
new_automl_experiment.best_config_train_time
|
||||
new_automl.best_config_train_time
|
||||
)
|
||||
)
|
||||
|
||||
def test_nobudget(self):
|
||||
automl = AutoML()
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl.fit(X_train, y_train)
|
||||
print(automl.best_config_per_estimator)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
import sys
|
||||
import pytest
|
||||
from utils import get_toy_data_seqclassification, get_automl_settings
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
|
||||
def test_custom_hp_nlp():
|
||||
from flaml import AutoML
|
||||
import flaml
|
||||
|
||||
X_train, y_train, X_val, y_val, X_test = get_toy_data_seqclassification()
|
||||
|
||||
automl = AutoML()
|
||||
|
||||
automl_settings = get_automl_settings()
|
||||
automl_settings["custom_hp"] = None
|
||||
automl_settings["custom_hp"] = {
|
||||
"transformer": {
|
||||
"model_path": {
|
||||
"domain": flaml.tune.choice(["google/electra-small-discriminator"]),
|
||||
},
|
||||
"num_train_epochs": {"domain": 3},
|
||||
}
|
||||
}
|
||||
automl_settings["fit_kwargs_by_estimator"] = {
|
||||
"transformer": {
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
"fp16": False,
|
||||
}
|
||||
}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_custom_hp_nlp()
|
||||
@@ -19,7 +19,7 @@ class AbstractWarmStartTest:
|
||||
# ray.shutdown()
|
||||
|
||||
def set_basic_conf(self):
|
||||
raise NotImplementedError()
|
||||
raise NotImplementedError
|
||||
|
||||
def run_part_from_scratch(self):
|
||||
np.random.seed(162)
|
||||
@@ -36,7 +36,6 @@ class AbstractWarmStartTest:
|
||||
search_alg2, cost = self.set_basic_conf()
|
||||
search_alg2 = ConcurrencyLimiter(search_alg2, 1)
|
||||
search_alg2.restore(checkpoint_path)
|
||||
np.random.set_state(random_state)
|
||||
return tune.run(cost, num_samples=5, search_alg=search_alg2, verbose=0)
|
||||
|
||||
def run_full(self):
|
||||
@@ -82,16 +81,16 @@ class CFOWarmStartTest(AbstractWarmStartTest, unittest.TestCase):
|
||||
return search_alg, cost
|
||||
|
||||
|
||||
# # # Not doing test for BS because of problems with random seed in OptunaSearch
|
||||
# class BlendsearchWarmStartTest(AbstractWarmStartTest, unittest.TestCase):
|
||||
# def set_basic_conf(self):
|
||||
# from flaml import BlendSearch
|
||||
# space = {
|
||||
# "height": tune.uniform(-100, 100),
|
||||
# "width": tune.randint(0, 100),
|
||||
# }
|
||||
|
||||
# def cost(param):
|
||||
# tune.report(loss=(param["height"] - 14)**2 - abs(param["width"] - 3))
|
||||
# tune.report(loss=(param["height"] - 14) ** 2 - abs(param["width"] - 3))
|
||||
|
||||
# search_alg = BlendSearch(
|
||||
# space=space,
|
||||
|
||||
@@ -28,14 +28,14 @@ For example, with three lines of code, you can start using this economical and f
|
||||
```python
|
||||
from flaml import AutoML
|
||||
automl = AutoML()
|
||||
automl.fit(X_train, y_train, task="classification")
|
||||
automl.fit(X_train, y_train, task="classification", time_budget=60)
|
||||
```
|
||||
|
||||
It automatically tunes the hyperparameters and selects the best model from default learners such as LightGBM, XGBoost, random forest etc. [Customizing](Use-Cases/task-oriented-automl#customize-automlfit) the optimization metrics, learners and search spaces etc. is very easy. For example,
|
||||
It automatically tunes the hyperparameters and selects the best model from default learners such as LightGBM, XGBoost, random forest etc. within the specified time budget of 60 seconds. [Customizing](Use-Cases/task-oriented-automl#customize-automlfit) the optimization metrics, learners and search spaces etc. is very easy. For example,
|
||||
|
||||
```python
|
||||
automl.add_learner("mylgbm", MyLGBMEstimator)
|
||||
automl.fit(X_train, y_train, task="classification", metric=custom_metric, estimator_list=["mylgbm"])
|
||||
automl.fit(X_train, y_train, task="classification", metric=custom_metric, estimator_list=["mylgbm"], time_budget=60)
|
||||
```
|
||||
|
||||
#### [Tune user-defined function](Use-Cases/Tune-User-Defined-Function)
|
||||
@@ -88,7 +88,7 @@ Then, you can use it just like you use the original `LGMBClassifier`. Your other
|
||||
|
||||
### Where to Go Next?
|
||||
|
||||
* Understand the use cases for [Task-oriented AutoML](Use-Cases/task-oriented-automl) and [Tune user-defined function](Use-Cases/Tune-User-Defined-Function).
|
||||
* Understand the use cases for [Task-oriented AutoML](Use-Cases/task-oriented-automl), [Tune user-defined function](Use-Cases/Tune-User-Defined-Function) and [Zero-shot AutoML](Use-Cases/Zero-Shot-AutoML).
|
||||
* Find code examples under "Examples": from [AutoML - Classification](Examples/AutoML-Classification) to [Tune - PyTorch](Examples/Tune-PyTorch).
|
||||
* Watch [video tutorials](https://www.youtube.com/channel/UCfU0zfFXHXdAd5x-WvFBk5A).
|
||||
* Learn about [research](Research) around FLAML.
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
- 'token-classification': token classification.
|
||||
- 'multichoice-classification': multichoice classification.
|
||||
|
||||
An optional input is `time_budget` for searching models and hyperparameters. When not specified, a default budget of 60 seconds will be used.
|
||||
Two optional inputs are `time_budget` and `max_iter` for searching models and hyperparameters. When both are unspecified, only one model per estimator will be trained (using our [zero-shot](Zero-Shot-AutoML) technique).
|
||||
|
||||
A typical way to use `flaml.AutoML`:
|
||||
|
||||
@@ -39,7 +39,7 @@ with open("automl.pkl", "rb") as f:
|
||||
pred = automl.predict(X_test)
|
||||
```
|
||||
|
||||
If users provide the minimal inputs only, `AutoML` uses the default settings for time budget, optimization metric, estimator list etc.
|
||||
If users provide the minimal inputs only, `AutoML` uses the default settings for optimization metric, estimator list etc.
|
||||
|
||||
## Customize AutoML.fit()
|
||||
|
||||
@@ -191,9 +191,6 @@ Each estimator class, built-in or not, must have a `search_space` function. In t
|
||||
|
||||
In the example above, we tune four hyperparameters, three integers and one float. They all follow a log-uniform distribution. "max_leaf" and "n_iter" have "low_cost_init_value" specified as their values heavily influence the training cost.
|
||||
|
||||
|
||||
|
||||
|
||||
To customize the search space for a built-in estimator, use a similar approach to define a class that inherits the existing estimator. For example,
|
||||
|
||||
```python
|
||||
@@ -234,17 +231,46 @@ class XGBoost2D(XGBoostSklearnEstimator):
|
||||
|
||||
We override the `search_space` function to tune two hyperparameters only, "n_estimators" and "max_leaves". They are both random integers in the log space, ranging from 4 to a data-dependent upper bound. The lower bound for each corresponds to low training cost, hence the "low_cost_init_value" for each is set to 4.
|
||||
|
||||
##### A shortcut to override the search space
|
||||
|
||||
One can use the `custom_hp` argument in [`AutoML.fit()`](../reference/automl#fit) to override the search space for an existing estimator quickly. For example, if you would like to temporarily change the search range of "n_estimators" of xgboost, disable searching "max_leaves" in random forest, and add "subsample" in the search space of lightgbm, you can set:
|
||||
|
||||
```python
|
||||
custom_hp = {
|
||||
"xgboost": {
|
||||
"n_estimators": {
|
||||
"domain": tune.lograndint(lower=new_lower, upper=new_upper),
|
||||
"low_cost_init_value": new_lower,
|
||||
},
|
||||
},
|
||||
"rf": {
|
||||
"max_leaves": {
|
||||
"domain": None, # disable search
|
||||
},
|
||||
},
|
||||
"lgbm": {
|
||||
"subsample": {
|
||||
"domain": tune.uniform(lower=0.1, upper=1.0),
|
||||
"init_value": 1.0,
|
||||
},
|
||||
"subsample_freq": {
|
||||
"domain": 1, # subsample_freq must > 0 to enable subsample
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
### Constraint
|
||||
|
||||
There are several types of constraints you can impose.
|
||||
|
||||
1. End-to-end constraints on the AutoML process.
|
||||
1. Constraints on the AutoML process.
|
||||
|
||||
- `time_budget`: constrains the wall-clock time (seconds) used by the AutoML process. We provide some tips on [how to set time budget](#how-to-set-time-budget).
|
||||
|
||||
- `max_iter`: constrains the maximal number of models to try in the AutoML process.
|
||||
|
||||
2. Constraints on the (hyperparameters of) the estimators.
|
||||
2. Constraints on the constructor arguments of the estimators.
|
||||
|
||||
Some constraints on the estimator can be implemented via the custom learner. For example,
|
||||
|
||||
@@ -255,7 +281,18 @@ class MonotonicXGBoostEstimator(XGBoostSklearnEstimator):
|
||||
return super().search_space(**args).update({"monotone_constraints": "(1, -1)"})
|
||||
```
|
||||
|
||||
It adds a monotonicity constraint to XGBoost. This approach can be used to set any constraint that is a parameter in the underlying estimator's constructor.
|
||||
It adds a monotonicity constraint to XGBoost. This approach can be used to set any constraint that is an argument in the underlying estimator's constructor.
|
||||
A shortcut to do this is to use the [`custom_hp`](#a-shortcut-to-override-the-search-space) argument:
|
||||
|
||||
```python
|
||||
custom_hp = {
|
||||
"xgboost": {
|
||||
"monotone_constraints": {
|
||||
"domain": "(1, -1)" # fix the domain as a constant
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
3. Constraints on the models tried in AutoML.
|
||||
|
||||
@@ -267,6 +304,7 @@ For example,
|
||||
```python
|
||||
automl.fit(X_train, y_train, max_iter=100, train_time_limit=1, pred_time_limit=1e-3)
|
||||
```
|
||||
|
||||
4. Constraints on the metrics of the ML model tried in AutoML.
|
||||
|
||||
When users provide a [custom metric function](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric), which returns a primary optimization metric and a dictionary of additional metrics (typically also about the model) to log, users can also specify constraints on one or more of the metrics in the dictionary of additional metrics.
|
||||
@@ -357,7 +395,8 @@ automl2 = AutoML()
|
||||
automl2.fit(X_train, y_train, time_budget=7200, starting_points=automl1.best_config_per_estimator)
|
||||
```
|
||||
|
||||
`starting_points` is a dictionary. The keys are the estimator names. If you do not need to specify starting points for an estimator, exclude its name from the dictionary. The value for each key can be either a dictionary of a list of dictionaries, corresponding to one hyperparameter configuration, or multiple hyperparameter configurations, respectively.
|
||||
`starting_points` is a dictionary or a str to specify the starting hyperparameter config. (1) When it is a dictionary, the keys are the estimator names. If you do not need to specify starting points for an estimator, exclude its name from the dictionary. The value for each key can be either a dictionary or a list of dictionaries, corresponding to one hyperparameter configuration or multiple hyperparameter configurations, respectively. (2) When it is a str: if "data", use data-dependent defaults; if "data:path", use data-dependent defaults which are stored at path; if "static", use data-independent defaults. Please find more details about data-dependent defaults in [zero shot AutoML](https://microsoft.github.io/FLAML/docs/Use-Cases/Zero-Shot-AutoML#combine-zero-shot-automl-and-hyperparameter-tuning).
|
||||
|
||||
|
||||
### Log the trials
|
||||
|
||||
|
||||
@@ -4,7 +4,8 @@
|
||||
|
||||
1. Your machine learning task is not one of the built-in tasks from `flaml.AutoML`.
|
||||
1. Your input cannot be represented as X_train + y_train or dataframe + label.
|
||||
1. You want to tune a function that may not even be a machine learning procedure.
|
||||
1. The optimization metric is not measurable via validation data only. For example, when you want to directly optimize a downstream application instead of a model accuracy metric.
|
||||
1. You need to tune a function that may not even be a machine learning procedure.
|
||||
|
||||
## Basic Tuning Procedure
|
||||
|
||||
@@ -43,7 +44,7 @@ def evaluate_config(config: dict):
|
||||
# we can return a single float as a score on the input config:
|
||||
# return score
|
||||
# or, we can return a dictionary that maps metric name to metric value:
|
||||
return {"score": score, "evaluation_cost": faked_evaluation_cost, "constraint_metric": x * y}
|
||||
return {"score": score, "evaluation_cost": faked_evaluation_cost, "constraint_metric": config["x"] * config["y"]}
|
||||
```
|
||||
|
||||
When the evaluation function returns a dictionary of metrics, you need to specify the name of the metric to optimize via the argument `metric` (this can be skipped when the function is just returning a scalar). In addition, you need to specify a mode of your optimization/tuning task (maximization or minimization) via the argument `mode` by choosing from "min" or "max".
|
||||
@@ -403,7 +404,7 @@ analysis = tune.run(
|
||||
Related arguments:
|
||||
|
||||
- `points_to_evaluate`: A list of initial hyperparameter configurations to run first.
|
||||
- `evaluated_rewards`: If you have previously evaluated the parameters passed in as `points_to_evaluate` , you can avoid re-running those trials by passing in the reward attributes as a list so the optimizer can be told the results without needing to re-compute the trial. Must be the same length as `points_to_evaluate`.
|
||||
- `evaluated_rewards`: If you have previously evaluated the parameters passed in as `points_to_evaluate`, you can avoid re-running those trials by passing in the reward attributes as a list so the optimizer can be told the results without needing to re-compute the trial. Must be the same length as, or shorter than, `points_to_evaluate`.
|
||||
|
||||
If you are aware of some good hyperparameter configurations, you are encouraged to provide them via `points_to_evaluate`. The search algorithm will try them first and use them to bootstrap the search.
|
||||
|
||||
@@ -425,6 +426,8 @@ config_search_space = {
|
||||
points_to_evaluate = [
|
||||
{"b": .99, "a": 3},
|
||||
{"b": .99, "a": 2},
|
||||
{"b": .80, "a": 3},
|
||||
{"b": .80, "a": 2},
|
||||
]
|
||||
evaluated_rewards = [3.99, 2.99]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user