mirror of
https://github.com/microsoft/FLAML.git
synced 2026-02-16 21:52:25 +08:00
Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1b40b4b3a6 | ||
|
|
4c044e88bd | ||
|
|
7740cd3466 | ||
|
|
3a7ebe6896 | ||
|
|
40a86a5941 | ||
|
|
cf1dfd3966 | ||
|
|
79a24d06a9 | ||
|
|
1111d6d43a | ||
|
|
65fa72d583 | ||
|
|
5de3f54fd9 | ||
|
|
f8cc38bc16 | ||
|
|
0642b6e7bb | ||
|
|
619107edf5 | ||
|
|
18f692281a | ||
|
|
2d31138191 | ||
|
|
c9bac02ea4 | ||
|
|
9d164a0119 | ||
|
|
e0e317bfb1 | ||
|
|
c79c07f450 | ||
|
|
1af682b7f5 | ||
|
|
927a4eeae5 |
24
.github/workflows/python-package.yml
vendored
24
.github/workflows/python-package.yml
vendored
@@ -16,7 +16,7 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-2019]
|
||||
python-version: [3.6, 3.7, 3.8, 3.9]
|
||||
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
@@ -24,11 +24,12 @@ jobs:
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: If mac, install libomp to facilitate lgbm and xgboost install
|
||||
if: matrix.os == 'macOS-latest'
|
||||
- name: On mac + python 3.10, install libomp to facilitate lgbm and xgboost install
|
||||
if: matrix.os == 'macOS-latest' && matrix.python-version == '3.10'
|
||||
run: |
|
||||
# remove libomp version constraint after xgboost works with libomp>11.1.0
|
||||
# remove libomp version constraint after xgboost works with libomp>11.1.0 on python 3.10
|
||||
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
|
||||
brew unlink libomp
|
||||
brew install libomp
|
||||
export CC=/usr/bin/clang
|
||||
export CXX=/usr/bin/clang++
|
||||
@@ -43,10 +44,13 @@ jobs:
|
||||
python -c "import flaml"
|
||||
pip install -e .[test]
|
||||
- name: If linux or mac, install ray
|
||||
if: (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') && matrix.python-version != '3.9'
|
||||
if: matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest'
|
||||
run: |
|
||||
pip install -e .[ray,forecast]
|
||||
pip install 'tensorboardX<=2.2'
|
||||
pip install -e .[ray]
|
||||
- name: If linux or mac, install prophet on python < 3.9
|
||||
if: (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') && matrix.python-version != '3.9' && matrix.python-version != '3.10'
|
||||
run: |
|
||||
pip install -e .[forecast]
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
@@ -54,17 +58,17 @@ jobs:
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||
- name: Test with pytest
|
||||
if: ${{ matrix.python-version != '3.7' || matrix.os == 'macos-latest' }}
|
||||
if: (matrix.python-version != '3.7' || matrix.os == 'macos-latest') && matrix.python-version != '3.10'
|
||||
run: |
|
||||
pytest test
|
||||
- name: Coverage
|
||||
if: ${{ matrix.python-version == '3.7' && matrix.os != 'macos-latest' }}
|
||||
if: (matrix.python-version == '3.7') && matrix.os != 'macos-latest' || matrix.python-version == '3.10'
|
||||
run: |
|
||||
pip install coverage
|
||||
coverage run -a -m pytest test
|
||||
coverage xml
|
||||
- name: Upload coverage to Codecov
|
||||
if: ${{ matrix.python-version == '3.7' && matrix.os != 'macos-latest' }}
|
||||
if: (matrix.python-version == '3.7') && matrix.os != 'macos-latest' || matrix.python-version == '3.10'
|
||||
uses: codecov/codecov-action@v1
|
||||
with:
|
||||
file: ./coverage.xml
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[PyPI version](https://badge.fury.io/py/FLAML)
|
||||

|
||||
[Build](https://github.com/microsoft/FLAML/actions/workflows/python-package.yml)
|
||||

|
||||

|
||||
[Downloads](https://pepy.tech/project/flaml)
|
||||
[Join the chat on Gitter](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
|
||||
|
||||
160
flaml/automl.py
160
flaml/automl.py
@@ -89,7 +89,12 @@ class SearchState:
|
||||
renamed_type = list(
|
||||
inspect.signature(domain_one_dim.is_valid).parameters.values()
|
||||
)[0].annotation
|
||||
type_match = renamed_type == Any or isinstance(value_one_dim, renamed_type)
|
||||
type_match = (
|
||||
renamed_type == Any
|
||||
or isinstance(value_one_dim, renamed_type)
|
||||
or isinstance(value_one_dim, int)
|
||||
and renamed_type is float
|
||||
)
|
||||
if not (type_match and domain_one_dim.is_valid(value_one_dim)):
|
||||
return False
|
||||
elif value_one_dim != domain_one_dim:
|
||||
@@ -237,7 +242,7 @@ class SearchState:
|
||||
|
||||
if self.base_eci is None:
|
||||
self.base_eci = time_used
|
||||
if (obj is not None) and (self.best_loss is None or obj < self.best_loss):
|
||||
if (obj is not None) and (obj < self.best_loss):
|
||||
self.best_loss_old = self.best_loss if self.best_loss < np.inf else 2 * obj
|
||||
self.best_loss = obj
|
||||
self.best_result = result
|
||||
@@ -286,7 +291,7 @@ class AutoMLState:
|
||||
sampled_y_train = self.y_train[:sample_size]
|
||||
weight = self.fit_kwargs.get(
|
||||
"sample_weight"
|
||||
) # NOTE: _prepare_sample_train_data is before
|
||||
) # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
if weight is not None:
|
||||
sampled_weight = weight[:sample_size]
|
||||
if self.groups is not None:
|
||||
@@ -296,7 +301,7 @@ class AutoMLState:
|
||||
sampled_y_train = self.y_train_all
|
||||
if (
|
||||
"sample_weight" in self.fit_kwargs
|
||||
): # NOTE: _prepare_sample_train_data is before
|
||||
): # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
sampled_weight = self.sample_weight_all
|
||||
if self.groups is not None:
|
||||
groups = self.groups_all
|
||||
@@ -311,7 +316,7 @@ class AutoMLState:
|
||||
|
||||
this_estimator_kwargs = state.fit_kwargs_by_estimator.get(
|
||||
estimator
|
||||
).copy() # NOTE: _compute_with_config_base is after
|
||||
).copy() # NOTE: _compute_with_config_base is after kwargs is updated to fit_kwargs_by_estimator
|
||||
(
|
||||
sampled_X_train,
|
||||
sampled_y_train,
|
||||
@@ -380,6 +385,15 @@ class AutoMLState:
|
||||
tune.report(**result)
|
||||
return result
|
||||
|
||||
def sanitize(self, config: dict) -> dict:
    """Make a config ready for passing to estimator.

    Args:
        config: A search config. If it has an "ml" key, the value stored
            under that key is used; otherwise the config itself is used.

    Returns:
        A shallow copy of the selected config without the
        "FLAML_sample_size" and "learner" entries. The input dict is
        not modified.
    """
    config = config.get("ml", config).copy()
    # These keys are stripped before the config is handed to an estimator;
    # pop with a default so that a missing key is not an error.
    config.pop("FLAML_sample_size", None)
    config.pop("learner", None)
    return config
|
||||
|
||||
def _train_with_config(
|
||||
self,
|
||||
estimator,
|
||||
@@ -390,15 +404,11 @@ class AutoMLState:
|
||||
sample_size = config_w_resource.get(
|
||||
"FLAML_sample_size", len(self.y_train_all)
|
||||
)
|
||||
config = config_w_resource.get("ml", config_w_resource).copy()
|
||||
if "FLAML_sample_size" in config:
|
||||
del config["FLAML_sample_size"]
|
||||
if "learner" in config:
|
||||
del config["learner"]
|
||||
config = self.sanitize(config_w_resource)
|
||||
|
||||
this_estimator_kwargs = self.fit_kwargs_by_estimator.get(
|
||||
estimator
|
||||
).copy() # NOTE: _train_with_config is after
|
||||
).copy() # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
|
||||
(
|
||||
sampled_X_train,
|
||||
sampled_y_train,
|
||||
@@ -408,14 +418,14 @@ class AutoMLState:
|
||||
if sampled_weight is not None:
|
||||
weight = this_estimator_kwargs[
|
||||
"sample_weight"
|
||||
] # NOTE: _train_with_config is after
|
||||
] # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
|
||||
this_estimator_kwargs[
|
||||
"sample_weight"
|
||||
] = sampled_weight # NOTE: _train_with_config is after
|
||||
] = sampled_weight # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
|
||||
if groups is not None:
|
||||
this_estimator_kwargs[
|
||||
"groups"
|
||||
] = groups # NOTE: _train_with_config is after
|
||||
] = groups # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
|
||||
|
||||
budget = (
|
||||
None
|
||||
@@ -432,14 +442,14 @@ class AutoMLState:
|
||||
n_jobs=self.n_jobs,
|
||||
estimator_class=self.learner_classes.get(estimator),
|
||||
budget=budget,
|
||||
fit_kwargs=this_estimator_kwargs, # NOTE: _train_with_config is after
|
||||
fit_kwargs=this_estimator_kwargs, # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
|
||||
eval_metric=self.metric if hasattr(self, "metric") else "train_time",
|
||||
)
|
||||
|
||||
if sampled_weight is not None:
|
||||
this_estimator_kwargs[
|
||||
"sample_weight"
|
||||
] = weight # NOTE: _train_with_config is after
|
||||
] = weight # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
|
||||
|
||||
return estimator, train_time
|
||||
|
||||
@@ -626,10 +636,12 @@ class AutoML(BaseEstimator):
|
||||
augment rare classes.
|
||||
min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample
|
||||
size when sample=True.
|
||||
use_ray: boolean, default=False | Whether to use ray to run the training
|
||||
use_ray: boolean or dict.
|
||||
If boolean: default=False | Whether to use ray to run the training
|
||||
in separate processes. This can be used to prevent OOM for large
|
||||
datasets, but will incur more overhead in time. Only use it if
|
||||
you run into OOM failures.
|
||||
datasets, but will incur more overhead in time.
|
||||
If dict: the dict contains the keyword arguments to be passed to
|
||||
[ray.tune.run](https://docs.ray.io/en/latest/tune/api_docs/execution.html).
|
||||
metric_constraints: list, default=[] | The list of metric constraints.
|
||||
Each element in this list is a 3-tuple, which shall be expressed
|
||||
in the following format: the first element of the 3-tuple is the name of the
|
||||
@@ -1103,7 +1115,7 @@ class AutoML(BaseEstimator):
|
||||
|
||||
self._sample_weight_full = self._state.fit_kwargs.get(
|
||||
"sample_weight"
|
||||
) # NOTE: _validate_data is before,
|
||||
) # NOTE: _validate_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
if X_val is not None and y_val is not None:
|
||||
assert (
|
||||
isinstance(X_val, np.ndarray)
|
||||
@@ -1164,7 +1176,7 @@ class AutoML(BaseEstimator):
|
||||
self._state.task in CLASSIFICATION
|
||||
and self._auto_augment
|
||||
and self._state.fit_kwargs.get("sample_weight")
|
||||
is None # NOTE: _prepare_data is before
|
||||
is None # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
and self._split_type in ["stratified", "uniform"]
|
||||
and self._state.task != TOKENCLASSIFICATION
|
||||
):
|
||||
@@ -1208,7 +1220,9 @@ class AutoML(BaseEstimator):
|
||||
)
|
||||
self._state.fit_kwargs[
|
||||
"sample_weight"
|
||||
] = self._state.sample_weight_all # NOTE: _prepare_data is before
|
||||
] = (
|
||||
self._state.sample_weight_all
|
||||
) # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
else:
|
||||
X_train_all, y_train_all = shuffle(
|
||||
X_train_all, y_train_all, random_state=RANDOM_SEED
|
||||
@@ -1227,7 +1241,7 @@ class AutoML(BaseEstimator):
|
||||
num_samples = X_train_all.shape[0]
|
||||
period = self._state.fit_kwargs[
|
||||
"period"
|
||||
] # NOTE: _prepare_data is before
|
||||
] # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
assert (
|
||||
period < num_samples
|
||||
), f"period={period}>#examples={num_samples}"
|
||||
@@ -1239,7 +1253,7 @@ class AutoML(BaseEstimator):
|
||||
else:
|
||||
if (
|
||||
"sample_weight" in self._state.fit_kwargs
|
||||
): # NOTE: _prepare_data is before
|
||||
): # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
(
|
||||
X_train,
|
||||
X_val,
|
||||
@@ -1247,14 +1261,14 @@ class AutoML(BaseEstimator):
|
||||
y_val,
|
||||
self._state.fit_kwargs[
|
||||
"sample_weight"
|
||||
], # NOTE: _prepare_data is before
|
||||
], # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
self._state.weight_val,
|
||||
) = train_test_split(
|
||||
X_train_all,
|
||||
y_train_all,
|
||||
self._state.fit_kwargs[
|
||||
"sample_weight"
|
||||
], # NOTE: _prepare_data is before
|
||||
], # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
test_size=split_ratio,
|
||||
shuffle=False,
|
||||
)
|
||||
@@ -1297,7 +1311,7 @@ class AutoML(BaseEstimator):
|
||||
stratify = y_rest if self._split_type == "stratified" else None
|
||||
if (
|
||||
"sample_weight" in self._state.fit_kwargs
|
||||
): # NOTE: _prepare_data is before
|
||||
): # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
(
|
||||
X_train,
|
||||
X_val,
|
||||
@@ -1310,17 +1324,17 @@ class AutoML(BaseEstimator):
|
||||
y_rest,
|
||||
self._state.fit_kwargs["sample_weight"][
|
||||
rest
|
||||
], # NOTE: _prepare_data is before
|
||||
], # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
test_size=split_ratio,
|
||||
random_state=RANDOM_SEED,
|
||||
)
|
||||
weight1 = self._state.fit_kwargs["sample_weight"][
|
||||
first
|
||||
] # NOTE: _prepare_data is before
|
||||
] # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
self._state.weight_val = concat(weight1, weight_val)
|
||||
self._state.fit_kwargs[
|
||||
"sample_weight"
|
||||
] = concat( # NOTE: _prepare_data is before
|
||||
] = concat( # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
weight1, weight_train
|
||||
)
|
||||
else:
|
||||
@@ -1346,7 +1360,7 @@ class AutoML(BaseEstimator):
|
||||
elif self._state.task in REGRESSION:
|
||||
if (
|
||||
"sample_weight" in self._state.fit_kwargs
|
||||
): # NOTE: _prepare_data is before
|
||||
): # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
(
|
||||
X_train,
|
||||
X_val,
|
||||
@@ -1354,14 +1368,14 @@ class AutoML(BaseEstimator):
|
||||
y_val,
|
||||
self._state.fit_kwargs[
|
||||
"sample_weight"
|
||||
], # NOTE: _prepare_data is before
|
||||
], # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
self._state.weight_val,
|
||||
) = train_test_split(
|
||||
X_train_all,
|
||||
y_train_all,
|
||||
self._state.fit_kwargs[
|
||||
"sample_weight"
|
||||
], # NOTE: _prepare_data is before
|
||||
], # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
test_size=split_ratio,
|
||||
random_state=RANDOM_SEED,
|
||||
)
|
||||
@@ -1409,7 +1423,7 @@ class AutoML(BaseEstimator):
|
||||
if self._state.task in TS_FORECAST:
|
||||
period = self._state.fit_kwargs[
|
||||
"period"
|
||||
] # NOTE: _prepare_data is before
|
||||
] # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
|
||||
if period * (n_splits + 1) > y_train_all.size:
|
||||
n_splits = int(y_train_all.size / period - 1)
|
||||
assert n_splits >= 2, (
|
||||
@@ -1494,6 +1508,10 @@ class AutoML(BaseEstimator):
|
||||
):
|
||||
"""Retrain from log file.
|
||||
|
||||
This function is intended to retrain the logged configurations.
|
||||
NOTE: In some rare cases, the last config is stopped early to meet time_budget and it's the best config.
|
||||
But the logged config's ITER_HP (e.g., n_estimators) is not reduced.
|
||||
|
||||
Args:
|
||||
log_file_name: A string of the log file name.
|
||||
X_train: A numpy array or dataframe of training data in shape n*m.
|
||||
@@ -1716,7 +1734,7 @@ class AutoML(BaseEstimator):
|
||||
|
||||
assert isinstance(
|
||||
self._state.fit_kwargs.get("period"),
|
||||
int, # NOTE: _decide_split_type is before
|
||||
int, # NOTE: _decide_split_type is before kwargs is updated to fit_kwargs_by_estimator
|
||||
), f"missing a required integer 'period' for '{TS_FORECAST}' task."
|
||||
elif self._state.task == "rank":
|
||||
assert (
|
||||
@@ -1897,32 +1915,14 @@ class AutoML(BaseEstimator):
|
||||
@property
|
||||
def trainable(self) -> Callable[[dict], Optional[float]]:
|
||||
"""Training function.
|
||||
|
||||
Returns:
|
||||
A function that evaluates each config and returns the loss.
|
||||
"""
|
||||
self._state.time_from_start = 0
|
||||
for estimator in self.estimator_list:
|
||||
search_state = self._search_states[estimator]
|
||||
if not hasattr(search_state, "training_function"):
|
||||
if self._use_ray is not False:
|
||||
from ray.tune import with_parameters
|
||||
|
||||
search_state.training_function = with_parameters(
|
||||
AutoMLState._compute_with_config_base,
|
||||
state=self._state,
|
||||
estimator=estimator,
|
||||
)
|
||||
else:
|
||||
search_state.training_function = partial(
|
||||
AutoMLState._compute_with_config_base,
|
||||
state=self._state,
|
||||
estimator=estimator,
|
||||
)
|
||||
states = self._search_states
|
||||
mem_res = self._mem_thres
|
||||
|
||||
def train(config: dict):
|
||||
def train(config: dict, state):
|
||||
|
||||
sample_size = config.get("FLAML_sample_size")
|
||||
config = config.get("ml", config).copy()
|
||||
@@ -1932,18 +1932,33 @@ class AutoML(BaseEstimator):
|
||||
# check memory constraints before training
|
||||
if states[estimator].learner_class.size(config) <= mem_res:
|
||||
del config["learner"]
|
||||
result = states[estimator].training_function(config)
|
||||
return result
|
||||
result = AutoMLState._compute_with_config_base(
|
||||
config, state=state, estimator=estimator
|
||||
)
|
||||
else:
|
||||
return {
|
||||
# If the search algorithm is not from flaml, it does not handle the config constraint, so we should also call tune.report before returning
|
||||
result = {
|
||||
"pred_time": 0,
|
||||
"wall_clock_time": None,
|
||||
"metric_for_logging": np.inf,
|
||||
"val_loss": np.inf,
|
||||
"trained_estimator": None,
|
||||
}
|
||||
tune.report(**result)
|
||||
return result
|
||||
|
||||
return train
|
||||
if self._use_ray is not False:
|
||||
from ray.tune import with_parameters
|
||||
|
||||
return with_parameters(
|
||||
train,
|
||||
state=self._state,
|
||||
)
|
||||
else:
|
||||
return partial(
|
||||
train,
|
||||
state=self._state,
|
||||
)
|
||||
|
||||
@property
|
||||
def metric_constraints(self) -> list:
|
||||
@@ -2180,10 +2195,12 @@ class AutoML(BaseEstimator):
|
||||
augment rare classes.
|
||||
min_sample_size: int, default=MIN_SAMPLE_TRAIN | the minimal sample
|
||||
size when sample=True.
|
||||
use_ray: boolean or dict
|
||||
use_ray: boolean or dict.
|
||||
If boolean: default=False | Whether to use ray to run the training
|
||||
in separate processes. This can be used to prevent OOM for large
|
||||
datasets, but will incur more overhead in time.
|
||||
If dict: the dict contains the keyword arguments to be passed to
|
||||
[ray.tune.run](https://docs.ray.io/en/latest/tune/api_docs/execution.html).
|
||||
metric_constraints: list, default=[] | The list of metric constraints.
|
||||
Each element in this list is a 3-tuple, which shall be expressed
|
||||
in the following format: the first element of the 3-tuple is the name of the
|
||||
@@ -2565,7 +2582,7 @@ class AutoML(BaseEstimator):
|
||||
this_estimator_kwargs = this_estimator_kwargs.copy()
|
||||
this_estimator_kwargs.update(
|
||||
self._state.fit_kwargs
|
||||
) # update the shallow copy
|
||||
) # update the shallow copy of fit_kwargs to fit_kwargs_by_estimator
|
||||
self._state.fit_kwargs_by_estimator[
|
||||
estimator_name
|
||||
] = this_estimator_kwargs # set self._state.fit_kwargs_by_estimator[estimator_name] to the update, so only self._state.fit_kwargs_by_estimator will be updated
|
||||
@@ -2579,7 +2596,9 @@ class AutoML(BaseEstimator):
|
||||
data_size=self._state.data_size,
|
||||
task=self._state.task,
|
||||
starting_point=starting_points.get(estimator_name),
|
||||
period=self._state.fit_kwargs.get("period"), # NOTE: this is after
|
||||
period=self._state.fit_kwargs.get(
|
||||
"period"
|
||||
), # NOTE: this is after kwargs is updated to fit_kwargs_by_estimator
|
||||
custom_hp=custom_hp and custom_hp.get(estimator_name),
|
||||
max_iter=max_iter,
|
||||
)
|
||||
@@ -2643,7 +2662,7 @@ class AutoML(BaseEstimator):
|
||||
self._sample_weight_full,
|
||||
self._state.fit_kwargs_by_estimator,
|
||||
self._state.fit_kwargs,
|
||||
) # NOTE: this is after
|
||||
) # NOTE: this is after kwargs is updated to fit_kwargs_by_estimator
|
||||
del self._state.groups, self._state.groups_all, self._state.groups_val
|
||||
logger.setLevel(old_level)
|
||||
|
||||
@@ -2973,9 +2992,7 @@ class AutoML(BaseEstimator):
|
||||
search_state.search_alg.searcher.set_search_properties(
|
||||
metric=None,
|
||||
mode=None,
|
||||
setting={
|
||||
"metric_target": self._state.best_loss,
|
||||
},
|
||||
metric_target=self._state.best_loss,
|
||||
)
|
||||
start_run_time = time.time()
|
||||
analysis = tune.run(
|
||||
@@ -3166,6 +3183,7 @@ class AutoML(BaseEstimator):
|
||||
# Add a checkpoint for the current best config to the log.
|
||||
if self._training_log:
|
||||
self._training_log.checkpoint()
|
||||
self._state.time_from_start = time.time() - self._start_time_flag
|
||||
if self._best_estimator:
|
||||
self._selected = self._search_states[self._best_estimator]
|
||||
self.modelcount = sum(
|
||||
@@ -3189,7 +3207,7 @@ class AutoML(BaseEstimator):
|
||||
x[1].learner_class(
|
||||
task=self._state.task,
|
||||
n_jobs=self._state.n_jobs,
|
||||
**x[1].best_config,
|
||||
**self._state.sanitize(x[1].best_config),
|
||||
),
|
||||
)
|
||||
for x in search_states[:2]
|
||||
@@ -3200,13 +3218,15 @@ class AutoML(BaseEstimator):
|
||||
x[1].learner_class(
|
||||
task=self._state.task,
|
||||
n_jobs=self._state.n_jobs,
|
||||
**x[1].best_config,
|
||||
**self._state.sanitize(x[1].best_config),
|
||||
),
|
||||
)
|
||||
for x in search_states[2:]
|
||||
if x[1].best_loss < 4 * self._selected.best_loss
|
||||
]
|
||||
logger.info(estimators)
|
||||
logger.info(
|
||||
[(estimator[0], estimator[1].params) for estimator in estimators]
|
||||
)
|
||||
if len(estimators) > 1:
|
||||
if self._state.task in CLASSIFICATION:
|
||||
from sklearn.ensemble import StackingClassifier as Stacker
|
||||
@@ -3257,7 +3277,7 @@ class AutoML(BaseEstimator):
|
||||
stacker.fit(
|
||||
self._X_train_all,
|
||||
self._y_train_all,
|
||||
**sample_weight_dict, # NOTE: _search is after
|
||||
**sample_weight_dict, # NOTE: _search is after kwargs is updated to fit_kwargs_by_estimator
|
||||
)
|
||||
logger.info(f"ensemble: {stacker}")
|
||||
self._trained_estimator = stacker
|
||||
@@ -3276,7 +3296,7 @@ class AutoML(BaseEstimator):
|
||||
stacker.fit(
|
||||
self._X_train_all,
|
||||
self._y_train_all,
|
||||
**sample_weight_dict, # NOTE: _search is after
|
||||
**sample_weight_dict, # NOTE: _search is after kwargs is updated to fit_kwargs_by_estimator
|
||||
)
|
||||
logger.info(f"ensemble: {stacker}")
|
||||
self._trained_estimator = stacker
|
||||
|
||||
@@ -6,8 +6,10 @@
|
||||
N_SPLITS = 5
|
||||
RANDOM_SEED = 1
|
||||
SPLIT_RATIO = 0.1
|
||||
MEM_THRES = 4 * (1024 ** 3)
|
||||
MEM_THRES = 4 * (1024**3)
|
||||
SMALL_LARGE_THRES = 10000000
|
||||
MIN_SAMPLE_TRAIN = 10000
|
||||
CV_HOLDOUT_THRESHOLD = 100000
|
||||
SAMPLE_MULTIPLY_FACTOR = 4
|
||||
SEARCH_THREAD_EPS = 1.0
|
||||
PENALTY = 1e10 # penalty term for constraints
|
||||
|
||||
@@ -96,9 +96,14 @@ def load_openml_dataset(
|
||||
with open(filepath, "wb") as f:
|
||||
pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
|
||||
print("Dataset name:", dataset.name)
|
||||
X, y, *__ = dataset.get_data(
|
||||
target=dataset.default_target_attribute, dataset_format=dataset_format
|
||||
)
|
||||
try:
|
||||
X, y, *__ = dataset.get_data(
|
||||
target=dataset.default_target_attribute, dataset_format=dataset_format
|
||||
)
|
||||
except ValueError:
|
||||
from sklearn.datasets import fetch_openml
|
||||
|
||||
X, y = fetch_openml(data_id=dataset_id, return_X_y=True)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
|
||||
print(
|
||||
"X_train.shape: {}, y_train.shape: {};\nX_test.shape: {}, y_test.shape: {}".format(
|
||||
|
||||
@@ -176,12 +176,16 @@ def metric_loss_score(
|
||||
].mid.fmeasure
|
||||
elif metric_name.startswith("seqeval"):
|
||||
|
||||
label_len = len(labels)
|
||||
zip_pred_true = [
|
||||
[(p, lb) for (p, lb) in zip(prediction, label) if lb != -100]
|
||||
for (prediction, label) in zip(y_predict, y_true)
|
||||
]
|
||||
y_pred = [
|
||||
[labels[p] for (p, l) in each_list]
|
||||
[
|
||||
labels[p] if 0 <= p < label_len else -1
|
||||
for (p, l) in each_list
|
||||
]
|
||||
for each_list in zip_pred_true
|
||||
] # To compute precision and recall, y_pred and y_true must be converted to string labels
|
||||
# (B-PER, I-PER, etc.), so that the category-based precision/recall (i.e., PER, LOC, etc.) scores can be computed
|
||||
@@ -420,7 +424,6 @@ def get_val_loss(
|
||||
# fit_kwargs['groups_val'] = groups_val
|
||||
# fit_kwargs['X_val'] = X_val
|
||||
# fit_kwargs['y_val'] = y_val
|
||||
|
||||
estimator.fit(X_train, y_train, budget, **fit_kwargs)
|
||||
val_loss, metric_for_logging, pred_time, _ = _eval_estimator(
|
||||
config,
|
||||
|
||||
@@ -56,7 +56,11 @@ def limit_resource(memory_limit, time_limit):
|
||||
if memory_limit > 0:
|
||||
soft, hard = resource.getrlimit(resource.RLIMIT_AS)
|
||||
if soft < 0 and (hard < 0 or memory_limit <= hard) or memory_limit < soft:
|
||||
resource.setrlimit(resource.RLIMIT_AS, (memory_limit, hard))
|
||||
try:
|
||||
resource.setrlimit(resource.RLIMIT_AS, (int(memory_limit), hard))
|
||||
except ValueError:
|
||||
# According to https://bugs.python.org/issue40518, it's a mac-specific error.
|
||||
pass
|
||||
main_thread = False
|
||||
if time_limit is not None:
|
||||
try:
|
||||
@@ -948,7 +952,7 @@ class LGBMEstimator(BaseEstimator):
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
"min_child_samples": {
|
||||
"domain": tune.lograndint(lower=2, upper=2 ** 7 + 1),
|
||||
"domain": tune.lograndint(lower=2, upper=2**7 + 1),
|
||||
"init_value": 20,
|
||||
},
|
||||
"learning_rate": {
|
||||
@@ -1047,7 +1051,6 @@ class LGBMEstimator(BaseEstimator):
|
||||
self.params[self.ITER_HP] = 1
|
||||
self._t1 = self._fit(X_train, y_train, **kwargs)
|
||||
if budget is not None and self._t1 >= budget or n_iter == 1:
|
||||
# self.params[self.ITER_HP] = n_iter
|
||||
return self._t1
|
||||
mem1 = psutil.virtual_memory().available if psutil is not None else 1
|
||||
self._mem1 = mem0 - mem1
|
||||
@@ -1168,7 +1171,7 @@ class XGBoostEstimator(SKLearnEstimator):
|
||||
},
|
||||
"min_child_weight": {
|
||||
"domain": tune.loguniform(lower=0.001, upper=128),
|
||||
"init_value": 1,
|
||||
"init_value": 1.0,
|
||||
},
|
||||
"learning_rate": {
|
||||
"domain": tune.loguniform(lower=1 / 1024, upper=1.0),
|
||||
@@ -1797,17 +1800,17 @@ class ARIMA(Prophet):
|
||||
def search_space(cls, **params):
|
||||
space = {
|
||||
"p": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 2,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"d": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 2,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"q": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 1,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
@@ -1884,32 +1887,32 @@ class SARIMAX(ARIMA):
|
||||
def search_space(cls, **params):
|
||||
space = {
|
||||
"p": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 2,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"d": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 2,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"q": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 1,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"P": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 1,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"D": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 1,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"Q": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 1,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
|
||||
@@ -84,10 +84,10 @@ def tokenize_and_align_labels(
|
||||
tokenized_inputs = tokenizer(
|
||||
[list(examples[X_sent_key])],
|
||||
padding="max_length"
|
||||
if hf_args.pad_to_max_length
|
||||
if hf_args and hf_args.pad_to_max_length
|
||||
else False, # to be consistent with https://github.com/huggingface/transformers/blob/main/examples/pytorch/token-classification/run_ner.py#L394
|
||||
truncation=True,
|
||||
max_length=hf_args.max_seq_length,
|
||||
max_length=hf_args.max_seq_length if hf_args else None,
|
||||
# We use this argument because the texts in our dataset are lists of words (with a label for each word).
|
||||
is_split_into_words=True,
|
||||
)
|
||||
|
||||
@@ -7,7 +7,6 @@ import numpy as np
|
||||
import time
|
||||
import pickle
|
||||
|
||||
|
||||
try:
|
||||
from ray import __version__ as ray_version
|
||||
|
||||
@@ -22,17 +21,19 @@ from ..tune import INCUMBENT_RESULT
|
||||
from .search_thread import SearchThread
|
||||
from .flow2 import FLOW2
|
||||
from ..tune.space import add_cost_to_space, indexof, normalize, define_by_run_func
|
||||
from ..tune.result import TIME_TOTAL_S
|
||||
|
||||
import logging
|
||||
|
||||
SEARCH_THREAD_EPS = 1.0
|
||||
PENALTY = 1e10 # penalty term for constraints
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BlendSearch(Searcher):
|
||||
"""class for BlendSearch algorithm."""
|
||||
|
||||
cost_attr = "time_total_s" # cost attribute in result
|
||||
lagrange = "_lagrange" # suffix for lagrange-modified metric
|
||||
penalty = 1e10 # penalty term for constraints
|
||||
LocalSearch = FLOW2
|
||||
|
||||
def __init__(
|
||||
@@ -56,6 +57,7 @@ class BlendSearch(Searcher):
|
||||
] = None,
|
||||
metric_constraints: Optional[List[Tuple[str, str, float]]] = None,
|
||||
seed: Optional[int] = 20,
|
||||
cost_attr: Optional[str] = "auto",
|
||||
experimental: Optional[bool] = False,
|
||||
use_incumbent_result_in_evaluation=False,
|
||||
):
|
||||
@@ -102,8 +104,23 @@ class BlendSearch(Searcher):
|
||||
metric_constraints: A list of metric constraints to be satisfied.
|
||||
E.g., `['precision', '>=', 0.9]`. The sign can be ">=" or "<=".
|
||||
seed: An integer of the random seed.
|
||||
cost_attr: Choose from ["auto", None] to specify the attribute to evaluate the cost of different trials.
|
||||
Default is "auto", which means that we will automatically choose the cost attribute to use (depending
|
||||
on the nature of the resource budget). When cost_attr is set to None, cost differences between different trials will be omitted
|
||||
in our search algorithm.
|
||||
experimental: A bool of whether to use experimental features.
|
||||
"""
|
||||
self._eps = SEARCH_THREAD_EPS
|
||||
self._input_cost_attr = cost_attr
|
||||
if cost_attr == "auto":
|
||||
if time_budget_s is not None:
|
||||
self.cost_attr = TIME_TOTAL_S
|
||||
else:
|
||||
self.cost_attr = None
|
||||
else:
|
||||
self.cost_attr = cost_attr
|
||||
|
||||
self.penalty = PENALTY # penalty term for constraints
|
||||
self._metric, self._mode = metric, mode
|
||||
self._use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation
|
||||
init_config = low_cost_partial_config or {}
|
||||
@@ -218,7 +235,7 @@ class BlendSearch(Searcher):
|
||||
metric: Optional[str] = None,
|
||||
mode: Optional[str] = None,
|
||||
config: Optional[Dict] = None,
|
||||
setting: Optional[Dict] = None,
|
||||
**spec,
|
||||
) -> bool:
|
||||
metric_changed = mode_changed = False
|
||||
if metric and self._metric != metric:
|
||||
@@ -255,19 +272,21 @@ class BlendSearch(Searcher):
|
||||
)
|
||||
self._gs.space = self._ls.space
|
||||
self._init_search()
|
||||
if setting:
|
||||
if spec:
|
||||
# CFO doesn't need these settings
|
||||
if "time_budget_s" in setting:
|
||||
self._time_budget_s = setting["time_budget_s"] # budget from now
|
||||
if "time_budget_s" in spec:
|
||||
self._time_budget_s = spec["time_budget_s"] # budget from now
|
||||
now = time.time()
|
||||
self._time_used += now - self._start_time
|
||||
self._start_time = now
|
||||
self._set_deadline()
|
||||
if "metric_target" in setting:
|
||||
self._metric_target = setting.get("metric_target")
|
||||
if "num_samples" in setting:
|
||||
if self._input_cost_attr == "auto":
|
||||
self.cost_attr = TIME_TOTAL_S
|
||||
if "metric_target" in spec:
|
||||
self._metric_target = spec.get("metric_target")
|
||||
if "num_samples" in spec:
|
||||
self._num_samples = (
|
||||
setting["num_samples"]
|
||||
spec["num_samples"]
|
||||
+ len(self._result)
|
||||
+ len(self._trial_proposed_by)
|
||||
)
|
||||
@@ -276,10 +295,14 @@ class BlendSearch(Searcher):
|
||||
def _set_deadline(self):
|
||||
if self._time_budget_s is not None:
|
||||
self._deadline = self._time_budget_s + self._start_time
|
||||
SearchThread.set_eps(self._time_budget_s)
|
||||
self._set_eps()
|
||||
else:
|
||||
self._deadline = np.inf
|
||||
|
||||
def _set_eps(self):
|
||||
"""set eps for search threads according to time budget"""
|
||||
self._eps = max(min(self._time_budget_s / 1000.0, 1.0), 1e-9)
|
||||
|
||||
def _init_search(self):
|
||||
"""initialize the search"""
|
||||
self._start_time = time.time()
|
||||
@@ -290,7 +313,7 @@ class BlendSearch(Searcher):
|
||||
self._metric_target = np.inf * self._ls.metric_op
|
||||
self._search_thread_pool = {
|
||||
# id: int -> thread: SearchThread
|
||||
0: SearchThread(self._ls.mode, self._gs)
|
||||
0: SearchThread(self._ls.mode, self._gs, self.cost_attr, self._eps)
|
||||
}
|
||||
self._thread_count = 1 # total # threads created
|
||||
self._init_used = self._ls.init_config is None
|
||||
@@ -462,6 +485,7 @@ class BlendSearch(Searcher):
|
||||
space=space,
|
||||
),
|
||||
self.cost_attr,
|
||||
self._eps,
|
||||
)
|
||||
self._thread_count += 1
|
||||
self._update_admissible_region(
|
||||
|
||||
@@ -88,7 +88,9 @@ class FLOW2(Searcher):
|
||||
self.best_config = flatten_dict(init_config)
|
||||
self.resource_attr = resource_attr
|
||||
self.min_resource = min_resource
|
||||
self.resource_multiple_factor = resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
|
||||
self.resource_multiple_factor = (
|
||||
resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
|
||||
)
|
||||
self.cost_attr = cost_attr
|
||||
self.max_resource = max_resource
|
||||
self._resource = None
|
||||
@@ -316,7 +318,7 @@ class FLOW2(Searcher):
|
||||
self.best_obj = obj
|
||||
self.best_config, self.step = self._configs[trial_id]
|
||||
self.incumbent = self.normalize(self.best_config)
|
||||
self.cost_incumbent = result.get(self.cost_attr)
|
||||
self.cost_incumbent = result.get(self.cost_attr, 1)
|
||||
if self._resource:
|
||||
self._resource = self.best_config[self.resource_attr]
|
||||
self._num_complete4incumbent = 0
|
||||
@@ -339,7 +341,9 @@ class FLOW2(Searcher):
|
||||
# proposed by current incumbent and no better
|
||||
self._num_complete4incumbent += 1
|
||||
cost = (
|
||||
result.get(self.cost_attr) if result else self._trial_cost.get(trial_id)
|
||||
result.get(self.cost_attr, 1)
|
||||
if result
|
||||
else self._trial_cost.get(trial_id)
|
||||
)
|
||||
if cost:
|
||||
self._cost_complete4incumbent += cost
|
||||
@@ -369,14 +373,14 @@ class FLOW2(Searcher):
|
||||
if self._resource:
|
||||
self._resource = config[self.resource_attr]
|
||||
self.incumbent = self.normalize(self.best_config)
|
||||
self.cost_incumbent = result.get(self.cost_attr)
|
||||
self.cost_incumbent = result.get(self.cost_attr, 1)
|
||||
self._cost_complete4incumbent = 0
|
||||
self._num_complete4incumbent = 0
|
||||
self._num_proposedby_incumbent = 0
|
||||
self._num_allowed4incumbent = 2 * self.dim
|
||||
self._proposed_by.clear()
|
||||
self._iter_best_config = self.trial_count_complete
|
||||
cost = result.get(self.cost_attr)
|
||||
cost = result.get(self.cost_attr, 1)
|
||||
# record the cost in case it is pruned and cost info is lost
|
||||
self._trial_cost[trial_id] = cost
|
||||
|
||||
|
||||
@@ -22,13 +22,12 @@ logger = logging.getLogger(__name__)
|
||||
class SearchThread:
|
||||
"""Class of global or local search thread."""
|
||||
|
||||
_eps = 1.0
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
mode: str = "min",
|
||||
search_alg: Optional[Searcher] = None,
|
||||
cost_attr: Optional[str] = "time_total_s",
|
||||
eps: Optional[float] = 1.0,
|
||||
):
|
||||
"""When search_alg is omitted, use local search FLOW2."""
|
||||
self._search_alg = search_alg
|
||||
@@ -38,6 +37,7 @@ class SearchThread:
|
||||
self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = getattr(
|
||||
search_alg, "cost_incumbent", 0
|
||||
)
|
||||
self._eps = eps
|
||||
self.cost_best2 = 0
|
||||
self.obj_best1 = self.obj_best2 = getattr(
|
||||
search_alg, "best_obj", np.inf
|
||||
@@ -59,10 +59,6 @@ class SearchThread:
|
||||
# remember const config
|
||||
self._const = add_cost_to_space(self.space, {}, {})
|
||||
|
||||
@classmethod
|
||||
def set_eps(cls, time_budget_s):
|
||||
cls._eps = max(min(time_budget_s / 1000.0, 1.0), 1e-9)
|
||||
|
||||
def suggest(self, trial_id: str) -> Optional[Dict]:
|
||||
"""Use the suggest() of the underlying search algorithm."""
|
||||
if isinstance(self._search_alg, FLOW2):
|
||||
@@ -107,7 +103,7 @@ class SearchThread:
|
||||
self.speed = (
|
||||
(self.obj_best2 - self.obj_best1)
|
||||
/ self.running
|
||||
/ (max(self.cost_total - self.cost_best2, SearchThread._eps))
|
||||
/ (max(self.cost_total - self.cost_best2, self._eps))
|
||||
)
|
||||
else:
|
||||
self.speed = 0
|
||||
@@ -164,8 +160,9 @@ class SearchThread:
|
||||
# rs is used in place of optuna sometimes
|
||||
if not str(e).endswith("has already finished and can not be updated."):
|
||||
raise e
|
||||
if self.cost_attr in result and self.cost_last < result[self.cost_attr]:
|
||||
self.cost_last = result[self.cost_attr]
|
||||
new_cost = result.get(self.cost_attr, 1)
|
||||
if self.cost_last < new_cost:
|
||||
self.cost_last = new_cost
|
||||
# self._update_speed()
|
||||
|
||||
@property
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# !
|
||||
# * Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# * Copyright (c) FLAML authors. All rights reserved.
|
||||
# * Licensed under the MIT License. See LICENSE file in the
|
||||
# * project root for license information.
|
||||
from typing import Optional, Union, List, Callable, Tuple
|
||||
@@ -23,8 +23,6 @@ from .result import DEFAULT_METRIC
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_use_ray = True
|
||||
_runner = None
|
||||
_verbose = 0
|
||||
@@ -83,38 +81,43 @@ def report(_metric=None, **kwargs):
|
||||
Raises:
|
||||
StopIteration (when not using ray, i.e., _use_ray=False):
|
||||
A StopIteration exception is raised if the trial has been signaled to stop.
|
||||
SystemExit (when using ray):
|
||||
A SystemExit exception is raised if the trial has been signaled to stop by ray.
|
||||
"""
|
||||
global _use_ray
|
||||
global _verbose
|
||||
global _running_trial
|
||||
global _training_iteration
|
||||
if _use_ray:
|
||||
from ray import tune
|
||||
try:
|
||||
from ray import tune
|
||||
|
||||
return tune.report(_metric, **kwargs)
|
||||
return tune.report(_metric, **kwargs)
|
||||
except ImportError:
|
||||
# calling tune.report() outside tune.run()
|
||||
return
|
||||
result = kwargs
|
||||
if _metric:
|
||||
result[DEFAULT_METRIC] = _metric
|
||||
trial = getattr(_runner, "running_trial", None)
|
||||
if not trial:
|
||||
return None
|
||||
if _running_trial == trial:
|
||||
_training_iteration += 1
|
||||
else:
|
||||
result = kwargs
|
||||
if _metric:
|
||||
result[DEFAULT_METRIC] = _metric
|
||||
trial = getattr(_runner, "running_trial", None)
|
||||
if not trial:
|
||||
return None
|
||||
if _running_trial == trial:
|
||||
_training_iteration += 1
|
||||
else:
|
||||
_training_iteration = 0
|
||||
_running_trial = trial
|
||||
result["training_iteration"] = _training_iteration
|
||||
result["config"] = trial.config
|
||||
if INCUMBENT_RESULT in result["config"]:
|
||||
del result["config"][INCUMBENT_RESULT]
|
||||
for key, value in trial.config.items():
|
||||
result["config/" + key] = value
|
||||
_runner.process_trial_result(trial, result)
|
||||
if _verbose > 2:
|
||||
logger.info(f"result: {result}")
|
||||
if trial.is_finished():
|
||||
raise StopIteration
|
||||
_training_iteration = 0
|
||||
_running_trial = trial
|
||||
result["training_iteration"] = _training_iteration
|
||||
result["config"] = trial.config
|
||||
if INCUMBENT_RESULT in result["config"]:
|
||||
del result["config"][INCUMBENT_RESULT]
|
||||
for key, value in trial.config.items():
|
||||
result["config/" + key] = value
|
||||
_runner.process_trial_result(trial, result)
|
||||
if _verbose > 2:
|
||||
logger.info(f"result: {result}")
|
||||
if trial.is_finished():
|
||||
raise StopIteration
|
||||
|
||||
|
||||
def run(
|
||||
@@ -144,6 +147,7 @@ def run(
|
||||
max_failure: Optional[int] = 100,
|
||||
use_ray: Optional[bool] = False,
|
||||
use_incumbent_result_in_evaluation: Optional[bool] = None,
|
||||
**ray_args,
|
||||
):
|
||||
"""The trigger for HPO.
|
||||
|
||||
@@ -239,9 +243,11 @@ def run(
|
||||
respectively. You can also provide a self-defined scheduler instance
|
||||
of the TrialScheduler class. When 'asha' or self-defined scheduler is
|
||||
used, you usually need to report intermediate results in the evaluation
|
||||
function via 'tune.report()'. In addition, when 'use_ray' is not enabled,
|
||||
you also need to stop the evaluation function by explicitly catching the
|
||||
`StopIteration` exception, as shown in the following example.
|
||||
function via 'tune.report()'.
|
||||
If you would like to do some cleanup operation when the trial is stopped
|
||||
by the scheduler, you can catch the `StopIteration` (when not using ray)
|
||||
or `SystemExit` (when using ray) exception explicitly,
|
||||
as shown in the following example.
|
||||
Please find more examples using different types of schedulers
|
||||
and how to set up the corresponding evaluation functions in
|
||||
test/tune/test_scheduler.py, and test/tune/example_scheduler.py.
|
||||
@@ -252,7 +258,8 @@ def run(
|
||||
intermediate_score = evaluation_fn(step, width, height)
|
||||
try:
|
||||
tune.report(iterations=step, mean_loss=intermediate_score)
|
||||
except StopIteration:
|
||||
except (StopIteration, SystemExit):
|
||||
# do cleanup operation here
|
||||
return
|
||||
```
|
||||
search_alg: An instance of BlendSearch as the search algorithm
|
||||
@@ -291,15 +298,33 @@ def run(
|
||||
max_failure: int | the maximal consecutive number of failures to sample
|
||||
a trial before the tuning is terminated.
|
||||
use_ray: A boolean of whether to use ray as the backend.
|
||||
**ray_args: keyword arguments to pass to ray.tune.run().
|
||||
Only valid when use_ray=True.
|
||||
"""
|
||||
global _use_ray
|
||||
global _verbose
|
||||
global _running_trial
|
||||
global _training_iteration
|
||||
old_use_ray = _use_ray
|
||||
old_verbose = _verbose
|
||||
old_running_trial = _running_trial
|
||||
old_training_iteration = _training_iteration
|
||||
if not use_ray:
|
||||
_verbose = verbose
|
||||
old_handlers = logger.handlers
|
||||
old_level = logger.getEffectiveLevel()
|
||||
logger.handlers = []
|
||||
if (
|
||||
old_handlers
|
||||
and isinstance(old_handlers[0], logging.StreamHandler)
|
||||
and not isinstance(old_handlers[0], logging.FileHandler)
|
||||
):
|
||||
# Add the console handler.
|
||||
logger.addHandler(old_handlers[0])
|
||||
if verbose > 0:
|
||||
import os
|
||||
|
||||
if local_dir:
|
||||
import os
|
||||
|
||||
os.makedirs(local_dir, exist_ok=True)
|
||||
logger.addHandler(
|
||||
logging.FileHandler(
|
||||
@@ -309,7 +334,7 @@ def run(
|
||||
+ ".log"
|
||||
)
|
||||
)
|
||||
elif not logger.handlers:
|
||||
elif not logger.hasHandlers():
|
||||
# Add the console handler.
|
||||
_ch = logging.StreamHandler()
|
||||
logger_formatter = logging.Formatter(
|
||||
@@ -342,7 +367,7 @@ def run(
|
||||
flaml_scheduler_reduction_factor = reduction_factor
|
||||
scheduler = None
|
||||
try:
|
||||
import optuna
|
||||
import optuna as _
|
||||
|
||||
SearchAlgorithm = BlendSearch
|
||||
except ImportError:
|
||||
@@ -400,7 +425,7 @@ def run(
|
||||
setting["time_budget_s"] = time_budget_s
|
||||
if num_samples > 0:
|
||||
setting["num_samples"] = num_samples
|
||||
searcher.set_search_properties(metric, mode, config, setting)
|
||||
searcher.set_search_properties(metric, mode, config, **setting)
|
||||
else:
|
||||
searcher.set_search_properties(metric, mode, config)
|
||||
if scheduler in ("asha", "asynchyperband", "async_hyperband"):
|
||||
@@ -427,18 +452,26 @@ def run(
|
||||
"Please install ray[tune] or set use_ray=False"
|
||||
)
|
||||
_use_ray = True
|
||||
return tune.run(
|
||||
evaluation_function,
|
||||
metric=metric,
|
||||
mode=mode,
|
||||
search_alg=search_alg,
|
||||
scheduler=scheduler,
|
||||
time_budget_s=time_budget_s,
|
||||
verbose=verbose,
|
||||
local_dir=local_dir,
|
||||
num_samples=num_samples,
|
||||
resources_per_trial=resources_per_trial,
|
||||
)
|
||||
try:
|
||||
analysis = tune.run(
|
||||
evaluation_function,
|
||||
metric=metric,
|
||||
mode=mode,
|
||||
search_alg=search_alg,
|
||||
scheduler=scheduler,
|
||||
time_budget_s=time_budget_s,
|
||||
verbose=verbose,
|
||||
local_dir=local_dir,
|
||||
num_samples=num_samples,
|
||||
resources_per_trial=resources_per_trial,
|
||||
**ray_args,
|
||||
)
|
||||
return analysis
|
||||
finally:
|
||||
_use_ray = old_use_ray
|
||||
_verbose = old_verbose
|
||||
_running_trial = old_running_trial
|
||||
_training_iteration = old_training_iteration
|
||||
|
||||
# simple sequential run without using tune.run() from ray
|
||||
time_start = time.time()
|
||||
@@ -448,45 +481,56 @@ def run(
|
||||
from .trial_runner import SequentialTrialRunner
|
||||
|
||||
global _runner
|
||||
_runner = SequentialTrialRunner(
|
||||
search_alg=search_alg,
|
||||
scheduler=scheduler,
|
||||
metric=metric,
|
||||
mode=mode,
|
||||
)
|
||||
num_trials = 0
|
||||
if time_budget_s is None:
|
||||
time_budget_s = np.inf
|
||||
fail = 0
|
||||
ub = (len(evaluated_rewards) if evaluated_rewards else 0) + max_failure
|
||||
while (
|
||||
time.time() - time_start < time_budget_s
|
||||
and (num_samples < 0 or num_trials < num_samples)
|
||||
and fail < ub
|
||||
):
|
||||
trial_to_run = _runner.step()
|
||||
if trial_to_run:
|
||||
num_trials += 1
|
||||
if verbose:
|
||||
logger.info(f"trial {num_trials} config: {trial_to_run.config}")
|
||||
result = evaluation_function(trial_to_run.config)
|
||||
if result is not None:
|
||||
if isinstance(result, dict):
|
||||
if result:
|
||||
report(**result)
|
||||
else:
|
||||
# When the result returned is an empty dict, set the trial status to error
|
||||
trial_to_run.set_status(Trial.ERROR)
|
||||
else:
|
||||
report(_metric=result)
|
||||
_runner.stop_trial(trial_to_run)
|
||||
fail = 0
|
||||
else:
|
||||
fail += 1 # break with ub consecutive failures
|
||||
if fail == ub:
|
||||
logger.warning(
|
||||
f"fail to sample a trial for {max_failure} times in a row, stopping."
|
||||
old_runner = _runner
|
||||
try:
|
||||
_runner = SequentialTrialRunner(
|
||||
search_alg=search_alg,
|
||||
scheduler=scheduler,
|
||||
metric=metric,
|
||||
mode=mode,
|
||||
)
|
||||
if verbose > 0:
|
||||
logger.handlers.clear()
|
||||
return ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode)
|
||||
num_trials = 0
|
||||
if time_budget_s is None:
|
||||
time_budget_s = np.inf
|
||||
fail = 0
|
||||
ub = (len(evaluated_rewards) if evaluated_rewards else 0) + max_failure
|
||||
while (
|
||||
time.time() - time_start < time_budget_s
|
||||
and (num_samples < 0 or num_trials < num_samples)
|
||||
and fail < ub
|
||||
):
|
||||
trial_to_run = _runner.step()
|
||||
if trial_to_run:
|
||||
num_trials += 1
|
||||
if verbose:
|
||||
logger.info(f"trial {num_trials} config: {trial_to_run.config}")
|
||||
result = evaluation_function(trial_to_run.config)
|
||||
if result is not None:
|
||||
if isinstance(result, dict):
|
||||
if result:
|
||||
report(**result)
|
||||
else:
|
||||
# When the result returned is an empty dict, set the trial status to error
|
||||
trial_to_run.set_status(Trial.ERROR)
|
||||
else:
|
||||
report(_metric=result)
|
||||
_runner.stop_trial(trial_to_run)
|
||||
fail = 0
|
||||
else:
|
||||
fail += 1 # break with ub consecutive failures
|
||||
if fail == ub:
|
||||
logger.warning(
|
||||
f"fail to sample a trial for {max_failure} times in a row, stopping."
|
||||
)
|
||||
analysis = ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode)
|
||||
return analysis
|
||||
finally:
|
||||
# recover the global variables in case of nested run
|
||||
_use_ray = old_use_ray
|
||||
_verbose = old_verbose
|
||||
_running_trial = old_running_trial
|
||||
_training_iteration = old_training_iteration
|
||||
_runner = old_runner
|
||||
if not use_ray:
|
||||
logger.handlers = old_handlers
|
||||
logger.setLevel(old_level)
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "1.0.3"
|
||||
__version__ = "1.0.7"
|
||||
|
||||
@@ -131,7 +131,8 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install flaml[notebook,ts_forecast]"
|
||||
"%pip install flaml[notebook,ts_forecast]\n",
|
||||
"# avoid version 1.0.2 to 1.0.5 for this notebook due to a bug for arima and sarimax's init config"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
553
notebook/zeroshot_lightgbm.ipynb
Normal file
553
notebook/zeroshot_lightgbm.ipynb
Normal file
@@ -0,0 +1,553 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Copyright (c) FLAML authors. All rights reserved. \n",
|
||||
"\n",
|
||||
"Licensed under the MIT License.\n",
|
||||
"\n",
|
||||
"# Zero-shot AutoML with FLAML\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"In this notebook, we demonstrate a basic use case of zero-shot AutoML with FLAML.\n",
|
||||
"\n",
|
||||
"FLAML requires `Python>=3.6`. To run this notebook example, please install flaml and openml:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -U flaml openml;"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## What is zero-shot AutoML?\n",
|
||||
"\n",
|
||||
"Zero-shot automl means automl systems without expensive tuning. But it does adapt to data.\n",
|
||||
"A zero-shot automl system will recommend a data-dependent default configuration for a given dataset.\n",
|
||||
"\n",
|
||||
"Think about what happens when you use a `LGBMRegressor`. When you initialize a `LGBMRegressor` without any argument, it will set all the hyperparameters to the default values preset by the lightgbm library.\n",
|
||||
"There is no doubt that these default values have been carefully chosen by the library developers.\n",
|
||||
"But they are static. They are not adaptive to different datasets.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 1.0, 'importance_type': 'split', 'learning_rate': 0.1, 'max_depth': -1, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 100, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': None, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'silent': 'warn', 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from lightgbm import LGBMRegressor\n",
|
||||
"estimator = LGBMRegressor()\n",
|
||||
"print(estimator.get_params())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It is unlikely that 100 trees with 31 leaves each is the best hyperparameter setting for every dataset.\n",
|
||||
"\n",
|
||||
"So, we propose to recommend data-dependent default configurations at runtime. \n",
|
||||
"All you need to do is to import the `LGBMRegressor` from flaml.default instead of from lightgbm.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from flaml.default import LGBMRegressor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Other parts of code remain the same. The new `LGBMRegressor` will automatically choose a configuration according to the training data.\n",
|
||||
"For different training data the configuration could be different.\n",
|
||||
"The recommended configuration can be either the same as the static default configuration from the library, or different.\n",
|
||||
"It is expected to be no worse than the static default configuration in most cases.\n",
|
||||
"\n",
|
||||
"For example, let's download [houses dataset](https://www.openml.org/d/537) from OpenML. The task is to predict median price of the house in the region based on demographic composition and a state of housing market in the region."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "subslide"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"load dataset from ./openml_ds537.pkl\n",
|
||||
"Dataset name: houses\n",
|
||||
"X_train.shape: (15480, 8), y_train.shape: (15480,);\n",
|
||||
"X_test.shape: (5160, 8), y_test.shape: (5160,)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from flaml.data import load_openml_dataset\n",
|
||||
"X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir='./')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" median_income housing_median_age ... latitude longitude\n",
|
||||
"19226 7.3003 19.0 ... 38.46 -122.68\n",
|
||||
"14549 5.9547 18.0 ... 32.95 -117.24\n",
|
||||
"9093 3.2125 19.0 ... 34.68 -118.27\n",
|
||||
"12213 6.9930 13.0 ... 33.51 -117.18\n",
|
||||
"12765 2.5162 21.0 ... 38.62 -121.41\n",
|
||||
"... ... ... ... ... ...\n",
|
||||
"13123 4.4125 20.0 ... 38.27 -121.26\n",
|
||||
"19648 2.9135 27.0 ... 37.48 -120.89\n",
|
||||
"9845 3.1977 31.0 ... 36.58 -121.90\n",
|
||||
"10799 5.6315 34.0 ... 33.62 -117.93\n",
|
||||
"2732 1.3882 15.0 ... 32.80 -115.56\n",
|
||||
"\n",
|
||||
"[15480 rows x 8 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(X_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"We fit the `flaml.default.LGBMRegressor` on this dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.7019911744574896, 'importance_type': 'split', 'learning_rate': 0.022635758411078528, 'max_depth': -1, 'min_child_samples': 2, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 4797, 'n_jobs': -1, 'num_leaves': 122, 'objective': None, 'random_state': None, 'reg_alpha': 0.004252223402511765, 'reg_lambda': 0.11288241427227624, 'silent': 'warn', 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0, 'max_bin': 511, 'verbose': -1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"estimator = LGBMRegressor() # imported from flaml.default\n",
|
||||
"estimator.fit(X_train, y_train)\n",
|
||||
"print(estimator.get_params())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"The configuration is adapted as shown here. \n",
|
||||
"The number of trees is 4797, the number of leaves is 122.\n",
|
||||
"Does it work better than the static default configuration?\n",
|
||||
"Let’s compare.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0.8537444671194614"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"estimator.score(X_test, y_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The data-dependent configuration has an $r^2$ metric of 0.8537 on the test data. What about the static default configuration from lightgbm?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0.8296179648694404"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from lightgbm import LGBMRegressor\n",
|
||||
"estimator = LGBMRegressor()\n",
|
||||
"estimator.fit(X_train, y_train)\n",
|
||||
"estimator.score(X_test, y_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The static default configuration gets $r^2=0.8296$, much lower than 0.8537 by the data-dependent configuration using `flaml.default`.\n",
|
||||
"Again, the only difference in the code is from where you import the `LGBMRegressor`.\n",
|
||||
"The adaptation to the training dataset is under the hood.\n",
|
||||
"\n",
|
||||
"You might wonder, how is it possible to find the data-dependent configuration without tuning?\n",
|
||||
"The answer is that,\n",
|
||||
"flaml can recommend good data-dependent default configurations at runtime without tuning only because it mines the hyperparameter configurations across different datasets offline as a preparation step.\n",
|
||||
"So basically, zero-shot automl shifts the tuning cost from online to offline.\n",
|
||||
"In the offline preparation stage, we applied `flaml.AutoML`.\n",
|
||||
"\n",
|
||||
"### Benefit of zero-shot AutoML\n",
|
||||
"Now, what is the benefit of zero-shot automl? Or what is the benefit of shifting tuning from online to offline?\n",
|
||||
"The first benefit is the online computational cost. That is the cost paid by the final consumers of automl. They only need to train one model.\n",
|
||||
"They get the hyperparameter configuration right away. There is no overhead to worry about.\n",
|
||||
"Another big benefit is that your code doesn’t need to change. So if you currently have a workflow without the setup for tuning, you can use zero-shot automl without breaking that workflow.\n",
|
||||
"Compared to tuning-based automl, zero-shot automl requires less input. For example, it doesn’t need a tuning budget, resampling strategy, validation dataset etc.\n",
|
||||
"A related benefit is that you don’t need to worry about holding a subset of the training data for validation, which the tuning process might overfit.\n",
|
||||
"As there is no tuning, you can use all the training data to train your model.\n",
|
||||
"Finally, you can customize the offline preparation for a domain, and leverage the past tuning experience for better adaptation to similar tasks.\n",
|
||||
"\n",
|
||||
"## How to use at runtime\n",
|
||||
"The easiest way to leverage this technique is to import a \"flamlized\" learner of your favorite choice and use it just as you used the learner before. \n",
|
||||
"The automation is done behind the scene.\n",
|
||||
"The current list of “flamlized” learners is:\n",
|
||||
"* LGBMClassifier, LGBMRegressor (inheriting LGBMClassifier, LGBMRegressor from lightgbm)\n",
|
||||
"* XGBClassifier, XGBRegressor (inheriting XGBClassifier, XGBRegressor from xgboost)\n",
|
||||
"* RandomForestClassifier, RandomForestRegressor (inheriting from scikit-learn)\n",
|
||||
"* ExtraTreesClassifier, ExtraTreesRegressor (inheriting from scikit-learn)\n",
|
||||
"They work for classification or regression tasks.\n",
|
||||
"\n",
|
||||
"### What's the magic behind the scene?\n",
|
||||
"`flaml.default.LGBMRegressor` inherits `lightgbm.LGBMRegressor`, so all the methods and attributes in `lightgbm.LGBMRegressor` are still valid in `flaml.default.LGBMRegressor`.\n",
|
||||
"The difference is, `flaml.default.LGBMRegressor` decides the hyperparameter configurations based on the training data. It would use a different configuration if it is predicted to outperform the original data-independent default. If you inspect the params of the fitted estimator, you can find what configuration is used. If the original default configuration is used, then it is equivalent to the original estimator.\n",
|
||||
"The recommendation of which configuration should be used is based on offline AutoML run results. Information about the training dataset, such as the size of the dataset will be used to recommend a data-dependent configuration. The recommendation is done instantly in negligible time. The training can be faster or slower than using the original default configuration depending on the recommended configuration. \n",
|
||||
"\n",
|
||||
"### Can I check the configuration before training?\n",
|
||||
"Yes. You can use `suggest_hyperparams()` method to find the suggested configuration.\n",
|
||||
"For example, when you run the following code with the houses dataset, it will return the hyperparameter configuration instantly, without training the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'n_estimators': 4797, 'num_leaves': 122, 'min_child_samples': 2, 'learning_rate': 0.022635758411078528, 'colsample_bytree': 0.7019911744574896, 'reg_alpha': 0.004252223402511765, 'reg_lambda': 0.11288241427227624, 'max_bin': 511, 'verbose': -1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from flaml.default import LGBMRegressor\n",
|
||||
"\n",
|
||||
"estimator = LGBMRegressor()\n",
|
||||
"hyperparams, _, _, _ = estimator.suggest_hyperparams(X_train, y_train)\n",
|
||||
"print(hyperparams)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can print the configuration as a dictionary, in case you want to check it before you use it for training.\n",
|
||||
"\n",
|
||||
"This brings up an equivalent, open-box way for zero-shot AutoML if you would like more control over the training. \n",
|
||||
"Import the function `preprocess_and_suggest_hyperparams` from `flaml.default`.\n",
|
||||
"This function takes the task name, the training dataset, and the estimator name as input:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from flaml.default import preprocess_and_suggest_hyperparams\n",
|
||||
"(\n",
|
||||
" hyperparams,\n",
|
||||
" estimator_class,\n",
|
||||
" X_transformed,\n",
|
||||
" y_transformed,\n",
|
||||
" feature_transformer,\n",
|
||||
" label_transformer,\n",
|
||||
") = preprocess_and_suggest_hyperparams(\"regression\", X_train, y_train, \"lgbm\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It outputs the hyperparameter configurations, estimator class, transformed data, feature transformer and label transformer.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'lightgbm.sklearn.LGBMRegressor'>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(estimator_class)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this case, the estimator name is “lgbm”. The corresponding estimator class is `lightgbm.LGBMRegressor`.\n",
|
||||
"This line initializes an LGBMRegressor with the recommended hyperparameter configuration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = estimator_class(**hyperparams)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we can fit the model on the transformed data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
"slide_type": "slide"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"LGBMRegressor(colsample_bytree=0.7019911744574896,\n",
|
||||
" learning_rate=0.022635758411078528, max_bin=511,\n",
|
||||
" min_child_samples=2, n_estimators=4797, num_leaves=122,\n",
|
||||
" reg_alpha=0.004252223402511765, reg_lambda=0.11288241427227624,\n",
|
||||
" verbose=-1)"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model.fit(X_transformed, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The feature transformer needs to be applied to the test data before prediction."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_test_transformed = feature_transformer.transform(X_test)\n",
|
||||
"y_pred = model.predict(X_test_transformed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"These are automated when you use the \"flamlized\" learner. So you don’t need to know these details when you don’t need to open the box.\n",
|
||||
"We demonstrate them here to help you understand what’s going on. And in case you need to modify some steps, you know what to do.\n",
|
||||
"\n",
|
||||
"(Note that some classifiers like XGBClassifier require the labels to be integers, while others do not. So you can decide whether to use the transformed labels y_transformed and the label transformer label_transformer. Also, each estimator may require specific preprocessing of the data.)\n",
|
||||
"\n",
|
||||
"## Combine Zero-shot AutoML and HPO\n",
|
||||
"\n",
|
||||
"Zero Shot AutoML is fast and simple to use. It is very useful if speed and simplicity are the primary concerns. \n",
|
||||
"If you are not satisfied with the accuracy of the zero shot model, you may want to spend extra time to tune the model.\n",
|
||||
"You can use `flaml.AutoML` to do that. Everything is the same as your normal `AutoML.fit()`, except that you set `starting_points=\"data\"`.\n",
|
||||
"This tells AutoML to start the tuning from the data-dependent default configurations. You can set the tuning budget in the same way as before.\n",
|
||||
"Note that if you set `max_iter=0` and `time_budget=None`, you are effectively using zero-shot AutoML. \n",
|
||||
"When `estimator_list` is omitted, the most promising estimator together with its hyperparameter configuration will be tried first, which are both decided by zero-shot AutoML."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[flaml.automl: 05-31 22:54:25] {2373} INFO - task = regression\n",
|
||||
"[flaml.automl: 05-31 22:54:25] {2375} INFO - Data split method: uniform\n",
|
||||
"[flaml.automl: 05-31 22:54:25] {2379} INFO - Evaluation method: cv\n",
|
||||
"[flaml.automl: 05-31 22:54:25] {2448} INFO - Minimizing error metric: 1-r2\n",
|
||||
"[flaml.automl: 05-31 22:54:25] {2586} INFO - List of ML learners in AutoML Run: ['lgbm']\n",
|
||||
"[flaml.automl: 05-31 22:54:25] {2878} INFO - iteration 0, current learner lgbm\n",
|
||||
"[flaml.automl: 05-31 22:56:54] {3008} INFO - Estimated sufficient time budget=1490299s. Estimated necessary time budget=1490s.\n",
|
||||
"[flaml.automl: 05-31 22:56:54] {3055} INFO - at 149.1s,\testimator lgbm's best error=0.1513,\tbest estimator lgbm's best error=0.1513\n",
|
||||
"[flaml.automl: 05-31 22:56:54] {2878} INFO - iteration 1, current learner lgbm\n",
|
||||
"[flaml.automl: 05-31 22:59:24] {3055} INFO - at 299.0s,\testimator lgbm's best error=0.1513,\tbest estimator lgbm's best error=0.1513\n",
|
||||
"[flaml.automl: 05-31 22:59:24] {2878} INFO - iteration 2, current learner lgbm\n",
|
||||
"[flaml.automl: 05-31 23:01:34] {3055} INFO - at 429.1s,\testimator lgbm's best error=0.1513,\tbest estimator lgbm's best error=0.1513\n",
|
||||
"[flaml.automl: 05-31 23:01:34] {2878} INFO - iteration 3, current learner lgbm\n",
|
||||
"[flaml.automl: 05-31 23:04:43] {3055} INFO - at 618.2s,\testimator lgbm's best error=0.1513,\tbest estimator lgbm's best error=0.1513\n",
|
||||
"[flaml.automl: 05-31 23:05:14] {3315} INFO - retrain lgbm for 31.0s\n",
|
||||
"[flaml.automl: 05-31 23:05:14] {3322} INFO - retrained model: LGBMRegressor(colsample_bytree=0.7019911744574896,\n",
|
||||
" learning_rate=0.02263575841107852, max_bin=511,\n",
|
||||
" min_child_samples=2, n_estimators=4797, num_leaves=122,\n",
|
||||
" reg_alpha=0.004252223402511765, reg_lambda=0.11288241427227633,\n",
|
||||
" verbose=-1)\n",
|
||||
"[flaml.automl: 05-31 23:05:14] {2617} INFO - fit succeeded\n",
|
||||
"[flaml.automl: 05-31 23:05:14] {2618} INFO - Time taken to find the best model: 149.06516432762146\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from flaml import AutoML\n",
|
||||
"\n",
|
||||
"automl = AutoML()\n",
|
||||
"settings = {\n",
|
||||
" \"task\": \"regression\",\n",
|
||||
" \"starting_points\": \"data\",\n",
|
||||
" \"estimator_list\": [\"lgbm\"],\n",
|
||||
" \"time_budget\": 600,\n",
|
||||
"}\n",
|
||||
"automl.fit(X_train, y_train, **settings)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"interpreter": {
|
||||
"hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.9.9 64-bit",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
9
setup.py
9
setup.py
@@ -72,6 +72,7 @@ setuptools.setup(
|
||||
"blendsearch": ["optuna==2.8.0"],
|
||||
"ray": [
|
||||
"ray[tune]~=1.10",
|
||||
"protobuf<4", # to prevent TypeError in ray
|
||||
],
|
||||
"azureml": [
|
||||
"azureml-mlflow",
|
||||
@@ -91,11 +92,17 @@ setuptools.setup(
|
||||
"rouge_score",
|
||||
],
|
||||
"ts_forecast": [
|
||||
"holidays<0.14", # to prevent installation error for prophet
|
||||
"prophet>=1.0.1",
|
||||
"statsmodels>=0.12.2",
|
||||
"hcrystalball==0.1.10",
|
||||
],
|
||||
"forecast": [
|
||||
"holidays<0.14", # to prevent installation error for prophet
|
||||
"prophet>=1.0.1",
|
||||
"statsmodels>=0.12.2",
|
||||
"hcrystalball==0.1.10",
|
||||
],
|
||||
"forecast": ["prophet>=1.0.1", "statsmodels>=0.12.2", "hcrystalball==0.1.10"],
|
||||
"benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3"],
|
||||
},
|
||||
classifiers=[
|
||||
|
||||
@@ -99,7 +99,6 @@ class TestClassification(unittest.TestCase):
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
assert automl.model is not None
|
||||
|
||||
automl = AutoML()
|
||||
try:
|
||||
@@ -257,6 +256,7 @@ class TestClassification(unittest.TestCase):
|
||||
time_budget=10,
|
||||
task="classification",
|
||||
n_concurrent_trials=2,
|
||||
ensemble=True,
|
||||
)
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
@@ -21,7 +21,7 @@ def test_metric_constraints():
|
||||
"log_type": "all",
|
||||
"retrain_full": "budget",
|
||||
"keep_search_state": True,
|
||||
"time_budget": 1,
|
||||
"time_budget": 2,
|
||||
"pred_time_limit": 5.1e-05,
|
||||
}
|
||||
from sklearn.externals._arff import ArffException
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import sys
|
||||
from openml.exceptions import OpenMLServerException
|
||||
from requests.exceptions import ChunkedEncodingError
|
||||
from requests.exceptions import ChunkedEncodingError, SSLError
|
||||
|
||||
|
||||
def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
|
||||
@@ -23,6 +23,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
|
||||
OpenMLServerException,
|
||||
ChunkedEncodingError,
|
||||
urllib3.exceptions.ReadTimeoutError,
|
||||
SSLError,
|
||||
) as e:
|
||||
print(e)
|
||||
return
|
||||
@@ -110,7 +111,7 @@ def test_mlflow():
|
||||
X_train, X_test, y_train, y_test = load_openml_task(
|
||||
task_id=7592, data_dir="test/"
|
||||
)
|
||||
except (OpenMLServerException, ChunkedEncodingError) as e:
|
||||
except (OpenMLServerException, ChunkedEncodingError, SSLError) as e:
|
||||
print(e)
|
||||
return
|
||||
""" import AutoML class from flaml package """
|
||||
|
||||
@@ -56,6 +56,7 @@ class TestRegression(unittest.TestCase):
|
||||
y_pred = automl.predict(X_train)
|
||||
print(y_pred)
|
||||
print(automl.model.estimator)
|
||||
n_iter = automl.model.estimator.get_params("n_estimators")
|
||||
print(automl.config_history)
|
||||
print(automl.best_model_for_estimator("xgboost"))
|
||||
print(automl.best_iteration)
|
||||
@@ -86,7 +87,11 @@ class TestRegression(unittest.TestCase):
|
||||
)
|
||||
print(automl.model.estimator)
|
||||
y_pred2 = automl.predict(X_train)
|
||||
assert (y_pred == y_pred2).all()
|
||||
# In some rare case, the last config is early stopped and it's the best config. But the logged config's n_estimator is not reduced.
|
||||
assert (
|
||||
n_iter != automl.model.estimator.get_params("n_estimator")
|
||||
or (y_pred == y_pred2).all()
|
||||
)
|
||||
|
||||
def test_sparse_matrix_regression(self):
|
||||
X_train = scipy.sparse.random(300, 900, density=0.0001)
|
||||
|
||||
@@ -12,6 +12,7 @@ from flaml import AutoVW
|
||||
import string
|
||||
import os
|
||||
import openml
|
||||
from requests.exceptions import SSLError
|
||||
|
||||
VW_DS_DIR = "test/data/"
|
||||
NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)
|
||||
@@ -96,10 +97,14 @@ def shuffle_data(X, y, seed):
|
||||
def get_oml_to_vw(did, max_ns_num, ds_dir=VW_DS_DIR):
|
||||
success = False
|
||||
print("-----getting oml dataset-------", did)
|
||||
ds = openml.datasets.get_dataset(did)
|
||||
target_attribute = ds.default_target_attribute
|
||||
# if target_attribute is None and did in OML_target_attribute_dict:
|
||||
# target_attribute = OML_target_attribute_dict[did]
|
||||
try:
|
||||
ds = openml.datasets.get_dataset(did)
|
||||
target_attribute = ds.default_target_attribute
|
||||
# if target_attribute is None and did in OML_target_attribute_dict:
|
||||
# target_attribute = OML_target_attribute_dict[did]
|
||||
except (SSLError) as e:
|
||||
print(e)
|
||||
return
|
||||
|
||||
print("target=ds.default_target_attribute", target_attribute)
|
||||
data = ds.get_data(target=target_attribute, dataset_format="array")
|
||||
|
||||
126
test/tune/test_reproducibility.py
Normal file
126
test/tune/test_reproducibility.py
Normal file
@@ -0,0 +1,126 @@
|
||||
from functools import partial
|
||||
|
||||
|
||||
def _evaluation_fn(step, width, height):
|
||||
return (0.1 + width * step / 100) ** (-1) + height * 0.1
|
||||
|
||||
|
||||
def _easy_objective(use_raytune, config):
|
||||
if use_raytune:
|
||||
from ray import tune
|
||||
else:
|
||||
from flaml import tune
|
||||
# Hyperparameters
|
||||
width, height = config["width"], config["height"]
|
||||
|
||||
for step in range(config["steps"]):
|
||||
# Iterative training function - can be any arbitrary training procedure
|
||||
intermediate_score = _evaluation_fn(step, width, height)
|
||||
        # Feed the score back to Tune.
|
||||
try:
|
||||
tune.report(iterations=step, mean_loss=intermediate_score)
|
||||
except StopIteration:
|
||||
print("Trial stopped", step)
|
||||
return
|
||||
|
||||
|
||||
def test_tune(
|
||||
smoke_test=True, externally_setup_searcher=False, use_ray=False, use_raytune=False
|
||||
):
|
||||
from flaml import tune
|
||||
from flaml.searcher.blendsearch import BlendSearch
|
||||
|
||||
easy_objective_custom_tune = partial(_easy_objective, use_raytune)
|
||||
search_space = {
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
}
|
||||
if externally_setup_searcher:
|
||||
|
||||
searcher = BlendSearch(
|
||||
space=search_space,
|
||||
time_budget_s=5,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
)
|
||||
assert (
|
||||
searcher.cost_attr == "time_total_s"
|
||||
), "when time_budget_s is provided, cost_attr should be time_total_s"
|
||||
|
||||
searcher = BlendSearch(
|
||||
space=search_space,
|
||||
num_samples=10,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
)
|
||||
assert (
|
||||
searcher.cost_attr is None
|
||||
), "when time_budget_s is not provided, cost_attr should be None."
|
||||
|
||||
searcher = BlendSearch(
|
||||
space=search_space,
|
||||
num_samples=10,
|
||||
time_budget_s=5,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
)
|
||||
assert (
|
||||
searcher.cost_attr == "time_total_s"
|
||||
), "As long as time_budget_s is provided and cost_attr not otherwise specified (i.e., using the default auto value), time_total_s is used as the cost_attr"
|
||||
|
||||
searcher = BlendSearch(
|
||||
space=search_space,
|
||||
num_samples=10,
|
||||
time_budget_s=5,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
cost_attr=None,
|
||||
)
|
||||
assert (
|
||||
searcher.cost_attr is None
|
||||
), "When the cost_attr is explicitly specified to be None, BS should use None as the cost_attr."
|
||||
|
||||
searcher = BlendSearch(
|
||||
space=search_space,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
)
|
||||
else:
|
||||
searcher = None
|
||||
|
||||
analysis = tune.run(
|
||||
easy_objective_custom_tune,
|
||||
search_alg=searcher,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
num_samples=10,
|
||||
time_budget_s=5,
|
||||
use_ray=use_ray,
|
||||
config=search_space,
|
||||
)
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
print("best results", analysis.best_result)
|
||||
print("best results", analysis.results)
|
||||
return analysis.best_config
|
||||
|
||||
|
||||
def test_reproducibility():
|
||||
best_config_1 = test_tune(smoke_test=True)
|
||||
best_config_2 = test_tune(smoke_test=True)
|
||||
print(best_config_1)
|
||||
print(best_config_2)
|
||||
assert best_config_1 == best_config_2, "flaml.tune not reproducible"
|
||||
|
||||
best_config_1 = test_tune(smoke_test=True, externally_setup_searcher=True)
|
||||
best_config_2 = test_tune(smoke_test=True, externally_setup_searcher=True)
|
||||
print(best_config_1)
|
||||
print(best_config_2)
|
||||
assert (
|
||||
best_config_1 == best_config_2
|
||||
), "flaml.tune not reproducible when the searcher is set up externally"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_reproducibility()
|
||||
@@ -203,7 +203,9 @@ def test_searcher():
|
||||
points_to_evaluate=[{"a": 1, "b": 0.01}],
|
||||
)
|
||||
searcher.set_search_properties(
|
||||
metric="m2", config=config, setting={"time_budget_s": 0}
|
||||
metric="m2",
|
||||
config=config,
|
||||
time_budget_s=0,
|
||||
)
|
||||
c = searcher.suggest("t1")
|
||||
print("t1", c)
|
||||
|
||||
@@ -20,6 +20,37 @@ logger.addHandler(logging.FileHandler("logs/tune.log"))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def test_nested_run():
|
||||
from flaml import AutoML, tune
|
||||
|
||||
data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
|
||||
train_x, val_x, y_train, y_val = train_test_split(data, labels, test_size=0.25)
|
||||
space_pca = {
|
||||
"n_components": tune.uniform(0.5, 0.99),
|
||||
}
|
||||
|
||||
def pca_flaml(config):
|
||||
n_components = config["n_components"]
|
||||
from sklearn.decomposition import PCA
|
||||
|
||||
pca = PCA(n_components)
|
||||
X_train = pca.fit_transform(train_x)
|
||||
X_val = pca.transform(val_x)
|
||||
automl = AutoML()
|
||||
automl.fit(X_train, y_train, X_val=X_val, y_val=y_val, time_budget=1)
|
||||
return {"loss": automl.best_loss}
|
||||
|
||||
analysis = tune.run(
|
||||
pca_flaml,
|
||||
space_pca,
|
||||
metric="loss",
|
||||
mode="min",
|
||||
num_samples=5,
|
||||
local_dir="logs",
|
||||
)
|
||||
print(analysis.best_result)
|
||||
|
||||
|
||||
def train_breast_cancer(config: dict):
|
||||
# This is a simple training function to be passed into Tune
|
||||
# Load dataset
|
||||
@@ -182,7 +213,7 @@ def _test_xgboost(method="BlendSearch"):
|
||||
logger.info(f"Best model parameters: {best_trial.config}")
|
||||
|
||||
|
||||
def test_nested():
|
||||
def test_nested_space():
|
||||
from flaml import tune, CFO
|
||||
|
||||
search_space = {
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
# Contributing
|
||||
|
||||
This project welcomes (and encourages) all forms of contributions, including but not limited to:
|
||||
- Pushing patches.
|
||||
- Code review of pull requests.
|
||||
- Documentation, examples and test cases.
|
||||
- Readability improvement, e.g., improvement on docstr and comments.
|
||||
- Community participation in [issues](https://github.com/microsoft/FLAML/issues), [discussions](https://github.com/microsoft/FLAML/discussions), and [gitter](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge).
|
||||
- Tutorials, blog posts, talks that promote the project.
|
||||
- Sharing application scenarios and/or related research.
|
||||
|
||||
- Pushing patches.
|
||||
- Code review of pull requests.
|
||||
- Documentation, examples and test cases.
|
||||
- Readability improvement, e.g., improvement on docstr and comments.
|
||||
- Community participation in [issues](https://github.com/microsoft/FLAML/issues), [discussions](https://github.com/microsoft/FLAML/discussions), and [gitter](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge).
|
||||
- Tutorials, blog posts, talks that promote the project.
|
||||
- Sharing application scenarios and/or related research.
|
||||
|
||||
You can take a look at the [Roadmap for Upcoming Features](https://github.com/microsoft/FLAML/wiki/Roadmap-for-Upcoming-Features) to identify potential things to work on.
|
||||
|
||||
|
||||
Most contributions require you to agree to a
|
||||
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
|
||||
the rights to use your contribution. For details, visit <https://cla.opensource.microsoft.com>.
|
||||
@@ -27,6 +27,7 @@ For more information see the [Code of Conduct FAQ](https://opensource.microsoft.
|
||||
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
|
||||
|
||||
## Becoming a Reviewer
|
||||
|
||||
There is currently no formal reviewer solicitation process. Current reviewers identify reviewers from active contributors. If you are willing to become a reviewer, you are welcome to let us know on gitter.
|
||||
|
||||
## Developing
|
||||
@@ -35,7 +36,7 @@ There is currently no formal reviewer solicitation process. Current reviewers id
|
||||
|
||||
```bash
|
||||
git clone https://github.com/microsoft/FLAML.git
|
||||
pip install -e .[test,notebook]
|
||||
pip install -e FLAML[test,notebook]
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
@@ -58,6 +58,8 @@ X_test.shape: (5160, 8), y_test.shape: (5160,)
|
||||
{'n_estimators': 4797, 'num_leaves': 122, 'min_child_samples': 2, 'learning_rate': 0.022635758411078528, 'colsample_bytree': 0.7019911744574896, 'reg_alpha': 0.004252223402511765, 'reg_lambda': 0.11288241427227624, 'max_bin': 511, 'verbose': -1}
|
||||
```
|
||||
|
||||
[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/zeroshot_lightgbm.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/zeroshot_lightgbm.ipynb)
|
||||
|
||||
## Flamlized XGBClassifier
|
||||
|
||||
### Zero-shot AutoML
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
- Definition and purpose: The `low_cost_partial_config` is a dictionary of subset of the hyperparameter coordinates whose value corresponds to a configuration with known low-cost (i.e., low computation cost for training the corresponding model). The concept of low/high-cost is meaningful in the case where a subset of the hyperparameters to tune directly affects the computation cost for training the model. For example, `n_estimators` and `max_leaves` are known to affect the training cost of tree-based learners. We call this subset of hyperparameters, *cost-related hyperparameters*. In such scenarios, if you are aware of low-cost configurations for the cost-related hyperparameters, you are recommended to set them as the `low_cost_partial_config`. Using the tree-based method example again, since we know that small `n_estimators` and `max_leaves` generally correspond to simpler models and thus lower cost, we set `{'n_estimators': 4, 'max_leaves': 4}` as the `low_cost_partial_config` by default (note that `4` is the lower bound of search space for these two hyperparameters), e.g., in [LGBM](https://github.com/microsoft/FLAML/blob/main/flaml/model.py#L215). Configuring `low_cost_partial_config` helps the search algorithms make more cost-efficient choices.
|
||||
In AutoML, the `low_cost_init_value` in `search_space()` function for each estimator serves the same role.
|
||||
|
||||
- Usage in practice: It is recommended to configure it if there are cost-related hyperparameters in your tuning task and you happen to know the low-cost values for them, but it is not required( It is fine to leave it the default value, i.e., `None`).
|
||||
- Usage in practice: It is recommended to configure it if there are cost-related hyperparameters in your tuning task and you happen to know the low-cost values for them, but it is not required (It is fine to leave it the default value, i.e., `None`).
|
||||
|
||||
- How does it work: `low_cost_partial_config`, if configured, will be used as an initial point of the search. It also affects the search trajectory. For more details about how it plays a role in the search algorithms, please refer to the papers about the search algorithms used: Section 2 of [Frugal Optimization for Cost-related Hyperparameters (CFO)](https://arxiv.org/pdf/2005.01571.pdf) and Section 3 of [Economical Hyperparameter Optimization with Blended Search Strategy (BlendSearch)](https://openreview.net/pdf?id=VbLH04pRA3).
|
||||
|
||||
@@ -18,6 +18,37 @@ Currently FLAML does several things for imbalanced data.
|
||||
2. We use stratified sampling when doing holdout and kf.
|
||||
3. We make sure no class is empty in both training and holdout data.
|
||||
4. We allow users to pass `sample_weight` to `AutoML.fit()`.
|
||||
5. Users can customize the weight of each class by setting the `custom_hp` or `fit_kwargs_by_estimator` arguments. For example, the following code sets `scale_pos_weight` to 0.5 for the XGBoost estimator and `class_weight` to `"balanced"` for the RandomForest estimator:
|
||||
|
||||
```python
|
||||
from flaml import AutoML
|
||||
from sklearn.datasets import load_iris
|
||||
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/iris.log",
|
||||
"estimator_list": ["rf", "xgboost"],
|
||||
}
|
||||
|
||||
automl_settings["custom_hp"] = {
|
||||
"xgboost": {
|
||||
"scale_pos_weight": {
|
||||
"domain": 0.5,
|
||||
"init_value": 0.5,
|
||||
}
|
||||
},
|
||||
"rf": {
|
||||
"class_weight": {
|
||||
"domain": "balanced",
|
||||
"init_value": "balanced"
|
||||
}
|
||||
}
|
||||
}
|
||||
print(automl.model)
|
||||
```
|
||||
|
||||
|
||||
### How to interpret model performance? Is it possible for me to visualize feature importance, SHAP values, optimization history?
|
||||
|
||||
@@ -37,6 +37,11 @@ pip install flaml[vw]
|
||||
pip install flaml[forecast]
|
||||
```
|
||||
|
||||
* natural language processing: transformers
|
||||
```bash
|
||||
pip install flaml[nlp]
|
||||
```
|
||||
|
||||
### Distributed tuning
|
||||
* ray
|
||||
```bash
|
||||
|
||||
@@ -421,7 +421,29 @@ with mlflow.start_run():
|
||||
|
||||
### Extra fit arguments
|
||||
|
||||
Extra fit arguments that are needed by the estimators can be passed to `AutoML.fit()`. For example, if there is a weight associated with each training example, they can be passed via `sample_weight`. For another example, `period` can be passed for time series forecaster. For any extra keywork argument passed to `AutoML.fit()` which has not been explicitly listed in the function signature, it will be passed to the underlying estimators' `fit()` as is.
|
||||
Extra fit arguments that are needed by the estimators can be passed to `AutoML.fit()`. For example, if there is a weight associated with each training example, they can be passed via `sample_weight`. For another example, `period` can be passed for a time series forecaster. Any extra keyword argument passed to `AutoML.fit()` which has not been explicitly listed in the function signature will be passed to the underlying estimators' `fit()` as is. As a further example, you can set the number of GPUs used by each trial with the `gpu_per_trial` argument, which is only used by TransformersEstimator and XGBoostSklearnEstimator.
|
||||
|
||||
In addition, you can specify the different arguments needed by different estimators using the `fit_kwargs_by_estimator` argument. For example, you can set the custom arguments for the CatBoost estimator:
|
||||
|
||||
```python
|
||||
from flaml.data import load_openml_dataset
|
||||
from flaml import AutoML
|
||||
|
||||
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"task": "classification",
|
||||
"time_budget": 10,
|
||||
"estimator_list": ["catboost", "rf"],
|
||||
"fit_kwargs_by_estimator": {
|
||||
"catboost": {
|
||||
"verbose": True, # setting the verbosity of catboost to True
|
||||
}
|
||||
},
|
||||
}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
```
|
||||
|
||||
## Retrieve and analyze the outcomes of AutoML.fit()
|
||||
|
||||
|
||||
@@ -353,7 +353,7 @@ tune.run(.., scheduler=my_scheduler, ...)
|
||||
|
||||
- Different from the case when the `flaml` scheduler is used, the amount of resources to use at each iteration is not suggested by the search algorithm through the `resource_attr` in a configuration. You need to specify the evaluation schedule explicitly by yourself in the `evaluation_function` and **report intermediate results (using `tune.report()`) accordingly**. In the following code example, we use the ASHA scheduler by setting `scheduler="asha"`. We specify `resource_attr`, `min_resource`, `max_resource` and `reduction_factor` the same way as in the previous example (when "flaml" is used as the scheduler). We perform the evaluation in a customized schedule.
|
||||
|
||||
- Use ray backend or not? You can choose to use ray backend or not by specifying `use_ray=True` or `use_ray=False`. When ray backend is not used, i.e., `use_ray=False`, you also need to stop the evaluation function by explicitly catching the `StopIteration` exception, as shown in the last two lines of the evaluation function `obj_w_intermediate_report()` in the following code example.
|
||||
- Use ray backend or not? You can choose to use ray backend or not by specifying `use_ray=True` or `use_ray=False`. When ray backend is not used, i.e., `use_ray=False`, you also need to stop the evaluation function by explicitly catching the `StopIteration` exception, as shown at the end of the evaluation function `obj_w_intermediate_report()` in the following code example.
|
||||
|
||||
```python
|
||||
def obj_w_intermediate_report(resource_attr, X_train, X_test, y_train, y_test, min_resource, max_resource, config):
|
||||
@@ -375,7 +375,8 @@ def obj_w_intermediate_report(resource_attr, X_train, X_test, y_train, y_test, m
|
||||
# need to report the resource attribute used and the corresponding intermediate results
|
||||
try:
|
||||
tune.report(sample_size=resource, loss=test_loss)
|
||||
except StopIteration:
|
||||
except (StopIteration, SystemExit):
|
||||
# do cleanup operation here
|
||||
return
|
||||
|
||||
resource_attr = "sample_size"
|
||||
@@ -399,6 +400,9 @@ analysis = tune.run(
|
||||
)
|
||||
```
|
||||
|
||||
- If you would like to do some cleanup operation when the trial is stopped
|
||||
by the scheduler, you can do it when you catch the `StopIteration` (when not using ray) or `SystemExit` (when using ray) exception explicitly.
|
||||
|
||||
### Warm start
|
||||
|
||||
Related arguments:
|
||||
|
||||
Reference in New Issue
Block a user