Mirror of https://github.com/microsoft/FLAML.git, synced 2026-02-15 21:29:16 +08:00

Compare commits (7 commits)
Commits in this comparison (SHA1):

- c038fbca07
- 6a99202492
- 42d1dcfa0e
- b83c8a7d3b
- b9194cdcf2
- 9a1f6b0291
- 07f4413aae
@@ -1,4 +1,5 @@
 import logging
+import warnings
 
 try:
     from flaml.automl import AutoML, logger_formatter
@@ -12,7 +13,8 @@ from flaml.version import __version__
 
 # Set the root logger.
 logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
+if logger.level == logging.NOTSET:
+    logger.setLevel(logging.INFO)
 
 if not has_automl:
-    logger.warning("flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.")
+    warnings.warn("flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.")
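The intent of the two hunks above: only apply flaml's default log level when the application has not already configured the logger, and surface the missing `flaml[automl]` extra through the `warnings` machinery instead of a possibly silent logger. A minimal standalone sketch of the same pattern (illustrative, not the package source; `has_automl` stands in for the module's try/except import guard):

```python
import logging
import warnings

logger = logging.getLogger("flaml")

# Respect any level the host application configured before importing flaml;
# fall back to INFO only while the logger is still unset.
if logger.level == logging.NOTSET:
    logger.setLevel(logging.INFO)

has_automl = False  # stand-in for the real try/except around `from flaml.automl import ...`

if not has_automl:
    # warnings.warn reaches users who never attach logging handlers, and it can be
    # filtered or escalated to an error through the standard warnings filters.
    warnings.warn("flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.")
```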
@@ -203,7 +203,7 @@ class AutoML(BaseEstimator):
                 * Valid str options depend on different tasks.
                 For classification tasks, valid choices are
                 ["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
-                For regression tasks, valid choices are ["auto", 'uniform', 'time'].
+                For regression tasks, valid choices are ["auto", 'uniform', 'time', 'group'].
                 "auto" -> uniform.
                 For time series forecast tasks, must be "auto" or 'time'.
                 For ranking task, must be "auto" or 'group'.
@@ -739,7 +739,7 @@ class AutoML(BaseEstimator):
                 * Valid str options depend on different tasks.
                 For classification tasks, valid choices are
                 ["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
-                For regression tasks, valid choices are ["auto", 'uniform', 'time'].
+                For regression tasks, valid choices are ["auto", 'uniform', 'time', 'group'].
                 "auto" -> uniform.
                 For time series forecast tasks, must be "auto" or 'time'.
                 For ranking task, must be "auto" or 'group'.
@@ -1358,7 +1358,7 @@ class AutoML(BaseEstimator):
                 * Valid str options depend on different tasks.
                 For classification tasks, valid choices are
                 ["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
-                For regression tasks, valid choices are ["auto", 'uniform', 'time'].
+                For regression tasks, valid choices are ["auto", 'uniform', 'time', 'group'].
                 "auto" -> uniform.
                 For time series forecast tasks, must be "auto" or 'time'.
                 For ranking task, must be "auto" or 'group'.
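These docstring updates document that grouped splits are accepted for regression tasks as well. A minimal usage sketch, assuming synthetic data, an illustrative 10-second budget, and randomly drawn group ids (none of these values come from the diff):

```python
import numpy as np
from flaml import AutoML

# Synthetic regression data with one group id per row (e.g., the site a sample came from).
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + rng.normal(scale=0.1, size=200)
groups = rng.integers(0, 10, size=200)

automl = AutoML()
automl.fit(
    X,
    y,
    task="regression",
    metric="r2",
    eval_method="cv",
    split_type="group",  # per the updated docstring, 'group' is now listed for regression
    groups=groups,
    time_budget=10,
)
```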
@@ -2066,8 +2066,8 @@ class CatBoostEstimator(BaseEstimator):
            self.estimator_class = CatBoostRegressor
 
     def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
-        if "is_retrain" in kwargs:
-            kwargs.pop("is_retrain")
+        kwargs.pop("is_retrain", None)
+        kwargs.pop("groups", None)
         start_time = time.time()
         deadline = start_time + budget if budget else np.inf
         train_dir = f"catboost_{str(start_time)}"
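The replacement drops the membership check in favor of `dict.pop` with a default and also strips `groups`, so neither key is forwarded to the underlying CatBoost `fit`. The pattern on a toy kwargs dict (values are hypothetical):

```python
# Hypothetical keyword arguments as they might reach the estimator wrapper.
kwargs = {"n_estimators": 100, "is_retrain": True, "groups": [0, 0, 1, 1]}

# pop with a default removes the key when present and is a no-op otherwise,
# so no membership test or KeyError handling is needed.
kwargs.pop("is_retrain", None)
kwargs.pop("groups", None)

assert kwargs == {"n_estimators": 100}
```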
@@ -442,8 +442,8 @@ class GenericTask(Task):
                 X_train_all, y_train_all = shuffle(X_train_all, y_train_all, random_state=RANDOM_SEED)
                 if data_is_df:
                     X_train_all.reset_index(drop=True, inplace=True)
-                    if isinstance(y_train_all, pd.Series):
-                        y_train_all.reset_index(drop=True, inplace=True)
+                if isinstance(y_train_all, pd.Series):
+                    y_train_all.reset_index(drop=True, inplace=True)
 
         X_train, y_train = X_train_all, y_train_all
         state.groups_all = state.groups
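The hunk appears to move the `isinstance` check out of the `data_is_df` branch so that a pandas Series target is re-indexed even when the features are a plain ndarray. The motivation can be seen in isolation: after `sklearn.utils.shuffle`, a Series keeps its original index labels, which no longer line up with the shuffled ndarray rows. A small illustration (standalone, not FLAML code):

```python
import numpy as np
import pandas as pd
from sklearn.utils import shuffle

X = np.arange(8).reshape(4, 2)            # plain ndarray, nothing to realign
y = pd.Series([10.0, 20.0, 30.0, 40.0])   # a Series carries its index through the shuffle

X_s, y_s = shuffle(X, y, random_state=0)
print(list(y_s.index))            # original labels in shuffled order, e.g. [2, 0, 1, 3]

y_s = y_s.reset_index(drop=True)  # realign to 0..n-1 so positions match the shuffled X_s
print(list(y_s.index))            # [0, 1, 2, 3]
```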
@@ -192,7 +192,7 @@ class Task(ABC):
                 * Valid str options depend on different tasks.
                 For classification tasks, valid choices are
                 ["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
-                For regression tasks, valid choices are ["auto", 'uniform', 'time'].
+                For regression tasks, valid choices are ["auto", 'uniform', 'time', 'group'].
                 "auto" -> uniform.
                 For time series forecast tasks, must be "auto" or 'time'.
                 For ranking task, must be "auto" or 'group'.
@@ -260,6 +260,8 @@ def run(
     mlflow_exp_name: Optional[str] = None,
     automl_info: Optional[Tuple[float]] = None,
     extra_tag: Optional[dict] = None,
+    cost_attr: Optional[str] = "auto",
+    cost_budget: Optional[float] = None,
     **ray_args,
 ):
     """The function-based way of performing HPO.
@@ -462,6 +464,12 @@ def run(
             overwritten by the value of `n_concurrent_trials` in AutoML. When <= 0, the concurrent trials
             will be set to the number of executors.
         extra_tag: dict, default=None | Extra tags to be added to the mlflow runs created by autologging.
+        cost_attr: None or str to specify the attribute to evaluate the cost of different trials.
+            Default is "auto", which means that we will automatically choose the cost attribute to use (depending
+            on the nature of the resource budget). When cost_attr is set to None, cost differences between different trials will be omitted
+            in our search algorithm. When cost_attr is set to a str different from "auto" and "time_total_s",
+            this cost_attr must be available in the result dict of the trial.
+        cost_budget: A float of the cost budget. Only valid when cost_attr is a str different from "auto" and "time_total_s".
         **ray_args: keyword arguments to pass to ray.tune.run().
             Only valid when use_ray=True.
     """
@@ -600,6 +608,8 @@ def run(
             metric_constraints=metric_constraints,
             use_incumbent_result_in_evaluation=use_incumbent_result_in_evaluation,
             lexico_objectives=lexico_objectives,
+            cost_attr=cost_attr,
+            cost_budget=cost_budget,
         )
     else:
         if metric is None or mode is None:
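Per the new docstring, a custom `cost_attr` must appear in every trial's result dict, and `cost_budget` is only meaningful together with such a custom attribute. A minimal sketch of how the new arguments might be used; the objective, the `token_cost` key, and the budget value are illustrative assumptions, not taken from the diff:

```python
from flaml import tune


def evaluate(config):
    # Toy objective; "token_cost" is a made-up per-trial cost reported alongside the metric.
    loss = (config["x"] - 3.0) ** 2
    return {"loss": loss, "token_cost": abs(config["x"]) + 1.0}


analysis = tune.run(
    evaluate,
    config={"x": tune.uniform(-10, 10)},
    metric="loss",
    mode="min",
    num_samples=30,
    cost_attr="token_cost",  # must be present in each result dict, per the docstring above
    cost_budget=100.0,       # only valid together with a custom cost_attr
)
print(analysis.best_config)
```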
@@ -1 +1 @@
-__version__ = "2.3.2"
+__version__ = "2.3.3"
@@ -1,4 +1,5 @@
-from sklearn.datasets import fetch_openml
+import numpy as np
+from sklearn.datasets import fetch_openml, load_iris
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import GroupKFold, KFold, train_test_split
 
@@ -48,7 +49,7 @@ def test_time():
     _test(split_type="time")
 
 
-def test_groups():
+def test_groups_for_classification_task():
     from sklearn.externals._arff import ArffException
 
     try:
@@ -68,7 +69,7 @@ def test_groups():
         "model_history": True,
         "eval_method": "cv",
         "groups": np.random.randint(low=0, high=10, size=len(y)),
-        "estimator_list": ["lgbm", "rf", "xgboost", "kneighbor"],
+        "estimator_list": ["catboost", "lgbm", "rf", "xgboost", "kneighbor"],
         "learner_selector": "roundrobin",
     }
     automl.fit(X, y, **automl_settings)
@@ -88,6 +89,35 @@ def test_groups():
     automl.fit(X, y, **automl_settings)
 
 
+def test_groups_for_regression_task():
+    """Append nonsensical groups to iris dataset and use it to test that GroupKFold works for regression tasks"""
+    iris_dict_data = load_iris(as_frame=True)  # numpy arrays
+    iris_data = iris_dict_data["frame"]  # pandas dataframe data + target
+
+    rng = np.random.default_rng(42)
+    iris_data["cluster"] = rng.integers(
+        low=0, high=5, size=iris_data.shape[0]
+    )  # np.random.randint(0, 5, iris_data.shape[0])
+
+    automl = AutoML()
+    X = iris_data[["sepal length (cm)", "sepal width (cm)", "petal length (cm)"]].to_numpy()
+    y = iris_data["petal width (cm)"]
+    X_train, X_test, y_train, y_test, groups_train, groups_test = train_test_split(
+        X, y, iris_data["cluster"], random_state=42
+    )
+    automl_settings = {
+        "max_iter": 5,
+        "time_budget": -1,
+        "metric": "r2",
+        "task": "regression",
+        "estimator_list": ["lgbm", "rf", "xgboost", "kneighbor"],
+        "eval_method": "cv",
+        "split_type": "uniform",
+        "groups": groups_train,
+    }
+    automl.fit(X_train, y_train, **automl_settings)
+
+
 def test_stratified_groupkfold():
     from minio.error import ServerError
     from sklearn.model_selection import StratifiedGroupKFold
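The new test's docstring says it exercises GroupKFold for a regression task. What grouped splitting guarantees can be checked with scikit-learn's `GroupKFold` directly; this standalone snippet (not part of the test file) asserts that no group id ever appears on both sides of a fold:

```python
import numpy as np
from sklearn.model_selection import GroupKFold

X = np.arange(20, dtype=float).reshape(10, 2)
y = np.arange(10, dtype=float)
groups = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])

for train_idx, val_idx in GroupKFold(n_splits=5).split(X, y, groups):
    # Rows sharing a group id land entirely in training or entirely in validation.
    assert set(groups[train_idx]).isdisjoint(groups[val_idx])
```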
@@ -108,6 +138,7 @@ def test_stratified_groupkfold():
         "split_type": splitter,
         "groups": X_train["Airline"],
         "estimator_list": [
+            "catboost",
             "lgbm",
             "rf",
             "xgboost",
@@ -203,4 +234,4 @@ def test_object():
 
 
 if __name__ == "__main__":
-    test_groups()
+    test_groups_for_classification_task()
@@ -3371,9 +3371,9 @@ cross-fetch@^3.1.5:
     node-fetch "2.6.7"
 
 cross-spawn@^7.0.3:
-  version "7.0.3"
-  resolved "https://registry.npmmirror.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
-  integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==
+  version "7.0.6"
+  resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.6.tgz#8a58fe78f00dcd70c370451759dfbfaf03e8ee9f"
+  integrity sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==
   dependencies:
     path-key "^3.1.0"
     shebang-command "^2.0.0"
@@ -5709,9 +5709,9 @@ multicast-dns@^7.2.5:
     thunky "^1.0.2"
 
 nanoid@^3.3.6:
-  version "3.3.6"
-  resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.6.tgz#443380c856d6e9f9824267d960b4236ad583ea4c"
-  integrity sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==
+  version "3.3.8"
+  resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.8.tgz#b1be3030bee36aaff18bacb375e5cce521684baf"
+  integrity sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==
 
 negotiator@0.6.3:
   version "0.6.3"