Compare commits

..

7 Commits

Author SHA1 Message Date
Daniel Grindrod
c038fbca07 fix: KeyError no longer occurs when using groupfolds for regression tasks. (#1385)
* fix: Now resetting indexes for regression datasets when using group folds

* refactor: Simplified if statement to include all fold types

* docs: Updated docs to make it clear that group folds can be used for regression tasks

---------

Co-authored-by: Daniel Grindrod <daniel.grindrod@evotec.com>
Co-authored-by: Li Jiang <bnujli@gmail.com>
2024-12-18 10:06:58 +08:00
dependabot[bot]
6a99202492 Bump nanoid from 3.3.6 to 3.3.8 in /website (#1387)
Bumps [nanoid](https://github.com/ai/nanoid) from 3.3.6 to 3.3.8.
- [Release notes](https://github.com/ai/nanoid/releases)
- [Changelog](https://github.com/ai/nanoid/blob/main/CHANGELOG.md)
- [Commits](https://github.com/ai/nanoid/compare/3.3.6...3.3.8)

---
updated-dependencies:
- dependency-name: nanoid
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Li Jiang <bnujli@gmail.com>
2024-12-17 19:26:34 +08:00
Daniel Grindrod
42d1dcfa0e fix: Fixed bug with catboost and groups (#1383)
Co-authored-by: Daniel Grindrod <daniel.grindrod@evotec.com>
2024-12-17 13:54:49 +08:00
EgorKraevTransferwise
b83c8a7d3b Pass cost_attr and cost_budget from flaml.tune.run() to the search algo (#1382) 2024-12-04 20:50:15 +08:00
dependabot[bot]
b9194cdcf2 Bump cross-spawn from 7.0.3 to 7.0.6 in /website (#1379)
Bumps [cross-spawn](https://github.com/moxystudio/node-cross-spawn) from 7.0.3 to 7.0.6.
- [Changelog](https://github.com/moxystudio/node-cross-spawn/blob/master/CHANGELOG.md)
- [Commits](https://github.com/moxystudio/node-cross-spawn/compare/v7.0.3...v7.0.6)

---
updated-dependencies:
- dependency-name: cross-spawn
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-11-20 15:48:39 +08:00
Li Jiang
9a1f6b0291 Bump version to 2.3.3 (#1378) 2024-11-13 11:44:34 +08:00
kernelmethod
07f4413aae Fix logging nuisances that can arise when importing flaml (#1377) 2024-11-13 07:49:55 +08:00
9 changed files with 64 additions and 21 deletions

View File

@@ -1,4 +1,5 @@
import logging
import warnings
try:
from flaml.automl import AutoML, logger_formatter
@@ -12,7 +13,8 @@ from flaml.version import __version__
# Set the root logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
if logger.level == logging.NOTSET:
logger.setLevel(logging.INFO)
if not has_automl:
logger.warning("flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.")
warnings.warn("flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.")

View File

@@ -203,7 +203,7 @@ class AutoML(BaseEstimator):
* Valid str options depend on different tasks.
For classification tasks, valid choices are
["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
For regression tasks, valid choices are ["auto", 'uniform', 'time', 'group'].
"auto" -> uniform.
For time series forecast tasks, must be "auto" or 'time'.
For ranking task, must be "auto" or 'group'.
@@ -739,7 +739,7 @@ class AutoML(BaseEstimator):
* Valid str options depend on different tasks.
For classification tasks, valid choices are
["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
For regression tasks, valid choices are ["auto", 'uniform', 'time', 'group'].
"auto" -> uniform.
For time series forecast tasks, must be "auto" or 'time'.
For ranking task, must be "auto" or 'group'.
@@ -1358,7 +1358,7 @@ class AutoML(BaseEstimator):
* Valid str options depend on different tasks.
For classification tasks, valid choices are
["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
For regression tasks, valid choices are ["auto", 'uniform', 'time', 'group'].
"auto" -> uniform.
For time series forecast tasks, must be "auto" or 'time'.
For ranking task, must be "auto" or 'group'.

View File

@@ -2066,8 +2066,8 @@ class CatBoostEstimator(BaseEstimator):
self.estimator_class = CatBoostRegressor
def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
if "is_retrain" in kwargs:
kwargs.pop("is_retrain")
kwargs.pop("is_retrain", None)
kwargs.pop("groups", None)
start_time = time.time()
deadline = start_time + budget if budget else np.inf
train_dir = f"catboost_{str(start_time)}"

View File

@@ -442,8 +442,8 @@ class GenericTask(Task):
X_train_all, y_train_all = shuffle(X_train_all, y_train_all, random_state=RANDOM_SEED)
if data_is_df:
X_train_all.reset_index(drop=True, inplace=True)
if isinstance(y_train_all, pd.Series):
y_train_all.reset_index(drop=True, inplace=True)
if isinstance(y_train_all, pd.Series):
y_train_all.reset_index(drop=True, inplace=True)
X_train, y_train = X_train_all, y_train_all
state.groups_all = state.groups

View File

@@ -192,7 +192,7 @@ class Task(ABC):
* Valid str options depend on different tasks.
For classification tasks, valid choices are
["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
For regression tasks, valid choices are ["auto", 'uniform', 'time', 'group'].
"auto" -> uniform.
For time series forecast tasks, must be "auto" or 'time'.
For ranking task, must be "auto" or 'group'.

View File

@@ -260,6 +260,8 @@ def run(
mlflow_exp_name: Optional[str] = None,
automl_info: Optional[Tuple[float]] = None,
extra_tag: Optional[dict] = None,
cost_attr: Optional[str] = "auto",
cost_budget: Optional[float] = None,
**ray_args,
):
"""The function-based way of performing HPO.
@@ -462,6 +464,12 @@ def run(
overwritten by the value of `n_concurrent_trials` in AutoML. When <= 0, the concurrent trials
will be set to the number of executors.
extra_tag: dict, default=None | Extra tags to be added to the mlflow runs created by autologging.
cost_attr: None or str to specify the attribute to evaluate the cost of different trials.
Default is "auto", which means that we will automatically choose the cost attribute to use (depending
on the nature of the resource budget). When cost_attr is set to None, cost differences between different trials will be omitted
in our search algorithm. When cost_attr is set to a str different from "auto" and "time_total_s",
this cost_attr must be available in the result dict of the trial.
cost_budget: A float of the cost budget. Only valid when cost_attr is a str different from "auto" and "time_total_s".
**ray_args: keyword arguments to pass to ray.tune.run().
Only valid when use_ray=True.
"""
@@ -600,6 +608,8 @@ def run(
metric_constraints=metric_constraints,
use_incumbent_result_in_evaluation=use_incumbent_result_in_evaluation,
lexico_objectives=lexico_objectives,
cost_attr=cost_attr,
cost_budget=cost_budget,
)
else:
if metric is None or mode is None:

View File

@@ -1 +1 @@
__version__ = "2.3.2"
__version__ = "2.3.3"

View File

@@ -1,4 +1,5 @@
from sklearn.datasets import fetch_openml
import numpy as np
from sklearn.datasets import fetch_openml, load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GroupKFold, KFold, train_test_split
@@ -48,7 +49,7 @@ def test_time():
_test(split_type="time")
def test_groups():
def test_groups_for_classification_task():
from sklearn.externals._arff import ArffException
try:
@@ -68,7 +69,7 @@ def test_groups():
"model_history": True,
"eval_method": "cv",
"groups": np.random.randint(low=0, high=10, size=len(y)),
"estimator_list": ["lgbm", "rf", "xgboost", "kneighbor"],
"estimator_list": ["catboost", "lgbm", "rf", "xgboost", "kneighbor"],
"learner_selector": "roundrobin",
}
automl.fit(X, y, **automl_settings)
@@ -88,6 +89,35 @@ def test_groups():
automl.fit(X, y, **automl_settings)
def test_groups_for_regression_task():
"""Append nonsensical groups to iris dataset and use it to test that GroupKFold works for regression tasks"""
iris_dict_data = load_iris(as_frame=True) # numpy arrays
iris_data = iris_dict_data["frame"] # pandas dataframe data + target
rng = np.random.default_rng(42)
iris_data["cluster"] = rng.integers(
low=0, high=5, size=iris_data.shape[0]
) # np.random.randint(0, 5, iris_data.shape[0])
automl = AutoML()
X = iris_data[["sepal length (cm)", "sepal width (cm)", "petal length (cm)"]].to_numpy()
y = iris_data["petal width (cm)"]
X_train, X_test, y_train, y_test, groups_train, groups_test = train_test_split(
X, y, iris_data["cluster"], random_state=42
)
automl_settings = {
"max_iter": 5,
"time_budget": -1,
"metric": "r2",
"task": "regression",
"estimator_list": ["lgbm", "rf", "xgboost", "kneighbor"],
"eval_method": "cv",
"split_type": "uniform",
"groups": groups_train,
}
automl.fit(X_train, y_train, **automl_settings)
def test_stratified_groupkfold():
from minio.error import ServerError
from sklearn.model_selection import StratifiedGroupKFold
@@ -108,6 +138,7 @@ def test_stratified_groupkfold():
"split_type": splitter,
"groups": X_train["Airline"],
"estimator_list": [
"catboost",
"lgbm",
"rf",
"xgboost",
@@ -203,4 +234,4 @@ def test_object():
if __name__ == "__main__":
test_groups()
test_groups_for_classification_task()

View File

@@ -3371,9 +3371,9 @@ cross-fetch@^3.1.5:
node-fetch "2.6.7"
cross-spawn@^7.0.3:
version "7.0.3"
resolved "https://registry.npmmirror.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==
version "7.0.6"
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.6.tgz#8a58fe78f00dcd70c370451759dfbfaf03e8ee9f"
integrity sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==
dependencies:
path-key "^3.1.0"
shebang-command "^2.0.0"
@@ -5709,9 +5709,9 @@ multicast-dns@^7.2.5:
thunky "^1.0.2"
nanoid@^3.3.6:
version "3.3.6"
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.6.tgz#443380c856d6e9f9824267d960b4236ad583ea4c"
integrity sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==
version "3.3.8"
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.8.tgz#b1be3030bee36aaff18bacb375e5cce521684baf"
integrity sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==
negotiator@0.6.3:
version "0.6.3"