Compare commits (mirror of https://github.com/microsoft/FLAML.git)

6 commits:
- 2ba5f8bed1
- d0a11958a5
- 0ef9b00a75
- 840f76e5e5
- d8b7d25b80
- 6d53929803
.github/workflows/python-package.yml (vendored, 2 lines changed)
@@ -85,7 +85,7 @@ jobs:
       - name: Test with pytest
         if: matrix.python-version != '3.10'
         run: |
-          pytest test
+          pytest test/
       - name: Coverage
         if: matrix.python-version == '3.10'
         run: |

@@ -9,6 +9,7 @@ import os
 import shutil
 import signal
 import sys
+import threading
 import time
 import warnings
 from contextlib import contextmanager

@@ -89,21 +90,25 @@ def limit_resource(memory_limit, time_limit):
             except ValueError:
                 # According to https://bugs.python.org/issue40518, it's a mac-specific error.
                 pass
-    main_thread = False
-    if time_limit is not None:
+    alarm_set = False
+    if time_limit is not None and threading.current_thread() is threading.main_thread():
         try:
             signal.signal(signal.SIGALRM, TimeoutHandler)
             signal.alarm(int(time_limit) or 1)
-            main_thread = True
+            alarm_set = True
         except ValueError:
             pass
+
     try:
         yield
     finally:
-        if main_thread:
+        if alarm_set:
             signal.alarm(0)
         if memory_limit > 0:
-            resource.setrlimit(resource.RLIMIT_AS, (soft, hard))
+            try:
+                resource.setrlimit(resource.RLIMIT_AS, (soft, hard))
+            except ValueError:
+                pass


 class BaseEstimator:

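The limit_resource change narrows when SIGALRM is armed: signal handlers can only be installed from the main thread, so the context manager now checks threading.current_thread() is threading.main_thread() up front and tracks whether the alarm was actually set (alarm_set) instead of overloading a main_thread flag; restoring the memory limit is likewise wrapped so the cleanup cannot raise. A minimal, standalone sketch of the guard pattern (timeout_guard and its handler are illustrative names, not FLAML's API; POSIX only, since SIGALRM does not exist on Windows):

```python
import signal
import threading
from contextlib import contextmanager


@contextmanager
def timeout_guard(seconds):
    """Raise TimeoutError after `seconds`, but only when it is safe to use SIGALRM."""

    def _handler(signum, frame):
        raise TimeoutError(f"exceeded {seconds}s")

    alarm_set = False
    # signal.signal() raises ValueError outside the main thread, so guard up front.
    if seconds and threading.current_thread() is threading.main_thread():
        signal.signal(signal.SIGALRM, _handler)
        signal.alarm(int(seconds) or 1)
        alarm_set = True
    try:
        yield
    finally:
        if alarm_set:
            signal.alarm(0)  # cancel the pending alarm


# Usage: inside a worker thread the guard silently degrades to "no timeout".
with timeout_guard(5):
    pass
```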
@@ -130,7 +135,7 @@ class BaseEstimator:
         self._task = task if isinstance(task, Task) else task_factory(task, None, None)
         self.params = self.config2params(config)
         self.estimator_class = self._model = None
-        if "_estimator_type" in config:
+        if "_estimator_type" in self.params:
             self._estimator_type = self.params.pop("_estimator_type")
         else:
             self._estimator_type = "classifier" if self._task.is_classification() else "regressor"

@@ -1691,7 +1696,7 @@ class XGBoostEstimator(SKLearnEstimator):
         # use_label_encoder is deprecated in 1.7.
         if xgboost_version < "1.7.0":
             params["use_label_encoder"] = params.get("use_label_encoder", False)
-        if "n_jobs" in config:
+        if "n_jobs" in params:
             params["nthread"] = params.pop("n_jobs")
         return params

@@ -1891,7 +1896,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
         params = super().config2params(config)
         if "max_leaves" in params:
             params["max_leaf_nodes"] = params.get("max_leaf_nodes", params.pop("max_leaves"))
-        if not self._task.is_classification() and "criterion" in config:
+        if not self._task.is_classification() and "criterion" in params:
             params.pop("criterion")
         if "random_state" not in params:
             params["random_state"] = 12032022

@@ -2344,7 +2349,7 @@ class SGDEstimator(SKLearnEstimator):
         params["loss"] = params.get("loss", None)
         if params["loss"] is None and self._task.is_classification():
             params["loss"] = "log_loss" if SKLEARN_VERSION >= "1.1" else "log"
-        if not self._task.is_classification():
+        if not self._task.is_classification() and "n_jobs" in params:
             params.pop("n_jobs")

         if params.get("penalty") != "elasticnet":

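The four hunks above all fix the same bug class: a key is tested for membership in the raw config dict but then popped from params (or self.params), the possibly transformed copy that config2params returns. A minimal sketch of why that breaks, using a hypothetical config2params rather than FLAML's real implementation:

```python
# Hypothetical illustration of the pattern fixed above.
def config2params(config: dict) -> dict:
    params = config.copy()
    params.pop("n_jobs", None)  # a transform may drop or rename keys
    return params


config = {"n_jobs": 4, "max_leaves": 32}
params = config2params(config)

# Buggy: membership is checked on `config`, but the pop targets `params`,
# which may no longer hold the key -> KeyError.
# if "n_jobs" in config:
#     params["nthread"] = params.pop("n_jobs")

# Fixed: check the same dict you mutate.
if "n_jobs" in params:
    params["nthread"] = params.pop("n_jobs")
```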
@@ -769,10 +769,10 @@ class GenericTask(Task):
             if not is_spark_dataframe:
                 y_train, y_val = y_train_split[train_index], y_train_split[val_index]
                 if weight is not None:
-                    fit_kwargs["sample_weight"], weight_val = (
-                        weight[train_index],
-                        weight[val_index],
+                    fit_kwargs["sample_weight"] = (
+                        weight[train_index] if isinstance(weight, np.ndarray) else weight.iloc[train_index]
                     )
+                    weight_val = weight[val_index] if isinstance(weight, np.ndarray) else weight.iloc[val_index]
                 if groups is not None:
                     fit_kwargs["groups"] = (
                         groups[train_index] if isinstance(groups, np.ndarray) else groups.iloc[train_index]

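This hunk matters because a CV splitter yields positional indices: plain weight[train_index] is fine for a numpy array, but for a pandas Series whose index has been shuffled (for example by train_test_split) it becomes a label lookup. A small sketch of the difference, with made-up values:

```python
import numpy as np
import pandas as pd

# A sample-weight Series whose index no longer starts at 0, as happens after train_test_split.
weight = pd.Series([0.2, 0.5, 0.3], index=[7, 42, 13])
train_index = np.array([0, 2])  # positional indices produced by a CV splitter

# weight[train_index] would try to look up the *labels* 0 and 2 and typically fail here;
# .iloc selects by position, matching what indexing a plain numpy array does.
print(weight.iloc[train_index].tolist())  # [0.2, 0.3]
print(weight.to_numpy()[train_index])     # array([0.2, 0.3])
```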
@@ -197,9 +197,16 @@ def report(_metric=None, **kwargs):
     global _training_iteration
     if _use_ray:
         try:
-            from ray import tune
-
-            return tune.report(_metric, **kwargs)
+            from ray import __version__ as ray_version
+
+            if ray_version.startswith("1."):
+                from ray import tune
+
+                return tune.report(_metric, **kwargs)
+            else:  # ray>=2
+                from ray.air import session
+
+                return session.report(metrics={"metric": _metric, **kwargs})
         except ImportError:
             # calling tune.report() outside tune.run()
             return

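The change above keeps the report shim working across Ray major versions: Ray 1.x exposes tune.report, while Ray 2.x moved in-trial reporting to ray.air.session.report. An equivalent standalone sketch using an explicit version parse instead of the string prefix check (report_metric and the packaging dependency are assumptions for this illustration, not FLAML code):

```python
from packaging.version import Version


def report_metric(value, **extra):
    """Report a metric from inside a trial, handling both Ray 1.x and Ray >= 2."""
    try:
        import ray
    except ImportError:
        return  # no Ray installed: reporting becomes a no-op, mirroring the fallback above

    if Version(ray.__version__) < Version("2.0"):
        from ray import tune

        return tune.report(value, **extra)  # legacy Ray 1.x API
    from ray.air import session

    return session.report(metrics={"metric": value, **extra})  # Ray >= 2
```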
@@ -1 +1 @@
-__version__ = "2.3.3"
+__version__ = "2.3.4"

@@ -143,4 +143,5 @@ def test_prep():


 if __name__ == "__main__":
     test_lrl2()
+    test_prep()

@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 from sklearn.datasets import fetch_openml, load_iris
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import GroupKFold, KFold, train_test_split

@@ -59,8 +60,6 @@ def test_groups_for_classification_task():

     X, y = load_wine(return_X_y=True)

-    import numpy as np
-
     automl = AutoML()
     automl_settings = {
         "time_budget": 2,

@@ -118,6 +117,43 @@ def test_groups_for_regression_task():
     automl.fit(X_train, y_train, **automl_settings)


+def test_groups_with_sample_weights():
+    """Verifies that sample weights can be used with group splits i.e. that https://github.com/microsoft/FLAML/issues/1396 remains fixed"""
+    iris_dict_data = load_iris(as_frame=True)  # numpy arrays
+    iris_data = iris_dict_data["frame"]  # pandas dataframe data + target
+    iris_data["cluster"] = np.random.randint(0, 5, iris_data.shape[0])
+    automl = AutoML()
+
+    X = iris_data[["sepal length (cm)", "sepal width (cm)", "petal length (cm)"]].to_numpy()
+    y = iris_data["petal width (cm)"]
+    sample_weight = pd.Series(np.random.rand(X.shape[0]))
+    (
+        X_train,
+        X_test,
+        y_train,
+        y_test,
+        groups_train,
+        groups_test,
+        sample_weight_train,
+        sample_weight_test,
+    ) = train_test_split(X, y, iris_data["cluster"], sample_weight, random_state=42)
+    automl_settings = {
+        "max_iter": 5,
+        "time_budget": -1,
+        "metric": "r2",
+        "task": "regression",
+        "log_file_name": "error.log",
+        "log_type": "all",
+        "estimator_list": ["lgbm"],
+        "eval_method": "cv",
+        "split_type": "group",
+        "groups": groups_train,
+        "sample_weight": sample_weight_train,
+    }
+    automl.fit(X_train, y_train, **automl_settings)
+    assert automl.model is not None
+
+
 def test_stratified_groupkfold():
     from minio.error import ServerError
     from sklearn.model_selection import StratifiedGroupKFold

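The regression test added above pins down the user-facing contract: group-based cross-validation and per-sample weights can be passed to AutoML.fit together. Stripped of the test scaffolding, the call pattern it guards looks roughly like the sketch below; the synthetic data and settings are illustrative, not taken from FLAML:

```python
import numpy as np
from flaml import AutoML

# Tiny synthetic regression problem; values are placeholders for illustration only.
rng = np.random.default_rng(0)
X = rng.normal(size=(60, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=60)
groups = rng.integers(0, 5, size=60)  # one group label per row
sample_weight = rng.random(60)        # per-row weights

automl = AutoML()
automl.fit(
    X,
    y,
    task="regression",
    metric="r2",
    estimator_list=["lgbm"],
    eval_method="cv",
    split_type="group",
    groups=groups,
    sample_weight=sample_weight,
    max_iter=5,
    time_budget=-1,
)
print(automl.model is not None)
```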
@@ -59,6 +59,17 @@ def _test_hf_data():
     except requests.exceptions.ConnectionError:
         return

+    # Tests will only run if there is a GPU available
+    try:
+        import ray
+
+        pg = ray.util.placement_group([{"CPU": 1, "GPU": 1}])
+
+        if not pg.wait(timeout_seconds=10):  # Wait 10 seconds for resources
+            raise RuntimeError("No available node types can fulfill resource request!")
+    except RuntimeError:
+        return
+
     custom_sent_keys = ["sentence1", "sentence2"]
     label_key = "label"

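The added guard makes the Hugging Face test bail out early when the Ray cluster has no free GPU, instead of failing mid-run. The same idea can be packaged as a small pytest helper; this is only a sketch under the assumption that Ray is initialized by the surrounding test setup, and require_gpu_or_skip is a hypothetical name, not part of FLAML's tests:

```python
import pytest


def require_gpu_or_skip(timeout_seconds=10):
    """Skip the calling test unless a node with 1 CPU and 1 GPU can be reserved."""
    ray = pytest.importorskip("ray")  # skip cleanly when ray is not installed
    pg = ray.util.placement_group([{"CPU": 1, "GPU": 1}])
    if not pg.wait(timeout_seconds=timeout_seconds):
        ray.util.remove_placement_group(pg)  # release the pending reservation
        pytest.skip("no node can fulfill a 1 CPU / 1 GPU placement group")
    return pg
```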
@@ -7272,14 +7272,7 @@ send@0.19.0:
     range-parser "~1.2.1"
     statuses "2.0.1"

-serialize-javascript@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.npmmirror.com/serialize-javascript/-/serialize-javascript-6.0.0.tgz#efae5d88f45d7924141da8b5c3a7a7e663fefeb8"
-  integrity sha512-Qr3TosvguFt8ePWqsvRfrKyQXIiW+nGbYpy8XK24NQHE83caxWt+mIymTT19DGFbNWNLfEwsrkSmN64lVWB9ag==
-  dependencies:
-    randombytes "^2.1.0"
-
-serialize-javascript@^6.0.1:
+serialize-javascript@^6.0.0, serialize-javascript@^6.0.1:
   version "6.0.2"
   resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-6.0.2.tgz#defa1e055c83bf6d59ea805d8da862254eb6a6c2"
   integrity sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==