mirror of
https://github.com/microsoft/FLAML.git
synced 2026-02-09 02:09:16 +08:00
Fix sklearn 1.7+ compatibility: BaseEstimator type detection for ensemble (#1512)
* Initial plan * Fix ExtraTreesEstimator regression ensemble error with sklearn 1.7+ Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com> * Address code review feedback: improve __sklearn_tags__ implementation Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com> * Fix format error * Emphasize pre-commit --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com> Co-authored-by: Li Jiang <lijiang1@microsoft.com>
This commit is contained in:
1
.github/copilot-instructions.md
vendored
1
.github/copilot-instructions.md
vendored
@@ -135,6 +135,7 @@ The repository uses pre-commit hooks for:
|
|||||||
- Ensure all tests pass before requesting review
|
- Ensure all tests pass before requesting review
|
||||||
- Update documentation if adding new features
|
- Update documentation if adding new features
|
||||||
- Follow the PR template in `.github/PULL_REQUEST_TEMPLATE.md`
|
- Follow the PR template in `.github/PULL_REQUEST_TEMPLATE.md`
|
||||||
|
- ALWAYS run `pre-commit run --all-files` before each commit to avoid formatting issues
|
||||||
|
|
||||||
## Project Structure
|
## Project Structure
|
||||||
|
|
||||||
|
|||||||
@@ -26,6 +26,13 @@ from sklearn.preprocessing import Normalizer
|
|||||||
from sklearn.svm import LinearSVC
|
from sklearn.svm import LinearSVC
|
||||||
from xgboost import __version__ as xgboost_version
|
from xgboost import __version__ as xgboost_version
|
||||||
|
|
||||||
|
try:
|
||||||
|
from sklearn.utils._tags import ClassifierTags, RegressorTags
|
||||||
|
|
||||||
|
SKLEARN_TAGS_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
SKLEARN_TAGS_AVAILABLE = False
|
||||||
|
|
||||||
from flaml import tune
|
from flaml import tune
|
||||||
from flaml.automl.data import group_counts
|
from flaml.automl.data import group_counts
|
||||||
from flaml.automl.spark import ERROR as SPARK_ERROR
|
from flaml.automl.spark import ERROR as SPARK_ERROR
|
||||||
@@ -148,6 +155,25 @@ class BaseEstimator(sklearn.base.ClassifierMixin, sklearn.base.BaseEstimator):
|
|||||||
params["_estimator_type"] = self._estimator_type
|
params["_estimator_type"] = self._estimator_type
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
def __sklearn_tags__(self):
    """Return sklearn tags that agree with this estimator's ``_estimator_type``.

    sklearn 1.7+ relies on ``get_tags()`` rather than reading
    ``_estimator_type`` directly. This class derives from
    ``ClassifierMixin``, so the inherited tags would otherwise always
    describe a classifier. The inherited tags are therefore adjusted to
    match the declared estimator type.

    Returns:
        The tags object produced by the parent class. When the sklearn tag
        classes could be imported and ``_estimator_type`` is ``"regressor"``
        or ``"classifier"``, its ``estimator_type``, ``regressor_tags`` and
        ``classifier_tags`` fields are overwritten accordingly. In every
        other case the inherited tags are returned unchanged.
    """
    tags = super().__sklearn_tags__()

    # A missing attribute is treated the same as an unrecognized value.
    declared_type = getattr(self, "_estimator_type", None)
    if declared_type is None or not SKLEARN_TAGS_AVAILABLE:
        return tags

    if declared_type == "regressor":
        # Swap the classifier-specific tag bundle for a regressor one.
        tags.estimator_type = "regressor"
        tags.regressor_tags = RegressorTags()
        tags.classifier_tags = None
    elif declared_type == "classifier":
        # Make the classifier tagging explicit and drop any regressor tags.
        tags.estimator_type = "classifier"
        tags.classifier_tags = ClassifierTags()
        tags.regressor_tags = None

    return tags
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def classes_(self):
|
def classes_(self):
|
||||||
return self._model.classes_
|
return self._model.classes_
|
||||||
|
|||||||
89
test/automl/test_sklearn_17_compat.py
Normal file
89
test/automl/test_sklearn_17_compat.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
"""Test sklearn 1.7+ compatibility for estimator type detection.
|
||||||
|
|
||||||
|
This test ensures that FLAML estimators are properly recognized as
|
||||||
|
regressors or classifiers by sklearn's is_regressor() and is_classifier()
|
||||||
|
functions, which is required for sklearn 1.7+ ensemble methods.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from sklearn.base import is_classifier, is_regressor
|
||||||
|
|
||||||
|
from flaml.automl.model import (
|
||||||
|
ExtraTreesEstimator,
|
||||||
|
LGBMEstimator,
|
||||||
|
RandomForestEstimator,
|
||||||
|
XGBoostSklearnEstimator,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_extra_trees_regressor_type():
    """Check that a regression-task ExtraTreesEstimator is seen by sklearn as a regressor only."""
    estimator = ExtraTreesEstimator(task="regression")

    seen_as_regressor = is_regressor(estimator)
    assert seen_as_regressor, "ExtraTreesEstimator(task='regression') should be recognized as a regressor"

    seen_as_classifier = is_classifier(estimator)
    assert not seen_as_classifier, "ExtraTreesEstimator(task='regression') should not be recognized as a classifier"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extra_trees_classifier_type():
    """Check that binary and multiclass ExtraTreesEstimator instances are seen by sklearn as classifiers only."""
    # Binary classification task.
    binary_estimator = ExtraTreesEstimator(task="binary")
    seen_as_classifier = is_classifier(binary_estimator)
    assert seen_as_classifier, "ExtraTreesEstimator(task='binary') should be recognized as a classifier"
    seen_as_regressor = is_regressor(binary_estimator)
    assert not seen_as_regressor, "ExtraTreesEstimator(task='binary') should not be recognized as a regressor"

    # Multiclass classification task.
    multiclass_estimator = ExtraTreesEstimator(task="multiclass")
    seen_as_classifier = is_classifier(multiclass_estimator)
    assert seen_as_classifier, "ExtraTreesEstimator(task='multiclass') should be recognized as a classifier"
    seen_as_regressor = is_regressor(multiclass_estimator)
    assert not seen_as_regressor, "ExtraTreesEstimator(task='multiclass') should not be recognized as a regressor"
|
||||||
|
|
||||||
|
|
||||||
|
def test_random_forest_regressor_type():
    """Check that a regression-task RandomForestEstimator is seen by sklearn as a regressor only."""
    estimator = RandomForestEstimator(task="regression")

    seen_as_regressor = is_regressor(estimator)
    assert seen_as_regressor, "RandomForestEstimator(task='regression') should be recognized as a regressor"

    seen_as_classifier = is_classifier(estimator)
    assert not seen_as_classifier, "RandomForestEstimator(task='regression') should not be recognized as a classifier"
|
||||||
|
|
||||||
|
|
||||||
|
def test_random_forest_classifier_type():
    """Check that a binary-task RandomForestEstimator is seen by sklearn as a classifier only."""
    estimator = RandomForestEstimator(task="binary")

    seen_as_classifier = is_classifier(estimator)
    assert seen_as_classifier, "RandomForestEstimator(task='binary') should be recognized as a classifier"

    seen_as_regressor = is_regressor(estimator)
    assert not seen_as_regressor, "RandomForestEstimator(task='binary') should not be recognized as a regressor"
|
||||||
|
|
||||||
|
|
||||||
|
def test_lgbm_regressor_type():
    """Check that a regression-task LGBMEstimator is seen by sklearn as a regressor only."""
    estimator = LGBMEstimator(task="regression")

    seen_as_regressor = is_regressor(estimator)
    assert seen_as_regressor, "LGBMEstimator(task='regression') should be recognized as a regressor"

    seen_as_classifier = is_classifier(estimator)
    assert not seen_as_classifier, "LGBMEstimator(task='regression') should not be recognized as a classifier"
|
||||||
|
|
||||||
|
|
||||||
|
def test_lgbm_classifier_type():
    """Check that a binary-task LGBMEstimator is seen by sklearn as a classifier only."""
    estimator = LGBMEstimator(task="binary")

    seen_as_classifier = is_classifier(estimator)
    assert seen_as_classifier, "LGBMEstimator(task='binary') should be recognized as a classifier"

    seen_as_regressor = is_regressor(estimator)
    assert not seen_as_regressor, "LGBMEstimator(task='binary') should not be recognized as a regressor"
|
||||||
|
|
||||||
|
|
||||||
|
def test_xgboost_regressor_type():
    """Check that a regression-task XGBoostSklearnEstimator is seen by sklearn as a regressor only."""
    estimator = XGBoostSklearnEstimator(task="regression")

    seen_as_regressor = is_regressor(estimator)
    assert seen_as_regressor, "XGBoostSklearnEstimator(task='regression') should be recognized as a regressor"

    seen_as_classifier = is_classifier(estimator)
    assert not seen_as_classifier, "XGBoostSklearnEstimator(task='regression') should not be recognized as a classifier"
|
||||||
|
|
||||||
|
|
||||||
|
def test_xgboost_classifier_type():
    """Check that a binary-task XGBoostSklearnEstimator is seen by sklearn as a classifier only."""
    estimator = XGBoostSklearnEstimator(task="binary")

    seen_as_classifier = is_classifier(estimator)
    assert seen_as_classifier, "XGBoostSklearnEstimator(task='binary') should be recognized as a classifier"

    seen_as_regressor = is_regressor(estimator)
    assert not seen_as_regressor, "XGBoostSklearnEstimator(task='binary') should not be recognized as a regressor"
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run each compatibility check in declaration order;
    # any failing assertion aborts before the success message is printed.
    for run_check in (
        test_extra_trees_regressor_type,
        test_extra_trees_classifier_type,
        test_random_forest_regressor_type,
        test_random_forest_classifier_type,
        test_lgbm_regressor_type,
        test_lgbm_classifier_type,
        test_xgboost_regressor_type,
        test_xgboost_classifier_type,
    ):
        run_check()
    print("All sklearn 1.7+ compatibility tests passed!")
|
||||||
Reference in New Issue
Block a user