Fix sklearn 1.7+ compatibility: BaseEstimator type detection for ensemble (#1512)

* Initial plan

* Fix ExtraTreesEstimator regression ensemble error with sklearn 1.7+

Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com>

* Address code review feedback: improve __sklearn_tags__ implementation

Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com>

* Fix format error

* Emphasize pre-commit

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com>
Co-authored-by: Li Jiang <lijiang1@microsoft.com>
This commit is contained in:
Copilot
2026-01-23 10:20:59 +08:00
committed by GitHub
parent cd0e9fb0d2
commit fc4efe3510
3 changed files with 116 additions and 0 deletions

View File

@@ -135,6 +135,7 @@ The repository uses pre-commit hooks for:
- Ensure all tests pass before requesting review
- Update documentation if adding new features
- Follow the PR template in `.github/PULL_REQUEST_TEMPLATE.md`
- ALWAYS run `pre-commit run --all-files` before each commit to avoid formatting issues
## Project Structure

View File

@@ -26,6 +26,13 @@ from sklearn.preprocessing import Normalizer
from sklearn.svm import LinearSVC from sklearn.svm import LinearSVC
from xgboost import __version__ as xgboost_version from xgboost import __version__ as xgboost_version
# The tag dataclasses live in a private sklearn module that older releases do
# not ship; record whether they could be imported so callers can branch on it.
try:
    from sklearn.utils._tags import ClassifierTags, RegressorTags
except ImportError:
    SKLEARN_TAGS_AVAILABLE = False
else:
    SKLEARN_TAGS_AVAILABLE = True
from flaml import tune from flaml import tune
from flaml.automl.data import group_counts from flaml.automl.data import group_counts
from flaml.automl.spark import ERROR as SPARK_ERROR from flaml.automl.spark import ERROR as SPARK_ERROR
@@ -148,6 +155,25 @@ class BaseEstimator(sklearn.base.ClassifierMixin, sklearn.base.BaseEstimator):
params["_estimator_type"] = self._estimator_type params["_estimator_type"] = self._estimator_type
return params return params
def __sklearn_tags__(self):
    """Return sklearn tags that agree with this estimator's ``_estimator_type``.

    sklearn 1.7+ determines the estimator kind through ``get_tags()`` rather
    than by reading ``_estimator_type`` directly. Because this class derives
    from ``ClassifierMixin``, the inherited tags would otherwise always
    describe a classifier, even for regression estimators.

    Returns:
        The tags object produced by the parent class. When the sklearn tag
        classes were imported and ``_estimator_type`` is ``"regressor"`` or
        ``"classifier"``, its ``estimator_type``, ``regressor_tags`` and
        ``classifier_tags`` fields are overwritten to match; otherwise it is
        returned unchanged.
    """
    sk_tags = super().__sklearn_tags__()

    # Nothing to adjust without a declared type or without the tag classes.
    if not hasattr(self, "_estimator_type") or not SKLEARN_TAGS_AVAILABLE:
        return sk_tags

    declared_type = self._estimator_type
    if declared_type == "regressor":
        sk_tags.estimator_type = "regressor"
        sk_tags.regressor_tags = RegressorTags()
        sk_tags.classifier_tags = None
    elif declared_type == "classifier":
        sk_tags.estimator_type = "classifier"
        sk_tags.classifier_tags = ClassifierTags()
        sk_tags.regressor_tags = None
    return sk_tags
@property @property
def classes_(self): def classes_(self):
return self._model.classes_ return self._model.classes_

View File

@@ -0,0 +1,89 @@
"""Test sklearn 1.7+ compatibility for estimator type detection.
This test ensures that FLAML estimators are properly recognized as
regressors or classifiers by sklearn's is_regressor() and is_classifier()
functions, which is required for sklearn 1.7+ ensemble methods.
"""
import pytest
from sklearn.base import is_classifier, is_regressor
from flaml.automl.model import (
ExtraTreesEstimator,
LGBMEstimator,
RandomForestEstimator,
XGBoostSklearnEstimator,
)
def test_extra_trees_regressor_type():
    """A regression-task ExtraTreesEstimator must register as a regressor only."""
    estimator = ExtraTreesEstimator(task="regression")
    label = "ExtraTreesEstimator(task='regression')"
    assert is_regressor(estimator), f"{label} should be recognized as a regressor"
    assert not is_classifier(estimator), f"{label} should not be recognized as a classifier"
def test_extra_trees_classifier_type():
    """A classification-task ExtraTreesEstimator must register as a classifier only.

    Both the binary and the multiclass task are checked, in that order.
    """
    for task in ("binary", "multiclass"):
        estimator = ExtraTreesEstimator(task=task)
        label = f"ExtraTreesEstimator(task='{task}')"
        assert is_classifier(estimator), f"{label} should be recognized as a classifier"
        assert not is_regressor(estimator), f"{label} should not be recognized as a regressor"
def test_random_forest_regressor_type():
    """A regression-task RandomForestEstimator must register as a regressor only."""
    estimator = RandomForestEstimator(task="regression")
    label = "RandomForestEstimator(task='regression')"
    assert is_regressor(estimator), f"{label} should be recognized as a regressor"
    assert not is_classifier(estimator), f"{label} should not be recognized as a classifier"
def test_random_forest_classifier_type():
    """A binary-task RandomForestEstimator must register as a classifier only."""
    estimator = RandomForestEstimator(task="binary")
    label = "RandomForestEstimator(task='binary')"
    assert is_classifier(estimator), f"{label} should be recognized as a classifier"
    assert not is_regressor(estimator), f"{label} should not be recognized as a regressor"
def test_lgbm_regressor_type():
    """A regression-task LGBMEstimator must register as a regressor only."""
    estimator = LGBMEstimator(task="regression")
    label = "LGBMEstimator(task='regression')"
    assert is_regressor(estimator), f"{label} should be recognized as a regressor"
    assert not is_classifier(estimator), f"{label} should not be recognized as a classifier"
def test_lgbm_classifier_type():
    """A binary-task LGBMEstimator must register as a classifier only."""
    estimator = LGBMEstimator(task="binary")
    label = "LGBMEstimator(task='binary')"
    assert is_classifier(estimator), f"{label} should be recognized as a classifier"
    assert not is_regressor(estimator), f"{label} should not be recognized as a regressor"
def test_xgboost_regressor_type():
    """A regression-task XGBoostSklearnEstimator must register as a regressor only."""
    estimator = XGBoostSklearnEstimator(task="regression")
    label = "XGBoostSklearnEstimator(task='regression')"
    assert is_regressor(estimator), f"{label} should be recognized as a regressor"
    assert not is_classifier(estimator), f"{label} should not be recognized as a classifier"
def test_xgboost_classifier_type():
    """A binary-task XGBoostSklearnEstimator must register as a classifier only."""
    estimator = XGBoostSklearnEstimator(task="binary")
    label = "XGBoostSklearnEstimator(task='binary')"
    assert is_classifier(estimator), f"{label} should be recognized as a classifier"
    assert not is_regressor(estimator), f"{label} should not be recognized as a regressor"
if __name__ == "__main__":
    # Direct execution: run every check in this module, in definition order,
    # then report success. Any failing assertion aborts before the message.
    for run_check in (
        test_extra_trees_regressor_type,
        test_extra_trees_classifier_type,
        test_random_forest_regressor_type,
        test_random_forest_classifier_type,
        test_lgbm_regressor_type,
        test_lgbm_classifier_type,
        test_xgboost_regressor_type,
        test_xgboost_classifier_type,
    ):
        run_check()
    print("All sklearn 1.7+ compatibility tests passed!")