diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index ae0843cee..86926ecc3 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -135,6 +135,7 @@ The repository uses pre-commit hooks for:
 - Ensure all tests pass before requesting review
 - Update documentation if adding new features
 - Follow the PR template in `.github/PULL_REQUEST_TEMPLATE.md`
+- ALWAYS run `pre-commit run --all-files` before each commit to avoid formatting issues
 
 ## Project Structure
 
diff --git a/flaml/automl/model.py b/flaml/automl/model.py
index be99ad8b3..65ff77199 100644
--- a/flaml/automl/model.py
+++ b/flaml/automl/model.py
@@ -26,6 +26,15 @@ from sklearn.preprocessing import Normalizer
 from sklearn.svm import LinearSVC
 from xgboost import __version__ as xgboost_version
 
+try:
+    # Public export of the tag classes (scikit-learn 1.6+); avoids the private _tags module.
+    from sklearn.utils import ClassifierTags, RegressorTags
+
+    SKLEARN_TAGS_AVAILABLE = True
+except ImportError:
+    # Tag classes unavailable: __sklearn_tags__ then skips the tag rewrite.
+    SKLEARN_TAGS_AVAILABLE = False
+
 from flaml import tune
 from flaml.automl.data import group_counts
 from flaml.automl.spark import ERROR as SPARK_ERROR
@@ -148,6 +157,31 @@ class BaseEstimator(sklearn.base.ClassifierMixin, sklearn.base.BaseEstimator):
             params["_estimator_type"] = self._estimator_type
         return params
 
+    def __sklearn_tags__(self):
+        """Override sklearn tags to respect the _estimator_type attribute.
+
+        This is needed for sklearn 1.7+ which uses get_tags() instead of
+        checking _estimator_type directly. Since BaseEstimator inherits from
+        ClassifierMixin, it would otherwise always be tagged as a classifier.
+
+        Returns:
+            The tags object built by the parent classes. When
+            ``_estimator_type`` is ``"regressor"`` or ``"classifier"`` its
+            ``estimator_type``, ``regressor_tags`` and ``classifier_tags``
+            fields are rewritten to match; otherwise it is returned as is.
+        """
+        tags = super().__sklearn_tags__()
+        if hasattr(self, "_estimator_type") and SKLEARN_TAGS_AVAILABLE:
+            if self._estimator_type == "regressor":
+                tags.estimator_type = "regressor"
+                tags.regressor_tags = RegressorTags()
+                tags.classifier_tags = None
+            elif self._estimator_type == "classifier":
+                tags.estimator_type = "classifier"
+                tags.classifier_tags = ClassifierTags()
+                tags.regressor_tags = None
+        return tags
+
     @property
     def classes_(self):
         return self._model.classes_
diff --git a/test/automl/test_sklearn_17_compat.py b/test/automl/test_sklearn_17_compat.py
new file mode 100644
index 000000000..784eccb07
--- /dev/null
+++ b/test/automl/test_sklearn_17_compat.py
@@ -0,0 +1,88 @@
+"""Test sklearn 1.7+ compatibility for estimator type detection.
+
+This test ensures that FLAML estimators are properly recognized as
+regressors or classifiers by sklearn's is_regressor() and is_classifier()
+functions, which is required for sklearn 1.7+ ensemble methods.
+"""
+
+from sklearn.base import is_classifier, is_regressor
+
+from flaml.automl.model import (
+    ExtraTreesEstimator,
+    LGBMEstimator,
+    RandomForestEstimator,
+    XGBoostSklearnEstimator,
+)
+
+
+def test_extra_trees_regressor_type():
+    """Test that ExtraTreesEstimator with regression task is recognized as regressor."""
+    est = ExtraTreesEstimator(task="regression")
+    assert is_regressor(est), "ExtraTreesEstimator(task='regression') should be recognized as a regressor"
+    assert not is_classifier(est), "ExtraTreesEstimator(task='regression') should not be recognized as a classifier"
+
+
+def test_extra_trees_classifier_type():
+    """Test that ExtraTreesEstimator with classification task is recognized as classifier."""
+    est = ExtraTreesEstimator(task="binary")
+    assert is_classifier(est), "ExtraTreesEstimator(task='binary') should be recognized as a classifier"
+    assert not is_regressor(est), "ExtraTreesEstimator(task='binary') should not be recognized as a regressor"
+
+    est = ExtraTreesEstimator(task="multiclass")
+    assert is_classifier(est), "ExtraTreesEstimator(task='multiclass') should be recognized as a classifier"
+    assert not is_regressor(est), "ExtraTreesEstimator(task='multiclass') should not be recognized as a regressor"
+
+
+def test_random_forest_regressor_type():
+    """Test that RandomForestEstimator with regression task is recognized as regressor."""
+    est = RandomForestEstimator(task="regression")
+    assert is_regressor(est), "RandomForestEstimator(task='regression') should be recognized as a regressor"
+    assert not is_classifier(est), "RandomForestEstimator(task='regression') should not be recognized as a classifier"
+
+
+def test_random_forest_classifier_type():
+    """Test that RandomForestEstimator with classification task is recognized as classifier."""
+    est = RandomForestEstimator(task="binary")
+    assert is_classifier(est), "RandomForestEstimator(task='binary') should be recognized as a classifier"
+    assert not is_regressor(est), "RandomForestEstimator(task='binary') should not be recognized as a regressor"
+
+
+def test_lgbm_regressor_type():
+    """Test that LGBMEstimator with regression task is recognized as regressor."""
+    est = LGBMEstimator(task="regression")
+    assert is_regressor(est), "LGBMEstimator(task='regression') should be recognized as a regressor"
+    assert not is_classifier(est), "LGBMEstimator(task='regression') should not be recognized as a classifier"
+
+
+def test_lgbm_classifier_type():
+    """Test that LGBMEstimator with classification task is recognized as classifier."""
+    est = LGBMEstimator(task="binary")
+    assert is_classifier(est), "LGBMEstimator(task='binary') should be recognized as a classifier"
+    assert not is_regressor(est), "LGBMEstimator(task='binary') should not be recognized as a regressor"
+
+
+def test_xgboost_regressor_type():
+    """Test that XGBoostSklearnEstimator with regression task is recognized as regressor."""
+    est = XGBoostSklearnEstimator(task="regression")
+    assert is_regressor(est), "XGBoostSklearnEstimator(task='regression') should be recognized as a regressor"
+    assert not is_classifier(est), "XGBoostSklearnEstimator(task='regression') should not be recognized as a classifier"
+
+
+def test_xgboost_classifier_type():
+    """Test that XGBoostSklearnEstimator with classification task is recognized as classifier."""
+    est = XGBoostSklearnEstimator(task="binary")
+    assert is_classifier(est), "XGBoostSklearnEstimator(task='binary') should be recognized as a classifier"
+    assert not is_regressor(est), "XGBoostSklearnEstimator(task='binary') should not be recognized as a regressor"
+
+
+if __name__ == "__main__":
+    # Run all tests
+    test_extra_trees_regressor_type()
+    test_extra_trees_classifier_type()
+    test_random_forest_regressor_type()
+    test_random_forest_classifier_type()
+    test_lgbm_regressor_type()
+    test_lgbm_classifier_type()
+    test_xgboost_regressor_type()
+    test_xgboost_classifier_type()
+    print("All sklearn 1.7+ compatibility tests passed!")