diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 38b98ff3d..9e88a4e8d 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -22,8 +22,12 @@ on:
       - 'setup.py'
   merge_group:
     types: [checks_requested]
+  schedule:
+    # Every other day at 02:00 UTC
+    - cron: '0 2 */2 * *'
 
-permissions: {}
+permissions:
+  contents: write
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
@@ -36,7 +40,10 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.10", "3.11", "3.12"]
+        exclude:
+          - os: macos-latest
+            python-version: "3.10"
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
@@ -44,7 +51,7 @@
         with:
           python-version: ${{ matrix.python-version }}
       - name: On mac, install libomp to facilitate lgbm and xgboost install
-        if: matrix.os == 'macOS-latest'
+        if: matrix.os == 'macos-latest'
        run: |
          brew update
          brew install libomp
@@ -70,33 +77,43 @@
        run: |
          pip install pyspark==3.5.1
          pip list | grep "pyspark"
-      - name: If linux and python<3.11, install ray 2
-        if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.11'
+      - name: On Ubuntu python 3.12, install pyspark 4.0.1
+        if: matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest'
        run: |
-          pip install "ray[tune]<2.5.0"
-      - name: If mac and python 3.10, install ray and xgboost 1
-        if: matrix.os == 'macOS-latest' && matrix.python-version == '3.10'
-        run: |
-          pip install -e .[ray]
-          # use macOS to test xgboost 1, but macOS also supports xgboost 2
-          pip install "xgboost<2"
-      - name: If linux, install prophet on python < 3.9
-        if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.8'
+          pip install pyspark==4.0.1
+          pip list | grep "pyspark"
+      # # TODO: support ray
+      # - name: If linux and python<3.11, install ray 2
+      #   if: matrix.os == 'ubuntu-latest' && matrix.python-version < '3.11'
+      #   run: |
+      #     pip install "ray[tune]<2.5.0"
+      - name: Install prophet when on linux
+        if: matrix.os == 'ubuntu-latest'
        run: |
          pip install -e .[forecast]
-      - name: Install vw on python < 3.10
-        if: matrix.python-version == '3.8' || matrix.python-version == '3.9'
+      # TODO: support vw for python 3.10+
+      - name: If linux and python<3.10, install vw
+        if: matrix.os == 'ubuntu-latest' && matrix.python-version < '3.10'
        run: |
          pip install -e .[vw]
+      - name: Pip freeze
+        run: |
+          pip freeze
+      - name: Check dependencies
+        run: |
+          python test/check_dependency.py
+      - name: Clear pip cache
+        run: |
+          pip cache purge
       - name: Test with pytest
         if: matrix.python-version != '3.10'
        run: |
-          pytest test/ --ignore=test/autogen
+          pytest test/ --ignore=test/autogen --reruns 2 --reruns-delay 10
       - name: Coverage
         if: matrix.python-version == '3.10'
        run: |
          pip install coverage
-          coverage run -a -m pytest test --ignore=test/autogen
+          coverage run -a -m pytest test --ignore=test/autogen --reruns 2 --reruns-delay 10
          coverage xml
       - name: Upload coverage to Codecov
         if: matrix.python-version == '3.10'
@@ -104,28 +121,25 @@
        with:
          file: ./coverage.xml
          flags: unittests
+      - name: Save dependencies
+        shell: bash
+        run: |
+          git config --global user.name 'github-actions[bot]'
+          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
+          git config advice.addIgnoredFile false
-  # docs:
+          BRANCH=unit-tests-installed-dependencies
+          git fetch origin
+          git checkout -B "$BRANCH"
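+          # If the branch already exists on the remote, rebase onto it so the push below fast-forwards.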
+          if git show-ref --verify --quiet "refs/remotes/origin/$BRANCH"; then
+            git rebase "origin/$BRANCH"
+          fi
-  # runs-on: ubuntu-latest
-
-  # steps:
-  #  - uses: actions/checkout@v3
-  #  - name: Setup Python
-  #    uses: actions/setup-python@v4
-  #    with:
-  #      python-version: '3.8'
-  #  - name: Compile documentation
-  #    run: |
-  #      pip install -e .
-  #      python -m pip install sphinx sphinx_rtd_theme
-  #      cd docs
-  #      make html
-  #  - name: Deploy to GitHub pages
-  #    if: ${{ github.ref == 'refs/heads/main' }}
-  #    uses: JamesIves/github-pages-deploy-action@3.6.2
-  #    with:
-  #      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  #      BRANCH: gh-pages
-  #      FOLDER: docs/_build/html
-  #      CLEAN: true
+          pip freeze > installed_all_dependencies_${{ matrix.python-version }}_${{ matrix.os }}.txt
+          python test/check_dependency.py > installed_first_tier_dependencies_${{ matrix.python-version }}_${{ matrix.os }}.txt
+          git add installed_*dependencies*.txt
+          mv coverage.xml ./coverage_${{ matrix.python-version }}_${{ matrix.os }}.xml || true
+          git add -f ./coverage_${{ matrix.python-version }}_${{ matrix.os }}.xml || true
+          git commit -m "Update installed dependencies for Python ${{ matrix.python-version }} on ${{ matrix.os }}" || exit 0
+          git push origin "$BRANCH"
diff --git a/.gitignore b/.gitignore
index 8a3365b20..18c858dad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -172,7 +172,7 @@ test/default
 test/housing.json
 test/nlp/default/transformer_ms/seq-classification.json
 
-flaml/fabric/fanova/_fanova.c
+flaml/fabric/fanova/*fanova.c
 
 # local config files
 *.config.local
@@ -184,3 +184,7 @@ notebook/lightning_logs/
 lightning_logs/
 flaml/autogen/extensions/tmp/
 test/autogen/my_tmp/
+catboost_*
+
+# Internal configs
+.pypirc
diff --git a/README.md b/README.md
index fcd62bd42..30a2b1457 100644
--- a/README.md
+++ b/README.md
@@ -14,15 +14,9 @@

-:fire: FLAML supports AutoML and Hyperparameter Tuning in [Microsoft Fabric Data Science](https://learn.microsoft.com/en-us/fabric/data-science/automated-machine-learning-fabric). In addition, we've introduced Python 3.11 support, along with a range of new estimators, and comprehensive integration with MLflow—thanks to contributions from the Microsoft Fabric product team.
+:fire: FLAML supports AutoML and Hyperparameter Tuning in [Microsoft Fabric Data Science](https://learn.microsoft.com/en-us/fabric/data-science/automated-machine-learning-fabric). In addition, we've introduced Python 3.11 and 3.12 support, along with a range of new estimators, and comprehensive integration with MLflow—thanks to contributions from the Microsoft Fabric product team.
 
-:fire: Heads-up: We have migrated [AutoGen](https://microsoft.github.io/autogen/) into a dedicated [github repository](https://github.com/microsoft/autogen). Alongside this move, we have also launched a dedicated [Discord](https://discord.gg/pAbnFJrkgZ) server and a [website](https://microsoft.github.io/autogen/) for comprehensive documentation.
-
-:fire: The automated multi-agent chat framework in [AutoGen](https://microsoft.github.io/autogen/) is in preview from v2.0.0.
-
-:fire: FLAML is highlighted in OpenAI's [cookbook](https://github.com/openai/openai-cookbook#related-resources-from-around-the-web).
-
-:fire: [autogen](https://microsoft.github.io/autogen/) is released with support for ChatGPT and GPT-4, based on [Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference](https://arxiv.org/abs/2303.04673).
+:fire: Heads-up: [AutoGen](https://microsoft.github.io/autogen/) has moved to a dedicated [GitHub repository](https://github.com/microsoft/autogen). FLAML no longer includes the `autogen` module—please use AutoGen directly.
 
 ## What is FLAML
@@ -30,7 +24,7 @@
 FLAML is a lightweight Python library for efficient automation of machine learning and AI operations. It automates workflow based on large language models, machine learning models, etc. and optimizes their performance.
 
-- FLAML enables building next-gen GPT-X applications based on multi-agent conversations with minimal effort. It simplifies the orchestration, automation and optimization of a complex GPT-X workflow. It maximizes the performance of GPT-X models and augments their weakness.
+- FLAML enables economical automation and tuning for ML/AI workflows, including model selection and hyperparameter optimization under resource constraints.
 - For common machine learning tasks like classification and regression, it quickly finds quality models for user-provided data with low computational resources. It is easy to customize or extend. Users can find their desired customizability from a smooth range.
 - It supports fast and economical automatic tuning (e.g., inference hyperparameters for foundation models, configurations in MLOps/LMOps workflows, pipelines, mathematical/statistical models, algorithms, computing experiments, software configurations), capable of handling large search space with heterogeneous evaluation cost and complex constraints/guidance/early stopping.
@@ -46,10 +40,10 @@
-FLAML requires **Python version >= 3.9**. It can be installed from pip:
+FLAML requires **Python version >= 3.10**. It can be installed from pip:
 
 ```bash
 pip install flaml
 ```
 
-Minimal dependencies are installed without extra options. You can install extra options based on the feature you need. For example, use the following to install the dependencies needed by the [`autogen`](https://microsoft.github.io/autogen/) package.
+Minimal dependencies are installed without extra options. You can install extra options based on the feature you need. For example, use the following to install the dependencies needed by the [`automl`](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML) module. ```bash -pip install "flaml[autogen]" +pip install "flaml[automl]" ``` Find more options in [Installation](https://microsoft.github.io/FLAML/docs/Installation). @@ -57,39 +51,6 @@ Each of the [`notebook examples`](https://github.com/microsoft/FLAML/tree/main/n ## Quickstart -- (New) The [autogen](https://microsoft.github.io/autogen/) package enables the next-gen GPT-X applications with a generic multi-agent conversation framework. - It offers customizable and conversable agents which integrate LLMs, tools and human. - By automating chat among multiple capable agents, one can easily make them collectively perform tasks autonomously or with human feedback, including tasks that require using tools via code. For example, - -```python -from flaml import autogen - -assistant = autogen.AssistantAgent("assistant") -user_proxy = autogen.UserProxyAgent("user_proxy") -user_proxy.initiate_chat( - assistant, - message="Show me the YTD gain of 10 largest technology companies as of today.", -) -# This initiates an automated chat between the two agents to solve the task -``` - -Autogen also helps maximize the utility out of the expensive LLMs such as ChatGPT and GPT-4. It offers a drop-in replacement of `openai.Completion` or `openai.ChatCompletion` with powerful functionalites like tuning, caching, templating, filtering. For example, you can optimize generations by LLM with your own tuning data, success metrics and budgets. - -```python -# perform tuning -config, analysis = autogen.Completion.tune( - data=tune_data, - metric="success", - mode="max", - eval_func=eval_func, - inference_budget=0.05, - optimization_budget=3, - num_samples=-1, -) -# perform inference for a test instance -response = autogen.Completion.create(context=test_instance, **config) -``` - - With three lines of code, you can start using this economical and fast AutoML engine as a [scikit-learn style estimator](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML). diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 1f6fdadc5..c4c6e2dbb 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -401,6 +401,24 @@ class AutoML(BaseEstimator): self._estimator_type = "classifier" if settings["task"] in CLASSIFICATION else "regressor" self.best_run_id = None + def __getstate__(self): + """Customize pickling to avoid serializing runtime-only objects. + + MLflow's sklearn flavor serializes estimators via (cloud)pickle. During + AutoML fitting we may attach an internal mlflow integration instance + which holds `concurrent.futures.Future` objects and executors containing + thread locks, which are not picklable. + """ + + state = self.__dict__.copy() + state.pop("mlflow_integration", None) + return state + + def __setstate__(self, state): + self.__dict__.update(state) + # Ensure attribute exists post-unpickle. 
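+        # fit() may attach a fresh integration again when MLflow logging is enabled.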
+        self.mlflow_integration = None
+
     def get_params(self, deep: bool = False) -> dict:
         return self._settings.copy()
diff --git a/flaml/automl/data.py b/flaml/automl/data.py
index 4c473963f..096ba46d9 100644
--- a/flaml/automl/data.py
+++ b/flaml/automl/data.py
@@ -50,7 +50,10 @@ def load_openml_dataset(dataset_id, data_dir=None, random_state=0, dataset_forma
     """
     import pickle
 
-    import openml
+    try:
+        import openml
+    except ImportError:
+        openml = None
     from sklearn.model_selection import train_test_split
 
     filename = "openml_ds" + str(dataset_id) + ".pkl"
@@ -61,15 +64,16 @@
             dataset = pickle.load(f)
     else:
         print("download dataset from openml")
-        dataset = openml.datasets.get_dataset(dataset_id)
+        dataset = openml.datasets.get_dataset(dataset_id) if openml else None
         if not os.path.exists(data_dir):
             os.makedirs(data_dir)
         with open(filepath, "wb") as f:
             pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
-    print("Dataset name:", dataset.name)
+    if dataset:
+        print("Dataset name:", dataset.name)
     try:
         X, y, *__ = dataset.get_data(target=dataset.default_target_attribute, dataset_format=dataset_format)
-    except ValueError:
+    except (ValueError, AttributeError, TypeError):
         from sklearn.datasets import fetch_openml
 
         X, y = fetch_openml(data_id=dataset_id, return_X_y=True)
diff --git a/flaml/automl/ml.py b/flaml/automl/ml.py
index bd13d8259..8ba05120e 100644
--- a/flaml/automl/ml.py
+++ b/flaml/automl/ml.py
@@ -127,9 +127,17 @@
         import datasets
 
         datasets_metric_name = huggingface_submetric_to_metric.get(metric_name, metric_name.split(":")[0])
-        metric = datasets.load_metric(datasets_metric_name, trust_remote_code=True)
         metric_mode = huggingface_metric_to_mode[datasets_metric_name]
 
+        # datasets>=3 removed load_metric; prefer evaluate if available
+        try:
+            import evaluate
+
+            metric = evaluate.load(datasets_metric_name, trust_remote_code=True)
+        except Exception:
+            # older datasets (<3.0) still provide load_metric
+            metric = datasets.load_metric(datasets_metric_name, trust_remote_code=True)
+
         if metric_name.startswith("seqeval"):
             y_processed_true = [[labels[tr] for tr in each_list] for each_list in y_processed_true]
         elif metric in ("pearsonr", "spearmanr"):
diff --git a/flaml/automl/model.py b/flaml/automl/model.py
index 298f3cab8..53a92ece2 100644
--- a/flaml/automl/model.py
+++ b/flaml/automl/model.py
@@ -111,7 +111,7 @@ def limit_resource(memory_limit, time_limit):
         pass
 
 
-class BaseEstimator:
+class BaseEstimator(sklearn.base.ClassifierMixin, sklearn.base.BaseEstimator):
     """The abstract class for all learners.
 
     Typical examples:
diff --git a/flaml/automl/nlp/huggingface/training_args.py b/flaml/automl/nlp/huggingface/training_args.py
index 6a408b1a2..383fc9f39 100644
--- a/flaml/automl/nlp/huggingface/training_args.py
+++ b/flaml/automl/nlp/huggingface/training_args.py
@@ -77,6 +77,15 @@ class TrainingArgumentsForAuto(TrainingArguments):
 
     logging_steps: int = field(default=500, metadata={"help": "Log every X updates steps."})
 
+    # Newer versions of HuggingFace Transformers may access `TrainingArguments.generation_config`
+    # (e.g., in generation-aware trainers/callbacks). Keep this attribute to remain compatible
+    # while defaulting to None for non-generation tasks.
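+    # A plain Optional[object] annotation (rather than transformers.GenerationConfig) keeps this
+    # module importable on transformers versions that predate that class.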
+ generation_config: Optional[object] = field( + default=None, + metadata={"help": "Optional generation config (or path) used by generation-aware trainers."}, + ) + @staticmethod def load_args_from_console(): from dataclasses import fields diff --git a/flaml/automl/time_series/tft.py b/flaml/automl/time_series/tft.py index c9ab30be1..fb660ce43 100644 --- a/flaml/automl/time_series/tft.py +++ b/flaml/automl/time_series/tft.py @@ -1,3 +1,4 @@ +import inspect import time try: @@ -106,12 +107,17 @@ class TemporalFusionTransformerEstimator(TimeSeriesEstimator): def fit(self, X_train, y_train, budget=None, **kwargs): import warnings - import pytorch_lightning as pl + try: + import lightning.pytorch as pl + from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor + from lightning.pytorch.loggers import TensorBoardLogger + except ImportError: + import pytorch_lightning as pl + from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor + from pytorch_lightning.loggers import TensorBoardLogger import torch from pytorch_forecasting import TemporalFusionTransformer from pytorch_forecasting.metrics import QuantileLoss - from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor - from pytorch_lightning.loggers import TensorBoardLogger # a bit of monkey patching to fix the MacOS test # all the log_prediction method appears to do is plot stuff, which ?breaks github tests @@ -132,12 +138,26 @@ class TemporalFusionTransformerEstimator(TimeSeriesEstimator): lr_logger = LearningRateMonitor() # log the learning rate logger = TensorBoardLogger(kwargs.get("log_dir", "lightning_logs")) # logging results to a tensorboard default_trainer_kwargs = dict( - gpus=self._kwargs.get("gpu_per_trial", [0]) if torch.cuda.is_available() else None, max_epochs=max_epochs, gradient_clip_val=gradient_clip_val, callbacks=[lr_logger, early_stop_callback], logger=logger, ) + + # PyTorch Lightning >=2.0 replaced `gpus` with `accelerator`/`devices`. + # Also, passing `gpus=None` is not accepted on newer versions. + trainer_sig_params = inspect.signature(pl.Trainer.__init__).parameters + if torch.cuda.is_available() and "gpus" in trainer_sig_params: + gpus = self._kwargs.get("gpu_per_trial", None) + if gpus is not None: + default_trainer_kwargs["gpus"] = gpus + elif torch.cuda.is_available() and "devices" in trainer_sig_params: + devices = self._kwargs.get("gpu_per_trial", None) + if devices == -1: + devices = "auto" + if devices is not None: + default_trainer_kwargs["accelerator"] = "gpu" + default_trainer_kwargs["devices"] = devices trainer = pl.Trainer( **default_trainer_kwargs, ) @@ -157,7 +177,14 @@ class TemporalFusionTransformerEstimator(TimeSeriesEstimator): val_dataloaders=val_dataloader, ) best_model_path = trainer.checkpoint_callback.best_model_path - best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path) + # PyTorch 2.6 changed `torch.load` default `weights_only` from False -> True. + # Some Lightning checkpoints (including those produced here) can require full unpickling. + # This path is generated locally during training, so it's trusted. 
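+        # Only pass weights_only when the installed load_from_checkpoint signature accepts it.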
+ load_sig_params = inspect.signature(TemporalFusionTransformer.load_from_checkpoint).parameters + if "weights_only" in load_sig_params: + best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path, weights_only=False) + else: + best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path) train_time = time.time() - current_time self._model = best_tft return train_time diff --git a/flaml/automl/time_series/ts_data.py b/flaml/automl/time_series/ts_data.py index 2587a70e7..0c7d25558 100644 --- a/flaml/automl/time_series/ts_data.py +++ b/flaml/automl/time_series/ts_data.py @@ -9,6 +9,7 @@ import numpy as np try: import pandas as pd from pandas import DataFrame, Series, to_datetime + from pandas.api.types import is_datetime64_any_dtype from scipy.sparse import issparse from sklearn.compose import ColumnTransformer from sklearn.impute import SimpleImputer @@ -392,6 +393,15 @@ class DataTransformerTS: assert len(self.num_columns) == 0, "Trying to call fit() twice, something is wrong" for column in X.columns: + # Never treat the time column as a feature for sklearn preprocessing + if column == self.time_col: + continue + + # Robust datetime detection (covers datetime64[ms/us/ns], tz-aware, etc.) + if is_datetime64_any_dtype(X[column]): + self.datetime_columns.append(column) + continue + # sklearn/utils/validation.py needs int/float values if X[column].dtype.name in ("object", "category", "string"): if ( diff --git a/flaml/tune/searcher/blendsearch.py b/flaml/tune/searcher/blendsearch.py index 8cd8f52e3..c76a9a162 100644 --- a/flaml/tune/searcher/blendsearch.py +++ b/flaml/tune/searcher/blendsearch.py @@ -244,13 +244,32 @@ class BlendSearch(Searcher): evaluated_rewards=evaluated_rewards, ) except (AssertionError, ValueError): - self._gs = GlobalSearch( - space=gs_space, - metric=metric, - mode=mode, - seed=gs_seed, - sampler=sampler, - ) + try: + self._gs = GlobalSearch( + space=gs_space, + metric=metric, + mode=mode, + seed=gs_seed, + sampler=sampler, + ) + except ValueError: + # Ray Tune's OptunaSearch converts Tune domains into Optuna + # distributions. Optuna disallows integer log distributions + # with step != 1 (e.g., qlograndint with q>1), which can + # raise here. Fall back to FLAML's OptunaSearch wrapper, + # which handles these spaces more permissively. + if getattr(GlobalSearch, "__module__", "").startswith("ray.tune"): + from .suggestion import OptunaSearch as _FallbackOptunaSearch + + self._gs = _FallbackOptunaSearch( + space=gs_space, + metric=metric, + mode=mode, + seed=gs_seed, + sampler=sampler, + ) + else: + raise self._gs.space = space else: self._gs = None diff --git a/flaml/tune/searcher/suggestion.py b/flaml/tune/searcher/suggestion.py index 614266892..552f65a66 100644 --- a/flaml/tune/searcher/suggestion.py +++ b/flaml/tune/searcher/suggestion.py @@ -35,6 +35,73 @@ from ..sample import ( Quantized, Uniform, ) + +# If Ray is installed, flaml.tune may re-export Ray Tune sampling functions. +# In that case, the search space contains Ray Tune Domain/Sampler objects, +# which should be accepted by our Optuna search-space conversion. 
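+# The *_TYPES tuples below let isinstance() checks accept either implementation;
+# when Ray is not installed, the except branch falls back to the FLAML-only types.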
+try:
+    from ray import __version__ as _ray_version  # type: ignore
+
+    if str(_ray_version).startswith("1."):
+        from ray.tune.sample import (  # type: ignore
+            Categorical as _RayCategorical,
+            Domain as _RayDomain,
+            Float as _RayFloat,
+            Integer as _RayInteger,
+            LogUniform as _RayLogUniform,
+            Quantized as _RayQuantized,
+            Uniform as _RayUniform,
+        )
+    else:
+        from ray.tune.search.sample import (  # type: ignore
+            Categorical as _RayCategorical,
+            Domain as _RayDomain,
+            Float as _RayFloat,
+            Integer as _RayInteger,
+            LogUniform as _RayLogUniform,
+            Quantized as _RayQuantized,
+            Uniform as _RayUniform,
+        )
+
+    _FLOAT_TYPES = (Float, _RayFloat)
+    _INTEGER_TYPES = (Integer, _RayInteger)
+    _CATEGORICAL_TYPES = (Categorical, _RayCategorical)
+    _DOMAIN_TYPES = (Domain, _RayDomain)
+    _QUANTIZED_TYPES = (Quantized, _RayQuantized)
+    _UNIFORM_TYPES = (Uniform, _RayUniform)
+    _LOGUNIFORM_TYPES = (LogUniform, _RayLogUniform)
+except Exception:  # pragma: no cover
+    _FLOAT_TYPES = (Float,)
+    _INTEGER_TYPES = (Integer,)
+    _CATEGORICAL_TYPES = (Categorical,)
+    _DOMAIN_TYPES = (Domain,)
+    _QUANTIZED_TYPES = (Quantized,)
+    _UNIFORM_TYPES = (Uniform,)
+    _LOGUNIFORM_TYPES = (LogUniform,)
 from ..trial import flatten_dict, unflatten_dict
 from .variant_generator import parse_spec_vars
@@ -850,19 +917,22 @@ class OptunaSearch(Searcher):
     def resolve_value(domain: Domain) -> ot.distributions.BaseDistribution:
         quantize = None
-        sampler = domain.get_sampler()
-        if isinstance(sampler, Quantized):
+        # Ray Tune Domains and FLAML Domains both provide get_sampler(), but
+        # fall back to the .sampler attribute for robustness.
+        sampler = domain.get_sampler() if hasattr(domain, "get_sampler") else getattr(domain, "sampler", None)
+
+        if isinstance(sampler, _QUANTIZED_TYPES) or type(sampler).__name__ == "Quantized":
             quantize = sampler.q
-            sampler = sampler.sampler
-            if isinstance(sampler, LogUniform):
+            sampler = getattr(sampler, "sampler", None) or sampler.get_sampler()
+            if isinstance(sampler, _LOGUNIFORM_TYPES) or type(sampler).__name__ == "LogUniform":
                 logger.warning(
                     "Optuna does not handle quantization in loguniform "
                     "sampling. The parameter will be passed but it will "
                     "probably be ignored."
) - if isinstance(domain, Float): - if isinstance(sampler, LogUniform): + if isinstance(domain, _FLOAT_TYPES) or type(domain).__name__ == "Float": + if isinstance(sampler, _LOGUNIFORM_TYPES) or type(sampler).__name__ == "LogUniform": if quantize: logger.warning( "Optuna does not support both quantization and " @@ -870,17 +940,17 @@ class OptunaSearch(Searcher): ) return ot.distributions.LogUniformDistribution(domain.lower, domain.upper) - elif isinstance(sampler, Uniform): + elif isinstance(sampler, _UNIFORM_TYPES) or type(sampler).__name__ == "Uniform": if quantize: return ot.distributions.DiscreteUniformDistribution(domain.lower, domain.upper, quantize) return ot.distributions.UniformDistribution(domain.lower, domain.upper) - elif isinstance(domain, Integer): - if isinstance(sampler, LogUniform): + elif isinstance(domain, _INTEGER_TYPES) or type(domain).__name__ == "Integer": + if isinstance(sampler, _LOGUNIFORM_TYPES) or type(sampler).__name__ == "LogUniform": # ``step`` argument Deprecated in v2.0.0. ``step`` argument should be 1 in Log Distribution # The removal of this feature is currently scheduled for v4.0.0, return ot.distributions.IntLogUniformDistribution(domain.lower, domain.upper - 1, step=1) - elif isinstance(sampler, Uniform): + elif isinstance(sampler, _UNIFORM_TYPES) or type(sampler).__name__ == "Uniform": # Upper bound should be inclusive for quantization and # exclusive otherwise return ot.distributions.IntUniformDistribution( @@ -888,16 +958,16 @@ class OptunaSearch(Searcher): domain.upper - int(bool(not quantize)), step=quantize or 1, ) - elif isinstance(domain, Categorical): - if isinstance(sampler, Uniform): + elif isinstance(domain, _CATEGORICAL_TYPES) or type(domain).__name__ == "Categorical": + if isinstance(sampler, _UNIFORM_TYPES) or type(sampler).__name__ == "Uniform": return ot.distributions.CategoricalDistribution(domain.categories) raise ValueError( "Optuna search does not support parameters of type " - "`{}` with samplers of type `{}`".format(type(domain).__name__, type(domain.sampler).__name__) + "`{}` with samplers of type `{}`".format(type(domain).__name__, type(sampler).__name__) ) # Parameter name is e.g. 
"a/b/c" for nested dicts values = {"/".join(path): resolve_value(domain) for path, domain in domain_vars} - return values + return values diff --git a/flaml/version.py b/flaml/version.py index a0b06b867..3d67cd6bb 100644 --- a/flaml/version.py +++ b/flaml/version.py @@ -1 +1 @@ -__version__ = "2.3.7" +__version__ = "2.4.0" diff --git a/setup.py b/setup.py index 31cc56372..9b44b7814 100644 --- a/setup.py +++ b/setup.py @@ -51,60 +51,59 @@ setuptools.setup( "joblib<=1.3.2", ], "test": [ - "jupyter", + "numpy>=1.17,<2.0.0; python_version<'3.13'", + "numpy>2.0.0; python_version>='3.13'", + "jupyter; python_version<'3.13'", "lightgbm>=2.3.1", - "xgboost>=0.90,<2.0.0", + "xgboost>=0.90,<2.0.0; python_version<'3.11'", + "xgboost>=2.0.0; python_version>='3.11'", "scipy>=1.4.1", "pandas>=1.1.4,<2.0.0; python_version<'3.10'", "pandas>=1.1.4; python_version>='3.10'", - "scikit-learn>=1.0.0", + "scikit-learn>=1.2.0", "thop", "pytest>=6.1.1", + "pytest-rerunfailures>=13.0", "coverage>=5.3", "pre-commit", "torch", "torchvision", - "catboost>=0.26,<1.2; python_version<'3.11'", - "catboost>=0.26; python_version>='3.11'", + "catboost>=0.26; python_version<'3.13'", "rgf-python", "optuna>=2.8.0,<=3.6.1", - "openml", + "openml; python_version<'3.13'", "statsmodels>=0.12.2", - "psutil==5.8.0", + "psutil", "dataclasses", - "transformers[torch]==4.26", - "datasets<=3.5.0", - "nltk<=3.8.1", # 3.8.2 doesn't work with mlflow + "transformers[torch]", + "datasets", + "evaluate", + "nltk!=3.8.2", # 3.8.2 doesn't work with mlflow "rouge_score", - "hcrystalball==0.1.10", + "hcrystalball", "seqeval", - "pytorch-forecasting>=0.9.0,<=0.10.1; python_version<'3.11'", - # "pytorch-forecasting==0.10.1; python_version=='3.11'", - "mlflow==2.15.1", + "pytorch-forecasting; python_version<'3.13'", + "mlflow-skinny<=2.22.1", # Refer to https://mvnrepository.com/artifact/org.mlflow/mlflow-spark "joblibspark>=0.5.0", "joblib<=1.3.2", "nbconvert", "nbformat", "ipykernel", - "pytorch-lightning<1.9.1", # test_forecast_panel - "tensorboardX==2.6", # test_forecast_panel - "requests<2.29.0", # https://github.com/docker/docker-py/issues/3113 + "pytorch-lightning", # test_forecast_panel + "tensorboardX", # test_forecast_panel + "requests", # https://github.com/docker/docker-py/issues/3113 "packaging", - "pydantic==1.10.9", - "sympy", - "wolframalpha", "dill", # a drop in replacement of pickle ], "catboost": [ - "catboost>=0.26,<1.2; python_version<'3.11'", - "catboost>=0.26,<=1.2.5; python_version>='3.11'", + "catboost>=0.26", ], "blendsearch": [ "optuna>=2.8.0,<=3.6.1", "packaging", ], "ray": [ - "ray[tune]~=1.13", + "ray[tune]>=1.13,<2.5.0", ], "azureml": [ "azureml-mlflow", @@ -131,33 +130,21 @@ setuptools.setup( "seqeval", ], "ts_forecast": [ - "holidays<0.14", # to prevent installation error for prophet - "prophet>=1.0.1", + "holidays", + "prophet>=1.1.5", "statsmodels>=0.12.2", - "hcrystalball==0.1.10", + "hcrystalball>=0.1.10", ], "forecast": [ - "holidays<0.14", # to prevent installation error for prophet - "prophet>=1.0.1", + "holidays", + "prophet>=1.1.5", "statsmodels>=0.12.2", - "hcrystalball==0.1.10", - "pytorch-forecasting>=0.9.0; python_version<'3.11'", - # "pytorch-forecasting==0.10.1; python_version=='3.11'", - "pytorch-lightning==1.9.0", - "tensorboardX==2.6", + "hcrystalball>=0.1.10", + "pytorch-forecasting>=0.10.4; python_version<'3.13'", + "pytorch-lightning>=1.9.0", + "tensorboardX>=2.6", ], "benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3", "pandas==1.1.4"], - "openai": ["openai==0.27.8", "diskcache"], 
- "autogen": ["openai==0.27.8", "diskcache", "termcolor"], - "mathchat": ["openai==0.27.8", "diskcache", "termcolor", "sympy", "pydantic==1.10.9", "wolframalpha"], - "retrievechat": [ - "openai==0.27.8", - "diskcache", - "termcolor", - "chromadb", - "tiktoken", - "sentence_transformers", - ], "synapse": [ "joblibspark>=0.5.0", "optuna>=2.8.0,<=3.6.1", @@ -170,9 +157,9 @@ setuptools.setup( "Operating System :: OS Independent", # Specify the Python versions you support here. "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], - python_requires=">=3.9", + python_requires=">=3.10", ) diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py index 536ef0484..b4558f109 100644 --- a/test/automl/test_notebook_example.py +++ b/test/automl/test_notebook_example.py @@ -1,8 +1,23 @@ import sys import pytest -from minio.error import ServerError -from openml.exceptions import OpenMLServerException + +try: + from minio.error import ServerError +except ImportError: + + class ServerError(Exception): + pass + + +try: + from openml.exceptions import OpenMLServerException +except ImportError: + + class OpenMLServerException(Exception): + pass + + from requests.exceptions import ChunkedEncodingError, SSLError diff --git a/test/automl/test_python_log.py b/test/automl/test_python_log.py index e18e33634..b3f141780 100644 --- a/test/automl/test_python_log.py +++ b/test/automl/test_python_log.py @@ -38,7 +38,7 @@ class TestLogging(unittest.TestCase): "keep_search_state": True, "learner_selector": "roundrobin", } - X_train, y_train = fetch_california_housing(return_X_y=True) + X_train, y_train = fetch_california_housing(return_X_y=True, data_home="test") n = len(y_train) >> 1 print(automl.model, automl.classes_, automl.predict(X_train)) automl.fit( diff --git a/test/automl/test_regression.py b/test/automl/test_regression.py index 892ad1ece..daa5f3830 100644 --- a/test/automl/test_regression.py +++ b/test/automl/test_regression.py @@ -47,7 +47,7 @@ class TestRegression(unittest.TestCase): "n_jobs": 1, "model_history": True, } - X_train, y_train = fetch_california_housing(return_X_y=True) + X_train, y_train = fetch_california_housing(return_X_y=True, data_home="test") n = int(len(y_train) * 9 // 10) automl.fit(X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings) assert automl._state.eval_method == "holdout" @@ -141,7 +141,7 @@ class TestRegression(unittest.TestCase): "n_concurrent_trials": 10, "hpo_method": hpo_method, } - X_train, y_train = fetch_california_housing(return_X_y=True) + X_train, y_train = fetch_california_housing(return_X_y=True, data_home="test") try: automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings) print(automl_experiment.predict(X_train)) @@ -268,7 +268,7 @@ def test_reproducibility_of_regression_models(estimator: str): "skip_transform": True, "retrain_full": True, } - X, y = fetch_california_housing(return_X_y=True, as_frame=True) + X, y = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test") automl.fit(X_train=X, y_train=y, **automl_settings) best_model = automl.model assert best_model is not None @@ -314,7 +314,7 @@ def test_reproducibility_of_catboost_regression_model(): "skip_transform": True, "retrain_full": True, } - X, y = fetch_california_housing(return_X_y=True, as_frame=True) + X, y = 
fetch_california_housing(return_X_y=True, as_frame=True, data_home="test") automl.fit(X_train=X, y_train=y, **automl_settings) best_model = automl.model assert best_model is not None @@ -360,7 +360,7 @@ def test_reproducibility_of_lgbm_regression_model(): "skip_transform": True, "retrain_full": True, } - X, y = fetch_california_housing(return_X_y=True, as_frame=True) + X, y = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test") automl.fit(X_train=X, y_train=y, **automl_settings) best_model = automl.model assert best_model is not None @@ -424,7 +424,7 @@ def test_reproducibility_of_underlying_regression_models(estimator: str): "skip_transform": True, "retrain_full": False, } - X, y = fetch_california_housing(return_X_y=True, as_frame=True) + X, y = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test") automl.fit(X_train=X, y_train=y, **automl_settings) best_model = automl.model assert best_model is not None diff --git a/test/automl/test_score.py b/test/automl/test_score.py index 2976daade..7e9abc864 100644 --- a/test/automl/test_score.py +++ b/test/automl/test_score.py @@ -142,7 +142,7 @@ class TestScore: def test_regression(self): automl_experiment = AutoML() - X_train, y_train = fetch_california_housing(return_X_y=True) + X_train, y_train = fetch_california_housing(return_X_y=True, data_home="test") n = int(len(y_train) * 9 // 10) for each_estimator in [ diff --git a/test/automl/test_training_log.py b/test/automl/test_training_log.py index cd8db8118..0d9628473 100644 --- a/test/automl/test_training_log.py +++ b/test/automl/test_training_log.py @@ -30,7 +30,7 @@ class TestTrainingLog(unittest.TestCase): "keep_search_state": True, "estimator_list": estimator_list, } - X_train, y_train = fetch_california_housing(return_X_y=True) + X_train, y_train = fetch_california_housing(return_X_y=True, data_home="test") automl.fit(X_train=X_train, y_train=y_train, **automl_settings) # Check if the training log file is populated. 
self.assertTrue(os.path.exists(filename)) diff --git a/test/automl/test_warmstart.py b/test/automl/test_warmstart.py index 677883741..ba390bf7d 100644 --- a/test/automl/test_warmstart.py +++ b/test/automl/test_warmstart.py @@ -108,7 +108,14 @@ class TestWarmStart(unittest.TestCase): def test_FLAML_sample_size_in_starting_points(self): from minio.error import ServerError - from openml.exceptions import OpenMLServerException + + try: + from openml.exceptions import OpenMLServerException + except ImportError: + + class OpenMLServerException(Exception): + pass + from requests.exceptions import ChunkedEncodingError, SSLError from flaml import AutoML diff --git a/test/cal_housing_py3.pkz b/test/cal_housing_py3.pkz new file mode 100644 index 000000000..201c864b2 Binary files /dev/null and b/test/cal_housing_py3.pkz differ diff --git a/test/check_dependency.py b/test/check_dependency.py new file mode 100644 index 000000000..526f25b18 --- /dev/null +++ b/test/check_dependency.py @@ -0,0 +1,60 @@ +import subprocess +from importlib.metadata import distributions + +installed_libs = sorted(f"{dist.metadata['Name']}=={dist.version}" for dist in distributions()) + +first_tier_dependencies = [ + "numpy", + "jupyter", + "lightgbm", + "xgboost", + "scipy", + "pandas", + "scikit-learn", + "thop", + "pytest", + "pytest-rerunfailures", + "coverage", + "pre-commit", + "torch", + "torchvision", + "catboost", + "rgf-python", + "optuna", + "openml", + "statsmodels", + "psutil", + "dataclasses", + "transformers[torch]", + "transformers", + "datasets", + "evaluate", + "nltk", + "rouge_score", + "hcrystalball", + "seqeval", + "pytorch-forecasting", + "mlflow-skinny", + "joblibspark", + "joblib", + "nbconvert", + "nbformat", + "ipykernel", + "pytorch-lightning", + "tensorboardX", + "requests", + "packaging", + "dill", + "ray", + "prophet", +] + + +for lib in installed_libs: + lib_name = lib.split("==")[0] + if lib_name in first_tier_dependencies: + print(lib) + +# print current commit hash +commit_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip() +print(f"Current commit hash: {commit_hash}") diff --git a/test/conftest.py b/test/conftest.py index 47a74b289..4b4620775 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -2,11 +2,24 @@ from typing import Any, Dict, List, Union import numpy as np import pandas as pd -from catboost import CatBoostClassifier, CatBoostRegressor, Pool +import pytest from sklearn.metrics import f1_score, r2_score +try: + from catboost import CatBoostClassifier, CatBoostRegressor, Pool +except ImportError: # pragma: no cover + CatBoostClassifier = None + CatBoostRegressor = None + Pool = None -def evaluate_cv_folds_with_underlying_model(X_train_all, y_train_all, kf, model: Any, task: str) -> pd.DataFrame: + +def _is_catboost_model_type(model_type: type) -> bool: + if CatBoostClassifier is not None and CatBoostRegressor is not None: + return model_type is CatBoostClassifier or model_type is CatBoostRegressor + return getattr(model_type, "__module__", "").startswith("catboost") + + +def evaluate_cv_folds_with_underlying_model(X_train_all, y_train_all, kf, model: Any, task: str) -> List[float]: """Mimic the FLAML CV process to calculate the metrics across each fold. 
:param X_train_all: X training data @@ -17,7 +30,7 @@ def evaluate_cv_folds_with_underlying_model(X_train_all, y_train_all, kf, model: :return: An array containing the metrics """ rng = np.random.RandomState(2020) - all_fold_metrics: List[Dict[str, Union[int, float]]] = [] + all_fold_metrics: List[float] = [] for train_index, val_index in kf.split(X_train_all, y_train_all): X_train_split, y_train_split = X_train_all, y_train_all train_index = rng.permutation(train_index) @@ -25,9 +38,11 @@ def evaluate_cv_folds_with_underlying_model(X_train_all, y_train_all, kf, model: X_val = X_train_split.iloc[val_index] y_train, y_val = y_train_split[train_index], y_train_split[val_index] model_type = type(model) - if model_type is not CatBoostClassifier and model_type is not CatBoostRegressor: + if not _is_catboost_model_type(model_type): model.fit(X_train, y_train) else: + if Pool is None: + pytest.skip("catboost is not installed") use_best_model = True n = max(int(len(y_train) * 0.9), len(y_train) - 1000) if use_best_model else len(y_train) X_tr, y_tr = (X_train)[:n], y_train[:n] @@ -38,5 +53,5 @@ def evaluate_cv_folds_with_underlying_model(X_train_all, y_train_all, kf, model: reproduced_metric = 1 - f1_score(y_val, y_pred_classes) else: reproduced_metric = 1 - r2_score(y_val, y_pred_classes) - all_fold_metrics.append(reproduced_metric) + all_fold_metrics.append(float(reproduced_metric)) return all_fold_metrics diff --git a/test/default/test_defaults.py b/test/default/test_defaults.py index d8be7b61b..acf50e4ea 100644 --- a/test/default/test_defaults.py +++ b/test/default/test_defaults.py @@ -60,7 +60,7 @@ def test_housing(as_frame=True): "starting_points": "data", "max_iter": 0, } - X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=as_frame) + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=as_frame, data_home="test") automl.fit(X_train, y_train, **automl_settings) @@ -115,7 +115,7 @@ def test_suggest_classification(): def test_suggest_regression(): location = "test/default" - X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True) + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test") suggested = suggest_hyperparams("regression", X_train, y_train, "lgbm", location=location) print(suggested) suggested = preprocess_and_suggest_hyperparams("regression", X_train, y_train, "xgboost", location=location) @@ -137,7 +137,7 @@ def test_rf(): print(rf) location = "test/default" - X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True) + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test") rf = RandomForestRegressor(default_location=location) rf.fit(X_train[:100], y_train[:100]) rf.predict(X_train) @@ -155,7 +155,7 @@ def test_extratrees(): print(classifier) location = "test/default" - X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True) + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test") regressor = ExtraTreesRegressor(default_location=location) regressor.fit(X_train[:100], y_train[:100]) regressor.predict(X_train) @@ -175,7 +175,7 @@ def test_lgbm(): print(classifier.classes_) location = "test/default" - X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True) + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test") regressor = LGBMRegressor(default_location=location) regressor.fit(X_train, y_train) 
regressor.predict(X_train) @@ -194,7 +194,7 @@ def test_xgboost(): print(classifier.classes_) location = "test/default" - X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True) + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test") regressor = XGBRegressor(default_location=location) regressor.fit(X_train[:100], y_train[:100]) regressor.predict(X_train) diff --git a/test/nlp/test_default.py b/test/nlp/test_default.py index 239fce227..5f7622a1c 100644 --- a/test/nlp/test_default.py +++ b/test/nlp/test_default.py @@ -30,21 +30,33 @@ def test_build_portfolio(path="./test/nlp/default", strategy="greedy"): @pytest.mark.skipif(sys.platform == "win32", reason="do not run on windows") def test_starting_point_not_in_search_space(): - from flaml import AutoML + """Regression test for invalid starting points and custom_hp. + + This test must not require network access to Hugging Face. + """ """ test starting_points located outside of the search space, and custom_hp is not set """ + from flaml.automl.state import SearchState + from flaml.automl.task.factory import task_factory + this_estimator_name = "transformer" - X_train, y_train, X_val, y_val, _ = get_toy_data_seqclassification() + X_train, y_train, _, _, _ = get_toy_data_seqclassification() + task = task_factory("seq-classification", X_train, y_train) + estimator_class = task.estimator_class_from_str(this_estimator_name) + estimator_class.init() - automl = AutoML() - automl_settings = get_automl_settings(estimator_name=this_estimator_name) - - automl_settings["starting_points"] = {this_estimator_name: [{"learning_rate": 2e-3}]} - - automl.fit(X_train, y_train, **automl_settings) - assert automl._search_states[this_estimator_name].init_config[0]["learning_rate"] != 2e-3 + # SearchState is where invalid starting points are filtered out when max_iter > 1. + search_state = SearchState( + learner_class=estimator_class, + data=X_train, + task=task, + starting_point={"learning_rate": 2e-3}, + max_iter=3, + budget=10, + ) + assert search_state.init_config and search_state.init_config[0].get("learning_rate") != 2e-3 """ test starting_points located outside of the search space, and custom_hp is set @@ -52,39 +64,60 @@ def test_starting_point_not_in_search_space(): from flaml import tune - X_train, y_train, X_val, y_val, _ = get_toy_data_seqclassification() + X_train, y_train, _, _, _ = get_toy_data_seqclassification() this_estimator_name = "transformer_ms" - automl = AutoML() - automl_settings = get_automl_settings(estimator_name=this_estimator_name) + task = task_factory("seq-classification", X_train, y_train) + estimator_class = task.estimator_class_from_str(this_estimator_name) + estimator_class.init() - automl_settings["custom_hp"] = { - this_estimator_name: { - "model_path": { - "domain": "albert-base-v2", - }, - "learning_rate": { - "domain": tune.choice([1e-4, 1e-5]), - }, - "per_device_train_batch_size": { - "domain": 2, - }, - } + custom_hp = { + "model_path": { + "domain": "albert-base-v2", + }, + "learning_rate": { + "domain": tune.choice([1e-4, 1e-5]), + }, + "per_device_train_batch_size": { + "domain": 2, + }, } - automl_settings["starting_points"] = "data:test/nlp/default/" - automl.fit(X_train, y_train, **automl_settings) - assert len(automl._search_states[this_estimator_name].init_config[0]) == len( - automl._search_states[this_estimator_name]._search_space_domain - ) - len(automl_settings["custom_hp"][this_estimator_name]), ( + # Simulate a suggested starting point (e.g. 
from portfolio) which becomes invalid + # after custom_hp constrains the space. + invalid_starting_points = [ + { + "learning_rate": 1e-5, + "num_train_epochs": 1.0, + "per_device_train_batch_size": 8, + "seed": 43, + "global_max_steps": 100, + "model_path": "google/electra-base-discriminator", + } + ] + + search_state = SearchState( + learner_class=estimator_class, + data=X_train, + task=task, + starting_point=invalid_starting_points, + custom_hp=custom_hp, + max_iter=3, + budget=10, + ) + + assert search_state.init_config, "Expected a non-empty init_config list" + init_config0 = search_state.init_config[0] + assert init_config0 is not None + assert len(init_config0) == len(search_state._search_space_domain) - len(custom_hp), ( "The search space is updated with the custom_hp on {} hyperparameters of " "the specified estimator without an initial value. Thus a valid init config " "should only contain the cardinality of the search space minus {}".format( - len(automl_settings["custom_hp"][this_estimator_name]), - len(automl_settings["custom_hp"][this_estimator_name]), + len(custom_hp), + len(custom_hp), ) ) - assert automl._search_states[this_estimator_name].search_space["model_path"] == "albert-base-v2" + assert search_state.search_space["model_path"] == "albert-base-v2" if os.path.exists("test/data/output/"): try: @@ -106,7 +139,13 @@ def test_points_to_evaluate(): automl_settings["custom_hp"] = {"transformer_ms": {"model_path": {"domain": "google/electra-small-discriminator"}}} - automl.fit(X_train, y_train, **automl_settings) + try: + automl.fit(X_train, y_train, **automl_settings) + except OSError as e: + message = str(e) + if "Too Many Requests" in message or "rate limit" in message.lower(): + pytest.skip(f"Skipping HF model load/training: {message}") + raise if os.path.exists("test/data/output/"): try: @@ -141,7 +180,14 @@ def test_zero_shot_nomodel(): fit_kwargs = automl_settings.pop("fit_kwargs_by_estimator", {}).get(estimator_name) fit_kwargs.update(automl_settings) pop_args(fit_kwargs) - model.fit(X_train, y_train, **fit_kwargs) + + try: + model.fit(X_train, y_train, **fit_kwargs) + except OSError as e: + message = str(e) + if "Too Many Requests" in message or "rate limit" in message.lower(): + pytest.skip(f"Skipping HF model load/training: {message}") + raise if os.path.exists("test/data/output/"): try: diff --git a/test/object_store.py b/test/object_store.py index 6d32237b4..1b3f98502 100644 --- a/test/object_store.py +++ b/test/object_store.py @@ -7,7 +7,7 @@ from sklearn.model_selection import train_test_split from flaml import tune from flaml.automl.model import LGBMEstimator -data = fetch_california_housing(return_X_y=False, as_frame=True) +data = fetch_california_housing(return_X_y=False, as_frame=True, data_home="test") X, y = data.data, data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) X_train_ref = ray.put(X_train) diff --git a/test/reg.py b/test/reg.py index 795ab1c5c..1b758e2ca 100644 --- a/test/reg.py +++ b/test/reg.py @@ -11,7 +11,7 @@ automl_settings = { "task": "regression", "log_file_name": "test/california.log", } -X_train, y_train = fetch_california_housing(return_X_y=True) +X_train, y_train = fetch_california_housing(return_X_y=True, data_home="test") # Train with labeled input data automl.fit(X_train=X_train, y_train=y_train, **automl_settings) print(automl.model) diff --git a/test/spark/test_exceptions.py b/test/spark/test_exceptions.py index 13c265d37..63a22e625 100644 --- a/test/spark/test_exceptions.py 
+++ b/test/spark/test_exceptions.py
@@ -22,7 +22,7 @@ def base_automl(n_concurrent_trials=1, use_ray=False, use_spark=False, verbose=0
     except (ServerError, Exception):
         from sklearn.datasets import fetch_california_housing
 
-        X_train, y_train = fetch_california_housing(return_X_y=True)
+        X_train, y_train = fetch_california_housing(return_X_y=True, data_home="test")
     automl = AutoML()
     settings = {
         "time_budget": 3,  # total running time in seconds
diff --git a/test/spark/test_performance.py b/test/spark/test_performance.py
index febd1a4b8..030fcae49 100644
--- a/test/spark/test_performance.py
+++ b/test/spark/test_performance.py
@@ -2,8 +2,23 @@ import os
 import sys
 
 import pytest
-from minio.error import ServerError
-from openml.exceptions import OpenMLServerException
+
+try:
+    from minio.error import ServerError
+except ImportError:
+
+    class ServerError(Exception):
+        pass
+
+
+try:
+    from openml.exceptions import OpenMLServerException
+except ImportError:
+
+    class OpenMLServerException(Exception):
+        pass
+
+
 from requests.exceptions import ChunkedEncodingError, SSLError
 
 from flaml.tune.spark.utils import check_spark
diff --git a/test/test_autovw.py b/test/test_autovw.py
index 06140f435..100021d56 100644
--- a/test/test_autovw.py
+++ b/test/test_autovw.py
@@ -5,17 +5,39 @@ import sys
 import unittest
 
 import numpy as np
-import openml
+
+try:
+    import openml
+except ImportError:
+    openml = None
 import pandas as pd
 import pytest
 import scipy.sparse
-from minio.error import ServerError
+
+try:
+    from minio.error import ServerError
+except ImportError:
+
+    class ServerError(Exception):
+        pass
+
+
 from requests.exceptions import SSLError
 from sklearn.metrics import mean_absolute_error, mean_squared_error
 
 from flaml import AutoVW
 from flaml.tune import loguniform, polynomial_expansion_set
 
+try:
+    from vowpalwabbit import pyvw
+except ImportError:
+    skip_vw_test = True
+else:
+    skip_vw_test = False
+
+if openml is None:
+    pytest.skip("openml is not installed", allow_module_level=True)
+
 VW_DS_DIR = "test/data/"
 NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)
 logger = logging.getLogger(__name__)
@@ -351,14 +373,9 @@ def get_vw_tuning_problem(tuning_hp="NamesapceInteraction"):
     return vw_oml_problem_args, vw_online_aml_problem
 
 
-@pytest.mark.skipif(
-    "3.10" in sys.version or "3.11" in sys.version,
-    reason="do not run on py >= 3.10",
-)
+@pytest.mark.skipif(skip_vw_test, reason="vowpalwabbit not installed")
 class TestAutoVW(unittest.TestCase):
     def test_vw_oml_problem_and_vanilla_vw(self):
-        from vowpalwabbit import pyvw
-
         try:
             vw_oml_problem_args, vw_online_aml_problem = get_vw_tuning_problem()
         except (SSLError, ServerError, Exception) as e:
diff --git a/test/tune_example.py b/test/tune_example.py
index 9b4ba68fe..592f94762 100644
--- a/test/tune_example.py
+++ b/test/tune_example.py
@@ -6,12 +6,12 @@ from sklearn.model_selection import train_test_split
 from flaml import tune
 from flaml.automl.model import LGBMEstimator
 
-data = fetch_california_housing(return_X_y=False, as_frame=True)
+data = fetch_california_housing(return_X_y=False, as_frame=True, data_home="test")
 df, X, y = data.frame, data.data, data.target
 df_train, _, X_train, X_test, _, y_test = train_test_split(df, X, y, test_size=0.33, random_state=42)
 csv_file_name = "test/housing.csv"
 df_train.to_csv(csv_file_name, index=False)
-# X, y = fetch_california_housing(return_X_y=True, as_frame=True)
+# X, y = fetch_california_housing(return_X_y=True, as_frame=True, data_home="test")
 # X_train, X_test, y_train, y_test = train_test_split(
 #     X, y, test_size=0.33, random_state=42
 # )