Files
FLAML/setup.py
Li Jiang 50334f2c52 Support spark dataframe as input dataset and spark models as estimators (#934)
* add basic support to Spark dataframe

add support to SynapseML LightGBM model

update to pyspark>=3.2.0 to leverage pandas_on_Spark API

* clean code, add TODOs

* add sample_train_data for pyspark.pandas dataframe, fix bugs

* improve some functions, fix bugs

* fix dict change size during iteration

* update model predict

* update LightGBM model, update test

* update SynapseML LightGBM params

* update synapseML and tests

* update TODOs

* Added support to roc_auc for spark models

* Added support to score of spark estimator

* Added test for automl score of spark estimator

* Added cv support to pyspark.pandas dataframe

* Update test, fix bugs

* Added tests

* Updated docs, tests, added a notebook

* Fix bugs in non-spark env

* Fix bugs and improve tests

* Fix uninstall pyspark

* Fix tests error

* Fix java.lang.OutOfMemoryError: Java heap space

* Fix test_performance

* Update test_sparkml to test_0sparkml to use the expected spark conf

* Remove unnecessary widgets in notebook

* Fix iloc java.lang.StackOverflowError

* fix pre-commit

* Added params check for spark dataframes

* Refactor code for train_test_split to a function

* Update train_test_split_pyspark

* Refactor if-else, remove unnecessary code

* Remove y from predict, remove mem control from n_iter compute

* Update workflow

* Improve _split_pyspark

* Fix test failure of too short training time

* Fix typos, improve docstrings

* Fix index errors of pandas_on_spark, add spark loss metric

* Fix typo of ndcgAtK

* Update NDCG metrics and tests

* Remove unnecessary logger

* Use cache and count to ensure consistent indexes

* refactor for merge main

* fix errors of refactor

* Updated SparkLightGBMEstimator and cache

* Updated config2params

* Remove unused import

* Fix unknown parameters

* Update default_estimator_list

* Add unit tests for spark metrics
2023-03-25 19:59:46 +00:00

133 lines
3.6 KiB
Python

import setuptools
import os
# Directory containing this setup script; every file below is resolved against
# it so the build does not depend on the current working directory.
here = os.path.abspath(os.path.dirname(__file__))

# Read the README text into long_description.
with open(os.path.join(here, "README.md"), "r", encoding="UTF-8") as fh:
    long_description = fh.read()

# Get the code version
# flaml/version.py is executed into a scratch dict and __version__ is read
# back out of it.
version = {}
with open(os.path.join(here, "flaml/version.py"), encoding="UTF-8") as fp:
    exec(fp.read(), version)
__version__ = version["__version__"]
# Requirement specifiers for the core dependencies, one per line.
install_requires = [
    "NumPy>=1.17.0rc1",
    "lightgbm>=2.3.1",
    "xgboost>=0.90",
    "scipy>=1.4.1",
    "pandas>=1.1.4",
    "scikit-learn>=0.24",
]
# Requirement lists that are reused by more than one extra below.
_hf_requires = [
    "transformers[torch]==4.26",
    "datasets",
    "nltk",
    "rouge_score",
    "seqeval",
]
_ts_forecast_requires = [
    "holidays<0.14",  # to prevent installation error for prophet
    "prophet>=1.0.1",
    "statsmodels>=0.12.2",
    "hcrystalball==0.1.10",
]

# Optional dependency groups, keyed by extra name.
extras_require = {
    "notebook": [
        "jupyter",
        "matplotlib",
        "openml==0.10.2",
    ],
    "spark": [
        "pyspark>=3.2.0",
        "joblibspark>=0.5.0",
    ],
    "test": [
        "flake8>=3.8.4",
        "thop",
        "pytest>=6.1.1",
        "coverage>=5.3",
        "pre-commit",
        "torch",
        "torchvision",
        "catboost>=0.26",
        "rgf-python",
        "optuna==2.8.0",
        "openml==0.10.2",
        "statsmodels>=0.12.2",
        "psutil==5.8.0",
        "dataclasses",
        "transformers[torch]",
        "datasets",
        "nltk",
        "rouge_score",
        "hcrystalball==0.1.10",
        "seqeval",
        "pytorch-forecasting>=0.9.0,<=0.10.1",
        "mlflow",
        "pyspark>=3.2.0",
        "joblibspark>=0.5.0",
        "nbconvert",
        "nbformat",
        "ipykernel",
        "pytorch-lightning<1.9.1",  # test_forecast_panel
    ],
    "catboost": ["catboost>=0.26"],
    "blendsearch": ["optuna==2.8.0"],
    "ray": [
        "ray[tune]~=1.13",
    ],
    "azureml": [
        "azureml-mlflow",
    ],
    "nni": [
        "nni",
    ],
    "vw": [
        "vowpalwabbit>=8.10.0, <9.0.0",
    ],
    "hf": list(_hf_requires),
    # for backward compatibility; hf is the new option name
    "nlp": list(_hf_requires),
    "ts_forecast": list(_ts_forecast_requires),
    # same as ts_forecast plus pytorch-forecasting
    "forecast": _ts_forecast_requires + ["pytorch-forecasting>=0.9.0"],
    "benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3"],
    "openai": ["openai==0.23.1", "diskcache", "optuna==2.8.0"],
    "synapse": ["joblibspark>=0.5.0", "optuna==2.8.0", "pyspark>=3.2.0"],
}

# Package metadata and build configuration for FLAML.
setuptools.setup(
    name="FLAML",
    version=__version__,
    author="Microsoft Corporation",
    author_email="hpo@microsoft.com",
    description="A fast library for automated machine learning and tuning",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/microsoft/FLAML",
    packages=setuptools.find_packages(include=["flaml*"]),
    package_data={
        "flaml.default": ["*/*.json"],
    },
    include_package_data=True,
    install_requires=install_requires,
    extras_require=extras_require,
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.6",
)