Files
FLAML/.github/workflows/python-package.yml
Li Jiang 50334f2c52 Support spark dataframe as input dataset and spark models as estimators (#934)
* add basic support to Spark dataframe

add support to SynapseML LightGBM model

update to pyspark>=3.2.0 to leverage pandas_on_Spark API

* clean code, add TODOs

* add sample_train_data for pyspark.pandas dataframe, fix bugs

* improve some functions, fix bugs

* fix dict change size during iteration

* update model predict

* update LightGBM model, update test

* update SynapseML LightGBM params

* update synapseML and tests

* update TODOs

* Added support to roc_auc for spark models

* Added support to score of spark estimator

* Added test for automl score of spark estimator

* Added cv support to pyspark.pandas dataframe

* Update test, fix bugs

* Added tests

* Updated docs, tests, added a notebook

* Fix bugs in non-spark env

* Fix bugs and improve tests

* Fix uninstall pyspark

* Fix tests error

* Fix java.lang.OutOfMemoryError: Java heap space

* Fix test_performance

* Update test_sparkml to test_0sparkml to use the expected spark conf

* Remove unnecessary widgets in notebook

* Fix iloc java.lang.StackOverflowError

* fix pre-commit

* Added params check for spark dataframes

* Refactor code for train_test_split to a function

* Update train_test_split_pyspark

* Refactor if-else, remove unnecessary code

* Remove y from predict, remove mem control from n_iter compute

* Update workflow

* Improve _split_pyspark

* Fix test failure of too short training time

* Fix typos, improve docstrings

* Fix index errors of pandas_on_spark, add spark loss metric

* Fix typo of ndcgAtK

* Update NDCG metrics and tests

* Remove unuseful logger

* Use cache and count to ensure consistent indexes

* refactor for merge maain

* fix errors of refactor

* Updated SparkLightGBMEstimator and cache

* Updated config2params

* Remove unused import

* Fix unknown parameters

* Update default_estimator_list

* Add unit tests for spark metrics
2023-03-25 19:59:46 +00:00

127 lines
4.8 KiB
YAML

# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Build
on:
push:
branches: ['main']
paths:
- 'flaml/**'
- 'test/**'
- 'notebook/**'
- '.github/workflows/python-package.yml'
- 'setup.py'
pull_request:
branches: ['main']
merge_group:
types: [checks_requested]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
python-version: ["3.7", "3.8", "3.9", "3.10"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: On mac + python 3.10, install libomp to facilitate lgbm and xgboost install
if: matrix.os == 'macOS-latest' && matrix.python-version == '3.10'
run: |
# remove libomp version constraint after xgboost works with libomp>11.1.0 on python 3.10
wget https://raw.githubusercontent.com/Homebrew/homebrew-core/679923b4eb48a8dc7ecc1f05d06063cd79b3fc00/Formula/libomp.rb -O $(find $(brew --repository) -name libomp.rb)
brew unlink libomp
brew install libomp
export CC=/usr/bin/clang
export CXX=/usr/bin/clang++
export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp"
export CFLAGS="$CFLAGS -I/usr/local/opt/libomp/include"
export CXXFLAGS="$CXXFLAGS -I/usr/local/opt/libomp/include"
export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib -L/usr/local/opt/libomp/lib -lomp"
- name: On Linux + python 3.8, install pyspark 3.2.3
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.8'
run: |
python -m pip install --upgrade pip wheel
pip install pyspark==3.2.3
- name: Install packages and dependencies
run: |
python -m pip install --upgrade pip wheel
pip install -e .
python -c "import flaml"
pip install -e .[test]
pip list | grep "pyspark"
- name: If linux, install ray 2
if: matrix.os == 'ubuntu-latest'
run: |
pip install ray[tune]
- name: If mac, install ray
if: matrix.os == 'macOS-latest'
run: |
pip install -e .[ray]
- name: If linux or mac, install prophet on python < 3.9
if: (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') && matrix.python-version != '3.9' && matrix.python-version != '3.10'
run: |
pip install -e .[forecast]
- name: Install vw on python < 3.10
if: matrix.python-version != '3.10'
run: |
pip install -e .[vw]
- name: Uninstall pyspark on python 3.9
if: matrix.python-version == '3.9'
run: |
# Uninstall pyspark to test env without pyspark
pip uninstall -y pyspark
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
if: (matrix.python-version != '3.7' || matrix.os == 'macos-latest') && matrix.python-version != '3.10'
run: |
pytest test
- name: Coverage
if: (matrix.python-version == '3.7') && matrix.os != 'macos-latest' || matrix.python-version == '3.10'
run: |
pip install coverage
coverage run -a -m pytest test
coverage xml
- name: Upload coverage to Codecov
if: (matrix.python-version == '3.7') && matrix.os != 'macos-latest' || matrix.python-version == '3.10'
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
# docs:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - name: Setup Python
# uses: actions/setup-python@v4
# with:
# python-version: '3.8'
# - name: Compile documentation
# run: |
# pip install -e .
# python -m pip install sphinx sphinx_rtd_theme
# cd docs
# make html
# - name: Deploy to GitHub pages
# if: ${{ github.ref == 'refs/heads/main' }}
# uses: JamesIves/github-pages-deploy-action@3.6.2
# with:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# BRANCH: gh-pages
# FOLDER: docs/_build/html
# CLEAN: true