From b645da3ea73f087791d2e86a56254835c8870402 Mon Sep 17 00:00:00 2001
From: Li Jiang
Date: Fri, 9 Feb 2024 09:08:24 +0800
Subject: [PATCH] Fix spark errors (#1274)

* Fix mlflow not found error

* Fix joblib>1.2.0 force-cancel error

* Remove joblib version constraint

* Add debug logging

* Improve joblib exception handling

* Add workflow permissions
---
 .github/workflows/deploy-website.yml |  3 +++
 .github/workflows/openai.yml         |  2 ++
 .github/workflows/pre-commit.yml     |  1 +
 .github/workflows/python-package.yml |  1 +
 flaml/tune/spark/utils.py            |  6 ++++++
 flaml/tune/tune.py                   | 10 ++++++----
 setup.py                             |  3 ---
 test/spark/test_0sparkml.py          |  2 +-
 8 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/deploy-website.yml b/.github/workflows/deploy-website.yml
index 90cdb13f0..1df66381c 100644
--- a/.github/workflows/deploy-website.yml
+++ b/.github/workflows/deploy-website.yml
@@ -17,6 +17,9 @@ on:
   merge_group:
     types: [checks_requested]
 
+permissions:
+  contents: write
+
 jobs:
   checks:
     if: github.event_name != 'push'
diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml
index 50c880c40..c7dc0f549 100644
--- a/.github/workflows/openai.yml
+++ b/.github/workflows/openai.yml
@@ -13,6 +13,8 @@ on:
       - 'notebook/autogen_chatgpt_gpt4.ipynb'
       - '.github/workflows/openai.yml'
 
+permissions: {}
+
 jobs:
   test:
     strategy:
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index b3abaf8b6..4cbf05ff1 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -10,6 +10,7 @@ defaults:
   run:
     shell: bash
 
+permissions: {}
 
 jobs:
   pre-commit-check:
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index be6863123..3ad30dba4 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -17,6 +17,7 @@ on:
   merge_group:
     types: [checks_requested]
 
+permissions: {}
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
diff --git a/flaml/tune/spark/utils.py b/flaml/tune/spark/utils.py
index b6c2dbcd1..8989ad2e8 100644
--- a/flaml/tune/spark/utils.py
+++ b/flaml/tune/spark/utils.py
@@ -286,6 +286,7 @@ class PySparkOvertimeMonitor:
     def __exit__(self, exc_type, exc_value, exc_traceback):
         """Exit the context manager.
         This will wait for the monitor thread to nicely exit."""
+        logger.debug(f"monitor exited: {exc_type}, {exc_value}, {exc_traceback}")
         if self._force_cancel and _have_spark:
             self._finished_flag = True
             self._monitor_daemon.join()
@@ -296,6 +297,11 @@ class PySparkOvertimeMonitor:
             if not exc_type:
                 return True
             elif exc_type == py4j.protocol.Py4JJavaError:
+                logger.debug("Py4JJavaError Exception: %s", exc_value)
+                return True
+            elif exc_type == TypeError:
+                # When force-cancelling, joblib>1.2.0 raises joblib.externals.loky.process_executor._ExceptionWithTraceback
+                logger.debug("TypeError Exception: %s", exc_value)
                 return True
             else:
                 return False
diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py
index c4475384f..5108d132c 100644
--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@@ -92,10 +92,12 @@ class ExperimentAnalysis(EA):
             feasible_index_filter = np.where(
                 feasible_value
                 <= max(
-                    f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric]
-                    if not isinstance(self.lexico_objectives["tolerances"][k_metric], str)
-                    else f_best[k_metric]
-                    * (1 + 0.01 * float(self.lexico_objectives["tolerances"][k_metric].replace("%", ""))),
+                    (
+                        f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric]
+                        if not isinstance(self.lexico_objectives["tolerances"][k_metric], str)
+                        else f_best[k_metric]
+                        * (1 + 0.01 * float(self.lexico_objectives["tolerances"][k_metric].replace("%", "")))
+                    ),
                     k_target,
                 )
             )[0]
diff --git a/setup.py b/setup.py
index 3c4c590ed..6104390d4 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,6 @@ setuptools.setup(
         "spark": [
             "pyspark>=3.2.0",
             "joblibspark>=0.5.0",
-            "joblib<1.3.0",  # temp solution for joblib 1.3.0 issue, no need once https://github.com/joblib/joblib-spark/pull/48 is merged
         ],
         "test": [
            "lightgbm>=2.3.1",
@@ -88,7 +87,6 @@ setuptools.setup(
             "pydantic==1.10.9",
             "sympy",
             "wolframalpha",
-            "joblib<1.3.0",  # temp solution for joblib 1.3.0 issue, no need once https://github.com/joblib/joblib-spark/pull/48 is merged
         ],
         "catboost": ["catboost>=0.26"],
         "blendsearch": [
@@ -153,7 +151,6 @@ setuptools.setup(
             "joblibspark>=0.5.0",
             "optuna==2.8.0",
             "pyspark>=3.2.0",
-            "joblib<1.3.0",  # temp solution for joblib 1.3.0 issue, no need once https://github.com/joblib/joblib-spark/pull/48 is merged
         ],
         "autozero": ["scikit-learn", "pandas", "packaging"],
     },
diff --git a/test/spark/test_0sparkml.py b/test/spark/test_0sparkml.py
index 1e4af808d..b5f33fc5b 100644
--- a/test/spark/test_0sparkml.py
+++ b/test/spark/test_0sparkml.py
@@ -27,7 +27,7 @@ else:
             f"com.microsoft.azure:synapseml_2.12:0.11.3{postfix_version}"
             "org.apache.hadoop:hadoop-azure:3.3.5,"
             "com.microsoft.azure:azure-storage:8.6.6,"
-            f"org.mlflow:mlflow-spark:{mlflow.__version__}"
+            "org.mlflow:mlflow-spark:2.6.0"
         ),
     )
     .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven")
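
Note on the flaml/tune/spark/utils.py changes above: a context manager suppresses an exception raised inside its with-block by returning True from __exit__, which is how PySparkOvertimeMonitor swallows the Py4JJavaError raised when Spark jobs are cancelled and the TypeError that joblib>1.2.0 can surface during a force cancel. Below is a minimal, self-contained sketch of that suppression pattern; the class name SuppressOnCancel and the exception types it lists are illustrative stand-ins, not FLAML or joblib APIs.

# Minimal sketch (not FLAML code) of the __exit__ suppression pattern used above.
# "SuppressOnCancel" and the listed exception types are illustrative only.
import logging

logger = logging.getLogger(__name__)


class SuppressOnCancel:
    """Swallow exceptions that are expected when a job is force-cancelled."""

    _expected = (TypeError, RuntimeError)  # stand-ins for cancel-time errors

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        logger.debug("exited: %s, %s", exc_type, exc_value)
        if exc_type is None:
            return True  # nothing was raised, nothing to suppress
        if issubclass(exc_type, self._expected):
            logger.debug("suppressing expected cancel-time error: %s", exc_value)
            return True  # returning True suppresses the exception
        return False  # anything else still propagates


with SuppressOnCancel():
    raise TypeError("simulated joblib force-cancel error")
print("TypeError was suppressed; execution continues")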
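
Note on the flaml/tune/tune.py hunk above: the reformatted expression computes, for each objective, a feasibility cutoff from the best value found so far and a tolerance that is either an absolute number or a percentage string such as "5%", and the cutoff never drops below the objective's target. The following standalone sketch shows that computation; the helper name lexico_threshold is hypothetical and used only for illustration.

# Sketch (illustrative, not a FLAML API) of the feasibility cutoff computed in flaml/tune/tune.py.
from typing import Union


def lexico_threshold(f_best: float, tolerance: Union[float, str], target: float) -> float:
    """Return the feasibility cutoff for one objective.

    tolerance is either an absolute slack (e.g. 0.02) or a percentage string (e.g. "5%").
    """
    if isinstance(tolerance, str):
        # Percentage tolerance: allow the best value to be exceeded by that fraction.
        slackened = f_best * (1 + 0.01 * float(tolerance.replace("%", "")))
    else:
        # Absolute tolerance: allow the best value plus a fixed slack.
        slackened = f_best + tolerance
    return max(slackened, target)


print(lexico_threshold(f_best=0.10, tolerance=0.02, target=0.0))  # absolute slack: ~0.12
print(lexico_threshold(f_best=0.10, tolerance="5%", target=0.0))  # 5% relative slack: ~0.105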