From b645da3ea73f087791d2e86a56254835c8870402 Mon Sep 17 00:00:00 2001
From: Li Jiang
Date: Fri, 9 Feb 2024 09:08:24 +0800
Subject: [PATCH] Fix spark errors (#1274)

* Fix mlflow not found error

* Fix joblib>1.2.0 force-cancel error

* Remove joblib version constraint

* Add debug logging

* Improve joblib exception handling

* Add workflow permissions
---
 .github/workflows/deploy-website.yml |  3 +++
 .github/workflows/openai.yml         |  2 ++
 .github/workflows/pre-commit.yml     |  1 +
 .github/workflows/python-package.yml |  1 +
 flaml/tune/spark/utils.py            |  6 ++++++
 flaml/tune/tune.py                   | 10 ++++++----
 setup.py                             |  3 ---
 test/spark/test_0sparkml.py          |  2 +-
 8 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/deploy-website.yml b/.github/workflows/deploy-website.yml
index 90cdb13f0..1df66381c 100644
--- a/.github/workflows/deploy-website.yml
+++ b/.github/workflows/deploy-website.yml
@@ -17,6 +17,9 @@ on:
   merge_group:
     types: [checks_requested]
 
+permissions:
+  contents: write
+
 jobs:
   checks:
     if: github.event_name != 'push'
diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml
index 50c880c40..c7dc0f549 100644
--- a/.github/workflows/openai.yml
+++ b/.github/workflows/openai.yml
@@ -13,6 +13,8 @@ on:
       - 'notebook/autogen_chatgpt_gpt4.ipynb'
       - '.github/workflows/openai.yml'
 
+permissions: {}
+
 jobs:
   test:
     strategy:
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index b3abaf8b6..4cbf05ff1 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -10,6 +10,7 @@ defaults:
   run:
     shell: bash
 
+permissions: {}
 
 jobs:
   pre-commit-check:
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index be6863123..3ad30dba4 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -17,6 +17,7 @@ on:
   merge_group:
     types: [checks_requested]
 
+permissions: {}
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
diff --git a/flaml/tune/spark/utils.py b/flaml/tune/spark/utils.py
index b6c2dbcd1..8989ad2e8 100644
--- a/flaml/tune/spark/utils.py
+++ b/flaml/tune/spark/utils.py
@@ -286,6 +286,7 @@ class PySparkOvertimeMonitor:
     def __exit__(self, exc_type, exc_value, exc_traceback):
         """Exit the context manager.
         This will wait for the monitor thread to nicely exit."""
+        logger.debug(f"monitor exited: {exc_type}, {exc_value}, {exc_traceback}")
         if self._force_cancel and _have_spark:
             self._finished_flag = True
             self._monitor_daemon.join()
@@ -296,6 +297,11 @@ class PySparkOvertimeMonitor:
             if not exc_type:
                 return True
             elif exc_type == py4j.protocol.Py4JJavaError:
+                logger.debug("Py4JJavaError Exception: %s", exc_value)
+                return True
+            elif exc_type == TypeError:
+                # When force-cancelling, joblib>1.2.0 raises joblib.externals.loky.process_executor._ExceptionWithTraceback
+                logger.debug("TypeError Exception: %s", exc_value)
                 return True
             else:
                 return False
diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py
index c4475384f..5108d132c 100644
--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@@ -92,10 +92,12 @@ class ExperimentAnalysis(EA):
             feasible_index_filter = np.where(
                 feasible_value
                 <= max(
-                    f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric]
-                    if not isinstance(self.lexico_objectives["tolerances"][k_metric], str)
-                    else f_best[k_metric]
-                    * (1 + 0.01 * float(self.lexico_objectives["tolerances"][k_metric].replace("%", ""))),
+                    (
+                        f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric]
+                        if not isinstance(self.lexico_objectives["tolerances"][k_metric], str)
+                        else f_best[k_metric]
+                        * (1 + 0.01 * float(self.lexico_objectives["tolerances"][k_metric].replace("%", "")))
+                    ),
                     k_target,
                 )
             )[0]
diff --git a/setup.py b/setup.py
index 3c4c590ed..6104390d4 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,6 @@ setuptools.setup(
         "spark": [
             "pyspark>=3.2.0",
             "joblibspark>=0.5.0",
-            "joblib<1.3.0",  # temp solution for joblib 1.3.0 issue, no need once https://github.com/joblib/joblib-spark/pull/48 is merged
         ],
         "test": [
            "lightgbm>=2.3.1",
@@ -88,7 +87,6 @@ setuptools.setup(
             "pydantic==1.10.9",
             "sympy",
             "wolframalpha",
-            "joblib<1.3.0",  # temp solution for joblib 1.3.0 issue, no need once https://github.com/joblib/joblib-spark/pull/48 is merged
         ],
         "catboost": ["catboost>=0.26"],
         "blendsearch": [
@@ -153,7 +151,6 @@ setuptools.setup(
             "joblibspark>=0.5.0",
             "optuna==2.8.0",
             "pyspark>=3.2.0",
-            "joblib<1.3.0",  # temp solution for joblib 1.3.0 issue, no need once https://github.com/joblib/joblib-spark/pull/48 is merged
         ],
         "autozero": ["scikit-learn", "pandas", "packaging"],
     },
diff --git a/test/spark/test_0sparkml.py b/test/spark/test_0sparkml.py
index 1e4af808d..b5f33fc5b 100644
--- a/test/spark/test_0sparkml.py
+++ b/test/spark/test_0sparkml.py
@@ -27,7 +27,7 @@ else:
             f"com.microsoft.azure:synapseml_2.12:0.11.3{postfix_version}"
             "org.apache.hadoop:hadoop-azure:3.3.5,"
             "com.microsoft.azure:azure-storage:8.6.6,"
-            f"org.mlflow:mlflow-spark:{mlflow.__version__}"
+            "org.mlflow:mlflow-spark:2.6.0"
         ),
     )
     .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven")
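
Note on the flaml/tune/spark/utils.py changes above: a context manager suppresses an exception raised inside its with-block by returning True from __exit__, which is how PySparkOvertimeMonitor swallows the Py4JJavaError raised when Spark jobs are cancelled and the TypeError that joblib>1.2.0 can surface during a force cancel. Below is a minimal, self-contained sketch of that suppression pattern; the class name SuppressOnCancel and the exception types it lists are illustrative stand-ins, not FLAML or joblib APIs.

# Minimal sketch (not FLAML code) of the __exit__ suppression pattern used above.
# "SuppressOnCancel" and the listed exception types are illustrative only.
import logging

logger = logging.getLogger(__name__)


class SuppressOnCancel:
    """Swallow exceptions that are expected when a job is force-cancelled."""

    _expected = (TypeError, RuntimeError)  # stand-ins for cancel-time errors

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        logger.debug("exited: %s, %s", exc_type, exc_value)
        if exc_type is None:
            return True  # nothing was raised, nothing to suppress
        if issubclass(exc_type, self._expected):
            logger.debug("suppressing expected cancel-time error: %s", exc_value)
            return True  # returning True suppresses the exception
        return False  # anything else still propagates


with SuppressOnCancel():
    raise TypeError("simulated joblib force-cancel error")
print("TypeError was suppressed; execution continues")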
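
Note on the flaml/tune/tune.py hunk above: the reformatted expression computes, for each objective, a feasibility cutoff from the best value found so far and a tolerance that is either an absolute number or a percentage string such as "5%", and the cutoff never drops below the objective's target. The following standalone sketch shows that computation; the helper name lexico_threshold is hypothetical and used only for illustration.

# Sketch (illustrative, not a FLAML API) of the feasibility cutoff computed in flaml/tune/tune.py.
from typing import Union


def lexico_threshold(f_best: float, tolerance: Union[float, str], target: float) -> float:
    """Return the feasibility cutoff for one objective.

    tolerance is either an absolute slack (e.g. 0.02) or a percentage string (e.g. "5%").
    """
    if isinstance(tolerance, str):
        # Percentage tolerance: allow the best value to be exceeded by that fraction.
        slackened = f_best * (1 + 0.01 * float(tolerance.replace("%", "")))
    else:
        # Absolute tolerance: allow the best value plus a fixed slack.
        slackened = f_best + tolerance
    return max(slackened, target)


print(lexico_threshold(f_best=0.10, tolerance=0.02, target=0.0))  # absolute slack: ~0.12
print(lexico_threshold(f_best=0.10, tolerance="5%", target=0.0))  # 5% relative slack: ~0.105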