From cd9740f022defa065578d9f1709195c9ac439dff Mon Sep 17 00:00:00 2001
From: Chi Wang <wang.chi@microsoft.com>
Date: Wed, 5 Jan 2022 13:49:12 -0800
Subject: [PATCH] Fix several issues for nlp tasks (#380)

* num cpu issue #378;
* temp fix for ray issue #379;
* transformers version.
---
 flaml/automl.py  | 117 +++++++++++++++++++++++++----------------------
 flaml/model.py   |   3 +-
 flaml/version.py |   2 +-
 setup.py         |  11 ++++-
 4 files changed, 73 insertions(+), 60 deletions(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index 0b9353b0c..15a677657 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -45,7 +45,6 @@ from .data import (
     FORECAST,
     REGRESSION,
     _is_nlp_task,
-    SUMMARIZATION,
     NLG_TASKS,
 )
 from . import tune
@@ -322,60 +321,61 @@ class AutoMLState:
             if self.time_budget is None
             else self.time_budget - self.time_from_start
         )
-        if self.resources_per_trial.get("gpu", 0) > 0:
+        # if self.resources_per_trial.get("gpu", 0) > 0:
 
-            def _trainable_function_wrapper(config: dict):
+        #     def _trainable_function_wrapper(config: dict):
 
-                return_estimator, train_time = train_estimator(
-                    X_train=sampled_X_train,
-                    y_train=sampled_y_train,
-                    config_dic=config,
-                    task=self.task,
-                    estimator_name=estimator,
-                    n_jobs=self.n_jobs,
-                    estimator_class=self.learner_classes.get(estimator),
-                    budget=budget,
-                    fit_kwargs=self.fit_kwargs,
-                )
-                return {"estimator": return_estimator, "train_time": train_time}
+        #         return_estimator, train_time = train_estimator(
+        #             X_train=sampled_X_train,
+        #             y_train=sampled_y_train,
+        #             config_dic=config,
+        #             task=self.task,
+        #             estimator_name=estimator,
+        #             n_jobs=self.n_jobs,
+        #             estimator_class=self.learner_classes.get(estimator),
+        #             budget=budget,
+        #             fit_kwargs=self.fit_kwargs,
+        #         )
+        #         return {"estimator": return_estimator, "train_time": train_time}
 
-            if estimator not in self.learner_classes:
-                self.learner_classes[estimator] = get_estimator_class(
-                    self.task, estimator
-                )
+        #     if estimator not in self.learner_classes:
+        #         self.learner_classes[estimator] = get_estimator_class(
+        #             self.task, estimator
+        #         )
 
-            analysis = tune.run(
-                _trainable_function_wrapper,
-                config=config_w_resource,
-                metric="train_time",
-                mode="min",
-                resources_per_trial=self.resources_per_trial,
-                num_samples=1,
-                use_ray=True,
-            )
-            result = list(analysis.results.values())[0]
-            estimator, train_time = result["estimator"], result["train_time"]
+        #     analysis = tune.run(
+        #         _trainable_function_wrapper,
+        #         config=config_w_resource,
+        #         metric="train_time",
+        #         mode="min",
+        #         resources_per_trial=self.resources_per_trial,
+        #         num_samples=1,
+        #         use_ray=True,
+        #     )
+        #     result = list(analysis.results.values())[0]
+        #     estimator, train_time = result["estimator"], result["train_time"]
 
-        else:
-            if _is_nlp_task(self.task):
-                use_ray = self.fit_kwargs.get("use_ray")
-                self.fit_kwargs["use_ray"] = False
-            estimator, train_time = train_estimator(
-                X_train=sampled_X_train,
-                y_train=sampled_y_train,
-                config_dic=config,
-                task=self.task,
-                estimator_name=estimator,
-                n_jobs=self.n_jobs,
-                estimator_class=self.learner_classes.get(estimator),
-                budget=budget,
-                fit_kwargs=self.fit_kwargs,
-            )
-            if _is_nlp_task(self.task):
-                if use_ray:
-                    self.fit_kwargs["use_ray"] = use_ray
-                else:
-                    del self.fit_kwargs["use_ray"]
+        # else:
+        if _is_nlp_task(self.task):
+            use_ray = self.fit_kwargs.get("use_ray")
+            self.fit_kwargs["use_ray"] = False
+        # TODO: limit number of GPUs
+        estimator, train_time = train_estimator(
+            X_train=sampled_X_train,
+            y_train=sampled_y_train,
+            config_dic=config,
+            task=self.task,
+            estimator_name=estimator,
+            n_jobs=self.n_jobs,
+            estimator_class=self.learner_classes.get(estimator),
+            budget=budget,
+            fit_kwargs=self.fit_kwargs,
+        )
+        if _is_nlp_task(self.task):
+            if use_ray is None:
+                del self.fit_kwargs["use_ray"]
+            else:
+                self.fit_kwargs["use_ray"] = use_ray
         if sampled_weight is not None:
             self.fit_kwargs["sample_weight"] = weight
         return estimator, train_time
@@ -1984,11 +1984,18 @@ class AutoML(BaseEstimator):
         self._use_ray = use_ray or n_concurrent_trials > 1
         # use the following condition if we have an estimation of average_trial_time and average_trial_overhead
         # self._use_ray = use_ray or n_concurrent_trials > ( average_trail_time + average_trial_overhead) / (average_trial_time)
-        self._state.resources_per_trial = (
-            {"cpu": int(os.cpu_count() / n_concurrent_trials), "gpu": gpu_per_trial}
-            if n_jobs < 0
-            else {"cpu": n_jobs, "gpu": gpu_per_trial}
-        )
+        if self._use_ray:
+            import ray
+
+            n_cpus = use_ray and ray.available_resources()["CPU"] or os.cpu_count()
+            self._state.resources_per_trial = (
+                # when using gpu, default cpu is 1 per job; otherwise, default cpu is n_cpus / n_concurrent_trials
+                {"cpu": max(int(n_cpus / n_concurrent_trials), 1), "gpu": gpu_per_trial}
+                if gpu_per_trial == 0
+                else {"cpu": 1, "gpu": gpu_per_trial}
+                if n_jobs < 0
+                else {"cpu": n_jobs, "gpu": gpu_per_trial}
+            )
         self._retrain_in_budget = retrain_full == "budget" and (
             eval_method == "holdout" and self._state.X_val is None
         )
diff --git a/flaml/model.py b/flaml/model.py
index 423b64603..e6be73307 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -447,8 +447,7 @@ class TransformersEstimator(BaseEstimator):
 
         self._init_hpo_args(kwargs)
         self._metric = kwargs["metric"]
-        if hasattr(self, "use_ray") is False:
-            self.use_ray = kwargs["use_ray"]
+        self.use_ray = kwargs.get("use_ray")
 
         X_val = kwargs.get("X_val")
         y_val = kwargs.get("y_val")
diff --git a/flaml/version.py b/flaml/version.py
index c5981731c..e94731c0f 100644
--- a/flaml/version.py
+++ b/flaml/version.py
@@ -1 +1 @@
-__version__ = "0.9.3"
+__version__ = "0.9.4"
diff --git a/setup.py b/setup.py
index 7750f2767..d82472dc2 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@ setuptools.setup(
             "statsmodels>=0.12.2",
             "psutil==5.8.0",
             "dataclasses",
-            "transformers",
+            "transformers>=4.14",
             "datasets",
             "torch",
             "nltk",
@@ -77,7 +77,14 @@ setuptools.setup(
         "vw": [
             "vowpalwabbit",
         ],
-        "nlp": ["transformers", "datasets", "torch", "seqeval", "nltk", "rouge_score"],
+        "nlp": [
+            "transformers>=4.14",
+            "datasets",
+            "torch",
+            "seqeval",
+            "nltk",
+            "rouge_score",
+        ],
         "ts_forecast": ["prophet>=1.0.1", "statsmodels>=0.12.2"],
         "forecast": ["prophet>=1.0.1", "statsmodels>=0.12.2"],
         "benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3"],