diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 161b21ca8..98f7495c8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -36,7 +36,7 @@ repos:
     - id: black
 
   - repo: https://github.com/executablebooks/mdformat
-    rev: 0.7.17
+    rev: 0.7.22
     hooks:
       - id: mdformat
         additional_dependencies:
diff --git a/NOTICE.md b/NOTICE.md
index 839201ff2..4da9e0e9e 100644
--- a/NOTICE.md
+++ b/NOTICE.md
@@ -4,8 +4,8 @@ This repository incorporates material as listed below or described in the code.
 
 ## Component. Ray.
 
-Code in tune/\[analysis.py, sample.py, trial.py, result.py\],
-searcher/\[suggestion.py, variant_generator.py\], and scheduler/trial_scheduler.py is adapted from
+Code in tune/[analysis.py, sample.py, trial.py, result.py],
+searcher/[suggestion.py, variant_generator.py], and scheduler/trial_scheduler.py is adapted from
 https://github.com/ray-project/ray/blob/master/python/ray/tune/
 
 ## Open Source License/Copyright Notice.
diff --git a/README.md b/README.md
index 8151fc7cd..18dc0b7f9 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ FLAML has a .NET implementation in [ML.NET](http://dot.net/ml), an open-source,
 
 ## Installation
 
-The latest version of FLAML requires **Python >= 3.10 and \< 3.13**. While other Python versions may work for core components, full model support is not guaranteed. FLAML can be installed via `pip`:
+The latest version of FLAML requires **Python >= 3.10 and < 3.13**. While other Python versions may work for core components, full model support is not guaranteed. FLAML can be installed via `pip`:
 
 ```bash
 pip install flaml
diff --git a/SECURITY.md b/SECURITY.md
index 9657262ba..105419313 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -12,7 +12,7 @@ If you believe you have found a security vulnerability in any Microsoft-owned re
 
 Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
 
-If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).  If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
+If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
 
 You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
 
diff --git a/flaml/automl/ml.py b/flaml/automl/ml.py
index 9ca3ee728..1bb33b17d 100644
--- a/flaml/automl/ml.py
+++ b/flaml/automl/ml.py
@@ -311,14 +311,14 @@ def get_y_pred(estimator, X, eval_metric, task: Task):
     else:
         y_pred = estimator.predict(X)
 
-    if isinstance(y_pred, Series) or isinstance(y_pred, DataFrame):
+    if isinstance(y_pred, (Series, DataFrame)):
         y_pred = y_pred.values
 
     return y_pred
 
 
 def to_numpy(x):
-    if isinstance(x, Series or isinstance(x, DataFrame)):
+    if isinstance(x, (Series, DataFrame)):
         x = x.values
     else:
         x = np.ndarray(x)
@@ -586,7 +586,7 @@ def _eval_estimator(
 
         # TODO: why are integer labels being cast to str in the first place?
 
-        if isinstance(val_pred_y, Series) or isinstance(val_pred_y, DataFrame) or isinstance(val_pred_y, np.ndarray):
+        if isinstance(val_pred_y, (Series, DataFrame, np.ndarray)):
             test = val_pred_y if isinstance(val_pred_y, np.ndarray) else val_pred_y.values
             if not np.issubdtype(test.dtype, np.number):
                 # some NLP models return a list
diff --git a/flaml/automl/nlp/utils.py b/flaml/automl/nlp/utils.py
index 603e23f67..5eb306630 100644
--- a/flaml/automl/nlp/utils.py
+++ b/flaml/automl/nlp/utils.py
@@ -25,9 +25,7 @@ def load_default_huggingface_metric_for_task(task):
 
 
 def is_a_list_of_str(this_obj):
-    return (isinstance(this_obj, list) or isinstance(this_obj, np.ndarray)) and all(
-        isinstance(x, str) for x in this_obj
-    )
+    return isinstance(this_obj, (list, np.ndarray)) and all(isinstance(x, str) for x in this_obj)
 
 
 def _clean_value(value: Any) -> str:
diff --git a/flaml/automl/state.py b/flaml/automl/state.py
index a5897f723..e5469e1eb 100644
--- a/flaml/automl/state.py
+++ b/flaml/automl/state.py
@@ -37,10 +37,9 @@ class SearchState:
         if isinstance(domain_one_dim, sample.Domain):
             renamed_type = list(inspect.signature(domain_one_dim.is_valid).parameters.values())[0].annotation
             type_match = (
-                renamed_type == Any
+                renamed_type is Any
                 or isinstance(value_one_dim, renamed_type)
-                or isinstance(value_one_dim, int)
-                and renamed_type is float
+                or (renamed_type is float and isinstance(value_one_dim, int))
             )
             if not (type_match and domain_one_dim.is_valid(value_one_dim)):
                 return False
diff --git a/flaml/automl/task/time_series_task.py b/flaml/automl/task/time_series_task.py
index cd69577a3..9f1684089 100644
--- a/flaml/automl/task/time_series_task.py
+++ b/flaml/automl/task/time_series_task.py
@@ -386,9 +386,8 @@ class TimeSeriesTask(Task):
         return X
 
     def preprocess(self, X, transformer=None):
-        if isinstance(X, pd.DataFrame) or isinstance(X, np.ndarray) or isinstance(X, pd.Series):
-            X = X.copy()
-            X = normalize_ts_data(X, self.target_names, self.time_col)
+        if isinstance(X, (pd.DataFrame, np.ndarray, pd.Series)):
+            X = normalize_ts_data(X.copy(), self.target_names, self.time_col)
             return self._preprocess(X, transformer)
         elif isinstance(X, int):
             return X
diff --git a/flaml/automl/time_series/ts_data.py b/flaml/automl/time_series/ts_data.py
index 5e2b60368..625049c60 100644
--- a/flaml/automl/time_series/ts_data.py
+++ b/flaml/automl/time_series/ts_data.py
@@ -546,14 +546,12 @@ def normalize_ts_data(X_train_all, target_names, time_col, y_train_all=None):
 
 
 def validate_data_basic(X_train_all, y_train_all):
-    assert isinstance(X_train_all, np.ndarray) or issparse(X_train_all) or isinstance(X_train_all, pd.DataFrame), (
-        "X_train_all must be a numpy array, a pandas dataframe, " "or Scipy sparse matrix."
-    )
+    assert isinstance(X_train_all, (np.ndarray, DataFrame)) or issparse(
+        X_train_all
+    ), "X_train_all must be a numpy array, a pandas dataframe, or Scipy sparse matrix."
 
-    assert (
-        isinstance(y_train_all, np.ndarray)
-        or isinstance(y_train_all, pd.Series)
-        or isinstance(y_train_all, pd.DataFrame)
+    assert isinstance(
+        y_train_all, (np.ndarray, pd.Series, pd.DataFrame)
     ), "y_train_all must be a numpy array or a pandas series or DataFrame."
 
     assert X_train_all.size != 0 and y_train_all.size != 0, "Input data must not be empty, use None if no data"
diff --git a/flaml/onlineml/README.md b/flaml/onlineml/README.md
index 36926ba16..0aa505e07 100644
--- a/flaml/onlineml/README.md
+++ b/flaml/onlineml/README.md
@@ -1,6 +1,6 @@
 # ChaCha for Online AutoML
 
-FLAML includes *ChaCha* which is an automatic hyperparameter tuning solution for online machine learning. Online machine learning has the following properties: (1) data comes in sequential order; and (2) the performance of the machine learning model is evaluated online, i.e., at every iteration. *ChaCha* performs online AutoML respecting the aforementioned properties of online learning, and at the same time respecting the following constraints: (1) only a small constant number of 'live' models are allowed to perform online learning at the same time;  and (2) no model persistence or offline training is allowed, which means that once we decide to replace a 'live' model with a new one, the replaced model can no longer be retrieved.
+FLAML includes *ChaCha* which is an automatic hyperparameter tuning solution for online machine learning. Online machine learning has the following properties: (1) data comes in sequential order; and (2) the performance of the machine learning model is evaluated online, i.e., at every iteration. *ChaCha* performs online AutoML respecting the aforementioned properties of online learning, and at the same time respecting the following constraints: (1) only a small constant number of 'live' models are allowed to perform online learning at the same time; and (2) no model persistence or offline training is allowed, which means that once we decide to replace a 'live' model with a new one, the replaced model can no longer be retrieved.
 
 For more technical details about *ChaCha*, please check our paper.
 
diff --git a/flaml/tune/searcher/flow2.py b/flaml/tune/searcher/flow2.py
index 4764c80d6..7ccf6fa6b 100644
--- a/flaml/tune/searcher/flow2.py
+++ b/flaml/tune/searcher/flow2.py
@@ -641,8 +641,10 @@ class FLOW2(Searcher):
             else:
                 # key must be in space
                 domain = space[key]
-                if self.hierarchical and not (
-                    domain is None or type(domain) in (str, int, float) or isinstance(domain, sample.Domain)
+                if (
+                    self.hierarchical
+                    and domain is not None
+                    and not isinstance(domain, (str, int, float, sample.Domain))
                 ):
                     # not domain or hashable
                     # get rid of list type for hierarchical search space.
diff --git a/flaml/tune/searcher/online_searcher.py b/flaml/tune/searcher/online_searcher.py
index 222875244..dfce8a75d 100644
--- a/flaml/tune/searcher/online_searcher.py
+++ b/flaml/tune/searcher/online_searcher.py
@@ -207,7 +207,7 @@ class ChampionFrontierSearcher(BaseSearcher):
                     hyperparameter_config_groups.append(partial_new_configs)
                     # does not have searcher_trial_ids
                     searcher_trial_ids_groups.append([])
-            elif isinstance(config_domain, Float) or isinstance(config_domain, Categorical):
+            elif isinstance(config_domain, (Float, Categorical)):
                 # otherwise we need to deal with them in group
                 nonpoly_config[k] = v
                 if k not in self._space_of_nonpoly_hp:
diff --git a/tutorials/flaml-tutorial-automl-24.md b/tutorials/flaml-tutorial-automl-24.md
index c20954b36..2c1337a14 100644
--- a/tutorials/flaml-tutorial-automl-24.md
+++ b/tutorials/flaml-tutorial-automl-24.md
@@ -4,7 +4,7 @@
 
 **Date and Time**: 09.09.2024, 15:30-17:00
 
-Location:  Sorbonne University, 4 place Jussieu, 75005 Paris
+Location: Sorbonne University, 4 place Jussieu, 75005 Paris
 
 Duration: 1.5 hours
 
diff --git a/tutorials/flaml-tutorial-pydata-23.md b/tutorials/flaml-tutorial-pydata-23.md
index b93147026..c85a00a0d 100644
--- a/tutorials/flaml-tutorial-pydata-23.md
+++ b/tutorials/flaml-tutorial-pydata-23.md
@@ -4,7 +4,7 @@
 
 **Date and Time**: 04-26, 09:00–10:30 PT.
 
-Location:  Microsoft Conference Center, Seattle, WA.
+Location: Microsoft Conference Center, Seattle, WA.
 
 Duration: 1.5 hours
 
diff --git a/website/docs/Best-Practices.md b/website/docs/Best-Practices.md
index ac925696e..a1e87f11c 100644
--- a/website/docs/Best-Practices.md
+++ b/website/docs/Best-Practices.md
@@ -79,7 +79,6 @@ from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split
 from flaml import AutoML
 
-
 X, y = load_iris(return_X_y=True, as_frame=True)
 X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.2, random_state=42
@@ -110,7 +109,6 @@ from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split
 from flaml import AutoML
 
-
 X, y = load_iris(return_X_y=True, as_frame=True)
 X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.2, random_state=42
diff --git a/website/docs/Contribute.md b/website/docs/Contribute.md
index fd0ef194c..0a390b31a 100644
--- a/website/docs/Contribute.md
+++ b/website/docs/Contribute.md
@@ -49,7 +49,7 @@ print(flaml.__version__)
 ```
 
 - Please ensure all **code snippets and error messages are formatted in
-  appropriate code blocks**.  See [Creating and highlighting code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks)
+  appropriate code blocks**. See [Creating and highlighting code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks)
   for more details.
 
 ## Becoming a Reviewer
@@ -88,7 +88,7 @@ Run `pre-commit install` to install pre-commit into your git hooks. Before you c
 
 ### Coverage
 
-Any code you commit should not decrease coverage. To run all unit tests, install the \[test\] option under FLAML/:
+Any code you commit should not decrease coverage. To run all unit tests, install the [test] option under FLAML/:
 
 ```bash
 pip install -e."[test]"
diff --git a/website/docs/Examples/AutoML-Classification.md b/website/docs/Examples/AutoML-Classification.md
index ab0ca32be..b47acb488 100644
--- a/website/docs/Examples/AutoML-Classification.md
+++ b/website/docs/Examples/AutoML-Classification.md
@@ -2,7 +2,7 @@
 
 ### Prerequisites
 
-Install the \[automl\] option.
+Install the [automl] option.
 
 ```bash
 pip install "flaml[automl]"
diff --git a/website/docs/Examples/AutoML-NLP.md b/website/docs/Examples/AutoML-NLP.md
index 6532985bb..4c17a428f 100644
--- a/website/docs/Examples/AutoML-NLP.md
+++ b/website/docs/Examples/AutoML-NLP.md
@@ -2,7 +2,7 @@
 
 ### Requirements
 
-This example requires GPU. Install the \[automl,hf\] option:
+This example requires GPU. Install the [automl,hf] option:
 
 ```python
 pip install "flaml[automl,hf]"
diff --git a/website/docs/Examples/AutoML-Rank.md b/website/docs/Examples/AutoML-Rank.md
index c3702e004..868e445e2 100644
--- a/website/docs/Examples/AutoML-Rank.md
+++ b/website/docs/Examples/AutoML-Rank.md
@@ -2,7 +2,7 @@
 
 ### Prerequisites
 
-Install the \[automl\] option.
+Install the [automl] option.
 
 ```bash
 pip install "flaml[automl]"
diff --git a/website/docs/Examples/AutoML-Regression.md b/website/docs/Examples/AutoML-Regression.md
index 84ca18f88..dd68b4b1f 100644
--- a/website/docs/Examples/AutoML-Regression.md
+++ b/website/docs/Examples/AutoML-Regression.md
@@ -2,7 +2,7 @@
 
 ### Prerequisites
 
-Install the \[automl\] option.
+Install the [automl] option.
 
 ```bash
 pip install "flaml[automl]"
diff --git a/website/docs/Examples/AutoML-Time series forecast.md b/website/docs/Examples/AutoML-Time series forecast.md
index 61ef7cf60..2214d9f0b 100644
--- a/website/docs/Examples/AutoML-Time series forecast.md	
+++ b/website/docs/Examples/AutoML-Time series forecast.md	
@@ -2,7 +2,7 @@
 
 ### Prerequisites
 
-Install the \[automl,ts_forecast\] option.
+Install the [automl,ts_forecast] option.
 
 ```bash
 pip install "flaml[automl,ts_forecast]"
diff --git a/website/docs/Examples/AutoML-for-LightGBM.md b/website/docs/Examples/AutoML-for-LightGBM.md
index e26918d32..ce5fb32ef 100644
--- a/website/docs/Examples/AutoML-for-LightGBM.md
+++ b/website/docs/Examples/AutoML-for-LightGBM.md
@@ -2,7 +2,7 @@
 
 ### Prerequisites for this example
 
-Install the \[automl\] option.
+Install the [automl] option.
 
 ```bash
 pip install "flaml[automl] matplotlib openml"
diff --git a/website/docs/Examples/AutoML-for-XGBoost.md b/website/docs/Examples/AutoML-for-XGBoost.md
index 2d264383c..072dab328 100644
--- a/website/docs/Examples/AutoML-for-XGBoost.md
+++ b/website/docs/Examples/AutoML-for-XGBoost.md
@@ -2,7 +2,7 @@
 
 ### Prerequisites for this example
 
-Install the \[automl\] option.
+Install the [automl] option.
 
 ```bash
 pip install "flaml[automl] matplotlib openml"
diff --git a/website/docs/Examples/Default-Flamlized.md b/website/docs/Examples/Default-Flamlized.md
index b89ffe7a8..4f2858b1d 100644
--- a/website/docs/Examples/Default-Flamlized.md
+++ b/website/docs/Examples/Default-Flamlized.md
@@ -6,7 +6,7 @@ Flamlized estimators automatically use data-dependent default hyperparameter con
 
 ### Prerequisites
 
-This example requires the \[autozero\] option.
+This example requires the [autozero] option.
 
 ```bash
 pip install flaml[autozero] lightgbm openml
diff --git a/website/docs/Examples/Integrate - AzureML.md b/website/docs/Examples/Integrate - AzureML.md
index 1a46ca624..85f643d5c 100644
--- a/website/docs/Examples/Integrate - AzureML.md	
+++ b/website/docs/Examples/Integrate - AzureML.md	
@@ -2,7 +2,7 @@ FLAML can be used together with AzureML. On top of that, using mlflow and ray is
 
 ### Prerequisites
 
-Install the \[automl,azureml\] option.
+Install the [automl,azureml] option.
 
 ```bash
 pip install "flaml[automl,azureml]"
diff --git a/website/docs/Examples/Integrate - Scikit-learn Pipeline.md b/website/docs/Examples/Integrate - Scikit-learn Pipeline.md
index 58657c76a..ec19e4e3c 100644
--- a/website/docs/Examples/Integrate - Scikit-learn Pipeline.md	
+++ b/website/docs/Examples/Integrate - Scikit-learn Pipeline.md	
@@ -2,7 +2,7 @@ As FLAML's AutoML module can be used a transformer in the Sklearn's pipeline we
 
 ### Prerequisites
 
-Install the \[automl\] option.
+Install the [automl] option.
 
 ```bash
 pip install "flaml[automl] openml"
diff --git a/website/docs/FAQ.md b/website/docs/FAQ.md
index 5f1477869..ef5a2ba70 100644
--- a/website/docs/FAQ.md
+++ b/website/docs/FAQ.md
@@ -8,7 +8,7 @@
 
 ### About `low_cost_partial_config` in `tune`.
 
-- Definition and purpose: The `low_cost_partial_config` is a dictionary of subset of the hyperparameter coordinates whose value corresponds to a configuration with known low-cost (i.e., low computation cost for training the corresponding model).  The concept of low/high-cost is meaningful in the case where a subset of the hyperparameters to tune directly affects the computation cost for training the model. For example, `n_estimators` and `max_leaves` are known to affect the training cost of tree-based learners. We call this subset of hyperparameters, *cost-related hyperparameters*. In such scenarios, if you are aware of low-cost configurations for the cost-related hyperparameters, you are recommended to set them as the `low_cost_partial_config`. Using the tree-based method example again, since we know that small `n_estimators` and  `max_leaves` generally correspond to simpler models and thus lower cost, we set `{'n_estimators': 4, 'max_leaves': 4}` as the `low_cost_partial_config` by default (note that `4` is the lower bound of search space for these two hyperparameters), e.g., in [LGBM](https://github.com/microsoft/FLAML/blob/main/flaml/model.py#L215).  Configuring `low_cost_partial_config` helps the search algorithms make more cost-efficient choices.
+- Definition and purpose: The `low_cost_partial_config` is a dictionary of subset of the hyperparameter coordinates whose value corresponds to a configuration with known low-cost (i.e., low computation cost for training the corresponding model). The concept of low/high-cost is meaningful in the case where a subset of the hyperparameters to tune directly affects the computation cost for training the model. For example, `n_estimators` and `max_leaves` are known to affect the training cost of tree-based learners. We call this subset of hyperparameters, *cost-related hyperparameters*. In such scenarios, if you are aware of low-cost configurations for the cost-related hyperparameters, you are recommended to set them as the `low_cost_partial_config`. Using the tree-based method example again, since we know that small `n_estimators` and `max_leaves` generally correspond to simpler models and thus lower cost, we set `{'n_estimators': 4, 'max_leaves': 4}` as the `low_cost_partial_config` by default (note that `4` is the lower bound of search space for these two hyperparameters), e.g., in [LGBM](https://github.com/microsoft/FLAML/blob/main/flaml/model.py#L215). Configuring `low_cost_partial_config` helps the search algorithms make more cost-efficient choices.
   In AutoML, the `low_cost_init_value` in `search_space()` function for each estimator serves the same role.
 
 - Usage in practice: It is recommended to configure it if there are cost-related hyperparameters in your tuning task and you happen to know the low-cost values for them, but it is not required (It is fine to leave it the default value, i.e., `None`).
diff --git a/website/docs/Installation.md b/website/docs/Installation.md
index af6a63aef..b971660e6 100644
--- a/website/docs/Installation.md
+++ b/website/docs/Installation.md
@@ -57,7 +57,7 @@ pip install "flaml[hf]"
 #### Notebook
 
 To run the [notebook examples](https://github.com/microsoft/FLAML/tree/main/notebook),
-install flaml with the \[notebook\] option:
+install flaml with the [notebook] option:
 
 ```bash
 pip install "flaml[notebook]"
diff --git a/website/docs/Use-Cases/Task-Oriented-AutoML.md b/website/docs/Use-Cases/Task-Oriented-AutoML.md
index 6f54146d7..48cffcede 100644
--- a/website/docs/Use-Cases/Task-Oriented-AutoML.md
+++ b/website/docs/Use-Cases/Task-Oriented-AutoML.md
@@ -144,7 +144,7 @@ The estimator list can contain one or more estimator names, each corresponding t
 - Built-in estimator.
   - 'lgbm': LGBMEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, num_leaves, min_child_samples, learning_rate, log_max_bin (logarithm of (max_bin + 1) with base 2), colsample_bytree, reg_alpha, reg_lambda.
   - 'xgboost': XGBoostSkLearnEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_leaves, min_child_weight, learning_rate, subsample, colsample_bylevel, colsample_bytree, reg_alpha, reg_lambda.
-  - 'xgb_limitdepth': XGBoostLimitDepthEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators,  max_depth, min_child_weight, learning_rate, subsample, colsample_bylevel, colsample_bytree, reg_alpha, reg_lambda.
+  - 'xgb_limitdepth': XGBoostLimitDepthEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_depth, min_child_weight, learning_rate, subsample, colsample_bylevel, colsample_bytree, reg_alpha, reg_lambda.
   - 'rf': RandomForestEstimator for task "classification", "regression", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_features, max_leaves, criterion (for classification only). Starting from v1.1.0,
     it uses a fixed random_state by default.
   - 'extra_tree': ExtraTreesEstimator for task "classification", "regression", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_features, max_leaves, criterion (for classification only). Starting from v1.1.0,
@@ -462,7 +462,7 @@ For both classification and regression tasks more advanced split configurations
 
 More in general, `split_type` can also be set as a custom splitter object, when `eval_method="cv"`. It needs to be an instance of a derived class of scikit-learn
 [KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
-and have `split` and `get_n_splits` methods with the same signatures.  To disable shuffling, the splitter instance must contain the attribute `shuffle=False`.
+and have `split` and `get_n_splits` methods with the same signatures. To disable shuffling, the splitter instance must contain the attribute `shuffle=False`.
 
 ### Parallel tuning
 
@@ -740,7 +740,7 @@ If you want to get a sense of how much time is needed to find the best model, yo
 
 > INFO - Estimated sufficient time budget=145194s. Estimated necessary time budget=2118s.
 
-> INFO -  at 2.6s,  estimator lgbm's best error=0.4459,     best estimator lgbm's best error=0.4459
+> INFO - at 2.6s, estimator lgbm's best error=0.4459, best estimator lgbm's best error=0.4459
 
 You will see that the time to finish the first and cheapest trial is 2.6 seconds. The estimated necessary time budget is 2118 seconds, and the estimated sufficient time budget is 145194 seconds. Note that this is only an estimated range to help you decide your budget.
 
diff --git a/website/docs/Use-Cases/Tune-User-Defined-Function.md b/website/docs/Use-Cases/Tune-User-Defined-Function.md
index 8b19043aa..f565feef4 100644
--- a/website/docs/Use-Cases/Tune-User-Defined-Function.md
+++ b/website/docs/Use-Cases/Tune-User-Defined-Function.md
@@ -23,13 +23,13 @@ Related arguments:
 
 - `evaluation_function`: A user-defined evaluation function.
 - `metric`: A string of the metric name to optimize for.
-- `mode`:  A string in \['min', 'max'\] to specify the objective as minimization or maximization.
+- `mode`: A string in ['min', 'max'] to specify the objective as minimization or maximization.
 
 The first step is to specify your tuning objective.
 To do it, you should first specify your evaluation procedure (e.g., perform a machine learning model training and validation) with respect to the hyperparameters in a user-defined function `evaluation_function`.
 The function requires a hyperparameter configuration as input, and can simply return a metric value in a scalar or return a dictionary of metric name and metric value pairs.
 
-In the following code, we define an evaluation function with respect to two hyperparameters named `x` and `y` according to $obj := (x-85000)^2 - x/y$. Note that we use this toy example here for more accessible demonstration purposes. In real use cases, the evaluation function usually cannot be written in this closed form, but instead involves a black-box and expensive evaluation procedure.  Please check out [Tune HuggingFace](/docs/Examples/Tune-HuggingFace), [Tune PyTorch](/docs/Examples/Tune-PyTorch) and [Tune LightGBM](/docs/Getting-Started#tune-user-defined-function) for real examples of tuning tasks.
+In the following code, we define an evaluation function with respect to two hyperparameters named `x` and `y` according to $obj := (x-85000)^2 - x/y$. Note that we use this toy example here for more accessible demonstration purposes. In real use cases, the evaluation function usually cannot be written in this closed form, but instead involves a black-box and expensive evaluation procedure. Please check out [Tune HuggingFace](/docs/Examples/Tune-HuggingFace), [Tune PyTorch](/docs/Examples/Tune-PyTorch) and [Tune LightGBM](/docs/Getting-Started#tune-user-defined-function) for real examples of tuning tasks.
 
 ```python
 import time
@@ -72,7 +72,7 @@ Related arguments:
 
 The second step is to specify a search space of the hyperparameters through the argument `config`. In the search space, you need to specify valid values for your hyperparameters and can specify how these values are sampled (e.g., from a uniform distribution or a log-uniform distribution).
 
-In the following code example, we include a search space for the two hyperparameters `x` and `y` as introduced above. The valid values for both are integers in the range of \[1, 100000\]. The values for `x` are sampled uniformly in the specified range (using `tune.randint(lower=1, upper=100000)`), and the values for `y` are sampled uniformly in logarithmic space of the specified range (using `tune.lograndit(lower=1, upper=100000)`).
+In the following code example, we include a search space for the two hyperparameters `x` and `y` as introduced above. The valid values for both are integers in the range of [1, 100000]. The values for `x` are sampled uniformly in the specified range (using `tune.randint(lower=1, upper=100000)`), and the values for `y` are sampled uniformly in logarithmic space of the specified range (using `tune.lograndit(lower=1, upper=100000)`).
 
 ```python
 from flaml import tune
@@ -186,10 +186,10 @@ config = {
 Cost-related hyperparameters are a subset of the hyperparameters which directly affect the computation cost incurred in the evaluation of any hyperparameter configuration. For example, the number of estimators (`n_estimators`) and the maximum number of leaves (`max_leaves`) are known to affect the training cost of tree-based learners. So they are cost-related hyperparameters for tree-based learners.
 
 When cost-related hyperparameters exist, the evaluation cost in the search space is heterogeneous.
-In this case, designing a search space with proper ranges of the hyperparameter values is highly non-trivial. Classical tuning algorithms such as Bayesian optimization and random search are typically sensitive to such ranges.  It may take them a very high cost to find a good choice if the ranges are too large. And if the ranges are too small, the optimal choice(s) may not be included and thus not possible to be found. With our method, you can use a search space with larger ranges in the case of heterogeneous cost.
+In this case, designing a search space with proper ranges of the hyperparameter values is highly non-trivial. Classical tuning algorithms such as Bayesian optimization and random search are typically sensitive to such ranges. It may take them a very high cost to find a good choice if the ranges are too large. And if the ranges are too small, the optimal choice(s) may not be included and thus not possible to be found. With our method, you can use a search space with larger ranges in the case of heterogeneous cost.
 
 Our search algorithms are designed to finish the tuning process at a low total cost when the evaluation cost in the search space is heterogeneous.
-So in such scenarios, if you are aware of low-cost configurations for the cost-related hyperparameters, you are encouraged to set them as the `low_cost_partial_config`, which is a dictionary of a subset of the hyperparameter coordinates whose value corresponds to a configuration with known low cost.  Using the example of the tree-based methods again, since we know that small `n_estimators` and `max_leaves` generally correspond to simpler models and thus lower cost, we set `{'n_estimators': 4, 'max_leaves': 4}` as the `low_cost_partial_config` by default (note that 4 is the lower bound of search space for these two hyperparameters), e.g., in LGBM. Please find more details on how the algorithm works [here](#cfo-frugal-optimization-for-cost-related-hyperparameters).
+So in such scenarios, if you are aware of low-cost configurations for the cost-related hyperparameters, you are encouraged to set them as the `low_cost_partial_config`, which is a dictionary of a subset of the hyperparameter coordinates whose value corresponds to a configuration with known low cost. Using the example of the tree-based methods again, since we know that small `n_estimators` and `max_leaves` generally correspond to simpler models and thus lower cost, we set `{'n_estimators': 4, 'max_leaves': 4}` as the `low_cost_partial_config` by default (note that 4 is the lower bound of search space for these two hyperparameters), e.g., in LGBM. Please find more details on how the algorithm works [here](#cfo-frugal-optimization-for-cost-related-hyperparameters).
 
 In addition, if you are aware of the cost relationship between different categorical hyperparameter choices, you are encouraged to provide this information through `cat_hp_cost`. It also helps the search algorithm to reduce the total cost.
 
@@ -202,7 +202,7 @@ Related arguments:
 - `config_constraints` (optional): A list of config constraints to be satisfied.
 - `metric_constraints` (optional): A list of metric constraints to be satisfied. e.g., `['precision', '>=', 0.9]`.
 
-The third step is to specify constraints of the tuning task. One notable property of `flaml.tune` is that it is able to finish the tuning process (obtaining good results) within a required resource constraint. A user can either provide the resource constraint in terms of wall-clock time (in seconds) through the argument `time_budget_s`, or in terms of the number of trials through the argument `num_samples`.  The following example shows three use cases:
+The third step is to specify constraints of the tuning task. One notable property of `flaml.tune` is that it is able to finish the tuning process (obtaining good results) within a required resource constraint. A user can either provide the resource constraint in terms of wall-clock time (in seconds) through the argument `time_budget_s`, or in terms of the number of trials through the argument `num_samples`. The following example shows three use cases:
 
 ```python
 # Set a resource constraint of 60 seconds wall-clock time for the tuning.
@@ -295,8 +295,8 @@ Related arguments:
 
 Details about parallel tuning with Spark could be found [here](/docs/Examples/Integrate%20-%20Spark#parallel-spark-jobs).
 
-You can perform parallel tuning by specifying `use_ray=True` (requiring flaml\[ray\] option installed) or `use_spark=True`
-(requiring flaml\[spark\] option installed). You can also limit the amount of resources allocated per trial by specifying `resources_per_trial`,
+You can perform parallel tuning by specifying `use_ray=True` (requiring flaml[ray] option installed) or `use_spark=True`
+(requiring flaml[spark] option installed). You can also limit the amount of resources allocated per trial by specifying `resources_per_trial`,
 e.g., `resources_per_trial={'cpu': 2}` when `use_ray=True`.
 
 ```python
@@ -409,11 +409,11 @@ analysis = tune.run(
 
 You can find more details about this scheduler in [this paper](https://arxiv.org/pdf/1911.04706.pdf).
 
-#### 2. A scheduler of the  [`TrialScheduler`](https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-schedulers) class from `ray.tune`.
+#### 2. A scheduler of the [`TrialScheduler`](https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-schedulers) class from `ray.tune`.
 
 There is a handful of schedulers of this type implemented in `ray.tune`, for example, [ASHA](https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#asha-tune-schedulers-ashascheduler), [HyperBand](https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-original-hyperband), [BOHB](https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-scheduler-bohb), etc.
 
-To use this type of scheduler you can either (1) set `scheduler='asha'`, which will automatically create an  [ASHAScheduler](https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#asha-tune-schedulers-ashascheduler) instance using the provided inputs (`resource_attr`, `min_resource`, `max_resource`, and `reduction_factor`); or (2) create an instance by yourself and provided it via `scheduler`, as shown in the following code example,
+To use this type of scheduler you can either (1) set `scheduler='asha'`, which will automatically create an [ASHAScheduler](https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#asha-tune-schedulers-ashascheduler) instance using the provided inputs (`resource_attr`, `min_resource`, `max_resource`, and `reduction_factor`); or (2) create an instance by yourself and provided it via `scheduler`, as shown in the following code example,
 
 ```python
 #  require: pip install flaml[ray]
@@ -589,7 +589,7 @@ NOTE:
 
 ## Hyperparameter Optimization Algorithm
 
-To tune the hyperparameters toward your objective, you will want to use a hyperparameter optimization algorithm which can help suggest hyperparameters with better performance (regarding your objective). `flaml` offers two HPO methods: CFO and BlendSearch. `flaml.tune` uses BlendSearch by default when the option \[blendsearch\] is installed.
+To tune the hyperparameters toward your objective, you will want to use a hyperparameter optimization algorithm which can help suggest hyperparameters with better performance (regarding your objective). `flaml` offers two HPO methods: CFO and BlendSearch. `flaml.tune` uses BlendSearch by default when the option [blendsearch] is installed.
 
 <!-- ![png](images/CFO.png) | ![png](images/BlendSearch.png)
 :---:|:---: -->