diff --git a/flaml/automl.py b/flaml/automl.py
index 4e2772ed5..cff46230a 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -498,9 +498,9 @@ class AutoML(BaseEstimator):
 
         Args:
             metric: A string of the metric name or a function,
-                e.g., 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo',
-                'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2',
-                'mape'. Default is 'auto'.
+                e.g., 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_weighted',
+                'roc_auc_ovo_weighted', 'roc_auc_ovr_weighted', 'f1', 'micro_f1', 'macro_f1',
+                'log_loss', 'mae', 'mse', 'r2', 'mape'. Default is 'auto'.
                 If passing a customized metric function, the function needs to
                 have the following input arguments:
 
@@ -2172,9 +2172,9 @@ class AutoML(BaseEstimator):
                 dataframe and label are ignored;
                 If not, dataframe and label must be provided.
             metric: A string of the metric name or a function,
-                e.g., 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo',
-                'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2',
-                'mape'. Default is 'auto'.
+                e.g., 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_weighted',
+                'roc_auc_ovo_weighted', 'roc_auc_ovr_weighted', 'f1', 'micro_f1', 'macro_f1',
+                'log_loss', 'mae', 'mse', 'r2', 'mape'. Default is 'auto'.
                 If passing a customized metric function, the function needs to
                 have the following input arguments:
 
@@ -2699,6 +2699,9 @@ class AutoML(BaseEstimator):
                 "roc_auc",
                 "roc_auc_ovr",
                 "roc_auc_ovo",
+                "roc_auc_weighted",
+                "roc_auc_ovr_weighted",
+                "roc_auc_ovo_weighted",
                 "f1",
                 "ap",
                 "micro_f1",
diff --git a/flaml/ml.py b/flaml/ml.py
index e521c3239..31825308f 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -54,6 +54,9 @@ sklearn_metric_name_set = {
     "roc_auc",
     "roc_auc_ovr",
     "roc_auc_ovo",
+    "roc_auc_weighted",
+    "roc_auc_ovr_weighted",
+    "roc_auc_ovo_weighted",
     "log_loss",
     "mape",
     "f1",
@@ -239,8 +242,8 @@ def sklearn_metric_loss_score(
     Args:
         metric_name: A string of the metric name, one of
             'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',
-            'roc_auc_ovo', 'log_loss', 'mape', 'f1', 'ap', 'ndcg',
-            'micro_f1', 'macro_f1'.
+            'roc_auc_ovo', 'roc_auc_weighted', 'roc_auc_ovo_weighted', 'roc_auc_ovr_weighted',
+            'log_loss', 'mape', 'f1', 'ap', 'ndcg', 'micro_f1', 'macro_f1'.
         y_predict: A 1d or 2d numpy array of the predictions which can be
             used to calculate the metric. E.g., 2d for log_loss and 1d
             for others.
@@ -276,6 +279,26 @@ def sklearn_metric_loss_score(
         score = 1.0 - roc_auc_score(
             y_true, y_predict, sample_weight=sample_weight, multi_class="ovo"
         )
+    elif metric_name == "roc_auc_weighted":
+        score = 1.0 - roc_auc_score(
+            y_true, y_predict, sample_weight=sample_weight, average="weighted"
+        )
+    elif metric_name == "roc_auc_ovo_weighted":
+        score = 1.0 - roc_auc_score(
+            y_true,
+            y_predict,
+            sample_weight=sample_weight,
+            average="weighted",
+            multi_class="ovo",
+        )
+    elif metric_name == "roc_auc_ovr_weighted":
+        score = 1.0 - roc_auc_score(
+            y_true,
+            y_predict,
+            sample_weight=sample_weight,
+            average="weighted",
+            multi_class="ovr",
+        )
     elif "log_loss" == metric_name:
         score = log_loss(y_true, y_predict, labels=labels, sample_weight=sample_weight)
     elif "mape" == metric_name:
@@ -318,10 +341,20 @@ def sklearn_metric_loss_score(
 
 
 def get_y_pred(estimator, X, eval_metric, obj):
-    if eval_metric in ["roc_auc", "ap"] and "binary" in obj:
+    if eval_metric in ["roc_auc", "ap", "roc_auc_weighted"] and "binary" in obj:
+        # binary task: pass a 1d score (column 1 when predict_proba is 2d)
         y_pred_classes = estimator.predict_proba(X)
         y_pred = y_pred_classes[:, 1] if y_pred_classes.ndim > 1 else y_pred_classes
-    elif eval_metric in ["log_loss", "roc_auc", "roc_auc_ovr", "roc_auc_ovo"]:
+    elif eval_metric in [
+        "log_loss",
+        "roc_auc",
+        "roc_auc_ovr",
+        "roc_auc_ovo",
+        "roc_auc_weighted",
+        "roc_auc_ovo_weighted",
+        "roc_auc_ovr_weighted",
+    ]:
+        # "roc_auc_weighted" is listed next to "roc_auc" so both get predict_proba here
         y_pred = estimator.predict_proba(X)
     else:
         y_pred = estimator.predict(X)
diff --git a/notebook/automl_classification.ipynb b/notebook/automl_classification.ipynb
index 378f026c1..c759a3c0a 100644
--- a/notebook/automl_classification.ipynb
+++ b/notebook/automl_classification.ipynb
@@ -112,12 +112,12 @@
    "source": [
     "settings = {\n",
     "    \"time_budget\": 600,  # total running time in seconds\n",
-    "    \"metric\": 'accuracy',  # can be: 'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',\n",
-    "                           # 'roc_auc_ovo', 'log_loss', 'mape', 'f1', 'ap', 'ndcg', 'micro_f1', 'macro_f1'\n",
+    "    \"metric\": 'accuracy',\n",
+    "                        # check the documentation for options of metrics (https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric)\n",
     "    \"task\": 'classification',  # task type\n",
     "    \"log_file_name\": 'airlines_experiment.log',  # flaml log file\n",
     "    \"seed\": 7654321,    # random seed\n",
-    "}"
+    "}\n"
    ]
   },
   {
@@ -1269,7 +1269,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.9.15 64-bit",
+   "display_name": "Python 3.9.7 ('base')",
    "language": "python",
    "name": "python3"
   },
@@ -1283,11 +1283,11 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.15"
+   "version": "3.9.7"
   },
   "vscode": {
    "interpreter": {
-    "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
+    "hash": "e811209110f5aa4d8c2189eeb3ff7b9b4d146931cb9189ef6041ff71605c541d"
    }
   }
  },
diff --git a/notebook/integrate_azureml.ipynb b/notebook/integrate_azureml.ipynb
index 224ca2b53..b34f724fd 100644
--- a/notebook/integrate_azureml.ipynb
+++ b/notebook/integrate_azureml.ipynb
@@ -130,13 +130,13 @@
    "source": [
     "settings = {\n",
     "    \"time_budget\": 60,  # total running time in seconds\n",
-    "    \"metric\": 'accuracy',  # can be: 'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',\n",
-    "                           # 'roc_auc_ovo', 'log_loss', 'mape', 'f1', 'ap', 'ndcg', 'micro_f1', 'macro_f1'\n",
+    "    \"metric\": 'accuracy',  \n",
+    "                    # check the documentation for options of metrics (https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric)\n",
     "    \"estimator_list\": ['lgbm', 'rf', 'xgboost'],  # list of ML learners\n",
     "    \"task\": 'classification',  # task type    \n",
     "    \"sample\": False,  # whether to subsample training data\n",
     "    \"log_file_name\": 'airlines_experiment.log',  # flaml log file\n",
-    "}"
+    "}\n"
    ]
   },
   {
@@ -202,11 +202,9 @@
   }
  ],
  "metadata": {
-  "interpreter": {
-   "hash": "0cfea3304185a9579d09e0953576b57c8581e46e6ebc6dfeb681bc5a511f7544"
-  },
   "kernelspec": {
-   "display_name": "Python 3.8.0 64-bit ('blend': conda)",
+   "display_name": "Python 3.9.7 ('base')",
+   "language": "python",
    "name": "python3"
   },
   "language_info": {
@@ -219,7 +217,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.2"
+   "version": "3.9.7"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "e811209110f5aa4d8c2189eeb3ff7b9b4d146931cb9189ef6041ff71605c541d"
+   }
   }
  },
  "nbformat": 4,
diff --git a/test/automl/test_classification.py b/test/automl/test_classification.py
index 3ab6c8de0..19726029e 100644
--- a/test/automl/test_classification.py
+++ b/test/automl/test_classification.py
@@ -177,6 +177,23 @@ class TestClassification(unittest.TestCase):
         automl.fit(X, y, **automl_settings)
         del automl
 
+        automl = AutoML()
+        automl_settings = {
+            "time_budget": 3,
+            "task": "classification",
+            "n_jobs": 1,
+            "estimator_list": ["kneighbor"],
+            "eval_method": "cv",
+            "n_splits": 3,
+            "metric": "roc_auc_weighted",
+            "log_training_metric": True,
+            # "verbose": 4,
+            "ensemble": True,
+            "skip_transform": True,
+        }
+        automl.fit(X, y, **automl_settings)
+        del automl
+
     def test_binary(self):
         automl_experiment = AutoML()
         automl_settings = {
diff --git a/test/automl/test_multiclass.py b/test/automl/test_multiclass.py
index 9767a7beb..fb6451eef 100644
--- a/test/automl/test_multiclass.py
+++ b/test/automl/test_multiclass.py
@@ -318,6 +318,34 @@ class TestMultiClass(unittest.TestCase):
         X_train, y_train = load_iris(return_X_y=True)
         automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
 
+    def test_roc_auc_ovr_weighted(self):
+        automl_experiment = AutoML()
+        automl_settings = {
+            "time_budget": 1,
+            "metric": "roc_auc_ovr_weighted",
+            "task": "classification",
+            "log_file_name": "test/roc_auc_weighted.log",
+            "log_training_metric": True,
+            "n_jobs": 1,
+            "model_history": True,
+        }
+        X_train, y_train = load_iris(return_X_y=True)
+        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
+
+    def test_roc_auc_ovo_weighted(self):
+        automl_experiment = AutoML()
+        automl_settings = {
+            "time_budget": 1,
+            "metric": "roc_auc_ovo_weighted",
+            "task": "classification",
+            "log_file_name": "test/roc_auc_weighted.log",
+            "log_training_metric": True,
+            "n_jobs": 1,
+            "model_history": True,
+        }
+        X_train, y_train = load_iris(return_X_y=True)
+        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
+
     def test_sparse_matrix_classification(self):
         automl_experiment = AutoML()
         automl_settings = {
diff --git a/website/docs/Examples/Integrate - Scikit-learn Pipeline.md b/website/docs/Examples/Integrate - Scikit-learn Pipeline.md
index 9d20d26fb..c02ed6295 100644
--- a/website/docs/Examples/Integrate - Scikit-learn Pipeline.md	
+++ b/website/docs/Examples/Integrate - Scikit-learn Pipeline.md	
@@ -32,6 +32,7 @@ automl_pipeline = Pipeline([
 ])
 automl_pipeline
 ```
+
 ![png](images/pipeline.png)
 
 ### Run AutoML in the pipeline
@@ -39,7 +40,7 @@ automl_pipeline
 ```python
 automl_settings = {
     "time_budget": 60,  # total running time in seconds
-    "metric": "accuracy",  # primary metrics can be chosen from: ['accuracy','roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'f1','log_loss','mae','mse','r2']
+    "metric": "accuracy",  # primary metric, e.g. one of ['accuracy', 'roc_auc', 'roc_auc_weighted', 'roc_auc_ovr', 'roc_auc_ovo', 'f1', 'log_loss', 'mae', 'mse', 'r2']; see the documentation for the full list (https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric)
     "task": "classification",  # task type
     "estimator_list": ["xgboost", "catboost", "lgbm"],
     "log_file_name": "airlines_experiment.log",  # flaml log file
@@ -61,4 +62,4 @@ print('Best accuracy on validation data: {0:.4g}'.format(1 - automl.best_loss))
 print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))
 ```
 
-[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_sklearn.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_sklearn.ipynb)
\ No newline at end of file
+[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_sklearn.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_sklearn.ipynb)
diff --git a/website/docs/Use-Cases/Task-Oriented-AutoML.md b/website/docs/Use-Cases/Task-Oriented-AutoML.md
index 05ec64cbd..66e11d62f 100644
--- a/website/docs/Use-Cases/Task-Oriented-AutoML.md
+++ b/website/docs/Use-Cases/Task-Oriented-AutoML.md
@@ -59,6 +59,9 @@ The optimization metric is specified via the `metric` argument. It can be either
     - 'roc_auc': minimize 1 - roc_auc_score. Default metric for binary classification.
     - 'roc_auc_ovr': minimize 1 - roc_auc_score with `multi_class="ovr"`.
     - 'roc_auc_ovo': minimize 1 - roc_auc_score with `multi_class="ovo"`.
+    - 'roc_auc_weighted': minimize 1 - roc_auc_score with `average="weighted"`.
+    - 'roc_auc_ovr_weighted': minimize 1 - roc_auc_score with `multi_class="ovr"` and `average="weighted"`.
+    - 'roc_auc_ovo_weighted': minimize 1 - roc_auc_score with `multi_class="ovo"` and `average="weighted"`.
     - 'f1': minimize 1 - f1_score.
     - 'micro_f1': minimize 1 - f1_score with `average="micro"`.
     - 'macro_f1': minimize 1 - f1_score with `average="macro"`.