From a0b318b12ee8288db54b674904655307f9e201c2 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Wed, 24 May 2023 16:55:04 -0700 Subject: [PATCH] create an automl option to remove unnecessary dependency for autogen and tune (#1007) * version update post release v1.2.2 * automl option * import pandas * remove automl.utils * default * test * type hint and version update * dependency update * link to open in colab * use packaging.version to close #725 --------- Co-authored-by: Li Jiang Co-authored-by: Li Jiang --- flaml/automl/automl.py | 49 +- flaml/automl/data.py | 24 +- flaml/automl/ml.py | 56 +- flaml/automl/model.py | 46 +- flaml/automl/nlp/huggingface/training_args.py | 3 +- flaml/automl/nlp/huggingface/utils.py | 3 +- flaml/automl/spark/__init__.py | 32 + flaml/automl/spark/metrics.py | 46 +- flaml/automl/spark/utils.py | 86 +- flaml/automl/state.py | 41 +- flaml/automl/task/factory.py | 8 +- flaml/automl/task/generic_task.py | 69 +- flaml/automl/task/task.py | 38 +- flaml/automl/utils.py | 18 - flaml/config.py | 2 +- flaml/default/estimator.py | 28 +- flaml/default/suggest.py | 26 +- flaml/onlineml/trial.py | 6 +- flaml/tune/searcher/blendsearch.py | 4 +- flaml/tune/spark/utils.py | 14 +- flaml/tune/tune.py | 91 +- flaml/version.py | 2 +- notebook/autogen_chatgpt_gpt4.ipynb | 12 +- notebook/autogen_openai_completion.ipynb | 12 +- notebook/automl_classification.ipynb | 2003 ++++++++++++----- notebook/automl_lightgbm.ipynb | 12 +- notebook/automl_nlp.ipynb | 20 +- notebook/automl_synapseML.ipynb | 4 +- notebook/automl_time_series_forecast.ipynb | 5 +- notebook/automl_xgboost.ipynb | 7 +- notebook/integrate_azureml.ipynb | 7 +- notebook/integrate_sklearn.ipynb | 23 +- notebook/integrate_spark.ipynb | 2 +- notebook/research/acl2021.ipynb | 77 +- notebook/zeroshot_lightgbm.ipynb | 173 +- setup.py | 30 +- test/automl/test_utils.py | 20 - website/docs/Examples/AutoGen-OpenAI.md | 2 +- .../docs/Examples/AutoML-Classification.md | 7 + website/docs/Examples/AutoML-NLP.md | 4 
+- website/docs/Examples/AutoML-Rank.md | 7 + website/docs/Examples/AutoML-Regression.md | 7 + .../Examples/AutoML-Time series forecast.md | 4 +- website/docs/Examples/AutoML-for-LightGBM.md | 6 +- website/docs/Examples/AutoML-for-XGBoost.md | 6 +- website/docs/Examples/Default-Flamlized.md | 14 +- website/docs/Examples/Integrate - AzureML.md | 4 +- .../Integrate - Scikit-learn Pipeline.md | 7 +- 48 files changed, 2013 insertions(+), 1154 deletions(-) delete mode 100644 flaml/automl/utils.py delete mode 100644 test/automl/test_utils.py diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index e9861ae89..cdf17a37c 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -9,8 +9,6 @@ import sys from typing import Callable, List, Union, Optional from functools import partial import numpy as np -from sklearn.base import BaseEstimator -import pandas as pd import logging import json @@ -38,36 +36,18 @@ from flaml.automl.logger import logger, logger_formatter from flaml.automl.training_log import training_log_reader, training_log_writer from flaml.default import suggest_learner from flaml.version import __version__ as flaml_version +from flaml.automl.spark import psDataFrame, psSeries, DataFrame, Series from flaml.tune.spark.utils import check_spark, get_broadcast_data +ERROR = ( + DataFrame is None and ImportError("please install flaml[automl] option to use the flaml.automl package.") or None +) + try: - from flaml.automl.spark.utils import ( - train_test_split_pyspark, - unique_pandas_on_spark, - len_labels, - unique_value_first_index, - ) + from sklearn.base import BaseEstimator except ImportError: - train_test_split_pyspark = None - unique_pandas_on_spark = None - from flaml.automl.utils import ( - len_labels, - unique_value_first_index, - ) -try: - os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" - import pyspark.pandas as ps - from pyspark.pandas import DataFrame as psDataFrame, Series as psSeries - from pyspark.pandas.config import set_option, 
reset_option -except ImportError: - ps = None - - class psDataFrame: - pass - - class psSeries: - pass - + BaseEstimator = object + ERROR = ERROR or ImportError("please install flaml[automl] option to use the flaml.automl package.") try: import mlflow @@ -78,7 +58,6 @@ try: from ray import __version__ as ray_version assert ray_version >= "1.10.0" - ray_available = True except (ImportError, AssertionError): ray_available = False @@ -346,6 +325,8 @@ class AutoML(BaseEstimator): FLAML will create nested runs. """ + if ERROR: + raise ERROR self._track_iter = 0 self._state = AutoMLState() self._state.learner_classes = {} @@ -540,8 +521,8 @@ class AutoML(BaseEstimator): def score( self, - X: Union[pd.DataFrame, psDataFrame], - y: Union[pd.Series, psSeries], + X: Union[DataFrame, psDataFrame], + y: Union[Series, psSeries], **kwargs, ): estimator = getattr(self, "_trained_estimator", None) @@ -555,7 +536,7 @@ class AutoML(BaseEstimator): def predict( self, - X: Union[np.array, pd.DataFrame, List[str], List[List[str]], psDataFrame], + X: Union[np.array, DataFrame, List[str], List[List[str]], psDataFrame], **pred_kwargs, ): """Predict label from features. @@ -574,7 +555,7 @@ class AutoML(BaseEstimator): the searched learners, such as per_device_eval_batch_size. 
```python - multivariate_X_test = pd.DataFrame({ + multivariate_X_test = DataFrame({ 'timeStamp': pd.date_range(start='1/1/2022', end='1/07/2022'), 'categorical_col': ['yes', 'yes', 'no', 'no', 'yes', 'no', 'yes'], 'continuous_col': [105, 107, 120, 118, 110, 112, 115] @@ -596,7 +577,7 @@ class AutoML(BaseEstimator): if isinstance(y_pred, np.ndarray) and y_pred.ndim > 1 and isinstance(y_pred, np.ndarray): y_pred = y_pred.flatten() if self._label_transformer: - return self._label_transformer.inverse_transform(pd.Series(y_pred.astype(int))) + return self._label_transformer.inverse_transform(Series(y_pred.astype(int))) else: return y_pred diff --git a/flaml/automl/data.py b/flaml/automl/data.py index c2b22cf9f..46b03dfac 100644 --- a/flaml/automl/data.py +++ b/flaml/automl/data.py @@ -3,30 +3,16 @@ # * Licensed under the MIT License. See LICENSE file in the # * project root for license information. import numpy as np -from scipy.sparse import vstack, issparse -import pandas as pd -from pandas import DataFrame, Series - -from flaml.automl.training_log import training_log_reader - from datetime import datetime from typing import TYPE_CHECKING, Union - import os +from flaml.automl.training_log import training_log_reader +from flaml.automl.spark import ps, psDataFrame, psSeries, DataFrame, Series, pd try: - os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" - import pyspark.pandas as ps - from pyspark.pandas import DataFrame as psDataFrame, Series as psSeries + from scipy.sparse import vstack, issparse except ImportError: - ps = None - - class psDataFrame: - pass - - class psSeries: - pass - + pass if TYPE_CHECKING: from flaml.automl.task import Task @@ -55,7 +41,6 @@ def load_openml_dataset(dataset_id, data_dir=None, random_state=0, dataset_forma y_train: A series or array of labels for training data. y_test: A series or array of labels for test data. 
""" - import os import openml import pickle from sklearn.model_selection import train_test_split @@ -108,7 +93,6 @@ def load_openml_task(task_id, data_dir): y_train: A series of labels for training data. y_test: A series of labels for test data. """ - import os import openml import pickle diff --git a/flaml/automl/ml.py b/flaml/automl/ml.py index d81587b3e..7040088f3 100644 --- a/flaml/automl/ml.py +++ b/flaml/automl/ml.py @@ -2,24 +2,9 @@ # * Copyright (c) FLAML authors. All rights reserved. # * Licensed under the MIT License. See LICENSE file in the # * project root for license information. -import os import time import numpy as np -import pandas as pd from typing import Union, Callable, TypeVar, Optional, Tuple - -from sklearn.metrics import ( - mean_squared_error, - r2_score, - roc_auc_score, - accuracy_score, - mean_absolute_error, - log_loss, - average_precision_score, - f1_score, - mean_absolute_percentage_error, - ndcg_score, -) from flaml.automl.model import ( XGBoostSklearnEstimator, XGBoost_TS, @@ -47,27 +32,26 @@ from flaml.automl.model import ( from flaml.automl.data import group_counts from flaml.automl.task.task import TS_FORECAST, Task from flaml.automl.model import BaseEstimator +from flaml.automl.spark import psDataFrame, psSeries, ERROR as SPARK_ERROR, Series try: - from flaml.automl.spark.utils import len_labels + from sklearn.metrics import ( + mean_squared_error, + r2_score, + roc_auc_score, + accuracy_score, + mean_absolute_error, + log_loss, + average_precision_score, + f1_score, + mean_absolute_percentage_error, + ndcg_score, + ) except ImportError: - from flaml.automl.utils import len_labels -try: - os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" - from pyspark.sql.functions import col - import pyspark.pandas as ps - from pyspark.pandas import DataFrame as psDataFrame, Series as psSeries - from flaml.automl.spark.utils import to_pandas_on_spark, iloc_pandas_on_spark + pass + +if SPARK_ERROR is None: from flaml.automl.spark.metrics import 
spark_metric_loss_score -except ImportError: - ps = None - - class psDataFrame: - pass - - class psSeries: - pass - EstimatorSubclass = TypeVar("EstimatorSubclass", bound=BaseEstimator) @@ -209,7 +193,7 @@ def metric_loss_score( y_processed_true = [[labels[tr] for tr in each_list] for each_list in y_processed_true] elif metric in ("pearsonr", "spearmanr"): y_processed_true = ( - y_processed_true.to_list() if isinstance(y_processed_true, pd.Series) else list(y_processed_true) + y_processed_true.to_list() if isinstance(y_processed_true, Series) else list(y_processed_true) ) score_dict = metric.compute(predictions=y_processed_predict, references=y_processed_true) if "rouge" in metric_name: @@ -612,7 +596,7 @@ def train_estimator( return estimator, train_time -def norm_confusion_matrix(y_true: Union[np.array, pd.Series], y_pred: Union[np.array, pd.Series]): +def norm_confusion_matrix(y_true: Union[np.array, Series], y_pred: Union[np.array, Series]): """normalized confusion matrix. Args: @@ -631,8 +615,8 @@ def norm_confusion_matrix(y_true: Union[np.array, pd.Series], y_pred: Union[np.a def multi_class_curves( - y_true: Union[np.array, pd.Series], - y_pred_proba: Union[np.array, pd.Series], + y_true: Union[np.array, Series], + y_pred_proba: Union[np.array, Series], curve_func: Callable, ): """Binarize the data for multi-class tasks and produce ROC or precision-recall curves. 
diff --git a/flaml/automl/model.py b/flaml/automl/model.py index c32a89f35..b1be92f15 100644 --- a/flaml/automl/model.py +++ b/flaml/automl/model.py @@ -9,14 +9,8 @@ import os from typing import Callable, List, Union import numpy as np import time -from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier -from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier -from sklearn.linear_model import LogisticRegression -from sklearn.dummy import DummyClassifier, DummyRegressor -from scipy.sparse import issparse import logging import shutil -from pandas import DataFrame, Series, to_datetime import sys import math from flaml import tune @@ -37,36 +31,28 @@ from flaml.automl.task.task import ( ) try: - from flaml.automl.spark.utils import len_labels, to_pandas_on_spark + from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier + from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier + from sklearn.linear_model import LogisticRegression + from sklearn.dummy import DummyClassifier, DummyRegressor except ImportError: - from flaml.automl.utils import len_labels + pass - to_pandas_on_spark = None +try: + from scipy.sparse import issparse +except ImportError: + pass + +from flaml.automl.spark import psDataFrame, sparkDataFrame, psSeries, ERROR as SPARK_ERROR, DataFrame, Series +from flaml.automl.spark.utils import len_labels, to_pandas_on_spark from flaml.automl.spark.configs import ( ParamList_LightGBM_Classifier, ParamList_LightGBM_Regressor, ParamList_LightGBM_Ranker, ) -try: - os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" - from pyspark.sql.dataframe import DataFrame as sparkDataFrame - from pyspark.sql import SparkSession - from pyspark.pandas import DataFrame as psDataFrame, Series as psSeries - - _have_spark = True -except ImportError: - _have_spark = False - - class psDataFrame: - pass - - class psSeries: - pass - - class sparkDataFrame: - pass - +if DataFrame is not None: + from pandas import to_datetime 
try: import psutil @@ -415,8 +401,8 @@ class SparkEstimator(BaseEstimator): """The base class for fine-tuning spark models, using pyspark.ml and SynapseML API.""" def __init__(self, task="binary", **config): - if not _have_spark: - raise ImportError("pyspark is not installed. Try `pip install flaml[spark]`.") + if SPARK_ERROR: + raise SPARK_ERROR super().__init__(task, **config) self.df_train = None diff --git a/flaml/automl/nlp/huggingface/training_args.py b/flaml/automl/nlp/huggingface/training_args.py index 60984e273..ec0856bad 100644 --- a/flaml/automl/nlp/huggingface/training_args.py +++ b/flaml/automl/nlp/huggingface/training_args.py @@ -1,8 +1,7 @@ import argparse from dataclasses import dataclass, field - -from flaml.automl.task.task import NLG_TASKS from typing import Optional, List +from flaml.automl.task.task import NLG_TASKS try: from transformers import TrainingArguments diff --git a/flaml/automl/nlp/huggingface/utils.py b/flaml/automl/nlp/huggingface/utils.py index 9f07be368..978674415 100644 --- a/flaml/automl/nlp/huggingface/utils.py +++ b/flaml/automl/nlp/huggingface/utils.py @@ -1,7 +1,5 @@ -import pandas as pd from itertools import chain import numpy as np - from flaml.automl.task.task import ( SUMMARIZATION, SEQREGRESSION, @@ -10,6 +8,7 @@ from flaml.automl.task.task import ( TOKENCLASSIFICATION, NLG_TASKS, ) +from flaml.automl.data import pd def todf(X, Y, column_name): diff --git a/flaml/automl/spark/__init__.py b/flaml/automl/spark/__init__.py index e69de29bb..19dca97d9 100644 --- a/flaml/automl/spark/__init__.py +++ b/flaml/automl/spark/__init__.py @@ -0,0 +1,32 @@ +import os + +os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" +try: + import pyspark + import pyspark.pandas as ps + import pyspark.sql.functions as F + import pyspark.sql.types as T + from pyspark.sql import DataFrame as sparkDataFrame + from pyspark.pandas import DataFrame as psDataFrame, Series as psSeries, set_option + from pyspark.util import VersionUtils +except ImportError: + + 
class psDataFrame: + pass + + F = T = ps = sparkDataFrame = psSeries = psDataFrame + _spark_major_minor_version = set_option = None + ERROR = ImportError( + """Please run pip install flaml[spark] + and check [here](https://spark.apache.org/docs/latest/api/python/getting_started/install.html) + for more details about installing Spark.""" + ) +else: + ERROR = None + _spark_major_minor_version = VersionUtils.majorMinorVersion(pyspark.__version__) + +try: + import pandas as pd + from pandas import DataFrame, Series +except ImportError: + DataFrame = Series = pd = None diff --git a/flaml/automl/spark/metrics.py b/flaml/automl/spark/metrics.py index 6a5ae0c08..46de98ff3 100644 --- a/flaml/automl/spark/metrics.py +++ b/flaml/automl/spark/metrics.py @@ -1,28 +1,16 @@ -import logging -import os import numpy as np from typing import Union - -try: - os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" - from pyspark.sql import DataFrame - import pyspark.pandas as ps - from pyspark.ml.evaluation import ( - BinaryClassificationEvaluator, - RegressionEvaluator, - MulticlassClassificationEvaluator, - MultilabelClassificationEvaluator, - RankingEvaluator, - ) - import pyspark.sql.functions as F -except ImportError: - msg = """use_spark=True requires installation of PySpark. 
Please run pip install flaml[spark] - and check [here](https://spark.apache.org/docs/latest/api/python/getting_started/install.html) - for more details about installing Spark.""" - raise ImportError(msg) +from flaml.automl.spark import psSeries, F +from pyspark.ml.evaluation import ( + BinaryClassificationEvaluator, + RegressionEvaluator, + MulticlassClassificationEvaluator, + MultilabelClassificationEvaluator, + RankingEvaluator, +) -def ps_group_counts(groups: Union[ps.Series, np.ndarray]) -> np.ndarray: +def ps_group_counts(groups: Union[psSeries, np.ndarray]) -> np.ndarray: if isinstance(groups, np.ndarray): _, i, c = np.unique(groups, return_counts=True, return_index=True) else: @@ -48,20 +36,20 @@ def _compute_label_from_probability(df, probability_col, prediction_col): def spark_metric_loss_score( metric_name: str, - y_predict: ps.Series, - y_true: ps.Series, - sample_weight: ps.Series = None, - groups: ps.Series = None, + y_predict: psSeries, + y_true: psSeries, + sample_weight: psSeries = None, + groups: psSeries = None, ) -> float: """ Compute the loss score of a metric for spark models. Args: metric_name: str | the name of the metric. - y_predict: ps.Series | the predicted values. - y_true: ps.Series | the true values. - sample_weight: ps.Series | the sample weights. Default: None. - groups: ps.Series | the group of each row. Default: None. + y_predict: psSeries | the predicted values. + y_true: psSeries | the true values. + sample_weight: psSeries | the sample weights. Default: None. + groups: psSeries | the group of each row. Default: None. Returns: float | the loss score. A lower value indicates a better model. 
diff --git a/flaml/automl/spark/utils.py b/flaml/automl/spark/utils.py index c3b48be8d..e2da0595f 100644 --- a/flaml/automl/spark/utils.py +++ b/flaml/automl/spark/utils.py @@ -1,37 +1,31 @@ import logging -import os from typing import Union, List, Optional, Tuple -import pandas as pd import numpy as np +from flaml.automl.spark import ( + sparkDataFrame, + ps, + F, + T, + psDataFrame, + psSeries, + _spark_major_minor_version, + DataFrame, + Series, + set_option, +) logger = logging.getLogger(__name__) logger_formatter = logging.Formatter( "[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s", "%m-%d %H:%M:%S" ) logger.propagate = False -try: - os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" - from pyspark.sql import SparkSession - from pyspark.sql import DataFrame - import pyspark.pandas as ps - from pyspark.util import VersionUtils - import pyspark.sql.functions as F - import pyspark.sql.types as T - import pyspark - - _spark_major_minor_version = VersionUtils.majorMinorVersion(pyspark.__version__) -except ImportError: - msg = """use_spark=True requires installation of PySpark. Please run pip install flaml[spark] - and check [here](https://spark.apache.org/docs/latest/api/python/getting_started/install.html) - for more details about installing Spark.""" - raise ImportError(msg) def to_pandas_on_spark( - df: Union[pd.DataFrame, DataFrame, pd.Series, ps.DataFrame, ps.Series], + df: Union[DataFrame, sparkDataFrame, Series, psDataFrame, psSeries], index_col: Optional[str] = None, default_index_type: Optional[str] = "distributed-sequence", -) -> Union[ps.DataFrame, ps.Series]: +) -> Union[psDataFrame, psSeries]: """Convert pandas or pyspark dataframe/series to pandas_on_Spark dataframe/series. 
Args: @@ -46,7 +40,7 @@ def to_pandas_on_spark( import pandas as pd from flaml.automl.spark.utils import to_pandas_on_spark - pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + pdf = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) psdf = to_pandas_on_spark(pdf) print(psdf) @@ -57,33 +51,33 @@ def to_pandas_on_spark( psdf = to_pandas_on_spark(sdf) print(psdf) - pds = pd.Series([1, 2, 3]) + pds = Series([1, 2, 3]) pss = to_pandas_on_spark(pds) print(pss) ``` """ - ps.set_option("compute.default_index_type", default_index_type) - if isinstance(df, (pd.DataFrame, pd.Series)): + set_option("compute.default_index_type", default_index_type) + if isinstance(df, (DataFrame, Series)): return ps.from_pandas(df) - elif isinstance(df, DataFrame): + elif isinstance(df, sparkDataFrame): if _spark_major_minor_version[0] == 3 and _spark_major_minor_version[1] < 3: return df.to_pandas_on_spark(index_col=index_col) else: return df.pandas_api(index_col=index_col) - elif isinstance(df, (ps.DataFrame, ps.Series)): + elif isinstance(df, (psDataFrame, psSeries)): return df else: raise TypeError(f"{type(df)} is not one of pandas.DataFrame, pandas.Series and pyspark.sql.DataFrame") def train_test_split_pyspark( - df: Union[DataFrame, ps.DataFrame], + df: Union[sparkDataFrame, psDataFrame], stratify_column: Optional[str] = None, test_fraction: Optional[float] = 0.2, seed: Optional[int] = 1234, to_pandas_spark: Optional[bool] = True, index_col: Optional[str] = "tmp_index_col", -) -> Tuple[Union[DataFrame, ps.DataFrame], Union[DataFrame, ps.DataFrame]]: +) -> Tuple[Union[sparkDataFrame, psDataFrame], Union[sparkDataFrame, psDataFrame]]: """Split a pyspark dataframe into train and test dataframes. Args: @@ -98,7 +92,7 @@ def train_test_split_pyspark( pyspark.sql.DataFrame/pandas_on_spark DataFrame | The train dataframe. pyspark.sql.DataFrame/pandas_on_spark DataFrame | The test dataframe. 
""" - if isinstance(df, ps.DataFrame): + if isinstance(df, psDataFrame): df = df.to_spark(index_col=index_col) if stratify_column: @@ -123,9 +117,9 @@ def train_test_split_pyspark( return [df_train, df_test] -def unique_pandas_on_spark(psds: Union[ps.Series, ps.DataFrame]) -> Tuple[np.ndarray, np.ndarray]: +def unique_pandas_on_spark(psds: Union[psSeries, psDataFrame]) -> Tuple[np.ndarray, np.ndarray]: """Get the unique values and counts of a pandas_on_spark series.""" - if isinstance(psds, ps.DataFrame): + if isinstance(psds, psDataFrame): psds = psds.iloc[:, 0] _tmp = psds.value_counts().to_pandas() label_set = _tmp.index.values @@ -133,21 +127,21 @@ def unique_pandas_on_spark(psds: Union[ps.Series, ps.DataFrame]) -> Tuple[np.nda return label_set, counts -def len_labels(y: Union[ps.Series, np.ndarray], return_labels=False) -> Union[int, Optional[np.ndarray]]: +def len_labels(y: Union[psSeries, np.ndarray], return_labels=False) -> Union[int, Optional[np.ndarray]]: """Get the number of unique labels in y.""" - if not isinstance(y, (ps.DataFrame, ps.Series)): + if not isinstance(y, (psDataFrame, psSeries)): labels = np.unique(y) else: - labels = y.unique() if isinstance(y, ps.Series) else y.iloc[:, 0].unique() + labels = y.unique() if isinstance(y, psSeries) else y.iloc[:, 0].unique() if return_labels: return len(labels), labels return len(labels) -def unique_value_first_index(y: Union[pd.Series, ps.Series, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]: +def unique_value_first_index(y: Union[Series, psSeries, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]: """Get the unique values and indices of a pandas series, pandas_on_spark series or numpy array.""" - if isinstance(y, ps.Series): + if isinstance(y, psSeries): y_unique = y.drop_duplicates().sort_index() label_set = y_unique.values first_index = y_unique.index.values @@ -157,20 +151,20 @@ def unique_value_first_index(y: Union[pd.Series, ps.Series, np.ndarray]) -> Tupl def iloc_pandas_on_spark( - psdf: 
Union[ps.DataFrame, ps.Series, pd.DataFrame, pd.Series], + psdf: Union[psDataFrame, psSeries, DataFrame, Series], index: Union[int, slice, list], index_col: Optional[str] = "tmp_index_col", -) -> Union[ps.DataFrame, ps.Series]: +) -> Union[psDataFrame, psSeries]: """Get the rows of a pandas_on_spark dataframe/series by index.""" - if isinstance(psdf, (pd.DataFrame, pd.Series)): + if isinstance(psdf, (DataFrame, Series)): return psdf.iloc[index] if isinstance(index, (int, slice)): - if isinstance(psdf, ps.Series): + if isinstance(psdf, psSeries): return psdf.iloc[index] else: return psdf.iloc[index, :] elif isinstance(index, list): - if isinstance(psdf, ps.Series): + if isinstance(psdf, psSeries): sdf = psdf.to_frame().to_spark(index_col=index_col) else: if index_col not in psdf.columns: @@ -179,7 +173,7 @@ def iloc_pandas_on_spark( sdf = psdf.to_spark() sdfiloc = sdf.filter(F.col(index_col).isin(index)) psdfiloc = to_pandas_on_spark(sdfiloc) - if isinstance(psdf, ps.Series): + if isinstance(psdf, psSeries): psdfiloc = psdfiloc[psdfiloc.columns.drop(index_col)[0]] elif index_col not in psdf.columns: psdfiloc = psdfiloc.drop(columns=[index_col]) @@ -189,17 +183,17 @@ def iloc_pandas_on_spark( def spark_kFold( - dataset: Union[DataFrame, ps.DataFrame], + dataset: Union[sparkDataFrame, psDataFrame], nFolds: int = 3, foldCol: str = "", seed: int = 42, index_col: Optional[str] = "tmp_index_col", -) -> List[Tuple[ps.DataFrame, ps.DataFrame]]: +) -> List[Tuple[psDataFrame, psDataFrame]]: """Generate k-fold splits for a Spark DataFrame. Adopted from https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/tuning.html#CrossValidator Args: - dataset: DataFrame / ps.DataFrame. | The DataFrame to split. + dataset: sparkDataFrame / psDataFrame. | The DataFrame to split. nFolds: int | The number of folds. Default is 3. foldCol: str | The column name to use for fold numbers. If not specified, the DataFrame will be randomly split. Default is "". 
@@ -213,7 +207,7 @@ def spark_kFold( Returns: A list of (train, validation) DataFrames. """ - if isinstance(dataset, ps.DataFrame): + if isinstance(dataset, psDataFrame): dataset = dataset.to_spark(index_col=index_col) datasets = [] diff --git a/flaml/automl/state.py b/flaml/automl/state.py index f739a7556..1597271a1 100644 --- a/flaml/automl/state.py +++ b/flaml/automl/state.py @@ -1,43 +1,12 @@ import inspect import time -import os from typing import Any, Optional - import numpy as np -import pandas as pd - from flaml import tune from flaml.automl.logger import logger from flaml.automl.ml import compute_estimator, train_estimator from flaml.automl.task.task import TS_FORECAST - -try: - from flaml.automl.spark.utils import ( - train_test_split_pyspark, - unique_pandas_on_spark, - len_labels, - unique_value_first_index, - ) -except ImportError: - train_test_split_pyspark = None - unique_pandas_on_spark = None - from flaml.automl.utils import ( - len_labels, - unique_value_first_index, - ) -try: - os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" - import pyspark.pandas as ps - from pyspark.pandas import DataFrame as psDataFrame, Series as psSeries - from pyspark.pandas.config import set_option, reset_option -except ImportError: - ps = None - - class psDataFrame: - pass - - class psSeries: - pass +from flaml.automl.spark import psDataFrame, psSeries, DataFrame, Series class SearchState: @@ -245,11 +214,11 @@ class AutoMLState: def _prepare_sample_train_data(self, sample_size: int): sampled_weight = groups = None if sample_size <= self.data_size[0]: - if isinstance(self.X_train, (pd.DataFrame, psDataFrame)): + if isinstance(self.X_train, (DataFrame, psDataFrame)): sampled_X_train = self.X_train.iloc[:sample_size] else: sampled_X_train = self.X_train[:sample_size] - if isinstance(self.y_train, (pd.Series, psSeries)): + if isinstance(self.y_train, (Series, psSeries)): sampled_y_train = self.y_train.iloc[:sample_size] else: sampled_y_train = self.y_train[:sample_size] @@ 
-258,12 +227,12 @@ class AutoMLState: ) # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator if weight is not None: sampled_weight = ( - weight.iloc[:sample_size] if isinstance(weight, (pd.Series, psSeries)) else weight[:sample_size] + weight.iloc[:sample_size] if isinstance(weight, (Series, psSeries)) else weight[:sample_size] ) if self.groups is not None: groups = ( self.groups.iloc[:sample_size] - if isinstance(self.groups, (pd.Series, psSeries)) + if isinstance(self.groups, (Series, psSeries)) else self.groups[:sample_size] ) else: diff --git a/flaml/automl/task/factory.py b/flaml/automl/task/factory.py index 55a45a66c..7974b9366 100644 --- a/flaml/automl/task/factory.py +++ b/flaml/automl/task/factory.py @@ -1,15 +1,13 @@ from typing import Optional, Union - import numpy as np -import pandas as pd - from flaml.automl.task.generic_task import GenericTask from flaml.automl.task.task import Task +from flaml.automl.data import DataFrame, Series def task_factory( task_name: str, - X_train: Optional[Union[np.ndarray, pd.DataFrame]] = None, - y_train: Optional[Union[np.ndarray, pd.DataFrame, pd.Series]] = None, + X_train: Optional[Union[np.ndarray, DataFrame]] = None, + y_train: Optional[Union[np.ndarray, DataFrame, Series]] = None, ) -> Task: return GenericTask(task_name, X_train, y_train) diff --git a/flaml/automl/task/generic_task.py b/flaml/automl/task/generic_task.py index 7c683a03a..7f480365b 100644 --- a/flaml/automl/task/generic_task.py +++ b/flaml/automl/task/generic_task.py @@ -1,22 +1,7 @@ -import os import logging import time from typing import List, Optional - -import pandas as pd import numpy as np -from scipy.sparse import issparse -from sklearn.utils import shuffle -from sklearn.model_selection import ( - train_test_split, - RepeatedStratifiedKFold, - RepeatedKFold, - GroupKFold, - TimeSeriesSplit, - GroupShuffleSplit, - StratifiedGroupKFold, -) - from flaml.automl.data import TS_TIMESTAMP_COL, concat from 
flaml.automl.ml import EstimatorSubclass, default_cv_score_agg_func, get_val_loss from flaml.automl.model import ( @@ -40,40 +25,34 @@ from flaml.automl.task.task import ( TS_FORECASTPANEL, ) from flaml.config import RANDOM_SEED +from flaml.automl.spark import ps, psDataFrame, psSeries, pd +from flaml.automl.spark.utils import ( + iloc_pandas_on_spark, + spark_kFold, + train_test_split_pyspark, + unique_pandas_on_spark, + unique_value_first_index, + len_labels, + set_option, +) try: - os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" - from pyspark.sql.functions import col - import pyspark.pandas as ps - from pyspark.pandas import DataFrame as psDataFrame, Series as psSeries - from pyspark.pandas.config import set_option, reset_option - from flaml.automl.spark.utils import ( - to_pandas_on_spark, - iloc_pandas_on_spark, - spark_kFold, - train_test_split_pyspark, - unique_pandas_on_spark, - unique_value_first_index, - len_labels, - ) - from flaml.automl.spark.metrics import spark_metric_loss_score + from scipy.sparse import issparse except ImportError: - train_test_split_pyspark = None - unique_pandas_on_spark = None - iloc_pandas_on_spark = None - from flaml.automl.utils import ( - len_labels, - unique_value_first_index, + pass +try: + from sklearn.utils import shuffle + from sklearn.model_selection import ( + train_test_split, + RepeatedStratifiedKFold, + RepeatedKFold, + GroupKFold, + TimeSeriesSplit, + GroupShuffleSplit, + StratifiedGroupKFold, ) - - ps = None - - class psDataFrame: - pass - - class psSeries: - pass - +except ImportError: + pass logger = logging.getLogger(__name__) diff --git a/flaml/automl/task/task.py b/flaml/automl/task/task.py index 5c8ecee91..c833627b0 100644 --- a/flaml/automl/task/task.py +++ b/flaml/automl/task/task.py @@ -1,17 +1,11 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, List, Optional, Tuple, Union - import numpy as np -import pandas as pd +from flaml.automl.data import DataFrame, Series, psDataFrame, 
psSeries if TYPE_CHECKING: import flaml -try: - import ray -except ImportError: - ray = None - # TODO: if your task is not specified in here, define your task as an all-capitalized word SEQCLASSIFICATION = "seq-classification" MULTICHOICECLASSIFICATION = "multichoice-classification" @@ -80,8 +74,8 @@ class Task(ABC): def __init__( self, task_name: str, - X_train: Optional[Union[np.ndarray, pd.DataFrame]] = None, - y_train: Optional[Union[np.ndarray, pd.DataFrame, pd.Series]] = None, + X_train: Optional[Union[np.ndarray, DataFrame, psDataFrame]] = None, + y_train: Optional[Union[np.ndarray, DataFrame, Series, psSeries]] = None, ): """Constructor. @@ -104,8 +98,8 @@ class Task(ABC): self, config: dict, estimator: "flaml.automl.ml.BaseEstimator", - X_train_all: Union[np.ndarray, pd.DataFrame], - y_train_all: Union[np.ndarray, pd.DataFrame, pd.Series], + X_train_all: Union[np.ndarray, DataFrame, psDataFrame], + y_train_all: Union[np.ndarray, DataFrame, Series, psSeries], budget: int, kf, eval_metric: str, @@ -136,12 +130,12 @@ class Task(ABC): self, automl: "flaml.automl.automl.AutoML", state: "flaml.automl.state.AutoMLState", - X_train_all: Union[np.ndarray, pd.DataFrame, None], - y_train_all: Union[np.ndarray, pd.DataFrame, pd.Series, None], - dataframe: Union[pd.DataFrame, None], + X_train_all: Union[np.ndarray, DataFrame, psDataFrame, None], + y_train_all: Union[np.ndarray, DataFrame, Series, psSeries, None], + dataframe: Union[DataFrame, None], label: str, - X_val: Optional[Union[np.ndarray, pd.DataFrame]] = None, - y_val: Optional[Union[np.ndarray, pd.DataFrame, pd.Series]] = None, + X_val: Optional[Union[np.ndarray, DataFrame, psDataFrame]] = None, + y_val: Optional[Union[np.ndarray, DataFrame, Series, psSeries]] = None, groups_val: Optional[List[str]] = None, groups: Optional[List[str]] = None, ): @@ -169,8 +163,8 @@ class Task(ABC): def prepare_data( self, state: "flaml.automl.state.AutoMLState", - X_train_all: Union[np.ndarray, pd.DataFrame], - y_train_all: 
Union[np.ndarray, pd.DataFrame, pd.Series, None], + X_train_all: Union[np.ndarray, DataFrame, psDataFrame], + y_train_all: Union[np.ndarray, DataFrame, Series, psSeries, None], auto_augment: bool, eval_method: str, split_type: str, @@ -203,7 +197,7 @@ class Task(ABC): For ranking task, must be "auto" or 'group'. split_ratio: A float of the valiation data percentage for holdout. n_splits: An integer of the number of folds for cross - validation. - data_is_df: True if the data was provided as a pd.DataFrame else False. + data_is_df: True if the data was provided as a DataFrame else False. sample_weight_full: A 1d arraylike of the sample weight. Raises: @@ -214,7 +208,7 @@ class Task(ABC): def decide_split_type( self, split_type: str, - y_train_all: Union[np.ndarray, pd.DataFrame, pd.Series, None], + y_train_all: Union[np.ndarray, DataFrame, Series, psSeries, None], fit_kwargs: dict, groups: Optional[List[str]] = None, ) -> str: @@ -240,9 +234,9 @@ class Task(ABC): @abstractmethod def preprocess( self, - X: Union[np.ndarray, pd.DataFrame], + X: Union[np.ndarray, DataFrame, psDataFrame], transformer: Optional["flaml.automl.data.DataTransformer"] = None, - ) -> Union[np.ndarray, pd.DataFrame]: + ) -> Union[np.ndarray, DataFrame]: """Preprocess the data ready for fitting or inference with this task type. Args: diff --git a/flaml/automl/utils.py b/flaml/automl/utils.py deleted file mode 100644 index e6322f272..000000000 --- a/flaml/automl/utils.py +++ /dev/null @@ -1,18 +0,0 @@ -from typing import Optional, Union, Tuple -import numpy as np - - -def len_labels(y: np.ndarray, return_labels=False) -> Union[int, Optional[np.ndarray]]: - """Get the number of unique labels in y. 
The non-spark version of - flaml.automl.spark.utils.len_labels""" - labels = np.unique(y) - if return_labels: - return len(labels), labels - return len(labels) - - -def unique_value_first_index(y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: - """Get the unique values and indices of a pandas series or numpy array. - The non-spark version of flaml.automl.spark.utils.unique_value_first_index""" - label_set, first_index = np.unique(y, return_index=True) - return label_set, first_index diff --git a/flaml/config.py b/flaml/config.py index 97a990cc4..b23d5c547 100644 --- a/flaml/config.py +++ b/flaml/config.py @@ -1,5 +1,5 @@ """! - * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved. + * Copyright (c) Microsoft Corporation. All rights reserved. * Licensed under the MIT License. """ diff --git a/flaml/default/estimator.py b/flaml/default/estimator.py index 67598702b..9056c7d5e 100644 --- a/flaml/default/estimator.py +++ b/flaml/default/estimator.py @@ -1,4 +1,3 @@ -import sklearn.ensemble as ensemble from functools import wraps from flaml.automl.task.task import CLASSIFICATION from .suggest import preprocess_and_suggest_hyperparams @@ -143,22 +142,31 @@ def flamlize_estimator(super_class, name: str, task: str, alternatives=None): return EstimatorClass -RandomForestRegressor = flamlize_estimator(ensemble.RandomForestRegressor, "rf", "regression") -RandomForestClassifier = flamlize_estimator(ensemble.RandomForestClassifier, "rf", "classification") -ExtraTreesRegressor = flamlize_estimator(ensemble.ExtraTreesRegressor, "extra_tree", "regression") -ExtraTreesClassifier = flamlize_estimator(ensemble.ExtraTreesClassifier, "extra_tree", "classification") +try: + import sklearn.ensemble as ensemble +except ImportError: + RandomForestClassifier = RandomForestRegressor = ExtraTreesClassifier = ExtraTreesRegressor = ImportError( + "Using flaml.default.* requires scikit-learn." 
+ ) +else: + RandomForestRegressor = flamlize_estimator(ensemble.RandomForestRegressor, "rf", "regression") + RandomForestClassifier = flamlize_estimator(ensemble.RandomForestClassifier, "rf", "classification") + ExtraTreesRegressor = flamlize_estimator(ensemble.ExtraTreesRegressor, "extra_tree", "regression") + ExtraTreesClassifier = flamlize_estimator(ensemble.ExtraTreesClassifier, "extra_tree", "classification") try: import lightgbm - +except ImportError: + LGBMRegressor = LGBMClassifier = ImportError("Using flaml.default.LGBM* requires lightgbm.") +else: LGBMRegressor = flamlize_estimator(lightgbm.LGBMRegressor, "lgbm", "regression") LGBMClassifier = flamlize_estimator(lightgbm.LGBMClassifier, "lgbm", "classification") -except ImportError: - pass try: import xgboost - +except ImportError: + XGBClassifier = XGBRegressor = ImportError("Using flaml.default.XGB* requires xgboost.") +else: XGBRegressor = flamlize_estimator( xgboost.XGBRegressor, "xgb_limitdepth", @@ -171,5 +179,3 @@ try: "classification", [("max_depth", 0, "xgboost")], ) -except ImportError: - pass diff --git a/flaml/default/suggest.py b/flaml/default/suggest.py index ec0a706ad..12acc3678 100644 --- a/flaml/default/suggest.py +++ b/flaml/default/suggest.py @@ -1,41 +1,23 @@ -import os import numpy as np -from sklearn.neighbors import NearestNeighbors import logging import pathlib import json from flaml.automl.data import DataTransformer from flaml.automl.task.task import CLASSIFICATION, get_classification_objective +from flaml.automl.task.generic_task import len_labels from flaml.automl.ml import get_estimator_class from flaml.version import __version__ try: - from flaml.automl.spark.utils import len_labels + from sklearn.neighbors import NearestNeighbors except ImportError: - from flaml.automl.utils import len_labels -try: - os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" - import pyspark.pandas as ps - from pyspark.pandas import DataFrame as psDataFrame, Series as psSeries -except ImportError: - ps = 
None - - class psDataFrame: - pass - - class psSeries: - pass - + pass LOCATION = pathlib.Path(__file__).parent.resolve() logger = logging.getLogger(__name__) CONFIG_PREDICTORS = {} -def version_parse(version): - return tuple(map(int, (version.split(".")))) - - def meta_feature(task, X_train, y_train, meta_feature_names): this_feature = [] n_row = X_train.shape[0] @@ -94,6 +76,8 @@ def suggest_config( The returned configs can be used as starting points for AutoML.fit(). `FLAML_sample_size` is removed from the configs. """ + from packaging.version import parse as version_parse + task = get_classification_objective(len_labels(y)) if task == "classification" and y is not None else task predictor = ( load_config_predictor(estimator_or_predictor, task, location) diff --git a/flaml/onlineml/trial.py b/flaml/onlineml/trial.py index 134211bc8..3c9223c9a 100644 --- a/flaml/onlineml/trial.py +++ b/flaml/onlineml/trial.py @@ -5,9 +5,13 @@ import math import copy import collections from typing import Optional, Union -from sklearn.metrics import mean_squared_error, mean_absolute_error from flaml.tune import Trial +try: + from sklearn.metrics import mean_squared_error, mean_absolute_error +except ImportError: + pass + logger = logging.getLogger(__name__) diff --git a/flaml/tune/searcher/blendsearch.py b/flaml/tune/searcher/blendsearch.py index b6e2fa76d..0d264fcbd 100644 --- a/flaml/tune/searcher/blendsearch.py +++ b/flaml/tune/searcher/blendsearch.py @@ -958,9 +958,7 @@ try: from nni.tuner import Tuner as NNITuner from nni.utils import extract_scalar_reward except ImportError: - - class NNITuner: - pass + NNITuner = object def extract_scalar_reward(x: Dict): return x.get("default") diff --git a/flaml/tune/spark/utils.py b/flaml/tune/spark/utils.py index c42b670e7..b6c2dbcd1 100644 --- a/flaml/tune/spark/utils.py +++ b/flaml/tune/spark/utils.py @@ -11,20 +11,19 @@ logger_formatter = logging.Formatter( "[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s", "%m-%d 
%H:%M:%S" ) logger.propagate = False +os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" try: - os.environ["PYARROW_IGNORE_TIMEZONE"] = "1" import pyspark from pyspark.sql import SparkSession from pyspark.util import VersionUtils import py4j - - _have_spark = True - _spark_major_minor_version = VersionUtils.majorMinorVersion(pyspark.__version__) -except ImportError as e: - logger.debug("Could not import pyspark: %s", e) +except ImportError: _have_spark = False py4j = None _spark_major_minor_version = (0, 0) +else: + _have_spark = True + _spark_major_minor_version = VersionUtils.majorMinorVersion(pyspark.__version__) @lru_cache(maxsize=2) @@ -37,7 +36,7 @@ def check_spark(): Return (True, None) if the check passes, otherwise log the exception message and return (False, Exception(msg)). The exception can be raised by the caller. """ - logger.debug("\ncheck Spark installation...This line should appear only once.\n") + logger.debug("\nchecking Spark installation...This line should appear only once.\n") if not _have_spark: msg = """use_spark=True requires installation of PySpark. 
Please run pip install flaml[spark] and check [here](https://spark.apache.org/docs/latest/api/python/getting_started/install.html) @@ -51,7 +50,6 @@ def check_spark(): try: SparkSession.builder.getOrCreate() except RuntimeError as e: - # logger.warning(f"\nSparkSession is not available: {e}\n") return False, RuntimeError(e) return True, None diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py index 4ad93b42c..37a91774e 100644 --- a/flaml/tune/tune.py +++ b/flaml/tune/tune.py @@ -15,16 +15,16 @@ try: assert ray_version >= "1.10.0" from ray.tune.analysis import ExperimentAnalysis as EA - - ray_available = True except (ImportError, AssertionError): ray_available = False from .analysis import ExperimentAnalysis as EA +else: + ray_available = True from .trial import Trial from .result import DEFAULT_METRIC import logging -from flaml.tune.spark.utils import PySparkOvertimeMonitor +from flaml.tune.spark.utils import PySparkOvertimeMonitor, check_spark logger = logging.getLogger(__name__) logger.propagate = False @@ -231,7 +231,7 @@ def run( n_concurrent_trials: Optional[int] = 0, **ray_args, ): - """The trigger for HPO. + """The function-based way of performing HPO. Example: @@ -612,8 +612,6 @@ def run( if use_spark: # parallel run with spark - from flaml.tune.spark.utils import check_spark - spark_available, spark_error_msg = check_spark() if not spark_available: raise spark_error_msg @@ -811,3 +809,84 @@ def run( _runner = old_runner logger.handlers = old_handlers logger.setLevel(old_level) + + +class Tuner: + """Tuner is the class-based way of launching hyperparameter tuning jobs compatible with Ray Tune 2. + + Args: + trainable: A user-defined evaluation function. + It takes a configuration as input, outputs a evaluation + result (can be a numerical value or a dictionary of string + and numerical value pairs) for the input configuration. + For machine learning tasks, it usually involves training and + scoring a machine learning model, e.g., through validation loss. 
+ param_space: Search space of the tuning job. + One thing to note is that both preprocessor and dataset can be tuned here. + tune_config: Tuning algorithm specific configs. + Refer to ray.tune.tune_config.TuneConfig for more info. + run_config: Runtime configuration that is specific to individual trials. + If passed, this will overwrite the run config passed to the Trainer, + if applicable. Refer to ray.air.config.RunConfig for more info. + + Usage pattern: + + .. code-block:: python + + from sklearn.datasets import load_breast_cancer + + from ray import tune + from ray.data import from_pandas + from ray.air.config import RunConfig, ScalingConfig + from ray.train.xgboost import XGBoostTrainer + from ray.tune.tuner import Tuner + + def get_dataset(): + data_raw = load_breast_cancer(as_frame=True) + dataset_df = data_raw["data"] + dataset_df["target"] = data_raw["target"] + dataset = from_pandas(dataset_df) + return dataset + + trainer = XGBoostTrainer( + label_column="target", + params={}, + datasets={"train": get_dataset()}, + ) + + param_space = { + "scaling_config": ScalingConfig( + num_workers=tune.grid_search([2, 4]), + resources_per_worker={ + "CPU": tune.grid_search([1, 2]), + }, + ), + # You can even grid search various datasets in Tune. + # "datasets": { + # "train": tune.grid_search( + # [ds1, ds2] + # ), + # }, + "params": { + "objective": "binary:logistic", + "tree_method": "approx", + "eval_metric": ["logloss", "error"], + "eta": tune.loguniform(1e-4, 1e-1), + "subsample": tune.uniform(0.5, 1.0), + "max_depth": tune.randint(1, 9), + }, + } + tuner = Tuner(trainable=trainer, param_space=param_space, + run_config=RunConfig(name="my_tune_run")) + analysis = tuner.fit() + + To retry a failed tune run, you can then do + + .. code-block:: python + + tuner = Tuner.restore(experiment_checkpoint_dir) + tuner.fit() + + ``experiment_checkpoint_dir`` can be easily located near the end of the + console output of your first failed run. 
+ """ diff --git a/flaml/version.py b/flaml/version.py index b3f9ac7f1..0cda2d108 100644 --- a/flaml/version.py +++ b/flaml/version.py @@ -1 +1 @@ -__version__ = "1.2.4" +__version__ = "2.0.0rc1" diff --git a/notebook/autogen_chatgpt_gpt4.ipynb b/notebook/autogen_chatgpt_gpt4.ipynb index 9e9db0cec..a8ca9917a 100644 --- a/notebook/autogen_chatgpt_gpt4.ipynb +++ b/notebook/autogen_chatgpt_gpt4.ipynb @@ -1,5 +1,13 @@ { "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -23,7 +31,7 @@ "\n", "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the [openai,blendsearch] option:\n", "```bash\n", - "pip install flaml[openai,blendsearch]==1.2.2\n", + "pip install flaml[openai,blendsearch]\n", "```" ] }, @@ -40,7 +48,7 @@ }, "outputs": [], "source": [ - "# %pip install flaml[openai,blendsearch]==1.2.2 datasets" + "# %pip install flaml[openai,blendsearch] datasets" ] }, { diff --git a/notebook/autogen_openai_completion.ipynb b/notebook/autogen_openai_completion.ipynb index 664f12184..780c17a01 100644 --- a/notebook/autogen_openai_completion.ipynb +++ b/notebook/autogen_openai_completion.ipynb @@ -1,5 +1,13 @@ { "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -23,7 +31,7 @@ "\n", "FLAML requires `Python>=3.7`. 
To run this notebook example, please install flaml with the [autogen,blendsearch] option:\n", "```bash\n", - "pip install flaml[autogen,blendsearch]==1.2.2\n", + "pip install flaml[autogen,blendsearch]\n", "```" ] }, @@ -40,7 +48,7 @@ }, "outputs": [], "source": [ - "# %pip install flaml[autogen,blendsearch]==1.2.2 datasets" + "# %pip install flaml[autogen,blendsearch] datasets" ] }, { diff --git a/notebook/automl_classification.ipynb b/notebook/automl_classification.ipynb index 1ea9a0720..9e367630c 100644 --- a/notebook/automl_classification.ipynb +++ b/notebook/automl_classification.ipynb @@ -27,19 +27,19 @@ "\n", "In this notebook, we use one real data example (binary classification) to showcase how to use FLAML library.\n", "\n", - "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the `notebook` option:\n", + "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the `automl` option (this option is introduced from version 2, for version 1 it is installed by default):\n", "```bash\n", - "pip install flaml[notebook]==1.1.2\n", + "pip install flaml[automl]\n", "```" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "%pip install flaml[notebook]==1.1.2" + "# %pip install flaml[automl] matplotlib openml" ] }, { @@ -58,14 +58,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "slideshow": { "slide_type": "subslide" }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "download dataset from openml\n", + "Dataset name: airlines\n", + "X_train.shape: (404537, 7), y_train.shape: (404537,);\n", + "X_test.shape: (134846, 7), y_test.shape: (134846,)\n" + ] + } + ], "source": [ "from flaml.data import load_openml_dataset\n", "X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir='./')" @@ -73,9 +84,108 
@@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AirlineFlightAirportFromAirportToDayOfWeekTimeLength
249392EV5309.0MDTATL3794.0131.0
166918CO1079.0IAHSAT5900.060.0
89110US1636.0CLECLT1530.0103.0
70258WN928.0CMHLAS7480.0280.0
492985WN729.0GEGLAS3630.0140.0
\n", + "
" + ], + "text/plain": [ + " Airline Flight AirportFrom AirportTo DayOfWeek Time Length\n", + "249392 EV 5309.0 MDT ATL 3 794.0 131.0\n", + "166918 CO 1079.0 IAH SAT 5 900.0 60.0\n", + "89110 US 1636.0 CLE CLT 1 530.0 103.0\n", + "70258 WN 928.0 CMH LAS 7 480.0 280.0\n", + "492985 WN 729.0 GEG LAS 3 630.0 140.0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "X_train.head()" ] @@ -94,7 +204,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": { "slideshow": { "slide_type": "slide" @@ -109,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": { "slideshow": { "slide_type": "slide" @@ -129,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": { "slideshow": { "slide_type": "slide" @@ -139,335 +249,778 @@ ] }, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:20:40] {1663} INFO - task = classification\n", + "[flaml.automl.logger: 04-28 02:20:40] {1670} INFO - Data split method: stratified\n", + "[flaml.automl.logger: 04-28 02:20:40] {1673} INFO - Evaluation method: holdout\n", + "[flaml.automl.logger: 04-28 02:20:40] {1771} INFO - Minimizing error metric: 1-accuracy\n", + "[flaml.automl.logger: 04-28 02:20:41] {1881} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']\n", + "[flaml.automl.logger: 04-28 02:20:41] {2191} INFO - iteration 0, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:41] {2317} INFO - Estimated sufficient time budget=44511s. 
Estimated necessary time budget=1093s.\n", + "[flaml.automl.logger: 04-28 02:20:41] {2364} INFO - at 1.2s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.logger: 04-28 02:20:41] {2191} INFO - iteration 1, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:41] {2364} INFO - at 1.2s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.logger: 04-28 02:20:41] {2191} INFO - iteration 2, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:41] {2364} INFO - at 1.2s,\testimator lgbm's best error=0.3614,\tbest estimator lgbm's best error=0.3614\n", + "[flaml.automl.logger: 04-28 02:20:41] {2191} INFO - iteration 3, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:41] {2364} INFO - at 1.3s,\testimator lgbm's best error=0.3614,\tbest estimator lgbm's best error=0.3614\n", + "[flaml.automl.logger: 04-28 02:20:41] {2191} INFO - iteration 4, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:41] {2364} INFO - at 1.4s,\testimator lgbm's best error=0.3614,\tbest estimator lgbm's best error=0.3614\n", + "[flaml.automl.logger: 04-28 02:20:41] {2191} INFO - iteration 5, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:41] {2364} INFO - at 1.6s,\testimator lgbm's best error=0.3614,\tbest estimator lgbm's best error=0.3614\n", + "[flaml.automl.logger: 04-28 02:20:41] {2191} INFO - iteration 6, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:41] {2364} INFO - at 1.7s,\testimator lgbm's best error=0.3614,\tbest estimator lgbm's best error=0.3614\n", + "[flaml.automl.logger: 04-28 02:20:41] {2191} INFO - iteration 7, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:41] {2364} INFO - at 1.8s,\testimator lgbm's best error=0.3614,\tbest estimator lgbm's best error=0.3614\n", + "[flaml.automl.logger: 04-28 02:20:41] {2191} INFO - iteration 8, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:41] 
{2364} INFO - at 1.9s,\testimator lgbm's best error=0.3550,\tbest estimator lgbm's best error=0.3550\n", + "[flaml.automl.logger: 04-28 02:20:41] {2191} INFO - iteration 9, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.0s,\testimator xgboost's best error=0.3787,\tbest estimator lgbm's best error=0.3550\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 10, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.0s,\testimator xgboost's best error=0.3746,\tbest estimator lgbm's best error=0.3550\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 11, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.1s,\testimator extra_tree's best error=0.3787,\tbest estimator lgbm's best error=0.3550\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 12, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.1s,\testimator extra_tree's best error=0.3787,\tbest estimator lgbm's best error=0.3550\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 13, current learner extra_tree\n" + ] + }, { "name": "stderr", "output_type": "stream", "text": [ - "[flaml.automl: 03-30 21:48:57] {2105} INFO - task = classification\n", - "[flaml.automl: 03-30 21:48:57] {2107} INFO - Data split method: stratified\n", - "[flaml.automl: 03-30 21:48:57] {2111} INFO - Evaluation method: holdout\n", - "[flaml.automl: 03-30 21:48:58] {2188} INFO - Minimizing error metric: 1-accuracy\n", - "[flaml.automl: 03-30 21:48:58] {2281} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']\n", - "[flaml.automl: 03-30 21:48:58] {2567} INFO - iteration 0, current learner lgbm\n", - "[flaml.automl: 03-30 21:48:58] {2697} INFO - Estimated sufficient time budget=24546s. 
Estimated necessary time budget=603s.\n", - "[flaml.automl: 03-30 21:48:58] {2744} INFO - at 0.7s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", - "[flaml.automl: 03-30 21:48:58] {2567} INFO - iteration 1, current learner lgbm\n", - "[flaml.automl: 03-30 21:48:58] {2744} INFO - at 0.8s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", - "[flaml.automl: 03-30 21:48:58] {2567} INFO - iteration 2, current learner lgbm\n", - "[flaml.automl: 03-30 21:48:58] {2744} INFO - at 0.9s,\testimator lgbm's best error=0.3614,\tbest estimator lgbm's best error=0.3614\n", - "[flaml.automl: 03-30 21:48:58] {2567} INFO - iteration 3, current learner xgboost\n", - "[flaml.automl: 03-30 21:48:58] {2744} INFO - at 1.0s,\testimator xgboost's best error=0.3787,\tbest estimator lgbm's best error=0.3614\n", - "[flaml.automl: 03-30 21:48:58] {2567} INFO - iteration 4, current learner extra_tree\n", - "[flaml.automl: 03-30 21:48:58] {2744} INFO - at 1.1s,\testimator extra_tree's best error=0.3892,\tbest estimator lgbm's best error=0.3614\n", - "[flaml.automl: 03-30 21:48:58] {2567} INFO - iteration 5, current learner lgbm\n", - "[flaml.automl: 03-30 21:48:58] {2744} INFO - at 1.3s,\testimator lgbm's best error=0.3614,\tbest estimator lgbm's best error=0.3614\n", - "[flaml.automl: 03-30 21:48:58] {2567} INFO - iteration 6, current learner xgboost\n", - "[flaml.automl: 03-30 21:48:58] {2744} INFO - at 1.3s,\testimator xgboost's best error=0.3787,\tbest estimator lgbm's best error=0.3614\n", - "[flaml.automl: 03-30 21:48:58] {2567} INFO - iteration 7, current learner lgbm\n", - "[flaml.automl: 03-30 21:48:58] {2744} INFO - at 1.4s,\testimator lgbm's best error=0.3614,\tbest estimator lgbm's best error=0.3614\n", - "[flaml.automl: 03-30 21:48:58] {2567} INFO - iteration 8, current learner lgbm\n", - "[flaml.automl: 03-30 21:48:58] {2744} INFO - at 1.6s,\testimator lgbm's best error=0.3614,\tbest estimator lgbm's best 
error=0.3614\n", - "[flaml.automl: 03-30 21:48:58] {2567} INFO - iteration 9, current learner xgboost\n", - "[flaml.automl: 03-30 21:48:59] {2744} INFO - at 1.7s,\testimator xgboost's best error=0.3604,\tbest estimator xgboost's best error=0.3604\n", - "[flaml.automl: 03-30 21:48:59] {2567} INFO - iteration 10, current learner xgboost\n", - "[flaml.automl: 03-30 21:48:59] {2744} INFO - at 1.9s,\testimator xgboost's best error=0.3601,\tbest estimator xgboost's best error=0.3601\n", - "[flaml.automl: 03-30 21:48:59] {2567} INFO - iteration 11, current learner extra_tree\n", - "[flaml.automl: 03-30 21:48:59] {2744} INFO - at 2.0s,\testimator extra_tree's best error=0.3892,\tbest estimator xgboost's best error=0.3601\n", - "[flaml.automl: 03-30 21:48:59] {2567} INFO - iteration 12, current learner extra_tree\n", - "[flaml.automl: 03-30 21:48:59] {2744} INFO - at 2.1s,\testimator extra_tree's best error=0.3792,\tbest estimator xgboost's best error=0.3601\n", - "[flaml.automl: 03-30 21:48:59] {2567} INFO - iteration 13, current learner rf\n", - "[flaml.automl: 03-30 21:48:59] {2744} INFO - at 2.1s,\testimator rf's best error=0.3789,\tbest estimator xgboost's best error=0.3601\n", - "[flaml.automl: 03-30 21:48:59] {2567} INFO - iteration 14, current learner rf\n", - "[flaml.automl: 03-30 21:48:59] {2744} INFO - at 2.2s,\testimator rf's best error=0.3789,\tbest estimator xgboost's best error=0.3601\n", - "[flaml.automl: 03-30 21:48:59] {2567} INFO - iteration 15, current learner rf\n", - "[flaml.automl: 03-30 21:48:59] {2744} INFO - at 2.3s,\testimator rf's best error=0.3766,\tbest estimator xgboost's best error=0.3601\n", - "[flaml.automl: 03-30 21:48:59] {2567} INFO - iteration 16, current learner lgbm\n", - "[flaml.automl: 03-30 21:48:59] {2744} INFO - at 2.4s,\testimator lgbm's best error=0.3614,\tbest estimator xgboost's best error=0.3601\n", - "[flaml.automl: 03-30 21:48:59] {2567} INFO - iteration 17, current learner extra_tree\n", - "[flaml.automl: 03-30 21:48:59] 
{2744} INFO - at 2.5s,\testimator extra_tree's best error=0.3792,\tbest estimator xgboost's best error=0.3601\n", - "[flaml.automl: 03-30 21:48:59] {2567} INFO - iteration 18, current learner lgbm\n", - "[flaml.automl: 03-30 21:49:00] {2744} INFO - at 2.6s,\testimator lgbm's best error=0.3614,\tbest estimator xgboost's best error=0.3601\n", - "[flaml.automl: 03-30 21:49:00] {2567} INFO - iteration 19, current learner xgboost\n", - "[flaml.automl: 03-30 21:49:00] {2744} INFO - at 2.7s,\testimator xgboost's best error=0.3594,\tbest estimator xgboost's best error=0.3594\n", - "[flaml.automl: 03-30 21:49:00] {2567} INFO - iteration 20, current learner xgboost\n", - "[flaml.automl: 03-30 21:49:00] {2744} INFO - at 2.8s,\testimator xgboost's best error=0.3594,\tbest estimator xgboost's best error=0.3594\n", - "[flaml.automl: 03-30 21:49:00] {2567} INFO - iteration 21, current learner xgboost\n", - "[flaml.automl: 03-30 21:49:00] {2744} INFO - at 2.9s,\testimator xgboost's best error=0.3594,\tbest estimator xgboost's best error=0.3594\n", - "[flaml.automl: 03-30 21:49:00] {2567} INFO - iteration 22, current learner lgbm\n", - "[flaml.automl: 03-30 21:49:00] {2744} INFO - at 3.1s,\testimator lgbm's best error=0.3614,\tbest estimator xgboost's best error=0.3594\n", - "[flaml.automl: 03-30 21:49:00] {2567} INFO - iteration 23, current learner lgbm\n", - "[flaml.automl: 03-30 21:49:00] {2744} INFO - at 3.3s,\testimator lgbm's best error=0.3550,\tbest estimator lgbm's best error=0.3550\n", - "[flaml.automl: 03-30 21:49:00] {2567} INFO - iteration 24, current learner extra_tree\n", - "[flaml.automl: 03-30 21:49:00] {2744} INFO - at 3.4s,\testimator extra_tree's best error=0.3792,\tbest estimator lgbm's best error=0.3550\n", - "[flaml.automl: 03-30 21:49:00] {2567} INFO - iteration 25, current learner extra_tree\n", - "[flaml.automl: 03-30 21:49:00] {2744} INFO - at 3.5s,\testimator extra_tree's best error=0.3792,\tbest estimator lgbm's best error=0.3550\n", - "[flaml.automl: 
03-30 21:49:00] {2567} INFO - iteration 26, current learner lgbm\n", - "[flaml.automl: 03-30 21:49:01] {2744} INFO - at 3.7s,\testimator lgbm's best error=0.3550,\tbest estimator lgbm's best error=0.3550\n", - "[flaml.automl: 03-30 21:49:01] {2567} INFO - iteration 27, current learner xgboost\n", - "[flaml.automl: 03-30 21:49:01] {2744} INFO - at 3.8s,\testimator xgboost's best error=0.3594,\tbest estimator lgbm's best error=0.3550\n", - "[flaml.automl: 03-30 21:49:01] {2567} INFO - iteration 28, current learner extra_tree\n", - "[flaml.automl: 03-30 21:49:01] {2744} INFO - at 3.9s,\testimator extra_tree's best error=0.3792,\tbest estimator lgbm's best error=0.3550\n", - "[flaml.automl: 03-30 21:49:01] {2567} INFO - iteration 29, current learner extra_tree\n", - "[flaml.automl: 03-30 21:49:01] {2744} INFO - at 4.0s,\testimator extra_tree's best error=0.3792,\tbest estimator lgbm's best error=0.3550\n", - "[flaml.automl: 03-30 21:49:01] {2567} INFO - iteration 30, current learner lgbm\n", - "[flaml.automl: 03-30 21:49:01] {2744} INFO - at 4.5s,\testimator lgbm's best error=0.3545,\tbest estimator lgbm's best error=0.3545\n", - "[flaml.automl: 03-30 21:49:01] {2567} INFO - iteration 31, current learner lgbm\n", - "[flaml.automl: 03-30 21:49:02] {2744} INFO - at 4.8s,\testimator lgbm's best error=0.3545,\tbest estimator lgbm's best error=0.3545\n", - "[flaml.automl: 03-30 21:49:02] {2567} INFO - iteration 32, current learner lgbm\n", - "[flaml.automl: 03-30 21:49:03] {2744} INFO - at 6.1s,\testimator lgbm's best error=0.3545,\tbest estimator lgbm's best error=0.3545\n", - "[flaml.automl: 03-30 21:49:03] {2567} INFO - iteration 33, current learner catboost\n", - "[flaml.automl: 03-30 21:49:08] {2744} INFO - at 10.6s,\testimator catboost's best error=0.3587,\tbest estimator lgbm's best error=0.3545\n", - "[flaml.automl: 03-30 21:49:08] {2567} INFO - iteration 34, current learner extra_tree\n", - "[flaml.automl: 03-30 21:49:09] {2744} INFO - at 11.8s,\testimator 
extra_tree's best error=0.3792,\tbest estimator lgbm's best error=0.3545\n", - "[flaml.automl: 03-30 21:49:09] {2567} INFO - iteration 35, current learner lgbm\n", - "[flaml.automl: 03-30 21:49:10] {2744} INFO - at 13.0s,\testimator lgbm's best error=0.3536,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:10] {2567} INFO - iteration 36, current learner rf\n", - "[flaml.automl: 03-30 21:49:10] {2744} INFO - at 13.5s,\testimator rf's best error=0.3766,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:10] {2567} INFO - iteration 37, current learner extra_tree\n", - "[flaml.automl: 03-30 21:49:15] {2744} INFO - at 18.6s,\testimator extra_tree's best error=0.3792,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:15] {2567} INFO - iteration 38, current learner catboost\n", - "[flaml.automl: 03-30 21:49:17] {2744} INFO - at 20.0s,\testimator catboost's best error=0.3587,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:17] {2567} INFO - iteration 39, current learner catboost\n", - "[flaml.automl: 03-30 21:49:25] {2744} INFO - at 28.2s,\testimator catboost's best error=0.3587,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:25] {2567} INFO - iteration 40, current learner lgbm\n", - "[flaml.automl: 03-30 21:49:28] {2744} INFO - at 30.7s,\testimator lgbm's best error=0.3536,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:28] {2567} INFO - iteration 41, current learner catboost\n", - "[flaml.automl: 03-30 21:49:36] {2744} INFO - at 38.9s,\testimator catboost's best error=0.3587,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:36] {2567} INFO - iteration 42, current learner xgboost\n", - "[flaml.automl: 03-30 21:49:37] {2744} INFO - at 40.4s,\testimator xgboost's best error=0.3594,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:37] {2567} INFO - iteration 43, 
current learner lgbm\n", - "[flaml.automl: 03-30 21:49:41] {2744} INFO - at 44.3s,\testimator lgbm's best error=0.3536,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:41] {2567} INFO - iteration 44, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:49:42] {2744} INFO - at 44.7s,\testimator xgb_limitdepth's best error=0.3630,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:42] {2567} INFO - iteration 45, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:49:43] {2744} INFO - at 46.1s,\testimator xgb_limitdepth's best error=0.3630,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:43] {2567} INFO - iteration 46, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:49:45] {2744} INFO - at 47.7s,\testimator xgb_limitdepth's best error=0.3630,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:45] {2567} INFO - iteration 47, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:49:46] {2744} INFO - at 49.4s,\testimator xgb_limitdepth's best error=0.3572,\tbest estimator lgbm's best error=0.3536\n", - "[flaml.automl: 03-30 21:49:46] {2567} INFO - iteration 48, current learner lgbm\n", - "[flaml.automl: 03-30 21:49:48] {2744} INFO - at 51.1s,\testimator lgbm's best error=0.3528,\tbest estimator lgbm's best error=0.3528\n", - "[flaml.automl: 03-30 21:49:48] {2567} INFO - iteration 49, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:49:49] {2744} INFO - at 51.9s,\testimator xgb_limitdepth's best error=0.3521,\tbest estimator xgb_limitdepth's best error=0.3521\n", - "[flaml.automl: 03-30 21:49:49] {2567} INFO - iteration 50, current learner catboost\n", - "[flaml.automl: 03-30 21:50:01] {2744} INFO - at 63.7s,\testimator catboost's best error=0.3499,\tbest estimator catboost's best error=0.3499\n", - "[flaml.automl: 03-30 21:50:01] {2567} INFO - iteration 51, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 
21:50:01] {2744} INFO - at 64.0s,\testimator xgb_limitdepth's best error=0.3521,\tbest estimator catboost's best error=0.3499\n", - "[flaml.automl: 03-30 21:50:01] {2567} INFO - iteration 52, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:50:06] {2744} INFO - at 68.7s,\testimator xgb_limitdepth's best error=0.3521,\tbest estimator catboost's best error=0.3499\n", - "[flaml.automl: 03-30 21:50:06] {2567} INFO - iteration 53, current learner catboost\n", - "[flaml.automl: 03-30 21:50:13] {2744} INFO - at 75.6s,\testimator catboost's best error=0.3481,\tbest estimator catboost's best error=0.3481\n", - "[flaml.automl: 03-30 21:50:13] {2567} INFO - iteration 54, current learner catboost\n", - "[flaml.automl: 03-30 21:50:41] {2744} INFO - at 104.5s,\testimator catboost's best error=0.3481,\tbest estimator catboost's best error=0.3481\n", - "[flaml.automl: 03-30 21:50:41] {2567} INFO - iteration 55, current learner rf\n", - "[flaml.automl: 03-30 21:50:42] {2744} INFO - at 104.7s,\testimator rf's best error=0.3766,\tbest estimator catboost's best error=0.3481\n", - "[flaml.automl: 03-30 21:50:42] {2567} INFO - iteration 56, current learner lgbm\n", - "[flaml.automl: 03-30 21:50:54] {2744} INFO - at 117.2s,\testimator lgbm's best error=0.3405,\tbest estimator lgbm's best error=0.3405\n", - "[flaml.automl: 03-30 21:50:54] {2567} INFO - iteration 57, current learner lrl1\n", - "/home/ec2-user/miniconda3/envs/myflaml/lib/python3.8/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", - " warnings.warn(\"The max_iter was reached which means \"\n", - "[flaml.automl: 03-30 21:50:54] {2744} INFO - at 117.5s,\testimator lrl1's best error=0.4338,\tbest estimator lgbm's best error=0.3405\n", - "[flaml.automl: 03-30 21:50:54] {2567} INFO - iteration 58, current learner lrl1\n", - "/home/ec2-user/miniconda3/envs/myflaml/lib/python3.8/site-packages/sklearn/linear_model/_sag.py:328: 
ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", - " warnings.warn(\"The max_iter was reached which means \"\n", - "[flaml.automl: 03-30 21:50:55] {2744} INFO - at 117.7s,\testimator lrl1's best error=0.4337,\tbest estimator lgbm's best error=0.3405\n", - "[flaml.automl: 03-30 21:50:55] {2567} INFO - iteration 59, current learner lgbm\n", - "[flaml.automl: 03-30 21:50:56] {2744} INFO - at 119.6s,\testimator lgbm's best error=0.3405,\tbest estimator lgbm's best error=0.3405\n", - "[flaml.automl: 03-30 21:50:56] {2567} INFO - iteration 60, current learner lgbm\n", - "[flaml.automl: 03-30 21:51:02] {2744} INFO - at 124.7s,\testimator lgbm's best error=0.3370,\tbest estimator lgbm's best error=0.3370\n", - "[flaml.automl: 03-30 21:51:02] {2567} INFO - iteration 61, current learner lgbm\n", - "[flaml.automl: 03-30 21:51:07] {2744} INFO - at 130.3s,\testimator lgbm's best error=0.3370,\tbest estimator lgbm's best error=0.3370\n", - "[flaml.automl: 03-30 21:51:07] {2567} INFO - iteration 62, current learner lgbm\n", - "[flaml.automl: 03-30 21:51:12] {2744} INFO - at 134.7s,\testimator lgbm's best error=0.3370,\tbest estimator lgbm's best error=0.3370\n", - "[flaml.automl: 03-30 21:51:12] {2567} INFO - iteration 63, current learner rf\n", - "[flaml.automl: 03-30 21:51:12] {2744} INFO - at 135.0s,\testimator rf's best error=0.3755,\tbest estimator lgbm's best error=0.3370\n", - "[flaml.automl: 03-30 21:51:12] {2567} INFO - iteration 64, current learner lgbm\n", - "[flaml.automl: 03-30 21:51:18] {2744} INFO - at 141.5s,\testimator lgbm's best error=0.3318,\tbest estimator lgbm's best error=0.3318\n", - "[flaml.automl: 03-30 21:51:18] {2567} INFO - iteration 65, current learner rf\n", - "[flaml.automl: 03-30 21:51:19] {2744} INFO - at 141.8s,\testimator rf's best error=0.3755,\tbest estimator lgbm's best error=0.3318\n", - "[flaml.automl: 03-30 21:51:19] {2567} INFO - iteration 66, current learner extra_tree\n", - "[flaml.automl: 03-30 
21:51:20] {2744} INFO - at 143.4s,\testimator extra_tree's best error=0.3777,\tbest estimator lgbm's best error=0.3318\n", - "[flaml.automl: 03-30 21:51:20] {2567} INFO - iteration 67, current learner lgbm\n", - "[flaml.automl: 03-30 21:51:26] {2744} INFO - at 148.6s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:51:26] {2567} INFO - iteration 68, current learner lgbm\n", - "[flaml.automl: 03-30 21:51:32] {2744} INFO - at 155.3s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:51:32] {2567} INFO - iteration 69, current learner lgbm\n", - "[flaml.automl: 03-30 21:51:38] {2744} INFO - at 161.0s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:51:38] {2567} INFO - iteration 70, current learner lgbm\n", - "[flaml.automl: 03-30 21:51:44] {2744} INFO - at 167.3s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:51:44] {2567} INFO - iteration 71, current learner lgbm\n", - "[flaml.automl: 03-30 21:51:50] {2744} INFO - at 173.2s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:51:50] {2567} INFO - iteration 72, current learner rf\n", - "[flaml.automl: 03-30 21:51:50] {2744} INFO - at 173.4s,\testimator rf's best error=0.3755,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:51:50] {2567} INFO - iteration 73, current learner xgboost\n", - "[flaml.automl: 03-30 21:51:51] {2744} INFO - at 173.6s,\testimator xgboost's best error=0.3594,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:51:51] {2567} INFO - iteration 74, current learner lgbm\n", - "[flaml.automl: 03-30 21:51:56] {2744} INFO - at 178.7s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:51:56] {2567} INFO - 
iteration 75, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:52:04] {2744} INFO - at 186.8s,\testimator xgb_limitdepth's best error=0.3382,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:52:04] {2567} INFO - iteration 76, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:52:06] {2744} INFO - at 189.6s,\testimator xgb_limitdepth's best error=0.3382,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:52:06] {2567} INFO - iteration 77, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:52:29] {2744} INFO - at 212.3s,\testimator xgb_limitdepth's best error=0.3382,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:52:29] {2567} INFO - iteration 78, current learner lgbm\n", - "[flaml.automl: 03-30 21:52:33] {2744} INFO - at 215.8s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:52:33] {2567} INFO - iteration 79, current learner lgbm\n", - "[flaml.automl: 03-30 21:52:43] {2744} INFO - at 226.1s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:52:43] {2567} INFO - iteration 80, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:52:48] {2744} INFO - at 230.8s,\testimator xgb_limitdepth's best error=0.3382,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:52:48] {2567} INFO - iteration 81, current learner rf\n", - "[flaml.automl: 03-30 21:52:48] {2744} INFO - at 231.2s,\testimator rf's best error=0.3746,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:52:48] {2567} INFO - iteration 82, current learner lgbm\n", - "[flaml.automl: 03-30 21:53:12] {2744} INFO - at 254.6s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:53:12] {2567} INFO - iteration 83, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:53:24] {2744} INFO - at 
266.9s,\testimator xgb_limitdepth's best error=0.3341,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:53:24] {2567} INFO - iteration 84, current learner rf\n", - "[flaml.automl: 03-30 21:53:24] {2744} INFO - at 267.2s,\testimator rf's best error=0.3746,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:53:24] {2567} INFO - iteration 85, current learner extra_tree\n", - "[flaml.automl: 03-30 21:53:25] {2744} INFO - at 268.3s,\testimator extra_tree's best error=0.3777,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:53:25] {2567} INFO - iteration 86, current learner extra_tree\n", - "[flaml.automl: 03-30 21:53:27] {2744} INFO - at 270.4s,\testimator extra_tree's best error=0.3753,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:53:27] {2567} INFO - iteration 87, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:53:39] {2744} INFO - at 281.8s,\testimator xgb_limitdepth's best error=0.3341,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:53:39] {2567} INFO - iteration 88, current learner xgboost\n", - "[flaml.automl: 03-30 21:53:40] {2744} INFO - at 282.8s,\testimator xgboost's best error=0.3594,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:53:40] {2567} INFO - iteration 89, current learner extra_tree\n", - "[flaml.automl: 03-30 21:53:42] {2744} INFO - at 285.0s,\testimator extra_tree's best error=0.3753,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:53:42] {2567} INFO - iteration 90, current learner lgbm\n", - "[flaml.automl: 03-30 21:53:44] {2744} INFO - at 286.9s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:53:44] {2567} INFO - iteration 91, current learner lgbm\n", - "[flaml.automl: 03-30 21:54:20] {2744} INFO - at 322.9s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 
03-30 21:54:20] {2567} INFO - iteration 92, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:54:29] {2744} INFO - at 331.6s,\testimator xgb_limitdepth's best error=0.3316,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:54:29] {2567} INFO - iteration 93, current learner xgboost\n", - "[flaml.automl: 03-30 21:54:30] {2744} INFO - at 332.8s,\testimator xgboost's best error=0.3594,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:54:30] {2567} INFO - iteration 94, current learner lrl1\n", - "/home/ec2-user/miniconda3/envs/myflaml/lib/python3.8/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", - " warnings.warn(\"The max_iter was reached which means \"\n", - "[flaml.automl: 03-30 21:54:30] {2744} INFO - at 333.0s,\testimator lrl1's best error=0.4337,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:54:30] {2567} INFO - iteration 95, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:54:40] {2744} INFO - at 343.1s,\testimator xgb_limitdepth's best error=0.3316,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:54:40] {2567} INFO - iteration 96, current learner extra_tree\n", - "[flaml.automl: 03-30 21:54:42] {2744} INFO - at 344.6s,\testimator extra_tree's best error=0.3753,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:54:42] {2567} INFO - iteration 97, current learner lgbm\n", - "[flaml.automl: 03-30 21:54:43] {2744} INFO - at 346.2s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:54:43] {2567} INFO - iteration 98, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:54:52] {2744} INFO - at 354.8s,\testimator xgb_limitdepth's best error=0.3316,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:54:52] {2567} INFO - iteration 99, current learner rf\n", - 
"[flaml.automl: 03-30 21:54:52] {2744} INFO - at 355.3s,\testimator rf's best error=0.3746,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:54:52] {2567} INFO - iteration 100, current learner extra_tree\n", - "[flaml.automl: 03-30 21:54:56] {2744} INFO - at 358.8s,\testimator extra_tree's best error=0.3753,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:54:56] {2567} INFO - iteration 101, current learner rf\n", - "[flaml.automl: 03-30 21:54:56] {2744} INFO - at 359.2s,\testimator rf's best error=0.3746,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:54:56] {2567} INFO - iteration 102, current learner lgbm\n", - "[flaml.automl: 03-30 21:55:02] {2744} INFO - at 365.5s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:55:02] {2567} INFO - iteration 103, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:55:12] {2744} INFO - at 375.1s,\testimator xgb_limitdepth's best error=0.3306,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:55:12] {2567} INFO - iteration 104, current learner xgboost\n", - "[flaml.automl: 03-30 21:55:13] {2744} INFO - at 376.4s,\testimator xgboost's best error=0.3501,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:55:13] {2567} INFO - iteration 105, current learner lgbm\n", - "[flaml.automl: 03-30 21:55:18] {2744} INFO - at 381.5s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:55:18] {2567} INFO - iteration 106, current learner xgboost\n", - "[flaml.automl: 03-30 21:55:21] {2744} INFO - at 383.9s,\testimator xgboost's best error=0.3501,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:55:21] {2567} INFO - iteration 107, current learner xgboost\n", - "[flaml.automl: 03-30 21:55:22] {2744} INFO - at 385.1s,\testimator xgboost's best error=0.3392,\tbest estimator lgbm's best 
error=0.3282\n", - "[flaml.automl: 03-30 21:55:22] {2567} INFO - iteration 108, current learner xgboost\n", - "[flaml.automl: 03-30 21:55:23] {2744} INFO - at 386.5s,\testimator xgboost's best error=0.3392,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:55:23] {2567} INFO - iteration 109, current learner xgboost\n", - "[flaml.automl: 03-30 21:55:25] {2744} INFO - at 387.7s,\testimator xgboost's best error=0.3391,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:55:25] {2567} INFO - iteration 110, current learner rf\n", - "[flaml.automl: 03-30 21:55:25] {2744} INFO - at 388.1s,\testimator rf's best error=0.3746,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:55:25] {2567} INFO - iteration 111, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:55:34] {2744} INFO - at 397.0s,\testimator xgb_limitdepth's best error=0.3306,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:55:34] {2567} INFO - iteration 112, current learner extra_tree\n", - "[flaml.automl: 03-30 21:55:38] {2744} INFO - at 400.7s,\testimator extra_tree's best error=0.3711,\tbest estimator lgbm's best error=0.3282\n", - "[flaml.automl: 03-30 21:55:38] {2567} INFO - iteration 113, current learner lgbm\n", - "[flaml.automl: 03-30 21:55:43] {2744} INFO - at 405.9s,\testimator lgbm's best error=0.3274,\tbest estimator lgbm's best error=0.3274\n", - "[flaml.automl: 03-30 21:55:43] {2567} INFO - iteration 114, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:55:55] {2744} INFO - at 417.7s,\testimator xgb_limitdepth's best error=0.3306,\tbest estimator lgbm's best error=0.3274\n", - "[flaml.automl: 03-30 21:55:55] {2567} INFO - iteration 115, current learner extra_tree\n", - "[flaml.automl: 03-30 21:55:57] {2744} INFO - at 419.9s,\testimator extra_tree's best error=0.3711,\tbest estimator lgbm's best error=0.3274\n", - "[flaml.automl: 03-30 21:55:57] {2567} INFO - iteration 116, current learner 
lrl1\n", - "/home/ec2-user/miniconda3/envs/myflaml/lib/python3.8/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", - " warnings.warn(\"The max_iter was reached which means \"\n", - "[flaml.automl: 03-30 21:55:58] {2744} INFO - at 421.0s,\testimator lrl1's best error=0.4334,\tbest estimator lgbm's best error=0.3274\n", - "[flaml.automl: 03-30 21:55:58] {2567} INFO - iteration 117, current learner lgbm\n", - "[flaml.automl: 03-30 21:56:03] {2744} INFO - at 426.5s,\testimator lgbm's best error=0.3274,\tbest estimator lgbm's best error=0.3274\n", - "[flaml.automl: 03-30 21:56:03] {2567} INFO - iteration 118, current learner lgbm\n", - "[flaml.automl: 03-30 21:56:07] {2744} INFO - at 429.6s,\testimator lgbm's best error=0.3274,\tbest estimator lgbm's best error=0.3274\n", - "[flaml.automl: 03-30 21:56:07] {2567} INFO - iteration 119, current learner xgb_limitdepth\n", - "[flaml.automl: 03-30 21:56:16] {2744} INFO - at 439.2s,\testimator xgb_limitdepth's best error=0.3306,\tbest estimator lgbm's best error=0.3274\n", - "[flaml.automl: 03-30 21:56:16] {2567} INFO - iteration 120, current learner extra_tree\n", - "[flaml.automl: 03-30 21:56:18] {2744} INFO - at 440.7s,\testimator extra_tree's best error=0.3711,\tbest estimator lgbm's best error=0.3274\n", - "[flaml.automl: 03-30 21:56:18] {2567} INFO - iteration 121, current learner lgbm\n", - "[flaml.automl: 03-30 21:56:33] {2744} INFO - at 456.2s,\testimator lgbm's best error=0.3268,\tbest estimator lgbm's best error=0.3268\n", - "[flaml.automl: 03-30 21:56:33] {2567} INFO - iteration 122, current learner lgbm\n", - "[flaml.automl: 03-30 21:56:40] {2744} INFO - at 463.3s,\testimator lgbm's best error=0.3268,\tbest estimator lgbm's best error=0.3268\n", - "[flaml.automl: 03-30 21:56:40] {2567} INFO - iteration 123, current learner lgbm\n", - "[flaml.automl: 03-30 21:56:58] {2744} INFO - at 481.3s,\testimator lgbm's best 
error=0.3250,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:56:58] {2567} INFO - iteration 124, current learner extra_tree\n", - "[flaml.automl: 03-30 21:57:05] {2744} INFO - at 488.1s,\testimator extra_tree's best error=0.3623,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:57:05] {2567} INFO - iteration 125, current learner rf\n", - "[flaml.automl: 03-30 21:57:05] {2744} INFO - at 488.5s,\testimator rf's best error=0.3722,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:57:05] {2567} INFO - iteration 126, current learner lgbm\n", - "[flaml.automl: 03-30 21:57:15] {2744} INFO - at 498.5s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:57:15] {2567} INFO - iteration 127, current learner lgbm\n", - "[flaml.automl: 03-30 21:58:00] {2744} INFO - at 543.2s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:00] {2567} INFO - iteration 128, current learner lgbm\n", - "[flaml.automl: 03-30 21:58:24] {2744} INFO - at 566.8s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:24] {2567} INFO - iteration 129, current learner lgbm\n", - "[flaml.automl: 03-30 21:58:41] {2744} INFO - at 583.9s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:41] {2567} INFO - iteration 130, current learner rf\n", - "[flaml.automl: 03-30 21:58:41] {2744} INFO - at 584.2s,\testimator rf's best error=0.3722,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:41] {2567} INFO - iteration 131, current learner extra_tree\n", - "[flaml.automl: 03-30 21:58:48] {2744} INFO - at 590.7s,\testimator extra_tree's best error=0.3572,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:48] {2567} INFO - iteration 132, current learner 
extra_tree\n", - "[flaml.automl: 03-30 21:58:54] {2744} INFO - at 596.8s,\testimator extra_tree's best error=0.3572,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:54] {2567} INFO - iteration 133, current learner rf\n", - "[flaml.automl: 03-30 21:58:54] {2744} INFO - at 597.0s,\testimator rf's best error=0.3722,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:54] {2567} INFO - iteration 134, current learner rf\n", - "[flaml.automl: 03-30 21:58:54] {2744} INFO - at 597.2s,\testimator rf's best error=0.3701,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:54] {2567} INFO - iteration 135, current learner rf\n", - "[flaml.automl: 03-30 21:58:54] {2744} INFO - at 597.3s,\testimator rf's best error=0.3701,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:54] {2567} INFO - iteration 136, current learner rf\n", - "[flaml.automl: 03-30 21:58:54] {2744} INFO - at 597.5s,\testimator rf's best error=0.3701,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:54] {2567} INFO - iteration 137, current learner rf\n", - "[flaml.automl: 03-30 21:58:55] {2744} INFO - at 597.7s,\testimator rf's best error=0.3655,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:55] {2567} INFO - iteration 138, current learner rf\n", - "[flaml.automl: 03-30 21:58:55] {2744} INFO - at 597.8s,\testimator rf's best error=0.3655,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:55] {2567} INFO - iteration 139, current learner rf\n", - "[flaml.automl: 03-30 21:58:55] {2744} INFO - at 598.1s,\testimator rf's best error=0.3641,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:55] {2567} INFO - iteration 140, current learner rf\n", - "[flaml.automl: 03-30 21:58:55] {2744} INFO - at 598.3s,\testimator rf's best error=0.3604,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:55] 
{2567} INFO - iteration 141, current learner rf\n", - "[flaml.automl: 03-30 21:58:55] {2744} INFO - at 598.5s,\testimator rf's best error=0.3594,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:55] {2567} INFO - iteration 142, current learner rf\n", - "[flaml.automl: 03-30 21:58:56] {2744} INFO - at 598.7s,\testimator rf's best error=0.3594,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:56] {2567} INFO - iteration 143, current learner rf\n", - "[flaml.automl: 03-30 21:58:56] {2744} INFO - at 599.0s,\testimator rf's best error=0.3594,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:56] {2567} INFO - iteration 144, current learner rf\n", - "[flaml.automl: 03-30 21:58:56] {2744} INFO - at 599.1s,\testimator rf's best error=0.3594,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:56] {2567} INFO - iteration 145, current learner rf\n", - "[flaml.automl: 03-30 21:58:56] {2744} INFO - at 599.3s,\testimator rf's best error=0.3594,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:56] {2567} INFO - iteration 146, current learner rf\n", - "[flaml.automl: 03-30 21:58:56] {2744} INFO - at 599.4s,\testimator rf's best error=0.3594,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:56] {2567} INFO - iteration 147, current learner rf\n", - "[flaml.automl: 03-30 21:58:56] {2744} INFO - at 599.5s,\testimator rf's best error=0.3594,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:56] {2567} INFO - iteration 148, current learner rf\n", - "[flaml.automl: 03-30 21:58:56] {2744} INFO - at 599.5s,\testimator rf's best error=0.3594,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:58:56] {2567} INFO - iteration 149, current learner rf\n", - "[flaml.automl: 03-30 21:58:57] {2744} INFO - at 599.6s,\testimator rf's best error=0.3594,\tbest estimator lgbm's best error=0.3250\n", - 
"[flaml.automl: 03-30 21:58:57] {2567} INFO - iteration 150, current learner rf\n", - "[flaml.automl: 03-30 21:58:57] {2744} INFO - at 599.7s,\testimator rf's best error=0.3594,\tbest estimator lgbm's best error=0.3250\n", - "[flaml.automl: 03-30 21:59:13] {2974} INFO - retrain lgbm for 16.9s\n", - "[flaml.automl: 03-30 21:59:14] {2981} INFO - retrained model: LGBMClassifier(colsample_bytree=0.763983850698587,\n", - " learning_rate=0.08749366799403727, max_bin=127,\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.2s,\testimator extra_tree's best error=0.3787,\tbest estimator lgbm's best error=0.3550\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 14, current learner rf\n", + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.2s,\testimator rf's best error=0.3816,\tbest estimator lgbm's best error=0.3550\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 15, current learner rf\n", + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.3s,\testimator rf's best error=0.3791,\tbest estimator lgbm's best error=0.3550\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 16, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.4s,\testimator lgbm's best error=0.3550,\tbest estimator lgbm's best error=0.3550\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 17, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.4s,\testimator 
xgboost's best error=0.3699,\tbest estimator lgbm's best error=0.3550\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 18, current learner lgbm\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.7s,\testimator lgbm's best error=0.3545,\tbest estimator lgbm's best error=0.3545\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 19, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.8s,\testimator xgboost's best error=0.3596,\tbest estimator lgbm's best error=0.3545\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 20, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.8s,\testimator xgboost's best error=0.3596,\tbest estimator lgbm's best error=0.3545\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 21, current learner xgboost\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "[flaml.automl.logger: 04-28 02:20:42] {2364} INFO - at 2.9s,\testimator xgboost's best error=0.3596,\tbest estimator lgbm's best error=0.3545\n", + "[flaml.automl.logger: 04-28 02:20:42] {2191} INFO - iteration 22, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:43] {2364} INFO - at 3.1s,\testimator lgbm's best error=0.3545,\tbest estimator lgbm's best error=0.3545\n", + "[flaml.automl.logger: 04-28 02:20:43] {2191} INFO - iteration 23, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:43] {2364} INFO - at 3.6s,\testimator lgbm's best error=0.3545,\tbest estimator lgbm's best error=0.3545\n", + "[flaml.automl.logger: 04-28 02:20:43] {2191} INFO - iteration 24, current learner xgboost\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:20:43] {2364} INFO - at 3.9s,\testimator xgboost's best error=0.3586,\tbest estimator lgbm's best error=0.3545\n", + "[flaml.automl.logger: 04-28 02:20:43] {2191} INFO - iteration 25, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:20:44] {2364} INFO - at 4.0s,\testimator xgboost's best error=0.3577,\tbest estimator lgbm's best error=0.3545\n", + "[flaml.automl.logger: 04-28 02:20:44] {2191} INFO - iteration 26, current learner lgbm\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:20:44] {2364} INFO - at 
4.1s,\testimator lgbm's best error=0.3536,\tbest estimator lgbm's best error=0.3536\n", + "[flaml.automl.logger: 04-28 02:20:44] {2191} INFO - iteration 27, current learner rf\n", + "[flaml.automl.logger: 04-28 02:20:44] {2364} INFO - at 4.2s,\testimator rf's best error=0.3791,\tbest estimator lgbm's best error=0.3536\n", + "[flaml.automl.logger: 04-28 02:20:44] {2191} INFO - iteration 28, current learner xgboost\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:20:44] {2364} INFO - at 4.7s,\testimator xgboost's best error=0.3561,\tbest estimator lgbm's best error=0.3536\n", + "[flaml.automl.logger: 04-28 02:20:44] {2191} INFO - iteration 29, current learner xgboost\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:20:44] {2364} INFO - at 4.9s,\testimator xgboost's best error=0.3561,\tbest estimator lgbm's best error=0.3536\n", + "[flaml.automl.logger: 04-28 02:20:44] {2191} INFO - iteration 30, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:45] {2364} INFO - at 5.2s,\testimator lgbm's best error=0.3536,\tbest estimator lgbm's best error=0.3536\n", + "[flaml.automl.logger: 04-28 02:20:45] {2191} INFO - iteration 31, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:45] {2364} INFO - at 5.6s,\testimator lgbm's best error=0.3536,\tbest estimator lgbm's best error=0.3536\n", + 
"[flaml.automl.logger: 04-28 02:20:45] {2191} INFO - iteration 32, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:46] {2364} INFO - at 6.3s,\testimator lgbm's best error=0.3528,\tbest estimator lgbm's best error=0.3528\n", + "[flaml.automl.logger: 04-28 02:20:46] {2191} INFO - iteration 33, current learner xgboost\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:20:47] {2364} INFO - at 7.2s,\testimator xgboost's best error=0.3561,\tbest estimator lgbm's best error=0.3528\n", + "[flaml.automl.logger: 04-28 02:20:47] {2191} INFO - iteration 34, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:20:49] {2364} INFO - at 9.6s,\testimator lgbm's best error=0.3405,\tbest estimator lgbm's best error=0.3405\n", + "[flaml.automl.logger: 04-28 02:20:49] {2191} INFO - iteration 35, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:20:50] {2364} INFO - at 10.5s,\testimator catboost's best error=0.3587,\tbest estimator lgbm's best error=0.3405\n", + "[flaml.automl.logger: 04-28 02:20:50] {2191} INFO - iteration 36, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:20:50] {2364} INFO - at 10.8s,\testimator catboost's best error=0.3587,\tbest estimator lgbm's best error=0.3405\n", + "[flaml.automl.logger: 04-28 02:20:50] {2191} INFO - iteration 37, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:20:51] {2364} INFO - at 11.8s,\testimator catboost's best error=0.3587,\tbest estimator lgbm's best error=0.3405\n", + "[flaml.automl.logger: 04-28 02:20:51] {2191} INFO - iteration 38, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:20:56] {2364} INFO - at 16.9s,\testimator 
catboost's best error=0.3587,\tbest estimator lgbm's best error=0.3405\n", + "[flaml.automl.logger: 04-28 02:20:56] {2191} INFO - iteration 39, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:20:59] {2364} INFO - at 19.9s,\testimator catboost's best error=0.3483,\tbest estimator lgbm's best error=0.3405\n", + "[flaml.automl.logger: 04-28 02:20:59] {2191} INFO - iteration 40, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:00] {2364} INFO - at 20.7s,\testimator lgbm's best error=0.3405,\tbest estimator lgbm's best error=0.3405\n", + "[flaml.automl.logger: 04-28 02:21:00] {2191} INFO - iteration 41, current learner rf\n", + "[flaml.automl.logger: 04-28 02:21:00] {2364} INFO - at 20.8s,\testimator rf's best error=0.3791,\tbest estimator lgbm's best error=0.3405\n", + "[flaml.automl.logger: 04-28 02:21:00] {2191} INFO - iteration 42, current learner rf\n", + "[flaml.automl.logger: 04-28 02:21:00] {2364} INFO - at 20.8s,\testimator rf's best error=0.3789,\tbest estimator lgbm's best error=0.3405\n", + "[flaml.automl.logger: 04-28 02:21:00] {2191} INFO - iteration 43, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:03] {2364} INFO - at 23.2s,\testimator lgbm's best error=0.3370,\tbest estimator lgbm's best error=0.3370\n", + "[flaml.automl.logger: 04-28 02:21:03] {2191} INFO - iteration 44, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:05] {2364} INFO - at 25.8s,\testimator lgbm's best error=0.3370,\tbest estimator lgbm's best error=0.3370\n", + "[flaml.automl.logger: 04-28 02:21:05] {2191} INFO - iteration 45, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:21:05] {2364} INFO - at 25.8s,\testimator extra_tree's best error=0.3787,\tbest estimator lgbm's best error=0.3370\n", + "[flaml.automl.logger: 04-28 02:21:05] {2191} INFO - iteration 46, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:09] {2364} INFO - at 29.7s,\testimator lgbm's best error=0.3370,\tbest estimator lgbm's best 
error=0.3370\n", + "[flaml.automl.logger: 04-28 02:21:09] {2191} INFO - iteration 47, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:12] {2364} INFO - at 32.6s,\testimator lgbm's best error=0.3318,\tbest estimator lgbm's best error=0.3318\n", + "[flaml.automl.logger: 04-28 02:21:12] {2191} INFO - iteration 48, current learner xgb_limitdepth\n", + "[flaml.automl.logger: 04-28 02:21:12] {2364} INFO - at 32.7s,\testimator xgb_limitdepth's best error=0.3630,\tbest estimator lgbm's best error=0.3318\n", + "[flaml.automl.logger: 04-28 02:21:12] {2191} INFO - iteration 49, current learner xgb_limitdepth\n", + "[flaml.automl.logger: 04-28 02:21:12] {2364} INFO - at 32.7s,\testimator xgb_limitdepth's best error=0.3630,\tbest estimator lgbm's best error=0.3318\n", + "[flaml.automl.logger: 04-28 02:21:12] {2191} INFO - iteration 50, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:21:13] {2364} INFO - at 33.1s,\testimator xgb_limitdepth's best error=0.3630,\tbest estimator lgbm's best error=0.3318\n", + "[flaml.automl.logger: 04-28 02:21:13] {2191} INFO - iteration 51, current learner xgb_limitdepth\n", + "[flaml.automl.logger: 04-28 02:21:13] {2364} INFO - at 33.3s,\testimator 
xgb_limitdepth's best error=0.3572,\tbest estimator lgbm's best error=0.3318\n", + "[flaml.automl.logger: 04-28 02:21:13] {2191} INFO - iteration 52, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:21:14] {2364} INFO - at 34.0s,\testimator xgb_limitdepth's best error=0.3536,\tbest estimator lgbm's best error=0.3318\n", + "[flaml.automl.logger: 04-28 02:21:14] {2191} INFO - iteration 53, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:17] {2364} INFO - at 37.4s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:17] {2191} INFO - iteration 54, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:20] {2364} INFO - at 40.1s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:20] {2191} INFO - iteration 55, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:22] {2364} INFO - at 42.8s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:22] {2191} INFO - iteration 56, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:26] {2364} INFO - at 46.8s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:26] {2191} INFO - iteration 57, current learner xgb_limitdepth\n", + "[flaml.automl.logger: 04-28 
02:21:27] {2364} INFO - at 47.0s,\testimator xgb_limitdepth's best error=0.3536,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:27] {2191} INFO - iteration 58, current learner lgbm\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:21:30] {2364} INFO - at 50.0s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:30] {2191} INFO - iteration 59, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:33] {2364} INFO - at 53.0s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:33] {2191} INFO - iteration 60, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:21:34] {2364} INFO - at 54.9s,\testimator catboost's best error=0.3479,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:34] {2191} INFO - iteration 61, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:37] {2364} INFO - at 57.1s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:37] {2191} INFO - iteration 62, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:21:37] {2364} INFO - at 57.4s,\testimator xgb_limitdepth's best 
error=0.3536,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:37] {2191} INFO - iteration 63, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:21:41] {2364} INFO - at 61.8s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:41] {2191} INFO - iteration 64, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:21:42] {2364} INFO - at 62.0s,\testimator extra_tree's best error=0.3787,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:42] {2191} INFO - iteration 65, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:21:45] {2364} INFO - at 65.1s,\testimator xgb_limitdepth's best error=0.3516,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:45] {2191} INFO - iteration 66, current learner lrl1\n", + "[flaml.automl.logger: 04-28 02:21:45] {2364} INFO - at 65.2s,\testimator lrl1's best error=0.4338,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:45] {2191} INFO - iteration 67, current learner lrl1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:21:45] {2364} INFO - at 65.4s,\testimator lrl1's best error=0.4338,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 
04-28 02:21:45] {2191} INFO - iteration 68, current learner lrl1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:21:45] {2364} INFO - at 65.7s,\testimator lrl1's best error=0.4338,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:45] {2191} INFO - iteration 69, current learner lrl1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:21:46] {2364} INFO - at 66.5s,\testimator lrl1's best error=0.4334,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:46] {2191} INFO - iteration 70, current learner lgbm\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:21:59] {2364} INFO - at 79.0s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:21:59] {2191} INFO - iteration 71, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:22:00] {2364} INFO - at 80.3s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:22:00] {2191} INFO - iteration 72, 
current learner catboost\n", + "[flaml.automl.logger: 04-28 02:22:29] {2364} INFO - at 109.5s,\testimator catboost's best error=0.3479,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:22:29] {2191} INFO - iteration 73, current learner xgboost\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:22:31] {2364} INFO - at 111.9s,\testimator xgboost's best error=0.3561,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:22:31] {2191} INFO - iteration 74, current learner rf\n", + "[flaml.automl.logger: 04-28 02:22:32] {2364} INFO - at 112.0s,\testimator rf's best error=0.3781,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:22:32] {2191} INFO - iteration 75, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:22:32] {2364} INFO - at 112.4s,\testimator xgb_limitdepth's best error=0.3516,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:22:32] {2191} INFO - iteration 76, current learner rf\n", + "[flaml.automl.logger: 04-28 02:22:32] {2364} INFO - at 112.5s,\testimator rf's best error=0.3781,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:22:32] {2191} INFO - iteration 77, current learner xgb_limitdepth\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:22:38] {2364} INFO - at 118.4s,\testimator xgb_limitdepth's best error=0.3516,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:22:38] {2191} INFO - iteration 78, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:23:03] {2364} INFO - at 143.6s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:03] {2191} INFO - iteration 79, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:23:04] {2364} INFO - at 144.3s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:04] {2191} INFO - iteration 80, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:04] {2364} INFO - at 144.4s,\testimator rf's best error=0.3725,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:04] {2191} INFO - iteration 81, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:04] {2364} INFO - at 144.5s,\testimator rf's best error=0.3725,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:04] {2191} INFO - iteration 82, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:04] {2364} INFO - at 144.6s,\testimator rf's best error=0.3725,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:04] {2191} INFO - iteration 83, current learner xgboost\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " 
warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:23:06] {2364} INFO - at 146.4s,\testimator xgboost's best error=0.3555,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:06] {2191} INFO - iteration 84, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:06] {2364} INFO - at 146.5s,\testimator rf's best error=0.3706,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:06] {2191} INFO - iteration 85, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:06] {2364} INFO - at 146.7s,\testimator rf's best error=0.3706,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:06] {2191} INFO - iteration 86, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:06] {2364} INFO - at 146.8s,\testimator rf's best error=0.3706,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:06] {2191} INFO - iteration 87, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:23:06] {2364} INFO - at 146.9s,\testimator extra_tree's best error=0.3787,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:06] {2191} INFO - iteration 88, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:07] {2364} INFO - at 146.9s,\testimator rf's best error=0.3706,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:07] {2191} INFO - iteration 89, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:07] {2364} INFO - at 147.1s,\testimator rf's best error=0.3706,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:07] {2191} INFO - iteration 90, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:07] {2364} INFO - at 147.3s,\testimator rf's best error=0.3706,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 
04-28 02:23:07] {2191} INFO - iteration 91, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:23:12] {2364} INFO - at 152.4s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:12] {2191} INFO - iteration 92, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:23:13] {2364} INFO - at 153.2s,\testimator xgb_limitdepth's best error=0.3516,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:13] {2191} INFO - iteration 93, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:13] {2364} INFO - at 153.4s,\testimator rf's best error=0.3678,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:13] {2191} INFO - iteration 94, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:23:15] {2364} INFO - at 155.7s,\testimator xgb_limitdepth's best error=0.3483,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:15] {2191} INFO - iteration 95, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:15] {2364} INFO - at 155.8s,\testimator rf's best error=0.3678,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:15] {2191} INFO - iteration 96, current 
learner rf\n", + "[flaml.automl.logger: 04-28 02:23:16] {2364} INFO - at 156.0s,\testimator rf's best error=0.3617,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:16] {2191} INFO - iteration 97, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:16] {2364} INFO - at 156.3s,\testimator rf's best error=0.3593,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:16] {2191} INFO - iteration 98, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:23:20] {2364} INFO - at 160.6s,\testimator xgb_limitdepth's best error=0.3483,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:20] {2191} INFO - iteration 99, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:21] {2364} INFO - at 161.0s,\testimator rf's best error=0.3593,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:21] {2191} INFO - iteration 100, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:21] {2364} INFO - at 161.5s,\testimator rf's best error=0.3593,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:21] {2191} INFO - iteration 101, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:21] {2364} INFO - at 161.9s,\testimator rf's best error=0.3593,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:21] {2191} INFO - iteration 102, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:23:26] {2364} INFO - at 166.1s,\testimator lgbm's best error=0.3282,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 
04-28 02:23:26] {2191} INFO - iteration 103, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:23:28] {2364} INFO - at 168.5s,\testimator xgb_limitdepth's best error=0.3483,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:28] {2191} INFO - iteration 104, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:30] {2364} INFO - at 170.4s,\testimator rf's best error=0.3499,\tbest estimator lgbm's best error=0.3282\n", + "[flaml.automl.logger: 04-28 02:23:30] {2191} INFO - iteration 105, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:23:35] {2364} INFO - at 175.2s,\testimator lgbm's best error=0.3274,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:23:35] {2191} INFO - iteration 106, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:36] {2364} INFO - at 176.4s,\testimator rf's best error=0.3499,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:23:36] {2191} INFO - iteration 107, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:38] {2364} INFO - at 178.9s,\testimator rf's best error=0.3491,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:23:38] {2191} INFO - iteration 108, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:41] {2364} INFO - at 181.3s,\testimator rf's best error=0.3411,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:23:41] {2191} INFO - iteration 109, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:43] {2364} INFO - at 183.8s,\testimator rf's best error=0.3411,\tbest estimator 
lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:23:43] {2191} INFO - iteration 110, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:49] {2364} INFO - at 189.1s,\testimator rf's best error=0.3355,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:23:49] {2191} INFO - iteration 111, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:23:51] {2364} INFO - at 191.7s,\testimator xgb_limitdepth's best error=0.3483,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:23:51] {2191} INFO - iteration 112, current learner rf\n", + "[flaml.automl.logger: 04-28 02:23:54] {2364} INFO - at 194.4s,\testimator rf's best error=0.3355,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:23:54] {2191} INFO - iteration 113, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:23:56] {2364} INFO - at 196.8s,\testimator lgbm's best error=0.3274,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:23:56] {2191} INFO - iteration 114, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:24:14] {2364} INFO - at 214.9s,\testimator xgb_limitdepth's best error=0.3389,\tbest estimator lgbm's best error=0.3274\n", + 
"[flaml.automl.logger: 04-28 02:24:14] {2191} INFO - iteration 115, current learner rf\n", + "[flaml.automl.logger: 04-28 02:24:25] {2364} INFO - at 225.5s,\testimator rf's best error=0.3346,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:24:25] {2191} INFO - iteration 116, current learner lrl1\n", + "[flaml.automl.logger: 04-28 02:24:26] {2364} INFO - at 226.4s,\testimator lrl1's best error=0.4334,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:24:26] {2191} INFO - iteration 117, current learner lgbm\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:24:27] {2364} INFO - at 227.9s,\testimator lgbm's best error=0.3274,\tbest estimator lgbm's best error=0.3274\n", + "[flaml.automl.logger: 04-28 02:24:27] {2191} INFO - iteration 118, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:24:37] {2364} INFO - at 237.0s,\testimator lgbm's best error=0.3268,\tbest estimator lgbm's best error=0.3268\n", + "[flaml.automl.logger: 04-28 02:24:37] {2191} INFO - iteration 119, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:25:01] {2364} INFO - at 261.0s,\testimator xgb_limitdepth's best error=0.3358,\tbest estimator lgbm's best error=0.3268\n", + "[flaml.automl.logger: 04-28 02:25:01] {2191} INFO - iteration 120, current learner 
lgbm\n", + "[flaml.automl.logger: 04-28 02:25:05] {2364} INFO - at 265.6s,\testimator lgbm's best error=0.3268,\tbest estimator lgbm's best error=0.3268\n", + "[flaml.automl.logger: 04-28 02:25:05] {2191} INFO - iteration 121, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:25:15] {2364} INFO - at 275.5s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:25:15] {2191} INFO - iteration 122, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:25:20] {2364} INFO - at 280.4s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:25:20] {2191} INFO - iteration 123, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:25:48] {2364} INFO - at 308.8s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:25:48] {2191} INFO - iteration 124, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:26:03] {2364} INFO - at 323.7s,\testimator xgb_limitdepth's best error=0.3358,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:26:03] {2191} INFO - iteration 125, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:26:15] {2364} INFO - at 335.6s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:26:15] {2191} INFO - iteration 126, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:26:25] {2364} INFO - at 345.2s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + 
"[flaml.automl.logger: 04-28 02:26:25] {2191} INFO - iteration 127, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:26:30] {2364} INFO - at 350.2s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:26:30] {2191} INFO - iteration 128, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:26:45] {2364} INFO - at 365.5s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:26:45] {2191} INFO - iteration 129, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:26:47] {2364} INFO - at 367.6s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:26:47] {2191} INFO - iteration 130, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:27:31] {2364} INFO - at 411.1s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:27:31] {2191} INFO - iteration 131, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:27:54] {2364} INFO - at 434.4s,\testimator xgb_limitdepth's best error=0.3353,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:27:54] {2191} INFO - iteration 132, current learner rf\n", + "[flaml.automl.logger: 04-28 02:27:59] {2364} INFO - at 439.5s,\testimator rf's best error=0.3346,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:27:59] {2191} INFO - iteration 133, current learner rf\n", + "[flaml.automl.logger: 04-28 02:28:15] {2364} INFO - at 455.9s,\testimator rf's 
best error=0.3346,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:28:15] {2191} INFO - iteration 134, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:28:16] {2364} INFO - at 456.0s,\testimator extra_tree's best error=0.3786,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:28:16] {2191} INFO - iteration 135, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:29:38] {2364} INFO - at 538.9s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:29:38] {2191} INFO - iteration 136, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:29:40] {2364} INFO - at 540.3s,\testimator lgbm's best error=0.3250,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:29:40] {2191} INFO - iteration 137, current learner rf\n", + "[flaml.automl.logger: 04-28 02:29:46] {2364} INFO - at 546.6s,\testimator rf's best error=0.3346,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:29:46] {2191} INFO - iteration 138, current learner xgb_limitdepth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:30:10] {2364} INFO - at 570.4s,\testimator xgb_limitdepth's best error=0.3353,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:30:10] {2191} INFO - iteration 139, current learner rf\n", + "[flaml.automl.logger: 04-28 02:30:35] {2364} INFO - at 595.2s,\testimator rf's best error=0.3336,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:30:35] {2191} INFO - iteration 140, current learner catboost\n", + 
"[flaml.automl.logger: 04-28 02:30:39] {2364} INFO - at 599.8s,\testimator catboost's best error=0.3422,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:30:39] {2191} INFO - iteration 141, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:30:39] {2364} INFO - at 599.8s,\testimator extra_tree's best error=0.3786,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:30:39] {2191} INFO - iteration 142, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:30:39] {2364} INFO - at 599.8s,\testimator extra_tree's best error=0.3786,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:30:39] {2191} INFO - iteration 143, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:30:39] {2364} INFO - at 599.8s,\testimator extra_tree's best error=0.3786,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:30:39] {2191} INFO - iteration 144, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:30:39] {2364} INFO - at 599.9s,\testimator extra_tree's best error=0.3786,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:30:39] {2191} INFO - iteration 145, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:30:39] {2364} INFO - at 599.9s,\testimator extra_tree's best error=0.3786,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:30:39] {2191} INFO - iteration 146, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:30:39] {2364} INFO - at 599.9s,\testimator extra_tree's best error=0.3786,\tbest estimator lgbm's best error=0.3250\n", + "[flaml.automl.logger: 04-28 02:30:49] {2600} INFO - retrain lgbm for 9.5s\n", + "[flaml.automl.logger: 04-28 02:30:49] {2603} INFO - retrained model: LGBMClassifier(colsample_bytree=0.763983850698587,\n", + " learning_rate=0.087493667994037, max_bin=127,\n", " min_child_samples=128, n_estimators=302, 
num_leaves=466,\n", - " reg_alpha=0.09968008477303378, reg_lambda=23.22741934331899,\n", + " reg_alpha=0.09968008477303378, reg_lambda=23.227419343318914,\n", " verbose=-1)\n", - "[flaml.automl: 03-30 21:59:14] {2310} INFO - fit succeeded\n", - "[flaml.automl: 03-30 21:59:14] {2311} INFO - Time taken to find the best model: 481.2624523639679\n", - "[flaml.automl: 03-30 21:59:14] {2322} WARNING - Time taken to find the best model is 80% of the provided time budget and not all estimators' hyperparameter search converged. Consider increasing the time budget.\n" + "[flaml.automl.logger: 04-28 02:30:49] {1911} INFO - fit succeeded\n", + "[flaml.automl.logger: 04-28 02:30:49] {1912} INFO - Time taken to find the best model: 275.4841866493225\n" ] } ], @@ -489,14 +1042,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best ML leaner: lgbm\n", + "Best hyperparmeter config: {'n_estimators': 302, 'num_leaves': 466, 'min_child_samples': 128, 'learning_rate': 0.087493667994037, 'log_max_bin': 7, 'colsample_bytree': 0.763983850698587, 'reg_alpha': 0.09968008477303378, 'reg_lambda': 23.227419343318914}\n", + "Best accuracy on validation data: 0.675\n", + "Training duration of best run: 9.453 s\n" + ] + } + ], "source": [ "'''retrieve best config and best learner'''\n", "print('Best ML leaner:', automl.best_estimator)\n", @@ -507,7 +1071,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": { "slideshow": { "slide_type": "slide" @@ -516,15 +1080,26 @@ "outputs": [ { "data": { + "text/html": [ + "
LGBMClassifier(colsample_bytree=0.763983850698587,\n",
+       "               learning_rate=0.087493667994037, max_bin=127,\n",
+       "               min_child_samples=128, n_estimators=302, num_leaves=466,\n",
+       "               reg_alpha=0.09968008477303378, reg_lambda=23.227419343318914,\n",
+       "               verbose=-1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], "text/plain": [ "LGBMClassifier(colsample_bytree=0.763983850698587,\n", - " learning_rate=0.08749366799403727, max_bin=127,\n", + " learning_rate=0.087493667994037, max_bin=127,\n", " min_child_samples=128, n_estimators=302, num_leaves=466,\n", - " reg_alpha=0.09968008477303378, reg_lambda=23.22741934331899,\n", + " reg_alpha=0.09968008477303378, reg_lambda=23.227419343318914,\n", " verbose=-1)" ] }, - "execution_count": 6, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -535,7 +1110,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": { "slideshow": { "slide_type": "slide" @@ -554,7 +1129,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": { "slideshow": { "slide_type": "slide" @@ -593,14 +1168,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "accuracy = 0.6732939797991784\n", + "roc_auc = 0.7276250346550404\n", + "log_loss = 0.6014655432027879\n" + ] + } + ], "source": [ "''' compute different metric values on testing dataset'''\n", "from flaml.ml import sklearn_metric_loss_score\n", @@ -624,7 +1209,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": { "slideshow": { "slide_type": "subslide" @@ -638,20 +1223,16 @@ "text": [ "{'Current Learner': 'lgbm', 'Current Sample': 10000, 'Current Hyper-parameters': {'n_estimators': 4, 'num_leaves': 4, 'min_child_samples': 20, 'learning_rate': 0.09999999999999995, 'log_max_bin': 8, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 1.0, 'FLAML_sample_size': 10000}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 4, 'num_leaves': 4, 'min_child_samples': 20, 'learning_rate': 0.09999999999999995, 'log_max_bin': 8, 'colsample_bytree': 1.0, 
'reg_alpha': 0.0009765625, 'reg_lambda': 1.0, 'FLAML_sample_size': 10000}}\n", "{'Current Learner': 'lgbm', 'Current Sample': 10000, 'Current Hyper-parameters': {'n_estimators': 26, 'num_leaves': 4, 'min_child_samples': 18, 'learning_rate': 0.2293009676418639, 'log_max_bin': 9, 'colsample_bytree': 0.9086551727646448, 'reg_alpha': 0.0015561782752413472, 'reg_lambda': 0.33127416269768944, 'FLAML_sample_size': 10000}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 26, 'num_leaves': 4, 'min_child_samples': 18, 'learning_rate': 0.2293009676418639, 'log_max_bin': 9, 'colsample_bytree': 0.9086551727646448, 'reg_alpha': 0.0015561782752413472, 'reg_lambda': 0.33127416269768944, 'FLAML_sample_size': 10000}}\n", - "{'Current Learner': 'xgboost', 'Current Sample': 10000, 'Current Hyper-parameters': {'n_estimators': 28, 'max_leaves': 4, 'min_child_weight': 0.7500252416342552, 'learning_rate': 0.23798984382572066, 'subsample': 1.0, 'colsample_bylevel': 0.9045613143846261, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.48864254576029176, 'FLAML_sample_size': 10000}, 'Best Learner': 'xgboost', 'Best Hyper-parameters': {'n_estimators': 28, 'max_leaves': 4, 'min_child_weight': 0.7500252416342552, 'learning_rate': 0.23798984382572066, 'subsample': 1.0, 'colsample_bylevel': 0.9045613143846261, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.48864254576029176, 'FLAML_sample_size': 10000}}\n", - "{'Current Learner': 'xgboost', 'Current Sample': 10000, 'Current Hyper-parameters': {'n_estimators': 129, 'max_leaves': 4, 'min_child_weight': 1.2498964566809219, 'learning_rate': 0.3574837022388901, 'subsample': 0.9773266280674643, 'colsample_bylevel': 0.9705283362807284, 'colsample_bytree': 0.8561269216168275, 'reg_alpha': 0.0021694711024901254, 'reg_lambda': 4.620219690690227, 'FLAML_sample_size': 10000}, 'Best Learner': 'xgboost', 'Best Hyper-parameters': {'n_estimators': 129, 'max_leaves': 4, 'min_child_weight': 
1.2498964566809219, 'learning_rate': 0.3574837022388901, 'subsample': 0.9773266280674643, 'colsample_bylevel': 0.9705283362807284, 'colsample_bytree': 0.8561269216168275, 'reg_alpha': 0.0021694711024901254, 'reg_lambda': 4.620219690690227, 'FLAML_sample_size': 10000}}\n", - "{'Current Learner': 'xgboost', 'Current Sample': 10000, 'Current Hyper-parameters': {'n_estimators': 28, 'max_leaves': 5, 'min_child_weight': 0.7500252416342552, 'learning_rate': 0.23798984382572066, 'subsample': 1.0, 'colsample_bylevel': 0.9045613143846261, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.48864254576029176, 'FLAML_sample_size': 10000}, 'Best Learner': 'xgboost', 'Best Hyper-parameters': {'n_estimators': 28, 'max_leaves': 5, 'min_child_weight': 0.7500252416342552, 'learning_rate': 0.23798984382572066, 'subsample': 1.0, 'colsample_bylevel': 0.9045613143846261, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.48864254576029176, 'FLAML_sample_size': 10000}}\n", "{'Current Learner': 'lgbm', 'Current Sample': 40000, 'Current Hyper-parameters': {'n_estimators': 55, 'num_leaves': 4, 'min_child_samples': 20, 'learning_rate': 0.43653962213332903, 'log_max_bin': 10, 'colsample_bytree': 0.8048558760626646, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.23010605579846408, 'FLAML_sample_size': 40000}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 55, 'num_leaves': 4, 'min_child_samples': 20, 'learning_rate': 0.43653962213332903, 'log_max_bin': 10, 'colsample_bytree': 0.8048558760626646, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.23010605579846408, 'FLAML_sample_size': 40000}}\n", - "{'Current Learner': 'lgbm', 'Current Sample': 40000, 'Current Hyper-parameters': {'n_estimators': 90, 'num_leaves': 18, 'min_child_samples': 34, 'learning_rate': 0.35726266205297247, 'log_max_bin': 10, 'colsample_bytree': 0.9295656128173544, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.1981463604305675, 'FLAML_sample_size': 40000}, 'Best Learner': 'lgbm', 'Best 
Hyper-parameters': {'n_estimators': 90, 'num_leaves': 18, 'min_child_samples': 34, 'learning_rate': 0.35726266205297247, 'log_max_bin': 10, 'colsample_bytree': 0.9295656128173544, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.1981463604305675, 'FLAML_sample_size': 40000}}\n", - "{'Current Learner': 'lgbm', 'Current Sample': 40000, 'Current Hyper-parameters': {'n_estimators': 56, 'num_leaves': 7, 'min_child_samples': 92, 'learning_rate': 0.23536463281405448, 'log_max_bin': 10, 'colsample_bytree': 0.9898009552962395, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.14329426172643311, 'FLAML_sample_size': 40000}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 56, 'num_leaves': 7, 'min_child_samples': 92, 'learning_rate': 0.23536463281405448, 'log_max_bin': 10, 'colsample_bytree': 0.9898009552962395, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.14329426172643311, 'FLAML_sample_size': 40000}}\n", - "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 56, 'num_leaves': 7, 'min_child_samples': 92, 'learning_rate': 0.23536463281405448, 'log_max_bin': 10, 'colsample_bytree': 0.9898009552962395, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.14329426172643311, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 56, 'num_leaves': 7, 'min_child_samples': 92, 'learning_rate': 0.23536463281405448, 'log_max_bin': 10, 'colsample_bytree': 0.9898009552962395, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.14329426172643311, 'FLAML_sample_size': 364083}}\n", - "{'Current Learner': 'xgb', 'Current Sample': 40000, 'Current Hyper-parameters': {'n_estimators': 46, 'max_depth': 6, 'min_child_weight': 1.6664725229213329, 'learning_rate': 0.45062893839370016, 'subsample': 0.9773266280674643, 'colsample_bylevel': 1.0, 'colsample_bytree': 0.8561269216168275, 'reg_alpha': 0.0021694711024901254, 'reg_lambda': 9.455213695118394, 'FLAML_sample_size': 40000}, 'Best Learner': 'xgb', 'Best Hyper-parameters': 
{'n_estimators': 46, 'max_depth': 6, 'min_child_weight': 1.6664725229213329, 'learning_rate': 0.45062893839370016, 'subsample': 0.9773266280674643, 'colsample_bylevel': 1.0, 'colsample_bytree': 0.8561269216168275, 'reg_alpha': 0.0021694711024901254, 'reg_lambda': 9.455213695118394, 'FLAML_sample_size': 40000}}\n", - "{'Current Learner': 'catboost', 'Current Sample': 40000, 'Current Hyper-parameters': {'early_stopping_rounds': 10, 'learning_rate': 0.09999999999999996, 'n_estimators': 99, 'FLAML_sample_size': 40000}, 'Best Learner': 'catboost', 'Best Hyper-parameters': {'early_stopping_rounds': 10, 'learning_rate': 0.09999999999999996, 'n_estimators': 99, 'FLAML_sample_size': 40000}}\n", - "{'Current Learner': 'catboost', 'Current Sample': 40000, 'Current Hyper-parameters': {'early_stopping_rounds': 10, 'learning_rate': 0.2, 'n_estimators': 52, 'FLAML_sample_size': 40000}, 'Best Learner': 'catboost', 'Best Hyper-parameters': {'early_stopping_rounds': 10, 'learning_rate': 0.2, 'n_estimators': 52, 'FLAML_sample_size': 40000}}\n", - "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 179, 'num_leaves': 27, 'min_child_samples': 75, 'learning_rate': 0.09744966359309036, 'log_max_bin': 10, 'colsample_bytree': 1.0, 'reg_alpha': 0.002826104794043855, 'reg_lambda': 0.1457318237156161, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 179, 'num_leaves': 27, 'min_child_samples': 75, 'learning_rate': 0.09744966359309036, 'log_max_bin': 10, 'colsample_bytree': 1.0, 'reg_alpha': 0.002826104794043855, 'reg_lambda': 0.1457318237156161, 'FLAML_sample_size': 364083}}\n", - "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 180, 'num_leaves': 31, 'min_child_samples': 112, 'learning_rate': 0.14172261747380896, 'log_max_bin': 8, 'colsample_bytree': 0.9882716197099741, 'reg_alpha': 0.004676080321450302, 'reg_lambda': 2.704862827036818, 
'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 180, 'num_leaves': 31, 'min_child_samples': 112, 'learning_rate': 0.14172261747380896, 'log_max_bin': 8, 'colsample_bytree': 0.9882716197099741, 'reg_alpha': 0.004676080321450302, 'reg_lambda': 2.704862827036818, 'FLAML_sample_size': 364083}}\n", - "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 284, 'num_leaves': 24, 'min_child_samples': 57, 'learning_rate': 0.34506374431782694, 'log_max_bin': 8, 'colsample_bytree': 0.9661606582789269, 'reg_alpha': 0.05708594148438563, 'reg_lambda': 3.0806435484123478, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 284, 'num_leaves': 24, 'min_child_samples': 57, 'learning_rate': 0.34506374431782694, 'log_max_bin': 8, 'colsample_bytree': 0.9661606582789269, 'reg_alpha': 0.05708594148438563, 'reg_lambda': 3.0806435484123478, 'FLAML_sample_size': 364083}}\n", - "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 150, 'num_leaves': 176, 'min_child_samples': 62, 'learning_rate': 0.2607939951456869, 'log_max_bin': 8, 'colsample_bytree': 1.0, 'reg_alpha': 0.015973158305354472, 'reg_lambda': 1.1581244082992255, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 150, 'num_leaves': 176, 'min_child_samples': 62, 'learning_rate': 0.2607939951456869, 'log_max_bin': 8, 'colsample_bytree': 1.0, 'reg_alpha': 0.015973158305354472, 'reg_lambda': 1.1581244082992255, 'FLAML_sample_size': 364083}}\n" + "{'Current Learner': 'lgbm', 'Current Sample': 40000, 'Current Hyper-parameters': {'n_estimators': 90, 'num_leaves': 18, 'min_child_samples': 34, 'learning_rate': 0.3572626620529719, 'log_max_bin': 10, 'colsample_bytree': 0.9295656128173544, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.1981463604305675, 'FLAML_sample_size': 40000}, 'Best Learner': 'lgbm', 'Best 
Hyper-parameters': {'n_estimators': 90, 'num_leaves': 18, 'min_child_samples': 34, 'learning_rate': 0.3572626620529719, 'log_max_bin': 10, 'colsample_bytree': 0.9295656128173544, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.1981463604305675, 'FLAML_sample_size': 40000}}\n", + "{'Current Learner': 'lgbm', 'Current Sample': 40000, 'Current Hyper-parameters': {'n_estimators': 56, 'num_leaves': 7, 'min_child_samples': 92, 'learning_rate': 0.23536463281405412, 'log_max_bin': 10, 'colsample_bytree': 0.9898009552962395, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.143294261726433, 'FLAML_sample_size': 40000}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 56, 'num_leaves': 7, 'min_child_samples': 92, 'learning_rate': 0.23536463281405412, 'log_max_bin': 10, 'colsample_bytree': 0.9898009552962395, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.143294261726433, 'FLAML_sample_size': 40000}}\n", + "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 56, 'num_leaves': 7, 'min_child_samples': 92, 'learning_rate': 0.23536463281405412, 'log_max_bin': 10, 'colsample_bytree': 0.9898009552962395, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.143294261726433, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 56, 'num_leaves': 7, 'min_child_samples': 92, 'learning_rate': 0.23536463281405412, 'log_max_bin': 10, 'colsample_bytree': 0.9898009552962395, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.143294261726433, 'FLAML_sample_size': 364083}}\n", + "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 179, 'num_leaves': 27, 'min_child_samples': 75, 'learning_rate': 0.09744966359309021, 'log_max_bin': 10, 'colsample_bytree': 1.0, 'reg_alpha': 0.002826104794043855, 'reg_lambda': 0.145731823715616, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 179, 'num_leaves': 27, 'min_child_samples': 75, 
'learning_rate': 0.09744966359309021, 'log_max_bin': 10, 'colsample_bytree': 1.0, 'reg_alpha': 0.002826104794043855, 'reg_lambda': 0.145731823715616, 'FLAML_sample_size': 364083}}\n", + "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 180, 'num_leaves': 31, 'min_child_samples': 112, 'learning_rate': 0.14172261747380863, 'log_max_bin': 8, 'colsample_bytree': 0.9882716197099741, 'reg_alpha': 0.004676080321450302, 'reg_lambda': 2.7048628270368136, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 180, 'num_leaves': 31, 'min_child_samples': 112, 'learning_rate': 0.14172261747380863, 'log_max_bin': 8, 'colsample_bytree': 0.9882716197099741, 'reg_alpha': 0.004676080321450302, 'reg_lambda': 2.7048628270368136, 'FLAML_sample_size': 364083}}\n", + "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 284, 'num_leaves': 24, 'min_child_samples': 57, 'learning_rate': 0.34506374431782616, 'log_max_bin': 8, 'colsample_bytree': 0.9661606582789269, 'reg_alpha': 0.05708594148438563, 'reg_lambda': 3.080643548412343, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 284, 'num_leaves': 24, 'min_child_samples': 57, 'learning_rate': 0.34506374431782616, 'log_max_bin': 8, 'colsample_bytree': 0.9661606582789269, 'reg_alpha': 0.05708594148438563, 'reg_lambda': 3.080643548412343, 'FLAML_sample_size': 364083}}\n", + "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 150, 'num_leaves': 176, 'min_child_samples': 62, 'learning_rate': 0.2607939951456863, 'log_max_bin': 8, 'colsample_bytree': 1.0, 'reg_alpha': 0.015973158305354472, 'reg_lambda': 1.1581244082992237, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 150, 'num_leaves': 176, 'min_child_samples': 62, 'learning_rate': 0.2607939951456863, 'log_max_bin': 8, 
'colsample_bytree': 1.0, 'reg_alpha': 0.015973158305354472, 'reg_lambda': 1.1581244082992237, 'FLAML_sample_size': 364083}}\n", + "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 100, 'num_leaves': 380, 'min_child_samples': 83, 'learning_rate': 0.1439688182217924, 'log_max_bin': 7, 'colsample_bytree': 0.9365250834556608, 'reg_alpha': 0.07492795084698504, 'reg_lambda': 10.854898771631566, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 100, 'num_leaves': 380, 'min_child_samples': 83, 'learning_rate': 0.1439688182217924, 'log_max_bin': 7, 'colsample_bytree': 0.9365250834556608, 'reg_alpha': 0.07492795084698504, 'reg_lambda': 10.854898771631566, 'FLAML_sample_size': 364083}}\n", + "{'Current Learner': 'lgbm', 'Current Sample': 364083, 'Current Hyper-parameters': {'n_estimators': 157, 'num_leaves': 985, 'min_child_samples': 115, 'learning_rate': 0.15986853540486204, 'log_max_bin': 6, 'colsample_bytree': 0.8905312088154893, 'reg_alpha': 0.17376372850615002, 'reg_lambda': 196.8899439847594, 'FLAML_sample_size': 364083}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 157, 'num_leaves': 985, 'min_child_samples': 115, 'learning_rate': 0.15986853540486204, 'log_max_bin': 6, 'colsample_bytree': 0.8905312088154893, 'reg_alpha': 0.17376372850615002, 'reg_lambda': 196.8899439847594, 'FLAML_sample_size': 364083}}\n" ] } ], @@ -665,7 +1246,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": { "slideshow": { "slide_type": "slide" @@ -674,14 +1255,12 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -713,7 +1292,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -723,16 +1302,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
LGBMClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LGBMClassifier()" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "lgbm.fit(X_train, y_train)" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -748,7 +1341,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -762,16 +1355,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+       "              colsample_bylevel=None, colsample_bynode=None,\n",
+       "              colsample_bytree=None, early_stopping_rounds=None,\n",
+       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
+       "              interaction_constraints=None, learning_rate=None, max_bin=None,\n",
+       "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
+       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
+       "              predictor=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=None, ...)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "xgb.fit(X, y_train_xgb)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -783,7 +1420,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -847,16 +1484,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: rgf-python in /home/vscode/.local/lib/python3.9/site-packages (3.12.0)\n", + "Requirement already satisfied: scikit-learn>=0.18 in /usr/local/lib/python3.9/site-packages (from rgf-python) (1.1.3)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.9/site-packages (from rgf-python) (1.2.0)\n", + "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.9/site-packages (from scikit-learn>=0.18->rgf-python) (1.9.3)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.9/site-packages (from 
scikit-learn>=0.18->rgf-python) (3.1.0)\n", + "Requirement already satisfied: numpy>=1.17.3 in /home/vscode/.local/lib/python3.9/site-packages (from scikit-learn>=0.18->rgf-python) (1.23.5)\n", + "\u001b[33mWARNING: You are using pip version 22.0.4; however, version 23.1.1 is available.\n", + "You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n", + "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install rgf-python" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 23, "metadata": { "slideshow": { "slide_type": "slide" @@ -867,7 +1521,7 @@ "''' SKLearnEstimator is the super class for a sklearn learner '''\n", "from flaml.model import SKLearnEstimator\n", "from flaml import tune\n", - "from flaml.data import CLASSIFICATION\n", + "from flaml.automl.task.task import CLASSIFICATION\n", "\n", "\n", "class MyRegularizedGreedyForest(SKLearnEstimator):\n", @@ -952,7 +1606,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 24, "metadata": { "slideshow": { "slide_type": "slide" @@ -966,7 +1620,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 25, "metadata": { "slideshow": { "slide_type": "slide" @@ -975,89 +1629,119 @@ }, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "[flaml.automl: 03-30 22:00:01] {2105} INFO - task = classification\n", - "[flaml.automl: 03-30 22:00:02] {2107} INFO - Data split method: stratified\n", - "[flaml.automl: 03-30 22:00:02] {2111} INFO - Evaluation method: holdout\n", - "[flaml.automl: 03-30 22:00:02] {2188} INFO - Minimizing error metric: 1-accuracy\n", - "[flaml.automl: 03-30 22:00:02] {2281} INFO - List of ML learners in AutoML Run: ['RGF', 'lgbm', 'rf', 'xgboost']\n", - "[flaml.automl: 03-30 22:00:02] {2567} INFO - iteration 0, current learner RGF\n", - "[flaml.automl: 03-30 22:00:02] {2697} 
INFO - Estimated sufficient time budget=255753s. Estimated necessary time budget=256s.\n", - "[flaml.automl: 03-30 22:00:02] {2744} INFO - at 1.3s,\testimator RGF's best error=0.3787,\tbest estimator RGF's best error=0.3787\n", - "[flaml.automl: 03-30 22:00:02] {2567} INFO - iteration 1, current learner RGF\n", - "[flaml.automl: 03-30 22:00:03] {2744} INFO - at 1.9s,\testimator RGF's best error=0.3787,\tbest estimator RGF's best error=0.3787\n", - "[flaml.automl: 03-30 22:00:03] {2567} INFO - iteration 2, current learner RGF\n", - "[flaml.automl: 03-30 22:00:04] {2744} INFO - at 2.6s,\testimator RGF's best error=0.3787,\tbest estimator RGF's best error=0.3787\n", - "[flaml.automl: 03-30 22:00:04] {2567} INFO - iteration 3, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:04] {2744} INFO - at 2.7s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", - "[flaml.automl: 03-30 22:00:04] {2567} INFO - iteration 4, current learner RGF\n", - "[flaml.automl: 03-30 22:00:04] {2744} INFO - at 3.2s,\testimator RGF's best error=0.3787,\tbest estimator lgbm's best error=0.3777\n", - "[flaml.automl: 03-30 22:00:04] {2567} INFO - iteration 5, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:04] {2744} INFO - at 3.3s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", - "[flaml.automl: 03-30 22:00:04] {2567} INFO - iteration 6, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:05] {2744} INFO - at 3.4s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", - "[flaml.automl: 03-30 22:00:05] {2567} INFO - iteration 7, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:05] {2744} INFO - at 3.4s,\testimator lgbm's best error=0.3661,\tbest estimator lgbm's best error=0.3661\n", - "[flaml.automl: 03-30 22:00:05] {2567} INFO - iteration 8, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:05] {2744} INFO - at 3.5s,\testimator lgbm's best error=0.3633,\tbest estimator lgbm's 
best error=0.3633\n", - "[flaml.automl: 03-30 22:00:05] {2567} INFO - iteration 9, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:05] {2744} INFO - at 3.6s,\testimator lgbm's best error=0.3633,\tbest estimator lgbm's best error=0.3633\n", - "[flaml.automl: 03-30 22:00:05] {2567} INFO - iteration 10, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:05] {2744} INFO - at 3.7s,\testimator lgbm's best error=0.3633,\tbest estimator lgbm's best error=0.3633\n", - "[flaml.automl: 03-30 22:00:05] {2567} INFO - iteration 11, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:05] {2744} INFO - at 3.8s,\testimator lgbm's best error=0.3613,\tbest estimator lgbm's best error=0.3613\n", - "[flaml.automl: 03-30 22:00:05] {2567} INFO - iteration 12, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:05] {2744} INFO - at 3.9s,\testimator lgbm's best error=0.3613,\tbest estimator lgbm's best error=0.3613\n", - "[flaml.automl: 03-30 22:00:05] {2567} INFO - iteration 13, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:05] {2744} INFO - at 4.1s,\testimator lgbm's best error=0.3613,\tbest estimator lgbm's best error=0.3613\n", - "[flaml.automl: 03-30 22:00:05] {2567} INFO - iteration 14, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:05] {2744} INFO - at 4.2s,\testimator lgbm's best error=0.3613,\tbest estimator lgbm's best error=0.3613\n", - "[flaml.automl: 03-30 22:00:05] {2567} INFO - iteration 15, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:05] {2744} INFO - at 4.3s,\testimator lgbm's best error=0.3613,\tbest estimator lgbm's best error=0.3613\n", - "[flaml.automl: 03-30 22:00:05] {2567} INFO - iteration 16, current learner RGF\n", - "[flaml.automl: 03-30 22:00:06] {2744} INFO - at 4.9s,\testimator RGF's best error=0.3787,\tbest estimator lgbm's best error=0.3613\n", - "[flaml.automl: 03-30 22:00:06] {2567} INFO - iteration 17, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:06] {2744} INFO - at 5.0s,\testimator lgbm's best 
error=0.3613,\tbest estimator lgbm's best error=0.3613\n", - "[flaml.automl: 03-30 22:00:06] {2567} INFO - iteration 18, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:06] {2744} INFO - at 5.1s,\testimator lgbm's best error=0.3613,\tbest estimator lgbm's best error=0.3613\n", - "[flaml.automl: 03-30 22:00:06] {2567} INFO - iteration 19, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:07] {2744} INFO - at 5.5s,\testimator lgbm's best error=0.3600,\tbest estimator lgbm's best error=0.3600\n", - "[flaml.automl: 03-30 22:00:07] {2567} INFO - iteration 20, current learner RGF\n", - "[flaml.automl: 03-30 22:00:07] {2744} INFO - at 6.1s,\testimator RGF's best error=0.3669,\tbest estimator lgbm's best error=0.3600\n", - "[flaml.automl: 03-30 22:00:07] {2567} INFO - iteration 21, current learner RGF\n", - "[flaml.automl: 03-30 22:00:08] {2744} INFO - at 6.7s,\testimator RGF's best error=0.3669,\tbest estimator lgbm's best error=0.3600\n", - "[flaml.automl: 03-30 22:00:08] {2567} INFO - iteration 22, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:08] {2744} INFO - at 6.9s,\testimator lgbm's best error=0.3544,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:08] {2567} INFO - iteration 23, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:08] {2744} INFO - at 7.1s,\testimator lgbm's best error=0.3544,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:08] {2567} INFO - iteration 24, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:08] {2744} INFO - at 7.2s,\testimator lgbm's best error=0.3544,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:08] {2567} INFO - iteration 25, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:09] {2744} INFO - at 7.4s,\testimator lgbm's best error=0.3544,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:09] {2567} INFO - iteration 26, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:09] {2744} INFO - 
at 7.5s,\testimator lgbm's best error=0.3544,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:09] {2567} INFO - iteration 27, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:09] {2744} INFO - at 7.6s,\testimator lgbm's best error=0.3544,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:09] {2567} INFO - iteration 28, current learner RGF\n", - "[flaml.automl: 03-30 22:00:09] {2744} INFO - at 8.2s,\testimator RGF's best error=0.3669,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:09] {2567} INFO - iteration 29, current learner RGF\n", - "[flaml.automl: 03-30 22:00:10] {2744} INFO - at 9.3s,\testimator RGF's best error=0.3642,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:10] {2567} INFO - iteration 30, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:11] {2744} INFO - at 9.4s,\testimator lgbm's best error=0.3544,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:11] {2567} INFO - iteration 31, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:11] {2744} INFO - at 10.0s,\testimator lgbm's best error=0.3544,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:11] {2567} INFO - iteration 32, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:11] {2744} INFO - at 10.0s,\testimator xgboost's best error=0.3787,\tbest estimator lgbm's best error=0.3544\n", - "[flaml.automl: 03-30 22:00:13] {2974} INFO - retrain lgbm for 1.8s\n", - "[flaml.automl: 03-30 22:00:13] {2981} INFO - retrained model: LGBMClassifier(colsample_bytree=0.8485873378520249,\n", - " learning_rate=0.6205212209154768, max_bin=1023,\n", - " min_child_samples=6, n_estimators=46, num_leaves=16,\n", - " reg_alpha=0.0009765625, reg_lambda=0.0033009704647149916,\n", + "[flaml.automl.logger: 04-28 02:31:18] {1663} INFO - task = classification\n", + "[flaml.automl.logger: 04-28 02:31:18] {1670} INFO - Data split method: stratified\n", 
+ "[flaml.automl.logger: 04-28 02:31:18] {1673} INFO - Evaluation method: holdout\n", + "[flaml.automl.logger: 04-28 02:31:18] {1771} INFO - Minimizing error metric: 1-accuracy\n", + "[flaml.automl.logger: 04-28 02:31:18] {1881} INFO - List of ML learners in AutoML Run: ['RGF', 'lgbm', 'rf', 'xgboost']\n", + "[flaml.automl.logger: 04-28 02:31:18] {2191} INFO - iteration 0, current learner RGF\n", + "[flaml.automl.logger: 04-28 02:31:19] {2317} INFO - Estimated sufficient time budget=320931s. Estimated necessary time budget=321s.\n", + "[flaml.automl.logger: 04-28 02:31:19] {2364} INFO - at 1.4s,\testimator RGF's best error=0.3840,\tbest estimator RGF's best error=0.3840\n", + "[flaml.automl.logger: 04-28 02:31:19] {2191} INFO - iteration 1, current learner RGF\n", + "[flaml.automl.logger: 04-28 02:31:19] {2364} INFO - at 1.9s,\testimator RGF's best error=0.3840,\tbest estimator RGF's best error=0.3840\n", + "[flaml.automl.logger: 04-28 02:31:19] {2191} INFO - iteration 2, current learner RGF\n", + "[flaml.automl.logger: 04-28 02:31:20] {2364} INFO - at 2.3s,\testimator RGF's best error=0.3840,\tbest estimator RGF's best error=0.3840\n", + "[flaml.automl.logger: 04-28 02:31:20] {2191} INFO - iteration 3, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:20] {2364} INFO - at 2.4s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.logger: 04-28 02:31:20] {2191} INFO - iteration 4, current learner RGF\n", + "[flaml.automl.logger: 04-28 02:31:20] {2364} INFO - at 2.9s,\testimator RGF's best error=0.3840,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.logger: 04-28 02:31:20] {2191} INFO - iteration 5, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:20] {2364} INFO - at 2.9s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.logger: 04-28 02:31:20] {2191} INFO - iteration 6, current learner lgbm\n", + "[flaml.automl.logger: 04-28 
02:31:20] {2364} INFO - at 2.9s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.logger: 04-28 02:31:20] {2191} INFO - iteration 7, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:20] {2364} INFO - at 3.0s,\testimator lgbm's best error=0.3661,\tbest estimator lgbm's best error=0.3661\n", + "[flaml.automl.logger: 04-28 02:31:20] {2191} INFO - iteration 8, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:20] {2364} INFO - at 3.0s,\testimator lgbm's best error=0.3661,\tbest estimator lgbm's best error=0.3661\n", + "[flaml.automl.logger: 04-28 02:31:21] {2191} INFO - iteration 9, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:21] {2364} INFO - at 3.1s,\testimator lgbm's best error=0.3633,\tbest estimator lgbm's best error=0.3633\n", + "[flaml.automl.logger: 04-28 02:31:21] {2191} INFO - iteration 10, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:21] {2364} INFO - at 3.2s,\testimator lgbm's best error=0.3633,\tbest estimator lgbm's best error=0.3633\n", + "[flaml.automl.logger: 04-28 02:31:21] {2191} INFO - iteration 11, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:21] {2364} INFO - at 3.2s,\testimator lgbm's best error=0.3633,\tbest estimator lgbm's best error=0.3633\n", + "[flaml.automl.logger: 04-28 02:31:21] {2191} INFO - iteration 12, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:21] {2364} INFO - at 3.3s,\testimator lgbm's best error=0.3613,\tbest estimator lgbm's best error=0.3613\n", + "[flaml.automl.logger: 04-28 02:31:21] {2191} INFO - iteration 13, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:21] {2364} INFO - at 3.3s,\testimator lgbm's best error=0.3613,\tbest estimator lgbm's best error=0.3613\n", + "[flaml.automl.logger: 04-28 02:31:21] {2191} INFO - iteration 14, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:21] {2364} INFO - at 3.5s,\testimator lgbm's best error=0.3591,\tbest 
estimator lgbm's best error=0.3591\n", + "[flaml.automl.logger: 04-28 02:31:21] {2191} INFO - iteration 15, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:21] {2364} INFO - at 3.6s,\testimator lgbm's best error=0.3591,\tbest estimator lgbm's best error=0.3591\n", + "[flaml.automl.logger: 04-28 02:31:21] {2191} INFO - iteration 16, current learner RGF\n", + "[flaml.automl.logger: 04-28 02:31:22] {2364} INFO - at 4.1s,\testimator RGF's best error=0.3840,\tbest estimator lgbm's best error=0.3591\n", + "[flaml.automl.logger: 04-28 02:31:22] {2191} INFO - iteration 17, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:22] {2364} INFO - at 4.2s,\testimator lgbm's best error=0.3591,\tbest estimator lgbm's best error=0.3591\n", + "[flaml.automl.logger: 04-28 02:31:22] {2191} INFO - iteration 18, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:22] {2364} INFO - at 4.3s,\testimator lgbm's best error=0.3589,\tbest estimator lgbm's best error=0.3589\n", + "[flaml.automl.logger: 04-28 02:31:22] {2191} INFO - iteration 19, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:22] {2364} INFO - at 4.5s,\testimator lgbm's best error=0.3587,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:22] {2191} INFO - iteration 20, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:22] {2364} INFO - at 4.6s,\testimator lgbm's best error=0.3587,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:22] {2191} INFO - iteration 21, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:22] {2364} INFO - at 4.7s,\testimator lgbm's best error=0.3587,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:22] {2191} INFO - iteration 22, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:22] {2364} INFO - at 4.8s,\testimator lgbm's best error=0.3587,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:22] 
{2191} INFO - iteration 23, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:22] {2364} INFO - at 5.0s,\testimator lgbm's best error=0.3587,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:22] {2191} INFO - iteration 24, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:23] {2364} INFO - at 5.2s,\testimator lgbm's best error=0.3587,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:23] {2191} INFO - iteration 25, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:23] {2364} INFO - at 5.3s,\testimator lgbm's best error=0.3587,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:23] {2191} INFO - iteration 26, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:23] {2364} INFO - at 5.4s,\testimator lgbm's best error=0.3587,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:23] {2191} INFO - iteration 27, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:23] {2364} INFO - at 5.6s,\testimator lgbm's best error=0.3587,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:23] {2191} INFO - iteration 28, current learner RGF\n", + "[flaml.automl.logger: 04-28 02:31:24] {2364} INFO - at 6.1s,\testimator RGF's best error=0.3766,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:24] {2191} INFO - iteration 29, current learner RGF\n", + "[flaml.automl.logger: 04-28 02:31:24] {2364} INFO - at 6.5s,\testimator RGF's best error=0.3766,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:24] {2191} INFO - iteration 30, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:24] {2364} INFO - at 6.6s,\testimator lgbm's best error=0.3587,\tbest estimator lgbm's best error=0.3587\n", + "[flaml.automl.logger: 04-28 02:31:24] {2191} INFO - iteration 31, current learner lgbm\n", + "[flaml.automl.logger: 04-28 
02:31:24] {2364} INFO - at 6.9s,\testimator lgbm's best error=0.3575,\tbest estimator lgbm's best error=0.3575\n", + "[flaml.automl.logger: 04-28 02:31:24] {2191} INFO - iteration 32, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:25] {2364} INFO - at 7.1s,\testimator lgbm's best error=0.3575,\tbest estimator lgbm's best error=0.3575\n", + "[flaml.automl.logger: 04-28 02:31:25] {2191} INFO - iteration 33, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:25] {2364} INFO - at 7.3s,\testimator lgbm's best error=0.3575,\tbest estimator lgbm's best error=0.3575\n", + "[flaml.automl.logger: 04-28 02:31:25] {2191} INFO - iteration 34, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:25] {2364} INFO - at 7.6s,\testimator lgbm's best error=0.3537,\tbest estimator lgbm's best error=0.3537\n", + "[flaml.automl.logger: 04-28 02:31:25] {2191} INFO - iteration 35, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:25] {2364} INFO - at 7.7s,\testimator lgbm's best error=0.3537,\tbest estimator lgbm's best error=0.3537\n", + "[flaml.automl.logger: 04-28 02:31:25] {2191} INFO - iteration 36, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:25] {2364} INFO - at 7.9s,\testimator lgbm's best error=0.3537,\tbest estimator lgbm's best error=0.3537\n", + "[flaml.automl.logger: 04-28 02:31:25] {2191} INFO - iteration 37, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:26] {2364} INFO - at 8.1s,\testimator lgbm's best error=0.3530,\tbest estimator lgbm's best error=0.3530\n", + "[flaml.automl.logger: 04-28 02:31:26] {2191} INFO - iteration 38, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:26] {2364} INFO - at 8.2s,\testimator lgbm's best error=0.3530,\tbest estimator lgbm's best error=0.3530\n", + "[flaml.automl.logger: 04-28 02:31:26] {2191} INFO - iteration 39, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:26] {2364} INFO - at 8.3s,\testimator lgbm's best error=0.3530,\tbest 
estimator lgbm's best error=0.3530\n", + "[flaml.automl.logger: 04-28 02:31:26] {2191} INFO - iteration 40, current learner RGF\n", + "[flaml.automl.logger: 04-28 02:31:26] {2364} INFO - at 8.8s,\testimator RGF's best error=0.3766,\tbest estimator lgbm's best error=0.3530\n", + "[flaml.automl.logger: 04-28 02:31:26] {2191} INFO - iteration 41, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:26] {2364} INFO - at 8.9s,\testimator lgbm's best error=0.3530,\tbest estimator lgbm's best error=0.3530\n", + "[flaml.automl.logger: 04-28 02:31:26] {2191} INFO - iteration 42, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:26] {2364} INFO - at 9.0s,\testimator lgbm's best error=0.3530,\tbest estimator lgbm's best error=0.3530\n", + "[flaml.automl.logger: 04-28 02:31:26] {2191} INFO - iteration 43, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:26] {2364} INFO - at 9.0s,\testimator lgbm's best error=0.3530,\tbest estimator lgbm's best error=0.3530\n", + "[flaml.automl.logger: 04-28 02:31:26] {2191} INFO - iteration 44, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:27] {2364} INFO - at 9.1s,\testimator lgbm's best error=0.3530,\tbest estimator lgbm's best error=0.3530\n", + "[flaml.automl.logger: 04-28 02:31:27] {2191} INFO - iteration 45, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:27] {2364} INFO - at 9.1s,\testimator lgbm's best error=0.3530,\tbest estimator lgbm's best error=0.3530\n", + "[flaml.automl.logger: 04-28 02:31:27] {2191} INFO - iteration 46, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:27] {2364} INFO - at 9.2s,\testimator lgbm's best error=0.3530,\tbest estimator lgbm's best error=0.3530\n", + "[flaml.automl.logger: 04-28 02:31:27] {2191} INFO - iteration 47, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:28] {2364} INFO - at 10.2s,\testimator lgbm's best error=0.3430,\tbest estimator lgbm's best error=0.3430\n", + "[flaml.automl.logger: 04-28 02:31:59] 
{2600} INFO - retrain lgbm for 30.9s\n", + "[flaml.automl.logger: 04-28 02:31:59] {2603} INFO - retrained model: LGBMClassifier(colsample_bytree=0.521204713137351,\n", + " learning_rate=0.38514327038525437, max_bin=127,\n", + " min_child_samples=5, n_estimators=1159, num_leaves=35,\n", + " reg_alpha=0.007578110040801311, reg_lambda=0.03255827388036828,\n", " verbose=-1)\n", - "[flaml.automl: 03-30 22:00:13] {2310} INFO - fit succeeded\n", - "[flaml.automl: 03-30 22:00:13] {2311} INFO - Time taken to find the best model: 6.87259840965271\n" + "[flaml.automl.logger: 04-28 02:31:59] {1911} INFO - fit succeeded\n", + "[flaml.automl.logger: 04-28 02:31:59] {1912} INFO - Time taken to find the best model: 10.156839609146118\n" ] } ], @@ -1085,7 +1769,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -1120,146 +1804,275 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 27, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:31:59] {1663} INFO - task = classification\n", + "[flaml.automl.logger: 04-28 02:31:59] {1670} INFO - Data split method: stratified\n", + "[flaml.automl.logger: 04-28 02:31:59] {1673} INFO - Evaluation method: holdout\n", + "[flaml.automl.logger: 04-28 02:31:59] {1771} INFO - Minimizing error metric: customized metric\n", + "[flaml.automl.logger: 04-28 02:31:59] {1881} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']\n", + "[flaml.automl.logger: 04-28 02:31:59] {2191} INFO - iteration 0, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:59] {2317} INFO - Estimated sufficient time budget=13725s. 
Estimated necessary time budget=337s.\n", + "[flaml.automl.logger: 04-28 02:31:59] {2364} INFO - at 0.5s,\testimator lgbm's best error=0.6647,\tbest estimator lgbm's best error=0.6647\n", + "[flaml.automl.logger: 04-28 02:31:59] {2191} INFO - iteration 1, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:59] {2364} INFO - at 0.6s,\testimator lgbm's best error=0.6647,\tbest estimator lgbm's best error=0.6647\n", + "[flaml.automl.logger: 04-28 02:31:59] {2191} INFO - iteration 2, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:59] {2364} INFO - at 0.6s,\testimator lgbm's best error=0.6491,\tbest estimator lgbm's best error=0.6491\n", + "[flaml.automl.logger: 04-28 02:31:59] {2191} INFO - iteration 3, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:31:59] {2364} INFO - at 0.7s,\testimator xgboost's best error=0.6672,\tbest estimator lgbm's best error=0.6491\n", + "[flaml.automl.logger: 04-28 02:31:59] {2191} INFO - iteration 4, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:31:59] {2364} INFO - at 0.7s,\testimator lgbm's best error=0.6423,\tbest estimator lgbm's best error=0.6423\n", + "[flaml.automl.logger: 04-28 02:31:59] {2191} INFO - iteration 5, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 0.8s,\testimator lgbm's best error=0.6423,\tbest estimator lgbm's best error=0.6423\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 6, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 0.8s,\testimator lgbm's best error=0.6423,\tbest estimator lgbm's best error=0.6423\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 7, current learner lgbm\n" + ] + }, { "name": "stderr", "output_type": "stream", "text": [ - "[flaml.automl: 03-30 22:00:14] {2105} INFO - task = classification\n", - "[flaml.automl: 03-30 22:00:14] {2107} INFO - Data split method: stratified\n", - "[flaml.automl: 03-30 22:00:14] {2111} INFO - Evaluation 
method: holdout\n", - "[flaml.automl: 03-30 22:00:14] {2188} INFO - Minimizing error metric: customized metric\n", - "[flaml.automl: 03-30 22:00:14] {2281} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']\n", - "[flaml.automl: 03-30 22:00:14] {2567} INFO - iteration 0, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:14] {2697} INFO - Estimated sufficient time budget=48059s. Estimated necessary time budget=1180s.\n", - "[flaml.automl: 03-30 22:00:14] {2744} INFO - at 0.8s,\testimator lgbm's best error=0.6796,\tbest estimator lgbm's best error=0.6796\n", - "[flaml.automl: 03-30 22:00:14] {2567} INFO - iteration 1, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:14] {2744} INFO - at 0.9s,\testimator lgbm's best error=0.6796,\tbest estimator lgbm's best error=0.6796\n", - "[flaml.automl: 03-30 22:00:14] {2567} INFO - iteration 2, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:14] {2744} INFO - at 0.9s,\testimator lgbm's best error=0.6491,\tbest estimator lgbm's best error=0.6491\n", - "[flaml.automl: 03-30 22:00:14] {2567} INFO - iteration 3, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:14] {2744} INFO - at 1.0s,\testimator lgbm's best error=0.6423,\tbest estimator lgbm's best error=0.6423\n", - "[flaml.automl: 03-30 22:00:14] {2567} INFO - iteration 4, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:14] {2744} INFO - at 1.1s,\testimator lgbm's best error=0.6423,\tbest estimator lgbm's best error=0.6423\n", - "[flaml.automl: 03-30 22:00:14] {2567} INFO - iteration 5, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:14] {2744} INFO - at 1.2s,\testimator lgbm's best error=0.6423,\tbest estimator lgbm's best error=0.6423\n", - "[flaml.automl: 03-30 22:00:14] {2567} INFO - iteration 6, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 1.3s,\testimator lgbm's best error=0.6400,\tbest estimator lgbm's best error=0.6400\n", - 
"[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 7, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 1.3s,\testimator xgboost's best error=0.6672,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 8, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 1.4s,\testimator lgbm's best error=0.6400,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 9, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 1.5s,\testimator xgboost's best error=0.6672,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 10, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 1.5s,\testimator xgboost's best error=0.6500,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 11, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 1.6s,\testimator xgboost's best error=0.6413,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 12, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 1.6s,\testimator xgboost's best error=0.6413,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 13, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 1.7s,\testimator xgboost's best error=0.6413,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 14, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 1.8s,\testimator lgbm's best error=0.6400,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 15, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 1.9s,\testimator 
xgboost's best error=0.6413,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 16, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 2.0s,\testimator lgbm's best error=0.6400,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 17, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:15] {2744} INFO - at 2.0s,\testimator xgboost's best error=0.6413,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:15] {2567} INFO - iteration 18, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:16] {2744} INFO - at 2.3s,\testimator lgbm's best error=0.6400,\tbest estimator lgbm's best error=0.6400\n", - "[flaml.automl: 03-30 22:00:16] {2567} INFO - iteration 19, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:16] {2744} INFO - at 2.4s,\testimator xgboost's best error=0.6393,\tbest estimator xgboost's best error=0.6393\n", - "[flaml.automl: 03-30 22:00:16] {2567} INFO - iteration 20, current learner extra_tree\n", - "[flaml.automl: 03-30 22:00:16] {2744} INFO - at 2.4s,\testimator extra_tree's best error=0.6734,\tbest estimator xgboost's best error=0.6393\n", - "[flaml.automl: 03-30 22:00:16] {2567} INFO - iteration 21, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:16] {2744} INFO - at 2.6s,\testimator xgboost's best error=0.6342,\tbest estimator xgboost's best error=0.6342\n", - "[flaml.automl: 03-30 22:00:16] {2567} INFO - iteration 22, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:16] {2744} INFO - at 2.7s,\testimator xgboost's best error=0.6342,\tbest estimator xgboost's best error=0.6342\n", - "[flaml.automl: 03-30 22:00:16] {2567} INFO - iteration 23, current learner extra_tree\n", - "[flaml.automl: 03-30 22:00:16] {2744} INFO - at 2.7s,\testimator extra_tree's best error=0.6617,\tbest estimator xgboost's best error=0.6342\n", - "[flaml.automl: 03-30 22:00:16] {2567} INFO - 
iteration 24, current learner extra_tree\n", - "[flaml.automl: 03-30 22:00:16] {2744} INFO - at 2.8s,\testimator extra_tree's best error=0.6617,\tbest estimator xgboost's best error=0.6342\n", - "[flaml.automl: 03-30 22:00:16] {2567} INFO - iteration 25, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:16] {2744} INFO - at 2.9s,\testimator xgboost's best error=0.6342,\tbest estimator xgboost's best error=0.6342\n", - "[flaml.automl: 03-30 22:00:16] {2567} INFO - iteration 26, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:16] {2744} INFO - at 3.1s,\testimator xgboost's best error=0.6308,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:16] {2567} INFO - iteration 27, current learner rf\n", - "[flaml.automl: 03-30 22:00:16] {2744} INFO - at 3.1s,\testimator rf's best error=0.6531,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:16] {2567} INFO - iteration 28, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:17] {2744} INFO - at 3.3s,\testimator xgboost's best error=0.6308,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:17] {2567} INFO - iteration 29, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:17] {2744} INFO - at 3.5s,\testimator xgboost's best error=0.6308,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:17] {2567} INFO - iteration 30, current learner rf\n", - "[flaml.automl: 03-30 22:00:17] {2744} INFO - at 3.6s,\testimator rf's best error=0.6471,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:17] {2567} INFO - iteration 31, current learner rf\n", - "[flaml.automl: 03-30 22:00:17] {2744} INFO - at 3.6s,\testimator rf's best error=0.6471,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:17] {2567} INFO - iteration 32, current learner rf\n", - "[flaml.automl: 03-30 22:00:17] {2744} INFO - at 3.8s,\testimator rf's best error=0.6471,\tbest 
estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:17] {2567} INFO - iteration 33, current learner extra_tree\n", - "[flaml.automl: 03-30 22:00:17] {2744} INFO - at 3.9s,\testimator extra_tree's best error=0.6617,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:17] {2567} INFO - iteration 34, current learner rf\n", - "[flaml.automl: 03-30 22:00:17] {2744} INFO - at 4.0s,\testimator rf's best error=0.6460,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:17] {2567} INFO - iteration 35, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:17] {2744} INFO - at 4.1s,\testimator xgboost's best error=0.6308,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:17] {2567} INFO - iteration 36, current learner extra_tree\n", - "[flaml.automl: 03-30 22:00:17] {2744} INFO - at 4.2s,\testimator extra_tree's best error=0.6527,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:17] {2567} INFO - iteration 37, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:18] {2744} INFO - at 4.3s,\testimator xgboost's best error=0.6308,\tbest estimator xgboost's best error=0.6308\n", - "[flaml.automl: 03-30 22:00:18] {2567} INFO - iteration 38, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:18] {2744} INFO - at 5.1s,\testimator xgboost's best error=0.6252,\tbest estimator xgboost's best error=0.6252\n", - "[flaml.automl: 03-30 22:00:18] {2567} INFO - iteration 39, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:19] {2744} INFO - at 5.6s,\testimator xgboost's best error=0.6252,\tbest estimator xgboost's best error=0.6252\n", - "[flaml.automl: 03-30 22:00:19] {2567} INFO - iteration 40, current learner extra_tree\n", - "[flaml.automl: 03-30 22:00:19] {2744} INFO - at 5.7s,\testimator extra_tree's best error=0.6527,\tbest estimator xgboost's best error=0.6252\n", - "[flaml.automl: 03-30 22:00:19] {2567} INFO - iteration 41, current 
learner extra_tree\n", - "[flaml.automl: 03-30 22:00:19] {2744} INFO - at 5.8s,\testimator extra_tree's best error=0.6527,\tbest estimator xgboost's best error=0.6252\n", - "[flaml.automl: 03-30 22:00:19] {2567} INFO - iteration 42, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:19] {2744} INFO - at 6.0s,\testimator lgbm's best error=0.6335,\tbest estimator xgboost's best error=0.6252\n", - "[flaml.automl: 03-30 22:00:19] {2567} INFO - iteration 43, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:21] {2744} INFO - at 7.7s,\testimator xgboost's best error=0.6237,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:21] {2567} INFO - iteration 44, current learner extra_tree\n", - "[flaml.automl: 03-30 22:00:21] {2744} INFO - at 7.9s,\testimator extra_tree's best error=0.6527,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:21] {2567} INFO - iteration 45, current learner xgboost\n", - "[flaml.automl: 03-30 22:00:22] {2744} INFO - at 8.6s,\testimator xgboost's best error=0.6237,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:22] {2567} INFO - iteration 46, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:22] {2744} INFO - at 8.7s,\testimator lgbm's best error=0.6335,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:22] {2567} INFO - iteration 47, current learner catboost\n", - "[flaml.automl: 03-30 22:00:22] {2744} INFO - at 8.8s,\testimator catboost's best error=0.6828,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:22] {2567} INFO - iteration 48, current learner catboost\n", - "[flaml.automl: 03-30 22:00:22] {2744} INFO - at 8.9s,\testimator catboost's best error=0.6828,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:22] {2567} INFO - iteration 49, current learner catboost\n", - "[flaml.automl: 03-30 22:00:22] {2744} INFO - at 9.0s,\testimator catboost's best 
error=0.6738,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:22] {2567} INFO - iteration 50, current learner catboost\n", - "[flaml.automl: 03-30 22:00:22] {2744} INFO - at 9.1s,\testimator catboost's best error=0.6738,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:22] {2567} INFO - iteration 51, current learner extra_tree\n", - "[flaml.automl: 03-30 22:00:22] {2744} INFO - at 9.2s,\testimator extra_tree's best error=0.6527,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:22] {2567} INFO - iteration 52, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:23] {2744} INFO - at 9.3s,\testimator lgbm's best error=0.6335,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:23] {2567} INFO - iteration 53, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:23] {2744} INFO - at 9.5s,\testimator lgbm's best error=0.6335,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:23] {2567} INFO - iteration 54, current learner lgbm\n", - "[flaml.automl: 03-30 22:00:23] {2744} INFO - at 10.1s,\testimator lgbm's best error=0.6335,\tbest estimator xgboost's best error=0.6237\n", - "[flaml.automl: 03-30 22:00:32] {2974} INFO - retrain xgboost for 8.8s\n", - "[flaml.automl: 03-30 22:00:32] {2981} INFO - retrained model: XGBClassifier(base_score=0.5, booster='gbtree',\n", - " colsample_bylevel=0.847756342161632, colsample_bynode=1,\n", - " colsample_bytree=0.7597930580523548, gamma=0, gpu_id=-1,\n", - " grow_policy='lossguide', importance_type='gain',\n", - " interaction_constraints='', learning_rate=0.19997653978110663,\n", - " max_delta_step=0, max_depth=0, max_leaves=39,\n", - " min_child_weight=10.070493332676804, missing=nan,\n", - " monotone_constraints='()', n_estimators=13, n_jobs=-1,\n", - " num_parallel_tree=1, random_state=0,\n", - " reg_alpha=0.02609403888821573, reg_lambda=0.19745601532140325,\n", - " scale_pos_weight=1, 
subsample=0.8895588746662894,\n", - " tree_method='hist', use_label_encoder=False,\n", - " validate_parameters=1, verbosity=0)\n", - "[flaml.automl: 03-30 22:00:32] {2310} INFO - fit succeeded\n", - "[flaml.automl: 03-30 22:00:32] {2311} INFO - Time taken to find the best model: 7.734541177749634\n", - "[flaml.automl: 03-30 22:00:32] {2322} WARNING - Time taken to find the best model is 77% of the provided time budget and not all estimators' hyperparameter search converged. Consider increasing the time budget.\n" + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 0.9s,\testimator lgbm's best error=0.6400,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 8, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 0.9s,\testimator lgbm's best error=0.6400,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 9, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 0.9s,\testimator xgboost's best error=0.6672,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 10, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.0s,\testimator xgboost's best error=0.6503,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 11, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.0s,\testimator extra_tree's best error=0.6678,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 12, 
current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.1s,\testimator extra_tree's best error=0.6576,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 13, current learner rf\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.1s,\testimator rf's best error=0.6614,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 14, current learner rf\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.2s,\testimator rf's best error=0.6523,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 15, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.3s,\testimator xgboost's best error=0.6428,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 16, current learner rf\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.3s,\testimator rf's best error=0.6523,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 17, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.4s,\testimator extra_tree's best error=0.6576,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO 
- iteration 18, current learner lgbm\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.4s,\testimator lgbm's best error=0.6400,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 19, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.5s,\testimator xgboost's best error=0.6428,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 20, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.5s,\testimator xgboost's best error=0.6428,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 21, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.6s,\testimator xgboost's best error=0.6428,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 22, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.6s,\testimator lgbm's best error=0.6400,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 23, current learner lgbm\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is 
deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:32:00] {2364} INFO - at 1.7s,\testimator lgbm's best error=0.6400,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:00] {2191} INFO - iteration 24, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 1.8s,\testimator xgboost's best error=0.6428,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 25, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 1.8s,\testimator extra_tree's best error=0.6576,\tbest estimator lgbm's best error=0.6400\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 26, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 1.9s,\testimator lgbm's best error=0.6335,\tbest estimator lgbm's best error=0.6335\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 27, current learner xgboost\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 2.0s,\testimator xgboost's 
best error=0.6423,\tbest estimator lgbm's best error=0.6335\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 28, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 2.0s,\testimator extra_tree's best error=0.6480,\tbest estimator lgbm's best error=0.6335\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 29, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 2.1s,\testimator extra_tree's best error=0.6480,\tbest estimator lgbm's best error=0.6335\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 30, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 2.2s,\testimator lgbm's best error=0.6335,\tbest estimator lgbm's best error=0.6335\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 31, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 2.2s,\testimator lgbm's best error=0.6335,\tbest estimator lgbm's best error=0.6335\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 32, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 2.3s,\testimator lgbm's best error=0.6335,\tbest estimator lgbm's best error=0.6335\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 33, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 2.4s,\testimator extra_tree's best error=0.6480,\tbest estimator lgbm's best error=0.6335\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 34, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:01] {2364} INFO - at 2.5s,\testimator lgbm's best error=0.6335,\tbest estimator lgbm's best error=0.6335\n", + "[flaml.automl.logger: 04-28 02:32:01] {2191} INFO - iteration 35, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:02] {2364} INFO - at 3.0s,\testimator lgbm's best error=0.6328,\tbest estimator lgbm's 
best error=0.6328\n", + "[flaml.automl.logger: 04-28 02:32:02] {2191} INFO - iteration 36, current learner extra_tree\n", + "[flaml.automl.logger: 04-28 02:32:02] {2364} INFO - at 3.1s,\testimator extra_tree's best error=0.6479,\tbest estimator lgbm's best error=0.6328\n", + "[flaml.automl.logger: 04-28 02:32:02] {2191} INFO - iteration 37, current learner rf\n", + "[flaml.automl.logger: 04-28 02:32:02] {2364} INFO - at 3.1s,\testimator rf's best error=0.6523,\tbest estimator lgbm's best error=0.6328\n", + "[flaml.automl.logger: 04-28 02:32:02] {2191} INFO - iteration 38, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:32:02] {2364} INFO - at 3.3s,\testimator catboost's best error=0.6598,\tbest estimator lgbm's best error=0.6328\n", + "[flaml.automl.logger: 04-28 02:32:02] {2191} INFO - iteration 39, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:32:02] {2364} INFO - at 3.5s,\testimator catboost's best error=0.6598,\tbest estimator lgbm's best error=0.6328\n", + "[flaml.automl.logger: 04-28 02:32:02] {2191} INFO - iteration 40, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:32:02] {2364} INFO - at 3.6s,\testimator catboost's best error=0.6459,\tbest estimator lgbm's best error=0.6328\n", + "[flaml.automl.logger: 04-28 02:32:02] {2191} INFO - iteration 41, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:32:03] {2364} INFO - at 3.8s,\testimator catboost's best error=0.6459,\tbest estimator lgbm's best error=0.6328\n", + "[flaml.automl.logger: 04-28 02:32:03] {2191} INFO - iteration 42, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:32:03] {2364} INFO - at 4.0s,\testimator catboost's best error=0.6459,\tbest estimator lgbm's best error=0.6328\n", + "[flaml.automl.logger: 04-28 02:32:03] {2191} INFO - iteration 43, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:03] {2364} INFO - at 4.4s,\testimator lgbm's best error=0.6241,\tbest estimator lgbm's best error=0.6241\n", + 
"[flaml.automl.logger: 04-28 02:32:03] {2191} INFO - iteration 44, current learner rf\n", + "[flaml.automl.logger: 04-28 02:32:03] {2364} INFO - at 4.5s,\testimator rf's best error=0.6470,\tbest estimator lgbm's best error=0.6241\n", + "[flaml.automl.logger: 04-28 02:32:03] {2191} INFO - iteration 45, current learner xgboost\n", + "[flaml.automl.logger: 04-28 02:32:03] {2364} INFO - at 4.5s,\testimator xgboost's best error=0.6423,\tbest estimator lgbm's best error=0.6241\n", + "[flaml.automl.logger: 04-28 02:32:03] {2191} INFO - iteration 46, current learner rf\n", + "[flaml.automl.logger: 04-28 02:32:03] {2364} INFO - at 4.6s,\testimator rf's best error=0.6468,\tbest estimator lgbm's best error=0.6241\n", + "[flaml.automl.logger: 04-28 02:32:03] {2191} INFO - iteration 47, current learner lgbm\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:32:04] {2364} INFO - at 5.0s,\testimator lgbm's best error=0.6241,\tbest estimator lgbm's best error=0.6241\n", + "[flaml.automl.logger: 04-28 02:32:04] {2191} INFO - iteration 48, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:04] {2364} INFO - at 5.4s,\testimator lgbm's best error=0.6206,\tbest estimator lgbm's best error=0.6206\n", + "[flaml.automl.logger: 04-28 02:32:04] {2191} INFO - iteration 49, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:05] {2364} INFO - at 5.8s,\testimator lgbm's best error=0.6206,\tbest estimator lgbm's best error=0.6206\n", + "[flaml.automl.logger: 04-28 02:32:05] {2191} INFO - iteration 50, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:05] {2364} INFO - at 6.2s,\testimator lgbm's best error=0.6206,\tbest estimator 
lgbm's best error=0.6206\n", + "[flaml.automl.logger: 04-28 02:32:05] {2191} INFO - iteration 51, current learner catboost\n", + "[flaml.automl.logger: 04-28 02:32:05] {2364} INFO - at 6.5s,\testimator catboost's best error=0.6459,\tbest estimator lgbm's best error=0.6206\n", + "[flaml.automl.logger: 04-28 02:32:05] {2191} INFO - iteration 52, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:06] {2364} INFO - at 7.1s,\testimator lgbm's best error=0.6185,\tbest estimator lgbm's best error=0.6185\n", + "[flaml.automl.logger: 04-28 02:32:06] {2191} INFO - iteration 53, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:07] {2364} INFO - at 8.0s,\testimator lgbm's best error=0.6156,\tbest estimator lgbm's best error=0.6156\n", + "[flaml.automl.logger: 04-28 02:32:07] {2191} INFO - iteration 54, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:08] {2364} INFO - at 8.8s,\testimator lgbm's best error=0.6156,\tbest estimator lgbm's best error=0.6156\n", + "[flaml.automl.logger: 04-28 02:32:08] {2191} INFO - iteration 55, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:32:09] {2364} INFO - at 9.8s,\testimator lgbm's best error=0.6156,\tbest estimator lgbm's best error=0.6156\n", + "[flaml.automl.logger: 04-28 02:32:09] {2191} INFO - iteration 56, current learner xgb_limitdepth\n", + "[flaml.automl.logger: 04-28 02:32:09] {2364} INFO - at 9.8s,\testimator xgb_limitdepth's best error=0.6682,\tbest estimator lgbm's best error=0.6156\n", + "[flaml.automl.logger: 04-28 02:32:09] {2191} INFO - iteration 57, current learner xgb_limitdepth\n", + "[flaml.automl.logger: 04-28 02:32:09] {2364} INFO - at 9.9s,\testimator xgb_limitdepth's best error=0.6682,\tbest estimator lgbm's best error=0.6156\n", + "[flaml.automl.logger: 04-28 02:32:09] {2191} INFO - iteration 58, current learner xgb_limitdepth\n", + "[flaml.automl.logger: 04-28 02:32:09] {2364} INFO - at 9.9s,\testimator xgb_limitdepth's best error=0.6542,\tbest estimator lgbm's best 
error=0.6156\n", + "[flaml.automl.logger: 04-28 02:32:09] {2191} INFO - iteration 59, current learner xgb_limitdepth\n", + "[flaml.automl.logger: 04-28 02:32:09] {2364} INFO - at 10.0s,\testimator xgb_limitdepth's best error=0.6496,\tbest estimator lgbm's best error=0.6156\n", + "[flaml.automl.logger: 04-28 02:32:09] {2191} INFO - iteration 60, current learner lrl1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n", + "/home/vscode/.local/lib/python3.9/site-packages/xgboost/sklearn.py:1395: UserWarning: `use_label_encoder` is deprecated in 1.7.0.\n", + " warnings.warn(\"`use_label_encoder` is deprecated in 1.7.0.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:32:09] {2364} INFO - at 10.1s,\testimator lrl1's best error=0.6817,\tbest estimator lgbm's best error=0.6156\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:32:10] {2600} INFO - retrain lgbm for 0.7s\n", + "[flaml.automl.logger: 04-28 02:32:10] {2603} INFO - retrained model: 
LGBMClassifier(colsample_bytree=0.9031374907114736,\n", + " learning_rate=0.3525398690474661, max_bin=1023,\n", + " min_child_samples=4, n_estimators=22, num_leaves=69,\n", + " reg_alpha=0.0060777294606297145, reg_lambda=37.65858370595088,\n", + " verbose=-1)\n", + "[flaml.automl.logger: 04-28 02:32:10] {1911} INFO - fit succeeded\n", + "[flaml.automl.logger: 04-28 02:32:10] {1912} INFO - Time taken to find the best model: 8.02491545677185\n" ] } ], @@ -1292,7 +2105,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16 (main, Dec 8 2022, 02:40:11) \n[GCC 10.2.1 20210110]" + "version": "3.9.15" }, "vscode": { "interpreter": { diff --git a/notebook/automl_lightgbm.ipynb b/notebook/automl_lightgbm.ipynb index 410912cd5..e8c7abe02 100644 --- a/notebook/automl_lightgbm.ipynb +++ b/notebook/automl_lightgbm.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { @@ -27,9 +28,9 @@ "\n", "In this notebook, we demonstrate how to use FLAML library to tune hyperparameters of LightGBM with a regression example.\n", "\n", - "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the `notebook` option:\n", + "FLAML requires `Python>=3.7`. 
To run this notebook example, please install flaml with the `automl` option (this option is introduced from version 2, for version 1 it is installed by default):\n", "```bash\n", - "pip install flaml[notebook]\n", + "pip install flaml[automl]\n", "```" ] }, @@ -39,7 +40,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install flaml[notebook]==1.0.10" + "%pip install flaml[automl] matplotlib openml" ] }, { @@ -786,11 +787,6 @@ "model = lgb.train(params, dtrain, valid_sets=[dtrain, dval], verbose_eval=10000) \n" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, { "cell_type": "code", "execution_count": 20, diff --git a/notebook/automl_nlp.ipynb b/notebook/automl_nlp.ipynb index f3bea1e42..d46d3493f 100644 --- a/notebook/automl_nlp.ipynb +++ b/notebook/automl_nlp.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "43f7-wG-Tjg_" @@ -20,9 +21,9 @@ "\n", "In this notebook, we demonstrate how to use the FLAML library to fine tune an NLP language model with hyperparameter search. We will use [flaml.tune](https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function) with the built in GPU in colab for the tuning. However, if you have a machine with more than 1 GPU, you can also use FLAML's [parallel tuning](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#parallel-tuning) with the ray tune option. \n", "\n", - "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the `nlp,notebook` and `blendsearch` option:\n", + "FLAML requires `Python>=3.7`. 
To run this notebook example, please install flaml with the `[automl,hf,blendsearch]` option:\n", "```bash\n", - "pip install flaml[nlp,notebook,blendsearch]; \n", + "pip install flaml[automl,hf,blendsearch]; \n", "```" ] }, @@ -248,7 +249,7 @@ } ], "source": [ - "%pip install flaml[notebook,blendsearch,ray]\n", + "%pip install flaml[automl,hf,blendsearch]\n", "import flaml\n", "flaml.__version__" ] @@ -2130,7 +2131,7 @@ }, { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ "
" ] @@ -2187,8 +2188,8 @@ "Downloading...\n", "From: https://drive.google.com/uc?id=1Jk-_Vg_SxOUDfFVzF7S85oBasY8fFvOY\n", "To: /content/spooky-author-identification.csv\n", - "\r", - " 0% 0.00/3.30M [00:00" ] @@ -4088,7 +4088,7 @@ }, { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ "
" ] @@ -5125,7 +5125,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlEAAAHHCAYAAACfqw0dAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABSyklEQVR4nO3de1yUVeI/8M8MyHBzBpSbKBeR9YKKIgiSGWokmuElN81VQXPbdLVUypSvG+TWLmyxid3UdNcsM81NLd0EDaS8sKIgJpBapmnIRRcZEOQ2c35/+ONZJ0DhERgYP+/Xa14v5zzneZ5zBpj5eJ7znFEIIQSIiIiIqEWUxm4AERERUWfEEEVEREQkA0MUERERkQwMUUREREQyMEQRERERycAQRURERCQDQxQRERGRDAxRRERERDIwRBERERHJwBBFRA80T09PzJ0719jNIKJOiCGKiO7bhx9+CIVCgZMnTxq7KZ1OVVUV1qxZg6CgIGg0GlhaWqJv375YvHgxzp8/b+zmEdFdmBu7AURExnTu3Dkolcb5/+T169cxfvx4ZGZm4oknnsDvfvc72Nra4ty5c9i+fTs++OAD1NTUGKVtRHRvDFFEZDLq6uqg1+thYWHR7H1UKlUbtuju5s6di1OnTuFf//oXpk2bZrDttddew6pVq1rlPHJeFyK6N17OI6J2k5+fj2eeeQbOzs5QqVQYOHAg/vnPfxrUqampQUxMDPz9/aHRaGBjY4NRo0bh0KFDBvUuXboEhUKBhIQEJCYmok+fPlCpVMjLy8Orr74KhUKBH3/8EXPnzoWdnR00Gg3mzZuHyspKg+P8ek5U/aXJo0ePIioqCo6OjrCxscHUqVNx7do1g331ej1effVVuLq6wtraGmPGjEFeXl6z5lkdP34c//73vzF//vwGAQq4He4SEhKk56NHj8bo0aMb1Js7dy48PT3v+bqcOnUK5ubmWL16dYNjnDt3DgqFAu+++65UVlpaiqVLl8LNzQ0qlQre3t7429/+Br1ef9d+ET1IOBJFRO2iqKgII0aMgEKhwOLFi+Ho6Ij9+/dj/vz5KCsrw9KlSwEAZWVl2LRpE2bOnIlnn30W5eXl+Mc//oGwsDBkZGRg6NChBsfdvHkzqqqq8Ic//AEqlQrdunWTtk2fPh29e/dGXFwcsrKysGnTJjg5OeFvf/vbPdv7/PPPw97eHrGxsbh06RISExOxePFi7NixQ6oTHR2NN954A+Hh4QgLC8Pp06cRFhaGqqqqex7/yy+/BADMmTOnGa9ey/36denRowdCQkLw2WefITY21qDujh07YGZmhqeeegoAUFlZiZCQEOTn5+O5556Du7s7jh07hujoaBQUFCAxMbFN2kzU6Qgiovu0efNmAUCcOHGiyTrz588XPXr0ENevXzcof/rpp4VGoxGVlZVCCCHq6upEdXW1QZ0bN24IZ2dn8cwzz0hlFy9eFACEWq0WxcXFBvVjY2MFAIP6QggxdepU0b17d4MyDw8PERkZ2aAvoaGhQq/XS+XLli0TZmZmorS0VAghRGFhoTA3NxdTpkwxON6rr74qABgcszFTp04VAMSNGzfuWq9eSEiICAkJaVAeGRkpPDw8pOd3e102bNggAIgzZ84YlPv4+IixY8dKz1977TVhY2Mjzp8/b1Bv5cqVwszMTFy+fLlZbSYydbycR0RtTgiBzz//HOHh4RBC4Pr169IjLCwMWq0WWVlZAAAzMzNp7o5er0dJSQnq6uoQEBAg1bnTtGnT4Ojo2Oh5FyxYYPB81KhR+O9//4uysrJ7tvkPf/gDFAqFwb46nQ4///wzACAlJQV1dXX44x//aLDf888/f89jA5Da0LVr12bVb6nGXpcnn3wS5ubmBqNpOTk5yMvLw4wZM6SynTt3YtSoUbC3tzf4WYWGhkKn0+Hbb79tkzYTdTa8nEdEbe7
atWsoLS3FBx98gA8++KDROsXFxdK/t2zZgr///e84e/YsamtrpfLevXs32K+xsnru7u4Gz+3t7QEAN27cgFqtvmub77YvAClMeXt7G9Tr1q2bVPdu6s9fXl4OOzu7e9ZvqcZeFwcHBzz66KP47LPP8NprrwG4fSnP3NwcTz75pFTvhx9+wHfffddkOL3zZ0X0IGOIIqI2Vz8Zefbs2YiMjGy0jq+vLwBg69atmDt3LqZMmYLly5fDyckJZmZmiIuLw4ULFxrsZ2Vl1eR5zczMGi0XQtyzzfezb3P0798fAHDmzBmMGjXqnvUVCkWj59bpdI3Wb+p1efrppzFv3jxkZ2dj6NCh+Oyzz/Doo4/CwcFBqqPX6/HYY4/h5ZdfbvQYffv2vWd7iR4EDFFE1OYcHR3RtWtX6HQ6hIaG3rXuv/71L3h5eWHXrl0Gl9N+PRna2Dw8PAAAP/74o8Goz3//+19ptOpuwsPDERcXh61btzYrRNnb2+Onn35qUF4/ItZcU6ZMwXPPPSdd0jt//jyio6MN6vTp0wc3b96858+K6EHHOVFE1ObMzMwwbdo0fP7558jJyWmw/c6lA+pHgO4cdTl+/DjS09PbvqEt8Oijj8Lc3Bzr1q0zKL9zmYC7CQ4Oxvjx47Fp0ybs2bOnwfaamhq89NJL0vM+ffrg7NmzBq/V6dOncfTo0Ra1287ODmFhYfjss8+wfft2WFhYYMqUKQZ1pk+fjvT0dCQnJzfYv7S0FHV1dS06J5Gp4kgUEbWaf/7zn0hKSmpQvmTJEsTHx+PQoUMICgrCs88+Cx8fH5SUlCArKwtff/01SkpKAABPPPEEdu3ahalTp2LixIm4ePEi1q9fDx8fH9y8ebO9u9QkZ2dnLFmyBH//+98xadIkjB8/HqdPn8b+/fvh4OBgMIrWlI8++gjjxo3Dk08+ifDwcDz66KOwsbHBDz/8gO3bt6OgoEBaK+qZZ57BW2+9hbCwMMyfPx/FxcVYv349Bg4c2KyJ8neaMWMGZs+ejffffx9hYWEN5mQtX74cX375JZ544gnMnTsX/v7+qKiowJkzZ/Cvf/0Lly5dMrj8R/SgYogiolbz61GZenPnzkWvXr2QkZGBP//5z9i1axfef/99dO/eHQMHDjRYt2nu3LkoLCzEhg0bkJycDB8fH2zduhU7d+5EWlpaO/Wkef72t7/B2toaGzduxNdff43g4GAcOHAADz/8MCwtLe+5v6OjI44dO4b3338fO3bswKpVq1BTUwMPDw9MmjQJS5YskeoOGDAAH330EWJiYhAVFQUfHx98/PHH2LZtW4tfl0mTJsHKygrl5eUGd+XVs7a2xjfffIO//vWv2LlzJz766COo1Wr07dsXq1evhkajadH5iEyVQrTWLEkiIkJpaSns7e3x+uuvt9rXthBRx8Q5UUREMt26datBWf1q3o19RQsRmRZeziMikmnHjh348MMP8fjjj8PW1hZHjhzBp59+inHjxmHkyJHGbh4RtTGGKCIimXx9fWFubo433ngDZWVl0mTz119/3dhNI6J2wDlRRERERDJwThQRERGRDAxRRERERDJwTlQb0uv1uHr1Krp27dqshfeIiIjI+IQQKC8vh6urK5TKpsebGKLa0NWrV+Hm5mbsZhAREZEMV65cQa9evZrczhDVhrp27Qrg9g9BrVYbuTVERETUHGVlZXBzc5M+x5vCENWG6i/hqdVqhigiIqJO5l5TcTixnIiIiEgGhigiIiIiGRiiiIiIiGRgiCIiIiKSgSGKiIiISAaGKCIiIiIZGKKIiIiIZGCIIiIiIpKBIYqIiIhIBq5YTkREZEJ0eoGMiyUoLq+CU1dLBPbuBjPl3VfeJnkYooiIiExEUk4BVu/NQ4G2SirrobFEbLgPxg/qYcSWmSZeziMiIjIBSTkFWLg1yyBAAUChtgoLt2YhKafASC0zXRyJIiIi6uR0eoHVe/MgGtlWXxb7ZS5GejuY3KU9qy5m9/yi4Lb
CEEVERNTJZVwsaTAC9WtFZdUY/OqBdmpR+8n7cxisLYwTZ3g5j4iIqJMrLr97gKK2wZEoIiKiTs6pq2Wz6m2eOxxBXt3auDXty6qLmdHOzRBFRETUyQX27oYeGksUaqsanRelAOCiscQjfR1Nbk6UMfFyHhERUSdnplQgNtyn0W31kSk23IcBqpUxRBEREZmA8YN6YN3sYXBWqwzKXTSWWDd7GNeJagO8nEdERGQixg/qgZHeDtJdeJvnDuclvDbEkSgiIiITcmdgCvLiV760JYYoIiIiIhkYooiIiIhkYIgiIiIikoEhioiIiEgGhigiIiIiGRiiiIiIiGRgiCIiIiKSgSGKiIiISAaGKCIiIiIZGKKIiIiIZGCIIiIiIpKBIYqIiIhIBoYoIiIiIhkYooiIiIhk6BAh6r333oOnpycsLS0RFBSEjIyMJuvm5uZi2rRp8PT0hEKhQGJi4l2PHR8fD4VCgaVLlxqUV1VVYdGiRejevTtsbW0xbdo0FBUVGdS5fPkyJk6cCGtrazg5OWH58uWoq6uT200iIiIyIUYPUTt27EBUVBRiY2ORlZWFIUOGICwsDMXFxY3Wr6yshJeXF+Lj4+Hi4nLXY584cQIbNmyAr69vg23Lli3D3r17sXPnTnzzzTe4evUqnnzySWm7TqfDxIkTUVNTg2PHjmHLli348MMPERMTc38dJiIiItMgjCwwMFAsWrRIeq7T6YSrq6uIi4u7574eHh5izZo1jW4rLy8Xv/nNb8TBgwdFSEiIWLJkibSttLRUdOnSRezcuVMq+/777wUAkZ6eLoQQ4quvvhJKpVIUFhZKddatWyfUarWorq5uVt+0Wq0AILRabbPqExER3a+K6lrhsWKf8FixT1RU1xq7OZ1Scz+/jToSVVNTg8zMTISGhkplSqUSoaGhSE9Pv69jL1q0CBMnTjQ4dr3MzEzU1tYabOvfvz/c3d2l86anp2Pw4MFwdnaW6oSFhaGsrAy5ubn31TYiIiLq/MyNefLr169Dp9MZBBUAcHZ2xtmzZ2Ufd/v27cjKysKJEyca3V5YWAgLCwvY2dk1OG9hYaFUp7F21W9rTHV1Naqrq6XnZWVlcrtAREREHZzR50S1titXrmDJkiX45JNPYGlp2a7njouLg0ajkR5ubm7ten4iIiJqP0YNUQ4ODjAzM2twV1xRUdE9J403JTMzE8XFxRg2bBjMzc1hbm6Ob775Bm+//TbMzc2h0+ng4uKCmpoalJaWNnleFxeXRttVv60x0dHR0Gq10uPKlSuy+kBEREQdn1FDlIWFBfz9/ZGSkiKV6fV6pKSkIDg4WNYxH330UZw5cwbZ2dnSIyAgALNmzUJ2djbMzMzg7++PLl26GJz33LlzuHz5snTe4OBgnDlzxuAuwYMHD0KtVsPHx6fRc6tUKqjVaoMHERERmSajzokCgKioKERGRiIgIACBgYFITExERUUF5s2bBwCIiIhAz549ERcXB+D2ZPS8vDzp3/n5+cjOzoatrS28vb3RtWtXDBo0yOAcNjY26N69u1Su0Wgwf/58REVFoVu3blCr1Xj++ecRHByMESNGAADGjRsHHx8fzJkzB2+88QYKCwvxpz/9CYsWLYJKpWqvl4eIiIg6KKOHqBkzZuDatWuIiYlBYWEhhg4diqSkJGkS9+XLl6FU/m/A7OrVq/Dz85OeJyQkICEhASEhIUhLS2v2edesWQOlUolp06ahuroaYWFheP/996XtZmZm2LdvHxYuXIjg4GDY2NggMjISf/7zn++/00RERNTpKYQQwtiNMFVlZWXQaDTQarW8tEdERO2isqYOPjHJAIC8P4fB2sLo4yWdTnM/v03u7jwiIiKi9sAQRURERCQDQxQRERGRDAxRRERERDIwRBERERHJwBBFREREJANDFBEREZEMDFFEREREMjBEEREREcnAEEVEREQkA0MUERERkQwMUUREREQyMEQRERERycAQRURERCQDQxQRERGRDAx
RRERERDIwRBERERHJwBBFREREJANDFBEREZEMDFFEREREMjBEEREREcnAEEVEREQkA0MUERERkQwMUUREREQyMEQRERERycAQRURERCQDQxQRERGRDAxRRERERDIwRBERERHJwBBFREREJANDFBEREZEMDFFEREREMjBEEREREcnAEEVEREQkA0MUERERkQwMUUREREQyMEQRERERycAQRURERCQDQxQRERGRDEYPUe+99x48PT1haWmJoKAgZGRkNFk3NzcX06ZNg6enJxQKBRITExvUWbduHXx9faFWq6FWqxEcHIz9+/cb1Llw4QKmTp0KR0dHqNVqTJ8+HUVFRQZ1zp8/j8mTJ8PBwQFqtRoPP/wwDh061Cp9JiIios7PqCFqx44diIqKQmxsLLKysjBkyBCEhYWhuLi40fqVlZXw8vJCfHw8XFxcGq3Tq1cvxMfHIzMzEydPnsTYsWMxefJk5ObmAgAqKiowbtw4KBQKpKam4ujRo6ipqUF4eDj0er10nCeeeAJ1dXVITU1FZmYmhgwZgieeeAKFhYWt/0IQERFR5yOMKDAwUCxatEh6rtPphKurq4iLi7vnvh4eHmLNmjXNOo+9vb3YtGmTEEKI5ORkoVQqhVarlbaXlpYKhUIhDh48KIQQ4tq1awKA+Pbbb6U6ZWVlAoBUpzm0Wq0AYHAuIiKitlRRXSs8VuwTHiv2iYrqWmM3p1Nq7ue30UaiampqkJmZidDQUKlMqVQiNDQU6enprXIOnU6H7du3o6KiAsHBwQCA6upqKBQKqFQqqZ6lpSWUSiWOHDkCAOjevTv69euHjz76CBUVFairq8OGDRvg5OQEf3//VmkbERERdW7mxjrx9evXodPp4OzsbFDu7OyMs2fP3texz5w5g+DgYFRVVcHW1ha7d++Gj48PAGDEiBGwsbHBihUr8Ne//hVCCKxcuRI6nQ4FBQUAAIVCga+//hpTpkxB165doVQq4eTkhKSkJNjb2zd53urqalRXV0vPy8rK7qsfRERE1HEZfWJ5W+jXrx+ys7Nx/PhxLFy4EJGRkcjLywMAODo6YufOndi7dy9sbW2h0WhQWlqKYcOGQam8/XIIIbBo0SI4OTnh8OHDyMjIwJQpUxAeHi4FrcbExcVBo9FIDzc3t3bpLxEREbU/o41EOTg4wMzMrMFdcUVFRU1OGm8uCwsLeHt7AwD8/f1x4sQJrF27Fhs2bAAAjBs3DhcuXMD169dhbm4OOzs7uLi4wMvLCwCQmpqKffv24caNG1Cr1QCA999/HwcPHsSWLVuwcuXKRs8bHR2NqKgo6XlZWRmDFBERkYky2kiUhYUF/P39kZKSIpXp9XqkpKRI85dai16vN7jMVs/BwQF2dnZITU1FcXExJk2aBOD2XYAApJGpekql0uAOvl9TqVTS0gr1DyIiIjJNRhuJAoCoqChERkYiICAAgYGBSExMREVFBebNmwcAiIiIQM+ePREXFwfg9mT0+styNTU1yM/PR3Z2NmxtbaWRp+joaEyYMAHu7u4oLy/Htm3bkJaWhuTkZOm8mzdvxoABA+Do6Ij09HQsWbIEy5YtQ79+/QAAwcHBsLe3R2RkJGJiYmBlZYWNGzfi4sWLmDhxYnu+RERERNRBGTVEzZgxA9euXUNMTAwKCwsxdOhQJCUlSZPNL1++bDAadPXqVfj5+UnPExISkJCQgJCQEKSlpQEAiouLERERgYKCAmg0Gvj6+iI5ORmPPfaYtN+5c+cQHR2NkpISeHp6YtWqVVi2bJm03cHBAUlJSVi1ahXGjh2L2tpaDBw4EF988QWGDBnSxq8KERERdQYKIYQwdiNMVVlZGTQaDbRaLS/tERFRu6isqYNPzO2rL3l/DoO1hVHHSzql5n5+m+TdeURERERtjSGKiIiISAaGKCIiIiIZGKKIiIiIZGCIIiIiIpKBIYqIiIhIBoYoIiIiIhkYooiIiIhkYIgiIiIikoEhioiIiEgGhigiIiIiGRi
iiIiIiGRgiCIiIiKSgSGKiIiISAaGKCIiIiIZGKKIiIiIZGCIIiIiIpKBIYqIiIhIBoYoIiIiIhkYooiIiIhkYIgiIiIikoEhioiIiEgGhigiIiIiGRiiiIiIiGRgiCIiIiKSgSGKiIiISAaGKCIiIiIZGKKIiIiIZGCIIiIiIpKBIYqIiIhIBoYoIiIiIhkYooiIiIhkYIgiIiIikoEhioiIiEgGhigiIiIiGRiiiIiIiGRgiCIiIiKSgSGKiIiISAaGKCIiIiIZjB6i3nvvPXh6esLS0hJBQUHIyMhosm5ubi6mTZsGT09PKBQKJCYmNqizbt06+Pr6Qq1WQ61WIzg4GPv37zeoc+HCBUydOhWOjo5Qq9WYPn06ioqKGhzr3//+N4KCgmBlZQV7e3tMmTLlfrtLREREJsKoIWrHjh2IiopCbGwssrKyMGTIEISFhaG4uLjR+pWVlfDy8kJ8fDxcXFwardOrVy/Ex8cjMzMTJ0+exNixYzF58mTk5uYCACoqKjBu3DgoFAqkpqbi6NGjqKmpQXh4OPR6vXSczz//HHPmzMG8efNw+vRpHD16FL/73e9a/0UgIiKiTkkhhBDGOnlQUBCGDx+Od999FwCg1+vh5uaG559/HitXrrzrvp6enli6dCmWLl16z/N069YNb775JubPn48DBw5gwoQJuHHjBtRqNQBAq9XC3t4eBw4cQGhoKOrq6uDp6YnVq1dj/vz5svtXVlYGjUYDrVYrnYuIiKgtVdbUwScmGQCQ9+cwWFuYG7lFnU9zP7+NNhJVU1ODzMxMhIaG/q8xSiVCQ0ORnp7eKufQ6XTYvn07KioqEBwcDACorq6GQqGASqWS6llaWkKpVOLIkSMAgKysLOTn50OpVMLPzw89evTAhAkTkJOT0yrtIiIios7PaCHq+vXr0Ol0cHZ2Nih3dnZGYWHhfR37zJkzsLW1hUqlwoIFC7B79274+PgAAEaMGAEbGxusWLEClZWVqKiowEsvvQSdToeCggIAwE8//QQAePXVV/GnP/0J+/btg729PUaPHo2SkpImz1tdXY2ysjKDBxEREZkmo08sbwv9+vVDdnY2jh8/joULFyIyMhJ5eXkAAEdHR+zcuRN79+6Fra0tNBoNSktLMWzYMCiVt1+O+rlRq1atwrRp0+Dv74/NmzdDoVBg586dTZ43Li4OGo1Geri5ubV9Z4mIiMgojHah1MHBAWZmZg3uiisqKmpy0nhzWVhYwNvbGwDg7++PEydOYO3atdiwYQMAYNy4cbhw4QKuX78Oc3Nz2NnZwcXFBV5eXgCAHj16AIA0egUAKpUKXl5euHz5cpPnjY6ORlRUlPS8rKyMQYqIiMhEGW0kysLCAv7+/khJSZHK9Ho9UlJSpPlLrUWv16O6urpBuYODA+zs7JCamori4mJMmjQJwO3gpVKpcO7cOalubW0tLl26BA8PjybPo1KppKUV6h9ERERkmow6ZT8qKgqRkZEICAhAYGAgEhMTUVFRgXnz5gEAIiIi0LNnT8TFxQG4PRm9/rJcTU0N8vPzkZ2dDVtbW2nkKTo6GhMmTIC7uzvKy8uxbds2pKWlITk5WTrv5s2bMWDAADg6OiI9PR1LlizBsmXL0K9fPwCAWq3GggULEBsbCzc3N3h4eODNN98EADz11FPt9voQERFRx2XUEDVjxgxcu3YNMTExKCwsxNChQ5GUlCRNNr98+bI0TwkArl69Cj8/P+l5QkICEhISEBISgrS0NABAcXExIiIiUFBQAI1GA19fXyQnJ+Oxxx6T9jt37hyio6NRUlICT09PrFq1CsuWLTNo25tvvglzc3PMmTMHt27dQlBQEFJTU2Fvb9+GrwgRERF1FkZdJ8rUcZ0oIiJqb1wn6v51+HWiiIiIiDozhigiIiIiGRiiiIiIiGRgiCIiIiKSgSGKiIiISAaGKCIiIiIZGKKIiIiIZGCIIiIiIpKBIYqIiIhIBoYoIiIiIhlaLURduXIFzzz
zTGsdjoiIiKhDa7UQVVJSgi1btrTW4YiIiIg6tGZ/K+GXX3551+0//fTTfTeGiIiIqLNodoiaMmUKFAoFhBBN1lEoFK3SKCIiIqKOrtmX83r06IFdu3ZBr9c3+sjKymrLdhIRERF1KM0OUf7+/sjMzGxy+71GqYiIiIhMSbMv5y1fvhwVFRVNbvf29sahQ4dapVFEREREHV2zQ9SoUaPuut3GxgYhISH33SAiIiKizoCLbRIRERHJwBBFREREJEOzL+cRET1odHqBjIslKC6vglNXSwT27gYzJZdyITK2jvK3yRBFRNSIpJwCrN6bhwJtlVTWQ2OJ2HAfjB/Uw4gtI3qwdaS/TV7OIyL6laScAizcmmXwJg0AhdoqLNyahaScAiO1jOjB1tH+NmWNRH388cdYv349Ll68iPT0dHh4eCAxMRG9e/fG5MmTW7uNRETtRqcXWL03D42teldfFvtlLkZ6O/DSHnVIlTU6YzehTdzrb1MBYPXePDzm49Juf5stDlHr1q1DTEwMli5dir/85S/Q6W7/sOzs7JCYmMgQRUSdWsbFkgb/y/21orJqDH71QDu1iIiAe/9tCgAF2ipkXCxBcJ/u7dKmFl/Oe+edd7Bx40asWrUKZmZmUnlAQADOnDnTqo0jImpvxeV3D1BEnUWAhz2supjdu2In0dy/zfb8G27xSNTFixfh5+fXoFylUt11RXMios7Aqatls+ptnjscQV7d2rg1RPJZdTGDQmE6l5yb+7fZ3HqtocUhqnfv3sjOzoaHh4dBeVJSEgYMGNBqDSMiMobA3t3QQ2OJQm1Vo3MvFABcNJZ4pK8j50QRtaPm/m0G9m6//9y0+HJeVFQUFi1ahB07dkAIgYyMDPzlL39BdHQ0Xn755bZoIxFRuzFTKhAb7tPotvrIFBvuwwBF1M7u/Nv89V+fsf42FUKIxgLdXX3yySd49dVXceHCBQCAq6srVq9ejfnz57d6AzuzsrIyaDQaaLVaqNVqYzeHiFogKacAsV/moqisWirjOlFExtce60Q19/NbVoiqV1lZiZs3b8LJyUnuIUwaQxRR51ZeVSvdhbd57nBewiPqINp6xfLmfn7f14rl1tbWsLa2vp9DEBF1WHe+KQd58StfiDoKM6Wi3ZYxuJsWhyg/P79GZ/srFApYWlrC29sbc+fOxZgxY1qlgUREREQdUYsnlo8fPx4//fQTbGxsMGbMGIwZMwa2tra4cOEChg8fjoKCAoSGhuKLL75oi/YSERERdQgtHom6fv06XnzxRbzyyisG5a+//jp+/vlnHDhwALGxsXjttde4ejkRERGZrBaPRH322WeYOXNmg/Knn34an332GQBg5syZOHfu3P23joiIiKiDanGIsrS0xLFjxxqUHzt2DJaWt1cJ1ev10r+JiIiITFGLL+c9//zzWLBgATIzMzF8+HAAwIkTJ7Bp0yb83//9HwAgOTkZQ4cObdWGEhEREXUkshfbfPfdd6VLdv369cPzzz+P3/3udwCAW7duSXfrPci4ThRR51ZZUwefmGQAQN6fw2BtcV+rwhBRJ9Hcz+8WX84DgFmzZiE9PR0lJSUoKSlBenq6FKAAwMrKqkUB6r333oOnpycsLS0RFBSEjIyMJuvm5uZi2rRp8PT0hEKhQGJiYoM669atg6+vL9RqNdRqNYKDg7F//36DOhcuXMDUqVPh6OgItVqN6dOno6ioqNFzVldXY+jQoVAoFMjOzm52v4iIiMh0yQpRrWnHjh2IiopCbGwssrKyMGTIEISFhaG4uLjR+pWVlfDy8kJ8fDxcXFwardOrVy/Ex8cjMzMTJ0+exNixYzF58mTk5uYCACoqKjBu3DgoFAqkpqbi6NGjqKmpQXh4OPR6fYPjvfzyy3B1dW29ThMREVHnJ1pIoVAIpVLZ5KOlAgMDxaJFi6TnOp1OuLq6iri4uHvu6+HhIdasWdOs89jb24tNmzYJIYRITk4WSqVSaLVaaXtpaalQKBTi4MG
DBvt99dVXon///iI3N1cAEKdOnWrW+YQQQqvVCgAG5yGizqOiulZ4rNgnPFbsExXVtcZuDhG1k+Z+frf4Av/u3bsNntfW1uLUqVPYsmULVq9e3aJj1dTUIDMzE9HR0VKZUqlEaGgo0tPTW9q0Rul0OuzcuRMVFRUIDg4GcPvynEKhgEqlkupZWlpCqVTiyJEjCA0NBQAUFRXh2WefxZ49e5r19TbV1dWorv7fl5WWlZW1Sh+IiIio42lxiGpsAc3f/va3GDhwIHbs2IH58+c3+1jXr1+HTqeDs7OzQbmzszPOnj3b0qYZOHPmDIKDg1FVVQVbW1vs3r0bPj4+AIARI0bAxsYGK1aswF//+lcIIbBy5UrodDoUFBQAAIQQmDt3LhYsWICAgABcunTpnueMi4trcZAkIiKizqnV5kSNGDECKSkprXW4+9avXz9kZ2fj+PHjWLhwISIjI5GXlwcAcHR0xM6dO7F3717Y2tpCo9GgtLQUw4YNg1J5+yV55513UF5ebjBKdi/R0dHQarXS48qVK23SNyIiIjK+Vrlf99atW3j77bfRs2fPFu3n4OAAMzOzBnfFFRUVNTlpvLksLCzg7e0NAPD398eJEyewdu1abNiwAQAwbtw4XLhwAdevX4e5uTns7Ozg4uICLy8vAEBqairS09MNLvkBQEBAAGbNmoUtW7Y0OKdKpWpQn4iIiExTi0OUvb09FAqF9FwIgfLyclhbW2Pr1q0tOpaFhQX8/f2RkpKCKVOmALi92nlKSgoWL17c0qbdlV6vN5ivVM/BwQHA7dBUXFyMSZMmAQDefvttvP7661K9q1evIiwsDDt27EBQUFCrto2IiIg6nxaHqF+vy6RUKuHo6IigoCDY29u3uAFRUVGIjIxEQEAAAgMDkZiYiIqKCsybNw8AEBERgZ49eyIuLg7A7cno9ZflampqkJ+fj+zsbNja2kojT9HR0ZgwYQLc3d1RXl6Obdu2IS0tDcnJydJ5N2/ejAEDBsDR0RHp6elYsmQJli1bhn79+gEA3N3dDdppa2sLAOjTpw969erV4n4SERGRaWlxiIqMjGzVBsyYMQPXrl1DTEwMCgsLMXToUCQlJUmTzS9fvizNUwJujwj5+flJzxMSEpCQkICQkBCkpaUBAIqLixEREYGCggJoNBr4+voiOTkZjz32mLTfuXPnEB0djZKSEnh6emLVqlVYtmxZq/aNiIiITJesr30pLS3FP/7xD3z//fcAgIEDB+KZZ56BRqNp9QZ2ZvzaF6LOjV/7QvRgarOvfTl58iT69OmDNWvWSF/78tZbb6FPnz7Iysq6r0YTERERdRYt/m/VsmXLMGnSJGzcuBHm5rd3r6urw+9//3ssXboU3377bas3koiIiKijaXGIOnnypEGAAgBzc3O8/PLLCAgIaNXGEREREXVULb6cp1arcfny5QblV65cQdeuXVulUUREREQdXYtD1IwZMzB//nzs2LEDV65cwZUrV7B9+3b8/ve/x8yZM9uijUREREQdTosv5yUkJEChUCAiIgJ1dXUAgC5dumDhwoWIj49v9QYSERERdUQtDlEWFhZYu3Yt4uLicOHCBQC3F6C0trbGrVu3Wr2BRERERB2R7C8gtra2xuDBgzF48GCYmZnhrbfeQu/evVuzbUREREQdVrNDVHV1NaKjoxEQEICHHnoIe/bsAXD761N69+6NNWvWcMVvIiIiemA0+3JeTEwMNmzYgNDQUBw7dgxPPfUU5s2bh//85z9466238NRTT8HMzKwt20pERETUYTQ7RO3cuRMfffQRJk2ahJycHPj6+qKurg6nT5+GQqFoyzYSERERdTjNvpz3yy+/wN/fHwAwaNAgqFQqLFu2jAGKiIiIHkjNDlE6nQ4WFhbSc3Nzc9ja2rZJo4iIiIg6umZfzhNCYO7cuVCpVACAqqoqLFiwADY2Ngb1du3a1botJCIiIuqAmh2iIiMjDZ7Pnj271RtDRERE1Fk0O0Rt3ry
5LdtBRERE1KnIXmyTiIiI6EHGEEVEREQkA0MUERERkQwMUUREREQyMEQRERERycAQRURERCQDQxQRERGRDAxRRERERDIwRBERERHJwBBFREREJANDFBEREZEMDFFEREREMjBEEREREcnAEEVEREQkA0MUERERkQwMUUREREQyMEQRERERycAQRURERCQDQxQRERGRDAxRRERERDIwRBERERHJwBBFREREJANDFBEREZEMHSJEvffee/D09ISlpSWCgoKQkZHRZN3c3FxMmzYNnp6eUCgUSExMbFBn3bp18PX1hVqthlqtRnBwMPbv329Q58KFC5g6dSocHR2hVqsxffp0FBUVSdsvXbqE+fPno3fv3rCyskKfPn0QGxuLmpqaVus3ERERdV5GD1E7duxAVFQUYmNjkZWVhSFDhiAsLAzFxcWN1q+srISXlxfi4+Ph4uLSaJ1evXohPj4emZmZOHnyJMaOHYvJkycjNzcXAFBRUYFx48ZBoVAgNTUVR48eRU1NDcLDw6HX6wEAZ8+ehV6vx4YNG5Cbm4s1a9Zg/fr1+L//+7+2eSGIiIioU1EIIYQxGxAUFIThw4fj3XffBQDo9Xq4ubnh+eefx8qVK++6r6enJ5YuXYqlS5fe8zzdunXDm2++ifnz5+PAgQOYMGECbty4AbVaDQDQarWwt7fHgQMHEBoa2ugx3nzzTaxbtw4//fRTs/pWVlYGjUYDrVYrnYeIOo/Kmjr4xCQDAPL+HAZrC3Mjt4iI2kNzP7+NOhJVU1ODzMxMg9CiVCoRGhqK9PT0VjmHTqfD9u3bUVFRgeDgYABAdXU1FAoFVCqVVM/S0hJKpRJHjhxp8lharRbdunVrcnt1dTXKysoMHkRERGSajBqirl+/Dp1OB2dnZ4NyZ2dnFBYW3texz5w5A1tbW6hUKixYsAC7d++Gj48PAGDEiBGwsbHBihUrUFlZiYqKCrz00kvQ6XQoKCho9Hg//vgj3nnnHTz33HNNnjMuLg4ajUZ6uLm53VcfiIiIqOMy+pyottKvXz9kZ2fj+PHjWLhwISIjI5GXlwcAcHR0xM6dO7F3717Y2tpCo9GgtLQUw4YNg1LZ8CXJz8/H+PHj8dRTT+HZZ59t8pzR0dHQarXS48qVK23WPyIiIjIuo17gd3BwgJmZmcFdcQBQVFTU5KTx5rKwsIC3tzcAwN/fHydOnMDatWuxYcMGAMC4ceNw4cIFXL9+Hebm5rCzs4OLiwu8vLwMjnP16lWMGTMGDz30ED744IO7nlOlUhlcIiQiIiLTZdSRKAsLC/j7+yMlJUUq0+v1SElJkeYvtRa9Xo/q6uoG5Q4ODrCzs0NqaiqKi4sxadIkaVt+fj5Gjx4Nf39/bN68udFRKiIiInowGf1Wk6ioKERGRiIgIACBgYFITExERUUF5s2bBwCIiIhAz549ERcXB+D2ZPT6y3I1NTXIz89HdnY2bG1tpZGn6OhoTJgwAe7u7igvL8e2bduQlpaG5ORk6bybN2/GgAED4OjoiPT0dCxZsgTLli1Dv379APwvQHl4eCAhIQHXrl2T9r3fUTIiIiLq/IweombMmIFr164hJiYGhYWFGDp0KJKSkqTJ5pcvXzYYAbp69Sr8/Pyk5wkJCUhISEBISAjS0tIAAMXFxYiIiEBBQQE0Gg18fX2RnJyMxx57TNrv3LlziI6ORklJCTw9PbFq1SosW7ZM2n7w4EH8+OOP+PHHH9GrVy+DNht5VQgiIiLqAIy+TpQp4zpRRJ0b14kiejB1inWiiIiIiDorhigiIiIiGRiiiIiIiGRgiCIiIiKSgSGKiIiISAaGKCIiIiIZGKKIiIiIZGCIIiIiIpKBIYqIiIhIBi6/S0QtptMLZFwsQXF5FZy6WiKwdzeYKRXGbhYRUbtiiCKiFknKKcDqvXko0FZJZT00logN98H4QT2M2DIiovbFy3lE1GxJOQVYuDXLIEABQKG2Cgu3ZiEpp8BILSMian8ciSKiZtH
pBVbvzUNj31heXxb7ZS5GejuYzKW9yhqdsZtARB0YQxQRNUvGxZIGI1C/VlRWjcGvHminFhERGRcv5xFRsxSX3z1AmbIAD3tYdTEzdjOIqIPhSBQRNYtTV8tm1ds8dziCvLq1cWval1UXMygUpnGJkohaD0MUETVLYO9u6KGxRKG2qtF5UQoALhpLPNLX0WTmRBER3Q0v5xFRs5gpFYgN92l0W31kig33YYAiogcGQxQRNdv4QT2wbvYwOKtVBuUuGkusmz2M60QR0QOFl/OIqEXGD+qBkd4O0l14m+cO5yU8InogcSSKiFrszsAU5MWvfCGiBxNDFBEREZEMDFFEREREMjBEEREREcnAEEVEREQkA0MUERERkQwMUUREREQyMEQRERERycAQRURERCQDVywnInrA6fQCGRdLUFxeBaeulgjszQVUiZqDIYqI6AGWlFOA1XvzUKCtksp6aCwRG+7D70IkugdeziMiekAl5RRg4dYsgwAFAIXaKizcmoWknAIjtYyoc2CIIiJ6AOn0Aqv35kE0sq2+bPXePOj0jdUgIoAhiojogZRxsaTBCNSdBIACbRUyLpa0X6OIOhmGKCKiB1BxedMBSk49ogcRQxQR0QPIqatlq9YjehAxRBERPYACe3dDD40lmlrIQIHbd+kF9u7Wns0i6lQYooiIHkBmSgViw30AoEGQqn8eG+7D9aKI7oIhiojoATV+UA+smz0MLhrDS3YuGkusmz2M60QR3UOHCFHvvfcePD09YWlpiaCgIGRkZDRZNzc3F9OmTYOnpycUCgUSExMb1Fm3bh18fX2hVquhVqsRHByM/fv3G9S5cOECpk6dCkdHR6jVakyfPh1FRUUGdUpKSjBr1iyo1WrY2dlh/vz5uHnzZqv0mYioIxg/qAeOrBiLT58dgbVPD8Wnz47AkRVjGaCImsHoIWrHjh2IiopCbGwssrKyMGTIEISFhaG4uLjR+pWVlfDy8kJ8fDxcXFwardOrVy/Ex8cjMzMTJ0+exNixYzF58mTk5uYCACoqKjBu3DgoFAqkpqbi6NGjqKmpQXh4OPR6vXScWbNmITc3FwcPHsS+ffvw7bff4g9/+EPrvwhEREZkplQguE93TB7aE8F9uvMSHlFzCSMLDAwUixYtkp7rdDrh6uoq4uLi7rmvh4eHWLNmTbPOY29vLzZt2iSEECI5OVkolUqh1Wql7aWlpUKhUIiDBw8KIYTIy8sTAMSJEyekOvv37xcKhULk5+c365xarVYAMDgPkSmoqK4VHiv2CY8V+0RFda2xm0NE1Kqa+/lt1JGompoaZGZmIjQ0VCpTKpUIDQ1Fenp6q5xDp9Nh+/btqKioQHBwMACguroaCoUCKpVKqmdpaQmlUokjR44AANLT02FnZ4eAgACpTmhoKJRKJY4fP97ouaqrq1FWVmbwICIiItNk1BB1/fp16HQ6ODs7G5Q7OzujsLDwvo595swZ2NraQqVSYcGCBdi9ezd8fG7fiTJixAjY2NhgxYoVqKysREVFBV566SXodDoUFNz+rqjCwkI4OTkZHNPc3BzdunVrsm1xcXHQaDTSw83N7b76QERERB2X0edEtZV+/fohOzsbx48fx8KFCxEZGYm8vDwAgKOjI3bu3Im9e/fC1tYWGo0GpaWlGDZsGJRK+S9JdHQ0tFqt9Lhy5UprdYeIiIg6GHNjntzBwQFmZmYN7oorKipqctJ4c1lYWMDb2xsA4O/vjxMnTmDt2rXYsGEDAGDcuHG4cOECrl+/DnNzc9jZ2cHFxQVeXl4AABcXlwaT2+vq6lBSUtJk21QqlcElQiIiIjJdRh2JsrCwgL+/P1JSUqQyvV6PlJQUaf5Sa9Hr9aiurm5Q7uDgADs7O6SmpqK4uBiTJk0CAAQHB6O0tBSZmZlS3dTUVOj1egQFBbVq24iIiKjzMepIFABERUUhMjISAQEBCAwMRGJiIioqKjBv3jwAQEREBHr27Im4uDgAtyej11+Wq6m
pQX5+PrKzs2FrayuNPEVHR2PChAlwd3dHeXk5tm3bhrS0NCQnJ0vn3bx5MwYMGABHR0ekp6djyZIlWLZsGfr16wcAGDBgAMaPH49nn30W69evR21tLRYvXoynn34arq6u7fkSERERUQdk9BA1Y8YMXLt2DTExMSgsLMTQoUORlJQkTTa/fPmywTylq1evws/PT3qekJCAhIQEhISEIC0tDQBQXFyMiIgIFBQUQKPRwNfXF8nJyXjsscek/c6dO4fo6GiUlJTA09MTq1atwrJlywza9sknn2Dx4sV49NFHoVQqMW3aNLz99ttt+Go82HR6gYyLJSgur4JT19vf2cX1aoiIqKNSCCGEsRthqsrKyqDRaKDVaqFWq1vlmKYaNJJyCrB6bx4KtFVSWQ+NJWLDfTr1ysmm+vOqrKmDT8ztkd28P4fB2sLo/x8jImo1zf385jtfJ2KqQSMppwALt2bh12m+UFuFhVuzOu13eJnqz4uIiG5jiOokTDVo6PQCq/fmNegXAKks9stcjPR26FQjOAfzCrFk++kG5QXaKizYmoW1Tw/BYz73dweqMVXW6IzdBCIio2OI6gRMNWgAwPGfSgxGahpTVFaNwa8eaKcWtY/bAathyCIios6DIaoTyLj4YAYN6vgCPOxh1cXM2M0gIjIKhqhOoLj87gHqQbB57nAEeXUzdjOaZd93V/Hyv87cs94bvx2MJ3w793IZVl3MoFB0rtFPIqLWwhDVCTh1tWxWvc4UNOrp9AKhb32DorKGC6ECgAKAi8YSj/R17DSXKt3sbZpdj3e1ERF1XnwH7wQCe3dDD40lCrVVjc6L6oxB406rJw3Ewq1ZAGDQv/qexIb7dKp+NffnFdi7cwVeIiIyZLJfQGxKzJQKxIb7APhfsKjXWYPGncYP6oF1s4fBRWM44uaiseyUdx2a+s+LiIhu42Kbbai1F9s09XWHTG1hSlP/eRERmarmfn4zRLUhrlhO/HkREXU+XLHcRJkpFQju093YzaBm4s+LiMh0cU4UERERkQwMUUREREQyMEQRERERycAQRURERCQDQxQRERGRDAxRRERERDIwRBERERHJwBBFREREJANDFBEREZEMDFFEREREMjBEEREREcnAEEVEREQkA0MUERERkQwMUUREREQyMEQRERERycAQRURERCQDQxQRERGRDAxRRERERDIwRBERERHJwBBFREREJANDFBEREZEMDFFEREREMjBEEREREcnAEEVEREQkA0MUERERkQwMUUREREQyMEQRERERycAQRURERCSD0UPUe++9B09PT1haWiIoKAgZGRlN1s3NzcW0adPg6ekJhUKBxMTEBnXWrVsHX19fqNVqqNVqBAcHY//+/QZ1CgsLMWfOHLi4uMDGxgbDhg3D559/blDn/PnzmDx5MhwcHKBWq/Hwww/j0KFDrdJnIiIi6vyMGqJ27NiBqKgoxMbGIisrC0OGDEFYWBiKi4sbrV9ZWQkvLy/Ex8fDxcWl0Tq9evVCfHw8MjMzcfLkSYwdOxaTJ09Gbm6uVCciIgLnzp3Dl19+iTNnzuDJJ5/E9OnTcerUKanOE088gbq6OqSmpiIzMxNDhgzBE088gcLCwtZ9EYiIiKhzEkYUGBgoFi1aJD3X6XTC1dVVxMXF3XNfDw8PsWbNmmadx97eXmzatEl6bmNjIz766CODOt26dRMbN24UQghx7do1AUB8++230vaysjIBQBw8eLBZ5xRCCK1WKwAIrVbb7H2IiIjIuJr7+W20kaiamhpkZmYiNDRUKlMqlQgNDUV6enqrnEOn02H79u2oqKhAcHCwVP7QQw9hx44dKCkpgV6vx/bt21FVVYXRo0cDALp3745+/frho48+QkVFBerq6rBhwwY4OTnB39+/yfNVV1ejrKzM4EFERESmydxYJ75+/Tp0Oh2cnZ0Nyp2dnXH27Nn7OvaZM2cQHByMqqoq2NraYvfu3fDx8ZG2f/bZZ5gxYwa6d+8Oc3N
zWFtbY/fu3fD29gYAKBQKfP3115gyZQq6du0KpVIJJycnJCUlwd7evsnzxsXFYfXq1ffVdiIiIuocjD6xvC3069cP2dnZOH78OBYuXIjIyEjk5eVJ21955RWUlpbi66+/xsmTJxEVFYXp06fjzJkzAAAhBBYtWgQnJyccPnwYGRkZmDJlCsLDw1FQUNDkeaOjo6HVaqXHlStX2ryvREREZBxGG4lycHCAmZkZioqKDMqLioqanDTeXBYWFtKokr+/P06cOIG1a9diw4YNuHDhAt59913k5ORg4MCBAIAhQ4bg8OHDeO+997B+/XqkpqZi3759uHHjBtRqNQDg/fffx8GDB7FlyxasXLmy0fOqVCqoVKr7ajsRERF1DkYbibKwsIC/vz9SUlKkMr1ej5SUFIP5S61Br9ejuroawO07/IDb86/uZGZmBr1ef9c6SqVSqkNEREQPNqONRAFAVFQUIiMjERAQgMDAQCQmJqKiogLz5s0DcHspgp49eyIuLg7A7cno9ZflampqkJ+fj+zsbNja2kojT9HR0ZgwYQLc3d1RXl6Obdu2IS0tDcnJyQCA/v37w9vbG8899xwSEhLQvXt37NmzBwcPHsS+ffsAAMHBwbC3t0dkZCRiYmJgZWWFjRs34uLFi5g4cWJ7v0xERETUARk1RM2YMQPXrl1DTEwMCgsLMXToUCQlJUmTzS9fvmwwGnT16lX4+flJzxMSEpCQkICQkBCkpaUBAIqLixEREYGCggJoNBr4+voiOTkZjz32GACgS5cu+Oqrr7By5UqEh4fj5s2b8Pb2xpYtW/D4448DuH2pMSkpCatWrcLYsWNRW1uLgQMH4osvvsCQIUPa6dUhIiKijkwhhBDGboSpKisrg0ajgVarleZWERGRcej0AhkXS1BcXgWnrpYI7N0NZkqFsZtFHVBzP7+NOhJFRETUHpJyCrB6bx4KtFVSWQ+NJWLDfTB+UA8jtow6M5Nc4oCIiKheUk4BFm7NMghQAFCorcLCrVlIyml66Rqiu2GIIiIik6XTC6zem4fG5q3Ul63emwednjNbqOUYooiIyGRlXCxpMAJ1JwGgQFuFjIsl7dcoMhkMUUREZLKKy5sOUHLqEd2JIYqIiEyWU1fLVq1HdCeGKCIiMlmBvbuhh8YSTS1koMDtu/QCe3drz2aRiWCIIiIik2WmVCA23AcAGgSp+uex4T5cL4pkYYgiIiKTNn5QD6ybPQwuGsNLdi4aS6ybPYzrRJFsXGyTiIhM3vhBPfCYjwtXLKdWxRBFREQPBDOlAsF9uhu7GWRCeDmPiIiISAaGKCIiIiIZGKKIiIiIZGCIIiIiIpKBIYqIiIhIBoYoIiIiIhkYooiIiIhkYIgiIiIikoEhioiIiEgGrljehoQQAICysjIjt4SIiIiaq/5zu/5zvCkMUW2ovLwcAODm5mbklhAREVFLlZeXQ6PRNLldIe4Vs0g2vV6Pq1evomvXrlAo2uZLLsvKyuDm5oYrV65ArVa3yTk6mgetzw9afwH2mX02XQ9anztrf4UQKC8vh6urK5TKpmc+cSSqDSmVSvTq1atdzqVWqzvVL2hreND6/KD1F2CfHxTss+nrjP292whUPU4sJyIiIpKBIYqIiIhIBoaoTk6lUiE2NhYqlcrYTWk3D1qfH7T+Auzzg4J9Nn2m3l9OLCciIiKSgSNRRERERDIwRBERERHJwBBFREREJANDFBEREZEMDFEdUFxcHIYPH46uXbvCyckJU6ZMwblz5wzqVFVVYdGiRejevTtsbW0xbdo0FBUVGdS5fPkyJk6cCGtrazg5OWH58uWoq6trz67IEh8fD4VCgaVLl0plptjf/Px8zJ49G927d4eVlRUGDx6MkydPStuFEIiJiUGPHj1gZWWF0NBQ/PDDDwbHKCkpwaxZs6BWq2FnZ4f58+fj5s2b7d2VZtHpdHjllVfQu3dvWFlZoU+fPnjttdcMvpuqs/f522+/RXh4OFxdXaFQKLB
nzx6D7a3Vv++++w6jRo2CpaUl3Nzc8MYbb7R115p0tz7X1tZixYoVGDx4MGxsbODq6oqIiAhcvXrV4Bim1OdfW7BgARQKBRITEw3KO1Ofm9Pf77//HpMmTYJGo4GNjQ2GDx+Oy5cvS9tN8T0cACCowwkLCxObN28WOTk5Ijs7Wzz++OPC3d1d3Lx5U6qzYMEC4ebmJlJSUsTJkyfFiBEjxEMPPSRtr6urE4MGDRKhoaHi1KlT4quvvhIODg4iOjraGF1qtoyMDOHp6Sl8fX3FkiVLpHJT629JSYnw8PAQc+fOFcePHxc//fSTSE5OFj/++KNUJz4+Xmg0GrFnzx5x+vRpMWnSJNG7d29x69Ytqc748ePFkCFDxH/+8x9x+PBh4e3tLWbOnGmMLt3TX/7yF9G9e3exb98+cfHiRbFz505ha2sr1q5dK9Xp7H3+6quvxKpVq8SuXbsEALF7926D7a3RP61WK5ydncWsWbNETk6O+PTTT4WVlZXYsGFDe3XTwN36XFpaKkJDQ8WOHTvE2bNnRXp6uggMDBT+/v4GxzClPt9p165dYsiQIcLV1VWsWbPGYFtn6vO9+vvjjz+Kbt26ieXLl4usrCzx448/ii+++EIUFRVJdUztPbweQ1QnUFxcLACIb775Rghx+42pS5cuYufOnVKd77//XgAQ6enpQojbv/RKpVIUFhZKddatWyfUarWorq5u3w40U3l5ufjNb34jDh48KEJCQqQQZYr9XbFihXj44Yeb3K7X64WLi4t48803pbLS0lKhUqnEp59+KoQQIi8vTwAQJ06ckOrs379fKBQKkZ+f33aNl2nixInimWeeMSh78sknxaxZs4QQptfnX3/YtFb/3n//fWFvb2/we71ixQrRr1+/Nu7Rvd0tUNTLyMgQAMTPP/8shDDdPv/yyy+iZ8+eIicnR3h4eBiEqM7c58b6O2PGDDF79uwm9zHF9/B6vJzXCWi1WgBAt27dAACZmZmora1FaGioVKd///5wd3dHeno6ACA9PR2DBw+Gs7OzVCcsLAxlZWXIzc1tx9Y336JFizBx4kSDfgGm2d8vv/wSAQEBeOqpp+Dk5AQ/Pz9s3LhR2n7x4kUUFhYa9Fmj0SAoKMigz3Z2dggICJDqhIaGQqlU4vjx4+3XmWZ66KGHkJKSgvPnzwMATp8+jSNHjmDChAkATLPPd2qt/qWnp+ORRx6BhYWFVCcsLAznzp3DjRs32qk38mm1WigUCtjZ2QEwzT7r9XrMmTMHy5cvx8CBAxtsN6U+6/V6/Pvf/0bfvn0RFhYGJycnBAUFGVzyM8X38HoMUR2cXq/H0qVLMXLkSAwaNAgAUFhYCAsLC+lNqJ6zszMKCwulOnf+MtZvr9/W0Wzfvh1ZWVmIi4trsM0U+/vTTz9h3bp1+M1vfoPk5GQsXLgQL7zwArZs2QLgf21urE939tnJyclgu7m5Obp169Yh+7xy5Uo8/fTT6N+/P7p06QI/Pz8sXboUs2bNAmCafb5Ta/Wvs/2u36mqqgorVqzAzJkzpS+jNcU+/+1vf4O5uTleeOGFRrebUp+Li4tx8+ZNxMfHY/z48Thw4ACmTp2KJ598Et988w0A03wPr2du7AbQ3S1atAg5OTk4cuSIsZvSZq5cuYIlS5bg4MGDsLS0NHZz2oVer0dAQAD++te/AgD8/PyQk5OD9evXIzIy0sitaxufffYZPvnkE2zbtg0DBw5EdnY2li5dCldXV5PtM/1PbW0tpk+fDiEE1q1bZ+zmtJnMzEysXbsWWVlZUCgUxm5Om9Pr9QCAyZMnY9myZQCAoUOH4tixY1i/fj1CQkKM2bw2x5GoDmzx4sXYt28fDh06hF69eknlLi4uqKmpQWlpqUH9oqIiuLi4SHV+fedD/fP6Oh1FZmYmiouLMWzYMJibm8Pc3BzffPMN3n77bZibm8PZ2dmk+gsAPXr0gI+Pj0HZgAEDpLtZ6tvcWJ/u7HNxcbHB9rq6OpSUlHT
IPi9fvlwajRo8eDDmzJmDZcuWSaOPptjnO7VW/zrb7zrwvwD1888/4+DBg9IoFGB6fT58+DCKi4vh7u4uvZ/9/PPPePHFF+Hp6QnAtPrs4OAAc3Pze76fmdp7eD2GqA5ICIHFixdj9+7dSE1NRe/evQ22+/v7o0uXLkhJSZHKzp07h8uXLyM4OBgAEBwcjDNnzhj8oda/ef36l93YHn30UZw5cwbZ2dnSIyAgALNmzZL+bUr9BYCRI0c2WLbi/Pnz8PDwAAD07t0bLi4uBn0uKyvD8ePHDfpcWlqKzMxMqU5qair0ej2CgoLaoRctU1lZCaXS8C3HzMxM+p+sKfb5Tq3Vv+DgYHz77beora2V6hw8eBD9+vWDvb19O/Wm+eoD1A8//ICvv/4a3bt3N9huan2eM2cOvvvuO4P3M1dXVyxfvhzJyckATKvPFhYWGD58+F3fz0ztM8uAsWe2U0MLFy4UGo1GpKWliYKCAulRWVkp1VmwYIFwd3cXqamp4uTJkyI4OFgEBwdL2+tvFx03bpzIzs4WSUlJwtHRscPfLlrvzrvzhDC9/mZkZAhzc3Pxl7/8Rfzwww/ik08+EdbW1mLr1q1Snfj4eGFnZye++OIL8d1334nJkyc3eju8n5+fOH78uDhy5Ij4zW9+02Fu9/+1yMhI0bNnT2mJg127dgkHBwfx8ssvS3U6e5/Ly8vFqVOnxKlTpwQA8dZbb4lTp05Jd6K1Rv9KS0uFs7OzmDNnjsjJyRHbt28X1tbWRrvd/259rqmpEZMmTRK9evUS2dnZBu9nd95xZUp9bsyv784TonP1+V793bVrl+jSpYv44IMPxA8//CDeeecdYWZmJg4fPiwdw9Tew+sxRHVAABp9bN68Wapz69Yt8cc//lHY29sLa2trMXXqVFFQUGBwnEuXLokJEyYIKysr4eDgIF588UVRW1vbzr2R59chyhT7u3fvXjFo0CChUqlE//79xQcffGCwXa/Xi1deeUU4OzsLlUolHn30UXHu3DmDOv/973/FzJkzha2trVCr1WLevHmivLy8PbvRbGVlZWLJkiXC3d1dWFpaCi8vL7Fq1SqDD9PO3udDhw41+rcbGRkphGi9/p0+fVo8/PDDQqVSiZ49e4r4+Pj26mIDd+vzxYsXm3w/O3TokHQMU+pzYxoLUZ2pz83p7z/+8Q/h7e0tLC0txZAhQ8SePXsMjmGK7+FCCKEQ4o7lgomIiIioWTgnioiIiEgGhigiIiIiGRiiiIiIiGRgiCIiIiKSgSGKiIiISAaGKCIiIiIZGKKIiIiIZGCIIiIiIpKBIYqIOoy0tDQoFArpi0o//PBD2NnZ3fdxW+s4bXU8ABg9ejSWLl3aqsdsiUceeQTbtm1rVt0RI0bg888/b+MWEXV8DFFE1GLr169H165dUVdXJ5XdvHkTXbp0wejRow3q1gejCxcutFl7Dh06hMcffxzdu3eHtbU1fHx88OKLLyI/P7/Nztlcly5dgkKhuOvjww8/xK5du/Daa68ZpY1ffvklioqK8PTTTzer/p/+9CesXLlS+vJoogcVQxQRtdiYMWNw8+ZNnDx5Uio7fPgwXFxccPz4cVRVVUnlhw4dgru7O/r06dMmbdmwYQNCQ0Ph4uKCzz//HHl5eVi/fj20Wi3+/ve/t8k5W8LNzQ0FBQXS48UXX8TAgQMNymbMmIFu3bqha9euRmnj22+/jXnz5kGpbN5HwoQJE1BeXo79+/e3ccuIOjaGKCJqsX79+qFHjx5IS0uTytLS0jB58mT07t0b//nPfwzKx4wZAwD4+OOPERAQgK5du8LFxQW/+93vUFxcLLsdv/zyC1544QW88MIL+Oc//4nRo0fD09MTjzzyCDZt2oSYmJgm9123bh369OkDCwsL9OvXDx9//LHB9tLSUjz33HNwdnaGpaUlBg0ahH379jV6rGvXriEgIABTp05FdXW1wTYzMzO4uLhID1tbW5ibmxuUWVlZNbic5+npidd
ffx0RERGwtbWFh4cHvvzyS1y7dg2TJ0+Gra0tfH19DYIsABw5cgSjRo2ClZUV3Nzc8MILL6CioqLJ1+HatWtITU1FeHi4VCaEwKuvvgp3d3eoVCq4urrihRdeMOjT448/ju3btzd5XKIHAUMUEckyZswYHDp0SHp+6NAhjB49GiEhIVL5rVu3cPz4cSlE1dbW4rXXXsPp06exZ88eXLp0CXPnzpXdhp07d6KmpgYvv/xyo9ubmre0e/duLFmyBC+++CJycnLw3HPPYd68eVK79Xo9JkyYgKNHj2Lr1q3Iy8tDfHw8zMzMGhzrypUrGDVqFAYNGoR//etfUKlUsvvza2vWrMHIkSNx6tQpTJw4EXPmzEFERARmz56NrKws9OnTBxEREaj/HvkLFy5g/PjxmDZtGr777jvs2LEDR44cweLFi5s8x5EjR2BtbY0BAwZIZZ9//jnWrFmDDRs24IcffsCePXswePBgg/0CAwNx+PDhVusrUackiIhk2Lhxo7CxsRG1tbWirKxMmJubi+LiYrFt2zbxyCOPCCGESElJEQDEzz//3OgxTpw4IQCI8vJyIYQQhw4dEgDEjRs3hBBCbN68WWg0mibbsHDhQqFWq+/Z1l8f56GHHhLPPvusQZ2nnnpKPP7440IIIZKTk4VSqRTnzp276/HOnj0r3NzcxAsvvCD0ev092yGEELGxsWLIkCENykNCQsSSJUuk5x4eHmL27NnS84KCAgFAvPLKK1JZenq6ACAKCgqEEELMnz9f/OEPfzA47uHDh4VSqRS3bt1qtD1r1qwRXl5eBmV///vfRd++fUVNTU2T/fjiiy+EUqkUOp2uyTpEpo4jUUQky+jRo1FRUYETJ07g8OHD6Nu3LxwdHRESEiLNi0pLS4OXlxfc3d0BAJmZmQgPD4e7uzu6du2KkJAQAMDly5dltUEIAYVC0eL9vv/+e4wcOdKgbOTIkfj+++8BANnZ2ejVqxf69u3b5DFu3bqFUaNG4cknn8TatWtlteNefH19pX87OzsDgMGIUH1Z/SXR06dP48MPP4Stra30CAsLg16vx8WLF5vsh6WlpUHZU089hVu3bsHLywvPPvssdu/ebXATAQBYWVlBr9c3uHxJ9CBhiCIiWby9vdGrVy8cOnQIhw4dkgKRq6sr3NzccOzYMRw6dAhjx44FAFRUVCAsLAxqtRqffPIJTpw4gd27dwMAampqZLWhb9++0Gq1KCgoaJ1O/X9WVlb3rKNSqRAaGop9+/a12V2AXbp0kf5dH9IaK6u/S+7mzZt47rnnkJ2dLT1Onz6NH374ocmJ/Q4ODrhx44ZBmZubG86dO4f3338fVlZW+OMf/4hHHnkEtbW1Up2SkhLY2Ng067UiMlUMUUQk25gxY5CWloa0tDSDpQ0eeeQR7N+/HxkZGdJ8qLNnz+K///0v4uPjMWrUKPTv3/++JpUDwG9/+1tYWFjgjTfeaHR7/XpTvzZgwAAcPXrUoOzo0aPw8fEBcHsE6JdffsH58+ebPLdSqcTHH38Mf39/jBkzBlevXpXXiVY0bNgw5OXlwdvbu8HDwsKi0X38/PxQWFjYIEhZWVkhPDwcb7/9NtLS0pCeno4zZ85I23NycuDn59em/SHq6MyN3QAi6rzGjBmDRYsWoba2VhqJAoCQkBAsXrwYNTU1Uohyd3eHhYUF3nnnHSxYsAA5OTn3vS6Sm5sb1qxZg8WLF6OsrAwRERHw9PTEL7/8go8++gi2traNLnOwfPlyTJ8+HX5+fggNDcXevXuxa9cufP3111L7H3nkEUybNg1vvfUWvL29cfbsWSgUCowfP146jpmZGT755BPMnDkTY8eORVpaGlxcXO6rT/djxYoVGDFiBBYvXozf//73sLGxQV5eHg4ePIh333230X38/Pzg4OCAo0eP4oknngBwezFRnU6HoKAgWFtbY+vWrbCysoKHh4e03+HDhzFu3Lh26RdRR8WRKCKSbcyYMbh16xa8vb2l+Tn
A7RBSXl4uLYUAAI6Ojvjwww+xc+dO+Pj4ID4+HgkJCffdhj/+8Y84cOAA8vPzMXXqVPTv3x+///3voVar8dJLLzW6z5QpU7B27VokJCRg4MCB2LBhAzZv3mwwmvb5559j+PDhmDlzJnx8fPDyyy9Dp9M1OJa5uTk+/fRTDBw4EGPHjr3v0bX74evri2+++Qbnz5/HqFGj4Ofnh5iYGLi6uja5j5mZGebNm4dPPvlEKrOzs8PGjRsxcuRI+Pr64uuvv8bevXvRvXt3AEB+fj6OHTuGefPmtXmfiDoyhRD//95YIiJ6IBUWFmLgwIHIysoyGG1qyooVK3Djxg188MEH7dA6oo6LI1FERA84FxcX/OMf/2j2XZJOTk5G+4oaoo6EI1FEREREMnAkioiIiEgGhigiIiIiGRiiiIiIiGRgiCIiIiKSgSGKiIiISAaGKCIiIiIZGKKIiIiIZGCIIiIiIpKBIYqIiIhIhv8HNjKMT+aFRdUAAAAASUVORK5CYII=\n", + "image/png": "", "text/plain": [ "
" ] diff --git a/notebook/automl_synapseML.ipynb b/notebook/automl_synapseML.ipynb index 6dff0f081..f6b550c45 100644 --- a/notebook/automl_synapseML.ipynb +++ b/notebook/automl_synapseML.ipynb @@ -25,7 +25,7 @@ "\n", "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the `synapse` option:\n", "```bash\n", - "pip install flaml[synapse]>=1.1.3; \n", + "pip install flaml[synapse] \n", "```\n", " " ] @@ -36,7 +36,7 @@ "metadata": {}, "outputs": [], "source": [ - "# %pip install \"flaml[synapse]>=1.1.3\"" + "# %pip install \"flaml[synapse]\"" ] }, { diff --git a/notebook/automl_time_series_forecast.ipynb b/notebook/automl_time_series_forecast.ipynb index 793f08963..c7cf3b9b5 100644 --- a/notebook/automl_time_series_forecast.ipynb +++ b/notebook/automl_time_series_forecast.ipynb @@ -8,6 +8,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -21,7 +22,7 @@ "\n", "In this notebook, we demonstrate how to use FLAML library for time series forecasting tasks: univariate time series forecasting (only time), multivariate time series forecasting (with exogneous variables) and forecasting discrete values.\n", "\n", - "FLAML requires Python>=3.7. To run this notebook example, please install flaml with the notebook and forecast option:\n" + "FLAML requires Python>=3.7. 
To run this notebook example, please install flaml with the [automl,ts_forecast] option:\n" ] }, { @@ -156,7 +157,7 @@ } ], "source": [ - "%pip install flaml[notebook,ts_forecast]==1.1.2\n", + "%pip install flaml[automl,ts_forecast] matplotlib openml\n", "# avoid version 1.0.2 to 1.0.5 for this notebook due to a bug for arima and sarimax's init config" ] }, diff --git a/notebook/automl_xgboost.ipynb b/notebook/automl_xgboost.ipynb index c2429fa8f..a46e520c2 100644 --- a/notebook/automl_xgboost.ipynb +++ b/notebook/automl_xgboost.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { @@ -27,9 +28,9 @@ "\n", "In this notebook, we demonstrate how to use FLAML library to tune hyperparameters of XGBoost with a regression example.\n", "\n", - "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the `notebook` option:\n", + "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the `automl` option (this option is introduced from version 2, for version 1 it is installed by default):\n", "```bash\n", - "pip install flaml[notebook]==1.1.2\n", + "pip install flaml[automl]\n", "```" ] }, @@ -39,7 +40,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install flaml[notebook]==1.1.2" + "%pip install flaml[automl] matplotlib openml" ] }, { diff --git a/notebook/integrate_azureml.ipynb b/notebook/integrate_azureml.ipynb index b7f0694f7..88cb7fe04 100644 --- a/notebook/integrate_azureml.ipynb +++ b/notebook/integrate_azureml.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { @@ -27,9 +28,9 @@ "\n", "In this notebook, we use one real data example (binary classification) to showcase how to use FLAML library together with AzureML.\n", "\n", - "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the [azureml] option:\n", + "FLAML requires `Python>=3.7`. 
To run this notebook example, please install flaml with the [automl,azureml] option:\n", "```bash\n", - "pip install flaml[azureml]\n", + "pip install flaml[automl,azureml]\n", "```" ] }, @@ -39,7 +40,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install flaml[azureml]" + "%pip install flaml[automl,azureml]" ] }, { diff --git a/notebook/integrate_sklearn.ipynb b/notebook/integrate_sklearn.ipynb index 83667543e..e124ca995 100644 --- a/notebook/integrate_sklearn.ipynb +++ b/notebook/integrate_sklearn.ipynb @@ -21,6 +21,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -39,12 +40,21 @@ "\n", "In this notebook, we use one real data example (binary classification) to showcase how to use FLAML library.\n", "\n", - "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the `notebook` option:\n", + "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the `[automl]` option (this option is introduced from version 2, for version 1 it is installed by default):\n", "```bash\n", - "pip install flaml[notebook]\n", + "pip install flaml[automl]\n", "```" ] }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install flaml[automl] openml" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -72,15 +82,6 @@ "#### As FLAML's AutoML module can be used a transformer in the Sklearn's pipeline we can get all the benefits of pipeline and thereby write extremley clean, and resuable code." 
] }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install flaml[notebook]" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/notebook/integrate_spark.ipynb b/notebook/integrate_spark.ipynb index e440787b0..c93430ff0 100644 --- a/notebook/integrate_spark.ipynb +++ b/notebook/integrate_spark.ipynb @@ -1 +1 @@ -{"cells":[{"attachments":{},"cell_type":"markdown","metadata":{"slideshow":{"slide_type":"slide"}},"source":["Copyright (c) Microsoft Corporation. All rights reserved. \n","\n","Licensed under the MIT License.\n","\n","# Run FLAML Parallel tuning with Spark\n","\n","\n","## 1. Introduction\n","\n","FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models \n","with low computational cost. It is fast and economical. The simple and lightweight design makes it easy \n","to use and extend, such as adding new learners. FLAML can \n","- serve as an economical AutoML engine,\n","- be used as a fast hyperparameter tuning tool, or \n","- be embedded in self-tuning software that requires low latency & resource in repetitive\n"," tuning tasks.\n","\n","In this notebook, we demonstrate how to run FLAML parallel tuning using Spark as the backend.\n","\n","FLAML requires `Python>=3.7`. 
To run this notebook example, please install flaml with the following options:\n","```bash\n","pip install flaml[spark,notebook,blendsearch]>=1.1.0\n","```\n","*Spark support is added in v1.1.0*"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:16:51.6335768Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:17:21.9028602Z\",\"execution_finish_time\":\"2022-12-07T08:18:52.3646576Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}"},"outputs":[],"source":["# %pip install flaml[spark,notebook,blendsearch]>=1.1.0"]},{"cell_type":"markdown","metadata":{"slideshow":{"slide_type":"slide"}},"source":["## 2. Regression Example\n","### Load data and preprocess\n","\n","Download [houses dataset](https://www.openml.org/d/537) from OpenML. The task is to predict median price of the house in the region based on demographic composition and a state of housing market in the region."]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:53.4783943Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:20:55.7666047Z\",\"execution_finish_time\":\"2022-12-07T08:21:10.9050139Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"subslide"},"tags":[]},"outputs":[],"source":["from flaml.data import load_openml_dataset\n","X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir='./')"]},{"cell_type":"markdown","metadata":{"slideshow":{"slide_type":"slide"}},"source":["### Run FLAML\n","In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them. 
\n","\n","Notice that here `use_spark` is set to `True` in order to use Spark as the parallel training backend."]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:53.7001471Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:21:10.9846131Z\",\"execution_finish_time\":\"2022-12-07T08:21:11.3604062Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"},"tags":[]},"outputs":[],"source":["''' import AutoML class from flaml package '''\n","from flaml import AutoML\n","automl = AutoML()"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:53.8983341Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:21:11.4417491Z\",\"execution_finish_time\":\"2022-12-07T08:21:11.8242955Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"}},"outputs":[],"source":["settings = {\n"," \"time_budget\": 30, # total running time in seconds\n"," \"metric\": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2','rmse','mape']\n"," \"estimator_list\": ['lgbm'], # list of ML learners; we tune lightgbm in this example\n"," \"task\": 'regression', # task type \n"," \"log_file_name\": 'houses_experiment.log', # flaml log file\n"," \"seed\": 7654321, # random seed\n"," \"use_spark\": True, # whether to use Spark for distributed training\n"," \"n_concurrent_trials\": 2, # the maximum number of concurrent trials\n","}"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li 
Jiang\":{\"queued_time\":\"2022-12-07T08:20:54.3953298Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:21:11.9003975Z\",\"execution_finish_time\":\"2022-12-07T08:27:58.525709Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"},"tags":[]},"outputs":[],"source":["'''The main flaml automl API'''\n","automl.fit(X_train=X_train, y_train=y_train, **settings)"]},{"cell_type":"markdown","metadata":{"slideshow":{"slide_type":"slide"}},"source":["### Best model and metric"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:54.789647Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:27:58.6014435Z\",\"execution_finish_time\":\"2022-12-07T08:27:58.9745212Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"},"tags":[]},"outputs":[],"source":["''' retrieve best config'''\n","print('Best hyperparmeter config:', automl.best_config)\n","print('Best r2 on validation data: {0:.4g}'.format(1-automl.best_loss))\n","print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:54.9962623Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:27:59.0491242Z\",\"execution_finish_time\":\"2022-12-07T08:27:59.4076477Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"}},"outputs":[],"source":["automl.model.estimator"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li 
Jiang\":{\"queued_time\":\"2022-12-07T08:20:55.2539877Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:27:59.5247209Z\",\"execution_finish_time\":\"2022-12-07T08:28:00.4849272Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}"},"outputs":[],"source":["import matplotlib.pyplot as plt\n","plt.barh(automl.feature_names_in_, automl.feature_importances_)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:55.5182783Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:28:00.5644015Z\",\"execution_finish_time\":\"2022-12-07T08:28:01.5531147Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"}},"outputs":[],"source":["''' pickle and save the automl object '''\n","import pickle\n","with open('automl.pkl', 'wb') as f:\n"," pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:55.803107Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:28:01.6350567Z\",\"execution_finish_time\":\"2022-12-07T08:28:02.5774117Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"},"tags":[]},"outputs":[],"source":["''' compute predictions of testing dataset ''' \n","y_pred = automl.predict(X_test)\n","print('Predicted labels', y_pred)\n","print('True labels', y_test)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:56.0585537Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:28:02.6537337Z\",\"execution_finish_time\":\"2022-12-07T08:28:03.0177805Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"},"tags":[]},"outputs":[],"source":["''' compute different metric values on testing 
dataset'''\n","from flaml.ml import sklearn_metric_loss_score\n","print('r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))\n","print('mse', '=', sklearn_metric_loss_score('mse', y_pred, y_test))\n","print('mae', '=', sklearn_metric_loss_score('mae', y_pred, y_test))"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:56.2226463Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:28:03.1150781Z\",\"execution_finish_time\":\"2022-12-07T08:28:03.4858362Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"subslide"},"tags":[]},"outputs":[],"source":["from flaml.data import get_output_from_log\n","time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \\\n"," get_output_from_log(filename=settings['log_file_name'], time_budget=60)\n","\n","for config in config_history:\n"," print(config)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:56.4020235Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:28:03.5811012Z\",\"execution_finish_time\":\"2022-12-07T08:28:04.5493292Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"}},"outputs":[],"source":["import numpy as np\n","\n","plt.title('Learning Curve')\n","plt.xlabel('Wall Clock Time (s)')\n","plt.ylabel('Validation r2')\n","plt.scatter(time_history, 1 - np.array(valid_loss_history))\n","plt.step(time_history, 1 - np.array(best_valid_loss_history), where='post')\n","plt.show()"]},{"cell_type":"markdown","metadata":{},"source":["## 3. Add a customized LightGBM learner in FLAML\n","The native API of LightGBM allows one to specify a custom objective function in the model constructor. You can easily enable it by adding a customized LightGBM learner in FLAML. 
In the following example, we show how to add such a customized LightGBM learner with a custom objective function for parallel tuning with Spark.\n","\n","It's a little bit different from adding customized learners for sequential training. In sequential training, we can define the customized learner in a notebook cell. However, in spark training, we have to import it from a file so that Spark can use it in executors. We can easily do it by leveraging `broadcast_code` function in `flaml.tune.spark.utils`."]},{"cell_type":"markdown","metadata":{},"source":["### Create a customized LightGBM learner with a custom objective function"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T09:09:49.540914Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T09:09:49.6259637Z\",\"execution_finish_time\":\"2022-12-07T09:09:50.5841239Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}"},"outputs":[],"source":["custom_code = \"\"\"\n","import numpy as np \n","from flaml.model import LGBMEstimator\n","from flaml import tune\n","\n","\n","''' define your customized objective function '''\n","def my_loss_obj(y_true, y_pred):\n"," c = 0.5\n"," residual = y_pred - y_true\n"," grad = c * residual /(np.abs(residual) + c)\n"," hess = c ** 2 / (np.abs(residual) + c) ** 2\n"," # rmse grad and hess\n"," grad_rmse = residual\n"," hess_rmse = 1.0\n"," \n"," # mae grad and hess\n"," grad_mae = np.array(residual)\n"," grad_mae[grad_mae > 0] = 1.\n"," grad_mae[grad_mae <= 0] = -1.\n"," hess_mae = 1.0\n","\n"," coef = [0.4, 0.3, 0.3]\n"," return coef[0] * grad + coef[1] * grad_rmse + coef[2] * grad_mae, \\\n"," coef[0] * hess + coef[1] * hess_rmse + coef[2] * hess_mae\n","\n","\n","''' create a customized LightGBM learner class with your objective function '''\n","class MyLGBM(LGBMEstimator):\n"," '''LGBMEstimator with my_loss_obj as the objective function\n"," '''\n","\n"," def __init__(self, 
**config):\n"," super().__init__(objective=my_loss_obj, **config)\n","\"\"\"\n","\n","from flaml.tune.spark.utils import broadcast_code\n","custom_learner_path = broadcast_code(custom_code=custom_code)\n","print(custom_learner_path)\n","from flaml.tune.spark.mylearner import MyLGBM"]},{"cell_type":"markdown","metadata":{},"source":["### Add the customized learner in FLAML"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T09:14:16.2449566Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T09:14:16.3227204Z\",\"execution_finish_time\":\"2022-12-07T09:16:49.7573919Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","tags":[]},"outputs":[],"source":["automl = AutoML()\n","automl.add_learner(learner_name='my_lgbm', learner_class=MyLGBM)\n","settings = {\n"," \"time_budget\": 30, # total running time in seconds\n"," \"metric\": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2']\n"," \"estimator_list\": ['my_lgbm',], # list of ML learners; we tune lightgbm in this example\n"," \"task\": 'regression', # task type \n"," \"log_file_name\": 'houses_experiment_my_lgbm.log', # flaml log file\n"," \"n_concurrent_trials\": 2,\n"," \"use_spark\": True,\n","}\n","automl.fit(X_train=X_train, y_train=y_train, **settings)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T09:17:06.0159529Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T09:17:06.1042554Z\",\"execution_finish_time\":\"2022-12-07T09:17:06.467989Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","tags":[]},"outputs":[],"source":["print('Best hyperparmeter config:', automl.best_config)\n","print('Best r2 on validation data: {0:.4g}'.format(1-automl.best_loss))\n","print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))\n","\n","y_pred = 
automl.predict(X_test)\n","print('Predicted labels', y_pred)\n","print('True labels', y_test)\n","\n","from flaml.ml import sklearn_metric_loss_score\n","print('r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))\n","print('mse', '=', sklearn_metric_loss_score('mse', y_pred, y_test))\n","print('mae', '=', sklearn_metric_loss_score('mae', y_pred, y_test))"]},{"cell_type":"code","execution_count":null,"metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":[]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Python 3.8.13 ('syml-py38')","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.13 (default, Oct 21 2022, 23:50:54) \n[GCC 11.2.0]"},"notebook_environment":{},"save_output":true,"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.livy.synapse.ipythonInterpreter.enabled":"true"},"enableDebugMode":false,"keepAliveTimeout":30}},"synapse_widget":{"state":{},"version":"0.1"},"trident":{"lakehouse":{}},"vscode":{"interpreter":{"hash":"e3d9487e2ef008ade0db1bc293d3206d35cb2b6081faff9f66b40b257b7398f7"}}},"nbformat":4,"nbformat_minor":0} +{"cells":[{"attachments":{},"cell_type":"markdown","metadata":{"slideshow":{"slide_type":"slide"}},"source":["Copyright (c) Microsoft Corporation. All rights reserved. \n","\n","Licensed under the MIT License.\n","\n","# Run FLAML Parallel tuning with Spark\n","\n","\n","## 1. Introduction\n","\n","FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models \n","with low computational cost. It is fast and economical. The simple and lightweight design makes it easy \n","to use and extend, such as adding new learners. 
FLAML can \n","- serve as an economical AutoML engine,\n","- be used as a fast hyperparameter tuning tool, or \n","- be embedded in self-tuning software that requires low latency & resource in repetitive\n"," tuning tasks.\n","\n","In this notebook, we demonstrate how to run FLAML parallel tuning using Spark as the backend.\n","\n","FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the following options:\n","```bash\n","pip install flaml[automl,spark,blendsearch]\n","```\n","*Spark support is added in v1.1.0*"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:16:51.6335768Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:17:21.9028602Z\",\"execution_finish_time\":\"2022-12-07T08:18:52.3646576Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}"},"outputs":[],"source":["# %pip install flaml[automl,spark,blendsearch] matplotlib openml"]},{"cell_type":"markdown","metadata":{"slideshow":{"slide_type":"slide"}},"source":["## 2. Regression Example\n","### Load data and preprocess\n","\n","Download [houses dataset](https://www.openml.org/d/537) from OpenML. 
The task is to predict median price of the house in the region based on demographic composition and a state of housing market in the region."]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:53.4783943Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:20:55.7666047Z\",\"execution_finish_time\":\"2022-12-07T08:21:10.9050139Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"subslide"},"tags":[]},"outputs":[],"source":["from flaml.data import load_openml_dataset\n","X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir='./')"]},{"cell_type":"markdown","metadata":{"slideshow":{"slide_type":"slide"}},"source":["### Run FLAML\n","In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them. 
\n","\n","Notice that here `use_spark` is set to `True` in order to use Spark as the parallel training backend."]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:53.7001471Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:21:10.9846131Z\",\"execution_finish_time\":\"2022-12-07T08:21:11.3604062Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"},"tags":[]},"outputs":[],"source":["''' import AutoML class from flaml package '''\n","from flaml import AutoML\n","automl = AutoML()"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:53.8983341Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:21:11.4417491Z\",\"execution_finish_time\":\"2022-12-07T08:21:11.8242955Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"}},"outputs":[],"source":["settings = {\n"," \"time_budget\": 30, # total running time in seconds\n"," \"metric\": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2','rmse','mape']\n"," \"estimator_list\": ['lgbm'], # list of ML learners; we tune lightgbm in this example\n"," \"task\": 'regression', # task type \n"," \"log_file_name\": 'houses_experiment.log', # flaml log file\n"," \"seed\": 7654321, # random seed\n"," \"use_spark\": True, # whether to use Spark for distributed training\n"," \"n_concurrent_trials\": 2, # the maximum number of concurrent trials\n","}"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li 
Jiang\":{\"queued_time\":\"2022-12-07T08:20:54.3953298Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:21:11.9003975Z\",\"execution_finish_time\":\"2022-12-07T08:27:58.525709Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"},"tags":[]},"outputs":[],"source":["'''The main flaml automl API'''\n","automl.fit(X_train=X_train, y_train=y_train, **settings)"]},{"cell_type":"markdown","metadata":{"slideshow":{"slide_type":"slide"}},"source":["### Best model and metric"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:54.789647Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:27:58.6014435Z\",\"execution_finish_time\":\"2022-12-07T08:27:58.9745212Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"},"tags":[]},"outputs":[],"source":["''' retrieve best config'''\n","print('Best hyperparmeter config:', automl.best_config)\n","print('Best r2 on validation data: {0:.4g}'.format(1-automl.best_loss))\n","print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:54.9962623Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:27:59.0491242Z\",\"execution_finish_time\":\"2022-12-07T08:27:59.4076477Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"}},"outputs":[],"source":["automl.model.estimator"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li 
Jiang\":{\"queued_time\":\"2022-12-07T08:20:55.2539877Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:27:59.5247209Z\",\"execution_finish_time\":\"2022-12-07T08:28:00.4849272Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}"},"outputs":[],"source":["import matplotlib.pyplot as plt\n","plt.barh(automl.feature_names_in_, automl.feature_importances_)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:55.5182783Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:28:00.5644015Z\",\"execution_finish_time\":\"2022-12-07T08:28:01.5531147Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"}},"outputs":[],"source":["''' pickle and save the automl object '''\n","import pickle\n","with open('automl.pkl', 'wb') as f:\n"," pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:55.803107Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:28:01.6350567Z\",\"execution_finish_time\":\"2022-12-07T08:28:02.5774117Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"},"tags":[]},"outputs":[],"source":["''' compute predictions of testing dataset ''' \n","y_pred = automl.predict(X_test)\n","print('Predicted labels', y_pred)\n","print('True labels', y_test)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:56.0585537Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:28:02.6537337Z\",\"execution_finish_time\":\"2022-12-07T08:28:03.0177805Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"},"tags":[]},"outputs":[],"source":["''' compute different metric values on testing 
dataset'''\n","from flaml.ml import sklearn_metric_loss_score\n","print('r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))\n","print('mse', '=', sklearn_metric_loss_score('mse', y_pred, y_test))\n","print('mae', '=', sklearn_metric_loss_score('mae', y_pred, y_test))"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:56.2226463Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:28:03.1150781Z\",\"execution_finish_time\":\"2022-12-07T08:28:03.4858362Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"subslide"},"tags":[]},"outputs":[],"source":["from flaml.data import get_output_from_log\n","time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \\\n"," get_output_from_log(filename=settings['log_file_name'], time_budget=60)\n","\n","for config in config_history:\n"," print(config)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T08:20:56.4020235Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T08:28:03.5811012Z\",\"execution_finish_time\":\"2022-12-07T08:28:04.5493292Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","slideshow":{"slide_type":"slide"}},"outputs":[],"source":["import numpy as np\n","\n","plt.title('Learning Curve')\n","plt.xlabel('Wall Clock Time (s)')\n","plt.ylabel('Validation r2')\n","plt.scatter(time_history, 1 - np.array(valid_loss_history))\n","plt.step(time_history, 1 - np.array(best_valid_loss_history), where='post')\n","plt.show()"]},{"cell_type":"markdown","metadata":{},"source":["## 3. Add a customized LightGBM learner in FLAML\n","The native API of LightGBM allows one to specify a custom objective function in the model constructor. You can easily enable it by adding a customized LightGBM learner in FLAML. 
In the following example, we show how to add such a customized LightGBM learner with a custom objective function for parallel tuning with Spark.\n","\n","It's a little bit different from adding customized learners for sequential training. In sequential training, we can define the customized learner in a notebook cell. However, in spark training, we have to import it from a file so that Spark can use it in executors. We can easily do it by leveraging `broadcast_code` function in `flaml.tune.spark.utils`."]},{"cell_type":"markdown","metadata":{},"source":["### Create a customized LightGBM learner with a custom objective function"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T09:09:49.540914Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T09:09:49.6259637Z\",\"execution_finish_time\":\"2022-12-07T09:09:50.5841239Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}"},"outputs":[],"source":["custom_code = \"\"\"\n","import numpy as np \n","from flaml.model import LGBMEstimator\n","from flaml import tune\n","\n","\n","''' define your customized objective function '''\n","def my_loss_obj(y_true, y_pred):\n"," c = 0.5\n"," residual = y_pred - y_true\n"," grad = c * residual /(np.abs(residual) + c)\n"," hess = c ** 2 / (np.abs(residual) + c) ** 2\n"," # rmse grad and hess\n"," grad_rmse = residual\n"," hess_rmse = 1.0\n"," \n"," # mae grad and hess\n"," grad_mae = np.array(residual)\n"," grad_mae[grad_mae > 0] = 1.\n"," grad_mae[grad_mae <= 0] = -1.\n"," hess_mae = 1.0\n","\n"," coef = [0.4, 0.3, 0.3]\n"," return coef[0] * grad + coef[1] * grad_rmse + coef[2] * grad_mae, \\\n"," coef[0] * hess + coef[1] * hess_rmse + coef[2] * hess_mae\n","\n","\n","''' create a customized LightGBM learner class with your objective function '''\n","class MyLGBM(LGBMEstimator):\n"," '''LGBMEstimator with my_loss_obj as the objective function\n"," '''\n","\n"," def __init__(self, 
**config):\n"," super().__init__(objective=my_loss_obj, **config)\n","\"\"\"\n","\n","from flaml.tune.spark.utils import broadcast_code\n","custom_learner_path = broadcast_code(custom_code=custom_code)\n","print(custom_learner_path)\n","from flaml.tune.spark.mylearner import MyLGBM"]},{"cell_type":"markdown","metadata":{},"source":["### Add the customized learner in FLAML"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T09:14:16.2449566Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T09:14:16.3227204Z\",\"execution_finish_time\":\"2022-12-07T09:16:49.7573919Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","tags":[]},"outputs":[],"source":["automl = AutoML()\n","automl.add_learner(learner_name='my_lgbm', learner_class=MyLGBM)\n","settings = {\n"," \"time_budget\": 30, # total running time in seconds\n"," \"metric\": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2']\n"," \"estimator_list\": ['my_lgbm',], # list of ML learners; we tune lightgbm in this example\n"," \"task\": 'regression', # task type \n"," \"log_file_name\": 'houses_experiment_my_lgbm.log', # flaml log file\n"," \"n_concurrent_trials\": 2,\n"," \"use_spark\": True,\n","}\n","automl.fit(X_train=X_train, y_train=y_train, **settings)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellStatus":"{\"Li Jiang\":{\"queued_time\":\"2022-12-07T09:17:06.0159529Z\",\"session_start_time\":null,\"execution_start_time\":\"2022-12-07T09:17:06.1042554Z\",\"execution_finish_time\":\"2022-12-07T09:17:06.467989Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","tags":[]},"outputs":[],"source":["print('Best hyperparmeter config:', automl.best_config)\n","print('Best r2 on validation data: {0:.4g}'.format(1-automl.best_loss))\n","print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))\n","\n","y_pred = 
automl.predict(X_test)\n","print('Predicted labels', y_pred)\n","print('True labels', y_test)\n","\n","from flaml.ml import sklearn_metric_loss_score\n","print('r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))\n","print('mse', '=', sklearn_metric_loss_score('mse', y_pred, y_test))\n","print('mae', '=', sklearn_metric_loss_score('mae', y_pred, y_test))"]},{"cell_type":"code","execution_count":null,"metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":[]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Python 3.8.13 ('syml-py38')","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.13 (default, Oct 21 2022, 23:50:54) \n[GCC 11.2.0]"},"notebook_environment":{},"save_output":true,"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.livy.synapse.ipythonInterpreter.enabled":"true"},"enableDebugMode":false,"keepAliveTimeout":30}},"synapse_widget":{"state":{},"version":"0.1"},"trident":{"lakehouse":{}},"vscode":{"interpreter":{"hash":"e3d9487e2ef008ade0db1bc293d3206d35cb2b6081faff9f66b40b257b7398f7"}}},"nbformat":4,"nbformat_minor":0} diff --git a/notebook/research/acl2021.ipynb b/notebook/research/acl2021.ipynb index 9b099b1c9..cc0480caa 100644 --- a/notebook/research/acl2021.ipynb +++ b/notebook/research/acl2021.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -22,7 +23,7 @@ "\n", "*Running this notebook takes about one hour.\n", "\n", - "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the `notebook` and `nlp` options:\n", + "FLAML requires `Python>=3.7`. 
To run this notebook example, please install flaml with the legacy `[nlp]` options:\n", "\n", "```bash\n", "pip install flaml[nlp]==0.7.1 # in higher version of flaml, the API for nlp tasks changed\n", @@ -362,10 +363,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[2m\u001B[36m(pid=50964)\u001B[0m {'eval_loss': 0.5942569971084595, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10434782608695652}\n", - "\u001B[2m\u001B[36m(pid=50964)\u001B[0m {'eval_loss': 0.5942569971084595, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10434782608695652}\n", - "\u001B[2m\u001B[36m(pid=50948)\u001B[0m {'eval_loss': 0.649192214012146, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.2}\n", - "\u001B[2m\u001B[36m(pid=50948)\u001B[0m {'eval_loss': 0.649192214012146, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.2}\n" + "\u001b[2m\u001b[36m(pid=50964)\u001b[0m {'eval_loss': 0.5942569971084595, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10434782608695652}\n", + "\u001b[2m\u001b[36m(pid=50964)\u001b[0m {'eval_loss': 0.5942569971084595, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10434782608695652}\n", + "\u001b[2m\u001b[36m(pid=50948)\u001b[0m {'eval_loss': 0.649192214012146, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.2}\n", + "\u001b[2m\u001b[36m(pid=50948)\u001b[0m {'eval_loss': 0.649192214012146, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.2}\n" ] }, { @@ -483,12 +484,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[2m\u001B[36m(pid=54411)\u001B[0m {'eval_loss': 0.624100387096405, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=54411)\u001B[0m {'eval_loss': 0.624100387096405, 'eval_accuracy': 0.6838235294117647, 
'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=54411)\u001B[0m {'eval_loss': 0.624100387096405, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=54417)\u001B[0m {'eval_loss': 0.5938675999641418, 'eval_accuracy': 0.7156862745098039, 'eval_f1': 0.8258258258258258, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=54417)\u001B[0m {'eval_loss': 0.5938675999641418, 'eval_accuracy': 0.7156862745098039, 'eval_f1': 0.8258258258258258, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=54417)\u001B[0m {'eval_loss': 0.5938675999641418, 'eval_accuracy': 0.7156862745098039, 'eval_f1': 0.8258258258258258, 'epoch': 0.5}\n" + "\u001b[2m\u001b[36m(pid=54411)\u001b[0m {'eval_loss': 0.624100387096405, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=54411)\u001b[0m {'eval_loss': 0.624100387096405, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=54411)\u001b[0m {'eval_loss': 0.624100387096405, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=54417)\u001b[0m {'eval_loss': 0.5938675999641418, 'eval_accuracy': 0.7156862745098039, 'eval_f1': 0.8258258258258258, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=54417)\u001b[0m {'eval_loss': 0.5938675999641418, 'eval_accuracy': 0.7156862745098039, 'eval_f1': 0.8258258258258258, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=54417)\u001b[0m {'eval_loss': 0.5938675999641418, 'eval_accuracy': 0.7156862745098039, 'eval_f1': 0.8258258258258258, 'epoch': 0.5}\n" ] }, { @@ -588,18 +589,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[2m\u001B[36m(pid=57835)\u001B[0m {'eval_loss': 0.5822290778160095, 'eval_accuracy': 0.7058823529411765, 'eval_f1': 0.8181818181818181, 'epoch': 0.5043478260869565}\n", - "\u001B[2m\u001B[36m(pid=57835)\u001B[0m {'eval_loss': 
0.5822290778160095, 'eval_accuracy': 0.7058823529411765, 'eval_f1': 0.8181818181818181, 'epoch': 0.5043478260869565}\n", - "\u001B[2m\u001B[36m(pid=57835)\u001B[0m {'eval_loss': 0.5822290778160095, 'eval_accuracy': 0.7058823529411765, 'eval_f1': 0.8181818181818181, 'epoch': 0.5043478260869565}\n", - "\u001B[2m\u001B[36m(pid=57835)\u001B[0m {'eval_loss': 0.5822290778160095, 'eval_accuracy': 0.7058823529411765, 'eval_f1': 0.8181818181818181, 'epoch': 0.5043478260869565}\n", - "\u001B[2m\u001B[36m(pid=57836)\u001B[0m {'eval_loss': 0.6087244749069214, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10344827586206896}\n", - "\u001B[2m\u001B[36m(pid=57836)\u001B[0m {'eval_loss': 0.6087244749069214, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10344827586206896}\n", - "\u001B[2m\u001B[36m(pid=57836)\u001B[0m {'eval_loss': 0.6087244749069214, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10344827586206896}\n", - "\u001B[2m\u001B[36m(pid=57836)\u001B[0m {'eval_loss': 0.6087244749069214, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10344827586206896}\n", - "\u001B[2m\u001B[36m(pid=57839)\u001B[0m {'eval_loss': 0.5486209392547607, 'eval_accuracy': 0.7034313725490197, 'eval_f1': 0.8141321044546851, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=57839)\u001B[0m {'eval_loss': 0.5486209392547607, 'eval_accuracy': 0.7034313725490197, 'eval_f1': 0.8141321044546851, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=57839)\u001B[0m {'eval_loss': 0.5486209392547607, 'eval_accuracy': 0.7034313725490197, 'eval_f1': 0.8141321044546851, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=57839)\u001B[0m {'eval_loss': 0.5486209392547607, 'eval_accuracy': 0.7034313725490197, 'eval_f1': 0.8141321044546851, 'epoch': 0.5}\n" + "\u001b[2m\u001b[36m(pid=57835)\u001b[0m {'eval_loss': 0.5822290778160095, 'eval_accuracy': 0.7058823529411765, 'eval_f1': 0.8181818181818181, 'epoch': 
0.5043478260869565}\n", + "\u001b[2m\u001b[36m(pid=57835)\u001b[0m {'eval_loss': 0.5822290778160095, 'eval_accuracy': 0.7058823529411765, 'eval_f1': 0.8181818181818181, 'epoch': 0.5043478260869565}\n", + "\u001b[2m\u001b[36m(pid=57835)\u001b[0m {'eval_loss': 0.5822290778160095, 'eval_accuracy': 0.7058823529411765, 'eval_f1': 0.8181818181818181, 'epoch': 0.5043478260869565}\n", + "\u001b[2m\u001b[36m(pid=57835)\u001b[0m {'eval_loss': 0.5822290778160095, 'eval_accuracy': 0.7058823529411765, 'eval_f1': 0.8181818181818181, 'epoch': 0.5043478260869565}\n", + "\u001b[2m\u001b[36m(pid=57836)\u001b[0m {'eval_loss': 0.6087244749069214, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10344827586206896}\n", + "\u001b[2m\u001b[36m(pid=57836)\u001b[0m {'eval_loss': 0.6087244749069214, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10344827586206896}\n", + "\u001b[2m\u001b[36m(pid=57836)\u001b[0m {'eval_loss': 0.6087244749069214, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10344827586206896}\n", + "\u001b[2m\u001b[36m(pid=57836)\u001b[0m {'eval_loss': 0.6087244749069214, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.10344827586206896}\n", + "\u001b[2m\u001b[36m(pid=57839)\u001b[0m {'eval_loss': 0.5486209392547607, 'eval_accuracy': 0.7034313725490197, 'eval_f1': 0.8141321044546851, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=57839)\u001b[0m {'eval_loss': 0.5486209392547607, 'eval_accuracy': 0.7034313725490197, 'eval_f1': 0.8141321044546851, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=57839)\u001b[0m {'eval_loss': 0.5486209392547607, 'eval_accuracy': 0.7034313725490197, 'eval_f1': 0.8141321044546851, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=57839)\u001b[0m {'eval_loss': 0.5486209392547607, 'eval_accuracy': 0.7034313725490197, 'eval_f1': 0.8141321044546851, 'epoch': 0.5}\n" ] }, { @@ -699,21 +700,21 @@ "name": "stdout", "output_type": "stream", 
"text": [ - "\u001B[2m\u001B[36m(pid=61251)\u001B[0m {'eval_loss': 0.6236899495124817, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=61251)\u001B[0m {'eval_loss': 0.6236899495124817, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=61251)\u001B[0m {'eval_loss': 0.6236899495124817, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=61251)\u001B[0m {'eval_loss': 0.6236899495124817, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=61251)\u001B[0m {'eval_loss': 0.6236899495124817, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", - "\u001B[2m\u001B[36m(pid=61255)\u001B[0m {'eval_loss': 0.6249027848243713, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.3}\n", - "\u001B[2m\u001B[36m(pid=61255)\u001B[0m {'eval_loss': 0.6249027848243713, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.3}\n", - "\u001B[2m\u001B[36m(pid=61255)\u001B[0m {'eval_loss': 0.6249027848243713, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.3}\n", - "\u001B[2m\u001B[36m(pid=61255)\u001B[0m {'eval_loss': 0.6249027848243713, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.3}\n", - "\u001B[2m\u001B[36m(pid=61255)\u001B[0m {'eval_loss': 0.6249027848243713, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.3}\n", - "\u001B[2m\u001B[36m(pid=61236)\u001B[0m {'eval_loss': 0.6138392686843872, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.20689655172413793}\n", - "\u001B[2m\u001B[36m(pid=61236)\u001B[0m {'eval_loss': 0.6138392686843872, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.20689655172413793}\n", - 
"\u001B[2m\u001B[36m(pid=61236)\u001B[0m {'eval_loss': 0.6138392686843872, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.20689655172413793}\n", - "\u001B[2m\u001B[36m(pid=61236)\u001B[0m {'eval_loss': 0.6138392686843872, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.20689655172413793}\n", - "\u001B[2m\u001B[36m(pid=61236)\u001B[0m {'eval_loss': 0.6138392686843872, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.20689655172413793}\n" + "\u001b[2m\u001b[36m(pid=61251)\u001b[0m {'eval_loss': 0.6236899495124817, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=61251)\u001b[0m {'eval_loss': 0.6236899495124817, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=61251)\u001b[0m {'eval_loss': 0.6236899495124817, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=61251)\u001b[0m {'eval_loss': 0.6236899495124817, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=61251)\u001b[0m {'eval_loss': 0.6236899495124817, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.5}\n", + "\u001b[2m\u001b[36m(pid=61255)\u001b[0m {'eval_loss': 0.6249027848243713, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.3}\n", + "\u001b[2m\u001b[36m(pid=61255)\u001b[0m {'eval_loss': 0.6249027848243713, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.3}\n", + "\u001b[2m\u001b[36m(pid=61255)\u001b[0m {'eval_loss': 0.6249027848243713, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.3}\n", + "\u001b[2m\u001b[36m(pid=61255)\u001b[0m {'eval_loss': 0.6249027848243713, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 
0.3}\n", + "\u001b[2m\u001b[36m(pid=61255)\u001b[0m {'eval_loss': 0.6249027848243713, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.3}\n", + "\u001b[2m\u001b[36m(pid=61236)\u001b[0m {'eval_loss': 0.6138392686843872, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.20689655172413793}\n", + "\u001b[2m\u001b[36m(pid=61236)\u001b[0m {'eval_loss': 0.6138392686843872, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.20689655172413793}\n", + "\u001b[2m\u001b[36m(pid=61236)\u001b[0m {'eval_loss': 0.6138392686843872, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.20689655172413793}\n", + "\u001b[2m\u001b[36m(pid=61236)\u001b[0m {'eval_loss': 0.6138392686843872, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.20689655172413793}\n", + "\u001b[2m\u001b[36m(pid=61236)\u001b[0m {'eval_loss': 0.6138392686843872, 'eval_accuracy': 0.6838235294117647, 'eval_f1': 0.8122270742358079, 'epoch': 0.20689655172413793}\n" ] }, { diff --git a/notebook/zeroshot_lightgbm.ipynb b/notebook/zeroshot_lightgbm.ipynb index bd43b4311..32acda41c 100644 --- a/notebook/zeroshot_lightgbm.ipynb +++ b/notebook/zeroshot_lightgbm.ipynb @@ -1,6 +1,15 @@ { "cells": [ { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, + { + "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { @@ -19,16 +28,16 @@ "\n", "In this notebook, we demonstrate a basic use case of zero-shot AutoML with FLAML.\n", "\n", - "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml and openml:" + "FLAML requires `Python>=3.7`. 
To run this notebook example, please install the [autozero] option:" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# %pip install -U flaml openml;" + "# %pip install flaml[autozero] lightgbm openml;" ] }, { @@ -51,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -80,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -101,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": { "slideshow": { "slide_type": "subslide" @@ -113,7 +122,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "load dataset from ./openml_ds537.pkl\n", + "download dataset from openml\n", "Dataset name: houses\n", "X_train.shape: (15480, 8), y_train.shape: (15480,);\n", "X_test.shape: (5160, 8), y_test.shape: (5160,)\n" @@ -127,25 +136,38 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " median_income housing_median_age ... latitude longitude\n", - "19226 7.3003 19.0 ... 38.46 -122.68\n", - "14549 5.9547 18.0 ... 32.95 -117.24\n", - "9093 3.2125 19.0 ... 34.68 -118.27\n", - "12213 6.9930 13.0 ... 33.51 -117.18\n", - "12765 2.5162 21.0 ... 38.62 -121.41\n", - "... ... ... ... ... ...\n", - "13123 4.4125 20.0 ... 38.27 -121.26\n", - "19648 2.9135 27.0 ... 37.48 -120.89\n", - "9845 3.1977 31.0 ... 36.58 -121.90\n", - "10799 5.6315 34.0 ... 33.62 -117.93\n", - "2732 1.3882 15.0 ... 32.80 -115.56\n", + " median_income housing_median_age total_rooms total_bedrooms \\\n", + "19226 7.3003 19 4976.0 711.0 \n", + "14549 5.9547 18 1591.0 268.0 \n", + "9093 3.2125 19 552.0 129.0 \n", + "12213 6.9930 13 270.0 42.0 \n", + "12765 2.5162 21 3260.0 763.0 \n", + "... ... ... ... ... 
\n", + "13123 4.4125 20 1314.0 229.0 \n", + "19648 2.9135 27 1118.0 195.0 \n", + "9845 3.1977 31 1431.0 370.0 \n", + "10799 5.6315 34 2125.0 498.0 \n", + "2732 1.3882 15 1171.0 328.0 \n", + "\n", + " population households latitude longitude \n", + "19226 1926.0 625.0 38.46 -122.68 \n", + "14549 547.0 243.0 32.95 -117.24 \n", + "9093 314.0 106.0 34.68 -118.27 \n", + "12213 120.0 42.0 33.51 -117.18 \n", + "12765 1735.0 736.0 38.62 -121.41 \n", + "... ... ... ... ... \n", + "13123 712.0 219.0 38.27 -121.26 \n", + "19648 647.0 209.0 37.48 -120.89 \n", + "9845 704.0 393.0 36.58 -121.90 \n", + "10799 1052.0 468.0 33.62 -117.93 \n", + "2732 1024.0 298.0 32.80 -115.56 \n", "\n", "[15480 rows x 8 columns]\n" ] @@ -168,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": { "slideshow": { "slide_type": "slide" @@ -176,6 +198,13 @@ "tags": [] }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:flaml.default.suggest:metafeature distance: 0.02197989436019765\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -206,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": { "slideshow": { "slide_type": "slide" @@ -220,7 +249,7 @@ "0.8537444671194614" ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -238,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": { "slideshow": { "slide_type": "slide" @@ -251,7 +280,7 @@ "0.8296179648694404" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -309,9 +338,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:flaml.default.suggest:metafeature distance: 0.02197989436019765\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ 
-341,9 +377,17 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:flaml.default.suggest:metafeature distance: 0.02197989436019765\n" + ] + } + ], "source": [ "from flaml.default import preprocess_and_suggest_hyperparams\n", "(\n", @@ -365,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "metadata": { "slideshow": { "slide_type": "slide" @@ -394,7 +438,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "metadata": { "slideshow": { "slide_type": "slide" @@ -415,7 +459,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": { "slideshow": { "slide_type": "slide" @@ -425,6 +469,17 @@ "outputs": [ { "data": { + "text/html": [ + "
LGBMRegressor(colsample_bytree=0.7019911744574896,\n",
+       "              learning_rate=0.022635758411078528, max_bin=511,\n",
+       "              min_child_samples=2, n_estimators=4797, num_leaves=122,\n",
+       "              reg_alpha=0.004252223402511765, reg_lambda=0.11288241427227624,\n",
+       "              verbose=-1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], "text/plain": [ "LGBMRegressor(colsample_bytree=0.7019911744574896,\n", " learning_rate=0.022635758411078528, max_bin=511,\n", @@ -433,7 +488,7 @@ " verbose=-1)" ] }, - "execution_count": 17, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -451,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -480,35 +535,45 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 16, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:51:45] {1663} INFO - task = regression\n", + "[flaml.automl.logger: 04-28 02:51:45] {1670} INFO - Data split method: uniform\n", + "[flaml.automl.logger: 04-28 02:51:45] {1673} INFO - Evaluation method: cv\n", + "[flaml.automl.logger: 04-28 02:51:45] {1771} INFO - Minimizing error metric: 1-r2\n" + ] + }, { "name": "stderr", "output_type": "stream", "text": [ - "[flaml.automl: 05-31 22:54:25] {2373} INFO - task = regression\n", - "[flaml.automl: 05-31 22:54:25] {2375} INFO - Data split method: uniform\n", - "[flaml.automl: 05-31 22:54:25] {2379} INFO - Evaluation method: cv\n", - "[flaml.automl: 05-31 22:54:25] {2448} INFO - Minimizing error metric: 1-r2\n", - "[flaml.automl: 05-31 22:54:25] {2586} INFO - List of ML learners in AutoML Run: ['lgbm']\n", - "[flaml.automl: 05-31 22:54:25] {2878} INFO - iteration 0, current learner lgbm\n", - "[flaml.automl: 05-31 22:56:54] {3008} INFO - Estimated sufficient time budget=1490299s. 
Estimated necessary time budget=1490s.\n", - "[flaml.automl: 05-31 22:56:54] {3055} INFO - at 149.1s,\testimator lgbm's best error=0.1513,\tbest estimator lgbm's best error=0.1513\n", - "[flaml.automl: 05-31 22:56:54] {2878} INFO - iteration 1, current learner lgbm\n", - "[flaml.automl: 05-31 22:59:24] {3055} INFO - at 299.0s,\testimator lgbm's best error=0.1513,\tbest estimator lgbm's best error=0.1513\n", - "[flaml.automl: 05-31 22:59:24] {2878} INFO - iteration 2, current learner lgbm\n", - "[flaml.automl: 05-31 23:01:34] {3055} INFO - at 429.1s,\testimator lgbm's best error=0.1513,\tbest estimator lgbm's best error=0.1513\n", - "[flaml.automl: 05-31 23:01:34] {2878} INFO - iteration 3, current learner lgbm\n", - "[flaml.automl: 05-31 23:04:43] {3055} INFO - at 618.2s,\testimator lgbm's best error=0.1513,\tbest estimator lgbm's best error=0.1513\n", - "[flaml.automl: 05-31 23:05:14] {3315} INFO - retrain lgbm for 31.0s\n", - "[flaml.automl: 05-31 23:05:14] {3322} INFO - retrained model: LGBMRegressor(colsample_bytree=0.7019911744574896,\n", + "INFO:flaml.default.suggest:metafeature distance: 0.02197989436019765\n", + "INFO:flaml.default.suggest:metafeature distance: 0.006677018633540373\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 04-28 02:51:45] {1881} INFO - List of ML learners in AutoML Run: ['lgbm']\n", + "[flaml.automl.logger: 04-28 02:51:45] {2191} INFO - iteration 0, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:53:39] {2317} INFO - Estimated sufficient time budget=1134156s. 
Estimated necessary time budget=1134s.\n", + "[flaml.automl.logger: 04-28 02:53:39] {2364} INFO - at 113.5s,\testimator lgbm's best error=0.1513,\tbest estimator lgbm's best error=0.1513\n", + "[flaml.automl.logger: 04-28 02:53:39] {2191} INFO - iteration 1, current learner lgbm\n", + "[flaml.automl.logger: 04-28 02:55:32] {2364} INFO - at 226.6s,\testimator lgbm's best error=0.1513,\tbest estimator lgbm's best error=0.1513\n", + "[flaml.automl.logger: 04-28 02:55:54] {2600} INFO - retrain lgbm for 22.3s\n", + "[flaml.automl.logger: 04-28 02:55:54] {2603} INFO - retrained model: LGBMRegressor(colsample_bytree=0.7019911744574896,\n", " learning_rate=0.02263575841107852, max_bin=511,\n", " min_child_samples=2, n_estimators=4797, num_leaves=122,\n", - " reg_alpha=0.004252223402511765, reg_lambda=0.11288241427227633,\n", + " reg_alpha=0.004252223402511765, reg_lambda=0.11288241427227624,\n", " verbose=-1)\n", - "[flaml.automl: 05-31 23:05:14] {2617} INFO - fit succeeded\n", - "[flaml.automl: 05-31 23:05:14] {2618} INFO - Time taken to find the best model: 149.06516432762146\n" + "[flaml.automl.logger: 04-28 02:55:54] {1911} INFO - fit succeeded\n", + "[flaml.automl.logger: 04-28 02:55:54] {1912} INFO - Time taken to find the best model: 113.4601559638977\n" ] } ], @@ -545,7 +610,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.15 (main, Oct 26 2022, 03:47:43) \n[GCC 10.2.1 20210110]" + "version": "3.9.15" } }, "nbformat": 4, diff --git a/setup.py b/setup.py index 5e59da800..f24875885 100644 --- a/setup.py +++ b/setup.py @@ -15,11 +15,6 @@ __version__ = version["__version__"] install_requires = [ "NumPy>=1.17.0rc1", - "lightgbm>=2.3.1", - "xgboost>=0.90", - "scipy>=1.4.1", - "pandas>=1.1.4", - "scikit-learn>=0.24", ] @@ -39,16 +34,28 @@ setuptools.setup( include_package_data=True, install_requires=install_requires, extras_require={ + "automl": [ + "lightgbm>=2.3.1", + "xgboost>=0.90", + "scipy>=1.4.1", + 
"pandas>=1.1.4", + "scikit-learn>=0.24", + ], "notebook": [ "jupyter", "matplotlib", - "openml==0.10.2", + "openml", ], "spark": [ "pyspark>=3.2.0", "joblibspark>=0.5.0", ], "test": [ + "lightgbm>=2.3.1", + "xgboost>=0.90", + "scipy>=1.4.1", + "pandas>=1.1.4", + "scikit-learn>=0.24", "thop", "pytest>=6.1.1", "coverage>=5.3", @@ -58,7 +65,7 @@ setuptools.setup( "catboost>=0.26,<1.2", "rgf-python", "optuna==2.8.0", - "openml==0.10.2", + "openml", "statsmodels>=0.12.2", "psutil==5.8.0", "dataclasses", @@ -77,6 +84,7 @@ setuptools.setup( "ipykernel", "pytorch-lightning<1.9.1", # test_forecast_panel "requests<2.29.0", # https://github.com/docker/docker-py/issues/3113 + "packaging", ], "catboost": ["catboost>=0.26"], "blendsearch": ["optuna==2.8.0"], @@ -91,6 +99,7 @@ setuptools.setup( ], "vw": [ "vowpalwabbit>=8.10.0, <9.0.0", + "scikit-learn", ], "hf": [ "transformers[torch]==4.26", @@ -122,7 +131,12 @@ setuptools.setup( "benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3"], "openai": ["openai==0.27.4", "diskcache"], "autogen": ["openai==0.27.4", "diskcache", "docker"], - "synapse": ["joblibspark>=0.5.0", "optuna==2.8.0", "pyspark>=3.2.0"], + "synapse": [ + "joblibspark>=0.5.0", + "optuna==2.8.0", + "pyspark>=3.2.0", + ], + "autozero": ["scikit-learn", "pandas", "packaging"], }, classifiers=[ "Programming Language :: Python :: 3", diff --git a/test/automl/test_utils.py b/test/automl/test_utils.py deleted file mode 100644 index 0b830c5af..000000000 --- a/test/automl/test_utils.py +++ /dev/null @@ -1,20 +0,0 @@ -import numpy as np -from flaml.automl.utils import len_labels, unique_value_first_index - - -def test_len_labels(): - assert len_labels([1, 2, 3]) == 3 - assert len_labels([1, 2, 3, 1, 2, 3]) == 3 - assert np.array_equal(len_labels([1, 2, 3], True)[1], [1, 2, 3]) - assert np.array_equal(len_labels([1, 2, 3, 1, 2, 3], True)[1], [1, 2, 3]) - - -def test_unique_value_first_index(): - label_set, first_index = unique_value_first_index([1, 2, 2, 3]) - 
assert np.array_equal(label_set, np.array([1, 2, 3])) - assert np.array_equal(first_index, np.array([0, 1, 3])) - - -if __name__ == "__main__": - test_len_labels() - test_unique_value_first_index() diff --git a/website/docs/Examples/AutoGen-OpenAI.md b/website/docs/Examples/AutoGen-OpenAI.md index 82f138867..97ce73df4 100644 --- a/website/docs/Examples/AutoGen-OpenAI.md +++ b/website/docs/Examples/AutoGen-OpenAI.md @@ -7,7 +7,7 @@ In this example, we will tune several hyperparameters for the OpenAI's completio Install the [autogen,blendsearch] option. ```bash -pip install "flaml[autogen,blendsearch]==1.2.2 datasets" +pip install "flaml[autogen,blendsearch]" datasets ``` Setup your OpenAI key: diff --git a/website/docs/Examples/AutoML-Classification.md b/website/docs/Examples/AutoML-Classification.md index 010ea81b8..8ef8a74dc 100644 --- a/website/docs/Examples/AutoML-Classification.md +++ b/website/docs/Examples/AutoML-Classification.md @@ -1,5 +1,12 @@ # AutoML - Classification +### Prerequisites + +Install the [automl] option. +```bash +pip install "flaml[automl]" +``` + ### A basic classification example ```python diff --git a/website/docs/Examples/AutoML-NLP.md b/website/docs/Examples/AutoML-NLP.md index 409ac8872..2896ff89d 100644 --- a/website/docs/Examples/AutoML-NLP.md +++ b/website/docs/Examples/AutoML-NLP.md @@ -2,9 +2,9 @@ ### Requirements -This example requires GPU. Install the [hf] option: +This example requires GPU. Install the [automl,hf] option: ```python -pip install "flaml[hf]" +pip install "flaml[automl,hf]" ``` ### A simple sequence classification example diff --git a/website/docs/Examples/AutoML-Rank.md b/website/docs/Examples/AutoML-Rank.md index 99b04a434..c1b3930b1 100644 --- a/website/docs/Examples/AutoML-Rank.md +++ b/website/docs/Examples/AutoML-Rank.md @@ -1,5 +1,12 @@ # AutoML - Rank +### Prerequisites + +Install the [automl] option. 
+```bash +pip install "flaml[automl]" +``` + ### A simple learning-to-rank example ```python diff --git a/website/docs/Examples/AutoML-Regression.md b/website/docs/Examples/AutoML-Regression.md index 32c38086a..2eee59f8b 100644 --- a/website/docs/Examples/AutoML-Regression.md +++ b/website/docs/Examples/AutoML-Regression.md @@ -1,5 +1,12 @@ # AutoML - Regression +### Prerequisites + +Install the [automl] option. +```bash +pip install "flaml[automl]" +``` + ### A basic regression example ```python diff --git a/website/docs/Examples/AutoML-Time series forecast.md b/website/docs/Examples/AutoML-Time series forecast.md index ede9ecdc4..a357dc772 100644 --- a/website/docs/Examples/AutoML-Time series forecast.md +++ b/website/docs/Examples/AutoML-Time series forecast.md @@ -2,9 +2,9 @@ ### Prerequisites -Install the [ts_forecast] option. +Install the [automl,ts_forecast] option. ```bash -pip install "flaml[ts_forecast]" +pip install "flaml[automl,ts_forecast]" ``` ### Simple NumPy Example diff --git a/website/docs/Examples/AutoML-for-LightGBM.md b/website/docs/Examples/AutoML-for-LightGBM.md index 60cebc99f..11378a974 100644 --- a/website/docs/Examples/AutoML-for-LightGBM.md +++ b/website/docs/Examples/AutoML-for-LightGBM.md @@ -2,13 +2,11 @@ ### Prerequisites for this example -Install the [notebook] option. +Install the [automl] option. ```bash -pip install "flaml[notebook]" +pip install "flaml[automl]" matplotlib openml ``` -This option is not necessary in general. - ### Use built-in LGBMEstimator ```python diff --git a/website/docs/Examples/AutoML-for-XGBoost.md b/website/docs/Examples/AutoML-for-XGBoost.md index ba4bbccf4..76aa2597d 100644 --- a/website/docs/Examples/AutoML-for-XGBoost.md +++ b/website/docs/Examples/AutoML-for-XGBoost.md @@ -2,13 +2,11 @@ ### Prerequisites for this example -Install the [notebook] option. +Install the [automl] option. 
```bash -pip install "flaml[notebook]" +pip install "flaml[automl]" matplotlib openml ``` -This option is not necessary in general. - ### Use built-in XGBoostSklearnEstimator ```python diff --git a/website/docs/Examples/Default-Flamlized.md b/website/docs/Examples/Default-Flamlized.md index d87ee47f9..4b0f2853f 100644 --- a/website/docs/Examples/Default-Flamlized.md +++ b/website/docs/Examples/Default-Flamlized.md @@ -2,10 +2,16 @@ Flamlized estimators automatically use data-dependent default hyperparameter configurations for each estimator, offering a unique zero-shot AutoML capability, or "no tuning" AutoML. -This example requires openml==0.10.2. - ## Flamlized LGBMRegressor +### Prerequisites + +This example requires the [autozero] option. + +```bash +pip install "flaml[autozero]" lightgbm openml +``` + ### Zero-shot AutoML ```python @@ -62,6 +68,10 @@ X_test.shape: (5160, 8), y_test.shape: (5160,) ## Flamlized XGBClassifier +### Prerequisites + +This example requires xgboost, sklearn, openml==0.10.2. + ### Zero-shot AutoML ```python diff --git a/website/docs/Examples/Integrate - AzureML.md b/website/docs/Examples/Integrate - AzureML.md index 4d9db9088..582c75858 100644 --- a/website/docs/Examples/Integrate - AzureML.md +++ b/website/docs/Examples/Integrate - AzureML.md @@ -2,9 +2,9 @@ FLAML can be used together with AzureML. On top of that, using mlflow and ray is ### Prerequisites -Install the [azureml] option. +Install the [automl,azureml] option. 
```bash -pip install "flaml[azureml]" +pip install "flaml[automl,azureml]" ``` Setup a AzureML workspace: diff --git a/website/docs/Examples/Integrate - Scikit-learn Pipeline.md b/website/docs/Examples/Integrate - Scikit-learn Pipeline.md index ee32ebd0d..6c7006dea 100644 --- a/website/docs/Examples/Integrate - Scikit-learn Pipeline.md +++ b/website/docs/Examples/Integrate - Scikit-learn Pipeline.md @@ -1,6 +1,11 @@ As FLAML's AutoML module can be used a transformer in the Sklearn's pipeline we can get all the benefits of pipeline. -This example requires openml==0.10.2. +### Prerequisites + +Install the [automl] option. +```bash +pip install "flaml[automl]" openml +``` + ### Load data