mirror of
https://github.com/microsoft/FLAML.git
synced 2026-02-09 02:09:16 +08:00
* Merged PR 1686010: Bump version to 2.3.5.post2, Distribute source and wheel, Fix license-file, Only log better models
- Fix license-file
- Bump version to 2.3.5.post2
- Distribute source and wheel
- Log better models only
- Add artifact_path to register_automl_pipeline
- Improve logging of _automl_user_configurations
----
This pull request fixes the project’s configuration by updating the license metadata for compliance with FLAML OSS 2.3.5.
The changes in `/pyproject.toml` update the project’s license and readme metadata by replacing deprecated keys with the new structured fields.
- `/pyproject.toml`: Replaced `license_file` with `license = { text = "MIT" }`.
- `/pyproject.toml`: Replaced `description-file` with `readme = "README.md"`.
<!-- GitOpsUserAgent=GitOps.Apps.Server.pullrequestcopilot -->
Related work items: #4252053
* Merged PR 1688479: Handle feature_importances_ is None, Catch RuntimeError and wait for spark cluster to recover
- Add warning message when feature_importances_ is None (#3982120)
- Catch RuntimeError and wait for spark cluster to recover (#3982133)
----
Bug fix.
This pull request prevents an AttributeError in the feature importance plotting function by adding a check for a `None` value with an informative warning message.
- `flaml/fabric/visualization.py`: Checks if `result.feature_importances_` is `None`, logs a warning with possible reasons, and returns early.
- `flaml/fabric/visualization.py`: Imports `logger` from `flaml.automl.logger` to support the warning message.
<!-- GitOpsUserAgent=GitOps.Apps.Server.pullrequestcopilot -->
Related work items: #3982120, #3982133
* Removed deprecated metadata section
* Fix log_params, log_artifact doesn't support run_id in mlflow 2.6.0
* Remove autogen
* Remove autogen
* Remove autogen
* Merged PR 1776547: Fix flaky test test_automl
Don't throw error when time budget is not enough
----
#### AI description (iteration 1)
#### PR Classification
Bug fix addressing a failing test in the AutoML notebook example.
#### PR Summary
This PR fixes a flaky test by adding a conditional check in the AutoML test that prints a message and exits early if no best estimator is set, thereby preventing unpredictable test failures.
- `test/automl/test_notebook_example.py`: Introduced a check to print "Training budget is not sufficient" and return if `automl.best_estimator` is not found.
<!-- GitOpsUserAgent=GitOps.Apps.Server.pullrequestcopilot -->
Related work items: #4573514
* Merged PR 1777952: Fix unrecognized or malformed field 'license-file' when uploading wheel to feed
Try to fix InvalidDistribution: Invalid distribution metadata: unrecognized or malformed field 'license-file'
----
Bug fix addressing package metadata configuration.
This pull request fixes the error with unrecognized or malformed license file fields during wheel uploads by updating the setup configuration.
- In `setup.py`, added `license="MIT"` and `license_files=["LICENSE"]` to provide proper license metadata.
<!-- GitOpsUserAgent=GitOps.Apps.Server.pullrequestcopilot -->
Related work items: #4560034
* Cherry-pick Merged PR 1879296: Add support to python 3.12 and spark 4.0
* Cherry-pick Merged PR 1890869: Improve time_budget estimation for mlflow logging
* Cherry-pick Merged PR 1879296: Add support to python 3.12 and spark 4.0
* Disable openai workflow
* Add python 3.12 to test envs
* Manually trigger openai
* Support markdown files with underscore-prefixed file names
* Improve save dependencies
* SynapseML is not installed
* Fix syntax error: Module !flaml/autogen was never imported
* macos 3.12 also hangs
* fix syntax error
* Update python version in actions
* Install setuptools for using pkg_resources
* Fix test_automl_performance in Github actions
* Fix test_nested_run
131 lines
4.1 KiB
Python
131 lines
4.1 KiB
Python
import os
|
|
import sys
|
|
|
|
import pytest
|
|
|
|
# minio is an optional dependency: if it is not installed, define a stub
# exception with the same name so `except ServerError` clauses below still work
# (the stub is simply never raised by any library in that case).
try:
    from minio.error import ServerError
except ImportError:

    class ServerError(Exception):
        pass
|
|
|
|
|
|
# openml is an optional dependency: fall back to a same-named stub exception
# when it is missing, so the broad except clause in run_automl stays valid.
try:
    from openml.exceptions import OpenMLServerException
except ImportError:

    class OpenMLServerException(Exception):
        pass
|
|
|
|
|
|
from requests.exceptions import ChunkedEncodingError, SSLError
|
|
|
|
from flaml.tune.spark.utils import check_spark
|
|
|
|
# Detect whether a usable Spark environment is present; every test in this
# module requires Spark, so all of them are skipped when it is unavailable.
spark_available, _ = check_spark()
skip_spark = not spark_available

# Module-wide pytest marks: skip without Spark, and tag all tests as "spark".
pytestmark = [pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests."), pytest.mark.spark]

# Cap FLAML's concurrency via environment variable.
# NOTE(review): presumably set to 2 to avoid oversubscribing CI machines —
# confirm against FLAML's parallel-tuning documentation.
os.environ["FLAML_MAX_CONCURRENT"] = "2"
|
|
|
|
|
|
def run_automl(budget=30, dataset_format="dataframe", hpo_method=None):
    """Run a FLAML AutoML classification experiment on the OpenML Airlines dataset.

    Args:
        budget: Total time budget in seconds. Forced down to 30s on any
            platform other than Linux + Python 3.10 to keep CI fast.
        dataset_format: Format passed to ``load_openml_dataset``
            ("dataframe" or "array").
        hpo_method: HPO method forwarded to ``AutoML.fit`` (e.g. "bs"),
            or None for the default.

    Raises:
        AssertionError: If this is a full performance run (effective budget
            >= 3600s) and the test-set accuracy falls below 0.669.
    """
    import urllib3  # noqa: F401  (its ReadTimeoutError may surface from the download)

    from flaml.automl.data import load_openml_dataset

    performance_check_budget = 3600
    if sys.platform == "darwin" or "nt" in os.name or "3.10" not in sys.version:
        budget = 30  # revise the budget if the platform is not linux + python 3.10
    if budget >= performance_check_budget:
        # Full performance run: cap iterations and mark the run by setting the
        # sentinel to None, which enables the accuracy assertion at the end.
        max_iter = 60
        performance_check_budget = None
    else:
        max_iter = None

    try:
        X_train, X_test, y_train, y_test = load_openml_dataset(
            dataset_id=1169, data_dir="test/", dataset_format=dataset_format
        )
    except Exception as e:
        # Deliberately broad: the download can fail with OpenMLServerException,
        # ChunkedEncodingError, urllib3's ReadTimeoutError, SSLError, minio's
        # ServerError, and more; any transient failure just skips the test.
        # (The original listed those types *and* Exception in one tuple —
        # Exception subsumed them all, so the explicit list was dead code.)
        print(e)
        return

    # Import the AutoML class from the flaml package.
    from flaml import AutoML

    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "max_iter": max_iter,  # maximum number of iterations
        "metric": "accuracy",  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
        "task": "classification",  # task type
        "log_file_name": "airlines_experiment.log",  # flaml log file
        "seed": 7654321,  # random seed
        "hpo_method": hpo_method,
        "log_type": "all",
        "estimator_list": [
            "lgbm",
            "xgboost",
            "xgb_limitdepth",
            "rf",
            "extra_tree",
        ],  # list of ML learners
        "eval_method": "holdout",
        "n_concurrent_trials": 2,
        "use_spark": True,
    }

    # The main flaml automl API.
    automl.fit(X_train=X_train, y_train=y_train, **settings)

    # Retrieve the best config and best learner.
    print("Best ML learner:", automl.best_estimator)
    print("Best hyperparameter config:", automl.best_config)
    print(f"Best accuracy on validation data: {1 - automl.best_loss:.4g}")
    # `performance_check_budget is not None` means this is NOT the full
    # performance run; in that case a missing best estimator just skips the
    # remaining reporting instead of crashing on None attributes.
    if performance_check_budget is not None and automl.best_estimator is None:
        # skip the performance check if no model is trained
        # this happens sometimes in github actions ubuntu python 3.12 environment
        print("Warning: no model is trained, skip performance check")
        return
    print(f"Training duration of best run: {automl.best_config_train_time:.4g} s")
    print(automl.model.estimator)
    print(automl.best_config_per_estimator)
    print("time taken to find best model:", automl.time_to_find_best_model)

    # Compute predictions on the testing dataset.
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)
    y_pred_proba = automl.predict_proba(X_test)[:, 1]
    # Compute different metric values on the testing dataset.
    from flaml.automl.ml import sklearn_metric_loss_score

    accuracy = 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)
    print("accuracy", "=", accuracy)
    print("roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test))
    print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
    if performance_check_budget is None:
        # Message now matches the actual 0.669 threshold (it used to say 0.67).
        assert accuracy >= 0.669, "the accuracy of flaml should be at least 0.669"
|
|
|
|
|
|
def test_automl_array():
    """Smoke-test AutoML on the numpy-array dataset format with BlendSearch."""
    # A tiny 3-second budget: this only verifies the pipeline runs end to end.
    run_automl(budget=3, dataset_format="array", hpo_method="bs")
|
|
|
|
|
|
def test_automl_performance():
    """Run AutoML with a budget large enough to trigger the accuracy check."""
    # 3600s reaches run_automl's performance_check_budget threshold, enabling
    # its accuracy assertion (run_automl itself shrinks the budget on
    # platforms other than linux + python 3.10).
    run_automl(budget=3600)
|
|
|
|
|
|
# Allow running this test module directly, without pytest.
if __name__ == "__main__":
    test_automl_array()
    test_automl_performance()
|