Mirror of https://github.com/microsoft/FLAML.git, synced 2026-02-09 02:09:16 +08:00
* Merged PR 1686010: Bump version to 2.3.5.post2, Distribute source and wheel, Fix license-file, Only log better models
- Fix license-file
- Bump version to 2.3.5.post2
- Distribute source and wheel
- Log better models only
- Add artifact_path to register_automl_pipeline
- Improve logging of _automl_user_configurations
----
This pull request fixes the project’s configuration by updating the license metadata for compliance with FLAML OSS 2.3.5.
The changes in `/pyproject.toml` update the project’s license and readme metadata by replacing deprecated keys with the new structured fields.
- `/pyproject.toml`: Replaced `license_file` with `license = { text = "MIT" }`.
- `/pyproject.toml`: Replaced `description-file` with `readme = "README.md"`.
Related work items: #4252053
* Merged PR 1688479: Handle feature_importances_ is None, Catch RuntimeError and wait for spark cluster to recover
- Add warning message when feature_importances_ is None (#3982120)
- Catch RuntimeError and wait for spark cluster to recover (#3982133)
----
Bug fix.
This pull request prevents an AttributeError in the feature importance plotting function by adding a check for a `None` value with an informative warning message; a minimal sketch follows this entry.
- `flaml/fabric/visualization.py`: Checks if `result.feature_importances_` is `None`, logs a warning with possible reasons, and returns early.
- `flaml/fabric/visualization.py`: Imports `logger` from `flaml.automl.logger` to support the warning message.
Related work items: #3982120, #3982133
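For illustration, a minimal sketch of the guard described above. The helper name `plot_feature_importance` is hypothetical, and this sketch uses the standard `logging` module, whereas the actual change lives in `flaml/fabric/visualization.py` and imports `logger` from `flaml.automl.logger`:

```python
import logging

logger = logging.getLogger(__name__)


def plot_feature_importance(result):
    # Hypothetical helper mirroring the fix: guard against a result whose
    # feature_importances_ attribute is None before trying to plot it.
    if result.feature_importances_ is None:
        logger.warning(
            "feature_importances_ is None; the estimator may not support feature "
            "importance or may not have been fitted. Skipping the plot."
        )
        return None
    # ... plotting logic would follow here ...
```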
* Removed deprecated metadata section
* Fix log_params and log_artifact: run_id is not supported in mlflow 2.6.0
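For context on the fix above: in mlflow 2.6.0 the fluent `mlflow.log_params` and `mlflow.log_artifact` helpers do not accept a `run_id` argument, so logging to a specific run has to go through the client API. A minimal sketch of that kind of workaround, using a hypothetical `log_to_run` helper (not necessarily how the fix is implemented in FLAML):

```python
from mlflow.tracking import MlflowClient


def log_to_run(run_id: str, params: dict, local_path: str) -> None:
    # MlflowClient methods take an explicit run_id, unlike the fluent
    # mlflow.log_params / mlflow.log_artifact helpers in mlflow 2.6.0.
    client = MlflowClient()
    for key, value in params.items():
        client.log_param(run_id, key, value)
    client.log_artifact(run_id, local_path)
```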
* Remove autogen
* Remove autogen
* Remove autogen
* Merged PR 1776547: Fix flaky test test_automl
Don't throw an error when the time budget is not sufficient
----
#### AI description (iteration 1)
#### PR Classification
Bug fix addressing a failing test in the AutoML notebook example.
#### PR Summary
This PR fixes a flaky test by adding a conditional check in the AutoML test that prints a message and exits early if no best estimator is set, thereby preventing unpredictable test failures; a sketch follows this entry.
- `test/automl/test_notebook_example.py`: Introduced a check to print "Training budget is not sufficient" and return if `automl.best_estimator` is not found.
Related work items: #4573514
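A minimal, self-contained sketch of that guard. The dataset, budget, and function name here are illustrative assumptions, not the actual contents of `test/automl/test_notebook_example.py`:

```python
from flaml import AutoML
from sklearn.datasets import load_iris


def run_automl_with_tiny_budget():
    # With a very small time budget no trial may finish, in which case
    # best_estimator is empty and we bail out instead of asserting on it.
    X, y = load_iris(return_X_y=True)
    automl = AutoML()
    automl.fit(X, y, task="classification", time_budget=1, verbose=0)
    if not automl.best_estimator:
        print("Training budget is not sufficient")
        return
    print("best estimator:", automl.best_estimator)
```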
* Merged PR 1777952: Fix unrecognized or malformed field 'license-file' when uploading wheel to feed
Try to fix InvalidDistribution: Invalid distribution metadata: unrecognized or malformed field 'license-file'
----
Bug fix addressing package metadata configuration.
This pull request fixes the error with unrecognized or malformed license file fields during wheel uploads by updating the setup configuration.
- In `setup.py`, added `license="MIT"` and `license_files=["LICENSE"]` to provide proper license metadata (see the abridged sketch after this entry).
Related work items: #4560034
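An abridged sketch of what that `setup.py` change looks like; apart from the two license fields named in the PR, the arguments shown are illustrative context rather than the exact FLAML setup script:

```python
from setuptools import find_packages, setup

setup(
    name="FLAML",
    packages=find_packages(include=["flaml*"]),
    # License metadata expected by the feed's wheel validation:
    license="MIT",
    license_files=["LICENSE"],
    # ... version, install_requires, and other arguments omitted ...
)
```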
* Cherry-pick Merged PR 1879296: Add support to python 3.12 and spark 4.0
* Cherry-pick Merged PR 1890869: Improve time_budget estimation for mlflow logging
* Cherry-pick Merged PR 1879296: Add support to python 3.12 and spark 4.0
* Disable openai workflow
* Add python 3.12 to test envs
* Manually trigger openai
* Support markdown files with underscore-prefixed file names
* Improve save dependencies
* SynapseML is not installed
* Fix syntax error: Module !flaml/autogen was never imported
* macOS 3.12 also hangs
* Fix syntax error
* Update Python version in actions
* Install setuptools to use pkg_resources
* Fix test_automl_performance in GitHub Actions
* Fix test_nested_run
import os
import shutil
import sys

import pytest
from utils import get_automl_settings, get_toy_data_seqclassification

from flaml.default import portfolio

try:
    import transformers

    _transformers_installed = True
except ImportError:
    _transformers_installed = False

if (
    sys.platform.startswith("darwin")
    and sys.version_info >= (3, 11)
    or not _transformers_installed
    or sys.platform == "win32"
):
    pytest.skip("skipping Python 3.11 on MacOS or without transformers or on Windows", allow_module_level=True)

pytestmark = (
    pytest.mark.spark
)  # set to spark as parallel testing raised ValueError: Feature NonExisting not implemented.


def pop_args(fit_kwargs):
    # Drop AutoML-level kwargs that the underlying estimator's fit() does not accept.
    fit_kwargs.pop("max_iter", None)
    fit_kwargs.pop("use_ray", None)
    fit_kwargs.pop("estimator_list", None)
    fit_kwargs.pop("time_budget", None)
    fit_kwargs.pop("log_file_name", None)


def test_build_portfolio(path="./test/nlp/default", strategy="greedy"):
    sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task seq-classification --estimator transformer_ms --strategy {strategy}".split()
    portfolio.main()


def test_starting_point_not_in_search_space():
    """Regression test for starting points located outside of the search space.

    Covers both the case without custom_hp and the case with custom_hp set.
    This test must not require network access to Hugging Face.
    """
    from flaml.automl.state import SearchState
    from flaml.automl.task.factory import task_factory

    # Case 1: starting_points located outside of the search space, custom_hp not set.
    this_estimator_name = "transformer"
    X_train, y_train, _, _, _ = get_toy_data_seqclassification()
    task = task_factory("seq-classification", X_train, y_train)
    estimator_class = task.estimator_class_from_str(this_estimator_name)
    estimator_class.init()

    # SearchState is where invalid starting points are filtered out when max_iter > 1.
    search_state = SearchState(
        learner_class=estimator_class,
        data=X_train,
        task=task,
        starting_point={"learning_rate": 2e-3},
        max_iter=3,
        budget=10,
    )
    assert search_state.init_config and search_state.init_config[0].get("learning_rate") != 2e-3

    # Case 2: starting_points located outside of the search space, custom_hp set.
    from flaml import tune

    X_train, y_train, _, _, _ = get_toy_data_seqclassification()

    this_estimator_name = "transformer_ms"
    task = task_factory("seq-classification", X_train, y_train)
    estimator_class = task.estimator_class_from_str(this_estimator_name)
    estimator_class.init()

    custom_hp = {
        "model_path": {
            "domain": "albert-base-v2",
        },
        "learning_rate": {
            "domain": tune.choice([1e-4, 1e-5]),
        },
        "per_device_train_batch_size": {
            "domain": 2,
        },
    }

    # Simulate a suggested starting point (e.g. from portfolio) which becomes invalid
    # after custom_hp constrains the space.
    invalid_starting_points = [
        {
            "learning_rate": 1e-5,
            "num_train_epochs": 1.0,
            "per_device_train_batch_size": 8,
            "seed": 43,
            "global_max_steps": 100,
            "model_path": "google/electra-base-discriminator",
        }
    ]

    search_state = SearchState(
        learner_class=estimator_class,
        data=X_train,
        task=task,
        starting_point=invalid_starting_points,
        custom_hp=custom_hp,
        max_iter=3,
        budget=10,
    )

    assert search_state.init_config, "Expected a non-empty init_config list"
    init_config0 = search_state.init_config[0]
    assert init_config0 is not None
    assert len(init_config0) == len(search_state._search_space_domain) - len(custom_hp), (
        "The search space is updated with custom_hp on {} hyperparameters of "
        "the specified estimator without an initial value. Thus a valid init config "
        "should only contain the cardinality of the search space minus {}.".format(
            len(custom_hp),
            len(custom_hp),
        )
    )
    assert search_state.search_space["model_path"] == "albert-base-v2"

    if os.path.exists("test/data/output/"):
        try:
            shutil.rmtree("test/data/output/")
        except PermissionError:
            print("PermissionError when deleting test/data/output/")


def test_points_to_evaluate():
    from flaml import AutoML

    X_train, y_train, X_val, y_val, _ = get_toy_data_seqclassification()

    automl = AutoML()
    automl_settings = get_automl_settings(estimator_name="transformer_ms")

    automl_settings["starting_points"] = "data:test/nlp/default/"

    automl_settings["custom_hp"] = {"transformer_ms": {"model_path": {"domain": "google/electra-small-discriminator"}}}

    try:
        automl.fit(X_train, y_train, **automl_settings)
    except OSError as e:
        # The Hugging Face hub may throttle CI; skip instead of failing on rate limits.
        message = str(e)
        if "Too Many Requests" in message or "rate limit" in message.lower():
            pytest.skip(f"Skipping HF model load/training: {message}")
        raise

    if os.path.exists("test/data/output/"):
        try:
            shutil.rmtree("test/data/output/")
        except PermissionError:
            print("PermissionError when deleting test/data/output/")


# TODO: implement _test_zero_shot_model
def test_zero_shot_nomodel():
    from flaml.default import preprocess_and_suggest_hyperparams

    estimator_name = "transformer_ms"

    location = "test/nlp/default"
    X_train, y_train, X_val, y_val, X_test = get_toy_data_seqclassification()

    automl_settings = get_automl_settings(estimator_name)

    (
        hyperparams,
        estimator_class,
        X_train,
        y_train,
        _,
        _,
    ) = preprocess_and_suggest_hyperparams("seq-classification", X_train, y_train, estimator_name, location=location)

    model = estimator_class(**hyperparams)  # estimator_class is TransformersEstimatorModelSelection

    fit_kwargs = automl_settings.pop("fit_kwargs_by_estimator", {}).get(estimator_name, {})
    fit_kwargs.update(automl_settings)
    pop_args(fit_kwargs)

    try:
        model.fit(X_train, y_train, **fit_kwargs)
    except OSError as e:
        # The Hugging Face hub may throttle CI; skip instead of failing on rate limits.
        message = str(e)
        if "Too Many Requests" in message or "rate limit" in message.lower():
            pytest.skip(f"Skipping HF model load/training: {message}")
        raise

    if os.path.exists("test/data/output/"):
        try:
            shutil.rmtree("test/data/output/")
        except PermissionError:
            print("PermissionError when deleting test/data/output/")


def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
    # Remove the config generated by test_build_portfolio so that rebuilding the
    # portfolio from the error metafeatures file exercises the error path below.
    os.remove("./test/nlp/default/transformer_ms/seq-classification.json")
    sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures_err.csv --task seq-classification --estimator transformer_ms --strategy {strategy}".split()
    portfolio.main()

    from flaml.default import preprocess_and_suggest_hyperparams

    estimator_name = "transformer_ms"

    location = "test/nlp/default"
    X_train, y_train, X_val, y_val, X_test = get_toy_data_seqclassification()

    try:
        (
            hyperparams,
            estimator_class,
            X_train,
            y_train,
            _,
            _,
        ) = preprocess_and_suggest_hyperparams(
            "seq-classification", X_train, y_train, estimator_name, location=location
        )
    except ValueError:
        print("Feature not implemented")

    if os.path.exists("test/data/output/"):
        try:
            shutil.rmtree("test/data/output/")
        except PermissionError:
            print("PermissionError when deleting test/data/output/")