FLAML/test/automl/test_mlflow.py

import pickle

import mlflow
import mlflow.entities
import pytest
from pandas import DataFrame
from sklearn.datasets import load_iris

from flaml import AutoML
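

# Tests for FLAML's MLflow integration: requirement syncing for logged models,
# and the nested-run behavior that AutoML.fit() controls via `mlflow_logging`.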
class TestMLFlowLoggingParam:
    def test_update_and_install_requirements(self):
        import mlflow
        from sklearn import tree

        from flaml.fabric.mlflow import update_and_install_requirements

        with mlflow.start_run(run_name="test") as run:
            sk_model = tree.DecisionTreeClassifier()
            mlflow.sklearn.log_model(sk_model, "model", registered_model_name="test")
        update_and_install_requirements(run_id=run.info.run_id)
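
    # The four tests below verify when AutoML.fit() starts nested MLflow child
    # runs under an already-active parent run: logging is on by default, it can
    # be disabled in the constructor or in fit(), and the fit() argument
    # overrides the constructor setting.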

    def test_should_start_new_run_by_default(self, automl_settings):
        with mlflow.start_run() as parent_run:
            automl = AutoML()
            X_train, y_train = load_iris(return_X_y=True)
            automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
            try:
                self._check_mlflow_parameters(automl, parent_run.info)
            except FileNotFoundError:
                print("[WARNING]: No file found")

        children = self._get_child_runs(parent_run)
        assert len(children) >= 1, f"Expected at least 1 child run, got {len(children)}"

    def test_should_not_start_new_run_when_mlflow_logging_set_to_false_in_init(self, automl_settings):
        with mlflow.start_run() as parent_run:
            automl = AutoML(mlflow_logging=False)
            X_train, y_train = load_iris(return_X_y=True)
            automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
            try:
                self._check_mlflow_parameters(automl, parent_run.info)
            except FileNotFoundError:
                print("[WARNING]: No file found")

        children = self._get_child_runs(parent_run)
        assert len(children) == 0, f"Expected 0 child runs, got {len(children)}"

    def test_should_not_start_new_run_when_mlflow_logging_set_to_false_in_fit(self, automl_settings):
        with mlflow.start_run() as parent_run:
            automl = AutoML()
            X_train, y_train = load_iris(return_X_y=True)
            automl.fit(X_train=X_train, y_train=y_train, mlflow_logging=False, **automl_settings)
            try:
                self._check_mlflow_parameters(automl, parent_run.info)
            except FileNotFoundError:
                print("[WARNING]: No file found")

        children = self._get_child_runs(parent_run)
        assert len(children) == 0, f"Expected 0 child runs, got {len(children)}"

    def test_should_start_new_run_when_mlflow_logging_set_to_true_in_fit(self, automl_settings):
        with mlflow.start_run() as parent_run:
            automl = AutoML(mlflow_logging=False)
            X_train, y_train = load_iris(return_X_y=True)
            automl.fit(X_train=X_train, y_train=y_train, mlflow_logging=True, **automl_settings)
            try:
                self._check_mlflow_parameters(automl, parent_run.info)
            except FileNotFoundError:
                print("[WARNING]: No file found")

        children = self._get_child_runs(parent_run)
        assert len(children) >= 1, f"Expected at least 1 child run, got {len(children)}"
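
    # Helper: look up child runs via the `mlflow.parentRunId` tag that MLflow
    # sets on nested runs.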

    @staticmethod
    def _get_child_runs(parent_run: mlflow.entities.Run) -> DataFrame:
        experiment_id = parent_run.info.experiment_id
        return mlflow.search_runs(
            [experiment_id], filter_string=f"tags.mlflow.parentRunId = '{parent_run.info.run_id}'"
        )
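
    # Helper: load the pipeline pickled into the local ./mlruns artifact store
    # and check that each estimator parameter matches the in-memory AutoML model.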

    @staticmethod
    def _check_mlflow_parameters(automl: AutoML, run_info: mlflow.entities.RunInfo):
        with open(
            f"./mlruns/{run_info.experiment_id}/{run_info.run_id}/artifacts/automl_pipeline/model.pkl", "rb"
        ) as f:
            t = pickle.load(f)
            if __name__ == "__main__":
                print(t)
            if not hasattr(automl.model._model, "_get_param_names"):
                return
            for param in automl.model._model._get_param_names():
                assert eval("t._final_estimator._model" + f".{param}") == eval(
                    "automl.model._model" + f".{param}"
                ), "MLflow logging is not consistent with the AutoML model"
                if __name__ == "__main__":
                    print(param, "\t", eval("automl.model._model" + f".{param}"))
        print("[INFO]: Successfully Logged")
@pytest.fixture(scope="class")
def automl_settings(self):
mlflow.end_run()
return {
"time_budget": 5, # in seconds
"metric": "accuracy",
"task": "classification",
"log_file_name": "iris.log",
}
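

# When run as a script rather than under pytest, exercise the two tests that
# expect child runs, passing the settings dict in place of the fixture.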
if __name__ == "__main__":
    s = TestMLFlowLoggingParam()
    automl_settings = {
        "time_budget": 5,  # in seconds
        "metric": "accuracy",
        "task": "classification",
        "log_file_name": "iris.log",
    }
    s.test_should_start_new_run_by_default(automl_settings)
    s.test_should_start_new_run_when_mlflow_logging_set_to_true_in_fit(automl_settings)