import pickle

import mlflow
import mlflow.entities
import pytest
from pandas import DataFrame
from sklearn.datasets import load_iris

from flaml import AutoML


class TestMLFlowLoggingParam:
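    # Tests for the `mlflow_logging` switch: it can be set on the AutoML
    # constructor or overridden per fit() call, and it controls whether
    # fit() logs child runs under the currently active MLflow run.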

    def test_update_and_install_requirements(self):
        import mlflow
        from sklearn import tree

        from flaml.fabric.mlflow import update_and_install_requirements

        with mlflow.start_run(run_name="test") as run:
            sk_model = tree.DecisionTreeClassifier()
            mlflow.sklearn.log_model(sk_model, "model", registered_model_name="test")

        update_and_install_requirements(run_id=run.info.run_id)
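
    # A minimal sketch (not executed by the test) of how the model logged
    # above could be loaded back for inspection; the runs:/ URI form is
    # standard MLflow:
    #   loaded = mlflow.sklearn.load_model(f"runs:/{run.info.run_id}/model")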

    def test_should_start_new_run_by_default(self, automl_settings):
        with mlflow.start_run() as parent_run:
            automl = AutoML()
            X_train, y_train = load_iris(return_X_y=True)
            automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
            try:
                self._check_mlflow_parameters(automl, parent_run.info)
            except FileNotFoundError:
                print("[WARNING]: No file found")

        children = self._get_child_runs(parent_run)
        assert len(children) >= 1, f"Expected at least 1 child run, got {len(children)}"
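
    # Context for the child-run assertions: MLflow marks a child run with the
    # `mlflow.parentRunId` tag, which `_get_child_runs` below filters on.
    # A minimal sketch of the nesting pattern (assumed to be roughly what
    # AutoML.fit does internally when mlflow_logging is enabled):
    #   with mlflow.start_run() as parent:
    #       with mlflow.start_run(nested=True):
    #           ...  # tagged with mlflow.parentRunId = parent.info.run_id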

    def test_should_not_start_new_run_when_mlflow_logging_set_to_false_in_init(self, automl_settings):
        with mlflow.start_run() as parent_run:
            automl = AutoML(mlflow_logging=False)
            X_train, y_train = load_iris(return_X_y=True)
            automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
            try:
                self._check_mlflow_parameters(automl, parent_run.info)
            except FileNotFoundError:
                print("[WARNING]: No file found")

        children = self._get_child_runs(parent_run)
        assert len(children) == 0, f"Expected 0 child runs, got {len(children)}"

    def test_should_not_start_new_run_when_mlflow_logging_set_to_false_in_fit(self, automl_settings):
        with mlflow.start_run() as parent_run:
            automl = AutoML()
            X_train, y_train = load_iris(return_X_y=True)
            automl.fit(X_train=X_train, y_train=y_train, mlflow_logging=False, **automl_settings)
            try:
                self._check_mlflow_parameters(automl, parent_run.info)
            except FileNotFoundError:
                print("[WARNING]: No file found")

        children = self._get_child_runs(parent_run)
        assert len(children) == 0, f"Expected 0 child runs, got {len(children)}"

    def test_should_start_new_run_when_mlflow_logging_set_to_true_in_fit(self, automl_settings):
        with mlflow.start_run() as parent_run:
            automl = AutoML(mlflow_logging=False)
            X_train, y_train = load_iris(return_X_y=True)
            automl.fit(X_train=X_train, y_train=y_train, mlflow_logging=True, **automl_settings)
            try:
                self._check_mlflow_parameters(automl, parent_run.info)
            except FileNotFoundError:
                print("[WARNING]: No file found")

        children = self._get_child_runs(parent_run)
        assert len(children) >= 1, f"Expected at least 1 child run, got {len(children)}"
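
    # Taken together, the tests above show that the `mlflow_logging` argument
    # of fit() takes precedence over the value passed to the AutoML
    # constructor.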

    @staticmethod
    def _get_child_runs(parent_run: mlflow.entities.Run) -> DataFrame:
        experiment_id = parent_run.info.experiment_id
        return mlflow.search_runs(
            [experiment_id], filter_string=f"tags.mlflow.parentRunId = '{parent_run.info.run_id}'"
        )
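
    # `mlflow.search_runs` returns a pandas DataFrame by default, so the
    # `len(children)` checks above count one row per child run.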

    @staticmethod
    def _check_mlflow_parameters(automl: AutoML, run_info: mlflow.entities.RunInfo):
        with open(
            f"./mlruns/{run_info.experiment_id}/{run_info.run_id}/artifacts/automl_pipeline/model.pkl", "rb"
        ) as f:
            t = pickle.load(f)
            if __name__ == "__main__":
                print(t)
        if not hasattr(automl.model._model, "_get_param_names"):
            return
        # Compare each estimator parameter of the unpickled pipeline against
        # the in-memory AutoML model.
        for param in automl.model._model._get_param_names():
            assert getattr(t._final_estimator._model, param) == getattr(
                automl.model._model, param
            ), "The MLflow-logged model is not consistent with the AutoML model"
            if __name__ == "__main__":
                print(param, "\t", getattr(automl.model._model, param))
        print("[INFO]: Successfully Logged")
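
    # The hard-coded ./mlruns path assumes MLflow's default local file store;
    # with any other tracking backend the pickle is absent and the calling
    # tests deliberately swallow the FileNotFoundError.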

    @pytest.fixture(scope="class")
    def automl_settings(self):
        # End any stray active run before this class's tests begin, so each
        # test controls its own parent run.
        mlflow.end_run()
        return {
            "time_budget": 5,  # in seconds
            "metric": "accuracy",
            "task": "classification",
            "log_file_name": "iris.log",
        }


if __name__ == "__main__":
    s = TestMLFlowLoggingParam()
    automl_settings = {
        "time_budget": 5,  # in seconds
        "metric": "accuracy",
        "task": "classification",
        "log_file_name": "iris.log",
    }
    s.test_should_start_new_run_by_default(automl_settings)
    s.test_should_start_new_run_when_mlflow_logging_set_to_true_in_fit(automl_settings)
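
# The block above only smoke-tests two cases; running the file under pytest
# (e.g. `pytest <this file>`) exercises the whole class, with the
# `automl_settings` fixture supplying the arguments.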