Merged PR 1685054: Add more logs and function wait_futures for easier post analysis (#1438)

- Add a `wait_futures` function for easier post-run analysis
- Use `logger` instead of `print`

----
#### AI description (iteration 1)
#### PR Classification
A code enhancement for debugging asynchronous mlflow logging and improving post-run analysis.

#### PR Summary
This PR adds detailed debug logging to the mlflow integration and introduces a new `wait_futures` function to streamline the collection of asynchronous task results for improved analysis.
- `flaml/fabric/mlflow.py`: Added debug log statements around starting and ending mlflow runs to trace run IDs and execution flow.
- `flaml/automl/automl.py`: Implemented the `wait_futures` function to handle asynchronous task results and replaced a `print` call with `logger.info` for consistent logging (see the sketch after this list).
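
For readers skimming the diff, here is a minimal sketch of the collection pattern `wait_futures` implements: block on each submitted future, log any exception, and report the total wait time. The `ThreadPoolExecutor` setup and the `futures` dict keyed by future with a task-name value are illustrative assumptions, not FLAML's actual attributes; only the logging calls mirror the diff below.

```python
# Sketch of the futures-collection pattern behind wait_futures.
# The executor setup and the `futures` dict are illustrative assumptions.
import logging
import time
from concurrent.futures import ThreadPoolExecutor

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def wait_futures(futures):
    """Block until all submitted logging tasks finish, surfacing any errors."""
    if not futures:
        logger.debug("No futures to wait for.")
        return
    logger.debug("Collecting results from submitted record_state tasks")
    t1 = time.perf_counter()
    for future, task_name in futures.items():
        try:
            future.result()  # re-raises any exception raised in the worker
        except Exception as e:
            logger.warning(f"Exception for log_model task {task_name}: {e}")
    t2 = time.perf_counter()
    logger.debug(f"Collecting results from tasks submitted to executors costs {t2 - t1} seconds.")


# Example usage with a thread pool standing in for the mlflow logging executor.
with ThreadPoolExecutor(max_workers=2) as pool:
    futures = {pool.submit(time.sleep, 0.1): "run_demo_log_model"}
    wait_futures(futures)
```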

Related work items: #4029592
Author: Li Jiang
Date: 2025-05-27 15:32:56 +08:00 (committed by GitHub)
Parent: 12183e5f73
Commit: 22911ea1ef
2 changed files with 20 additions and 1 deletion

flaml/automl/automl.py

@@ -1732,7 +1732,7 @@ class AutoML(BaseEstimator):
             if not (mlflow.active_run() is not None or is_autolog_enabled()):
                 self.mlflow_integration.only_history = True
         except KeyError:
-            print("Not in Fabric, Skipped")
+            logger.info("Not in Fabric, Skipped")
         task.validate_data(
             self,
             self._state,
@@ -2756,6 +2756,9 @@ class AutoML(BaseEstimator):
             )
         else:
             logger.warning("not retraining because the time budget is too small.")
+        self.wait_futures()
+
+    def wait_futures(self):
         if self.mlflow_integration is not None:
             logger.debug("Collecting results from submitted record_state tasks")
             t1 = time.perf_counter()
@@ -2775,6 +2778,8 @@ class AutoML(BaseEstimator):
logger.warning(f"Exception for log_model task {_task}: {e}")
t2 = time.perf_counter()
logger.debug(f"Collecting results from tasks submitted to executors costs {t2-t1} seconds.")
else:
logger.debug("No futures to wait for.")
def __del__(self):
if (

flaml/fabric/mlflow.py

@@ -516,6 +516,9 @@ class MLflowIntegration:
         )
         run = mlflow.active_run()
         if run and run.info.run_id == self.parent_run_id:
+            logger.debug(
+                f"Current active run_id {run.info.run_id} == parent_run_id {self.parent_run_id}, Starting run_id {run_id}"
+            )
             mlflow.start_run(run_id=run_id, nested=True)
         elif run and run.info.run_id != run_id:
             ret_message = (
@@ -523,7 +526,9 @@ class MLflowIntegration:
             )
             logger.error(ret_message)
         else:
+            logger.debug(f"No active run, start run_id {run_id}")
             mlflow.start_run(run_id=run_id)
+        logger.debug(f"logged model {estimator} to run_id {mlflow.active_run().info.run_id}")
         if estimator.endswith("_spark"):
             # mlflow.spark.log_model(model, estimator, signature=signature)
             mlflow.spark.log_model(model, "model", signature=signature)
@@ -550,6 +555,7 @@ class MLflowIntegration:
             )
             self.futures[future] = f"run_{run_id}_requirements_updated"
         if not run or run.info.run_id == self.parent_run_id:
+            logger.debug(f"Ending current run_id {mlflow.active_run().info.run_id}")
             mlflow.end_run()
         return ret_message
@@ -575,12 +581,19 @@ class MLflowIntegration:
         )
         run = mlflow.active_run()
         if run and run.info.run_id == self.parent_run_id:
+            logger.debug(
+                f"Current active run_id {run.info.run_id} == parent_run_id {self.parent_run_id}, Starting run_id {run_id}"
+            )
             mlflow.start_run(run_id=run_id, nested=True)
         elif run and run.info.run_id != run_id:
             ret_message = f"Error: Should _log_pipeline {flavor_name}:{pipeline_name}:{estimator} model to run_id {run_id}, but logged to run_id {run.info.run_id}"
             logger.error(ret_message)
         else:
+            logger.debug(f"No active run, start run_id {run_id}")
             mlflow.start_run(run_id=run_id)
+        logger.debug(
+            f"logging pipeline {flavor_name}:{pipeline_name}:{estimator} to run_id {mlflow.active_run().info.run_id}"
+        )
         if flavor_name == "sklearn":
             mlflow.sklearn.log_model(pipeline, pipeline_name, signature=signature)
         elif flavor_name == "spark":
@@ -596,6 +609,7 @@ class MLflowIntegration:
             )
             self.futures[future] = f"run_{run_id}_requirements_updated"
         if not run or run.info.run_id == self.parent_run_id:
+            logger.debug(f"Ending current run_id {mlflow.active_run().info.run_id}")
             mlflow.end_run()
         return ret_message
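
The run-management branches that the new debug statements trace can be summarized as follows. This is a hedged sketch: the helper names `start_child_run_if_needed` and `end_child_run_if_needed` and the simplified error message are illustrative, not FLAML's actual methods; only the `mlflow.start_run(..., nested=True)` / `mlflow.end_run()` calls and the log wording follow the diff above.

```python
# Sketch of the run start/end logic the new debug logs trace in MLflowIntegration.
# Helper names and the error message text are illustrative assumptions.
import logging
import mlflow

logger = logging.getLogger(__name__)


def start_child_run_if_needed(run_id, parent_run_id):
    run = mlflow.active_run()
    if run and run.info.run_id == parent_run_id:
        # The parent run is active, so the model run is opened as a nested run.
        logger.debug(f"Current active run_id {run.info.run_id} == parent_run_id {parent_run_id}, Starting run_id {run_id}")
        mlflow.start_run(run_id=run_id, nested=True)
    elif run and run.info.run_id != run_id:
        # Some other run is active; logging here would attach artifacts to the wrong run.
        logger.error(f"Error: should log to run_id {run_id}, but run_id {run.info.run_id} is active")
    else:
        logger.debug(f"No active run, start run_id {run_id}")
        mlflow.start_run(run_id=run_id)
    return run


def end_child_run_if_needed(run, parent_run_id):
    # Only end the run that was started here; leave a caller-owned active run alone.
    if not run or run.info.run_id == parent_run_id:
        logger.debug(f"Ending current run_id {mlflow.active_run().info.run_id}")
        mlflow.end_run()
```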