Merged PR 1685054: Add more logs and function wait_futures for easier post analysis (#1438)

- Add a `wait_futures` function for easier post-run analysis
- Use `logger` instead of `print`

----
#### AI description (iteration 1)
#### PR Classification
A code enhancement for debugging asynchronous mlflow logging and improving post-run analysis.

#### PR Summary
This PR adds detailed debug logging to the mlflow integration and introduces a new `wait_futures` function to streamline the collection of asynchronous task results for improved analysis.
- `flaml/fabric/mlflow.py`: Added debug log statements around starting and ending mlflow runs to trace run IDs and execution flow.
- `flaml/automl/automl.py`: Implemented the `wait_futures` function to handle asynchronous task results and replaced a `print` call with `logger.info` for consistent logging (see the sketch after this list).
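
For readers skimming the diff, here is a minimal sketch of the collection pattern `wait_futures` implements: block on each submitted future, log any exception, and report the total wait time. The `ThreadPoolExecutor` setup and the `futures` dict keyed by future with a task-name value are illustrative assumptions, not FLAML's actual attributes; only the logging calls mirror the diff below.

```python
# Sketch of the futures-collection pattern behind wait_futures.
# The executor setup and the `futures` dict are illustrative assumptions.
import logging
import time
from concurrent.futures import ThreadPoolExecutor

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def wait_futures(futures):
    """Block until all submitted logging tasks finish, surfacing any errors."""
    if not futures:
        logger.debug("No futures to wait for.")
        return
    logger.debug("Collecting results from submitted record_state tasks")
    t1 = time.perf_counter()
    for future, task_name in futures.items():
        try:
            future.result()  # re-raises any exception raised in the worker
        except Exception as e:
            logger.warning(f"Exception for log_model task {task_name}: {e}")
    t2 = time.perf_counter()
    logger.debug(f"Collecting results from tasks submitted to executors costs {t2 - t1} seconds.")


# Example usage with a thread pool standing in for the mlflow logging executor.
with ThreadPoolExecutor(max_workers=2) as pool:
    futures = {pool.submit(time.sleep, 0.1): "run_demo_log_model"}
    wait_futures(futures)
```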

Related work items: #4029592
Author: Li Jiang
Date: 2025-05-27 15:32:56 +08:00 (committed by GitHub)
Parent: 12183e5f73
Commit: 22911ea1ef
2 changed files with 20 additions and 1 deletion

flaml/automl/automl.py

@@ -1732,7 +1732,7 @@ class AutoML(BaseEstimator):
             if not (mlflow.active_run() is not None or is_autolog_enabled()):
                 self.mlflow_integration.only_history = True
         except KeyError:
-            print("Not in Fabric, Skipped")
+            logger.info("Not in Fabric, Skipped")
         task.validate_data(
             self,
             self._state,
@@ -2756,6 +2756,9 @@ class AutoML(BaseEstimator):
             )
         else:
             logger.warning("not retraining because the time budget is too small.")
+        self.wait_futures()
+
+    def wait_futures(self):
         if self.mlflow_integration is not None:
             logger.debug("Collecting results from submitted record_state tasks")
             t1 = time.perf_counter()
@@ -2775,6 +2778,8 @@ class AutoML(BaseEstimator):
logger.warning(f"Exception for log_model task {_task}: {e}")
t2 = time.perf_counter()
logger.debug(f"Collecting results from tasks submitted to executors costs {t2-t1} seconds.")
else:
logger.debug("No futures to wait for.")
def __del__(self):
if (

flaml/fabric/mlflow.py

@@ -516,6 +516,9 @@ class MLflowIntegration:
         )
         run = mlflow.active_run()
         if run and run.info.run_id == self.parent_run_id:
+            logger.debug(
+                f"Current active run_id {run.info.run_id} == parent_run_id {self.parent_run_id}, Starting run_id {run_id}"
+            )
             mlflow.start_run(run_id=run_id, nested=True)
         elif run and run.info.run_id != run_id:
             ret_message = (
@@ -523,7 +526,9 @@ class MLflowIntegration:
             )
             logger.error(ret_message)
         else:
+            logger.debug(f"No active run, start run_id {run_id}")
             mlflow.start_run(run_id=run_id)
+        logger.debug(f"logged model {estimator} to run_id {mlflow.active_run().info.run_id}")
         if estimator.endswith("_spark"):
             # mlflow.spark.log_model(model, estimator, signature=signature)
             mlflow.spark.log_model(model, "model", signature=signature)
@@ -550,6 +555,7 @@ class MLflowIntegration:
             )
             self.futures[future] = f"run_{run_id}_requirements_updated"
         if not run or run.info.run_id == self.parent_run_id:
+            logger.debug(f"Ending current run_id {mlflow.active_run().info.run_id}")
             mlflow.end_run()
         return ret_message
@@ -575,12 +581,19 @@ class MLflowIntegration:
         )
         run = mlflow.active_run()
         if run and run.info.run_id == self.parent_run_id:
+            logger.debug(
+                f"Current active run_id {run.info.run_id} == parent_run_id {self.parent_run_id}, Starting run_id {run_id}"
+            )
             mlflow.start_run(run_id=run_id, nested=True)
         elif run and run.info.run_id != run_id:
             ret_message = f"Error: Should _log_pipeline {flavor_name}:{pipeline_name}:{estimator} model to run_id {run_id}, but logged to run_id {run.info.run_id}"
             logger.error(ret_message)
         else:
+            logger.debug(f"No active run, start run_id {run_id}")
             mlflow.start_run(run_id=run_id)
+        logger.debug(
+            f"logging pipeline {flavor_name}:{pipeline_name}:{estimator} to run_id {mlflow.active_run().info.run_id}"
+        )
         if flavor_name == "sklearn":
             mlflow.sklearn.log_model(pipeline, pipeline_name, signature=signature)
         elif flavor_name == "spark":
@@ -596,6 +609,7 @@ class MLflowIntegration:
             )
             self.futures[future] = f"run_{run_id}_requirements_updated"
         if not run or run.info.run_id == self.parent_run_id:
+            logger.debug(f"Ending current run_id {mlflow.active_run().info.run_id}")
             mlflow.end_run()
         return ret_message
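
The run-management branches that the new debug statements trace can be summarized as follows. This is a hedged sketch: the helper names `start_child_run_if_needed` and `end_child_run_if_needed` and the simplified error message are illustrative, not FLAML's actual methods; only the `mlflow.start_run(..., nested=True)` / `mlflow.end_run()` calls and the log wording follow the diff above.

```python
# Sketch of the run start/end logic the new debug logs trace in MLflowIntegration.
# Helper names and the error message text are illustrative assumptions.
import logging
import mlflow

logger = logging.getLogger(__name__)


def start_child_run_if_needed(run_id, parent_run_id):
    run = mlflow.active_run()
    if run and run.info.run_id == parent_run_id:
        # The parent run is active, so the model run is opened as a nested run.
        logger.debug(f"Current active run_id {run.info.run_id} == parent_run_id {parent_run_id}, Starting run_id {run_id}")
        mlflow.start_run(run_id=run_id, nested=True)
    elif run and run.info.run_id != run_id:
        # Some other run is active; logging here would attach artifacts to the wrong run.
        logger.error(f"Error: should log to run_id {run_id}, but run_id {run.info.run_id} is active")
    else:
        logger.debug(f"No active run, start run_id {run_id}")
        mlflow.start_run(run_id=run_id)
    return run


def end_child_run_if_needed(run, parent_run_id):
    # Only end the run that was started here; leave a caller-owned active run alone.
    if not run or run.info.run_id == parent_run_id:
        logger.debug(f"Ending current run_id {mlflow.active_run().info.run_id}")
        mlflow.end_run()
```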