Fix issue with "list index out of range" when max_iter=1 (#1419)

This commit is contained in:
Stickic-cyber
2025-04-09 21:54:17 +08:00
committed by GitHub
parent 437c239c11
commit 468bc62d27
2 changed files with 66 additions and 0 deletions

View File

@@ -2529,6 +2529,21 @@ class AutoML(BaseEstimator):
self._selected = state = self._search_states[estimator]
state.best_config_sample_size = self._state.data_size[0]
state.best_config = state.init_config[0] if state.init_config else {}
self._track_iter = 0
self._config_history[self._track_iter] = (estimator, state.best_config, self._state.time_from_start)
self._best_iteration = self._track_iter
state.val_loss = getattr(state, "val_loss", float("inf"))
state.best_loss = getattr(state, "best_loss", float("inf"))
state.config = getattr(state, "config", state.best_config.copy())
state.metric_for_logging = getattr(state, "metric_for_logging", None)
state.sample_size = getattr(state, "sample_size", self._state.data_size[0])
state.learner_class = getattr(state, "learner_class", self._state.learner_classes.get(estimator))
if hasattr(self, "mlflow_integration") and self.mlflow_integration:
self.mlflow_integration.record_state(
automl=self,
search_state=state,
estimator=estimator,
)
elif self._use_ray is False and self._use_spark is False:
self._search_sequential()
else:

View File

@@ -0,0 +1,51 @@
import mlflow
import numpy as np
import pandas as pd
from flaml import AutoML
def test_max_iter_1():
    """Regression test: AutoML.fit must succeed when ``max_iter`` is 1.

    Fits a ``ts_forecast`` task on 100 hourly timestamps with two candidate
    estimators, empty starting points and MLflow logging enabled, then checks
    that a model and a best MLflow run id are both available afterwards.
    """
    # Lowercase "h" is the hourly alias; the uppercase "H" spelling is
    # deprecated since pandas 2.2 and emits a FutureWarning.
    date_rng = pd.date_range(start="2024-01-01", periods=100, freq="h")
    X = pd.DataFrame({"ds": date_rng})

    # Use a seeded local generator so the synthetic target is identical on
    # every run instead of depending on the global NumPy random state.
    rng = np.random.default_rng(41)
    y_train_24h = rng.random(len(X)) * 100

    # AutoML
    settings = {
        "max_iter": 1,
        "estimator_list": ["xgboost", "lgbm"],
        "starting_points": {"xgboost": {}, "lgbm": {}},
        "task": "ts_forecast",
        "log_file_name": "test_max_iter_1.log",
        "seed": 41,
        "mlflow_exp_name": "TestExp-max_iter-1",
        "use_spark": False,
        "n_concurrent_trials": 1,
        "verbose": 1,
        "featurization": "off",
        "metric": "rmse",
        "mlflow_logging": True,
    }
    automl = AutoML(**settings)

    with mlflow.start_run(run_name="AutoMLModel-XGBoost-and-LGBM-max_iter_1"):
        automl.fit(
            X_train=X,
            y_train=y_train_24h,
            period=24,
            X_val=X,
            y_val=y_train_24h,
            split_ratio=0,
            force_cancel=False,
        )

    assert automl.model is not None, "AutoML failed to return a model"
    assert automl.best_run_id is not None, "Best run ID should not be None with mlflow logging"

    print("Best model:", automl.model)
    print("Best run ID:", automl.best_run_id)
# Allow running this test directly as a script, without going through pytest.
if __name__ == "__main__":
    test_max_iter_1()