Fix log_training_metric causing IndexError for time series models (#1469)

Co-authored-by: Li Jiang <lijiang1@microsoft.com>
2026-02-09 02:09:16 +08:00 · 2026-01-10 18:07:17 +08:00
parent 1c9835dc0a
commit 0b138d9193
5 changed files with 88 additions and 7 deletions
--- a/flaml/automl/ml.py
+++ b/flaml/automl/ml.py
@@ -616,7 +616,12 @@ def _eval_estimator(
            logger.warning(f"ValueError {e} happened in `metric_loss_score`, set `val_loss` to `np.inf`")
        metric_for_logging = {"pred_time": pred_time}
        if log_training_metric:
-            train_pred_y = get_y_pred(estimator, X_train, eval_metric, task)
+            # For time series forecasting, X_train may be a sampled dataset whose
+            # test partition can be empty. Use the training partition from X_val
+            # (which is the dataset used to define y_train above) to keep shapes
+            # aligned and avoid empty prediction inputs.
+            X_train_for_metric = X_val.X_train if isinstance(X_val, TimeSeriesDataset) else X_train
+            train_pred_y = get_y_pred(estimator, X_train_for_metric, eval_metric, task)
            metric_for_logging["train_loss"] = metric_loss_score(
                eval_metric,
                train_pred_y,
--- a/flaml/automl/time_series/tcn.py
+++ b/flaml/automl/time_series/tcn.py
@@ -264,7 +264,8 @@ class TCNEstimator(TimeSeriesEstimator):
    def predict(self, X):
        X = self.enrich(X)
        if isinstance(X, TimeSeriesDataset):
-            df = X.X_val
+            # Use X_train if test_data is empty (e.g., when computing training metrics)
+            df = X.X_val if len(X.test_data) > 0 else X.X_train
        else:
            df = X
        dataset = DataframeDataset(
--- a/flaml/automl/time_series/tft.py
+++ b/flaml/automl/time_series/tft.py
@@ -197,7 +197,11 @@ class TemporalFusionTransformerEstimator(TimeSeriesEstimator):
        last_data_cols = self.group_ids.copy()
        last_data_cols.append(self.target_names[0])
        last_data = self.data[lambda x: x.time_idx == x.time_idx.max()][last_data_cols]
-        decoder_data = X.X_val if isinstance(X, TimeSeriesDataset) else X
+        # Use X_train if test_data is empty (e.g., when computing training metrics)
+        if isinstance(X, TimeSeriesDataset):
+            decoder_data = X.X_val if len(X.test_data) > 0 else X.X_train
+        else:
+            decoder_data = X
        if "time_idx" not in decoder_data:
            decoder_data = add_time_idx_col(decoder_data)
        decoder_data["time_idx"] += encoder_data["time_idx"].max() + 1 - decoder_data["time_idx"].min()
--- a/flaml/automl/time_series/ts_model.py
+++ b/flaml/automl/time_series/ts_model.py
@@ -194,7 +194,13 @@ class Orbit(TimeSeriesEstimator):

        elif isinstance(X, TimeSeriesDataset):
            data = X
-            X = data.test_data[[self.time_col] + X.regressors]
+            # By default we predict on the dataset's test partition.
+            # Some internal call paths (e.g., training-metric logging) may pass a
+            # dataset whose test partition is empty; fall back to train partition.
+            if data.test_data is not None and len(data.test_data):
+                X = data.test_data[data.regressors + [data.time_col]]
+            else:
+                X = data.train_data[data.regressors + [data.time_col]]

        if self._model is not None:
            forecast = self._model.predict(X, **kwargs)
@@ -301,7 +307,13 @@ class Prophet(TimeSeriesEstimator):

        if isinstance(X, TimeSeriesDataset):
            data = X
+            # By default we predict on the dataset's test partition.
+            # Some internal call paths (e.g., training-metric logging) may pass a
+            # dataset whose test partition is empty; fall back to train partition.
+            if data.test_data is not None and len(data.test_data):
                X = data.test_data[data.regressors + [data.time_col]]
+            else:
+                X = data.train_data[data.regressors + [data.time_col]]

        X = X.rename(columns={self.time_col: "ds"})
        if self._model is not None:
@@ -327,11 +339,19 @@ class StatsModelsEstimator(TimeSeriesEstimator):

        if isinstance(X, TimeSeriesDataset):
            data = X
+            # By default we predict on the dataset's test partition.
+            # Some internal call paths (e.g., training-metric logging) may pass a
+            # dataset whose test partition is empty; fall back to train partition.
+            if data.test_data is not None and len(data.test_data):
                X = data.test_data[data.regressors + [data.time_col]]
+            else:
+                X = data.train_data[data.regressors + [data.time_col]]
        else:
            X = X[self.regressors + [self.time_col]]

        if isinstance(X, DataFrame):
+            if X.shape[0] == 0:
+                return pd.Series([], name=self.target_names[0], dtype=float)
            start = X[self.time_col].iloc[0]
            end = X[self.time_col].iloc[-1]
            if len(self.regressors):
@@ -829,6 +849,13 @@ class TS_SKLearn(TimeSeriesEstimator):
        if isinstance(X, TimeSeriesDataset):
            data = X
            X = data.test_data
+            # By default we predict on the dataset's test partition.
+            # Some internal call paths (e.g., training-metric logging) may pass a
+            # dataset whose test partition is empty; fall back to train partition.
+            if data.test_data is not None and len(data.test_data):
+                X = data.test_data
+            else:
+                X = data.train_data

        if self._model is not None:
            X = X[self.regressors]
--- a/test/automl/test_forecast.py
+++ b/test/automl/test_forecast.py
@@ -681,11 +681,55 @@ def test_cv_step():
    print("yahoo!")


+def test_log_training_metric_ts_models():
+    """Test that log_training_metric=True does not raise for any available forecasting estimator under test."""
+    import statsmodels.api as sm
+
+    from flaml.automl.task.time_series_task import TimeSeriesTask
+
+    estimators_all = TimeSeriesTask("forecast").estimators.keys()
+    estimators_to_test = ["xgboost", "arima", "lassolars", "tcn", "snaive", "prophet", "orbit"]
+    estimators = [
+        est for est in estimators_to_test if est in estimators_all
+    ]  # not all estimators available in current python env
+    print(f"Testing estimators: {estimators}")
+
+    # Prepare data
+    data = sm.datasets.co2.load_pandas().data["co2"]
+    data = data.resample("MS").mean()
+    data = data.bfill().ffill()
+    data = data.to_frame().reset_index()
+    data = data.rename(columns={"index": "ds", "co2": "y"})
+    num_samples = data.shape[0]
+    time_horizon = 12
+    split_idx = num_samples - time_horizon
+    df = data[:split_idx]
+
+    # Test each time series model with log_training_metric=True
+    for estimator in estimators:
+        print(f"\nTesting {estimator} with log_training_metric=True")
+        automl = AutoML()
+        settings = {
+            "time_budget": 3,
+            "metric": "mape",
+            "task": "forecast",
+            "eval_method": "holdout",
+            "label": "y",
+            "log_training_metric": True,  # This should not cause errors
+            "estimator_list": [estimator],
+        }
+        automl.fit(dataframe=df, **settings, period=time_horizon, force_cancel=True)
+        print(f"  ✅ {estimator} SUCCESS with log_training_metric=True")
+        if automl.best_estimator:
+            assert automl.best_estimator == estimator
+
+
 if __name__ == "__main__":
    # test_forecast_automl(60)
    # test_multivariate_forecast_num(5)
    # test_multivariate_forecast_cat(5)
-    test_numpy()
+    # test_numpy()
    # test_forecast_classification(5)
    # test_forecast_panel(5)
    # test_cv_step()
+    test_log_training_metric_ts_models()