Spaces:

bosh94
/

chronos2-test-app

Running

App Files Files Community

bosh94 committed 7 days ago

Commit

0718fad

verified ·

1 Parent(s): f79bf21

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -141

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import os
-import inspect
 import numpy as np
 import pandas as pd
 import gradio as gr
@@ -8,54 +7,29 @@ import torch
 from chronos import Chronos2Pipeline
 # =========================
 # Config
 # =========================
 MODEL_ID_DEFAULT = os.getenv("CHRONOS_MODEL_ID", "amazon/chronos-2")
 DATA_DIR = "data"
 # =========================
-# Helpers: files & device
 # =========================
 def available_test_csv():
     if not os.path.isdir(DATA_DIR):
         return []
     return sorted(f for f in os.listdir(DATA_DIR) if f.lower().endswith(".csv"))
 def pick_device(ui_choice: str) -> str:
     if (ui_choice or "").startswith("cuda") and torch.cuda.is_available():
         return "cuda"
     return "cpu"
-# =========================
-# Model cache
-# =========================
-_PIPELINE = None
-_PIPELINE_META = {}
-def get_pipeline(model_id: str, device: str):
-    """
-    Caches the pipeline across calls to avoid re-downloading and re-loading.
-    """
-    global _PIPELINE, _PIPELINE_META
-    model_id = (model_id or MODEL_ID_DEFAULT).strip()
-    device = "cuda" if device == "cuda" and torch.cuda.is_available() else "cpu"
-    if (
-        _PIPELINE is None
-        or _PIPELINE_META.get("model_id") != model_id
-        or _PIPELINE_META.get("device") != device
-    ):
-        # Chronos-2 pipeline
-        _PIPELINE = Chronos2Pipeline.from_pretrained(model_id, device_map=device)
-        _PIPELINE_META = {"model_id": model_id, "device": device}
-    return _PIPELINE
-# =========================
-# Data generation/loading
-# =========================
 def make_sample_series(n, seed, trend, season_period, season_amp, noise):
     rng = np.random.default_rng(int(seed))
     t = np.arange(int(n))
@@ -64,30 +38,30 @@ def make_sample_series(n, seed, trend, season_period, season_amp, noise):
         + float(season_amp) * np.sin(2 * np.pi * t / max(1, int(season_period)))
         + rng.normal(0.0, float(noise), size=len(t))
     )
-    # shift up if negative (not required, but keeps nice plots)
     mn = float(np.min(y))
     if mn < 0:
         y = y - mn
     return y.astype(np.float32)
 def load_series_from_csv(path_or_file, column=None):
     df = pd.read_csv(path_or_file)
     if df.shape[1] == 0:
         raise ValueError("CSV vuoto o non leggibile.")
     col = (column or "").strip()
     if col == "":
         numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
         if not numeric_cols:
-            # try coercion to numeric on all columns (sometimes dtype is object)
-            numeric_cols = []
             for c in df.columns:
                 coerced = pd.to_numeric(df[c], errors="coerce")
                 if coerced.notna().sum() >= 10:
                     numeric_cols.append(c)
-            if not numeric_cols:
-                raise ValueError("Nessuna colonna numerica nel CSV. Specifica una colonna con numeri.")
         col = numeric_cols[0]
     if col not in df.columns:
@@ -99,76 +73,78 @@ def load_series_from_csv(path_or_file, column=None):
     return y.astype(np.float32), col
 # =========================
-# Chronos2 predict normalization
 # =========================
-def _extract_samples(pred_out):
     """
-    Chronos2Pipeline.predict may return:
-      - numpy array / list -> samples
-      - dict with 'samples'
-      - object with attribute 'samples'
-    This returns np.ndarray of shape (n_draws, pred_len) or (pred_len,) if only one draw.
     """
-    if isinstance(pred_out, np.ndarray):
-        return pred_out
-    if isinstance(pred_out, list):
-        return np.asarray(pred_out)
-    if isinstance(pred_out, dict):
-        if "samples" in pred_out:
-            return np.asarray(pred_out["samples"])
-        # sometimes "forecast" keys etc.
-        for k in ("predictions", "prediction", "outputs"):
-            if k in pred_out:
-                return np.asarray(pred_out[k])
-        return np.asarray(pred_out)
-    # object with samples attribute
-    if hasattr(pred_out, "samples"):
-        return np.asarray(getattr(pred_out, "samples"))
-    # last resort
-    return np.asarray(pred_out)
-def chronos2_predict_samples(pipe, y, prediction_length: int, n_draws: int):
     """
-    Calls pipe.predict in a robust way across Chronos versions:
-    - Uses `inputs=` (required)
-    - Uses `num_predictions=` if supported
-    - If not supported, falls back to a single prediction and returns shape (1, pred_len)
     """
-    sig = inspect.signature(pipe.predict)
-    params = sig.parameters
-    kwargs = {"inputs": y.tolist(), "prediction_length": int(prediction_length)}
-    # API differences: some versions accept num_predictions, others not
-    if "num_predictions" in params:
-        kwargs["num_predictions"] = int(n_draws)
-    # Some versions might have different names; try a couple safe fallbacks
-    try:
-        out = pipe.predict(**kwargs)
-    except TypeError as e:
-        # If num_predictions was rejected, retry without it
-        if "num_predictions" in kwargs:
-            kwargs.pop("num_predictions", None)
-            out = pipe.predict(**kwargs)
-        else:
-            raise e
-    samples = _extract_samples(out).astype(np.float32)
-    # Normalize shape: expected (n_draws, pred_len)
-    if samples.ndim == 1:
-        samples = samples[None, :]
-    elif samples.ndim == 2:
-        pass
-    else:
-        # If extra dims, squeeze conservatively
-        samples = np.squeeze(samples)
-        if samples.ndim == 1:
-            samples = samples[None, :]
-    return samples
 # =========================
 # Forecast core
@@ -185,33 +161,32 @@ def run_forecast(
     season_amp,
     noise,
     prediction_length,
-    num_draws,
     q_low,
     q_high,
     device_ui,
     model_id,
 ):
-    # Validate quantiles
-    if float(q_low) >= float(q_high):
         raise gr.Error("Quantile low deve essere < quantile high.")
-    # Device + pipeline
     device = pick_device(device_ui)
     pipe = get_pipeline(model_id, device)
-    # Choose input series
     if input_mode == "Test CSV":
         if not test_csv_name:
-            raise gr.Error("Seleziona un file nella dropdown dei Test CSV oppure usa Sample/Upload.")
-        csv_path = os.path.join(DATA_DIR, test_csv_name)
-        if not os.path.exists(csv_path):
-            raise gr.Error(f"Non trovo {csv_path}. Assicurati che esista nel repo dello Space.")
-        y, used_col = load_series_from_csv(csv_path, csv_column)
         source = f"Test CSV: {test_csv_name} ({used_col})"
     elif input_mode == "Upload CSV":
         if upload_csv is None:
-            raise gr.Error("Carica un CSV oppure scegli Sample/Test CSV.")
         y, used_col = load_series_from_csv(upload_csv.name, csv_column)
         source = f"Upload CSV ({used_col})"
@@ -219,27 +194,41 @@ def run_forecast(
         y = make_sample_series(n, seed, trend, season_period, season_amp, noise)
         source = "Sample data"
-    # Forecast samples
-    samples = chronos2_predict_samples(
-        pipe=pipe,
-        y=y,
         prediction_length=int(prediction_length),
-        n_draws=int(num_draws),
     )
-    # Quantiles
-    median = np.quantile(samples, 0.50, axis=0)
-    low = np.quantile(samples, float(q_low), axis=0)
-    high = np.quantile(samples, float(q_high), axis=0)
-    # Plot
     t_hist = np.arange(len(y))
     t_fcst = np.arange(len(y), len(y) + int(prediction_length))
     fig, ax = plt.subplots(figsize=(10, 4))
     ax.plot(t_hist, y, label="history")
     ax.plot(t_fcst, median, label="forecast (median)")
-    ax.fill_between(t_fcst, low, high, alpha=0.25, label=f"band [{float(q_low):.2f}, {float(q_high):.2f}]")
     ax.axvline(len(y) - 1, linestyle="--", linewidth=1)
     ax.set_title(source)
     ax.set_xlabel("t")
@@ -247,13 +236,14 @@ def run_forecast(
     ax.grid(True, alpha=0.3)
     ax.legend()
-    # Output table + CSV
     out_df = pd.DataFrame(
         {
             "t": t_fcst,
             "median": median,
-            f"q{float(q_low):.2f}": low,
-            f"q{float(q_high):.2f}": high,
         }
     )
@@ -266,25 +256,27 @@ def run_forecast(
         "source": source,
         "history_points": int(len(y)),
         "prediction_length": int(prediction_length),
-        "requested_draws": int(num_draws),
-        "returned_draws": int(samples.shape[0]),
     }
     return fig, out_df, out_path, info
 # =========================
 # UI
 # =========================
 with gr.Blocks(title="Chronos-2 • HF Spaces Demo") as demo:
-    gr.Markdown("# ⏱️ Chronos-2 Forecast Demo (HF Spaces)\n\n"
-                "Supporta **Sample**, **Test CSV** (da cartella `data/`) e **Upload CSV**.")
     with gr.Row():
-        input_mode = gr.Radio(
-            ["Sample", "Test CSV", "Upload CSV"],
-            value="Sample",
-            label="Input source",
-        )
         device_ui = gr.Dropdown(
             ["cpu", "cuda (se disponibile)"],
             value="cuda (se disponibile)" if torch.cuda.is_available() else "cpu",
@@ -293,10 +285,7 @@ with gr.Blocks(title="Chronos-2 • HF Spaces Demo") as demo:
         model_id = gr.Textbox(value=MODEL_ID_DEFAULT, label="Model ID")
     with gr.Row():
-        test_csv_name = gr.Dropdown(
-            choices=available_test_csv(),
-            label="Test CSV disponibili (cartella data/)",
-        )
         upload_csv = gr.File(label="Upload CSV", file_types=[".csv"])
         csv_column = gr.Textbox(label="Colonna numerica (opzionale)", placeholder="es: value")
@@ -310,8 +299,6 @@ with gr.Blocks(title="Chronos-2 • HF Spaces Demo") as demo:
     with gr.Accordion("Forecast settings", open=True):
         prediction_length = gr.Slider(1, 180, 30, step=1, label="Prediction length")
-        # UI label stays "Num samples", internally treated as number of prediction draws if supported
-        num_draws = gr.Slider(1, 400, 200, step=10, label="Num samples (draws)")
         q_low = gr.Slider(0.01, 0.49, 0.10, step=0.01, label="Quantile low")
         q_high = gr.Slider(0.51, 0.99, 0.90, step=0.01, label="Quantile high")
@@ -336,7 +323,6 @@ with gr.Blocks(title="Chronos-2 • HF Spaces Demo") as demo:
             season_amp,
             noise,
             prediction_length,
-            num_draws,
             q_low,
             q_high,
             device_ui,

 import os
 import numpy as np
 import pandas as pd
 import gradio as gr
 from chronos import Chronos2Pipeline
 # =========================
 # Config
 # =========================
 MODEL_ID_DEFAULT = os.getenv("CHRONOS_MODEL_ID", "amazon/chronos-2")
 DATA_DIR = "data"
 # =========================
+# Utils
 # =========================
 def available_test_csv():
     """Return the sorted names of the ``.csv`` entries inside ``DATA_DIR``.

     The extension check is case-insensitive. An empty list is returned when
     ``DATA_DIR`` is not an existing directory.
     """
     if os.path.isdir(DATA_DIR):
         names = [entry for entry in os.listdir(DATA_DIR) if entry.lower().endswith(".csv")]
         names.sort()
         return names
     return []
 def pick_device(ui_choice: str) -> str:
     """Translate the UI device choice into the device string to load on.

     ``"cuda"`` is returned only when the choice starts with ``"cuda"`` and
     ``torch.cuda.is_available()`` is true; every other case, including a
     ``None`` or empty choice, yields ``"cpu"``.
     """
     wants_cuda = (ui_choice or "").startswith("cuda")
     # CUDA availability is only probed when the user actually asked for it.
     return "cuda" if wants_cuda and torch.cuda.is_available() else "cpu"
 def make_sample_series(n, seed, trend, season_period, season_amp, noise):
     rng = np.random.default_rng(int(seed))
     t = np.arange(int(n))
         + float(season_amp) * np.sin(2 * np.pi * t / max(1, int(season_period)))
         + rng.normal(0.0, float(noise), size=len(t))
     )
+    # shift up if negative to keep plots nice
     mn = float(np.min(y))
     if mn < 0:
         y = y - mn
     return y.astype(np.float32)
 def load_series_from_csv(path_or_file, column=None):
     df = pd.read_csv(path_or_file)
     if df.shape[1] == 0:
         raise ValueError("CSV vuoto o non leggibile.")
     col = (column or "").strip()
     if col == "":
+        # try native numeric dtypes first
         numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
+        # fallback: try coercion
         if not numeric_cols:
             for c in df.columns:
                 coerced = pd.to_numeric(df[c], errors="coerce")
                 if coerced.notna().sum() >= 10:
                     numeric_cols.append(c)
+        if not numeric_cols:
+            raise ValueError("Nessuna colonna numerica nel CSV. Specifica la colonna corretta.")
         col = numeric_cols[0]
     if col not in df.columns:
     return y.astype(np.float32), col
 # =========================
+# Pipeline cache
 # =========================
+_PIPELINE = None
+_PIPELINE_META = {}
+def get_pipeline(model_id: str, device: str):
+    global _PIPELINE, _PIPELINE_META
+    model_id = (model_id or MODEL_ID_DEFAULT).strip()
+    device = "cuda" if device == "cuda" and torch.cuda.is_available() else "cpu"
+    if (
+        _PIPELINE is None
+        or _PIPELINE_META.get("model_id") != model_id
+        or _PIPELINE_META.get("device") != device
+    ):
+        _PIPELINE = Chronos2Pipeline.from_pretrained(model_id, device_map=device)
+        _PIPELINE_META = {"model_id": model_id, "device": device}
+    return _PIPELINE
+# =========================
+# Chronos-2 predict_df helpers
+# =========================
+def build_context_df(y: np.ndarray, freq: str = "D"):
     """
+    Build a minimal context DataFrame compatible with Chronos2Pipeline.predict_df().
+    We generate a synthetic timestamp index so it works for Sample and numeric-only CSV.
     """
+    ts = pd.date_range("2000-01-01", periods=len(y), freq=freq)
+    return pd.DataFrame({"id": "series_0", "timestamp": ts, "target": y})
+def pick_quantile_column(pred_df: pd.DataFrame, q: float) -> str:
     """
+    Column naming can vary. We robustly find a column representing quantile q.
+    Common patterns: "0.1", "0.5", "0.9" OR "q0.1" OR "quantile_0.1" etc.
     """
+    q = float(q)
+    # direct numeric-string match
+    for c in pred_df.columns:
+        try:
+            if abs(float(c) - q) < 1e-9:
+                return c
+        except Exception:
+            pass
+    # prefixed patterns
+    candidates = []
+    for c in pred_df.columns:
+        lc = str(c).lower()
+        if "quant" in lc or lc.startswith("q"):
+            # try to extract float from tail
+            for token in [lc.replace("quantile", "").replace("_", ""), lc.replace("q", "")]:
+                try:
+                    if abs(float(token) - q) < 1e-9:
+                        candidates.append(c)
+                except Exception:
+                    pass
+    if candidates:
+        return candidates[0]
+    raise ValueError(
+        f"Non riesco a trovare la colonna del quantile {q}. "
+        f"Colonne disponibili: {list(pred_df.columns)}"
+    )
 # =========================
 # Forecast core
     season_amp,
     noise,
     prediction_length,
     q_low,
     q_high,
     device_ui,
     model_id,
 ):
+    q_low = float(q_low)
+    q_high = float(q_high)
+    if q_low >= q_high:
         raise gr.Error("Quantile low deve essere < quantile high.")
     device = pick_device(device_ui)
     pipe = get_pipeline(model_id, device)
+    # 1) pick data
     if input_mode == "Test CSV":
         if not test_csv_name:
+            raise gr.Error("Seleziona un file nella dropdown dei Test CSV.")
+        path = os.path.join(DATA_DIR, test_csv_name)
+        if not os.path.exists(path):
+            raise gr.Error(f"Non trovo {path}. Assicurati che sia nel repo.")
+        y, used_col = load_series_from_csv(path, csv_column)
         source = f"Test CSV: {test_csv_name} ({used_col})"
     elif input_mode == "Upload CSV":
         if upload_csv is None:
+            raise gr.Error("Carica un CSV per usare la modalità Upload.")
         y, used_col = load_series_from_csv(upload_csv.name, csv_column)
         source = f"Upload CSV ({used_col})"
         y = make_sample_series(n, seed, trend, season_period, season_amp, noise)
         source = "Sample data"
+    # 2) build context df (single series)
+    context_df = build_context_df(y, freq="D")
+    # 3) predict quantiles via predict_df (stable API per chronos-2)
+    quantiles = sorted({q_low, 0.5, q_high})
+    pred_df = pipe.predict_df(
+        context_df,
         prediction_length=int(prediction_length),
+        quantile_levels=quantiles,
+        id_column="id",
+        timestamp_column="timestamp",
+        target="target",
     )
+    # 4) extract arrays
+    col_low = pick_quantile_column(pred_df, q_low)
+    col_med = pick_quantile_column(pred_df, 0.5)
+    col_high = pick_quantile_column(pred_df, q_high)
+    # pred_df contains the forecast horizon rows; keep only series_0
+    pred_df = pred_df[pred_df["id"] == "series_0"].copy()
+    ts_fcst = pd.to_datetime(pred_df["timestamp"]).to_numpy()
+    low = pred_df[col_low].to_numpy(dtype=np.float32)
+    median = pred_df[col_med].to_numpy(dtype=np.float32)
+    high = pred_df[col_high].to_numpy(dtype=np.float32)
+    # 5) plot (use integer axis for simplicity)
     t_hist = np.arange(len(y))
     t_fcst = np.arange(len(y), len(y) + int(prediction_length))
     fig, ax = plt.subplots(figsize=(10, 4))
     ax.plot(t_hist, y, label="history")
     ax.plot(t_fcst, median, label="forecast (median)")
+    ax.fill_between(t_fcst, low, high, alpha=0.25, label=f"band [{q_low:.2f}, {q_high:.2f}]")
     ax.axvline(len(y) - 1, linestyle="--", linewidth=1)
     ax.set_title(source)
     ax.set_xlabel("t")
     ax.grid(True, alpha=0.3)
     ax.legend()
+    # 6) output table + downloadable csv
     out_df = pd.DataFrame(
         {
             "t": t_fcst,
+            "timestamp": ts_fcst,
             "median": median,
+            f"q{q_low:.2f}": low,
+            f"q{q_high:.2f}": high,
         }
     )
         "source": source,
         "history_points": int(len(y)),
         "prediction_length": int(prediction_length),
+        "quantile_levels": quantiles,
+        "pred_df_columns": list(out_df.columns),
     }
     return fig, out_df, out_path, info
 # =========================
 # UI
 # =========================
 with gr.Blocks(title="Chronos-2 • HF Spaces Demo") as demo:
+    gr.Markdown(
+        "# ⏱️ Chronos-2 Forecast Demo (HF Spaces)\n"
+        "- **Sample**: genera una serie sintetica\n"
+        "- **Test CSV**: usa file in `data/`\n"
+        "- **Upload CSV**: carica un tuo CSV\n\n"
+        "Questa versione usa **predict_df()** (API consigliata per Chronos-2) e calcola direttamente i **quantili**. "
+    )
     with gr.Row():
+        input_mode = gr.Radio(["Sample", "Test CSV", "Upload CSV"], value="Sample", label="Input source")
         device_ui = gr.Dropdown(
             ["cpu", "cuda (se disponibile)"],
             value="cuda (se disponibile)" if torch.cuda.is_available() else "cpu",
         model_id = gr.Textbox(value=MODEL_ID_DEFAULT, label="Model ID")
     with gr.Row():
+        test_csv_name = gr.Dropdown(choices=available_test_csv(), label="Test CSV disponibili (data/)")
         upload_csv = gr.File(label="Upload CSV", file_types=[".csv"])
         csv_column = gr.Textbox(label="Colonna numerica (opzionale)", placeholder="es: value")
     with gr.Accordion("Forecast settings", open=True):
         prediction_length = gr.Slider(1, 180, 30, step=1, label="Prediction length")
         q_low = gr.Slider(0.01, 0.49, 0.10, step=0.01, label="Quantile low")
         q_high = gr.Slider(0.51, 0.99, 0.90, step=0.01, label="Quantile high")
             season_amp,
             noise,
             prediction_length,
             q_low,
             q_high,
             device_ui,