# src/iohmm_evac/report/loader.py

# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (C) 2026 SWGY, Inc.
"""Load a simulation bundle (observations + sidecars) from disk."""

from __future__ import annotations

import tomllib
from dataclasses import dataclass
from pathlib import Path
from typing import Any

import pandas as pd

# Public names of this module; the underscore-prefixed helpers below are
# internal and intentionally left out of star-imports.
__all__ = ["SimulationBundle", "load_bundle"]


@dataclass(frozen=True, slots=True)
class SimulationBundle:
    """A loaded simulation: observations panel plus its three sidecars."""

    observations: pd.DataFrame
    """Long panel: household_id, t, state, departure, displacement, comm_count."""

    population: pd.DataFrame
    """Static covariates per household."""

    timeline: pd.DataFrame
    """Hourly exogenous timeline (forecast + warning orders)."""

    config: dict[str, Any]
    """Resolved configuration loaded from the TOML sidecar."""

    @property
    def n_households(self) -> int:
        """Number of households in this bundle."""
        return int(self.population.shape[0])

    @property
    def t_landfall(self) -> int:
        """The terminal hour (max ``t`` in the timeline)."""
        return int(self.timeline["t"].max())


def _sibling_paths(input_path: Path) -> tuple[Path, Path, Path]:
    """Derive the sidecar locations that sit next to ``input_path``.

    The sidecars share the directory and stem of ``input_path`` and are
    returned in the order (population, timeline, config).
    """
    directory, base = input_path.parent, input_path.stem
    population_path = directory / f"{base}.population.parquet"
    timeline_path = directory / f"{base}.timeline.parquet"
    config_path = directory / f"{base}.config.toml"
    return population_path, timeline_path, config_path


def _require_exists(label: str, path: Path) -> None:
    """Raise :class:`FileNotFoundError` naming ``label`` unless ``path`` exists."""
    if path.exists():
        return
    raise FileNotFoundError(f"Missing {label} sidecar: {path}")


def load_bundle(path: Path) -> SimulationBundle:
    """Read a complete simulation bundle, as written by :func:`iohmm_evac.io.write_results`.

    ``path`` points at the main observations Parquet file. The three
    sidecars are expected in the same directory under the names
    ``<stem>.population.parquet``, ``<stem>.timeline.parquet``, and
    ``<stem>.config.toml``, where ``<stem>`` is the stem of ``path``.

    Raises :class:`FileNotFoundError` if any of the four files is missing.
    """
    observations_path = Path(path)
    population_path, timeline_path, config_path = _sibling_paths(observations_path)

    # Verify every file up front, in a fixed order, so that a missing file is
    # reported before any of the others is read.
    expected_files = (
        ("observations", observations_path),
        ("population", population_path),
        ("timeline", timeline_path),
        ("config", config_path),
    )
    for label, candidate in expected_files:
        _require_exists(label, candidate)

    observations_df = pd.read_parquet(observations_path)
    population_df = pd.read_parquet(population_path)
    timeline_df = pd.read_parquet(timeline_path)
    # tomllib only accepts binary file objects.
    with config_path.open("rb") as handle:
        settings = tomllib.load(handle)

    return SimulationBundle(
        observations=observations_df,
        population=population_df,
        timeline=timeline_df,
        config=settings,
    )