scenario.R

# Proxy Pool Governor — R tooling for scenario (.pps) files.
#
# Copyright (C) 2026  SWGY, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: AGPL-3.0-or-later

#' Read a Proxy Pool Scenario file
#'
#' @param path Path to .pps file
#' @return List with pools, services, and data (data.frame)
read_scenario <- function(path) {
  lines <- readLines(path)
  
  # Find section markers
  pools_idx <- grep("^@pools ", lines)
  services_idx <- grep("^@services ", lines)
  records_idx <- grep("^@records ", lines)
  data_idx <- grep("^---$", lines)
  
  if (length(data_idx) == 0) {
    stop("No data section found (missing '---' marker)")
  }
  
  # Parse pools
  pools <- NULL
  if (length(pools_idx) > 0) {
    n_pools <- as.integer(sub("^@pools ", "", lines[pools_idx]))
    pool_lines <- lines[(pools_idx + 1):(pools_idx + n_pools)]
    pools <- do.call(rbind, lapply(pool_lines, function(l) {
      parts <- strsplit(trimws(l), "\\s+")[[1]]
      data.frame(id = as.integer(parts[1]), name = parts[2], stringsAsFactors = FALSE)
    }))
  }
  
  # Parse services
  services <- NULL
  if (length(services_idx) > 0) {
    n_services <- as.integer(sub("^@services ", "", lines[services_idx]))
    service_lines <- lines[(services_idx + 1):(services_idx + n_services)]
    services <- do.call(rbind, lapply(service_lines, function(l) {
      parts <- strsplit(trimws(l), "\\s+")[[1]]
      data.frame(id = as.integer(parts[1]), name = parts[2], stringsAsFactors = FALSE)
    }))
  }
  
  # Parse data
  data_lines <- lines[(data_idx + 1):length(lines)]
  data_lines <- data_lines[!grepl("^#", data_lines) & nchar(trimws(data_lines)) > 0]
  
  if (length(data_lines) == 0) {
    data <- data.frame()
  } else {
    data <- read.table(
      text = data_lines,
      col.names = c(
        "timestamp", "pool", "service",
        "rate_success", "rate_lost_race", "rate_302",
        "rate_timeout", "rate_ssl", "rate_other",
        "response_time",
        "avg_success", "avg_response_time",
        "stddev_success", "stddev_response_time"
      ),
      colClasses = c(
        "numeric", "integer", "integer",
        rep("numeric", 11)
      )
    )
    
    # Add human-readable names as factors
    if (!is.null(pools)) {
      data$pool_name <- factor(data$pool, levels = pools$id, labels = pools$name)
    }
    if (!is.null(services)) {
      data$service_name <- factor(data$service, levels = services$id, labels = services$name)
    }
  }
  
  list(
    pools = pools,
    services = services,
    data = data
  )
}

#' Write a Proxy Pool Scenario file
#'
#' @param scenario List with pools, services, and data components
#' @param path Output path
#' @param comment Optional comment string for header
write_scenario <- function(scenario, path, comment = NULL) {
  con <- file(path, "w")
  on.exit(close(con))
  
  # Header
  writeLines(sprintf("# Proxy Pool Scenario v1"), con)
  writeLines(sprintf("# Generated: %s", Sys.time()), con)
  if (!is.null(comment)) {
    writeLines(sprintf("# %s", comment), con)
  }
  writeLines("", con)
  
  # Pools
  writeLines(sprintf("@pools %d", nrow(scenario$pools)), con)
  for (i in seq_len(nrow(scenario$pools))) {
    writeLines(sprintf("%d %s", scenario$pools$id[i], scenario$pools$name[i]), con)
  }
  writeLines("", con)
  
  # Services
  writeLines(sprintf("@services %d", nrow(scenario$services)), con)
  for (i in seq_len(nrow(scenario$services))) {
    writeLines(sprintf("%d %s", scenario$services$id[i], scenario$services$name[i]), con)
  }
  writeLines("", con)
  
  # Record count
  writeLines(sprintf("@records %d", nrow(scenario$data)), con)
  writeLines("---", con)
  
  # Data - select only the core columns in order
  core_cols <- c(
    "timestamp", "pool", "service",
    "rate_success", "rate_lost_race", "rate_302",
    "rate_timeout", "rate_ssl", "rate_other",
    "response_time",
    "avg_success", "avg_response_time",
    "stddev_success", "stddev_response_time"
  )
  
  write.table(
    scenario$data[, core_cols],
    con,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE,
    sep = " "
  )
}

#' Generate a simple random scenario for testing
#'
#' @param n_cycles Number of time cycles
#' @param n_pools Number of pools
#' @param n_services Number of services
#' @param cycle_duration Seconds between cycles
#' @param start_time Unix epoch start time
#' @param seed Random seed
#' @return Scenario list
generate_test_scenario <- function(
  n_cycles = 60,
  n_pools = 5,
  n_services = 3,
  cycle_duration = 60,
  start_time = 1737300000,
  seed = NULL
) {
  if (!is.null(seed)) set.seed(seed)
  
  pools <- data.frame(
    id = 0:(n_pools - 1),
    name = paste0("pool-", LETTERS[1:n_pools]),
    stringsAsFactors = FALSE
  )
  
  services <- data.frame(
    id = 0:(n_services - 1),
    name = paste0("service-", 1:n_services),
    stringsAsFactors = FALSE
  )
  
  # Generate records for each cycle, pool, service combination
  records <- list()
  idx <- 1
  
  # Base characteristics per pool
  pool_base_success <- runif(n_pools, 0.88, 0.96)
  pool_base_rt <- runif(n_pools, 0.5, 1.5)
  
  # Track moving stats
  history_window <- 20
  history <- array(
    dim = c(n_pools, n_services, history_window, 2),
    dimnames = list(NULL, NULL, NULL, c("success", "rt"))
  )
  history_idx <- rep(1, n_pools * n_services)
  dim(history_idx) <- c(n_pools, n_services)
  
  for (cycle in seq_len(n_cycles)) {
    timestamp <- start_time + (cycle - 1) * cycle_duration
    
    for (p in seq_len(n_pools)) {
      for (s in seq_len(n_services)) {
        # Simulate occasional degradation
        is_degraded <- runif(1) < 0.05
        
        if (is_degraded) {
          success <- runif(1, 0.5, 0.75)
          rt <- pool_base_rt[p] * runif(1, 2, 4)
        } else {
          success <- pmin(1, pmax(0, rnorm(1, pool_base_success[p], 0.02)))
          rt <- pmax(0.1, rnorm(1, pool_base_rt[p], 0.1))
        }
        
        # Store in history
        hi <- history_idx[p, s]
        history[p, s, hi, "success"] <- success
        history[p, s, hi, "rt"] <- rt
        history_idx[p, s] <- (hi %% history_window) + 1
        
        # Compute moving stats
        valid_success <- na.omit(history[p, s, , "success"])
        valid_rt <- na.omit(history[p, s, , "rt"])
        
        avg_success <- if (length(valid_success) > 0) mean(valid_success) else 0.9
        avg_rt <- if (length(valid_rt) > 0) mean(valid_rt) else 1.0
        sd_success <- if (length(valid_success) > 1) sd(valid_success) else 0.02
        sd_rt <- if (length(valid_rt) > 1) sd(valid_rt) else 0.1
        
        # Distribute failure across categories
        failure <- 1 - success
        rate_timeout <- failure * runif(1, 0.2, 0.4)
        rate_ssl <- failure * runif(1, 0.05, 0.15)
        rate_302 <- failure * runif(1, 0.1, 0.2)
        rate_lost <- failure * runif(1, 0.05, 0.15)
        rate_other <- failure - rate_timeout - rate_ssl - rate_302 - rate_lost
        rate_other <- pmax(0, rate_other)
        
        records[[idx]] <- data.frame(
          timestamp = timestamp,
          pool = p - 1,
          service = s - 1,
          rate_success = success,
          rate_lost_race = rate_lost,
          rate_302 = rate_302,
          rate_timeout = rate_timeout,
          rate_ssl = rate_ssl,
          rate_other = rate_other,
          response_time = rt,
          avg_success = avg_success,
          avg_response_time = avg_rt,
          stddev_success = sd_success,
          stddev_response_time = sd_rt
        )
        idx <- idx + 1
      }
    }
  }
  
  data <- do.call(rbind, records)
  
  list(
    pools = pools,
    services = services,
    data = data
  )
}