#### AllSeasons: Table 3 Poe Tests  ###############
# AUTHOR: Peter King (p.king1@leeds.ac.uk)
# LAST CHANGE: 30/09/2025
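# FUNCTION: To run the Poe test (cmdlr::poe_test) by attribute and season pair.
## NOTE(review): this header previously read "To repeat Mann-Whitney test by
## attribute and season, for each of 1000 columns and then reporting the mean
## P value so unsurprisingly a little slow." The code below calls
## cmdlr::poe_test() once per attribute and season pair -- confirm which
## description is current.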
## The comments are me talking to myself with the SSH terminal.
# CHANGES:
# - Sped up version using MW below
# - need to change to latest WTP


# *****************************************************************************
#### Section 0: Setting up ####
## Find session information in 00_ReplicatePaper.R
# *****************************************************************************


## Libraries here: ************************************************************
options(scipen = 90)
library(apollo)
library(tidyverse)
library(dplyr)
library(magrittr)
library(mded)
library(here)
library(data.table)
library(stats)
library(cmdlr)


# *************************************************************
#### Section 1: Import Data  ####
# *************************************************************


## This is the very large data frame
# Read the (very large) simulated-means CSV with data.table::fread() for
# speed, then convert it to a plain data.frame for the rest of the script.
Data <- data.frame(
  fread(here("CEOutputData/H3", "SimulatedMeans_Correlated.csv"))
)


# *************************************************************
#### Section 2: Define useful functions  ####
# *************************************************************

# Format p-values to three decimals with significance stars appended.
#
# Input:   numeric vector of p-values.
# Returns: a one-column data.frame (column `result`) holding strings such as
#          "0.020**".
# Stars are assigned from the ROUNDED value: *** below 0.01, ** below 0.05,
# * below 0.1, nothing otherwise.
Mutator <- function(Input) {
  rounded <- round(Input, 3)
  # findInterval() returns 0 for values below 0.01, 1 for [0.01, 0.05),
  # 2 for [0.05, 0.1) and 3 for 0.1 or more; adding 1 makes it an index.
  star_levels <- c("***", "**", "*", "")
  stars <- star_levels[findInterval(rounded, c(0.01, 0.05, 0.1)) + 1]
  data.frame(result = sprintf("%.3f%s", rounded, stars))
}


# P-value formatting function
# Format p-values for display in the results table.
#
# p:       p-values, numeric or coercible with as.numeric().
# Returns: character vector. Each value is printed to three decimals
#          ("<0.001" when below 0.001), followed by a space and stars when
#          significant: *** p < 0.001, ** p < 0.01, * p < 0.05.
format_p <- function(p) {
  p_num <- as.numeric(p)
  # One suffix per band: below 0.001, [0.001, 0.01), [0.01, 0.05), 0.05+
  suffixes <- c(" ***", " **", " *", "")
  suffix <- suffixes[findInterval(p_num, c(0.001, 0.01, 0.05)) + 1]
  # Missing p-values carry no stars
  suffix[is.na(suffix)] <- ""
  shown <- ifelse(p_num < 0.001, "<0.001", sprintf("%.3f", p_num))
  paste0(shown, suffix)
}


# Poe test wrapper
# Run cmdlr::poe_test() on one column of `Data`, comparing two seasons.
#
# var:     name (string) of the column in `Data` to test.
# s1, s2:  values of `Data$Season` that select the two samples.
# Data:    data frame with a `Season` column and a column named `var`.
# Returns: whatever cmdlr::poe_test() yields, reduced to a plain vector:
#          the first column if it is a data frame, otherwise the result
#          coerced with as.numeric().
#          NOTE(review): the caller uses pmap_dbl(), so this is presumably a
#          single p-value -- confirm against the cmdlr documentation.
compute_poe <- function(var, s1, s2, Data) {
  values <- Data[[var]]
  if (is.null(values)) {
    # Without this check a misspelt name silently passes NULL to the test
    stop("Column '", var, "' not found in `Data`", call. = FALSE)
  }
  # which() drops rows whose Season is NA. Plain logical indexing would turn
  # each such row into an NA element in BOTH samples.
  x <- values[which(Data$Season == s1)]
  y <- values[which(Data$Season == s2)]
  res <- cmdlr::poe_test(x, y)
  if (is.data.frame(res)) {
    res[[1]]    # first column, returned as stored (no coercion)
  } else {
    as.numeric(res)
  }
}


# *************************************************************
#### Section 3: Define variables  ####
# *************************************************************


# Columns of Data that are tested, one attribute per line
Variables <- c(
  "Colour",   "Colour2",
  "Smell",    "Smell2",
  "Sound",    "Sound2",
  "Deadwood", "Deadwood2"
)


# Display order of the table labels: for each attribute, "high" then "medium"
# ("Colours: high", "Colours: medium", "Smells: high", ...)
variable_order <- paste0(
  rep(c("Colours", "Smells", "Sounds", "Deadwoods"), each = 2),
  c(": high", ": medium")
)


# Labels for the season codes 0, 1, 2, 3 (in that order)
season_labels <- c(
  "Winter",
  "Spring",
  "Summer",
  "Autumn"
)


# Every unordered pair of distinct season codes (Season1 < Season2): 6 rows
Looper <- dplyr::filter(
  expand.grid(Season1 = 0:3, Season2 = 0:3),
  Season1 < Season2
)


# *************************************************************
#### Section 4: Report all tests ####
# *************************************************************


# One row per attribute x season pair, holding the formatted Poe-test p-value
results_tidy <- crossing(Variable = Variables, Looper) %>%
  # Run the test while Variable still holds the raw column names of Data
  mutate(
    P_value = format_p(
      pmap_dbl(
        list(Variable, Season1, Season2),
        function(var, s1, s2) compute_poe(var, s1, s2, Data)
      )
    )
  ) %>%
  mutate(
    # Raw column name -> table label (names without a match become NA)
    Variable = unname(
      c(
        Colour2   = "Colours: high",
        Colour    = "Colours: medium",
        Smell2    = "Smells: high",
        Smell     = "Smells: medium",
        Sound2    = "Sounds: high",
        Sound     = "Sounds: medium",
        Deadwood2 = "Deadwoods: high",
        Deadwood  = "Deadwoods: medium"
      )[Variable]
    ),
    # Ordered factors so that arrange() follows the reporting order
    Variable = factor(Variable, levels = variable_order, ordered = TRUE),
    across(
      c(Season1, Season2),
      function(s) factor(s, levels = 0:3, labels = season_labels, ordered = TRUE)
    )
  ) %>%
  dplyr::select(Season1, Season2, Variable, P_value) %>%
  arrange(Season1, Season2, Variable) %>%
  # A p-value that prints as exactly 1.000 is reported as ">0.999"
  mutate(P_value = replace(P_value, P_value == "1.000", ">0.999"))


# *************************************************************
#### Section 5: Output + tidy table ####
# *************************************************************


# The tidy results are reported as Table 3
Table3 <- results_tidy


## Print the table to the console as comma-separated text
write.csv(Table3, quote = FALSE, row.names = FALSE)


## Save the same table to disk, using "#" as the field separator
fwrite(
  Table3,
  file = here("OtherOutput/Tables/", "Table3_PoeTestsWTP_Correlated.txt"),
  sep = "#",
  quote = FALSE
)


# End Of Script   **********************************************************
