#### AllSeasons: Table C2 Mann-Whitney test simulated mean WTP  ###############
# AUTHOR: Peter King (p.king1@leeds.ac.uk)
# LAST CHANGE: 16/10/2025
# FUNCTION: To repeat the Mann-Whitney test by attribute and season
## for each of 1000 columns, and then report the mean P value, so unsurprisingly it is a little slow.
## The comments are me talking to myself in the SSH terminal.
# CHANGES:
# - Sped up version using MW below
# - need to change to latest WTP


# *************************************************************
#### Section 0: Setup and estimate models ####
# *************************************************************

# install.packages("data.table",repos="http://cran.us.r-project.org")
options(scipen = 90)
library(apollo)
library(tidyverse)
library(dplyr)
library(magrittr)
library(mded)
library(here)
library(data.table)
library(stats)
library(purrr)
library(tidyr)
library(stringr)


# *************************************************************
#### Section 1: Import Data  ####
# *************************************************************



## The very large data frame: read with fread(), then converted to a plain
## data.frame.
Data <- data.frame(
  fread(here("CEOutputData/H3", "SimulatedMeans_Correlated.csv"))
)


## Columns to loop through: for every attribute, the "2"-suffixed column
## comes first, followed by the unsuffixed one.
Variables <- paste0(
  rep(c("Colour", "Smell", "Sound", "Deadwood"), each = 2),
  c("2", "")
)


# *************************************************************
#### Section 3: Run MW tests ####
# *************************************************************


# Run pairwise Mann-Whitney (Wilcoxon rank-sum) tests of one column across
# the levels of `Season`, and build one summary string per pair of seasons.
#
# Args:
#   var:  Length-one character; name of the column in `data` to test.
#   data: Data frame containing `var` and a `Season` column. Defaults to the
#         script-level `Data`, so existing calls `process_variable(var)`
#         behave as before.
#
# Returns:
#   A data frame with one row per season pair and two columns:
#     Output: "<statistic>, r:<effect size>, P:<adjusted p><signif. code>",
#             with effect size and p-value printed to 3 decimal places.
#     Season: "<group1>-<group2>", using the group labels from rstatix.
#
# NOTE(review): `p.adj` and `p.adj.signif` are selected unconditionally. This
# presumably relies on Season having more than two levels so that rstatix
# reports adjusted p-values -- confirm against the data.
process_variable <- function(var, data = Data) {
  if (!is.character(var) || length(var) != 1L || is.na(var)) {
    stop("`var` must be a single column name.", call. = FALSE)
  }
  missing_cols <- setdiff(c(var, "Season"), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  Formula <- as.formula(paste(var, "~ Season"))

  wilcox_eff <- rstatix::wilcox_effsize(data = data, formula = Formula)
  wilcox_test <- rstatix::wilcox_test(data = data, formula = Formula)

  # Match effect sizes to tests on the comparison pair itself rather than on
  # row position, so the pairing cannot silently drift if the two rstatix
  # outputs are ever ordered differently.
  effect_sizes <- dplyr::select(wilcox_eff, group1, group2, effsize)

  wilcox_test %>%
    dplyr::select(statistic, p.adj, p.adj.signif, group1, group2) %>%
    dplyr::left_join(effect_sizes, by = c("group1", "group2")) %>%
    dplyr::mutate(
      Output = sprintf(
        "%s, r:%.3f, P:%.3f%s",
        statistic,
        round(effsize, 3),
        round(p.adj, 3),
        p.adj.signif
      ),
      Season = paste0(group1, "-", group2)
    ) %>%
    dplyr::select(Output, Season)
}


# Run the tests for every entry of Variables and stack the per-variable
# results; the originating column name is stored in "Variable".
results <- map_dfr(
  purrr::set_names(Variables),
  process_variable,
  .id = "Variable"
)


# *************************************************************
#### Section 4: Re-organise results ####
# *************************************************************


# Reshape Mann-Whitney result strings into the layout used for Table C2.
#
# Args:
#   df: Either a long data frame with columns `Variable` (WTP column name),
#       `Season` ("<s1>-<s2>" pair of integer season codes) and `Output`
#       ("<statistic>, r:<r>, P:<p><signif>"), or a wide data frame with
#       `Variable` plus one column of such strings per season pair (these
#       are pivoted to long form first).
#
# Returns:
#   A data frame sorted by Fixed, Variable, WTP with columns:
#     Fixed, Variable: ordered factors naming the first and second season of
#                      each pair (code 0 = Winter, ..., 3 = Autumn).
#     WTP:             ordered factor; names ending in "2" become
#                      "<name>: high", all others "<name>: medium".
#     TestStatistic:   numeric, rounded to 3 decimal places.
#     EffectSize:      character, 3 decimal places.
#     P.Value:         character; "<0.001" or 3 decimal places, followed by
#                      a space and the significance stars when present.
#
# NOTE(review): assumes Season is coded 0-3; any other code yields NA season
# labels -- confirm against the data.
# NOTE(review): the p-value is recovered from the text of `Output`. If that
# text was already rounded to 3 dp, true values in [0.0005, 0.001) arrive as
# 0.001 and are printed as "0.001" rather than "<0.001".
tidy_compare_table <- function(df) {
  if (!"Variable" %in% names(df)) {
    stop("`df` must contain a `Variable` column.", call. = FALSE)
  }

  # long conversion if needed
  df_long <- if (all(c("Season", "Output") %in% names(df))) {
    df
  } else {
    df %>%
      pivot_longer(-Variable, names_to = "Season", values_to = "Output")
  }

  season_names <- c("Winter", "Spring", "Summer", "Autumn")

  # Display order of the WTP labels: "high" before "medium" per attribute.
  attribute_names <- c("Colour", "Smell", "Sound", "Deadwood")
  wtp_levels <- paste0(
    rep(attribute_names, each = 2),
    c(": high", ": medium")
  )

  df_long %>%
    rename(WTP_raw = Variable) %>%
    separate(Season, into = c("s1", "s2"), sep = "-", remove = FALSE) %>%
    mutate(
      s1 = as.integer(s1),
      s2 = as.integer(s2),
      # Season codes are zero-based, R indexing is one-based.
      Fixed = factor(
        season_names[s1 + 1L],
        levels = season_names, ordered = TRUE
      ),
      Variable = factor(
        season_names[s2 + 1L],
        levels = season_names, ordered = TRUE
      ),
      # Everything before the first comma is the test statistic; strip any
      # non-numeric characters before converting.
      statistic_raw = str_extract(Output, "^[^,]+"),
      statistic = suppressWarnings(
        as.numeric(str_replace_all(statistic_raw, "[^0-9eE.\\-]", ""))
      ),
      r = as.numeric(str_extract(Output, "(?<=r:)\\-?[0-9.]+")),
      p_value = as.numeric(str_extract(Output, "(?<=P:)\\-?[0-9.]+")),
      # Trailing asterisks are the significance code; "ns" leaves none.
      stars = str_extract(Output, "\\*+$"),
      stars_suffix = ifelse(is.na(stars), "", paste0(" ", stars)),
      # sprintf() formats element-wise, so NA or negative entries cannot pad
      # the other values to a common width as vectorised format() would.
      EffectSize = sprintf("%.3f", round(r, 3)),
      p_text = ifelse(
        !is.na(p_value) & p_value < 0.001,
        "<0.001",
        sprintf("%.3f", round(p_value, 3))
      ),
      P.Value = paste0(p_text, stars_suffix),
      # apply renaming rule
      WTP = case_when(
        str_detect(WTP_raw, "2$") ~ paste0(str_remove(WTP_raw, "2$"), ": high"),
        TRUE ~ paste0(WTP_raw, ": medium")
      ),
      WTP = factor(WTP, levels = wtp_levels, ordered = TRUE)
    ) %>%
    transmute(
      Fixed,
      Variable,
      WTP,
      TestStatistic = round(statistic, 3),
      EffectSize,
      P.Value
    ) %>%
    arrange(Fixed, Variable, WTP)
}



# *************************************************************
#### Section 5: Output + tidy table ####
# *************************************************************


## Tidy the raw test results into Table C2
TableC2 <- tidy_compare_table(results)


## Write the table as "#"-separated text, without quoting
fwrite(
  TableC2,
  file = here("OtherOutput/Tables/", "TableC2_MWTestsWTP_Correlated.txt"),
  sep = "#",
  quote = FALSE
)


# End Of Script   **********************************************************
