#### AllSeasons: TableA1 ####
## Author: Dr Peter King (p.king1@leeds.ac.uk)
## Last change: 30/09/2025
## Changes:
# - Verifying that it reproduces as intext
# - Fixed income variable name


# *****************************************************************************
#### Section 0: Setting up ####
## Find session information in 00_ReplicatePaper.R
# *****************************************************************************


## Libraries here: ************************************************************
rm(list = ls())
library(tidyverse)
library(magrittr)
library(here)
library(data.table)
library(Rfast)
library(janitor)
library(rstatix)
library(DescTools)


# **************************************************************************
#### Section 1: Import Data  ####
# **************************************************************************


## Latest version:
Data <-
  here("SurveyData", "AllSeasons_dataframe_Step3_Anonymised.csv") %>%
  fread() %>%
  data.frame()



# **************************************************************************
#### Section 2: Transform Data  ####
# **************************************************************************


## New version with more categories
Data$DistanceCategories <- cut(
  Data$MilesDistance,
  breaks = c(-1, 1.609344, 4.02336, 11.265408, 23.335488, 32.18688),
  labels = c(1, 2, 3, 4, 5)
) %>% as.numeric()


## Add bracktes for reporting
Data$AgeLevels <-
  cut(
    Data$ExactAge,
    breaks = c(0, 29, 39, 49, 59, 69, 120),
    labels = c(1, 2, 3, 4, 5, 6)
  ) %>% as.numeric()



## collect the raw levels into brackets for ease of reporting
Data$IncomeBracket <- cut(
  Data$IncomeLevels_PlusMissing,
  breaks = c(-2, 20501, 26800, 51999, 52000),
  labels = c(1, 2, 3, 4)
) %>% as.numeric()


# To simplify analysis I just calculate england or not
## (scotland and wales bundled together here sorry guys)
Data$EnglandDummy <- ifelse(Data$Country == 0, 1, 0)



# **************************************************************************
#### Section 3: Setup functions  ####
# **************************************************************************


# Add MostRecentVisit to Variable vector and reorder alphabetically
Variable <- c(
  "AgeLevels",
  "Country",
  "DistanceCategories",
  "EthnicityDummyWhite",
  "Gender",
  "IncomeBracket",
  "Impairment",
  "ClassDummy",
  "Urbanicity",
  "MostRecentVisit"
)


# Create proper category labels mapping
CategoryLabels <- list(
  Gender = c("0" = "Men", "1" = "Women"),
  AgeLevels = c("1" = "18 – 29", "2" = "30 – 39", "3" = "40 – 49",
                "4" = "50 – 59", "5" = "60 – 69", "6" = "70+"),
  EthnicityDummyWhite = c("0" = "Non-white", "1" = "White"),
  IncomeBracket = c("1" = "< £20,500", "2" = "£20,501 to £26,800",
                    "3" = "£26,801 to £54,000", "4" = "£54,000+"),
  Country = c("0" = "England", "1" = "Wales", "2" = "Scotland"),
  Urbanicity = c("0" = "Rural", "1" = "Urban"),
  ClassDummy = c("0" = "ABC1", "1" = "C2DE"),
  Impairment = c("0" = "None", "1" = "Any"),
  DistanceCategories = c("1" = "<1.6 km", "2" = "1.6 - 4km", "3" = "4.1 - 15km",
                         "4" = "16 - 31km", "5" = "32km+"),
  MostRecentVisit = c("0" = "Did not visit", "1" = "Once or twice a season",
                      "2" = "Once or twice a month", "3" = "Once a week",
                      "4" = "Several times a week", "5" = "Every day")
)


# Variable name labels for table
VariableNames <- c(
  AgeLevels = "Age (years)",
  Country = "Country of residence",
  DistanceCategories = "Distance to nearby forest",
  EthnicityDummyWhite = "Ethnicity",
  Gender = "Gender",
  IncomeBracket = "Household income (gross annual)",
  Impairment = "Sensory impairment",
  ClassDummy = "Social grade",
  Urbanicity = "Urbanicity",
  MostRecentVisit = "Visit frequency to nearby forest in the season of sampling"
)


# Modified function to extract just G-statistic and add stars
AddStarsToStat <- function(test_string) {
  # Extract statistic and p-value from format "stat (pval stars)"
  stat <- as.numeric(sub(" \\(.*", "", test_string))
  pval <- as.numeric(sub(".*\\(([0-9.]+).*", "\\1", test_string))

  stars <- ifelse(pval < 0.01, "***",
                  ifelse(pval < 0.05, "**",
                         ifelse(pval < 0.1, "*", "")))

  paste0(round(stat, 2), stars)
}


## Custom function that adds significance stars
AddStars <- function(Data) {
  ifelse(Data < 0.01,
         paste0("***"),
         ifelse(Data < 0.05,
                paste0("**"),
                ifelse(Data < 0.1,
                       paste0("*"),
                       paste0(" "))))
}


# **************************************************************************
#### Section 4: Function  ####
# **************************************************************************


# Function to get observed counts, proportions, and chi-square results
perform_GT_with_descriptives_perseason <- function(data,
                                                   comparison,
                                                   var1) {
  # Create contingency table with counts and proportions
  TestInput_Count <- data %>%
    janitor::tabyl(!!sym(var1), !!sym("Season"))

  TestInput_P <- TestInput_Count %>%
    adorn_percentages(denominator = "col") %>%
    dplyr::select("0")


  ## Format nicely
  Output_Summary <- TestInput_Count %>%
    adorn_percentages(denominator = "col") %>%
    adorn_pct_formatting(digits = 2) %>%
    adorn_ns(position = "front") %>%
    data.frame()

  TestData <-  TestInput_Count %>%
    DescTools::GTest(p = TestInput_P,
                     rescale.p = TRUE
    )

  TestData_Format <- paste0(TestData$statistic %>% round(3),
                            " (",
                            TestData$p.value %>% round(3),
                            " ",
                            TestData$p.value %>% AddStars(),
                            ")")

  ## Add variable name
  Output_Summary_S1 <- cbind(
    "Variable" = var1,
    "Season" = comparison,
    Output_Summary,
    "GT" = TestData_Format
  )


  return(Output_Summary_S1)
}



# **************************************************************************
#### Section 5: Apply function ####
# **************************************************************************


# Run the existing loop (keep as is)
results_list_variables <- list()
for (V in Variable) {
  results_temp_season <- list()
  for (i in 1:3) {
    result <- perform_GT_with_descriptives_perseason(
      data = Data %>% dplyr::filter(Season %in% c(0, i)),
      comparison = i,
      var1 = V
    )
    colnames(result) <- c("Variable", "Comparison", "Category", "Winter", "Other", "GT")
    results_temp_season[[i]] <- result
  }
  results_list_variables[[V]] <- do.call(bind_rows, results_temp_season)
}


## Run here
Table_GTest_Results <- bind_rows(results_list_variables, .id = "Variable") %>%
  data.frame()



# make a tibble and detect which Variable column exists
tbl <- Table_GTest_Results %>% tibble::as_tibble()

var_col <- if ("Variable.1" %in% names(tbl)) {
  "Variable.1"
} else if ("Variable" %in% names(tbl)) {
  "Variable"
} else {
  stop("No 'Variable' column found in Table_GTest_Results")
}

# clean GT and create Category_Label robustly
tbl <- tbl %>%
  mutate(GT_Clean = sapply(GT, AddStarsToStat))

tbl$Category_Label <- mapply(function(var, cat) {
  # ensure cat is character for lookup
  cat_chr <- as.character(cat)
  if (var %in% names(CategoryLabels)) {
    lbls <- CategoryLabels[[var]]
    if (cat_chr %in% names(lbls)) lbls[[cat_chr]] else cat_chr
  } else {
    cat_chr
  }
}, tbl[[var_col]], tbl$Category, USE.NAMES = FALSE)

# select and pivot using the detected var_col
out_table <- tbl %>%
  select(
    Variable = !!rlang::sym(var_col),
    Category = Category_Label,
    Winter, Other, GT_Clean, Comparison
  ) %>%
  pivot_wider(
    names_from = Comparison,
    values_from = c(Other, GT_Clean),
    names_sep = "_"
  ) %>%
  rename(
    Spring = Other_1,
    Summer = Other_2,
    Autumn = Other_3
  )

# show result
TableA1_Final <- out_table


# **************************************************************************
#### Section 6: Tidy table  ####
# **************************************************************************


# Add G-test rows
TableA1_WithTests <- TableA1_Final %>%
  group_by(Variable) %>%
  group_modify(~ {
    bind_rows(
      .x,
      data.frame(
        Category = "G test statistic vs Winter",
        Winter = "",
        Spring = unique(.x$GT_Clean_1)[1],
        Summer = unique(.x$GT_Clean_2)[1],
        Autumn = unique(.x$GT_Clean_3)[1]
      )
    )
  }) %>%
  ungroup() %>%
  dplyr::select(Variable, Category, Winter, Spring, Summer, Autumn) %>%
  mutate(
    Variable = ifelse(duplicated(Variable), "", VariableNames[Variable])
  )


# Add column headers with n
colnames(TableA1_WithTests) <- c(
  "Variable",
  "Category",
  "Winter (n=1879)",
  "Spring (n=1769)",
  "Summer (n=1767)",
  "Autumn (n=1748)"
)

TableA1_WithTests



# **************************************************************************
#### Section 7: Export  ####
# **************************************************************************


TableA1_WithTests %>% fwrite(sep = "#",
                       quote = FALSE,
                       file = paste0(here(
                         "OtherOutput/Tables",
                         "TableA1_GTests_VariablesVsWinter.txt"
                       )))

