#### AllSeasons Paper: Fig.B2 scale variance by season ####
## Author: Dr Peter King (p.king1@leeds.ac.uk)
## Last change: 16/10/2025
## Changes:
# - Completely rewrote setup to use tidyverse efficiently
# - misc cosmetic fixes including text sizes, line alpha
# - Adding backup code if you want to draw visit frequency facets


# *****************************************************************************
#### Section 0: Setting up ####
## Find session information in 00_ReplicatePaper.R
# *****************************************************************************


## Libraries here: ************************************************************
library(magrittr)
library(dplyr)
library(apollo)
library(reshape2)
library(ggplot2)
library(ggridges)
library(distributional)
library(ggdist)
library(gridExtra)
library(data.table)
library(here)
library(tidyverse)


# ****************************************************************
#### Section 1: Import Data ####
# ****************************************************************


## Read the anonymised choice data in long format: one row per respondent
## per choice task (author's note: N=7163 * 9 tasks)
Data <- data.frame(
  fread(
    here("CEInputData", "database_AllSeasons_Step1_Anonymised.csv")
  )
)


## Build a wide table of choices: one row per respondent, one column per task
AllSeasons <- Data %>%
  # Coerce Task (via character) to integer so the pivoted columns are named
  # Choice1..Choice9, which is what the final select() expects
  mutate(Task = as.integer(as.character(Task))) %>%
  # Copy each respondent's first-row Season and MostRecentVisit onto all of
  # their rows (these should already be constant within respondent)
  group_by(Respondent) %>%
  mutate(Season = first(Season), MostRecentVisit = first(MostRecentVisit)) %>%
  ungroup() %>%
  # Spread the Choice values across columns named Choice<Task>
  pivot_wider(
    names_from = Task,
    names_prefix = "Choice",
    values_from = Choice,
    id_cols = c(Respondent, Season, MostRecentVisit)
  ) %>%
  # Fix the column order: identifiers first, then Choice1..Choice9
  dplyr::select(
    Respondent, Season, MostRecentVisit,
    paste0("Choice", seq_len(9))
  )


# ****************************************************************
#### Section 2: Reformat data to have values per choice per season  ####
# ****************************************************************


# ---- Season totals: respondents per season and implied number of tasks ----
# n_resp = distinct respondents in the season; Total = n_resp * 9 tasks.
# Columns are returned in the order Season, Total, n_resp.
season_totals <- AllSeasons %>%
  as_tibble() %>%
  distinct(Respondent, Season) %>%
  dplyr::count(Season, name = "n_resp") %>%
  mutate(Total = n_resp * 9, .before = n_resp)


# Legend labels per season, e.g. "Winter\n(n=1879)", in ascending order of
# the Season code.
# The season name is looked up by code (0 = Winter, 1 = Spring, 2 = Summer,
# 3 = Autumn, the same coding used for the facet labels) rather than pasted
# in positionally, so the labels stay correct even when a season is absent
# from the data instead of failing on a length mismatch.
season_labels <- season_totals %>%
  arrange(Season) %>%
  mutate(label = paste0(
    c("0" = "Winter", "1" = "Spring", "2" = "Summer", "3" = "Autumn")[
      as.character(Season)
    ],
    "\n(n=", n_resp, ")"
  )) %>%
  pull(label)


## Plot data: frequency and share of "No" vs "Yes" choices within each
## Season x MostRecentVisit cell, plus approximate interval bounds
PlotData2 <- AllSeasons %>%
  as_tibble() %>%
  # Back to long format: one row per respondent per Choice column
  pivot_longer(
    cols = starts_with("Choice"),
    names_to = "Task",
    values_to = "value"
  ) %>%
  mutate(
    value = as.integer(as.character(value)),
    # Codes 1 and 2 -> "Yes", code 3 -> "No"; any other code is kept as text
    value = case_when(
      value %in% c(1L, 2L) ~ "Yes",
      value == 3L ~ "No",
      TRUE ~ as.character(value)
    ),
    # Factor with level order No, Yes (any other text becomes NA here)
    value = factor(value, levels = c("No", "Yes"))
  ) %>%
  # Count choices per season x visit frequency x answer
  group_by(Season, MostRecentVisit, value) %>%
  summarise(Freq = n(), .groups = "drop") %>%
  # Everything below is relative to the Season x MostRecentVisit total
  group_by(Season, MostRecentVisit) %>%
  mutate(
    # Share of this answer within the season x visit cell
    Percentage = Freq / sum(Freq),
    # Approximate 95% bounds on the counts (Freq +/- 1.96 * sqrt(Freq)),
    # floored at zero; the *25/*75 versions use half that width
    YMIN_count = pmax(0, Freq - 1.96 * sqrt(Freq)),
    YMAX_count = Freq + 1.96 * sqrt(Freq),
    Y25_count = pmax(0, Freq - (1.96 / 2) * sqrt(Freq)),
    Y75_count = Freq + (1.96 / 2) * sqrt(Freq),
    # Rescale the count bounds to shares of the cell total
    YMIN = YMIN_count / sum(Freq),
    YMAX = YMAX_count / sum(Freq),
    Y25 = Y25_count / sum(Freq),
    Y75 = Y75_count / sum(Freq)
  ) %>%
  ungroup()


# Factor levels for MostRecentVisit: keep the order in which the values first
# appear in PlotData2.
most_recent_levels <- unique(PlotData2$MostRecentVisit)
# (optional) order by overall frequency, descending, instead: uncomment to use
# most_recent_levels <- PlotData2 %>% group_by(MostRecentVisit) %>% summarise(total = sum(Freq)) %>% arrange(desc(total)) %>% pull(MostRecentVisit)


# Convert MostRecentVisit to a factor using that level order
PlotData2 <- mutate(
  PlotData2,
  MostRecentVisit = factor(MostRecentVisit, levels = most_recent_levels)
)


# ************************************
## In case we want the details dropped
# ************************************


# Legend text for each MostRecentVisit code ("0".."5")
LegendLabels_Simple <- setNames(
  c(
    "I do not visit",
    "Once or twice a season",
    "Once or twice a month",
    "Once a week",
    "Several times a week",
    "Every day"
  ),
  as.character(0:5)
)


# Season name for each Season code ("0".."3")
Facet_Labels_Simple <- setNames(
  c("Winter", "Spring", "Summer", "Autumn"),
  as.character(0:3)
)


# facet labels for seasons (same code -> name lookup as Facet_Labels_Simple)
season_names <- Facet_Labels_Simple


# ****************************************************************
#### Section 3A: Plot Setup ####
# ****************************************************************


## One text size shared by every themed text element
TextSize <- 12


## Common text style (black serif at TextSize) reused across theme() entries
TextSetup <- element_text(
  family = "serif",
  colour = "black",
  size = TextSize
)


# ****************************************************************
#### Section 4: simple plot   ####
# ****************************************************************


## Figure B2: share of "No Change" vs "Option A or B" choices, one panel per
## season, coloured by visit frequency (MostRecentVisit), with YMIN..YMAX
## ranges from PlotData2.
## label_percent() and pretty_breaks() are namespace-qualified because the
## scales package is not attached by this script.
FigureB2 <-

  PlotData2 %>%

  ggplot(aes(x = value, y = Percentage, colour = MostRecentVisit,
             group = interaction(MostRecentVisit, value))) +

  # Point = share, range = YMIN..YMAX; dodged so visit groups sit side by side
  geom_pointrange(aes(ymin = YMIN, ymax = YMAX),
                position = position_dodge(width = 0.8),
                lineend = "square",
                size = 0.8,      # Controls the size of the central point
                linewidth = 1) + # Controls the thickness of the range line

  # One panel per Season code, labelled via season_names; y-axis free per panel
  facet_wrap(~ Season,
             scales = "free_y",
             labeller = labeller(Season = season_names),
             nrow = 2) +

  theme_bw() +

  scale_y_continuous(
    name = "Percentage of Choices",
    labels = scales::label_percent(accuracy = 1),
    breaks = scales::pretty_breaks(n = 6)
  ) +

  scale_x_discrete(name = NULL, labels = c("No" = "No Change", "Yes" = "Option A or B")) +

  # Six shades of blue, assigned in the order of the MostRecentVisit levels;
  # legend text is matched to the levels by name via LegendLabels_Simple
  scale_colour_manual(
    name = "Visit Frequency",
    values = RColorBrewer::brewer.pal(9, "Blues")[c(9, 8, 7, 5, 3, 2)],
    labels = LegendLabels_Simple
  ) +

  # Light divider between the two x-axis categories
  geom_vline(xintercept = 1.5, colour = "grey80", linewidth = 0.5) +
  theme(
    legend.position = "bottom",
    legend.text = TextSetup,
    legend.title = TextSetup,
    axis.text.x = TextSetup,
    axis.text.y = TextSetup,
    axis.title.y = TextSetup,
    strip.background = element_rect(fill = "white"),
    strip.text = TextSetup,
    legend.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank()
  )


# ****************************************************************
#### Section 4.5: Export Plot ####
# ****************************************************************


## Write FigureB2 to a 20 x 15 cm PNG at 500 dpi under OtherOutput/Figures
ggsave(
  filename = here(
    "OtherOutput/Figures",
    "H2_FigureB2_VisitVarianceBySeason.png"
  ),
  plot = FigureB2,
  device = "png",
  width = 20,
  height = 15,
  units = "cm",
  dpi = 500
)


# END OF SCRIPT *******************************************


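## Alternative plot (backup code, uncomment to use): facet by visit frequency
## and colour by season. Note: it calls label_percent(), pretty_breaks() and
## brewer.pal() unqualified, so attach scales and RColorBrewer first.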
# PlotData2 %>%
#
#   ggplot(aes(x = value, y = Percentage, colour = Season %>% as.factor(),
#              group = interaction(Season, value))) +
#
#   geom_pointrange(aes(ymin = YMIN, ymax = YMAX),
#                   position = position_dodge(width = 0.8),
#                   lineend = "square",
#                   size = 0.8,      # Controls the size of the central point
#                   linewidth = 1) + # Controls the thickness of the range line
#
#   facet_wrap(~ MostRecentVisit,
#              labeller = as_labeller(c(LegendLabels_Simple)),
#              nrow = 2) +
#
#   theme_bw() +
#   scale_y_continuous(
#     name = "Percentage of Choices",
#     labels = label_percent(accuracy = 1),
#     breaks = pretty_breaks(n = 6)
#   ) +
#
#   scale_x_discrete(name = NULL, labels = c("No" = "No Change", "Yes" = "Option A or B")) +
#
#   scale_colour_manual(
#     name = "Season",
#     values = brewer.pal(9, "Blues")[c(4, 6, 7, 9)],
#     guide = guide_legend(reverse = FALSE)
#   ) +
#   geom_vline(xintercept = 1.5, colour = "grey80", linewidth = 0.5) +
#   theme(
#     legend.position = "bottom",
#     legend.text = TextSetup,
#     legend.title = TextSetup,
#     axis.text.x = TextSetup,
#     axis.text.y = TextSetup,
#     axis.title.y = TextSetup,
#     strip.background = element_rect(fill = "white"),
#     strip.text = TextSetup,
#     legend.background = element_blank(),
#     panel.grid.major.x = element_blank(),
#     panel.grid.minor.x = element_blank(),
#     panel.grid.major.y = element_blank()
#   )

