#* index_dates_vs_bmi:
#*   attr:
#*     fillcolor: '2'
#*   desc: Summary visualization of patient index dates vs the ratio of how many
#*     condition eras they had pre vs post.
#*   ext: R
#*   inputs:
#*   - person_all_facts
#* 

library(dplyr)
library(ggplot2)
library(tidyr)
library(hexbin)

# Plot index date against the pre/post condition-era count ratio, per cohort.
#
# Pulls four columns of `person_all_facts` from Spark into a local data frame,
# derives `pre_post_count_ratio`, prints the structure of the result, draws a
# hex-binned count plot (log10 y-axis, one facet per cohort), and returns the
# local data frame.
#
# Args:
#   person_all_facts: Spark DataFrame that must expose the columns
#     `post_num_condition_eras`, `pre_num_condition_eras`, `cohort` and
#     `index_date`.
#
# Returns:
#   The collected data frame with both count columns coerced to numeric and
#   an added `pre_post_count_ratio` column.
index_dates_vs_pre_post_count_ratio <- function(person_all_facts) {
    # Columns brought to the driver. The `*_epoch_effective_days` columns were
    # previously considered here but are currently not selected.
    wanted_columns <- c(
        "post_num_condition_eras",
        "pre_num_condition_eras",
        "cohort",
        "index_date"
    )
    local_facts <- SparkR::collect(
        SparkR::select(person_all_facts, wanted_columns)
    )

    # mutate() evaluates its arguments in order, so the ratio is computed from
    # the already-coerced numeric counts. No filtering is applied: a post count
    # of 0 yields Inf (or NaN for 0 / 0) and those rows are kept in the result.
    epoch_stats_by_person <- mutate(
        local_facts,
        post_num_condition_eras = as.numeric(post_num_condition_eras),
        pre_num_condition_eras = as.numeric(pre_num_condition_eras),
        pre_post_count_ratio = pre_num_condition_eras / post_num_condition_eras
    )

    # Diagnostic dump of the collected frame's structure.
    str(epoch_stats_by_person)

    # `fun = length` turns the summary into a per-hexagon row count; `z` only
    # needs to be some column of the right length for that to work.
    hex_counts <- stat_summary_hex(
        aes(x = index_date, y = pre_post_count_ratio, z = index_date),
        fun = length,
        bins = 60
    )

    p <- ggplot(epoch_stats_by_person) +
        hex_counts +
        scale_y_continuous(trans = "log10") +
        annotation_logticks(sides = "l") +
        facet_wrap(~ cohort)

    plot(p)

    epoch_stats_by_person
}