#* index_dates_vs_age:
#* attr:
#* fillcolor: '2'
#* desc: Summary visualization patient index dates vs their age.
#* ext: R
#* inputs:
#* - person_all_facts
#*
library(dplyr)
library(ggplot2)
library(tidyr)
library(hexbin)

# Plot patient index dates against age at index as a hex-binned heat map,
# with one facet per cohort.
#
# Args:
#   person_all_facts: a Spark DataFrame providing the columns
#     pre_first_date, cci_score_quan, age_at_index, cohort and index_date.
#
# Returns:
#   NULL. The function is called for its side effects: it writes the
#   structure of the collected data to the console and draws the plot.
index_dates_vs_age <- function(person_all_facts) {
  plot_columns <- c(
    "pre_first_date", "cci_score_quan", "age_at_index", "cohort", "index_date"
  )

  # Pull only the needed columns out of Spark into a local data.frame.
  epoch_stats_by_person <- SparkR::collect(
    SparkR::select(person_all_facts, plot_columns)
  )

  # Call dplyr's mutate explicitly: SparkR exports a `mutate` of its own that
  # masks dplyr's when SparkR is attached and does not accept a data.frame.
  epoch_stats_by_person <- dplyr::mutate(
    epoch_stats_by_person,
    cci_score_quan = as.integer(cci_score_quan)
  )

  # str() prints by itself and returns NULL invisibly; wrapping it in print()
  # would emit an extra "NULL" line.
  str(epoch_stats_by_person)

  #epoch_stats_by_person <- epoch_stats_by_person %>% sample_frac(0.2)

  # With fun = length, each hexagon's fill is the number of rows falling in
  # it; cci_score_quan only serves as the vector being counted.
  # NOTE(review): rows with a missing x, y or z value are presumably dropped
  # by ggplot2 before counting -- confirm this is the intended denominator.
  p <- ggplot(epoch_stats_by_person) +
    stat_summary_hex(
      aes(x = index_date, y = age_at_index, z = cci_score_quan),
      fun = length,
      bins = 60
    ) +
    #scale_x_continuous(trans = "log10") +
    #scale_y_continuous(trans = "log10") +
    #scale_fill_continuous(trans = "log10") +
    #annotation_logticks(sides = "bl") +
    facet_wrap(~ cohort)
    #coord_equal()

  plot(p)

  # p2 <- ggplot(epoch_stats_by_person) +
  #   stat_summary_2d(aes(x = pre_epoch_effective_days, y = post_epoch_effective_days, z = increase_effective_days), fun = length, bins = 60) +
  #   scale_x_continuous(trans = "log10") +
  #   scale_y_continuous(trans = "log10") +
  #   scale_fill_continuous(trans = "log10") +
  #   annotation_logticks(sides = "bl")
  # plot(p2)

  return(NULL)
}