#* topic_data_partner_usage_nodps:
#*   attr:
#*     fillcolor: '3'
#*   desc: Plot relative topic usage across data partners with masked site IDs.
#*   ext: R
#*   inputs:
#*   - masked_topic_weight_totals_nodps
#*   - masked_data_partner_cdms
#* 

topic_data_partner_usage_nodps <- function( masked_topic_weight_totals_nodps, masked_data_partner_cdms) {
    ### Params
    #topic_correlations <- topic_correlations
    topic_weight_totals <- masked_topic_weight_totals_nodps
    print(head(topic_weight_totals))
    topic_distance_metric <- "Jensen-Shannon"
    order_topics_method <- "Topic Name/Usage" # Seriate, Topic Name/Usage
    order_data_partners_method <- "Seriate" # Seriate, Data Partner Name
    topic_name_font_size <- 4
    data_partner_name_font_size <- 4
    # seriation methods have some randomness - let's not, 'tis confusing
    set.seed(42)

    # make data partner id character
    topic_weight_totals$data_partner_id <- as.character(topic_weight_totals$data_partner_id)
    # we don't need topic_index
    topic_weight_totals <- select(topic_weight_totals, -topic_index)

    # topic_correlations <- select(topic_correlations, -topici_index, -topicj_index)
    # 
    # # compute distance 
    # if(topic_distance_metric == "Jensen-Shannon") {
    #     topic_correlations$metric <- 1 - sqrt(topic_correlations$js_div)
    # } else {
    #     topic_correlations$metric <- topic_correlations$corr
    # }

    # # perfect correlations only happen for self-compares, and blow the scale, so set them NA
    # topic_correlations$metric[topic_correlations$metric == 1] <- NA

    # # compute similarity matrix and set rownames
    # longish <- topic_correlations %>%
    #   select(topici_name, topicj_name, metric) %>%
    #   pivot_wider(names_from = topicj_name, values_from = metric) %>% 
    #   set_rownames(.$topici_name) %>%
    #   select(-topici_name)

    # mat <- as.matrix(longish)
    # colnames(mat) <- colnames(longish)
    # rownames(mat) <- rownames(longish)
    # mat <- mat[order(rownames(mat)), order(rownames(mat))]

    ave_weight_by_topic <- topic_weight_totals %>% 
      group_by(topic_name) %>% 
      summarize(topic_sum_weight = sum(topic_sum_weight)) %>%
      arrange(topic_name)

    
    # top_annotations <- HeatmapAnnotation(Usage = anno_barplot(ave_weight_by_topic$topic_sum_weight, 
    #                                                           border = FALSE, 
    #                                                           axis = FALSE),
    #                                      annotation_name_side = c(Usage = "left"), 
    #                                      annotation_name_gp = gpar(fontsize = 10, fontface = "bold"))

    # left_annotations <- rowAnnotation(Usage = anno_barplot(ave_weight_by_topic$topic_sum_weight, 
    #                                                        axis_param = list(direction = "reverse"), 
    #                                                        border = FALSE, 
    #                                                        axis = FALSE), 
    #                                                        show_annotation_name = c(Usage = FALSE))

    if(order_topics_method == "Seriate") {
        # see complexheatmap manual and http://nicolas.kruchten.com/content/2018/02/seriation/
        ordering <- seriate(dist(mat), method = "TSP")
        topic_ordering = names(ordering[[1]])
    } else  {
        topic_ordering = str_sort(ave_weight_by_topic$topic_name, numeric = TRUE) 
    } 

    
    # # spectral
    # metric_min <- min(topic_correlations$metric, na.rm = TRUE)
    # metric_max <- max(topic_correlations$metric, na.rm = TRUE)
    # col_fun <- colorRamp2(seq(metric_min, metric_max, length.out = 7), rev(brewer.pal(n = 7, name = "Spectral")))

    # map <- Heatmap(mat, 
    #                name = "Topic Similarity",
    #                col = col_fun, 
    #                column_title = "Topic Similarity", 
    #                rect_gp = gpar(col = "white", lwd = 1),
    #                column_names_gp = gpar(fontsize = topic_name_font_size),
    #                row_names_gp = gpar(fontsize = topic_name_font_size),
    #                row_names_side = "left",
    #                row_order = topic_ordering,
    #                column_order = topic_ordering,
    #                top_annotation = top_annotations,
    #                left_annotation = left_annotations, 
    #                height = unit(10, "cm"), 
    #                width = unit(10, "cm"))

    ### data partner breakouts
    norm_weight <- function(df) {
        df$normed_weight <- df$topic_sum_weight/sum(df$topic_sum_weight)
        return(df)
    }
    print(head(topic_weight_totals))

    data_partner_normed_long <- topic_weight_totals %>% 
      group_by(data_partner_id) %>%
      do(norm_weight(.)) %>%
      select(-topic_sum_weight)
      
    data_partner_normed_wide <- data_partner_normed_long %>%
      pivot_wider(names_from = data_partner_id, values_from = normed_weight) %>%
      as.data.frame() %>%
      set_rownames(., .$topic_name) %>%
      select(-topic_name)

    mat <- as.matrix(data_partner_normed_wide)
    rownames(mat) <- rownames(data_partner_normed_wide)
    colnames(mat) <- colnames(data_partner_normed_wide)
    mat <- mat[order(rownames(mat)), order(colnames(mat))]

    if(order_data_partners_method == "Seriate") {
        ordering <- seriate(mat, method = "BEA_TSP")
        dp_ordering <- names(ordering[[2]]) 
    } else {
        dp_ordering <- str_sort(colnames(mat), numeric = TRUE) 
    }    

    # for color mapping CDMs
    unique_cdms <- unique(masked_data_partner_cdms$cdm_name)
    cdm_colors <- brewer.pal(n = length(unique_cdms), name = "Set2")
    names(cdm_colors) <- unique_cdms

    # for ordering CDM annotation same as the matrix
    cdm_names <- masked_data_partner_cdms$cdm_name
    names(cdm_names) <- masked_data_partner_cdms$data_partner_id
    cdm_annotation = columnAnnotation(CDM = cdm_names[colnames(mat)], col = list(CDM = cdm_colors))

    ## This data has some outliers but we still want to see the bulk of range below 0.1, hence the weird color scale
    top_95th <- quantile(data_partner_normed_long$normed_weight, 0.95)
    top_100th <- max(data_partner_normed_long$normed_weight)
    col_fun_data_partner <- colorRamp2(c(0, top_95th, top_95th + 1e-7, top_100th), c("blue", "orange", "orange", "red"))

    row_labels <- rownames(mat)
    col_labels <- colnames(mat)
    column_names_rot <- 90

    row_topic_index <- as.integer(gsub("^T-(\\d+) .+$", "\\1", rownames(mat)))

    row_labels_fix <- paste0(rownames(mat), " \u2014") 
    row_labels <- row_labels_fix
    row_labels[row_topic_index %% 10 != 0] <- "\u2014"
    row_labels[row_topic_index %% 5 != 0 & 
            row_topic_index %% 10 != 0] <- "-"
    # always label #1, but use a short tic
    row_labels[which.min(row_topic_index)] <- paste0(rownames(mat), " -")[which.min(row_topic_index)]
    #row_labels[which.max(row_topic_index)] <- row_labels_fix[which.max(row_topic_index)]

    data_partner_map <- Heatmap(mat,
                   name = "Data Partner Usage",
                   column_title = "Data Partner Topic Usage", 
                   col = col_fun_data_partner, 
                   rect_gp = gpar(col = "white", lwd = 0.5),
                   row_labels = row_labels,
                   column_labels = col_labels,
                   column_order = dp_ordering,
                   column_names_gp = gpar(fontsize = 6),
                   row_names_gp = gpar(fontsize = 10),
                   row_names_side = "left",
                   row_order = rev(topic_ordering),
                   width = unit(12, "cm"),
                   height = unit(18, "cm"),
                   heatmap_legend_param = list(title = "Data Partner\n Relative Usage", 
                                               col_fun = col_fun_data_partner),
                   bottom_annotation = cdm_annotation
                )

    draw(data_partner_map, merge_legend = TRUE)
    
    # for plotting again later
    grob = grid.grabExpr(draw(data_partner_map)) 

    return(to_rdata(grob))
}

#################################################
## Global imports and functions included below ##
#################################################

library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
library(magrittr)
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
library(seriation)

to_rdata <- function(data) {
    output <- new.output()
    output_fs <- output$fileSystem()
    saveRDS(data, output_fs$get_path("data.rds", 'w'))
}

from_rdata <- function(data) {
    fs <- data$fileSystem()
        
    path <- fs$get_path("data.rds", 'r')
    rds <- readRDS(path)
    return(rds)
}

int64fix <- function(df) {
    is_int64 <- function(x) {
        "integer64" %in% class(x)
    }
    needs_fix <- lapply(df, is_int64) %>% unlist()
    df[needs_fix] <- lapply(df[needs_fix], as.integer)
    return(df)
}

`%+%` <- function(a, b) {
    paste0(a, b)
}

# rotates a vector so the smallest entry is first
# was trying to use with seriation to get more repeatable orderings,
# but seriation isn't quite as wrap-around as I thought, handy function to
# keep somewhere though...
#rotate_to_first <- function(v) {
#  first <- str_sort(v, numeric = TRUE)[1]
#  first_index_adj <- which(v == first)[1] - 2
#  rotated <- v[(1:length(v) + first_index_adj) %% length(v) + 1]
#  return(rotated)
#}

first_n_words <- function(charvec, n = 6) {
    charvec %>%
    str_split(" +") %>% 
    lapply(function(x) {
        sub <- head(x, n); 
        ifelse(length(x) > length(sub), 
               paste(c(sub, "..."), collapse = " "),                    paste(sub, collapse = " "))
        }) %>% 
    unlist()
}