#* jensen_shannon:
#*   attr:
#*     fillcolor: '3'
#*   desc: Plot jensen-shannon heatmap.
#*   ext: R
#*   inputs:
#*   - topic_weight_totals
#*   - topic_correlations
#* 

jensen_shannon <- function(topic_correlations, topic_weight_totals) {
    ### Params
    topic_correlations <- topic_correlations
    topic_weight_totals <- topic_weight_totals
    topic_distance_metric <- "Jensen-Shannon"
    order_topics_method <- "Topic Name/Usage" # Seriate, Topic Name/Usage
    order_data_partners_method <- "Seriate" # Seriate, Data Partner Name
    topic_name_font_size <- 10
    data_partner_name_font_size <- 10
    # seriation methods have some randomness - let's not, 'tis confusing
    set.seed(42)

    # make data partner id character
    topic_weight_totals$data_partner_id <- as.character(topic_weight_totals$data_partner_id)
    # we don't need topic_index
    topic_weight_totals <- select(topic_weight_totals, -topic_index)
    topic_correlations <- select(topic_correlations, -topici_index, -topicj_index)

    # compute distance 
    if(topic_distance_metric == "Jensen-Shannon") {
        topic_correlations$metric <- sqrt(topic_correlations$js_div)
    } else {
        topic_correlations$metric <- topic_correlations$corr
    }

    topic_correlations$metric <- ifelse(topic_correlations$metric == 0, NA, topic_correlations$metric)

    # perfect correlations only happen for self-compares, and blow the scale, so set them NA
    topic_correlations$metric[topic_correlations$metric == 1] <- NA

    # compute similarity matrix and set rownames
    longish <- topic_correlations %>%
      select(topici_name, topicj_name, metric) %>%
      pivot_wider(names_from = topicj_name, values_from = metric) %>% 
      set_rownames(.$topici_name) %>%
      select(-topici_name)

    mat <- as.matrix(longish)
    colnames(mat) <- colnames(longish)
    rownames(mat) <- rownames(longish)
    mat <- mat[order(rownames(mat)), order(rownames(mat))]

    ave_weight_by_topic <- topic_weight_totals %>% 
      group_by(topic_name) %>% 
      summarize(topic_sum_weight = sum(topic_sum_weight)) %>%
      arrange(topic_name)

    
    top_annotations <- HeatmapAnnotation(Usage = anno_barplot(ave_weight_by_topic$topic_sum_weight, 
                                                              border = FALSE, 
                                                              axis = FALSE),
                                         annotation_name_side = c(Usage = "left"), 
                                         annotation_name_gp = gpar(fontsize = 10, fontface = "bold"))

    left_annotations <- rowAnnotation(Usage = anno_barplot(ave_weight_by_topic$topic_sum_weight, 
                                                           axis_param = list(direction = "reverse"), 
                                                           border = FALSE, 
                                                           axis = FALSE), 
                                                           show_annotation_name = c(Usage = FALSE))

    if(order_topics_method == "Seriate") {
        # see complexheatmap manual and http://nicolas.kruchten.com/content/2018/02/seriation/
        ordering <- seriate(dist(mat), method = "TSP")
        topic_ordering = names(ordering[[1]])
    } else  {
        topic_ordering = str_sort(ave_weight_by_topic$topic_name, numeric = TRUE) 
    } 

    
    # spectral
    metric_min <- min(topic_correlations$metric, na.rm = TRUE)
    metric_max <- max(topic_correlations$metric, na.rm = TRUE)
    col_fun <- colorRamp2(seq(metric_min, metric_max, length.out = 7), brewer.pal(n = 7, name = "Spectral"))

    row_labels <- rownames(mat)
    col_labels <- colnames(mat)
    column_names_rot <- 90

        row_topic_index <- as.integer(gsub("^T-(\\d+) .+$", "\\1", rownames(mat)))

        row_labels_fix <- paste0(rownames(mat), " \u2014") 
        row_labels <- row_labels_fix
        row_labels[row_topic_index %% 10 != 0] <- "\u2014"
        row_labels[row_topic_index %% 5 != 0 & 
                row_topic_index %% 10 != 0] <- "-"
        # always label #1, but use a short tic
        row_labels[which.min(row_topic_index)] <- paste0(rownames(mat), " -")[which.min(row_topic_index)]
        #row_labels[which.max(row_topic_index)] <- row_labels_fix[which.max(row_topic_index)]

        if(grepl("T-\\d+", colnames(mat)[1])) {
          col_topic_index <- as.integer(gsub("^T-(\\d+) .+$", "\\1", colnames(mat)))
        } else {
          col_topic_index <- as.integer(colnames(mat))
        }

        col_labels_fix <- paste0(colnames(mat), " \u2014") 
        col_labels <- col_labels_fix
        col_labels[col_topic_index %% 10 != 0] <- "\u2014"
        col_labels[col_topic_index %% 5 != 0 & 
                col_topic_index %% 10 != 0] <- "-"
        # always label #1, but use a short tic
        col_labels[which.min(col_topic_index)] <- paste0(colnames(mat), " -")[which.min(col_topic_index)]
        #col_labels[which.max(col_topic_index)] <- col_labels_fix[which.max(col_topic_index)]

    map <- Heatmap(mat, 
                   name = "Jensen-Shannon\nDistance",
                   col = col_fun, 
                   #column_title = "Jensen-Shannon\nDivergence", 
                   rect_gp = gpar(col = "white", lwd = .2),
                   column_names_gp = gpar(fontsize = topic_name_font_size),
                   row_names_gp = gpar(fontsize = topic_name_font_size),
                   row_names_side = "left",
                   row_order = rev(topic_ordering),
                   row_labels = row_labels,
                   column_labels = col_labels,
                   column_order = topic_ordering,
                   #top_annotation = top_annotations,
                   #left_annotation = left_annotations, 
                   height = unit(18, "cm"), 
                   width = unit(18, "cm"))

    print(mean(mat, na.rm = T))
    print(median(mat, na.rm = T))
    print(range(mat, na.rm = T))
    print(median(mat[290:300, 290:300], na.rm = T))
    print(mat[290:300, 290:300])

    draw(map)

    # for plotting again later
    #grob = grid.grabExpr(draw(map)) 
    #return(to_rdata(grob))
    return(NULL)

}

#################################################
## Global imports and functions included below ##
#################################################

library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
library(magrittr)
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
library(seriation)

to_rdata <- function(data) {
    output <- new.output()
    output_fs <- output$fileSystem()
    saveRDS(data, file = output_fs$get_path("data.rds", 'w'))
}

from_rdata <- function(data) {
    fs <- data$fileSystem()
        
    path <- fs$get_path("data.rds", 'r')
    rds <- readRDS(path)
    return(rds)
}

int64fix <- function(df) {
    is_int64 <- function(x) {
        "integer64" %in% class(x)
    }
    needs_fix <- lapply(df, is_int64) %>% unlist()
    df[needs_fix] <- lapply(df[needs_fix], as.integer)
    return(df)
}

`%+%` <- function(a, b) {
    paste0(a, b)
}

# rotates a vector so the smallest entry is first
# was trying to use with seriation to get more repeatable orderings,
# but seriation isn't quite as wrap-around as I thought, handy function to
# keep somewhere though...
#rotate_to_first <- function(v) {
#  first <- str_sort(v, numeric = TRUE)[1]
#  first_index_adj <- which(v == first)[1] - 2
#  rotated <- v[(1:length(v) + first_index_adj) %% length(v) + 1]
#  return(rotated)
#}

first_n_words <- function(charvec, n = 6) {
    charvec %>%
    str_split(" +") %>% 
    lapply(function(x) {
        sub <- head(x, n); 
        ifelse(length(x) > length(sub), 
               paste(c(sub, "..."), collapse = " "),                    paste(sub, collapse = " "))
        }) %>% 
    unlist()
}