#* datapartner_topic_usage_entropy:
#*   attr:
#*     fillcolor: '3'
#*   desc: For each topic, computes the entropy of that topic from the distribution of
#*     usage over sites, normalized to 1.0 being max entropy (uniform) and 0.0 being
#*     min entropy (used only by one site).
#*   ext: R
#*   inputs:
#*   - topic_weight_totals_nodps
#* 

# this is the entropy of usage across data partners
# we normalize each topic to have a total sum weight of 1.0 and compute the entropy of that distribution
library(dplyr)
library(tidyr)

# Compute the entropy of each topic's usage distribution.
#
# For every `topic_name` group, the `topic_sum_weight` values of the group's
# rows are normalised to sum to 1 and treated as a probability distribution.
# NOTE(review): presumably each row of a group is one site / data partner --
# confirm against whatever produces `topic_weight_totals_nodps`.
#
# Args:
#   topic_weight_totals_nodps: data frame with at least the columns
#     `topic_name` and `topic_sum_weight`.
#
# Returns:
#   The result of `do()` over the topic groups: one row per `topic_name` with
#   the columns
#     entropy          - Shannon entropy in bits of the normalised weights.
#     relative_entropy - entropy divided by the maximum possible entropy,
#                        log2(number of rows in the group); 0 for a group
#                        with a single row.
#   Both values are NA when a group's weights do not sum to a positive,
#   finite number (all zero, or containing NA), because no distribution can
#   be formed from them.
datapartner_topic_usage_entropy <- function(topic_weight_totals_nodps) {
    compute_entropy <- function(sub_df) {
        weights <- sub_df$topic_sum_weight
        total <- sum(weights)

        # Without a positive, finite total the weights cannot be normalised
        # (0 / 0 would silently yield NaN), so report the entropy as missing.
        if (!is.finite(total) || total <= 0) {
            return(data.frame(entropy = NA_real_, relative_entropy = NA_real_))
        }

        p <- weights / total

        # By convention 0 * log2(0) = 0. In floating point it is 0 * -Inf = NaN,
        # so zero-probability terms are dropped before summing.
        nonzero <- p[p != 0]
        entropy <- -sum(nonzero * log2(nonzero))

        # The maximum entropy is that of a uniform distribution over all rows
        # of the group, including rows with zero weight.
        max_entropy <- log2(length(p))

        # A single-row group has max_entropy == 0; dividing would give 0 / 0.
        # Such a topic is at minimum entropy, so its relative entropy is 0.
        relative_entropy <- if (max_entropy > 0) entropy / max_entropy else 0

        data.frame(entropy = entropy, relative_entropy = relative_entropy)
    }

    # group_by() + do() is kept (rather than summarise()) so the returned
    # object has the same columns and grouping as before.
    df_by_topic <- group_by(topic_weight_totals_nodps, topic_name)
    do(df_by_topic, compute_entropy(.))
}