--* topic_names: --* attr: --* fillcolor: '3' --* desc: Create basic topic names by computing for each topic the percent usage by --* probability mass in the data (training and validation). --* ext: sql --* inputs: --* - person_topics_arrayify_long --* -- for each topic, compute the total probability mass -- assigned to it WITH topic_usage AS ( SELECT topic_index, sum(topic_prob) as topic_usage_relative FROM person_topics_arrayify_long GROUP BY topic_index) -- use the generated order and normalize probability -- to create a topic name SELECT topic_index, topic_usage_percent, concat('T-', topic_order, ' (', round(100.0 * topic_usage_percent, 1), '%)') as topic_name FROM ( -- normalize the probability masses across topics to 1.0 -- using window function, OVER () aggregates goes over -- all rows -- and also order the topics so we can rename -- them by order SELECT topic_index, topic_usage_relative / sum(topic_usage_relative) OVER () as topic_usage_percent, row_number() OVER (ORDER BY topic_usage_relative DESC) as topic_order FROM topic_usage)