--- title: "

Visualization of patterns of missing data by Agustin Calatroni" output: html_document: self_containded: TRUE code_download: yes toc: false --- ```{=html} ``` ```{=html} ``` ```{r setup, include=FALSE} options(width = 200) knitr::opts_chunk$set(echo = FALSE, eval = TRUE, warning = FALSE, message = FALSE, comment = NA, cache = FALSE) ``` ```{r packages} pacman::p_load(tidyverse) ``` ```{r import-data} d1 <- read.csv("https://raw.githubusercontent.com/VIS-SIG/Wonderful-Wednesdays/master/data/2021/2021-02-10/missing_data.csv") %>% select(-contains(".bin.")) %>% mutate( trt = factor(trt, labels = c("A", "P")), gender = factor(gender, labels = c("F", "M")) ) %>% rowwise() %>% mutate(na_num = sum(is.na(c_across(pain.0:pain.10)))) %>% mutate(group = case_when( na_num > 0 & trt == "P" ~ "P/m", na_num > 0 & trt == "A" ~ "A/m", na_num == 0 & trt == "P" ~ "P", na_num == 0 & trt == "A" ~ "A",) %>% factor(levels = c("A","A/m","P/m","P")))%>% ungroup() write.csv(d1, "ch_missing_data.csv") ``` ```{r long-data} d3 <- d1 %>% rownames_to_column(var = 'id') %>% pivot_longer(cols = starts_with("pain."), values_to = 'pain') %>% separate(name, c("name","time"), "\\.")%>% mutate(time = factor(time, levels = c(0:10))) %>% drop_na(pain) ``` ```{r seriation} pacman::p_load(seriation) d1_na <- d1 %>% mutate(across(contains("pain."), ~ is.na(.x) %>% as.numeric())) %>% select(contains("pain.")) %>% data.matrix() set.seed(123) o <- seriate(d1_na, method = "BEA", margin = 1, control = list(rep = 100, istart = 1, jstart = 1)) ``` ```{r data-reorder} d2 <- d1 %>% slice(get_order(o)) ``` ```{r patterns-missing, include=FALSE} pacman::p_load(samon) # 1. t the time-point # 2. Number On Study # 3. Number Observed # 4. Number last seen at time t # 5. Proportion last seen (of number on-study) # 6. Proportion last seen (of number observed) # 7. Number intermittent missing data # 8. proportion intermittent missing (of number on-study) # TRT:A samonTabmat1(d2 %>% filter(trt == "A") %>% select(contains("pain."))) %>% samonTable1() # Check p1_a <- samonDataCheck(d2 %>% filter(trt == "A") %>% select(contains("pain.")) %>% as.data.frame()) p2_a <- samonTabmat1(d2 %>% filter(trt == "A") %>% select(contains("pain."))) p3_a <- data.frame(time = 0:10, miss = 150 - p2_a[,3], mon = c(sum(p1_a$missingPatterns == '___________'), sum(p1_a$missingPatterns == '*__________'), sum(p1_a$missingPatterns == '**_________'), sum(p1_a$missingPatterns == '***________'), sum(p1_a$missingPatterns == '****_______'), sum(p1_a$missingPatterns == '*****______'), sum(p1_a$missingPatterns == '******_____'), sum(p1_a$missingPatterns == '*******____'), sum(p1_a$missingPatterns == '********___'), sum(p1_a$missingPatterns == '*********__'), sum(p1_a$missingPatterns == '**********_')), int = p2_a[,7]) # TRT:P samonTabmat1(d2 %>% filter(trt == "P") %>% select(contains("pain."))) %>% samonTable1() # Check p1_p <- samonDataCheck(d2 %>% filter(trt == "P") %>% select(contains("pain.")) %>% as.data.frame()) p2_p <- samonTabmat1(d2 %>% filter(trt == "P") %>% select(contains("pain."))) p3_p <- data.frame(time = 0:10, miss = 150 - p2_p[,3], mon = c(sum(p1_p$missingPatterns == '___________'), sum(p1_p$missingPatterns == '*__________'), sum(p1_p$missingPatterns == '**_________'), sum(p1_p$missingPatterns == '***________'), sum(p1_p$missingPatterns == '****_______'), sum(p1_p$missingPatterns == '*****______'), sum(p1_p$missingPatterns == '******_____'), sum(p1_p$missingPatterns == '*******____'), sum(p1_p$missingPatterns == '********___'), sum(p1_p$missingPatterns == '*********__'), sum(p1_p$missingPatterns == '**********_')), int = p2_p[,7]) ``` ```{r heatmap-create} pacman::p_load(ComplexHeatmap) pacman::p_load(circlize) t_column_ha <- HeatmapAnnotation( bar1 = anno_barplot(cbind(p3_a['miss'], p3_p['miss']), height = unit(2, "cm"), ylim = c(0, 100), axis_param = list(at=c(25, 50, 100)), gp = gpar(fill = c("#A6CEE3","#B2DF8A"), col = c("#A6CEE3","#B2DF8A"))), bar2 = anno_barplot(cbind(p3_a['mon'], p3_p['mon']), height = unit(2, "cm"), ylim = c(0,10), gp = gpar(fill = c("#A6CEE3","#B2DF8A"), col = c("#A6CEE3","#B2DF8A"))), annotation_label = c("Missing\nOverall","Monotone"), gap = unit(3, "points") ) b_column_ha <- HeatmapAnnotation( foo1 = d3 %>% group_by(time, group) %>% summarise(m = mean(pain)) %>% ungroup() %>% pivot_wider(names_from = group, values_from = m) %>% select(-time) %>% anno_lines( add_points = TRUE, gp = gpar(col = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C")), pt_gp = gpar(col = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C")) ), annotation_name_side = "left", annotation_label = c("Mean") ) col_fun <- circlize::colorRamp2(breaks = c(0, 16, 24, 32, 64), colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,3,5,7,9)], transparency = 0) ha_list <- Heatmap(d2 %>% select(contains("pain")) %>% data.matrix(), heatmap_legend_param = list(title = 'Pain', at = c(0, 15, 24, 32, 64), labels = c("0-Min", "15-Q1", "24-Q2/M", "32-Q3", "64-Max")), col = col_fun, na_col = "gray25", column_labels = 0:10, column_names_rot = 0, column_gap = unit(2, "mm"), cluster_columns = FALSE, cluster_rows = FALSE, cluster_row_slices = FALSE, row_split = d2$group, row_title = ' ', row_title_gp = gpar(cex = 0.2), top_annotation = t_column_ha, bottom_annotation = b_column_ha ) + Heatmap(d2 %>% pull(group), name = "TRT", col = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C"), show_heatmap_legend = FALSE, heatmap_legend_param = list(title = "TRT", labels = c("Active","A w/Miss", "P w/Miss","Placebo")), bottom_annotation = HeatmapAnnotation(ggplot = anno_empty(border = FALSE, height = unit(3, "cm"))) ) + Heatmap(d2 %>% pull(bmi), name = "BMI", col = circlize::colorRamp2(breaks = c(18, 25, 27, 30, 35), colors = RColorBrewer::brewer.pal(n = 9, name = "YlOrRd")[c(1,3,5,7,9)], transparency = 0), heatmap_legend_param = list(title = "\nBMI (kg/m2)", at = c(18, 25, 27, 30, 35), labels = c("18-Min", "25-Q1", "27-Q2", "30-Q3", "35-Max")), bottom_annotation = HeatmapAnnotation(summary = anno_summary(gp = gpar(fill = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C")))) ) + Heatmap(d2 %>% pull(age), name = "Age", col = circlize::colorRamp2(breaks = c(19, 37, 49, 62, 88), colors = RColorBrewer::brewer.pal(n = 9, name = "YlOrRd")[c(1,3,5,7,9)], transparency = 0), heatmap_legend_param = list(title = "\nAge (yr)", at = c(19, 37, 49, 62, 88), labels = c("19-Min", "37-Q1", "49-Q2", "620-Q3", "88-Max")), bottom_annotation = HeatmapAnnotation(summary = anno_summary(gp = gpar(fill = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C")))) )+ Heatmap(d2 %>% pull(gender), name = "Gender", col = c("#FFCCFF","#CCFFFF"), show_heatmap_legend = FALSE, heatmap_legend_param = list(title = "Gender", at = c("F","M"), labels = c("Female","Male")), bottom_annotation = HeatmapAnnotation(summary = anno_summary( border = FALSE, axis_param = list(side = 'right', at = c(0, 0.25, 0.5, 0.75, 1), labels = c('0','Female','.5','Male','1')))) ) ```
**Phase III clinical trial on *Psoriasis* comparing Active Treatment vs Placebo for outcome variable Pain** (collected on a visual analog scale [0-100]). The pain outcome variable was reordered and classified into four distinctive groups: Active Treatment without [   ]{style="background-color: #1F78B4"} with [   ]{style="background-color: #A6CEE3"} any missing data and Placebo without [   ]{style="background-color: #33A02C"} with [   ]{style="background-color: #B2DF8A"} any missing data. The longitudinal pain scores are visualized through a color-coded Heatmap where missing values are coded [   ]{style="background-color: #7f7f7f"}. Vertical and horizontal figures were added to adorn different aspects of the missing data. Vertical top annotations display the *overall* and *monotone* missing data patterns, while bottom annotations display the longitudinal *mean* results for each of the aforementioned groups. Right horizontal figures represent the distribution of covariates extended by the bottom summary display.

```{r heatmap-draw, fig.width = 14, fig.height = 9, dpi = 600} draw(ha_list, heatmap_legend_side = 'right', gap = unit(c(2,6,6,5), 'mm')) decorate_annotation("ggplot", { vp <- current.viewport()$name print( ggplot( data = d2, aes(group, fill = group)) + geom_bar() + geom_text(stat='count', aes(label=..count..), vjust=1.2, size = 2.5) + scale_fill_manual(values = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C")) + labs(y = NULL)+ theme_void() + theme(legend.position = "none"), vp = vp ) }) decorate_heatmap_body("TRT", {grid.text("Active")}, row_slice = 1) decorate_heatmap_body("TRT", {grid.text("Active\nw/ Miss")}, row_slice = 2) decorate_heatmap_body("TRT", {grid.text("Placebo\nw/ Miss")},row_slice = 3) decorate_heatmap_body("TRT", {grid.text("Placebo")}, row_slice = 4) ``` ```{r} sessioninfo::session_info()%>% details::details( summary = 'Current session info', open = FALSE ) ``` #### Links [ComplexHeatmap](https://jokergoo.github.io/ComplexHeatmap-reference/book/ "https://jokergoo.github.io/ComplexHeatmap-reference/book/complexheatmap-cover.jpg") [Package Reference](https://jokergoo.github.io/ComplexHeatmap/index.html "http://jokergoo.github.io/complexheatmap_logo.svg") [seriation: Infrastructure for Ordering Objects Using Seriation](https://cran.r-project.org/web/packages/seriation/index.html) [samon: Sensitivity Analysis for Missing Data](https://cran.r-project.org/web/packages/samon/index.html)