---
title: "
Visualization of patterns of missing data by Agustin Calatroni"
output:
html_document:
self_containded: TRUE
code_download: yes
toc: false
---
```{=html}
```
```{=html}
```
```{r setup, include=FALSE}
options(width = 200)
knitr::opts_chunk$set(echo = FALSE, eval = TRUE, warning = FALSE, message = FALSE, comment = NA, cache = FALSE)
```
```{r packages}
pacman::p_load(tidyverse)
```
```{r import-data}
d1 <- read.csv("https://raw.githubusercontent.com/VIS-SIG/Wonderful-Wednesdays/master/data/2021/2021-02-10/missing_data.csv") %>%
select(-contains(".bin.")) %>%
mutate(
trt = factor(trt, labels = c("A", "P")),
gender = factor(gender, labels = c("F", "M"))
) %>%
rowwise() %>%
mutate(na_num = sum(is.na(c_across(pain.0:pain.10)))) %>%
mutate(group = case_when(
na_num > 0 & trt == "P" ~ "P/m",
na_num > 0 & trt == "A" ~ "A/m",
na_num == 0 & trt == "P" ~ "P",
na_num == 0 & trt == "A" ~ "A",) %>%
factor(levels = c("A","A/m","P/m","P")))%>%
ungroup()
write.csv(d1, "ch_missing_data.csv")
```
```{r long-data}
d3 <- d1 %>%
rownames_to_column(var = 'id') %>%
pivot_longer(cols = starts_with("pain."),
values_to = 'pain') %>%
separate(name, c("name","time"), "\\.")%>%
mutate(time = factor(time, levels = c(0:10))) %>%
drop_na(pain)
```
```{r seriation}
pacman::p_load(seriation)
d1_na <- d1 %>%
mutate(across(contains("pain."), ~ is.na(.x) %>% as.numeric())) %>%
select(contains("pain.")) %>%
data.matrix()
set.seed(123)
o <- seriate(d1_na, method = "BEA", margin = 1, control = list(rep = 100, istart = 1, jstart = 1))
```
```{r data-reorder}
d2 <- d1 %>%
slice(get_order(o))
```
```{r patterns-missing, include=FALSE}
pacman::p_load(samon)
# 1. t the time-point
# 2. Number On Study
# 3. Number Observed
# 4. Number last seen at time t
# 5. Proportion last seen (of number on-study)
# 6. Proportion last seen (of number observed)
# 7. Number intermittent missing data
# 8. proportion intermittent missing (of number on-study)
# TRT:A
samonTabmat1(d2 %>% filter(trt == "A") %>% select(contains("pain."))) %>%
samonTable1()
# Check
p1_a <- samonDataCheck(d2 %>% filter(trt == "A") %>% select(contains("pain.")) %>% as.data.frame())
p2_a <- samonTabmat1(d2 %>% filter(trt == "A") %>% select(contains("pain.")))
p3_a <- data.frame(time = 0:10,
miss = 150 - p2_a[,3],
mon = c(sum(p1_a$missingPatterns == '___________'),
sum(p1_a$missingPatterns == '*__________'),
sum(p1_a$missingPatterns == '**_________'),
sum(p1_a$missingPatterns == '***________'),
sum(p1_a$missingPatterns == '****_______'),
sum(p1_a$missingPatterns == '*****______'),
sum(p1_a$missingPatterns == '******_____'),
sum(p1_a$missingPatterns == '*******____'),
sum(p1_a$missingPatterns == '********___'),
sum(p1_a$missingPatterns == '*********__'),
sum(p1_a$missingPatterns == '**********_')),
int = p2_a[,7])
# TRT:P
samonTabmat1(d2 %>% filter(trt == "P") %>% select(contains("pain."))) %>%
samonTable1()
# Check
p1_p <- samonDataCheck(d2 %>% filter(trt == "P") %>% select(contains("pain.")) %>% as.data.frame())
p2_p <- samonTabmat1(d2 %>% filter(trt == "P") %>% select(contains("pain.")))
p3_p <- data.frame(time = 0:10,
miss = 150 - p2_p[,3],
mon = c(sum(p1_p$missingPatterns == '___________'),
sum(p1_p$missingPatterns == '*__________'),
sum(p1_p$missingPatterns == '**_________'),
sum(p1_p$missingPatterns == '***________'),
sum(p1_p$missingPatterns == '****_______'),
sum(p1_p$missingPatterns == '*****______'),
sum(p1_p$missingPatterns == '******_____'),
sum(p1_p$missingPatterns == '*******____'),
sum(p1_p$missingPatterns == '********___'),
sum(p1_p$missingPatterns == '*********__'),
sum(p1_p$missingPatterns == '**********_')),
int = p2_p[,7])
```
```{r heatmap-create}
pacman::p_load(ComplexHeatmap)
pacman::p_load(circlize)
t_column_ha <- HeatmapAnnotation(
bar1 = anno_barplot(cbind(p3_a['miss'], p3_p['miss']),
height = unit(2, "cm"),
ylim = c(0, 100),
axis_param = list(at=c(25, 50, 100)),
gp = gpar(fill = c("#A6CEE3","#B2DF8A"),
col = c("#A6CEE3","#B2DF8A"))),
bar2 = anno_barplot(cbind(p3_a['mon'], p3_p['mon']),
height = unit(2, "cm"),
ylim = c(0,10),
gp = gpar(fill = c("#A6CEE3","#B2DF8A"),
col = c("#A6CEE3","#B2DF8A"))),
annotation_label = c("Missing\nOverall","Monotone"),
gap = unit(3, "points")
)
b_column_ha <- HeatmapAnnotation(
foo1 = d3 %>%
group_by(time, group) %>%
summarise(m = mean(pain)) %>%
ungroup() %>%
pivot_wider(names_from = group,
values_from = m) %>%
select(-time) %>%
anno_lines(
add_points = TRUE,
gp = gpar(col = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C")),
pt_gp = gpar(col = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C"))
),
annotation_name_side = "left",
annotation_label = c("Mean")
)
col_fun <- circlize::colorRamp2(breaks = c(0, 16, 24, 32, 64),
colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,3,5,7,9)],
transparency = 0)
ha_list <- Heatmap(d2 %>%
select(contains("pain")) %>%
data.matrix(),
heatmap_legend_param = list(title = 'Pain',
at = c(0, 15, 24, 32, 64),
labels = c("0-Min", "15-Q1", "24-Q2/M", "32-Q3", "64-Max")),
col = col_fun,
na_col = "gray25",
column_labels = 0:10,
column_names_rot = 0,
column_gap = unit(2, "mm"),
cluster_columns = FALSE,
cluster_rows = FALSE,
cluster_row_slices = FALSE,
row_split = d2$group,
row_title = ' ',
row_title_gp = gpar(cex = 0.2),
top_annotation = t_column_ha,
bottom_annotation = b_column_ha
) +
Heatmap(d2 %>% pull(group),
name = "TRT",
col = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C"),
show_heatmap_legend = FALSE,
heatmap_legend_param = list(title = "TRT",
labels = c("Active","A w/Miss", "P w/Miss","Placebo")),
bottom_annotation = HeatmapAnnotation(ggplot = anno_empty(border = FALSE,
height = unit(3, "cm")))
) +
Heatmap(d2 %>% pull(bmi),
name = "BMI",
col = circlize::colorRamp2(breaks = c(18, 25, 27, 30, 35),
colors = RColorBrewer::brewer.pal(n = 9, name = "YlOrRd")[c(1,3,5,7,9)],
transparency = 0),
heatmap_legend_param = list(title = "\nBMI (kg/m2)",
at = c(18, 25, 27, 30, 35),
labels = c("18-Min", "25-Q1", "27-Q2", "30-Q3", "35-Max")),
bottom_annotation = HeatmapAnnotation(summary = anno_summary(gp = gpar(fill = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C"))))
) +
Heatmap(d2 %>% pull(age),
name = "Age",
col = circlize::colorRamp2(breaks = c(19, 37, 49, 62, 88),
colors = RColorBrewer::brewer.pal(n = 9, name = "YlOrRd")[c(1,3,5,7,9)],
transparency = 0),
heatmap_legend_param = list(title = "\nAge (yr)",
at = c(19, 37, 49, 62, 88),
labels = c("19-Min", "37-Q1", "49-Q2", "620-Q3", "88-Max")),
bottom_annotation = HeatmapAnnotation(summary = anno_summary(gp = gpar(fill = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C"))))
)+
Heatmap(d2 %>% pull(gender),
name = "Gender",
col = c("#FFCCFF","#CCFFFF"),
show_heatmap_legend = FALSE,
heatmap_legend_param = list(title = "Gender",
at = c("F","M"),
labels = c("Female","Male")),
bottom_annotation = HeatmapAnnotation(summary = anno_summary(
border = FALSE,
axis_param = list(side = 'right',
at = c(0, 0.25, 0.5, 0.75, 1),
labels = c('0','Female','.5','Male','1'))))
)
```
**Phase III clinical trial on *Psoriasis* comparing Active Treatment vs Placebo for outcome variable Pain** (collected on a visual analog scale [0-100]). The pain outcome variable was reordered and classified into four distinctive groups: Active Treatment without [ ]{style="background-color: #1F78B4"} with [ ]{style="background-color: #A6CEE3"} any missing data and Placebo without [ ]{style="background-color: #33A02C"} with [ ]{style="background-color: #B2DF8A"} any missing data. The longitudinal pain scores are visualized through a color-coded Heatmap where missing values are coded [ ]{style="background-color: #7f7f7f"}. Vertical and horizontal figures were added to adorn different aspects of the missing data. Vertical top annotations display the *overall* and *monotone* missing data patterns, while bottom annotations display the longitudinal *mean* results for each of the aforementioned groups. Right horizontal figures represent the distribution of covariates extended by the bottom summary display.
```{r heatmap-draw, fig.width = 14, fig.height = 9, dpi = 600}
draw(ha_list,
heatmap_legend_side = 'right',
gap = unit(c(2,6,6,5), 'mm'))
decorate_annotation("ggplot", {
vp <- current.viewport()$name
print(
ggplot(
data = d2,
aes(group, fill = group)) +
geom_bar() +
geom_text(stat='count', aes(label=..count..), vjust=1.2, size = 2.5) +
scale_fill_manual(values = c("#1F78B4","#A6CEE3","#B2DF8A","#33A02C")) +
labs(y = NULL)+
theme_void() +
theme(legend.position = "none"),
vp = vp
)
})
decorate_heatmap_body("TRT", {grid.text("Active")}, row_slice = 1)
decorate_heatmap_body("TRT", {grid.text("Active\nw/ Miss")}, row_slice = 2)
decorate_heatmap_body("TRT", {grid.text("Placebo\nw/ Miss")},row_slice = 3)
decorate_heatmap_body("TRT", {grid.text("Placebo")}, row_slice = 4)
```
```{r}
sessioninfo::session_info()%>%
details::details(
summary = 'Current session info',
open = FALSE
)
```
#### Links
[ComplexHeatmap](https://jokergoo.github.io/ComplexHeatmap-reference/book/ "https://jokergoo.github.io/ComplexHeatmap-reference/book/complexheatmap-cover.jpg")
[Package Reference](https://jokergoo.github.io/ComplexHeatmap/index.html "http://jokergoo.github.io/complexheatmap_logo.svg")
[seriation: Infrastructure for Ordering Objects Using Seriation](https://cran.r-project.org/web/packages/seriation/index.html)
[samon: Sensitivity Analysis for Missing Data](https://cran.r-project.org/web/packages/samon/index.html)