Produce data visualisation(s) providing insights into how microbiome profiles change over time and what the differences are in microbiome profiles between Pre-Term Birth and Term Birth.
Challenge:
Produce data visualisation(s) providing insights into if the vaginal microbiome profiles can be associated with Pre-Term Birth.
A description of the challenge can be found here.
A recording of the session can be found here.
Accessibility check:
The app can be found here
The pptx file can be found here.
library(readr)
library(tidyr)
library(tidyverse)
library(ggplot2)
library(cowplot)
library(RColorBrewer)
library(paletteer)
library(colorspace)
################################################
# Combine abundance, taxonomy and clinical data
clin1 <- read_csv2("./Data/Metadata.csv") %>%
arrange(subject_id, visit_number)
tax <- read_csv2("./Data/Taxonomy_info.csv")
abun1 <- read_csv2("./Data/Relative_abundance.csv") %>%
gather(key = "sample_id", value = "Value", -taxID) %>%
group_by(taxID, sample_id) %>%
filter(Value>0)
abun3 <- left_join(clin1, abun1, by = "sample_id")
abun4 <- left_join(tax, abun3, by = "taxID") %>%
arrange(subject_id, visit_number, taxID)
################################################
# Identify most prevalent taxIDs and subset data
num_gen <- 6
tax_prev <- abun4 %>%
group_by(genus) %>%
summarise(mn = mean(Value)) %>%
arrange(desc(mn)) %>%
select(genus)
tax_prev2 <- tax_prev[1:num_gen,] %>%
as_vector()
abun5 <- abun4 %>%
filter(genus %in% tax_prev2)
################################################
# Groups subjects by most prevalent genus
overall_most.prev<- abun5 %>%
group_by(genus) %>%
summarise(sum = sum(Value)) %>%
arrange(desc(sum)) %>%
select(genus) %>%
mutate(sort.order = row_number())
################################################
# Create plots
myColors <- brewer.pal(num_gen,"Set1")
names(myColors) <- levels(overall_most.prev$genus)
make.plot <- function(gp, vs) {
tmp <- abun5 %>%
filter(group == gp & visit_number == vs) %>%
group_by(subject_id, genus) %>%
summarise(sum = sum(Value)) %>%
arrange(subject_id, desc(sum))
# select(subject_id, genus, Value)
sub_most.prev <- tmp %>%
group_by(subject_id, genus) %>%
arrange(subject_id, desc(sum)) %>%
group_by(subject_id) %>%
slice(1) %>%
select(subject_id, genus)
sub_most.prev2 <- left_join(sub_most.prev, overall_most.prev, by = "genus") %>%
ungroup() %>%
select(subject_id, sort.order)
tmp2 <- left_join(tmp, sub_most.prev2, by = "subject_id")
subID <- tmp2 %>%
group_by(sort.order, subject_id) %>%
distinct(subject_id) %>%
arrange(sort.order) %>%
ungroup() %>%
mutate(new.subjid = row_number()) %>%
select(subject_id, new.subjid)
tmp3 <- left_join(tmp2, subID, by = "subject_id")
ggplot(tmp3, aes(fill=genus, y=sum, x=new.subjid)) +
geom_bar(show.legend = TRUE,
position="fill", stat="identity", width=1, color="black", linewidth=0.1) +
coord_polar(theta="x") +
scale_fill_manual(name = "genus", values = myColors) +
scale_y_continuous(limits=c(-0.2, 1)) +
theme(
panel.background=element_rect(fill="white"),
axis.text=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
legend.text=element_text(size=14),
legend.title=element_text(size=14)
) +
annotate("text", label = str_c("Visit ", vs), x = 0, y = -0.2, color = "black", size=5)
}
p1 <- make.plot("Preterm delivery", 1)
p2 <- make.plot("Preterm delivery", 2)
p3 <- make.plot("Preterm delivery", 3)
p4 <- make.plot("Preterm delivery", 4)
p5 <- make.plot("Term delivery", 1)
p6 <- make.plot("Term delivery", 2)
p7 <- make.plot("Term delivery", 3)
p8 <- make.plot("Term delivery", 4)
p_ptd <- plot_grid(
p1 + theme(legend.position="none"),
p2 + theme(legend.position="none"),
p3 + theme(legend.position="none"),
p4 + theme(legend.position="none"),
align = 'vh',
hjust = -1,
ncol = 4,
nrow = 1
)
p_td <- plot_grid(
p5 + theme(legend.position="none"),
p6 + theme(legend.position="none"),
p7 + theme(legend.position="none"),
p8 + theme(legend.position="none"),
align = 'vh',
labels = c(" ", " ", " ", " "),
hjust = -1,
ncol = 4,
nrow = 1
)
################################################
# Formatting and output to file
title0 <- ggdraw() +
draw_label("Longitudinal Human Microbiome Project (MOMS-PI Pre-Term Birth Study)\n", size = 20, color = "blue")
title1 <- ggdraw() +
draw_label("Pre-Term Delivery", size = 22)
title2 <- ggdraw() +
draw_label("Term Delivery", size = 22)
legend <- get_legend(
p2 + theme(legend.box.margin = margin(0, 0, 0, 12))
)
cap <- "How to read this chart: Each wheel represents a set of samples from the Pre-Term or Term Delivery cohort at a particular visit. Each spoke in the wheel represents a single participant in the study, with coloured segments representing the relative abundance
of bacteria types (genus) in the sample. For simplicity, only the most prevalent genus types and the first four visits are included. Approximately 75-80% of samples had Lactobacillus as the dominant genus, with no obvious difference between visits or cohorts."
caption <- ggdraw() +
draw_label(cap, size = 14)
ptd2 <- plot_grid(title1, p_ptd, ncol=1, rel_heights = c(1.7, 25), align = "v")
td2 <- plot_grid(title2, p_td, ncol=1, rel_heights = c(1.7, 25), align = "v")
all1 <- plot_grid(ptd2, td2, ncol=1, rel_heights = c(1, 1), align = "v")
all2 <- plot_grid(all1, legend, rel_widths = c(3, 0.5))
all3 <- plot_grid(title0, all2, caption, ncol=1, rel_heights = c(8, 100, 5), align = "v") +
theme(plot.background = element_rect(fill = "white", colour = "blue"))
ggsave("./WW_Mar24_01_OkabeIto.png", all3, width=24, height=12, dpi=300)
################################################
# Accessibility check:
library(readr)
library(tidyr)
library(tidyverse)
library(ggplot2)
library(cowplot)
library(RColorBrewer)
library(paletteer)
library(colorspace)
library(dichromat)
################################################
# Combine abundance, taxonomy and clinical data
clin1 <- read_csv2("./Data/Metadata.csv") %>%
arrange(subject_id, visit_number)
tax <- read_csv2("./Data/Taxonomy_info.csv")
abun1 <- read_csv2("./Data/Relative_abundance.csv") %>%
gather(key = "sample_id", value = "Value", -taxID) %>%
group_by(taxID, sample_id) %>%
filter(Value>0)
abun3 <- left_join(clin1, abun2, by = "sample_id")
abun4 <- left_join(tax, abun3, by = "taxID") %>%
arrange(subject_id, visit_number, taxID)
################################################
# Identify most prevalent taxIDs and subset data
num_gen <- 6
tax_prev <- abun4 %>%
group_by(genus) %>%
summarise(mn = mean(Value)) %>%
arrange(desc(mn)) %>%
select(genus)
tax_prev2 <- tax_prev[1:num_gen,] %>%
as_vector()
abun5 <- abun4 %>%
filter(genus %in% tax_prev2)
################################################
# Groups subjects by most prevalent genus
overall_most.prev<- abun5 %>%
group_by(genus) %>%
summarise(sum = sum(Value)) %>%
arrange(desc(sum)) %>%
select(genus) %>%
mutate(sort.order = row_number())
################################################
# Create plots
myColors <- brewer.pal(num_gen,"Set1")
protan <- dichromat(myColors , type = "protan")
deutan <- dichromat(myColors , type = "deutan")
tritan <- dichromat(myColors , type = "tritan")
names(myColors) <- levels(overall_most.prev$genus)
make.plot <- function(pal_lab,pal) {
tmp <- abun5 %>%
filter(group == "Preterm delivery" & visit_number == 1) %>%
group_by(subject_id, genus) %>%
summarise(sum = sum(Value)) %>%
arrange(subject_id, desc(sum))
# select(subject_id, genus, Value)
sub_most.prev <- tmp %>%
group_by(subject_id, genus) %>%
arrange(subject_id, desc(sum)) %>%
group_by(subject_id) %>%
slice(1) %>%
select(subject_id, genus)
sub_most.prev2 <- left_join(sub_most.prev, overall_most.prev, by = "genus") %>%
ungroup() %>%
select(subject_id, sort.order)
tmp2 <- left_join(tmp, sub_most.prev2, by = "subject_id")
subID <- tmp2 %>%
group_by(sort.order, subject_id) %>%
distinct(subject_id) %>%
arrange(sort.order) %>%
ungroup() %>%
mutate(new.subjid = row_number()) %>%
select(subject_id, new.subjid)
tmp3 <- left_join(tmp2, subID, by = "subject_id")
ggplot(tmp3, aes(fill=genus, y=sum, x=new.subjid)) +
geom_bar(show.legend = TRUE,
position="fill", stat="identity", width=1, color="black", linewidth=0.1) +
coord_polar(theta="x") +
scale_fill_manual(name = "genus", values = pal) +
scale_y_continuous(limits=c(-0.2, 1)) +
theme(
panel.background=element_rect(fill="white"),
axis.text=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
legend.text=element_text(size=14),
legend.title=element_text(size=14)
) +
labs(title=pal_lab)
}
p1 <- make.plot("Original", myColors)
p2 <- make.plot("Protanopic", protan)
p3 <- make.plot("Deutanopic", deutan)
p4 <- make.plot("Tritanopic", tritan)
all <- plot_grid(p1, p2, p3, p4,
align = 'vh',
hjust = -1,
ncol = 2,
nrow = 2
)
ggsave("./WW_Mar24_colorblindness.png", all, width=24, height=12, dpi=300)
################################################
library(shiny)
library(readr)
library(tidyr)
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(introdataviz)
# Define UI for application that draws a histogram
num_gen <- 15
abun5 <- read_csv("./Data/abun5.csv")
genus_list <- abun5 %>%
group_by(genus) %>%
summarise(mn = mean(Value)) %>%
arrange(desc(mn)) %>%
select(genus) %>%
slice_head(n=num_gen)
genus_list_v <- collect(select(genus_list, genus))[[1]]
ui <- fluidPage(
titlePanel("Multi-Omic Microbiome Study:Pregnancy Initiative (MOMS-PI)"),
sidebarLayout(
sidebarPanel(
radioButtons("genus_select", "Select Genus", genus_list_v),
radioButtons("visit_select", "Select Visit", c("All", "1", "2", "3", "4" ,"5")),
sliderInput("age_select", "Select Age Range", value = c(20, 50), step = 5, min = 20, max = 50, width=400),
radioButtons("income_select", "Select Annual Income", c("All", "<US$20,000", "US$20,000-59,999", "US$60,000+")),
width=3),
mainPanel(
shinycssloaders::withSpinner(
plotOutput("myplot", width = "1200px", height = "800px")
)
)
)
)
server <- function(input, output) {
rain_height <- .5
output$myplot <- renderPlot({
if(input$visit_select == "All") {}
else {
abun5 <- abun5 %>%
filter(visit_number == input$visit_select)
}
if(input$income_select == "All") {}
else {
abun5 <- abun5 %>%
filter(subject_annual_income == input$income_select)
}
abun5 <- abun5 %>%
filter(genus == input$genus_select) %>%
filter(subject_age > input$age_select[1], subject_age < input$age_select[2]) %>%
mutate(log_val = log(Value))
# browser()
ggplot(data=abun5, aes(x=" ", y=log_val)) +
geom_flat_violin(aes(fill=group),
trim=FALSE, alpha = 0.3,
position = position_nudge(x = rain_height+.05)) +
geom_point(aes(colour = group), shape = 1, size = 3, alpha = .9, show.legend = FALSE,
position = position_jitter(width = rain_height, height = 0)) +
scale_y_continuous("Log(Abundance)") +
# scale_color_brewer(palette = "Set1") +
# scale_fill_brewer(palette = "Set1") +
coord_flip() +
theme(
panel.background=element_blank(),
axis.title.y=element_blank(),
axis.text.x=element_text(size = 15),
axis.line.x=element_line(color="grey"),
axis.title.x=element_text(size = 18),
legend.text=element_text(size = 18),
legend.title=element_blank()
)
})
}
# Run the application
shinyApp(ui = ui, server = server)
# Microbiome Challenge
# WonderfulWednesday
library(tidyverse)
library(stringr)
library(scales)
library(RColorBrewer)
library(glue)
library(janitor)
library(readr)
root_dir <- here::here()
tues_dir <- file.path(root_dir,
"microbiome")
tt_theme <- "microbiome"
author_initials <- "TomMarlow"
azTidyTuesday_date <- "20240228"
# Relative abundance of each microbial community in each woman in pregnancy and the perinatal period
ra <- file.path(tues_dir, "202402_Collaboration_DataVizESIG-BiomarkersESIG_relative_abundance.csv")
# Taxonomy information of each microbial community
ti <- file.path(tues_dir, "202402_Collaboration_DataVizESIG-BiomarkersESIG_taxonomy_info.csv")
# Clinical information of each woman (simulated data)
sm <- file.path(tues_dir, "202402_Collaboration_DataVizESIG-BiomarkersESIG_simulated_metadata.csv")
rel_dat <- read.csv(ra, sep = ";")
sim_dat <- read.csv(sm, sep = ";")
tax_dat <- read.csv(ti, sep = ";")
rel_id <- rel_dat %>%
pivot_longer(.,
cols = 2:ncol(.),
names_to = "id",
values_to = "rel_abu",
values_drop_na = TRUE) %>%
mutate(sample_id = substr(id, 2, nchar(id))) %>%
filter(rel_abu != 0)
# Merge to get audio characteristics
microb <- merge(x = rel_id,
y = sim_dat,
by.x = c("sample_id"),
by.y = c("sample_id"),
all.y = FALSE)
# Taxonomy details
micro_tax <- merge(x = microb,
y = tax_dat,
by.x = c("taxID"),
by.y = c("taxID"),
all.y = FALSE) %>%
mutate(abu = as.numeric(gsub(",", ".", rel_abu)))
table(micro_tax$phylum, micro_tax$group)
table(micro_tax$class, micro_tax$group)
table(sim_dat$subject_id)
msd_pat <- micro_tax %>%
group_by(group, subject_id, visit_number, class) %>%
summarise(suma = sum(abu)) %>%
mutate(sump = suma * 100) %>%
mutate(visits = if_else(visit_number >= 5, 5, visit_number)) %>%
mutate(Class = gsub("c__", "", class))
unique(msd_pat$Class)
cls_avg <- msd_pat %>%
group_by(Class) %>%
summarise(mean = mean(sump)) %>%
arrange(-mean)
cls_pop <- as.data.frame(table(msd_pat$Class)) %>%
arrange(-Freq) %>%
rename(Class = Var1)
msd_pat <- micro_tax %>%
group_by(group, subject_id, visit_number, phylum) %>%
summarise(suma = sum(abu)) %>%
mutate(sump = suma * 100) %>%
mutate(visits = if_else(visit_number >= 5, 5, visit_number)) %>%
mutate(Phylum = gsub("p__", "", phylum))
write.csv(msd_pat, file.path(tues_dir, "pat.csv"))
phy_avg <- msd_pat %>%
group_by(Phylum) %>%
summarise(mean = mean(sump)) %>%
arrange(-mean)
phy_pop <- as.data.frame(table(msd_pat$Phylum)) %>%
arrange(-Freq) %>%
rename(Phylum = Var1)
# Remove low frequency phylums / classes
phy_hi <- c(unlist(phy_pop$Phylum[phy_pop$Freq >= 20]))
cls_hi <- c(unlist(cls_pop$Class[cls_pop$Freq >= 1000]))
# Line plot
lin_dat <- msd_pat %>%
group_by(Class, group, visit_number) %>%
summarise(meanab = mean(sump)) %>%
mutate(Class = factor(Class,
levels = cls_avg$Class,
labels = cls_avg$Class)) %>%
filter(Class %in% cls_hi)
line_fig <- ggplot() +
geom_line(
data = lin_dat, linewidth = 2,
aes(
x = visit_number, y = meanab, group = group,
colour = group
)
) +
xlab("") +
ylab(NULL) +
labs(
title = "Mean relative abundance by microbiome class, term and visit number",
caption = "Multi-Omic Microbiome Study: Pregnancy Initiative (MOMS-PI) is part of the longitudinal Human Microbiome Project (HMP)",
colour = "Term",
x = "Visit number",
y = "Mean abundance %"
) +
theme_bw() +
theme(
strip.text.x = element_text(size = 10),
axis.title = element_text(size = 10),
axis.text.y = element_text(size = 8),
axis.text.x = element_text(size = 8, angle = 0, hjust = .5),
legend.position = "bottom"
) +
scale_color_viridis_d(option = "viridis") +
scale_fill_viridis_d(option = "viridis") +
facet_wrap(. ~ Class)
line_fig
ggsave(
plot = line_fig,
filename = glue("{root_dir}/azTidyTuesday_{azTidyTuesday_date}_{tt_theme}_{author_initials}_{Sys.Date()}.jpg"),
width = 16,
height = 9)
The jmp file can be found here
The rmd file can be found here.
For attribution, please cite this work as
SIG (2024, March 13). VIS-SIG Blog: Wonderful Wednesdays March 2024. Retrieved from https://graphicsprinciples.github.io/posts/2024-09-09-wonderful-wednesdays-march-2024/
BibTeX citation
@misc{sig2024wonderful, author = {SIG, PSI VIS}, title = {VIS-SIG Blog: Wonderful Wednesdays March 2024}, url = {https://graphicsprinciples.github.io/posts/2024-09-09-wonderful-wednesdays-march-2024/}, year = {2024} }