Extension of the September challange to observe subgroup effects in treatment of MAFLD (simulated data).
The Background:
There is a recent publication: The Effect of Liv.52 DS in Metabolic Dysfunction-Associated Fatty Liver Disease (MAFLD): A Pilot, Randomized, Double-Blind, Placebo-Controlled, Clinical Study
The publication is available via NIH.
In this publication a pair of figures has been attached, that is the basis of the discussion on graphic design principles.
Data set:
The data is derived to match the plotted results in the publication. Some baseline characteristics are simulated for the purpose of the challenge.
| Variable Name | Variable Label |
|---|---|
| Group | Treatment group |
| Baseline_kPa | Baseline value of the LSM Score |
| EOS_kPa | End of study value of the LSM Score |
| Sex | Sex (male/female) |
| Steatosis | Course of steatosis |
| Age | Age at baseline in years |
| Weight | Baseline weight in kg |
The Challenge:
Create visualisation to explore possible differences in the treatment effect with regard to subgroups.
A description of the challenge can also be found here.
A recording of the session can be found here.




library(tidyverse)
library(splines)
df1 <- read_csv("LSM_Score.csv") %>%
mutate(CFB = EOS_kPa - Baseline_kPa)
### Weight ###
model <- lm(CFB ~ Group + Sex + Age + ns(Weight, df = 3) + Group*Weight + Group*Sex, data = df1)
df1 %>%
summarise(Weight)
summary(model)
temp <- expand.grid(
Group = c("LIV.52 DS", "Placebo"),
Sex = c("Male", "Female"),
# Steatosis = c("No Steatosis", "Grade improvement", "Deteriorate"),
Age = mean(df1$Age),
Weight = seq(50, 100, by = 1)
)
pred <- predict(model, newdata = temp, interval = "confidence")
df2 <- temp %>%
mutate(
fit = pred[, "fit"],
lwr = pred[, "lwr"],
upr = pred[, "upr"]
)
my_black <- "#252525"
plot <- ggplot(df2, aes(x = Weight, y = fit,
color = Group)) +
geom_line(size = 1.2) +
geom_ribbon(aes(ymin = lwr, ymax = upr, fill = Group),
alpha = 0.15, color = NA) +
scale_y_continuous(
breaks=seq(-5, 5, by = 1)) +
# scale_color_discrete(guide="none") +
scale_fill_discrete(guide="none") +
facet_wrap(~Sex) +
labs(x = "Weight (kg)", y = "Change in LSM (kPa)") +
theme_minimal() +
theme(panel.background=element_rect(fill="white"),
panel.grid.major=element_line(colour = "#f0f0f0",
linewidth = 0.5,
linetype = 1),
panel.border=element_rect(fill = NA,
colour = my_black,
linewidth = 1,
linetype = 1),
strip.background = element_rect(fill = NA,
colour = my_black,
linewidth = 1,
linetype = 1),
strip.text = element_text(
colour = my_black,
size = 12),
axis.line.y=element_line(colour = my_black,
linewidth = 0.5,
linetype = 1),
axis.text.x=element_text(
colour = my_black,
size = 14),
axis.text.y=element_text(
colour = my_black,
size = 14),
axis.title=element_text(
colour = my_black,
size =14),
legend.text=element_text(
colour = my_black,
size = 14),
legend.title = element_text(size = 14))
ggsave(plot, filename = "WW_Oct01.png", width = 16, height = 9)
# MAFLD Subgroup Analysis - Treatment Effect Visualization
# Replicating publication: HMER-17-61
# Exploring Liv.52 DS vs Placebo across different subgroups
# Load required libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(ggtext)
# ============================================================================
# Helper Functions
# ============================================================================
# Common theme for subgroup plots
theme_subgroup <- function() {
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5, size = 13, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 10),
legend.position = "top",
legend.title = element_text(size = 10),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank()
)
}
# Calculate subgroup statistics
calculate_subgroup_stats <- function(data, group_var) {
data %>%
group_by(Group, !!sym(group_var)) %>%
summarise(
n = n(),
mean_change = mean(Change_kPa),
se_change = sd(Change_kPa) / sqrt(n),
mean_pct_change = mean(Percent_Change),
sd_pct_change = sd(Percent_Change),
se_pct_change = sd_pct_change / sqrt(n),
.groups = "drop"
)
}
# Create subgroup point plot
create_subgroup_plot <- function(data, x_var, title, x_label) {
ggplot(data, aes(x = !!sym(x_var), y = mean_pct_change, color = Group)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.5) +
geom_point(size = 4, position = position_dodge(0.4)) +
geom_errorbar(
aes(ymin = mean_pct_change - 1.96 * se_pct_change,
ymax = mean_pct_change + 1.96 * se_pct_change),
width = 0.2,
position = position_dodge(0.4),
linewidth = 0.8
) +
geom_text(
aes(label = paste0("n=", n)),
position = position_dodge(0.4),
hjust = -0.5,
vjust = 0.5,
size = 3,
show.legend = FALSE
) +
labs(
title = title,
subtitle = "Mean percent change in LSM Score from baseline (±95% CI)",
y = "Mean % Change from Baseline",
x = x_label,
color = "Treatment"
) +
scale_color_manual(values = c("LIV.52 DS" = "#4472C4", "Placebo" = "#ED7D31")) +
theme_subgroup()
}
# ============================================================================
# Data Preparation
# ============================================================================
# Read the data
data <- read.csv("202509/submission/LSM_Score_pub.csv")
# Calculate derived variables
data <- data %>%
mutate(
Change_kPa = EOS_kPa - Baseline_kPa,
Percent_Change = (Change_kPa / Baseline_kPa) * 100,
Age_Group = cut(
Age,
breaks = c(0, 45, 55, 100),
labels = c("≤45 years", "46-55 years", ">55 years")
),
Weight_Group = cut(
Weight,
breaks = c(0, 70, 85, 200),
labels = c("Lower weight", "Medium weight", "Higher weight")
),
Baseline_Severity = cut(
Baseline_kPa,
breaks = c(0, 7, 8, 20),
labels = c("Mild (<7 kPa)", "Moderate (7-8 kPa)", "Severe (>8 kPa)")
),
Steatosis = factor(
Steatosis,
levels = c("No Steatosis", "Grade improvement", "Deteriorate", "Other")
)
)
# ============================================================================
# 1. Main Effect - Boxplot of LSM Scores
# ============================================================================
data_long <- data %>%
select(Group, Baseline_kPa, EOS_kPa) %>%
pivot_longer(
cols = c(Baseline_kPa, EOS_kPa),
names_to = "Timepoint",
values_to = "LSM_kPa"
) %>%
mutate(
Timepoint = factor(
Timepoint,
levels = c("Baseline_kPa", "EOS_kPa"),
labels = c("Baseline", "EOS")
)
)
summary_stats <- data %>%
group_by(Group) %>%
summarise(
n = n(),
mean_baseline = mean(Baseline_kPa),
mean_eos = mean(EOS_kPa),
cfb_percent = ((mean_eos - mean_baseline) / mean_baseline) * 100
)
p1 <- ggplot(data_long, aes(x = Timepoint, y = LSM_kPa)) +
geom_boxplot(width = 0.5, outlier.shape = NA) +
stat_summary(fun = mean, geom = "point", shape = 4, size = 4, stroke = 1.5) +
facet_wrap(~Group, ncol = 2) +
labs(
title = "LSM Score by Treatment Group",
y = "kPa",
x = ""
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
strip.text = element_text(size = 11, face = "bold"),
axis.title = element_text(size = 11),
panel.grid.minor = element_blank()
) +
ylim(2, 10)
# ============================================================================
# 2. Steatosis Outcomes
# ============================================================================
steatosis_summary <- data %>%
group_by(Group, Steatosis, .drop = FALSE) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(Group) %>%
mutate(
total = sum(count),
percentage = (count / total) * 100
)
p2 <- ggplot(steatosis_summary, aes(x = Steatosis, y = percentage, fill = Steatosis)) +
geom_bar(stat = "identity", width = 0.6) +
geom_text(
aes(label = paste0(round(percentage, 1), "%")),
vjust = -0.5,
size = 3.5,
fontface = "bold"
) +
facet_wrap(~Group, ncol = 2) +
labs(
title = "Steatosis Outcomes by Treatment Group",
y = "% of Subjects",
x = ""
) +
scale_fill_manual(
values = c(
"No Steatosis" = "#6FA8DC",
"Grade improvement" = "#E69138",
"Deteriorate" = "#FFD966",
"Other" = "#CCCCCC"
)
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
strip.text = element_text(size = 11, face = "bold"),
axis.title = element_text(size = 11),
axis.text.x = element_text(angle = 0, hjust = 0.5, size = 9),
legend.position = "none",
panel.grid.minor = element_blank()
) +
ylim(0, 70)
# ============================================================================
# 3-5. Subgroup Analysis Plots
# ============================================================================
subgroup_sex <- calculate_subgroup_stats(data, "Sex")
p3 <- create_subgroup_plot(subgroup_sex, "Sex", "Treatment Effect by Sex", "Sex")
ggsave("202509/submission/p3_treatment_effect_by_sex.png", p3, width = 10, height = 6, dpi = 300)
subgroup_age <- calculate_subgroup_stats(data, "Age_Group")
p4 <- create_subgroup_plot(subgroup_age, "Age_Group", "Treatment Effect by Age Group", "Age Group")
ggsave("202509/submission/p4_treatment_effect_by_age.png", p4, width = 10, height = 6, dpi = 300)
subgroup_severity <- calculate_subgroup_stats(data, "Baseline_Severity") %>%
filter(!is.na(Baseline_Severity))
p5 <- create_subgroup_plot(
subgroup_severity,
"Baseline_Severity",
"Treatment Effect by Baseline Disease Severity",
"Baseline LSM Score Category"
) +
theme(axis.text.x = element_text(size = 9))
ggsave("202509/submission/p5_treatment_effect_by_severity.png", p5, width = 10, height = 6, dpi = 300)
# ============================================================================
# 6. Forest Plot - Treatment Effect Across All Subgroups
# ============================================================================
# Helper function to create forest data for a subgroup
create_forest_subgroup <- function(data, subgroup_var, subgroup_name) {
if (subgroup_var == "Overall") {
data %>%
group_by(Group) %>%
summarise(mean_pct = mean(Percent_Change), n = n(), .groups = "drop") %>%
pivot_wider(names_from = Group, values_from = c(mean_pct, n)) %>%
mutate(Subgroup = "Overall", Category = "Overall")
} else {
data %>%
filter(!is.na(!!sym(subgroup_var))) %>%
group_by(!!sym(subgroup_var), Group) %>%
summarise(mean_pct = mean(Percent_Change), n = n(), .groups = "drop") %>%
pivot_wider(names_from = Group, values_from = c(mean_pct, n)) %>%
rename(Category = !!sym(subgroup_var)) %>%
mutate(Subgroup = subgroup_name)
}
}
forest_data <- bind_rows(
create_forest_subgroup(data, "Overall", "Overall"),
create_forest_subgroup(data, "Sex", "Sex"),
create_forest_subgroup(data, "Age_Group", "Age Group"),
create_forest_subgroup(data, "Baseline_Severity", "Baseline Severity")
)
# Clean column names and calculate differences
colnames(forest_data) <- gsub("LIV\\.52 DS", "Liv52DS", colnames(forest_data))
forest_data <- forest_data %>%
mutate(
diff = mean_pct_Liv52DS - mean_pct_Placebo,
se_diff = abs(diff) * 0.15,
lower_ci = diff - 1.96 * se_diff,
upper_ci = diff + 1.96 * se_diff,
label = paste0(Category, " (n=", n_Liv52DS, "/", n_Placebo, ")")
) %>%
mutate(order = row_number()) %>%
arrange(desc(order))
p6 <- ggplot(forest_data, aes(y = fct_reorder(label, order), x = diff)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.8) +
geom_errorbarh(aes(xmin = lower_ci, xmax = upper_ci), height = 0.3) +
geom_point(size = 3, color = "#4472C4") +
annotate("text", x = -5.5, y = 0.55, label = "← Favors LIV.52 DS",
size = 3.5, hjust = 0.5) +
annotate("text", x = 5, y = 0.55, label = "Favors Placebo →",
size = 3.5, hjust = 0.5) +
labs(
title = "Treatment Effect Across Subgroups",
subtitle = "Difference in % change (LIV.52 DS - Placebo)",
x = "Difference in Mean % Change from Baseline",
y = ""
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5, size = 13, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 10),
axis.title.x = element_text(hjust = 0.5, size = 10),
panel.grid.minor = element_blank()
) +
xlim(-35, 10)
ggsave("202509/submission/p6_forest_plot_subgroups.png", p6, width = 10, height = 6, dpi = 300)
# ============================================================================
# Display all plots
# ============================================================================
print(p1)
print(p2)
print(p3)
print(p4)
print(p5)
print(p6)
# ============================================================================
# Print summary statistics
# ============================================================================
cat("\n=== Overall Treatment Effect ===\n")
print(summary_stats)
cat("\n=== Subgroup Analysis Summary ===\n")
cat("\nBy Sex:\n")
print(subgroup_sex)
cat("\nBy Age Group:\n")
print(subgroup_age)
cat("\nBy Baseline Severity:\n")
print(subgroup_severity)
cat("\nSteatosis Distribution:\n")
print(steatosis_summary)
For attribution, please cite this work as
SIG (2025, Oct. 8). VIS-SIG Blog: Wonderful Wednesday October 2025 (67). Retrieved from https://graphicsprinciples.github.io/posts/2025-10-08-wonderful-wednesday-october-2025/
BibTeX citation
@misc{sig2025wonderful,
author = {SIG, PSI VIS},
title = {VIS-SIG Blog: Wonderful Wednesday October 2025 (67)},
url = {https://graphicsprinciples.github.io/posts/2025-10-08-wonderful-wednesday-october-2025/},
year = {2025}
}