library(mbtools)
library(phyloseq)
library(SummarizedExperiment)
theme_set(theme_minimal())
Let’s start by loading some previously preprocessed metabolome data.
mets <- readRDS("/proj/gibbons/toadnet/data/metabolome.rds")
mets
class: SummarizedExperiment
dim: 1296 3305
metadata(0):
assays(1): ''
rownames(1296): metabolite_35 metabolite_50 ... metabolite_999954839
metabolite_999954840
rowData names(17): chemical_id sub_pathway ... percent_samples_detected total_samples
colnames(3305): A477AV558-002 A391BM948-002 ... A750AX220-002 A803AW467-002
colData names(8): public_client_id sample_id ... weekday season
This includes data for all of Arivale so let’s filter the sample we are interested in:
cohort <- rbind(
fread("no_weight_loss.csv", colClasses=c(public_client_id="character")),
fread("successful_weight_loss.csv", colClasses=c(public_client_id="character"))
)[since_baseline == 0]
cohort[, "subset" := "controls"]
cohort[weight_change_relative < 0, "subset" := "weight loss"]
cohort[, subset := factor(subset)]
manifest <- cohort
met_samples <- colData(mets) %>% as.data.table()
matched <- met_samples[manifest, on = c("public_client_id", "days_in_program"), nomatch=0]
matched[, "subset" := "controls"]
matched[weight_change_relative < 0, subset:="weight loss"]
sdata <- as.data.frame(matched)
sdata$subset <- factor(sdata$subset)
sdata$sex <- factor(sdata$sex)
rownames(sdata) <- sdata$sample_id
sdata$high_bmi <- factor(as.numeric(sdata$bmi > 30))
mets <- mets[, mets$sample_id %in% matched$sample_id]
taxa <- matrix(rownames(mets), ncol=1)
colnames(taxa) <- "metabolite"
rownames(taxa) <- rownames(mets)
mat <- log2(t(assay(mets)))
mat[is.na(mat)] <- -9
ps <- phyloseq(
otu_table(mat, taxa_are_rows = FALSE),
sample_data(sdata),
tax_table(taxa)
)
tests <- association(
ps,
presence_threshold = -5,
min_abundance = -3,
in_samples = 0.5,
confounders = c("age", "bmi", "sex"),
variables = "subset",
taxa_rank = NA,
method = "lm"
)
bmi <- association(
ps,
presence_threshold = -5,
min_abundance = -3,
in_samples = 0.5,
confounders = c("age", "sex"),
variables = "bmi",
taxa_rank = NA,
method = "lm"
)
tests <- rbind(tests, bmi)
rowData(mets)$metabolite <- rownames(mets)
tests[, "metabolite" := variant]
tests <- merge.data.table(tests, rowData(mets), on="metabolite")
fwrite(tests[order(padj)], "data/tests_metabolites.csv")
tests
Volcano plots
ggplot(tests, aes(log2FoldChange, y=-log10(pvalue),
shape=padj<0.1, col=variable, size=padj<0.1)) +
geom_point() + scale_size_discrete(range=c(2, 3)) +
labs(shape="FDR < 0.1", size="FDR < 0.1")
Using size for a discrete variable is not advised.
ggsave("figures/metabolome_volcano.png", dpi=300, width=4, height=3)
Corr
wide <- dcast(tests, metabolite ~ variable, value.var=c("t", "padj"), fill=0)
wide[, "sig" := "none"]
wide[padj_bmi < 0.05, "sig" := "BMI"]
wide[padj_subset < 0.05, "sig" := "weight loss"]
wide[padj_bmi < 0.05 & padj_subset < 0.05, "sig" := "both"]
wide[, "sig" := factor(sig, levels=c("none", "BMI", "weight loss", "both"))]
ggplot(wide, aes(x=t_bmi, y=t_subset, color=sig)) +
geom_vline(xintercept = 0, color="gray30", lty="dashed") +
geom_hline(yintercept = 0, color="gray30", lty="dashed") +
geom_point() + stat_smooth(method="glm", aes(group = 1)) +
labs(x = "t statistic BMI", y = "t statistic weight loss") +
scale_color_manual(values=c(none = "black", BMI = "royalblue", `weight loss`="orange", both = "red"))
ggsave("figures/metabolome_t.png", dpi=300, width=5, height=3)
cor.test(~ t_bmi + t_subset, data=wide)
Pearson's product-moment correlation
data: t_bmi and t_subset
t = 2.4364, df = 932, p-value = 0.01502
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.0154859 0.1429713
sample estimates:
cor
0.0795539
library(vegan)
m <- as(otu_table(ps), "matrix")
m[is.na(m)] <- min(as.numeric(m), na.rm=T)
perm <- adonis(m ~ age + sex + bmi + subset, data=as(sample_data(ps), "data.frame"), method="euclidean")
perm
Call:
adonis(formula = m ~ age + sex + bmi + subset, data = as(sample_data(ps), "data.frame"), method = "euclidean")
Permutation: free
Number of permutations: 999
Terms added sequentially (first to last)
Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
age 1 15212 15212 1.29265 0.01352 0.019 *
sex 1 20666 20666 1.75610 0.01837 0.002 **
bmi 1 30244 30244 2.57009 0.02689 0.001 ***
subset 1 11309 11309 0.96102 0.01005 0.577
Residuals 89 1047339 11768 0.93116
Total 93 1124770 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1