library(mbtools)
library(phyloseq)
library(SummarizedExperiment)
theme_set(theme_minimal())

meteome analysis

Let’s start by loading some previously preprocessed metabolome data.

mets <- readRDS("/proj/gibbons/toadnet/data/metabolome.rds")
mets
class: SummarizedExperiment 
dim: 1296 3305 
metadata(0):
assays(1): ''
rownames(1296): metabolite_35 metabolite_50 ... metabolite_999954839
  metabolite_999954840
rowData names(17): chemical_id sub_pathway ... percent_samples_detected total_samples
colnames(3305): A477AV558-002 A391BM948-002 ... A750AX220-002 A803AW467-002
colData names(8): public_client_id sample_id ... weekday season

This includes data for all of Arivale so let’s filter the sample we are interested in:

cohort <- rbind(
    fread("no_weight_loss.csv", colClasses=c(public_client_id="character")),
    fread("successful_weight_loss.csv", colClasses=c(public_client_id="character"))
)[since_baseline == 0]
cohort[, "subset" := "controls"]
cohort[weight_change_relative < 0, "subset" := "weight loss"]
cohort[, subset := factor(subset)]

manifest <- cohort
met_samples <- colData(mets) %>% as.data.table()
matched <- met_samples[manifest, on = c("public_client_id", "days_in_program"), nomatch=0]
matched[, "subset" := "controls"]
matched[weight_change_relative < 0, subset:="weight loss"]
sdata <- as.data.frame(matched)
sdata$subset <- factor(sdata$subset)
sdata$sex <- factor(sdata$sex)
rownames(sdata) <- sdata$sample_id
sdata$high_bmi <- factor(as.numeric(sdata$bmi > 30))
mets <- mets[, mets$sample_id %in% matched$sample_id]
taxa <- matrix(rownames(mets), ncol=1)
colnames(taxa) <- "metabolite"
rownames(taxa) <- rownames(mets)
mat <- log2(t(assay(mets)))
mat[is.na(mat)] <- -9
ps <- phyloseq(
    otu_table(mat, taxa_are_rows = FALSE),
    sample_data(sdata),
    tax_table(taxa)
)
tests <- association(
    ps, 
    presence_threshold = -5,
    min_abundance = -3,
    in_samples = 0.5,
    confounders = c("age", "bmi", "sex"),
    variables = "subset",
    taxa_rank = NA,
    method = "lm"
)
bmi <- association(
    ps, 
    presence_threshold = -5,
    min_abundance = -3,
    in_samples = 0.5,
    confounders = c("age", "sex"),
    variables = "bmi",
    taxa_rank = NA,
    method = "lm"
)
tests <- rbind(tests, bmi)
rowData(mets)$metabolite <- rownames(mets)
tests[, "metabolite" := variant]
tests <- merge.data.table(tests, rowData(mets), on="metabolite")
fwrite(tests[order(padj)], "data/tests_metabolites.csv")
tests

Volcano plots


ggplot(tests, aes(log2FoldChange, y=-log10(pvalue), 
                  shape=padj<0.1, col=variable, size=padj<0.1)) +
    geom_point() + scale_size_discrete(range=c(2, 3)) +
    labs(shape="FDR < 0.1", size="FDR < 0.1")
Using size for a discrete variable is not advised.
ggsave("figures/metabolome_volcano.png", dpi=300, width=4, height=3)

Corr

wide <- dcast(tests, metabolite ~ variable, value.var=c("t", "padj"), fill=0)
wide[, "sig" := "none"]
wide[padj_bmi < 0.05, "sig" := "BMI"]
wide[padj_subset < 0.05, "sig" := "weight loss"]
wide[padj_bmi < 0.05 & padj_subset < 0.05, "sig" := "both"]
wide[, "sig" := factor(sig, levels=c("none", "BMI", "weight loss", "both"))]
ggplot(wide, aes(x=t_bmi, y=t_subset, color=sig)) +
    geom_vline(xintercept = 0, color="gray30", lty="dashed") +
    geom_hline(yintercept = 0, color="gray30", lty="dashed") +
    geom_point() + stat_smooth(method="glm", aes(group = 1)) +
    labs(x = "t statistic BMI", y = "t statistic weight loss") +
    scale_color_manual(values=c(none = "black", BMI = "royalblue", `weight loss`="orange", both = "red"))

ggsave("figures/metabolome_t.png", dpi=300, width=5, height=3)
cor.test(~ t_bmi + t_subset, data=wide)

    Pearson's product-moment correlation

data:  t_bmi and t_subset
t = 2.4364, df = 932, p-value = 0.01502
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.0154859 0.1429713
sample estimates:
      cor 
0.0795539 

Overall explained variance

library(vegan)

m <- as(otu_table(ps), "matrix")
m[is.na(m)] <- min(as.numeric(m), na.rm=T)
perm <- adonis(m ~ age + sex + bmi + subset, data=as(sample_data(ps), "data.frame"), method="euclidean")
perm

Call:
adonis(formula = m ~ age + sex + bmi + subset, data = as(sample_data(ps),      "data.frame"), method = "euclidean") 

Permutation: free
Number of permutations: 999

Terms added sequentially (first to last)

          Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
age        1     15212   15212 1.29265 0.01352  0.019 *  
sex        1     20666   20666 1.75610 0.01837  0.002 ** 
bmi        1     30244   30244 2.57009 0.02689  0.001 ***
subset     1     11309   11309 0.96102 0.01005  0.577    
Residuals 89   1047339   11768         0.93116           
Total     93   1124770                 1.00000           
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
LS0tCnRpdGxlOiAibWV0YWJvbG9tZSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkobWJ0b29scykKbGlicmFyeShwaHlsb3NlcSkKbGlicmFyeShTdW1tYXJpemVkRXhwZXJpbWVudCkKdGhlbWVfc2V0KHRoZW1lX21pbmltYWwoKSkKYGBgCgojIG1ldGVvbWUgYW5hbHlzaXMKCkxldCdzIHN0YXJ0IGJ5IGxvYWRpbmcgc29tZSBwcmV2aW91c2x5IHByZXByb2Nlc3NlZCBtZXRhYm9sb21lIGRhdGEuCgpgYGB7cn0KbWV0cyA8LSByZWFkUkRTKCIvcHJvai9naWJib25zL3RvYWRuZXQvZGF0YS9tZXRhYm9sb21lLnJkcyIpCm1ldHMKYGBgCgpUaGlzIGluY2x1ZGVzIGRhdGEgZm9yIGFsbCBvZiBBcml2YWxlIHNvIGxldCdzIGZpbHRlciB0aGUgc2FtcGxlIHdlIGFyZSBpbnRlcmVzdGVkIGluOgoKYGBge3J9CmNvaG9ydCA8LSByYmluZCgKICAgIGZyZWFkKCJub193ZWlnaHRfbG9zcy5jc3YiLCBjb2xDbGFzc2VzPWMocHVibGljX2NsaWVudF9pZD0iY2hhcmFjdGVyIikpLAogICAgZnJlYWQoInN1Y2Nlc3NmdWxfd2VpZ2h0X2xvc3MuY3N2IiwgY29sQ2xhc3Nlcz1jKHB1YmxpY19jbGllbnRfaWQ9ImNoYXJhY3RlciIpKQopW3NpbmNlX2Jhc2VsaW5lID09IDBdCmNvaG9ydFssICJzdWJzZXQiIDo9ICJjb250cm9scyJdCmNvaG9ydFt3ZWlnaHRfY2hhbmdlX3JlbGF0aXZlIDwgMCwgInN1YnNldCIgOj0gIndlaWdodCBsb3NzIl0KY29ob3J0Wywgc3Vic2V0IDo9IGZhY3RvcihzdWJzZXQpXQoKbWFuaWZlc3QgPC0gY29ob3J0Cm1ldF9zYW1wbGVzIDwtIGNvbERhdGEobWV0cykgJT4lIGFzLmRhdGEudGFibGUoKQptYXRjaGVkIDwtIG1ldF9zYW1wbGVzW21hbmlmZXN0LCBvbiA9IGMoInB1YmxpY19jbGllbnRfaWQiLCAiZGF5c19pbl9wcm9ncmFtIiksIG5vbWF0Y2g9MF0KbWF0Y2hlZFssICJzdWJzZXQiIDo9ICJjb250cm9scyJdCm1hdGNoZWRbd2VpZ2h0X2NoYW5nZV9yZWxhdGl2ZSA8IDAsIHN1YnNldDo9IndlaWdodCBsb3NzIl0Kc2RhdGEgPC0gYXMuZGF0YS5mcmFtZShtYXRjaGVkKQpzZGF0YSRzdWJzZXQgPC0gZmFjdG9yKHNkYXRhJHN1YnNldCkKc2RhdGEkc2V4IDwtIGZhY3RvcihzZGF0YSRzZXgpCnJvd25hbWVzKHNkYXRhKSA8LSBzZGF0YSRzYW1wbGVfaWQKc2RhdGEkaGlnaF9ibWkgPC0gZmFjdG9yKGFzLm51bWVyaWMoc2RhdGEkYm1pID4gMzApKQptZXRzIDwtIG1ldHNbLCBtZXRzJHNhbXBsZV9pZCAlaW4lIG1hdGNoZWQkc2FtcGxlX2lkXQp0YXhhIDwtIG1hdHJpeChyb3duYW1lcyhtZXRzKSwgbmNvbD0xKQpjb2xuYW1lcyh0YXhhKSA8LSAibWV0YWJvbGl0ZSIKcm93bmFtZXModGF4YSkgPC0gcm93bmFtZXMobWV0cykKbWF0IDwtIGxvZzIodChhc3NheShtZXRzKSkpCm1hdFtpcy5uYShtYXQpXSA8LSAtOQpwcyA8LSBwaHlsb3NlcSgKICAgIG90dV90YWJsZShtYXQsIHRheGFfYXJlX3Jvd3MgPSBGQUxTRSksCiAgICBzYW1wbGVfZGF0YShzZGF0YSksCiAgICB0YXhfdGFibGUodGF4YSkKKQpgYGAKCmBgYHtyfQp0ZXN0cyA8LSBhc3NvY2lhdGlvbigKICAgIHBzLCAKICAgIHByZXNlbmNlX3RocmVzaG9sZCA9IC01LAogICAgbWluX2FidW5kYW5jZSA9IC0zLAogICAgaW5fc2FtcGxlcyA9IDAuNSwKICAgIGNvbmZvdW5kZXJzID0gYygiYWdlIiwgImJtaSIsICJzZXgiKSwKICAgIHZhcmlhYmxlcyA9ICJzdWJzZXQiLAogICAgdGF4YV9yYW5rID0gTkEsCiAgICBtZXRob2QgPSAibG0iCikKYm1pIDwtIGFzc29jaWF0aW9uKAogICAgcHMsIAogICAgcHJlc2VuY2VfdGhyZXNob2xkID0gLTUsCiAgICBtaW5fYWJ1bmRhbmNlID0gLTMsCiAgICBpbl9zYW1wbGVzID0gMC41LAogICAgY29uZm91bmRlcnMgPSBjKCJhZ2UiLCAic2V4IiksCiAgICB2YXJpYWJsZXMgPSAiYm1pIiwKICAgIHRheGFfcmFuayA9IE5BLAogICAgbWV0aG9kID0gImxtIgopCnRlc3RzIDwtIHJiaW5kKHRlc3RzLCBibWkpCnJvd0RhdGEobWV0cykkbWV0YWJvbGl0ZSA8LSByb3duYW1lcyhtZXRzKQp0ZXN0c1ssICJtZXRhYm9saXRlIiA6PSB2YXJpYW50XQp0ZXN0cyA8LSBtZXJnZS5kYXRhLnRhYmxlKHRlc3RzLCByb3dEYXRhKG1ldHMpLCBvbj0ibWV0YWJvbGl0ZSIpCmZ3cml0ZSh0ZXN0c1tvcmRlcihwYWRqKV0sICJkYXRhL3Rlc3RzX21ldGFib2xpdGVzLmNzdiIpCnRlc3RzCmBgYAoKVm9sY2FubyBwbG90cwoKYGBge3J9CgpnZ3Bsb3QodGVzdHMsIGFlcyhsb2cyRm9sZENoYW5nZSwgeT0tbG9nMTAocHZhbHVlKSwgCiAgICAgICAgICAgICAgICAgIHNoYXBlPXBhZGo8MC4xLCBjb2w9dmFyaWFibGUsIHNpemU9cGFkajwwLjEpKSArCiAgICBnZW9tX3BvaW50KCkgKyBzY2FsZV9zaXplX2Rpc2NyZXRlKHJhbmdlPWMoMiwgMykpICsKICAgIGxhYnMoc2hhcGU9IkZEUiA8IDAuMSIsIHNpemU9IkZEUiA8IDAuMSIpCmdnc2F2ZSgiZmlndXJlcy9tZXRhYm9sb21lX3ZvbGNhbm8ucG5nIiwgZHBpPTMwMCwgd2lkdGg9NCwgaGVpZ2h0PTMpCmBgYAoKQ29ycgoKYGBge3IsIGZpZy53aWR0aD01LCBmaWcuaGVpZ2h0PTN9CndpZGUgPC0gZGNhc3QodGVzdHMsIG1ldGFib2xpdGUgfiB2YXJpYWJsZSwgdmFsdWUudmFyPWMoInQiLCAicGFkaiIpLCBmaWxsPTApCndpZGVbLCAic2lnIiA6PSAibm9uZSJdCndpZGVbcGFkal9ibWkgPCAwLjA1LCAic2lnIiA6PSAiQk1JIl0Kd2lkZVtwYWRqX3N1YnNldCA8IDAuMDUsICJzaWciIDo9ICJ3ZWlnaHQgbG9zcyJdCndpZGVbcGFkal9ibWkgPCAwLjA1ICYgcGFkal9zdWJzZXQgPCAwLjA1LCAic2lnIiA6PSAiYm90aCJdCndpZGVbLCAic2lnIiA6PSBmYWN0b3Ioc2lnLCBsZXZlbHM9Yygibm9uZSIsICJCTUkiLCAid2VpZ2h0IGxvc3MiLCAiYm90aCIpKV0KZ2dwbG90KHdpZGUsIGFlcyh4PXRfYm1pLCB5PXRfc3Vic2V0LCBjb2xvcj1zaWcpKSArCiAgICBnZW9tX3ZsaW5lKHhpbnRlcmNlcHQgPSAwLCBjb2xvcj0iZ3JheTMwIiwgbHR5PSJkYXNoZWQiKSArCiAgICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAwLCBjb2xvcj0iZ3JheTMwIiwgbHR5PSJkYXNoZWQiKSArCiAgICBnZW9tX3BvaW50KCkgKyBzdGF0X3Ntb290aChtZXRob2Q9ImdsbSIsIGFlcyhncm91cCA9IDEpKSArCiAgICBsYWJzKHggPSAidCBzdGF0aXN0aWMgQk1JIiwgeSA9ICJ0IHN0YXRpc3RpYyB3ZWlnaHQgbG9zcyIpICsKICAgIHNjYWxlX2NvbG9yX21hbnVhbCh2YWx1ZXM9Yyhub25lID0gImJsYWNrIiwgQk1JID0gInJveWFsYmx1ZSIsIGB3ZWlnaHQgbG9zc2A9Im9yYW5nZSIsIGJvdGggPSAicmVkIikpCmdnc2F2ZSgiZmlndXJlcy9tZXRhYm9sb21lX3QucG5nIiwgZHBpPTMwMCwgd2lkdGg9NSwgaGVpZ2h0PTMpCmNvci50ZXN0KH4gdF9ibWkgKyB0X3N1YnNldCwgZGF0YT13aWRlKQpgYGAKCiMjIE92ZXJhbGwgZXhwbGFpbmVkIHZhcmlhbmNlCgpgYGB7cn0KbGlicmFyeSh2ZWdhbikKCm0gPC0gYXMob3R1X3RhYmxlKHBzKSwgIm1hdHJpeCIpCm1baXMubmEobSldIDwtIG1pbihhcy5udW1lcmljKG0pLCBuYS5ybT1UKQpwZXJtIDwtIGFkb25pcyhtIH4gYWdlICsgc2V4ICsgYm1pICsgc3Vic2V0LCBkYXRhPWFzKHNhbXBsZV9kYXRhKHBzKSwgImRhdGEuZnJhbWUiKSwgbWV0aG9kPSJldWNsaWRlYW4iKQpwZXJtCmBgYA==