library(data.table)
library(magrittr)
library(ggplot2)
library(SummarizedExperiment)
library(readxl)
theme_set(theme_minimal())
We will start by reading the data.
gmap <- c(control = "no weight loss", `weight loss` = "weight loss")
srm <- read_excel("data/innovator_SRM_02122020.xlsx", sheet = "data") %>% setDT()
prots <- read_excel("data/innovator_SRM_02122020.xlsx", sheet = "proteins") %>% setDT()
srm <- melt(srm, id.vars = c("replicate_id", "public_client_id", "group"),
variable.name = "id", value.name = "abundance")
srm <- prots[srm, on = "id"]
srm[, c("group", "state") := tstrsplit(group, "_")]
srm[, group := gmap[group]]
srm
Let;s join that with our manifest.
manifest <- fread("data/manifest.csv")
classes <- c(microbiome_id = "character")
cohort <- list(
no_loss = fread("no_weight_loss.csv", colClasses = classes)[, "group" := "no weight loss"],
loss = fread("successful_weight_loss.csv", colClasses = classes)[, "group" := "weight loss"]
) %>% rbindlist()
cohort[, "state" := c("before", "after")[order(days_in_program)], by = "public_client_id"]
cohort
And let’s combine the two. We will only keep the first transition for each protein to have the same n for each probe.
merged <- cohort[srm, on = c("public_client_id", "group", "state")]
norm <- copy(merged)
norm[, "log_abundance" := log2(abundance)]
norm[, "log_abundance_norm" := log_abundance - mean(log_abundance), by = c("public_client_id", "state")]
ggplot(norm, aes(x=factor(paste(public_client_id, state)),
y=log_abundance, color=group)) +
geom_jitter(width=0.2)

Now we can start to model the associations:
model_stats <- function(dt, delta = FALSE) {
mod <- glm(log_abundance ~ group + bmi + age, data=dt)
if (delta) {
mod <- glm(delta ~ group + bmi + age, data=dt)
}
coef <- coefficients(mod)[2]
pval <- summary(mod)$coefficients[2, 4]
pval_bmi <- summary(mod)$coefficients[3, 4]
return(data.table(
id = dt$id[1],
# gene = dt[1, `gene name`],
coef_name = names(coefficients(mod))[2],
coef = coef,
pval = pval,
coef_bmi = coefficients(mod)[3],
pval_bmi = pval_bmi
))
}
stats <- norm[state == "after", model_stats(.SD), by = "gene name"]
stats[, padj := p.adjust(pval, method = "fdr")]
stats[, padj_bmi := p.adjust(pval_bmi, method = "fdr")]
stats[order(padj)]
ggplot(norm[state == "after"], aes(x=group, y=log_abundance)) +
geom_jitter(width=0.2, alpha = 0.5) +
facet_wrap(~ `gene name`, scale="free_y") +
labs(x = "", y="Δprotein [log2(abundance)/month]") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

And we can look at the deltas:
norm[, delta := (log_abundance[state == "after"] - log_abundance[state == "before"]) / span_days * 30.5, by = c("public_client_id", "id")]
stats <- norm[state == "after", model_stats(.SD, delta=T), by = "gene name"]
stats[, padj := p.adjust(pval, method = "fdr")]
stats[, padj_bmi := p.adjust(pval_bmi, method = "fdr")]
stats[order(pval)]
sig <- stats[padj < 0.1, `gene name`]
npep <- merged[, uniqueN(`peptide seq`), by="gene name"]
labels <- npep[, paste0(`gene name`, " [", V1, "]")]
names(labels) <- npep[, `gene name`]
ggplot(norm[state == "before" & `gene name` %in% sig], aes(x=group, y=delta)) +
geom_hline(yintercept = 0, lty = "dashed", color = "gray20") +
geom_jitter(width=0.2) +
facet_wrap(~ `gene name`, scale="free_y", nrow=1, labeller=as_labeller(labels)) +
stat_summary(fun.y = median, geom = "point", size = 2, stroke=1, color = "blue", fill="white", shape = 23) +
labs(x = "", y="Δprotein [log2 fold-change/month]") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
`fun.y` is deprecated. Use `fun` instead.

ggsave("figures/deltas.png", dpi=300, width=12, height=3.5)
timed <- copy(norm)[, state := factor(state, levels=c("before", "after"))]
timed[state == "before", time := 0]
timed[state == "after", time := span_days]
means <- timed[`gene name` == "ADIPOQ", .(log_abundance = mean(log_abundance)), by = c("state", "group", "id", "gene name")]
ggplot(timed[`gene name` == "ADIPOQ"],
aes(x=state, y=log_abundance, color = group)) +
geom_point(alpha=0.25, size=0.5) + geom_line(aes(group = interaction(public_client_id, id)), alpha=0.25) +
geom_point(data=means) + geom_line(aes(group = id), data = means, size=1) +
guides(color = FALSE) + labs(x="", y="abundance [log-scale]") +
facet_grid(group ~ `gene name`)

ggsave("figures/slopes.png", dpi=300, width=3, height=3.5)
LS0tCnRpdGxlOiAiU1JNIGRhdGEiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyfQpsaWJyYXJ5KGRhdGEudGFibGUpCmxpYnJhcnkobWFncml0dHIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShTdW1tYXJpemVkRXhwZXJpbWVudCkKbGlicmFyeShyZWFkeGwpCnRoZW1lX3NldCh0aGVtZV9taW5pbWFsKCkpCmBgYAoKV2Ugd2lsbCBzdGFydCBieSByZWFkaW5nIHRoZSBkYXRhLgoKYGBge3J9CmdtYXAgPC0gYyhjb250cm9sID0gIm5vIHdlaWdodCBsb3NzIiwgYHdlaWdodCBsb3NzYCA9ICJ3ZWlnaHQgbG9zcyIpCgpzcm0gPC0gcmVhZF9leGNlbCgiZGF0YS9pbm5vdmF0b3JfU1JNXzAyMTIyMDIwLnhsc3giLCBzaGVldCA9ICJkYXRhIikgJT4lIHNldERUKCkKcHJvdHMgPC0gcmVhZF9leGNlbCgiZGF0YS9pbm5vdmF0b3JfU1JNXzAyMTIyMDIwLnhsc3giLCBzaGVldCA9ICJwcm90ZWlucyIpICU+JSBzZXREVCgpCgpzcm0gPC0gbWVsdChzcm0sIGlkLnZhcnMgPSBjKCJyZXBsaWNhdGVfaWQiLCAicHVibGljX2NsaWVudF9pZCIsICJncm91cCIpLCAKICAgICAgICAgICAgdmFyaWFibGUubmFtZSA9ICJpZCIsIHZhbHVlLm5hbWUgPSAiYWJ1bmRhbmNlIikKc3JtIDwtIHByb3RzW3NybSwgb24gPSAiaWQiXQpzcm1bLCBjKCJncm91cCIsICJzdGF0ZSIpIDo9IHRzdHJzcGxpdChncm91cCwgIl8iKV0Kc3JtWywgZ3JvdXAgOj0gZ21hcFtncm91cF1dCnNybQpgYGAKCkxldDtzIGpvaW4gdGhhdCB3aXRoIG91ciBtYW5pZmVzdC4KCmBgYHtyfQpjbGFzc2VzIDwtIGMobWljcm9iaW9tZV9pZCA9ICJjaGFyYWN0ZXIiKQpjb2hvcnQgPC0gbGlzdCgKICAgIG5vX2xvc3MgPSBmcmVhZCgibm9fd2VpZ2h0X2xvc3MuY3N2IiwgY29sQ2xhc3NlcyA9IGNsYXNzZXMpWywgImdyb3VwIiA6PSAibm8gd2VpZ2h0IGxvc3MiXSwKICAgIGxvc3MgPSBmcmVhZCgic3VjY2Vzc2Z1bF93ZWlnaHRfbG9zcy5jc3YiLCBjb2xDbGFzc2VzID0gY2xhc3NlcylbLCAiZ3JvdXAiIDo9ICJ3ZWlnaHQgbG9zcyJdCikgJT4lIHJiaW5kbGlzdCgpCmNvaG9ydFssICJzdGF0ZSIgOj0gYygiYmVmb3JlIiwgImFmdGVyIilbb3JkZXIoZGF5c19pbl9wcm9ncmFtKV0sIGJ5ID0gInB1YmxpY19jbGllbnRfaWQiXQpjb2hvcnQKYGBgCgpBbmQgbGV0J3MgY29tYmluZSB0aGUgdHdvLiBXZSB3aWxsIG9ubHkga2VlcCB0aGUgZmlyc3QgdHJhbnNpdGlvbiBmb3IgZWFjaCBwcm90ZWluIHRvIGhhdmUgdGhlIHNhbWUgbiBmb3IgZWFjaCBwcm9iZS4KCmBgYHtyfQptZXJnZWQgPC0gY29ob3J0W3NybSwgb24gPSBjKCJwdWJsaWNfY2xpZW50X2lkIiwgImdyb3VwIiwgInN0YXRlIildCgpub3JtIDwtIGNvcHkobWVyZ2VkKQpub3JtWywgImxvZ19hYnVuZGFuY2UiIDo9IGxvZzIoYWJ1bmRhbmNlKV0Kbm9ybVssICJsb2dfYWJ1bmRhbmNlX25vcm0iIDo9IGxvZ19hYnVuZGFuY2UgLSBtZWFuKGxvZ19hYnVuZGFuY2UpLCBieSA9IGMoInB1YmxpY19jbGllbnRfaWQiLCAic3RhdGUiKV0KZ2dwbG90KG5vcm0sIGFlcyh4PWZhY3RvcihwYXN0ZShwdWJsaWNfY2xpZW50X2lkLCBzdGF0ZSkpLCAKICAgICAgICAgICAgICAgICB5PWxvZ19hYnVuZGFuY2UsIGNvbG9yPWdyb3VwKSkgKyAKICAgIGdlb21faml0dGVyKHdpZHRoPTAuMikKYGBgCgpOb3cgd2UgY2FuIHN0YXJ0IHRvIG1vZGVsIHRoZSBhc3NvY2lhdGlvbnM6CgpgYGB7ciwgZmlnLndpZHRoID0gOCwgZmlnLmhlaWdodCA9IDh9Cm1vZGVsX3N0YXRzIDwtIGZ1bmN0aW9uKGR0LCBkZWx0YSA9IEZBTFNFKSB7CiAgICBtb2QgPC0gZ2xtKGxvZ19hYnVuZGFuY2UgfiBncm91cCArIGJtaSArIGFnZSwgZGF0YT1kdCkKICAgIGlmIChkZWx0YSkgewogICAgICAgIG1vZCA8LSBnbG0oZGVsdGEgfiBncm91cCArIGJtaSArIGFnZSwgZGF0YT1kdCkKICAgIH0KICAgIGNvZWYgPC0gY29lZmZpY2llbnRzKG1vZClbMl0KICAgIHB2YWwgPC0gc3VtbWFyeShtb2QpJGNvZWZmaWNpZW50c1syLCA0XQogICAgcHZhbF9ibWkgPC0gc3VtbWFyeShtb2QpJGNvZWZmaWNpZW50c1szLCA0XQogICAgcmV0dXJuKGRhdGEudGFibGUoCiAgICAgICAgaWQgPSBkdCRpZFsxXSwKICAgICAgICAjIGdlbmUgPSBkdFsxLCBgZ2VuZSBuYW1lYF0sCiAgICAgICAgY29lZl9uYW1lID0gbmFtZXMoY29lZmZpY2llbnRzKG1vZCkpWzJdLAogICAgICAgIGNvZWYgPSBjb2VmLAogICAgICAgIHB2YWwgPSBwdmFsLAogICAgICAgIGNvZWZfYm1pID0gY29lZmZpY2llbnRzKG1vZClbM10sCiAgICAgICAgcHZhbF9ibWkgPSBwdmFsX2JtaQogICAgKSkKfQoKc3RhdHMgPC0gbm9ybVtzdGF0ZSA9PSAiYWZ0ZXIiLCBtb2RlbF9zdGF0cyguU0QpLCBieSA9ICJnZW5lIG5hbWUiXQpzdGF0c1ssIHBhZGogOj0gcC5hZGp1c3QocHZhbCwgbWV0aG9kID0gImZkciIpXQpzdGF0c1ssIHBhZGpfYm1pIDo9IHAuYWRqdXN0KHB2YWxfYm1pLCBtZXRob2QgPSAiZmRyIildCnN0YXRzW29yZGVyKHBhZGopXQoKZ2dwbG90KG5vcm1bc3RhdGUgPT0gImFmdGVyIl0sIGFlcyh4PWdyb3VwLCB5PWxvZ19hYnVuZGFuY2UpKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGg9MC4yLCBhbHBoYSA9IDAuNSkgKyAKICAgIGZhY2V0X3dyYXAofiBgZ2VuZSBuYW1lYCwgc2NhbGU9ImZyZWVfeSIpICsKICAgIGxhYnMoeCA9ICIiLCB5PSLOlHByb3RlaW4gW2xvZzIoYWJ1bmRhbmNlKS9tb250aF0iKSArCiAgICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDMwLCB2anVzdCA9IDEsIGhqdXN0PTEpKQpgYGAKCkFuZCB3ZSBjYW4gbG9vayBhdCB0aGUgZGVsdGFzOgoKYGBge3IsIGZpZy53aWR0aCA9IDEyLCBmaWcuaGVpZ2h0ID0gMy41fQpub3JtWywgZGVsdGEgOj0gKGxvZ19hYnVuZGFuY2Vbc3RhdGUgPT0gImFmdGVyIl0gLSBsb2dfYWJ1bmRhbmNlW3N0YXRlID09ICJiZWZvcmUiXSkgLyBzcGFuX2RheXMgKiAzMC41LCBieSA9IGMoInB1YmxpY19jbGllbnRfaWQiLCAiaWQiKV0KCnN0YXRzIDwtIG5vcm1bc3RhdGUgPT0gImFmdGVyIiwgbW9kZWxfc3RhdHMoLlNELCBkZWx0YT1UKSwgYnkgPSAiZ2VuZSBuYW1lIl0Kc3RhdHNbLCBwYWRqIDo9IHAuYWRqdXN0KHB2YWwsIG1ldGhvZCA9ICJmZHIiKV0Kc3RhdHNbLCBwYWRqX2JtaSA6PSBwLmFkanVzdChwdmFsX2JtaSwgbWV0aG9kID0gImZkciIpXQpzdGF0c1tvcmRlcihwdmFsKV0Kc2lnIDwtIHN0YXRzW3BhZGogPCAwLjEsIGBnZW5lIG5hbWVgXQoKbnBlcCA8LSBtZXJnZWRbLCB1bmlxdWVOKGBwZXB0aWRlIHNlcWApLCBieT0iZ2VuZSBuYW1lIl0KbGFiZWxzIDwtIG5wZXBbLCBwYXN0ZTAoYGdlbmUgbmFtZWAsICIgWyIsIFYxLCAiXSIpXQpuYW1lcyhsYWJlbHMpIDwtIG5wZXBbLCBgZ2VuZSBuYW1lYF0KCmdncGxvdChub3JtW3N0YXRlID09ICJiZWZvcmUiICYgYGdlbmUgbmFtZWAgJWluJSBzaWddLCBhZXMoeD1ncm91cCwgeT1kZWx0YSkpICsgCiAgICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAwLCBsdHkgPSAiZGFzaGVkIiwgY29sb3IgPSAiZ3JheTIwIikgKwogICAgZ2VvbV9qaXR0ZXIod2lkdGg9MC4yKSArIAogICAgZmFjZXRfd3JhcCh+IGBnZW5lIG5hbWVgLCBzY2FsZT0iZnJlZV95IiwgbnJvdz0xLCBsYWJlbGxlcj1hc19sYWJlbGxlcihsYWJlbHMpKSArCiAgICBzdGF0X3N1bW1hcnkoZnVuLnkgPSBtZWRpYW4sIGdlb20gPSAicG9pbnQiLCBzaXplID0gMiwgc3Ryb2tlPTEsIGNvbG9yID0gImJsdWUiLCBmaWxsPSJ3aGl0ZSIsIHNoYXBlID0gMjMpICsKICAgIGxhYnMoeCA9ICIiLCB5PSLOlHByb3RlaW4gW2xvZzIgZm9sZC1jaGFuZ2UvbW9udGhdIikgKwogICAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSAzMCwgdmp1c3QgPSAxLCBoanVzdD0xKSkKZ2dzYXZlKCJmaWd1cmVzL2RlbHRhcy5wbmciLCBkcGk9MzAwLCB3aWR0aD0xMiwgaGVpZ2h0PTMuNSkKYGBgCgpgYGB7ciwgZmlnLndpZHRoPTMsIGZpZy5oZWlnaHQ9My41fQp0aW1lZCA8LSBjb3B5KG5vcm0pWywgc3RhdGUgOj0gZmFjdG9yKHN0YXRlLCBsZXZlbHM9YygiYmVmb3JlIiwgImFmdGVyIikpXQp0aW1lZFtzdGF0ZSA9PSAiYmVmb3JlIiwgdGltZSA6PSAwXQp0aW1lZFtzdGF0ZSA9PSAiYWZ0ZXIiLCB0aW1lIDo9IHNwYW5fZGF5c10KbWVhbnMgPC0gdGltZWRbYGdlbmUgbmFtZWAgPT0gIkFESVBPUSIsIC4obG9nX2FidW5kYW5jZSA9IG1lYW4obG9nX2FidW5kYW5jZSkpLCBieSA9IGMoInN0YXRlIiwgImdyb3VwIiwgImlkIiwgImdlbmUgbmFtZSIpXQpnZ3Bsb3QodGltZWRbYGdlbmUgbmFtZWAgPT0gIkFESVBPUSJdLCAKICAgICAgIGFlcyh4PXN0YXRlLCB5PWxvZ19hYnVuZGFuY2UsIGNvbG9yID0gZ3JvdXApKSArIAogICAgZ2VvbV9wb2ludChhbHBoYT0wLjI1LCBzaXplPTAuNSkgKyBnZW9tX2xpbmUoYWVzKGdyb3VwID0gaW50ZXJhY3Rpb24ocHVibGljX2NsaWVudF9pZCwgaWQpKSwgYWxwaGE9MC4yNSkgKwogICAgZ2VvbV9wb2ludChkYXRhPW1lYW5zKSArIGdlb21fbGluZShhZXMoZ3JvdXAgPSBpZCksIGRhdGEgPSBtZWFucywgc2l6ZT0xKSArCiAgICBndWlkZXMoY29sb3IgPSBGQUxTRSkgKyBsYWJzKHg9IiIsIHk9ImFidW5kYW5jZSBbbG9nLXNjYWxlXSIpICsKICAgIGZhY2V0X2dyaWQoZ3JvdXAgfiBgZ2VuZSBuYW1lYCkKZ2dzYXZlKCJmaWd1cmVzL3Nsb3Blcy5wbmciLCBkcGk9MzAwLCB3aWR0aD0zLCBoZWlnaHQ9My41KQpgYGA=