library(arivale.data.interface)
list_snapshot_contents()
qs <- get_snapshot("assessments", clean=T)
diet_variables <- names(qs)[grepl("diet_freq", names(qs))]
lifestyle_variables <- c(
"assessment_lifestyle_cruciferous_vegetables_enum",
"assessment_lifestyle_fruits_enum",
"assessment_lifestyle_vegetables_enum",
"assessment_lifestyle_sugary_drinks_enum",
"assessment_lifestyle_water_enum",
"assessment_lifestyle_alcohol_drinks_a_day_enum",
"assessment_lifestyle_grains_enum"
)
variables <- c(diet_variables, lifestyle_variables)
qs <- qs[, c("public_client_id", variables), with=FALSE]
qs[, "complete" := apply(qs, 1, function(x) sum(!is.na(x))/length(x))]
qs <- qs[order(-complete)][!duplicated(public_client_id)]
scales <- apply(qs[, variables, with=F], 2, function(x) unique(x, na.rm=T) %>%
sort() %>% paste(collapse = ", ")) %>% data.table()
scales <- data.table(quantity=variables, scale=scales)
fwrite(scales, "data/diet_scales.csv")
All of those should be on a ordinal scale so we will conver them to the corresponding rank values.
library(stringr)
library(magrittr)
for (v in variables) {
qs[[v]] <- str_match(qs[[v]], "^\\((\\d)\\)")[, 2] %>% as.numeric()
}
qs
Now let’s merge in our selected cohort.
cohort <- rbind(
fread("no_weight_loss.csv", colClasses=c(public_client_id="character")),
fread("successful_weight_loss.csv", colClasses=c(public_client_id="character"))
)[since_baseline == 0]
cohort[, "subset" := "controls"]
cohort[weight_change_relative < 0, "subset" := "weight loss"]
cohort[, subset := factor(subset)]
data <- qs[cohort, on="public_client_id"]
Let’s visualize what we have.
library(pheatmap)
library(viridisLite)
df <- as.data.frame(data[, variables, with=F])
rownames(df) <- data$public_client_id
anns <- data.frame(BMI=data$bmi, subset=data$subset, row.names=data$public_client_id)
pheatmap(t(df), color=viridis(256), annotation_col = anns, show_colnames=FALSE)
# Save image
pheatmap(t(df), color=viridis(256), annotation_col = anns, show_colnames=FALSE,
filename="figures/diets.png", width=16, height=6, dpi=300)

Finally we can run the models for BMI.
models <- lapply(variables, function(v){
model <- reformulate(c("age", "sex", "bmi"), v) %>% glm(data=data)
attr(model, "feature") <- v
model
})
bmi_tests <- lapply(models, function(m) {
coefs <- summary(m)$coefficients
data.table(
feature=attr(m, "feature"),
coef=coefs[4, 1],
se=coefs[4, 2],
t=coefs[4, 3],
p=coefs[4, 4]
)
}) %>% rbindlist()
bmi_tests[, "padj" := p.adjust(p, method="fdr")]
bmi_tests[, "variable" := "bmi"]
bmi_tests[order(p)]
And for weight loss.
models <- lapply(variables, function(v){
model <- reformulate(c("age", "sex", "bmi", "subset"), v) %>%
glm(data=data)
attr(model, "feature") <- v
model
})
wl_tests <- lapply(models, function(m) {
coefs <- summary(m)$coefficients
data.table(
feature=attr(m, "feature"),
coef=coefs[5, 1],
se=coefs[5, 2],
t=coefs[5, 3],
p=coefs[5, 4]
)
}) %>% rbindlist()
wl_tests[, "padj" := p.adjust(p, method="fdr")]
wl_tests[, "variable" := "subset"]
wl_tests[order(p)]
tests <- rbind(bmi_tests, wl_tests)
library(ggplot2)
theme_minimal() %>% theme_set()
wide <- dcast(tests, feature ~ variable, value.var=c("t", "padj"), fill=0)
wide[, "sig" := "none"]
wide[padj_bmi < 0.05, "sig" := "BMI"]
wide[padj_subset < 0.05, "sig" := "weight loss"]
wide[padj_bmi < 0.05 & padj_subset < 0.05, "sig" := "both"]
wide[, "sig" := factor(sig, levels=c("none", "BMI", "weight loss", "both"))]
ggplot(wide, aes(x=t_bmi, y=t_subset, color=sig)) +
geom_vline(xintercept = 0, color="gray30", lty="dashed") +
geom_hline(yintercept = 0, color="gray30", lty="dashed") +
geom_point() + stat_smooth(method="glm", aes(group = 1)) +
labs(x = "t statistic BMI", y = "t statistic weight loss") +
scale_color_manual(values=c(none = "black", BMI = "royalblue", `weight loss`="orange", both = "red"))

ggsave("figures/diet_t.png", dpi=300, width=5, height=3)
cor.test(~ t_bmi + t_subset, data=wide)
Pearson's product-moment correlation
data: t_bmi and t_subset
t = -1.1438, df = 37, p-value = 0.26
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.4727561 0.1388057
sample estimates:
cor
-0.184805
Overall explained variance
library(vegan)
m <- as(data[, variables, with=F], "matrix")
m[is.na(m)] <- min(as.numeric(m), na.rm=T)
perm <- adonis(m ~ age + sex + bmi + subset, data=data, method="euclidean")
perm
Call:
adonis(formula = m ~ age + sex + bmi + subset, data = data, method = "euclidean")
Permutation: free
Number of permutations: 999
Terms added sequentially (first to last)
Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
age 1 324.9 324.89 1.96175 0.01883 0.124
sex 1 94.0 94.01 0.56765 0.00545 0.538
bmi 1 159.0 158.96 0.95982 0.00921 0.320
subset 1 118.8 118.76 0.71709 0.00688 0.447
Residuals 100 16561.3 165.61 0.95963
Total 104 17257.9 1.00000
LS0tCnRpdGxlOiAiRGlldCBjb3ZhcmlhdGVzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeShhcml2YWxlLmRhdGEuaW50ZXJmYWNlKQoKbGlzdF9zbmFwc2hvdF9jb250ZW50cygpCnFzIDwtIGdldF9zbmFwc2hvdCgiYXNzZXNzbWVudHMiLCBjbGVhbj1UKQpkaWV0X3ZhcmlhYmxlcyA8LSBuYW1lcyhxcylbZ3JlcGwoImRpZXRfZnJlcSIsIG5hbWVzKHFzKSldCmxpZmVzdHlsZV92YXJpYWJsZXMgPC0gYygKICAgICJhc3Nlc3NtZW50X2xpZmVzdHlsZV9jcnVjaWZlcm91c192ZWdldGFibGVzX2VudW0iLAogICAgImFzc2Vzc21lbnRfbGlmZXN0eWxlX2ZydWl0c19lbnVtIiwgICAgICAgICAgICAgICAgICAgICAgCiAgICAiYXNzZXNzbWVudF9saWZlc3R5bGVfdmVnZXRhYmxlc19lbnVtIiwgICAgICAgICAgICAgICAgICAKICAgICJhc3Nlc3NtZW50X2xpZmVzdHlsZV9zdWdhcnlfZHJpbmtzX2VudW0iLCAgICAgICAgICAgICAgIAogICAgImFzc2Vzc21lbnRfbGlmZXN0eWxlX3dhdGVyX2VudW0iLCAgICAgICAgICAgICAgICAgICAgICAgCiAgICAiYXNzZXNzbWVudF9saWZlc3R5bGVfYWxjb2hvbF9kcmlua3NfYV9kYXlfZW51bSIsICAgICAgICAKICAgICJhc3Nlc3NtZW50X2xpZmVzdHlsZV9ncmFpbnNfZW51bSIKKQp2YXJpYWJsZXMgPC0gYyhkaWV0X3ZhcmlhYmxlcywgbGlmZXN0eWxlX3ZhcmlhYmxlcykKcXMgPC0gcXNbLCBjKCJwdWJsaWNfY2xpZW50X2lkIiwgdmFyaWFibGVzKSwgd2l0aD1GQUxTRV0KcXNbLCAiY29tcGxldGUiIDo9IGFwcGx5KHFzLCAxLCBmdW5jdGlvbih4KSBzdW0oIWlzLm5hKHgpKS9sZW5ndGgoeCkpXQpxcyA8LSBxc1tvcmRlcigtY29tcGxldGUpXVshZHVwbGljYXRlZChwdWJsaWNfY2xpZW50X2lkKV0KCnNjYWxlcyA8LSBhcHBseShxc1ssIHZhcmlhYmxlcywgd2l0aD1GXSwgMiwgZnVuY3Rpb24oeCkgdW5pcXVlKHgsIG5hLnJtPVQpICU+JSAKICAgICAgICAgIHNvcnQoKSAlPiUgcGFzdGUoY29sbGFwc2UgPSAiLCAiKSkgJT4lIGRhdGEudGFibGUoKQpzY2FsZXMgPC0gZGF0YS50YWJsZShxdWFudGl0eT12YXJpYWJsZXMsIHNjYWxlPXNjYWxlcykKZndyaXRlKHNjYWxlcywgImRhdGEvZGlldF9zY2FsZXMuY3N2IikKYGBgCgpBbGwgb2YgdGhvc2Ugc2hvdWxkIGJlIG9uIGEgb3JkaW5hbCBzY2FsZSBzbyB3ZSB3aWxsIGNvbnZlciB0aGVtIHRvIHRoZSBjb3JyZXNwb25kaW5nIHJhbmsgdmFsdWVzLgoKYGBge3J9CmxpYnJhcnkoc3RyaW5ncikKbGlicmFyeShtYWdyaXR0cikKCmZvciAodiBpbiB2YXJpYWJsZXMpIHsKICAgIHFzW1t2XV0gPC0gc3RyX21hdGNoKHFzW1t2XV0sICJeXFwoKFxcZClcXCkiKVssIDJdICU+JSBhcy5udW1lcmljKCkKfQoKcXMKYGBgCk5vdyBsZXQncyBtZXJnZSBpbiBvdXIgc2VsZWN0ZWQgY29ob3J0LgoKYGBge3J9CmNvaG9ydCA8LSByYmluZCgKICAgIGZyZWFkKCJub193ZWlnaHRfbG9zcy5jc3YiLCBjb2xDbGFzc2VzPWMocHVibGljX2NsaWVudF9pZD0iY2hhcmFjdGVyIikpLAogICAgZnJlYWQoInN1Y2Nlc3NmdWxfd2VpZ2h0X2xvc3MuY3N2IiwgY29sQ2xhc3Nlcz1jKHB1YmxpY19jbGllbnRfaWQ9ImNoYXJhY3RlciIpKQopW3NpbmNlX2Jhc2VsaW5lID09IDBdCmNvaG9ydFssICJzdWJzZXQiIDo9ICJjb250cm9scyJdCmNvaG9ydFt3ZWlnaHRfY2hhbmdlX3JlbGF0aXZlIDwgMCwgInN1YnNldCIgOj0gIndlaWdodCBsb3NzIl0KY29ob3J0Wywgc3Vic2V0IDo9IGZhY3RvcihzdWJzZXQpXQoKZGF0YSA8LSBxc1tjb2hvcnQsIG9uPSJwdWJsaWNfY2xpZW50X2lkIl0KYGBgCgpMZXQncyB2aXN1YWxpemUgd2hhdCB3ZSBoYXZlLgoKYGBge3IsIGZpZy53aWR0aD0xNiwgZmlnLmhlaWdodD02fQpsaWJyYXJ5KHBoZWF0bWFwKQpsaWJyYXJ5KHZpcmlkaXNMaXRlKQoKZGYgPC0gYXMuZGF0YS5mcmFtZShkYXRhWywgdmFyaWFibGVzLCB3aXRoPUZdKQpyb3duYW1lcyhkZikgPC0gZGF0YSRwdWJsaWNfY2xpZW50X2lkCmFubnMgPC0gZGF0YS5mcmFtZShCTUk9ZGF0YSRibWksIHN1YnNldD1kYXRhJHN1YnNldCwgcm93Lm5hbWVzPWRhdGEkcHVibGljX2NsaWVudF9pZCkKcGhlYXRtYXAodChkZiksIGNvbG9yPXZpcmlkaXMoMjU2KSwgYW5ub3RhdGlvbl9jb2wgPSBhbm5zLCBzaG93X2NvbG5hbWVzPUZBTFNFKQoKIyBTYXZlIGltYWdlCnBoZWF0bWFwKHQoZGYpLCBjb2xvcj12aXJpZGlzKDI1NiksIGFubm90YXRpb25fY29sID0gYW5ucywgc2hvd19jb2xuYW1lcz1GQUxTRSwKICAgICAgICAgZmlsZW5hbWU9ImZpZ3VyZXMvZGlldHMucG5nIiwgd2lkdGg9MTYsIGhlaWdodD02LCBkcGk9MzAwKQpgYGAKCkZpbmFsbHkgd2UgY2FuIHJ1biB0aGUgbW9kZWxzIGZvciBCTUkuCgpgYGB7cn0KbW9kZWxzIDwtIGxhcHBseSh2YXJpYWJsZXMsIGZ1bmN0aW9uKHYpewogICAgbW9kZWwgPC0gcmVmb3JtdWxhdGUoYygiYWdlIiwgInNleCIsICJibWkiKSwgdikgJT4lIGdsbShkYXRhPWRhdGEpCiAgICBhdHRyKG1vZGVsLCAiZmVhdHVyZSIpIDwtIHYKICAgIG1vZGVsCn0pCgpibWlfdGVzdHMgPC0gbGFwcGx5KG1vZGVscywgZnVuY3Rpb24obSkgewogICAgY29lZnMgPC0gc3VtbWFyeShtKSRjb2VmZmljaWVudHMKICAgIGRhdGEudGFibGUoCiAgICAgICAgZmVhdHVyZT1hdHRyKG0sICJmZWF0dXJlIiksIAogICAgICAgIGNvZWY9Y29lZnNbNCwgMV0sIAogICAgICAgIHNlPWNvZWZzWzQsIDJdLCAKICAgICAgICB0PWNvZWZzWzQsIDNdLCAKICAgICAgICBwPWNvZWZzWzQsIDRdCiAgICApCn0pICU+JSByYmluZGxpc3QoKQpibWlfdGVzdHNbLCAicGFkaiIgOj0gcC5hZGp1c3QocCwgbWV0aG9kPSJmZHIiKV0KYm1pX3Rlc3RzWywgInZhcmlhYmxlIiA6PSAiYm1pIl0KYm1pX3Rlc3RzW29yZGVyKHApXQpgYGAKCkFuZCBmb3Igd2VpZ2h0IGxvc3MuCgpgYGB7cn0KbW9kZWxzIDwtIGxhcHBseSh2YXJpYWJsZXMsIGZ1bmN0aW9uKHYpewogICAgbW9kZWwgPC0gcmVmb3JtdWxhdGUoYygiYWdlIiwgInNleCIsICJibWkiLCAic3Vic2V0IiksIHYpICU+JSAKICAgICAgICAgICAgIGdsbShkYXRhPWRhdGEpCiAgICBhdHRyKG1vZGVsLCAiZmVhdHVyZSIpIDwtIHYKICAgIG1vZGVsCn0pCgp3bF90ZXN0cyA8LSBsYXBwbHkobW9kZWxzLCBmdW5jdGlvbihtKSB7CiAgICBjb2VmcyA8LSBzdW1tYXJ5KG0pJGNvZWZmaWNpZW50cwogICAgZGF0YS50YWJsZSgKICAgICAgICBmZWF0dXJlPWF0dHIobSwgImZlYXR1cmUiKSwgCiAgICAgICAgY29lZj1jb2Vmc1s1LCAxXSwgCiAgICAgICAgc2U9Y29lZnNbNSwgMl0sIAogICAgICAgIHQ9Y29lZnNbNSwgM10sIAogICAgICAgIHA9Y29lZnNbNSwgNF0KICAgICkKfSkgJT4lIHJiaW5kbGlzdCgpCndsX3Rlc3RzWywgInBhZGoiIDo9IHAuYWRqdXN0KHAsIG1ldGhvZD0iZmRyIildCndsX3Rlc3RzWywgInZhcmlhYmxlIiA6PSAic3Vic2V0Il0Kd2xfdGVzdHNbb3JkZXIocCldCmBgYAoKYGBge3J9CnRlc3RzIDwtIHJiaW5kKGJtaV90ZXN0cywgd2xfdGVzdHMpCmZ3cml0ZSh0ZXN0c1tvcmRlcihwYWRqKV0sICJkYXRhL3Rlc3RzX2RpZXQuY3N2IikKYGBgCgpgYGB7ciwgZmlnLndpZHRoPTUsIGZpZy5oZWlnaHQ9M30KbGlicmFyeShnZ3Bsb3QyKQp0aGVtZV9taW5pbWFsKCkgJT4lIHRoZW1lX3NldCgpCgp3aWRlIDwtIGRjYXN0KHRlc3RzLCBmZWF0dXJlIH4gdmFyaWFibGUsIHZhbHVlLnZhcj1jKCJ0IiwgInBhZGoiKSwgZmlsbD0wKQp3aWRlWywgInNpZyIgOj0gIm5vbmUiXQp3aWRlW3BhZGpfYm1pIDwgMC4wNSwgInNpZyIgOj0gIkJNSSJdCndpZGVbcGFkal9zdWJzZXQgPCAwLjA1LCAic2lnIiA6PSAid2VpZ2h0IGxvc3MiXQp3aWRlW3BhZGpfYm1pIDwgMC4wNSAmIHBhZGpfc3Vic2V0IDwgMC4wNSwgInNpZyIgOj0gImJvdGgiXQp3aWRlWywgInNpZyIgOj0gZmFjdG9yKHNpZywgbGV2ZWxzPWMoIm5vbmUiLCAiQk1JIiwgIndlaWdodCBsb3NzIiwgImJvdGgiKSldCmdncGxvdCh3aWRlLCBhZXMoeD10X2JtaSwgeT10X3N1YnNldCwgY29sb3I9c2lnKSkgKwogICAgZ2VvbV92bGluZSh4aW50ZXJjZXB0ID0gMCwgY29sb3I9ImdyYXkzMCIsIGx0eT0iZGFzaGVkIikgKwogICAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gMCwgY29sb3I9ImdyYXkzMCIsIGx0eT0iZGFzaGVkIikgKwogICAgZ2VvbV9wb2ludCgpICsgc3RhdF9zbW9vdGgobWV0aG9kPSJnbG0iLCBhZXMoZ3JvdXAgPSAxKSkgKwogICAgbGFicyh4ID0gInQgc3RhdGlzdGljIEJNSSIsIHkgPSAidCBzdGF0aXN0aWMgd2VpZ2h0IGxvc3MiKSArCiAgICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzPWMobm9uZSA9ICJibGFjayIsIEJNSSA9ICJyb3lhbGJsdWUiLCBgd2VpZ2h0IGxvc3NgPSJvcmFuZ2UiLCBib3RoID0gInJlZCIpKQpnZ3NhdmUoImZpZ3VyZXMvZGlldF90LnBuZyIsIGRwaT0zMDAsIHdpZHRoPTUsIGhlaWdodD0zKQpjb3IudGVzdCh+IHRfYm1pICsgdF9zdWJzZXQsIGRhdGE9d2lkZSkKYGBgCgojIyBPdmVyYWxsIGV4cGxhaW5lZCB2YXJpYW5jZQoKYGBge3J9CmxpYnJhcnkodmVnYW4pCgptIDwtIGFzKGRhdGFbLCB2YXJpYWJsZXMsIHdpdGg9Rl0sICJtYXRyaXgiKQptW2lzLm5hKG0pXSA8LSBtaW4oYXMubnVtZXJpYyhtKSwgbmEucm09VCkKcGVybSA8LSBhZG9uaXMobSB+IGFnZSArIHNleCArIGJtaSArIHN1YnNldCwgZGF0YT1kYXRhLCBtZXRob2Q9ImV1Y2xpZGVhbiIpCnBlcm0KYGBg