library(arivale.data.interface)

list_snapshot_contents()
qs <- get_snapshot("assessments", clean=T)
diet_variables <- names(qs)[grepl("diet_freq", names(qs))]
lifestyle_variables <- c(
    "assessment_lifestyle_cruciferous_vegetables_enum",
    "assessment_lifestyle_fruits_enum",                      
    "assessment_lifestyle_vegetables_enum",                  
    "assessment_lifestyle_sugary_drinks_enum",               
    "assessment_lifestyle_water_enum",                       
    "assessment_lifestyle_alcohol_drinks_a_day_enum",        
    "assessment_lifestyle_grains_enum"
)
variables <- c(diet_variables, lifestyle_variables)
qs <- qs[, c("public_client_id", variables), with=FALSE]
qs[, "complete" := apply(qs, 1, function(x) sum(!is.na(x))/length(x))]
qs <- qs[order(-complete)][!duplicated(public_client_id)]

scales <- apply(qs[, variables, with=F], 2, function(x) unique(x, na.rm=T) %>% 
          sort() %>% paste(collapse = ", ")) %>% data.table()
scales <- data.table(quantity=variables, scale=scales)
fwrite(scales, "data/diet_scales.csv")

All of those should be on a ordinal scale so we will conver them to the corresponding rank values.

library(stringr)
library(magrittr)

for (v in variables) {
    qs[[v]] <- str_match(qs[[v]], "^\\((\\d)\\)")[, 2] %>% as.numeric()
}

qs

Now let’s merge in our selected cohort.

cohort <- rbind(
    fread("no_weight_loss.csv", colClasses=c(public_client_id="character")),
    fread("successful_weight_loss.csv", colClasses=c(public_client_id="character"))
)[since_baseline == 0]
cohort[, "subset" := "controls"]
cohort[weight_change_relative < 0, "subset" := "weight loss"]
cohort[, subset := factor(subset)]

data <- qs[cohort, on="public_client_id"]

Let’s visualize what we have.

library(pheatmap)
library(viridisLite)

df <- as.data.frame(data[, variables, with=F])
rownames(df) <- data$public_client_id
anns <- data.frame(BMI=data$bmi, subset=data$subset, row.names=data$public_client_id)
pheatmap(t(df), color=viridis(256), annotation_col = anns, show_colnames=FALSE)

# Save image
pheatmap(t(df), color=viridis(256), annotation_col = anns, show_colnames=FALSE,
         filename="figures/diets.png", width=16, height=6, dpi=300)

Finally we can run the models for BMI.

models <- lapply(variables, function(v){
    model <- reformulate(c("age", "sex", "bmi"), v) %>% glm(data=data)
    attr(model, "feature") <- v
    model
})

bmi_tests <- lapply(models, function(m) {
    coefs <- summary(m)$coefficients
    data.table(
        feature=attr(m, "feature"), 
        coef=coefs[4, 1], 
        se=coefs[4, 2], 
        t=coefs[4, 3], 
        p=coefs[4, 4]
    )
}) %>% rbindlist()
bmi_tests[, "padj" := p.adjust(p, method="fdr")]
bmi_tests[, "variable" := "bmi"]
bmi_tests[order(p)]

And for weight loss.

models <- lapply(variables, function(v){
    model <- reformulate(c("age", "sex", "bmi", "subset"), v) %>% 
             glm(data=data)
    attr(model, "feature") <- v
    model
})

wl_tests <- lapply(models, function(m) {
    coefs <- summary(m)$coefficients
    data.table(
        feature=attr(m, "feature"), 
        coef=coefs[5, 1], 
        se=coefs[5, 2], 
        t=coefs[5, 3], 
        p=coefs[5, 4]
    )
}) %>% rbindlist()
wl_tests[, "padj" := p.adjust(p, method="fdr")]
wl_tests[, "variable" := "subset"]
wl_tests[order(p)]
tests <- rbind(bmi_tests, wl_tests)
library(ggplot2)
theme_minimal() %>% theme_set()

wide <- dcast(tests, feature ~ variable, value.var=c("t", "padj"), fill=0)
wide[, "sig" := "none"]
wide[padj_bmi < 0.05, "sig" := "BMI"]
wide[padj_subset < 0.05, "sig" := "weight loss"]
wide[padj_bmi < 0.05 & padj_subset < 0.05, "sig" := "both"]
wide[, "sig" := factor(sig, levels=c("none", "BMI", "weight loss", "both"))]
ggplot(wide, aes(x=t_bmi, y=t_subset, color=sig)) +
    geom_vline(xintercept = 0, color="gray30", lty="dashed") +
    geom_hline(yintercept = 0, color="gray30", lty="dashed") +
    geom_point() + stat_smooth(method="glm", aes(group = 1)) +
    labs(x = "t statistic BMI", y = "t statistic weight loss") +
    scale_color_manual(values=c(none = "black", BMI = "royalblue", `weight loss`="orange", both = "red"))

ggsave("figures/diet_t.png", dpi=300, width=5, height=3)
cor.test(~ t_bmi + t_subset, data=wide)

    Pearson's product-moment correlation

data:  t_bmi and t_subset
t = -1.1438, df = 37, p-value = 0.26
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.4727561  0.1388057
sample estimates:
      cor 
-0.184805 

Overall explained variance

library(vegan)

m <- as(data[, variables, with=F], "matrix")
m[is.na(m)] <- min(as.numeric(m), na.rm=T)
perm <- adonis(m ~ age + sex + bmi + subset, data=data, method="euclidean")
perm

Call:
adonis(formula = m ~ age + sex + bmi + subset, data = data, method = "euclidean") 

Permutation: free
Number of permutations: 999

Terms added sequentially (first to last)

           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
age         1     324.9  324.89 1.96175 0.01883  0.124
sex         1      94.0   94.01 0.56765 0.00545  0.538
bmi         1     159.0  158.96 0.95982 0.00921  0.320
subset      1     118.8  118.76 0.71709 0.00688  0.447
Residuals 100   16561.3  165.61         0.95963       
Total     104   17257.9                 1.00000       
LS0tCnRpdGxlOiAiRGlldCBjb3ZhcmlhdGVzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeShhcml2YWxlLmRhdGEuaW50ZXJmYWNlKQoKbGlzdF9zbmFwc2hvdF9jb250ZW50cygpCnFzIDwtIGdldF9zbmFwc2hvdCgiYXNzZXNzbWVudHMiLCBjbGVhbj1UKQpkaWV0X3ZhcmlhYmxlcyA8LSBuYW1lcyhxcylbZ3JlcGwoImRpZXRfZnJlcSIsIG5hbWVzKHFzKSldCmxpZmVzdHlsZV92YXJpYWJsZXMgPC0gYygKICAgICJhc3Nlc3NtZW50X2xpZmVzdHlsZV9jcnVjaWZlcm91c192ZWdldGFibGVzX2VudW0iLAogICAgImFzc2Vzc21lbnRfbGlmZXN0eWxlX2ZydWl0c19lbnVtIiwgICAgICAgICAgICAgICAgICAgICAgCiAgICAiYXNzZXNzbWVudF9saWZlc3R5bGVfdmVnZXRhYmxlc19lbnVtIiwgICAgICAgICAgICAgICAgICAKICAgICJhc3Nlc3NtZW50X2xpZmVzdHlsZV9zdWdhcnlfZHJpbmtzX2VudW0iLCAgICAgICAgICAgICAgIAogICAgImFzc2Vzc21lbnRfbGlmZXN0eWxlX3dhdGVyX2VudW0iLCAgICAgICAgICAgICAgICAgICAgICAgCiAgICAiYXNzZXNzbWVudF9saWZlc3R5bGVfYWxjb2hvbF9kcmlua3NfYV9kYXlfZW51bSIsICAgICAgICAKICAgICJhc3Nlc3NtZW50X2xpZmVzdHlsZV9ncmFpbnNfZW51bSIKKQp2YXJpYWJsZXMgPC0gYyhkaWV0X3ZhcmlhYmxlcywgbGlmZXN0eWxlX3ZhcmlhYmxlcykKcXMgPC0gcXNbLCBjKCJwdWJsaWNfY2xpZW50X2lkIiwgdmFyaWFibGVzKSwgd2l0aD1GQUxTRV0KcXNbLCAiY29tcGxldGUiIDo9IGFwcGx5KHFzLCAxLCBmdW5jdGlvbih4KSBzdW0oIWlzLm5hKHgpKS9sZW5ndGgoeCkpXQpxcyA8LSBxc1tvcmRlcigtY29tcGxldGUpXVshZHVwbGljYXRlZChwdWJsaWNfY2xpZW50X2lkKV0KCnNjYWxlcyA8LSBhcHBseShxc1ssIHZhcmlhYmxlcywgd2l0aD1GXSwgMiwgZnVuY3Rpb24oeCkgdW5pcXVlKHgsIG5hLnJtPVQpICU+JSAKICAgICAgICAgIHNvcnQoKSAlPiUgcGFzdGUoY29sbGFwc2UgPSAiLCAiKSkgJT4lIGRhdGEudGFibGUoKQpzY2FsZXMgPC0gZGF0YS50YWJsZShxdWFudGl0eT12YXJpYWJsZXMsIHNjYWxlPXNjYWxlcykKZndyaXRlKHNjYWxlcywgImRhdGEvZGlldF9zY2FsZXMuY3N2IikKYGBgCgpBbGwgb2YgdGhvc2Ugc2hvdWxkIGJlIG9uIGEgb3JkaW5hbCBzY2FsZSBzbyB3ZSB3aWxsIGNvbnZlciB0aGVtIHRvIHRoZSBjb3JyZXNwb25kaW5nIHJhbmsgdmFsdWVzLgoKYGBge3J9CmxpYnJhcnkoc3RyaW5ncikKbGlicmFyeShtYWdyaXR0cikKCmZvciAodiBpbiB2YXJpYWJsZXMpIHsKICAgIHFzW1t2XV0gPC0gc3RyX21hdGNoKHFzW1t2XV0sICJeXFwoKFxcZClcXCkiKVssIDJdICU+JSBhcy5udW1lcmljKCkKfQoKcXMKYGBgCk5vdyBsZXQncyBtZXJnZSBpbiBvdXIgc2VsZWN0ZWQgY29ob3J0LgoKYGBge3J9CmNvaG9ydCA8LSByYmluZCgKICAgIGZyZWFkKCJub193ZWlnaHRfbG9zcy5jc3YiLCBjb2xDbGFzc2VzPWMocHVibGljX2NsaWVudF9pZD0iY2hhcmFjdGVyIikpLAogICAgZnJlYWQoInN1Y2Nlc3NmdWxfd2VpZ2h0X2xvc3MuY3N2IiwgY29sQ2xhc3Nlcz1jKHB1YmxpY19jbGllbnRfaWQ9ImNoYXJhY3RlciIpKQopW3NpbmNlX2Jhc2VsaW5lID09IDBdCmNvaG9ydFssICJzdWJzZXQiIDo9ICJjb250cm9scyJdCmNvaG9ydFt3ZWlnaHRfY2hhbmdlX3JlbGF0aXZlIDwgMCwgInN1YnNldCIgOj0gIndlaWdodCBsb3NzIl0KY29ob3J0Wywgc3Vic2V0IDo9IGZhY3RvcihzdWJzZXQpXQoKZGF0YSA8LSBxc1tjb2hvcnQsIG9uPSJwdWJsaWNfY2xpZW50X2lkIl0KYGBgCgpMZXQncyB2aXN1YWxpemUgd2hhdCB3ZSBoYXZlLgoKYGBge3IsIGZpZy53aWR0aD0xNiwgZmlnLmhlaWdodD02fQpsaWJyYXJ5KHBoZWF0bWFwKQpsaWJyYXJ5KHZpcmlkaXNMaXRlKQoKZGYgPC0gYXMuZGF0YS5mcmFtZShkYXRhWywgdmFyaWFibGVzLCB3aXRoPUZdKQpyb3duYW1lcyhkZikgPC0gZGF0YSRwdWJsaWNfY2xpZW50X2lkCmFubnMgPC0gZGF0YS5mcmFtZShCTUk9ZGF0YSRibWksIHN1YnNldD1kYXRhJHN1YnNldCwgcm93Lm5hbWVzPWRhdGEkcHVibGljX2NsaWVudF9pZCkKcGhlYXRtYXAodChkZiksIGNvbG9yPXZpcmlkaXMoMjU2KSwgYW5ub3RhdGlvbl9jb2wgPSBhbm5zLCBzaG93X2NvbG5hbWVzPUZBTFNFKQoKIyBTYXZlIGltYWdlCnBoZWF0bWFwKHQoZGYpLCBjb2xvcj12aXJpZGlzKDI1NiksIGFubm90YXRpb25fY29sID0gYW5ucywgc2hvd19jb2xuYW1lcz1GQUxTRSwKICAgICAgICAgZmlsZW5hbWU9ImZpZ3VyZXMvZGlldHMucG5nIiwgd2lkdGg9MTYsIGhlaWdodD02LCBkcGk9MzAwKQpgYGAKCkZpbmFsbHkgd2UgY2FuIHJ1biB0aGUgbW9kZWxzIGZvciBCTUkuCgpgYGB7cn0KbW9kZWxzIDwtIGxhcHBseSh2YXJpYWJsZXMsIGZ1bmN0aW9uKHYpewogICAgbW9kZWwgPC0gcmVmb3JtdWxhdGUoYygiYWdlIiwgInNleCIsICJibWkiKSwgdikgJT4lIGdsbShkYXRhPWRhdGEpCiAgICBhdHRyKG1vZGVsLCAiZmVhdHVyZSIpIDwtIHYKICAgIG1vZGVsCn0pCgpibWlfdGVzdHMgPC0gbGFwcGx5KG1vZGVscywgZnVuY3Rpb24obSkgewogICAgY29lZnMgPC0gc3VtbWFyeShtKSRjb2VmZmljaWVudHMKICAgIGRhdGEudGFibGUoCiAgICAgICAgZmVhdHVyZT1hdHRyKG0sICJmZWF0dXJlIiksIAogICAgICAgIGNvZWY9Y29lZnNbNCwgMV0sIAogICAgICAgIHNlPWNvZWZzWzQsIDJdLCAKICAgICAgICB0PWNvZWZzWzQsIDNdLCAKICAgICAgICBwPWNvZWZzWzQsIDRdCiAgICApCn0pICU+JSByYmluZGxpc3QoKQpibWlfdGVzdHNbLCAicGFkaiIgOj0gcC5hZGp1c3QocCwgbWV0aG9kPSJmZHIiKV0KYm1pX3Rlc3RzWywgInZhcmlhYmxlIiA6PSAiYm1pIl0KYm1pX3Rlc3RzW29yZGVyKHApXQpgYGAKCkFuZCBmb3Igd2VpZ2h0IGxvc3MuCgpgYGB7cn0KbW9kZWxzIDwtIGxhcHBseSh2YXJpYWJsZXMsIGZ1bmN0aW9uKHYpewogICAgbW9kZWwgPC0gcmVmb3JtdWxhdGUoYygiYWdlIiwgInNleCIsICJibWkiLCAic3Vic2V0IiksIHYpICU+JSAKICAgICAgICAgICAgIGdsbShkYXRhPWRhdGEpCiAgICBhdHRyKG1vZGVsLCAiZmVhdHVyZSIpIDwtIHYKICAgIG1vZGVsCn0pCgp3bF90ZXN0cyA8LSBsYXBwbHkobW9kZWxzLCBmdW5jdGlvbihtKSB7CiAgICBjb2VmcyA8LSBzdW1tYXJ5KG0pJGNvZWZmaWNpZW50cwogICAgZGF0YS50YWJsZSgKICAgICAgICBmZWF0dXJlPWF0dHIobSwgImZlYXR1cmUiKSwgCiAgICAgICAgY29lZj1jb2Vmc1s1LCAxXSwgCiAgICAgICAgc2U9Y29lZnNbNSwgMl0sIAogICAgICAgIHQ9Y29lZnNbNSwgM10sIAogICAgICAgIHA9Y29lZnNbNSwgNF0KICAgICkKfSkgJT4lIHJiaW5kbGlzdCgpCndsX3Rlc3RzWywgInBhZGoiIDo9IHAuYWRqdXN0KHAsIG1ldGhvZD0iZmRyIildCndsX3Rlc3RzWywgInZhcmlhYmxlIiA6PSAic3Vic2V0Il0Kd2xfdGVzdHNbb3JkZXIocCldCmBgYAoKYGBge3J9CnRlc3RzIDwtIHJiaW5kKGJtaV90ZXN0cywgd2xfdGVzdHMpCmZ3cml0ZSh0ZXN0c1tvcmRlcihwYWRqKV0sICJkYXRhL3Rlc3RzX2RpZXQuY3N2IikKYGBgCgpgYGB7ciwgZmlnLndpZHRoPTUsIGZpZy5oZWlnaHQ9M30KbGlicmFyeShnZ3Bsb3QyKQp0aGVtZV9taW5pbWFsKCkgJT4lIHRoZW1lX3NldCgpCgp3aWRlIDwtIGRjYXN0KHRlc3RzLCBmZWF0dXJlIH4gdmFyaWFibGUsIHZhbHVlLnZhcj1jKCJ0IiwgInBhZGoiKSwgZmlsbD0wKQp3aWRlWywgInNpZyIgOj0gIm5vbmUiXQp3aWRlW3BhZGpfYm1pIDwgMC4wNSwgInNpZyIgOj0gIkJNSSJdCndpZGVbcGFkal9zdWJzZXQgPCAwLjA1LCAic2lnIiA6PSAid2VpZ2h0IGxvc3MiXQp3aWRlW3BhZGpfYm1pIDwgMC4wNSAmIHBhZGpfc3Vic2V0IDwgMC4wNSwgInNpZyIgOj0gImJvdGgiXQp3aWRlWywgInNpZyIgOj0gZmFjdG9yKHNpZywgbGV2ZWxzPWMoIm5vbmUiLCAiQk1JIiwgIndlaWdodCBsb3NzIiwgImJvdGgiKSldCmdncGxvdCh3aWRlLCBhZXMoeD10X2JtaSwgeT10X3N1YnNldCwgY29sb3I9c2lnKSkgKwogICAgZ2VvbV92bGluZSh4aW50ZXJjZXB0ID0gMCwgY29sb3I9ImdyYXkzMCIsIGx0eT0iZGFzaGVkIikgKwogICAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gMCwgY29sb3I9ImdyYXkzMCIsIGx0eT0iZGFzaGVkIikgKwogICAgZ2VvbV9wb2ludCgpICsgc3RhdF9zbW9vdGgobWV0aG9kPSJnbG0iLCBhZXMoZ3JvdXAgPSAxKSkgKwogICAgbGFicyh4ID0gInQgc3RhdGlzdGljIEJNSSIsIHkgPSAidCBzdGF0aXN0aWMgd2VpZ2h0IGxvc3MiKSArCiAgICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzPWMobm9uZSA9ICJibGFjayIsIEJNSSA9ICJyb3lhbGJsdWUiLCBgd2VpZ2h0IGxvc3NgPSJvcmFuZ2UiLCBib3RoID0gInJlZCIpKQpnZ3NhdmUoImZpZ3VyZXMvZGlldF90LnBuZyIsIGRwaT0zMDAsIHdpZHRoPTUsIGhlaWdodD0zKQpjb3IudGVzdCh+IHRfYm1pICsgdF9zdWJzZXQsIGRhdGE9d2lkZSkKYGBgCgojIyBPdmVyYWxsIGV4cGxhaW5lZCB2YXJpYW5jZQoKYGBge3J9CmxpYnJhcnkodmVnYW4pCgptIDwtIGFzKGRhdGFbLCB2YXJpYWJsZXMsIHdpdGg9Rl0sICJtYXRyaXgiKQptW2lzLm5hKG0pXSA8LSBtaW4oYXMubnVtZXJpYyhtKSwgbmEucm09VCkKcGVybSA8LSBhZG9uaXMobSB+IGFnZSArIHNleCArIGJtaSArIHN1YnNldCwgZGF0YT1kYXRhLCBtZXRob2Q9ImV1Y2xpZGVhbiIpCnBlcm0KYGBg