library(arivale.data.interface)
library(mbtools)
classes <- c(public_client_id = "character", microbiome_id = "character")
cohort <- rbind(
    fread("no_weight_loss.csv", colClasses=classes),
    fread("successful_weight_loss.csv", colClasses=classes)
)[since_baseline == 0]
cohort[, "subset" := "controls"]
cohort[weight_change_relative < 0, "subset" := "weight loss"]
cohort[, subset := factor(subset)]
cohort[, "vendor_observation_id" := microbiome_id]
ps <- readRDS("/proj/arivale/microbiome/16S_processed/phyloseq.rds")
ps <- subset_samples(ps, vendor_observation_id %in% cohort[, vendor_observation_id])
ps
phyloseq-class experiment-level object
otu_table()   OTU Table:          [ 89022 taxa and 105 samples ]:
sample_data() Sample Data:        [ 105 samples by 24 sample variables ]:
tax_table()   Taxonomy Table:     [ 89022 taxa by 8 taxonomic ranks ]:
phy_tree()    Phylogenetic Tree:  [ 89022 tips and 82828 internal nodes ]:
taxa are columns
genus <- speedyseq::tax_glom(ps, "Genus")

sdata <- merge(
    as(sample_data(genus), "data.frame"),
    as.data.frame(cohort),
    by = c("vendor_observation_id", "sex")
)
rownames(sdata) <- sdata$id
sdata$age <- sdata$age.y
sdata$subset <- factor(sdata$subset)
sdata$sex <- factor(sdata$sex)
sample_data(genus) <- sdata
tests <- association(
    genus, 
    presence_threshold = 1,
    min_abundance = 10,
    in_samples = 0.5,
    confounders = c("age", "bmi", "sex"),
    variables = "subset",
    taxa_rank = "Genus",
    method = "deseq2",
    shrink=F
)
bmi <- association(
    genus, 
    presence_threshold = 1,
    min_abundance = 10,
    in_samples = 0.5,
    confounders = c("age", "sex"),
    variables = "bmi",
    taxa_rank = "Genus",
    method = "deseq2",
    shrink=F
)
tests <- rbind(tests, bmi)
tests <- merge.data.table(tests, as.data.frame(as(tax_table(genus), "matrix")), on="Genus")
fwrite(tests[order(padj)], "data/tests_16S_genus.csv")
tests
tests[, "t" := log2FoldChange / lfcSE]
wide <- dcast(tests, Genus ~ variable, value.var=c("t", "padj"), fill=0)
wide[, "sig" := "none"]
wide[padj_bmi < 0.05, "sig" := "BMI"]
wide[padj_subset < 0.05, "sig" := "weight loss"]
wide[padj_bmi < 0.05 & padj_subset < 0.05, "sig" := "both"]
wide[, "sig" := factor(sig, levels=c("none", "BMI", "weight loss", "both"))]
ggplot(wide, aes(x=t_bmi, y=t_subset, color=sig)) +
    geom_vline(xintercept = 0, color="gray30", lty="dashed") +
    geom_hline(yintercept = 0, color="gray30", lty="dashed") +
    geom_point() + stat_smooth(method="glm", aes(group = 1)) +
    labs(x = "t statistic BMI", y = "t statistic weight loss") +
    scale_color_manual(values=c(none = "black", BMI = "royalblue", `weight loss`="orange", both = "red"))

ggsave("figures/16S_genus_t.png", dpi=300, width=5, height=3)
cor.test(~ t_bmi + t_subset, data=wide, method="spearman")

    Spearman's rank correlation rho

data:  t_bmi and t_subset
S = 70764, p-value = 0.1301
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.1706048 

Overall explained variance

library(vegan)

m <- as(otu_table(genus), "matrix")
m[is.na(m)] <- min(as.numeric(m), na.rm=T)
perm <- adonis(m ~ age + sex + bmi + subset, data=as(sample_data(genus), "data.frame"), method="bray")
perm

Call:
adonis(formula = m ~ age + sex + bmi + subset, data = as(sample_data(genus),      "data.frame"), method = "bray") 

Permutation: free
Number of permutations: 999

Terms added sequentially (first to last)

           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
age         1    0.3211 0.32105  2.0219 0.01876  0.026 *  
sex         1    0.1541 0.15408  0.9703 0.00900  0.429    
bmi         1    0.6281 0.62814  3.9558 0.03671  0.001 ***
subset      1    0.1283 0.12828  0.8078 0.00750  0.619    
Residuals 100   15.8789 0.15879         0.92802           
Total     104   17.1104                 1.00000           
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
LS0tCnRpdGxlOiAiRXhpc3RpbmcgZGF0YSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KGFyaXZhbGUuZGF0YS5pbnRlcmZhY2UpCmxpYnJhcnkobWJ0b29scykKYGBgCgpgYGB7cn0KY2xhc3NlcyA8LSBjKHB1YmxpY19jbGllbnRfaWQgPSAiY2hhcmFjdGVyIiwgbWljcm9iaW9tZV9pZCA9ICJjaGFyYWN0ZXIiKQpjb2hvcnQgPC0gcmJpbmQoCiAgICBmcmVhZCgibm9fd2VpZ2h0X2xvc3MuY3N2IiwgY29sQ2xhc3Nlcz1jbGFzc2VzKSwKICAgIGZyZWFkKCJzdWNjZXNzZnVsX3dlaWdodF9sb3NzLmNzdiIsIGNvbENsYXNzZXM9Y2xhc3NlcykKKVtzaW5jZV9iYXNlbGluZSA9PSAwXQpjb2hvcnRbLCAic3Vic2V0IiA6PSAiY29udHJvbHMiXQpjb2hvcnRbd2VpZ2h0X2NoYW5nZV9yZWxhdGl2ZSA8IDAsICJzdWJzZXQiIDo9ICJ3ZWlnaHQgbG9zcyJdCmNvaG9ydFssIHN1YnNldCA6PSBmYWN0b3Ioc3Vic2V0KV0KY29ob3J0WywgInZlbmRvcl9vYnNlcnZhdGlvbl9pZCIgOj0gbWljcm9iaW9tZV9pZF0KYGBgCgpgYGB7cn0KcHMgPC0gcmVhZFJEUygiL3Byb2ovYXJpdmFsZS9taWNyb2Jpb21lLzE2U19wcm9jZXNzZWQvcGh5bG9zZXEucmRzIikKcHMgPC0gc3Vic2V0X3NhbXBsZXMocHMsIHZlbmRvcl9vYnNlcnZhdGlvbl9pZCAlaW4lIGNvaG9ydFssIHZlbmRvcl9vYnNlcnZhdGlvbl9pZF0pCnBzCmBgYAoKYGBge3J9CmdlbnVzIDwtIHNwZWVkeXNlcTo6dGF4X2dsb20ocHMsICJHZW51cyIpCgpzZGF0YSA8LSBtZXJnZSgKICAgIGFzKHNhbXBsZV9kYXRhKGdlbnVzKSwgImRhdGEuZnJhbWUiKSwKICAgIGFzLmRhdGEuZnJhbWUoY29ob3J0KSwKICAgIGJ5ID0gYygidmVuZG9yX29ic2VydmF0aW9uX2lkIiwgInNleCIpCikKcm93bmFtZXMoc2RhdGEpIDwtIHNkYXRhJGlkCnNkYXRhJGFnZSA8LSBzZGF0YSRhZ2UueQpzZGF0YSRzdWJzZXQgPC0gZmFjdG9yKHNkYXRhJHN1YnNldCkKc2RhdGEkc2V4IDwtIGZhY3RvcihzZGF0YSRzZXgpCnNhbXBsZV9kYXRhKGdlbnVzKSA8LSBzZGF0YQpgYGAKCmBgYHtyfQp0ZXN0cyA8LSBhc3NvY2lhdGlvbigKICAgIGdlbnVzLCAKICAgIHByZXNlbmNlX3RocmVzaG9sZCA9IDEsCiAgICBtaW5fYWJ1bmRhbmNlID0gMTAsCiAgICBpbl9zYW1wbGVzID0gMC41LAogICAgY29uZm91bmRlcnMgPSBjKCJhZ2UiLCAiYm1pIiwgInNleCIpLAogICAgdmFyaWFibGVzID0gInN1YnNldCIsCiAgICB0YXhhX3JhbmsgPSAiR2VudXMiLAogICAgbWV0aG9kID0gImRlc2VxMiIsCiAgICBzaHJpbms9RgopCmJtaSA8LSBhc3NvY2lhdGlvbigKICAgIGdlbnVzLCAKICAgIHByZXNlbmNlX3RocmVzaG9sZCA9IDEsCiAgICBtaW5fYWJ1bmRhbmNlID0gMTAsCiAgICBpbl9zYW1wbGVzID0gMC41LAogICAgY29uZm91bmRlcnMgPSBjKCJhZ2UiLCAic2V4IiksCiAgICB2YXJpYWJsZXMgPSAiYm1pIiwKICAgIHRheGFfcmFuayA9ICJHZW51cyIsCiAgICBtZXRob2QgPSAiZGVzZXEyIiwKICAgIHNocmluaz1GCikKdGVzdHMgPC0gcmJpbmQodGVzdHMsIGJtaSkKdGVzdHMgPC0gbWVyZ2UuZGF0YS50YWJsZSh0ZXN0cywgYXMuZGF0YS5mcmFtZShhcyh0YXhfdGFibGUoZ2VudXMpLCAibWF0cml4IikpLCBvbj0iR2VudXMiKQpmd3JpdGUodGVzdHNbb3JkZXIocGFkaildLCAiZGF0YS90ZXN0c18xNlNfZ2VudXMuY3N2IikKdGVzdHMKYGBgCgpgYGB7ciwgZmlnLndpZHRoPTUsIGZpZy5oZWlnaHQ9M30KdGVzdHNbLCAidCIgOj0gbG9nMkZvbGRDaGFuZ2UgLyBsZmNTRV0Kd2lkZSA8LSBkY2FzdCh0ZXN0cywgR2VudXMgfiB2YXJpYWJsZSwgdmFsdWUudmFyPWMoInQiLCAicGFkaiIpLCBmaWxsPTApCndpZGVbLCAic2lnIiA6PSAibm9uZSJdCndpZGVbcGFkal9ibWkgPCAwLjA1LCAic2lnIiA6PSAiQk1JIl0Kd2lkZVtwYWRqX3N1YnNldCA8IDAuMDUsICJzaWciIDo9ICJ3ZWlnaHQgbG9zcyJdCndpZGVbcGFkal9ibWkgPCAwLjA1ICYgcGFkal9zdWJzZXQgPCAwLjA1LCAic2lnIiA6PSAiYm90aCJdCndpZGVbLCAic2lnIiA6PSBmYWN0b3Ioc2lnLCBsZXZlbHM9Yygibm9uZSIsICJCTUkiLCAid2VpZ2h0IGxvc3MiLCAiYm90aCIpKV0KZ2dwbG90KHdpZGUsIGFlcyh4PXRfYm1pLCB5PXRfc3Vic2V0LCBjb2xvcj1zaWcpKSArCiAgICBnZW9tX3ZsaW5lKHhpbnRlcmNlcHQgPSAwLCBjb2xvcj0iZ3JheTMwIiwgbHR5PSJkYXNoZWQiKSArCiAgICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAwLCBjb2xvcj0iZ3JheTMwIiwgbHR5PSJkYXNoZWQiKSArCiAgICBnZW9tX3BvaW50KCkgKyBzdGF0X3Ntb290aChtZXRob2Q9ImdsbSIsIGFlcyhncm91cCA9IDEpKSArCiAgICBsYWJzKHggPSAidCBzdGF0aXN0aWMgQk1JIiwgeSA9ICJ0IHN0YXRpc3RpYyB3ZWlnaHQgbG9zcyIpICsKICAgIHNjYWxlX2NvbG9yX21hbnVhbCh2YWx1ZXM9Yyhub25lID0gImJsYWNrIiwgQk1JID0gInJveWFsYmx1ZSIsIGB3ZWlnaHQgbG9zc2A9Im9yYW5nZSIsIGJvdGggPSAicmVkIikpCmdnc2F2ZSgiZmlndXJlcy8xNlNfZ2VudXNfdC5wbmciLCBkcGk9MzAwLCB3aWR0aD01LCBoZWlnaHQ9MykKY29yLnRlc3QofiB0X2JtaSArIHRfc3Vic2V0LCBkYXRhPXdpZGUsIG1ldGhvZD0ic3BlYXJtYW4iKQpgYGAKCiMjIE92ZXJhbGwgZXhwbGFpbmVkIHZhcmlhbmNlCgpgYGB7cn0KbGlicmFyeSh2ZWdhbikKCm0gPC0gYXMob3R1X3RhYmxlKGdlbnVzKSwgIm1hdHJpeCIpCm1baXMubmEobSldIDwtIG1pbihhcy5udW1lcmljKG0pLCBuYS5ybT1UKQpwZXJtIDwtIGFkb25pcyhtIH4gYWdlICsgc2V4ICsgYm1pICsgc3Vic2V0LCBkYXRhPWFzKHNhbXBsZV9kYXRhKGdlbnVzKSwgImRhdGEuZnJhbWUiKSwgbWV0aG9kPSJicmF5IikKcGVybQpgYGA=