## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE # most of this requires datelife / pigauto / network ) ## ----setup-------------------------------------------------------------------- # library(prepR4pcm) # # Suggests packages we'll lean on: # # install.packages("fishtree") # # pak::pak("phylotastic/datelife") # # Optional, for the pooling step at the end: # # pak::pak("itchyshin/pigauto") ## ----data--------------------------------------------------------------------- # # Your trait data — typically loaded from disk. Names need not be # # tidy: `Esox_lucius` carries an underscore, which pr_get_tree() # # normalises away. # my_data <- data.frame( # species = c("Salmo salar", "Esox_lucius", "Oncorhynchus mykiss"), # body_size = c(98, NA, 50) # NA is fine; pigauto will impute it # ) ## ----retrieve, eval = FALSE--------------------------------------------------- # trees <- pr_get_tree(my_data, # species_col = "species", # source = "fishtree", # n_tree = 50) # # class(trees$tree) # "multiPhylo" # length(trees$tree) # 50 # trees$matched # the 3 species, in their original input form # trees$unmatched # character(0) — every species was placed # # # Per-tree provenance: one list entry per returned tree, each # # recording source_index, citation, calibration_method, and n_tips. # length(trees$backend_meta$tree_provenance) # 50 # trees$backend_meta$tree_provenance[[1]] # the provenance of tree 1 ## ----datelife, eval = FALSE--------------------------------------------------- # trees <- pr_get_tree(my_data, # species_col = "species", # source = "datelife", # n_tree = 50) # # trees$tree is multiPhylo; per-source citations are in # # trees$backend_meta$tree_provenance[[i]]$citation ## ----date, eval = FALSE------------------------------------------------------- # my_topology <- ape::read.tree("my_topology.nex") # dated_trees <- pr_date_tree(my_topology, n_dated = 50) # class(dated_trees$tree) # "multiPhylo" # length(dated_trees$tree) # up to 50 ## ----reconcile, eval = FALSE-------------------------------------------------- # rec <- reconcile_tree(my_data, trees$tree[[1]], # x_species = "species", # authority = NULL) # tree tips are plain binomials # rec # match summary: 1 exact, 2 normalized, 0 unresolved # # aligned <- reconcile_apply(rec, # data = my_data, # tree = trees$tree[[1]], # species_col = "species", # drop_unresolved = TRUE) # # aligned$data # one row per tree tip — the model-ready frame # #> species body_size # #> 1 Salmo salar 98 # #> 2 Esox_lucius NA # #> 3 Oncorhynchus mykiss 50 ## ----cite, eval = FALSE------------------------------------------------------- # # Plain text for an email or a methods paragraph # pr_cite_tree(trees) # # # Markdown for a GitHub issue or PR description # cat(pr_cite_tree(trees, format = "markdown")) # # # BibTeX for a manuscript bibliography (sanity-check before submitting) # cat(pr_cite_tree(trees, format = "bibtex")) ## ----pigauto, eval = FALSE---------------------------------------------------- # library(pigauto) # # # 5 imputations per tree × 50 trees = 250 complete datasets # mi <- multi_impute_trees(aligned$data, # trees = trees$tree, # m_per_tree = 5) # # # Fit your model to each completed dataset. Replace the formula # # with whatever your hypothesis is. # fits <- with_imputations(mi, function(df) { # glmmTMB::glmmTMB(body_size ~ 1 + (1 | species), # data = df) # }) # # # Pool via Rubin's rules — the standard errors reflect BOTH the # # imputation uncertainty AND the tree-posterior uncertainty. # pooled <- pool_mi(fits, # coef_fun = function(fit) fixef(fit)$cond, # vcov_fun = function(fit) vcov(fit)$cond) # # pooled