## ----include = FALSE----------------------------------------------------------
# Script extracted from a knitr vignette for prepR4pcm. It reconciles species
# names between trait tables and phylogenies, then uses the aligned objects in
# downstream comparative models. Chunks marked `eval = FALSE` stay commented.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(prepR4pcm)

## ----load-data----------------------------------------------------------------
data(avonet_subset)  # AVONET morphological traits (Tobias et al. 2022)
data(tree_jetz)      # Jetz et al. (2012) phylogeny, Corvoidea + allies

cat(sprintf("Data: %d species\n", nrow(avonet_subset)))
cat(sprintf("Tree: %d tips\n", ape::Ntip(tree_jetz)))

# The data uses spaces; the tree uses underscores
head(avonet_subset$Species1, 3)
head(tree_jetz$tip.label, 3)

## ----reconcile-tree-----------------------------------------------------------
result <- reconcile_tree(
  x = avonet_subset,
  tree = tree_jetz,
  x_species = "Species1",
  authority = NULL  # skip synonym lookup for speed
)
print(result)

## ----mapping------------------------------------------------------------------
mapping <- reconcile_mapping(result)

# Match type breakdown
table(mapping$match_type)

# Show normalised matches (formatting differences resolved automatically).
# which() drops NA comparisons so they cannot appear as all-NA rows.
is_normalized <- mapping$match_type == "normalized"
norm <- mapping[which(is_normalized), c("name_x", "name_y", "notes")]
if (nrow(norm) > 0) head(norm, 5)

# Unresolved: in data but not in tree.
# in_x is treated as possibly NA elsewhere in this script (na.rm = TRUE), so
# the mask goes through which() to keep NA rows out of the count.
is_unresolved_x <- mapping$match_type == "unresolved" & mapping$in_x
unresolved <- mapping[which(is_unresolved_x), ]
cat(sprintf("\nSpecies in data but not in tree: %d\n", nrow(unresolved)))

## ----summary, eval = FALSE----------------------------------------------------
# reconcile_summary(result, detail = "mismatches_only")

## ----apply--------------------------------------------------------------------
aligned <- reconcile_apply(
  result,
  data = avonet_subset,
  tree = tree_jetz,
  species_col = "Species1",
  drop_unresolved = TRUE
)

cat(sprintf("Aligned data: %d rows\nAligned tree: %d tips\n",
            nrow(aligned$data), ape::Ntip(aligned$tree)))

## ----pgls, message = FALSE, warning = FALSE, eval = requireNamespace("caper", quietly = TRUE)----
# caper is optional: honour the chunk's eval condition so the script does not
# stop with an error when the package is not installed.
if (requireNamespace("caper", quietly = TRUE)) {
  library(caper)

  # reconcile_apply() aligns names so data$Species1 matches tree tip labels
  cd <- comparative.data(aligned$tree, aligned$data,
                         names.col = "Species1", vcv = TRUE)

  # PGLS: body mass ~ wing length
  model_pgls <- pgls(log(Mass) ~ log(Wing.Length), data = cd)
  summary(model_pgls)
}

## ----pglmm, message = FALSE, warning = FALSE, results = "hide", eval = requireNamespace("MCMCglmm", quietly = TRUE)----
# MCMCglmm is optional as well; skip the whole fit when it is unavailable.
if (requireNamespace("MCMCglmm", quietly = TRUE)) {
  library(MCMCglmm)

  # Species column as the phylogenetic grouping factor
  aligned$data$phylo <- aligned$data$Species1

  # Inverse phylogenetic covariance matrix
  # Replace any zero-length branches (can arise after pruning)
  tree_mcmc <- aligned$tree
  too_short <- tree_mcmc$edge.length < .Machine$double.eps
  tree_mcmc$edge.length[too_short] <- 1e-6
  inv_phylo <- inverseA(tree_mcmc, nodes = "ALL", scale = FALSE)

  # PGLMM: continuous response
  prior <- list(
    R = list(V = 1, nu = 0.002),
    G = list(G1 = list(V = 1, nu = 0.002))
  )

  # Fix the RNG state so the sampled chain is reproducible between runs,
  # matching the seeded augmentation step further down.
  set.seed(42)
  model_mcmc <- MCMCglmm(
    log(Mass) ~ log(Wing.Length) + Trophic.Level,
    random = ~phylo,
    family = "gaussian",
    ginverse = list(phylo = inv_phylo$Ainv),
    data = aligned$data,
    prior = prior,
    nitt = 50000,
    burnin = 10000,
    thin = 20,
    verbose = FALSE
  )
}

## ----pglmm-summary, eval = requireNamespace("MCMCglmm", quietly = TRUE)-------
# model_mcmc only exists when the guarded fit above actually ran.
if (requireNamespace("MCMCglmm", quietly = TRUE)) {
  summary(model_mcmc)
}

## ----reconcile-data-----------------------------------------------------------
data(nesttrait_subset)  # Nest traits (Chia et al. 2023)

rec_data <- reconcile_data(
  x = nesttrait_subset,
  y = avonet_subset,
  x_species = "Scientific_name",
  y_species = "Species1",
  authority = NULL,
  quiet = TRUE
)
print(rec_data)

## ----merge-data---------------------------------------------------------------
merged <- reconcile_merge(
  rec_data,
  data_x = nesttrait_subset,
  data_y = avonet_subset,
  species_col_x = "Scientific_name",
  species_col_y = "Species1"
)
cat(sprintf("Merged: %d rows, %d columns\n", nrow(merged), ncol(merged)))

## ----multirow-aggregate, eval = FALSE-----------------------------------------
# # Example: averaging individual measurements to species means
# species_means <- aggregate(
#   cbind(Mass, Wing.Length) ~ Species1,
#   data = individual_measurements,
#   FUN = mean
# )
# merged <- reconcile_merge(rec_data, species_means, avonet_subset,
#                           species_col_x = "Species1",
#                           species_col_y = "Species1")

## ----multirow-lookup, eval = FALSE--------------------------------------------
# # Reconcile on unique species
# species_level <- data.frame(
#   Species1 = unique(individual_measurements$Species1)
# )
# rec <- reconcile_data(species_level, avonet_subset,
#                       x_species = "Species1", y_species = "Species1",
#                       authority = NULL, quiet = TRUE)
#
# # Join the mapping back to the full, multi-row dataset
# mapping <- reconcile_mapping(rec)
# individual_measurements$species_resolved <- mapping$name_resolved[
#   match(individual_measurements$Species1, mapping$name_x)
# ]

## ----asymmetric, eval = FALSE-------------------------------------------------
# # Keep only species present in both: inner join
# inner <- reconcile_merge(rec_data, small_data, large_data,
#                          species_col_x = "species",
#                          species_col_y = "Species1",
#                          how = "inner")
#
# # Keep all small_data rows; fill large_data columns with NA
# # for species missing from the reference: left join
# left <- reconcile_merge(rec_data, small_data, large_data,
#                         species_col_x = "species",
#                         species_col_y = "Species1",
#                         how = "left")

## ----crosswalk----------------------------------------------------------------
data(crosswalk_birdlife_birdtree)
table(crosswalk_birdlife_birdtree$Match.type)

## ----make-overrides-----------------------------------------------------------
overrides <- reconcile_crosswalk(
  crosswalk_birdlife_birdtree,
  from_col = "Species1",
  to_col = "Species3",
  match_type_col = "Match.type"
)

# Re-reconcile with overrides
result_xw <- reconcile_tree(
  x = avonet_subset,
  tree = tree_jetz,
  x_species = "Species1",
  authority = NULL,
  overrides = overrides
)

# Compare: how many more matches with the crosswalk?
# A name counts as matched when it is present on both sides of the mapping.
cat(sprintf("Without crosswalk: %d matched\n",
            sum(result$mapping$in_x & result$mapping$in_y, na.rm = TRUE)))
cat(sprintf("With crosswalk: %d matched\n",
            sum(result_xw$mapping$in_x & result_xw$mapping$in_y, na.rm = TRUE)))

## ----manual-overrides, eval = FALSE-------------------------------------------
# my_overrides <- data.frame(
#   name_x = c("Old name A", "Old name B"),
#   name_y = c("Tree name A", "Tree name B"),
#   user_note = c("Reclassified in 2023", "Spelling correction")
# )
# result <- reconcile_tree(my_data, my_tree, overrides = my_overrides)

## ----multi-tree---------------------------------------------------------------
data(tree_clements25)  # Clements 2025 tree

results <- reconcile_to_trees(
  x = avonet_subset,
  trees = list(
    jetz = tree_jetz,
    clements = tree_clements25
  ),
  x_species = "Species1",
  authority = NULL
)

# Compare overlap across trees
sapply(results, function(r) {
  c(matched = sum(r$mapping$in_x & r$mapping$in_y, na.rm = TRUE),
    unresolved_x = r$counts$n_unresolved_x)
})

## ----fuzzy, eval = FALSE------------------------------------------------------
# result <- reconcile_tree(
#   x = my_data,
#   tree = my_tree,
#   fuzzy = TRUE,           # enable fuzzy matching
#   fuzzy_threshold = 0.9,  # minimum similarity (0-1)
#   resolve = "flag"        # flag low-confidence matches for review
# )
#
# # Check flagged matches
# flagged <- reconcile_mapping(result)
# flagged[flagged$match_type == "flagged", c("name_x", "name_y", "match_score")]

## ----augment------------------------------------------------------------------
aug <- reconcile_augment(
  result,
  tree_jetz,
  where = "genus",                      # sister to a random congener
  branch_length = "congener_median",    # median terminal branch of congeners
  seed = 42,                            # for reproducibility
  quiet = TRUE
)

cat(sprintf("Original tips: %d\nAugmented tips: %d\n",
            ape::Ntip(aug$original), ape::Ntip(aug$tree)))
cat(sprintf("Added: %d | Skipped (no congener): %d\n",
            nrow(aug$augmented), nrow(aug$skipped)))

# Which species were added, and where?
if (nrow(aug$augmented) > 0) {
  head(aug$augmented[, c("species", "placed_near", "branch_length")])
}

## ----augment-apply, eval = FALSE----------------------------------------------
# aligned_aug <- reconcile_apply(
#   result,
#   data = avonet_subset,
#   tree = aug$tree,          # augmented tree, not the original
#   species_col = "Species1",
#   drop_unresolved = FALSE   # keep augmented tips (they are now in the tree)
# )

## ----export, eval = FALSE-----------------------------------------------------
# out_dir <- file.path(tempdir(), "prepr4pcm-export")
# reconcile_export(
#   result,
#   data = avonet_subset,
#   tree = tree_jetz,
#   species_col = "Species1",
#   dir = out_dir,
#   prefix = "avonet_jetz"
# )
# # Writes: avonet_jetz_data.csv, avonet_jetz_tree.nex, avonet_jetz_mapping.csv
# unlink(out_dir, recursive = TRUE)

## ----report, eval = FALSE-----------------------------------------------------
# report_file <- tempfile(fileext = ".html")
# reconcile_report(result, file = report_file)
# unlink(report_file)