This is a very simplified workflow in treepplr for analysis of host repertoire evolution to show the overall structure. A real analysis would include many other steps such as testing of different inference methods and check of convergence.
Run treeppl
Run the hostrep.tppl
TreePPL program using the input
data available in the package and default inference options.
model <- tp_model("hostrep3states")
data <- tp_data("hostrep3states")
output_json <- tp_treeppl(model = model, model_file_name = "hostrep",
data = data, data_file_name = "hostrep")
output <- tp_parse(output_json)
The output from tp_parse() contains all information outputted by treeppl, so we need to separate the sampled character histories from the sampled parameter values.
Parameter estimates
logs <- output[[1]] %>%
dplyr::select(iteration, log_weight, mu, beta) %>%
unique() %>%
dplyr::mutate(weight = exp(log_weight - max(.$log_weight)))
ggplot(logs) +
geom_histogram(aes(mu, y = after_stat(density), weight=weight), col = "white", fill = "lightblue") +
geom_density(aes(mu, weight=weight)) +
theme_bw()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(logs) +
geom_histogram(aes(beta, y = after_stat(density), weight=weight), col = "white", fill = "lightblue") +
geom_density(aes(beta, weight=weight)) +
theme_bw()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Character history
# post-treatment function
get_history <- function(parsed_output){
table <- parsed_output[[1]] %>%
dplyr::select(-c(log_weight, mu, beta, lambda1, lambda2, lambda3, lambda4)) %>%
dplyr::mutate(transition_type = "anagenetic") %>%
dplyr::mutate(node_index = dplyr::case_when( # fix to mismatch and 0- to 1-base
node_index == 3 ~ 5,
node_index == 4 ~ 4,
TRUE ~ node_index + 1)) %>%
dplyr::filter(!is.na(log_norm_const))
return(table)
}
Extract table with character history samples
tp_hist <- get_history(output)
head(tp_hist)
#> iteration log_norm_const node_index branch_start_time branch_end_time
#> 1 0 -37.34668 7 NA 2.000000
#> 2 0 -37.34668 6 2.000000 0.735218
#> 3 0 -37.34668 5 0.735218 0.000000
#> 4 0 -37.34668 3 0.735218 0.000000
#> 5 0 -37.34668 4 2.000000 0.428200
#> 6 0 -37.34668 4 2.000000 0.428200
#> start_state end_state transition_time parent_index child1_index child2_index
#> 1 220 220 NA NA 5 4
#> 2 220 120 1.6199567 6 3 2
#> 3 120 20 0.2626133 5 NA NA
#> 4 120 220 0.6140343 5 NA NA
#> 5 220 210 1.2031141 6 1 0
#> 6 210 200 0.5926294 6 1 0
#> transition_type
#> 1 anagenetic
#> 2 anagenetic
#> 3 anagenetic
#> 4 anagenetic
#> 5 anagenetic
#> 6 anagenetic
This table can be used by evolnets for plotting results, together with the phylogenetic trees and the known extant interactions.
# get data from treepplr
symbiont_tree <- evolnets::read_tree_from_revbayes(
system.file("extdata/tree_subroot_Rev.tre", package = "treepplr"))
host_tree <- treeio::read.tree(
system.file("extdata/host_tree.tre", package = "treepplr"))
matrix <- read.csv(
system.file("extdata/host_matrix.csv", package = "treepplr"),
row.names = 1) %>%
as.matrix()
# calculate posterior at nodes
tp_at_nodes <- evolnets::posterior_at_nodes(tp_hist, symbiont_tree, host_tree, state = c(1,2))
# plot data and inferred ancestral states
# fundamental host repertoire
(tp_asr_fund <- evolnets::plot_matrix_phylo(matrix, tp_at_nodes, symbiont_tree,
host_tree, type = "repertoires",
repertoire = "fundamental"))
# realized host repertoire
(tp_asr_real <- evolnets::plot_matrix_phylo(matrix, tp_at_nodes, symbiont_tree,
host_tree, type = "repertoires",
repertoire = "realized"))