# This script implements the BranchManager in the statistical language R and # recapitulates Figure 5 from the BMC Bioinformatics manuscript in which the # method was described. # Written and tested by EAS in R 2.0.1 for Windows XP # Uses the R library ape maintained by Emmanuel Paradis # See http://pbil.univ-lyon1.fr/R/ape/ library(ape) # Note that this script is intended to be informative at the expense of # computational efficiency. # Initialize tree used in Figure 5 example.tree.newick <- "(((((((((PV22:0.3,BH10:0.3):0.1,BRU:0.5):0.1,HXB:0.7):2.4,SF2:3.3):0.1,CDC:3.7):0.5,WMJ2:3.0):0.4,RF:4.3):2.6,(ELI:6.3,MAL:6.1):1.9):2.7,Z3:9.3);" example.tree <- read.tree(text = example.tree.newick) # BEGIN HELPER FUNCTIONS # ---------------------- # The function edgeroot takes a rooted tree (input phy) and reroots that tree # at the midpoint of the specified edge (input x). edgeroot <- function(x, phy) { if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") newphy <- phy numedges <- dim(phy$edge)[1] theedge <- phy$edge[x,]; thelen <- phy$edge.length[x] tipnum <- max(as.numeric(phy$edge))+1 nexttip <- as.character(tipnum) nextnode <- as.character(min(as.numeric(phy$edge))-1) newedge1 <- c(theedge[1],nextnode); newedge2 <- c(nextnode,theedge[2]); newedge3 <- c(nextnode,nexttip); newlen1 <- thelen/2 newlen2 <- thelen/2 newlen3 <- 0 newphy$edge <- rbind(phy$edge[-x,],newedge1,newedge2,newedge3) newphy$edge.length <- c(phy$edge.length[-x],newlen1,newlen2,newlen3) newphy$tip.label <- c(newphy$tip.label,"Foo") newphy <- root(newphy,tipnum) newphy <- drop.tip(newphy,tipnum) return(newphy) } # The function firstcherry takes a rooted tree (input phy) and identifies a pair # of sister taxa, i.e. a cherry. firstcherry = function(phy) { if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") numedges <- dim(phy$edge)[1] numtaxa <- length(phy$tip.label) taxon <- 1 repeat { tipedgeindex <- which(as.logical(match(phy$edge[, 2], taxon))) adjnode <- phy$edge[tipedgeindex] nodeedges <- which(phy$edge[, 1] == adjnode) potcherry <- phy$edge[nodeedges,2] if (sum(potcherry > 0) == 2) {break} taxon <- taxon + 1; } return(potcherry) } # The function resettree takes a rooted tree (input phy) and attempts to reorder # its nodes and leaves to obtain a proper object of the ape class "phylo" resettree <- function(phy) { if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") # First reset tips tiplocs <- which(as.numeric(phy$edge[,2]) > 0) tipvals <- as.numeric(phy$edge[tiplocs,2]) phy$edge[tiplocs,2] <- as.character(order(tipvals)) # Then reset nodes orderednodes <- phy$edge[order(as.numeric(phy$edge[,1]),decreasing = TRUE)] nodelist <- union(orderednodes,orderednodes) numnodes <- length(nodelist); for (i in 1:numnodes) { newnodename <- as.character(-i) findindices1 <- which(as.logical(match(phy$edge[,1],nodelist[i]))) phy$edge[findindices1,1] <- newnodename findindices2 <- which(as.logical(match(phy$edge[,2],nodelist[i]))) if (length(findindices2) > 0) { phy$edge[findindices2,2] <- newnodename } } return(phy) } # The function mergecherry is a subroutine called by ACL mergecherry <- function(phy,cherry,newbl) { if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") numedges <- dim(phy$edge)[1] newtaxind <- max(as.numeric(phy$edge))+1 newtax <- as.character(newtaxind) # newtaxname <- paste(cherry[1],"+",cherry[2]) newtaxname <- paste(phy$tip.label[as.numeric(cherry[1])],"+",phy$tip.label[as.numeric(cherry[2])]) tipedgeinds <- which(as.logical(match(phy$edge[, 2], cherry))) adjnode <- phy$edge[tipedgeinds[1]] nodeedges <- which(phy$edge[, 1] == adjnode) adjnodeedgeind <- which(as.logical(match(phy$edge[, 2], adjnode))) newedge <- c(phy$edge[adjnodeedgeind],newtax) edges <- rbind(phy$edge[-c(tipedgeinds,adjnodeedgeind),],newedge) tips <- c(phy$tip.label[-as.numeric(cherry)],newtaxname) lengths <- c(phy$edge.length[-c(tipedgeinds,adjnodeedgeind)],newbl) phy$edge <- edges phy$tip.label <- tips phy$edge.length <- lengths return(phy) } # END HELPER FUNCTIONS # -------------------- # The function ACL takes a rooted tree (input phy) and calculates the ACL # weights by means of Felsenstein's algorithm ACL <- function(phy) { if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") numtips <- max(as.numeric(phy$edge)) tipnames <- phy$tip.label compmat <- diag(rep(1,numtips)); while (dim(phy$edge)[1] > 2) { cherry <- firstcherry(phy) cherryinds <- as.numeric(cherry) cherryedgeinds <- which(as.logical(match(phy$edge[, 2], cherry))) cherryedgelens <- phy$edge.length[cherryedgeinds] bl1 <- cherryedgelens[1] bl2 <- cherryedgelens[2] adjnode <- phy$edge[cherryedgeinds[1]] nodeedges <- which(phy$edge[, 1] == adjnode) adjnodeedgeind <- which(as.logical(match(phy$edge[, 2], adjnode))) adjnodebl <- phy$edge.length[adjnodeedgeind] newbl <- adjnodebl + (bl1 * bl2)/(bl1 + bl2) vec1 <- compmat[cherryinds[1],] vec2 <- compmat[cherryinds[2],] newvec <- (vec1 * bl2 + vec2 * bl1)/(bl1 + bl2) compmat <- rbind(compmat[-cherryinds,],newvec) phy <- resettree(mergecherry(phy,cherry,newbl)) } bl1 <- phy$edge.length[1] bl2 <- phy$edge.length[2] vec1 <- compmat[1,] vec2 <- compmat[2,] weightvec <- (vec1 * bl2 + vec2 * bl1)/(bl1 + bl2) names(weightvec) <- tipnames return(weightvec[order(tipnames)]) } # The function BranchManager.imp1 takes a rooted tree (input phy) and implements # the BranchManager by first computing an effective covariance matrix. # See Additional file 1 BranchManager.imp1 <- function(phy) { if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") numedges <- dim(phy$edge)[1] numtaxa <- length(phy$tip.label) totallength <- 0 S <- matrix(0,numtaxa,numtaxa) for (i in 1:numedges) { curedgelen <- phy$edge.length[i] totallength <- totallength + curedgelen newphy <- edgeroot(i, phy) curS <- vcv.phylo(newphy) S <- S + curedgelen * curS[order(colnames(curS)),order(colnames(curS))] } S <- S/totallength Sinv <- solve(S) Num <- rowSums(Sinv) Den <- sum(Num) W <- Num/Den return(W) } # The function BranchManager.imp2 takes a rooted tree (input phy) and implements # the BranchManager using a weighted average of the ACL weights obtained # by successively rooting the tree at the midpoint of each of its edges BranchManager.imp2 <- function(phy) { if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") numedges <- dim(phy$edge)[1] numtaxa <- length(phy$tip.label) W <- rep(0,numtaxa) totallength <- 0 for (i in 1:numedges) { curedgelen <- phy$edge.length[i] W <- W + curedgelen*ACL(edgeroot(i, phy)) totallength <- totallength + curedgelen } W <- W/totallength return(W) } # Plot the example tree plot.phylo(example.tree) # Calculate the ACL weights ACL.weights.1 <- ACL(example.tree) # Note that the same result can be achieved by S <- vcv.phylo(example.tree) ACL.weights.2 <- rowSums(solve(S))/sum(solve(S)) # Calculate the BM weights BM.weights.1 <- BranchManager.imp1(example.tree) BM.weights.2 <- BranchManager.imp2(example.tree) # Return weights ACL.weights.1 ACL.weights.2 BM.weights.1 BM.weights.2