# --------------------------------------------------------------------- # FUNCTIONS FOR PERFORMING A SERIES OF OU ANALYSES ON A BATCH OF TREES # --------------------------------------------------------------------- # Copied from functions used in Hipp 2007 Evolution paper # *** To run a hansen (OU) analysis, call runBatchHansenFit *** # Tweaked for ouch v 1.2-4 runBatchHansenFit <- # Runs batchHansenFit and brown.fit over a list of ouchTrees # Arguments: # "ouchTrees" = list of OUCH-style trees; if a single tree, send as 'list(TREE)' # "characterStates" = vector of character states, extracted from an ouch-style tree data.frame # "SEM"= standard error of the mean, vector extracted from an ouch-style tree data.frame # "scalingFactor" = factor to multiply against (times / max(times)) -- choose based on trial analyses # "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree) function(ouchTrees, characterStates, cladeMembersList, SEM = rep(0, length(characterStates)), scalingFactor = 1, logData = FALSE) { hansenBatch = vector("list", length(ouchTrees)) treeCounter = 0 for (i in ouchTrees) { treeCounter = treeCounter + 1 regimesList = regimeVectors(i$node, i$ancestor, i$times, i$species, cladeMembersList) write.table(regimesList, 'regimesList.txt') if (logData) hansenBatch[[treeCounter]] = batchHansenFit(i$node, i$ancestor, i$times, characterStates, error = SEM, scalingFactor, regimesList, c(as.character(1:length(regimesList)), "brown"), numberOfTermini = sum(!is.na(i$species))) else hansenBatch[[treeCounter]] = batchHansenFit(i$node, i$ancestor, i$times, characterStates, error = SEM, scalingFactor, regimesList, c(as.character(1:length(regimesList)), "brown"), numberOfTermini = sum(!is.na(i$species))) message(paste("Tree",treeCounter,"of",length(ouchTrees),"complete"))} return(list(hansens = hansenBatch, regimes = regimesList)) } paintBranches <- # Reads down an ouch data.frame tree row by row and paints regimes accordingly, with regimes changing at nodes specified # arguments # "node" "ancestor" "times" = the standard tree specification vectors of the OUCH-style tree # "regimeShiftNodes" = a vector of nodes at which selective regimes shift: root must be included, but tips are meaningless in this context # "regimeTitles" = a vector of titles for the regimes that begin at the root and at the nodes indicated in "regimeShiftNodes", # in order of description in "regimeShiftNodes", except that the root is listed first in "regimeTitles" # but not at all in "regimeShiftNodes"... defaults to "node[x]regime # Value: a vector of regimes that can be plopped right into an OUCH-style tree data frame function(node, ancestor, times, regimeShiftNodes, regimeTitles) { names(regimeTitles) = regimeShiftNodes colorsVector = vector("character", length(node)) for (i in 1:length(ancestor)) { # First three lines fill up the vector for nodes that are hit in order # uncomment the following line if you are using a newer version of OUCH: if (is.na(ancestor[as.integer(i)])) { # and comment the following line: # if (ancestor[as.integer(i)] == 0) { colorsVector[i] = regimeTitles["1"] next } if (any(ancestor[i] == regimeShiftNodes)) { colorsVector[i] = regimeTitles[as.character(ancestor[as.integer(i)])] next } if (colorsVector[as.integer(ancestor[i])] != "") { colorsVector[i] = colorsVector[as.integer(ancestor[i])] next } # These lines fill up the vector for nodes run reached before their immediate ancestor nodeQ = vector(integer, length(node)) ii = i repeat { nodeQ = c(ii, nodeQ) ii = ancestor[ii] if (any(ancestor[ii] == regimeShiftNodes)) { colorsVector[ii] = colorsVector[as.integer(ancestor[ii])] break} if (colorsVector[ancestor[ii]] != "") { colorsVector[ii] = colorsVector[as.integer(ancestor[ii])] break} } for(j in nodeQ) { colorsVector[j] = colorsVector[as.integer(ancestor[j])] } } return(colorsVector) } mrcaOUCH <- # Finds most recent common ancestor for a vector of tips by: # 1. Creating a vector of ancestral nodes leading to each tip # 2. Creating an intersection set of ancestral nodes for all taxa by intersecting taxa successively with the last intersection set # 3. Returning the node of the final intersection set that has the highest time # Arguments: # "node" "ancestor" "times" "species" = the standard tree specification vectors of the OUCH-style tree # "cladeVector" = vector of species for which you want to find the most recent common ancestor # Value: the node number (as an integer) of the most recent common ancestor # Works! 3-31-06 function(node, ancestor, times, species, cladeVector) { tips = match(cladeVector, species) listOfAncestorLines = lapply(tips, ancestorLine, node = node, ancestor = ancestor) latestMatch = listOfAncestorLines[[1]] for (i in listOfAncestorLines) { latestMatch = i[match(latestMatch, i, nomatch = 0)] } timesVector = times[as.integer(latestMatch)] if(length(timesVector) == 1) { if (is.na(timesVector)) mrca = "1" else mrca = timesVector} else mrca = latestMatch[match(max(as.double(timesVector), na.rm = TRUE), timesVector)] return(mrca) } ancestorLine <- # Creates a vector of ancestral nodes for a tip # Arguments: # "node" and "ancestor" = the standard tree specification vectors of the OUCH-style tree # "tip" = the tip node to trace back # Value: a vector of nodes leading from a tip to the root function(node, ancestor, tip) { nodesVector = vector("character") counter = 0 repeat { if (is.na(tip)) break counter = counter + 1 nodesVector[counter] = ancestor[as.integer(tip)] tip = ancestor[as.integer(tip)] } return(nodesVector) } allPossibleRegimes <- # Generates a list of vectors of all possible 2^n regimes for a given list of n ancestor nodes, assuming that each node # can either be a change node or not, and that all combinations are meaningful. # NOTE: This routine returns all possible permutations, ignoring the fact that the root must be included as a changeNode # for paintBranches to work properly. Exclude the root when calling this routine. # Arguments: # "changeNodes" = vector of all change nodes in all possible scenarios # Value: a list of changeNode vectors (assumes type = "character"), one for each possible scenario function(changeNodes) { numberOfRegimes = ifelse(length(changeNodes) == 1, 2, 2^length(changeNodes)) regime = vector("list", numberOfRegimes) for (i in 1:(numberOfRegimes - 1)) { remainder = i n = NULL for (j in as.integer(log2(i)):0) { if (2^j > remainder) n[j+1] = NA else {n[j+1] = changeNodes[j+1] remainder = remainder %% 2^j }} regime[[i]] = sort(n[!is.na(n)]) } regime[[numberOfRegimes]] = rep("0", times = as.integer(log2(i)) + 1) return(regime) } regimeVectors <- # Generates the list of painted branches representing all possible selective regimes for OU analyses, taking as argument # species vectors that describe the clades at the bases of which regimes are specified to change. # Arguments: # "node" "ancestor" "times" "species" = the standard tree specification vectors of the OUCH-style tree # "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree) # Value: list of vectors that can each be plugged directly into OU analysis as the "regimes" argument function(node, ancestor, times, species, cladeMembersList) { changeNodesList = lapply(cladeMembersList, mrcaOUCH, node = node, ancestor = ancestor, times = times, species = species) #Returns a list of length-1 character vectors, each containing a single changeNode -- the fact that this is a list causes problems in paintBranches if not changed to a 1-d vector changeNodesVector = vector("character", length(changeNodesList)) for (i in 1:length(changeNodesList)) # Changing cladeMemberList to a 1-d vector {changeNodesVector[i] = changeNodesList[[i]]} allRegimes = allPossibleRegimes(changeNodesVector) regimePaintings = vector("list", length(allRegimes)) for (i in 1:length(allRegimes)) { allRegimes[[i]] = c("1", allRegimes[[i]]) regimePaintings[[i]] = paintBranches(node, ancestor, times, allRegimes[[i]], as.character(allRegimes[[i]])) } return(regimePaintings) } batchHansenFit <- # Runs hansen.fit and brown.fit on a tree over a batch of selective regimes # Arguments: # "node" "ancestor" "times" "data" = the standard tree specification vectors of the OUCH-style tree # "regimesList" = list of regime-paintings as output from regimeVectors # "scalingFactor" = factor to multiply against (times / max(times)) -- choose based on trial analyses # Value: a matrix with nrow = regimes + 1 and columns for u, d.f., all estimated parameters, LRvsBM, AIC, and AIC weight # ADDED "error" on 2 march 07 to accomodate the "me" in Pienaar's ouch function(node, ancestor, times, data, error, scalingFactor = 1, regimesList, regimeTitles, numberOfTermini = 53) { variables = c("loglik", "df", "alpha", "sigma", "theta0", "theta1", "theta2", "theta3", "theta4", "theta5","theta6","theta7","theta8","theta9", "LRvsBM", "AIC", "AICweight", "deltaAIC", "exp(-0.5*deltaAIC)", "AICc", "AICcWeight", "deltaAICc", "exp(-0.5*deltaAICc)") treeData = matrix(data = NA, nrow = (length(regimesList) + 1), ncol = length(variables), dimnames = list(regimeTitles,variables)) brownianResults = brown.fit(data, error, node, ancestor, (times/max(times))*scalingFactor) treeData[length(regimesList) + 1, "AIC"] = brownianResults$aic treeData[length(regimesList) + 1, "loglik"] = brownianResults$loglik treeData[length(regimesList) + 1, "sigma"] = brownianResults$sigma treeData[length(regimesList) + 1, "theta0"] = brownianResults$theta treeData[length(regimesList) + 1, "df"] = brownianResults$df treeData[length(regimesList) + 1, "AICc"] = brownianResults$aic + ((2 * brownianResults$df * (brownianResults$df + 1)) / (numberOfTermini - brownianResults$df - 1)) for (i in 1:length(regimesList)) { message(paste("Running regime",i)) tempHansen = hansen.fit(data, error, node, ancestor, (times/max(times))*scalingFactor, regimesList[[i]]) treeData[i,"loglik"] = tempHansen$loglik treeData[i,"df"] = tempHansen$df treeData[i,"alpha"] = tempHansen$alpha treeData[i,"sigma"] = tempHansen$sigma treeData[i,"LRvsBM"] = 2 * ((tempHansen$loglik / 2) - (treeData[length(regimesList) + 1, "loglik"] / 2)) treeData[i,"AIC"] = tempHansen$aic treeData[i,"AICc"] = tempHansen$aic + ((2 * tempHansen$df * (tempHansen$df + 1)) / (numberOfTermini - tempHansen$df - 1)) for (j in 0:(length(names(tempHansen$theta))-1)) { treeData[i, paste("theta",j,sep = "")] = tempHansen$theta[[j+1]]}} for (i in 1:(length(regimesList) + 1)) { treeData[i, "deltaAIC"] = treeData[i, "AIC"] - min(treeData[1:(length(regimesList) + 1), "AIC"]) treeData[i, "exp(-0.5*deltaAIC)"] = exp(-0.5 * treeData[i,"deltaAIC"]) treeData[i, "deltaAICc"] = treeData[i, "AICc"] - min(treeData[1:(length(regimesList) + 1), "AICc"]) treeData[i, "exp(-0.5*deltaAICc)"] = exp(-0.5 * treeData[i,"deltaAICc"]) } for (i in 1:(length(regimesList) + 1)) { treeData[i, "AICweight"] = treeData[i, "exp(-0.5*deltaAIC)"] / sum(treeData[1:(length(regimesList) + 1), "exp(-0.5*deltaAIC)"]) treeData[i, "AICcWeight"] = treeData[i, "exp(-0.5*deltaAICc)"] / sum(treeData[1:(length(regimesList) + 1), "exp(-0.5*deltaAICc)"]) } return(treeData) } hansenStats <- # Returns a list of statistics for a runBatchHansenFit list function(hansenRunList) { columnNames = c("maxAlpha","bestFitModel", "maxAICweight") rowNames = as.character(1:length(hansenRunList)) hansenStatsTemp = matrix(data = NA, ncol = length(columnNames), nrow = length(hansenRunList), dimnames = list(rowNames, columnNames)) counter = 0 for (i in hansenRunList) { counter = counter + 1 hansenStatsTemp[counter, "maxAlpha"] = max(i[0:16, "alpha"]) hansenStatsTemp[counter, "bestFitModel"] = match(max(i[0:16, "AICweight"]), i[0:16, "AICweight"]) hansenStatsTemp[counter, "maxAICweight"] = max(i[0:16, "AICweight"])} return(hansenStatsTemp)} regimeNodes <- # Returns a binary table indicating whether a regime node is present or absent based on allPossibleRegimes output; # presumes nodes are labelled 1:n) # ONLY NEEDED FOR LABELLING AND INTERPRETATION; not actually used in the analysis function(n) { regimesList = allPossibleRegimes(1:n) regimesNameMatrix = matrix(data = NA, ncol = n, nrow = length(regimesList), dimnames = list(as.character(1:length(regimesList)), as.character(1:n))) for (i in 1:length(regimesList)) { for (j in 1:n) { if (is.na(match(j,regimesList[[i]]))) regimesNameMatrix[i,j] = 0 else regimesNameMatrix[i,j] = 1 }} return(regimesNameMatrix) }