## Functions to merge individual AFLP datasets and convert formats
## 17 February 07
## Andrew Hipp

## AFLPmerge
## Reads several AFLP data files scored on the same individuals and binds them
## column-wise into a single data matrix.
## Assumes that each file has the loci labelled in the header, but no header
## entry for the sample names; sample names are read as row.names and locus
## names as column names (read.table with header = TRUE).
## Arguments:
##   sortCols    = logical, determines whether the matrix will be sorted by
##                 columns (typically locus names) as well as rows. Default = FALSE.
##                 Columns are reordered by position, so columns with the same
##                 name in different datasets keep their own data (R's data
##                 frame indexing may still rename the duplicates).
##   filesVector = character vector of file paths to merge. If NULL (default),
##                 a dialog is opened with choose.files (available in R for
##                 Windows only); pass the paths explicitly on other platforms
##                 or in scripts.
## Value:
##   If the sample names differ between files, prints a warning line and returns
##   the list of sorted name vectors so the mismatch can be inspected.
##   Otherwise a list with elements:
##     files       = the data frames as read,
##     mergedData  = the merged data frame, rows sorted by sample name,
##                   with comment "AFLPmerge",
##     names       = row names of each file as read,
##     namesSorted = sorted row names of each file.
AFLPmerge = function(sortCols = FALSE, filesVector = NULL) {
  if(is.null(filesVector)) filesVector = choose.files(caption = "select all files you want to merge")
  if(length(filesVector) == 0) stop("no files selected; nothing to merge")
  filesList = lapply(filesVector, read.table, header = TRUE)
  namesList = lapply(filesList, row.names)
  namesListSorted = lapply(namesList, sort)
  for(i in namesListSorted) {
    if(!identical(i, namesListSorted[[1]])) {
      print("NOT ALL NAMES ARE IDENTICAL")
      return(namesListSorted)
    }
  }
  ## Put every dataset in the same row order before binding; cbind does not
  ## match rows by name, so unsorted inputs would be silently misaligned.
  ## drop = FALSE keeps single-locus files as data frames.
  sortedFiles = lapply(filesList, function(x) x[sort(row.names(x)), , drop = FALSE])
  ## Reduce also handles the single-file case, where there is nothing to bind.
  mergedData = Reduce(cbind, sortedFiles)
  if(sortCols) mergedData = mergedData[ , order(names(mergedData)), drop = FALSE]
  message("To save the merged data file, copy the following and paste into the command line, changing the filename and variable as needed:")
  message("")
  message("write.table(___$mergedData, '___.txt', quote = FALSE)")
  ## The comment is the tag that checkAFLPformat looks for.
  comment(mergedData) = "AFLPmerge"
  return(list(files = filesList, mergedData = mergedData, names = namesList, namesSorted = namesListSorted))
}

## structureFormat
## Takes a matrix of the form exported by AFLPmerge and converts it to STRUCTURE format:
## every individual is followed by a second row in which all loci are set to 'unknown'.
## Arguments:
##   AFLPdata = data matrix in AFLPmerge format (set comment = "AFLPmerge" if the
##              matrix isn't actually coming from AFLPmerge)
##   unknown  = value to set all second alleles to. Default = -9, as is standard
##              for STRUCTURE at the time of this writing.
## Value:
##   The doubled matrix, ordered by individual name; the second row of each
##   individual carries the row name <name>_unknownAllele.
## Stops with an error if AFLPdata fails checkAFLPformat.
structureFormat = function(AFLPdata, unknown = -9) {
  errorCheck = checkAFLPformat(AFLPdata)
  if(errorCheck[1] == "error") stop(errorCheck[2])
  numberOfIndividuals = dim(AFLPdata)[1]
  numberOfLoci = dim(AFLPdata)[2]
  dummyMatrix = AFLPdata
  row.names(dummyMatrix) = paste(row.names(dummyMatrix), "_unknownAllele", sep = "")
  dummyMatrix[ , ] = unknown
  fullMatrix = rbind(AFLPdata, dummyMatrix)
  ## Interleave by position rather than by sorting the suffixed names: sorting
  ## the names can separate an individual from its dummy row when one name is
  ## a prefix of another (e.g. "A" and "AB"), depending on the locale.
  nameOrder = order(row.names(AFLPdata))
  pairedOrder = as.vector(rbind(nameOrder, nameOrder + numberOfIndividuals))
  fullMatrix = fullMatrix[pairedOrder, , drop = FALSE]
  message("To save the formated structure file, copy the following and paste into the command line, changing the filename and variable as needed:")
  message("")
  message("write.table(___, '___.txt', quote = FALSE)")
  message("")
  message("Once saved, do a global replace for the string '_unknownAllele', which should be replaced with a NULL.")
  message("Then the file should import into STRUCTURE without modification.")
  message("")
  message(paste("Number of individuals:", numberOfIndividuals))
  message(paste("Number of loci:", numberOfLoci))
  return(fullMatrix)
}

## delTaxa
## Deletes taxa from an AFLPmerge-format data matrix
## Removes all-zero loci if requested
## Arguments:
##   AFLPdata    = data matrix in AFLPmerge format (set comment = "AFLPmerge" if
##                 the matrix isn't actually coming from AFLPmerge)
##   taxonVector = vector of individual titles to retain; if NULL, then delTaxa
##                 calls a dialog box to select taxa
##   delZeros    = flag to delete all loci whose column sum is zero in the
##                 retained individuals (missing values are ignored in the sum).
##                 Default = TRUE
## Value:
##   list with dataMatrix (the reduced matrix, tagged with comment "AFLPmerge"
##   so it can be passed on to structureFormat) and taxaIncluded (the taxa kept).
## Stops with an error if AFLPdata fails checkAFLPformat or if a requested
## taxon name is not a row name of AFLPdata.
delTaxa = function(AFLPdata, taxonVector = NULL, delZeros = TRUE) {
  errorCheck = checkAFLPformat(AFLPdata)
  if(errorCheck[1] == "error") stop(errorCheck[2])
  if(is.null(taxonVector)) taxonVector <- select.list(row.names(AFLPdata), preselect = NULL, multiple = TRUE, title = "Select individuals to include in the analysis; use shift and ctrl to select multiple names.")
  ## Indexing by an unknown name would silently yield a row of NAs.
  if(is.character(taxonVector)) {
    missingTaxa = setdiff(taxonVector, row.names(AFLPdata))
    if(length(missingTaxa) > 0) stop(paste("taxa not found in the data matrix:", paste(missingTaxa, collapse = ", ")))
  }
  ## drop = FALSE keeps the result two-dimensional even when a single
  ## individual or a single locus remains.
  newData = AFLPdata[taxonVector, , drop = FALSE]
  if(delZeros) newData = newData[ , colSums(as.matrix(newData), na.rm = TRUE) != 0, drop = FALSE]
  ## Re-apply the format tag; subsetting is not guaranteed to carry it over.
  comment(newData) = "AFLPmerge"
  numberOfIndividuals = dim(newData)[1]
  numberOfLoci = dim(newData)[2]
  message("New dataset dimensions:")
  message(paste(" Number of individuals:", numberOfIndividuals))
  message(paste(" Number of loci:", numberOfLoci))
  return(list(dataMatrix = newData, taxaIncluded = taxonVector))
}

## checkAFLPformat
## Checks whether the comment on a dataset is "AFLPmerge"
## Value:
##   "ok" if the comment matches; otherwise prints guidance and returns a
##   character vector c("error", <explanation>) for the caller to act on.
checkAFLPformat = function(AFLPdata) {
  if(!identical(comment(AFLPdata), "AFLPmerge")) {
    message("This dataset appears not to have been generated by AFLPmerge")
    message("If you are pretty sure your dataset is a matrix in the right format, set comment([NAME_OF_YOUR_DATA_MATRIX]) = 'AFLPmerge' and call this function again")
    message("Note that if you are calling this on an AFLPmerge object, you need to select out only the AFLP dataset (e.g., aflpData$mergedData)")
    message("")
    return(c("error","reformat aborted; try again with another data matrix or after setting the comment for this matrix to 'AFLPmerge'"))
  }
  return("ok")
}

## TO SAVE COMMA DELIMITED:
## write.table(XXX$mergedData,"XXX.txt",sep = ",", quote = FALSE)

## pcordConvert
## Prepends a four-row header to an AFLP data frame so that it can be written
## out as a comma-delimited file for PC-ORD. The header rows hold, in order:
## the number of individuals (labelled "Trees"), the number of loci (labelled
## "Loci"), the data type code of every locus, and the locus names. The counts
## are stored as the row names of the first two header rows.
## Arguments:
##   AFLPdata = data frame with individuals as rows and loci as columns
##   datatype = data type code written for every locus. Default = "C"
##              (a vector with one code per locus should also work - not tested)
## Value:
##   data frame with the header rows on top of the data; all columns become
##   character.
## Stops with an error when the number of individuals equals the number of
## loci, because the two counts would then be duplicate row names.
pcordConvert = function(AFLPdata, datatype = "C") {
  numberOfIndividuals = dim(AFLPdata)[1]
  numberOfLoci = dim(AFLPdata)[2]
  if(numberOfIndividuals == numberOfLoci) stop("number of individuals equals number of loci; the two counts cannot both be used as row names of the header. Add the first two header lines by hand.")
  testHeader = as.data.frame(matrix("", nrow = 4, ncol = numberOfLoci), stringsAsFactors = FALSE)
  names(testHeader) = names(AFLPdata)
  ## Set all four row names in one assignment: changing them one at a time can
  ## collide with the default names "1".."4" (e.g. with 3 individuals).
  ## "" and " " are two distinct blank labels for the last two header rows.
  row.names(testHeader) = c(numberOfIndividuals, numberOfLoci, "", " ")
  testHeader[1,1] = "Trees"
  testHeader[2,1] = "Loci"
  testHeader[3,] = datatype
  testHeader[4,] = names(AFLPdata)
  pcordFile = rbind(testHeader, AFLPdata)
  message("To save this file, copy and paste the following:")
  message("write.table(testPCORDfile,'pcORDtest.csv',sep = ',', quote = FALSE, col.names = FALSE)")
  return(pcordFile)
}