## authorParse: break the authority part of scientific name(s) into
## individual, lower-cased author tokens.
##
## Arguments:
##   SN  character vector of scientific names of the form
##       "Genus species Authority [var./ssp. epithet Authority]".
## Value:
##   A character vector of author tokens with every space removed. Tokens
##   from all elements of SN are concatenated into one flat vector. Empty
##   tokens (left behind e.g. by autonyms, or by names that carry no
##   authority at all) are dropped, so the result may be character(0).
authorParse <- function(SN) {
  SNp <- tolower(SN)

  ## 0. get rid of genus, sp, and infrasp. epithet
  ## NOTE(review): the "." in these patterns is an unescaped regex wildcard,
  ## so "var"/"ssp" followed by any character and a space also matches.
  SNp <- gsub("ssp. \\b\\S*\\b", "ssp. ", SNp)
  SNp <- gsub("var. \\b\\S*\\b", "var. ", SNp)
  SNp <- sub("\\b\\S*\\b \\b\\S*\\b", "", SNp)

  ## Split every element on a literal delimiter and flatten the result.
  ## unlist() returns NULL once nothing is left, and strsplit() rejects NULL,
  ## so an exhausted vector is normalised back to character(0).
  splitFixed <- function(x, delim) {
    pieces <- unlist(strsplit(x, delim, fixed = TRUE), use.names = FALSE)
    if (is.null(pieces)) character(0) else pieces
  }

  ## 1. rank markers, 2. " ex ", 3. "&" / " and "
  for (delim in c("var.", "ssp.", " ex ", "&", " and ")) {
    SNp <- splitFixed(SNp, delim)
  }

  ## 4. replace any "(" with ""
  SNp <- gsub("(", "", SNp, fixed = TRUE)

  ## 5. strsplit by ")"
  SNp <- splitFixed(SNp, ")")

  ## 6. get rid of all spaces, drop tokens that end up empty
  SNp <- gsub(" ", "", SNp, fixed = TRUE)
  SNp[nzchar(SNp)]
}

## authorMatch: report author tokens that are missing from a reference list.
##
## Arguments:
##   nameVector    scientific names (coerced with as.character).
##   authorVector  accepted author strings (coerced to character and
##                 lower-cased before comparison).
## Value:
##   A character matrix with columns "species" and "problemName": one row per
##   author token of a name (as produced by authorParse) that is not found in
##   authorVector. Zero rows when every token matches or nameVector is empty.
## Side effect:
##   Emits one progress message() per name.
authorMatch <- function(nameVector, authorVector) {
  nameVector <- as.character(nameVector)
  authorVector <- tolower(as.character(authorVector))

  ## Collect the unmatched tokens per name, then build the matrix once
  ## instead of growing it with rbind() inside the loop.
  unmatched <- vector("list", length(nameVector))
  for (i in seq_along(nameVector)) {
    message(paste("Working on name", nameVector[i]))
    nameAuthors <- authorParse(nameVector[i])
    unmatched[[i]] <- nameAuthors[!(nameAuthors %in% authorVector)]
  }

  species <- rep(nameVector, lengths(unmatched))
  problemName <- unlist(unmatched, use.names = FALSE)
  if (is.null(problemName)) {
    problemName <- character(0)
  }

  matrix(
    c(species, problemName),
    nrow = length(problemName),
    ncol = 2,
    dimnames = list(NULL, c("species", "problemName"))
  )
}