#Script to perform (final?) analyses and produce figures for a # leaf rank manuscript # see also: files named: 09-07-14BSAfigsScript.R, # 09-07-22RankProcessingBSAfigs.R, 10-01-29Rscript.R, # 10-11rank_test_analysis.R # and: /Users/wag/Desktop/HerbLeafOrg/09-05-25analysis/09-05-25Rscript.R # # this version begun: 10-11-10 # last modified: 10-11-17, 11-01-21 # this is where the raw data for the MS should live PATH <- '/Users/wag/Desktop/HerbLeafOrg/11-01MS/data/' ############################################################ # Read in final synthesized version of Leo's rank and # habit data from the US national herbarium sheets as # cleaned up by Stefan and me raw <- read.table(paste(PATH, '10-11rank_habit_data.csv', sep = ''), sep = '\t', header = TRUE, colClasses = 'character') GnamesU <- unique(raw$GENUS) FnamesU <- unique(raw$FAMILY) GFnames <- paste(raw$GENUS, raw$FAMILY, sep = '_') GFnamesU <- unique(GFnames) #length(GnamesU) # 1835 unique Genera #length(FnamesU) # 281 unique Families #length(GFnamesU) # 1849 unique Gen./Fam. pairs # basic checking dim(raw) str(raw) library(YaleToolkit) whatis(raw) # pull information on xeric habit out of HABIT.REV field XERIC <- rep(FALSE, nrow(raw)) XERIC[grep('-x', raw$HABIT.REV)] <- TRUE final.habit <- gsub('-x', '', raw$HABIT.REV) raw <- cbind(raw, XERIC) HABIT.FIN <- factor(final.habit, levels = c('parasite', 'aquatic', 'herb', 'shrub', 'woody', 'tree', 'vine')) raw <- cbind(raw, HABIT.FIN) # get rid of any row with the word 'delete' in its # comments column; eliminate unnecessary columns dim(raw) bad <- grep('delete', tolower(raw$COMMENT)) clean <- raw[-bad,c(1:2,4:7,9,12:13)] dim(clean) # convert numeric rank columns to numbers and make # the REVERSION column a factor clean[,5:6] <- apply(clean[,5:6], 2, as.numeric) clean$REVERSION <- as.factor(clean$REVERSION) str(clean) whatis(clean) Cgenera <- unique(clean$GENUS) length(Cgenera) Cfam <- unique(clean$FAMILY) length(Cfam) # (lost one genus in cleaning) CGFfactor <- paste(clean$GENUS, clean$FAMILY, sep = '_') # Split rank and habit on names to get data by genus and family gminr <- sapply(split(clean$RMIN.N, as.factor(clean$GENUS)), min) gmaxr <- sapply(split(clean$RMAX.N, as.factor(clean$GENUS)), max) fminr <- sapply(split(clean$RMIN.N, as.factor(clean$FAMILY)), min) fmaxr <- sapply(split(clean$RMAX.N, as.factor(clean$FAMILY)), max) ghabit <- lapply(split(clean$HABIT.FIN, as.factor(clean$GENUS)), unique) ghabit <- sapply(ghabit, function(x) paste(x, collapse = ',')) fhabit <- lapply(split(clean$HABIT.FIN, as.factor(clean$FAMILY)), unique) fhabit <- sapply(fhabit, function(x) paste(x, collapse = ',')) # Write out data by genus and by family GenDat <- data.frame(gminr, gmaxr, ghabit) FamDat <- data.frame(fminr, fmaxr, fhabit) write.table(GenDat, file = 'GenusRankData.csv') write.table(FamDat, file = 'FamilyRankData.csv') meanrank <- apply(clean[,5:6], 1, mean) clean.na <- clean[!(is.na(clean[5]) | is.na(clean[9])),] meanrank.na <- apply(clean.na[,5:6], 1, mean) clean <- cbind(clean, MEANRANK = meanrank) clean.na <- cbind(clean.na, MEANRANK = meanrank.na) ##### Some plots.... # Boxplot of habit versus leaf rank: plot and regression dev.new('1', height = 5, width = 7) plot(jitter(as.numeric(clean$HABIT.FIN)), jitter(meanrank), col = 'grey', las = 2, cex.axis = 0.7, xaxt = 'n', yaxt = 'n', pch = 5, cex = 0.75, xlab = '', ylab = 'Leaf Rank', main = paste('Boxplots of leaf rank by habit for', length(Cgenera), 'genera')) boxplot(split(meanrank, clean$HABIT.FIN), #notch = TRUE, yaxt = 'n', varwidth = TRUE, add = TRUE, pch = 19) axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0')) axis(2, at = 1:16, labels = FALSE) hab.lm <- lm(meanrank ~ clean$HABIT.FIN + clean$XERIC) summary(hab.lm) anova(hab.lm) # Drought tolerance versus leaf rank: plot and tests dev.new('2', height = 5, width = 5) plot(jitter(as.numeric(clean$XERIC) + 1), jitter(meanrank), col = 'grey', las = 2, cex.axis = 0.7, xaxt = 'n', yaxt = 'n', xlim = c(0.5,2.4), xlab = '', ylab = 'Leaf Rank', pch = 5, cex = 0.75, main = paste('Drought tolerance and leaf rank for', length(Cgenera), 'genera')) boxplot(meanrank[clean$XERIC == FALSE], meanrank[clean$XERIC == TRUE], notch = TRUE, pch = 19, yaxt = 'n', varwidth = TRUE, add = TRUE) axis(1, at = c(1,2), labels = c('Non-xeric', 'Xeric')) axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0')) axis(2, at = 1:16, labels = FALSE) t.test(meanrank[clean$XERIC == TRUE], meanrank[clean$XERIC == FALSE]) wilcox.test(meanrank[clean$XERIC == TRUE], meanrank[clean$XERIC == FALSE]) # Habit versus family: plot and regression dev.new('3', height = 5, width = 15) par(mar = c(6, 4, 3, 1)) plot(jitter(as.numeric(as.factor(clean$FAMILY))), jitter(meanrank), col = 'grey', las = 2, cex.axis = 0.7, xaxt = 'n', yaxt = 'n', xlab = '', ylab = 'Leaf Rank', main = paste('Boxplots of leaf rank by family for', length(Cgenera), 'genera')) boxplot(split(meanrank, clean$FAMILY), las = 3, cex.axis = 0.5, #xaxt = 'n', yaxt = 'n', varwidth = TRUE, add = TRUE) #axis(1) axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0')) axis(2, at = 1:16, labels = FALSE) fam.lm <- lm(meanrank ~ clean$FAMILY) summary(fam.lm) anova(fam.lm) # plot and regression of rank against habit, assuming habit is # an ordered factor (number) probably a bad idea plot(jitter(as.numeric(clean$HABIT.FIN)), jitter(meanrank), yaxt = 'n', xaxt = 'n', ylab = 'Leaf Rank', xlab = 'Habit') axis(1, at = 1:nlevels(clean$final.habit), labels = levels(clean$final.habit)) axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0')) axis(2, at = 1:16, labels = FALSE) hab.ord.lm <- lm(meanrank ~ as.numeric(clean$HABIT.FIN)) summary(hab.ord.lm) anova(hab.ord.lm) abline(hab.ord.lm$coef, col = 'red') # plot of distributions of leaf rank broken down by habit dev.new('4', height = 5, width = 5) plot(density(meanrank.na), xaxt = 'n', main = 'Distributions of leaf rank', lwd = 2) lines(density(meanrank.na[clean.na$HABIT.FIN == 'tree']), col = 'red') lines(density(meanrank.na[clean.na$HABIT.FIN == 'shrub']), col = 'blue') lines(density(meanrank.na[clean.na$HABIT.FIN == 'herb']), col = 'green') axis(1, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0')) axis(1, at = 1:16, labels = FALSE) mtext('black=total, red=trees, blue=shrubs, green=herbs') # Data in: # clean # clean.na # GenDat # FamDat ############################################################ # Processing measurement data: file paths: # #/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/AreoleDistance.txt #/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/AreolePropertiesMatrix_4_2_09.txt # # renamed: 09-04-02areole_distances.csv and # 09-04-02areole_properties.csv and # moved to PATH dat <- read.table(paste(PATH, '09-04-02areole_properties.csv', sep = ''), header = TRUE, colClasses = 'numeric') dists <- read.table(paste(PATH, '09-04-02areole_distances.csv', sep = ''), header = TRUE, colClasses = 'numeric') #put all data from Chuck together into one table dat <- cbind(dat, dists) # read in the metadata meta <- read.table(paste(PATH, '10-11metadata.csv', sep = ''), header = TRUE, sep = '\t', colClasses = 'character') # these are the 120 binomials (Gen. sp.) in the metadata meta.tax <- unique(paste(as.character(meta$REV.GEN), as.character(meta$REV.SP)))[-1] # will use these for the medium and short length taxon names taxa.med <- meta.tax abbrev <- function(x) paste(substr(x, 1, 3), collapse = '.') taxa.sht <- sapply(strsplit(meta.tax, split = ' '), abbrev) # long taxa names are: taxa.long <- unique(paste(meta$REV.GEN, ' ', meta$REV.SP, ' ', meta$REV.AUTH, ' (', meta$REV.FAM, ')', sep = ''))[-1] # check that all the taxa names match cbind(taxa.sht, taxa.med, taxa.long) #note there is no data for [40], Phragmites communis # create an indicator to identify which leaves were processed measured <- rep(FALSE, nrow(meta)) measured[unique(dat$Leaf)] <- TRUE # create an indicator to identify the 'good' leaves bad <- c(8, 9, 58, 59, 66, 73, 82, 80, 81, 28, 29, 32, 33, 58, 93, 94, 86, 98, 89,100, 107,110,133,147,175) bad <- sort(c(bad,106,134)) good <- !(meta$PHOT.NO %in% bad) & measured # check measured and good indicators cbind(as.character(meta$NCLC.NO), as.character(meta$PHOT.NO), as.character(meta$REV.GEN), as.character(meta$REV.SP), measured, good) ########## CALCULATE SUMMARY STATISTICS FOR EACH MEASURED IMAGE #calculate leaf means for all measured variables leaf.means <- split(dat, dat$Leaf) col.mean <- function(x){ colSums(as.data.frame(x)) / nrow(as.data.frame(x)) } for (i in seq_along(leaf.means)){ leaf.means[[i]] <- sapply(leaf.means[[i]], col.mean) } leaf.means <- as.data.frame(t(as.data.frame(leaf.means))) row.names(leaf.means) <- gsub('X', '', row.names(leaf.means)) names(leaf.means) <- gsub('.x$', '', names(leaf.means)) #calculate leaf standard deviations leaf.sds <- split(dat, dat$Leaf) col.sd <- function(x){apply(x, 2, sd)} for (i in seq_along(leaf.sds)){ leaf.sds[[i]] <- col.sd(leaf.sds[[i]]) } leaf.sds <- as.data.frame(t(as.data.frame(leaf.sds))) row.names(leaf.sds) <- gsub('X', '', row.names(leaf.sds)) leaf.sds$Leaf <- leaf.means$Leaf #calculate leaf medians leaf.meds <- split(dat, dat$Leaf) col.med <- function(x){apply(x, 2, median)} for (i in seq_along(leaf.meds)){ leaf.meds[[i]] <- col.med(leaf.meds[[i]]) } leaf.meds <- as.data.frame(t(as.data.frame(leaf.meds))) row.names(leaf.meds) <- gsub('X', '', row.names(leaf.meds)) #calculate 25th and 75th quantiles leaf.q25 <- split(dat, dat$Leaf) col.q25 <- function(x){apply(x, 2, function(y){quantile(y, 0.25)}) } for (i in seq_along(leaf.q25)){ leaf.q25[[i]] <- col.q25(leaf.q25[[i]]) } leaf.q25 <- as.data.frame(t(as.data.frame(leaf.q25))) row.names(leaf.q25) <- gsub('X', '', row.names(leaf.q25)) leaf.q75 <- split(dat, dat$Leaf) col.q75 <- function(x){apply(x, 2, function(y){quantile(y, 0.75)}) } for (i in seq_along(leaf.q75)){ leaf.q75[[i]] <- col.q75(leaf.q75[[i]]) } leaf.q75 <- as.data.frame(t(as.data.frame(leaf.q75))) row.names(leaf.q75) <- gsub('X', '', row.names(leaf.q75)) #calculate min and max leaf.min <- split(dat, dat$Leaf) for (i in seq_along(leaf.min)){ leaf.min[[i]] <- apply(leaf.min[[i]], 2, min) } leaf.min <- as.data.frame(t(as.data.frame(leaf.min))) row.names(leaf.min) <- gsub('X', '', row.names(leaf.min)) leaf.max <- split(dat, dat$Leaf) for (i in seq_along(leaf.max)){ leaf.max[[i]] <- apply(leaf.max[[i]], 2, max) } leaf.max <- as.data.frame(t(as.data.frame(leaf.max))) row.names(leaf.max) <- gsub('X', '', row.names(leaf.max)) #calculate modes #foo <- density(leaf.mod[[1]]$Area) #plot(foo) #abline(h = max(foo$y)) #mod <- foo$x[foo$y == max(foo$y)] #abline(v = mod) leaf.mod <- split(dat, dat$Leaf) mod <- function(x){ x.d <- density(x) mean(x.d$x[x.d$y == max(x.d$y)]) } for (i in seq_along(leaf.mod)){ leaf.mod[[i]] <- apply(leaf.mod[[i]], 2, mod) } leaf.mod <- as.data.frame(t(as.data.frame(leaf.mod))) row.names(leaf.mod) <- gsub('X', '', row.names(leaf.mod)) # merge data with metadata table.... names(dat) meas.vars <- names(dat)[c(-1, -10)] merged.dat <- meta merged.dat <- cbind(merged.dat, MEASURED = measured, GOOD = good) # put all the measurement summaries together in one data frame for(i in length(meas.vars):1){ temp <- cbind(leaf.min[,meas.vars[i]], ### MINIMUM as.integer(row.names(leaf.min))) this.name <- paste(meas.vars[i], 'MIN', sep = '.') colnames(temp) <- c(this.name, 'PHOT.NO') merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO') temp <- cbind(leaf.q25[,meas.vars[i]], ### 25 QUANTILE as.integer(row.names(leaf.q25))) this.name <- paste(meas.vars[i], 'Q25', sep = '.') colnames(temp) <- c(this.name, 'PHOT.NO') merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO') temp <- cbind(leaf.meds[,meas.vars[i]], ### MEDIAN as.integer(row.names(leaf.meds))) this.name <- paste(meas.vars[i], 'MEDIAN', sep = '.') colnames(temp) <- c(this.name, 'PHOT.NO') merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO') temp <- cbind(leaf.q75[,meas.vars[i]], ### 75 QUANTILE as.integer(row.names(leaf.q75))) this.name <- paste(meas.vars[i], 'Q75', sep = '.') colnames(temp) <- c(this.name, 'PHOT.NO') merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO') temp <- cbind(leaf.max[,meas.vars[i]], ### MAXIMUM as.integer(row.names(leaf.max))) this.name <- paste(meas.vars[i], 'MAX', sep = '.') colnames(temp) <- c(this.name, 'PHOT.NO') merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO') temp <- cbind(leaf.sds[,meas.vars[i]], ### STD. DEV. as.integer(row.names(leaf.sds))) this.name <- paste(meas.vars[i], 'SD', sep = '.') colnames(temp) <- c(this.name, 'PHOT.NO') merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO') temp <- cbind(leaf.means[,meas.vars[i]], ### MEAN as.integer(row.names(leaf.means))) this.name <- paste(meas.vars[i], 'MEAN', sep = '.') colnames(temp) <- c(this.name, 'PHOT.NO') merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO') temp <- cbind(leaf.mod[,meas.vars[i]], ### MODE as.integer(row.names(leaf.mod))) this.name <- paste(meas.vars[i], 'MODE', sep = '.') colnames(temp) <- c(this.name, 'PHOT.NO') merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO') } merged.good <- merged.dat[merged.dat$GOOD,] ########## # Convert from 230 images to 120 taxa # Note: 120 taxa but only 119 in merged.dat (because there is # no data for Phragmites communis) merged.tax <- unique(paste(as.character(merged.dat$REV.GEN), as.character(merged.dat$REV.SP))) cbind(meta.tax, merged.tax) # NO MATCH! merged.tax <- unique(paste(as.character(merged.good$REV.GEN), as.character(merged.good$REV.SP))) cbind(meta.tax, merged.tax) # EVEN WORSE MATCH! # Create 120 by ? matrix with good taxon data.... merged.taxa <- paste(merged.good$REV.GEN, merged.good$REV.SP) for(i in 1:length(taxa.med)){ this.tax <- merged.good[merged.taxa == taxa.med[i], 2:89] this.tax <- apply(this.tax, 2, mean) if(i == 1) tax.dat <- this.tax else tax.dat <- rbind(tax.dat, this.tax) } rownames(tax.dat) <- taxa.sht for(i in 1:length(taxa.med)){ this.tax <- merged.good[merged.taxa == taxa.med[i], 90:110] if(length(unique(this.tax$AB)) > 1) this.tax$AB <- 'both' this.tax <- this.tax[1,] if(i == 1) tax.met <- this.tax else tax.met <- rbind(tax.met, this.tax) } rownames(tax.met) <- taxa.sht tax.dat[is.nan(tax.dat)] <- NA tax <- cbind(tax.met, tax.dat) woody.ind <- meta[leaf.means$Leaf,]$WOODY wood.long <- rep('Y', nrow(dat)) for(i in seq_along(wood.long)){ if(is.na(meta[dat$Leaf[i],'WOODY'])) wood.long[i] <- '?' else if(meta[dat$Leaf[i],'WOODY'] == 'N') wood.long[i] <- 'N' } wood.long <- as.factor(wood.long) ab.ind <- as.character(meta[leaf.means$Leaf,]$AB) ab.ind[(1:length(ab.ind))[is.na(ab.ind)]] <- 's' ab.ind <- as.factor(ab.ind) ab.long <- rep('a', nrow(dat)) for(i in seq_along(ab.long)){ if(is.na(meta[dat$Leaf[i],'AB'])) ab.long[i] <- 's' else if(meta[dat$Leaf[i],'AB'] == 'b') ab.long[i] <- 'b' } ab.long <- as.factor(ab.long) tax.long <- dat$Leaf for(i in seq_along(tax.long)){ tax.long[i] <- paste(meta$REV.GEN[tax.long[i] == meta$PHOT.NO], meta$REV.SP[tax.long[i] == meta$PHOT.NO]) } # data in: # 208253 by 13: dat (raw areole data) # 238 by 21: meta (raw metadata) # tree of 120 taxa: tr # 230 by 111: merged.dat (summary data by image) # 206 by 111: merged.good (only good images) # 120 by 88: tax.dat (merged data averaged over taxa) # 120 by 21: tax.met (meta data averaged over taxa) # 120 by 109: tax (merged plus meta data averaged over taxa) # estimators of center and spread (230 by 13): # leaf.mod # leaf.means, leaf.sds # leaf.meds, leaf.q25, leaf.q75, leaf.max, leaf.min # indictors, etc.: # length = 208253: wood.long, ab.long, tax.long # length = 238: measured, good # length = 230: locs, woody.ind, ab.ind # length = 120: taxa.long, taxa.med, taxa.sht ##### Rank test stuff # See also: analysis of full test data set # begun 2010-11-09 #PATH <- '/Users/wag/Desktop/HerbLeafOrg/RankTest/10-04test/' levs <- c('1r1', '1r2', '1r3', '2r0', '2r1', '2r2', '2r3', '3r0', '3r1', '3r2', '3r3', '4r0', '4r1', '4r2', '4r3') ########## # Should not need to repeat the commented section #library(gdata) #wag <- read.xls(paste(PATH, 'rank.coding.sheet.WAG.xls', # sep = ''), skip = 2) #sal <- read.xls(paste(PATH, 'rank.coding.sheet.SAL.xls', # sep = ''), skip = 2) #cap <- read.xls(paste(PATH, 'rank.coding.sheet.CAP.xls', # sep = ''), skip = 2) #gd <- read.xls(paste(PATH, 'rank.coding.sheet.GD.xls', # sep = ''), skip = 2) #wag <- gsub(' ', '', as.character(wag[,2])) #sal <- gsub(' ', '', as.character(sal[,2])) #cap <- gsub(' ', '', as.character(cap[,2])) #gd <- gsub(' ', '', as.character(gd[,2])) #wag <- factor(wag, levs) #sal <- factor(sal, levs) #cap <- factor(cap, levs) #gd <- factor(gd, levs) #wag_n <- as.numeric(wag) #sal_n <- as.numeric(sal) #cap_n <- as.numeric(cap) #gd_n <- as.numeric(gd) #comp <- data.frame(wag, sal, cap, gd, wag_n, sal_n, cap_n, gd_n) #write.table(comp, paste(PATH, 'prelim.comp.csv')) ########## #lookup <- read.table(paste(PATH, 'lookup.csv', sep = ''), # header = TRUE) #lookup <- lookup[order(lookup$NUM),] #nclcs <- as.character(lookup$NCLC) #nclc <- read.table('/Users/wag/Desktop/HerbLeafOrg/09-06-02NCLCindex/09-06-02NCLCindex.csv', header = TRUE, sep = '\t') #test.set <- nclc[nclc$No. %in% nclcs,] #test.set <- test.set[order(as.character(test.set$No.)), # c(1,2,9,11)] #write.table(test.set, paste(PATH, 'some.names.csv', sep = '')) ########## # by hand, combine some.names.csv, prelim.comp.csv # and comparision.csv, filling in holes....Then: comp <- read.table(paste(PATH, '10-11comparison.csv', sep = ''), header = TRUE, sep = '\t') num <- comp[,c(11,12,14,16)] mns <- rowSums(num)/ncol(num) cont1 <- comp[1:10,11:16] cont2 <- comp[11:30,11:16] test <- comp[31:150,c(11,12,14,16)] dups <- c(18,19,38,39) dev.new('5', height = 6, width = 11) par(mar = c(11,4,4,1)) plot(rep(1:150, 4), jitter(unlist(num[,1:4])), col = rep(1:4, each = 150), las = 2, cex.axis = 0.7, xaxt = 'n', yaxt = 'n', ylim = c(0,18), xlab = '', ylab = 'Leaf Rank', pch = 5, cex = 0.75, main = 'Leaf Rank in 148 leaves through 4 pairs of eyes') mtext('black line is mean') axis(1, at = 1:150, las = 2, cex.axis = 0.5, labels = paste(comp$GENUS, ' ', comp$SP, ' (', comp$FAM, ') ', comp$X, sep = '')) axis(2, at = c(1,5,9,13), labels = c('1r1', '2r1', '3r1', '4r1')) axis(2, at = 1:16, labels = FALSE, cex.axis = 0.5) legend('top', ncol = 4, pch = 1, legend = colnames(num[,1:4]), col = 1:4) lines(1:150, mns) abline(v = 10.5, col = 'grey', lwd = 1) abline(v = 30.5, col = 'grey', lwd = 1) text(5, 18, 'Control1', col = 'grey', cex = 0.7) text(21, 18, 'Control2', col = 'grey', cex = 0.7) segments(18, unlist(num[18,1:4]), 38, unlist(num[38,1:4]), col = 1:4) segments(19, unlist(num[19,1:4]), 39, unlist(num[39,1:4]), col = 1:4) ##### reorder <- order(mns) mns.r <- mns[reorder] num.r <- num[reorder,] dev.new('6', height = 6, width = 11) par(mar = c(11,4,4,1)) plot(rep(1:150, 4), jitter(unlist(num.r[,1:4])), col = rep(1:4, each = 150), las = 2, cex.axis = 0.7, xaxt = 'n', yaxt = 'n', ylim = c(0,18), xlab = '', ylab = 'Leaf Rank', pch = 5, cex = 0.75, main = 'Leaf Rank in 148 leaves through 4 pairs of eyes') mtext('black line is mean') axis(1, at = 1:150, las = 2, cex.axis = 0.5, labels = paste(comp$GENUS, ' ', comp$SP, ' (', comp$FAM, ') ', comp$X, sep = '')[reorder]) axis(2, at = c(1,5,9,13), labels = c('1r1', '2r1', '3r1', '4r1')) axis(2, at = 1:16, labels = FALSE, cex.axis = 0.5) legend('topleft', ncol = 2, pch = 1, legend = colnames(num.r[,1:4]), col = 1:4) lines(1:150, mns.r) ##### dev.new('7', height = 6, width = 11) par(mar = c(11,4,4,1)) plot(rep(1:150, 4), jitter(unlist(num.r[,1:4])), col = rep(1:4, each = 150), las = 2, cex.axis = 0.7, xaxt = 'n', yaxt = 'n', ylim = c(0,18), xlab = '', ylab = 'Leaf Rank', pch = 5, cex = 0.75, main = 'Leaf Rank in 148 leaves through 4 pairs of eyes') mtext('grey line is mean') axis(1, at = 1:150, las = 2, cex.axis = 0.5, labels = paste(comp$GENUS, ' ', comp$SP, ' (', comp$FAM, ') ', comp$X, sep = '')[reorder]) axis(2, at = c(1,5,9,13), labels = c('1r1', '2r1', '3r1', '4r1')) axis(2, at = 1:16, labels = FALSE, cex.axis = 0.5) legend('topleft', ncol = 2, pch = 1, legend = colnames(num.r[,1:4]), col = 1:4) for(i in 1:4){ lines(1:150, num.r[,i], col = i) } lines(1:150, mns.r, col = 'grey', lwd = 4) ########## c1 <- unique(as.numeric(cor(cont1)))[-1] c2 <- unique(as.numeric(cor(cont2)))[-1] test <- unique(as.numeric(cor(test)))[-1] t.test(c1, c2, paired = TRUE) wilcox.test(c1, c2, paired = TRUE) ks.test(c1, c2) t.test(c1, test) t.test(c2, test) dev.new('8', height = 6, width = 6) par(mfrow = c(3,1)) hist(c1, xlim = c(0.3,1), xlab = 'Correlations between pairs of coders', main = 'First Control, n = 10', breaks = seq(0.3,1,length.out = 14)) mu <- mean(c1) abline(v = mu, col = 'red') text(mu, 7, expression(mu), xpd = NA, col = 'red') hist(c2, xlim = c(0.3,1), xlab = 'Correlations between pairs of coders', main = 'Second Control, n = 20', breaks = seq(0.3,1,length.out = 14)) mu <- mean(c2) abline(v = mu, col = 'red') text(mu, 7, expression(mu), xpd = NA, col = 'red') hist(test, xlim = c(0.3,1), xlab = 'Correlations between pairs of coders', main = 'Test, n = 120', breaks = seq(0.3,1,length.out = 14)) mu <- mean(test) abline(v = mu, col = 'red') text(mu, 7, expression(mu), xpd = NA, col = 'red') ########## library(YaleToolkit) dev.new('9', height = 6, width = 6) gpairs(cbind(jitter(as.matrix(num)), jitter(mns)), upper.pars = list(scatter = 'stats'), stat.pars = list(verbose = TRUE), scatter.pars = list(col = c(rep(1,10), rep (2,20), rep(3,120)))) ############################################################ # Combining rank test data with measurements.... # measurements in 'tax'; rank test data in 'comp' dim(comp) dim(tax) names(comp)[1] <- 'TEST.NO' matcher <- paste(substr(as.character(comp$GENUS)[31:150], 1, 3), substr(as.character(comp$SP)[31:150], 1, 3), sep = '.') row.names(comp) <- c(1:30, matcher) all(sort(row.names(comp)[31:150]) == sort(row.names(tax))) xtax <- merge(tax, comp[31:150,], by = 'row.names') xtax <- xtax[,-4] names(xtax)[110] <- 'TEST.NO' xtax$mean.rank.n <- apply(xtax[,120:125], 1, function(x) mean(x, na.rm = TRUE)) dev.new('10', height = 6, width = 6) plot(xtax$mean.rank.n, log(xtax$Area.MODE), xlab = 'Mean Rank', ylab = 'Modal areole area (log pixels)', xaxt = 'n', main = 'Predicting Areole Size from Rank') axis(1, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0')) axis(1, at = 1:16, labels = FALSE) comp.lm <- lm(log(xtax$Area.MODE) ~ xtax$mean.rank.n) summary(comp.lm) abline(comp.lm) dev.new('11', height = 6, width = 6) gpairs(as.data.frame(list(mean.rank = xtax$mean.rank.n, areole.area.mode = log(xtax$Area.MODE), areole.area.mean = log(xtax$Area.MEAN), areole.ecc.mode = log(xtax$Eccentricity.MODE))), upper.pars = list(scatter = 'stats'), stat.pars = list(verbose = TRUE)) ##### Stuff requiring EBImage require(EBImage) #tiles of 30 images with distributions dev.new('12', height = 6, width = 11) par(mfrow = c(5,6), mar = c(0,0,1,0)) for (i in meta$PHOT.NO[good][4:33]){ par(new = FALSE) here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/' inum <- gsub(' ', 0, format(c(i, '9999'), justify = 'right')[1]) this.image <- readImage(paste(here, inum, '.tif', sep = '')) this.image <- resize(this.image, w = 300) image(this.image, col = c('black', 'white'), xaxt = 'n', yaxt = 'n') par(new = TRUE) plot(density(dat$Area[dat$Leaf == i]), xlim = c(0, 50000), cex.main = 0.6, xaxt = 'n', yaxt = 'n', bty = 'n', new = FALSE, col = 'red', col.main = 'red', main = paste('\n', meta$REV.GEN[meta$PHOT.NO == i], meta$REV.SP[meta$PHOT.NO == i], '(', meta$REV.FAM[meta$PHOT.NO == i], ')\n\n', 'NCLC #:', meta$NCLC.NO[meta$PHOT.NO == i], ', Phot. #:', i)) } # figure with six images, thresholds and distributions dev.new('13', height = 6, width = 8) par(mfrow = c(6,4), mar = c(0,0,1,0)) for (i in meta$PHOT.NO[good][c(7,8,11:14)]){ par(new = FALSE) here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/alljpgs/' inum <- gsub(' ', 0, format(c(i, '9999'), justify = 'right')[1]) this.image <- readImage(paste(here, inum, '.jpg', sep = '')) this.image <- channel(this.image, mode = 'grey') # convert to grey becuase otherwise it would read in as # an array with 3 levels for red, green, and blue this.image <- resize(this.image, w = 300) image(this.image, col = grey(seq(0, 1, length.out = 256))) par(new = TRUE) text(150, 100, paste('\n', meta$REV.GEN[meta$PHOT.NO == i], meta$REV.SP[meta$PHOT.NO == i], '\n(', meta$REV.FAM[meta$PHOT.NO == i], ')\n', 'NCLC #:', meta$NCLC.NO[meta$PHOT.NO == i], ', Phot. #:', i), col = 'red', cex = 1.5) here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/' this.image <- readImage(paste(here, inum, '.tif', sep = '')) this.image <- resize(this.image, w = 300) image(this.image, col = c('black', 'white')) if(i == 10){ plot(density(dat$Area[dat$Leaf == i]), xlim = c(0, 50000), yaxt = 'n', bty = 'n', new = FALSE, main = 'Distribution of areole areas') plot(density(dat$Eccentricity[dat$Leaf == i]), xlim = c(0,1), yaxt = 'n', bty = 'n', new = FALSE, main = 'Distribution of areole eccentricities') }else{ plot(density(dat$Area[dat$Leaf == i]), xlim = c(0, 50000), xlab = '', xaxt = 'n', yaxt = 'n', bty = 'n', new = FALSE, main = '') plot(density(dat$Eccentricity[dat$Leaf == i]), xlim = c(0,1), xlab = '', xaxt = 'n', yaxt = 'n', bty = 'n', new = FALSE, main = '') } } # image tiles and distributions saved # image tiles... for (i in meta$PHOT.NO[good][c(7,8,11:14)]){ here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/alljpgs/' inum <- gsub(' ', 0, format(c(i, '9999'), justify = 'right')[1]) this.raw <- readImage(paste(here, inum, '.jpg', sep = '')) this.raw <- channel(this.raw, mode = 'grey') this.raw <- resize(this.raw, w = 300) here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/' this.thresh <- readImage(paste(here, inum, '.tif', sep = '')) this.thresh <- resize(this.thresh, w = 300) if(i == 10) stk <- combine(this.raw, this.thresh) else{ stk <- combine(stk, this.raw) stk <- combine(stk, this.thresh) } til <- tile(stk, nx = 2) writeImage(til, file = '14.jpg') } # areole measurement distributions... dev.new('14', height = 8, width = 4) par(mfrow = c(6,3), mar = c(0,0,0,0)) for (i in meta$PHOT.NO[good][c(7,8,11:14)]){ plot.new() text(0.5, 0.5, paste('\n', meta$REV.GEN[meta$PHOT.NO == i], meta$REV.SP[meta$PHOT.NO == i], '\n(', meta$REV.FAM[meta$PHOT.NO == i], ')\n', 'NCLC #:', meta$NCLC.NO[meta$PHOT.NO == i], ', Phot. #:', i)) par(mar = c(0,0,1,1)) if(i == 10){ plot(density(dat$Area[dat$Leaf == i]), xlim = c(0, 50000), yaxt = 'n', bty = 'n', new = FALSE, main = 'Distribution of areole areas') plot(density(dat$Eccentricity[dat$Leaf == i]), xlim = c(0,1), yaxt = 'n', bty = 'n', new = FALSE, main = 'Distribution of areole eccentricities') }else{ plot(density(dat$Area[dat$Leaf == i]), xlim = c(0, 50000), xlab = '', xaxt = 'n', yaxt = 'n', bty = 'n', new = FALSE, main = '') plot(density(dat$Eccentricity[dat$Leaf == i]), xlim = c(0,1), xlab = '', xaxt = 'n', yaxt = 'n', bty = 'n', new = FALSE, main = '') } } # START HERE # The following section needs repairs # read in the tree and taxon names require(ape) tr <- read.tree(paste(PATH, '09-04-27samples.phy', sep = '')) dev.new(height = 11, width = 8) par(mar = c(4,1,1,2), xpd = NA) plot(tr, use.edge.length = FALSE, show.tip.label = FALSE, direction = 'rightwards') tiplabels(taxa.med[as.numeric(tr$tip.label)], adj = c(0,0.5), frame = 'none', bg = NULL, srt = 0, cex = 0.6) # Fix location of Chrysophyllum! CHECK REST OF RELATIONSHIPS ############################################################ # # Making comparisons between Leo's scored ranks and specimen # measurements at a genus level # # see also: /Users/wag/Desktop/HerbLeafOrg/rankdata/09-03-06RankProcessingRscript.R # and: /Users/wag/Desktop/HerbLeafOrg/rankdata/09-06-02toStefan/09-06-02Rscript.R # and: /Users/wag/Desktop/HerbLeafOrg/09-05-25analysis/09-05-25Rscript.R # and: /Users/wag/Desktop/HerbLeafOrg/rankdata/09-07-12toStefan/09-07-12Rscript.R # and: /Users/wag/Desktop/09.BSA/09-07-14BSAfigsScript.R #nclc <- read.table('/Users/wag/Desktop/HerbLeafOrg/09-06-02NCLCindex/09-06-02NCLCindex.csv', sep = '\t', header = TRUE) #meas <- read.table('/Users/wag/Desktop/HerbLeafOrg/rankdata/09-07-22toStefan/09-07-22tax.meas.txt', header = TRUE) ranks <- clean nclc <- read.table(paste(PATH, '09-06-02NCLC.csv', sep = ''), sep = '\t', header = TRUE) #meas <- read.table(paste(PATH, '09-07-22meas_tax.csv', sep = ''), # header = TRUE) gr <- unique(ranks$GENUS) gr <- as.character(gr) gn <- unique(nclc$PREFERRED.GENUS) gn <- as.character(gn) gm <- as.character(unique(meas$REV.GEN)) gm <- na.omit(gm) cleared_in_leo_ranks <- gn[gn %in% gr] ranks_in_cleared <- gr[gr %in% gn] sum(sort(ranks_in_cleared) == sort(cleared_in_leo_ranks)) # there are 1097 cleared leaves with ranks assigned by Leo sum(sort(gm[gm %in% gn]) == sort(gn[gn %in% gm])) # there are 99 successfully measured genera in the NCLC sum(sort(gm[gm %in% gr]) == sort(gr[gr %in% gm])) # there are 51 successfully measured genera with ranks matches <- gm[gm %in% gr] LR.MIN <- rep('', length(matches)) LR.MAX <- rep('', length(matches)) LR.NUM.MIN <- rep(0, length(matches)) LR.NUM.MAX <- rep(0, length(matches)) for(i in 1:length(matches)){ foo <- (1:nrow(ranks))[ranks$GENUS == matches[i]] foo <- as.numeric(na.omit(foo)) goo <- as.character(na.omit(ranks$RMIN[foo])) if(length(goo) == 0) LR.MIN[i] <- NA else LR.MIN[i] <- sort(goo, decreasing = FALSE)[1] goo <- as.character(na.omit(ranks$RMAX[foo])) if(length(goo) == 0) LR.MAX[i] <- NA else LR.MAX[i] <- sort(goo, decreasing = TRUE)[1] goo <- as.numeric(na.omit(ranks$RMIN.N[foo])) if(length(goo) == 0) LR.NUM.MIN[i] <- NA else LR.NUM.MIN[i] <- sort(goo, decreasing = FALSE)[1] goo <- as.numeric(na.omit(ranks$RMAX.N[foo])) if(length(goo) == 0) LR.NUM.MAX[i] <- NA else LR.NUM.MAX[i] <- sort(goo, decreasing = TRUE)[1] } # out is a data frame for Leo's ranks of genera for which we also # have measurements LR.BAR <- (LR.NUM.MIN +LR.NUM.MAX) / 2 out <- data.frame(LR.MIN, LR.MAX, LR.NUM.MIN, LR.NUM.MAX, LR.BAR) row.names(out) <- matches # pull out just the measurments for which there are matching ranks # in Leo's data meas.rank <- meas[meas$REV.GEN %in% matches,20:96] # average the two spp. of Salix in the measured specimens # and put together Leo's ranks with the measurements on # specimens from the same genera out.meas <- cbind(out, rbind(meas.rank[1:46,], colSums(meas.rank[47:48,])/2, meas.rank[49:52,])) # Plot of modal areole size against leaf rank plot(log(out.meas$Area.MODE), out.meas$LR.BAR, type = 'n', xlab = 'Log modal areole area in pixels', ylab = 'Leaf rank', yaxt = 'n', main = 'Comparison of areole size with leaf rank') segments(log(out.meas$Area.MODE), out.meas$LR.NUM.MIN, log(out.meas$Area.MODE), out.meas$LR.NUM.MAX, lwd = 3) text(log(out.meas$Area.MODE), out.meas$LR.BAR, adj = c(0,0), labels = matches, cex = 0.5, srt = 45) axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0')) axis(2, at = 1:16, labels = FALSE) ranksize.lm <- lm(out.meas$LR.BAR ~ log(out.meas$Area.MEAN)) summary(ranksize.lm) abline(ranksize.lm$coef, col = 'red') pp <- format(df(summary(ranksize.lm)$fstatistic[1], summary(ranksize.lm)$fstatistic[2], summary(ranksize.lm)$fstatistic[3]), digits = 3) rsq <- format(summary(ranksize.lm)$r.squared, digits = 3) coe <- format(ranksize.lm$coef, digits = 3) mtext(paste('r^2=', rsq, ', slope=', coe[2], ', intercept=', coe[1], ', p =', pp, ', n=', nrow(out.meas))) # Plot of median and IQR showing a similar pattern par(mfrow = c(1,2)) plot(out.meas$Area.MEDIAN, out.meas$LR.BAR, log = 'x', xlab = 'Median areole area in pixels', ylab = 'Leaf rank', yaxt = 'n') segments(out.meas$Area.MEDIAN, out.meas$LR.NUM.MIN, out.meas$Area.MEDIAN, out.meas$LR.NUM.MAX) axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0')) axis(2, at = 1:16, labels = FALSE) plot(out.meas$Area.Q75 - out.meas$Area.Q25, out.meas$LR.BAR, log = 'x', xlab = 'Areole area IQR in pixels', ylab = 'Leaf rank', yaxt = 'n') segments(out.meas$Area.Q75 - out.meas$Area.Q25, out.meas$LR.NUM.MIN, out.meas$Area.Q75 - out.meas$Area.Q25, out.meas$LR.NUM.MAX) axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0')) axis(2, at = 1:16, labels = FALSE) # plot of tree with areas quartz(width = 8, height = 10.5) par(mfrow = c(1,2)) par(mar = c(4,1,1,2), xpd = NA) tr.pt <- plot(tr, use.edge.length = FALSE, show.tip.label = FALSE, direction = 'rightwards') #tiplabels(taxa.med[as.numeric(tr$tip.label)], adj = c(0,0.5), # frame = 'none', bg = NULL, srt = 0, # cex = 0.6) par(mar = c(4,5,1,1)) plot(tax$Area.MODE[as.numeric(tr$tip.label)], 1:nrow(tax), yaxt = 'n', ylab = '', type = 'n', xlab = 'Areole area in pixels', log = 'x') axis(2, at = 1:nrow(tax), labels = taxa.med[as.numeric(tr$tip.label)], cex.axis = 0.6, las = 2) axis(2, at = 1:nrow(tax), labels = FALSE, tck = 1, lwd = 0.5, col = 'grey') mtext('black = mode; red = median; blue = mean', cex = 0.7) points(tax$Area.MEAN[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'blue', cex = 0.5, pch = 20) segments(tax$Area.MEAN[as.numeric(tr$tip.label)] - tax$Area.SD[as.numeric(tr$tip.label)], 1:nrow(tax), tax$Area.MEAN[as.numeric(tr$tip.label)] + tax$Area.SD[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'blue', xpd = FALSE) points(tax$Area.MEDIAN[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'red', cex = 0.5, pch = 20) segments(tax$Area.Q25[as.numeric(tr$tip.label)], 1:nrow(tax), tax$Area.Q75[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'red', xpd = FALSE) points(tax$Area.MODE[as.numeric(tr$tip.label)], 1:nrow(tax), pch = 19) #plot of eccentricities and solidities quartz(width = 8, height = 10.5) par(mfrow = c(1,2)) par(mar = c(4,5,1,1)) plot(tax$Eccentricity.MODE[as.numeric(tr$tip.label)], 1:nrow(tax), yaxt = 'n', ylab = '', type = 'n', xlab = 'Areole eccentricity', xlim = c(0.5,1)) #axis(2, at = 1:nrow(tax), # labels = taxa.med[as.numeric(tr$tip.label)], # cex.axis = 0.6, las = 2) axis(2, at = 1:nrow(tax), labels = FALSE, tck = 1, lwd = 0.5, col = 'grey') mtext('black = mode; red = median; blue = mean', cex = 0.7) points(tax$Eccentricity.MEAN[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'blue', cex = 0.5, pch = 20) segments(tax$Eccentricity.MEAN[as.numeric(tr$tip.label)] - tax$Eccentricity.SD[as.numeric(tr$tip.label)], 1:nrow(tax), tax$Eccentricity.MEAN[as.numeric(tr$tip.label)] + tax$Eccentricity.SD[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'blue', xpd = FALSE) points(tax$Eccentricity.MEDIAN[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'red', cex = 0.5, pch = 20) segments(tax$Eccentricity.Q25[as.numeric(tr$tip.label)], 1:nrow(tax), tax$Eccentricity.Q75[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'red', xpd = FALSE) points(tax$Eccentricity.MODE[as.numeric(tr$tip.label)], 1:nrow(tax), pch = 19) par(mar = c(4,5,1,1)) plot(tax$Solidity.MODE[as.numeric(tr$tip.label)], 1:nrow(tax), yaxt = 'n', ylab = '', type = 'n', xlab = 'Areole solidity', xlim = c(0.5,1)) #axis(2, at = 1:nrow(tax), # labels = taxa.med[as.numeric(tr$tip.label)], # cex.axis = 0.6, las = 2) axis(2, at = 1:nrow(tax), labels = FALSE, tck = 1, lwd = 0.5, col = 'grey') mtext('black = mode; red = median; blue = mean', cex = 0.7) points(tax$Solidity.MEAN[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'blue', cex = 0.5, pch = 20) segments(tax$Solidity.MEAN[as.numeric(tr$tip.label)] - tax$Solidity.SD[as.numeric(tr$tip.label)], 1:nrow(tax), tax$Solidity.MEAN[as.numeric(tr$tip.label)] + tax$Solidity.SD[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'blue', xpd = FALSE) points(tax$Solidity.MEDIAN[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'red', cex = 0.5, pch = 20) segments(tax$Solidity.Q25[as.numeric(tr$tip.label)], 1:nrow(tax), tax$Solidity.Q75[as.numeric(tr$tip.label)], 1:nrow(tax), col = 'red', xpd = FALSE) points(tax$Solidity.MODE[as.numeric(tr$tip.label)], 1:nrow(tax), pch = 19) # lots of pairs plot(log(tax$Area.MODE), log(tax$Area.SD^2)) pairs(log(tax[,22:29])) pairs(tax[,c('Area.MODE', 'ConvexArea.MODE', 'Eccentricity.MODE', 'Solidity.MODE', 'Perimeter.MODE', 'EquivDiameter.MODE', 'MajorAxis.MODE', 'MinorAxis.MODE', 'MaximumDistance.MODE','MeanDistance.MODE', 'DistanceStandardDeviation.MODE')], pch = '.') # reduced pairs plot with habit quartz(width = 8, height = 8) logArea.MODE <- log(tax$Area.MODE) logPerim.MODE <- log(tax$Perimeter.MODE) area.perim.ratio <- tax$Area.MODE / tax$Perimeter.MODE pairs(cbind(logArea.MODE, logPerim.MODE, tax[,c('Eccentricity.MODE', 'Solidity.MODE', 'MeanDistance.MODE')]), col = as.numeric(tax$WOODY == 'Y') + 1, pch = 5) mtext('red woody; black herbaceaous', line = 3) # comparisons by habit boxplot(split(log(tax$Area.MODE), tax$WOODY), varwidth = TRUE, notch = FALSE, ylab = 'log(modal areole area)', main = 'Boxplots by habit', names = c('Unknown', 'Herbaceaous', 'Woody')) jit <-jitter(as.numeric(tax$WOODY), amount = 0.1) points(jit, log(tax$Area.MODE), pch = 5) labs <- taxa.med labs[tax$WOODY == 'Y' & log(tax$Area.MODE) < 9.5 & log(tax$Area.MODE) > 6] <- '' text(jit, log(tax$Area.MODE), labels = labs, cex = 0.5, adj = c(0,0)) mtext(paste('for', nrow(tax), 'angiosperm species')) wilcox.test(tax$Area.MODE[tax$WOODY == 'N'], tax$Area.MODE[tax$WOODY == 'Y']) t.test(tax$Area.MODE[tax$WOODY == 'N'], tax$Area.MODE[tax$WOODY == 'Y']) # pairs plot of Annacardiaceae quartz(width = 8, height = 8) fam.col <- rep('grey', nrow(tax)) fam.pch <- rep(4, nrow(tax)) fam.col[tax$REV.FAM == 'Anacardaceae'] <- 'black' fam.pch[tax$REV.FAM == 'Anacardaceae'] <- 19 pairs(cbind(logArea.MODE, logPerim.MODE, tax[,c('Eccentricity.MODE', 'Solidity.MODE', 'MeanDistance.MODE')]), col = fam.col, pch = fam.pch, gap = 0.5) mtext('Anacardiaceae', line = 3) # pairs plot of Annonacae quartz(width = 8, height = 8) fam.col <- rep('grey', nrow(tax)) fam.pch <- rep(4, nrow(tax)) fam.col[tax$REV.FAM == 'Annonaceae'] <- 'black' fam.pch[tax$REV.FAM == 'Annonaceae'] <- 19 pairs(cbind(logArea.MODE, logPerim.MODE, tax[,c('Eccentricity.MODE', 'Solidity.MODE', 'MeanDistance.MODE')]), col = fam.col, pch = fam.pch, gap = 0.5) mtext('Annonaceae', line = 3) # more image tiles.... these <- na.omit(meta$PHOT.NO[good & measured & meta$AB == 'b']) for (i in these[5:54]){ inum <- gsub(' ', 0, format(c(i, '9999'), justify = 'right')[1]) here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/' this.thresh <- readImage(paste(here, inum, '.tif', sep = '')) this.thresh <- resize(this.thresh, w = 300) if(i == 13) stk <- this.thresh else{ stk <- combine(stk, this.thresh) } til <- tile(stk, nx = 5) writeImage(til, file = '1to50.jpg') } # and areole size distributions... quartz(width = 8, height = 10.5) par(mfrow = c(10,5), mar = c(0,0,0,0)) for (i in these[5:54]){ par(mar = c(0,0,1,1)) dens <- density(dat$Area[dat$Leaf == i]) plot(dens, xlim = c(0, 50000), xlab = '', xaxt = 'n', yaxt = 'n', bty = 'n', new = FALSE, main = '', col = 'red') text(25000, mean(c(max(dens$y), min(dens$y))), col = 'red', paste('\n', meta$REV.GEN[meta$PHOT.NO == i], meta$REV.SP[meta$PHOT.NO == i], '\n(', meta$REV.FAM[meta$PHOT.NO == i], ')\n', 'NCLC #:', meta$NCLC.NO[meta$PHOT.NO == i], ', Phot. #:', i)) }