#Script to perform (final?) analyses and produce figures for a
# leaf rank manuscript
# see also: files named: 09-07-14BSAfigsScript.R,
#  09-07-22RankProcessingBSAfigs.R, 10-01-29Rscript.R,
#  10-11rank_test_analysis.R
# and: /Users/wag/Desktop/HerbLeafOrg/09-05-25analysis/09-05-25Rscript.R
#
# this version begun: 10-11-10
# last modified: 10-11-17, 11-01-21

# this is where the raw data for the MS should live
PATH <- '/Users/wag/Desktop/HerbLeafOrg/11-01MS/data/'

############################################################
# Read in final synthesized version of Leo's rank and
#  habit data from the US national herbarium sheets as
#  cleaned up by Stefan and me
raw <- read.table(paste(PATH, '10-11rank_habit_data.csv',
                        sep = ''),
                  sep = '\t', header = TRUE,
                  colClasses = 'character')
     
GnamesU <- unique(raw$GENUS)
FnamesU <- unique(raw$FAMILY)
GFnames <- paste(raw$GENUS,
                 raw$FAMILY, sep = '_')
GFnamesU <- unique(GFnames)

#length(GnamesU)
# 1835 unique Genera
#length(FnamesU)
# 281 unique Families
#length(GFnamesU)
# 1849 unique Gen./Fam. pairs

# basic checking
dim(raw)
str(raw)
library(YaleToolkit)
whatis(raw)

# pull information on xeric habit out of HABIT.REV field
XERIC <- rep(FALSE, nrow(raw))
XERIC[grep('-x', raw$HABIT.REV)] <- TRUE
final.habit <- gsub('-x', '', raw$HABIT.REV)
raw <- cbind(raw, XERIC)
HABIT.FIN <- factor(final.habit,
                      levels = c('parasite', 'aquatic',
                                 'herb', 'shrub', 'woody',
                                 'tree', 'vine'))
raw <- cbind(raw, HABIT.FIN)

# get rid of any row with the word 'delete' in its
#  comments column; eliminate unnecessary columns
dim(raw)
bad <- grep('delete', tolower(raw$COMMENT))
clean <- raw[-bad,c(1:2,4:7,9,12:13)]
dim(clean)

# convert numeric rank columns to numbers and make
#  the REVERSION column a factor
clean[,5:6] <- apply(clean[,5:6], 2, as.numeric)
clean$REVERSION <- as.factor(clean$REVERSION)
str(clean)
whatis(clean)

Cgenera <- unique(clean$GENUS)
length(Cgenera)
Cfam <- unique(clean$FAMILY)
length(Cfam)
# (lost one genus in cleaning)

CGFfactor <- paste(clean$GENUS,
                   clean$FAMILY, sep = '_')

# Split rank and habit on names to get data by genus and family
gminr <- sapply(split(clean$RMIN.N,
                      as.factor(clean$GENUS)), min)
gmaxr <- sapply(split(clean$RMAX.N,
                as.factor(clean$GENUS)), max)

fminr <- sapply(split(clean$RMIN.N,
                      as.factor(clean$FAMILY)), min)
fmaxr <- sapply(split(clean$RMAX.N,
                as.factor(clean$FAMILY)), max)

ghabit <- lapply(split(clean$HABIT.FIN,
                       as.factor(clean$GENUS)), unique)
ghabit <- sapply(ghabit, function(x) paste(x, collapse = ','))

fhabit <- lapply(split(clean$HABIT.FIN,
                       as.factor(clean$FAMILY)), unique)
fhabit <- sapply(fhabit, function(x) paste(x, collapse = ','))

# Write out data by genus and by family
GenDat <- data.frame(gminr, gmaxr, ghabit)
FamDat <- data.frame(fminr, fmaxr, fhabit)
write.table(GenDat, file = 'GenusRankData.csv')
write.table(FamDat, file = 'FamilyRankData.csv')

meanrank <- apply(clean[,5:6], 1, mean)

clean.na <- clean[!(is.na(clean[5]) | is.na(clean[9])),]
meanrank.na <- apply(clean.na[,5:6], 1, mean)

clean <- cbind(clean, MEANRANK = meanrank)
clean.na <- cbind(clean.na, MEANRANK = meanrank.na)

##### Some plots....

# Boxplot of habit versus leaf rank: plot and regression
dev.new('1', height = 5, width = 7)
plot(jitter(as.numeric(clean$HABIT.FIN)), jitter(meanrank),
     col = 'grey', las = 2, cex.axis = 0.7,
     xaxt = 'n', yaxt = 'n', pch = 5, cex = 0.75,
     xlab = '', ylab = 'Leaf Rank',
     main = paste('Boxplots of leaf rank by habit for',
                  length(Cgenera), 'genera'))
boxplot(split(meanrank, clean$HABIT.FIN), #notch = TRUE,
        yaxt = 'n', varwidth = TRUE, add = TRUE, pch = 19)
axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0'))
axis(2, at = 1:16, labels = FALSE)

hab.lm <- lm(meanrank ~ clean$HABIT.FIN + clean$XERIC)
summary(hab.lm)
anova(hab.lm)

# Drought tolerance versus leaf rank: plot and tests
dev.new('2', height = 5, width = 5)
plot(jitter(as.numeric(clean$XERIC) + 1), jitter(meanrank),
     col = 'grey', las = 2, cex.axis = 0.7,
     xaxt = 'n', yaxt = 'n', xlim = c(0.5,2.4),
     xlab = '', ylab = 'Leaf Rank', pch = 5, cex = 0.75,
     main = paste('Drought tolerance and leaf rank for',
                  length(Cgenera), 'genera'))
boxplot(meanrank[clean$XERIC == FALSE],
        meanrank[clean$XERIC == TRUE],
        notch = TRUE, pch = 19,
        yaxt = 'n', varwidth = TRUE, add = TRUE)
axis(1, at = c(1,2), labels = c('Non-xeric', 'Xeric'))
axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0'))
axis(2, at = 1:16, labels = FALSE)

t.test(meanrank[clean$XERIC == TRUE],
       meanrank[clean$XERIC == FALSE])
wilcox.test(meanrank[clean$XERIC == TRUE],
            meanrank[clean$XERIC == FALSE])
            
# Habit versus family: plot and regression
dev.new('3', height = 5, width = 15)
par(mar = c(6, 4, 3, 1))
plot(jitter(as.numeric(as.factor(clean$FAMILY))),
     jitter(meanrank),
     col = 'grey', las = 2, cex.axis = 0.7,
     xaxt = 'n', yaxt = 'n',
     xlab = '', ylab = 'Leaf Rank',
     main = paste('Boxplots of leaf rank by family for',
                  length(Cgenera), 'genera'))
boxplot(split(meanrank, clean$FAMILY),
        las = 3, cex.axis = 0.5, #xaxt = 'n',
        yaxt = 'n', varwidth = TRUE, add = TRUE)
#axis(1)
axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0'))
axis(2, at = 1:16, labels = FALSE)

fam.lm <- lm(meanrank ~ clean$FAMILY)
summary(fam.lm)
anova(fam.lm)

# plot and regression of rank against habit, assuming habit is
#  an ordered factor (number) probably a bad idea
plot(jitter(as.numeric(clean$HABIT.FIN)), jitter(meanrank),
     yaxt = 'n', xaxt = 'n',
     ylab = 'Leaf Rank', xlab = 'Habit')
axis(1, at = 1:nlevels(clean$final.habit),
     labels = levels(clean$final.habit))
axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0'))
axis(2, at = 1:16, labels = FALSE)
hab.ord.lm <- lm(meanrank ~ as.numeric(clean$HABIT.FIN))
summary(hab.ord.lm)
anova(hab.ord.lm)
abline(hab.ord.lm$coef, col = 'red')

# plot of distributions of leaf rank broken down by habit
dev.new('4', height = 5, width = 5)
plot(density(meanrank.na), xaxt = 'n',
     main = 'Distributions of leaf rank', lwd = 2)
lines(density(meanrank.na[clean.na$HABIT.FIN == 'tree']),
      col = 'red')
lines(density(meanrank.na[clean.na$HABIT.FIN == 'shrub']),
      col = 'blue')
lines(density(meanrank.na[clean.na$HABIT.FIN == 'herb']),
      col = 'green')
axis(1, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0'))
axis(1, at = 1:16, labels = FALSE)
mtext('black=total, red=trees, blue=shrubs, green=herbs')

# Data in:
#  clean
#  clean.na
#  GenDat
#  FamDat

############################################################
# Processing measurement data: file paths:
#
#/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/AreoleDistance.txt
#/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/AreolePropertiesMatrix_4_2_09.txt
#
# renamed: 09-04-02areole_distances.csv and
#          09-04-02areole_properties.csv and
# moved to PATH

dat <- read.table(paste(PATH, '09-04-02areole_properties.csv',
                  sep = ''), header = TRUE,
                  colClasses = 'numeric')
dists <- read.table(paste(PATH, '09-04-02areole_distances.csv',
                          sep = ''),
                    header = TRUE, colClasses = 'numeric')

#put all data from Chuck together into one table
dat <- cbind(dat, dists)

# read in the metadata
meta <- read.table(paste(PATH, '10-11metadata.csv', sep = ''),
                   header = TRUE, sep = '\t',
                   colClasses = 'character')

# these are the 120 binomials (Gen. sp.) in the metadata
meta.tax <- unique(paste(as.character(meta$REV.GEN),
                     as.character(meta$REV.SP)))[-1]

# will use these for the medium and short length taxon names
taxa.med <- meta.tax
abbrev <- function(x) paste(substr(x, 1, 3), collapse = '.')
taxa.sht <- sapply(strsplit(meta.tax, split = ' '), abbrev)

# long taxa names are:
taxa.long <- unique(paste(meta$REV.GEN, ' ', meta$REV.SP,
                          ' ', meta$REV.AUTH, ' (',
                          meta$REV.FAM, ')', sep = ''))[-1]

# check that all the taxa names match
cbind(taxa.sht, taxa.med, taxa.long)

#note there is no data for [40], Phragmites communis

# create an indicator to identify which leaves were processed
measured <- rep(FALSE, nrow(meta))
measured[unique(dat$Leaf)] <- TRUE

# create an indicator to identify the 'good' leaves
bad <- c(8,  9, 58, 59, 66,
        73, 82, 80, 81, 28,
        29, 32, 33, 58, 93,
        94, 86, 98, 89,100,
       107,110,133,147,175)
bad <- sort(c(bad,106,134))
good <- !(meta$PHOT.NO %in% bad) & measured

# check measured and good indicators
cbind(as.character(meta$NCLC.NO),
      as.character(meta$PHOT.NO),
      as.character(meta$REV.GEN),
      as.character(meta$REV.SP),
      measured, good)

########## CALCULATE SUMMARY STATISTICS FOR EACH MEASURED IMAGE
#calculate leaf means for all measured variables
leaf.means <- split(dat, dat$Leaf)
col.mean <- function(x){
  colSums(as.data.frame(x)) / nrow(as.data.frame(x))
}
for (i in seq_along(leaf.means)){
  leaf.means[[i]] <- sapply(leaf.means[[i]], col.mean)
}
leaf.means <- as.data.frame(t(as.data.frame(leaf.means)))
row.names(leaf.means) <- gsub('X', '', row.names(leaf.means))
names(leaf.means) <- gsub('.x$', '', names(leaf.means))

#calculate leaf standard deviations
leaf.sds <- split(dat, dat$Leaf)
col.sd <- function(x){apply(x, 2, sd)}
for (i in seq_along(leaf.sds)){
  leaf.sds[[i]] <- col.sd(leaf.sds[[i]])
}
leaf.sds <- as.data.frame(t(as.data.frame(leaf.sds)))
row.names(leaf.sds) <- gsub('X', '', row.names(leaf.sds))
leaf.sds$Leaf <- leaf.means$Leaf

#calculate leaf medians
leaf.meds <- split(dat, dat$Leaf)
col.med <- function(x){apply(x, 2, median)}
for (i in seq_along(leaf.meds)){
  leaf.meds[[i]] <- col.med(leaf.meds[[i]])
}
leaf.meds <- as.data.frame(t(as.data.frame(leaf.meds)))
row.names(leaf.meds) <- gsub('X', '', row.names(leaf.meds))

#calculate 25th and 75th quantiles
leaf.q25 <- split(dat, dat$Leaf)
col.q25 <- function(x){apply(x, 2,
	                     function(y){quantile(y, 0.25)})
	                   }
for (i in seq_along(leaf.q25)){
  leaf.q25[[i]] <- col.q25(leaf.q25[[i]])
}
leaf.q25 <- as.data.frame(t(as.data.frame(leaf.q25)))
row.names(leaf.q25) <- gsub('X', '', row.names(leaf.q25))

leaf.q75 <- split(dat, dat$Leaf)
col.q75 <- function(x){apply(x, 2,
	                     function(y){quantile(y, 0.75)})
	                   }
for (i in seq_along(leaf.q75)){
  leaf.q75[[i]] <- col.q75(leaf.q75[[i]])
}
leaf.q75 <- as.data.frame(t(as.data.frame(leaf.q75)))
row.names(leaf.q75) <- gsub('X', '', row.names(leaf.q75))

#calculate min and max
leaf.min <- split(dat, dat$Leaf)
for (i in seq_along(leaf.min)){
  leaf.min[[i]] <- apply(leaf.min[[i]], 2, min)
}
leaf.min <- as.data.frame(t(as.data.frame(leaf.min)))
row.names(leaf.min) <- gsub('X', '', row.names(leaf.min))

leaf.max <- split(dat, dat$Leaf)
for (i in seq_along(leaf.max)){
  leaf.max[[i]] <- apply(leaf.max[[i]], 2, max)
}
leaf.max <- as.data.frame(t(as.data.frame(leaf.max)))
row.names(leaf.max) <- gsub('X', '', row.names(leaf.max))

#calculate modes

#foo <- density(leaf.mod[[1]]$Area)
#plot(foo)
#abline(h = max(foo$y))
#mod <- foo$x[foo$y == max(foo$y)]
#abline(v = mod)

leaf.mod <- split(dat, dat$Leaf)
mod <- function(x){
  x.d <- density(x)
  mean(x.d$x[x.d$y == max(x.d$y)])
}
for (i in seq_along(leaf.mod)){
  leaf.mod[[i]] <- apply(leaf.mod[[i]], 2, mod)
}
leaf.mod <- as.data.frame(t(as.data.frame(leaf.mod)))
row.names(leaf.mod) <- gsub('X', '', row.names(leaf.mod))

# merge data with metadata table....
names(dat)
meas.vars <- names(dat)[c(-1, -10)]

merged.dat <- meta
merged.dat <- cbind(merged.dat, MEASURED = measured, GOOD = good)

# put all the measurement summaries together in one data frame
for(i in length(meas.vars):1){
  
  temp <- cbind(leaf.min[,meas.vars[i]], ### MINIMUM
                as.integer(row.names(leaf.min)))
  this.name <- paste(meas.vars[i], 'MIN', sep = '.')
  colnames(temp) <- c(this.name, 'PHOT.NO')
  merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO')
  
  temp <- cbind(leaf.q25[,meas.vars[i]], ### 25 QUANTILE
                as.integer(row.names(leaf.q25)))
  this.name <- paste(meas.vars[i], 'Q25', sep = '.')
  colnames(temp) <- c(this.name, 'PHOT.NO')
  merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO')
  
  temp <- cbind(leaf.meds[,meas.vars[i]], ### MEDIAN
                    as.integer(row.names(leaf.meds)))
  this.name <- paste(meas.vars[i], 'MEDIAN', sep = '.')
  colnames(temp) <- c(this.name, 'PHOT.NO')
  merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO')

  temp <- cbind(leaf.q75[,meas.vars[i]], ### 75 QUANTILE
                    as.integer(row.names(leaf.q75)))
  this.name <- paste(meas.vars[i], 'Q75', sep = '.')
  colnames(temp) <- c(this.name, 'PHOT.NO')
  merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO')

  temp <- cbind(leaf.max[,meas.vars[i]], ### MAXIMUM
                    as.integer(row.names(leaf.max)))
  this.name <- paste(meas.vars[i], 'MAX', sep = '.')
  colnames(temp) <- c(this.name, 'PHOT.NO')
  merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO')

  temp <- cbind(leaf.sds[,meas.vars[i]], ### STD. DEV.
                    as.integer(row.names(leaf.sds)))
  this.name <- paste(meas.vars[i], 'SD', sep = '.')
  colnames(temp) <- c(this.name, 'PHOT.NO')
  merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO')

  temp <- cbind(leaf.means[,meas.vars[i]], ### MEAN
                    as.integer(row.names(leaf.means)))
  this.name <- paste(meas.vars[i], 'MEAN', sep = '.')
  colnames(temp) <- c(this.name, 'PHOT.NO')
  merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO')

  temp <- cbind(leaf.mod[,meas.vars[i]], ### MODE
                    as.integer(row.names(leaf.mod)))
  this.name <- paste(meas.vars[i], 'MODE', sep = '.')
  colnames(temp) <- c(this.name, 'PHOT.NO')
  merged.dat <- merge(temp, merged.dat, by = 'PHOT.NO')

}

merged.good <- merged.dat[merged.dat$GOOD,]

##########
# Convert from 230 images to 120 taxa
# Note: 120 taxa but only 119 in merged.dat (because there is
#  no data for Phragmites communis)
merged.tax <- unique(paste(as.character(merged.dat$REV.GEN),
                       as.character(merged.dat$REV.SP)))
cbind(meta.tax, merged.tax)
# NO MATCH!

merged.tax <- unique(paste(as.character(merged.good$REV.GEN),
                       as.character(merged.good$REV.SP)))
cbind(meta.tax, merged.tax)
# EVEN WORSE MATCH!

# Create 120 by ? matrix with good taxon data....
merged.taxa <- paste(merged.good$REV.GEN, merged.good$REV.SP)
for(i in 1:length(taxa.med)){
	this.tax <- merged.good[merged.taxa == taxa.med[i], 2:89]
	this.tax <- apply(this.tax, 2, mean)
    if(i == 1) tax.dat <- this.tax
    else tax.dat <- rbind(tax.dat, this.tax)
}
rownames(tax.dat) <- taxa.sht

for(i in 1:length(taxa.med)){
	this.tax <- merged.good[merged.taxa == taxa.med[i], 90:110]
	if(length(unique(this.tax$AB)) > 1)
	  this.tax$AB <- 'both'
	this.tax <- this.tax[1,]
    if(i == 1) tax.met <- this.tax
    else tax.met <- rbind(tax.met, this.tax)
}
rownames(tax.met) <- taxa.sht

tax.dat[is.nan(tax.dat)] <- NA
tax <- cbind(tax.met, tax.dat)

woody.ind <- meta[leaf.means$Leaf,]$WOODY
wood.long <- rep('Y', nrow(dat))
for(i in seq_along(wood.long)){
  if(is.na(meta[dat$Leaf[i],'WOODY']))
    wood.long[i] <- '?'
  else if(meta[dat$Leaf[i],'WOODY'] == 'N')
    wood.long[i] <- 'N'
}
wood.long <- as.factor(wood.long)

ab.ind <- as.character(meta[leaf.means$Leaf,]$AB)
ab.ind[(1:length(ab.ind))[is.na(ab.ind)]] <- 's'
ab.ind <- as.factor(ab.ind)
ab.long <- rep('a', nrow(dat))
for(i in seq_along(ab.long)){
  if(is.na(meta[dat$Leaf[i],'AB']))
    ab.long[i] <- 's'
  else if(meta[dat$Leaf[i],'AB'] == 'b')
    ab.long[i] <- 'b'
}
ab.long <- as.factor(ab.long)

tax.long <- dat$Leaf
for(i in seq_along(tax.long)){
  tax.long[i] <- paste(meta$REV.GEN[tax.long[i] == meta$PHOT.NO],
                       meta$REV.SP[tax.long[i] == meta$PHOT.NO])
}

# data in:
#  208253 by 13: dat (raw areole data)
#  238 by 21: meta (raw metadata)
#  tree of 120 taxa: tr
#  230 by 111: merged.dat (summary data by image)
#  206 by 111: merged.good (only good images)
#  120 by 88: tax.dat (merged data averaged over taxa)
#  120 by 21: tax.met (meta data averaged over taxa)
#  120 by 109: tax (merged plus meta data averaged over taxa)

# estimators of center and spread (230 by 13):
#  leaf.mod
#  leaf.means, leaf.sds
#  leaf.meds, leaf.q25, leaf.q75, leaf.max, leaf.min

# indictors, etc.:
#  length = 208253: wood.long, ab.long, tax.long
#  length = 238: measured, good
#  length = 230: locs, woody.ind, ab.ind
#  length = 120: taxa.long, taxa.med, taxa.sht

##### Rank test stuff
# See also: analysis of full test data set
#  begun 2010-11-09
#PATH <- '/Users/wag/Desktop/HerbLeafOrg/RankTest/10-04test/'

levs <- c('1r1', '1r2', '1r3',
          '2r0', '2r1', '2r2', '2r3',
          '3r0', '3r1', '3r2', '3r3',
          '4r0', '4r1', '4r2', '4r3')

##########
# Should not need to repeat the commented section
#library(gdata)
#wag <- read.xls(paste(PATH, 'rank.coding.sheet.WAG.xls',
#                      sep = ''), skip = 2)
#sal <- read.xls(paste(PATH, 'rank.coding.sheet.SAL.xls',
#                      sep = ''), skip = 2)
#cap <- read.xls(paste(PATH, 'rank.coding.sheet.CAP.xls',
#                      sep = ''), skip = 2)
#gd <- read.xls(paste(PATH, 'rank.coding.sheet.GD.xls',
#                     sep = ''), skip = 2)

#wag <- gsub(' ', '', as.character(wag[,2]))
#sal <- gsub(' ', '', as.character(sal[,2]))
#cap <- gsub(' ', '', as.character(cap[,2]))
#gd <- gsub(' ', '', as.character(gd[,2]))

#wag <- factor(wag, levs)
#sal <- factor(sal, levs)
#cap <- factor(cap, levs)
#gd <- factor(gd, levs)

#wag_n <- as.numeric(wag)
#sal_n <- as.numeric(sal)
#cap_n <- as.numeric(cap)
#gd_n <- as.numeric(gd)

#comp <- data.frame(wag, sal, cap, gd, wag_n, sal_n, cap_n, gd_n)
#write.table(comp, paste(PATH, 'prelim.comp.csv'))

##########

#lookup <- read.table(paste(PATH, 'lookup.csv', sep = ''),
#                     header = TRUE)
#lookup <- lookup[order(lookup$NUM),]
#nclcs <- as.character(lookup$NCLC)

#nclc <- read.table('/Users/wag/Desktop/HerbLeafOrg/09-06-02NCLCindex/09-06-02NCLCindex.csv', header = TRUE, sep = '\t')

#test.set <- nclc[nclc$No. %in% nclcs,]
#test.set <- test.set[order(as.character(test.set$No.)),
#                     c(1,2,9,11)]
#write.table(test.set, paste(PATH, 'some.names.csv', sep = ''))

##########
# by hand, combine some.names.csv, prelim.comp.csv
#  and comparision.csv, filling in holes....Then:

comp <- read.table(paste(PATH, '10-11comparison.csv',
                   sep = ''), header = TRUE, sep = '\t')

num <- comp[,c(11,12,14,16)]
mns <- rowSums(num)/ncol(num)

cont1 <- comp[1:10,11:16]
cont2 <- comp[11:30,11:16]
test <- comp[31:150,c(11,12,14,16)]

dups <- c(18,19,38,39)

dev.new('5', height = 6, width = 11)
par(mar = c(11,4,4,1))
plot(rep(1:150, 4), jitter(unlist(num[,1:4])),
     col = rep(1:4, each = 150), las = 2, cex.axis = 0.7,
     xaxt = 'n', yaxt = 'n', ylim = c(0,18),
     xlab = '', ylab = 'Leaf Rank', pch = 5, cex = 0.75,
     main = 'Leaf Rank in 148 leaves through 4 pairs of eyes')
mtext('black line is mean')
axis(1, at = 1:150, las = 2, cex.axis = 0.5,
     labels = paste(comp$GENUS, ' ', comp$SP,
                    ' (', comp$FAM, ') ', comp$X, sep = ''))
axis(2, at = c(1,5,9,13), labels = c('1r1', '2r1', '3r1', '4r1'))
axis(2, at = 1:16, labels = FALSE, cex.axis = 0.5)
legend('top', ncol = 4, pch = 1, legend = colnames(num[,1:4]),
       col = 1:4)
lines(1:150, mns)
abline(v = 10.5, col = 'grey', lwd = 1)
abline(v = 30.5, col = 'grey', lwd = 1)
text(5, 18, 'Control1', col = 'grey', cex = 0.7)
text(21, 18, 'Control2', col = 'grey', cex = 0.7)

segments(18, unlist(num[18,1:4]),
         38, unlist(num[38,1:4]), col = 1:4)
segments(19, unlist(num[19,1:4]),
         39, unlist(num[39,1:4]), col = 1:4)

#####
reorder <- order(mns)
mns.r <- mns[reorder]
num.r <- num[reorder,]

dev.new('6', height = 6, width = 11)
par(mar = c(11,4,4,1))
plot(rep(1:150, 4), jitter(unlist(num.r[,1:4])),
     col = rep(1:4, each = 150), las = 2, cex.axis = 0.7,
     xaxt = 'n', yaxt = 'n', ylim = c(0,18),
     xlab = '', ylab = 'Leaf Rank', pch = 5, cex = 0.75,
     main = 'Leaf Rank in 148 leaves through 4 pairs of eyes')
mtext('black line is mean')
axis(1, at = 1:150, las = 2, cex.axis = 0.5,
     labels = paste(comp$GENUS, ' ', comp$SP,
                    ' (', comp$FAM, ') ', comp$X,
                    sep = '')[reorder])
axis(2, at = c(1,5,9,13), labels = c('1r1', '2r1', '3r1', '4r1'))
axis(2, at = 1:16, labels = FALSE, cex.axis = 0.5)
legend('topleft', ncol = 2, pch = 1,
       legend = colnames(num.r[,1:4]),
       col = 1:4)
lines(1:150, mns.r)

#####
dev.new('7', height = 6, width = 11)
par(mar = c(11,4,4,1))
plot(rep(1:150, 4), jitter(unlist(num.r[,1:4])),
     col = rep(1:4, each = 150), las = 2, cex.axis = 0.7,
     xaxt = 'n', yaxt = 'n', ylim = c(0,18),
     xlab = '', ylab = 'Leaf Rank', pch = 5, cex = 0.75,
     main = 'Leaf Rank in 148 leaves through 4 pairs of eyes')
mtext('grey line is mean')
axis(1, at = 1:150, las = 2, cex.axis = 0.5,
     labels = paste(comp$GENUS, ' ', comp$SP,
                    ' (', comp$FAM, ') ', comp$X,
                    sep = '')[reorder])
axis(2, at = c(1,5,9,13), labels = c('1r1', '2r1', '3r1', '4r1'))
axis(2, at = 1:16, labels = FALSE, cex.axis = 0.5)
legend('topleft', ncol = 2, pch = 1,
       legend = colnames(num.r[,1:4]),
       col = 1:4)
for(i in 1:4){
  lines(1:150, num.r[,i], col = i)
}
lines(1:150, mns.r, col = 'grey', lwd = 4)

##########
c1 <- unique(as.numeric(cor(cont1)))[-1]
c2 <- unique(as.numeric(cor(cont2)))[-1]
test <- unique(as.numeric(cor(test)))[-1]

t.test(c1, c2, paired = TRUE)
wilcox.test(c1, c2, paired = TRUE)
ks.test(c1, c2)

t.test(c1, test)
t.test(c2, test)

dev.new('8', height = 6, width = 6)
par(mfrow = c(3,1))
hist(c1, xlim = c(0.3,1),
     xlab = 'Correlations between pairs of coders',
     main = 'First Control, n = 10',
     breaks = seq(0.3,1,length.out = 14))
mu <- mean(c1)
abline(v = mu, col = 'red')
text(mu, 7, expression(mu), xpd = NA, col = 'red')
hist(c2, xlim = c(0.3,1),
     xlab = 'Correlations between pairs of coders',
     main = 'Second Control, n = 20',
     breaks = seq(0.3,1,length.out = 14))
mu <- mean(c2)
abline(v = mu, col = 'red')
text(mu, 7, expression(mu), xpd = NA, col = 'red')
hist(test, xlim = c(0.3,1),
     xlab = 'Correlations between pairs of coders',
     main = 'Test, n = 120',
     breaks = seq(0.3,1,length.out = 14))
mu <- mean(test)
abline(v = mu, col = 'red')
text(mu, 7, expression(mu), xpd = NA, col = 'red')

##########

library(YaleToolkit)
dev.new('9', height = 6, width = 6)
gpairs(cbind(jitter(as.matrix(num)), jitter(mns)),
       upper.pars = list(scatter = 'stats'),
       stat.pars = list(verbose = TRUE),
       scatter.pars = list(col = c(rep(1,10), rep (2,20),
                                   rep(3,120))))

############################################################
# Combining rank test data with measurements....
#  measurements in 'tax'; rank test data in 'comp'

dim(comp)
dim(tax)
names(comp)[1] <- 'TEST.NO'
matcher <- paste(substr(as.character(comp$GENUS)[31:150], 1, 3),
                 substr(as.character(comp$SP)[31:150], 1, 3),
                 sep = '.')
row.names(comp) <- c(1:30, matcher)

all(sort(row.names(comp)[31:150]) == sort(row.names(tax)))

xtax <- merge(tax, comp[31:150,], by = 'row.names')
xtax <- xtax[,-4]
names(xtax)[110] <- 'TEST.NO'

xtax$mean.rank.n <- apply(xtax[,120:125], 1,
                          function(x) mean(x, na.rm = TRUE))

dev.new('10', height = 6, width = 6)
plot(xtax$mean.rank.n, log(xtax$Area.MODE),
     xlab = 'Mean Rank', ylab = 'Modal areole area (log pixels)',
     xaxt = 'n', main = 'Predicting Areole Size from Rank')
axis(1, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0'))
axis(1, at = 1:16, labels = FALSE)
comp.lm <- lm(log(xtax$Area.MODE) ~ xtax$mean.rank.n)
summary(comp.lm)
abline(comp.lm)

dev.new('11', height = 6, width = 6)
gpairs(as.data.frame(list(mean.rank = xtax$mean.rank.n,
       areole.area.mode = log(xtax$Area.MODE),
       areole.area.mean = log(xtax$Area.MEAN),
       areole.ecc.mode = log(xtax$Eccentricity.MODE))),
       upper.pars = list(scatter = 'stats'),
       stat.pars = list(verbose = TRUE))

##### Stuff requiring EBImage
require(EBImage)

#tiles of 30 images with distributions
dev.new('12', height = 6, width = 11)
par(mfrow = c(5,6), mar = c(0,0,1,0))
for (i in meta$PHOT.NO[good][4:33]){
  par(new = FALSE)
  here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/'
  inum <- gsub(' ', 0, format(c(i, '9999'), justify = 'right')[1])
  this.image <- readImage(paste(here, inum, '.tif', sep = ''))
  this.image <- resize(this.image, w = 300)
  image(this.image, col = c('black', 'white'),
        xaxt = 'n', yaxt = 'n')
  par(new = TRUE)
  plot(density(dat$Area[dat$Leaf == i]),
       xlim = c(0, 50000), cex.main = 0.6,
       xaxt = 'n', yaxt = 'n', bty = 'n',
       new = FALSE, col = 'red', col.main = 'red',
       main = paste('\n', meta$REV.GEN[meta$PHOT.NO == i],
                    meta$REV.SP[meta$PHOT.NO == i], '(',
                    meta$REV.FAM[meta$PHOT.NO == i], ')\n\n',
                    'NCLC #:', meta$NCLC.NO[meta$PHOT.NO == i],
                    ', Phot. #:', i))
}

# figure with six images, thresholds and distributions
dev.new('13', height = 6, width = 8)
par(mfrow = c(6,4), mar = c(0,0,1,0))
for (i in meta$PHOT.NO[good][c(7,8,11:14)]){
  par(new = FALSE)
  here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/alljpgs/'
  inum <- gsub(' ', 0, format(c(i, '9999'), justify = 'right')[1])
  this.image <- readImage(paste(here, inum, '.jpg', sep = ''))
  this.image <- channel(this.image, mode = 'grey')
  # convert to grey becuase otherwise it would read in as
  #  an array with 3 levels for red, green, and blue
  this.image <- resize(this.image, w = 300)
  image(this.image, col = grey(seq(0, 1, length.out = 256)))                            
  par(new = TRUE)
  text(150, 100, paste('\n', meta$REV.GEN[meta$PHOT.NO == i],
                       meta$REV.SP[meta$PHOT.NO == i], '\n(',
                       meta$REV.FAM[meta$PHOT.NO == i], ')\n',
                       'NCLC #:', meta$NCLC.NO[meta$PHOT.NO == i],
                       ', Phot. #:', i),
       col = 'red', cex = 1.5)
    here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/'
  this.image <- readImage(paste(here, inum, '.tif', sep = ''))
  this.image <- resize(this.image, w = 300)
  image(this.image, col = c('black', 'white'))
  if(i == 10){
    plot(density(dat$Area[dat$Leaf == i]),
         xlim = c(0, 50000),
         yaxt = 'n', bty = 'n',
         new = FALSE,
         main = 'Distribution of areole areas')
    plot(density(dat$Eccentricity[dat$Leaf == i]),
         xlim = c(0,1),
         yaxt = 'n', bty = 'n',
         new = FALSE,
         main = 'Distribution of areole eccentricities')
  }else{
    plot(density(dat$Area[dat$Leaf == i]),
         xlim = c(0, 50000), xlab = '',
         xaxt = 'n', yaxt = 'n', bty = 'n',
         new = FALSE, main = '')
    plot(density(dat$Eccentricity[dat$Leaf == i]),
         xlim = c(0,1), xlab = '',
         xaxt = 'n', yaxt = 'n', bty = 'n',
         new = FALSE, main = '')
  }
}

# image tiles and distributions saved
#  image tiles...
for (i in meta$PHOT.NO[good][c(7,8,11:14)]){
  here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/alljpgs/'
  inum <- gsub(' ', 0, format(c(i, '9999'), justify = 'right')[1])
  this.raw <- readImage(paste(here, inum, '.jpg', sep = ''))
  this.raw <- channel(this.raw, mode = 'grey')
  this.raw <- resize(this.raw, w = 300)
  here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/'
  this.thresh <- readImage(paste(here, inum, '.tif', sep = ''))
  this.thresh <- resize(this.thresh, w = 300)
  if(i == 10) stk <- combine(this.raw, this.thresh)
  else{
    stk <- combine(stk, this.raw)
    stk <- combine(stk, this.thresh)
  }
  til <- tile(stk, nx = 2)
  writeImage(til, file = '14.jpg')
}
#  areole measurement distributions...
dev.new('14', height = 8, width = 4)
par(mfrow = c(6,3), mar = c(0,0,0,0))
for (i in meta$PHOT.NO[good][c(7,8,11:14)]){
  plot.new()
  text(0.5, 0.5, paste('\n', meta$REV.GEN[meta$PHOT.NO == i],
                       meta$REV.SP[meta$PHOT.NO == i], '\n(',
                       meta$REV.FAM[meta$PHOT.NO == i], ')\n',
                       'NCLC #:', meta$NCLC.NO[meta$PHOT.NO == i],
                       ', Phot. #:', i))
  par(mar = c(0,0,1,1))
  if(i == 10){
    plot(density(dat$Area[dat$Leaf == i]),
         xlim = c(0, 50000),
         yaxt = 'n', bty = 'n',
         new = FALSE,
         main = 'Distribution of areole areas')
    plot(density(dat$Eccentricity[dat$Leaf == i]),
         xlim = c(0,1),
         yaxt = 'n', bty = 'n',
         new = FALSE,
         main = 'Distribution of areole eccentricities')
  }else{
    plot(density(dat$Area[dat$Leaf == i]),
         xlim = c(0, 50000), xlab = '',
         xaxt = 'n', yaxt = 'n', bty = 'n',
         new = FALSE, main = '')
    plot(density(dat$Eccentricity[dat$Leaf == i]),
         xlim = c(0,1), xlab = '',
         xaxt = 'n', yaxt = 'n', bty = 'n',
         new = FALSE, main = '')
  }
}

# START HERE
# The following section needs repairs

# read in the tree and taxon names
require(ape)
tr <- read.tree(paste(PATH, '09-04-27samples.phy', sep = ''))

dev.new(height = 11, width = 8)
par(mar = c(4,1,1,2), xpd = NA)
plot(tr, use.edge.length = FALSE, show.tip.label = FALSE,
              direction = 'rightwards')
tiplabels(taxa.med[as.numeric(tr$tip.label)], adj = c(0,0.5),
          frame = 'none', bg = NULL, srt = 0,
          cex = 0.6)
# Fix location of Chrysophyllum! CHECK REST OF RELATIONSHIPS

############################################################
#
# Making comparisons between Leo's scored ranks and specimen
#  measurements at a genus level
#
# see also: /Users/wag/Desktop/HerbLeafOrg/rankdata/09-03-06RankProcessingRscript.R
# and: /Users/wag/Desktop/HerbLeafOrg/rankdata/09-06-02toStefan/09-06-02Rscript.R
# and: /Users/wag/Desktop/HerbLeafOrg/09-05-25analysis/09-05-25Rscript.R
# and: /Users/wag/Desktop/HerbLeafOrg/rankdata/09-07-12toStefan/09-07-12Rscript.R
# and: /Users/wag/Desktop/09.BSA/09-07-14BSAfigsScript.R

#nclc <- read.table('/Users/wag/Desktop/HerbLeafOrg/09-06-02NCLCindex/09-06-02NCLCindex.csv', sep = '\t', header = TRUE)
#meas <- read.table('/Users/wag/Desktop/HerbLeafOrg/rankdata/09-07-22toStefan/09-07-22tax.meas.txt', header = TRUE)

ranks <- clean
nclc <- read.table(paste(PATH, '09-06-02NCLC.csv', sep = ''),
                   sep = '\t', header = TRUE)
#meas <- read.table(paste(PATH, '09-07-22meas_tax.csv', sep = ''),
#                   header = TRUE)

gr <- unique(ranks$GENUS)
gr <- as.character(gr)
gn <- unique(nclc$PREFERRED.GENUS)
gn <- as.character(gn)
gm <- as.character(unique(meas$REV.GEN))
gm <- na.omit(gm)

cleared_in_leo_ranks <- gn[gn %in% gr]
ranks_in_cleared <- gr[gr %in% gn]
sum(sort(ranks_in_cleared) == sort(cleared_in_leo_ranks))
# there are 1097 cleared leaves with ranks assigned by Leo
sum(sort(gm[gm %in% gn]) == sort(gn[gn %in% gm]))
# there are 99 successfully measured genera in the NCLC
sum(sort(gm[gm %in% gr]) == sort(gr[gr %in% gm]))
# there are 51 successfully measured genera with ranks

matches <- gm[gm %in% gr]
LR.MIN <- rep('', length(matches))
LR.MAX <- rep('', length(matches))
LR.NUM.MIN <- rep(0, length(matches))
LR.NUM.MAX <- rep(0, length(matches))
for(i in 1:length(matches)){
  foo <- (1:nrow(ranks))[ranks$GENUS == matches[i]]
  foo <- as.numeric(na.omit(foo))
  goo <- as.character(na.omit(ranks$RMIN[foo]))
  if(length(goo) == 0) LR.MIN[i] <- NA
  else LR.MIN[i] <- sort(goo, decreasing = FALSE)[1]

  goo <- as.character(na.omit(ranks$RMAX[foo]))
  if(length(goo) == 0) LR.MAX[i] <- NA
  else LR.MAX[i] <- sort(goo, decreasing = TRUE)[1]

  goo <- as.numeric(na.omit(ranks$RMIN.N[foo]))
  if(length(goo) == 0) LR.NUM.MIN[i] <- NA
  else LR.NUM.MIN[i] <- sort(goo, decreasing = FALSE)[1]
  
  goo <- as.numeric(na.omit(ranks$RMAX.N[foo]))
  if(length(goo) == 0) LR.NUM.MAX[i] <- NA
  else LR.NUM.MAX[i] <- sort(goo, decreasing = TRUE)[1]
}

# out is a data frame for Leo's ranks of genera for which we also
#  have measurements
LR.BAR <- (LR.NUM.MIN +LR.NUM.MAX) / 2
out <- data.frame(LR.MIN, LR.MAX, LR.NUM.MIN, LR.NUM.MAX, LR.BAR)
row.names(out) <- matches

# pull out just the measurments for which there are matching ranks
#  in Leo's data
meas.rank <- meas[meas$REV.GEN %in% matches,20:96]

# average the two spp. of Salix in the measured specimens
#  and put together Leo's ranks with the measurements on
#  specimens from the same genera
out.meas <- cbind(out, rbind(meas.rank[1:46,],
                       colSums(meas.rank[47:48,])/2,
                       meas.rank[49:52,]))

# Plot of modal areole size against leaf rank
plot(log(out.meas$Area.MODE), out.meas$LR.BAR, type = 'n',
     xlab = 'Log modal areole area in pixels',
     ylab = 'Leaf rank', yaxt = 'n',
     main = 'Comparison of areole size with leaf rank')
segments(log(out.meas$Area.MODE), out.meas$LR.NUM.MIN,
         log(out.meas$Area.MODE), out.meas$LR.NUM.MAX,
         lwd = 3)
text(log(out.meas$Area.MODE), out.meas$LR.BAR, adj = c(0,0),
     labels = matches, cex = 0.5, srt = 45)
axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0'))
axis(2, at = 1:16, labels = FALSE)
ranksize.lm <- lm(out.meas$LR.BAR ~ log(out.meas$Area.MEAN))
summary(ranksize.lm)
abline(ranksize.lm$coef, col = 'red')
pp <- format(df(summary(ranksize.lm)$fstatistic[1],
             summary(ranksize.lm)$fstatistic[2],
             summary(ranksize.lm)$fstatistic[3]), digits = 3)
rsq <- format(summary(ranksize.lm)$r.squared, digits = 3)
coe <- format(ranksize.lm$coef, digits = 3)
mtext(paste('r^2=', rsq, ', slope=',
            coe[2], ', intercept=', coe[1],
            ', p =', pp, ', n=', nrow(out.meas)))

# Plot of median and IQR showing a similar pattern
par(mfrow = c(1,2))
plot(out.meas$Area.MEDIAN, out.meas$LR.BAR,
     log = 'x', xlab = 'Median areole area in pixels',
     ylab = 'Leaf rank', yaxt = 'n')
segments(out.meas$Area.MEDIAN, out.meas$LR.NUM.MIN,
         out.meas$Area.MEDIAN, out.meas$LR.NUM.MAX)
axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0'))
axis(2, at = 1:16, labels = FALSE)
plot(out.meas$Area.Q75 - out.meas$Area.Q25, out.meas$LR.BAR,
     log = 'x', xlab = 'Areole area IQR in pixels',
     ylab = 'Leaf rank', yaxt = 'n')
segments(out.meas$Area.Q75 - out.meas$Area.Q25, out.meas$LR.NUM.MIN,
         out.meas$Area.Q75 - out.meas$Area.Q25, out.meas$LR.NUM.MAX)
axis(2, at = c(1,5,9,13), labels = c('1r0', '2r0', '3r0', '4r0'))
axis(2, at = 1:16, labels = FALSE)


# plot of tree with areas
quartz(width = 8, height = 10.5)
par(mfrow = c(1,2))

par(mar = c(4,1,1,2), xpd = NA)
tr.pt <- plot(tr, use.edge.length = FALSE, show.tip.label = FALSE,
              direction = 'rightwards')
#tiplabels(taxa.med[as.numeric(tr$tip.label)], adj = c(0,0.5),
#          frame = 'none', bg = NULL, srt = 0,
#          cex = 0.6)

par(mar = c(4,5,1,1))
plot(tax$Area.MODE[as.numeric(tr$tip.label)],
     1:nrow(tax), yaxt = 'n', ylab = '', type = 'n',
     xlab = 'Areole area in pixels', log = 'x')
axis(2, at = 1:nrow(tax),
     labels = taxa.med[as.numeric(tr$tip.label)],
     cex.axis = 0.6, las = 2)
axis(2, at = 1:nrow(tax), labels = FALSE, tck = 1,
     lwd = 0.5, col = 'grey')
mtext('black = mode; red = median; blue = mean', cex = 0.7)
points(tax$Area.MEAN[as.numeric(tr$tip.label)],
     1:nrow(tax), col = 'blue', cex = 0.5, pch = 20)
segments(tax$Area.MEAN[as.numeric(tr$tip.label)] - 
         tax$Area.SD[as.numeric(tr$tip.label)],
         1:nrow(tax),
         tax$Area.MEAN[as.numeric(tr$tip.label)] +
         tax$Area.SD[as.numeric(tr$tip.label)],
         1:nrow(tax), col = 'blue', xpd = FALSE)
points(tax$Area.MEDIAN[as.numeric(tr$tip.label)],
     1:nrow(tax), col = 'red', cex = 0.5, pch = 20)
segments(tax$Area.Q25[as.numeric(tr$tip.label)],
         1:nrow(tax),
         tax$Area.Q75[as.numeric(tr$tip.label)],
         1:nrow(tax), col = 'red', xpd = FALSE)
points(tax$Area.MODE[as.numeric(tr$tip.label)],
       1:nrow(tax), pch = 19)

#plot of eccentricities and solidities
quartz(width = 8, height = 10.5)
par(mfrow = c(1,2))

par(mar = c(4,5,1,1))
plot(tax$Eccentricity.MODE[as.numeric(tr$tip.label)],
     1:nrow(tax), yaxt = 'n', ylab = '', type = 'n',
     xlab = 'Areole eccentricity', xlim = c(0.5,1))
#axis(2, at = 1:nrow(tax),
#     labels = taxa.med[as.numeric(tr$tip.label)],
#     cex.axis = 0.6, las = 2)
axis(2, at = 1:nrow(tax), labels = FALSE, tck = 1,
     lwd = 0.5, col = 'grey')
mtext('black = mode; red = median; blue = mean', cex = 0.7)
points(tax$Eccentricity.MEAN[as.numeric(tr$tip.label)],
     1:nrow(tax), col = 'blue', cex = 0.5, pch = 20)
segments(tax$Eccentricity.MEAN[as.numeric(tr$tip.label)] - 
         tax$Eccentricity.SD[as.numeric(tr$tip.label)],
         1:nrow(tax),
         tax$Eccentricity.MEAN[as.numeric(tr$tip.label)] +
         tax$Eccentricity.SD[as.numeric(tr$tip.label)],
         1:nrow(tax), col = 'blue', xpd = FALSE)
points(tax$Eccentricity.MEDIAN[as.numeric(tr$tip.label)],
     1:nrow(tax), col = 'red', cex = 0.5, pch = 20)
segments(tax$Eccentricity.Q25[as.numeric(tr$tip.label)],
         1:nrow(tax),
         tax$Eccentricity.Q75[as.numeric(tr$tip.label)],
         1:nrow(tax), col = 'red', xpd = FALSE)
points(tax$Eccentricity.MODE[as.numeric(tr$tip.label)],
       1:nrow(tax), pch = 19)

par(mar = c(4,5,1,1))
plot(tax$Solidity.MODE[as.numeric(tr$tip.label)],
     1:nrow(tax), yaxt = 'n', ylab = '', type = 'n',
     xlab = 'Areole solidity', xlim = c(0.5,1))
#axis(2, at = 1:nrow(tax),
#     labels = taxa.med[as.numeric(tr$tip.label)],
#     cex.axis = 0.6, las = 2)
axis(2, at = 1:nrow(tax), labels = FALSE, tck = 1,
     lwd = 0.5, col = 'grey')
mtext('black = mode; red = median; blue = mean', cex = 0.7)
points(tax$Solidity.MEAN[as.numeric(tr$tip.label)],
     1:nrow(tax), col = 'blue', cex = 0.5, pch = 20)
segments(tax$Solidity.MEAN[as.numeric(tr$tip.label)] - 
         tax$Solidity.SD[as.numeric(tr$tip.label)],
         1:nrow(tax),
         tax$Solidity.MEAN[as.numeric(tr$tip.label)] +
         tax$Solidity.SD[as.numeric(tr$tip.label)],
         1:nrow(tax), col = 'blue', xpd = FALSE)
points(tax$Solidity.MEDIAN[as.numeric(tr$tip.label)],
     1:nrow(tax), col = 'red', cex = 0.5, pch = 20)
segments(tax$Solidity.Q25[as.numeric(tr$tip.label)],
         1:nrow(tax),
         tax$Solidity.Q75[as.numeric(tr$tip.label)],
         1:nrow(tax), col = 'red', xpd = FALSE)
points(tax$Solidity.MODE[as.numeric(tr$tip.label)],
       1:nrow(tax), pch = 19)
       
# lots of pairs
plot(log(tax$Area.MODE), log(tax$Area.SD^2))
pairs(log(tax[,22:29]))
pairs(tax[,c('Area.MODE', 'ConvexArea.MODE', 'Eccentricity.MODE',
             'Solidity.MODE', 'Perimeter.MODE',
             'EquivDiameter.MODE',
             'MajorAxis.MODE', 'MinorAxis.MODE',
             'MaximumDistance.MODE','MeanDistance.MODE',
             'DistanceStandardDeviation.MODE')], pch = '.')

# reduced pairs plot with habit
quartz(width = 8, height = 8)
logArea.MODE <- log(tax$Area.MODE)
logPerim.MODE <- log(tax$Perimeter.MODE)
area.perim.ratio <- tax$Area.MODE / tax$Perimeter.MODE     
pairs(cbind(logArea.MODE, logPerim.MODE,
            tax[,c('Eccentricity.MODE',
                   'Solidity.MODE',
                   'MeanDistance.MODE')]),
      col = as.numeric(tax$WOODY == 'Y') + 1, pch = 5)
mtext('red woody; black herbaceaous', line = 3)

# comparisons by habit
boxplot(split(log(tax$Area.MODE), tax$WOODY),
        varwidth = TRUE, notch = FALSE,
        ylab = 'log(modal areole area)',
        main = 'Boxplots by habit',
        names = c('Unknown', 'Herbaceaous', 'Woody'))
jit <-jitter(as.numeric(tax$WOODY), amount = 0.1)
points(jit, log(tax$Area.MODE), pch = 5)
labs <- taxa.med
labs[tax$WOODY == 'Y' & log(tax$Area.MODE) < 9.5
                      & log(tax$Area.MODE) > 6] <- ''
text(jit, log(tax$Area.MODE), labels = labs,
     cex = 0.5, adj = c(0,0))
mtext(paste('for', nrow(tax), 'angiosperm species'))
wilcox.test(tax$Area.MODE[tax$WOODY == 'N'],
            tax$Area.MODE[tax$WOODY == 'Y'])
t.test(tax$Area.MODE[tax$WOODY == 'N'],
       tax$Area.MODE[tax$WOODY == 'Y'])

# pairs plot of Annacardiaceae
quartz(width = 8, height = 8)
fam.col <- rep('grey', nrow(tax))
fam.pch <- rep(4, nrow(tax))
fam.col[tax$REV.FAM == 'Anacardaceae'] <- 'black'
fam.pch[tax$REV.FAM == 'Anacardaceae'] <- 19
pairs(cbind(logArea.MODE, logPerim.MODE,
            tax[,c('Eccentricity.MODE',
                   'Solidity.MODE',
                   'MeanDistance.MODE')]),
      col = fam.col, pch = fam.pch, gap = 0.5)
mtext('Anacardiaceae', line = 3)

# pairs plot of Annonacae
quartz(width = 8, height = 8)
fam.col <- rep('grey', nrow(tax))
fam.pch <- rep(4, nrow(tax))
fam.col[tax$REV.FAM == 'Annonaceae'] <- 'black'
fam.pch[tax$REV.FAM == 'Annonaceae'] <- 19
pairs(cbind(logArea.MODE, logPerim.MODE,
            tax[,c('Eccentricity.MODE',
                   'Solidity.MODE',
                   'MeanDistance.MODE')]),
      col = fam.col, pch = fam.pch, gap = 0.5)
mtext('Annonaceae', line = 3)

# more image tiles....
these <- na.omit(meta$PHOT.NO[good & measured & meta$AB == 'b'])

for (i in these[5:54]){
  inum <- gsub(' ', 0, format(c(i, '9999'), justify = 'right')[1])
  here <- '/Users/wag/Desktop/HerbLeafOrg/dataV.3/09-04-03fromChuck/'
  this.thresh <- readImage(paste(here, inum, '.tif', sep = ''))
  this.thresh <- resize(this.thresh, w = 300)
  if(i == 13) stk <- this.thresh
  else{
    stk <- combine(stk, this.thresh)
  }
  til <- tile(stk, nx = 5)
  writeImage(til, file = '1to50.jpg')
}

#  and areole size distributions...
quartz(width = 8, height = 10.5)
par(mfrow = c(10,5), mar = c(0,0,0,0))
for (i in these[5:54]){
  par(mar = c(0,0,1,1))
  dens <- density(dat$Area[dat$Leaf == i])
  plot(dens,
       xlim = c(0, 50000), xlab = '',
       xaxt = 'n', yaxt = 'n', bty = 'n',
       new = FALSE, main = '', col = 'red')
  text(25000, mean(c(max(dens$y), min(dens$y))), col = 'red',
       paste('\n', meta$REV.GEN[meta$PHOT.NO == i],
             meta$REV.SP[meta$PHOT.NO == i], '\n(',
             meta$REV.FAM[meta$PHOT.NO == i], ')\n',
             'NCLC #:', meta$NCLC.NO[meta$PHOT.NO == i],
             ', Phot. #:', i))
}