######################################################################
###### Sydne Record Tree Hindcasting##################################
##### November, 9, 2011 (Corrected/Updated June 2012) ##############################################
####################################################################

#clear everything, just to be safe 
rm(list=ls(all=TRUE))

# load packages 
library(spBayes)
library(MBA)
library(geoR)
library(fields)
library(maptools)
library(coda)

# Read in fia occurrence data and corresponding climate data
tree_data <- read.csv("spenv_alltimesteps.csv", header=TRUE)

# subsample data maintaining 10% of the data for a hold-out validation
k <- sample(nrow(tree_data), nrow(tree_data)*.9)
treebp0 <- tree_data[k,]
holdout <- tree_data[-k,]

# Select columns with fagus and tsuga presence/absence data. Note the different ways that the non-Bayesian vs. Bayesian glm's handle the response variables.
fagus.pa <- treebp0$Fagus# Response for Bayesian models
fagus.pa.glm <- treebp0$Fagus/treebp0$count# Response for non-Bayesian glm used to get starting values
tsuga.pa <- treebp0$Tsuga # Response for Bayesian models
tsuga.pa.glm <- treebp0$Tsuga/treebp0$count # Response for non-Bayesian glm used to get starting values

# Climate variables
bio1 <- scale(treebp0$bio1_0) # Mean annual temp
bio15 <- scale(treebp0$bio15_0) # seasonality of precip

# Specify occurrence data grid coordinates. We tested this for various numbers of knots and knot generation algorithms.
coords <- as.matrix(treebp0[,12:13])
m <- 500
km.knots <- kmeans(coords, m)$centers

x <- as.matrix(rep(1,nrow(treebp0)))

# Fit non-spatial glm to get starting values for Bayesian GLMs
fagus_logit <- glm(fagus.pa.glm ~ 1 , family=binomial("logit"), weights=treebp0$count)
summary(fagus_logit)
tsuga_logit <- glm(tsuga.pa.glm ~ 1 , family=binomial("logit"), weights=treebp0$count)
summary(tsuga_logit)

# Run tsuga first
beta.starting <- coefficients(tsuga_logit)
beta.tuning <- t(chol(vcov(tsuga_logit)))

#Calculate maximum distance between coordinates for phi prior
max.dist <- max(iDist(coords))

###############################################################
#Fagus models
# Specify starting and tuning values from non-Bayesian glm
beta.starting <- coefficients(fagus_logit)
beta.tuning <- t(chol(vcov(fagus_logit)))

# Fagus pure spatial model (3 chains)
spfagusc1 <- spGLM(fagus.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

spfagusc2 <- spGLM(fagus.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

spfagusc3 <- spGLM(fagus.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

# Check convergence
samps <- mcmc.list(spfagusc1$p.samples, spfagusc2$p.samples, spfagusc3$p.samples)
plot(samps)

#Create MCMC trace plot, run diagmonstics
samps <- mcmc.list(spfagusc1$p.samples,spfagusc2$p.samples,spfagusc3$p.samples)
plot(samps)
print(gelman.diag(samps))
gelman.plot(samps)
burn.in <- 80000
print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2))

# Load custom code for non-spatial Bayesian GLM
dyn.load("src/glm_amcmc.so")
source("src/glm_amcmc.R")

n.batch <- 500
batch.length <- 200
n.samples <- n.batch*batch.length

# Calculate diagnostics for model fit (e.g., DIC)
print(spDiag(spfagusc1, start=80000))

# Fagus nons-spatial model (3 chains)
nonspfagusc1 <- glm.amcmc(fagus.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, 
                 starting=list("beta"=t_beta.starting),
                 tuning=list("beta"=t_beta.tuning),
                 priors=list("beta.flat"),
                 amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43),
                 n.samples=n.samples, sub.samples=c(100,n.samples,10),
                 verbose=TRUE, n.report=10)

nonspfagusc2 <- glm.amcmc(fagus.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, 
                 starting=list("beta"=t_beta.starting),
                 tuning=list("beta"=t_beta.tuning),
                 priors=list("beta.flat"),
                 amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43),
                 n.samples=n.samples, sub.samples=c(100,n.samples,10),
                 verbose=TRUE, n.report=10)
nonspfagusc3 <- glm.amcmc(fagus.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, 
                 starting=list("beta"=t_beta.starting),
                 tuning=list("beta"=t_beta.tuning),
                 priors=list("beta.flat"),
                 amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43),
                 n.samples=n.samples, sub.samples=c(100,n.samples,10),
                 verbose=TRUE, n.report=10)

# Check convergence
samps <- mcmc.list(nonspfagusc1$p.samples, nonspfagusc2$p.samples, nonspfagusc3$p.samples)
plot(samps)

#Create MCMC trace plot, run diagmonstics
samps <- mcmc.list(nonspfagusc1$p.samples,nonspfagusc2$p.samples,nonspfagusc3$p.samples)
plot(samps)
print(gelman.diag(samps))
gelman.plot(samps)
burn.in <- 80000
print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2))


# Calculate diagnostics for model fit (e.g., DIC)
print(spDiag(nonspfagusc1, start=80000))

# Fagus aggregated spatial and climatic model (3 chains)
agfagusc1 <- spGLM(fagus.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

agfagusc2 <- spGLM(fagus.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

agfagusc3 <- spGLM(fagus.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

# Check convergence
samps <- mcmc.list(agfagusc1$p.samples, agfagusc2$p.samples, agfagusc3$p.samples)
plot(samps)

#Create MCMC trace plot, run diagmonstics
samps <- mcmc.list(agfagusc1$p.samples,agfagusc2$p.samples,agfagusc3$p.samples)
plot(samps)
print(gelman.diag(samps))
gelman.plot(samps)
burn.in <- 80000
print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2))


# Calculate diagnostics for model fit (e.g., DIC)
print(spDiag(agfagusc1, start=80000))

###############################################################################
#Tsuga models
# Specify starting and tuning values from non-Bayesian glm
beta.starting <- coefficients(tsuga_logit)
beta.tuning <- t(chol(vcov(tsuga_logit)))

# Tsuga pure spatial model (3 chains)
sptsugac1 <- spGLM(tsuga.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

sptsugac2 <- spGLM(tsuga.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

sptsugac3 <- spGLM(tsuga.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

# Check convergence
samps <- mcmc.list(sptsugac1$p.samples, sptsugac2$p.samples, sptsugac3$p.samples)
plot(samps)

#Create MCMC trace plot, run diagmonstics
samps <- mcmc.list(sptsugac1$p.samples,sptsugac2$p.samples,sptsugac3$p.samples)
plot(samps)
print(gelman.diag(samps))
gelman.plot(samps)
burn.in <- 80000
print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2))


# Calculate diagnostics for model fit (e.g., DIC)
print(spDiag(sptsugac1, start=80000))

# Tsuga nons-spatial model (3 chains)
nonsptsugac1 <- glm.amcmc(tsuga.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, 
                 starting=list("beta"=t_beta.starting),
                 tuning=list("beta"=t_beta.tuning),
                 priors=list("beta.flat"),
                 amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43),
                 n.samples=n.samples, sub.samples=c(100,n.samples,10),
                 verbose=TRUE, n.report=10)

nonsptsugac2 <- glm.amcmc(tsuga.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, 
                 starting=list("beta"=t_beta.starting),
                 tuning=list("beta"=t_beta.tuning),
                 priors=list("beta.flat"),
                 amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43),
                 n.samples=n.samples, sub.samples=c(100,n.samples,10),
                 verbose=TRUE, n.report=10)
nonsptsugac3 <- glm.amcmc(tsuga.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, 
                 starting=list("beta"=t_beta.starting),
                 tuning=list("beta"=t_beta.tuning),
                 priors=list("beta.flat"),
                 amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43),
                 n.samples=n.samples, sub.samples=c(100,n.samples,10),
                 verbose=TRUE, n.report=10)

# Check convergence
samps <- mcmc.list(nonsptsugac1$p.samples, nonsptsugac2$p.samples, nonsptsugac3$p.samples)
plot(samps)

#Create MCMC trace plot, run diagmonstics
samps <- mcmc.list(nonsptsugac1$p.samples,nonsptsugac2$p.samples,nonsptsugac3$p.samples)
plot(samps)
print(gelman.diag(samps))
gelman.plot(samps)
burn.in <- 80000
print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2))

# Calculate diagnostics for model fit (e.g., DIC)
print(spDiag(nonsptsugac1, start=80000))

# Tsuga aggregated model (3 chains)
agtsugac1 <- spGLM(tsuga.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

agtsugac2 <- spGLM(tsuga.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

agtsugac3 <- spGLM(tsuga.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots,
                        starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0),
                        tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5),
                        priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)),
                        cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE,
                        verbose=TRUE, n.report=1)

# Check convergence
samps <- mcmc.list(agtsugac1$p.samples, agtsugac2$p.samples, agtsugac3$p.samples)
plot(samps)

#Create MCMC trace plot, run diagmonstics
samps <- mcmc.list(agtsugac1$p.samples,agtsugac2$p.samples,agtsugac3$p.samples)
plot(samps)
print(gelman.diag(samps))
gelman.plot(samps)
burn.in <- 80000
print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2))

# Calculate diagnostics for model fit (e.g., DIC)
print(spDiag(agtsugac1, start=80000))
###########################################################################################################
# Predict Fagus and Tsuga models for holdout data set

pred.coords <- as.matrix(holdout[,12:13])
pred.covars <- as.matrix(cbind(holdout$bio1_0, holdout$bio15_0))

n.samples <- 100000
burn.in <- 80000

# Pure spatial models
ho_pred_spfagusc1 <- spPredict(spfagusc1, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))
ho_pred_spfagusc2 <- spPredict(spfagusc2, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))
ho_pred_spfagusc3 <- spPredict(spfagusc3, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))

ho_pred_sptsugac1 <- spPredict(sptsugac1, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))
ho_pred_sptsugac2 <- spPredict(sptsugac2, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))
ho_pred_sptsugac3 <- spPredict(sptsugac3, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))

# Non-spatial models. Note spPredict doesn't work on the outputs of the non-spatial models from the custom code

# Fagus non-spatial prediction

#Create matrix for outputs of predicted probabilities for Fagus chain 1
ho_pred_nonspfagusc1 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc1$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonspfagusc1$p.samples)){
		z <- nonspfagusc1$p.samples[j,1] + nonspfagusc1$p.samples[j,2]*pred.covars[i,1] + nonspfagusc1$p.samples[j,3]*pred.covars[i,2]
		ho_pred_nonspfagusc1[i,j] <- 1/(1 + exp(-z))
	}
}

#Create matrix for outputs of predicted probabilities for Fagus chain 2
ho_pred_nonspfagusc2 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc2$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonspfagusc2$p.samples)){
		z <- nonspfagusc2$p.samples[j,1] + nonspfagusc2$p.samples[j,2]*pred.covars[i,1] + nonspfagusc2$p.samples[j,3]*pred.covars[i,2]
		ho_pred_nonspfagusc2[i,j] <- 1/(1 + exp(-z))
	}
}

#Create matrix for outputs of predicted probabilities for Fagus chain 3
ho_pred_nonspfagusc3 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc3$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonspfagusc3$p.samples)){
		z <- nonspfagusc3$p.samples[j,1] + nonspfagusc3$p.samples[j,2]*pred.covars[i,1] + nonspfagusc3$p.samples[j,3]*pred.covars[i,2]
		ho_pred_nonspfagusc2[i,j] <- 1/(1 + exp(-z))
	}
}

# Tsuga non-spatial prediction
#Create matrix for outputs of predicted probabilities for Fagus chain 1
ho_pred_nonsptsugac1 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac1$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonsptsugac1$p.samples)){
		z <- nonsptsugac1$p.samples[j,1] + nonsptsugac1$p.samples[j,2]*pred.covars[i,1] + nonsptsugac1$p.samples[j,3]*pred.covars[i,2]
		ho_pred_nonsptsugac1[i,j] <- 1/(1 + exp(-z))
	}
}

#Create matrix for outputs of predicted probabilities for Fagus chain 2
ho_pred_nonsptsugac2 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac2$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonsptsugac2$p.samples)){
		z <- nonsptsugac2$p.samples[j,1] + nonsptsugac2$p.samples[j,2]*pred.covars[i,1] + nonsptsugac2$p.samples[j,3]*pred.covars[i,2]
		ho_pred_nonsptsugac2[i,j] <- 1/(1 + exp(-z))
	}
}

#Create matrix for outputs of predicted probabilities for Fagus chain 3
ho_pred_nonsptsugac3 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac3$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonsptsugac3$p.samples)){
		z <- nonsptsugac3$p.samples[j,1] + nonsptsugac3$p.samples[j,2]*pred.covars[i,1] + nonsptsugac3$p.samples[j,3]*pred.covars[i,2]
		ho_pred_nonsptsugac3[i,j] <- 1/(1 + exp(-z))
	}
}
# Aggregated models
ho_pred_agfagusc1 <- spPredict(agfagusc1, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))
ho_pred_agfagusc2 <- spPredict(agfagusc2, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))
ho_pred_agfagusc3 <- spPredict(agfagusc3, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))

ho_pred_agtsugac1 <- spPredict(agtsugac1, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))
ho_pred_agtsugac2 <- spPredict(agtsugac2, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))
ho_pred_agtsugac3 <- spPredict(agtsugac3, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))

# FIA B-spline models

# Fagus	
fia_surf <- mba.points(as.matrix(cbind(treebp0$albx, treebp0$alby, (treebp0$Fagus_grandifolia/fia$count))), as.matrix(cbind(holdout$albx, holdout$alby)))
fia_surfmat <- as.matrix(fia_surf[[1]])	
colnames(fia_surfmat) <- c("fiax","fiay","fiaz")
fiaholdout <- cbind(holdout, fia_surf)
rowid <- seq(1,nrow(fiaholdout),1)		
ho_pred_fiafagus <- cbind(rowid, fiaholdout)

# Tsuga
fia_surf <- mba.points(as.matrix(cbind(treebp0$albx, treebp0$alby, (treebp0$Tsuga_canadensis/fia$count))), as.matrix(cbind(holdout$albx, holdout$alby)))
fia_surfmat <- as.matrix(fia_surf[[1]])	
colnames(fia_surfmat) <- c("fiax","fiay","fiaz")
fiaholdout <- cbind(holdout, fia_surf)
rowid <- seq(1,nrow(fiaholdout),1)		
ho_pred_fiatsuga <- cbind(rowid, fiaholdout)

##########################################################################
# Predict Fagus and Tsuga models for fossil pollen. * Note I only show the code for the 1 kaBP time step, but for the paper we ran the predictions out to 8kaBP

# Read in pollen and paleoclimate data for 1kaBP
pollen1 <- read.csv("pollen1kabp.csv", header=TRUE)

pred.coords <- as.matrix(pollen1[,1:2])
pred.covars <- as.matrix(cbind(pollen$bio1, pollen$bio15))

n.samples <- 100000
burn.in <- 80000

# Pure spatial models
p1_pred_spfagusc1 <- spPredict(spfagusc1, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))
p1_pred_spfagusc2 <- spPredict(spfagusc2, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))
p1_pred_spfagusc3 <- spPredict(spfagusc3, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))

p1_pred_sptsugac1 <- spPredict(sptsugac1, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))
p1_pred_sptsugac2 <- spPredict(sptsugac2, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))
p1_pred_sptsugac3 <- spPredict(sptsugac3, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in))

# Non-spatial models. Note spPredict doesn't work on the outputs of the non-spatial models from the custom code

# Fagus non-spatial prediction

#Create matrix for outputs of predicted probabilities for Fagus chain 1
p1_pred_nonspfagusc1 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc1$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonspfagusc1$p.samples)){
		z <- nonspfagusc1$p.samples[j,1] + nonspfagusc1$p.samples[j,2]*pred.covars[i,1] + nonspfagusc1$p.samples[j,3]*pred.covars[i,2]
		p1_pred_nonspfagusc1[i,j] <- 1/(1 + exp(-z))
	}
}

#Create matrix for outputs of predicted probabilities for Fagus chain 2
p1_pred_nonspfagusc2 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc2$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonspfagusc2$p.samples)){
		z <- nonspfagusc2$p.samples[j,1] + nonspfagusc2$p.samples[j,2]*pred.covars[i,1] + nonspfagusc2$p.samples[j,3]*pred.covars[i,2]
		p1_pred_nonspfagusc2[i,j] <- 1/(1 + exp(-z))
	}
}

#Create matrix for outputs of predicted probabilities for Fagus chain 3
p1_pred_nonspfagusc3 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc3$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonspfagusc3$p.samples)){
		z <- nonspfagusc3$p.samples[j,1] + nonspfagusc3$p.samples[j,2]*pred.covars[i,1] + nonspfagusc3$p.samples[j,3]*pred.covars[i,2]
		p1_pred_nonspfagusc3[i,j] <- 1/(1 + exp(-z))
	}
}

# Tsuga non-spatial prediction
#Create matrix for outputs of predicted probabilities for Fagus chain 1
p1_pred_nonsptsugac1 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac1$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonsptsugac1$p.samples)){
		z <- nonsptsugac1$p.samples[j,1] + nonsptsugac1$p.samples[j,2]*pred.covars[i,1] + nonsptsugac1$p.samples[j,3]*pred.covars[i,2]
		p1_pred_nonsptsugac1[i,j] <- 1/(1 + exp(-z))
	}
}

#Create matrix for outputs of predicted probabilities for Fagus chain 2
p1_pred_nonsptsugac2 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac2$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonsptsugac2$p.samples)){
		z <- nonsptsugac2$p.samples[j,1] + nonsptsugac2$p.samples[j,2]*pred.covars[i,1] + nonsptsugac2$p.samples[j,3]*pred.covars[i,2]
		p1_pred_nonsptsugac2[i,j] <- 1/(1 + exp(-z))
	}
}

#Create matrix for outputs of predicted probabilities for Fagus chain 3
p1_pred_nonsptsugac3 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac3$p.samples))

#take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function
for(i in 1:nrow(pred.coords)){
	for(j in 1:nrow(nonsptsugac3$p.samples)){
		z <- nonsptsugac3$p.samples[j,1] + nonsptsugac3$p.samples[j,2]*pred.covars[i,1] + nonsptsugac3$p.samples[j,3]*pred.covars[i,2]
		p1_pred_nonsptsugac3[i,j] <- 1/(1 + exp(-z))
	}
}
# Aggregated models
p1_pred_agfagusc1 <- spPredict(agfagusc1, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))
p1_pred_agfagusc2 <- spPredict(agfagusc2, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))
p1_pred_agfagusc3 <- spPredict(agfagusc3, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))

p1_pred_agtsugac1 <- spPredict(agtsugac1, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))
p1_pred_agtsugac2 <- spPredict(agtsugac2, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))
p1_pred_agtsugac3 <- spPredict(agtsugac3, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in))

# FIA B-spline models

# Fagus	
fia_surf <- mba.points(as.matrix(cbind(treebp0$albx, treebp0$alby, (treebp0$Fagus_grandifolia/fia$count))), as.matrix(cbind(pollen1$albx, pollen1$alby)))
fia_surfmat <- as.matrix(fia_surf[[1]])	
colnames(fia_surfmat) <- c("fiax","fiay","fiaz")
fiapollen1 <- cbind(pollen1, fia_surf)
rowid <- seq(1,nrow(fiapollen1),1)		
p1_pred_FIAfagus <- cbind(rowid, fiapollen1)

# Tsuga
fia_surf <- mba.points(as.matrix(cbind(treebp0$albx, treebp0$alby, (treebp0$Tsuga_canadensis/fia$count))), as.matrix(cbind(pollen1$albx, pollen1$alby)))
fia_surfmat <- as.matrix(fia_surf[[1]])	
colnames(fia_surfmat) <- c("fiax","fiay","fiaz")
fiapollen1 <- cbind(pollen1, fia_surf)
rowid <- seq(1,nrow(fiapollen1),1)		
p1_pred_FIAtsuga <- cbind(rowid, fiapollen1)

##################################################################################
# Assess predictive performance of models (Area under the receiver operating curve, false negative rate, false positive rate)

library(ROCR)
time_slices <- seq(0,8,1)

##########################
#
#	confuse
#		get confusion matrix from two vectors of 1's and 0's
#		gives matrix, percent correctly classified, and chi squared test
#		false negative and false positive rates can be calculated from the confusion matrix
#		Function written by Noah D. Charney
###########################

confuse <- function(predicted,observed){
	A <- predicted
	B <- observed
	confusion<-matrix(nrow=2,ncol=2,
		dimnames=list(c('pred.pres','pred.abs'),c('obs.pres','obs.abs')))
	confusion[] <-
		c(sum(A*B),
			sum((1-A)*B),
			sum(A*(1-B)),
			sum((1-A)*(1-B))
		)
	perc.correct <- sum(diag(confusion))/sum(confusion)
	chisq<-chisq.test(confusion)
	occurrence <- sum(B)/length(B)
	if(occurrence < 0.5){
		null <- 1- occurrence
	}else{
		null <- occurrence
	}
	return(list(confusion=confusion,perc.correct=perc.correct,null=null,chisq=chisq))
}	
##########################
library(PresenceAbsence)

# Fagus pure spatial model assessment

# Create matrices to hold outputs
	spfnr_fagus_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spfnr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	spfnr_fagus_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spfnr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	spfpr_fagus_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spfpr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	spfpr_fagus_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spfpr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	spauc_fagus_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spauc_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	spauc_fagus_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spauc_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")	

# Holdout calculations
	ypred_holdout <- c(ho_pred_spfagusc1$y.pred, ho_pred_spfagusc2$y.pred, ho_pred_spfagusc3$y.pred)
	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred_holdout), 1000)

	for(j in 1:length(sub_sample)){			
		
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Fagus) 
		AUC_temp <-performance(pred, "auc")
		spauc_fagus_low[1,j] <- AUC_temp@y.values[[1]][1]
		spauc_fagus_high[1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Fagus holdout
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$fagus_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred_holdout$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=holdout$Fagus_bin)
		confusion_mat <- confusion_out$confusion
		spfnr_fagus_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		spfpr_fagus_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		spfnr_fagus_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		spfpr_fagus_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
	

# now do these calculations for Fagus pure spatial pollen
for(i in 1:8){
	ypred <- c(paste("p",i,"_pred_spfagusc1$y.pred", sep=''), paste("p",i,"_pred_spfagusc2$y.pred", sep=''), paste("p",i,"_pred_spfagusc3$y.pred", sep=''))
	pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE)	

	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred), 1000)

	for(j in 1:length(sub_sample)){			
		# Fagus low pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_.5) 
		AUC_temp <-performance(pred, "auc")
		spauc_fagus_low[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Fagus low pollen threhold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_.5,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_.5)
		confusion_mat <- confusion_out$confusion
		spfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		spfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

		# Fagus high pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_1) 
		AUC_temp <-performance(pred, "auc")
		spauc_fagus_high[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Fagus high pollen threshold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_1)
		confusion_mat <- confusion_out$confusion
		spfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		spfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
}

############
# Tsuga pure spatial model assessment

# Create matrices to hold outputs
	spfnr_tsuga_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spfnr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	spfnr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spfnr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	spfpr_tsuga_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spfpr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	spfpr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spfpr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	spauc_tsuga_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spauc_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	spauc_tsuga_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(spauc_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")	
	
# Holdout calculations
	ypred_holdout <- c(ho_pred_sptsugac1$y.pred, ho_pred_sptsugac2$y.pred, ho_pred_sptsugac3$y.pred)
	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred_holdout), 1000)

	for(j in 1:length(sub_sample)){			
		
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Tsuga) 
		AUC_temp <-performance(pred, "auc")
		spauc_tsuga_low[1,j] <- AUC_temp@y.values[[1]][1]
		spauc_tsuga_high[1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Tsuga holdout
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$tsuga_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred_holdout$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=holdout$Tsuga_bin)
		confusion_mat <- confusion_out$confusion
		spfnr_tsuga_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		spfpr_tsuga_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		spfnr_tsuga_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		spfpr_tsuga_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
	

# now do these calculations for Tsuga pure spatial pollen
for(i in 1:8){
	ypred <- c(paste("p",i,"_pred_sptsugac1$y.pred", sep=''), paste("p",i,"_pred_sptsugac2$y.pred", sep=''), paste("p",i,"_pred_sptsugac3$y.pred", sep=''))
	pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE)	

	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred), 1000)

	for(j in 1:length(sub_sample)){			
		# Tsuga low pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_.5) 
		AUC_temp <-performance(pred, "auc")
		spauc_tsuga_low[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Tsuga low pollen threhold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_.5,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_.5)
		confusion_mat <- confusion_out$confusion
		spfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		spfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

		# Tsuga high pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_1) 
		AUC_temp <-performance(pred, "auc")
		spauc_tsuga_high[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Tsuga high pollen threshold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_1)
		confusion_mat <- confusion_out$confusion
		spfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		spfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
}

########################
# Fagus non-spatial model assessment

# Create matrices to hold outputs
	nonspfnr_fagus_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspfnr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	nonspfnr_fagus_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspfnr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	nonspfpr_fagus_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspfpr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	nonspfpr_fagus_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspfpr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	nonspauc_fagus_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspauc_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	nonspauc_fagus_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspauc_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")	

# Holdout calculations
	ypred_holdout <- c(ho_pred_nonspFagusc1$y.pred, ho_pred_nonspFagusc2$y.pred, ho_pred_nonspFagusc3$y.pred)
	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred_holdout), 1000)

	for(j in 1:length(sub_sample)){			
		
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Fagus) 
		AUC_temp <-performance(pred, "auc")
		nonspauc_fagus_low[1,j] <- AUC_temp@y.values[[1]][1]
		nonspauc_fagus_high[1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Fagus holdout
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$fagus_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred_holdout$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=holdout$Fagus_bin)
		confusion_mat <- confusion_out$confusion
		nonspfnr_fagus_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		nonspfpr_fagus_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		nonspfnr_fagus_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		nonspfpr_fagus_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	

# now do these calculations for Fagus nonspatial pollen
for(i in 1:8){
	ypred <- c(paste("p",i,"_pred_nonspFagusc1$y.pred", sep=''), paste("p",i,"_pred_nonspFagusc2$y.pred", sep=''), paste("p",i,"_pred_nonspFagusc3$y.pred", sep=''))
	pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE)	

	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred), 1000)

	for(j in 1:length(sub_sample)){			
		# Fagus low pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_.5) 
		AUC_temp <-performance(pred, "auc")
		nonspauc_fagus_low[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Fagus low pollen threhold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_.5,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_.5)
		confusion_mat <- confusion_out$confusion
		nonspfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		nonspfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

		# Fagus high pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_1) 
		AUC_temp <-performance(pred, "auc")
		nonspauc_fagus_high[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Fagus high pollen threshold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_1)
		confusion_mat <- confusion_out$confusion
		nonspfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		nonspfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
}

##############################
# Tsuga non-spatial model assessment

# Create matrices to hold outputs
	nonspfnr_tsuga_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspfnr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	nonspfnr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspfnr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	nonspfpr_Tsuga_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspfpr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	nonspfpr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspfpr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	nonspauc_tsuga_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspauc_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	nonspauc_tsuga_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(nonspauc_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")	
	
# Holdout calculations
	ypred_holdout <- c(ho_pred_nonspTsugac1$y.pred, ho_pred_nonspTsugac2$y.pred, ho_pred_nonspTsugac3$y.pred)

	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred_holdout), 1000)

	for(j in 1:length(sub_sample)){			
		
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Tsuga) 
		AUC_temp <-performance(pred, "auc")
		nonspauc_tsuga_low[1,j] <- AUC_temp@y.values[[1]][1]
		nonspauc_tsuga_high[1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Tsuga holdout
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$tsuga_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred_holdout$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=holdout$Tsuga_bin)
		confusion_mat <- confusion_out$confusion
		nonspfnr_tsuga_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		nonspfpr_tsuga_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		nonspfnr_tsuga_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		nonspfpr_tsuga_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
	
# now do these calculations for Tsuga nonspatial pollen
for(i in 1:8){
	ypred <- c(paste("p",i,"_pred_nonspTsugac1$y.pred", sep=''), paste("p",i,"_pred_nonspTsugac2$y.pred", sep=''), paste("p",i,"_pred_nonspTsugac3$y.pred", sep=''))
	pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE)	

	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred), 1000)

	for(j in 1:length(sub_sample)){			
		# Tsuga low pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_1) 
		AUC_temp <-performance(pred, "auc")
		nonspauc_tsuga_low[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Tsuga low pollen threhold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_1)
		confusion_mat <- confusion_out$confusion
		nonspfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		nonspfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

		# Tsuga high pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_2) 
		AUC_temp <-performance(pred, "auc")
		nonspauc_tsuga_high[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Tsuga high pollen threshold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_2,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_2)
		confusion_mat <- confusion_out$confusion
		nonspfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		nonspfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
}
########################
# Fagus aggregated model assessment
# Create matrices to hold outputs
	agfnr_fagus_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agfnr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	agfnr_fagus_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agfnr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	agfpr_fagus_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agfpr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	agfpr_fagus_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agfpr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	agauc_fagus_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agauc_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	agauc_fagus_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agauc_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")	
	
# Holdout calculations
	ypred_holdout <- c(ho_pred_agFagusc1$y.pred, ho_pred_agFagusc2$y.pred, ho_pred_agFagusc3$y.pred)

	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred_holdout), 1000)

	for(j in 1:length(sub_sample)){			
		
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Fagus) 
		AUC_temp <-performance(pred, "auc")
		agauc_fagus_low[1,j] <- AUC_temp@y.values[[1]][1]
		agauc_fagus_high[1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Fagus holdout
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$fagus_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred_holdout$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=holdout$Fagus_bin)
		confusion_mat <- confusion_out$confusion
		agfnr_fagus_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		agfpr_fagus_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		agfnr_fagus_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		agfpr_fagus_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
	

# now do these calculations for Fagus aggregated pollen
for(i in 1:8){
	ypred <- c(paste("p",i,"_pred_agFagusc1$y.pred", sep=''), paste("p",i,"_pred_agFagusc2$y.pred", sep=''), paste("p",i,"_pred_agFagusc3$y.pred", sep=''))
	pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE)	

	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred), 1000)

	for(j in 1:length(sub_sample)){			
		# Fagus low pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_.5) 
		AUC_temp <-performance(pred, "auc")
		agauc_fagus_low[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Fagus low pollen threhold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_.5,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_.5)
		confusion_mat <- confusion_out$confusion
		agfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		agfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

		# Fagus high pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_1) 
		AUC_temp <-performance(pred, "auc")
		agpauc_fagus_high[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Fagus high pollen threshold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_1)
		confusion_mat <- confusion_out$confusion
		agfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		agfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
}
##############################
# Tsuga aggregated model assessment

# Create matrices to hold outputs
	agfnr_tsuga_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agfnr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	agfnr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agfnr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	agfpr_tsuga_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agfpr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	agfpr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agfpr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")

	agauc_tsuga_high <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agauc_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")
	agauc_tsuga_low <- matrix(NA, nrow=1000, ncol=9) 
	colnames(agauc_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8")	
	
# Holdout calculations
	pred_holdout <- c(ho_pred_agTsugac1$y.pred, ho_pred_agTsugac2$y.pred, ho_pred_agTsugac3$y.pred)
	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred_holdout), 1000)

	for(j in 1:length(sub_sample)){			
		
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Tsuga) 
		AUC_temp <-performance(pred, "auc")
		agauc_tsuga_low[1,j] <- AUC_temp@y.values[[1]][1]
		agauc_tsuga_high[1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Tsuga holdout
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$tsuga_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred_holdout$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=holdout$Tsuga_bin)
		confusion_mat <- confusion_out$confusion
		nonspfnr_tsuga_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		nonspfpr_tsuga_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		nonspfnr_tsuga_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		nonspfpr_tsuga_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
	
# now do these calculations for Tsuga nonspatial pollen
for(i in 1:8){
	ypred <- c(paste("p",i,"_pred_agTsugac1$y.pred", sep=''), paste("p",i,"_pred_agTsugac2$y.pred", sep=''), paste("p",i,"_pred_agTsugac3$y.pred", sep=''))
	pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE)	

	# Randomly sub-sample 1000 draws from the posterior predictive distribution
	sub_sample <- sample(ncol(ypred), 1000)

	for(j in 1:length(sub_sample)){			
		# Tsuga low pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_1) 
		AUC_temp <-performance(pred, "auc")
		agauc_tsuga_low[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Tsuga low pollen threhold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_1)
		confusion_mat <- confusion_out$confusion
		agfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		agfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

		# Tsuga high pollen threhold
		# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
		pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_2) 
		AUC_temp <-performance(pred, "auc")
		agauc_tsuga_high[i+1,j] <- AUC_temp@y.values[[1]][1]
		rm(list=c("pred","AUC_temp"))

		# Calculate fnr and fpr for Tsuga high pollen threshold
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_2,ypred$[,sub_sample[j]), threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_2)
		confusion_mat <- confusion_out$confusion
		agfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		agfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
	}	
}

################
# FIA B-splines model assessment
# Fagus holdout auc
	pred <- prediction(ho_pred_fiafagus$fiaz, holdout$Fagus) 
	AUC_temp <-performance(pred, "auc")
	auc_Fagus_holdout <- AUC_temp@y.values[[1]][1]
	rm(list=c("pred","AUC_temp"))
# Tsuga holdout auc
	pred <- prediction(ho_pred_fiatsuga$fiaz, holdout$Tsuga) 
	AUC_temp <-performance(pred, "auc")
	auc_Tsuga_holdout <- AUC_temp@y.values[[1]][1]
	rm(list=c("pred","AUC_temp"))

# Calculate fnr and fpr for Fagis holdout
	cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$Fagus_bin,ho_pred_fiafagus$fiaz, threshold=100, opt.methods=2)
	cutoff <- cutoff[[2]][1]
	binpred <- ypred_holdout$[,sub_sample[j])
	binpred[binpred > cutoff] <- 1
	binpred[binpred < cutoff]<-0
	confusion_out <- confuse(predicted=binpred, observed=holdout$Fagus_bin)
	confusion_mat <- confusion_out$confusion
	fpr_fagus_holdout <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
	fnr_fagus_holdout <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
	rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

# Calculate fnr and fpr for Tsuga holdout
	cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$tsuga_bin,ho_pred_fiatsuga$fiaz, threshold=100, opt.methods=2)
	cutoff <- cutoff[[2]][1]
	binpred <- ypred_holdout$[,sub_sample[j])
	binpred[binpred > cutoff] <- 1
	binpred[binpred < cutoff]<-0
	confusion_out <- confuse(predicted=binpred, observed=holdout$Tsuga_bin)
	confusion_mat <- confusion_out$confusion
	fpr_tsuga_holdout <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
	fnr_tsuga_holdout <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
	rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

# Pollen assessment FIA B-spline false negative and false positive rates
# Create matrices to hold outputs
FIAfnr <- matrix(NA, nrow=8, ncol=5)
colnames(FIAfnr) <- c("kaBP", "Tsuga_1", "Tsuga_2", "Fagus_.5", "Fagus_1") 
FIAfnr[,1] <- seq(1,8,1)
FIAfpr <- matrix(NA, nrow=8, ncol=5)
colnames(FIAfpr) <- c("kaBP", "Tsuga_1", "Tsuga_2", "Fagus_.5", "Fagus_1") 
FIAfpr[,1] <- seq(1,8,1)

for(i in 1:8){
		pred_fagus <- paste("p", i, "_pred_FIAfagus")
		pred_tsuga <- paste("p", i, "_pred_FIAtsuga")
		
	# Low pollen threholds
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(treebp0),1),pred_fagus$Fagus_.5, pollen$fiaz, threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred_holdout$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=treebp0$Fagus_.5)
		confusion_mat <- confusion_out$confusion
		FIAfnr[i,4] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		FIAfpr[i,4] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(treebp0),1),pred_tsuga$Tsuga_1, pollen$fiaz, threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred_holdout$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=treebp0$Tsuga_1)
		confusion_mat <- confusion_out$confusion
		FIAfnr[i,2] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		FIAfpr[i,2] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))
		
	# High pollen thresholds
		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(treebp0),1),pred_fagus$Fagus_1, pollen$fiaz, threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred_holdout$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=treebp0$Fagus_1)
		confusion_mat <- confusion_out$confusion
		FIAfnr[i,4] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		FIAfpr[i,4] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

		cutoff <- optimal.thresholds(cbind(sequence(1,nrow(treebp0),1),pred_fagus$Tsuga_2, pollen$fiaz, threshold=100, opt.methods=2)
		cutoff <- cutoff[[2]][1]
		binpred <- ypred_holdout$[,sub_sample[j])
		binpred[binpred > cutoff] <- 1
		binpred[binpred < cutoff]<-0
		confusion_out <- confuse(predicted=binpred, observed=treebp0$Tsuga_2)
		confusion_mat <- confusion_out$confusion
		FIAfnr[i,3] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1])
		FIAfpr[i,3] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4])
		rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat"))

}

# Pollen assessment FIA B-spline auc
# Create matrices to hold outputs
FIAauc <- matrix(NA, nrow=8, ncol=5)
colnames(FIAauc) <- c("kaBP", "Tsuga_1", "Tsuga_2", "Fagus_5", "Fagus_1") 
FIAauc[,1] <- time_slices

for(i in 1:length(time_slices)){

	# read in pollen data for each 1000 year time slice
	pollen <- read.csv(paste("pollen",i,"kabp.csv", sep=''), header=TRUE)
	pollen_predTsuga <- paste("p",i,"_pred_FIAtsuga", sep='')
	pollen_predFagus <- paste("p",i,"_pred_FIAfagus", sep='')

	# Low pollen threshold fagus
	# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
	pred <- prediction(pollen_predFagus$xyz.est.z, pollen$Fagus_.5)
	auc_temp <-performance(pred, "auc")
	auc[i,4] <- auc_temp@y.values[[1]][1]
	
	# high pollen threshold fagus
	# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
	pred <- prediction(pollen_predFagus$xyz.est.z, fiapollen$Fagus_1)
	auc_temp <-performance(pred, "auc")
	auc[i,5] <- auc_temp@y.values[[1]][1]

	# Low pollen threshold Tsuga
	# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
	pred <- prediction(pollen_predTsuga$xyz.est.z, fiapollen$Tsuga_1)
	auc_temp <-performance(pred, "auc")
	auc[i,2] <- auc_temp@y.values[[1]][1]
	
	# high pollen threshold Tsuga
	# Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC)
	pred <- prediction(pollen_predTsuga$xyz.est.z, fiapollen$Tsuga_2)
	auc_temp <-performance(pred, "auc")
	auc[i,3] <- auc_temp@y.values[[1]][1]

}

###################
# Anovas
library(car)
data <- read.csv("anova_analysis.csv", header=TRUE)

# AUC
# Q-Q Plot to check for normality and homogeneity of variance
par(mfrow=c(1,2))
qqnorm(data$auc)
qqline(data$auc)
# Arcsine transform response to meet normality assumption
qqnorm(asin(data$auc))
qqline(asin(data$auc))
# Fit auc hindcasting linear model
modauc <- glm(asin(auc) ~ genus*model + time + threshold, data=data, family=gaussian)
# Check for homogeneity of variance by looking at a plot of the model's residuals
plot(residuals(modauc))
summary(aov(modauc))

# Calculate Tukey's post hoc comparisons. Since the design is balanced, we don't have to worry about differences between sums of squares in coding the factors.
library(agricolae)
HSD.test(modauc, trt="model")
HSD.test(modauc, trt="genus")
HSD.test(modauc, trt="time")
HSD.test(modauc, trt="threshold")
# Graph interaction
boxplot(auc ~ genus*model, data=data) 

############################
# False negative rate
# Q-Q Plot to check for normality and homogeneity of variance
 qqnorm(data$fnr)
 qqline(data$fnr)
 qqnorm(asin(data$fnr))
 qqline(asin(data$fnr))

# Fit auc hindcasting linear model
modfnr <- glm(asin(fnr) ~ genus*model + time + threshold, data=data, family=gaussian)
# Check for homogeneity of variace by looking at a plot of the model's residuals
plot(residuals(modfnr))
summary(aov(modfnr))
# Calculate Tukey's post hoc comparisons. Since the design is balanced, we don't have to worry about differences between sums of squares in coding the factors.
HSD.test(modfnr, trt="model")
HSD.test(modfnr, trt="genus")
HSD.test(modfnr, trt="time")
HSD.test(modfnr, trt="threshold")

############################
# False positive rate
# Q-Q Plot to check for normality and homogeneity of variance
 qqnorm(data$fpr)
 qqline(data$fpr)
 qqnorm(asin(data$fpr))
 qqline(asin(data$fpr))

# Fit auc hindcasting linear model
modfpr <- glm(asin(fpr) ~ genus*model + time + threshold, data=data[-61,], family=gaussian)
# Check for homogeneity of variace by looking at a plot of the model's residuals
plot(residuals(modfpr))
summary(aov(modfpr))
# Calculate Tukey's post hoc comparisons. Since the design is balanced, we don't have to worry about differences between sums of squares in coding the factors.
HSD.test(modfpr, trt="model")
HSD.test(modfpr, trt="genus")
HSD.test(modfpr, trt="time") 
HSD.test(modfpr, trt="threshold") 

# Bonferonni correction
p <- read.csv("pvals.csv", header=TRUE)
adjusted <- p.adjust(p$pvalue, "holm")
output <-cbind(p, adjusted)

################### El fin. ######################################################