###################################################################### ###### Sydne Record Tree Hindcasting################################## ##### November, 9, 2011 (Corrected/Updated June 2012) ############################################## #################################################################### #clear everything, just to be safe rm(list=ls(all=TRUE)) # load packages library(spBayes) library(MBA) library(geoR) library(fields) library(maptools) library(coda) # Read in fia occurrence data and corresponding climate data tree_data <- read.csv("spenv_alltimesteps.csv", header=TRUE) # subsample data maintaining 10% of the data for a hold-out validation k <- sample(nrow(tree_data), nrow(tree_data)*.9) treebp0 <- tree_data[k,] holdout <- tree_data[-k,] # Select columns with fagus and tsuga presence/absence data. Note the different ways that the non-Bayesian vs. Bayesian glm's handle the response variables. fagus.pa <- treebp0$Fagus# Response for Bayesian models fagus.pa.glm <- treebp0$Fagus/treebp0$count# Response for non-Bayesian glm used to get starting values tsuga.pa <- treebp0$Tsuga # Response for Bayesian models tsuga.pa.glm <- treebp0$Tsuga/treebp0$count # Response for non-Bayesian glm used to get starting values # Climate variables bio1 <- scale(treebp0$bio1_0) # Mean annual temp bio15 <- scale(treebp0$bio15_0) # seasonality of precip # Specify occurrence data grid coordinates. We tested this for various numbers of knots and knot generation algorithms. coords <- as.matrix(treebp0[,12:13]) m <- 500 km.knots <- kmeans(coords, m)$centers x <- as.matrix(rep(1,nrow(treebp0))) # Fit non-spatial glm to get starting values for Bayesian GLMs fagus_logit <- glm(fagus.pa.glm ~ 1 , family=binomial("logit"), weights=treebp0$count) summary(fagus_logit) tsuga_logit <- glm(tsuga.pa.glm ~ 1 , family=binomial("logit"), weights=treebp0$count) summary(tsuga_logit) # Run tsuga first beta.starting <- coefficients(tsuga_logit) beta.tuning <- t(chol(vcov(tsuga_logit))) #Calculate maximum distance between coordinates for phi prior max.dist <- max(iDist(coords)) ############################################################### #Fagus models # Specify starting and tuning values from non-Bayesian glm beta.starting <- coefficients(fagus_logit) beta.tuning <- t(chol(vcov(fagus_logit))) # Fagus pure spatial model (3 chains) spfagusc1 <- spGLM(fagus.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) spfagusc2 <- spGLM(fagus.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) spfagusc3 <- spGLM(fagus.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) # Check convergence samps <- mcmc.list(spfagusc1$p.samples, spfagusc2$p.samples, spfagusc3$p.samples) plot(samps) #Create MCMC trace plot, run diagmonstics samps <- mcmc.list(spfagusc1$p.samples,spfagusc2$p.samples,spfagusc3$p.samples) plot(samps) print(gelman.diag(samps)) gelman.plot(samps) burn.in <- 80000 print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2)) # Load custom code for non-spatial Bayesian GLM dyn.load("src/glm_amcmc.so") source("src/glm_amcmc.R") n.batch <- 500 batch.length <- 200 n.samples <- n.batch*batch.length # Calculate diagnostics for model fit (e.g., DIC) print(spDiag(spfagusc1, start=80000)) # Fagus nons-spatial model (3 chains) nonspfagusc1 <- glm.amcmc(fagus.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, starting=list("beta"=t_beta.starting), tuning=list("beta"=t_beta.tuning), priors=list("beta.flat"), amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43), n.samples=n.samples, sub.samples=c(100,n.samples,10), verbose=TRUE, n.report=10) nonspfagusc2 <- glm.amcmc(fagus.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, starting=list("beta"=t_beta.starting), tuning=list("beta"=t_beta.tuning), priors=list("beta.flat"), amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43), n.samples=n.samples, sub.samples=c(100,n.samples,10), verbose=TRUE, n.report=10) nonspfagusc3 <- glm.amcmc(fagus.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, starting=list("beta"=t_beta.starting), tuning=list("beta"=t_beta.tuning), priors=list("beta.flat"), amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43), n.samples=n.samples, sub.samples=c(100,n.samples,10), verbose=TRUE, n.report=10) # Check convergence samps <- mcmc.list(nonspfagusc1$p.samples, nonspfagusc2$p.samples, nonspfagusc3$p.samples) plot(samps) #Create MCMC trace plot, run diagmonstics samps <- mcmc.list(nonspfagusc1$p.samples,nonspfagusc2$p.samples,nonspfagusc3$p.samples) plot(samps) print(gelman.diag(samps)) gelman.plot(samps) burn.in <- 80000 print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2)) # Calculate diagnostics for model fit (e.g., DIC) print(spDiag(nonspfagusc1, start=80000)) # Fagus aggregated spatial and climatic model (3 chains) agfagusc1 <- spGLM(fagus.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) agfagusc2 <- spGLM(fagus.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) agfagusc3 <- spGLM(fagus.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) # Check convergence samps <- mcmc.list(agfagusc1$p.samples, agfagusc2$p.samples, agfagusc3$p.samples) plot(samps) #Create MCMC trace plot, run diagmonstics samps <- mcmc.list(agfagusc1$p.samples,agfagusc2$p.samples,agfagusc3$p.samples) plot(samps) print(gelman.diag(samps)) gelman.plot(samps) burn.in <- 80000 print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2)) # Calculate diagnostics for model fit (e.g., DIC) print(spDiag(agfagusc1, start=80000)) ############################################################################### #Tsuga models # Specify starting and tuning values from non-Bayesian glm beta.starting <- coefficients(tsuga_logit) beta.tuning <- t(chol(vcov(tsuga_logit))) # Tsuga pure spatial model (3 chains) sptsugac1 <- spGLM(tsuga.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) sptsugac2 <- spGLM(tsuga.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) sptsugac3 <- spGLM(tsuga.pa ~ 1, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) # Check convergence samps <- mcmc.list(sptsugac1$p.samples, sptsugac2$p.samples, sptsugac3$p.samples) plot(samps) #Create MCMC trace plot, run diagmonstics samps <- mcmc.list(sptsugac1$p.samples,sptsugac2$p.samples,sptsugac3$p.samples) plot(samps) print(gelman.diag(samps)) gelman.plot(samps) burn.in <- 80000 print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2)) # Calculate diagnostics for model fit (e.g., DIC) print(spDiag(sptsugac1, start=80000)) # Tsuga nons-spatial model (3 chains) nonsptsugac1 <- glm.amcmc(tsuga.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, starting=list("beta"=t_beta.starting), tuning=list("beta"=t_beta.tuning), priors=list("beta.flat"), amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43), n.samples=n.samples, sub.samples=c(100,n.samples,10), verbose=TRUE, n.report=10) nonsptsugac2 <- glm.amcmc(tsuga.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, starting=list("beta"=t_beta.starting), tuning=list("beta"=t_beta.tuning), priors=list("beta.flat"), amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43), n.samples=n.samples, sub.samples=c(100,n.samples,10), verbose=TRUE, n.report=10) nonsptsugac3 <- glm.amcmc(tsuga.pa~as.matrix(cbind(bio1, bio15)), family="binomial", weights=treebp0$count, starting=list("beta"=t_beta.starting), tuning=list("beta"=t_beta.tuning), priors=list("beta.flat"), amcmc=list("n.batch"=n.batch,"batch.length"=batch.length,"accept.rate"=0.43), n.samples=n.samples, sub.samples=c(100,n.samples,10), verbose=TRUE, n.report=10) # Check convergence samps <- mcmc.list(nonsptsugac1$p.samples, nonsptsugac2$p.samples, nonsptsugac3$p.samples) plot(samps) #Create MCMC trace plot, run diagmonstics samps <- mcmc.list(nonsptsugac1$p.samples,nonsptsugac2$p.samples,nonsptsugac3$p.samples) plot(samps) print(gelman.diag(samps)) gelman.plot(samps) burn.in <- 80000 print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2)) # Calculate diagnostics for model fit (e.g., DIC) print(spDiag(nonsptsugac1, start=80000)) # Tsuga aggregated model (3 chains) agtsugac1 <- spGLM(tsuga.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) agtsugac2 <- spGLM(tsuga.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) agtsugac3 <- spGLM(tsuga.pa ~ bio1 + bio15, family="binomial", weights=treebp0[,3], coords=coords, knots=km.knots, starting=list("beta"=beta.starting, "phi"=3/(0.5*max.dist), "sigma.sq"=1, "w"=0), tuning=list("beta"=beta.tuning, "phi"=0.5, "sigma.sq"=0.5, "w"=0.5), priors=list("phi.Unif"=c(3/max.dist, 3/1), "sigma.sq.IG"=c(2, 10)), cov.model="exponential", amcmc=list("n.batch"=2000,"batch.length"=50,"accept.rate"=0.43), modified.pp=FALSE, verbose=TRUE, n.report=1) # Check convergence samps <- mcmc.list(agtsugac1$p.samples, agtsugac2$p.samples, agtsugac3$p.samples) plot(samps) #Create MCMC trace plot, run diagmonstics samps <- mcmc.list(agtsugac1$p.samples,agtsugac2$p.samples,agtsugac3$p.samples) plot(samps) print(gelman.diag(samps)) gelman.plot(samps) burn.in <- 80000 print(round(summary(window(samps, start = burn.in))$quantiles[,c(3, 1, 5)], 2)) # Calculate diagnostics for model fit (e.g., DIC) print(spDiag(agtsugac1, start=80000)) ########################################################################################################### # Predict Fagus and Tsuga models for holdout data set pred.coords <- as.matrix(holdout[,12:13]) pred.covars <- as.matrix(cbind(holdout$bio1_0, holdout$bio15_0)) n.samples <- 100000 burn.in <- 80000 # Pure spatial models ho_pred_spfagusc1 <- spPredict(spfagusc1, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) ho_pred_spfagusc2 <- spPredict(spfagusc2, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) ho_pred_spfagusc3 <- spPredict(spfagusc3, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) ho_pred_sptsugac1 <- spPredict(sptsugac1, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) ho_pred_sptsugac2 <- spPredict(sptsugac2, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) ho_pred_sptsugac3 <- spPredict(sptsugac3, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) # Non-spatial models. Note spPredict doesn't work on the outputs of the non-spatial models from the custom code # Fagus non-spatial prediction #Create matrix for outputs of predicted probabilities for Fagus chain 1 ho_pred_nonspfagusc1 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc1$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonspfagusc1$p.samples)){ z <- nonspfagusc1$p.samples[j,1] + nonspfagusc1$p.samples[j,2]*pred.covars[i,1] + nonspfagusc1$p.samples[j,3]*pred.covars[i,2] ho_pred_nonspfagusc1[i,j] <- 1/(1 + exp(-z)) } } #Create matrix for outputs of predicted probabilities for Fagus chain 2 ho_pred_nonspfagusc2 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc2$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonspfagusc2$p.samples)){ z <- nonspfagusc2$p.samples[j,1] + nonspfagusc2$p.samples[j,2]*pred.covars[i,1] + nonspfagusc2$p.samples[j,3]*pred.covars[i,2] ho_pred_nonspfagusc2[i,j] <- 1/(1 + exp(-z)) } } #Create matrix for outputs of predicted probabilities for Fagus chain 3 ho_pred_nonspfagusc3 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc3$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonspfagusc3$p.samples)){ z <- nonspfagusc3$p.samples[j,1] + nonspfagusc3$p.samples[j,2]*pred.covars[i,1] + nonspfagusc3$p.samples[j,3]*pred.covars[i,2] ho_pred_nonspfagusc2[i,j] <- 1/(1 + exp(-z)) } } # Tsuga non-spatial prediction #Create matrix for outputs of predicted probabilities for Fagus chain 1 ho_pred_nonsptsugac1 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac1$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonsptsugac1$p.samples)){ z <- nonsptsugac1$p.samples[j,1] + nonsptsugac1$p.samples[j,2]*pred.covars[i,1] + nonsptsugac1$p.samples[j,3]*pred.covars[i,2] ho_pred_nonsptsugac1[i,j] <- 1/(1 + exp(-z)) } } #Create matrix for outputs of predicted probabilities for Fagus chain 2 ho_pred_nonsptsugac2 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac2$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonsptsugac2$p.samples)){ z <- nonsptsugac2$p.samples[j,1] + nonsptsugac2$p.samples[j,2]*pred.covars[i,1] + nonsptsugac2$p.samples[j,3]*pred.covars[i,2] ho_pred_nonsptsugac2[i,j] <- 1/(1 + exp(-z)) } } #Create matrix for outputs of predicted probabilities for Fagus chain 3 ho_pred_nonsptsugac3 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac3$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonsptsugac3$p.samples)){ z <- nonsptsugac3$p.samples[j,1] + nonsptsugac3$p.samples[j,2]*pred.covars[i,1] + nonsptsugac3$p.samples[j,3]*pred.covars[i,2] ho_pred_nonsptsugac3[i,j] <- 1/(1 + exp(-z)) } } # Aggregated models ho_pred_agfagusc1 <- spPredict(agfagusc1, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) ho_pred_agfagusc2 <- spPredict(agfagusc2, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) ho_pred_agfagusc3 <- spPredict(agfagusc3, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) ho_pred_agtsugac1 <- spPredict(agtsugac1, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) ho_pred_agtsugac2 <- spPredict(agtsugac2, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) ho_pred_agtsugac3 <- spPredict(agtsugac3, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) # FIA B-spline models # Fagus fia_surf <- mba.points(as.matrix(cbind(treebp0$albx, treebp0$alby, (treebp0$Fagus_grandifolia/fia$count))), as.matrix(cbind(holdout$albx, holdout$alby))) fia_surfmat <- as.matrix(fia_surf[[1]]) colnames(fia_surfmat) <- c("fiax","fiay","fiaz") fiaholdout <- cbind(holdout, fia_surf) rowid <- seq(1,nrow(fiaholdout),1) ho_pred_fiafagus <- cbind(rowid, fiaholdout) # Tsuga fia_surf <- mba.points(as.matrix(cbind(treebp0$albx, treebp0$alby, (treebp0$Tsuga_canadensis/fia$count))), as.matrix(cbind(holdout$albx, holdout$alby))) fia_surfmat <- as.matrix(fia_surf[[1]]) colnames(fia_surfmat) <- c("fiax","fiay","fiaz") fiaholdout <- cbind(holdout, fia_surf) rowid <- seq(1,nrow(fiaholdout),1) ho_pred_fiatsuga <- cbind(rowid, fiaholdout) ########################################################################## # Predict Fagus and Tsuga models for fossil pollen. * Note I only show the code for the 1 kaBP time step, but for the paper we ran the predictions out to 8kaBP # Read in pollen and paleoclimate data for 1kaBP pollen1 <- read.csv("pollen1kabp.csv", header=TRUE) pred.coords <- as.matrix(pollen1[,1:2]) pred.covars <- as.matrix(cbind(pollen$bio1, pollen$bio15)) n.samples <- 100000 burn.in <- 80000 # Pure spatial models p1_pred_spfagusc1 <- spPredict(spfagusc1, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) p1_pred_spfagusc2 <- spPredict(spfagusc2, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) p1_pred_spfagusc3 <- spPredict(spfagusc3, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) p1_pred_sptsugac1 <- spPredict(sptsugac1, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) p1_pred_sptsugac2 <- spPredict(sptsugac2, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) p1_pred_sptsugac3 <- spPredict(sptsugac3, pred.coords=pred.coords, pred.covars=rep(1, nrow(pred.covars)), start=burn.in)) # Non-spatial models. Note spPredict doesn't work on the outputs of the non-spatial models from the custom code # Fagus non-spatial prediction #Create matrix for outputs of predicted probabilities for Fagus chain 1 p1_pred_nonspfagusc1 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc1$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonspfagusc1$p.samples)){ z <- nonspfagusc1$p.samples[j,1] + nonspfagusc1$p.samples[j,2]*pred.covars[i,1] + nonspfagusc1$p.samples[j,3]*pred.covars[i,2] p1_pred_nonspfagusc1[i,j] <- 1/(1 + exp(-z)) } } #Create matrix for outputs of predicted probabilities for Fagus chain 2 p1_pred_nonspfagusc2 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc2$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonspfagusc2$p.samples)){ z <- nonspfagusc2$p.samples[j,1] + nonspfagusc2$p.samples[j,2]*pred.covars[i,1] + nonspfagusc2$p.samples[j,3]*pred.covars[i,2] p1_pred_nonspfagusc2[i,j] <- 1/(1 + exp(-z)) } } #Create matrix for outputs of predicted probabilities for Fagus chain 3 p1_pred_nonspfagusc3 <- matrix(0, nrow(pred.coords), nrow(nonspfagusc3$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonspfagusc3$p.samples)){ z <- nonspfagusc3$p.samples[j,1] + nonspfagusc3$p.samples[j,2]*pred.covars[i,1] + nonspfagusc3$p.samples[j,3]*pred.covars[i,2] p1_pred_nonspfagusc3[i,j] <- 1/(1 + exp(-z)) } } # Tsuga non-spatial prediction #Create matrix for outputs of predicted probabilities for Fagus chain 1 p1_pred_nonsptsugac1 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac1$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonsptsugac1$p.samples)){ z <- nonsptsugac1$p.samples[j,1] + nonsptsugac1$p.samples[j,2]*pred.covars[i,1] + nonsptsugac1$p.samples[j,3]*pred.covars[i,2] p1_pred_nonsptsugac1[i,j] <- 1/(1 + exp(-z)) } } #Create matrix for outputs of predicted probabilities for Fagus chain 2 p1_pred_nonsptsugac2 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac2$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonsptsugac2$p.samples)){ z <- nonsptsugac2$p.samples[j,1] + nonsptsugac2$p.samples[j,2]*pred.covars[i,1] + nonsptsugac2$p.samples[j,3]*pred.covars[i,2] p1_pred_nonsptsugac2[i,j] <- 1/(1 + exp(-z)) } } #Create matrix for outputs of predicted probabilities for Fagus chain 3 p1_pred_nonsptsugac3 <- matrix(0, nrow(pred.coords), nrow(nonsptsugac3$p.samples)) #take mcmc samples from nonspGLM for beta coefficients and multiply times x for predicted locations and get the probabilities using the logistic function for(i in 1:nrow(pred.coords)){ for(j in 1:nrow(nonsptsugac3$p.samples)){ z <- nonsptsugac3$p.samples[j,1] + nonsptsugac3$p.samples[j,2]*pred.covars[i,1] + nonsptsugac3$p.samples[j,3]*pred.covars[i,2] p1_pred_nonsptsugac3[i,j] <- 1/(1 + exp(-z)) } } # Aggregated models p1_pred_agfagusc1 <- spPredict(agfagusc1, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) p1_pred_agfagusc2 <- spPredict(agfagusc2, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) p1_pred_agfagusc3 <- spPredict(agfagusc3, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) p1_pred_agtsugac1 <- spPredict(agtsugac1, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) p1_pred_agtsugac2 <- spPredict(agtsugac2, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) p1_pred_agtsugac3 <- spPredict(agtsugac3, pred.coords=pred.coords, pred.covars=cbind(rep(1, nrow(pred.covars)), pred.covars), start=burn.in)) # FIA B-spline models # Fagus fia_surf <- mba.points(as.matrix(cbind(treebp0$albx, treebp0$alby, (treebp0$Fagus_grandifolia/fia$count))), as.matrix(cbind(pollen1$albx, pollen1$alby))) fia_surfmat <- as.matrix(fia_surf[[1]]) colnames(fia_surfmat) <- c("fiax","fiay","fiaz") fiapollen1 <- cbind(pollen1, fia_surf) rowid <- seq(1,nrow(fiapollen1),1) p1_pred_FIAfagus <- cbind(rowid, fiapollen1) # Tsuga fia_surf <- mba.points(as.matrix(cbind(treebp0$albx, treebp0$alby, (treebp0$Tsuga_canadensis/fia$count))), as.matrix(cbind(pollen1$albx, pollen1$alby))) fia_surfmat <- as.matrix(fia_surf[[1]]) colnames(fia_surfmat) <- c("fiax","fiay","fiaz") fiapollen1 <- cbind(pollen1, fia_surf) rowid <- seq(1,nrow(fiapollen1),1) p1_pred_FIAtsuga <- cbind(rowid, fiapollen1) ################################################################################## # Assess predictive performance of models (Area under the receiver operating curve, false negative rate, false positive rate) library(ROCR) time_slices <- seq(0,8,1) ########################## # # confuse # get confusion matrix from two vectors of 1's and 0's # gives matrix, percent correctly classified, and chi squared test # false negative and false positive rates can be calculated from the confusion matrix # Function written by Noah D. Charney ########################### confuse <- function(predicted,observed){ A <- predicted B <- observed confusion<-matrix(nrow=2,ncol=2, dimnames=list(c('pred.pres','pred.abs'),c('obs.pres','obs.abs'))) confusion[] <- c(sum(A*B), sum((1-A)*B), sum(A*(1-B)), sum((1-A)*(1-B)) ) perc.correct <- sum(diag(confusion))/sum(confusion) chisq<-chisq.test(confusion) occurrence <- sum(B)/length(B) if(occurrence < 0.5){ null <- 1- occurrence }else{ null <- occurrence } return(list(confusion=confusion,perc.correct=perc.correct,null=null,chisq=chisq)) } ########################## library(PresenceAbsence) # Fagus pure spatial model assessment # Create matrices to hold outputs spfnr_fagus_high <- matrix(NA, nrow=1000, ncol=9) colnames(spfnr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") spfnr_fagus_low <- matrix(NA, nrow=1000, ncol=9) colnames(spfnr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") spfpr_fagus_high <- matrix(NA, nrow=1000, ncol=9) colnames(spfpr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") spfpr_fagus_low <- matrix(NA, nrow=1000, ncol=9) colnames(spfpr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") spauc_fagus_high <- matrix(NA, nrow=1000, ncol=9) colnames(spauc_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") spauc_fagus_low <- matrix(NA, nrow=1000, ncol=9) colnames(spauc_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") # Holdout calculations ypred_holdout <- c(ho_pred_spfagusc1$y.pred, ho_pred_spfagusc2$y.pred, ho_pred_spfagusc3$y.pred) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred_holdout), 1000) for(j in 1:length(sub_sample)){ # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Fagus) AUC_temp <-performance(pred, "auc") spauc_fagus_low[1,j] <- AUC_temp@y.values[[1]][1] spauc_fagus_high[1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Fagus holdout cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$fagus_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=holdout$Fagus_bin) confusion_mat <- confusion_out$confusion spfnr_fagus_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) spfpr_fagus_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) spfnr_fagus_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) spfpr_fagus_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } # now do these calculations for Fagus pure spatial pollen for(i in 1:8){ ypred <- c(paste("p",i,"_pred_spfagusc1$y.pred", sep=''), paste("p",i,"_pred_spfagusc2$y.pred", sep=''), paste("p",i,"_pred_spfagusc3$y.pred", sep='')) pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred), 1000) for(j in 1:length(sub_sample)){ # Fagus low pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_.5) AUC_temp <-performance(pred, "auc") spauc_fagus_low[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Fagus low pollen threhold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_.5,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_.5) confusion_mat <- confusion_out$confusion spfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) spfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) # Fagus high pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_1) AUC_temp <-performance(pred, "auc") spauc_fagus_high[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Fagus high pollen threshold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_1) confusion_mat <- confusion_out$confusion spfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) spfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } } ############ # Tsuga pure spatial model assessment # Create matrices to hold outputs spfnr_tsuga_high <- matrix(NA, nrow=1000, ncol=9) colnames(spfnr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") spfnr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) colnames(spfnr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") spfpr_tsuga_high <- matrix(NA, nrow=1000, ncol=9) colnames(spfpr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") spfpr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) colnames(spfpr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") spauc_tsuga_high <- matrix(NA, nrow=1000, ncol=9) colnames(spauc_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") spauc_tsuga_low <- matrix(NA, nrow=1000, ncol=9) colnames(spauc_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") # Holdout calculations ypred_holdout <- c(ho_pred_sptsugac1$y.pred, ho_pred_sptsugac2$y.pred, ho_pred_sptsugac3$y.pred) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred_holdout), 1000) for(j in 1:length(sub_sample)){ # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Tsuga) AUC_temp <-performance(pred, "auc") spauc_tsuga_low[1,j] <- AUC_temp@y.values[[1]][1] spauc_tsuga_high[1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Tsuga holdout cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$tsuga_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=holdout$Tsuga_bin) confusion_mat <- confusion_out$confusion spfnr_tsuga_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) spfpr_tsuga_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) spfnr_tsuga_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) spfpr_tsuga_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } # now do these calculations for Tsuga pure spatial pollen for(i in 1:8){ ypred <- c(paste("p",i,"_pred_sptsugac1$y.pred", sep=''), paste("p",i,"_pred_sptsugac2$y.pred", sep=''), paste("p",i,"_pred_sptsugac3$y.pred", sep='')) pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred), 1000) for(j in 1:length(sub_sample)){ # Tsuga low pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_.5) AUC_temp <-performance(pred, "auc") spauc_tsuga_low[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Tsuga low pollen threhold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_.5,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_.5) confusion_mat <- confusion_out$confusion spfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) spfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) # Tsuga high pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_1) AUC_temp <-performance(pred, "auc") spauc_tsuga_high[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Tsuga high pollen threshold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_1) confusion_mat <- confusion_out$confusion spfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) spfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } } ######################## # Fagus non-spatial model assessment # Create matrices to hold outputs nonspfnr_fagus_high <- matrix(NA, nrow=1000, ncol=9) colnames(nonspfnr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") nonspfnr_fagus_low <- matrix(NA, nrow=1000, ncol=9) colnames(nonspfnr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") nonspfpr_fagus_high <- matrix(NA, nrow=1000, ncol=9) colnames(nonspfpr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") nonspfpr_fagus_low <- matrix(NA, nrow=1000, ncol=9) colnames(nonspfpr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") nonspauc_fagus_high <- matrix(NA, nrow=1000, ncol=9) colnames(nonspauc_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") nonspauc_fagus_low <- matrix(NA, nrow=1000, ncol=9) colnames(nonspauc_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") # Holdout calculations ypred_holdout <- c(ho_pred_nonspFagusc1$y.pred, ho_pred_nonspFagusc2$y.pred, ho_pred_nonspFagusc3$y.pred) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred_holdout), 1000) for(j in 1:length(sub_sample)){ # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Fagus) AUC_temp <-performance(pred, "auc") nonspauc_fagus_low[1,j] <- AUC_temp@y.values[[1]][1] nonspauc_fagus_high[1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Fagus holdout cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$fagus_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=holdout$Fagus_bin) confusion_mat <- confusion_out$confusion nonspfnr_fagus_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) nonspfpr_fagus_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) nonspfnr_fagus_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) nonspfpr_fagus_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } # now do these calculations for Fagus nonspatial pollen for(i in 1:8){ ypred <- c(paste("p",i,"_pred_nonspFagusc1$y.pred", sep=''), paste("p",i,"_pred_nonspFagusc2$y.pred", sep=''), paste("p",i,"_pred_nonspFagusc3$y.pred", sep='')) pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred), 1000) for(j in 1:length(sub_sample)){ # Fagus low pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_.5) AUC_temp <-performance(pred, "auc") nonspauc_fagus_low[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Fagus low pollen threhold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_.5,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_.5) confusion_mat <- confusion_out$confusion nonspfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) nonspfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) # Fagus high pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_1) AUC_temp <-performance(pred, "auc") nonspauc_fagus_high[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Fagus high pollen threshold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_1) confusion_mat <- confusion_out$confusion nonspfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) nonspfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } } ############################## # Tsuga non-spatial model assessment # Create matrices to hold outputs nonspfnr_tsuga_high <- matrix(NA, nrow=1000, ncol=9) colnames(nonspfnr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") nonspfnr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) colnames(nonspfnr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") nonspfpr_Tsuga_high <- matrix(NA, nrow=1000, ncol=9) colnames(nonspfpr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") nonspfpr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) colnames(nonspfpr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") nonspauc_tsuga_high <- matrix(NA, nrow=1000, ncol=9) colnames(nonspauc_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") nonspauc_tsuga_low <- matrix(NA, nrow=1000, ncol=9) colnames(nonspauc_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") # Holdout calculations ypred_holdout <- c(ho_pred_nonspTsugac1$y.pred, ho_pred_nonspTsugac2$y.pred, ho_pred_nonspTsugac3$y.pred) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred_holdout), 1000) for(j in 1:length(sub_sample)){ # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Tsuga) AUC_temp <-performance(pred, "auc") nonspauc_tsuga_low[1,j] <- AUC_temp@y.values[[1]][1] nonspauc_tsuga_high[1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Tsuga holdout cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$tsuga_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=holdout$Tsuga_bin) confusion_mat <- confusion_out$confusion nonspfnr_tsuga_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) nonspfpr_tsuga_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) nonspfnr_tsuga_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) nonspfpr_tsuga_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } # now do these calculations for Tsuga nonspatial pollen for(i in 1:8){ ypred <- c(paste("p",i,"_pred_nonspTsugac1$y.pred", sep=''), paste("p",i,"_pred_nonspTsugac2$y.pred", sep=''), paste("p",i,"_pred_nonspTsugac3$y.pred", sep='')) pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred), 1000) for(j in 1:length(sub_sample)){ # Tsuga low pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_1) AUC_temp <-performance(pred, "auc") nonspauc_tsuga_low[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Tsuga low pollen threhold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_1) confusion_mat <- confusion_out$confusion nonspfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) nonspfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) # Tsuga high pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_2) AUC_temp <-performance(pred, "auc") nonspauc_tsuga_high[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Tsuga high pollen threshold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_2,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_2) confusion_mat <- confusion_out$confusion nonspfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) nonspfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } } ######################## # Fagus aggregated model assessment # Create matrices to hold outputs agfnr_fagus_high <- matrix(NA, nrow=1000, ncol=9) colnames(agfnr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") agfnr_fagus_low <- matrix(NA, nrow=1000, ncol=9) colnames(agfnr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") agfpr_fagus_high <- matrix(NA, nrow=1000, ncol=9) colnames(agfpr_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") agfpr_fagus_low <- matrix(NA, nrow=1000, ncol=9) colnames(agfpr_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") agauc_fagus_high <- matrix(NA, nrow=1000, ncol=9) colnames(agauc_fagus_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") agauc_fagus_low <- matrix(NA, nrow=1000, ncol=9) colnames(agauc_fagus_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") # Holdout calculations ypred_holdout <- c(ho_pred_agFagusc1$y.pred, ho_pred_agFagusc2$y.pred, ho_pred_agFagusc3$y.pred) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred_holdout), 1000) for(j in 1:length(sub_sample)){ # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Fagus) AUC_temp <-performance(pred, "auc") agauc_fagus_low[1,j] <- AUC_temp@y.values[[1]][1] agauc_fagus_high[1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Fagus holdout cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$fagus_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=holdout$Fagus_bin) confusion_mat <- confusion_out$confusion agfnr_fagus_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) agfpr_fagus_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) agfnr_fagus_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) agfpr_fagus_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } # now do these calculations for Fagus aggregated pollen for(i in 1:8){ ypred <- c(paste("p",i,"_pred_agFagusc1$y.pred", sep=''), paste("p",i,"_pred_agFagusc2$y.pred", sep=''), paste("p",i,"_pred_agFagusc3$y.pred", sep='')) pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred), 1000) for(j in 1:length(sub_sample)){ # Fagus low pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_.5) AUC_temp <-performance(pred, "auc") agauc_fagus_low[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Fagus low pollen threhold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_.5,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_.5) confusion_mat <- confusion_out$confusion agfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) agfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) # Fagus high pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Fagus_1) AUC_temp <-performance(pred, "auc") agpauc_fagus_high[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Fagus high pollen threshold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Fagus_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Fagus_1) confusion_mat <- confusion_out$confusion agfnr_fagus_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) agfpr_fagus_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } } ############################## # Tsuga aggregated model assessment # Create matrices to hold outputs agfnr_tsuga_high <- matrix(NA, nrow=1000, ncol=9) colnames(agfnr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") agfnr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) colnames(agfnr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") agfpr_tsuga_high <- matrix(NA, nrow=1000, ncol=9) colnames(agfpr_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") agfpr_tsuga_low <- matrix(NA, nrow=1000, ncol=9) colnames(agfpr_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") agauc_tsuga_high <- matrix(NA, nrow=1000, ncol=9) colnames(agauc_tsuga_high) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") agauc_tsuga_low <- matrix(NA, nrow=1000, ncol=9) colnames(agauc_tsuga_low) <- c("holdout", "kabp1", "kabp2","kabp3", "kabp4","kabp5", "kabp6","kabp7", "kabp8") # Holdout calculations pred_holdout <- c(ho_pred_agTsugac1$y.pred, ho_pred_agTsugac2$y.pred, ho_pred_agTsugac3$y.pred) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred_holdout), 1000) for(j in 1:length(sub_sample)){ # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred_holdout[, sub_sample[j]], holdout$Tsuga) AUC_temp <-performance(pred, "auc") agauc_tsuga_low[1,j] <- AUC_temp@y.values[[1]][1] agauc_tsuga_high[1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Tsuga holdout cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$tsuga_bin,ypred_holdout$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=holdout$Tsuga_bin) confusion_mat <- confusion_out$confusion nonspfnr_tsuga_high[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) nonspfpr_tsuga_high[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) nonspfnr_tsuga_low[j,1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) nonspfpr_tsuga_low[j,1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } # now do these calculations for Tsuga nonspatial pollen for(i in 1:8){ ypred <- c(paste("p",i,"_pred_agTsugac1$y.pred", sep=''), paste("p",i,"_pred_agTsugac2$y.pred", sep=''), paste("p",i,"_pred_agTsugac3$y.pred", sep='')) pollen <- read.csv(paste("pollen",i,"kabp.csv",sep=''), header=TRUE) # Randomly sub-sample 1000 draws from the posterior predictive distribution sub_sample <- sample(ncol(ypred), 1000) for(j in 1:length(sub_sample)){ # Tsuga low pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_1) AUC_temp <-performance(pred, "auc") agauc_tsuga_low[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Tsuga low pollen threhold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_1,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_1) confusion_mat <- confusion_out$confusion agfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) agfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) # Tsuga high pollen threhold # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(ypred[, sub_sample[j]], pollen$Tsuga_2) AUC_temp <-performance(pred, "auc") agauc_tsuga_high[i+1,j] <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Tsuga high pollen threshold cutoff <- optimal.thresholds(cbind(sequence(1,nrow(ypred),1),pollen$Tsuga_2,ypred$[,sub_sample[j]), threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=pollen$Tsuga_2) confusion_mat <- confusion_out$confusion agfnr_tsuga_high[j,i+1] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) agfpr_tsuga_high[j,i+1] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } } ################ # FIA B-splines model assessment # Fagus holdout auc pred <- prediction(ho_pred_fiafagus$fiaz, holdout$Fagus) AUC_temp <-performance(pred, "auc") auc_Fagus_holdout <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Tsuga holdout auc pred <- prediction(ho_pred_fiatsuga$fiaz, holdout$Tsuga) AUC_temp <-performance(pred, "auc") auc_Tsuga_holdout <- AUC_temp@y.values[[1]][1] rm(list=c("pred","AUC_temp")) # Calculate fnr and fpr for Fagis holdout cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$Fagus_bin,ho_pred_fiafagus$fiaz, threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=holdout$Fagus_bin) confusion_mat <- confusion_out$confusion fpr_fagus_holdout <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) fnr_fagus_holdout <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) # Calculate fnr and fpr for Tsuga holdout cutoff <- optimal.thresholds(cbind(sequence(1,nrow(holdout),1),holdout$tsuga_bin,ho_pred_fiatsuga$fiaz, threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=holdout$Tsuga_bin) confusion_mat <- confusion_out$confusion fpr_tsuga_holdout <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) fnr_tsuga_holdout <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) # Pollen assessment FIA B-spline false negative and false positive rates # Create matrices to hold outputs FIAfnr <- matrix(NA, nrow=8, ncol=5) colnames(FIAfnr) <- c("kaBP", "Tsuga_1", "Tsuga_2", "Fagus_.5", "Fagus_1") FIAfnr[,1] <- seq(1,8,1) FIAfpr <- matrix(NA, nrow=8, ncol=5) colnames(FIAfpr) <- c("kaBP", "Tsuga_1", "Tsuga_2", "Fagus_.5", "Fagus_1") FIAfpr[,1] <- seq(1,8,1) for(i in 1:8){ pred_fagus <- paste("p", i, "_pred_FIAfagus") pred_tsuga <- paste("p", i, "_pred_FIAtsuga") # Low pollen threholds cutoff <- optimal.thresholds(cbind(sequence(1,nrow(treebp0),1),pred_fagus$Fagus_.5, pollen$fiaz, threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=treebp0$Fagus_.5) confusion_mat <- confusion_out$confusion FIAfnr[i,4] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) FIAfpr[i,4] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) cutoff <- optimal.thresholds(cbind(sequence(1,nrow(treebp0),1),pred_tsuga$Tsuga_1, pollen$fiaz, threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=treebp0$Tsuga_1) confusion_mat <- confusion_out$confusion FIAfnr[i,2] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) FIAfpr[i,2] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) # High pollen thresholds cutoff <- optimal.thresholds(cbind(sequence(1,nrow(treebp0),1),pred_fagus$Fagus_1, pollen$fiaz, threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=treebp0$Fagus_1) confusion_mat <- confusion_out$confusion FIAfnr[i,4] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) FIAfpr[i,4] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) cutoff <- optimal.thresholds(cbind(sequence(1,nrow(treebp0),1),pred_fagus$Tsuga_2, pollen$fiaz, threshold=100, opt.methods=2) cutoff <- cutoff[[2]][1] binpred <- ypred_holdout$[,sub_sample[j]) binpred[binpred > cutoff] <- 1 binpred[binpred < cutoff]<-0 confusion_out <- confuse(predicted=binpred, observed=treebp0$Tsuga_2) confusion_mat <- confusion_out$confusion FIAfnr[i,3] <- confusion_mat[2]/(confusion_mat[2]+confusion_mat[1]) FIAfpr[i,3] <- confusion_mat[3]/(confusion_mat[3]+confusion_mat[4]) rm(list=c("cutoff", "binpred", "confusion_out", "confusion_mat")) } # Pollen assessment FIA B-spline auc # Create matrices to hold outputs FIAauc <- matrix(NA, nrow=8, ncol=5) colnames(FIAauc) <- c("kaBP", "Tsuga_1", "Tsuga_2", "Fagus_5", "Fagus_1") FIAauc[,1] <- time_slices for(i in 1:length(time_slices)){ # read in pollen data for each 1000 year time slice pollen <- read.csv(paste("pollen",i,"kabp.csv", sep=''), header=TRUE) pollen_predTsuga <- paste("p",i,"_pred_FIAtsuga", sep='') pollen_predFagus <- paste("p",i,"_pred_FIAfagus", sep='') # Low pollen threshold fagus # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(pollen_predFagus$xyz.est.z, pollen$Fagus_.5) auc_temp <-performance(pred, "auc") auc[i,4] <- auc_temp@y.values[[1]][1] # high pollen threshold fagus # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(pollen_predFagus$xyz.est.z, fiapollen$Fagus_1) auc_temp <-performance(pred, "auc") auc[i,5] <- auc_temp@y.values[[1]][1] # Low pollen threshold Tsuga # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(pollen_predTsuga$xyz.est.z, fiapollen$Tsuga_1) auc_temp <-performance(pred, "auc") auc[i,2] <- auc_temp@y.values[[1]][1] # high pollen threshold Tsuga # Create ROCR prediction object to feed into performance function to calculate Area Under the Operating Curve (AUC) pred <- prediction(pollen_predTsuga$xyz.est.z, fiapollen$Tsuga_2) auc_temp <-performance(pred, "auc") auc[i,3] <- auc_temp@y.values[[1]][1] } ################### # Anovas library(car) data <- read.csv("anova_analysis.csv", header=TRUE) # AUC # Q-Q Plot to check for normality and homogeneity of variance par(mfrow=c(1,2)) qqnorm(data$auc) qqline(data$auc) # Arcsine transform response to meet normality assumption qqnorm(asin(data$auc)) qqline(asin(data$auc)) # Fit auc hindcasting linear model modauc <- glm(asin(auc) ~ genus*model + time + threshold, data=data, family=gaussian) # Check for homogeneity of variance by looking at a plot of the model's residuals plot(residuals(modauc)) summary(aov(modauc)) # Calculate Tukey's post hoc comparisons. Since the design is balanced, we don't have to worry about differences between sums of squares in coding the factors. library(agricolae) HSD.test(modauc, trt="model") HSD.test(modauc, trt="genus") HSD.test(modauc, trt="time") HSD.test(modauc, trt="threshold") # Graph interaction boxplot(auc ~ genus*model, data=data) ############################ # False negative rate # Q-Q Plot to check for normality and homogeneity of variance qqnorm(data$fnr) qqline(data$fnr) qqnorm(asin(data$fnr)) qqline(asin(data$fnr)) # Fit auc hindcasting linear model modfnr <- glm(asin(fnr) ~ genus*model + time + threshold, data=data, family=gaussian) # Check for homogeneity of variace by looking at a plot of the model's residuals plot(residuals(modfnr)) summary(aov(modfnr)) # Calculate Tukey's post hoc comparisons. Since the design is balanced, we don't have to worry about differences between sums of squares in coding the factors. HSD.test(modfnr, trt="model") HSD.test(modfnr, trt="genus") HSD.test(modfnr, trt="time") HSD.test(modfnr, trt="threshold") ############################ # False positive rate # Q-Q Plot to check for normality and homogeneity of variance qqnorm(data$fpr) qqline(data$fpr) qqnorm(asin(data$fpr)) qqline(asin(data$fpr)) # Fit auc hindcasting linear model modfpr <- glm(asin(fpr) ~ genus*model + time + threshold, data=data[-61,], family=gaussian) # Check for homogeneity of variace by looking at a plot of the model's residuals plot(residuals(modfpr)) summary(aov(modfpr)) # Calculate Tukey's post hoc comparisons. Since the design is balanced, we don't have to worry about differences between sums of squares in coding the factors. HSD.test(modfpr, trt="model") HSD.test(modfpr, trt="genus") HSD.test(modfpr, trt="time") HSD.test(modfpr, trt="threshold") # Bonferonni correction p <- read.csv("pvals.csv", header=TRUE) adjusted <- p.adjust(p$pvalue, "holm") output <-cbind(p, adjusted) ################### El fin. ######################################################