##################################################################################################
##written by Olivier Broennimann. Departement of Ecology and Evolution (DEE).
##October 09. University of Lausanne. Switzerland
##
##DESCRIPTION
## remove occurrences in a dataframe that are closer to each other than a specified distance threshold
##
## Rows are drawn one at a time in random order. A drawn row is discarded when at least one
## other not-yet-drawn row lies within min.dist of it (Euclidean distance on x/y, "<="),
## otherwise it is kept. The result therefore depends on the random number generator;
## call set.seed() beforehand for reproducible output.
##
##ARGUMENTS
##df: dataframe with x, y, and variables
##colxy: the two columns (indices or names) for x and y in df
##colvar: the columns for variables in df to return along with x and y (NULL: x and y only)
##min.dist: minimum distance threshold in the sub-dataframe
##plot: if TRUE, open a new graphics device showing initial (black) and kept (red) occurrences
##
##VALUE
## the kept rows of df, in their original order, with columns colxy followed by colvar
##

occ.desaggragation <- function(df, colxy, colvar = NULL, min.dist, plot = TRUE) {
  xs <- df[[colxy[1]]]
  ys <- df[[colxy[2]]]
  n.initial <- nrow(df)

  # The original failed with "missing value where TRUE/FALSE needed" on NA
  # coordinates; fail early with a message that names the actual problem.
  if (anyNA(xs) || anyNA(ys)) {
    stop("occ.desaggragation: x/y coordinates must not contain NA", call. = FALSE)
  }

  kept <- 0
  out <- 0
  keep.row <- rep(FALSE, n.initial)

  # Small progress window, only in interactive sessions so that the function
  # also runs on headless machines. dev.new() picks the platform's device.
  progress.dev <- NULL
  on.exit(
    if (!is.null(progress.dev)) grDevices::dev.off(progress.dev),
    add = TRUE
  )
  draw.progress <- function() {
    graphics::plot.new()
    graphics::text(0.5, 0.8, paste("# initial:", n.initial))
    graphics::text(0.5, 0.5, paste("# kept: ", kept))
    graphics::text(0.5, 0.2, paste("# out: ", out))
  }
  if (interactive()) {
    grDevices::dev.new(width = 2, height = 2, pointsize = 12)
    progress.dev <- grDevices::dev.cur()
    graphics::par(mar = c(0, 0, 0, 0))
    graphics::plot.new()
  }

  # Work on row positions instead of repeatedly copying the data frame.
  remaining <- seq_len(n.initial)
  while (length(remaining) > 0) {
    pick <- sample.int(length(remaining), 1)
    idx <- remaining[pick]
    d <- sqrt((xs[remaining] - xs[idx])^2 + (ys[remaining] - ys[idx])^2)
    # The drawn row is always at distance 0 from itself, hence "> 1".
    if (sum(d <= min.dist) > 1) {
      out <- out + 1
    } else {
      keep.row[idx] <- TRUE
      kept <- kept + 1
    }
    if (!is.null(progress.dev)) draw.progress()
    remaining <- remaining[-pick]
  }

  if (!is.null(progress.dev)) {
    grDevices::dev.off(progress.dev)
    progress.dev <- NULL
  }

  if (is.null(colvar) || ncol(df) == 2) {
    final <- df[keep.row, colxy]
  } else {
    final <- df[keep.row, c(colxy, colvar)]
  }

  if (isTRUE(as.logical(plot))) {
    grDevices::dev.new()
    graphics::plot(
      df[, colxy],
      main = "distribution of occurences",
      sub = paste("# initial (black):", n.initial, " | # kept (red): ", kept),
      pch = 19, col = "black", cex = 0.2
    )
    # Index the input by keep.row: 'final' has its columns reordered, so
    # final[, colxy] picked the wrong columns (or failed) unless colxy was 1:2.
    graphics::points(df[keep.row, colxy], pch = 19, col = "red", cex = 0.2)
  }

  final
}

##################################################################################################
##written by Olivier Broennimann. Departement of Ecology and Evolution (DEE).
##October 09.
##University of Lausanne. Switzerland
##
##DESCRIPTION
##
## add environmental values to a species dataframe.
## the xy (lat/long) coordinates of the species occurrences are compared to those of the environment dataframe
## and the value of the closest pixel is added to the species dataframe.
## when the closest environment pixel is more distant than resolution, NA is added instead of the value.
## (similar to sample() in ArcGIS)
##
##ARGUMENTS
##dfsp: species dataframe with x, y and optional other variables
##colspxy: the range of columns for x and y in dfsp
##colspkept: the columns of dfsp that should be kept in the final dataframe (by default: xy;
##           NULL: return only the sampled environmental columns)
##dfvar: environmental dataframe with x, y and environmental variables
##colvarxy: the range of columns for x and y in dfvar
##colvar: the range of environmental variables columns in dfvar. (by default: all except xy;
##        all columns including xy when colspkept is NULL)
##resolution: distance between x,y of species and environmental dataframe after which values shouldn't be added
##(typically, the resolution of the data in dfvar)
##
##VALUE
## dataframe with one row per row of dfsp: the kept dfsp columns followed by the sampled
## dfvar columns (NA where no environmental pixel lies within resolution)

sample.sp.globvar <- function(dfsp, colspxy, colspkept = "xy", dfvar, colvarxy,
                              colvar = "all", resolution) {
  if (sum(colspkept == "xy") == 1) colspkept <- colspxy
  if (sum(colvar == "all") == 1) {
    all.cols <- seq_len(ncol(dfvar))
    if (is.null(colspkept)) {
      colvar <- all.cols
    } else {
      # Accept column names as well as indices for the xy columns.
      xy.idx <- if (is.character(colvarxy)) match(colvarxy, names(dfvar)) else colvarxy
      colvar <- all.cols[-xy.idx]
    }
  }

  sp.x <- dfsp[[colspxy[1]]]
  sp.y <- dfsp[[colspxy[2]]]
  env.x <- dfvar[[colvarxy[1]]]
  env.y <- dfvar[[colvarxy[2]]]
  n.sp <- nrow(dfsp)

  # Small progress window, only in interactive sessions so that the function
  # also runs on headless machines; always closed again, even on error.
  progress.dev <- NULL
  on.exit(
    if (!is.null(progress.dev)) grDevices::dev.off(progress.dev),
    add = TRUE
  )
  if (interactive()) {
    grDevices::dev.new(width = 2, height = 2, pointsize = 12)
    progress.dev <- grDevices::dev.cur()
    graphics::par(mar = c(0, 0, 0, 0))
  }

  # Row of dfvar closest to each species record; NA when nothing is within
  # resolution (or when the distance cannot be computed).
  nearest <- rep(NA_integer_, n.sp)
  for (i in seq_len(n.sp)) {
    d <- sqrt((env.x - sp.x[i])^2 + (env.y - sp.y[i])^2)
    j <- which.min(d)  # first closest pixel, as in dfvar[d == min(d), ][1, ]
    if (length(j) == 1 && d[j] <= resolution) nearest[i] <- j
    if (!is.null(progress.dev)) {
      graphics::plot.new()
      graphics::text(0.5, 0.5, paste(paste("sampling:", "\n", "runs to go: ", n.sp - i)))
    }
  }

  # Indexing with NA yields all-NA rows; column names and types come from dfvar.
  train <- dfvar[nearest, colvar, drop = FALSE]
  row.names(train) <- NULL

  if (is.null(colspkept)) {
    return(train)
  }
  # drop = FALSE keeps the column name when a single dfsp column is kept.
  cbind(dfsp[, colspkept, drop = FALSE], train)
}

##################################################################################################
##written by Olivier Broennimann. Departement of Ecology and Evolution (DEE).
##October 09. University of Lausanne. Switzerland
##
##DESCRIPTION
##Investigate spatial autocorrelation by drawing a mantel Correlogram (autocorrelation vs distance)
##Requires the 'ecodist' package (mgram and its plot method).
##
##ARGUMENTS
##df: dataframe with x, y, and variables
##colxy: the range of columns for x and y in df
##colvar: the range of columns for variables in df
##n: number of random occurrences used for the test, drawn with replacement
##   (computation time increases tremendously when using more than 500 occ.)
##max: maximum distance to be computed in the correlogram
##nclass: number of class of distance to be computed in the correlogram
##nperm: number of permutation in the randomization process
##
##VALUE
## the object returned by ecodist::mgram, invisibly; the correlogram is plotted as a side effect

mantel.correlogram <- function(df, colxy, n, colvar, max, nclass, nperm) {
  library(ecodist)

  # Column-wise standardisation (centre on the mean, divide by the sd).
  # mean()/sd() applied to a whole data frame no longer return per-column
  # values in current R, so use scale(), which does this per column.
  envnorm <- data.frame(scale(df[, colvar, drop = FALSE]))

  row.rand <- sample.int(nrow(df), n, replace = TRUE)
  envdist <- stats::dist(envnorm[row.rand, , drop = FALSE])
  geodist <- stats::dist(df[row.rand, colxy])

  b <- seq(from = min(geodist), to = max, length.out = nclass)
  crlg <- mgram(envdist, geodist, breaks = b, nperm = nperm)

  plot(crlg)
  graphics::abline(h = 0)
  invisible(crlg)
}

##################################################################################################
##written by Olivier Broennimann. Departement of Ecology and Evolution (DEE).
##October 09. University of Lausanne. Switzerland
##
##DESCRIPTION
##randomly sample pseudoabsences from an environmental dataframe covering the study area
##A minimum distance from presences can be set.
##
##ARGUMENTS
##nbabsences: number of pseudoabsences desired
##glob: environmental dataframe covering the study area to sample, with x,y
##colxyglob: the range of columns for x and y in glob
##colvar: the range of columns for variables in glob. colvar="all" keeps all the variables in glob in the final dataframe.
##        colvar=NULL keeps only x and y
##presence: occurrence dataframe
##colxypresence: the range of columns for x and y in presence
##mindist: minimum distance from presences closer to which pseudoabsences shouldn't be drawn (buffer distance around presences)
##
##VALUE
## dataframe of nbabsences rows of glob (x, y, then the colvar columns). Rows are drawn
## with replacement, so the same row of glob can appear more than once.
## An error is raised when no row of glob lies farther than mindist from every presence.

rand.pseudoabsences <- function(nbabsences, glob, colxyglob, colvar = "all",
                                presence, colxypresence, mindist) {
  glob.x <- glob[[colxyglob[1]]]
  glob.y <- glob[[colxyglob[2]]]
  pres.x <- presence[[colxypresence[1]]]
  pres.y <- presence[[colxypresence[2]]]
  n.glob <- nrow(glob)

  if (anyNA(pres.x) || anyNA(pres.y)) {
    stop("rand.pseudoabsences: presence coordinates must not contain NA", call. = FALSE)
  }
  if (nbabsences >= 1 && n.glob == 0) {
    stop("rand.pseudoabsences: 'glob' has no rows to sample from", call. = FALSE)
  }

  keep <- integer(max(0, floor(nbabsences)))
  # Remember rows already found too close to a presence: avoids recomputing
  # their distances and lets us detect that no admissible row exists (the
  # plain rejection loop would otherwise never terminate).
  rejected <- rep(FALSE, n.glob)
  n.rejected <- 0
  no.i <- 1
  while (no.i <= nbabsences) {
    ki <- sample.int(n.glob, 1)
    if (rejected[ki]) next
    d2 <- (glob.x[ki] - pres.x)^2 + (glob.y[ki] - pres.y)^2
    # A candidate whose distance cannot be computed (NA coordinate) is rejected.
    far.enough <- !anyNA(d2) && !any(d2 <= mindist^2)
    if (far.enough) {
      keep[no.i] <- ki
      no.i <- no.i + 1
    } else {
      rejected[ki] <- TRUE
      n.rejected <- n.rejected + 1
      if (n.rejected == n.glob) {
        stop("rand.pseudoabsences: no row of 'glob' is farther than 'mindist' from all presences",
             call. = FALSE)
      }
    }
  }

  if (sum(colvar == "all") == 1) {
    # Every column of glob except x and y. The original referred to an
    # undefined 'colvarxy' here, so the default colvar = "all" always failed.
    xy.idx <- if (is.character(colxyglob)) match(colxyglob, names(glob)) else colxyglob
    colvar <- setdiff(seq_len(ncol(glob)), xy.idx)
    # Keep the selector type consistent with colxyglob for c(colxyglob, colvar).
    if (is.character(colxyglob)) colvar <- names(glob)[colvar]
  }

  if (is.null(colvar)) {
    pseudoabs <- glob[keep, colxyglob]
  } else {
    pseudoabs <- glob[keep, c(colxyglob, colvar)]
  }
  pseudoabs
}