#---(From Rmd)
#  title: "new_data"
#author: "Maggie Anderson"
#date: "July 1, 2018"
#output: html_document
#editor_options: 
#  chunk_output_type: console
#
  
#
#  title: "Tsugca_code_05-30"
#author: "Maggie Anderson"
#date: "May 30, 2018"
#output: html_document
#---
  

megaplot.raw <- read.csv(url("http://harvardforest.fas.harvard.edu/data/p25/hf253/hf253-03-trees-2014.csv"))
library(ggplot2)
library(geoR)
library(gstat)
library(readr)
library(spatstat)
library(aplpack)
library(sp)
library(rgdal)
library(dplyr)
library(geosphere)
library(ggrepel)
#grab some useful functions for rasterization (written by Hannah Buckley & Aaron Ellison for codispersion analysis)

source("Codispersion-analysis-functions.R")



##############Data Acquisition##########

#read in raw data (local copy)

megaplot.raw <- read.csv("hf253-03-trees-2014.csv")

#select living trees > 10-cm dbh, and four species to use

megaplot.live <-  megaplot.raw[megaplot.raw$df.status=="alive" & megaplot.raw$dbh>=10,]
use.species <- c('acerru', 'pinust', 'querru', 'tsugca')
megaplot.4sp <- megaplot.live[megaplot.live$sp %in% use.species,]
megaplot.4sp <- droplevels(megaplot.4sp)

ggplot(data=megaplot.4sp, aes(x=gx, y=gy)) +
  geom_point() +
  facet_grid(sp ~.)


############### Sp. 4: HEMLOCK -Data Aggregation and computation############

#### Set the plot dimensions
xmin=0; xmax=700; ymin=0; ymax=500  

##For Tsuga canadensis (hemlock) (can loop over use.species); outputs, x, y, basal area as z

megaplot.tc.ppp <- ppp(megaplot.4sp$gx[megaplot.4sp$sp == use.species[4]], 
                       megaplot.4sp$gy[megaplot.4sp$sp == use.species[4]], marks=megaplot.4sp$dbh[megaplot.4sp$sp == use.species[4]], 
                       xrange = c(xmin, xmax), yrange = c(ymin, ymax))

plot(megaplot.tc.ppp, main = use.species[4])

#### Create geoR objects, one with abundance per cell, the other total basal area per cell
megaplot.tc.abun.geoR <- ppp.to.geoR.fn(megaplot.tc.ppp, quad.size = 100, xmin, xmax, ymin, ymax, method = "abundance")
megaplot.tc.ba.geoR <- ppp.to.geoR.fn(megaplot.tc.ppp, quad.size = 100, xmin, xmax, ymin, ymax, method = "total.ba")

#geo R -> data frames

#abundance
tc.abun <- data.frame(megaplot.tc.abun.geoR)
names(tc.abun) <- c("x", "y", "count")

#basal area

tc.ba <- data.frame(megaplot.tc.ba.geoR)
names(tc.ba) <- c("x", "y", "totalba")

#combined

tc.sample <- merge(tc.abun, tc.ba)


############### Sp. 3: OAK -Data Aggregation and computation############

#### Set the plot dimensions
xmin=0; xmax=700; ymin=0; ymax=500  

##For Quercus rubra (oak) (can loop over use.species); outputs, x, y, basal area as z

megaplot.qu.ppp <- ppp(megaplot.4sp$gx[megaplot.4sp$sp == use.species[3]], 
                       megaplot.4sp$gy[megaplot.4sp$sp == use.species[3]], marks=megaplot.4sp$dbh[megaplot.4sp$sp == use.species[3]], 
                       xrange = c(xmin, xmax), yrange = c(ymin, ymax))

plot(megaplot.qu.ppp, main = use.species[3])

#### Create geoR objects, one with abundance per cell, the other total basal area per cell
megaplot.qu.abun.geoR <- ppp.to.geoR.fn(megaplot.qu.ppp, quad.size = 100, xmin, xmax, ymin, ymax, method = "abundance")
megaplot.qu.ba.geoR <- ppp.to.geoR.fn(megaplot.qu.ppp, quad.size = 100, xmin, xmax, ymin, ymax, method = "total.ba")

#geo R -> data frames

#abundance
qu.abun <- data.frame(megaplot.qu.abun.geoR)
names(qu.abun) <- c("x", "y", "count")

#basal area

qu.ba <- data.frame(megaplot.qu.ba.geoR)
names(qu.ba) <- c("x", "y", "totalba")

#combined

qu.sample <- merge(qu.abun, qu.ba)


############### Sp. 2: WHITE PINE -Data Aggregation and computation############

#### Set the plot dimensions
xmin=0; xmax=700; ymin=0; ymax=500  

##For Pinus strobus (White Pine) (can loop over use.species); outputs, x, y, basal area as z

megaplot.ps.ppp <- ppp(megaplot.4sp$gx[megaplot.4sp$sp == use.species[2]], 
                       megaplot.4sp$gy[megaplot.4sp$sp == use.species[2]], marks=megaplot.4sp$dbh[megaplot.4sp$sp == use.species[2]], 
                       xrange = c(xmin, xmax), yrange = c(ymin, ymax))

plot(megaplot.ps.ppp, main = use.species[3])

#### Create geoR objects, one with abundance per cell, the other total basal area per cell
megaplot.ps.abun.geoR <- ppp.to.geoR.fn(megaplot.ps.ppp, quad.size = 100, xmin, xmax, ymin, ymax, method = "abundance")
megaplot.ps.ba.geoR <- ppp.to.geoR.fn(megaplot.ps.ppp, quad.size = 100, xmin, xmax, ymin, ymax, method = "total.ba")

#geo R -> data frames

#abundance
ps.abun <- data.frame(megaplot.ps.abun.geoR)
names(ps.abun) <- c("x", "y", "count")

#basal area

ps.ba <- data.frame(megaplot.ps.ba.geoR)
names(ps.ba) <- c("x", "y", "totalba")

#combined

ps.sample <- merge(ps.abun, ps.ba)




############### Sp. 1: RED MAPLE -Data Aggregation and computation############

#### Set the plot dimensions
xmin=0; xmax=700; ymin=0; ymax=500  

##For Acer rubrum (Red Maple) (can loop over use.species); outputs, x, y, basal area as z

megaplot.ar.ppp <- ppp(megaplot.4sp$gx[megaplot.4sp$sp == use.species[1]], 
                       megaplot.4sp$gy[megaplot.4sp$sp == use.species[1]], marks=megaplot.4sp$dbh[megaplot.4sp$sp == use.species[1]], 
                       xrange = c(xmin, xmax), yrange = c(ymin, ymax))

plot(megaplot.ar.ppp, main = use.species[1])

#### Create geoR objects, one with abundance per cell, the other total basal area per cell
megaplot.ar.abun.geoR <- ppp.to.geoR.fn(megaplot.ar.ppp, quad.size = 100, xmin, xmax, ymin, ymax, method = "abundance")
megaplot.ar.ba.geoR <- ppp.to.geoR.fn(megaplot.ar.ppp, quad.size = 100, xmin, xmax, ymin, ymax, method = "total.ba")

#geo R -> data frames

#abundance
ar.abun <- data.frame(megaplot.ar.abun.geoR)
names(ar.abun) <- c("x", "y", "count")

#basal area

ar.ba <- data.frame(megaplot.ar.ba.geoR)
names(ar.ba) <- c("x", "y", "totalba")

#combined

ar.sample <- merge(ar.abun, ar.ba)


####Determine number of trees needed####

##need samples proportional to total basal area

#copy 4 species file to working file
sample.set <- megaplot.4sp

#compute basal area of each tree
sample.set$ba <- basal.area.fn(sample.set$dbh)

#aggregate to get sum of ba per species, to set relative proportions of sample (out of n = 1000 trees)
#temp1 variable prop has number of individual trees to be sampled
temp1 <- aggregate(ba~sp, data=sample.set, sum)
temp1$prop <- round(temp1$ba/sum(temp1$ba)*1250,0)

#now, return to abundance data frame <sp>.abun
#we want to get temp1$prop total individuals spread weighted by total basal area in each 100 x 100 m cell
#so, if we take (totalba/cell)/temp1$prop, round to 0 decimals, we get sample size...


### Sp. 4: HEMLOCK ###
tc.sample$sample <- round(tc.sample$totalba/temp1$prop[temp1$sp==use.species[4]]*tc.sample$count,0)

#plot HEMLOCK
plot(tc.sample$x+50, tc.sample$y+50, xlim=c(0,700), ylim=c(0,500), xlab="Easting (m)", ylab="Northing (m)", las=1, pch=19, main="Hemlock")
text(tc.sample$x+50, tc.sample$y+70, tc.sample$count, col="red")
mtext("count", 3, adj=0, col="red")
text(tc.sample$x+50, tc.sample$y+30, round(tc.sample$totalba,0), col="blue")
mtext("total basal area (m^2)", 3, adj=1, col="blue")
text(tc.sample$x+80, tc.sample$y+50, tc.sample$sample, col="darkgreen")
mtext("Sample size", 3, col="darkgreen")


### Sp. 3: RED OAK ###
qu.sample$sample <- round(qu.sample$totalba/temp1$prop[temp1$sp==use.species[3]]*qu.sample$count,0)

#plot RED OAK
plot(qu.sample$x+50, qu.sample$y+50, xlim=c(0,700), ylim=c(0,500), xlab="Easting (m)", ylab="Northing (m)", las=1, pch=19, main="Red Oak")
text(qu.sample$x+50, qu.sample$y+70, qu.sample$count, col="red")
mtext("count", 3, adj=0, col="red")
text(qu.sample$x+50, qu.sample$y+30, round(qu.sample$totalba,0), col="blue")
mtext("total basal area (m^2)", 3, adj=1, col="blue")
text(qu.sample$x+80, qu.sample$y+50, qu.sample$sample, col="darkgreen")
mtext("Sample size", 3, col="darkgreen")


### Sp. 2: WHITE PINE ###
ps.sample$sample <- round(ps.sample$totalba/temp1$prop[temp1$sp==use.species[2]]*tc.sample$count,0)

#plot WHITE PINE
plot(ps.sample$x+50, ps.sample$y+50, xlim=c(0,700), ylim=c(0,500), xlab="Easting (m)", ylab="Northing (m)", las=1, pch=19, main="White Pine")
text(ps.sample$x+50, ps.sample$y+70, ps.sample$count, col="red")
mtext("count", 3, adj=0, col="red")
text(ps.sample$x+50, ps.sample$y+30, round(ps.sample$totalba,0), col="blue")
mtext("total basal area (m^2)", 3, adj=1, col="blue")
text(ps.sample$x+80, ps.sample$y+50, ps.sample$sample, col="darkgreen")
mtext("Sample size", 3, col="darkgreen")


ar.sample$sample <- round(ar.sample$totalba/temp1$prop[temp1$sp==use.species[1]]*tc.sample$count,0)

#plot RED MAPLE
plot(ar.sample$x+50, ar.sample$y+50, xlim=c(0,700), ylim=c(0,500), xlab="Easting (m)", ylab="Northing (m)", las=1, pch=19, main="Red Maple")
text(ar.sample$x+50, ar.sample$y+70, ar.sample$count, col="red")
mtext("count", 3, adj=0, col="red")
text(ar.sample$x+50, ar.sample$y+30, round(ar.sample$totalba,0), col="blue")
mtext("total basal area (m^2)", 3, adj=1, col="blue")
text(ar.sample$x+80, ar.sample$y+50, ar.sample$sample, col="darkgreen")
mtext("Sample size", 3, col="darkgreen")


###############Identify sample set################

#sample(<from polygon>, size=tc.sample$sample, replace=FALSE) 

#example for hemlock
#tc.for.sampling <- megaplot.4sp[megaplot.4sp$sp==use.species[4], c(3:4,6:8:)]

tc.for.sampling <- megaplot.4sp[megaplot.4sp$sp==use.species[4], c(3:4,5:6,7:8,11)]
qu.for.sampling <- megaplot.4sp[megaplot.4sp$sp==use.species[3], c(3:4,5:6,7:8,11)]
ps.for.sampling <- megaplot.4sp[megaplot.4sp$sp==use.species[2], c(3:4,5:6,7:8,11)]
ar.for.sampling <- megaplot.4sp[megaplot.4sp$sp==use.species[1], c(3:4,5:6,7:8,11)]

summary(tc.for.sampling) #sp 4
summary(qu.for.sampling) #sp 3
summary(ps.for.sampling) #sp 2
summary(ar.for.sampling) #sp 1
#loop over plot, across rows, then up columns)

## sort for HEMLOCK
tc.sample <-  tc.sample[with(tc.sample, order(y,x)),] #sorted across rows, up columns

## sort for RED OAK
qu.sample <-  qu.sample[with(qu.sample, order(y,x)),] 

## sort for WHITE PINE
ps.sample <-  ps.sample[with(ps.sample, order(y,x)),] 

## sort for RED MAPLE
ar.sample <-  ar.sample[with(ar.sample, order(y,x)),] 



set.seed(1) #setting random seed ensures that each time it runs it will yield the same thing

sample.area <- NULL
sample.quadrats <- NULL
sample.trees <- NULL
hemlock.sample.trees <- list(sample.area, sample.quadrats, sample.trees)
redoak.sample.trees <- list(sample.area, sample.quadrats, sample.trees)
whitepine.sample.trees <- list(sample.area, sample.quadrats, sample.trees)
redmaple.sample.trees <- list(sample.area, sample.quadrats, sample.trees)



#get rid opt points/shrink
#no label overlap
# list into dataframe
tc.for.sampling <- data.frame(tc.for.sampling)
qu.for.sampling <- data.frame(qu.for.sampling)
ps.for.sampling <- data.frame(ps.for.sampling)
ar.for.sampling <- data.frame(ar.for.sampling)
##NEED THIS? 

#plot trees with labels by color
#ggplot(tc.for.sampling, aes(x=gx, y=gy, colour = sp)) + geom_point() +geom_text_repel(aes(label = stem.and.time)) + xlim(20, 40) + ylim(0,20)


#list trees for sp. 4: HEMLOCK
for (y in 0:4) {
  for (x in 0:6) {
    sample.box<-tc.for.sampling[tc.for.sampling$gx >= x*100 & tc.for.sampling$gx < (x+1)*100
                                & tc.for.sampling$gy >=y*100 & tc.for.sampling$gy < (y+1)*100 , ]
    hemlock.sample.trees[[1]][(x+1)+ y*7] <- paste(x*100, y*100)
    sample.me <- sort(sample(sample.box$stem.tag, size=tc.sample$sample[(x+1)+y*7], replace=FALSE))
    sample.quads <- sort(tc.for.sampling[tc.for.sampling$stem.tag %in% sample.me,]$quadrat)
    hemlock.sample.trees[[2]][(x+1)+y*7] <- paste(sample.quads, collapse=", ")
    hemlock.sample.trees[[3]][(x+1)+y*7] <- paste(sample.me, collapse=", ")
  }
} 

hemlock.sample.trees
#unpaste to create vector, then map


###list trees for sp. 3: RED OAK
for (y in 0:4) {
  for (x in 0:6) {
    sample.box<-qu.for.sampling[qu.for.sampling$gx >= x*100 & qu.for.sampling$gx < (x+1)*100
                                & qu.for.sampling$gy >=y*100 & qu.for.sampling$gy < (y+1)*100 , ]
    redoak.sample.trees[[1]][(x+1)+ y*7] <- paste(x*100, y*100)
    sample.me <- sort(sample(sample.box$stem.tag, size=qu.sample$sample[(x+1)+y*7], replace=FALSE))
    sample.quads <- sort(qu.for.sampling[qu.for.sampling$stem.tag %in% sample.me,]$quadrat)
    redoak.sample.trees[[2]][(x+1)+y*7] <- paste(sample.quads, collapse=", ")
    redoak.sample.trees[[3]][(x+1)+y*7] <- paste(sample.me, collapse=", ")
  }
} 

redoak.sample.trees
#unpaste to create vector, then map


###list trees for sp. 2: WHITE PINE
for (y in 0:4) {
  for (x in 0:6) {
    sample.box<-ps.for.sampling[ps.for.sampling$gx >= x*100 & ps.for.sampling$gx < (x+1)*100
                                & ps.for.sampling$gy >=y*100 & ps.for.sampling$gy < (y+1)*100 , ]
    whitepine.sample.trees[[1]][(x+1)+ y*7] <- paste(x*100, y*100)
    ### ERROR 
    sample.me <- sort(sample(sample.box$stem.tag, size=ps.sample$sample[(x+1)+y*7], replace=FALSE))
    sample.quads <- sort(ps.for.sampling[ps.for.sampling$stem.tag %in% sample.me,]$quadrat)
    whitepine.sample.trees[[2]][(x+1)+y*7] <- paste(sample.quads, collapse=", ")
    whitepine.sample.trees[[3]][(x+1)+y*7] <- paste(sample.me, collapse=", ")
  }
} 

whitepine.sample.trees
#unpaste to create vector, then map


###list trees for sp. 1: RED MAPLE
for (y in 0:4) {
  for (x in 0:6) {
    sample.box<-ar.for.sampling[ar.for.sampling$gx >= x*100 & ar.for.sampling$gx < (x+1)*100
                                & ar.for.sampling$gy >=y*100 & ar.for.sampling$gy < (y+1)*100 , ]
    redmaple.sample.trees[[1]][(x+1)+ y*7] <- paste(x*100, y*100)
    sample.me <- sort(sample(sample.box$stem.tag, size=ar.sample$sample[(x+1)+y*7], replace=FALSE))
    sample.quads <- sort(ar.for.sampling[ar.for.sampling$stem.tag %in% sample.me,]$quadrat)
    redmaple.sample.trees[[2]][(x+1)+y*7] <- paste(sample.quads, collapse=", ")
    redmaple.sample.trees[[3]][(x+1)+y*7] <- paste(sample.me, collapse=", ")
  }
} 

redmaple.sample.trees
#unpaste to create vector, then map



### create final df for sampling)

#load .csv files
tc.stem.ID <- read.csv("stemID.tc.csv")
qr.stem.ID <- read.csv("stemID.qr.csv")
ps.stem.ID <- read.csv("stemID.ps.csv")
ar.stem.ID <- read.csv("stemID.ar.csv")

#merge datasets
all.stems <- rbind(tc.stem.ID, qr.stem.ID)
all.stems <- rbind(all.stems, ps.stem.ID)
all.stems <- rbind(all.stems, ar.stem.ID)
summary(all.stems)

#add dbh from megaplot.4sp
head(megaplot.4sp)
all.stems.1 <- merge(x = all.stems, y = megaplot.4sp, by = 'stem.tag')
summary(all.stems.1)

#add columns to dataframs
as.data.frame()#create dataframe
#add basal area
all.stems.1 <- mutate(all.stems.1, basal.area = pi*(dbh/2)^2)
#sample area
all.stems.1 <- mutate(all.stems.1, area.m = (pi*((dbh/2)+95)^2)-basal.area)
#add time
all.stems.1 <- mutate(all.stems.1, time.m = (dbh*(1/5)))

#parse stem.tag and time.m using paste()
stem.and.time <- paste(all.stems.1$stem.tag, all.stems.1$time.m, sep="_")

#plot sample trees within sector, label by stem.tag & time.m 
ggplot(all.stems.1, aes(x=gx, y=gy, color = sp.y, shape = sp.y)) + geom_point() + geom_text_repel(aes(label = stem.and.time)) +  xlim(0, 60) + ylim(0,60) 


#generate list of all trees in sector
#plots 1 - 25 exported
xy.sec <- subset(subset(all.stems.1, gx>=0 & gx<=60 & gy>=0 & gy<=60), select = c("stem.tag", "sp.x", "time.m"))
head(xy.sec)
summary(xy.sec)

#export as .csv file
#plots 1 - 20 exported
write.table(xy.sec, file = "tree_data_sec50-100")


#stems we have already sampled
done.stems <- subset(subset(all.stems.1, gx>=0 & gx<=320 & gy>=0 & gy<=20))
summary(done.stems)