Concord Phenology data analysis with Chuck Davis and Charlie Willis New version with Blue Hills temperature data 15 October 2015 A.M. Ellison Note: All datafile names refer to data files on HF Archives, Dataset HF-258 ```{r} library(ggplot2) library(grid) library(zoo) library(reshape) library(MASS) library(nlme) library(dplyr) library(sp) #library(reshape2) ``` Needed functions for later ```{r} ### Plotting function to plot convex hulls ### Filename: Plot_ConvexHull.R ### Notes: ############################################################################ # INPUTS: # xcoords: x-coordinates of point data # ycoords: y-coordinates of point data # lcolor: line color # OUTPUTS: # convex hull around data points in a particular color (specified by lcolor) # FUNCTION: Plot_ConvexHull<-function(xcoord, ycoord, lcolor, llwd){ hpts <- chull(x = xcoord, y = ycoord) hpts <- c(hpts, hpts[1]) lines(xcoord[hpts], ycoord[hpts], col = lcolor, lwd=llwd) } # END OF FUNCTION my_theme_bw <- function (base_size = 12, base_family = "") { theme_grey(base_size = base_size, base_family = base_family) %+replace% theme(axis.text = element_text(size = rel(0.8)), axis.ticks = element_line(colour = "black"), legend.key = element_rect(fill="gray100", colour = "white"), panel.background = element_rect(fill = "white", colour = NA), panel.border = element_rect(fill = NA, colour = "grey50"), panel.grid.major = element_line(colour = "grey90", size = 0.2), panel.grid.minor = element_line(colour = "grey98", size = 0.5), strip.background = element_rect(fill = "grey80", colour = "grey50", size = 0.2)) } ``` I. Examine temperature data files Blue Hills Temps 1. bluehills 1893-2015 data provided by Bryan Connolly via Charlie Willis, includes min/max data 1893-2015, and Daymet values. 2. Primack data are 1831-2007 provided by Richard Primack and are monthly means. Provenance is unknown. 3. Historical data file of Blue Hills met station (1885-present) from NOAA's Global Historical Climatology Network - Monthly (GHCN): http://www.ncdc.noaa.gov/ghcnm/ but issing 2005-2006! Downloaded and subsetted 2015-03-16 Other MA sites from GHCN available include: Bedford (Middlesex County), Great Barrington (Berkshire), Lawrence (Essex), New Bedford (Bristol), Plymouth-Kingston (Plymouth), Provincetown (Barnstable), Reading (Middlesex), Taunton (Bristol), Walpole (Norfolk), and West Medway (Norfolk) ```{r} #Primack temperature data bhtemp.rp <- read.csv("hf258-01-blue-hills.csv") summary(bhtemp.rp) #Primack data is in "wide" format, so get into long format bh.temp.long <- reshape(bhtemp.rp, idvar="year", varying=list(2:13), times=names(bhtemp.rp)[2:13], direction="long") names(bh.temp.long)<- c("year","month","mean.m.temp") bh.temp.long$month <- factor(bh.temp.long$month, levels=c("jan", "feb", "mar", "apr", "may","jun","jul","aug","sep","oct","nov","dec")) summary(bh.temp.long) #sort it before turning into time-series object bh.temp.long <- bh.temp.long[order(bh.temp.long[,1], bh.temp.long[,2]),] #plot to check against Miller-Rushing et al. 2006 AJB 93: 1667-1674 Figure 1 ts.plot(bh.temp.long$mean.m.temp[bh.temp.long$month %in% c("feb", "mar", "apr","may")]) #turn into time-series object bh.m.temp.rp <- ts(bh.temp.long$mean.m.temp, start=c(1831,1), frequency=12) #GHCN unadjusted data bhtemp.ghcn <- read.csv("hf258-02-ghcn-data.csv") summary(bhtemp.ghcn) #get it into long form bhtemp.ghcn.long <- reshape(bhtemp.ghcn, idvar="year", varying=list(2:13), times=names(bhtemp.ghcn)[2:13], direction="long") names(bhtemp.ghcn.long)<- c("year","month","mean.m.temp") bhtemp.ghcn.long$month <- factor(bhtemp.ghcn.long$month, levels=c("jan", "feb", "mar", "apr", "may","jun","jul","aug","sep","oct","nov","dec")) summary(bhtemp.ghcn.long) #sort it bfore turning into time-series object bhtemp.ghcn.long <- bhtemp.ghcn.long[order(bhtemp.ghcn.long[,1], bhtemp.ghcn.long[,2]),] #turn it into time-series object bh.m.temp.ghcn <- ts(bhtemp.ghcn.long$mean.m.temp/100, start=c(1885,1), frequency=12) ``` Organize temperature data for modeling ```{r} #remove 2015 data from ghcn unadjusted (partial year) and use only 2008-2014 since 2005, 2006 are missing bh.m.temp.ghcn.trunc <- window(bh.m.temp.ghcn, start=c(2008,1), end=c(2014,12)) #add primack early data onto bh.m.temp.ghcn.1 #early.primack <- window(bh.m.temp.rp, end=c(1884,12)) #bh.m.temp.ghcn.1 <- ts(c(early.primack,bh.m.temp.ghcn.trunc),start=1831, freq=12) bh.m.temp.ghcn.1 <- ts(c(bh.m.temp.rp, bh.m.temp.ghcn.trunc), start=1831, freq=12) plot(bh.m.temp.ghcn.1) str(bh.m.temp.ghcn.1) #Extract winter/spring months per various Miller-Rushing & Primack papers January.temps <- window(bh.m.temp.ghcn.1, start=c(1831,1), deltat=1) February.temps <- window(bh.m.temp.ghcn.1, start=c(1831,2), deltat=1) March.temps <- window(bh.m.temp.ghcn.1, start=c(1831,3), deltat=1) April.temps <- window(bh.m.temp.ghcn.1, start=c(1831,4), deltat=1) May.temps <- window(bh.m.temp.ghcn.1, start=c(1831,5), deltat=1) #Get annual mean temps as average of mean monthly temps Annual.mean.temps <- aggregate(bh.m.temp.ghcn.1, nfrequency=1, FUN=mean, na.rm=TRUE) #creat mts object Blue.temps <- cbind(Annual.mean.temps, January.temps, February.temps, March.temps, April.temps,May.temps) #turn mts object into data frame for ggplot use Blue.temps.m <- melt(data.frame(time=as.numeric(time(Blue.temps)),Blue.temps),id.vars="time") #tsplot version plot(Blue.temps) #ggplot version names(Blue.temps.m) <- c("year", "variable", "mean.temp") ggplot(Blue.temps.m, aes(year, mean.temp)) + geom_line() + facet_grid(variable ~.) + geom_smooth(method="loess") #Note all temps are increasing through time. ``` IV. Phenology data Available data include: Herbarium records from Courtland's HU honors thesis Observational data from Thoreau, Hosmer, Primack, and Davis (note that GHCN data does not extend back to Thoreau's time) A. Herbarium data ```{r} HUHThesis.dat<-read.csv("hf258-03-herbarium-data.csv") str(HUHThesis.dat) #remove non-flowering phenology HUHThesis <- droplevels(HUHThesis.dat[HUHThesis.dat$phenology=="Flowering",]) ``` B. Observational data from Thoreau, Hosmer, Primack, Davis ```{r} Observation.dat <- read.csv("hf258-04-field-data.csv") str(Observation.dat) ``` C. Visual look at overall data density ```{r} #overall data density par(mfrow=c(2,1), mar=c(4,4,2,1)) plot(HUHThesis$doy~HUHThesis$year, main="Herbarium data", xlab="", ylab="Flowering day of year", cex=.5, pch=19) plot(Observation.dat$doy~Observation.dat$year, main="Observational data", xlab="Year", ylab="First flowering day of year", cex=.5, pch=19) hist(HUHThesis$year, breaks=20, xlab="", main="Herbarium data", ylab="Number of specimens") hist(Observation.dat$year, breaks=20, xlab="Year", main="Observational data", ylab="Number of observations") hist(HUHThesis$doy, xlab="Flowering day of year", main="Herbarium data", ylab="Number of specimens") hist(Observation.dat$doy, xlab="First flowering day of year", main="Observational data", ylab="Number of observations") barplot(table(HUHThesis$locality), cex.names=.5, xlab="", ylab="Number of specimens", main="Herbarium data") barplot(table(Observation.dat$locality), cex.names=.5, xlab="County", ylab="Number of specimens", main="Observational data") ``` D. Reduce data Courtland Thesis data to the five common counties (> 100 observations) identified in barplot (Bristol, Essex, Middlesex, Norfolk, Worcester) ```{r} HUHThesis.CommonCounties <- droplevels(HUHThesis[HUHThesis$locality == "Bristol" | HUHThesis$locality == "Essex" | HUHThesis$locality == "Middlesex" | HUHThesis$locality == "Norfolk" | HUHThesis$locality == "Worcester",]) ``` V. Initial exploratory plots A. Plot herbarium flowering data by year (common counties only) Use method=rlm for robust linear models (rlm) (substitute method=lm for basic linear model) Take-home: Even without formal analysis, it's clear that on a year-by-year bases, we shouldn't pool counties - regression slopes (even adjusted for outliers with rlm) are different for the majority of species. ```{r} a <- ggplot(HUHThesis.CommonCounties, aes(x=year, y=doy, colour=locality)) + geom_point() a + facet_wrap(~ genus.species, nrow=4) + stat_smooth(se=FALSE, method=rlm, data=HUHThesis.CommonCounties, aes(group=1), colour="black") + ggtitle("Herbarium data only\nFive counties\nCommon slope") a <- ggplot(HUHThesis.CommonCounties, aes(x=year, y=doy, colour=locality)) + geom_point() a + facet_wrap(~ genus.species, nrow=4) + stat_smooth(se=FALSE, method=rlm, data=HUHThesis.CommonCounties) + ggtitle("Herbarium data only\nFive counties\nSeparate Slopes for each county") ``` VI. Reduce to Middlesex County (county of observational data) To compare herbarium data with observational data, we'll use only Middlesex county herbarium records (and may want to filter these to Concord if we can get town-level data for temperatures or herbarium specimens). A. Extract Middlesex from herbarium data; rbind with observational data ```{r} HUHThesis.Middlesex <- droplevels(HUHThesis[HUHThesis$locality == "Middlesex",]) str(HUHThesis.Middlesex) #Combine with observational data Middlesex.dat <- rbind(HUHThesis.Middlesex[,-c(6,7)], Observation.dat) str(Middlesex.dat) ``` B. Plot herbarium + observational data by year ```{r} a <- ggplot(Middlesex.dat, aes(x=year, y=doy, colour=datum.type)) + geom_point() a + facet_wrap(~ genus.species, nrow=4) + stat_smooth(se=FALSE, method=rlm, data=Middlesex.dat, aes(group=1), colour="black") + ggtitle("Herbarium and observational data\nMiddlesex County Only\ncommon slope\n") a <- ggplot(Middlesex.dat, aes(x=year, y=doy, colour=datum.type)) + geom_point() a + facet_wrap(~ genus.species, nrow=4) + stat_smooth(se=FALSE, method=rlm, data=Middlesex.dat) + ggtitle("Herbarium and observational data\nMiddlesex County Only\nseparate slopes\n") ``` C. Per query from Charlie Willis 2014-04-15 "It looks like, for the most part, that obs. (first) flowering dates are earlier than the herbarium flowering dates (as expected, given that the obs. dates are 'first flowering'). Do you know if this is true for all counties? That is, are the rest of the counties within the range of the flowering time distribution set by the first flowering date in Concord?" So combine 5-county data with observational data, plot as before. Take-home: In general, yes, observed first-flowering dates are earlier than herbarium specimens from adjacent counties. ```{r} HerbariumObserved <- rbind(HUHThesis.CommonCounties[,-c(6,7)], Observation.dat) head(HerbariumObserved) str(HerbariumObserved) a <- ggplot(HerbariumObserved, aes(x=year, y=doy, colour=datum.type)) + geom_point() a + facet_wrap(~ genus.species, nrow=4) + stat_smooth(se=FALSE, method=rlm, data=HerbariumObserved, aes(group=1), colour="black") + ggtitle("Herbarium and observational data\nHerbarium: Five Common Counties\nObservations: Concord\ncommon slope\n") a <- ggplot(HerbariumObserved, aes(x=year, y=doy, colour=datum.type)) + geom_point() a + facet_wrap(~ genus.species, nrow=4) + stat_smooth(se=FALSE, method=rlm, data=HerbariumObserved) + ggtitle("Herbarium and observational data\nHerbarium: Five Common Counties\nObservations: Concord\nseparate slopes\n") ``` VI. Models Dat objects: Five-county data + Concord observations: HerbariumObserved Middlesex (Concord) herbarium data + Concord observations: Middlesex.dat Temperature data as multiple time-series object: Blue.temps Temperature data as data frame: Blue.temps.m A. First some plots ```{r} #Plots: Blue.temps.m.wide <- reshape(Blue.temps.m, idvar="year", timevar="variable", v.names="mean.temp", direction="wide") names(Blue.temps.m.wide)<- c("year","Annual.mean.temps", "January.temps", "February.temps", "March.temps","April.temps","May.temps") #combine GHCN unadjusted temperature data with Middlesex flowering data, using only years where there are data Temp.All<-merge(Blue.temps.m.wide, Middlesex.dat[,c(4, 6:7,11)], by="year") str(Temp.All) #Plots of data by different (combinations of) temperatures ggplot(data=Temp.All, aes(x=Annual.mean.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("Annual mean temperature") ggplot(data=Temp.All, aes(x=January.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean January temperature") ggplot(data=Temp.All, aes(x=February.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean February temperature") ggplot(data=Temp.All, aes(x=March.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean March temperature") ggplot(data=Temp.All, aes(x=April.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean April temperature") ggplot(data=Temp.All, aes(x=May.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean May temperature") ``` B. Linear mixed-effects models Miller-Rushing and Primack used "mean" spring temperatures, but it's not obvious if they averaged spring months or used individual spring-month averages. Their figures suggest the former, so that's what we did. our spring mean is average of Feb-May graphical smoother is robust linear model (rlm) models are linear mixed effect models with temp or year * herbarium/observation as fixed effects, and species as random effect. Key results: 0. Used linear mixed effects model, with temperature or year and observation type (field vs. herbarium) as main effects and species as a random effect 1. In model of spring mean temperature (mean of February to May), date of flowering is significantly affected by temperature and observation type, but interaction term is not significant 2. In model of time (year): date of flowering is significantly affected only by observation type, not by year or by the interaction between year and observation type. 3. Individual species respond differently. 4. Herbarium and observations have species-specific amounts of overlap by year and temperature ```{r} Temp.All$spring.mean.temp <- apply(Temp.All[,4:7], 1, mean, na.rm=TRUE) ggplot(data=Temp.All, aes(x=spring.mean.temp, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="rlm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean Feb-May temperature") #linear mixed-effect model, with random effect of species, and fixed effect of average spring temperature (Feb-May) and herbarium vs. observational data. Strip out NAs model.temps.full <- lme(doy ~ spring.mean.temp*datum.type, random = ~1|genus.species, data=Temp.All, na.action=na.omit) summary(model.temps.full) anova(model.temps.full) plot(ranef(model.temps.full)) plot(residuals(model.temps.full)) qqnorm(residuals(model.temps.full)) #linear mixed-effect model, with random effect of species, and fixed effect of year and herbarium vs. observational data. Strip out NAs model.year.full <- lme(doy ~ year*datum.type, random = ~1|genus.species, data=Temp.All, na.action=na.omit) summary(model.year.full) anova(model.year.full) plot(ranef(model.year.full)) plot(residuals(model.year.full)) qqnorm(residuals(model.year.full)) ggplot(data=Temp.All, aes(x=year, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="rlm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("By Year") ``` C. There are some duplicate herbarium records for a given year. So this next set uses the earliest herbarium records only (i.e., delete duplicates w/in site x year) ```{r} #Get the minimum HUHThesis.Middlesex.Earliest <- aggregate(HUHThesis.Middlesex[,8], by=list(year=HUHThesis.Middlesex$year, genus.species=HUHThesis.Middlesex$genus.species), min) names(HUHThesis.Middlesex.Earliest)[3] <- "doy" HUHThesis.Middlesex.Earliest$datum.type <- "Herbarium" Middlesex.dat.Earliest <- bind_rows(Observation.dat, HUHThesis.Middlesex.Earliest) #new data object Temp.All.e merges only earliest flowering data. Then run the plots and models again Temp.All.e<-merge(Blue.temps.m.wide, Middlesex.dat.Earliest[,c(4, 6:7,11)], by="year") ggplot(data=Temp.All.e, aes(x=Annual.mean.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("Annual mean temperature") ggplot(data=Temp.All.e, aes(x=January.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean January temperature") ggplot(data=Temp.All.e, aes(x=February.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean February temperature") ggplot(data=Temp.All.e, aes(x=March.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean March temperature") ggplot(data=Temp.All.e, aes(x=April.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean April temperature") ggplot(data=Temp.All.e, aes(x=May.temps, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("mean May temperature") ``` Models - again, but with only earliest annual date from herbarium records Key results: 0. Used linear mixed effects model, with temperature or year and observation type (field vs. herbarium) as main effects and species as a random effect 1. In model of spring mean temperature (mean of February to May), date of flowering is significantly affected by temperature and observation type, but interaction term is not significant 2. In model of time (year): date of flowering is significantly affected only by observation type, not by year or by the interaction between year and observation type. 3. Individual species respond differently. 4. Herbarium and observations have species-specific amounts of overlap by year and temperature ```{r} Temp.All.e$spring.mean.temp <- apply(Temp.All.e[,4:7], 1, mean, na.rm=TRUE) ggplot(data=Temp.All.e, aes(x=spring.mean.temp, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="rlm") + facet_wrap(~ genus.species) + ggtitle("mean Feb-May temperature") model.temps.full.e <- lme(doy ~ spring.mean.temp*datum.type, random = ~1|genus.species, data=Temp.All.e, na.action=na.omit) model.temps.fixed.e <- lm(doy ~ spring.mean.temp*datum.type, data=Temp.All.e, na.action=na.omit) summary(model.temps.full.e) anova(model.temps.full.e) plot(ranef(model.temps.full.e)) plot(residuals(model.temps.full.e)) qqnorm(residuals(model.temps.full.e)) ggplot(data=Temp.All.e, aes(x=year, y=doy, colour=datum.type), group=datum.type) + geom_point() + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="lm") + facet_wrap(~ genus.species) + ggtitle("By Year") model.year.full.e <- lme(doy ~ year*datum.type, random = ~1|genus.species, data=Temp.All.e, na.action=na.omit) summary(model.year.full.e) anova(model.year.full.e) plot(ranef(model.year.full.e)) plot(residuals(model.year.full.e)) qqnorm(residuals(model.year.full.e)) ``` Some figures for ms. key points ```{r} ggplot(data=Temp.All.e, aes(x=spring.mean.temp, y=doy, colour=datum.type)) + geom_point() + stat_smooth(method="rlm", se=TRUE) plot((spring.mean.temp-mean(spring.mean.temp))~year, data=Temp.All.e) lines(Annual.mean.temps-mean(Annual.mean.temps)) abline(h=0, col="blue", lty=2) Blue.temps.spring <- Blue.temps.m[Blue.temps.m$variable!="Annual.mean.temps",] Blue.temps.spring <- Blue.temps.spring[Blue.temps.spring$variable!="January.temps",] Blue.temps.spring.x <- aggregate(Blue.temps.spring[,3], by=list(year=Blue.temps.spring$year), mean) names(Blue.temps.spring.x)[2] <- "x.spring" Blue.temps.x<-cbind(Blue.temps.spring.x, Blue.temps.m$mean.temp[Blue.temps.m$variable=="Annual.mean.temps"]) names(Blue.temps.x)[3] <- "x.year" ``` Final plots (for manuscript) 1. Climate space of herbarium specimens and field observations ```{r} TE <- Temp.All.e TE$Observer <- NA TE$Observer[TE$datum.type=="Observation" & TE$year < 1860] <- "Thoreau" TE$Observer[TE$datum.type=="Observation" & TE$year >2000] <- "Contemporary" TE$Observer[TE$datum.type=="Observation" & is.na(TE$Observer)] <- "Hosmer" pdf("samplespace.pdf", width=7, height=8, pointsize=10) #par(mfrow=c(3,1), mar=c(4,5,1,1)) layout(matrix(c(1,2,3,4,5,5,5,5),4,2,byrow=FALSE), widths=c(4,6), heights=c(2,2,4,4)) #layout.show(5) par(mar=c(2,5,1,1)) plot(x.year~year, data=Blue.temps.x, type="l", xlim=c(1850,2015), axes=FALSE, ylab="Mean annual\ntemperature", xlab="", font.lab=2) abline(lm(x.year~year, data=Blue.temps.x), lwd=2, col="darkgrey") axis(1, at=seq(1855,2015, 20), labels=FALSE, tcl=-.25) axis(2, tcl=-.25, font.axis=2, las=1) text(1855,10.5,"a", font=2, cex=2) box() par(mar=c(2,5,0,1)) plot(x.spring~year, data=Blue.temps.x, type="l", xlim=c(1850,2015), axes=FALSE, ylab="Mean spring\ntemperature", xlab="", font.lab=2) abline(lm(x.spring~year, data=Blue.temps.x), lwd=2, col="darkgrey") axis(1, at=seq(1855,2015, 20), labels=FALSE, tcl=-.25) axis(2, tcl=-.25, font.axis=2, las=1) text(1855,7.5,"b", font=2, cex=2) box() plot(TE$doy[TE$Observer=="Thoreau"]~TE$year[TE$Observer=="Thoreau"], main="", xlab="Year", ylab="First flowering day of year", cex=.5, pch=19, axes=FALSE, font.lab=2, col="darkorange2", xlim=c(1850,2015), ylim=c(90,270)) axis(1, at=seq(1855,2015, 20), labels=FALSE, tcl=-.25) axis(2, tcl=-.25, font.axis=2, las=1) box() # text(1955, 230,"Field observations", cex=.75, font=2, pos=4 ) text(1855,260,"c", font=2, cex=2) points(TE$year[TE$Observer=="Hosmer"],TE$doy[TE$Observer=="Hosmer"], pch=19, cex=.5, col="dodgerblue3") points(TE$year[TE$Observer=="Contemporary"],TE$doy[TE$Observer=="Contemporary"], pch=19, cex=.5, col="black") par(mar=c(5,5,0,1)) plot(TE$doy[TE$datum.type=="Herbarium"]~TE$year[TE$datum.type=="Herbarium"], main="", xlab="", ylab="Flowering day of year", cex=.5, pch=19, axes=FALSE, font.lab=2, col="darkmagenta", xlim=c(1850,2015), ylim=c(90,270)) axis(1, at=seq(1855,2015, 20), labels=TRUE, tcl=-.25) axis(2, tcl=-.25, font.axis=2, las=1) box() # text(1955, 300,"Herbarium specimens", cex=.75, font=2, pos=4 ) text(1855,260,"d", font=2, cex=2) par(mar=c(5,4,1,1)) plot(x.spring~x.year, data=Blue.temps.x, pch=0, cex=2.25, lwd=1.5, col="darkgrey", xlab="Mean annual temperature", ylab="Mean spring temperature", axes=FALSE, font.lab=2) axis(1, tcl=-.25, font.axis=2) axis(2, tcl=-.25, font.axis=2, las=1) box() points(TE$Annual.mean.temps[TE$datum.type=="Herbarium"], TE$spring.mean.temp[TE$datum.type=="Herbarium"], pch=0, cex=2.25,col="darkmagenta") points(TE$Annual.mean.temps[TE$datum.type=="Observation" & TE$Observer=="Thoreau"], TE$spring.mean.temp[TE$datum.type=="Observation" & TE$Observer=="Thoreau"], pch=19, col="darkorange2", cex=1.1) points(TE$Annual.mean.temps[TE$datum.type=="Observation" & TE$Observer=="Hosmer"], TE$spring.mean.temp[TE$datum.type=="Observation" & TE$Observer=="Hosmer"], pch=19, col="dodgerblue3", cex=1.1) points(TE$Annual.mean.temps[TE$datum.type=="Observation" & TE$Observer=="Contemporary"], TE$spring.mean.temp[TE$datum.type=="Observation" & TE$Observer=="Contemporary"], pch=19, col="black", cex=1.1) abline(rlm(x.spring~x.year, data=Blue.temps.x), lty=1, col="darkgrey", lwd=2) text(6,8.5,"e", font=2, cex=2) Plot_ConvexHull(xcoord=TE$Annual.mean.temps[TE$datum.type=="Herbarium"], ycoord=TE$spring.mean.temp[TE$datum.type=="Herbarium"], lcolor="darkmagenta", llwd=2) Plot_ConvexHull(xcoord=TE$Annual.mean.temps[TE$datum.type=="Observation" & TE$Observer=="Thoreau"], ycoord=TE$spring.mean.temp[TE$datum.type=="Observation" & TE$Observer=="Thoreau"], lcolor="darkorange2", llwd=2) Plot_ConvexHull(xcoord=TE$Annual.mean.temps[TE$datum.type=="Observation" & TE$Observer=="Hosmer"], ycoord=TE$spring.mean.temp[TE$datum.type=="Observation" & TE$Observer=="Hosmer"], lcolor="dodgerblue3", llwd=2) Plot_ConvexHull(xcoord=TE$Annual.mean.temps[TE$datum.type=="Observation" & TE$Observer=="Contemporary"], ycoord=TE$spring.mean.temp[TE$datum.type=="Observation" & TE$Observer=="Contemporary"], lcolor="black", llwd=2) dev.off() #some data on the convex hulls hpts.herb <- chull(x=Temp.All.e$Annual.mean.temps[Temp.All.e$datum.type=="Herbarium"], y=Temp.All.e$spring.mean.temp[Temp.All.e$datum.type=="Herbarium"]) hpts.herb <- c(hpts.herb,hpts.herb[1]) xy.herb <- cbind(Temp.All.e$Annual.mean.temps[Temp.All.e$datum.type=="Herbarium"], Temp.All.e$spring.mean.temp[Temp.All.e$datum.type=="Herbarium"]) chull.herb.xy <- xy.herb[hpts.herb,] chull.poly.herb <- Polygon(chull.herb.xy, hole=as.logical(FALSE)) chull.area.herb <- chull.poly.herb@area hpts.obs <- chull(x=Temp.All.e$Annual.mean.temps[Temp.All.e$datum.type=="Observation"], y=Temp.All.e$spring.mean.temp[Temp.All.e$datum.type=="Observation"]) hpts.obs <- c(hpts.obs,hpts.obs[1]) xy.obs <- cbind(Temp.All.e$Annual.mean.temps[Temp.All.e$datum.type=="Observation"], Temp.All.e$spring.mean.temp[Temp.All.e$datum.type=="Observation"]) chull.obs.xy <- xy.obs[hpts.obs,] chull.poly.obs <- Polygon(chull.obs.xy, hole=as.logical(FALSE)) chull.area.obs <- chull.poly.obs@area hpts.all <- chull(x=Temp.All.e$Annual.mean.temps, y=Temp.All.e$spring.mean.temp) hpts.all <- c(hpts.all,hpts.all[1]) xy.all <- cbind(Temp.All.e$Annual.mean.temps, Temp.All.e$spring.mean.temp) chull.all.xy <- xy.all[hpts.all,] chull.poly.all <- Polygon(chull.all.xy, hole=as.logical(FALSE)) chull.area.all <- chull.poly.all@area chull.area.all chull.area.herb chull.area.obs chull.area.herb/chull.area.all chull.area.obs/chull.area.all chull.area.herb/chull.area.obs ``` 2. Species-specific responses to climate ```{r} pdf("climate-effect.pdf", height=7, width=7) ggplot(data=Temp.All.e, aes(x=spring.mean.temp, y=doy, colour=datum.type), group=datum.type) + geom_point(size=1.25) + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="rlm", se=FALSE, colour="black") + facet_wrap(~ genus.species) + xlab("Mean spring temperature (February 1 - May 31)") + ylab("Earliest flowering date") + my_theme_bw() + theme(strip.text=element_text(face="italic", size=8), axis.title=element_text(face="bold", size=9), axis.text=element_text(face="bold", size=6), legend.text=element_text(size=8), legend.title=element_text(size=7, face="bold"), axis.ticks.length=unit(.7, "mm"), legend.position="bottom") + scale_colour_discrete(name="", breaks=c("Herbarium", "Observation"), labels=c("Herbarium specimens", "Field observations") ) dev.off() pdf("year-effect.pdf", height=7, width=7) ggplot(data=Temp.All.e, aes(x=year, y=doy, colour=datum.type), group=datum.type) + geom_point(size=1.25) + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="rlm", se=FALSE, colour="black") + facet_wrap(~ genus.species) + xlab("Year") + ylab("Earliest flowering date") + my_theme_bw() + theme(strip.text=element_text(face="italic", size=8), axis.title=element_text(face="bold", size=9), axis.text=element_text(face="bold", size=6), legend.text=element_text(size=8), legend.title=element_text(size=7, face="bold"), axis.ticks.length=unit(.7, "mm"), legend.position="bottom") + scale_colour_discrete(name="", breaks=c("Herbarium", "Observation"), labels=c("Herbarium specimens", "Field observations") ) dev.off() ``` Extract linear models for each species ```{r} #attach(Temp.All.e) clim.slopes<- data.frame(levels(Temp.All.e$genus.species), NA) clim.slopes[,1] <- levels(Temp.All.e$genus.species) names(clim.slopes)<-c("species","slope" ) clim.slopes for (i in 1:length(levels(Temp.All.e$genus.species))) { clim.slopes[i,2]<- rlm(Temp.All.e$doy[Temp.All.e$genus.species==levels(Temp.All.e$genus.species)[i]]~Temp.All.e$spring.mean.temp[Temp.All.e$genus.species==levels(Temp.All.e$genus.species)[i]])$coefficients[2] # a<- lm(Date[GenusSpecies==levels(GenusSpecies)[i]]~spring.mean.temp[GenusSpecies==levels(GenusSpecies)[i]]) # print(summary(a)) print(levels(Temp.All.e$genus.species)[i]) print(clim.slopes[i,2]) } clim.slopes<-cbind(clim.slopes,ranef(model.temps.full)) rownames(clim.slopes)<-NULL names(clim.slopes)[3]<-"ran.effect" #detach(Temp.All.e) pdf("Species-acceleration.pdf", width=6, height=6, pointsize=10) par(mar=c(5,6,1,1)) plot(slope~ran.effect, data=clim.slopes, pch=19,xlab="Random effect size", ylab="Acceleration of flowering (days per degree warming)\n(common slope)", xlim=c(-40,60), font.lab=2, font.axis=2, las=1, ylim=c(-9,4)) text(clim.slopes$ran.effect[-8], clim.slopes$slope[-8], clim.slopes[-8,1], font=3, pos=4, cex=.5) text(clim.slopes$ran.effect[8], clim.slopes$slope[8], clim.slopes[8,1], font=3, pos=2, cex=.5) abline(lm(slope~ran.effect, data=clim.slopes), lty=2, lwd=1, col="darkgray") text(65,-1.25, "P = 0.2", col="darkgray", pos=2, font=3) abline(v=0, col="blue") text(0,-9, "Spring bloomers", col="blue", pos=2, cex=.75) text(0, -9, "Summer or fall bloomers", col="blue", pos=4,cex=.75) dev.off() summary(lm(slope~ran.effect, data=clim.slopes)) ``` Graphics &c for Supplementary online material ```{r} #1. Herbarium data density library(maps) MA.specimens <- table(HUHThesis$locality) MA.counties <- map("county", "massachusetts", names=T, plot=FALSE) county.specimens <- data.frame(MA.counties,MA.specimens) county.specimens$color <- paste("gray", trunc((317-county.specimens$Freq)/3.17), sep="") county.specimens$labelcolor <- c(rep("black",8),"white",rep("black",4),"white") pdf("MAcounties.pdf", height=5, width=7) map("county", "massachusetts", fill=T, col=county.specimens$color) map.text("county", "massachusetts", col=county.specimens$labelcolor, add=TRUE) points(-71.34833,42.46056, pch=19, cex=1.5, col="cyan") points(-71.0981064, 42.2431546, pch=17, cex=1.5, col="forestgreen") points(-71.1153304, 42.3781525, pch=15, cex=1.5, col="darkred") dev.off() ``` Five-county data There are some duplicate herbarium records for a given year. So this next set uses the earliest herbarium records only (i.e., delete duplicates w/in site x year) ```{r} #Get the minimum HerbariumObserved.Earliest <- aggregate(HerbariumObserved[,6], by=list(year=HerbariumObserved$year, genus.species=HerbariumObserved$genus.species, locality=HerbariumObserved$locality), min) names(HerbariumObserved.Earliest)[4] <- "doy" HerbariumObserved.Earliest$datum.type <- "Herbarium" FiveCounty.dat.Earliest <- bind_rows(Observation.dat, HerbariumObserved.Earliest) FiveCounty.dat.Earliest$locality <- factor(FiveCounty.dat.Earliest$locality) #new data object Five.All.e merges only earliest flowering data. Then run the plots and models Five.All.e<-merge(Blue.temps.m.wide, FiveCounty.dat.Earliest[,c(4, 6:8,11)], by="year") Five.All.e$spring.mean.temp <- apply(Five.All.e[,4:7], 1, mean, na.rm=TRUE) Five.All.e$datum.type <- factor(Five.All.e$datum.type) ``` Models - again, but with only earliest annual date from herbarium records Key results: 0. Used linear mixed effects model, with temperature or year and observation type (field vs. herbarium) as main effects and species as a random effect 1. In model of spring mean temperature (mean of February to May), date of flowering is significantly affected by temperature and observation type, but interaction term is not significant 2. In model of time (year): date of flowering is significantly affected only by observation type, not by year or by the interaction between year and observation type. 3. Individual species respond differently. 4. Herbarium and observations have species-specific amounts of overlap by year and temperature ```{r} pdf("climate-effect-5sp.pdf", height=7, width=7) ggplot(data=Five.All.e, aes(x=spring.mean.temp, y=doy, colour=datum.type), group=datum.type) + geom_point(size=1.25) + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="rlm", se=FALSE, colour="black") + facet_wrap(~ genus.species) + xlab("Mean spring temperature (February 1 - May 31)") + ylab("Earliest flowering date") + my_theme_bw() + theme(strip.text=element_text(face="italic", size=8), axis.title=element_text(face="bold", size=9), axis.text=element_text(face="bold", size=6), legend.text=element_text(size=8), legend.title=element_text(size=7, face="bold"), axis.ticks.length=unit(.7, "mm"), legend.position="bottom") + scale_colour_discrete(name="", breaks=c("Herbarium", "Observation"), labels=c("Herbarium specimens", "Field observations") ) dev.off() model.temps.full.e <- lme(doy ~ spring.mean.temp*datum.type, random = ~1|genus.species, data=Five.All.e, na.action=na.omit) summary(model.temps.full.e) anova(model.temps.full.e) plot(ranef(model.temps.full.e)) plot(residuals(model.temps.full.e)) qqnorm(residuals(model.temps.full.e)) model.temps.fixed.e <- lm(doy ~ spring.mean.temp*datum.type, data=Five.All.e, na.action=na.omit) AIC(model.temps.fixed.e) attach(Five.All.e) clim.slopes<- data.frame(levels(genus.species), NA) clim.slopes[,1] <- levels(genus.species) names(clim.slopes)<-c("species","slope" ) clim.slopes for (i in 1:length(levels(genus.species))) { clim.slopes[i,2]<- as.numeric(rlm(doy[genus.species==levels(genus.species)[i]]~spring.mean.temp[genus.species==levels(genus.species)[i]])$coefficients[2]) } clim.slopes<-cbind(clim.slopes,ranef(model.temps.full.e)) rownames(clim.slopes)<-NULL names(clim.slopes)[3]<-"ran.effect" detach(Five.All.e) pdf("Species-acceleration-5sp.pdf", width=6, height=6, pointsize=10) par(mar=c(5,6,1,1)) plot(slope~ran.effect, data=clim.slopes, pch=19,xlab="Random effect size", ylab="Acceleration of flowering (days per degree warming)\n(common slope)", xlim=c(-40,60), font.lab=2, font.axis=2, las=1, ylim=c(-9,4)) text(clim.slopes$ran.effect[-c(8,14,15)], clim.slopes$slope[-c(8,14,15)], clim.slopes[-c(8,14,15),1], font=3, pos=4, cex=.5) text(clim.slopes$ran.effect[c(8,14,15)], clim.slopes$slope[c(8,14,15)], clim.slopes[c(8,14,15),1], font=3, pos=2, cex=.5) abline(lm(slope~ran.effect, data=clim.slopes), lty=1, lwd=1, col="black") text(65,-1.25, "P = 0.03", col="black", pos=2, font=3) abline(v=0, col="blue") text(0,-9, "Spring bloomers", col="blue", pos=2, cex=.75) text(0, -9, "Summer or fall bloomers", col="blue", pos=4,cex=.75) dev.off() summary(lm(slope~ran.effect, data=clim.slopes)) pdf("year-effect-5co.pdf", height=7, width=7) ggplot(data=Five.All.e, aes(x=year, y=doy, colour=datum.type), group=datum.type) + geom_point(size=1.25) + stat_smooth(method="lm", se=FALSE) + stat_smooth(aes(group=1), method="rlm", se=FALSE, colour="black") + facet_wrap(~ genus.species) + xlab("Year") + ylab("Earliest flowering date") + my_theme_bw() + theme(strip.text=element_text(face="italic", size=8), axis.title=element_text(face="bold", size=9), axis.text=element_text(face="bold", size=6), legend.text=element_text(size=8), legend.title=element_text(size=7, face="bold"), axis.ticks.length=unit(.7, "mm"), legend.position="bottom") + scale_colour_discrete(name="", breaks=c("Herbarium", "Observation"), labels=c("Herbarium specimens", "Field observations") ) dev.off() model.year.full.5e <- lme(doy ~ year*datum.type, random = ~1|genus.species, data=Five.All.e, na.action=na.omit) summary(model.year.full.5e) anova(model.year.full.5e) plot(ranef(model.year.full.5e)) plot(residuals(model.year.full.5e)) qqnorm(residuals(model.year.full.5e)) ``` Look at long-term variance in temps ```{r} springtemps <- aggregate(Temp.All[,11], by=list(year=Temp.All$year), mean) springtemps <- ts(springtemps$x, start=1852, frequency=1) plot(springtemps) plot(Annual.mean.temps) ```