This RMarkdown demonstrates the data cleaning, transformations, and multivariate analysis used to understand the relationship between the composition of the modern pollen data and the vegetation communities throughout eh Balearic Islands. When running this Rmarkdown or the R script in the “Multivariate_Analysis_Servera_Vives_et_al_2022” RStudio project, all path names are relative to the containing folder. Also note that the term “species” is used in this script to denote variables and not necessarily taxa represented in the pollen data. Finally, this analysis excludes rare pollen taxa following methods in López-Sáez et al. 2020, which includes pollen taxa with percentages ≥ 3% in at least two samples.
# clear workspace
rm(list = ls())
# set seed
set.seed(123)
# load libraries
library(devtools)
library(vegan)
library(RColorBrewer)
library(raster)
library(dplyr)
library(stats)
library(dendextend)
#devtools::install_github("ropenscilabs/ochRe") # install via devtools::install_github if using for the first time
library(ochRe)
library(ggplot2)
library(ggrepel)
library(indicspecies)
#devtools::install_github("gavinsimpson/ggvegan") # install via devtools::install_github if using for the first time
library(ggvegan)
library(tibble)
mod.pol = read.csv("./data/modern_pollen_data_percent_reduced_1per_2samp.csv") # with the latest recombination of the pollen data
# note: these data are represented as percentages and have been subset to remove pollen clumps and
# individual genera that are accounted for in summed genera (ex. Quercus represents the summed observations of Quercus ilex-t, Quercus robur/pubescens-t, and Quercus sp).
mod.pol$Veg_Types = as.factor(mod.pol$Veg_Types)
mod.pol.data = mod.pol[,-c(1:5)] # remove site name, code, and veg type. These will be added again later to examine similarities between veg types.
mod.pol.data[is.na(mod.pol.data)] = 0 # replace all NAs with 0
mod.pol.sites = mod.pol[,c(1:5)] # preserve the veg type, site name, and code
mod.pol.sites$Veg_Types = droplevels(mod.pol.sites$Veg_Types) # drop levels for future labeling
# fix column names
columns = colnames(mod.pol.data)
columns.fixed = gsub(".t", '-t', columns, fixed = T)
columns.fixed.2 = gsub(".", ' ', columns.fixed, fixed = T)
columns.fixed.3 = gsub("major media", 'major/media', columns.fixed.2, fixed = T)
colnames(mod.pol.data) = columns.fixed.3
mod.pol.sqrt = sqrt(mod.pol.data)
dca.sqrt = decorana(mod.pol.sqrt ) # Lepš & Šmilauer (2003) suggest performing all transformations prior to DCA.
dca.sqrt
##
## Call:
## decorana(veg = mod.pol.sqrt)
##
## Detrended correspondence analysis with 26 segments.
## Rescaling of axes with 4 iterations.
##
## DCA1 DCA2 DCA3 DCA4
## Eigenvalues 0.1676 0.1368 0.1102 0.07680
## Decorana values 0.2082 0.1363 0.1010 0.06527
## Axis lengths 2.1056 1.8387 1.5664 1.53204
# axis length for DCA1 < 3 (DCA1 = 2.0147), so linear ordination is preferred
dca= decorana(mod.pol.data) # Lepš & Šmilauer (2003) suggest performing all transformations prior to DCA.
dca
##
## Call:
## decorana(veg = mod.pol.data)
##
## Detrended correspondence analysis with 26 segments.
## Rescaling of axes with 4 iterations.
##
## DCA1 DCA2 DCA3 DCA4
## Eigenvalues 0.4162 0.2730 0.2317 0.1836
## Decorana values 0.4954 0.2796 0.2170 0.1320
## Axis lengths 3.7809 2.3148 2.5993 1.8523
# axis length for DCA1 is between 3-4 (DCA1 = 3.7889), so linear ordination is ok
# create cluster colors
colors = ochre_pal(palette = "parliament")(8) # define colors for plots
colors = c("red", colors)
rownames(mod.pol.sqrt) = mod.pol.sites$Site_Code_Veg# add now names to help interpret clusters
mod.pol.sqrt.dist <- vegdist(mod.pol.sqrt, method = "euclidean") # euclidean distance following Lopes- saez
mod.pol.sqrt.clust <- hclust(mod.pol.sqrt.dist, method = "ward.D2")
plot(mod.pol.sqrt.clust$height[length(mod.pol.sqrt.clust$height):(length(mod.pol.sqrt.clust$height)-10)], type = "b", xlab = "Clusters", ylab= "Height") # use elbow method to determine how many clusters
abline(v = 3, lty = "dashed")
mod.pol.dend <- as.dendrogram(mod.pol.sqrt.clust)
plot(mod.pol.dend, ylim = c(0,26), main = "Hierarchical clustering of Veg Types using Ward's Method", ylab = "Height")
mtext("With Sqrt Transformation", side=3)
rect.dendrogram(mod.pol.dend, k = 3)
mod.pol$Cluster_SqrtTrans =cutree(mod.pol.sqrt.clust, 3)
# 4.1 create initial unconstrained ordination (pca), but using a hellinger / Sqrt transformation to accommodate for sparseness and zeros in species data for each sample site.
pca.sqrt = rda(mod.pol.sqrt)
pca.sqrt # shows eigenvalues and inertia (variance)
## Call: rda(X = mod.pol.sqrt)
##
## Inertia Rank
## Total 50.95
## Unconstrained 50.95 46
## Inertia is variance
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## 8.255 7.738 5.079 3.331 2.764 2.685 2.427 2.179
## (Showing 8 of 46 unconstrained eigenvalues)
summary(eigenvals(pca.sqrt)) # the proportion of variance explained in the first 2 axes is ~ 34%
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Eigenvalue 8.255 7.7376 5.0795 3.33064 2.76432 2.68529 2.42722
## Proportion Explained 0.162 0.1519 0.0997 0.06537 0.05426 0.05271 0.04764
## Cumulative Proportion 0.162 0.3139 0.4136 0.47896 0.53322 0.58592 0.63356
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Eigenvalue 2.17897 1.98829 1.70277 1.62354 1.42608 1.24548 1.06355
## Proportion Explained 0.04277 0.03903 0.03342 0.03187 0.02799 0.02445 0.02087
## Cumulative Proportion 0.67633 0.71536 0.74878 0.78064 0.80863 0.83308 0.85395
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Eigenvalue 0.94878 0.75161 0.73151 0.64240 0.59834 0.491280 0.416922
## Proportion Explained 0.01862 0.01475 0.01436 0.01261 0.01174 0.009643 0.008183
## Cumulative Proportion 0.87258 0.88733 0.90169 0.91430 0.92604 0.935683 0.943866
## PC22 PC23 PC24 PC25 PC26 PC27
## Eigenvalue 0.370789 0.320328 0.28937 0.269376 0.223700 0.1987
## Proportion Explained 0.007278 0.006287 0.00568 0.005287 0.004391 0.0039
## Cumulative Proportion 0.951144 0.957431 0.96311 0.968398 0.972789 0.9767
## PC28 PC29 PC30 PC31 PC32 PC33
## Eigenvalue 0.190797 0.172450 0.146994 0.133121 0.099624 0.081244
## Proportion Explained 0.003745 0.003385 0.002885 0.002613 0.001955 0.001595
## Cumulative Proportion 0.980433 0.983818 0.986703 0.989316 0.991271 0.992866
## PC34 PC35 PC36 PC37 PC38 PC39
## Eigenvalue 0.071709 0.066709 0.055241 0.040505 0.0307017 0.0275078
## Proportion Explained 0.001407 0.001309 0.001084 0.000795 0.0006026 0.0005399
## Cumulative Proportion 0.994274 0.995583 0.996667 0.997462 0.9980648 0.9986047
## PC40 PC41 PC42 PC43 PC44
## Eigenvalue 0.019257 0.0166524 0.0133417 0.0111196 0.0055904
## Proportion Explained 0.000378 0.0003268 0.0002619 0.0002183 0.0001097
## Cumulative Proportion 0.998983 0.9993095 0.9995714 0.9997896 0.9998994
## PC45 PC46
## Eigenvalue 3.563e-03 1.565e-03
## Proportion Explained 6.993e-05 3.071e-05
## Cumulative Proportion 1.000e+00 1.000e+00
screeplot(pca.sqrt, bstick = T, type = "l", main = "Scree plot of variance explained: Sqrt") # start with axes 1 and 2, but still not a lot of variance explained here
# Sqrt
species.scores.sqrt = scores(pca.sqrt, display = "species") # extract species scores
species.scores.sqrt = cbind(species.scores.sqrt, rep(NA, length(species.scores.sqrt[,1]))) # create empty column to populate species importance
colnames(species.scores.sqrt)[3] = "importance"
for (i in 1:length(species.scores.sqrt[,1])){ # find the length of each species "arrow" using euclidean distance from plot origin to arrow tip
arrow.dist = pointDistance(c(0, 0), species.scores.sqrt[i,c(1:2)], lonlat=FALSE)
species.scores.sqrt[i,3] = arrow.dist
}
species.scores.sqrt.df = as.data.frame(species.scores.sqrt) # convert to dataframe for sorting and later operations
species.scores.sqrt.df.ordered = species.scores.sqrt.df[order(-species.scores.sqrt.df$importance),] # reorder by importance
species.scores.sqrt.df.ordered
## PC1 PC2 importance
## Quercus -1.099237544 1.715697585 2.037631317
## Pinus -1.499344558 -0.935737610 1.767381956
## Juniperus-t -0.377992705 -1.202758726 1.260756534
## Pistacia -0.885797910 -0.813719682 1.202820709
## Chenopodiaceae 0.974106855 -0.309059605 1.021959884
## Olea -0.836984017 0.035275224 0.837727036
## Cichorieae 0.665507988 -0.401026359 0.776996153
## Poaceae undiff 0.453543390 -0.467717630 0.651507013
## Erica -0.325550602 -0.493674541 0.591352473
## Plantago coronopus-t 0.496760202 -0.104940499 0.507723554
## Scrophulariaceae undiff 0.390160603 -0.003438341 0.390175753
## Buxus 0.088420861 0.361265741 0.371929004
## Cistaceae undiff -0.276818042 -0.229506010 0.359584812
## Plantago sp 0.264124529 -0.154538249 0.306012805
## Senecio-t 0.004624182 -0.276738045 0.276776677
## Brassicaceae undiff 0.226598213 -0.141406059 0.267100026
## Hypericum -0.039476101 0.262758213 0.265707059
## Plantago lanceolata-t -0.045958909 -0.235767707 0.240205397
## Asteraceae 0.231936427 -0.016333658 0.232510848
## Apiaceae undiff 0.090482639 -0.205891241 0.224896223
## Hordeum-t 0.176100310 -0.136199979 0.222624692
## Parietaria 0.201556593 -0.048318253 0.207267252
## Cyperaceae undiff 0.199718630 -0.013154136 0.200151348
## Asteroideae 0.181521262 -0.040223091 0.185924355
## Cerealia-t 0.121956764 -0.139015627 0.184929167
## Liliaceae undiff -0.124548930 -0.126179818 0.177295748
## Polygonum 0.156374254 -0.078753975 0.175085967
## Globularia cf alypum -0.092561197 -0.140617755 0.168347641
## Caryophyllaceae undiff -0.056162805 -0.150612155 0.160742906
## Betula -0.060125993 -0.143881389 0.155939056
## Ceratonia siliqua -0.120410580 -0.097463346 0.154912271
## Teucrium -0.058488489 -0.142455537 0.153995075
## Fabaceae undiff -0.124273874 -0.079410427 0.147478851
## Phyllirea -0.102131594 -0.103997523 0.145761268
## Dipsacaceae undiff 0.132145603 -0.050054140 0.141307740
## Euphorbia 0.116464391 -0.072324781 0.137094231
## Genista-t -0.126490621 -0.044373255 0.134047988
## Plantago albicans 0.123741063 -0.022313608 0.125736820
## Myrtus 0.101569128 -0.053239575 0.114676676
## Rosaceae -0.112570905 -0.014933029 0.113557052
## Lamiaceae undiff -0.035762582 -0.106549617 0.112391206
## Corylus -0.106474306 0.028020164 0.110099535
## Rubiaceae undiff 0.081007212 0.048113046 0.094218011
## Artemisia -0.063269287 -0.067608459 0.092595391
## Matricaria-t 0.088536697 -0.015841176 0.089942702
## Ranunculaceae undiff 0.053667784 0.054334560 0.076370645
## Alnus 0.022858344 -0.066406495 0.070230524
## Medicago -0.044032008 -0.044400510 0.062531776
## Sorbus group -0.006098233 0.057077896 0.057402741
## Centaurea 0.056179578 0.011092455 0.057264191
## Sedum-t -0.031209913 -0.043610530 0.053627763
## Saxifraga 0.018271515 0.029170084 0.034420082
## Ephedra fragilis-t 0.024047655 -0.005824923 0.024743069
## Asphodelus group 0.008528020 -0.003799956 0.009336315
# create a scree plot of species importance
plot((species.scores.sqrt.df.ordered$importance), main = "Species Importance", xlab ="Number of Species", ylab = "Importance")
mtext("Blue line denotes the top 25% most influential species: Sqrt", side=3, col = "blue")
lines(species.scores.sqrt.df.ordered$importance)
abline(h = quantile(species.scores.sqrt.df$importance, prob = .75), col = "blue")
# evaluate the top 25% most influential species
for (i in 1:length(species.scores.sqrt[,1])){
if(species.scores.sqrt.df$importance[i] >= quantile(species.scores.sqrt.df$importance, prob = .75) ) { # adjust quantile here
species.scores.sqrt.df$include[i] = 1
} else {species.scores.sqrt.df$include[i] = 2}}
scores(pca.sqrt, choices = 1:3, display = "species", scaling = "species")
## PC1 PC2 PC3
## Quercus -1.099237544 1.715697585 0.613364749
## Olea -0.836984017 0.035275224 -1.148387830
## Corylus -0.106474306 0.028020164 0.046220848
## Buxus 0.088420861 0.361265741 0.377342660
## Pinus -1.499344558 -0.935737610 0.198106393
## Betula -0.060125993 -0.143881389 0.101310991
## Alnus 0.022858344 -0.066406495 0.073460209
## Juniperus-t -0.377992705 -1.202758726 1.269099246
## Ceratonia siliqua -0.120410580 -0.097463346 0.078601961
## Hypericum -0.039476101 0.262758213 0.241736151
## Myrtus 0.101569128 -0.053239575 -0.045047588
## Sorbus group -0.006098233 0.057077896 0.049850324
## Ephedra fragilis-t 0.024047655 -0.005824923 0.039366562
## Genista-t -0.126490621 -0.044373255 -0.076587247
## Phyllirea -0.102131594 -0.103997523 -0.048421323
## Pistacia -0.885797910 -0.813719682 -0.589164678
## Erica -0.325550602 -0.493674541 0.319466614
## Cistaceae undiff -0.276818042 -0.229506010 -0.079483611
## Poaceae undiff 0.453543390 -0.467717630 -0.349387786
## Hordeum-t 0.176100310 -0.136199979 -0.250744981
## Cerealia-t 0.121956764 -0.139015627 -0.284527679
## Plantago lanceolata-t -0.045958909 -0.235767707 -0.001274774
## Plantago albicans 0.123741063 -0.022313608 -0.075759979
## Plantago sp 0.264124529 -0.154538249 0.083060010
## Asphodelus group 0.008528020 -0.003799956 -0.062725996
## Polygonum 0.156374254 -0.078753975 0.060075449
## Brassicaceae undiff 0.226598213 -0.141406059 -0.044592156
## Caryophyllaceae undiff -0.056162805 -0.150612155 -0.057970608
## Euphorbia 0.116464391 -0.072324781 -0.149919843
## Asteroideae 0.181521262 -0.040223091 -0.279702724
## Cichorieae 0.665507988 -0.401026359 -0.157539499
## Centaurea 0.056179578 0.011092455 0.028688181
## Matricaria-t 0.088536697 -0.015841176 -0.153459371
## Artemisia -0.063269287 -0.067608459 0.102798480
## Senecio-t 0.004624182 -0.276738045 0.160397563
## Asteraceae 0.231936427 -0.016333658 -0.243022353
## Apiaceae undiff 0.090482639 -0.205891241 -0.001458966
## Teucrium -0.058488489 -0.142455537 0.017896050
## Lamiaceae undiff -0.035762582 -0.106549617 0.109716744
## Dipsacaceae undiff 0.132145603 -0.050054140 0.081032268
## Medicago -0.044032008 -0.044400510 -0.050739702
## Fabaceae undiff -0.124273874 -0.079410427 -0.083401223
## Ranunculaceae undiff 0.053667784 0.054334560 0.016567225
## Scrophulariaceae undiff 0.390160603 -0.003438341 0.219499037
## Liliaceae undiff -0.124548930 -0.126179818 -0.048059835
## Parietaria 0.201556593 -0.048318253 -0.036961239
## Rosaceae -0.112570905 -0.014933029 0.068766775
## Sedum-t -0.031209913 -0.043610530 0.021085343
## Saxifraga 0.018271515 0.029170084 0.015534308
## Globularia cf alypum -0.092561197 -0.140617755 -0.066704416
## Rubiaceae undiff 0.081007212 0.048113046 -0.011083122
## Plantago coronopus-t 0.496760202 -0.104940499 0.307914656
## Chenopodiaceae 0.974106855 -0.309059605 0.254517985
## Cyperaceae undiff 0.199718630 -0.013154136 0.013687897
## attr(,"const")
## [1] 6.957806
# 4.6.2 Calculate cluster centroids for plotting PCs 1 and 2
pca.scores.sqrt = as.data.frame(scores(pca.sqrt, choices = c(1,2), display = c("sp", "sites"),
scaling = "species")[2])
pca.scores.sqrt$Cluster_SqrtTrans = mod.pol$Cluster_SqrtTrans
pca.scores.centroids.sqrt = pca.scores.sqrt %>% dplyr::group_by(Cluster_SqrtTrans) %>%
dplyr::summarise(
PC1.mean = mean(sites.PC1),
PC2.mean = mean(sites.PC2))
# Plot PCs 1 and 2
colors.cluster = brewer.pal(4, "Dark2") # define colors for plots
colors.species = c("red", rgb(0, 0, 0, alpha = 0))
shapes = seq(1:9)
biplot(pca.sqrt, scaling = "species", type = c("point", "n"), main = "PCA of Modern Pollen Data and Vegetation Type: Sqrt Trans")
mtext("Only the top 25% most influential species (by score) are labeled - PCs 1 and 2", side=3)
with(mod.pol, points(pca.sqrt, display = "sites", col = colors.cluster[Cluster_SqrtTrans],
scaling = "species", pch = shapes[Veg_Types], cex =1.1))
with(pca.scores.centroids.sqrt, points(PC1.mean, PC2.mean,col = colors.cluster[Cluster_SqrtTrans], pch = 16, cex =1.6))
with(species.scores.sqrt.df, text(pca.sqrt, display = "species", col = colors.species[include],
scaling = "species", cex = .8, offset = .2))
with(mod.pol, legend("bottomright", legend = levels(as.factor(Cluster_SqrtTrans)), bty = "n",
col = colors.cluster, pch = 16, cex = .9, title="Clusters: Sqrt Trans", title.adj =1))
#Sites only
biplot(pca.sqrt, scaling = "species", type = c("n", "n"), main = "PCA of Modern Pollen Data and Vegetation Type: Sqrt Trans")
mtext("Sites only - PCs 1 and 2", side=3)
with(mod.pol, points(pca.sqrt, display = "sites", col = colors.cluster[Cluster_SqrtTrans],
scaling = "species", pch = shapes[Veg_Types], cex =1.1))
with(mod.pol, text(pca.sqrt, display = "sites", col = colors.cluster[Cluster_SqrtTrans],
scaling = "species", pch = shapes[Veg_Types], cex =.7, pos = 3, offset = .5))
with(pca.scores.centroids.sqrt, points(PC1.mean, PC2.mean,col = colors.cluster[Cluster_SqrtTrans], pch = 16, cex =1.6))
with(mod.pol, legend("bottomright", legend = levels(as.factor(Cluster_SqrtTrans)), bty = "n",
col = colors.cluster, pch = 16, cex = .9, title="Clusters: Sqrt Trans", title.adj =1))
# A = B0 / (P0 + P1 + B0)
# B0 = number of stands in which the pollen type is present in the surface sample and the associated plant taxon is present in vegetation sample of the stand
# P0 = number of stands in which the pollen type is present in the surface sample but the associated plant taxon is absent in the local vegetation
# P1 = number of stands in which the plant taxon is present in the vegetation but the pollen type is absent in the surface sample
# For percentage data, the variance of the sample is a function of the mean. Consequently, percentage data do not meet the assumption of normality for correlation analysis. The arcsine transformation:
# Xa = arcsine * sqrt(X)
# where X is the untransformed percent and Xa is the transformed value is a standard procedure (Sokal and Rohlf, 1969) used to treat percentage data so they meet the assumptions for statistical tests. For these data the trans- formed and untransformed percentages give similar results for correlation analysis: the same types show significant relationships between pollen percentages and vegetation coverage.
# to make this work, all column names in the pollen dataset have to be the same as the species names identified in the field.
# see López-Sáez et al.2020 for pollen indices.
species.reveles = read.csv("./data/vegetation_relevés_davis_fid_disp.csv")
species.present = as.data.frame(unique(species.reveles$Equiv_Pollen_Types))
head(species.present)
## unique(species.reveles$Equiv_Pollen_Types)
## 1 Cichorieae
## 2 Poaceae undiff
## 3 Ailanthus
## 4 Lamiaceae undiff
## 5 Liliaceae undiff
## 6 Chenopodiaceae
# summarized by species then convert to 1/0 presence absence
species.reveles.summ = species.reveles %>%
dplyr::group_by(Equiv_Pollen_Types) %>%
dplyr::summarise_all(list(mean))
species.reveles.summ$height_log_value = round(species.reveles.summ$height_log_code)
species.reveles.summ$pollination_value = round(species.reveles.summ$pollination_code)
# replace values
species.reveles.summ$height_log_value[species.reveles.summ$height_log_value==1]<-"7-20 cm"
species.reveles.summ$height_log_value[species.reveles.summ$height_log_value==2]<-"25-60 cm"
species.reveles.summ$height_log_value[species.reveles.summ$height_log_value==3]<-"70-200 cm"
species.reveles.summ$height_log_value[species.reveles.summ$height_log_value==4]<- "300-3,000 cm"
species.reveles.summ$pollination_value[species.reveles.summ$pollination_value==0]<-"Entomophilic, anthers not exposed"
species.reveles.summ$pollination_value[species.reveles.summ$pollination_value==1]<-"Entomophilic, anthers exposed"
species.reveles.summ$pollination_value[species.reveles.summ$pollination_value==2]<-"Anemophilic"
species.reveles.info = species.reveles.summ[,1:6]
species.reveles.info = species.reveles.info[,-2]
colnames(species.reveles.info)[1] = "species"
species.reveles.data = species.reveles.summ[,-c(2:6)]
species.reveles.p.a = species.reveles.data %>% mutate_if(is.numeric, ~1 * (. > 0))
#load (all) pollen data with taxa names that match the reveles
pollen.taxa.indices = read.csv("./data/modern_pollen_data_percent_davis_fid_disp_index_values.csv")
# convert to Presence/absence
pollen.taxa.p.a = pollen.taxa.indices %>% mutate_if(is.numeric, ~1 * (. > 0))
#replace NA with 0
pollen.taxa.p.a[is.na(pollen.taxa.p.a )] = 0
# limit analysis to only the reveles we have pollen data for
reveles.analyzed = colnames(pollen.taxa.p.a) %>% .[-c(1)]
reveles.analyzed = c("Equiv_Pollen_Types", reveles.analyzed)
species.reveles.p.a = species.reveles.p.a[,c(reveles.analyzed)]
davis.indices.df = data.frame('species' = character(), 'A' =numeric(), 'U' = numeric(), 'O' = numeric(), 'Fid' = numeric(), 'Dis' = numeric())
for (i in 1:length(pollen.taxa.p.a$Pollen_taxa)){
species = pollen.taxa.p.a[i,1]
plants.reveles.present = species.reveles.p.a[species.reveles.p.a$Equiv_Pollen_Types == species,] %>% .[, colSums(. != 0) > 0]
plants.reveles.absent = species.reveles.p.a[species.reveles.p.a$Equiv_Pollen_Types == species,] %>% .[, colSums(. != 0) == 0]
plants.present = colnames(plants.reveles.present) %>% .[-1]
plants.absent = colnames(plants.reveles.absent)
pollen.reveles.present = pollen.taxa.p.a[pollen.taxa.p.a$Pollen_taxa == species,] %>% .[, colSums(. != 0) > 0]
pollen.present = colnames(pollen.reveles.present) %>% .[-1]
pollen.reveles.absent = pollen.taxa.p.a[pollen.taxa.p.a$Pollen_taxa == species,] %>% .[, colSums(. != 0) == 0]
pollen.absent = colnames(pollen.reveles.absent)
# where b0 is the number of samples in which both the pollen type and its parent plant are present,
# p0 is the number of samples in which the pollen type is present and the parent plant absent, and
# p1 is the number of samples in which the parent plant is present and the pollen type absent.
`%notin%` <- Negate(`%in%`)
B0 = pollen.present %in% plants.present %>% sum(.)
P0 = pollen.present %in% plants.absent %>% sum(.)
P1 = plants.present %in% pollen.absent %>% sum(.)
B0
P0
P1
A = B0 / (P0 + P1 + B0)
U = P1 / (P1 + B0)
O = P0 / (P0 + B0)
Fid = (B0 / length(plants.present)) * 100
Dis = (P0 / length(plants.absent)) * 100
A
U
O
Fid
Dis
index_input = c(A,U,O,Fid,Dis)
davis.indices.df[i,2:6] = index_input
davis.indices.df[i,1] = species
}
# 5.5 clean up results and subset to top 25% percent species (as calculated in PCA)
davis.indices.all = davis.indices.df
davis.indices.all[davis.indices.all=='NaN']<-0
# Join reveles info to species
species.reveles.info = as.data.frame(species.reveles.info)
davis.indices.all = merge(x = davis.indices.all, y = species.reveles.info, by = "species" )
# Cluster Davis and Dis/Fid indices
rownames(davis.indices.all) = davis.indices.all$species# add now names to help interpret clusters
davis.indices.all.dist <- vegdist(scale(davis.indices.all[,2:6]), method = "euclidean") # euclidean distance following Lopes- saez
davis.indices.all.clust <- hclust(davis.indices.all.dist, method = "ward.D2")
plot(davis.indices.all.clust$height[length(davis.indices.all.clust$height):(length(davis.indices.all.clust$height)-10)], type = "b", xlab = "Clusters", ylab= "Height") # use elbow method to determine how many clusters
abline(v = 5, lty = "dashed")
davis.indices.dend <- as.dendrogram(davis.indices.all.clust)
plot(davis.indices.dend, main = "Hierarchical clustering of Davis Indices and Dis/Fid using Ward's Method\n scaled", ylab = "Height")
rect.dendrogram(davis.indices.dend, k = 5)
davis.indices.all$Cluster =cutree(davis.indices.all.clust, 5)
# subset to top 25% species
top.25.species = rownames(species.scores.sqrt.df[species.scores.sqrt.df$include ==1,])
davis.indices.df.top.25 = davis.indices.all[(davis.indices.all$species %in% top.25.species), ]
davis.indices.df.top.25$include = 1
# create PCA for plot
davis.indices.rda = davis.indices.all[,2:4]
rownames(davis.indices.rda) = davis.indices.all$species
pca.davis = rda(davis.indices.rda)
# create color palette
colors.cluster.davis = brewer.pal(5, "Dark2") # define colors for plots
colors.species = c(colors.cluster.davis, rgb(0, 0, 0, alpha = 0))
## ggplot version
PCAscores <- scores(pca.davis, display = "sites") %>% as.data.frame() %>% rownames_to_column("species") %>% full_join(davis.indices.all, by = "species")
PCAvect <- scores(pca.davis, display = "species") %>% as.data.frame()
PCAvect.A = PCAvect[1,]
PCAvect.U = PCAvect[2,]
PCAvect.O = PCAvect[3,]
# plot PCA of -color
ggplot() +
geom_point(data = PCAscores, aes(x = PC1, y = PC2, color = as.factor(Cluster)), shape = 16, size = 2) +
scale_color_manual(values =c("#1B9E77" ,"#D95F02" ,"#7570B3" ,"#E7298A", "#66A61E"), breaks = c(1,2,3,4,5)) +
scale_shape(solid = FALSE) +
geom_text_repel(data = PCAscores,aes(x = PC1, y = PC2,label = species, segment.alpha = 0.5, segment.size = 0.3), max.overlaps = 30,box.padding = unit(0.45, "lines"), size = 2.8) +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
geom_segment(data = PCAvect, aes(x = 0, y = 0, xend = PC1, yend = PC2), alpha = 0.5, arrow = arrow(length = unit(0.2, "cm"))) +
geom_text(data = PCAvect.A, aes(x = PC1, y = PC2, label = rownames(PCAvect.A)), nudge_x = -.055, nudge_y = 0.04, size =5) +
geom_text(data = PCAvect.O, aes(x = PC1, y = PC2, label = rownames(PCAvect.O)), nudge_x = .07, nudge_y = -0.003, size =5) +
geom_text(data = PCAvect.U, aes(x = PC1, y = PC2, label = rownames(PCAvect.U)), nudge_x = -.02, nudge_y = -0.04, size =5) +
theme_bw() + theme(axis.title = element_text(size=13), axis.text = element_text(size = 11), legend.position = c(.9,.115),legend.background = element_rect(fill="white",size=0.25, linetype="solid", colour ="black")) +
labs(x = "\nPC1 (62.1%)",
y = "\nPC2 (35.0%)", color = "Cluster groups")
davis.indices.all$cluster_include = match(davis.indices.all$species, davis.indices.df.top.25$species)
for (i in 1:length(davis.indices.all$species)){
if (!is.na(davis.indices.all$cluster_include[i])){davis.indices.all$cluster_include[i] = davis.indices.all$Cluster[i]}}
davis.indices.all[is.na(davis.indices.all)] <- 6
# plot Fidelity / Dispersibility scatterplots
# reorder factors
davis.indices.all$height_log_value<- factor(davis.indices.all$height_log_value, levels=c("7-20 cm", "25-60 cm", "70-200 cm", "300-3,000 cm"))
ggplot(data = davis.indices.all, aes(x =Dis , y = Fid)) +
geom_point(aes(col = pollination_value, size = height_log_value,shape = Cluster)) + scale_shape_identity(guide = "legend") +
geom_text_repel(aes(label = species, segment.alpha = 0.5, segment.size = 0.45), max.overlaps = 50,box.padding = unit(0.6, "lines"),point.padding = unit(0.6, "lines"), size = 3.0) +
scale_fill_brewer(palette="Dark2")+
labs(title = "", subtitle = "", y = "Fidelity (%)\n", x = "\nDispersibility (%)",
col = "Pollination syndrome", size = "Taxa height") +
theme_bw() + theme(axis.title = element_text(size = 13), axis.text = element_text(size = 11),
legend.background = element_rect(fill="white",size=0.25, linetype="solid", colour ="black"))
# load and select environmental data
env.data.raw = read.csv("./data/environmental_variables.csv")
row.names(env.data.raw) = env.data.raw$Code
env.data = env.data.raw[reveles.analyzed[-1],]
env.data = env.data[,4:(length(env.data))]
env.data = as.data.frame(scale(env.data))
#conduct initial, holding nothing back, RDA
rda.initial = rda(mod.pol.sqrt ~., data = env.data)
# Analyze variance inflation factor to eliminate correlated variables
vif.cca(rda.initial)
## masl aspect
## 65.523152 3.668796
## herbivory_pressure agropastoral_use_binary
## 6.477274 4.193323
## trampling land_open
## 3.328012 7.685014
## Soil_type Sea_distance_meters
## 2.869669 8.517636
## thermality_index_2008_2018 rainfall_10_yrs
## 432.239973 12.880123
## temp_10_yrs Percent_Trees
## 512.338090 9.635628
## Percent_Shrubs Percent_herbs
## 16.181554 7.905909
## Percent_bryophytes Percent_rocks
## 2.365431 6.438860
## Percent_stones_gravels Slope_percent
## 4.096674 118.497414
## Slope_degree salty_soil
## 123.220370 6.238257
## flooded_soil direct_human_pressure
## 4.586079 9.010840
## direct_human_pressure_bi Nitrophily_index_relevés
## 20.396454 6.416165
## n_total_taxa n_total_crops
## 7.209848 21.241850
## N_totals_nitrophilous_taxa EFFIS_Fire_10km_10yrs
## 7.372386 10.203034
## Veg_type Crop_index
## 4.015775 10.044180
## Dist_ag_km gHM
## 3.769041 6.596807
#Conduct initial cut by removing highly correlated variables (those with a variance inflation factor of approximately >10)
env.data.cut = subset(env.data, select=-c(Slope_degree,thermality_index_2008_2018,Percent_Trees,
Percent_Shrubs, Percent_herbs, Percent_bryophytes,
Percent_rocks, Percent_stones_gravels,N_totals_nitrophilous_taxa,
n_total_taxa, n_total_crops, Veg_type, direct_human_pressure))
# examine the result of cutting variables
rda.cut = rda(mod.pol.sqrt ~., data = env.data.cut)
vif.cca(rda.cut) # proceed with these variables
## masl aspect herbivory_pressure
## 14.820281 1.205186 2.837920
## agropastoral_use_binary trampling land_open
## 2.967160 1.656827 2.387789
## Soil_type Sea_distance_meters rainfall_10_yrs
## 1.373523 2.956466 6.340926
## temp_10_yrs Slope_percent salty_soil
## 11.982328 3.284784 3.205547
## flooded_soil direct_human_pressure_bi Nitrophily_index_relevés
## 2.036656 3.490859 2.390804
## EFFIS_Fire_10km_10yrs Crop_index Dist_ag_km
## 4.423643 2.220410 1.647820
## gHM
## 2.522092
# Run second RDA
rda.2 = rda(mod.pol.sqrt ~., data = env.data.cut)
stp = rda(mod.pol.sqrt ~ 1, data = env.data.cut)
# Plots No Selection
# clusters
rda.no.selection.scores.sqrt = as.data.frame(scores(rda.2, choices = c(1,2), display = c("sp", "sites"),
scaling = "species")[2])
rda.no.selection.scores.sqrt$Cluster_SqrtTrans = mod.pol$Cluster_SqrtTrans
rda.no.selection.centroids.sqrt = rda.no.selection.scores.sqrt %>% dplyr::group_by(Cluster_SqrtTrans) %>%
dplyr::summarise(
RDA1.mean = mean(sites.RDA1),
RDA2.mean = mean(sites.RDA2))
# Sites
plot(rda.2, type="n", main = "RDA", ylim =c(-3.5,2.5), xlim = c(-3,3))
mtext("No Selection", side=3)
text(rda.2, dis="bp")
with(mod.pol, points(rda.2, display = "sites", col = colors.cluster[Cluster_SqrtTrans],
scaling = "species", pch = shapes[Veg_Types], cex =1.1))
with(mod.pol, text(rda.2, display = "sites", col = colors.cluster[Cluster_SqrtTrans],
scaling = "species", pch = shapes[Veg_Types], cex =.7, pos = 3, offset = .5))
with(rda.no.selection.centroids.sqrt, points(RDA1.mean, RDA2.mean,col = colors.cluster[Cluster_SqrtTrans], pch = 16, cex =1.6))
with(mod.pol, legend("bottomright", legend = levels(as.factor(Cluster_SqrtTrans)), bty = "n",
col = colors.cluster, pch = 16, cex = .7, title="Clusters: Sqrt Trans", title.adj =1))
with(mod.pol, legend("bottomleft", legend = levels(Veg_Types), bty = "n", pch = shapes, cex = .7, title="Veg Types", title.adj = 0.01))
# Species
plot(rda.2, display = c("sp", "cn"), main = "RDA", ylim =c(-1.5,1.5), xlim = c(-1.5, 1.5))
mtext("No Selection", side=3)
text(rda.2, dis="bp")
plot(rda.2, display = c("sp", "cn"), main = "RDA", ylim =c(-.5,.5), xlim = c(-.5, .5))
mtext("No Selection - zoomed", side=3)
text(rda.2, dis="bp")
# Anova
anova(rda.2, by = "margin")
## Permutation test for rda under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
##
## Model: rda(formula = mod.pol.sqrt ~ masl + aspect + herbivory_pressure + agropastoral_use_binary + trampling + land_open + Soil_type + Sea_distance_meters + rainfall_10_yrs + temp_10_yrs + Slope_percent + salty_soil + flooded_soil + direct_human_pressure_bi + Nitrophily_index_relevés + EFFIS_Fire_10km_10yrs + Crop_index + Dist_ag_km + gHM, data = env.data.cut)
## Df Variance F Pr(>F)
## masl 1 0.9231 1.1499 0.294
## aspect 1 0.5666 0.7057 0.822
## herbivory_pressure 1 1.4647 1.8246 0.037 *
## agropastoral_use_binary 1 0.8712 1.0852 0.361
## trampling 1 1.3858 1.7263 0.040 *
## land_open 1 1.4243 1.7742 0.040 *
## Soil_type 1 1.5646 1.9490 0.012 *
## Sea_distance_meters 1 1.1855 1.4768 0.095 .
## rainfall_10_yrs 1 1.1971 1.4913 0.104
## temp_10_yrs 1 1.1205 1.3959 0.135
## Slope_percent 1 0.9249 1.1521 0.256
## salty_soil 1 1.3385 1.6673 0.031 *
## flooded_soil 1 1.9601 2.4417 0.003 **
## direct_human_pressure_bi 1 0.4635 0.5774 0.921
## Nitrophily_index_relevés 1 0.7411 0.9231 0.536
## EFFIS_Fire_10km_10yrs 1 0.9027 1.1245 0.313
## Crop_index 1 0.9332 1.1625 0.272
## Dist_ag_km 1 1.1332 1.4117 0.128
## gHM 1 1.1000 1.3702 0.131
## Residual 27 21.6747
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(rda.2)
##
## Call:
## rda(formula = mod.pol.sqrt ~ masl + aspect + herbivory_pressure + agropastoral_use_binary + trampling + land_open + Soil_type + Sea_distance_meters + rainfall_10_yrs + temp_10_yrs + Slope_percent + salty_soil + flooded_soil + direct_human_pressure_bi + Nitrophily_index_relevés + EFFIS_Fire_10km_10yrs + Crop_index + Dist_ag_km + gHM, data = env.data.cut)
##
## Partitioning of variance:
## Inertia Proportion
## Total 50.95 1.0000
## Constrained 29.27 0.5746
## Unconstrained 21.67 0.4254
##
## Eigenvalues, and their contribution to the variance
##
## Importance of components:
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6 RDA7
## Eigenvalue 6.7569 5.5294 3.49604 2.16194 2.02025 1.87810 1.35649
## Proportion Explained 0.1326 0.1085 0.06862 0.04243 0.03965 0.03686 0.02662
## Cumulative Proportion 0.1326 0.2412 0.30977 0.35220 0.39186 0.42872 0.45534
## RDA8 RDA9 RDA10 RDA11 RDA12 RDA13 RDA14
## Eigenvalue 1.20061 1.02872 0.78268 0.66822 0.54229 0.457657 0.418010
## Proportion Explained 0.02357 0.02019 0.01536 0.01312 0.01064 0.008983 0.008205
## Cumulative Proportion 0.47891 0.49910 0.51446 0.52758 0.53822 0.547206 0.555410
## RDA15 RDA16 RDA17 RDA18 RDA19 PC1
## Eigenvalue 0.312767 0.225830 0.183862 0.155230 0.098733 3.93750
## Proportion Explained 0.006139 0.004433 0.003609 0.003047 0.001938 0.07728
## Cumulative Proportion 0.561549 0.565982 0.569591 0.572637 0.574575 0.65186
## PC2 PC3 PC4 PC5 PC6 PC7 PC8
## Eigenvalue 2.6288 2.08031 1.89635 1.54175 1.43620 1.3196 1.05192
## Proportion Explained 0.0516 0.04083 0.03722 0.03026 0.02819 0.0259 0.02065
## Cumulative Proportion 0.7035 0.74429 0.78151 0.81177 0.83996 0.8659 0.88651
## PC9 PC10 PC11 PC12 PC13 PC14 PC15
## Eigenvalue 0.80966 0.72066 0.66127 0.6012 0.493719 0.435896 0.404391
## Proportion Explained 0.01589 0.01414 0.01298 0.0118 0.009691 0.008556 0.007937
## Cumulative Proportion 0.90240 0.91654 0.92952 0.9413 0.951012 0.959568 0.967505
## PC16 PC17 PC18 PC19 PC20 PC21
## Eigenvalue 0.324795 0.272137 0.228866 0.164027 0.149336 0.11516
## Proportion Explained 0.006375 0.005341 0.004492 0.003219 0.002931 0.00226
## Cumulative Proportion 0.973880 0.979221 0.983714 0.986933 0.989864 0.99212
## PC22 PC23 PC24 PC25 PC26 PC27
## Eigenvalue 0.104000 0.098368 0.073961 0.056390 0.0438603 0.0246680
## Proportion Explained 0.002041 0.001931 0.001452 0.001107 0.0008609 0.0004842
## Cumulative Proportion 0.994166 0.996096 0.997548 0.998655 0.9995158 1.0000000
##
## Accumulated constrained eigenvalues
## Importance of components:
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6 RDA7
## Eigenvalue 6.7569 5.5294 3.4960 2.16194 2.02025 1.87810 1.35649
## Proportion Explained 0.2308 0.1889 0.1194 0.07385 0.06901 0.06416 0.04634
## Cumulative Proportion 0.2308 0.4197 0.5391 0.61298 0.68199 0.74615 0.79249
## RDA8 RDA9 RDA10 RDA11 RDA12 RDA13 RDA14
## Eigenvalue 1.20061 1.02872 0.78268 0.66822 0.54229 0.45766 0.41801
## Proportion Explained 0.04101 0.03514 0.02674 0.02283 0.01852 0.01563 0.01428
## Cumulative Proportion 0.83350 0.86864 0.89538 0.91821 0.93673 0.95237 0.96665
## RDA15 RDA16 RDA17 RDA18 RDA19
## Eigenvalue 0.31277 0.225830 0.183862 0.155230 0.098733
## Proportion Explained 0.01068 0.007714 0.006281 0.005303 0.003373
## Cumulative Proportion 0.97733 0.985044 0.991325 0.996627 1.000000
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 6.957806
##
##
## Species scores
##
## RDA1 RDA2 RDA3 RDA4 RDA5
## Quercus -1.344420 1.2643863 0.0214363 -0.283826 0.062850
## Olea -0.727439 -0.6459288 -0.7306117 -0.096372 -0.415130
## Corylus -0.203860 0.0597363 0.0007717 -0.162750 -0.028184
## Buxus -0.043142 0.3771044 0.6290913 0.549604 0.158579
## Pinus -0.928828 -0.7704397 0.3385198 -0.389298 -0.099267
## Betula -0.029566 -0.1193019 0.0463787 0.049305 0.072215
## Alnus 0.045832 -0.0358244 0.1025487 0.051366 -0.049278
## Juniperus-t -0.190404 -0.7795575 0.9258606 0.315721 0.168576
## Ceratonia siliqua -0.083424 -0.1020697 0.0725295 0.038453 0.026477
## Hypericum -0.261661 0.4811955 0.2247383 0.182299 -0.040392
## Myrtus 0.152265 -0.0107884 -0.0609324 -0.194766 0.392441
## Sorbus group -0.048965 0.1452562 0.0383545 0.005331 0.004724
## Ephedra fragilis-t 0.022005 0.0081053 0.0663978 -0.013097 -0.087246
## Genista-t -0.139963 -0.0691396 -0.0207254 0.012178 -0.002411
## Phyllirea -0.024655 -0.1477456 0.0823726 -0.128893 0.116397
## Pistacia -0.538900 -1.0021691 -0.2782707 0.307328 -0.233541
## Erica -0.177606 -0.4397931 0.3131448 -0.401728 0.640523
## Cistaceae undiff -0.068527 -0.2442611 -0.0028153 -0.175418 0.359193
## Poaceae undiff 0.589841 -0.0859185 -0.3091856 -0.265247 0.158670
## Hordeum-t 0.131630 0.0238810 -0.3108114 0.146681 0.185941
## Cerealia-t 0.130665 -0.0733177 -0.3474821 0.304111 0.120758
## Plantago lanceolata-t 0.024385 -0.2525894 -0.1043992 0.522910 0.131954
## Plantago albicans 0.085441 -0.0627482 -0.0898538 0.200516 0.044830
## Plantago sp 0.265727 -0.0943794 0.0842936 0.046766 0.112788
## Asphodelus group 0.040060 -0.0011399 -0.1193279 -0.007368 0.080849
## Polygonum 0.151580 0.0008972 -0.0419177 0.027466 0.015925
## Brassicaceae undiff 0.171803 -0.0600666 -0.0701023 0.090879 0.098442
## Caryophyllaceae undiff -0.016267 -0.0651463 -0.0227074 -0.043418 0.040895
## Euphorbia 0.159422 0.1217491 -0.2247056 0.031429 0.016472
## Asteroideae 0.235131 0.0290074 -0.2365417 -0.040603 0.120972
## Cichorieae 0.707089 -0.0912559 -0.2973547 0.026572 0.254424
## Centaurea 0.043311 0.0389310 -0.0170178 0.026857 -0.012449
## Matricaria-t 0.151466 0.1088566 -0.1412324 0.208135 -0.039945
## Artemisia -0.033560 -0.0255855 0.1187422 0.117039 -0.081557
## Senecio-t 0.016141 -0.1438814 0.1143729 -0.113493 0.016964
## Asteraceae 0.242029 0.0842220 -0.2257914 0.016550 0.111001
## Apiaceae undiff 0.190999 -0.1189310 -0.0300236 -0.248895 0.349785
## Teucrium -0.053076 -0.1390514 0.0243326 -0.061590 0.041574
## Lamiaceae undiff -0.029332 -0.0892919 0.1103202 0.087089 0.088499
## Dipsacaceae undiff 0.137334 -0.0706790 0.0755478 -0.058982 -0.077732
## Medicago -0.023904 -0.0716171 -0.0480357 0.007097 -0.008616
## Fabaceae undiff -0.098467 -0.1022639 0.0103009 0.030735 -0.018318
## Ranunculaceae undiff 0.005612 0.1074386 0.1428207 0.113342 -0.118194
## Scrophulariaceae undiff 0.414951 0.1241707 0.3701121 -0.028300 -0.374763
## Liliaceae undiff -0.026170 -0.1372871 -0.0306048 -0.122116 -0.022948
## Parietaria 0.192357 0.0490440 -0.1487872 -0.005180 0.034125
## Rosaceae -0.142862 0.0751316 0.1126715 0.050484 0.080359
## Sedum-t -0.019205 -0.0521786 0.0663304 -0.031319 0.116044
## Saxifraga 0.020648 0.0133850 0.0144982 0.012338 -0.001560
## Globularia cf alypum -0.068790 -0.1949006 0.0682734 -0.220532 0.088442
## Rubiaceae undiff 0.052488 -0.0210458 0.0117010 -0.019860 -0.069460
## Plantago coronopus-t 0.530062 -0.0091495 0.3968099 -0.185132 -0.390808
## Chenopodiaceae 0.939957 -0.0850269 0.3508101 -0.466856 -0.431611
## Cyperaceae undiff 0.192439 0.0090904 0.0278969 -0.081580 -0.093779
## RDA6
## Quercus -0.446088
## Olea 0.455765
## Corylus 0.119069
## Buxus 0.666183
## Pinus 0.107053
## Betula 0.083257
## Alnus 0.055805
## Juniperus-t -0.359060
## Ceratonia siliqua -0.146489
## Hypericum 0.432782
## Myrtus 0.036958
## Sorbus group 0.063143
## Ephedra fragilis-t -0.036046
## Genista-t 0.037791
## Phyllirea 0.107183
## Pistacia -0.244012
## Erica 0.215371
## Cistaceae undiff -0.238902
## Poaceae undiff -0.003224
## Hordeum-t 0.051326
## Cerealia-t -0.059546
## Plantago lanceolata-t -0.224675
## Plantago albicans -0.125363
## Plantago sp -0.045288
## Asphodelus group 0.055677
## Polygonum -0.025405
## Brassicaceae undiff 0.030626
## Caryophyllaceae undiff 0.067068
## Euphorbia -0.061908
## Asteroideae 0.062852
## Cichorieae -0.148285
## Centaurea -0.061357
## Matricaria-t 0.040082
## Artemisia 0.097530
## Senecio-t -0.094114
## Asteraceae 0.102157
## Apiaceae undiff 0.054692
## Teucrium 0.057038
## Lamiaceae undiff -0.021450
## Dipsacaceae undiff 0.001676
## Medicago 0.084736
## Fabaceae undiff 0.077271
## Ranunculaceae undiff 0.119862
## Scrophulariaceae undiff -0.082618
## Liliaceae undiff 0.098274
## Parietaria 0.039764
## Rosaceae -0.016671
## Sedum-t 0.126330
## Saxifraga 0.086373
## Globularia cf alypum 0.283650
## Rubiaceae undiff 0.003190
## Plantago coronopus-t -0.248603
## Chenopodiaceae 0.108623
## Cyperaceae undiff -0.022065
##
##
## Site scores (weighted sums of species scores)
##
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6
## ME-38-HOF -1.35756 2.31958 -0.62785 -0.55451 0.16213 -1.16146
## MA-25-HOF -1.36177 1.31465 -0.54445 -1.29488 -0.03831 -0.71161
## MA-14-HOF -0.97236 2.35347 -0.09197 0.09711 -0.04877 -1.74749
## MA-9-HOF -1.36167 2.37049 0.01647 -0.83216 0.13003 -1.73816
## MA-26-HOF -1.28658 0.95886 -0.44923 -1.62741 -1.00930 -0.69831
## E-65-HOF -0.92720 -0.97051 0.80344 1.43237 0.04994 -2.37843
## MA-23-MAQ -0.53906 -0.82157 -2.10905 1.03074 -2.04325 1.38874
## MA-24-MAQ -0.38481 -0.90473 -1.76092 -0.03679 -0.74366 0.88219
## MA-17-MAQ -0.71707 -0.55830 -1.11146 -0.41905 -1.69120 1.15442
## ME-32-MAQ 1.25510 0.19507 -0.72991 -1.75959 4.24920 0.58305
## MA-19-MAQ -0.64966 -1.29352 -1.71124 1.14708 -1.41389 0.13379
## MA-22-MAQ -1.09924 -0.63847 -0.80298 0.15892 0.14725 0.51390
## MA-12-MAQ -0.87922 -0.14144 -0.06377 0.26322 0.41732 -0.23219
## E-64-MAQ -1.07445 -1.29968 0.71561 -0.67562 0.48372 -0.57473
## F-69-MAQ 0.45699 -0.58905 1.60996 1.14927 0.50459 -1.73259
## E-60-MAQ -1.10162 -1.31311 0.34372 -0.22399 -0.56939 -0.68700
## MA-8-MAQ -0.09796 -0.55047 -0.11506 -1.29369 -0.89279 0.15907
## MA-27-MAQ -0.69253 -1.65837 1.75839 -0.79375 0.68285 -0.55935
## F-67-MAQ -0.65233 -2.06231 1.92013 0.85645 -0.12191 -0.97266
## MA-21-GAR -0.44288 -1.02240 -0.61883 -1.98544 -0.22825 1.57801
## E-63-GAR -0.55901 -0.48321 0.15994 0.02375 1.23879 -1.68416
## MA-20-GAR -0.13858 -0.75486 -0.87901 -0.05623 -0.79097 0.80699
## F-66-GAR 0.53931 -0.52933 0.90393 -0.05057 0.30735 -0.63809
## E-62-GAR -0.50661 -1.46905 1.91135 -0.08061 1.84307 -0.60242
## MA-4-GAR -0.19288 -1.04415 -0.89236 -0.12334 0.34292 -0.01696
## MA-10-BUX -1.08076 2.25146 -0.52950 -0.32282 -0.44929 -0.11220
## MA-44-BUX -0.67862 1.31734 1.40486 1.19444 0.38419 2.07500
## MA-43-BUX -0.73514 1.25799 1.33048 0.53401 -0.95531 1.21660
## MA-42-BUX 0.44430 1.40397 2.15874 3.23657 0.45702 3.03293
## CA-51-BUX -0.43761 -1.30767 0.01956 -1.09876 0.77780 1.79079
## CA-50-BUX 0.04522 -1.04017 0.96910 -0.96775 1.90559 1.66392
## MA-3-DUN 2.08380 0.66672 0.37916 -0.27892 -1.64413 -0.29252
## MA-11-SPH -0.41579 1.57995 0.65806 0.37004 0.62482 2.11076
## ME-30-SPH 0.31564 -0.22877 -1.57033 0.07588 -0.43925 1.01055
## ME-39-ANT 0.30522 -0.02474 -1.94256 0.96044 -1.38073 1.73372
## MA-18-ANT 2.24634 0.29001 -1.32105 0.40796 1.02611 0.03656
## MA-13-ANT -0.76076 1.16622 -0.79479 -0.32816 0.18486 -0.28641
## ME-37-ANT 1.68313 0.12289 -1.67435 0.75419 1.42894 -0.14910
## F-68-ANT 1.41603 0.40016 -1.40958 3.17491 1.09304 -1.06792
## MA-15-ANT -0.30955 0.67166 -0.53000 -0.26862 0.01990 0.48668
## MA-2-CSV 2.23330 0.81315 1.06320 -0.54875 -1.93820 -0.76055
## MA-1-CSV 2.66274 0.84869 1.04313 -0.83386 -1.96506 0.41998
## ME-29-CSV 1.08562 0.51063 1.23333 0.13462 -0.82663 -1.46206
## ME-33-HFV 1.55963 -0.16196 1.30680 -1.46666 -1.20502 -0.89114
## ME-41-HFV 1.70114 -0.13490 0.38184 -1.76395 0.59406 0.43282
## MA-7-HFV 1.66483 -0.15773 -1.41915 1.31026 0.65822 -0.67483
## E-61-HFV -0.28508 -1.65248 1.60820 1.37366 0.68160 -1.37816
##
##
## Site constraints (linear combinations of constraining variables)
##
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6
## ME-38-HOF -0.81762 0.73629 -0.28430 -0.51377 0.82621 -0.966249
## MA-25-HOF -1.57282 0.51460 -0.20505 -0.76569 0.33995 -1.028087
## MA-14-HOF -1.83949 2.30994 -0.30957 -0.69660 -0.23546 -1.464364
## MA-9-HOF -0.79399 2.00897 0.33086 -0.26481 0.38858 -0.708178
## MA-26-HOF -0.86133 0.28525 -0.32086 -1.75652 -0.91095 -0.641664
## E-65-HOF 0.39815 -0.49198 -0.27985 2.40294 -0.06684 -0.330251
## MA-23-MAQ -1.18658 -0.65037 -1.93078 0.67543 -1.16971 0.373190
## MA-24-MAQ -0.22723 -0.70300 -1.24330 -0.45614 -0.76980 1.465478
## MA-17-MAQ -0.29909 -0.98620 0.59012 0.01936 -0.27635 0.265438
## ME-32-MAQ 1.25510 0.19507 -0.72991 -1.75959 4.24920 0.583052
## MA-19-MAQ -0.77739 -1.48812 -1.02639 0.81299 -1.12160 -0.014002
## MA-22-MAQ -0.31970 -0.36846 -1.07496 0.60326 -0.23137 0.630884
## MA-12-MAQ -0.96613 0.87539 -0.23266 0.03998 0.16450 0.577345
## E-64-MAQ -0.92711 -1.25403 1.10160 -0.28149 0.48879 -0.618886
## F-69-MAQ -0.39271 -1.03348 0.71219 0.72787 0.23791 -0.988386
## E-60-MAQ -0.94189 -1.00954 0.59993 0.19809 -0.71084 -1.480126
## MA-8-MAQ -0.30350 -0.06020 -0.15194 -1.53672 -0.12111 -0.078336
## MA-27-MAQ -0.78846 -0.73209 0.30068 -1.19982 -0.52248 -0.174826
## F-67-MAQ -0.55225 -1.09097 0.52230 0.19270 -0.25088 0.191790
## MA-21-GAR -0.72012 -0.84902 0.25638 -1.60164 -1.05943 1.282582
## E-63-GAR -0.12502 -0.19572 0.10023 0.19145 1.19974 -1.158863
## MA-20-GAR 0.06861 -1.07832 -0.39140 -0.05399 -0.55282 0.926543
## F-66-GAR 0.19422 -1.09380 0.48957 0.50461 0.75547 -0.120904
## E-62-GAR -0.56202 -1.11327 1.29921 0.09968 0.55828 -0.898113
## MA-4-GAR 0.18403 -0.11415 -0.57886 0.27397 0.51403 -1.308661
## MA-10-BUX -0.64348 1.92234 -0.36284 -0.39066 0.17195 -0.238401
## MA-44-BUX -0.20825 1.29254 1.02783 0.70088 0.45533 2.187554
## MA-43-BUX -1.42791 1.65144 0.92481 1.16430 -0.90108 1.397819
## MA-42-BUX 0.27087 0.90744 2.88080 2.79759 0.58122 1.955560
## CA-51-BUX -0.22064 -1.25381 0.81161 -0.77298 1.46663 1.565090
## CA-50-BUX -0.18360 -1.13871 0.02684 -1.30047 1.11514 1.896259
## MA-3-DUN 1.56398 -0.39761 -0.15455 0.02548 -1.24841 0.749893
## MA-11-SPH -0.59010 1.77348 0.72185 0.32026 0.35399 1.502806
## ME-30-SPH 0.87658 0.49983 -1.33879 0.55875 -1.14557 0.498965
## ME-39-ANT 0.40020 -0.86630 -1.64653 0.22154 -0.84275 0.811998
## MA-18-ANT 1.86756 0.73484 -1.98552 0.19253 0.57098 0.509187
## MA-13-ANT -0.32002 0.95375 -1.31564 0.62138 -0.63711 -0.272787
## ME-37-ANT 1.24246 0.73478 -1.20193 0.88553 1.41036 -0.724699
## F-68-ANT 0.69693 -0.59989 -1.07731 2.51557 0.49741 -1.044600
## MA-15-ANT -0.52825 0.42214 -0.85496 -0.37777 0.10029 0.138721
## MA-2-CSV 2.48270 0.79567 1.22635 -0.39914 -1.33608 -1.176229
## MA-1-CSV 2.13075 0.60298 1.05066 -0.95385 -1.43741 -0.003536
## ME-29-CSV 1.32707 1.10955 1.36481 -0.68227 -1.64581 -1.153702
## ME-33-HFV 1.50386 -0.65882 1.01642 -1.01887 -0.69886 -0.185211
## ME-41-HFV 1.79804 0.02623 0.37006 -1.42448 -0.22269 0.257723
## MA-7-HFV 1.17990 0.20988 -0.80817 0.75420 0.92876 -1.198900
## E-61-HFV -0.34432 -1.33453 1.78097 0.70691 0.74068 -1.789919
##
##
## Biplot scores for constraining variables
##
## RDA1 RDA2 RDA3 RDA4 RDA5
## masl -0.43939 0.649538 0.18138 0.2852311 0.03845
## aspect -0.17069 -0.075268 0.08797 -0.2065774 0.26750
## herbivory_pressure 0.05901 -0.060246 -0.29339 -0.0789728 -0.24592
## agropastoral_use_binary 0.23500 0.027379 -0.54208 0.2155152 0.02720
## trampling -0.07522 0.218527 0.22672 -0.1347781 -0.33773
## land_open 0.74084 0.065675 0.06295 0.3148256 0.10665
## Soil_type 0.18234 0.028340 -0.10604 -0.2556279 0.61731
## Sea_distance_meters -0.24146 0.466355 -0.29719 0.0006233 0.16751
## rainfall_10_yrs -0.30946 0.746666 -0.14214 -0.0607695 -0.13504
## temp_10_yrs 0.33229 -0.749715 -0.18873 -0.2142598 -0.02414
## Slope_percent -0.42318 0.402122 0.08885 0.0562811 -0.06747
## salty_soil 0.56986 0.177491 0.14608 -0.0986607 -0.46326
## flooded_soil 0.62842 0.127529 0.34189 -0.3045159 -0.36314
## direct_human_pressure_bi -0.06530 0.534580 -0.38099 0.2422492 -0.08361
## Nitrophily_index_relevés 0.46676 -0.005931 -0.32408 0.1443164 0.19554
## EFFIS_Fire_10km_10yrs -0.20531 0.465031 0.37091 0.1903263 -0.28580
## Crop_index 0.11501 -0.078715 -0.20342 0.0092121 0.01894
## Dist_ag_km -0.06437 -0.170467 0.12963 -0.2033727 0.27230
## gHM 0.29164 -0.159010 -0.13705 -0.0081939 -0.15757
## RDA6
## masl 0.403683
## aspect -0.098143
## herbivory_pressure 0.178285
## agropastoral_use_binary -0.085347
## trampling 0.032560
## land_open 0.195010
## Soil_type 0.084704
## Sea_distance_meters 0.005612
## rainfall_10_yrs 0.019881
## temp_10_yrs -0.192240
## Slope_percent 0.360555
## salty_soil -0.073746
## flooded_soil -0.153730
## direct_human_pressure_bi 0.184516
## Nitrophily_index_relevés -0.218027
## EFFIS_Fire_10km_10yrs 0.241087
## Crop_index -0.253065
## Dist_ag_km 0.444083
## gHM -0.007760
# Stepwise forward/backward method for variable selection
rda.selected.fb = step(rda.2, scope = list(lower = formula(stp), upper = formula(rda.2)), trace = 0, test = "perm")
colnames(env.data.cut)
## [1] "masl" "aspect"
## [3] "herbivory_pressure" "agropastoral_use_binary"
## [5] "trampling" "land_open"
## [7] "Soil_type" "Sea_distance_meters"
## [9] "rainfall_10_yrs" "temp_10_yrs"
## [11] "Slope_percent" "salty_soil"
## [13] "flooded_soil" "direct_human_pressure_bi"
## [15] "Nitrophily_index_relevés" "EFFIS_Fire_10km_10yrs"
## [17] "Crop_index" "Dist_ag_km"
## [19] "gHM"
# create clusters and centroids for plotting
rda.fb.scores.sqrt = as.data.frame(scores(rda.selected.fb, choices = c(1,2), display = c("sp", "sites"),
scaling = "sites")[2])
rda.fb.scores.sqrt$Cluster_SqrtTrans = mod.pol$Cluster_SqrtTrans
rda.fb.centroids.sqrt = rda.fb.scores.sqrt %>% dplyr::group_by(Cluster_SqrtTrans) %>%
dplyr::summarise(
RDA1.mean = mean(sites.RDA1),
RDA2.mean = mean(sites.RDA2))
# Transform data for use with ggplot (for publication)
# env variables and taxa
RDAspecies <- scores(rda.selected.fb, display = "species", scaling = "species") %>% as.data.frame() %>% rownames_to_column("species")
clusters = rda.fb.scores.sqrt %>% rownames_to_column("Site_Code_Veg") %>% full_join(mod.pol.sites, by = "Site_Code_Veg")
clusters = as.data.frame(clusters[,c(1,4)])
RDAsites <- scores(rda.selected.fb, display = "sites", scaling = "sites") %>% as.data.frame() %>% rownames_to_column("Site_Code_Veg") %>% full_join(mod.pol.sites, by = "Site_Code_Veg") %>% full_join(clusters, by = "Site_Code_Veg")
RDAenv <- scores(rda.selected.fb, display = "bp", scaling = "species") %>% as.data.frame()
RDAenv.herb = RDAenv[1,]
RDAenv.agro = RDAenv[2,]
RDAenv.tramp = RDAenv[3,]
RDAenv.landopen = RDAenv[4,]
RDAenv.soiltype = RDAenv[5,]
RDAenv.rainfall = RDAenv[6,]
RDAenv.temp = RDAenv[7,]
RDAenv.slope = RDAenv[8,]
RDAenv.salty = RDAenv[9,]
RDAenv.flooded = RDAenv[10,]
RDAenv.fire = RDAenv[11,]
RDAenv.distag = RDAenv[12,]
RDAenv.gHM = RDAenv[13,]
# Sites
ggplot() +
geom_point(data = RDAsites, aes(x = RDA1, y = RDA2, color = as.factor(Cluster_SqrtTrans), shape = Veg_Types_Code ), size = 2.5) +
geom_text_repel(data = RDAsites,aes(x = RDA1, y = RDA2,label = Site_Code, segment.alpha = 0.5, segment.size = 0.3), max.overlaps = 10,box.padding = unit(0.45, "lines"), size = 2.8) +
geom_point(data = rda.fb.centroids.sqrt, aes(x = RDA1.mean, y = RDA2.mean, color = as.factor(Cluster_SqrtTrans)), shape = 19, size = 3) +
scale_shape_manual(values = c(0,1,2,3,4,5,6,7,8)) +
scale_color_manual(values = c("#D58B60", "#548F00", "#4A7381")) +
geom_text(data = RDAenv.herb, aes(x = RDA1, y = RDA2), label = "Herbivory pressure", nudge_x = -0.2, nudge_y = 0.0, size =4) +
geom_text(data = RDAenv.agro, aes(x = RDA1, y = RDA2), label = "Agropastoral use", nudge_x = -0.09, nudge_y = 0.029, size =4) +
geom_text(data = RDAenv.tramp, aes(x = RDA1, y = RDA2), label = "Trampling", nudge_x = 0.1, nudge_y = 0.04, size =4) +
geom_text(data = RDAenv.landopen, aes(x = RDA1, y = RDA2), label = "Landscape openness", nudge_x = 0.2, nudge_y = -0.04, size =4) +
geom_text(data = RDAenv.soiltype, aes(x = RDA1, y = RDA2), label = "Soil type", nudge_x =0.09, nudge_y = -0.031, size =4) +
geom_text(data = RDAenv.rainfall, aes(x = RDA1, y = RDA2), label = "Mean decadal rainfall", nudge_x =-0.01, nudge_y = 0.08, size =4) +
geom_text(data = RDAenv.temp, aes(x = RDA1, y = RDA2), label = "Mean decadal temperature", nudge_x =0.005, nudge_y = -0.032, size =4) +
geom_text(data = RDAenv.slope, aes(x = RDA1, y = RDA2), label = "Slope", nudge_x =0.02, nudge_y = 0.03, size =4) +
geom_text(data = RDAenv.salty, aes(x = RDA1, y = RDA2), label = "Saline soil", nudge_x =-0.05, nudge_y = 0.05, size =4) +
geom_text(data = RDAenv.flooded, aes(x = RDA1, y = RDA2), label = "Flooded Soil", nudge_x =0.13, nudge_y = 0.028, size =4) +
geom_text(data = RDAenv.fire, aes(x = RDA1, y = RDA2), label = "Fire Disturbance", nudge_x =-0.19, nudge_y = 0.021, size =4) +
geom_text(data = RDAenv.distag, aes(x = RDA1, y = RDA2), label = "Distance to agriculture", nudge_x =-0.23, nudge_y = 0.07, size =4) +
geom_text(data = RDAenv.gHM, aes(x = RDA1, y = RDA2), label = "gHM", nudge_x =0.023, nudge_y = -0.023, size =4) +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
geom_segment(data = RDAenv, aes(x = 0, y = 0, xend = RDA1, yend = RDA2), alpha = 0.5, arrow = arrow(length = unit(0.2, "cm"))) +
theme_bw() + theme(axis.title = element_text(size=13), axis.text = element_text(size = 11), legend.position = c(.895,.25),legend.background = element_rect(fill="white",size=0.25, linetype="solid", colour ="black")) +
labs(x = "\nRDA1 (12.6%)",
y = "RDA2 (10.0%)\n", color = "Cluster Centroid", shape = "Vegetation Type") +
lims(x = c(-0.7,1.2), y = c(-.9,.9))
# Species
ggplot() +
geom_point(data = RDAspecies, aes(x = RDA1, y = RDA2), color = 'grey50', shape = 1, size = 2.2) +
geom_text_repel(data = RDAspecies,aes(x = RDA1, y = RDA2,label = species, segment.alpha = 0.5, segment.size = 0.3), max.overlaps = 10,box.padding = unit(0.45, "lines"), size = 2.8) +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
geom_segment(data = RDAenv, aes(x = 0, y = 0, xend = RDA1, yend = RDA2), alpha = 0.5, arrow = arrow(length = unit(0.2, "cm"))) +
theme_bw() + theme(axis.title = element_text(size=13), axis.text = element_text(size = 11)) +
labs(x = "\nRDA1 (12.6%)",
y = "RDA2 (10.0%)\n") +
lims(x = c(-1.362,1.25), y = c(-.89,1.37))
# Inset of species plot (to show obscured data points)
ggplot() +
geom_point(data = RDAspecies, aes(x = RDA1, y = RDA2), color = 'grey50', shape = 1, size = 3) +
geom_text_repel(data = RDAspecies,aes(x = RDA1, y = RDA2,label = species, segment.alpha = 0.5, segment.size = 0.3), max.overlaps = 10,box.padding = unit(0.45, "lines"), size = 2.5) +
geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
theme_bw() + theme(axis.title = element_text(size=13), axis.text = element_text(size = 11), plot.margin=unit(c(.6,.6,.2,.2),"cm")) +
labs(x = "",y = "") + lims(x = c(-.1,.25), y = c(-.2,.2))