Please reference this work using the following citation:

      Servera-Vives, Gabriel, Maurici Mus Amezquita, Grant Snitker, Assunta Florenzano, Paola Torri, Joan Estrany, and Anna Maria Mercuri.
      2022. Modern pollen analogues for the understanding of pollen-vegetation relationships in a Mediterranean mosaic landscape (Balearic
      Islands, Western Mediterranean). Manuscript published in the The Holocene. Accessible via https://doi.org/10.1177/09596836221088229.


      The supplementary data and code for this project are accessible via the UNIMORE IRIS repository: https://doi.org/10.25431/11380_1254197.


Introduction

This RMarkdown demonstrates the data cleaning, transformations, and multivariate analysis used to understand the relationship between the composition of the modern pollen data and the vegetation communities throughout eh Balearic Islands. When running this Rmarkdown or the R script in the “Multivariate_Analysis_Servera_Vives_et_al_2022” RStudio project, all path names are relative to the containing folder. Also note that the term “species” is used in this script to denote variables and not necessarily taxa represented in the pollen data. Finally, this analysis excludes rare pollen taxa following methods in López-Sáez et al. 2020, which includes pollen taxa with percentages ≥ 3% in at least two samples.


0.0 Preamble

# clear workspace
rm(list = ls())
# set seed
set.seed(123)
# load libraries
library(devtools)
library(vegan)
library(RColorBrewer)
library(raster)
library(dplyr)
library(stats)
library(dendextend)
#devtools::install_github("ropenscilabs/ochRe") # install via devtools::install_github if using for the first time
library(ochRe)
library(ggplot2)
library(ggrepel)
library(indicspecies)
#devtools::install_github("gavinsimpson/ggvegan") # install via devtools::install_github if using for the first time
library(ggvegan)
library(tibble)


1.0 Load csv data and subset

mod.pol = read.csv("./data/modern_pollen_data_percent_reduced_1per_2samp.csv") # with the latest recombination of the pollen data
# note: these data are represented as percentages and have been subset to remove pollen clumps and 
# individual genera that are accounted for in summed genera (ex. Quercus represents the summed observations of Quercus ilex-t,  Quercus robur/pubescens-t, and Quercus sp).

mod.pol$Veg_Types = as.factor(mod.pol$Veg_Types)

mod.pol.data  = mod.pol[,-c(1:5)] # remove site name, code, and veg type. These will be added again later to examine similarities between veg types. 

mod.pol.data[is.na(mod.pol.data)] = 0 # replace all NAs with 0

mod.pol.sites = mod.pol[,c(1:5)] # preserve the veg type, site name, and code

mod.pol.sites$Veg_Types  = droplevels(mod.pol.sites$Veg_Types) # drop levels for future labeling

# fix column names
columns = colnames(mod.pol.data)
columns.fixed = gsub(".t", '-t', columns, fixed = T)
columns.fixed.2 = gsub(".", ' ', columns.fixed, fixed = T)
columns.fixed.3 = gsub("major media", 'major/media', columns.fixed.2, fixed = T)
colnames(mod.pol.data) = columns.fixed.3 

1.1 Perform sqrt transformation on the pollen dataset for all future analyses

mod.pol.sqrt = sqrt(mod.pol.data)


2.0 Conduct Detrended Correspondence analysis to evaluate appropriate ordination technique after Lepš & Šmilauer (2003)

dca.sqrt = decorana(mod.pol.sqrt ) # Lepš & Šmilauer (2003) suggest performing all transformations prior to DCA. 
dca.sqrt 
## 
## Call:
## decorana(veg = mod.pol.sqrt) 
## 
## Detrended correspondence analysis with 26 segments.
## Rescaling of axes with 4 iterations.
## 
##                   DCA1   DCA2   DCA3    DCA4
## Eigenvalues     0.1676 0.1368 0.1102 0.07680
## Decorana values 0.2082 0.1363 0.1010 0.06527
## Axis lengths    2.1056 1.8387 1.5664 1.53204
# axis length for DCA1 < 3 (DCA1 = 2.0147), so linear ordination is preferred  

dca= decorana(mod.pol.data) # Lepš & Šmilauer (2003) suggest performing all transformations prior to DCA. 
dca
## 
## Call:
## decorana(veg = mod.pol.data) 
## 
## Detrended correspondence analysis with 26 segments.
## Rescaling of axes with 4 iterations.
## 
##                   DCA1   DCA2   DCA3   DCA4
## Eigenvalues     0.4162 0.2730 0.2317 0.1836
## Decorana values 0.4954 0.2796 0.2170 0.1320
## Axis lengths    3.7809 2.3148 2.5993 1.8523
# axis length for DCA1 is between 3-4 (DCA1 = 3.7889), so linear ordination is ok  


3.0 Hierarchical clustering of each site to identify vegetation type clusters

# create cluster colors
colors  = ochre_pal(palette =  "parliament")(8) # define colors for plots
colors = c("red", colors)

3.1 Cluster with sqrt transformation

rownames(mod.pol.sqrt) = mod.pol.sites$Site_Code_Veg# add now names to help interpret clusters

mod.pol.sqrt.dist <- vegdist(mod.pol.sqrt, method = "euclidean") # euclidean distance following Lopes- saez

mod.pol.sqrt.clust <- hclust(mod.pol.sqrt.dist, method = "ward.D2")
plot(mod.pol.sqrt.clust$height[length(mod.pol.sqrt.clust$height):(length(mod.pol.sqrt.clust$height)-10)], type = "b", xlab = "Clusters", ylab=  "Height") # use elbow method to determine how many clusters
abline(v = 3, lty = "dashed")

mod.pol.dend <- as.dendrogram(mod.pol.sqrt.clust)
plot(mod.pol.dend, ylim  = c(0,26), main = "Hierarchical clustering of Veg Types using Ward's Method", ylab = "Height")
mtext("With Sqrt Transformation", side=3)
rect.dendrogram(mod.pol.dend, k = 3)

mod.pol$Cluster_SqrtTrans =cutree(mod.pol.sqrt.clust, 3)

4.0 Exploratory Ordination (PCA)

4.1 create initial unconstrained redundancy analysis (pca)

# 4.1 create initial unconstrained ordination (pca), but using a hellinger / Sqrt  transformation to accommodate for sparseness and zeros in species data for each sample site.
pca.sqrt = rda(mod.pol.sqrt)

4.2 Evaluate the performance of the pca

pca.sqrt # shows eigenvalues and inertia (variance)
## Call: rda(X = mod.pol.sqrt)
## 
##               Inertia Rank
## Total           50.95     
## Unconstrained   50.95   46
## Inertia is variance 
## 
## Eigenvalues for unconstrained axes:
##   PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
## 8.255 7.738 5.079 3.331 2.764 2.685 2.427 2.179 
## (Showing 8 of 46 unconstrained eigenvalues)

4.3 A more in depth look at eigenvalues and proportion of variance explained

summary(eigenvals(pca.sqrt)) # the proportion of variance explained in the first 2 axes is ~ 34%
## Importance of components:
##                         PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Eigenvalue            8.255 7.7376 5.0795 3.33064 2.76432 2.68529 2.42722
## Proportion Explained  0.162 0.1519 0.0997 0.06537 0.05426 0.05271 0.04764
## Cumulative Proportion 0.162 0.3139 0.4136 0.47896 0.53322 0.58592 0.63356
##                           PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Eigenvalue            2.17897 1.98829 1.70277 1.62354 1.42608 1.24548 1.06355
## Proportion Explained  0.04277 0.03903 0.03342 0.03187 0.02799 0.02445 0.02087
## Cumulative Proportion 0.67633 0.71536 0.74878 0.78064 0.80863 0.83308 0.85395
##                          PC15    PC16    PC17    PC18    PC19     PC20     PC21
## Eigenvalue            0.94878 0.75161 0.73151 0.64240 0.59834 0.491280 0.416922
## Proportion Explained  0.01862 0.01475 0.01436 0.01261 0.01174 0.009643 0.008183
## Cumulative Proportion 0.87258 0.88733 0.90169 0.91430 0.92604 0.935683 0.943866
##                           PC22     PC23    PC24     PC25     PC26   PC27
## Eigenvalue            0.370789 0.320328 0.28937 0.269376 0.223700 0.1987
## Proportion Explained  0.007278 0.006287 0.00568 0.005287 0.004391 0.0039
## Cumulative Proportion 0.951144 0.957431 0.96311 0.968398 0.972789 0.9767
##                           PC28     PC29     PC30     PC31     PC32     PC33
## Eigenvalue            0.190797 0.172450 0.146994 0.133121 0.099624 0.081244
## Proportion Explained  0.003745 0.003385 0.002885 0.002613 0.001955 0.001595
## Cumulative Proportion 0.980433 0.983818 0.986703 0.989316 0.991271 0.992866
##                           PC34     PC35     PC36     PC37      PC38      PC39
## Eigenvalue            0.071709 0.066709 0.055241 0.040505 0.0307017 0.0275078
## Proportion Explained  0.001407 0.001309 0.001084 0.000795 0.0006026 0.0005399
## Cumulative Proportion 0.994274 0.995583 0.996667 0.997462 0.9980648 0.9986047
##                           PC40      PC41      PC42      PC43      PC44
## Eigenvalue            0.019257 0.0166524 0.0133417 0.0111196 0.0055904
## Proportion Explained  0.000378 0.0003268 0.0002619 0.0002183 0.0001097
## Cumulative Proportion 0.998983 0.9993095 0.9995714 0.9997896 0.9998994
##                            PC45      PC46
## Eigenvalue            3.563e-03 1.565e-03
## Proportion Explained  6.993e-05 3.071e-05
## Cumulative Proportion 1.000e+00 1.000e+00

4.4 Visualize scree plot to evaluate which axes should be considered

screeplot(pca.sqrt, bstick = T, type = "l", main  = "Scree plot of variance explained: Sqrt") # start with axes 1 and 2, but still not a lot of variance explained here

4.5 Identify only the most influential species scores

# Sqrt
species.scores.sqrt = scores(pca.sqrt, display = "species") # extract species scores
species.scores.sqrt = cbind(species.scores.sqrt, rep(NA, length(species.scores.sqrt[,1]))) # create empty column to populate species importance
colnames(species.scores.sqrt)[3] = "importance"
for (i in 1:length(species.scores.sqrt[,1])){ # find the length of each species "arrow" using euclidean distance from plot origin to arrow tip
  arrow.dist = pointDistance(c(0, 0), species.scores.sqrt[i,c(1:2)], lonlat=FALSE)
  species.scores.sqrt[i,3] = arrow.dist
}
species.scores.sqrt.df = as.data.frame(species.scores.sqrt) # convert to dataframe for sorting and later operations
species.scores.sqrt.df.ordered  = species.scores.sqrt.df[order(-species.scores.sqrt.df$importance),] # reorder by importance

species.scores.sqrt.df.ordered
##                                  PC1          PC2  importance
## Quercus                 -1.099237544  1.715697585 2.037631317
## Pinus                   -1.499344558 -0.935737610 1.767381956
## Juniperus-t             -0.377992705 -1.202758726 1.260756534
## Pistacia                -0.885797910 -0.813719682 1.202820709
## Chenopodiaceae           0.974106855 -0.309059605 1.021959884
## Olea                    -0.836984017  0.035275224 0.837727036
## Cichorieae               0.665507988 -0.401026359 0.776996153
## Poaceae undiff           0.453543390 -0.467717630 0.651507013
## Erica                   -0.325550602 -0.493674541 0.591352473
## Plantago coronopus-t     0.496760202 -0.104940499 0.507723554
## Scrophulariaceae undiff  0.390160603 -0.003438341 0.390175753
## Buxus                    0.088420861  0.361265741 0.371929004
## Cistaceae undiff        -0.276818042 -0.229506010 0.359584812
## Plantago sp              0.264124529 -0.154538249 0.306012805
## Senecio-t                0.004624182 -0.276738045 0.276776677
## Brassicaceae undiff      0.226598213 -0.141406059 0.267100026
## Hypericum               -0.039476101  0.262758213 0.265707059
## Plantago lanceolata-t   -0.045958909 -0.235767707 0.240205397
## Asteraceae               0.231936427 -0.016333658 0.232510848
## Apiaceae undiff          0.090482639 -0.205891241 0.224896223
## Hordeum-t                0.176100310 -0.136199979 0.222624692
## Parietaria               0.201556593 -0.048318253 0.207267252
## Cyperaceae undiff        0.199718630 -0.013154136 0.200151348
## Asteroideae              0.181521262 -0.040223091 0.185924355
## Cerealia-t               0.121956764 -0.139015627 0.184929167
## Liliaceae undiff        -0.124548930 -0.126179818 0.177295748
## Polygonum                0.156374254 -0.078753975 0.175085967
## Globularia cf alypum    -0.092561197 -0.140617755 0.168347641
## Caryophyllaceae undiff  -0.056162805 -0.150612155 0.160742906
## Betula                  -0.060125993 -0.143881389 0.155939056
## Ceratonia siliqua       -0.120410580 -0.097463346 0.154912271
## Teucrium                -0.058488489 -0.142455537 0.153995075
## Fabaceae undiff         -0.124273874 -0.079410427 0.147478851
## Phyllirea               -0.102131594 -0.103997523 0.145761268
## Dipsacaceae undiff       0.132145603 -0.050054140 0.141307740
## Euphorbia                0.116464391 -0.072324781 0.137094231
## Genista-t               -0.126490621 -0.044373255 0.134047988
## Plantago albicans        0.123741063 -0.022313608 0.125736820
## Myrtus                   0.101569128 -0.053239575 0.114676676
## Rosaceae                -0.112570905 -0.014933029 0.113557052
## Lamiaceae undiff        -0.035762582 -0.106549617 0.112391206
## Corylus                 -0.106474306  0.028020164 0.110099535
## Rubiaceae undiff         0.081007212  0.048113046 0.094218011
## Artemisia               -0.063269287 -0.067608459 0.092595391
## Matricaria-t             0.088536697 -0.015841176 0.089942702
## Ranunculaceae undiff     0.053667784  0.054334560 0.076370645
## Alnus                    0.022858344 -0.066406495 0.070230524
## Medicago                -0.044032008 -0.044400510 0.062531776
## Sorbus group            -0.006098233  0.057077896 0.057402741
## Centaurea                0.056179578  0.011092455 0.057264191
## Sedum-t                 -0.031209913 -0.043610530 0.053627763
## Saxifraga                0.018271515  0.029170084 0.034420082
## Ephedra fragilis-t       0.024047655 -0.005824923 0.024743069
## Asphodelus group         0.008528020 -0.003799956 0.009336315
# create a scree plot of species importance
plot((species.scores.sqrt.df.ordered$importance), main = "Species Importance", xlab ="Number of Species", ylab = "Importance")
mtext("Blue line denotes the top 25% most influential species: Sqrt", side=3, col = "blue")
lines(species.scores.sqrt.df.ordered$importance)
abline(h =  quantile(species.scores.sqrt.df$importance, prob = .75), col = "blue")

# evaluate the top 25% most influential species
for (i in 1:length(species.scores.sqrt[,1])){
  if(species.scores.sqrt.df$importance[i] >= quantile(species.scores.sqrt.df$importance, prob = .75) ) { # adjust quantile here 
    species.scores.sqrt.df$include[i] = 1
  } else {species.scores.sqrt.df$include[i] = 2}}

4.6 create biplots of pca results

4.6.1 PCs 1-3
scores(pca.sqrt, choices = 1:3, display = "species", scaling = "species")
##                                  PC1          PC2          PC3
## Quercus                 -1.099237544  1.715697585  0.613364749
## Olea                    -0.836984017  0.035275224 -1.148387830
## Corylus                 -0.106474306  0.028020164  0.046220848
## Buxus                    0.088420861  0.361265741  0.377342660
## Pinus                   -1.499344558 -0.935737610  0.198106393
## Betula                  -0.060125993 -0.143881389  0.101310991
## Alnus                    0.022858344 -0.066406495  0.073460209
## Juniperus-t             -0.377992705 -1.202758726  1.269099246
## Ceratonia siliqua       -0.120410580 -0.097463346  0.078601961
## Hypericum               -0.039476101  0.262758213  0.241736151
## Myrtus                   0.101569128 -0.053239575 -0.045047588
## Sorbus group            -0.006098233  0.057077896  0.049850324
## Ephedra fragilis-t       0.024047655 -0.005824923  0.039366562
## Genista-t               -0.126490621 -0.044373255 -0.076587247
## Phyllirea               -0.102131594 -0.103997523 -0.048421323
## Pistacia                -0.885797910 -0.813719682 -0.589164678
## Erica                   -0.325550602 -0.493674541  0.319466614
## Cistaceae undiff        -0.276818042 -0.229506010 -0.079483611
## Poaceae undiff           0.453543390 -0.467717630 -0.349387786
## Hordeum-t                0.176100310 -0.136199979 -0.250744981
## Cerealia-t               0.121956764 -0.139015627 -0.284527679
## Plantago lanceolata-t   -0.045958909 -0.235767707 -0.001274774
## Plantago albicans        0.123741063 -0.022313608 -0.075759979
## Plantago sp              0.264124529 -0.154538249  0.083060010
## Asphodelus group         0.008528020 -0.003799956 -0.062725996
## Polygonum                0.156374254 -0.078753975  0.060075449
## Brassicaceae undiff      0.226598213 -0.141406059 -0.044592156
## Caryophyllaceae undiff  -0.056162805 -0.150612155 -0.057970608
## Euphorbia                0.116464391 -0.072324781 -0.149919843
## Asteroideae              0.181521262 -0.040223091 -0.279702724
## Cichorieae               0.665507988 -0.401026359 -0.157539499
## Centaurea                0.056179578  0.011092455  0.028688181
## Matricaria-t             0.088536697 -0.015841176 -0.153459371
## Artemisia               -0.063269287 -0.067608459  0.102798480
## Senecio-t                0.004624182 -0.276738045  0.160397563
## Asteraceae               0.231936427 -0.016333658 -0.243022353
## Apiaceae undiff          0.090482639 -0.205891241 -0.001458966
## Teucrium                -0.058488489 -0.142455537  0.017896050
## Lamiaceae undiff        -0.035762582 -0.106549617  0.109716744
## Dipsacaceae undiff       0.132145603 -0.050054140  0.081032268
## Medicago                -0.044032008 -0.044400510 -0.050739702
## Fabaceae undiff         -0.124273874 -0.079410427 -0.083401223
## Ranunculaceae undiff     0.053667784  0.054334560  0.016567225
## Scrophulariaceae undiff  0.390160603 -0.003438341  0.219499037
## Liliaceae undiff        -0.124548930 -0.126179818 -0.048059835
## Parietaria               0.201556593 -0.048318253 -0.036961239
## Rosaceae                -0.112570905 -0.014933029  0.068766775
## Sedum-t                 -0.031209913 -0.043610530  0.021085343
## Saxifraga                0.018271515  0.029170084  0.015534308
## Globularia cf alypum    -0.092561197 -0.140617755 -0.066704416
## Rubiaceae undiff         0.081007212  0.048113046 -0.011083122
## Plantago coronopus-t     0.496760202 -0.104940499  0.307914656
## Chenopodiaceae           0.974106855 -0.309059605  0.254517985
## Cyperaceae undiff        0.199718630 -0.013154136  0.013687897
## attr(,"const")
## [1] 6.957806
4.6.2 Species and Sites with PC1 PC2 (clusters Sqrt trans)
# 4.6.2 Calculate cluster centroids for plotting PCs 1 and 2
pca.scores.sqrt = as.data.frame(scores(pca.sqrt, choices = c(1,2), display = c("sp", "sites"),
                                       scaling = "species")[2])

pca.scores.sqrt$Cluster_SqrtTrans = mod.pol$Cluster_SqrtTrans 
pca.scores.centroids.sqrt  = pca.scores.sqrt %>%  dplyr::group_by(Cluster_SqrtTrans) %>% 
  dplyr::summarise(
    PC1.mean = mean(sites.PC1),
    PC2.mean = mean(sites.PC2))


# Plot PCs 1 and 2
colors.cluster = brewer.pal(4, "Dark2") # define colors for plots
colors.species  = c("red", rgb(0, 0, 0, alpha = 0))
shapes = seq(1:9)

biplot(pca.sqrt, scaling = "species", type = c("point", "n"), main = "PCA of Modern Pollen Data and Vegetation Type: Sqrt Trans")
mtext("Only the top 25% most influential species (by score) are labeled - PCs 1 and 2", side=3)
with(mod.pol, points(pca.sqrt, display = "sites", col = colors.cluster[Cluster_SqrtTrans],
                     scaling = "species", pch = shapes[Veg_Types], cex =1.1))
with(pca.scores.centroids.sqrt, points(PC1.mean, PC2.mean,col = colors.cluster[Cluster_SqrtTrans], pch = 16, cex =1.6))
with(species.scores.sqrt.df, text(pca.sqrt, display = "species", col = colors.species[include],
                                  scaling = "species", cex = .8, offset  = .2))
with(mod.pol, legend("bottomright", legend = levels(as.factor(Cluster_SqrtTrans)), bty = "n",
                     col = colors.cluster, pch = 16, cex  = .9, title="Clusters: Sqrt Trans", title.adj =1))

#Sites only
biplot(pca.sqrt, scaling = "species", type = c("n", "n"), main = "PCA of Modern Pollen Data and Vegetation Type: Sqrt Trans")
mtext("Sites only - PCs 1 and 2", side=3)
with(mod.pol, points(pca.sqrt, display = "sites", col = colors.cluster[Cluster_SqrtTrans],
                     scaling = "species", pch = shapes[Veg_Types], cex =1.1))
with(mod.pol, text(pca.sqrt, display = "sites", col = colors.cluster[Cluster_SqrtTrans],
                   scaling = "species", pch = shapes[Veg_Types], cex =.7, pos = 3, offset = .5))
with(pca.scores.centroids.sqrt, points(PC1.mean, PC2.mean,col = colors.cluster[Cluster_SqrtTrans], pch = 16, cex =1.6))
with(mod.pol, legend("bottomright", legend = levels(as.factor(Cluster_SqrtTrans)), bty = "n",
                     col = colors.cluster, pch = 16, cex  = .9, title="Clusters: Sqrt Trans", title.adj =1))


5.0 Vegetation Indices (from Davis 1984; McGlone and Meurk 2000; and Fall 2012)

5.1 Define index variables

# A = B0 / (P0 + P1 + B0) 
# B0 =  number of stands in which the pollen type is present in the surface sample and the associated plant taxon is present in vegetation sample of the stand
# P0 = number of stands in which the pollen type is present in the surface sample but the associated plant taxon is absent in the local vegetation
# P1 = number of stands in which the plant taxon is present in the vegetation but the pollen type is absent in the surface sample
# For percentage data, the variance of the sample is a function of the mean. Consequently, percentage data do not meet the assumption of normality for correlation analysis. The arcsine transformation:
#  Xa = arcsine * sqrt(X)
# where X is the untransformed percent and Xa is the transformed value is a standard procedure (Sokal and Rohlf, 1969) used to treat percentage data so they meet the assumptions for statistical tests. For these data the trans- formed and untransformed percentages give similar results for correlation analysis: the same types show significant relationships between pollen percentages and vegetation coverage.

# to make this work, all column names in the pollen dataset have to be the same as the species names identified in the field.
# see López-Sáez et al.2020 for pollen indices.

5.2 Load observed species data

species.reveles = read.csv("./data/vegetation_relevés_davis_fid_disp.csv")
species.present =  as.data.frame(unique(species.reveles$Equiv_Pollen_Types))
head(species.present)
##   unique(species.reveles$Equiv_Pollen_Types)
## 1                                 Cichorieae
## 2                             Poaceae undiff
## 3                                  Ailanthus
## 4                           Lamiaceae undiff
## 5                           Liliaceae undiff
## 6                             Chenopodiaceae
# summarized by species then convert to 1/0 presence absence
species.reveles.summ = species.reveles %>%
  dplyr::group_by(Equiv_Pollen_Types) %>%
  dplyr::summarise_all(list(mean))

species.reveles.summ$height_log_value = round(species.reveles.summ$height_log_code)
species.reveles.summ$pollination_value = round(species.reveles.summ$pollination_code)
# replace values
species.reveles.summ$height_log_value[species.reveles.summ$height_log_value==1]<-"7-20 cm"
species.reveles.summ$height_log_value[species.reveles.summ$height_log_value==2]<-"25-60 cm"
species.reveles.summ$height_log_value[species.reveles.summ$height_log_value==3]<-"70-200 cm"
species.reveles.summ$height_log_value[species.reveles.summ$height_log_value==4]<- "300-3,000 cm"
species.reveles.summ$pollination_value[species.reveles.summ$pollination_value==0]<-"Entomophilic, anthers not exposed"
species.reveles.summ$pollination_value[species.reveles.summ$pollination_value==1]<-"Entomophilic, anthers exposed"
species.reveles.summ$pollination_value[species.reveles.summ$pollination_value==2]<-"Anemophilic"

species.reveles.info = species.reveles.summ[,1:6]
species.reveles.info = species.reveles.info[,-2]
colnames(species.reveles.info)[1] = "species"
species.reveles.data = species.reveles.summ[,-c(2:6)]

species.reveles.p.a = species.reveles.data %>% mutate_if(is.numeric, ~1 * (. > 0))

#load (all) pollen data with taxa names that match the reveles
pollen.taxa.indices = read.csv("./data/modern_pollen_data_percent_davis_fid_disp_index_values.csv")
# convert to Presence/absence
pollen.taxa.p.a = pollen.taxa.indices %>% mutate_if(is.numeric, ~1 * (. > 0))
#replace NA with 0
pollen.taxa.p.a[is.na(pollen.taxa.p.a )] = 0

# limit analysis to only the reveles we have pollen data for
reveles.analyzed = colnames(pollen.taxa.p.a) %>% .[-c(1)]
reveles.analyzed = c("Equiv_Pollen_Types", reveles.analyzed)
species.reveles.p.a  = species.reveles.p.a[,c(reveles.analyzed)]

4.3 Construct Davis index and Fid/Disp dataframe

davis.indices.df = data.frame('species' = character(), 'A' =numeric(), 'U' = numeric(), 'O' = numeric(), 'Fid' = numeric(), 'Dis' = numeric())

5.4 Populate dataframe

for (i in 1:length(pollen.taxa.p.a$Pollen_taxa)){
  species = pollen.taxa.p.a[i,1]
  plants.reveles.present = species.reveles.p.a[species.reveles.p.a$Equiv_Pollen_Types == species,] %>% .[, colSums(. != 0) > 0]
  plants.reveles.absent = species.reveles.p.a[species.reveles.p.a$Equiv_Pollen_Types == species,] %>% .[, colSums(. != 0) == 0]

  plants.present = colnames(plants.reveles.present) %>% .[-1]
  plants.absent = colnames(plants.reveles.absent)

  pollen.reveles.present = pollen.taxa.p.a[pollen.taxa.p.a$Pollen_taxa == species,] %>% .[, colSums(. != 0) > 0]
  pollen.present = colnames(pollen.reveles.present) %>% .[-1]

  pollen.reveles.absent = pollen.taxa.p.a[pollen.taxa.p.a$Pollen_taxa == species,] %>% .[, colSums(. != 0) == 0]
  pollen.absent = colnames(pollen.reveles.absent)


  # where b0 is the number of samples in which both the pollen type and its parent plant are present,
  # p0 is the number of samples in which the pollen type is present and the parent plant absent, and
  # p1 is the number of samples in which the parent plant is present and the pollen type absent.
  `%notin%` <- Negate(`%in%`)


  B0 = pollen.present %in% plants.present %>% sum(.)
  P0 = pollen.present %in% plants.absent %>% sum(.)
  P1 = plants.present %in% pollen.absent %>% sum(.)

  B0
  P0
  P1

  A = B0 / (P0 + P1 + B0)
  U = P1 / (P1 + B0)
  O = P0 / (P0 + B0)
  Fid = (B0 / length(plants.present)) * 100
  Dis = (P0 / length(plants.absent)) * 100

  A
  U
  O
  Fid
  Dis

  index_input = c(A,U,O,Fid,Dis)
  davis.indices.df[i,2:6] = index_input
  davis.indices.df[i,1] = species
}

5.5 Clean up results and subset to top 25% percent species (as calculated in PCA)

# 5.5 clean up results and subset to top 25% percent species (as calculated in PCA)
davis.indices.all = davis.indices.df
davis.indices.all[davis.indices.all=='NaN']<-0

# Join reveles info to species
species.reveles.info = as.data.frame(species.reveles.info)
davis.indices.all = merge(x = davis.indices.all, y = species.reveles.info, by = "species" )

5.6 Clusters fo all indices

# Cluster Davis and Dis/Fid indices
rownames(davis.indices.all) = davis.indices.all$species# add now names to help interpret clusters
davis.indices.all.dist <- vegdist(scale(davis.indices.all[,2:6]), method = "euclidean") # euclidean distance following Lopes- saez

davis.indices.all.clust <- hclust(davis.indices.all.dist, method = "ward.D2")
plot(davis.indices.all.clust$height[length(davis.indices.all.clust$height):(length(davis.indices.all.clust$height)-10)], type = "b", xlab = "Clusters", ylab=  "Height") # use elbow method to determine how many clusters
abline(v = 5, lty = "dashed")

davis.indices.dend <- as.dendrogram(davis.indices.all.clust)
plot(davis.indices.dend, main = "Hierarchical clustering of Davis Indices and Dis/Fid using Ward's Method\n scaled", ylab = "Height")
rect.dendrogram(davis.indices.dend, k = 5)

davis.indices.all$Cluster =cutree(davis.indices.all.clust, 5)

# subset to top 25% species
top.25.species = rownames(species.scores.sqrt.df[species.scores.sqrt.df$include ==1,])

davis.indices.df.top.25 = davis.indices.all[(davis.indices.all$species  %in% top.25.species), ]
davis.indices.df.top.25$include = 1

5.7 Plot all indices using ggplot

# create PCA for plot
davis.indices.rda = davis.indices.all[,2:4]
rownames(davis.indices.rda) = davis.indices.all$species
pca.davis = rda(davis.indices.rda)

# create color palette
colors.cluster.davis = brewer.pal(5, "Dark2") # define colors for plots
colors.species  = c(colors.cluster.davis, rgb(0, 0, 0, alpha = 0))

## ggplot version
PCAscores <- scores(pca.davis, display = "sites") %>% as.data.frame() %>% rownames_to_column("species") %>% full_join(davis.indices.all, by = "species")
PCAvect <- scores(pca.davis, display = "species") %>% as.data.frame()
PCAvect.A = PCAvect[1,] 
PCAvect.U = PCAvect[2,] 
PCAvect.O = PCAvect[3,] 

# plot PCA of -color
ggplot() +
  geom_point(data = PCAscores, aes(x = PC1, y = PC2, color = as.factor(Cluster)), shape = 16, size = 2) +
  scale_color_manual(values =c("#1B9E77" ,"#D95F02" ,"#7570B3" ,"#E7298A", "#66A61E"), breaks = c(1,2,3,4,5)) +
  scale_shape(solid = FALSE) +
  geom_text_repel(data = PCAscores,aes(x = PC1, y = PC2,label = species, segment.alpha = 0.5, segment.size = 0.3), max.overlaps = 30,box.padding = unit(0.45, "lines"), size = 2.8) +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
  geom_segment(data = PCAvect, aes(x = 0, y = 0, xend = PC1, yend = PC2), alpha = 0.5, arrow = arrow(length = unit(0.2, "cm"))) +
  geom_text(data = PCAvect.A, aes(x = PC1, y = PC2, label = rownames(PCAvect.A)), nudge_x = -.055, nudge_y = 0.04, size =5) +
  geom_text(data = PCAvect.O, aes(x = PC1, y = PC2, label = rownames(PCAvect.O)), nudge_x = .07, nudge_y = -0.003, size =5) +
  geom_text(data = PCAvect.U, aes(x = PC1, y = PC2, label = rownames(PCAvect.U)), nudge_x = -.02, nudge_y = -0.04, size =5) +
  theme_bw() + theme(axis.title = element_text(size=13), axis.text = element_text(size = 11),  legend.position = c(.9,.115),legend.background = element_rect(fill="white",size=0.25, linetype="solid", colour ="black")) +
  labs(x = "\nPC1 (62.1%)",
       y = "\nPC2 (35.0%)", color = "Cluster groups") 

davis.indices.all$cluster_include = match(davis.indices.all$species, davis.indices.df.top.25$species)
for (i in 1:length(davis.indices.all$species)){
if (!is.na(davis.indices.all$cluster_include[i])){davis.indices.all$cluster_include[i] = davis.indices.all$Cluster[i]}}
davis.indices.all[is.na(davis.indices.all)] <- 6


# plot Fidelity / Dispersibility scatterplots
# reorder factors
davis.indices.all$height_log_value<- factor(davis.indices.all$height_log_value, levels=c("7-20 cm", "25-60 cm", "70-200 cm", "300-3,000 cm"))

ggplot(data = davis.indices.all, aes(x =Dis , y = Fid)) +
  geom_point(aes(col = pollination_value,  size = height_log_value,shape = Cluster)) + scale_shape_identity(guide = "legend") +
  geom_text_repel(aes(label = species, segment.alpha = 0.5, segment.size = 0.45), max.overlaps = 50,box.padding = unit(0.6, "lines"),point.padding = unit(0.6, "lines"), size = 3.0) +
  scale_fill_brewer(palette="Dark2")+
  labs(title = "", subtitle = "", y = "Fidelity (%)\n", x = "\nDispersibility (%)",
       col = "Pollination syndrome", size = "Taxa height") +
  theme_bw() + theme(axis.title = element_text(size = 13), axis.text = element_text(size = 11),
                     legend.background = element_rect(fill="white",size=0.25, linetype="solid", colour ="black"))


6.0 RDA and Environmental Variables

6.1 Load, clean, and select appropriate environmental variables

# load and select environmental data
env.data.raw = read.csv("./data/environmental_variables.csv")
row.names(env.data.raw) = env.data.raw$Code
env.data = env.data.raw[reveles.analyzed[-1],]
env.data = env.data[,4:(length(env.data))]
env.data = as.data.frame(scale(env.data))

#conduct initial, holding nothing back, RDA
rda.initial = rda(mod.pol.sqrt ~., data  = env.data)

# Analyze variance inflation factor to eliminate correlated variables
vif.cca(rda.initial)
##                       masl                     aspect 
##                  65.523152                   3.668796 
##         herbivory_pressure    agropastoral_use_binary 
##                   6.477274                   4.193323 
##                  trampling                  land_open 
##                   3.328012                   7.685014 
##                  Soil_type        Sea_distance_meters 
##                   2.869669                   8.517636 
## thermality_index_2008_2018            rainfall_10_yrs 
##                 432.239973                  12.880123 
##                temp_10_yrs              Percent_Trees 
##                 512.338090                   9.635628 
##             Percent_Shrubs              Percent_herbs 
##                  16.181554                   7.905909 
##         Percent_bryophytes              Percent_rocks 
##                   2.365431                   6.438860 
##     Percent_stones_gravels              Slope_percent 
##                   4.096674                 118.497414 
##               Slope_degree                 salty_soil 
##                 123.220370                   6.238257 
##               flooded_soil      direct_human_pressure 
##                   4.586079                   9.010840 
##   direct_human_pressure_bi   Nitrophily_index_relevés 
##                  20.396454                   6.416165 
##               n_total_taxa              n_total_crops 
##                   7.209848                  21.241850 
## N_totals_nitrophilous_taxa      EFFIS_Fire_10km_10yrs 
##                   7.372386                  10.203034 
##                   Veg_type                 Crop_index 
##                   4.015775                  10.044180 
##                 Dist_ag_km                        gHM 
##                   3.769041                   6.596807
#Conduct initial cut by removing highly correlated variables (those with a variance inflation factor of approximately >10)
env.data.cut = subset(env.data, select=-c(Slope_degree,thermality_index_2008_2018,Percent_Trees,
                                          Percent_Shrubs, Percent_herbs, Percent_bryophytes,
                                          Percent_rocks, Percent_stones_gravels,N_totals_nitrophilous_taxa, 
                                          n_total_taxa, n_total_crops, Veg_type, direct_human_pressure))

# examine the result of cutting variables
rda.cut = rda(mod.pol.sqrt ~., data  = env.data.cut)
vif.cca(rda.cut) # proceed with these variables
##                     masl                   aspect       herbivory_pressure 
##                14.820281                 1.205186                 2.837920 
##  agropastoral_use_binary                trampling                land_open 
##                 2.967160                 1.656827                 2.387789 
##                Soil_type      Sea_distance_meters          rainfall_10_yrs 
##                 1.373523                 2.956466                 6.340926 
##              temp_10_yrs            Slope_percent               salty_soil 
##                11.982328                 3.284784                 3.205547 
##             flooded_soil direct_human_pressure_bi Nitrophily_index_relevés 
##                 2.036656                 3.490859                 2.390804 
##    EFFIS_Fire_10km_10yrs               Crop_index               Dist_ag_km 
##                 4.423643                 2.220410                 1.647820 
##                      gHM 
##                 2.522092

6.2 Run initial RDA without permutation selection of environmental variables

# Run second RDA
rda.2 = rda(mod.pol.sqrt ~., data  = env.data.cut)
stp = rda(mod.pol.sqrt ~ 1, data  = env.data.cut)

# Plots No Selection
# clusters
rda.no.selection.scores.sqrt = as.data.frame(scores(rda.2, choices = c(1,2), display = c("sp", "sites"),
                                                scaling = "species")[2])
rda.no.selection.scores.sqrt$Cluster_SqrtTrans = mod.pol$Cluster_SqrtTrans 
rda.no.selection.centroids.sqrt  = rda.no.selection.scores.sqrt %>%  dplyr::group_by(Cluster_SqrtTrans) %>% 
  dplyr::summarise(
    RDA1.mean = mean(sites.RDA1),
    RDA2.mean = mean(sites.RDA2))

# Sites
plot(rda.2, type="n", main = "RDA", ylim =c(-3.5,2.5), xlim = c(-3,3))
mtext("No Selection", side=3)
text(rda.2, dis="bp")
with(mod.pol, points(rda.2, display = "sites", col = colors.cluster[Cluster_SqrtTrans],
                     scaling = "species", pch = shapes[Veg_Types], cex =1.1))
with(mod.pol, text(rda.2, display = "sites", col = colors.cluster[Cluster_SqrtTrans],
                   scaling = "species", pch = shapes[Veg_Types], cex =.7, pos = 3, offset = .5))
with(rda.no.selection.centroids.sqrt, points(RDA1.mean, RDA2.mean,col = colors.cluster[Cluster_SqrtTrans], pch = 16, cex =1.6))
with(mod.pol, legend("bottomright", legend = levels(as.factor(Cluster_SqrtTrans)), bty = "n",
                     col = colors.cluster, pch = 16, cex  = .7, title="Clusters: Sqrt Trans", title.adj =1))
with(mod.pol, legend("bottomleft", legend = levels(Veg_Types), bty = "n", pch = shapes, cex  = .7, title="Veg Types", title.adj = 0.01))

# Species 
plot(rda.2,  display = c("sp", "cn"), main = "RDA", ylim =c(-1.5,1.5), xlim = c(-1.5, 1.5))
mtext("No Selection", side=3)
text(rda.2, dis="bp")

plot(rda.2,  display = c("sp", "cn"), main = "RDA", ylim =c(-.5,.5), xlim = c(-.5, .5))
mtext("No Selection - zoomed", side=3)
text(rda.2, dis="bp")

# Anova
anova(rda.2, by = "margin")
## Permutation test for rda under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = mod.pol.sqrt ~ masl + aspect + herbivory_pressure + agropastoral_use_binary + trampling + land_open + Soil_type + Sea_distance_meters + rainfall_10_yrs + temp_10_yrs + Slope_percent + salty_soil + flooded_soil + direct_human_pressure_bi + Nitrophily_index_relevés + EFFIS_Fire_10km_10yrs + Crop_index + Dist_ag_km + gHM, data = env.data.cut)
##                          Df Variance      F Pr(>F)   
## masl                      1   0.9231 1.1499  0.294   
## aspect                    1   0.5666 0.7057  0.822   
## herbivory_pressure        1   1.4647 1.8246  0.037 * 
## agropastoral_use_binary   1   0.8712 1.0852  0.361   
## trampling                 1   1.3858 1.7263  0.040 * 
## land_open                 1   1.4243 1.7742  0.040 * 
## Soil_type                 1   1.5646 1.9490  0.012 * 
## Sea_distance_meters       1   1.1855 1.4768  0.095 . 
## rainfall_10_yrs           1   1.1971 1.4913  0.104   
## temp_10_yrs               1   1.1205 1.3959  0.135   
## Slope_percent             1   0.9249 1.1521  0.256   
## salty_soil                1   1.3385 1.6673  0.031 * 
## flooded_soil              1   1.9601 2.4417  0.003 **
## direct_human_pressure_bi  1   0.4635 0.5774  0.921   
## Nitrophily_index_relevés  1   0.7411 0.9231  0.536   
## EFFIS_Fire_10km_10yrs     1   0.9027 1.1245  0.313   
## Crop_index                1   0.9332 1.1625  0.272   
## Dist_ag_km                1   1.1332 1.4117  0.128   
## gHM                       1   1.1000 1.3702  0.131   
## Residual                 27  21.6747                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(rda.2)
## 
## Call:
## rda(formula = mod.pol.sqrt ~ masl + aspect + herbivory_pressure +      agropastoral_use_binary + trampling + land_open + Soil_type +      Sea_distance_meters + rainfall_10_yrs + temp_10_yrs + Slope_percent +      salty_soil + flooded_soil + direct_human_pressure_bi + Nitrophily_index_relevés +      EFFIS_Fire_10km_10yrs + Crop_index + Dist_ag_km + gHM, data = env.data.cut) 
## 
## Partitioning of variance:
##               Inertia Proportion
## Total           50.95     1.0000
## Constrained     29.27     0.5746
## Unconstrained   21.67     0.4254
## 
## Eigenvalues, and their contribution to the variance 
## 
## Importance of components:
##                         RDA1   RDA2    RDA3    RDA4    RDA5    RDA6    RDA7
## Eigenvalue            6.7569 5.5294 3.49604 2.16194 2.02025 1.87810 1.35649
## Proportion Explained  0.1326 0.1085 0.06862 0.04243 0.03965 0.03686 0.02662
## Cumulative Proportion 0.1326 0.2412 0.30977 0.35220 0.39186 0.42872 0.45534
##                          RDA8    RDA9   RDA10   RDA11   RDA12    RDA13    RDA14
## Eigenvalue            1.20061 1.02872 0.78268 0.66822 0.54229 0.457657 0.418010
## Proportion Explained  0.02357 0.02019 0.01536 0.01312 0.01064 0.008983 0.008205
## Cumulative Proportion 0.47891 0.49910 0.51446 0.52758 0.53822 0.547206 0.555410
##                          RDA15    RDA16    RDA17    RDA18    RDA19     PC1
## Eigenvalue            0.312767 0.225830 0.183862 0.155230 0.098733 3.93750
## Proportion Explained  0.006139 0.004433 0.003609 0.003047 0.001938 0.07728
## Cumulative Proportion 0.561549 0.565982 0.569591 0.572637 0.574575 0.65186
##                          PC2     PC3     PC4     PC5     PC6    PC7     PC8
## Eigenvalue            2.6288 2.08031 1.89635 1.54175 1.43620 1.3196 1.05192
## Proportion Explained  0.0516 0.04083 0.03722 0.03026 0.02819 0.0259 0.02065
## Cumulative Proportion 0.7035 0.74429 0.78151 0.81177 0.83996 0.8659 0.88651
##                           PC9    PC10    PC11   PC12     PC13     PC14     PC15
## Eigenvalue            0.80966 0.72066 0.66127 0.6012 0.493719 0.435896 0.404391
## Proportion Explained  0.01589 0.01414 0.01298 0.0118 0.009691 0.008556 0.007937
## Cumulative Proportion 0.90240 0.91654 0.92952 0.9413 0.951012 0.959568 0.967505
##                           PC16     PC17     PC18     PC19     PC20    PC21
## Eigenvalue            0.324795 0.272137 0.228866 0.164027 0.149336 0.11516
## Proportion Explained  0.006375 0.005341 0.004492 0.003219 0.002931 0.00226
## Cumulative Proportion 0.973880 0.979221 0.983714 0.986933 0.989864 0.99212
##                           PC22     PC23     PC24     PC25      PC26      PC27
## Eigenvalue            0.104000 0.098368 0.073961 0.056390 0.0438603 0.0246680
## Proportion Explained  0.002041 0.001931 0.001452 0.001107 0.0008609 0.0004842
## Cumulative Proportion 0.994166 0.996096 0.997548 0.998655 0.9995158 1.0000000
## 
## Accumulated constrained eigenvalues
## Importance of components:
##                         RDA1   RDA2   RDA3    RDA4    RDA5    RDA6    RDA7
## Eigenvalue            6.7569 5.5294 3.4960 2.16194 2.02025 1.87810 1.35649
## Proportion Explained  0.2308 0.1889 0.1194 0.07385 0.06901 0.06416 0.04634
## Cumulative Proportion 0.2308 0.4197 0.5391 0.61298 0.68199 0.74615 0.79249
##                          RDA8    RDA9   RDA10   RDA11   RDA12   RDA13   RDA14
## Eigenvalue            1.20061 1.02872 0.78268 0.66822 0.54229 0.45766 0.41801
## Proportion Explained  0.04101 0.03514 0.02674 0.02283 0.01852 0.01563 0.01428
## Cumulative Proportion 0.83350 0.86864 0.89538 0.91821 0.93673 0.95237 0.96665
##                         RDA15    RDA16    RDA17    RDA18    RDA19
## Eigenvalue            0.31277 0.225830 0.183862 0.155230 0.098733
## Proportion Explained  0.01068 0.007714 0.006281 0.005303 0.003373
## Cumulative Proportion 0.97733 0.985044 0.991325 0.996627 1.000000
## 
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores:  6.957806 
## 
## 
## Species scores
## 
##                              RDA1       RDA2       RDA3      RDA4      RDA5
## Quercus                 -1.344420  1.2643863  0.0214363 -0.283826  0.062850
## Olea                    -0.727439 -0.6459288 -0.7306117 -0.096372 -0.415130
## Corylus                 -0.203860  0.0597363  0.0007717 -0.162750 -0.028184
## Buxus                   -0.043142  0.3771044  0.6290913  0.549604  0.158579
## Pinus                   -0.928828 -0.7704397  0.3385198 -0.389298 -0.099267
## Betula                  -0.029566 -0.1193019  0.0463787  0.049305  0.072215
## Alnus                    0.045832 -0.0358244  0.1025487  0.051366 -0.049278
## Juniperus-t             -0.190404 -0.7795575  0.9258606  0.315721  0.168576
## Ceratonia siliqua       -0.083424 -0.1020697  0.0725295  0.038453  0.026477
## Hypericum               -0.261661  0.4811955  0.2247383  0.182299 -0.040392
## Myrtus                   0.152265 -0.0107884 -0.0609324 -0.194766  0.392441
## Sorbus group            -0.048965  0.1452562  0.0383545  0.005331  0.004724
## Ephedra fragilis-t       0.022005  0.0081053  0.0663978 -0.013097 -0.087246
## Genista-t               -0.139963 -0.0691396 -0.0207254  0.012178 -0.002411
## Phyllirea               -0.024655 -0.1477456  0.0823726 -0.128893  0.116397
## Pistacia                -0.538900 -1.0021691 -0.2782707  0.307328 -0.233541
## Erica                   -0.177606 -0.4397931  0.3131448 -0.401728  0.640523
## Cistaceae undiff        -0.068527 -0.2442611 -0.0028153 -0.175418  0.359193
## Poaceae undiff           0.589841 -0.0859185 -0.3091856 -0.265247  0.158670
## Hordeum-t                0.131630  0.0238810 -0.3108114  0.146681  0.185941
## Cerealia-t               0.130665 -0.0733177 -0.3474821  0.304111  0.120758
## Plantago lanceolata-t    0.024385 -0.2525894 -0.1043992  0.522910  0.131954
## Plantago albicans        0.085441 -0.0627482 -0.0898538  0.200516  0.044830
## Plantago sp              0.265727 -0.0943794  0.0842936  0.046766  0.112788
## Asphodelus group         0.040060 -0.0011399 -0.1193279 -0.007368  0.080849
## Polygonum                0.151580  0.0008972 -0.0419177  0.027466  0.015925
## Brassicaceae undiff      0.171803 -0.0600666 -0.0701023  0.090879  0.098442
## Caryophyllaceae undiff  -0.016267 -0.0651463 -0.0227074 -0.043418  0.040895
## Euphorbia                0.159422  0.1217491 -0.2247056  0.031429  0.016472
## Asteroideae              0.235131  0.0290074 -0.2365417 -0.040603  0.120972
## Cichorieae               0.707089 -0.0912559 -0.2973547  0.026572  0.254424
## Centaurea                0.043311  0.0389310 -0.0170178  0.026857 -0.012449
## Matricaria-t             0.151466  0.1088566 -0.1412324  0.208135 -0.039945
## Artemisia               -0.033560 -0.0255855  0.1187422  0.117039 -0.081557
## Senecio-t                0.016141 -0.1438814  0.1143729 -0.113493  0.016964
## Asteraceae               0.242029  0.0842220 -0.2257914  0.016550  0.111001
## Apiaceae undiff          0.190999 -0.1189310 -0.0300236 -0.248895  0.349785
## Teucrium                -0.053076 -0.1390514  0.0243326 -0.061590  0.041574
## Lamiaceae undiff        -0.029332 -0.0892919  0.1103202  0.087089  0.088499
## Dipsacaceae undiff       0.137334 -0.0706790  0.0755478 -0.058982 -0.077732
## Medicago                -0.023904 -0.0716171 -0.0480357  0.007097 -0.008616
## Fabaceae undiff         -0.098467 -0.1022639  0.0103009  0.030735 -0.018318
## Ranunculaceae undiff     0.005612  0.1074386  0.1428207  0.113342 -0.118194
## Scrophulariaceae undiff  0.414951  0.1241707  0.3701121 -0.028300 -0.374763
## Liliaceae undiff        -0.026170 -0.1372871 -0.0306048 -0.122116 -0.022948
## Parietaria               0.192357  0.0490440 -0.1487872 -0.005180  0.034125
## Rosaceae                -0.142862  0.0751316  0.1126715  0.050484  0.080359
## Sedum-t                 -0.019205 -0.0521786  0.0663304 -0.031319  0.116044
## Saxifraga                0.020648  0.0133850  0.0144982  0.012338 -0.001560
## Globularia cf alypum    -0.068790 -0.1949006  0.0682734 -0.220532  0.088442
## Rubiaceae undiff         0.052488 -0.0210458  0.0117010 -0.019860 -0.069460
## Plantago coronopus-t     0.530062 -0.0091495  0.3968099 -0.185132 -0.390808
## Chenopodiaceae           0.939957 -0.0850269  0.3508101 -0.466856 -0.431611
## Cyperaceae undiff        0.192439  0.0090904  0.0278969 -0.081580 -0.093779
##                              RDA6
## Quercus                 -0.446088
## Olea                     0.455765
## Corylus                  0.119069
## Buxus                    0.666183
## Pinus                    0.107053
## Betula                   0.083257
## Alnus                    0.055805
## Juniperus-t             -0.359060
## Ceratonia siliqua       -0.146489
## Hypericum                0.432782
## Myrtus                   0.036958
## Sorbus group             0.063143
## Ephedra fragilis-t      -0.036046
## Genista-t                0.037791
## Phyllirea                0.107183
## Pistacia                -0.244012
## Erica                    0.215371
## Cistaceae undiff        -0.238902
## Poaceae undiff          -0.003224
## Hordeum-t                0.051326
## Cerealia-t              -0.059546
## Plantago lanceolata-t   -0.224675
## Plantago albicans       -0.125363
## Plantago sp             -0.045288
## Asphodelus group         0.055677
## Polygonum               -0.025405
## Brassicaceae undiff      0.030626
## Caryophyllaceae undiff   0.067068
## Euphorbia               -0.061908
## Asteroideae              0.062852
## Cichorieae              -0.148285
## Centaurea               -0.061357
## Matricaria-t             0.040082
## Artemisia                0.097530
## Senecio-t               -0.094114
## Asteraceae               0.102157
## Apiaceae undiff          0.054692
## Teucrium                 0.057038
## Lamiaceae undiff        -0.021450
## Dipsacaceae undiff       0.001676
## Medicago                 0.084736
## Fabaceae undiff          0.077271
## Ranunculaceae undiff     0.119862
## Scrophulariaceae undiff -0.082618
## Liliaceae undiff         0.098274
## Parietaria               0.039764
## Rosaceae                -0.016671
## Sedum-t                  0.126330
## Saxifraga                0.086373
## Globularia cf alypum     0.283650
## Rubiaceae undiff         0.003190
## Plantago coronopus-t    -0.248603
## Chenopodiaceae           0.108623
## Cyperaceae undiff       -0.022065
## 
## 
## Site scores (weighted sums of species scores)
## 
##               RDA1     RDA2     RDA3     RDA4     RDA5     RDA6
## ME-38-HOF -1.35756  2.31958 -0.62785 -0.55451  0.16213 -1.16146
## MA-25-HOF -1.36177  1.31465 -0.54445 -1.29488 -0.03831 -0.71161
## MA-14-HOF -0.97236  2.35347 -0.09197  0.09711 -0.04877 -1.74749
## MA-9-HOF  -1.36167  2.37049  0.01647 -0.83216  0.13003 -1.73816
## MA-26-HOF -1.28658  0.95886 -0.44923 -1.62741 -1.00930 -0.69831
## E-65-HOF  -0.92720 -0.97051  0.80344  1.43237  0.04994 -2.37843
## MA-23-MAQ -0.53906 -0.82157 -2.10905  1.03074 -2.04325  1.38874
## MA-24-MAQ -0.38481 -0.90473 -1.76092 -0.03679 -0.74366  0.88219
## MA-17-MAQ -0.71707 -0.55830 -1.11146 -0.41905 -1.69120  1.15442
## ME-32-MAQ  1.25510  0.19507 -0.72991 -1.75959  4.24920  0.58305
## MA-19-MAQ -0.64966 -1.29352 -1.71124  1.14708 -1.41389  0.13379
## MA-22-MAQ -1.09924 -0.63847 -0.80298  0.15892  0.14725  0.51390
## MA-12-MAQ -0.87922 -0.14144 -0.06377  0.26322  0.41732 -0.23219
## E-64-MAQ  -1.07445 -1.29968  0.71561 -0.67562  0.48372 -0.57473
## F-69-MAQ   0.45699 -0.58905  1.60996  1.14927  0.50459 -1.73259
## E-60-MAQ  -1.10162 -1.31311  0.34372 -0.22399 -0.56939 -0.68700
## MA-8-MAQ  -0.09796 -0.55047 -0.11506 -1.29369 -0.89279  0.15907
## MA-27-MAQ -0.69253 -1.65837  1.75839 -0.79375  0.68285 -0.55935
## F-67-MAQ  -0.65233 -2.06231  1.92013  0.85645 -0.12191 -0.97266
## MA-21-GAR -0.44288 -1.02240 -0.61883 -1.98544 -0.22825  1.57801
## E-63-GAR  -0.55901 -0.48321  0.15994  0.02375  1.23879 -1.68416
## MA-20-GAR -0.13858 -0.75486 -0.87901 -0.05623 -0.79097  0.80699
## F-66-GAR   0.53931 -0.52933  0.90393 -0.05057  0.30735 -0.63809
## E-62-GAR  -0.50661 -1.46905  1.91135 -0.08061  1.84307 -0.60242
## MA-4-GAR  -0.19288 -1.04415 -0.89236 -0.12334  0.34292 -0.01696
## MA-10-BUX -1.08076  2.25146 -0.52950 -0.32282 -0.44929 -0.11220
## MA-44-BUX -0.67862  1.31734  1.40486  1.19444  0.38419  2.07500
## MA-43-BUX -0.73514  1.25799  1.33048  0.53401 -0.95531  1.21660
## MA-42-BUX  0.44430  1.40397  2.15874  3.23657  0.45702  3.03293
## CA-51-BUX -0.43761 -1.30767  0.01956 -1.09876  0.77780  1.79079
## CA-50-BUX  0.04522 -1.04017  0.96910 -0.96775  1.90559  1.66392
## MA-3-DUN   2.08380  0.66672  0.37916 -0.27892 -1.64413 -0.29252
## MA-11-SPH -0.41579  1.57995  0.65806  0.37004  0.62482  2.11076
## ME-30-SPH  0.31564 -0.22877 -1.57033  0.07588 -0.43925  1.01055
## ME-39-ANT  0.30522 -0.02474 -1.94256  0.96044 -1.38073  1.73372
## MA-18-ANT  2.24634  0.29001 -1.32105  0.40796  1.02611  0.03656
## MA-13-ANT -0.76076  1.16622 -0.79479 -0.32816  0.18486 -0.28641
## ME-37-ANT  1.68313  0.12289 -1.67435  0.75419  1.42894 -0.14910
## F-68-ANT   1.41603  0.40016 -1.40958  3.17491  1.09304 -1.06792
## MA-15-ANT -0.30955  0.67166 -0.53000 -0.26862  0.01990  0.48668
## MA-2-CSV   2.23330  0.81315  1.06320 -0.54875 -1.93820 -0.76055
## MA-1-CSV   2.66274  0.84869  1.04313 -0.83386 -1.96506  0.41998
## ME-29-CSV  1.08562  0.51063  1.23333  0.13462 -0.82663 -1.46206
## ME-33-HFV  1.55963 -0.16196  1.30680 -1.46666 -1.20502 -0.89114
## ME-41-HFV  1.70114 -0.13490  0.38184 -1.76395  0.59406  0.43282
## MA-7-HFV   1.66483 -0.15773 -1.41915  1.31026  0.65822 -0.67483
## E-61-HFV  -0.28508 -1.65248  1.60820  1.37366  0.68160 -1.37816
## 
## 
## Site constraints (linear combinations of constraining variables)
## 
##               RDA1     RDA2     RDA3     RDA4     RDA5      RDA6
## ME-38-HOF -0.81762  0.73629 -0.28430 -0.51377  0.82621 -0.966249
## MA-25-HOF -1.57282  0.51460 -0.20505 -0.76569  0.33995 -1.028087
## MA-14-HOF -1.83949  2.30994 -0.30957 -0.69660 -0.23546 -1.464364
## MA-9-HOF  -0.79399  2.00897  0.33086 -0.26481  0.38858 -0.708178
## MA-26-HOF -0.86133  0.28525 -0.32086 -1.75652 -0.91095 -0.641664
## E-65-HOF   0.39815 -0.49198 -0.27985  2.40294 -0.06684 -0.330251
## MA-23-MAQ -1.18658 -0.65037 -1.93078  0.67543 -1.16971  0.373190
## MA-24-MAQ -0.22723 -0.70300 -1.24330 -0.45614 -0.76980  1.465478
## MA-17-MAQ -0.29909 -0.98620  0.59012  0.01936 -0.27635  0.265438
## ME-32-MAQ  1.25510  0.19507 -0.72991 -1.75959  4.24920  0.583052
## MA-19-MAQ -0.77739 -1.48812 -1.02639  0.81299 -1.12160 -0.014002
## MA-22-MAQ -0.31970 -0.36846 -1.07496  0.60326 -0.23137  0.630884
## MA-12-MAQ -0.96613  0.87539 -0.23266  0.03998  0.16450  0.577345
## E-64-MAQ  -0.92711 -1.25403  1.10160 -0.28149  0.48879 -0.618886
## F-69-MAQ  -0.39271 -1.03348  0.71219  0.72787  0.23791 -0.988386
## E-60-MAQ  -0.94189 -1.00954  0.59993  0.19809 -0.71084 -1.480126
## MA-8-MAQ  -0.30350 -0.06020 -0.15194 -1.53672 -0.12111 -0.078336
## MA-27-MAQ -0.78846 -0.73209  0.30068 -1.19982 -0.52248 -0.174826
## F-67-MAQ  -0.55225 -1.09097  0.52230  0.19270 -0.25088  0.191790
## MA-21-GAR -0.72012 -0.84902  0.25638 -1.60164 -1.05943  1.282582
## E-63-GAR  -0.12502 -0.19572  0.10023  0.19145  1.19974 -1.158863
## MA-20-GAR  0.06861 -1.07832 -0.39140 -0.05399 -0.55282  0.926543
## F-66-GAR   0.19422 -1.09380  0.48957  0.50461  0.75547 -0.120904
## E-62-GAR  -0.56202 -1.11327  1.29921  0.09968  0.55828 -0.898113
## MA-4-GAR   0.18403 -0.11415 -0.57886  0.27397  0.51403 -1.308661
## MA-10-BUX -0.64348  1.92234 -0.36284 -0.39066  0.17195 -0.238401
## MA-44-BUX -0.20825  1.29254  1.02783  0.70088  0.45533  2.187554
## MA-43-BUX -1.42791  1.65144  0.92481  1.16430 -0.90108  1.397819
## MA-42-BUX  0.27087  0.90744  2.88080  2.79759  0.58122  1.955560
## CA-51-BUX -0.22064 -1.25381  0.81161 -0.77298  1.46663  1.565090
## CA-50-BUX -0.18360 -1.13871  0.02684 -1.30047  1.11514  1.896259
## MA-3-DUN   1.56398 -0.39761 -0.15455  0.02548 -1.24841  0.749893
## MA-11-SPH -0.59010  1.77348  0.72185  0.32026  0.35399  1.502806
## ME-30-SPH  0.87658  0.49983 -1.33879  0.55875 -1.14557  0.498965
## ME-39-ANT  0.40020 -0.86630 -1.64653  0.22154 -0.84275  0.811998
## MA-18-ANT  1.86756  0.73484 -1.98552  0.19253  0.57098  0.509187
## MA-13-ANT -0.32002  0.95375 -1.31564  0.62138 -0.63711 -0.272787
## ME-37-ANT  1.24246  0.73478 -1.20193  0.88553  1.41036 -0.724699
## F-68-ANT   0.69693 -0.59989 -1.07731  2.51557  0.49741 -1.044600
## MA-15-ANT -0.52825  0.42214 -0.85496 -0.37777  0.10029  0.138721
## MA-2-CSV   2.48270  0.79567  1.22635 -0.39914 -1.33608 -1.176229
## MA-1-CSV   2.13075  0.60298  1.05066 -0.95385 -1.43741 -0.003536
## ME-29-CSV  1.32707  1.10955  1.36481 -0.68227 -1.64581 -1.153702
## ME-33-HFV  1.50386 -0.65882  1.01642 -1.01887 -0.69886 -0.185211
## ME-41-HFV  1.79804  0.02623  0.37006 -1.42448 -0.22269  0.257723
## MA-7-HFV   1.17990  0.20988 -0.80817  0.75420  0.92876 -1.198900
## E-61-HFV  -0.34432 -1.33453  1.78097  0.70691  0.74068 -1.789919
## 
## 
## Biplot scores for constraining variables
## 
##                              RDA1      RDA2     RDA3       RDA4     RDA5
## masl                     -0.43939  0.649538  0.18138  0.2852311  0.03845
## aspect                   -0.17069 -0.075268  0.08797 -0.2065774  0.26750
## herbivory_pressure        0.05901 -0.060246 -0.29339 -0.0789728 -0.24592
## agropastoral_use_binary   0.23500  0.027379 -0.54208  0.2155152  0.02720
## trampling                -0.07522  0.218527  0.22672 -0.1347781 -0.33773
## land_open                 0.74084  0.065675  0.06295  0.3148256  0.10665
## Soil_type                 0.18234  0.028340 -0.10604 -0.2556279  0.61731
## Sea_distance_meters      -0.24146  0.466355 -0.29719  0.0006233  0.16751
## rainfall_10_yrs          -0.30946  0.746666 -0.14214 -0.0607695 -0.13504
## temp_10_yrs               0.33229 -0.749715 -0.18873 -0.2142598 -0.02414
## Slope_percent            -0.42318  0.402122  0.08885  0.0562811 -0.06747
## salty_soil                0.56986  0.177491  0.14608 -0.0986607 -0.46326
## flooded_soil              0.62842  0.127529  0.34189 -0.3045159 -0.36314
## direct_human_pressure_bi -0.06530  0.534580 -0.38099  0.2422492 -0.08361
## Nitrophily_index_relevés  0.46676 -0.005931 -0.32408  0.1443164  0.19554
## EFFIS_Fire_10km_10yrs    -0.20531  0.465031  0.37091  0.1903263 -0.28580
## Crop_index                0.11501 -0.078715 -0.20342  0.0092121  0.01894
## Dist_ag_km               -0.06437 -0.170467  0.12963 -0.2033727  0.27230
## gHM                       0.29164 -0.159010 -0.13705 -0.0081939 -0.15757
##                               RDA6
## masl                      0.403683
## aspect                   -0.098143
## herbivory_pressure        0.178285
## agropastoral_use_binary  -0.085347
## trampling                 0.032560
## land_open                 0.195010
## Soil_type                 0.084704
## Sea_distance_meters       0.005612
## rainfall_10_yrs           0.019881
## temp_10_yrs              -0.192240
## Slope_percent             0.360555
## salty_soil               -0.073746
## flooded_soil             -0.153730
## direct_human_pressure_bi  0.184516
## Nitrophily_index_relevés -0.218027
## EFFIS_Fire_10km_10yrs     0.241087
## Crop_index               -0.253065
## Dist_ag_km                0.444083
## gHM                      -0.007760

6.3 Run RDA with selection of most important variables using a permutation tests on environmental variables

# Stepwise forward/backward method for variable selection
rda.selected.fb = step(rda.2, scope = list(lower = formula(stp), upper = formula(rda.2)), trace = 0, test = "perm")
colnames(env.data.cut)
##  [1] "masl"                     "aspect"                  
##  [3] "herbivory_pressure"       "agropastoral_use_binary" 
##  [5] "trampling"                "land_open"               
##  [7] "Soil_type"                "Sea_distance_meters"     
##  [9] "rainfall_10_yrs"          "temp_10_yrs"             
## [11] "Slope_percent"            "salty_soil"              
## [13] "flooded_soil"             "direct_human_pressure_bi"
## [15] "Nitrophily_index_relevés" "EFFIS_Fire_10km_10yrs"   
## [17] "Crop_index"               "Dist_ag_km"              
## [19] "gHM"
# create clusters and centroids for plotting
rda.fb.scores.sqrt = as.data.frame(scores(rda.selected.fb, choices = c(1,2), display = c("sp", "sites"),
                                                    scaling = "sites")[2])
rda.fb.scores.sqrt$Cluster_SqrtTrans = mod.pol$Cluster_SqrtTrans 
rda.fb.centroids.sqrt  = rda.fb.scores.sqrt %>%  dplyr::group_by(Cluster_SqrtTrans) %>% 
  dplyr::summarise(
    RDA1.mean = mean(sites.RDA1),
    RDA2.mean = mean(sites.RDA2))

# Transform data for use with ggplot (for publication)
# env variables and taxa
RDAspecies <- scores(rda.selected.fb, display = "species", scaling = "species") %>% as.data.frame() %>% rownames_to_column("species") 
clusters = rda.fb.scores.sqrt %>% rownames_to_column("Site_Code_Veg") %>% full_join(mod.pol.sites, by = "Site_Code_Veg")
clusters = as.data.frame(clusters[,c(1,4)])
RDAsites <- scores(rda.selected.fb, display = "sites", scaling = "sites") %>% as.data.frame() %>% rownames_to_column("Site_Code_Veg") %>% full_join(mod.pol.sites, by = "Site_Code_Veg")  %>%  full_join(clusters, by = "Site_Code_Veg")

RDAenv <- scores(rda.selected.fb, display = "bp", scaling = "species") %>% as.data.frame()
RDAenv.herb = RDAenv[1,]
RDAenv.agro = RDAenv[2,]
RDAenv.tramp = RDAenv[3,]
RDAenv.landopen = RDAenv[4,]
RDAenv.soiltype = RDAenv[5,]
RDAenv.rainfall = RDAenv[6,]
RDAenv.temp = RDAenv[7,]
RDAenv.slope = RDAenv[8,]
RDAenv.salty = RDAenv[9,]
RDAenv.flooded = RDAenv[10,]
RDAenv.fire = RDAenv[11,]
RDAenv.distag = RDAenv[12,]
RDAenv.gHM = RDAenv[13,]

# Sites
ggplot() +
  geom_point(data = RDAsites, aes(x = RDA1, y = RDA2, color = as.factor(Cluster_SqrtTrans), shape = Veg_Types_Code ), size = 2.5) +
  geom_text_repel(data = RDAsites,aes(x = RDA1, y = RDA2,label = Site_Code, segment.alpha = 0.5, segment.size = 0.3), max.overlaps = 10,box.padding = unit(0.45, "lines"), size = 2.8) + 
  geom_point(data = rda.fb.centroids.sqrt, aes(x = RDA1.mean, y = RDA2.mean, color = as.factor(Cluster_SqrtTrans)), shape = 19, size = 3) +
  scale_shape_manual(values = c(0,1,2,3,4,5,6,7,8)) +
  scale_color_manual(values = c("#D58B60", "#548F00", "#4A7381")) +
  geom_text(data = RDAenv.herb, aes(x = RDA1, y = RDA2), label = "Herbivory pressure", nudge_x = -0.2, nudge_y = 0.0, size =4) +
  geom_text(data = RDAenv.agro, aes(x = RDA1, y = RDA2), label = "Agropastoral use", nudge_x = -0.09, nudge_y = 0.029, size =4) +
  geom_text(data = RDAenv.tramp, aes(x = RDA1, y = RDA2), label = "Trampling", nudge_x = 0.1, nudge_y = 0.04, size =4) +
  geom_text(data = RDAenv.landopen, aes(x = RDA1, y = RDA2), label = "Landscape openness", nudge_x = 0.2,  nudge_y = -0.04, size =4) +
  geom_text(data = RDAenv.soiltype, aes(x = RDA1, y = RDA2), label = "Soil type", nudge_x =0.09, nudge_y = -0.031, size =4) +
  geom_text(data = RDAenv.rainfall, aes(x = RDA1, y = RDA2), label = "Mean decadal rainfall", nudge_x =-0.01, nudge_y = 0.08, size =4) +
  geom_text(data = RDAenv.temp, aes(x = RDA1, y = RDA2), label = "Mean decadal temperature", nudge_x =0.005, nudge_y = -0.032, size =4) +
  geom_text(data = RDAenv.slope, aes(x = RDA1, y = RDA2), label = "Slope", nudge_x =0.02, nudge_y = 0.03, size =4) +
  geom_text(data = RDAenv.salty, aes(x = RDA1, y = RDA2), label = "Saline soil", nudge_x =-0.05, nudge_y = 0.05, size =4) +
  geom_text(data = RDAenv.flooded, aes(x = RDA1, y = RDA2), label = "Flooded Soil", nudge_x =0.13, nudge_y = 0.028, size =4) +
  geom_text(data = RDAenv.fire, aes(x = RDA1, y = RDA2), label = "Fire Disturbance", nudge_x =-0.19, nudge_y = 0.021, size =4) +
  geom_text(data = RDAenv.distag, aes(x = RDA1, y = RDA2), label = "Distance to agriculture", nudge_x =-0.23, nudge_y = 0.07, size =4) +
  geom_text(data = RDAenv.gHM, aes(x = RDA1, y = RDA2), label = "gHM", nudge_x =0.023, nudge_y = -0.023, size =4) +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
  geom_segment(data = RDAenv, aes(x = 0, y = 0, xend = RDA1, yend = RDA2), alpha = 0.5, arrow = arrow(length = unit(0.2, "cm"))) +
  theme_bw() + theme(axis.title = element_text(size=13), axis.text = element_text(size = 11),  legend.position = c(.895,.25),legend.background = element_rect(fill="white",size=0.25, linetype="solid", colour ="black")) +
  labs(x = "\nRDA1 (12.6%)",
       y = "RDA2 (10.0%)\n", color = "Cluster Centroid", shape = "Vegetation Type") + 
  lims(x = c(-0.7,1.2), y = c(-.9,.9))

 # Species
ggplot() +
  geom_point(data = RDAspecies, aes(x = RDA1, y = RDA2), color = 'grey50', shape = 1, size = 2.2) +
  geom_text_repel(data = RDAspecies,aes(x = RDA1, y = RDA2,label = species, segment.alpha = 0.5, segment.size = 0.3), max.overlaps = 10,box.padding = unit(0.45, "lines"), size = 2.8) +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
  geom_segment(data = RDAenv, aes(x = 0, y = 0, xend = RDA1, yend = RDA2), alpha = 0.5, arrow = arrow(length = unit(0.2, "cm"))) +
  theme_bw() + theme(axis.title = element_text(size=13), axis.text = element_text(size = 11)) +
  labs(x = "\nRDA1 (12.6%)",
       y = "RDA2 (10.0%)\n") + 
  lims(x = c(-1.362,1.25), y = c(-.89,1.37))

# Inset of species plot (to show obscured data points)
ggplot() +
  geom_point(data = RDAspecies, aes(x = RDA1, y = RDA2), color = 'grey50', shape = 1, size = 3) +
  geom_text_repel(data = RDAspecies,aes(x = RDA1, y = RDA2,label = species, segment.alpha = 0.5, segment.size = 0.3), max.overlaps = 10,box.padding = unit(0.45, "lines"), size = 2.5) +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
  theme_bw()  + theme(axis.title = element_text(size=13), axis.text = element_text(size = 11), plot.margin=unit(c(.6,.6,.2,.2),"cm")) +
  labs(x = "",y = "") + lims(x = c(-.1,.25), y = c(-.2,.2))