Import Adiantum quantitative ecological data
adDat <- read.table("QuantitativeDataAdiantum1.csv",sep=",",header=T,stringsAsFactors = F)
head(adDat,3)
## SitePatch SpeciesID Elevation PercentCanopyCover MicroSlope MicroAspect
## 1 1.1 P 231.00 85.44 30 110
## 2 1.2 P 224.59 88.82 31 102
## 3 2.1 P 384.00 88.82 5 211
## MacroSlope MacroAspect AvgLitterDepth AvgDepthO AvgDepthA AvgDepthB
## 1 27.0 102 4.0 3.75 13 NA
## 2 29.0 108 2.0 2.75 7 10.16
## 3 7.5 90 2.8 3.25 10 NA
## AvgSoilDepth pH Ompct P K Ca Mg Zn B Mn Cu Fe Al Na
## 1 27.000 4.80 3.965 2.40 36 154 25 1.55 0.20 10.3 0.2 24.9 343 9
## 2 27.300 4.89 6.240 2.05 60 406 74 8.20 0.25 49.8 0.2 27.6 125 87
## 3 5.625 6.10 8.606 1.10 34 2010 89 3.60 0.15 30.0 0.1 2.5 25 13
## S ExchAcid ECEC CaBaseSat MgBaseSat KBaseSat Pb Ni Cd Cr
## 1 20 7.06 1.07 9.47 2.56 1.14 NA NA NA NA
## 2 17 6.39 2.80 22.09 6.71 1.67 NA NA NA NA
## 3 6 2.71 10.88 73.96 5.46 0.64 NA NA NA NA
tail(adDat,3)
## SitePatch SpeciesID Elevation PercentCanopyCover MicroSlope MicroAspect
## 27 10.1 V 392.5824 66.20 5 0
## 28 10.2 A&V 434.6448 85.44 33 250
## 29 10.3 P 391.6680 83.62 18 273
## MacroSlope MacroAspect AvgLitterDepth AvgDepthO AvgDepthA AvgDepthB
## 27 6 0 1.750 2.3750 10.875000 5.500
## 28 25 257 2.625 2.8125 2.857143 1.125
## 29 20 270 2.100 3.8000 4.400000 11.700
## AvgSoilDepth pH Ompct P K Ca Mg Zn B Mn Cu Fe Al
## 27 38.100 7.04 7.150 1.75 52 358 1213 0.95 0.25 7.6 0.55 2.2 31
## 28 5.875 6.63 19.799 3.40 53 661 1282 0.75 0.30 7.6 0.10 3.2 4
## 29 20.000 6.69 8.515 3.55 61 1183 923 5.60 0.25 18.4 0.05 2.6 6
## Na S ExchAcid ECEC CaBaseSat MgBaseSat KBaseSat Pb Ni Cd Cr
## 27 8 4 0.00 12.03 14.88 84.01 1.11 0.05 0.95 0.05 0.05
## 28 11 7 3.32 14.12 18.95 61.24 0.78 0.30 3.00 0.05 0.10
## 29 8 7 0.77 13.76 40.70 52.92 1.08 0.05 5.95 0.25 0.10
# Clean up data set by changing NA values to 0 - justified for this data-set where the missing values are an absent soil layer and no test results for heavy metal presence from rich northern hardwood forest sites
# Transform NA values to 0
adDat[is.na(adDat)]<- 0
head(adDat,3)
## SitePatch SpeciesID Elevation PercentCanopyCover MicroSlope MicroAspect
## 1 1.1 P 231.00 85.44 30 110
## 2 1.2 P 224.59 88.82 31 102
## 3 2.1 P 384.00 88.82 5 211
## MacroSlope MacroAspect AvgLitterDepth AvgDepthO AvgDepthA AvgDepthB
## 1 27.0 102 4.0 3.75 13 0.00
## 2 29.0 108 2.0 2.75 7 10.16
## 3 7.5 90 2.8 3.25 10 0.00
## AvgSoilDepth pH Ompct P K Ca Mg Zn B Mn Cu Fe Al Na
## 1 27.000 4.80 3.965 2.40 36 154 25 1.55 0.20 10.3 0.2 24.9 343 9
## 2 27.300 4.89 6.240 2.05 60 406 74 8.20 0.25 49.8 0.2 27.6 125 87
## 3 5.625 6.10 8.606 1.10 34 2010 89 3.60 0.15 30.0 0.1 2.5 25 13
## S ExchAcid ECEC CaBaseSat MgBaseSat KBaseSat Pb Ni Cd Cr
## 1 20 7.06 1.07 9.47 2.56 1.14 0 0 0 0
## 2 17 6.39 2.80 22.09 6.71 1.67 0 0 0 0
## 3 6 2.71 10.88 73.96 5.46 0.64 0 0 0 0
dim(adDat)
## [1] 29 36
Part 1: Statisticlly analyze real data
Linear Regression of Ca:Mg ratio in soil
Logistic Regression of Depth A Soil Layer between the serpentine species A. aleuticum & A. viridimontanum
# subset out just the serpentine maidenhair data from adDat
serpDatA <- subset(adDat,SpeciesID=="A")
serpDatV <- subset(adDat,SpeciesID=="V")
# create new species id column to be used in place of letter species id for logistic regression, so that A. aleuticum species id=0 and A. viridimontanum species id=1
dim(serpDatA)
## [1] 6 36
dim(serpDatV)
## [1] 9 36
id <- rep("0",length=6)
serpDatA <- cbind(serpDatA,id)
id <- rep("1",length=9)
serpDatV <- cbind(serpDatV,id)
# combine both subsets into a single data frame serpDat
serpDat <- rbind(serpDatA,serpDatV)
# Test the difference in depth of the A soil layer between the two serpentine maidenhairs using logistic regression - the difference is non-significant, with a p-value of .13.
logReg(xVar=serpDat$AvgDepthA,yVar=serpDat$id)
## xVarEst pValue
## 0.8765369 0.1363388
logRegPlot(xVar=serpDat$AvgDepthA,yVar=serpDat$id)
## $x
## [1] 0.00000 0.10875 0.21750 0.32625 0.43500 0.54375 0.65250
## [8] 0.76125 0.87000 0.97875 1.08750 1.19625 1.30500 1.41375
## [15] 1.52250 1.63125 1.74000 1.84875 1.95750 2.06625 2.17500
## [22] 2.28375 2.39250 2.50125 2.61000 2.71875 2.82750 2.93625
## [29] 3.04500 3.15375 3.26250 3.37125 3.48000 3.58875 3.69750
## [36] 3.80625 3.91500 4.02375 4.13250 4.24125 4.35000 4.45875
## [43] 4.56750 4.67625 4.78500 4.89375 5.00250 5.11125 5.22000
## [50] 5.32875 5.43750 5.54625 5.65500 5.76375 5.87250 5.98125
## [57] 6.09000 6.19875 6.30750 6.41625 6.52500 6.63375 6.74250
## [64] 6.85125 6.96000 7.06875 7.17750 7.28625 7.39500 7.50375
## [71] 7.61250 7.72125 7.83000 7.93875 8.04750 8.15625 8.26500
## [78] 8.37375 8.48250 8.59125 8.70000 8.80875 8.91750 9.02625
## [85] 9.13500 9.24375 9.35250 9.46125 9.57000 9.67875 9.78750
## [92] 9.89625 10.00500 10.11375 10.22250 10.33125 10.44000 10.54875
## [99] 10.65750 10.76625 10.87500
##
## $y
## 1 2 3 4 5 6 7
## 0.1004828 0.1094327 0.1190743 0.1294420 0.1405682 0.1524834 0.1652143
## 8 9 10 11 12 13 14
## 0.1787839 0.1932101 0.2085048 0.2246731 0.2417123 0.2596110 0.2783486
## 15 16 17 18 19 20 21
## 0.2978944 0.3182076 0.3392366 0.3609197 0.3831850 0.4059514 0.4291293
## 22 23 24 25 26 27 28
## 0.4526223 0.4763283 0.5001415 0.5239540 0.5476580 0.5711479 0.5943215
## 29 30 31 32 33 34 35
## 0.6170824 0.6393413 0.6610170 0.6820379 0.7023422 0.7218787 0.7406065
## 36 37 38 39 40 41 42
## 0.7584951 0.7755239 0.7916819 0.8069662 0.8213822 0.8349417 0.8476628
## 43 44 45 46 47 48 49
## 0.8595684 0.8706855 0.8810443 0.8906775 0.8996195 0.9079057 0.9155720
## 50 51 52 53 54 55 56
## 0.9226546 0.9291890 0.9352100 0.9407518 0.9458470 0.9505271 0.9548220
## 57 58 59 60 61 62 63
## 0.9587602 0.9623687 0.9656727 0.9686961 0.9714610 0.9739883 0.9762973
## 64 65 66 67 68 69 70
## 0.9784058 0.9803306 0.9820869 0.9836890 0.9851500 0.9864819 0.9876959
## 71 72 73 74 75 76 77
## 0.9888021 0.9898098 0.9907277 0.9915637 0.9923248 0.9930178 0.9936486
## 78 79 80 81 82 83 84
## 0.9942227 0.9947452 0.9952207 0.9956534 0.9960470 0.9964051 0.9967309
## 85 86 87 88 89 90 91
## 0.9970273 0.9972968 0.9975420 0.9977650 0.9979678 0.9981522 0.9983199
## 92 93 94 95 96 97 98
## 0.9984724 0.9986111 0.9987373 0.9988519 0.9989562 0.9990510 0.9991372
## 99 100 101
## 0.9992156 0.9992869 0.9993517
Test variation in depth of the A layer between species groups using ANOVA
# run anova on the avg depth of the A layer of the soil
ANOV(xVar=adDat$SpeciesID,yVar=adDat$AvgDepthA)
## [1] 0.00480182
ANOVplot(xVar=adDat$SpeciesID,yVar=adDat$AvgDepthA)
## $stats
## [,1] [,2] [,3] [,4] [,5]
## [1,] 2.1 0.00 2.818182 2.900000 3.093000
## [2,] 2.1 0.00 2.837662 5.400000 3.214286
## [3,] 2.1 2.25 2.857143 7.142857 3.300000
## [4,] 2.1 3.75 3.250000 10.000000 3.750000
## [5,] 2.1 4.33 3.642857 13.660000 4.500000
##
## $n
## [1] 1 6 3 10 9
##
## $conf
## [,1] [,2] [,3] [,4] [,5]
## [1,] 2.1 -0.1688711 2.481003 4.844514 3.017857
## [2,] 2.1 4.6688711 3.233283 9.441201 3.582143
##
## $out
## [1] 2.250 10.875
##
## $group
## [1] 5 5
##
## $names
## [1] "3x" "A" "A&V" "P" "V"
Use the ranDat() function to generate mock data of the Ca and Mg levels in the soil with distributions based on the parameters of the original data.
library(MASS)
ranMg <- ranDat(adDat$Mg)
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## [1] 1.260928e-03 2.371506e-04 8.036322e-04 6.668858e-04 5.418385e-05
## [6] 7.126342e-04 3.299591e-04 9.121019e-04 6.239497e-04 8.420879e-05
## [11] 5.691360e-04 1.677425e-03 3.926563e-04 1.682350e-03 1.002015e-03
## [16] 1.802133e-03 6.649589e-04 9.460608e-04 4.876497e-04 3.152155e-04
## [21] 4.139849e-04 1.039426e-03 1.139841e-03 9.838013e-05 8.612064e-04
## [26] 7.894484e-04 1.501664e-05 1.722253e-03 1.484089e-03
ranCa <- ranDat(adDat$Ca)
## [1] 0.024579047 0.004265676 0.001409702 0.017402776 0.008202038
## [6] 0.023160241 0.002128543 0.011708163 0.010852953 0.007711171
## [11] 0.001812124 0.001871388 0.018419917 0.015318653 0.001203716
## [16] 0.009047172 0.010155178 0.003541300 0.011030990 0.005548603
## [21] 0.006934800 0.014240846 0.010197439 0.002054469 0.004348953
## [26] 0.011033198 0.032823453 0.005351959 0.010310612
# bind into a single data frame
randomData <- data.frame(ranCa,ranMg)
head(randomData)
## ranCa ranMg
## 1 0.024579047 1.260928e-03
## 2 0.004265676 2.371506e-04
## 3 0.001409702 8.036322e-04
## 4 0.017402776 6.668858e-04
## 5 0.008202038 5.418385e-05
## 6 0.023160241 7.126342e-04
# test the relationship between the randomized Ca and Mg concentrations using the linear regression function
linReg(xVar=randomData$ranCa,yVar=randomData$ranMg)
## slope pValue
## -0.003548415 0.789952405
linRegPlot(xVar=randomData$ranCa,yVar=randomData$ranMg)
## NULL
Test differences in the percent organic matter of the soil between the two species of serpentine maidenhair ferns with ANOVA using randomized data
# Subset out percent organic matter for just the serpentine maidenhairs
serpA <- subset(adDat,SpeciesID=="A")
serpV <- subset(adDat,SpeciesID=="V")
serpSpp <- rbind(serpA,serpV)
# use the fitdistr function to get parameters of percent organic matter in soil using gamma distribution
ranOM <- ranDat(serpSpp$Ompct,"gamma",n=15)
## [1] 0.4228538 0.5903263 0.4137830 0.8448777 0.2613784 0.4616184 0.6503301
## [8] 1.0533990 0.8661054 0.5026296 0.7312353 0.2410870 0.3086397 0.6132676
## [15] 0.9937831
# Create vector of species id's and assign to randomly generated ranOM values
ranSpID <- rbinom(15,size=1,prob=.5)
randomData1 <- data.frame(ranSpID,ranOM)
#Run ANOVA with randomly generated values - returns non-significant result
ANOV(xVar=randomData1$ranSpID,yVar=randomData1$ranOM)
## [1] 0.1103978
ANOVplot(xVar=randomData1$ranSpID,yVar=randomData1$ranOM)
## $stats
## [,1] [,2]
## [1,] 0.2410870 0.3086397
## [2,] 0.2613784 0.4228538
## [3,] 0.4616184 0.6607808
## [4,] 0.6132676 0.8661054
## [5,] 0.6503301 1.0533990
##
## $n
## [1] 5 10
##
## $conf
## [,1] [,2]
## [1,] 0.2129744 0.4393147
## [2,] 0.7102624 0.8822469
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "0" "1"
Part 3: Statistical Analysis of Idealized (bling) data
Modeling a significant difference in soil percent organic matter between the two serpentine maidenhair ferns
# find parameters of the distribution of percent organic matter for each species
# aleuticum
fitOM.a <- fitdistr(serpA$Ompct,"gamma")
print(c(fitOM.a$estimate[1],fitOM.a$estimate[2]))
## shape rate
## 10.123988 0.373033
#viridimontanum
fitOM.v <- fitdistr(serpV$Ompct,"gamma")
print(c(fitOM.v$estimate[1],fitOM.v$estimate[2]))
## shape rate
## 2.9412538 0.1439252
# create random data that amplifies the existing difference
# aleuticum
datBlingA <- rgamma(6,shape=15,rate=.3)
sp <- rep("A",length=6)
datBlingA <- data.frame(cbind(datBlingA,sp))
# viridimontanum
datBlingV <- rgamma(9,shape=1.5,rate=.15)
sp <- rep("V",length=9)
datBlingV <- data.frame(cbind(datBlingV,sp))
# Trying to combine this data into a single dataframe for the ANOV() test, but I can't figure out how to coerce it to that form.