Import Adiantum quantitative ecological data

adDat <- read.table("QuantitativeDataAdiantum1.csv",sep=",",header=T,stringsAsFactors = F)
head(adDat,3)
##   SitePatch SpeciesID Elevation PercentCanopyCover MicroSlope MicroAspect
## 1       1.1         P    231.00              85.44         30         110
## 2       1.2         P    224.59              88.82         31         102
## 3       2.1         P    384.00              88.82          5         211
##   MacroSlope MacroAspect AvgLitterDepth AvgDepthO AvgDepthA AvgDepthB
## 1       27.0         102            4.0      3.75        13        NA
## 2       29.0         108            2.0      2.75         7     10.16
## 3        7.5          90            2.8      3.25        10        NA
##   AvgSoilDepth   pH Ompct    P  K   Ca Mg   Zn    B   Mn  Cu   Fe  Al Na
## 1       27.000 4.80 3.965 2.40 36  154 25 1.55 0.20 10.3 0.2 24.9 343  9
## 2       27.300 4.89 6.240 2.05 60  406 74 8.20 0.25 49.8 0.2 27.6 125 87
## 3        5.625 6.10 8.606 1.10 34 2010 89 3.60 0.15 30.0 0.1  2.5  25 13
##    S ExchAcid  ECEC CaBaseSat MgBaseSat KBaseSat Pb Ni Cd Cr
## 1 20     7.06  1.07      9.47      2.56     1.14 NA NA NA NA
## 2 17     6.39  2.80     22.09      6.71     1.67 NA NA NA NA
## 3  6     2.71 10.88     73.96      5.46     0.64 NA NA NA NA
tail(adDat,3)
##    SitePatch SpeciesID Elevation PercentCanopyCover MicroSlope MicroAspect
## 27      10.1         V  392.5824              66.20          5           0
## 28      10.2       A&V  434.6448              85.44         33         250
## 29      10.3         P  391.6680              83.62         18         273
##    MacroSlope MacroAspect AvgLitterDepth AvgDepthO AvgDepthA AvgDepthB
## 27          6           0          1.750    2.3750 10.875000     5.500
## 28         25         257          2.625    2.8125  2.857143     1.125
## 29         20         270          2.100    3.8000  4.400000    11.700
##    AvgSoilDepth   pH  Ompct    P  K   Ca   Mg   Zn    B   Mn   Cu  Fe Al
## 27       38.100 7.04  7.150 1.75 52  358 1213 0.95 0.25  7.6 0.55 2.2 31
## 28        5.875 6.63 19.799 3.40 53  661 1282 0.75 0.30  7.6 0.10 3.2  4
## 29       20.000 6.69  8.515 3.55 61 1183  923 5.60 0.25 18.4 0.05 2.6  6
##    Na S ExchAcid  ECEC CaBaseSat MgBaseSat KBaseSat   Pb   Ni   Cd   Cr
## 27  8 4     0.00 12.03     14.88     84.01     1.11 0.05 0.95 0.05 0.05
## 28 11 7     3.32 14.12     18.95     61.24     0.78 0.30 3.00 0.05 0.10
## 29  8 7     0.77 13.76     40.70     52.92     1.08 0.05 5.95 0.25 0.10
# Clean up data set by changing NA values to 0 - justified for this data-set where the missing values are an absent soil layer and no test results for heavy metal presence from rich northern hardwood forest sites
# Transform NA values to 0
adDat[is.na(adDat)]<- 0
head(adDat,3)
##   SitePatch SpeciesID Elevation PercentCanopyCover MicroSlope MicroAspect
## 1       1.1         P    231.00              85.44         30         110
## 2       1.2         P    224.59              88.82         31         102
## 3       2.1         P    384.00              88.82          5         211
##   MacroSlope MacroAspect AvgLitterDepth AvgDepthO AvgDepthA AvgDepthB
## 1       27.0         102            4.0      3.75        13      0.00
## 2       29.0         108            2.0      2.75         7     10.16
## 3        7.5          90            2.8      3.25        10      0.00
##   AvgSoilDepth   pH Ompct    P  K   Ca Mg   Zn    B   Mn  Cu   Fe  Al Na
## 1       27.000 4.80 3.965 2.40 36  154 25 1.55 0.20 10.3 0.2 24.9 343  9
## 2       27.300 4.89 6.240 2.05 60  406 74 8.20 0.25 49.8 0.2 27.6 125 87
## 3        5.625 6.10 8.606 1.10 34 2010 89 3.60 0.15 30.0 0.1  2.5  25 13
##    S ExchAcid  ECEC CaBaseSat MgBaseSat KBaseSat Pb Ni Cd Cr
## 1 20     7.06  1.07      9.47      2.56     1.14  0  0  0  0
## 2 17     6.39  2.80     22.09      6.71     1.67  0  0  0  0
## 3  6     2.71 10.88     73.96      5.46     0.64  0  0  0  0
dim(adDat)
## [1] 29 36

Part 1: Statisticlly analyze real data

Linear Regression of Ca:Mg ratio in soil

Logistic Regression of Depth A Soil Layer between the serpentine species A. aleuticum & A. viridimontanum

# subset out just the serpentine maidenhair data from adDat
serpDatA <- subset(adDat,SpeciesID=="A")
serpDatV <- subset(adDat,SpeciesID=="V")

# create new species id column to be used in place of letter species id for logistic regression, so that A. aleuticum species id=0 and A. viridimontanum species id=1
dim(serpDatA)
## [1]  6 36
dim(serpDatV)
## [1]  9 36
id <- rep("0",length=6)
serpDatA <- cbind(serpDatA,id)

id <- rep("1",length=9)
serpDatV <- cbind(serpDatV,id)

# combine both subsets into a single data frame serpDat
serpDat <- rbind(serpDatA,serpDatV)

# Test the difference in depth of the A soil layer between the two serpentine maidenhairs using logistic regression - the difference is non-significant, with a p-value of .13. 
logReg(xVar=serpDat$AvgDepthA,yVar=serpDat$id)
##   xVarEst    pValue 
## 0.8765369 0.1363388
logRegPlot(xVar=serpDat$AvgDepthA,yVar=serpDat$id)

## $x
##   [1]  0.00000  0.10875  0.21750  0.32625  0.43500  0.54375  0.65250
##   [8]  0.76125  0.87000  0.97875  1.08750  1.19625  1.30500  1.41375
##  [15]  1.52250  1.63125  1.74000  1.84875  1.95750  2.06625  2.17500
##  [22]  2.28375  2.39250  2.50125  2.61000  2.71875  2.82750  2.93625
##  [29]  3.04500  3.15375  3.26250  3.37125  3.48000  3.58875  3.69750
##  [36]  3.80625  3.91500  4.02375  4.13250  4.24125  4.35000  4.45875
##  [43]  4.56750  4.67625  4.78500  4.89375  5.00250  5.11125  5.22000
##  [50]  5.32875  5.43750  5.54625  5.65500  5.76375  5.87250  5.98125
##  [57]  6.09000  6.19875  6.30750  6.41625  6.52500  6.63375  6.74250
##  [64]  6.85125  6.96000  7.06875  7.17750  7.28625  7.39500  7.50375
##  [71]  7.61250  7.72125  7.83000  7.93875  8.04750  8.15625  8.26500
##  [78]  8.37375  8.48250  8.59125  8.70000  8.80875  8.91750  9.02625
##  [85]  9.13500  9.24375  9.35250  9.46125  9.57000  9.67875  9.78750
##  [92]  9.89625 10.00500 10.11375 10.22250 10.33125 10.44000 10.54875
##  [99] 10.65750 10.76625 10.87500
## 
## $y
##         1         2         3         4         5         6         7 
## 0.1004828 0.1094327 0.1190743 0.1294420 0.1405682 0.1524834 0.1652143 
##         8         9        10        11        12        13        14 
## 0.1787839 0.1932101 0.2085048 0.2246731 0.2417123 0.2596110 0.2783486 
##        15        16        17        18        19        20        21 
## 0.2978944 0.3182076 0.3392366 0.3609197 0.3831850 0.4059514 0.4291293 
##        22        23        24        25        26        27        28 
## 0.4526223 0.4763283 0.5001415 0.5239540 0.5476580 0.5711479 0.5943215 
##        29        30        31        32        33        34        35 
## 0.6170824 0.6393413 0.6610170 0.6820379 0.7023422 0.7218787 0.7406065 
##        36        37        38        39        40        41        42 
## 0.7584951 0.7755239 0.7916819 0.8069662 0.8213822 0.8349417 0.8476628 
##        43        44        45        46        47        48        49 
## 0.8595684 0.8706855 0.8810443 0.8906775 0.8996195 0.9079057 0.9155720 
##        50        51        52        53        54        55        56 
## 0.9226546 0.9291890 0.9352100 0.9407518 0.9458470 0.9505271 0.9548220 
##        57        58        59        60        61        62        63 
## 0.9587602 0.9623687 0.9656727 0.9686961 0.9714610 0.9739883 0.9762973 
##        64        65        66        67        68        69        70 
## 0.9784058 0.9803306 0.9820869 0.9836890 0.9851500 0.9864819 0.9876959 
##        71        72        73        74        75        76        77 
## 0.9888021 0.9898098 0.9907277 0.9915637 0.9923248 0.9930178 0.9936486 
##        78        79        80        81        82        83        84 
## 0.9942227 0.9947452 0.9952207 0.9956534 0.9960470 0.9964051 0.9967309 
##        85        86        87        88        89        90        91 
## 0.9970273 0.9972968 0.9975420 0.9977650 0.9979678 0.9981522 0.9983199 
##        92        93        94        95        96        97        98 
## 0.9984724 0.9986111 0.9987373 0.9988519 0.9989562 0.9990510 0.9991372 
##        99       100       101 
## 0.9992156 0.9992869 0.9993517

Test variation in depth of the A layer between species groups using ANOVA

# run anova on the avg depth of the A layer of the soil 
ANOV(xVar=adDat$SpeciesID,yVar=adDat$AvgDepthA)
## [1] 0.00480182
ANOVplot(xVar=adDat$SpeciesID,yVar=adDat$AvgDepthA)

## $stats
##      [,1] [,2]     [,3]      [,4]     [,5]
## [1,]  2.1 0.00 2.818182  2.900000 3.093000
## [2,]  2.1 0.00 2.837662  5.400000 3.214286
## [3,]  2.1 2.25 2.857143  7.142857 3.300000
## [4,]  2.1 3.75 3.250000 10.000000 3.750000
## [5,]  2.1 4.33 3.642857 13.660000 4.500000
## 
## $n
## [1]  1  6  3 10  9
## 
## $conf
##      [,1]       [,2]     [,3]     [,4]     [,5]
## [1,]  2.1 -0.1688711 2.481003 4.844514 3.017857
## [2,]  2.1  4.6688711 3.233283 9.441201 3.582143
## 
## $out
## [1]  2.250 10.875
## 
## $group
## [1] 5 5
## 
## $names
## [1] "3x"  "A"   "A&V" "P"   "V"

Part 2: Statistical Analysis of Randomized Data

Make a function ranDat that measures the parameters of a vector or a column of a dataframe using the fitdistr function from the MASS package and the rgamma function to simulate continuous random values over 0.

library(MASS)
ranDat <- function(var=rnorm(29,10,1),dist="gamma",n=29){
  fitvar =  fitdistr(var,densfun=dist)
  ranvar = rgamma(n,shape=fitvar$estimate[1],scale=fitvar$estimate[2])
                return(print(ranvar))}

ranDat()
##  [1]  875.3407  702.8842  839.0212  918.9406  827.9229  931.8063  864.0961
##  [8]  940.7076  791.5430 1010.0651  944.3954  985.7426  848.5883  982.5398
## [15]  918.4780  837.5311  825.6974  863.0000  932.1583  813.7643  961.5748
## [22]  890.6003  823.6628  940.6476  999.1107 1068.9957  855.0613  708.0798
## [29]  803.6266

Use the ranDat() function to generate mock data of the Ca and Mg levels in the soil with distributions based on the parameters of the original data.

library(MASS)
ranMg <- ranDat(adDat$Mg)
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
##  [1] 1.260928e-03 2.371506e-04 8.036322e-04 6.668858e-04 5.418385e-05
##  [6] 7.126342e-04 3.299591e-04 9.121019e-04 6.239497e-04 8.420879e-05
## [11] 5.691360e-04 1.677425e-03 3.926563e-04 1.682350e-03 1.002015e-03
## [16] 1.802133e-03 6.649589e-04 9.460608e-04 4.876497e-04 3.152155e-04
## [21] 4.139849e-04 1.039426e-03 1.139841e-03 9.838013e-05 8.612064e-04
## [26] 7.894484e-04 1.501664e-05 1.722253e-03 1.484089e-03
ranCa <- ranDat(adDat$Ca)
##  [1] 0.024579047 0.004265676 0.001409702 0.017402776 0.008202038
##  [6] 0.023160241 0.002128543 0.011708163 0.010852953 0.007711171
## [11] 0.001812124 0.001871388 0.018419917 0.015318653 0.001203716
## [16] 0.009047172 0.010155178 0.003541300 0.011030990 0.005548603
## [21] 0.006934800 0.014240846 0.010197439 0.002054469 0.004348953
## [26] 0.011033198 0.032823453 0.005351959 0.010310612
# bind into a single data frame
randomData <- data.frame(ranCa,ranMg)
head(randomData)
##         ranCa        ranMg
## 1 0.024579047 1.260928e-03
## 2 0.004265676 2.371506e-04
## 3 0.001409702 8.036322e-04
## 4 0.017402776 6.668858e-04
## 5 0.008202038 5.418385e-05
## 6 0.023160241 7.126342e-04
# test the relationship between the randomized Ca and Mg concentrations using the linear regression function
linReg(xVar=randomData$ranCa,yVar=randomData$ranMg)
##        slope       pValue 
## -0.003548415  0.789952405
linRegPlot(xVar=randomData$ranCa,yVar=randomData$ranMg)

## NULL

Test differences in the percent organic matter of the soil between the two species of serpentine maidenhair ferns with ANOVA using randomized data

# Subset out percent organic matter for just the serpentine maidenhairs
serpA <- subset(adDat,SpeciesID=="A")
serpV <- subset(adDat,SpeciesID=="V")

serpSpp <- rbind(serpA,serpV)

# use the fitdistr function to get parameters of percent organic matter in soil using gamma distribution
ranOM <- ranDat(serpSpp$Ompct,"gamma",n=15)
##  [1] 0.4228538 0.5903263 0.4137830 0.8448777 0.2613784 0.4616184 0.6503301
##  [8] 1.0533990 0.8661054 0.5026296 0.7312353 0.2410870 0.3086397 0.6132676
## [15] 0.9937831
# Create vector of species id's and assign to randomly generated ranOM values
ranSpID <- rbinom(15,size=1,prob=.5)
randomData1 <- data.frame(ranSpID,ranOM)

#Run ANOVA with randomly generated values - returns non-significant result
ANOV(xVar=randomData1$ranSpID,yVar=randomData1$ranOM)
## [1] 0.1103978
ANOVplot(xVar=randomData1$ranSpID,yVar=randomData1$ranOM)

## $stats
##           [,1]      [,2]
## [1,] 0.2410870 0.3086397
## [2,] 0.2613784 0.4228538
## [3,] 0.4616184 0.6607808
## [4,] 0.6132676 0.8661054
## [5,] 0.6503301 1.0533990
## 
## $n
## [1]  5 10
## 
## $conf
##           [,1]      [,2]
## [1,] 0.2129744 0.4393147
## [2,] 0.7102624 0.8822469
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "0" "1"

Part 3: Statistical Analysis of Idealized (bling) data

Modeling a significant difference in soil percent organic matter between the two serpentine maidenhair ferns

# find parameters of the distribution of percent organic matter for each species
# aleuticum
fitOM.a <- fitdistr(serpA$Ompct,"gamma")
print(c(fitOM.a$estimate[1],fitOM.a$estimate[2]))
##     shape      rate 
## 10.123988  0.373033
#viridimontanum
fitOM.v <- fitdistr(serpV$Ompct,"gamma")
print(c(fitOM.v$estimate[1],fitOM.v$estimate[2]))
##     shape      rate 
## 2.9412538 0.1439252
# create random data that amplifies the existing difference
# aleuticum
datBlingA <- rgamma(6,shape=15,rate=.3)
sp <- rep("A",length=6)
datBlingA <- data.frame(cbind(datBlingA,sp))

# viridimontanum
datBlingV <- rgamma(9,shape=1.5,rate=.15)
sp <- rep("V",length=9)
datBlingV <- data.frame(cbind(datBlingV,sp))

# Trying to combine this data into a single dataframe for the ANOV() test, but I can't figure out how to coerce it to that form.