Function: Linear Regression

Description: Fits a regression line to a scatterplot of x and y values

Input: continuous x & y variables

Output: abline & scatterplot

linReg <- function(xVar= 1:100,yVar=runif(100,min=0,max=100),
                   dataframe=data.frame(xVar,yVar)){
  linRegMod <- lm(yVar~xVar,data=dataframe)
  linRegOut <- c(slope=summary(linRegMod)$coefficients[2,1],
                 pValue=summary(linRegMod)$coefficients[2,4])
    return(linRegOut)}

linReg()
##        slope       pValue 
## -0.007030389  0.943965575

Function for plotting results of linear regression

linRegPlot <-  function(xVar=1:100,yVar=runif(100,min=0,max=100),dataframe=data.frame(xVar,yVar)){
  lrplot <- plot(y=dataframe$yVar,
                       x=dataframe$xVar,
                      cex=2, 
                      pch=21,
                      bg="purple",
                      main="Linear Regression",
                      xlab="X Variable", 
                      ylab="Y variable",
                      xlim=range(xVar),
                      ylim=range(yVar))
  linRegMod <- lm(yVar~xVar,data=dataframe)
  abline(linRegMod)
  return(lrplot)
}

linRegPlot()

## NULL

Testing linReg() & linRegPlot() with tiny data set

# make tiny data set
rdat <- data.frame(xVar=runif(10,min=0,max=20),yVar=runif(10,min=20,max=40))

# test linReg function with rdat
linReg(rdat)
##       slope      pValue 
## 0.219591998 0.001375061
# test linRegPlot function with rdat
linRegPlot(rdat)

## NULL

Function: Logistic Regression

Description:

Input: continuous x variable & categorical y variable

Output: estimate of xvar and p value of xvar

logReg <- function(xVar=rgamma(n=20,shape=5,scale=5), 
                   yVar=rbinom(n=20,size=1,p=0.5)){
  logRegMod <- glm(yVar~ xVar,
                   family=binomial(link="logit"))
  logRegOut <- c(xVarEst=summary(logRegMod)$coefficients[2,1],
                 pValue=summary(logRegMod)$coefficients[2,4])
   return(logRegOut)}

logReg()
##    xVarEst     pValue 
## -0.0141115  0.7664394

Function for plotting results of logistic regression analysis

logRegPlot <- function(xVar=rgamma(n=20,shape=5,scale=5), 
                       yVar=rbinom(n=20,size=1,p=0.5),dataframe = data.frame(xVar,yVar)){
lrResults <- glm(yVar ~ xVar, family="binomial"(link="logit"))
LRplot <- plot(dataframe$xVar, y=dataframe$yVar,
                   pch=21,
                   bg="tan",
                   cex=1,
                   main="Logistic Regression")
LRplot1 <- curve(predict(lrResults,
              data.frame(xVar=x),
              type="response"),
      add=TRUE,
      lwd=2)
return(LRplot1)}

logRegPlot()

## $x
##   [1]  6.624031  7.024256  7.424482  7.824708  8.224933  8.625159  9.025385
##   [8]  9.425610  9.825836 10.226062 10.626287 11.026513 11.426739 11.826964
##  [15] 12.227190 12.627415 13.027641 13.427867 13.828092 14.228318 14.628544
##  [22] 15.028769 15.428995 15.829221 16.229446 16.629672 17.029898 17.430123
##  [29] 17.830349 18.230575 18.630800 19.031026 19.431251 19.831477 20.231703
##  [36] 20.631928 21.032154 21.432380 21.832605 22.232831 22.633057 23.033282
##  [43] 23.433508 23.833734 24.233959 24.634185 25.034411 25.434636 25.834862
##  [50] 26.235087 26.635313 27.035539 27.435764 27.835990 28.236216 28.636441
##  [57] 29.036667 29.436893 29.837118 30.237344 30.637570 31.037795 31.438021
##  [64] 31.838247 32.238472 32.638698 33.038923 33.439149 33.839375 34.239600
##  [71] 34.639826 35.040052 35.440277 35.840503 36.240729 36.640954 37.041180
##  [78] 37.441406 37.841631 38.241857 38.642083 39.042308 39.442534 39.842759
##  [85] 40.242985 40.643211 41.043436 41.443662 41.843888 42.244113 42.644339
##  [92] 43.044565 43.444790 43.845016 44.245242 44.645467 45.045693 45.445919
##  [99] 45.846144 46.246370 46.646595
## 
## $y
##         1         2         3         4         5         6         7 
## 0.3525548 0.3564700 0.3604044 0.3643577 0.3683294 0.3723190 0.3763260 
##         8         9        10        11        12        13        14 
## 0.3803501 0.3843907 0.3884473 0.3925194 0.3966065 0.4007081 0.4048236 
##        15        16        17        18        19        20        21 
## 0.4089526 0.4130944 0.4172486 0.4214146 0.4255918 0.4297796 0.4339775 
##        22        23        24        25        26        27        28 
## 0.4381850 0.4424013 0.4466259 0.4508583 0.4550977 0.4593437 0.4635957 
##        29        30        31        32        33        34        35 
## 0.4678529 0.4721148 0.4763807 0.4806501 0.4849224 0.4891968 0.4934729 
##        36        37        38        39        40        41        42 
## 0.4977498 0.5020271 0.5063042 0.5105802 0.5148548 0.5191272 0.5233967 
##        43        44        45        46        47        48        49 
## 0.5276629 0.5319250 0.5361825 0.5404347 0.5446810 0.5489209 0.5531536 
##        50        51        52        53        54        55        56 
## 0.5573786 0.5615954 0.5658033 0.5700017 0.5741901 0.5783679 0.5825344 
##        57        58        59        60        61        62        63 
## 0.5866893 0.5908318 0.5949614 0.5990777 0.6031800 0.6072679 0.6113408 
##        64        65        66        67        68        69        70 
## 0.6153982 0.6194396 0.6234645 0.6274725 0.6314630 0.6354356 0.6393899 
##        71        72        73        74        75        76        77 
## 0.6433253 0.6472415 0.6511380 0.6550144 0.6588703 0.6627053 0.6665189 
##        78        79        80        81        82        83        84 
## 0.6703110 0.6740810 0.6778286 0.6815534 0.6852552 0.6889336 0.6925883 
##        85        86        87        88        89        90        91 
## 0.6962190 0.6998254 0.7034072 0.7069642 0.7104961 0.7140026 0.7174836 
##        92        93        94        95        96        97        98 
## 0.7209387 0.7243678 0.7277707 0.7311472 0.7344971 0.7378202 0.7411163 
##        99       100       101 
## 0.7443854 0.7476273 0.7508418

Test logReg and logRegPlot functions with tiny data set

# tiny data set
xVar <- rgamma(n=30,shape=4,scale=4)
yVar <- rbinom(n=30,size=1,p=0.5)
df <- data.frame(xVar,yVar)
 
# test logReg
logReg(xVar=xVar,yVar=yVar)
##    xVarEst     pValue 
## 0.01965795 0.68671718
# test logRegPlot
logRegPlot(xVar=xVar,yVar = yVar,dataframe=df)

## $x
##   [1]  3.432618  3.733125  4.033632  4.334138  4.634645  4.935152  5.235659
##   [8]  5.536166  5.836672  6.137179  6.437686  6.738193  7.038699  7.339206
##  [15]  7.639713  7.940220  8.240726  8.541233  8.841740  9.142247  9.442753
##  [22]  9.743260 10.043767 10.344274 10.644780 10.945287 11.245794 11.546301
##  [29] 11.846808 12.147314 12.447821 12.748328 13.048835 13.349341 13.649848
##  [36] 13.950355 14.250862 14.551368 14.851875 15.152382 15.452889 15.753395
##  [43] 16.053902 16.354409 16.654916 16.955422 17.255929 17.556436 17.856943
##  [50] 18.157450 18.457956 18.758463 19.058970 19.359477 19.659983 19.960490
##  [57] 20.260997 20.561504 20.862010 21.162517 21.463024 21.763531 22.064037
##  [64] 22.364544 22.665051 22.965558 23.266064 23.566571 23.867078 24.167585
##  [71] 24.468092 24.768598 25.069105 25.369612 25.670119 25.970625 26.271132
##  [78] 26.571639 26.872146 27.172652 27.473159 27.773666 28.074173 28.374679
##  [85] 28.675186 28.975693 29.276200 29.576706 29.877213 30.177720 30.478227
##  [92] 30.778734 31.079240 31.379747 31.680254 31.980761 32.281267 32.581774
##  [99] 32.882281 33.182788 33.483294
## 
## $y
##         1         2         3         4         5         6         7 
## 0.3713983 0.3727785 0.3741608 0.3755451 0.3769314 0.3783198 0.3797102 
##         8         9        10        11        12        13        14 
## 0.3811025 0.3824968 0.3838931 0.3852912 0.3866913 0.3880932 0.3894970 
##        15        16        17        18        19        20        21 
## 0.3909026 0.3923100 0.3937193 0.3951302 0.3965430 0.3979575 0.3993736 
##        22        23        24        25        26        27        28 
## 0.4007915 0.4022110 0.4036322 0.4050550 0.4064793 0.4079053 0.4093328 
##        29        30        31        32        33        34        35 
## 0.4107618 0.4121924 0.4136244 0.4150579 0.4164928 0.4179292 0.4193669 
##        36        37        38        39        40        41        42 
## 0.4208060 0.4222465 0.4236883 0.4251313 0.4265757 0.4280213 0.4294682 
##        43        44        45        46        47        48        49 
## 0.4309162 0.4323654 0.4338158 0.4352673 0.4367200 0.4381737 0.4396285 
##        50        51        52        53        54        55        56 
## 0.4410843 0.4425411 0.4439990 0.4454578 0.4469175 0.4483781 0.4498397 
##        57        58        59        60        61        62        63 
## 0.4513021 0.4527653 0.4542294 0.4556942 0.4571598 0.4586262 0.4600933 
##        64        65        66        67        68        69        70 
## 0.4615611 0.4630295 0.4644986 0.4659683 0.4674386 0.4689094 0.4703808 
##        71        72        73        74        75        76        77 
## 0.4718527 0.4733251 0.4747980 0.4762713 0.4777450 0.4792191 0.4806935 
##        78        79        80        81        82        83        84 
## 0.4821683 0.4836434 0.4851188 0.4865945 0.4880704 0.4895465 0.4910228 
##        85        86        87        88        89        90        91 
## 0.4924992 0.4939758 0.4954524 0.4969292 0.4984060 0.4998828 0.5013596 
##        92        93        94        95        96        97        98 
## 0.5028365 0.5043132 0.5057899 0.5072665 0.5087429 0.5102192 0.5116954 
##        99       100       101 
## 0.5131713 0.5146470 0.5161224

Function: ANOVA

input: categorical x variable and continuous y variable

output: p value

analVar <- function(xVar=as.factor(rep(c("A","B","C","D","E"),each=3)),
                    yVar=c(rgamma(10,shape=5,scale=5),rgamma(5,shape=5,scale=10))){
                      df=data.frame(xVar,yVar)
          aovMod <- aov(yVar~xVar,data=df)
          aovOut <- summary(aovMod)[[1]][["Pr(>F)"]][1]
         return(aovOut)}
        
analVar()
## [1] 0.3487473

Function for plotting boxplot to represent ANOVA results

analVarPlot <- function(xVar=as.factor(rep(c("A","B","C","D","E"),each=3)),yVar=c(rgamma(15,shape=5,scale=5))){
  df <- data.frame(xVar,yVar)
  aovMod <- aov(yVar~xVar,data=df)
  aovPlot <-  boxplot(yVar~xVar,
                      data=df,
                      col=c("red","yellow","blue","green","purple"),
                    xlab=names(xVar),ylab=names(yVar))
  return(aovPlot)}
  
analVarPlot() 

## $stats
##          [,1]     [,2]     [,3]     [,4]     [,5]
## [1,] 11.30472 19.19192 17.68174 11.84153 12.65913
## [2,] 12.95046 23.77338 18.70487 16.89273 14.73855
## [3,] 14.59621 28.35485 19.72799 21.94392 16.81797
## [4,] 21.28848 34.92308 43.28703 28.43865 21.24710
## [5,] 27.98075 41.49131 66.84606 34.93338 25.67623
## 
## $n
## [1] 3 3 3 3 3
## 
## $conf
##           [,1]     [,2]      [,3]     [,4]     [,5]
## [1,]  6.990153 18.18394 -2.696184 11.41157 10.88078
## [2,] 22.202257 38.52576 42.152170 32.47627 22.75516
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "A" "B" "C" "D" "E"

test analVar and analVarPlot using tiny data set

# make tiny data set
xVar1 <- as.factor(rep(c("a","b","c"),each=5))
yVar1 <- c(rgamma(10,shape=5,scale=5),rgamma(5,shape=5,scale=10))

# test analVar() using tiny data set
analVar(xVar=xVar1,yVar=yVar1)
## [1] 0.3119682
# test analVarPlot() using tiny data set
analVarPlot(xVar=xVar1,yVar=yVar1)

## $stats
##           [,1]     [,2]     [,3]
## [1,]  8.867711 22.01823 10.57734
## [2,] 13.415654 22.01823 25.01972
## [3,] 30.672002 23.63924 38.25846
## [4,] 32.614166 24.19501 50.94457
## [5,] 34.052092 24.19501 51.43919
## 
## $n
## [1] 5 5 5
## 
## $conf
##          [,1]     [,2]     [,3]
## [1,] 17.10638 22.10113 19.94003
## [2,] 44.23762 25.17735 56.57689
## 
## $out
## [1] 33.50240 18.67288
## 
## $group
## [1] 2 2
## 
## $names
## [1] "a" "b" "c"

Function: Contingency Table

Input: discrete independent variable and discrete dependent variable

Output:

contTable <- function(x=c(22,40,60),y=c(40,80,45),datamatrix=rbind(x,y)){
  rownames(datamatrix)=c("Cold","Warm")
  colnames(datamatrix)=c("Species1","Species2","Species3")
  contTableMod <- chisq.test(datamatrix)
  contTableOut <- print(chisq.test(datamatrix)[3])
          return(contTableOut)}

contTable()
## $p.value
## [1] 0.0006799671
## $p.value
## [1] 0.0006799671

Function for plotting mosaic plot of data complementary to contingency table results

contPlot <- function(x=c(22,40,60),y=c(40,80,45),datamatrix=rbind(x,y)){
  rownames(datamatrix)=c("Cold","Warm")
  colnames(datamatrix)=c("Species1","Species2","Species3")
  mplot <- mosaicplot(x=datamatrix,col=c("red","blue","yellow"),shade=F)
  return(mplot)}

contPlot()

## NULL

Test contingency table functions using tiny data set

x <- c(2,10,7)
y <- c(20,40,50)
dm <- rbind(x,y)

# test contTable()
contTable(x,y,dm)
## $p.value
## [1] 0.3800403
## $p.value
## [1] 0.3800403
# test contPlot()
contPlot(x,y,dm)

## NULL