Function: Linear Regression
Description: Fits a regression line to a scatterplot of x and y values
Input: continuous x & y variables
Output: abline & scatterplot
linReg <- function(xVar= 1:100,yVar=runif(100,min=0,max=100),
dataframe=data.frame(xVar,yVar)){
linRegMod <- lm(yVar~xVar,data=dataframe)
linRegOut <- c(slope=summary(linRegMod)$coefficients[2,1],
pValue=summary(linRegMod)$coefficients[2,4])
return(linRegOut)}
linReg()
## slope pValue
## -0.007030389 0.943965575
Function for plotting results of linear regression
linRegPlot <- function(xVar=1:100,yVar=runif(100,min=0,max=100),dataframe=data.frame(xVar,yVar)){
lrplot <- plot(y=dataframe$yVar,
x=dataframe$xVar,
cex=2,
pch=21,
bg="purple",
main="Linear Regression",
xlab="X Variable",
ylab="Y variable",
xlim=range(xVar),
ylim=range(yVar))
linRegMod <- lm(yVar~xVar,data=dataframe)
abline(linRegMod)
return(lrplot)
}
linRegPlot()
## NULL
Testing linReg() & linRegPlot() with tiny data set
# make tiny data set
rdat <- data.frame(xVar=runif(10,min=0,max=20),yVar=runif(10,min=20,max=40))
# test linReg function with rdat
linReg(rdat)
## slope pValue
## 0.219591998 0.001375061
# test linRegPlot function with rdat
linRegPlot(rdat)
## NULL
Function: Logistic Regression
Description:
Input: continuous x variable & categorical y variable
Output: estimate of xvar and p value of xvar
logReg <- function(xVar=rgamma(n=20,shape=5,scale=5),
yVar=rbinom(n=20,size=1,p=0.5)){
logRegMod <- glm(yVar~ xVar,
family=binomial(link="logit"))
logRegOut <- c(xVarEst=summary(logRegMod)$coefficients[2,1],
pValue=summary(logRegMod)$coefficients[2,4])
return(logRegOut)}
logReg()
## xVarEst pValue
## -0.0141115 0.7664394
Function for plotting results of logistic regression analysis
logRegPlot <- function(xVar=rgamma(n=20,shape=5,scale=5),
yVar=rbinom(n=20,size=1,p=0.5),dataframe = data.frame(xVar,yVar)){
lrResults <- glm(yVar ~ xVar, family="binomial"(link="logit"))
LRplot <- plot(dataframe$xVar, y=dataframe$yVar,
pch=21,
bg="tan",
cex=1,
main="Logistic Regression")
LRplot1 <- curve(predict(lrResults,
data.frame(xVar=x),
type="response"),
add=TRUE,
lwd=2)
return(LRplot1)}
logRegPlot()
## $x
## [1] 6.624031 7.024256 7.424482 7.824708 8.224933 8.625159 9.025385
## [8] 9.425610 9.825836 10.226062 10.626287 11.026513 11.426739 11.826964
## [15] 12.227190 12.627415 13.027641 13.427867 13.828092 14.228318 14.628544
## [22] 15.028769 15.428995 15.829221 16.229446 16.629672 17.029898 17.430123
## [29] 17.830349 18.230575 18.630800 19.031026 19.431251 19.831477 20.231703
## [36] 20.631928 21.032154 21.432380 21.832605 22.232831 22.633057 23.033282
## [43] 23.433508 23.833734 24.233959 24.634185 25.034411 25.434636 25.834862
## [50] 26.235087 26.635313 27.035539 27.435764 27.835990 28.236216 28.636441
## [57] 29.036667 29.436893 29.837118 30.237344 30.637570 31.037795 31.438021
## [64] 31.838247 32.238472 32.638698 33.038923 33.439149 33.839375 34.239600
## [71] 34.639826 35.040052 35.440277 35.840503 36.240729 36.640954 37.041180
## [78] 37.441406 37.841631 38.241857 38.642083 39.042308 39.442534 39.842759
## [85] 40.242985 40.643211 41.043436 41.443662 41.843888 42.244113 42.644339
## [92] 43.044565 43.444790 43.845016 44.245242 44.645467 45.045693 45.445919
## [99] 45.846144 46.246370 46.646595
##
## $y
## 1 2 3 4 5 6 7
## 0.3525548 0.3564700 0.3604044 0.3643577 0.3683294 0.3723190 0.3763260
## 8 9 10 11 12 13 14
## 0.3803501 0.3843907 0.3884473 0.3925194 0.3966065 0.4007081 0.4048236
## 15 16 17 18 19 20 21
## 0.4089526 0.4130944 0.4172486 0.4214146 0.4255918 0.4297796 0.4339775
## 22 23 24 25 26 27 28
## 0.4381850 0.4424013 0.4466259 0.4508583 0.4550977 0.4593437 0.4635957
## 29 30 31 32 33 34 35
## 0.4678529 0.4721148 0.4763807 0.4806501 0.4849224 0.4891968 0.4934729
## 36 37 38 39 40 41 42
## 0.4977498 0.5020271 0.5063042 0.5105802 0.5148548 0.5191272 0.5233967
## 43 44 45 46 47 48 49
## 0.5276629 0.5319250 0.5361825 0.5404347 0.5446810 0.5489209 0.5531536
## 50 51 52 53 54 55 56
## 0.5573786 0.5615954 0.5658033 0.5700017 0.5741901 0.5783679 0.5825344
## 57 58 59 60 61 62 63
## 0.5866893 0.5908318 0.5949614 0.5990777 0.6031800 0.6072679 0.6113408
## 64 65 66 67 68 69 70
## 0.6153982 0.6194396 0.6234645 0.6274725 0.6314630 0.6354356 0.6393899
## 71 72 73 74 75 76 77
## 0.6433253 0.6472415 0.6511380 0.6550144 0.6588703 0.6627053 0.6665189
## 78 79 80 81 82 83 84
## 0.6703110 0.6740810 0.6778286 0.6815534 0.6852552 0.6889336 0.6925883
## 85 86 87 88 89 90 91
## 0.6962190 0.6998254 0.7034072 0.7069642 0.7104961 0.7140026 0.7174836
## 92 93 94 95 96 97 98
## 0.7209387 0.7243678 0.7277707 0.7311472 0.7344971 0.7378202 0.7411163
## 99 100 101
## 0.7443854 0.7476273 0.7508418
Test logReg and logRegPlot functions with tiny data set
# tiny data set
xVar <- rgamma(n=30,shape=4,scale=4)
yVar <- rbinom(n=30,size=1,p=0.5)
df <- data.frame(xVar,yVar)
# test logReg
logReg(xVar=xVar,yVar=yVar)
## xVarEst pValue
## 0.01965795 0.68671718
# test logRegPlot
logRegPlot(xVar=xVar,yVar = yVar,dataframe=df)
## $x
## [1] 3.432618 3.733125 4.033632 4.334138 4.634645 4.935152 5.235659
## [8] 5.536166 5.836672 6.137179 6.437686 6.738193 7.038699 7.339206
## [15] 7.639713 7.940220 8.240726 8.541233 8.841740 9.142247 9.442753
## [22] 9.743260 10.043767 10.344274 10.644780 10.945287 11.245794 11.546301
## [29] 11.846808 12.147314 12.447821 12.748328 13.048835 13.349341 13.649848
## [36] 13.950355 14.250862 14.551368 14.851875 15.152382 15.452889 15.753395
## [43] 16.053902 16.354409 16.654916 16.955422 17.255929 17.556436 17.856943
## [50] 18.157450 18.457956 18.758463 19.058970 19.359477 19.659983 19.960490
## [57] 20.260997 20.561504 20.862010 21.162517 21.463024 21.763531 22.064037
## [64] 22.364544 22.665051 22.965558 23.266064 23.566571 23.867078 24.167585
## [71] 24.468092 24.768598 25.069105 25.369612 25.670119 25.970625 26.271132
## [78] 26.571639 26.872146 27.172652 27.473159 27.773666 28.074173 28.374679
## [85] 28.675186 28.975693 29.276200 29.576706 29.877213 30.177720 30.478227
## [92] 30.778734 31.079240 31.379747 31.680254 31.980761 32.281267 32.581774
## [99] 32.882281 33.182788 33.483294
##
## $y
## 1 2 3 4 5 6 7
## 0.3713983 0.3727785 0.3741608 0.3755451 0.3769314 0.3783198 0.3797102
## 8 9 10 11 12 13 14
## 0.3811025 0.3824968 0.3838931 0.3852912 0.3866913 0.3880932 0.3894970
## 15 16 17 18 19 20 21
## 0.3909026 0.3923100 0.3937193 0.3951302 0.3965430 0.3979575 0.3993736
## 22 23 24 25 26 27 28
## 0.4007915 0.4022110 0.4036322 0.4050550 0.4064793 0.4079053 0.4093328
## 29 30 31 32 33 34 35
## 0.4107618 0.4121924 0.4136244 0.4150579 0.4164928 0.4179292 0.4193669
## 36 37 38 39 40 41 42
## 0.4208060 0.4222465 0.4236883 0.4251313 0.4265757 0.4280213 0.4294682
## 43 44 45 46 47 48 49
## 0.4309162 0.4323654 0.4338158 0.4352673 0.4367200 0.4381737 0.4396285
## 50 51 52 53 54 55 56
## 0.4410843 0.4425411 0.4439990 0.4454578 0.4469175 0.4483781 0.4498397
## 57 58 59 60 61 62 63
## 0.4513021 0.4527653 0.4542294 0.4556942 0.4571598 0.4586262 0.4600933
## 64 65 66 67 68 69 70
## 0.4615611 0.4630295 0.4644986 0.4659683 0.4674386 0.4689094 0.4703808
## 71 72 73 74 75 76 77
## 0.4718527 0.4733251 0.4747980 0.4762713 0.4777450 0.4792191 0.4806935
## 78 79 80 81 82 83 84
## 0.4821683 0.4836434 0.4851188 0.4865945 0.4880704 0.4895465 0.4910228
## 85 86 87 88 89 90 91
## 0.4924992 0.4939758 0.4954524 0.4969292 0.4984060 0.4998828 0.5013596
## 92 93 94 95 96 97 98
## 0.5028365 0.5043132 0.5057899 0.5072665 0.5087429 0.5102192 0.5116954
## 99 100 101
## 0.5131713 0.5146470 0.5161224
Function: ANOVA
input: categorical x variable and continuous y variable
output: p value
analVar <- function(xVar=as.factor(rep(c("A","B","C","D","E"),each=3)),
yVar=c(rgamma(10,shape=5,scale=5),rgamma(5,shape=5,scale=10))){
df=data.frame(xVar,yVar)
aovMod <- aov(yVar~xVar,data=df)
aovOut <- summary(aovMod)[[1]][["Pr(>F)"]][1]
return(aovOut)}
analVar()
## [1] 0.3487473
Function for plotting boxplot to represent ANOVA results
analVarPlot <- function(xVar=as.factor(rep(c("A","B","C","D","E"),each=3)),yVar=c(rgamma(15,shape=5,scale=5))){
df <- data.frame(xVar,yVar)
aovMod <- aov(yVar~xVar,data=df)
aovPlot <- boxplot(yVar~xVar,
data=df,
col=c("red","yellow","blue","green","purple"),
xlab=names(xVar),ylab=names(yVar))
return(aovPlot)}
analVarPlot()
## $stats
## [,1] [,2] [,3] [,4] [,5]
## [1,] 11.30472 19.19192 17.68174 11.84153 12.65913
## [2,] 12.95046 23.77338 18.70487 16.89273 14.73855
## [3,] 14.59621 28.35485 19.72799 21.94392 16.81797
## [4,] 21.28848 34.92308 43.28703 28.43865 21.24710
## [5,] 27.98075 41.49131 66.84606 34.93338 25.67623
##
## $n
## [1] 3 3 3 3 3
##
## $conf
## [,1] [,2] [,3] [,4] [,5]
## [1,] 6.990153 18.18394 -2.696184 11.41157 10.88078
## [2,] 22.202257 38.52576 42.152170 32.47627 22.75516
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "A" "B" "C" "D" "E"
test analVar and analVarPlot using tiny data set
# make tiny data set
xVar1 <- as.factor(rep(c("a","b","c"),each=5))
yVar1 <- c(rgamma(10,shape=5,scale=5),rgamma(5,shape=5,scale=10))
# test analVar() using tiny data set
analVar(xVar=xVar1,yVar=yVar1)
## [1] 0.3119682
# test analVarPlot() using tiny data set
analVarPlot(xVar=xVar1,yVar=yVar1)
## $stats
## [,1] [,2] [,3]
## [1,] 8.867711 22.01823 10.57734
## [2,] 13.415654 22.01823 25.01972
## [3,] 30.672002 23.63924 38.25846
## [4,] 32.614166 24.19501 50.94457
## [5,] 34.052092 24.19501 51.43919
##
## $n
## [1] 5 5 5
##
## $conf
## [,1] [,2] [,3]
## [1,] 17.10638 22.10113 19.94003
## [2,] 44.23762 25.17735 56.57689
##
## $out
## [1] 33.50240 18.67288
##
## $group
## [1] 2 2
##
## $names
## [1] "a" "b" "c"
Function: Contingency Table
Input: discrete independent variable and discrete dependent variable
Output:
contTable <- function(x=c(22,40,60),y=c(40,80,45),datamatrix=rbind(x,y)){
rownames(datamatrix)=c("Cold","Warm")
colnames(datamatrix)=c("Species1","Species2","Species3")
contTableMod <- chisq.test(datamatrix)
contTableOut <- print(chisq.test(datamatrix)[3])
return(contTableOut)}
contTable()
## $p.value
## [1] 0.0006799671
## $p.value
## [1] 0.0006799671
Function for plotting mosaic plot of data complementary to contingency table results
contPlot <- function(x=c(22,40,60),y=c(40,80,45),datamatrix=rbind(x,y)){
rownames(datamatrix)=c("Cold","Warm")
colnames(datamatrix)=c("Species1","Species2","Species3")
mplot <- mosaicplot(x=datamatrix,col=c("red","blue","yellow"),shade=F)
return(mplot)}
contPlot()
## NULL
Test contingency table functions using tiny data set
x <- c(2,10,7)
y <- c(20,40,50)
dm <- rbind(x,y)
# test contTable()
contTable(x,y,dm)
## $p.value
## [1] 0.3800403
## $p.value
## [1] 0.3800403
# test contPlot()
contPlot(x,y,dm)
## NULL