# Practicals 7 ----
#
# NOTE(review): this script assumes the data frames `people` and `lettuce`
# are already loaded in the workspace; they are not created here.

# Task 1 ----
## This task should be done step by step in order to understand how
## confidence intervals are computed.
# Do not use the available barplotN function.

summary(people)
#  eye.color  sex        height
#  blue :10   F:11   Min.   :160.0
#  brown:13   M:12   1st Qu.:174.5
#                    Median :180.0
#                    Mean   :179.2
#                    3rd Qu.:185.0
#                    Max.   :200.0

# Means of height for all eye.color x sex combinations.
means <- aggregate(people$height, list(people$eye.color, people$sex), mean)

# Degrees of freedom for t: number of observations in each category minus 1.
# `ns` holds the number of observations in every category.
ns <- aggregate(people$height, list(people$eye.color, people$sex), length)
dfs <- ns$x - 1 # corresponding degrees of freedom

## Computation of standard errors
# First we compute the standard deviations, and then divide them by the
# square root of the number of observations in the corresponding categories.
# (The standard errors are derived by hand here, so no helper `se()` function
# is required.)
sds <- aggregate(people$height, list(people$eye.color, people$sex), sd)
ses <- sds$x / sqrt(ns$x)

## Computation of 95% confidence limits.
# qt(0.025, dfs) is the (negative) lower 2.5% quantile of the t distribution;
# with lower.tail = FALSE the same call returns the upper 2.5% quantile.
CI.low <- means$x + ses * qt(0.025, dfs) # lower limit
CI.high <- means$x + ses * qt(0.025, dfs, lower.tail = FALSE) # upper limit

# Plot the barplot; barplot() returns the x coordinates of the bar midpoints.
# The y axis goes to 200, or higher if an upper confidence limit exceeds it,
# so that the error bars are never clipped.
x.coord <- barplot(means$x, ylim = c(0, max(200, CI.high, na.rm = TRUE)))

# Add the confidence-interval-based error bars.
arrows(
  x0 = x.coord, y0 = CI.low, y1 = CI.high,
  code = 3, angle = 90, length = 0.03
)

# Task 2 ----
summary(lettuce)
#        X           leaf.col    germ.days       harv.days       harv.mass
#  Min.   :  1.00   green:60   Min.   :2.000   Min.   :52.00   Min.   :278.0
#  1st Qu.: 30.75   red  :60   1st Qu.:3.000   1st Qu.:57.00   1st Qu.:288.8
#  Median : 60.50              Median :4.000   Median :59.00   Median :300.0
#  Mean   : 60.50              Mean   :3.592   Mean   :59.01   Mean   :300.5
#  3rd Qu.: 90.25              3rd Qu.:5.000   3rd Qu.:61.00   3rd Qu.:310.2
#  Max.   :120.00              Max.   :5.000   Max.   :69.00   Max.   :330.0
#      taste         seed.prod        slug.damage
#  Min.   :1.000   Min.   :  52.00   Min.   :1.000
#  1st Qu.:1.000   1st Qu.:  69.75   1st Qu.:2.000
#  Median :2.000   Median :  92.00   Median :3.000
#  Mean   :2.042   Mean   : 135.51   Mean   :2.975
#  3rd Qu.:3.000   3rd Qu.: 131.25   3rd Qu.:4.000
#  Max.   :5.000   Max.   :1243.00   Max.   :5.000

# A two-sample t-test (with Welch correction) testing the null hypothesis
# that the means in the red and green groups are equal.
t.test(harv.mass ~ leaf.col, data = lettuce)
#         Welch Two Sample t-test
#
# data:  harv.mass by leaf.col
# t = 15.512, df = 109.28, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  18.70942 24.19058
# sample estimates:
# mean in group green   mean in group red
#            311.2333            289.7833

# Conclusion: There is a significant difference in harvest mass, with
# green lettuce varieties being heavier than the red ones.
plot(harv.mass ~ leaf.col, data = lettuce)

# A two-sample t-test (with Welch correction) testing the null hypothesis
# that the means in the red and green groups are equal.
t.test(harv.days ~ leaf.col, data = lettuce)
#         Welch Two Sample t-test
#
# data:  harv.days by leaf.col
# t = -0.74061, df = 117.99, p-value = 0.4604
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  -1.6532276  0.7532276
# sample estimates:
# mean in group green   mean in group red
#            58.78333            59.23333

# Conclusion: There is no significant difference in the number of days to
# harvest between green and red lettuce varieties.
plot(harv.days ~ leaf.col, data = lettuce)

# Task 3 ----
# One row per block: F = fertilized plot, NF = non-fertilized plot.
fert <- data.frame(
  block = 1:5,
  F = c(23, 25, 36, 19, 22),
  NF = c(20, 24, 33, 18, 21)
)
summary(fert)

## These are paired observations. Therefore, we need to use a paired t-test
## here!
# Paired t-test testing that the mean difference between fertilized and
# non-fertilized plots is 0.
t.test(fert$F, fert$NF, paired = TRUE)
#         Paired t-test
#
# data:  fert$F and fert$NF
# t = 3.6742, df = 4, p-value = 0.02131
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  0.4398252 3.1601748
# sample estimates:
# mean of the differences
#                     1.8

### Conclusion: there is a significant effect of fertilization. Biomass is
### significantly higher on fertilized plots.

# What would happen if you made a mistake here and used a two-sample t-test?
t.test(fert$F, fert$NF, paired = FALSE)
#         Welch Two Sample t-test
#
# data:  fert$F and fert$NF
# t = 0.45809, df = 7.9192, p-value = 0.6592
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  -7.277285 10.877285
# sample estimates:
# mean of x mean of y
#      25.0      23.2

## As you can see, the outcome is completely different and misleading. It is
## really important to specify the analysis correctly, in this case to use
## the correct version of the t-test.