# Practicals 7
# Task 1

## Work through this task step by step to understand how confidence intervals are computed.
# Do not use the ready-made barplotN function.

# Overview of the data: two factors (eye.color, sex) and one numeric
# variable (height). Recorded output:
summary(people)
# eye.color  sex        height
# blue :10   F:11   Min.   :160.0
# brown:13   M:12   1st Qu.:174.5
#                   Median :180.0
#                   Mean   :179.2
#                   3rd Qu.:185.0
#                   Max.   :200.0

# Step 1: mean height for every eye.color x sex combination.
# aggregate() returns the grouping columns as Group.1 / Group.2 and the
# computed statistic in column x.
means <- aggregate(people$height,
                   list(people$eye.color, people$sex),
                   mean)

# Step 2: number of observations in each category and the corresponding
# degrees of freedom for the t distribution (n - 1).
ns <- aggregate(people$height,
                list(people$eye.color, people$sex),
                length)
dfs <- ns$x - 1

# Step 3: standard errors, computed by hand because base R has no se()
# function: standard deviation divided by the square root of the number of
# observations in the corresponding category.
sds <- aggregate(people$height,
                 list(people$eye.color, people$sex),
                 sd)
ses <- sds$x / sqrt(ns$x)

# Step 4: 95% confidence limits = mean +/- t quantile * standard error.
# qt(0.025, dfs) is negative, so adding it gives the lower limit; the
# upper-tail quantile (lower.tail = FALSE) is its positive mirror image.
CI.low <- means$x + ses * qt(0.025, dfs)
CI.high <- means$x + ses * qt(0.025, dfs, lower.tail = FALSE)

# Step 5: barplot of the means with confidence-interval error bars.
# barplot() returns the x coordinates of the bar midpoints, which are needed
# to position the error bars. The y axis reaches at least 200 and is extended
# when an upper confidence limit would otherwise be cut off.
x.coord <- barplot(means$x,
                   names.arg = paste(means$Group.1, means$Group.2),
                   ylim = c(0, max(200, CI.high, na.rm = TRUE)))
# code = 3 with angle = 90 draws flat caps at both ends of each segment.
arrows(x0 = x.coord, y0 = CI.low, y1 = CI.high,
       code = 3, angle = 90, length = 0.03)

### Task 2
# Overview of the lettuce data set. Recorded output:
summary(lettuce)
#       X           leaf.col    germ.days       harv.days       harv.mass
# Min.   :  1.00   green:60   Min.   :2.000   Min.   :52.00   Min.   :278.0
# 1st Qu.: 30.75   red  :60   1st Qu.:3.000   1st Qu.:57.00   1st Qu.:288.8
# Median : 60.50              Median :4.000   Median :59.00   Median :300.0
# Mean   : 60.50              Mean   :3.592   Mean   :59.01   Mean   :300.5
# 3rd Qu.: 90.25              3rd Qu.:5.000   3rd Qu.:61.00   3rd Qu.:310.2
# Max.   :120.00              Max.   :5.000   Max.   :69.00   Max.   :330.0
#     taste         seed.prod        slug.damage
# Min.   :1.000   Min.   :  52.00   Min.   :1.000
# 1st Qu.:1.000   1st Qu.:  69.75   1st Qu.:2.000
# Median :2.000   Median :  92.00   Median :3.000
# Mean   :2.042   Mean   : 135.51   Mean   :2.975
# 3rd Qu.:3.000   3rd Qu.: 131.25   3rd Qu.:4.000
# Max.   :5.000   Max.   :1243.00   Max.   :5.000

# Harvest mass by leaf colour: two-sample t-test with Welch correction
# (unequal variances, the t.test() default, spelled out here). The null
# hypothesis is that the means of the green and the red group are equal.
t.test(harv.mass ~ leaf.col, data = lettuce,
       alternative = "two.sided", var.equal = FALSE, conf.level = 0.95)

#   Welch Two Sample t-test
#
# data:  harv.mass by leaf.col
# t = 15.512, df = 109.28, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  18.70942 24.19058
# sample estimates:
# mean in group green   mean in group red
#            311.2333            289.7833

# Conclusion: harvest mass differs significantly between the groups;
# green lettuce varieties are heavier than the red ones.

# Visual check of the same comparison.
plot(harv.mass ~ leaf.col, data = lettuce)


# Days to harvest by leaf colour: two-sample t-test with Welch correction
# (unequal variances, the t.test() default, spelled out here). The null
# hypothesis is that the means of the green and the red group are equal.
t.test(harv.days ~ leaf.col, data = lettuce,
       alternative = "two.sided", var.equal = FALSE, conf.level = 0.95)

#   Welch Two Sample t-test
#
# data:  harv.days by leaf.col
# t = -0.74061, df = 117.99, p-value = 0.4604
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  -1.6532276  0.7532276
# sample estimates:
# mean in group green   mean in group red
#            58.78333            59.23333

# Conclusion: the number of days to harvest does not differ significantly
# between green and red lettuce varieties.

# Visual check of the same comparison.
plot(harv.days ~ leaf.col, data = lettuce)

### Task 3
# Biomass on a fertilized (F) and a non-fertilized (NF) plot in each of five
# blocks. Note: the column F is unrelated to the logical shorthand F, which
# is why TRUE / FALSE are always written out in full below.
fert <- data.frame(block = 1:5,
                   F = c(23, 25, 36, 19, 22),
                   NF = c(20, 24, 33, 18, 21))
summary(fert)

## These are paired observations (one F and one NF value per block).
## Therefore, we need to use a paired t-test here!

# Paired t-test of the null hypothesis that the mean difference between
# fertilized and non-fertilized plots is 0.
t.test(fert$F, fert$NF, paired = TRUE)

#   Paired t-test
#
# data:  fert$F and fert$NF
# t = 3.6742, df = 4, p-value = 0.02131
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  0.4398252 3.1601748
# sample estimates:
# mean of the differences
#                     1.8

### Conclusion: there is a significant effect of fertilization; biomass is
# significantly higher on fertilized plots.


# What would happen if you made a mistake here and used a two-sample t-test?
t.test(fert$F, fert$NF, paired = FALSE)
#   Welch Two Sample t-test
#
# data:  fert$F and fert$NF
# t = 0.45809, df = 7.9192, p-value = 0.6592
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  -7.277285 10.877285
# sample estimates:
# mean of x mean of y
#      25.0      23.2

## As you can see, the outcome is completely different and misleading. It is
# really important to specify the analysis correctly, in this case to use the
# correct version of the t-test.