# Normality checks on pig body weights read from "pigs_normality.csv".
# This section loads and cleans the data, splits it by diet, and inspects the
# distribution of Bodyweight with plots and three normality tests.

# Packages ----
library(tidyverse)
# Install nortest only when it is missing instead of reinstalling on every run.
if (!requireNamespace("nortest", quietly = TRUE)) {
  install.packages("nortest")
}
library(nortest)

# Load and inspect ----
pigs <- read.csv("pigs_normality.csv")

# Let's look at the structure.
str(pigs)
# Force Bodyweight to numeric. If the column was read as text, entries that
# cannot be parsed become NA (with a warning); NA rows are removed below.
pigs$Bodyweight <- as.numeric(pigs$Bodyweight)
str(pigs)

# Let's check the categorical variables.
unique(pigs$Gender)
unique(pigs$Diet)

# Let's harmonize the diet labels. "hight fat" is matched exactly as written
# (presumably the spelling used in the data file -- confirm against the CSV).
# The vectorised %in% form never yields NA, so a missing Diet value is simply
# left untouched instead of breaking an `if` condition.
pigs$Diet[pigs$Diet %in% "hight fat"] <- "hf"
unique(pigs$Diet)

# Let's check the numerical variables.
summary(pigs$Bodyweight)

pigs <- pigs %>%
  filter(!is.na(Bodyweight))

summary(pigs$Bodyweight)
hist(pigs$Bodyweight)

# Split by diet ----
# I would divide our data set in two, according to the diet types.
unique(pigs$Diet)

# Do we need all rows?
# Do we need all columns?
# Why don't I delete the Diet column first? Because filter() still needs it;
# it is dropped only after the rows have been selected.
hf <- pigs %>%
  filter(Diet == "hf") %>%
  select(-Diet)

# I didn't change the pigs data set; I extracted some data from it and saved
# it under a new name. The initial "pigs" data set is left untouched.
chow <- pigs %>%
  filter(Diet == "chow") %>%
  select(-Diet)

# Preliminary analysis on all pigs ----
hist(pigs$Bodyweight, breaks = 20)

qqnorm(pigs$Bodyweight)
qqline(pigs$Bodyweight, col = "red", lwd = 3)

# Let's test its normality.
shapiro.test(pigs$Bodyweight)

# Kolmogorov-Smirnov needs two things, x and y: x is our data and y is what we
# compare it with -- "pnorm", i.e. a normal distribution with the mean and sd
# of our data set.
# NOTE: because mean and sd are estimated from the same sample, the plain KS
# p-value is only approximate; nortest::lillie.test() applies the Lilliefors
# correction for that case.
ks.test(pigs$Bodyweight, "pnorm", mean(pigs$Bodyweight), sd(pigs$Bodyweight))

# Anderson-Darling test from nortest.
ad.test(pigs$Bodyweight)

# Author's observation: according to all tests it isn't normally distributed.
# Now let's try to transform the data using a logarithm.
# Log-transform of all body weights ----
# Relies on `pigs` and `hf` built earlier in the script and on nortest being
# attached (for ad.test).
pigs_transf <- log10(pigs$Bodyweight)
hist(pigs_transf, breaks = 20)

# Let's put two plots side by side to compare (1 row, 2 columns).
par(mfcol = c(1, 2))
hist(pigs$Bodyweight, breaks = 20)
hist(pigs_transf, breaks = 20)
par(mfcol = c(1, 1))

par(mfcol = c(1, 2))
qqnorm(pigs$Bodyweight)
qqline(pigs$Bodyweight, col = "red", lwd = 3)
qqnorm(pigs_transf)
qqline(pigs_transf, col = "red", lwd = 3)
par(mfcol = c(1, 1))

# Let's look at the tests now.
shapiro.test(pigs_transf)
ks.test(pigs_transf, "pnorm", mean(pigs_transf), sd(pigs_transf))
ad.test(pigs_transf)

# Author's observation: still not normal.
# Here I would conclude the data were slightly skewed, as all tests indicated,
# so a log-transformation is suitable here.

# Let's perform all the steps for checking the distribution for each diet.

# 1. High fat diet ----

# 1.1 Build a histogram and QQ plot of body weights for the "hf" diet.
#     Put the plots in one row, 2 columns for a better view.
par(mfcol = c(1, 2))
hist(hf$Bodyweight, breaks = 20)
qqnorm(hf$Bodyweight)
qqline(hf$Bodyweight, col = "red", lwd = 3)
par(mfcol = c(1, 1))

# 1.2 Perform three normality tests.
shapiro.test(hf$Bodyweight)
ks.test(hf$Bodyweight, "pnorm", mean(hf$Bodyweight), sd(hf$Bodyweight))
ad.test(hf$Bodyweight)

# 1.3 Transform the data, build two histograms in one row to compare
#     (before and after transformation).
hf_transf <- log10(hf$Bodyweight)

par(mfcol = c(1, 2))
hist(hf$Bodyweight, breaks = 20)
hist(hf_transf, breaks = 20)
par(mfcol = c(1, 1))

# 1.4 Build two QQ plots in one row to compare
#     (before and after transformation).
par(mfcol = c(1, 2))
qqnorm(hf$Bodyweight)
qqline(hf$Bodyweight, col = "red", lwd = 3)
qqnorm(hf_transf)
qqline(hf_transf, col = "red", lwd = 3)
par(mfcol = c(1, 1))

# 1.5 Perform the normality tests again, but on the transformed data.
#     What can you conclude?
shapiro.test(hf_transf)
ks.test(hf_transf, "pnorm", mean(hf_transf), sd(hf_transf))
ad.test(hf_transf)

# 2. Now repeat all the steps for the chow diet.
# Chow diet: same workflow as for the high fat diet ----
# Relies on `chow` built earlier in the script and on nortest being attached
# (for ad.test).

# 2.1 Histogram and QQ plot of the raw body weights, side by side.
par(mfcol = c(1, 2))
hist(chow$Bodyweight, breaks = 20)
qqnorm(chow$Bodyweight)
qqline(chow$Bodyweight, col = "red", lwd = 3)
par(mfcol = c(1, 1))

# 2.2 Three normality tests on the raw body weights.
shapiro.test(chow$Bodyweight)
ks.test(chow$Bodyweight, "pnorm", mean(chow$Bodyweight), sd(chow$Bodyweight))
ad.test(chow$Bodyweight)

# 2.3 Log10-transform and compare histograms before and after.
chow_transf <- log10(chow$Bodyweight)

par(mfcol = c(1, 2))
hist(chow$Bodyweight, breaks = 20)
hist(chow_transf, breaks = 20)
par(mfcol = c(1, 1))

# 2.4 Compare QQ plots before and after the transformation.
par(mfcol = c(1, 2))
qqnorm(chow$Bodyweight)
qqline(chow$Bodyweight, col = "red", lwd = 3)
qqnorm(chow_transf)
qqline(chow_transf, col = "red", lwd = 3)
par(mfcol = c(1, 1))

# 2.5 Repeat the normality tests on the transformed data.
shapiro.test(chow_transf)
ks.test(chow_transf, "pnorm", mean(chow_transf), sd(chow_transf))
ad.test(chow_transf)