head <- read.table('DATA/head.txt', header=T) is.factor(head$sex) ## [1] TRUE table(head$sex) colMeans(head[head$sex=='f', 2:6]) cov(head[head$sex=='f', 2:6]) colMeans(head[head$sex=='m', 2:6]) cov(head[head$sex=='m', 2:6]) plot(head[head$sex=='f', 2:6], main='Zeny') plot(head[head$sex=='m', 2:6], main='Muzi') library(MVN) par(mfrow=c(1,2)) mvn(head, subset='sex', mvnTest = 'mardia', multivariatePlot = 'qq')$multivariateNormality mvn(head, subset='sex', mvnTest = 'hz')$multivariateNormality library('biotools') boxM(head[,2:6], grouping=head$sex) library('heplots') boxM(head[,2:6], group=head$sex) library('ICSNP') HotellingsT2(head[head$sex=="f",2:6], head[head$sex=="m",2:6]) library('MASS') head.lda <- lda(sex ~ body.H + head.L + head.W + bigo.W + bizyg.W, data=head) head.lda fitted <- predict(head.lda) (tab <- table(fitted$class, head$sex)) #spravna klasifikace sum(diag(tab))/sum(tab) ## [1] 0.8971429 #mylna klasifikace (tab[1,2] + tab[2,1])/sum(tab) #mylna klasifikace pri nahodnem zarazovani p <- head.lda$prior 2*p[1]*p[2] predict(head.lda, newdata=list(body.H=c(1820, 1700), head.L=c(190, 185), head.W=c(165, 154), bigo.W=c(110, 99), bizyg.W=c(152, 130))) library('klaR') greedy.wilks(sex ~ body.H + head.L + head.W + bigo.W + bizyg.W, data=head) head.lda2 <- lda(sex ~ body.H + head.L + head.W + bigo.W, data=head) head.lda2 fitted2 <- predict(head.lda2) (tab2 <- table(fitted2$class, head$sex)) sum(diag(tab2))/sum(tab2) cranio <- read.csv('DATA/Howell.csv',header=T, na.strings='0') howells.data <- cranio[cranio$Sex == 'M' & cranio$Population %in% c('ZULU', 'BUSHMAN', 'AUSTRALI'), c('Population', 'ZYB', 'ZMB', 'BPL', 'NPH', 'NLH', 'OBH', 'WCB')] howells.data$Population <- factor(howells.data$Population) table(howells.data$Population) colMeans(howells.data[howells.data$Population=='AUSTRALI',-1]) cov(howells.data[howells.data$Population=='AUSTRALI',-1]) colMeans(howells.data[howells.data$Population=='BUSHMAN',-1]) cov(howells.data[howells.data$Population=='BUSHMAN',-1]) colMeans(howells.data[howells.data$Population=='ZULU',-1]) cov(howells.data[howells.data$Population=='ZULU',-1]) plot(howells.data[howells.data$Population=='AUSTRALI',-1], main='AUSTRALI') plot(howells.data[howells.data$Population=='BUSHMAN',-1], main='BUSHMAN') plot(howells.data[howells.data$Population=='ZULU',-1], main='ZULU') library(MVN) par(mfrow=c(1,3)) mvn(howells.data, subset='Population', mvnTest = 'mardia', multivariatePlot = 'qq')$multivariateNormality mvn(howells.data, subset='Population', mvnTest = 'hz')$multivariateNormality library('biotools') boxM(howells.data[,-1], grouping=howells.data$Population) library('heplots') boxM(howells.data[,-1], group=howells.data$Population) model <- manova(as.matrix(howells.data[,-1]) ~ howells.data$Population) summary(model, test='Wilks') library('MASS') how.lda <- lda(Population ~ ZYB + ZMB + BPL + NPH + NLH + OBH + WCB, data=howells.data) how.lda fit <- predict(how.lda) (tab.h <- table(fit$class, howells.data$Population)) sum(diag(tab.h)) / sum(tab.h) predict(how.lda, newdata=list(ZYB=130, ZMB=98, BPL=100, NPH=68, NLH=51, OBH=34, WCB=70)) library('klaR') greedy.wilks(Population ~ ZYB + ZMB + BPL + NPH + NLH + OBH + WCB, data=howells.data) how.lda2 <- lda(Population ~ ZYB + NPH + WCB + NLH + BPL, data=howells.data) fit2 <- predict(how.lda2) (tab.h2 <- table(fit2$class, howells.data$Population)) sum(diag(tab.h2)) / sum(tab.h2) ## [1] 0.8513514