head <- read.table("DATA/head.txt", header=T) summary(head) is.factor(head$sex) charakteristiky <- function(x){ # funkce pocitajici pocet pozorovani, prumer a smerodatnou odchylku # argument: x ... vektor # vraci: vektor (pocet pozorovani, prumer, smerodatna odchylka) x <- na.omit(x) #odstrani chybejici hodnoty n <- length(x) m <- mean(x) s <- sd(x) return(c(n, m, s)) } charakteristiky(head$body.H[head$sex=='f']) ## [1] 100.00000 1667.33000 67.20811 charakteristiky(head$body.H[head$sex=='m']) ## [1] 75.00000 1789.72000 59.70639 charakteristiky(head$head.L[head$sex=='f']) ## [1] 100.000000 185.010000 6.545096 charakteristiky(head$head.L[head$sex=='m']) ## [1] 75.000000 195.946667 6.970776 charakteristiky(head$head.W[head$sex=='f']) ## [1] 100.000000 146.920000 5.336514 charakteristiky(head$head.W[head$sex=='m']) ## [1] 75.000000 155.653333 6.081637 charakteristiky(head$bigo.W[head$sex=='f']) ## [1] 100.00000 100.57000 4.69957 charakteristiky(head$bigo.W[head$sex=='m']) ## [1] 75.000000 107.813333 6.872769 charakteristiky(head$bizyg.W[head$sex=='f']) ## [1] 100.000000 133.460000 6.110795 charakteristiky(head$bizyg.W[head$sex=='m']) ## [1] 75.000000 140.293333 7.714103 par(mfrow=c(2,3)) boxplot(head$body.H ~ head$sex, varwidth=T, notch=T, xlab="Vyska [mm]", ylab="Pohlavi", horizontal=T) boxplot(head$head.L ~ head$sex, varwidth=T, notch=T, xlab="Delka hlavy [mm]", ylab="Pohlavi", horizontal=T) boxplot(head$head.W ~ head$sex, varwidth=T, notch=T, xlab="Sirka hlavy [mm]", ylab="Pohlavi", horizontal=T) boxplot(head$bigo.W ~ head$sex, varwidth=T, notch=T, xlab="Sirka dolni celisti [mm]", ylab="Pohlavi", horizontal=T) boxplot(head$bizyg.W ~ head$sex, varwidth=T, notch=T, xlab="Sirka obliceje [mm]", ylab="Pohlavi", horizontal=T) par(mfrow=c(1,2)) shapiro.test(head$body.H[head$sex=='f']) qqnorm(head$body.H[head$sex=='f'], main='Telesna vyska - zeny') qqline(head$body.H[head$sex=='f']) shapiro.test(head$body.H[head$sex=='m']) qqnorm(head$body.H[head$sex=='m'], main='Telesna vyska - muzi') qqline(head$body.H[head$sex=='m']) var.test(head$body.H ~ head$sex) t.test(head$body.H ~ head$sex) par(mfrow=c(1,2)) shapiro.test(head$head.L[head$sex=='f']) qqnorm(head$head.L[head$sex=='f'], main='Delka hlavy - zeny') qqline(head$head.L[head$sex=='f']) shapiro.test(head$head.L[head$sex=='m']) qqnorm(head$head.L[head$sex=='m'], main='Delka hlavy - muzi') qqline(head$head.L[head$sex=='m']) var.test(head$head.L ~ head$sex) t.test(head$head.L ~ head$sex) par(mfrow=c(1,2)) shapiro.test(head$head.W[head$sex=='f']) qqnorm(head$head.W[head$sex=='f'], main='Sirka hlavy - zeny') qqline(head$head.W[head$sex=='f']) shapiro.test(head$head.W[head$sex=='m']) qqnorm(head$head.W[head$sex=='m'], main='Sirka hlavy - muzi') qqline(head$head.W[head$sex=='m']) var.test(head$head.W ~ head$sex) t.test(head$head.W ~ head$sex) par(mfrow=c(1,2)) shapiro.test(head$bigo.W[head$sex=='f']) qqnorm(head$bigo.W[head$sex=='f'], main='Sirka celisti - zeny') qqline(head$bigo.W[head$sex=='f']) shapiro.test(head$bigo.W[head$sex=='m']) qqnorm(head$bigo.W[head$sex=='m'], main='Sirka celisti - muzi') qqline(head$bigo.W[head$sex=='m']) var.test(head$bigo.W ~ head$sex) t.test(head$bigo.W ~ head$sex, var.equal=F) par(mfrow=c(1,2)) shapiro.test(head$bizyg.W[head$sex=='f']) qqnorm(head$bizyg.W[head$sex=='f'], main='Sirka obliceje - zeny') qqline(head$bizyg.W[head$sex=='f']) shapiro.test(head$bizyg.W[head$sex=='m']) qqnorm(head$bizyg.W[head$sex=='m'], main='Sirka obliceje - muzi') qqline(head$bizyg.W[head$sex=='m']) var.test(head$bizyg.W ~ head$sex) t.test(head$bizyg.W ~ head$sex, var.equal=F) m.head <- glm(sex ~ body.H + head.L + head.W + bigo.W + bizyg.W, family=binomial(logit), data=head) summary(m.head) m0 <- glm(sex ~ 1, family=binomial(logit), data=head) anova(m0, m.head, test="Chisq") m.head2 <- glm(sex ~ body.H + head.L + head.W + bigo.W, family=binomial(logit), data=head) anova(m.head2, m.head, test="Chisq") coef(m.head) lower <- coef(m.head) - qnorm(0.975) * summary(m.head)$coefficients[,2] upper <- coef(m.head) + qnorm(0.975) * summary(m.head)$coefficients[,2] cbind(lower,upper) exp(coef(m.head)) exp(cbind(lower,upper)) step(glm(sex ~ body.H + head.L + head.W + bigo.W + bizyg.W, family=binomial(logit), data=head), direction='backward') step(glm(sex ~ 1, family=binomial(logit), data=head), scope= ~ body.H + head.L + head.W + bigo.W + bizyg.W, direction='forward') step(glm(sex ~ body.H + head.L + head.W + bigo.W + bizyg.W, family=binomial(logit), data=head), direction='both') step(glm(sex ~ 1, family=binomial(logit), data=head), scope= ~ body.H + head.L + head.W + bigo.W + bizyg.W, direction='both') library(rsq) rsq(m.head, type='n') # nagelkerke ## [1] 0.7977113 rsq(m.head, type='kl') # mcfadden ## [1] 0.6602573 rsq(m.head, type='lr') # cox and snell ## [1] 0.5941575 fitted <- predict(m.head, newdata=head, type="response") fitted.cat <- ifelse(fitted < 0.5, "f", "m") tab <- table(fitted.cat,head$sex) tab sum(diag(tab))/sum(tab) library(ROCR) preds <- prediction(fitted, as.numeric(head$sex)) roc <- performance(preds, "tpr", "fpr") auc <- performance(preds, "auc") auc.value <- round(as.numeric(auc@y.values),4) plot(roc, main="ROC Curve", lwd=2, col="blue") grid() lines(c(0,1),c(0,1)) text(0.5, 0.1, paste("AUC = ",auc.value), col="darkred")