# Skript slouzi k: # 1. Nacteni souboru s daty (Data_neuro.xlsx). # 2. Uprave hodnot kategorialnich promennych. # 3. Vizualizaci dat: # a) Teckovy graf podle kategorialni promenne # b) Maticovy graf # c) Maticovy graf podle kategorialni promenne # d) Krabicove grafy pro vice spojitych promennych # e) Vicenasobne krabicove grafy (pro vice kategorialnich promennych a jednu spojitou promennou) # f) Ikonove grafy # Vytvoreno 27.2.2019; EK ##### Nacteni knihoven pro praci s daty ##### library(readxl) # nacteni knihovny s funkci read_excel library(plyr) # nacteni knihovny s funkci mapvalues library(aplpack) # nacteni knihovny s funkci faces ##### Nacteni datoveho souboru ##### wd <- "c:/Users/koritakova/Documents/Vyuka/DSAN02/2019 jaro/DSAN02_predn01_Uvod/" # cesta ke slozce, ve ktere je datovy soubor data1 <- read_excel(file.path(wd,"Data_neuro.xlsx"),sheet="data") # nacteni dat dim(data1) # pocet pacientu a pocet promennych names(data1) # nazvy promennych ##### Uprava dat ##### # Vytvoreni promenne s textovymi popisky skupin subjektu data1$Group_3kat_rek <- mapvalues(data1$Group_3kat,from=levels(as.factor(data1$Group_3kat)),to=c("CN","MCI","AD")) # vytvoreni nove promenne s textovymi hodnotami kategorii data1$Group_3kat_rek <- factor(data1$Group_3kat_rek, levels=c("AD","MCI","CN")) # prerazeni poradi kategorii table(data1$Group_3kat_rek) # pocet subjektu v kazde kategorii # Vytvoreni kategorizovaneho veku: data1$Age_kat <- cut(data1$Age, c(0,seq(70,80,10),200), right=FALSE) levels(data1$Age_kat) <- c("<70","70-79","80+") # rename labels table(data1$Age_kat) # Prevedeni promenne Gender_rek na faktor data1$Gender_rek <- as.factor(data1$Gender_rek) table(data1$Gender_rek) ##### Vizualizace dat ##### ## a) Teckovy graf podle kategorialni promenne barvy = c('green','orange','red') # nadefinovani barev pro kategorialni promennou plot(data1$Hippocampus_volume_mm3, data1$Amygdala_volume_mm3, pch=20, col=barvy[as.numeric(data1$Group_3kat)], xlab="Hippocampus volume (mm3)", ylab="Amygdala volume (mm3)") # vykresleni grafu (pch - typ symbolu, col - barva, xlab - popisek na ose x, ylab - popisek na ose y) legend("bottomright", c("CN","MCI","AD"), pch=20, col=barvy, cex=0.9, bty="n") # pridani legendy (pch - typ symbolu, col - barva, cex - velikost popisku, bty - zobrazeni obdelniku kolem legendy ("n" = ne)) ## b) Maticovy graf pairs(~Age+Height+Hippocampus_volume_mm3+Amygdala_volume_mm3, col="black", data=data1) # vykresleni grafu pairs(~Age+Height+Hippocampus_volume_mm3+Amygdala_volume_mm3, panel= function(x,y,...) { points(x, y,...) abline(lm(y~x),col="grey")}, col="black", data=data1) # vykresleni grafu vcetne pridani primky linearni regrese ## c) Maticovy graf podle kategorialni promenne barvy = c('green','orange','red') # nadefinovani barev pro kategorialni promennou pairs(~Age+Height+Hippocampus_volume_mm3+Amygdala_volume_mm3,col=barvy[as.numeric(data1$Group_3kat)], data=data1) # vykresleni grafu ## d) Krabicove grafy pro vice spojitych promennych par(mar=c(12.1, 4.1, 4.1, 2.1)) # zvetseni prostoru pod grafem, aby se tam vesly popisky boxplot(data1[,c("Hippocampus_volume_mm3","Amygdala_volume_mm3","Thalamus_volume_mm3","Pallidum_volume_mm3","Putamen_volume_mm3","Nucl_caud_volume_mm3")],xaxt='n',las=2) # xaxt='n' - nevykresli popisky na ose x, las=2 - otoci popisky na ose y axis(1, 1:6, c("Hippocampus volume (mm3)","Amygdala volume (mm3)","Thalamus volume (mm3)","Pallidum volume (mm3)","Putamen volume (mm3)","Nucl caud volume (mm3)"), las=2) # vykresleni otocenych popisku na ose x dev.off() # zavreni grafickeho okna ## e) Vicenasobne krabicove grafy (pro vice kategorialnich promennych a jednu spojitou promennou) table(data1$Group_3kat_rek,data1$Gender_rek) # pomocna kontingencni tabulka pro predstavu, kolik bude subjektu v jednotlivych grafech n_radku <- length(levels(data1$Group_3kat_rek)) # pocet radku s grafy n_sloupcu <- length(levels(data1$Gender_rek)) # pocet radku s grafy n_boxu <- length(levels(data1$Age_kat)) # pocet krabicovych grafu v 1 grafu par(mfrow=c(n_radku,n_sloupcu)) # nastaveni "gridu" grafu par(oma=c(3,3,3,3)) # nastaveni "outer margin area" par(mar=c(1,1,1,1)) # nastaveni "margin" u jednotlivych grafu par(mgp = c(0, 0.5, 0)) # nastaveni popisku blize "tickmarks" for (j in 1:n_radku) { for (i in 1:n_sloupcu) { data1_sel <- data1[data1$Group_3kat_rek==levels(data1$Group_3kat_rek)[j] & data1$Gender_rek==levels(data1$Gender_rek)[i],] # vyber dat podle j-te kategorie 1. promenne a i-te kategorie 2. promenne if (nrow(data1_sel)>0) { bstat=boxplot(data1_sel$Hippocampus_volume_mm3 ~ data1_sel$Age_kat, range=0, plot=FALSE) # vypocet charakteristik (minima, 1.kvartilu, medianu, 2.kvartilu, maxima) plot(1:n_boxu, bstat$stats[3,], col="blue", pch=18, cex=2, xaxt="n", xlim=c(0.5,n_boxu+0.5), ann=FALSE, ylim=c(5500,7500),cex.axis=0.9) # vykresleni medianu (xaxt="n" zabrani vykresleni osy x) arrows(1:n_boxu, bstat$stats[1,], 1:n_boxu, bstat$stats[5,], code = 3, col = "blue", angle = 90, length = .1) # vykresleni minima a maxima axis(1,at=c(1:n_boxu),labels=levels(data1$Age_kat),cex.axis=0.9) # vykresleni osy x for (k in 1:n_boxu) { text(k,11,paste("n=",length(data1_sel$Hippocampus_volume_mm3[data1_sel$Age_kat==levels(data1_sel$Age_kat)[k]]),sep=""),cex=0.9) # pridani textovych popisku "n=XX" } } else { plot(1,1,col="white",xaxt="n",yaxt="n",axes=F,frame.plot=FALSE,ann=FALSE) # v pripade, ze pro danou kombinaci kategorii nejsou data zadneho pacienta, vykresli se prazdny graf } } } for (j in 1:n_radku) { mtext(levels(data1$Group_3kat_rek)[j],side=2,line=1,outer=TRUE,at=1.17-j/n_radku,cex=0.8) # popisky pro kategorialni promennou v radcich } for (i in 1:n_sloupcu) { mtext(levels(data1$Gender_rek)[i],side=3,line=1,outer=TRUE,at=i/n_sloupcu-0.25,cex=0.75) # popisky pro kategorialni promennou ve sloupeccich } dev.off() # zavreni grafickeho okna ## f) Ikonove grafy data1_sel=data1[c(1:10,801:810),] # vyber 20 subjektu stars(data1_sel[,c("Hippocampus_volume_mm3","Amygdala_volume_mm3","Thalamus_volume_mm3","Pallidum_volume_mm3","Putamen_volume_mm3")], labels=data1_sel$ID, ncol=5) # parskove (hvezdicove) grafy stars(data1_sel[,c("Hippocampus_volume_mm3","Amygdala_volume_mm3","Thalamus_volume_mm3","Pallidum_volume_mm3","Putamen_volume_mm3")], labels=data1_sel$ID, ncol=5, radius=F, col.stars=rep("blue",length(data1_sel$ID))) # polygony faces(data1_sel[,c("Hippocampus_volume_mm3","Amygdala_volume_mm3","Thalamus_volume_mm3","Pallidum_volume_mm3","Putamen_volume_mm3")], face.type=0) # Chernoffovy tvare - typ 0 faces(data1_sel[,c("Hippocampus_volume_mm3","Amygdala_volume_mm3","Thalamus_volume_mm3","Pallidum_volume_mm3","Putamen_volume_mm3")], face.type=1) # Chernoffovy tvare - typ 1 faces(data1_sel[,c("Hippocampus_volume_mm3","Amygdala_volume_mm3","Thalamus_volume_mm3","Pallidum_volume_mm3","Putamen_volume_mm3")], face.type=2) # Chernoffovy tvare - typ 2