# Hierarchical clustering of the "pilatkoviti" data set ----------------------
#
# Reads a site-by-species table, computes Bray-Curtis dissimilarities with
# vegan::vegdist() and compares several agglomerative clustering methods
# (complete, Ward, single, average) by drawing their dendrograms.

# Install vegan only when it is missing. The package name has to be a quoted
# string; the bare symbol `vegan` fails with "object 'vegan' not found".
if (!requireNamespace("vegan", quietly = TRUE)) {
  install.packages("vegan")
}
library(vegan)

getwd()
# NOTE(review): hard-coded, machine-specific path. It is kept because the
# relative file name below (and possibly later code) relies on this working
# directory; adjust it to the local copy of the course data.
setwd("c:/DANKA HLAVNY ADRESAR/Vyuka/Bi5980 Biodiverzita/Biodiverzita vyuka 2016")

pilatkoviti <- read.csv(
  "05_2016_Data Pilatkoviti.csv",
  sep = ";", dec = ",", header = TRUE, row.names = 1
)
pilatkoviti # the row names are the original first column

# Bray-Curtis dissimilarity matrix --------------------------------------------
pilatkoviti.bray <- vegdist(
  pilatkoviti,
  method = "bray", diag = TRUE, upper = TRUE
)
pilatkoviti.bray

# Complete linkage -------------------------------------------------------------
?hclust
hclust(pilatkoviti.bray, method = "complete")
pilatkoviti.bray.complete <- hclust(pilatkoviti.bray, method = "complete")

plot(pilatkoviti.bray.complete)
plot(pilatkoviti.bray.complete, main = "Bray-Curtis, Complete linkage")

# Labels are taken from the row names of the data table. A "dist" object has
# no dimnames, so rownames(pilatkoviti.bray) is always NULL and the plots only
# showed the right labels because plot.hclust() falls back to its defaults.
plot(
  pilatkoviti.bray.complete,
  labels = rownames(pilatkoviti), hang = -1,
  main = "Bray-Curtis, Complete linkage"
)
plot(
  pilatkoviti.bray.complete,
  labels = rownames(pilatkoviti), hang = -1, axes = FALSE,
  main = "Bray-Curtis, Complete linkage"
)
plot(
  pilatkoviti.bray.complete,
  labels = rownames(pilatkoviti), hang = -1, axes = FALSE, ann = FALSE
)

# Ward's method ----------------------------------------------------------------
# Careful here: Ward's method should not be used with Bray-Curtis. It should
# not be combined with distance measures that are not strictly metric, and the
# popular Bray-Curtis distance is one of those. Shown for illustration only.
pilatkoviti.bray.ward <- hclust(pilatkoviti.bray, method = "ward.D2")

plot(
  pilatkoviti.bray.ward,
  labels = rownames(pilatkoviti), hang = -1,
  main = "Bray-Curtis, Ward"
)
plot(
  pilatkoviti.bray.ward,
  labels = rownames(pilatkoviti), hang = -1, axes = FALSE, ann = FALSE
)

# Marking clusters in the dendrogram -------------------------------------------
# Outline a 2-cluster solution (default border) and a 3-cluster solution
# (blue border) on top of the complete-linkage dendrogram.
plot(
  pilatkoviti.bray.complete,
  labels = rownames(pilatkoviti), hang = -1, axes = FALSE,
  main = "Complete linkage"
)
rect.hclust(pilatkoviti.bray.complete, k = 2)
rect.hclust(pilatkoviti.bray.complete, k = 3, border = "blue")

# cutree() returns the cluster membership of every object.
prislusnost.3shluky <- cutree(pilatkoviti.bray.complete, k = 3)
prislusnost.3shluky

# Three dendrograms in one figure ----------------------------------------------
# Single linkage, average linkage and complete linkage side by side.
pilatkoviti.bray.single <- hclust(pilatkoviti.bray, method = "single")
pilatkoviti.bray.average <- hclust(pilatkoviti.bray, method = "average")
pilatkoviti.bray.complete <- hclust(pilatkoviti.bray, method = "complete")

par(mfrow = c(1, 3)) # draw all dendrograms into a single figure
plot(pilatkoviti.bray.single, main = "Single linkage")
plot(pilatkoviti.bray.average, main = "Average linkage")
plot(pilatkoviti.bray.complete, main = "Complete linkage")

# Or, to have all the "legs" (leaves) start from the same baseline:
par(mfrow = c(1, 3))
plot(
  pilatkoviti.bray.single,
  labels = rownames(pilatkoviti), hang = -1, axes = FALSE,
  main = "Single linkage"
)
plot(
  pilatkoviti.bray.average,
  labels = rownames(pilatkoviti), hang = -1, axes = FALSE,
  main = "Average linkage"
)
plot(
  pilatkoviti.bray.complete,
  labels = rownames(pilatkoviti), hang = -1, axes = FALSE,
  main = "Complete linkage"
)

# So that the following figures are not drawn three per page, reset the layout:
par(mfrow = c(1, 1))

# ------------------------------------------------------------------------------
# NOTE: agnes() (package cluster) contains 6 clustering algorithms, some of
# which are not available in hclust(). When drawing the dendrogram it is
# advisable to first convert the agnes object to an hclust object
# (use as.hclust()).
# ------------------------------------------------------------------------------
# NOTE: non-hierarchical clustering - the k-means method.

# kmeans() starts from randomly chosen centres; fix the seed so that the
# cluster assignments and the scree plot below are reproducible.
set.seed(1)

cluster.kmeans <- kmeans(pilatkoviti, centers = 3)
cluster.kmeans$cluster

# K-means clustering - KORYSE ---------------------------------------------------
# NOTE(review): hard-coded, machine-specific path. It is kept because the
# relative file name below relies on this working directory.
setwd("c:/DANKA HLAVNY ADRESAR/Vyuka/Bi5980 Biodiverzita/Biodiverzita vyuka 2016")

koryse <- read.csv(
  "06_2016_Data_Plankton Koryse.csv",
  sep = ";", dec = ",", header = TRUE, row.names = 1
)
koryse

koryse.kmeans3 <- kmeans(koryse, centers = 3)
koryse.kmeans3
koryse.kmeans3$cluster
koryse.kmeans3$withinss

# The k-means clustering can be repeated for 2, 3, 4, 5, ... clusters and the
# total within-group sum of squares computed for each solution. The fits for
# k = 2, 4 and 5 were referenced without ever being created, so they are
# computed here.
koryse.kmeans2 <- kmeans(koryse, centers = 2)
koryse.kmeans4 <- kmeans(koryse, centers = 4)
koryse.kmeans5 <- kmeans(koryse, centers = 5)

withinss2 <- sum(koryse.kmeans2$withinss)
withinss3 <- sum(koryse.kmeans3$withinss)
withinss4 <- sum(koryse.kmeans4$withinss)
withinss5 <- sum(koryse.kmeans5$withinss)

# Variation with a single cluster.
koryse.kmeans1 <- kmeans(koryse, centers = 1)
koryse.kmeans1$withinss
withinss <- koryse.kmeans1$withinss
withinss

# This equals the sum of the column variances multiplied by
# (number of rows - 1), i.e.:
(nrow(koryse) - 1) * sum(vapply(koryse, var, numeric(1)))

# Scree plot --------------------------------------------------------------------
# kmeans() fails when asked for more centres than there are distinct rows, so
# the upper limit of 15 clusters is capped by the size of the data set.
max.k <- min(15L, nrow(unique(koryse)))
for (i in seq_len(max.k)[-1]) {
  withinss[i] <- sum(kmeans(koryse, centers = i)$withinss)
}

plot(
  seq_len(max.k), withinss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)
# From this graph - a scree plot - the "right number of clusters" can be
# estimated, i.e. the number at the "elbow" of the curve.