#install.packages('R.matlab')
#install.packages("e1071")
rm(list = ls())

library(readxl)   # read_excel
library(R.matlab) # readMat
library(stats)    # p.adjust
library(e1071)    # SVM

### 0. Data Loading and Preprocessing

# Set working directory to the folder containing the data
setwd("c:/Users/koritakova/Documents/Vyuka/Vicerozmerky a AKD/Analyza a klasifikace dat/2018-podzim/AKD_predn-07_cviceni/Data_klasifikace/")

# Load data from Excel sheet
data <- read_excel("gm_identifikatory.xls", sheet = "List1")
names <- data$id    # File names of the images
group <- data$group # Group assignment (patients vs controls)
inxP <- which(group == 1) # Indices for patients
inxC <- which(group == 0) # Indices for controls

# Load brain mask (GM mask)
mask <- readMat("gm_mask_2D.mat")[[1]]
image(mask, main = "GM Mask") # Display the mask

# Load the first patient's image as an example
i <- 1
img <- readMat(paste0(names[inxP[i]], "_rez50.mat"))[[1]]
image(img, main = paste("Patient", i)) # Display the 2D image of the first patient's GM data

# Load all image data into a matrix holding the image data for all subjects
# (patients first, then controls); one column per pixel inside the mask
X <- matrix(0, nrow = length(inxP) + length(inxC), ncol = sum(mask == 1))
for (i in 1:length(inxP)) {
  img <- readMat(paste0(names[inxP[i]], "_rez50.mat"))[[1]]
  X[i, ] <- img[mask == 1]
}
for (i in 1:length(inxC)) {
  img <- readMat(paste0(names[inxC[i]], "_rez50.mat"))[[1]]
  X[i + length(inxP), ] <- img[mask == 1]
}

### 1. Data Reduction and Classification: Resubstitution Validation

# Perform Mann-Whitney test for feature selection
pvals <- sapply(1:ncol(X), function(j)
  wilcox.test(X[group == 1, j], X[group == 0, j], exact = FALSE)$p.value)
sum(pvals < 0.05) # 1457 significant pixels before correction

# Apply False Discovery Rate (FDR) correction
pvals_cor <- p.adjust(pvals, method = "fdr")
sum(pvals_cor < 0.05) # 226 significant pixels after FDR correction
X_red <- X[, pvals_cor < 0.05] # Select features with corrected p-value < 0.05

# Display the significant pixels
significant_mask <- mask
significant_mask[mask == 1] <- 1 + (pvals_cor < 0.05)
image(significant_mask, main = "Significant Pixels")

# Train SVM classifier with the reduced features (X_red)
svm_model <- svm(X_red, as.factor(group))  # SVM training
group_klasif <- predict(svm_model, X_red)  # SVM prediction
table(group_klasif, group)                 # Check accuracy

accuracy <- mean(group_klasif == group)
sensitivity <- mean(group_klasif[group == 1] == group[group == 1])
specificity <- mean(group_klasif[group == 0] == group[group == 0])
cat("Accuracy:", accuracy, "\n")
cat("Sensitivity:", sensitivity, "\n")
cat("Specificity:", specificity, "\n")
# 93.8%, 91.7%, 95.8%
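# NOTE: resubstitution evaluates the classifier on the very subjects it was
# trained on, so these numbers are optimistically biased; the schemes below
# estimate performance on unseen subjects. The same three metrics are
# recomputed in every scheme, so a small helper could replace the repeated
# code. This is a minimal sketch; the function name and interface are
# illustrative, not part of the original exercise:
report_metrics <- function(pred, truth) {
  # pred and truth are expected as 0/1 vectors of equal length
  acc  <- mean(pred == truth)
  sens <- mean(pred[truth == 1] == 1) # patients correctly classified
  spec <- mean(pred[truth == 0] == 0) # controls correctly classified
  cat("Accuracy:", acc, " Sensitivity:", sens, " Specificity:", spec, "\n")
  invisible(c(accuracy = acc, sensitivity = sens, specificity = spec))
}
# Example: report_metrics(as.numeric(as.character(group_klasif)), group)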
### 2. Data Reduction and Classification: Leave-One-Out Cross-Validation (Incorrect Variant)

# Perform Mann-Whitney test for feature selection on ALL data, including the
# future test subjects - this is what makes this variant incorrect
pvals <- sapply(1:ncol(X), function(j)
  wilcox.test(X[group == 1, j], X[group == 0, j], exact = FALSE)$p.value)
sum(pvals < 0.05) # 1457 significant pixels before correction

# Apply False Discovery Rate (FDR) correction
pvals_cor <- p.adjust(pvals, method = "fdr")
sum(pvals_cor < 0.05) # 226 significant pixels after FDR correction
X_red <- X[, pvals_cor < 0.05] # Select features with corrected p-value < 0.05

# Leave-One-Out Cross-Validation for SVM (feature selection was already done
# on the complete data set above)
group_klasif2 <- numeric(length(group))
for (i in 1:length(group)) {
  cat("Processing subject", i, "\n")
  x_test_red <- X_red[i, ]   # test data (1 subject)
  X_train_red <- X_red[-i, ] # train data
  group_train <- group[-i]
  svm_model_loo <- svm(X_train_red, as.factor(group_train)) # train model
  group_klasif2[i] <- predict(svm_model_loo, t(as.matrix(x_test_red)))
}
table(group_klasif2, group)
group_klasif2 <- as.numeric(group_klasif2) - 1 # factor codes 1/2 -> labels 0/1
table(group_klasif2, group)

accuracy_loo <- mean(group_klasif2 == group)
sensitivity_loo <- mean(group_klasif2[group == 1] == group[group == 1])
specificity_loo <- mean(group_klasif2[group == 0] == group[group == 0])
cat("Accuracy (LOO):", accuracy_loo, "\n")
cat("Sensitivity (LOO):", sensitivity_loo, "\n")
cat("Specificity (LOO):", specificity_loo, "\n")
# 79.2%; 79.2%; 79.2%

### 3. Data Reduction and Classification: Leave-One-Out Cross-Validation (Correct Variant)

group_klasif3 <- numeric(length(group))
sel <- matrix(0, nrow = nrow(X), ncol = ncol(X)) # Matrix to hold information about statistically significant pixels from all iterations
for (i in 1:length(group)) {
  cat("Processing subject", i, "\n")
  x_test <- X[i, ]   # test data (1 subject)
  X_train <- X[-i, ] # train data
  group_train <- group[-i]
  # Feature selection on the TRAINING data only
  pvals <- sapply(1:ncol(X_train), function(j)
    wilcox.test(X_train[group_train == 1, j], X_train[group_train == 0, j], exact = FALSE)$p.value)
  pvals_cor <- p.adjust(pvals, method = "fdr") # False Discovery Rate (FDR) correction
  cat("Number of significant pixels after FDR correction:", sum(pvals_cor < 0.05), "\n")
  sel[i, ] <- (pvals_cor < 0.05)
  X_train_red <- X_train[, pvals_cor < 0.05] # Select features with corrected p-value < 0.05 in train data
  x_test_red <- x_test[pvals_cor < 0.05]     # Select features with corrected p-value < 0.05 in test data
  svm_model_loo <- svm(X_train_red, as.factor(group_train)) # train model
  group_klasif3[i] <- predict(svm_model_loo, t(as.matrix(x_test_red)))
}
table(group_klasif3, group)
group_klasif3 <- as.numeric(group_klasif3) - 1 # factor codes 1/2 -> labels 0/1
table(group_klasif3, group)

accuracy_loo <- mean(group_klasif3 == group)
sensitivity_loo <- mean(group_klasif3[group == 1] == group[group == 1])
specificity_loo <- mean(group_klasif3[group == 0] == group[group == 0])
cat("Accuracy (LOO):", accuracy_loo, "\n")
cat("Sensitivity (LOO):", sensitivity_loo, "\n")
cat("Specificity (LOO):", specificity_loo, "\n")
# 75.0%; 75.0%; 75.0%

# NOTE: the printout of significant pixel counts shows that the number of
# statistically significant pixels varies considerably between iterations;
# therefore pixel selection must be done using the training data only,
# otherwise we get biased results!
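# The same leakage argument applies to hyperparameter tuning: if gamma and
# cost of the SVM were tuned (the code above uses e1071 defaults), the tuning
# would also have to run inside the loop on training subjects only. A hedged
# sketch using e1071's tune.svm; the grid values are illustrative, not part
# of the original exercise, and for demonstration this reuses the training
# data left over from the last LOO iteration:
tuned <- tune.svm(X_train_red, as.factor(group_train),
                  gamma = 2^(-8:-4), cost = 2^(0:4))
tuned$best.parameters # these would replace the default settings of svm()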
# Visualization of significant pixels - each iteration yields a different
# number of significant pixels; for visualization we can, for example, plot
# the pixels that were statistically significant in at least half of the
# iterations:
significant_mask3 <- mask
significant_mask3[mask == 1] <- 1 + ((colSums(sel) / nrow(sel)) >= 0.5)
image(significant_mask3, main = "Significant Pixels")

### 4. Data Reduction and Classification: 10-Fold Cross-Validation

set.seed(123)
folds <- sample(rep(1:10, length.out = length(group)))
group_klasif4 <- numeric(length(group))
sel <- matrix(0, nrow = 10, ncol = ncol(X)) # Matrix to hold information about statistically significant pixels from all folds
for (fold in 1:10) {
  test_idx <- which(folds == fold)
  train_idx <- setdiff(1:length(group), test_idx)
  X_train <- X[train_idx, ]
  group_train <- group[train_idx]
  X_test <- X[test_idx, ]
  # Feature selection on the training data only
  pvals <- sapply(1:ncol(X_train), function(j)
    wilcox.test(X_train[group_train == 1, j], X_train[group_train == 0, j], exact = FALSE)$p.value)
  pvals_cor <- p.adjust(pvals, method = "fdr") # False Discovery Rate (FDR) correction
  cat("Number of significant pixels after FDR correction:", sum(pvals_cor < 0.05), "\n")
  sel[fold, ] <- (pvals_cor < 0.05)
  X_train_red <- X_train[, pvals_cor < 0.05] # Select features with corrected p-value < 0.05 in train data
  X_test_red <- X_test[, pvals_cor < 0.05]   # Select features with corrected p-value < 0.05 in test data
  svm_model <- svm(X_train_red, as.factor(group_train)) # train model
  group_klasif4[test_idx] <- predict(svm_model, X_test_red)
}
table(group_klasif4, group)
group_klasif4 <- as.numeric(group_klasif4) - 1 # factor codes 1/2 -> labels 0/1
table(group_klasif4, group)

accuracy_cv <- mean(group_klasif4 == group)
sensitivity_cv <- mean(group_klasif4[group == 1] == group[group == 1])
specificity_cv <- mean(group_klasif4[group == 0] == group[group == 0])
cat("Accuracy (10-fold CV):", accuracy_cv, "\n")
cat("Sensitivity (10-fold CV):", sensitivity_cv, "\n")
cat("Specificity (10-fold CV):", specificity_cv, "\n")
# 64.6%; 58.3%; 70.8%

# Visualization of significant pixels - each fold yields a different number
# of significant pixels; for visualization we can, for example, plot the
# pixels that were statistically significant in at least half of the folds:
significant_mask4 <- mask
significant_mask4[mask == 1] <- 1 + ((colSums(sel) / nrow(sel)) >= 0.5)
image(significant_mask4, main = "Significant Pixels")
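# The fold assignment above ignores group membership, so individual folds can
# end up patient-heavy or control-heavy. A minimal sketch of a stratified
# variant (illustrative, not part of the original exercise): assign folds
# within patients and within controls separately.
set.seed(123)
folds_strat <- numeric(length(group))
folds_strat[inxP] <- sample(rep(1:10, length.out = length(inxP)))
folds_strat[inxC] <- sample(rep(1:10, length.out = length(inxC)))
table(folds_strat, group) # each fold now keeps a roughly constant patient/control ratio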
### 5. Data Reduction and Classification: Hold-Out Validation

set.seed(123)
train_idx <- sample(1:length(group), size = 0.75 * length(group))
test_idx <- setdiff(1:length(group), train_idx)
X_train <- X[train_idx, ]
X_test <- X[test_idx, ]
group_train <- group[train_idx]
group_test <- group[test_idx]

# Feature selection on the training data only
pvals <- sapply(1:ncol(X_train), function(j)
  wilcox.test(X_train[group_train == 1, j], X_train[group_train == 0, j], exact = FALSE)$p.value)
pvals_cor <- p.adjust(pvals, method = "fdr") # False Discovery Rate (FDR) correction
cat("Number of significant pixels after FDR correction:", sum(pvals_cor < 0.05), "\n")
X_train_red <- X_train[, pvals_cor < 0.05] # Select features with corrected p-value < 0.05 in train data
X_test_red <- X_test[, pvals_cor < 0.05]   # Select features with corrected p-value < 0.05 in test data

svm_model <- svm(X_train_red, as.factor(group_train)) # train model
group_klasif5 <- predict(svm_model, X_test_red)
table(group_klasif5, group_test)

accuracy_ho <- mean(group_klasif5 == group_test)
sensitivity_ho <- mean(group_klasif5[group_test == 1] == group_test[group_test == 1])
specificity_ho <- mean(group_klasif5[group_test == 0] == group_test[group_test == 0])
cat("Accuracy (hold-out):", accuracy_ho, "\n")
cat("Sensitivity (hold-out):", sensitivity_ho, "\n")
cat("Specificity (hold-out):", specificity_ho, "\n")
# 75.0%; 87.5%; 50.0%
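# A single 75/25 split gives a high-variance estimate: the 50.0% specificity
# rests on only the few controls that landed in the test set. A hedged sketch
# (illustrative, not part of the original exercise): repeat the split several
# times and look at the spread of the accuracy estimates. It assumes each
# split retains at least one significant pixel after FDR correction.
set.seed(123)
acc_rep <- replicate(20, {
  tr <- sample(1:length(group), size = 0.75 * length(group))
  te <- setdiff(1:length(group), tr)
  Xtr <- X[tr, ]; gtr <- group[tr]
  p <- sapply(1:ncol(Xtr), function(j)
    wilcox.test(Xtr[gtr == 1, j], Xtr[gtr == 0, j], exact = FALSE)$p.value)
  keep <- p.adjust(p, method = "fdr") < 0.05 # selection inside each split
  fit <- svm(Xtr[, keep, drop = FALSE], as.factor(gtr))
  mean(predict(fit, X[te, keep, drop = FALSE]) == group[te])
})
summary(acc_rep) # spread of the 20 hold-out accuracy estimates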