####### POPISNA STATISTIKA ####### library(MASS) data(chem) x = chem # A numeric vector of 24 determinations of copper in wholemeal flour, in parts per million. library(openxlsx) pisky <- read.xlsx("c:\\Users\\lubop\\Documents\\kurs\\DVpisky1.xlsx", sheet = "Pisky", startRow = 1, colNames = TRUE, rowNames = TRUE,detectDates = FALSE, skipEmptyRows = TRUE,rows = NULL, cols = NULL,check.names = FALSE) # pisky <- read.xlsx("c:/Users/lubop/Documents/kurs/DVpisky1.xlsx", sheet = "Pisky", startRow = 1, colNames = TRUE, rowNames = TRUE,detectDates = FALSE, skipEmptyRows = TRUE,rows = NULL, cols = NULL,check.names = FALSE) summary(pisky) pisky = pisky[,-1] colnames(pisky) x = pisky[,"Ba"] ### arithmetic mean mean(x, na.rm=TRUE) apply(pisky,2,mean) # trimming mean(x,trim=0.1) library(asbio) trim.me(x, trim = 0.1) mean(trim.me(x, trim = 0.1)) library(climtrends) TrimmedMean(x, percentTrim=0.1) # winsorizace library(asbio) win(x, lambda = 0.2) mean(win(x, lambda = 0.2)) ### rozptyl (variance) # populace n / vyber n-1 viz Excel var(x, na.rm=TRUE) library(asbio) var(trim.me(x, trim = 0.1)) # trim var(win(x, lambda = 0.2)) # win #### SD sd(x, na.rm=TRUE) sqrt(var(x, na.rm=TRUE)) sd(trim.me(x, trim = 0.1)) # trim sd(win(x, lambda = 0.2)) # win ### geometric mean prod(x)^(1/length(x)) #compute the geometric mean library(caroline) geomean(x) library(FSA) geomean(x, na.rm = FALSE, zneg.rm = FALSE) library(FinCal) geometric.mean(x) # lognormal mean library(fuel) lognormalmean(x, estimator="ml", base = exp(1),n = length(x), m = n - 1, d = 1/n) #### geometric SD library(FSA) sg = geosd(x, na.rm = FALSE, zneg.rm = FALSE) 10^(sg) #### lognormal SD library(fuel) lognormalsd(x, estimator="ml", base = exp(1),n = length(x),m = n - 1, d = 1/n) ### variacni rozpeti range(x) max(x) min(x) max(x)-min(x) max(x)/min(x) # ratio exceeds 2 orders of magnitude, logarithmic plots will be informative # if the ratio is between 1.5 and 2 orders of magnitude, logarithmically scaled plots will likely be informative; #### Variacni koeficient # CV > 100 % = plots on a logarithmic scale # CV = 70% - 100%, the inspection of logarithmically scaled plots will likely be informative. library(DescTools) CoefVar(x, weights = NULL, unbiased = FALSE, conf.level = 0,95, na.rm = FALSE) library(EnvStats) cv(x, method = "moments", sd.method = "sqrt.unbiased", l.moment.method = "unbiased", plot.pos.cons = c(a = 0.35, b = 0), na.rm = FALSE) library(FinCal) coefficient.variation(sd=sd(x),avg=mean(x)) library(goeveg) cv(x, na.rm = FALSE) library(litteR) cv(x, na.rm = FALSE) library(asht) cvTest(x, nullCV = 1, alternative = "two.sided", conf.level = 0.95, distn = "normal", CVmax = 10^6) # alternative = c("two.sided", "less", "greater"), # distn = c("normal", "lognormal") ### sikmost library(moments) skewness(x)# sikmost skewness(x)/(sqrt(6/length(x))) # sikmost delena odhadem str. chyby #hodnoty pod -2 a nad 2 indikují nenormalitu (Havranek) library(asbio) skew(x) library(e1071) skewness(x, na.rm = FALSE, type = 3) library(s20x) skewness(x) library(agricolae) skewness(x) library(ACSWR) skewcoeff(x) # Pearsonova mira sikmosti SK <- 3*(mean(x)-median(x))/sd(x); SK ### spicatost library(moments) kurtosis(x)# spicatost kurtosis(x)-3 (kurtosis(x)-3)/(sqrt(24/length(x))) # sikmost delena odhadem str. chyby #hodnoty pod -2 a nad 2 indikují nenormalitu (Havranek) library(asbio) kurt(x) library(agricolae) kurtosis(x)-3 library(ACSWR) kurtcoeff(x)-3 #### kvantily ### kvartily Q1 = quantile(x,0.25) Q2 = quantile(x,0.5) Q3 = quantile(x,0.75) quantile(x,c(0.25,0.5,0.75)) library(Hmisc) # Harrell-Davis hdquantile(x, c(0.25,0.5,0.75), se=FALSE) library(fmsb) percentile(x) ### median median(x) quantile(x, 0.5) library(fmsb) truemedian(x, h=1) library(Hmisc) # Harrell-Davis hdquantile(x, 0.5, se=FALSE) #### Tukey's trimean trimean(x) (Q1 + 2*Q2 + Q3)/4 hist(x, main="") rug(x) rug(trim.me(x, trim = 0.1),col=2) # asbio #### lognormal median library(fuel) lognormalmedian(x, estimator="ml", base = exp(1),n = length(x), m = n - 1, d = 1/n) ### interkvartilove rozpeti IQR(x) Q3-Q1 library(fmsb) SIQR(x, mode=1) # https://en.wikipedia.org/wiki/Robust_measures_of_scale ### absolutni medianova odchylka (MAD) mad(x) library(lawstat) j.maad(x) ### coefficient of dispersion (CD) MAAD/med library(lawstat) cd(x) ### index of dispersion, relative variance library(litteR) iod(x, na.rm = FALSE) ### Relative Median Absolute Deviation: mad/median library(litteR) rmad(x, na.rm = FALSE) # kvartilova sikmost QS <- ((quantile(x, 0.75) - median(x)) - (median(x) - quantile(x, 0.25)))/IQR(x); QS # oktilova sikmost OS <- ((quantile(x, 0.875) - median(x)) - (median(x) - quantile(x, 0.125)))/(quantile(x, 0.875) - quantile(x, 0.125)); OS ### SUMMARY ########### summary(x) library(pastecs) stat.desc(x) library(fields) stats(x) library(s20x) summaryStats(x) library(epiR) epi.descriptives(x, conf.level = 0.95) library(live) data(wine) wine head(wine) summary(wine) library(psych) describe(wine) library(Hmisc) describe(wine) library(pastecs) stat.desc(wine) library(fields) stats(wine) ##### Intervalove odhady stredni hodnoty ### konfidencni intervaly t.test(x,conf.level=0.95)$conf.int library(DescTools) MeanCI(x,conf.level=0.95) library(Rmisc) CI(x,ci=0.95) ### bootstrap CI library(DescTools) BootCI(x, y = NULL, mean, bci.method = "norm", conf.level = 0.95, sides = "two.sided", R = 999) # bci.method = c("norm", "basic", "stud", "perc", "bca") # sides = c("two.sided", "left", "right") library(boot) Mboot = boot(x,function(x,i) mean(x[i]), R=10000) mean(Mboot$t[,1]) boot.ci(Mboot,conf = 0.95,type = "norm") #### Predikcni intervaly library(predictionInterval) pi.m(M=mean(x),SD=sd(x),n=length(x),rep.n=length(x)) #### Tolerancni intervaly library(tolerance) nptol.int(y,alpha=0.05,P=0.95,side=2,method="WALD",upper=NULL,lower=NULL) # method = c("WILKS", "WALD", "HM","YM") library(MASS) data(chem) x = chem # A numeric vector of 24 determinations of copper in wholemeal flour, in parts per million. library(openxlsx) pisky <- read.xlsx("c:\\Users\\lubop\\Documents\\kurs\\DVpisky1.xlsx", sheet = "Pisky", startRow = 1, colNames = TRUE, rowNames = TRUE,detectDates = FALSE, skipEmptyRows = TRUE,rows = NULL, cols = NULL,check.names = FALSE) # pisky <- read.xlsx("c:/Users/lubop/Documents/kurs/DVpisky1.xlsx", sheet = "Pisky", startRow = 1, colNames = TRUE, rowNames = TRUE,detectDates = FALSE, skipEmptyRows = TRUE,rows = NULL, cols = NULL,check.names = FALSE) summary(pisky) pisky = pisky[,-1] colnames(pisky) x = pisky[,"Ba"] # Cullen Frey plot library(fitdistrplus) descdist(x, discrete=FALSE) descdist(x, discrete=FALSE, boot=900) library(asbio) descdist(trim.me(x, trim = 0.1), discrete=FALSE, boot=900) library(ssdtools) ssd_plot_cf(as.data.frame(x), left = "x") library(UsingR) simple.eda(x) # ECDF P = ecdf(x) plot(P, pch = 1, col = 1, cex = 1.2, main = NULL) # z = seq(0, 30, by=0.1) # The values at which we want to evaluate the empirical CDF # p = P(z) # p now stores the empirical CDF evaluated at the values in z # points(z, p, type="l", col=2, lwd=2, lty = 1) library(Hmisc) Ecdf(x) rug(x, col=1, lwd=2) library(jmuOutlier) plotEcdf(x, y = NULL, col = c("black", "red")) library(fitteR) e <- ecdf2(x) plot(e$x,e$y,xlab="x", ylab="Fn(x)",cex=1.2) # plot(e$x, e$cs) library(mltools) empirical_cdf(x, ubounds=1:length(x)) library(ssdtools) ssd_plot_cdf(x) library(litteR) recdf(sort(x), n=length(x)) recdf(1:5, 10) # Mountain plot mountain <- function(x){n<-length(x)+1 x<-sort(x) percent=100*rank(x)/n rperc=(100-percent) fold=percent folded <- function(x) { for(i in 1:length(x)) if (percent[i]>50) fold[i]<-rperc[i] return(fold)} plot(x, folded(x), type="b", ylab="Folded Empirical CDF", col=1) return(mountain)} mountain(x) library(mountainplot) mountainplot(x, type="b", col=1, cex=1) # stripchart stripchart(x,method="overplot",pch=1,col=1,cex=1.5) # vertical=FALSE stripchart(x,method="overplot",pch=1,col=1,cex=1.5,log="x") stripchart(x,method="jitter",jitter=0.1, pch=1,col=1,cex=1.5) stripchart(x,method="jitter",jitter=0.1, pch=1,col=1,cex=1.5,log="x") stripchart(x,method="stack",pch=1,col=1,cex=1.5) stripchart(x,method="stack",pch=1,col=1,cex=1.5,log="x") library(beeswarm) beeswarm(x, log = FALSE,pch = 1, col = 1, cex = 1.2, vertical=FALSE) beeswarm(x, log = TRUE,pch = 1, col = 1, cex = 1.2, vertical=FALSE) # boxplot boxplot(x, horizontal=TRUE, notch = FALSE, xlab = "x") boxplot(x, horizontal=TRUE, notch = TRUE, xlab = "x") library(beeswarm) boxplot(x, horizontal=TRUE, notch = FALSE, xlab = "x", log = "x", cex = 1.2) beeswarm(x, log = TRUE,pch = 1, col = 1, cex = 1.2, vertical=FALSE, add=TRUE) boxplot(x,xlab="x",las=2, horizontal=TRUE, cex=1.2,log="x") stripchart(x,pch=1, col=1, vert=FALSE, add=TRUE, cex=1.2,log="x") boxplot.stats(x) bs=boxplot.stats(x) bs$out library(StatDA) bs=StatDA::boxplot.stats(x) bs$out # histogram hist(x, freq=TRUE, breaks = 20, plot = TRUE,main = NULL) # pro counts hist(x, freq=FALSE, breaks = 10, plot = TRUE,main = NULL) rug(x) # Sturges" nc1 = nclass.Sturges(x); nc1 hist(x, freq=FALSE, breaks = nc1, plot = TRUE,main = NULL) rug(x, col=1, lwd = 2) hist(x, freq=FALSE, breaks = "Sturges", plot = TRUE,main = NULL) rug(x, col=1, lwd = 2) hist(x, breaks = nc1, plot = FALSE) # Scott nc2 = nclass.scott(x); nc2 hist(x, freq=FALSE, breaks = nc2, plot = TRUE,main = NULL) rug(x, col=1, lwd = 2) hist(x, freq=FALSE, breaks = "Scott", plot = TRUE,main = NULL) rug(x, col=1, lwd = 2) hist(x, breaks = nc2, plot = FALSE) # Freedman-Diaconis nc3 = nclass.FD(x); nc3 hist(x, freq=FALSE, breaks = nc3, plot = TRUE,main = NULL) rug(x, col=1, lwd = 2) hist(x, freq=FALSE, breaks = "FD", plot = TRUE,main = NULL) hist(x, breaks = nc3, plot = FALSE) library(MASS) truehist(x, nbins = "Sturges") ### nefunguje rug(x, lwd=2) truehist(x, nbins = "Scott") rug(x, lwd=2) truehist(x, nbins = "Freedman-Diaconis") # nebo "FD" rug(x, lwd=2) hist.scott(x, prob = TRUE, xlab = "x", main=NULL) rug(x, lwd=2) hist.FD(x, prob = TRUE, xlab ="x", main=NULL) rug(x, lwd=2) library(DistributionUtils) logHist(x, breaks="Sturges", main=NULL) rug(x, lwd=2) logHist(x, breaks="Scott", main=NULL) rug(x, lwd=2) logHist(x, breaks="FD", main=NULL) rug(x, lwd=2) # histogram s frekvencnim polygonem h <- hist(x, prob=TRUE, plot=FALSE) diffBreaks <- h$mids[2] - h$mids[1] xx <- c( h$mids[1]-diffBreaks, h$mids, tail(h$mids,1)+diffBreaks ) yy <- c(0, h$density, 0) hist(x, prob = TRUE, xlim=range(xx), border=1, col=0, main="") lines(xx, yy, lwd=1, col=2) rug(x, lwd = 2, col=3) # Averaged shifted histogram (ASH) library(ash) f <- ash1(bin1(x,nbin=50),5) # ash estimate plot(f, type="s", xlab ="x", ylab ="density") # lines(f, type="l", col=2) rug(x, lwd=2) # Jadrovy odhad hustoty pravdepodobnosti (Kernel density estimator, KDE) den1 = density(x) den1$bw den1$x den1$y plot(den1$x, den1$y, type="l", ylab = "density", xlab = "x") rug(x, lwd=2) plot(density(x), main="") #A bit bumpy rug(x, lwd=2) density(x,adjust = 10) plot(density(x,adjust = 10), main="") #Very smooth rug(x, lwd=2) density(x,adjust = 0.1) plot(density(x,adjust = 0.1), main="") #crazy bumpy rug(x, lwd=2) bw.nrd0(x) # Silverman (1986) bw.nrd(x) # Scott (1992) bw.SJ(x, method ="dpi") # Sheather a Jones dpi bw.SJ(x, method ="ste") # Sheather a Jones ste hmax = 5*bw.nrd0(x) lower = 0.1 * hmax upper = hmax bw.ucv(x, nb = 1000, lower = 0.1 * hmax, upper = hmax, tol = 0.1 * lower) # unbiased cross-validation hmax = 0.5*bw.nrd0(x) lower = 0.1 * hmax upper = hmax bw.bcv(x, nb = 1000, lower = 0.1 * hmax, upper = hmax, tol = 0.1 * lower) # biased cross-validation library(kerdiest) # Altman and Leger (1995). ALbw(type_kernel = "n", vec_data=x) # cross-validation method of Bowman, Hall and Prvan (1998) CVbw(type_kernel = "n", vec_data=x, n_pts = 100, seq_bws =NULL) # Polansky and Baker (2000) PBbw(type_kernel = "n", vec_data=x, num_stage = 2) # "e" Epanechnikov, "n" Normal,"b" Biweight and "t" Triweight vec_data=x kde(type_kernel = "n", vec_data=x, y = NULL, bw = PBbw(type_kernel = "n",vec_data, 2)) library(kdensity) xx = kdensity(x, bw = "RHE", adjust = 1, kernel = NULL, start = NULL,support = NULL, na.rm = FALSE, normalized = FALSE, tolerance = 0.01) # "nrd0", "nrd", "bcv", "SJ" # "ucv": Unbiased cross validation, "RHE" Hjort & Glad (1995), "JH" Jones& Henderson plot(xx, main="") rug(x, lwd=2) #### Mode tree library(multimode) modetree(x,bws=NULL,gridsize=NULL,cbw1=NULL,cbw2=NULL,display=TRUE,logbw=FALSE,logbw.regulargrid=NULL) modetree(x,bws=c(0,10),gridsize=NULL,cbw1=NULL,cbw2=NULL,display=TRUE,logbw=FALSE,logbw.regulargrid=NULL) # houslovy graf library(vioplot) vioplot(x, range=1.5, horizontal=TRUE, col=0) stripchart(x, pch=1, col=2, vert=FALSE, add=TRUE, cex=1.2) vioplot(x, range=1.5, horizontal=TRUE, col=0) stripchart(x, method="jitter", pch=1, col=2, vert=FALSE, add=TRUE, cex=1.2) vioplot(x, range=1.5, horizontal=TRUE, col=0) beeswarm(x, log = FALSE,pch = 16, col = 2, cex = 1.2, vertical=FALSE, add=TRUE) # Box-percentile plot library(Hmisc) bpplot(x) # EDA plot library(StatDA) edaplot(x, H.freq=FALSE,box=TRUE,S.cex=3, P.main="",P.log=FALSE) # QQ graf library(animation) sim.qqnorm(n = 20, last.plot = NULL) my.qq <- qqnorm(sort(x), type = "p", xlab = "x", ylab = "Quantiles of the Normal Distribution",main=NULL) qqline(x, col = "red") library(ryouready) xx = qqnorm_spss(x, standardize = FALSE, method = 1, ties.method = "average") # 1 =Blom (default),2 =Rankit/Hazen,3 =Tukey,4 =Van der Waerden/Weibull,5 =Benard and Bos-Levenbach,6 =Gringorten, 7 =Yu and Huang. # ties.method: "average", "first", "random", "max", "min". plot(xx) library(car) qqPlot(x, dist="norm",line="robust",envelope=.95) # line=c("quartiles", "robust", "none") library(StatDA) qqplot.das(x, distribution="norm", ylab="x", xlab="Kvantily normalniho rozdeleni", line="quartiles", labels="row.names",col=1) library(lawstat) rqq(x,plot.it = TRUE, square.it = TRUE, scale = "MAD", location = "median",line.it = TRUE,line.type = "QQ", col.line = 1, lwd = 1,outliers = TRUE, alpha = 0.05) library(openintro) qqnormsim(x,data=as.data.frame(x)) library(faraway) qqnorml(x,main = "", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles", type="p") # scale = c("MAD", "J", "classical"), # location = c("median", "mean"), # line.type = c("45 degrees", "QQ") library(qualityTools) qqPlot(x, confbounds = TRUE, alpha=0.05, dist="norm") qqPlot(x, "log-normal") qqPlot(x, "normal") qqPlot(x, "cauchy") library(limma) qqt(x, df = Inf, ylim = range(x), main = "Student's t Q-Q Plot",xlab = "Theoretical Quantiles", ylab = "Sample Quantiles", plot.it = TRUE) # default df=Inf represents the normal distribution qqf(x, df1, df2, ylim=range(x), main= "F Distribution Q-Q Plot",xlab = "Theoretical Quantiles", ylab = "Sample Quantiles", plot.it = TRUE) library(snpMatrix) qq.chisq(x, df=1, main="QQ plot",sub=paste("Expected distribution: chi-squared (",df," df)", sep=""), xlab="Expected", ylab="Observed", conc=c(0.025, 0.975), overdisp=FALSE, trim=0.5, slope.one=FALSE, slope.lambda=FALSE, thin=c(0.25,50), oor.pch=24, col.shade="gray") # PP plot library(StatDA) ppplot.das(x, pdist=pnorm, xlab="Pravdepodobnost x", ylab="Pravdepodobnost normalniho rozdeleni",line=TRUE) library(qualityTools) ppPlot(x, "log-normal", confbounds = TRUE, alpha=0.05) ppPlot(x, "normal", confbounds = TRUE, alpha=0.05) ppPlot(x, "cauchy", confbounds = TRUE, alpha=0.05) library(nsRFA) unifplot (x, a=0, orient="xF", line=TRUE) normplot (x, a=0, orient="xF", line=TRUE) lognormplot (x, a=0, orient="xF", line=TRUE) gumbelplot (x, a=0, orient="xF", line=TRUE) frechetplot (x, a=0, orient="xF", line=TRUE) ## TREND plot(x) abline(h=median(x), lty=2) library(s20x) trendscatter(c(1:length(x)), y = x, f = 0.5, xlab = "Index", ylab = NULL, main = "") # von Neumann test library(DescTools) VonNeumannTest(x, alternative = "two.sided", unbiased = TRUE) # alternative = c("two.sided", "less", "greater") library(climtrends) VonNeumannRatioRank(x) # Bartels test library(lawstat) bartels.test(x, alternative = "two.sided") library(DescTools) BartelsRankTest(x, alternative = "two.sided", method = "normal") # alternative = c("two.sided", "trend", "oscillation") # method = c("normal", "beta", "auto") # Mann Kendall test library(Kendall) MannKendall(x) library(litteR) mk = mann_kendall(x, type = "both") # type = c("both", "increasing", "decreasing") test_statistic(mk) p_value(mk) # Median crossing test library(climtrends) MedianCrossingTest(x) # runs test for randomness library(lawstat) runs.test(x,plot.it = TRUE, alternative = "two.sided") ### Odlehle hodnoty x = pisky[,"Ba"] ### Vypocet z parametru distribuce library(climtrends) FindOutliersMAD(x,coef=3) # 2*IQRcoef FindOutliersMAD(x,coef=2.5) FindOutliersMAD(x,coef=2) # coefficient, 3=very conservative, 2.5=moderately conservative 2=poorly conservative FindOutliersQuant(x,coef=1.5) FindOutliersSD(x,coef=3) FindOutliersTrimmedMeans(x, percentTrim=0.1, coef=3) FindOutliersZscore(x, coef=2.5) ### Testy na odlehlé body library(outliers) dixon.test(x, type=22, opposite=FALSE, two.sided=TRUE) # 10 pro 3-7, 11 pro 8-10, 21 pro 11-13, 22 pro 14 a vice hodnot grubbs.test(x, type=10, opposite=FALSE, two.sided=TRUE) grubbs.test(x, type=11, opposite=FALSE, two.sided=TRUE) grubbs.test(x, type=20, opposite=FALSE, two.sided=TRUE) # 10 pro 1 odl. bod, 11 pro 1 odl. bod na kazdem konci, 20 pro 2 odl. body na 1 konci library(climtrends) FindOutliersTietjenMooreTest(x,k=1,alpha=0.05) library(EnvStats) rosnerTest(x, k = 2, alpha = 0.05, warn = TRUE) ## Outliers na zaklade distribucni funkce library(extremevalues) getOutliers(x,method="I",distribution="normal") getOutliers(x,method="II",distribution="normal") getOutliers(x,method="I",distribution="lognormal") getOutliers(x,method="II",distribution="lognormal") L <- getOutliers(x,method="II",distribution="normal") outlierPlot(x,L,mode="qq") outlierPlot(x,L,mode="residual") library(OutlierDetection) UnivariateOutlierDetection(x, k = 0.05 * length(x), cutoff = 0.95, dist = FALSE, dens = FALSE, depth = FALSE, Method = "euclidean", rnames = FALSE) library(mvoutlier) # test pro celou matici uni.plot(as.data.frame(pisky), symb=FALSE, quan=1/2, alpha=0.025) ###### Box plot symetric ######## # Tukey IQRcoef = 1.5 boxplot(x,ylab="x",range = IQRcoef) boxplot.stats(x, coef = IQRcoef, do.conf = TRUE, do.out = TRUE) # Vypocet podle Hoaglin, Iglewicz 1987 pro B 0.95 n<-length(x) k<- 2.25-3.6/length(x) K1<-quantile(x,probs=0.25)-k*(IQR(x)); K1 K2<-quantile(x,probs=0.75)+k*(IQR(x)); K2 which(xK2) # Vypocet podle Anguse # Gauss n<-length(x) cl<-0.95 an<-((2*log(n))^(0.5))-0.5*(log(log(n))+log(4*pi))*(2*log(n))^(-1/2) bn<-(2*log(n))^(-1/2) k<-(1/1.34898)*(-1*log(-0.5*log(cl))*bn+an-0.67449); k K1<-quantile(x,probs=0.25)-k*(IQR(x)); K1 K2<-quantile(x,probs=0.75)+k*(IQR(x)); K2 which(xK2) # Carling 2000 n<-length(x) k<-(17.63*n - 23.64)/(7.74*n -3.71); k K1<-median(x)-k*(IQR(x)); K1 K2<-median(x)+k*(IQR(x)); K2 which(xK2) ## Box plot asymetric ## ## identifikace odlehlých bodu neparametricky boxplot ##### IQRcoef = 1.5 fQ1 <-(quantile(x,probs = 0.25))+(-1*IQRcoef*IQR(x)) fQ3 <-(quantile(x,probs = 0.75))+IQRcoef*IQR(x) which(xfQ3) ## identifikace odlehlých bodu podle Carlinga (2000) n<-length(x) r<- 0.001 # pravdepodobnost odlehle hodnoty library(gld) lamd<-starship(x, optim.method="Nelder-Mead") lam1<-lamd$lambda[1] lam2<-lamd$lambda[2] lam3<-lamd$lambda[3] lam4<-lamd$lambda[4] g1<- lam3 g2<- lam4 qqgl(x, lambda1= lam1, lambda2=lam2, lambda3=lam3, lambda4=lam4, abline=TRUE, main="Generalized lambda distribution") k1<-(17.63*n - 23.64)/(n*((100*r+8.07)-3.71/n-0.83*g1-0.48*(g2^2)-0.48*(g2-3)+0.04*(g2-3)^2)) K1c<-median(x)-k1*(IQR(x)); K1c K2c<-median(x)+k1*(IQR(x)); K2c length(which(x>K2c)) # pocet odlehlych bodu which(xK2c) ## modifikace Carlingova postupu dle Carterové (2009) SIQRL<- median(x)-quantile(x,probs=0.25) SIQRU<- quantile(x,probs=0.75)-median(x) K1cm<-median(x)-2*k1*SIQRL; K1cm K2cm<-median(x)+2*k1*SIQRU; K2cm which(xK2cm) ## identifikace odlehlých bodu podle Schwertmana a de Silvy (2007) r<- 0.01 # pravdepodobnost odlehle hodnoty n<-length(x); n a<- -1*log(1-r)/n; a Za<-qnorm(0.975); Za # kvantil alfa n z N f<- 7.6809524+0.5294156*n-0.00237*n*n; f # stupne volnosti pro t tfa<-0 # kvantil alfa n z t pro dane n-1 kn<-1.34278 # hodnota kn pro dané n K1s<-median(x)-(Za/kn)*(IQR(x)); K1s K2s<-median(x)+(Za/kn)*(IQR(x)); K2s which(xK2s) ## modifikace dle Carterové (2009) SIQRL<- median(x)-quantile(x,probs=0.25) SIQRU<- quantile(x,probs=0.75)-median(x) C4a<-median(x)-2*(Za/kn)*SIQRL; C4a<-as.numeric(C4a) C4b<-median(x)+2*(Za/kn)*SIQRU; C4b<-as.numeric(C4b) which(xC4b) ## identifikace odlehlých bodu podle Hubertové a Vandervierena (2008) n<-length(x); n library(robustbase) MC<-mc(y); MC if(MC<0) a<--3 else a<--4; a if(MC<0) b<-4 else b<-3; b K1h<-(quantile(x, probs=0.25)-1.5*exp(a*MC)*IQR(x)); K1h K2h<-(quantile(x, probs=0.75)+1.5*exp(b*MC)*IQR(x)); K2h ab<- adjbox(x, plot=FALSE) K1a<-ab$stats[1,1]; K1a K2a<-ab$stats[5,1]; K2a K1a<-as.numeric(K1a) K2a<-as.numeric(K2a) which(xK2a) library(parody) # bioc calout.detect(x,alpha=.05,method="boxplot",ftype="ideal") calout.detect(x,alpha=.05,method="GESD",k=5) calout.detect(x,alpha=.05,method="medmad",scaling=hamp.scale.3) calout.detect(x,alpha=.05,method="shorth") calout.detect(x,alpha=.05,method="hybrid") library(univOutl) boxB(x, k=1.5, method="resistant", weights=NULL, id=NULL, exclude=NA, logt=FALSE) boxB(x, k=1.5, method= "asymmetric", weights=NULL, id=NULL, exclude=NA, logt=FALSE) boxB(x, k=1.5, method="adjbox", weights=NULL, id=NULL, exclude=NA, logt=FALSE) # k = 1.5 (default), 2, or 3 ## MULTIMODALITA ### library(multimode) data(acidity) x = acidity data(chondrite) x = chondrite data(enzyme) x = enzyme library(openxlsx) kyjov<- read.delim("c:\\Users\\lubop\\Documents\\kurs\\kyjov.txt", header=T) x = kyjov[which(kyjov[,3]=="1034"),] x = x[,6] hist(x, main="") rug(x) plot(density(x)) rug(x) library(diptest) dtb = dip(x, full.result = TRUE, min.is.0 = TRUE, debug = FALSE); dtb dip.test(x, simulate.p.value = FALSE, B = 2000) plot(dtb) library(Rfolding) folding.test(as.matrix(x)) ## Kernel density estimate library(multimode) locmodes(x,mod0=1,display=TRUE,xlab="x") locmodes(x,mod0=2,display=TRUE,xlab="x") locmodes(x,mod0=3,display=TRUE,xlab="x") locmodes(x,mod0=4,display=TRUE,xlab="x") bw1 = bw.crit(x,mod0=1,lowsup=-Inf,uppsup=Inf,n=2^15,tol=10^(-5),full.result=F) bw2 = bw.crit(x,mod0=2,lowsup=-Inf,uppsup=Inf,n=2^15,tol=10^(-5),full.result=F) bw3 = bw.crit(x,mod0=3,lowsup=-Inf,uppsup=Inf,n=2^15,tol=10^(-5),full.result=F) bw4 = bw.crit(x,mod0=4,lowsup=-Inf,uppsup=Inf,n=2^15,tol=10^(-5),full.result=F) modetree(x,bws=NULL,gridsize=NULL,cbw1=NULL,cbw2=NULL,display=TRUE,logbw=FALSE,logbw.regulargrid=NULL,xlab="x") modetree(x,bws=c(0.1,4),gridsize=NULL,cbw1=NULL,cbw2=NULL,display=TRUE,logbw=FALSE,logbw.regulargrid=NULL,xlab="x") library(silvermantest) # https://www.mathematik.uni-marburg.de/~stochastik/R_packages/ silverman.test(x, k=1, M = 999, adjust = FALSE, digits = 6) silverman.test(x, k=2, M = 999, adjust = FALSE, digits = 6) silverman.test(x, k=3, M = 999, adjust = FALSE, digits = 6) silverman.plot(x, kmin=1, kmax=5, alpha=0.05, adjust=FALSE) ## Bhattacharya plot s ASH library(ash) nn=30 f <- ash1(bin1(x,nbin=nn),5) # ash estimate nk<-length(f$y)-1 yk<-rep(0,nk) xk<-rep(0,nk) for(i in 1:nk){ yk[i]<-log(f$y[i+1]/f$y[i]) xk[i]<-(f$x[i+1]+f$x[i])/2 } plot(xk,yk, type="l",col=1,lty=1, main="", xlab="x",ylab="ln( f(i+1) / f(i) )") rug(x) library(mixtools) fit <- normalmixEM(x, k = 2) #try to fit k Gaussians summary(fit) library(kdensity) xx = kdensity(x, bw =bw4, adjust = 1, kernel = NULL, start = NULL,support = NULL, na.rm = FALSE, normalized = FALSE, tolerance = 0.01) plot(xx,main="") rug(x, lwd=2) library(mclust) xs<- sort(x) dmc <- Mclust(xs,G=NULL) dmcBIC <- mclustBIC(x); dmcBIC nn = 3 dmc <- Mclust(xs,G=nn) dmcz <-round(dmc$z,nn); dmcz dmcc <-dmc$classification; dmcc #### TESTY GOODNESS OF FIT / Normalita ##################################################################################### library(MASS) data(chem) x = chem # A numeric vector of 24 determinations of copper in wholemeal flour, in parts per million. pisky <- read.xlsx("c:\\Users\\lubop\\Documents\\kurs\\DVpisky1.xlsx", sheet = "Pisky", startRow = 1, colNames = TRUE, rowNames = TRUE,detectDates = FALSE, skipEmptyRows = TRUE,rows = NULL, cols = NULL,check.names = FALSE) summary(pisky) pisky = pisky[,-1] colnames(pisky) x = pisky[,"Ba"] # Shapiro Francia test library(nortest) sf.test(x) library(fBasics) sfTest(x) library(DescTools) ShapiroFranciaTest(x) # Shapiro Wilk test shapiro.test(x) library(fBasic) shapiroTest(x) library(cwhmisc) shapiro.wilk.test(x) # Anderson Darling test library(nortest) ad.test(x) library(fBasics) adTest(x) # Cramer von Mises test library(nortest) cvm.test(x) library(fBasic) cvmTest(x) # D'Agostino test library(fBasics) dagoTest(x) # DAgostinuv test sikmosti library(moments) agostino.test(x, alternative = "two.sided") # alternative = c("two.sided", "less", "greater") # Anscombe-Glynnuv test spicatosti library(moments) anscombe.test(x, alternative = "two.sided") # alternative = c("two.sided", "less", "greater") # Geary test spicatosti library(fmsb) geary.test(x) # Jarque Bera test library(tseries) jarque.bera.test(x) library(moments) jarque.test(x) library(fBasic) jarqueberaTest(x) # robustní Jarque Bera test library(lawstat) rjb.test(x, option="RJB",crit.values="empirical", N = 5000) # robustni J. B. test rjb.test(x, option="JB",crit.values="empirical", N = 500) # klasicky J. B. test # crit.values = c("chisq.approximation", "empirical") # Lilliefors test (modif. Kolmogorov-Smirnov) library(nortest) lillie.test(x) library(fBasic) lillieTest(x) # Kolmogorov-Smirnov normality test library(fBasic) ksnormTest(x) # sj test library(lawstat) sj.test(x, crit.values = "empirical", N = 1000) # crit.values = c("t.approximation", "empirical") # Szekely library(energy) mvnorm.etest(x) library(cwhmisc) # T3 plot (Ghosh) T3plot(x,lab=paste("T3 plot of ",deparse(substitute(x))), legend.pos="bottom", cex=0.6) ### normplot Hazelton halfwidth <- max(mean(x)-min(x),max(x)-mean(x)) xgrid <- seq(mean(x)-1.01*halfwidth,mean(x)+1.01*halfwidth,length=512) hh <- 1.06*sd(x)/length(x)^0.2 normfit1 <- dnorm(xgrid, median(x), sqrt((1.4826*mad(x))^2+hh^2)) # Hazelton robust normfit2 <- dnorm(xgrid, median(x), 1.4826*mad(x)) # puvodni robust normfit3 <- dnorm(xgrid, mean(x), sqrt(sd(x)^2+hh^2)) # Hazelton normfit4 <- dnorm(xgrid, mean(x), sd(x)) # puvodni denfit <- apply(dnorm(outer(x,x,"-"), sd=hh), 2, mean) YLIM=c(min(min(log(denfit),log(normfit1),log(normfit2), log(normfit3), log(normfit4))),max(max(log(denfit),log(normfit1),log(normfit2), log(normfit3), log(normfit4)))) plot(xgrid,log(normfit1),type="n",xlab="x",ylab="log density",ylim=YLIM) points(x,log(denfit),pch=1, col=1) lines(xgrid,log(normfit1), col=2, lty=2) lines(xgrid,log(normfit2), col=4, lty=2) lines(xgrid,log(normfit3), col=2, lty=3) lines(xgrid,log(normfit4), col=4, lty=3) library(s20x) normcheck(x, xlab = c("Theoretical Quantiles", ""), ylab = c("Sample Quantiles", ""), main = c("", ""), col = "light blue", bootstrap = TRUE, B = 5, bpch = 3, bcol = "lightgrey", shapiro.wilk = TRUE, whichPlot = 1:2, usePar = TRUE) # test symetrie library(LambertW) test_symmetry(x, method = "Wald") test_symmetry(x, method = "LR") # method = c("Wald","LR") library(lawstat) symmetry.test(x,option="MGG",side="both",boot=TRUE, B=1000, q=8/9) # option = c("MGG", "CM", "M") # side = c("both", "left", "right") library(LearnEDA) # archiv hinkley(x) symplot(x) # graf symetrie n<-length(x) no<-floor((n+1)/2) sx<-sort(x) i=1:no u=sx[n+1-i]-median(x) v=median(x)-sx[i] plot(v,u,pch=1,cex=1, main="Symmetry Plot") abline(0,1,lwd=1,col=2,lty=2) # Anderson Darlinguv test library(DescTools) AndersonDarlingTest(x, "pnorm", mean=mean(x), sd=sd(x)) library(ADGofTest) ad.test( x, "pnorm", mean(x), sd(x) ) library(goftest) ad.test(x, "pnorm", mean=mean(x), sd=sd(x), estimated=TRUE) # Cramer von Mises library(goftest) cvm.test(x, "pnorm", mean=mean(x), sd=sd(x), estimated=TRUE) # Kolmogorov Smirnov test library(KScorrect) LcKS(x, cdf="pnorm", nreps = 4999, G = 1:9, varModel = "E", parallel = FALSE, cores = NULL) # varModel = c("E", "V") # "pnorm" for normal,"pmixnorm" for (univariate) normal mixture,"plnorm" for lognormal (log-normal, log normal), "punif" for uniform, "plunif" for loguniform (log-uniform, log uniform), library(dgof) ks.test(x, y="pnorm",alternative = "two.sided", exact = NULL, tol=1e-8, simulate.p.value=FALSE, B=2000) # alternative = c("two.sided", "less", "greater") # Vasicek-Song test (Shannonova entropie) library(vsgoftest) vs.test(x, densfun = 'dnorm', param = c(2,3), B = 500) #Simple null hypothesis vs.test(x, densfun='dnorm', B = 500) #Composite null hypothesis vs.test(x, densfun='dnorm', simulate.p.value = FALSE) vs.test(x, densfun='dnorm', simulate.p.value = TRUE) ### Fitting distribution / Goodness of fit ################################################################# ### normal library(MASS) dfp <- fitdistr(x, densfun="normal") (dffl <- dfp$loglik) dff <- dfp$estimate mean <- as.numeric(dff[1]); mean sd <- as.numeric(dff[2]); sd mean(x) sd(x) ncd = seq(min(x), max(x), 0.05) npd = dnorm(ncd, mean = mean, sd = sd) hist(x,freq = FALSE) lines(ncd,npd, type="l",col=2) rug(x) pii <-((1:length(x))-0.35)/length(x) qni<- qnorm(pii, mean = mean, sd = sd) plot(sort(qni),sort(x),col=1,ylab="x", xlab="Kvantily norm. normalniho rozdeleni",cex=1.2) q1<-c(quantile(qni, 0.25), quantile(qni, 0.75)) q3<-c(quantile(x, 0.25), quantile(x, 0.75)) abline(lm(q3~q1), col=2,lty=2) xx<-pnorm(x, mean = mean, sd = sd) plot((1:length(x))/(length(x)+1), sort(xx), xlab="Poradova pravdepodobnost x", ylab="Distribucni funkce norm. normalniho rozdeleni", xlim=c(0, 1), ylim=c(0, 1), col=1,cex=1.2) q1<-c(0.25, 0.75) q3<-c(quantile(xx, 0.25), quantile(xx, 0.75)) abline(lm(q3~q1), col=2, lty=2) library(EnvStats) norm.est <- enorm(x, method = "mvue", ci = TRUE, ci.type = "two-sided",ci.method = "exact", conf.level = 0.95, ci.param = "mean") norm.m <- as.vector(norm.est[[3]])[1] norm.sd <- as.vector(norm.est[[3]])[2] ### sw.norm <- gofTest(x, test="sw", dist = "norm"); sw.norm plot(sw.norm) norm.W <- sw.norm[[6]] norm.p <- sw.norm[[10]] ### lognormal library(MASS) dfp <- fitdistr(x, densfun="lognormal") (dffl <- dfp$loglik) dff <- dfp$estimate meanlog <- as.numeric(dff[1]) sdlog <- as.numeric(dff[2]) pii <-((1:length(x))-0.35)/length(x) qli<- qlnorm(pii, meanlog = meanlog, sdlog = sdlog, lower.tail = TRUE, log.p = FALSE) plot(sort(qli),sort(x),col=1,ylab="x", xlab="Kvantily log. normalniho rozdeleni",cex=1.2) q1<-c(quantile(qli, 0.25), quantile(qli, 0.75)) q3<-c(quantile(x, 0.25), quantile(x, 0.75)) abline(lm(q3~q1), col=2,lty=2) xx<-plnorm(x, meanlog = meanlog, sdlog = sdlog, lower.tail = TRUE, log.p = FALSE) plot((1:length(x))/(length(x)+1), sort(xx), xlab="Poradova pravdepodobnost x", ylab="Distribucni funkce log. normalniho rozdeleni", xlim=c(0, 1), ylim=c(0, 1), col=1,cex=1.2) q1<-c(0.25, 0.75) q3<-c(quantile(xx, 0.25), quantile(xx, 0.75)) abline(lm(q3~q1), col=1, lty=2) library(EnvStats) lnorm.est <- elnorm(x, method = "mvue", ci = TRUE, ci.type = "two-sided",ci.method = "exact", conf.level = 0.95) lnorm.mlog <- as.vector(lnorm.est[[3]])[1] lnorm.sdlog <- as.vector(lnorm.est[[3]])[2] sw.lnorm <- gofTest(x, test="sw", dist = "lnorm",param.list = list(meanlog=lnorm.mlog, sdlog = lnorm.sdlog)); sw.lnorm plot(sw.lnorm) lnorm.W <- sw.lnorm[[6]] lnorm.p <- sw.lnorm[[10]] ### gamma dfp <- fitdistr(x, densfun="gamma") (dffl <- dfp$loglik) dff <- dfp$estimate shape <- as.numeric(dff[1]) rate <- as.numeric(dff[2]) pii <-((1:length(x))-0.35)/length(x) qgi<- qgamma(pii, shape = shape, rate = rate) plot(sort(qgi),sort(x),col=1,ylab="x", xlab="Kvantily gamma rozdeleni",cex=1.2) q1<-c(quantile(qgi, 0.25), quantile(qgi, 0.75)) q3<-c(quantile(sort(x), 0.25), quantile(x, 0.75)) abline(lm(q3~q1), col=1, lty=2) xx<-pgamma(x, shape = shape, rate = rate) plot((1:length(x))/(length(x)+1), sort(xx), xlab="Poradova pravdepodobnost x", ylab="Distribucni funkce gamma rozdeleni", xlim=c(0, 1), ylim=c(0, 1), col=1,cex=1.2) q1<-c(0.25, 0.75) q3<-c(quantile(xx, 0.25), quantile(xx, 0.75)) abline(lm(q3~q1), col=1, lty=2) library(EnvStats) gamma.est <- egamma(x,ci = TRUE,ci.type = "two-sided", ci.method = "normal.approx",normal.approx.transform = "kulkarni.powar", conf.level = 0.95) gamma.shape <- as.vector(gamma.est[[3]])[1] gamma.scale <- as.vector(gamma.est[[3]])[2] sw.gamma <- gofTest(x, test="sw",dist = "gamma",param.list = list(shape =gamma.shape, scale = gamma.scale)); sw.gamma plot(sw.gamma) gamma.W <- sw.gamma[[6]] gamma.p <- sw.gamma[[10]] ### GEV library(MASS) library(fExtremes) ygev <- gevFit(x, block = 1, type = "mle", title = NULL, description = NULL) # type = c("mle", "pwm") print(ygev) summary(ygev) (gevp <- ygev@fit$par.ests) xi <- as.numeric(gevp[1]) mu <- as.numeric(gevp[2]) beta <- as.numeric(gevp[3]) pii <-((1:length(x))-0.35)/length(x) qi <- qgev(pii, xi, mu, beta) plot(sort(qi),sort(x),col=1,xlab="x", ylab="Kvantily GEV rozdeleni",cex=1.2) q1<-c(0.25, 0.75) q3<-c(quantile(xx, 0.25), quantile(xx, 0.75)) abline(lm(q3~q1), col=1, lty=2) xx<-pgev(x, xi, mu, beta) plot((1:length(x))/(length(x)+1), sort(xx), xlim=c(0, 1), ylim=c(0, 1), col=1,main="", xlab="Poradova pravdepodobnost x", ylab="Distribucni funkce GEV rozdeleni",cex=1.2) q1<-c(0.25, 0.75) q3<-c(quantile(xx, 0.25), quantile(xx, 0.75)) abline(lm(q3~q1), col=1, lty=2) evd.est <- eevd(x, method = "mle", pwme.method = "unbiased",plot.pos.cons = c(a = 0.326, b = 0.348), ci = TRUE, ci.parameter = "location", ci.type = "two-sided",ci.method = "normal.approx", conf.level = 0.95) evd.loc <- as.vector(evd.est[[3]])[1] evd.scale <- as.vector(evd.est[[3]])[2] sw.evd <- gofTest(x, test="sw", dist = "evd",param.list = list(location = evd.loc, scale = evd.scale)); sw.evd plot(sw.evd) evd.W <- sw.evd[[6]] evd.p <- sw.evd[[10]] pp <- ((1:length(x))-0.326)/(n+0.348) kevd <- qevd(pp, location = evd.loc, scale = evd.scale) plot(kevd,sort(x)) abline(0,1,lty=2,col=2) library(EnvStats) gevd.est <- egevd(x, method = "mle", pwme.method = "unbiased", tsoe.method = "med",plot.pos.cons = c(a = 0.326, b = 0.348), ci = TRUE, ci.parameter = "location", ci.type = "two-sided", ci.method = "normal.approx", information = "observed",conf.level = 0.95) gevd.loc <- as.vector(gevd.est[[3]])[1] gevd.scale <- as.vector(gevd.est[[3]])[2] gevd.shape <- as.vector(gevd.est[[3]])[3] sw.gevd <- gofTest(x, test="sw", dist = "gevd",param.list = list(location=gevd.loc, shape = gevd.shape, scale = gevd.scale)); sw.gevd plot(sw.gevd) gevd.W <- sw.gevd[[6]] gevd.p <- sw.gevd[[10]] ###### TRANSFORMACE DAT ############################################################################################################################################################################################################################################################### library(MASS) data(chem) x = chem # A numeric vector of 24 determinations of copper in wholemeal flour, in parts per million. pisky <- read.xlsx("c:\\Users\\lubop\\Documents\\kurs\\DVpisky1.xlsx", sheet = "Pisky", startRow = 1, colNames = TRUE, rowNames = TRUE,detectDates = FALSE, skipEmptyRows = TRUE,rows = NULL, cols = NULL,check.names = FALSE) summary(pisky) pisky = pisky[,-1] colnames(pisky) x = pisky[,"Ba"] #### Transformace na rovnomerne rozdeleni rank(x) #### Transformace poradovych hodnot na normovane normalni(Lebart et al.) ranknorm <- function(x){ r.nt<-qnorm(rank(x)/(length(x)+1), 0, 1) r.nt } rx = ranknorm(x) hist(rx) #### centilova transformace per = 100*x/sum(x) sum(x) xper = 100*x/rowSums(x) xper = 100*x/colSums(x) #### logaritmicka transformace ml1<-log10(m); ml1 # dekadicky ml2<-log(m); ml2 # prirozeny library(DescTools) LogSt(x, base = 10, calib = x, threshold = NULL, mult = 1) LogStInv(x, base = NULL, threshold = NULL) library(bestNormalize) # log(x + a) log_x(x, a = konst, b = 10, standardize = FALSE, eps = 0.001, warn = TRUE) ##### Box-Cox ################ library(MASS) MASS::boxcox(lm(x~1)) library(car) boxCox(lm(x~1), lambda = seq(-2, 2, 1/10), plotit = TRUE, interp = TRUE, eps = 1/50, xlab=NULL, ylab=NULL, family="bcPower", param= "lambda", gamma=NULL,grid=TRUE) library(AID) xbc = boxcoxnc(x, method = "sw", lambda = seq(-3,3,0.01), lambda2 = NULL, plot = TRUE,alpha = 0.05, verbose = TRUE) xbc par(mfrow=c(1,2)) hist(x, main="",freq = FALSE) hist(xbc$tf.data, main="",xlab="BCtransf x",freq = FALSE) par(mfrow=c(1,1)) confInt(xbc, level = 0.95, plot = TRUE, xlab = NULL, ylab = NULL, title = NULL,width = NULL, verbose = TRUE) library(forecast) lambda = BoxCox.lambda(x, method = "loglik", lower = -2, upper = 2) # method = c("guerrero", "loglik") xbc = BoxCox(x, lambda) xrt = InvBoxCox(xbc, lambda, biasadj = FALSE, fvar = NULL) par(mfrow=c(1,2)) hist(x, main="") hist(xrt, main="",xlab="retransf x") par(mfrow=c(1,1)) er <- qt(0.975,df=length(xbc)-1)*sd(xbc)/sqrt(length(xbc)) left <- mean(xbc)-er; left right <- mean(xbc)+er; right InvBoxCox(c(mean(xbc), left, right), lambda, biasadj = FALSE, fvar = NULL) library(bestNormalize) bct <- boxcox(x) bct # Retransformované hodnoty z Box Cox (Meloun) meanr1 <- (0.5*(1+lam*mean(bc)+sqrt(1+2*lam*(mean(bc)+var(bc))+(lam^2)*((mean(bc)^2)-2*var(bc)))))^(1/lam); meanr1 varr1 <- var(bc)*(meanr1^(-2*lam+2)); varr1 ### Yeo-Johnsonova transformace library(car) boxCox(lm(x~1), lambda = seq(-2, 2, 1/10), plotit = TRUE, interp = TRUE, eps = 1/50, xlab=NULL, ylab=NULL, family="yjPower", param= "lambda", gamma=NULL,grid=TRUE) library(bestNormalize) yjt <- yeojohnson(x)$lambda library(VGAM) xjj = yeo.johnson(x, lambda=yjt, derivative = 0, epsilon = sqrt(.Machine$double.eps), inverse = FALSE) par(mfrow=c(1,2)) hist(x, main="",freq=F) hist(xjj, main="",xlab="retransf x", freq=F) par(mfrow=c(1,1)) ### Vyberova rozdeleni ### t-rozdeleni xrt = rt(600, df=length(xre)-1, ncp=0) hist(xrt) xre = seq(-5,5,0.05) xde = dt(xre, df=length(xre)-1, ncp=0, log = FALSE) plot(xre,xde, type="l") xre = seq(-5,5,0.05) xpe = pt(xre, df=length(xre)-1, ncp=0, lower.tail = TRUE, log.p = FALSE) qt(0.975, df=length(xre)-1, ncp=0, lower.tail = TRUE, log.p = FALSE) #### Chi-kvadrat rozdeleni library(visualize) # Evaluates lower tail. visualize.chisq(stat = 1, df = 3, section = "lower") # Evaluates bounded region. visualize.chisq(stat = c(1,1), df = 3, section = "bounded") # Evaluates upper tail. visualize.chisq(stat = 1, df = 3, section = "upper") #### F rozdeleni library(visualize) # Evaluates lower tail. visualize.f(stat = 1, df1 = 5, df2 = 4, section = "lower") # Evaluates bounded region. visualize.f(stat = c(1,1), df1 = 5, df2 = 4, section = "bounded") # Evaluates upper tail. visualize.f(stat = 1, df1 = 5, df2 = 4, section = "upper") ###### JEDNOSOUBOROVE TESTY #### # Vzorek s deklarovaným obsahem penicilinu 2.4 mg/l byl promeren HPLC analýzou. # Odpovídá, na hladine významnosti 0.05, stanovená koncentrace pozadavku? b314<- read.table("c:\\Users\\lubop\\Documents\\kurs\\Data1\\ascii03\\B\\B314.txt", header=T) # dekl obsah 2.4 mg/l View(b314) is.list(b314) x = unlist(b314) #### t-test ### # one sample t-test: t.test(x,mu=3, alternative = "two.sided") t.test(x, y = NULL, alternative = "two.sided", mu = 2.4, paired = FALSE, var.equal = FALSE, conf.level = 0.95) # alternative = c("two.sided", "less", "greater") #### medianovy test #### library(BSDA) SIGN.test(x, y = NULL, md = 2.4, alternative = "two.sided", conf.level = 0.95) #### Wilcoxonuv (Mann-Whitney) test #### wilcox.test(x, y = NULL, alternative = "two.sided", mu = 2.4, paired = FALSE, exact = NULL, correct = TRUE, conf.int = TRUE, conf.level = 0.95) ###### Location ########### # V analytické laboratori je obsah sledovaného léciva v plazme stanovován na dvou soustavách kapalinových chromatografu HPLC. # Poskytují obe soustavy správné a navzájem shodné výsledky, kdyz bylo zmereno 20 vzorku krevní plazmy s obsahem léciva 45 mg. b325ab<- read.table("c:\\Users\\lubop\\Documents\\kurs\\B325.txt", header=T) is.list(b325ab) X = as.data.frame(b325ab) # x = unlist(b325ab) x= unlist(X[,1]) y= unlist(X[,2]) #### t-test #### # t.test(x,mu=3, alternative = "two.sided") sd(x) sd(y) boxplot(x,y) t.test(x, y, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = TRUE, conf.level = 0.95) t.test(x, y, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) # Welch # alternative = c("two.sided", "less", "greater") #### 2 sample medianovy test #### # one sample sign test library(BSDA) SIGN.test(x, y, md = 0, alternative = "two.sided", conf.level = 0.95) #### one sample Wilcoxonuv (Mann-Whitney) test ### wilcox.test(x, y,alternative = "two.sided", mu = 0, paired = FALSE, exact = FALSE, correct = TRUE,conf.int = FALSE, conf.level = 0.95) # alternative = c("two.sided", "less", "greater") library(exactRankTests) wilcox.exact(x, y, mu = 0,alternative = "two.sided",paired = FALSE, exact = FALSE,conf.int = TRUE, conf.level = 0.95) boxplot(x,y) ### konfidencni intervaly t.test(x,conf.level=0.95)$conf.int library(DescTools) MeanCI(x,conf.level=0.95) library(Rmisc) CI(x,ci=0.95) ### bootstrap CI library(DescTools) BootCI(x, y = NULL, mean, bci.method = "norm", conf.level = 0.95, sides = "two.sided", R = 999) # bci.method = c("norm", "basic", "stud", "perc", "bca") # sides = c("two.sided", "left", "right") library(boot) Mboot = boot(x,function(x,i) mean(x[i]), R=10000) mean(Mboot$t[,1]) boot.ci(Mboot,conf = 0.95,type = "norm") #### Predikcni intervaly library(predictionInterval) pi.m(M=mean(x),SD=sd(x),n=length(x),rep.n=length(x)) #### Tolerancni intervaly library(tolerance) nptol.int(y,alpha=0.05,P=0.95,side=2,method="WALD",upper=NULL,lower=NULL) # method = c("WILKS", "WALD", "HM","YM") #### two sample t-test #### boxplot(x,y) t.test(x, y, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = TRUE, conf.level = 0.95) t.test(x, y , alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) # Welch test # alternative = c("two.sided", "less", "greater") #### two sample median test ### library(BSDA) SIGN.test(x, y, alternative = "two.sided", conf.level = 0.95) library(RVAideMemoire) lxy= list(x,y); names(lxy)=c("x","y") gxy = stack(lxy); names(gxy)=c("xy","g") mood.medtest(gxy[,1], gxy[,2], exact = NULL) #### two sample Wilcoxonuv (Mann-Whitney) test ##### wilcox.test(x, y, conf.level = 0.95,alternative = "two.sided",exact = FALSE) library(exactRankTests) wilcox.exact(x, y, mu = 0,alternative = "two.sided",paired = FALSE, exact = FALSE,conf.int = TRUE, conf.level = 0.95) # "two.sided"(default),"greater"or"less" perm.test(x, y, paired=FALSE, alternative="two.sided", exact=FALSE, conf.int=FALSE, conf.level=0.95, tol=NULL) library(coin) wilcox_test(x~as.factor(xab),conf.int = FALSE,conf.level = 0.95) ### Brunner Munzeluv (zobecneny Wilcoxonuv) test library(lawstat) brunner.munzel.test(x, y, alternative="two.sided", alpha=0.05) # alternative = c("two.sided", "greater", "less") ###### Scale ####################################################################################### ### F test var.test(x, y, ratio = 1, alternative = "two.sided", conf.level = 0.95) # alternative = c("two.sided", "less", "greater") ### Ansari Bradley two sample test ansari.test(x, y, alternative = "two.sided", exact = FALSE, conf.int = FALSE, conf.level = 0.95) library(exactRankTests) ansari.exact(x, y, alternative = "two.sided", exact = FALSE, conf.int = FALSE, conf.level = 0.95) ### Siegel Tukey two sample test library(DescTools) # nefunguje? SiegelTukeyTest(x, y, adjust.median = TRUE, alternative = "two.sided", mu = 0, exact = FALSE, correct = TRUE, conf.int = TRUE, conf.level = 0.95) library(jmuOutlier) stt = siegel.test(x,y,alternative="two.sided",reverse = FALSE,all.perms=TRUE,num.sim=999) stt$p.value ### Mood test of scale mood.test(x, y, alternative = "two.sided") alternative = c("two.sided", "less", "greater") library(fBasics) scaleTest(x, y, method="mood") lxy = list(x,y); names(lxy)= c("x","y") data = stack(lxy) data = as.data.frame(data); colnames(data)=c("xy","z") xy = data[,1] z = data[,2] ### Levene test library(DescTools) DescTools::LeveneTest(xy~z,center=median) # Brown-Forsythe DescTools::LeveneTest(xy~z,center=mean) ### Srovnani 2 distribuci ########################################################################################## ### Kolmogorov Smirnov test ks.test(x,y, alternative = "two.sided", exact = FALSE) # alternative = c("two.sided", "less", "greater") library(KScorrect) ks_test_stat(x, y) library(dgof) ks.test(x, y,alternative = "two.sided", exact = NULL, tol=1e-8, simulate.p.value=TRUE, B=2000) # alternative = c("two.sided", "less", "greater") library(twosamples) ks_test(x, y, nboots = 2000, p = 0.025) ### Cramer von Mises library(CDFt) CramerVonMisesTwoSamples(x, y) library(cramer) cramer.test(x,y,conf.level=0.95,replicates=1000,sim="ordinary",just.statistic=FALSE,kernel="phiCramer", maxM=2^14, K=160) library(twosamples) # nefunguje cvm_test(x, y, nboots = 2000, p = 0.05) ### Anderson Darling library(kSamples) ad.test(xy~z, data = data, method = "simulated", dist = FALSE, Nsim = 10000) # method = c("asymptotic", "simulated", "exact") library(twosamples) # nefunfuje ad_test(x, y, nboots = 2000, p = 0.05) stripchart(xy~z, method="jitter",pch=1,col=1,cex=1.5) boxplot(xy~z, pch=1,col=5,cex=1.5) #### DTS Test library(twosamples) dts_test(x, y, nboots = 2000, p = 0.05) two_sample(x, y, nboots = 2000, p = 0.05) ### Baumgartner-Weiss-Schindler/ test library(BWStest) bws_test(x, y, method = "BWS", alternative = "two.sided") # method = c("default", "BWS", "Neuhauser", "B1", "B2", "B3", "B4", "B5") # alternative = c("two.sided", "greater", "less") ### Kuiper test library(twosamples) kuiper_test(x, y, nboots = 2000, p = 0.05) ### Wasserstein Distance Test library(twosamples) wass_test(x, y, nboots = 2000, p = 0.05) ### Parove testy ################################################################## # Weight of the mice before treatment before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7) summary(before) # Weight of the mice after treatment after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2) summary(after) my_data <- data.frame(weight=c(before,after),group=rep(c("before","after"),each=10)) my_data$group <- factor(my_data$group, levels=c("before","after")) boxplot(my_data$weight~my_data$group) library(PairedData) pd <- paired(before, after) plot(pd, type = "profile") d <- before-after shapiro.test(d) # Shapiro-Wilk normality test for the differences t.test(before, after, paired = TRUE) t.test(weight ~ group, data = my_data, paired = TRUE) # average weight before treatment is less than the average weight after treatment t.test(weight ~ group, data = my_data, paired = TRUE, alternative = "less") # average weight before treatment is greater than the average weight after treatment t.test(weight ~ group, data = my_data, paired = TRUE, alternative = "greater") library(fmsb) library(parviol) ### Analyza rozptylu ##################################################################################### # Byl sledován vliv druhu konzervacního cinidla A1 az A4 (faktor A) na trvanlivost potravinárského výrobku. # Vysetrete, zda na hladine významnosti 0.05 závisí trvanlivost potravin na druhu konzervacního cinidla. # Který druh konzervacního cinidla dosahuje silne odlisných výsledku vuci ostatním? E503 <- read.table("C:/Users/lubop/Documents/kurs/Data1/ascii05/E/E503.txt")[-1,] E503 data = stack(E503) data data = as.data.frame(data); colnames(data)=c("elem","lom") elem = data[,1] lom = data[,2] ### Distribucni grafy boxplot(E503,ylab="conc",xlab="lab",las=1,cex=1,outline=TRUE) stripchart(E503, pch=1, col=1,vert=TRUE,cex=1,add=TRUE) boxplot(elem~lom,ylab="conc",xlab="lab",las=1,cex=1,outline=TRUE) stripchart(elem~as.factor(lom), pch=1, col=1,vert=TRUE,cex=1,add=TRUE) library(beeswarm) boxplot(E503,ylab="",xlab="",las=1,cex=1,outline=TRUE,cex.lab=1) beeswarm(E503, col = 1, pch = 1, add = TRUE,horizontal=FALSE,pwcol = NULL,cex=1) boxplot(elem~lom,ylab="",xlab="",las=1,cex=1,outline=TRUE,cex.lab=1) beeswarm(elem~as.factor(lom), col = 1, pch = 1, add = TRUE,horizontal=FALSE,pwcol = NULL,cex=1) library(beanplot) beanplot(E503, col = "bisque", ylim = "") beanplot(elem ~ lom, data = data, col = "bisque", ylim = "") #### Global test of model assumptions library(gvlma) gvmodel <- gvlma(lm(elem~lom)) summary(gvmodel) gvmodel <- gvlma(lm(elem~lom-1)) summary(gvmodel) ### outliers # Bonferroni Outlier Test library(car) ou = outlierTest(lm(elem~lom), cutoff=0.05, n.max=15, order=TRUE); ou as.numeric(names(ou$rstudent)) # Lund Outlier Test # modified https://stackoverflow.com/questions/1444306/how-to-use-outlier-tests-in-r-code/1444548#1444548 lundout<-function(x1,x2,a) { testoutlm<-lm(x1~x2) standardresid<-rstandard(testoutlm) nl<-length(x1) q<-length(testoutlm$coefficients) F<-qf(c(1-(a/nl)),df1=1,df2=nl-q-1,lower.tail=TRUE) crit<-((nl-q)*F/(nl-q-1+F))^0.5 oul = which(abs(standardresid)>crit) return(oul) } lundout(elem,lom,0.05) library(referenceIntervals) vanderLoo.outliers(stresid) cook.outliers(stresid) dixon.outliers(stresid) horn.outliers(stresid) #### shoda rozptylu ## Bartlett bartlett.test(elem~as.factor(lom)) library(RVAideMemoire) pairwise.var.test(elem,as.factor(lom)) library(onewaytests) homog.test(elem~lom, data = data, method = "Bartlett", alpha = 0.05, na.rm = TRUE, verbose = TRUE) library(RVAideMemoire) perm.bartlett.test(elem~lom, nperm = 999, progress = TRUE) pairwise.perm.var.test(elem,as.factor(lom),nperm=999) ## Fligner Kileen fligner.test(elem~lom) library(onewaytests) homog.test(elem~lom, data = data, method = "Fligner", alpha = 0.05, na.rm = TRUE, verbose = TRUE) library(coin) fligner_test(elem~lom, data=data, subset = NULL, weights = NULL, conf.int = TRUE, conf.level = 0.95, ties.method = "average-scores") fligner_test(elem~lom, data=data, subset = NULL, weights = NULL, conf.int = TRUE, conf.level = 0.95, ties.method = "average-scores", distribution = approximate(nresample = 1000)) # ties.method = c("mid-ranks", "average-scores"), ## Conover, Johnson, Johnson library(coin) conover_test(elem~lom, data=data, subset = NULL, weights = NULL, conf.int = FALSE, conf.level = 0.95,ties.method = "average-scores") conover_test(elem~lom, data=data, subset = NULL, weights = NULL, conf.int = FALSE, conf.level = 0.95,ties.method = "average-scores", distribution = approximate(nresample = 1000)) ## Levene library(lawstat) lawstat::levene.test(elem, group = as.factor(lom), location="mean", trim.alpha=0.25,bootstrap = TRUE, num.bootstrap=999, kruskal.test=TRUE,correction.method="none") # location=c("median", "mean", "trim.mean") # correction.method=c("none","correction.factor","zero.removal","zero.correction") library(car) car::leveneTest(elem~as.factor(lom),center=mean) car::leveneTest(elem~as.factor(lom),center=median) # center = c("median", "mean") library(DescTools) DescTools::LeveneTest(elem~as.factor(lom),center=median) DescTools::LeveneTest(elem~as.factor(lom),center=mean) library(onewaytests) homog.test(elem ~ lom, data = data, method = "Levene", alpha = 0.05, na.rm = TRUE, verbose = TRUE) library(s20x) s20x::levene.test(elem ~ lom, data=data, digit = 5, show.table = TRUE) library(asbio) modlevene.test(elem,lom) ## Brown Forsythe library(HH) HH::hov(elem~lom,method = "bf") library(onewaytests) bf.test(elem ~ lom, data = data, alpha = 0.05, na.rm = TRUE, verbose = TRUE) # Hartley test library(PMCMRplus) hartleyTest(elem~lom) # Cochran C test library(outliers) cochran.test(elem~lom) library(GAD) C.test(lm(elem ~ lom)) library(PMCMRplus) cochranTest(elem,as.factor(lom), subset=NULL, na.action=NULL,alternative = "greater") # alternative = c("greater", "less") library(outliers) # Cochran cochran.test(elem~lom, inlying = FALSE) cochran.test(elem~lom, inlying = TRUE) library(PMCMRplus) GSTTest(elem,as.factor(lom), subset=NULL, na.action=NULL,alternative = "two.sided") # dist = c("Chisquare", "KruskalWallis") ans <- aov(elem ~ as.factor(lom)) # spravnejsi GSTTest(residuals(ans) ~ as.factor(lom), subset=NULL, na.action=NULL,alternative = "two.sided") ### ANOM for variances library(ANOM) # Absolute deviations from the median (robust Levene residuals) mmedian <- tapply(elem, lom, median)[lom] absdev <- abs(elem - mmedian) dataf = data.frame(cbind(absdev,lom)) dataf[,1] = as.numeric(absdev) dataf[,2] = as.factor(lom) library(multcomp) mod <- lm(absdev~lom, dataf) test <- glht(mod, mcp(lom="GrandMean")) ANOM(test,xlab="") # variance mmean <- tapply(elem, lom, mean)[lom] sqdev <- (elem - mmean)^2 dataf = data.frame(cbind(sqdev,lom)) dataf[,1] = as.numeric(sqdev) dataf[,2] = as.factor(lom) library(multcomp) mod <- lm(sqdev~lom, dataf) test2 <- glht(mod, mcp(lom="GrandMean")) ANOM(test2,xlab="") #### Trend v rozptylech # Mudholkar-McDermott-Aumont Test library(lawstat) mma.test(elem, lom, tail = "both") # tail = c("right", "left", "both") # robustni Mudholkar-McDermott-Aumont Test library(lawstat) robust.mmm.test(elem, lom, tail = "both") # tail = c("right", "left", "both") # Ltrend library(lawstat) ltrend.test(elem,lom,score=NULL,location="mean", tail="both", trim.alpha=0.25,bootstrap=FALSE,num.bootstrap=1000, correction.method = "none", correlation.method = "pearson") # location = c("median", "mean", "trim.mean") # tail = c("right", "left", "both") # correction.method = c("none", "correction.factor", "zero.removal", "zero.correction") # correlation.method = c("pearson", "kendall", "spearman") # Lntrend library(lawstat) lnested.test(elem,lom,location="mean", tail="both",trim.alpha=0.25, bootstrap=FALSE, num.bootstrap=1000, correction.method="none", correlation.method="pearson") # location = c("median", "mean", "trim.mean") # tail = c("right", "left", "both") # correction.method = c("none", "correction.factor", "zero.removal", "zero.correction") # correlation.method = c("pearson", "kendall", "spearman") ######################################################################################################################################################################################################################################################### #### ANOVA homoskedastic fit <- aov(elem~lom) summary(fit) # display Type I ANOVA table summary.lm(fit) anova(fit) model.tables(fit,se=T) model.tables(fit,"means",se=T) library(car) Anova(fit, type="II") ### If you use type="III", you need the following line before the analysis ### options(contrasts = c("contr.sum", "contr.poly")) library(FSA) Summarize(elem ~ lom, data=data, digits=3) library(multcompView) multcompBoxplot(elem ~ lom, data = data, horizontal = FALSE, compFn = "TukeyHSD",sortFn = "mean", decreasing = TRUE, plotList = list(boxplot = list(fig = c(0, 0.75, 0, 1)), multcompTs = list(fig = c(0.7, 0.85, 0, 1)),multcompLetters = list(fig = c(0.87, 0.97, 0.03, 0.98), fontsize = 20,fontface = "bold"))) # Student-Newman-Keuls (SNK) test library(PMCMRplus) out = snkTest(elem,lom, subset=NULL, na.action=NULL,alternative = "two.sided") summary(out) # Tukey Honestly Significant Differences test, Tukey - Kramer library(agricolae) out <- HSD.test(aov(elem~lom),"lom",alpha = 0.95, group=TRUE,console=TRUE,main="") plot(out,las=2) library(PMCMRplus) out = tukeyTest(elem,lom, subset=NULL, na.action=NULL,alternative = "two.sided") summary(out) summaryGroup(out) ## Scheffe test library(agricolae) out <- scheffe.test(aov(elem~lom),"lom",alpha = 0.95, group=TRUE,console=TRUE,main="") plot(out,las=2) library(PMCMRplus) out = scheffeTest(elem~lom, subset=NULL, na.action=NULL) summary(out) summaryGroup(out) ## Waller test library(agricolae) out <- waller.test(aov(elem~lom),"lom", K = 100, group=TRUE, main = NULL,console=TRUE) plot(out,las=2) # least significant difference all-pairs comparisons test for normally distributed data with equal group variances. library(PMCMRplus) out = lsdTest(elem,lom, subset=NULL, na.action=NULL); out summary(out) summaryGroup(out) library(agricolae) out <- LSD.test(aov(elem~lom),"lom", alpha=0.95, group=TRUE,main=NULL,console=TRUE,p.adj="bonferroni") # p.adj=c("none","holm","hommel","hochberg", "bonferroni", "BH", "BY", "fdr") plot(out,las=2) # Ryan, Einot and Gabriel and Welsch multiple range test library(agricolae) out<- REGW.test(aov(elem~lom),"lom",alpha = 0.95, group=TRUE, main = NULL,console=TRUE) plot(out,las=2) summary(out) ############################################################################################################################################################################################################################################################# #### ANOVA heteroskedastic # trimmed means library(WRS2) t1way(elem~lom, data, tr = 0.2, alpha = 0.05, nboot = 500) vx=lincon(elem~lom, data, tr = 0.2, alpha = 0.05) cbind(vx$comp[,c(1,2)],round((vx$comp[,6]*100),0)) ## Alexander-Govern test. library(onewaytests) out = ag.test(elem ~ lom, data = data, alpha = 0.05, na.rm = TRUE, verbose = TRUE) out out =paircomp(out, adjust.method = "BH") # p.adjust.methods: "holm", "hochberg","hommel","bonferroni","BH","BY","fdr","none","tukey","games-howell" ## Welch test library(car) mod = aov(elem~lom, data=data, na.action=na.omit) # mod = lm(elem~lom, data=as.data.frame(cbind(as.numeric(elem),lom)), na.action=na.omit) Anova(mod, white.adjust=TRUE) # white.adjust=c(FALSE, TRUE, "hc3", "hc0", "hc1", "hc2", "hc4") oneway.test(elem~as.factor(lom), subset=NULL, na.action=NULL, var.equal = FALSE) library(welchADF) summary(welchADF.test(elem~lom)) library(asbio) pairw.oneway(elem,as.factor(lom), conf = 0.95, digits = 5, method = "fdr") library(userfriendlyscience) oneway(elem,lom, fullDescribe = TRUE, posthoc = 'fdr',corrections=TRUE,plot=TRUE) # p.adjust.methods: "holm", "hochberg","hommel","bonferroni","BH","BY","fdr","none","tukey","games-howell" library(coin) oneway_test(elem~lom, data=data, subset = NULL, weights = NULL, conf.int = FALSE, conf.level = 0.95, ties.method = "average-scores") oneway_test(elem~lom, data=data, subset = NULL, weights = NULL, conf.int = FALSE, conf.level = 0.95, ties.method = "average-scores", distribution = approximate(nresample = 1000)) ## Games-Howell library(userfriendlyscience) posthocTGH(elem,lom, method= "games-howell", conf.level = 0.95, digits=2, p.adjust="holm",formatPvalue = TRUE) # method=c("games-howell", "tukey") # p.adjust= c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none") library(PMCMRplus) out = gamesHowellTest(elem~lom,alternative = "two.sided",subset=NULL, p.adjust="BH") summary(out) summaryGroup(out) ## Tamhane T2 library(PMCMRplus) out = tamhaneT2Test(elem,lom, subset=NULL, na.action=NULL,welch = TRUE,alternative = "two.sided") summary(out) summaryGroup(out) ## Ury Wiggins Hochberg library(PMCMRplus) out = uryWigginsHochbergTest(elem, lom, subset=NULL, na.action=NULL,alternative = "two.sided", p.adjust.method ="holm") # p.adjust.method = p.adjust.methods, ...) summary(out) summaryGroup(out) library(robustbase) r.mod = lmrob(elem~lom,data=data, na.action=na.omit,weights=NULL) summary(r.mod) #### shoda distribuce ######################################################################################### library(PMCMRplus) adKSampleTest(elem~lom, subset=NULL, na.action=NULL) out = adAllPairsTest(elem~lom, subset=NULL, na.action=NULL, p.adjust.method = "BH"); out # p.adjust.methods = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none") summary(out) summaryGroup(out) barPlot(out, alpha = 0.05) plot(out, alpha = 0.05) library(PMCMRplus) # Murakami k-sample Baumgartner Weiss Schindler Test of Equal Distributions bwsKSampleTest(elem,lom, subset=NULL, na.action,nperm = 1000) out <- bwsAllPairsTest(elem, lom, p.adjust="fdr"); out summary(out) summaryGroup(out) ######################################################################################################################################################################################################################################################################## #### Non-parametric ## Kruskal-Wallis test # homoscedastic kruskal.test(elem~lom) library(agricolae) kruskal(elem,lom, alpha = 0.05, p.adj="holm",group=TRUE, main = NULL,console=TRUE) # p.adj=c("none","holm","hommel", "hochberg", "bonferroni", "BH", "BY", "fdr") library(asbio) pairw.kw(elem, lom, conf=0.05) plot(pairw.kw(elem, lom, conf=0.05),las=2) library(NSM3) pKW(elem,lom,method="Monte Carlo",n.mc=500) # Kruskal-Wallis # method=c("Exact", "Monte Carlo", or "Asymptotic") library(coin) kruskal_test(elem~lom, data=data, subset = NULL, weights = NULL, conf.int = FALSE, conf.level = 0.95, ties.method = "average-scores") kruskal_test(elem~lom, data=data, subset = NULL, weights = NULL, conf.int = FALSE, conf.level = 0.95, ties.method = "average-scores", distribution = approximate(nresample = 1000)) library(onewaytests) out = kw.test(elem ~ lom, data = data, alpha = 0.05, na.rm = TRUE, verbose = TRUE) # paircomp(out, adjust.method = "bonferroni") # adjust.method = c("bonferroni", "holm", "hochberg", "hommel", "BH","BY", "fdr", "none") library(PMCMRplus) kruskalTest(elem, lom, subset=NULL, na.action=NULL, dist = "KruskalWallis") # dist = c("Chisquare", "KruskalWallis", "FDist") ## Nemenyi library(PMCMR) posthoc.kruskal.nemenyi.test(elem,lom, method="Tukey") # method="Tukey", "Chisq" library(PMCMRplus) out = kwAllPairsNemenyiTest(elem, lom, subset=NULL, na.action=NULL,dist = "Tukey"); out # dist = c("Tukey", "Chisquare") summary(out) summaryGroup(out) library(PMCMRplus) out=kwAllPairsDunnTest(elem, lom, subset=NULL, na.action=NULL, p.adjust.method ="fdr"); out summary(out) summaryGroup(out) library(PMCMR) out = posthoc.kruskal.dunn.test(elem,lom,p.adjust.method ="holm"); out summary(out) ## Conover's non-parametric all-pairs comparison test for Kruskal-type ranked data. library(conover.test) conover.test(elem,lom, method="holm", kw=TRUE, label=TRUE,wrap=FALSE, table=TRUE, list=FALSE, rmc=FALSE, alpha=0.95) # method=c("none", "bonferroni", "sidak", "holm", "hs", "hochberg", "bh", "by") library(PMCMRplus) out = kwAllPairsConoverTest(elem, lom, subset=NULL, na.action=NULL,p.adjust.method = "holm"); out # p.adjust.method = c("single-step", p.adjust.methods); single-step = Tukey's p-adjustment summary(out) summaryGroup(out) library(PMCMR) posthoc.kruskal.conover.test(elem, lom,p.adjust.method ="holm") ## Dwass, Steel, Critchlow and Fligner library(PMCMRplus) out = dscfAllPairsTest(elem, lom, subset=NULL, na.action=NULL, p.adjust.method = "holm", alternative = "two.sided") summary(out) summaryGroup(out) E518 <- read.table("C:/Users/lubop/Documents/kurs/Data1/ascii05/E/E518.txt") data = stack(E518[-1,]) data = as.data.frame(data); colnames(data)=c("elem","lom") elem = data[,1] lom = data[,2] # Jonckheere-Terpstra (testuje trend v poradi souboru) library(DescTools) lomc = ordered(sapply(lom,function(x) sub("V", "", x))) JonckheereTerpstraTest(elem, lomc, alternative = "increasing",nperm = 500) # alternative = c("two.sided", "increasing", "decreasing") #### normal scores ## Waerden test library(agricolae) out<-waerden.test(elem,lom,alpha=0.95,group=TRUE,console=TRUE,main="") plot(out,las=2) # van-der-Waerden normal scores test library(PMCMRplus) vanWaerdenTest(elem,as.factor(lom), subset=NULL, na.action=NULL,alternative = "two.sided") out = vanWaerdenAllPairsTest(elem,as.factor(lom), subset=NULL, na.action=NULL,alternative = "two.sided",p.adjust.method = "fdr") # p.adjust.method = c("single-step", p.adjust.methods) summary(out) summaryGroup(out) library(PMCMR) vanWaerden.test(elem,as.factor(lom), subset=NULL, na.action=NULL,alternative = "two.sided") out = posthoc.vanWaerden.test(elem,as.factor(lom),p.adjust.method ="holm") summary(out) library(coin) normal_test(as.numeric(elem)~as.factor(lom), ties.method = "mid-ranks", conf.int = FALSE, conf.level = 0.95) normal_test(as.numeric(elem)~as.factor(lom), ties.method = "mid-ranks", conf.int = FALSE, conf.level = 0.95, distribution = approximate(B = 1000)) # method = c("mid-ranks", "average-scores") # Lu-Smith normal scores test, transformace dat na normalitu library(PMCMRplus) normalScoresTest(elem,lom, subset=NULL, na.action=NULL,alternative = "two.sided") out = normalScoresAllPairsTest(elem,lom, subset=NULL, na.action=NULL,alternative = "two.sided", p.adjust.method = "holm"); out # p.adjust.method = c("single-step", p.adjust.methods) summary(out) summaryGroup(out) library(ARTool) oart = art(elem ~ lom, data = data) summary(oart) anova(oart, response = "art",type = "I", factor.contrasts = "contr.sum", test = "F", all.rows = FALSE) # response = c("art", "aligned") # type = c("III","II", "I", 3, 2, 1) # factor.contrasts = "contr.sum" # test = c("F", "Chisq") library(ART) aligned.rank.transform(elem ~ lom, data = data, perform.aov = TRUE, SS.type = "I") # SS.type = c("III","II", "1I") ## ordinal logistic regression library(rms) olr.mod = orm(elem~lom) olr.mod ## Median test library(agricolae) out<-Median.test(elem,lom,correct=TRUE,simulate.p.value = TRUE,alpha=0.95,group=TRUE,console=TRUE,main="") plot(out,las=2) library(coin) median_test(elem~lom, mid.score = c("0", "0.5", "1"), conf.int = FALSE, conf.level = 0.95) median_test(elem~lom, mid.score = c("0", "0.5", "1"), conf.int = FALSE, conf.level = 0.95, distribution = approximate(nresample = 1000)) library(RVAideMemoire) mood.medtest(elem, lom, exact = TRUE) pairwise.mood.medtest(elem, lom, exact = TRUE, p.method = "fdr") ### Standard ANOM (Gaussian, homoscedastic) library(multcomp) model <- lm(elem ~ lom, data) hom <- glht(model, mcp(lom="GrandMean"), alternative="two.side") ANOM(hom,ylab="",xlab="") ### Heteroscedastic ANOM # With sandwich covariance matrix estimate (Herberich et al. 2010) library(ANOM) library(multcomp) library(sandwich) model <- lm(elem ~ lom, data) het1 <- glht(model, mcp(lom="GrandMean"), alternative="two.side", vcov=vcovHC) ANOM(het1,ylab="",xlab="") ### Nonparametric ANOM library(ANOM) ss <- tapply(elem, lom, length) library(multcomp) Mat <- contrMat(ss, "GrandMean") ## Using a multivariate t approximation library(nparcomp) mult <- mctp(elem~lom, data=data, type="UserDefined", contrast.matrix=Mat, alternative="two.side", info=FALSE, correlation=TRUE, asy.method="mult.t") mult ANOM(mult) ### Dunnettuv test ###################################################################### data <- data.frame(value = c(76, 77, 77, 81, 82, 82, 83, 84, 85, 89, 81, 82, 83, 83, 83, 84, 87, 90, 92, 93, 77, 78, 79, 88, 89, 90, 91, 95, 95, 98), Group = rep(c("control", "V1", "V2"), each = 10)) data$Group<-as.factor(data$Group) head(data) boxplot(value~Group,data = data,main = "",xlab = "Groups",ylab = "Value",col = "gray",border = "black") model <- aov(value ~ Group, data = data) summary(model) library(DescTools) DunnettTest(x=data$value, g=data$Group) library(PMCMRplus) dunnettTest(x=data$value, g=data$Group, alternative = "two.sided") # alternative = c("two.sided", "greater", "less") boxplot(Ozone ~ Month, data = airquality) DunnettTest(Ozone ~ Month, data = airquality) DunnettTest(Ozone ~ Month, data = airquality, control="8", conf.level=0.9) ## Mucociliary efficiency from the rate of removal of dust in normal ## subjects, subjects with obstructive airway disease, and subjects ## with asbestosis. reference <- c(2.9, 3.0, 2.5, 2.6, 3.2) # normal subjects OAD <- c(3.8, 2.7, 4.0, 2.4) # with obstructive airway disease asbestosis <- c(2.8, 3.4, 3.7, 2.2, 2.0) # with asbestosis data2 = list(reference, OAD, asbestosis) names(data2) = c("normal", "OAD", "asbestosis") boxplot(data2) DunnettTest(data2) ##### 2-way ANOVA #################################################################################################################################### library(openxlsx) shn = getSheetNames("c:\\Users\\lubop\\Documents\\kurs\\DVpisky1.xlsx") shn sal <- read.xlsx("c:\\Users\\lubop\\Documents\\kurs\\DVpisky1.xlsx", sheet = "2wANOVA", startRow = 1, colNames = TRUE, rowNames = FALSE,detectDates = FALSE, skipEmptyRows = TRUE,rows = NULL, cols = NULL,check.names = FALSE) data = as.data.frame(sal) nitr = data[,1] subst = data[,2] hnoj = data[,3] ### Global test of model assumptions library(gvlma) gvmodel <- with(data,gvlma(lm(nitr~subst*hnoj))) summary(gvmodel) fit = lm(nitr~subst*hnoj,data=data) fit = lm(nitr~subst*hnoj-1,data=data) summary(fit) anova(fit) fit2 = aov(nitr~subst*hnoj,data=data) anova(fit2) summary(fit2) library(Hmisc) with(data,summary(nitr~subst*hnoj)) with(data,summary(nitr~subst*hnoj-1)) library(car) (my_anova <- aov(nitr~subst*hnoj,data=data)) anova(my_anova) Anova(my_anova, type = "II") # preferable # Anova(my_anova, type = "III") with(data,interaction.plot(subst,hnoj,nitr,ylab="",type ="l")) summary(aov(nitr~subst*hnoj,data=data)) with(data,interaction.plot(subst,hnoj,nitr,ylab="")) anova(aov(nitr~subst*hnoj,data=data)) library(ExpDes) with(data, fat2.crd(subst, hnoj, nitr, quali = c(TRUE, TRUE), mcomp = "tukey", fac.names = c("substrat", "hnojeni"), sigT = 0.05, sigF = 0.05)) library(welchADF) summary(welchADF.test(lm(nitr~subst*hnoj,data=data))) library(ARTool) oart = art(nitr~as.factor(subst)*as.factor(hnoj),data=data) summary(oart) anova(oart, response = "art",type = "I", factor.contrasts = "contr.sum", test = "F", all.rows = FALSE) # response = c("art", "aligned") # type = c("III","II", "I", 3, 2, 1) # factor.contrasts = "contr.sum" # test = c("F", "Chisq") library(ART) aligned.rank.transform(nitr~subst*hnoj,data=data,perform.aov = TRUE, SS.type = "I") # SS.type = c("III","II", "1I") aligned.rank.transform(nitr~subst*hnoj,data=data,perform.aov = TRUE, SS.type = "II") library(rankFD) model <- rankFD(nitr~subst*hnoj,data=data, CI.method = "Normal", effect = "unweighted", hypothesis = "H0F") model ## ordinal logistic regression library(rms) orm(nitr~subst*hnoj,data=data) library(lmPerm) # perm "Exact", "Prob", "SPR" will produce permutation probabilities. Anything else, such as "", will produce F-test probabilities. out = aovp(nitr~subst*hnoj,data=data,perm="Prob",nCycle = 5000); out summary(out) anova(lmp(nitr~subst*hnoj,data=data)) library(permuco) out = aovperm(nitr~subst*hnoj,data=data, np = 5000, method = "freedman_lane"); out summary(out) library(robustbase) model = lmrob(nitr~subst*hnoj-1,data=data) summary(model) plot(fitted(model), residuals(model)) abline(h=0, col=2, lty=2) library(robustbase) model = lmrob(nitr~subst*hnoj,data=data) summary(model) plot(fitted(model), residuals(model)) abline(h=0, col=2, lty=2) ################################################################################ library(openxlsx) pisky <- read.xlsx("c:\\Users\\lubop\\Documents\\kurs\\DVpisky1.xlsx", sheet = "DVPisky", startRow = 1, colNames = TRUE, rowNames = TRUE,detectDates = FALSE, skipEmptyRows = TRUE,rows = NULL, cols = NULL,check.names = FALSE) summary(pisky) pisky = pisky[,-1] X = pisky[,"Sr"] Y = pisky[,"Ba"] plot(X,Y,col=0,pch=1, xlab="Sr",ylab="Ba",cex=1,cex.lab=1,cex.axis=1,main="",ylim=c(0,650)) # xlim=c(0.1,1.4),ylim=c(0.2,1.7), nn = which(Y>200) points(X[nn],Y[nn],col=2,pch=16) points(X[-nn],Y[-nn],col=3,pch=17) legend("topleft",c("normal", "contam"),col=c(3,2),pch=c(17,16),cex=0.8) sdpisky <- read.xlsx("c:\\Users\\lubop\\Documents\\kurs\\DVpisky1.xlsx", sheet = "sdDVPisky", startRow = 1, colNames = TRUE, rowNames = TRUE,detectDates = FALSE, skipEmptyRows = TRUE,rows = NULL, cols = NULL,check.names = FALSE) sdx = sdpisky[,"Sr"] sdy = sdpisky[,"Ba"] library(plotrix) errm1 <- qt(0.975,(length(X)-1))*sdx/sqrt(length(X)) errm2 <- qt(0.975,(length(Y)-1))*sdy/sqrt(length(Y)) plotCI(X,Y,uiw=errm1,err="x",add=TRUE) plotCI(X,Y,uiw=errm2,err="y",add=TRUE) text(X,Y, labels=rownames(pisky), cex=0.6, pos=3,col=4) ##### KORELACE ################################################################################ ## Anscombovo kvarteto library(asbio) data(anscombe) cor(anscombe[,"x1"],anscombe[,"y1"]) cor(anscombe[,"x2"],anscombe[,"y2"]) cor(anscombe[,"x3"],anscombe[,"y3"]) cor(anscombe[,"x4"],anscombe[,"y4"]) par(mfrow=c(2,2)) plot(anscombe[,"x1"],anscombe[,"y1"]) plot(anscombe[,"x2"],anscombe[,"y2"]) plot(anscombe[,"x3"],anscombe[,"y3"]) plot(anscombe[,"x4"],anscombe[,"y4"]) par(mfrow=c(1,1)) library(datasauRus) # Anscombe par(mfrow = c(5, 3), mar=c(1,3,3,1)) nms <- names(datasaurus_dozen_wide) for (i in seq(1, 25, by = 2)){ nm <- substr(nms[i], 1, nchar(nms[i]) - 2) plot(datasaurus_dozen_wide[[nms[i]]], datasaurus_dozen_wide[[nms[i+1]]], xlab = "", ylab = "", main = nm) } # Simpson par(mfrow = c(1, 2)) plot(simpsons_paradox_wide[["simpson_1_x"]], simpsons_paradox_wide[["simpson_1_y"]], xlab = "x", ylab = "y", main = "Simpson's Paradox 1") plot(simpsons_paradox_wide[["simpson_2_x"]], simpsons_paradox_wide[["simpson_2_y"]], xlab = "x", ylab = "y", main = "Simpson's Paradox 2") par(mfrow = c(1, 1)) ################################################################################################################ library(ACSWR) data(viscos) data=viscos x = data[,1] y = data[,2] plot(x,y) # kovariance cov(x,y) # korelace cov(x,y)/(sd(x)*sd(y)) cor(x,y) library(openxlsx) pisky <- read.xlsx("c:\\Users\\lubop\\Documents\\kurs\\DVpisky1.xlsx", sheet = "Pisky", startRow = 1, colNames = TRUE, rowNames = TRUE,detectDates = FALSE, skipEmptyRows = TRUE,rows = NULL, cols = NULL,check.names = FALSE) summary(pisky) pisky = pisky[,-1] colnames(pisky) x = pisky[,"Sr"] y = pisky[,"Ba"] ################################################################################################################ cor(x,y, method="pearson") cor(x,y, method="spearman") cor(x,y, method="kendall") library(StatDA) cor.sign(cbind(x,y), method="pearson") cor.sign(cbind(x,y), method="kendall") cor.sign(cbind(x,y), method="spearman") library(Hmisc) rs<-rcorr(x, y, type="spearman"); rs # type=c("pearson","spearman") library(StatDA) RobCor.plot(x, y, quan=1/2, alpha=0.025, colC=3, colR=4) library(robustbase) mr<-covMcd(cbind(x,y), alpha=0.975, cor=TRUE)$cor; mr library(fmsb) spearman.ci.sas(x, y, adj.bias=TRUE, conf.level=0.95) plot(x,y,log="") library(aplpack) bagplot(x,y,show.baghull=TRUE,show.loophull=TRUE,precision=1,dkmethod=2) ### test vyznamnosti cor.test(x, y, alternative = "two.sided", method = "pearson", exact = NULL, conf.level = 0.95, continuity = FALSE) # alternative = c("two.sided", "less", "greater") # method = c("pearson", "kendall", "spearman") library(UsingR) scatter.with.hist(x, y, hist.col = gray(0.95),trend.line = "lm") simple.scatterplot(x, y) # rank scatter plot xr <-rank(x) yr <-rank(y) plot(xr,yr, col=1, xlab=" x", ylab="y",cex=1.2) # text(xr,yr, labels=rownames(mb)) abline(0,1,lty=2) library(asbio) chi.plot(x,y) library(lcopula) K.plot(cbind(x,y), add = F) set.seed(123) data=cbind(x.y) library(boot) b3 <- boot(data, statistic = function(data, i) { cor(data[i, "x"], data[i, "y"], method='pearson')}, R = 1000) b3 ########################################################################################################## library(openair) corPlot(pisky, pollutants = NULL, type = "default",cluster = TRUE, cols = "hue", r.thresh = 0.8,text.col = c("black", "black"), auto.text = FALSE) cols =c("default","increment","heat","spectral","hue", "brewer1","greyscale") m = cor(pisky, method="pearson") library(ellipse) plotcorr(m) library(ellipse) colors <- c("#A50F15","#DE2D26","#FB6A4A","#FCAE91","#FEE5D9","white", "#EFF3FF","#BDD7E7","#6BAED6","#3182BD","#08519C") plotcorr(m, col=colors[5*m + 6]) library(corrgram) corrgram(m, order=TRUE) library(StatDA) mp<-cor(pisky, method="pearson") ms<-cor(pisky, method="spearman") CorCompare(mp, ms, labels1="pearson", labels2="spearman", ndigits=3, cex.cor=0.8, cex.label=0.8) # TESTY KORELACE library(fBasics) correlationTest(x, y, method = "pearson", title = NULL, description = NULL) # method = c("pearson", "kendall", "spearman") pearsonTest(x, y, title = NULL, description = NULL) kendallTest(x, y, title = NULL, description = NULL) spearmanTest(x, y, title = NULL, description = NULL) library(psych) corr.test(x, y, use = "pairwise",method="pearson",adjust="holm") # use= c("pairwise", "complete") # method= c("pearson", "spearman", "kendall") # adjustment for multiple tests: adjust = c("holm", "hochberg", "hommel","bonferroni", "BH", "BY", "fdr", "none") library(psych) # correlation matrix is an identity matrix cortest.bartlett(m, n = NULL) # Steiger test cortest.normal(m, R2 = NULL, n1 = NULL, n2 = NULL, fisher = TRUE) cortest(pisky,R2=NULL,n1=NULL,n2 = NULL, fisher = TRUE,cor=TRUE) # Jennrich test cortest.jennrich(m,R2,n1=NULL, n2=NULL) # alternative test cortest.mat(m,R2=NULL,n1=NULL,n2 = NULL) rb = corCi(pisky, keys = NULL, n.iter = 500, p = 0.05,overlap = FALSE,poly = FALSE, method = "pearson", plot=F,minlength=5,n=NULL) rb$rho rb$ci corCi(pisky, keys = NULL, n.iter = 500, p = 0.05,overlap = FALSE,poly = FALSE, method = "pearson", plot=T,minlength=5,n=NULL)