#Takto byla nagenerovana nase data #set.seed(1235) #data=c(rexp(117,1/30000),rlnorm(110,9.7,1.27)) #data=sample(data,227) #data=round(data) n<-length(data) #1.) library(MASS) #a) hist(data,freq=FALSE,ylim=c(0,0.000045)) lambda<-1/mean(data) #odhad lambda v exp. modelu mu<-mean(log(data)) #odhad mu v lognorm. modelu b<-sd(log(data)) # 1/(n-1)*.... sigma<-sqrt(b^2*(n-1)/n) #odhad sigma v lognorm. modelu fitdistr(data, "exponential") #stejny vysledek fitdistr(data, "lognormal") #stejny vysledek curve(dexp(x,lambda),col=2,add=TRUE) curve(dlnorm(x,mu,sigma),col=3,add=TRUE) #b) plot(density(data),ylim=c(0,0.000045)) curve(dexp(x,lambda),col=2,add=TRUE) curve(dlnorm(x,mu,sigma),col=3,add=TRUE) # data musi byt kladna library(logKDE) plot(logdensity(data,bw=0.7),ylim=c(0,0.000045)) curve(dexp(x,lambda),col=2,add=TRUE) curve(dlnorm(x,mu,sigma),col=3,add=TRUE) #c) plot(ecdf(data),main="Empiricka distribucni funkce") curve(pexp(x,lambda),col=2,add=TRUE) curve(plnorm(x,mu,sigma),col=3,add=TRUE) #d) D1<-function(x){ D1<-ecdf(data)(x)- pexp(x,lambda) } D2<-function(x){ D2<-ecdf(data)(x)-plnorm(x,mu,sigma) } curve(D1,0,200000,col=2,ylim=c(-0.07,0.07),ylab="D(x)") curve(D2,col=3,add=TRUE) abline(h=0) #e) Q-Q plot par(mfrow=c(1,2)) i<-1:n q<-qexp((i-0.5)/n,lambda) plot(sort(data)~q,xlab='Theoretical quantiles',ylab='Empirical quantiles', main='Exponential') abline(0,1) q2<-qlnorm((i-0.5)/n,mu,sigma) plot(sort(data)~q2,xlab='Theoretical quantiles',ylab='Empirical quantiles', main='Lognormal') abline(0,1) # P-P plot i<-1:n p<-i/(n+1) F<-pexp(sort(data),lambda) plot(p~F,xlab='Theoretical cdf',ylab='Empirical cdf', main='Exponential') abline(0,1) F2<-plnorm(sort(data),mu,sigma) plot(p~F2,xlab='Theoretical cdf',ylab='Empirical cdf', main='Lognormal') abline(0,1) ################################## #2.) #a) #Vsuvka pro simulovanou p-hodnotu K.-S. testu #pro shodu s exponencialnim rozdelenim s parametrem 1/(30 000) ks.test(data, "pexp",1/30000) stat<-ks.test(data, "pexp",1/30000)$stat p<-0 opak<-10000 for (i in 1:opak){ x<-rexp(n,1/30000) if (ks.test(x, "pexp", 1/30000)$stat>stat) p=p+1 } p/opak #simulovana p-hodnota ################################################################ stat<-ks.test(data, "pexp",lambda)$stat #hodnota testove statistiky p<-0 opak<-10000 for (i in 1:opak){ x<-rexp(n,lambda) lambda_hat<-1/mean(x) if (ks.test(x, "pexp", lambda_hat)$stat>stat) p=p+1 } p/opak #simulovana p-hodnota #a pote s lognormalnim stat2<-ks.test(data, "plnorm",mu,sigma)$stat #hodnota testove statistiky p2<-0 opak<-10000 for (i in 1:opak){ x<-rlnorm(n,mu,sigma) mu_hat<-mean(log(x)) b<-sd(log(x)) sigma_hat<-sqrt(b^2*(n-1)/n) if (ks.test(x, "plnorm", mu_hat,sigma_hat)$stat>stat2) p2=p2+1 } p2/opak #simulovana p-hodnota #b) #zacneme opet exponencialnim rozdelenim k<-floor(2*n^(2/5)) k #pocet trid i<-(1:(k-1))/k breaks<-c(0,qexp(i,lambda),10000000) #delici body prob<-rep(1/k,k) #teoreticke pravdepodobnosti hk <- hist(data,breaks=breaks)$counts #empiricke cetnosti chisq.test(hk,p=prob) #test pouzijeme jen pro vypocet testove statistiky (nesedi stupne volnosti)) X2<-chisq.test(hk,p=prob)$statistic 1-pchisq(X2,k-2) #hledana p-hodnota #a pote i lognormalnim breaks<-c(0,qlnorm(i,mu,sigma),10000000) prob<-rep(1/k,k) hk <- hist(data,breaks=breaks)$counts chisq.test(hk,p=prob) #test pouzijeme jen pro vypocet testove statistiky X2<-chisq.test(hk,p=prob)$statistic 1-pchisq(X2,k-3) #hledana p-hodnota #3.) #a) fitdistr(data, "exponential")$loglik fitdistr(data, "log-normal")$loglik #b) 2-fitdistr(data, "exponential")$loglik 4-fitdistr(data, "log-normal")$loglik #c) log(n)-2*fitdistr(data, "exponential")$loglik 2*log(n)-2*fitdistr(data, "log-normal")$loglik