#Takto byla nagenerovana nase data #set.seed(1235) #data=c(rexp(117,1/30000),rlnorm(110,9.7,1.27)) #data=sample(data,227) #data=round(data) n=length(data) #1.) library(MASS) #a) hist(data,freq=FALSE,ylim=c(0,0.000045)) lambda=1/mean(data) #odhad lambda v exp. modelu mu=mean(log(data)) #odhad mu v lognorm. modelu b=sd(log(data)) # 1/(n-1)*.... sigma=sqrt(b^2*(n-1)/n) #odhad sigma v lognorm. modelu fitdistr(data, "exponential") #stejny vysledek fitdistr(data, "lognormal") #stejny vysledek curve(dexp(x,lambda),col=2,add=TRUE) curve(dlnorm(x,mu,sigma),col=3,add=TRUE) #b) plot(density(data),ylim=c(0,0.000045)) curve(dexp(x,lambda),col=2,add=TRUE) curve(dlnorm(x,mu,sigma),col=3,add=TRUE) #c) plot(ecdf(data)) curve(pexp(x,lambda),col=2,add=TRUE) curve(plnorm(x,mu,sigma),col=3,add=TRUE) #d) D1=function(x){ D1=ecdf(data)(x)- pexp(x,lambda) } D2=function(x){ D2=ecdf(data)(x)-plnorm(x,mu,sigma) } curve(D1,0,200000,col=2,ylim=c(-0.07,0.07)) curve(D2,col=3,add=TRUE) abline(h=0) #e) Q-Q plot par(mfrow=c(1,2)) i=1:n q=qexp((i-0.5)/n,lambda) plot(sort(data)~q,xlab='Theoretical quantiles',ylab='Empirical quantiles', main='Exponential') abline(0,1) q2=qlnorm((i-0.5)/n,mu,sigma) plot(sort(data)~q2,xlab='Theoretical quantiles',ylab='Empirical quantiles', main='Lognormal') abline(0,1) #f) P-P plot i=1:n p=i/(n+1) F=pexp(sort(data),lambda) plot(p~F,xlab='Theoretical cdf',ylab='Empirical cdf', main='Exponential') abline(0,1) F2=plnorm(sort(data),mu,sigma) plot(p~F2,xlab='Theoretical cdf',ylab='Empirical cdf', main='Lognormal') abline(0,1) ################################## #2.) #a) #Vsuvka pro simulovanou p-hodnotu K.-S. testu #pro shodu s exponencialnim rozdelenim s parametrem 1/(30 000) ks.test(data, "pexp",1/30000) stat=ks.test(data, "pexp",1/30000)$stat p=0 opak=10000 for (i in 1:opak){ x=rexp(n,1/30000) if (ks.test(x, "pexp", 1/30000)$stat>stat) p=p+1 } p/opak #simulovana p-hodnota ################################################################ stat=ks.test(data, "pexp",lambda)$stat #hodnota testove statistiky p=0 opak=10000 for (i in 1:opak){ x=rexp(n,lambda) lambda_hat=1/mean(x) if (ks.test(x, "pexp", lambda_hat)$stat>stat) p=p+1 } p/opak #simulovana p-hodnota #a pote s lognormalnim stat2=ks.test(data, "plnorm",mu,sigma)$stat #hodnota testove statistiky p2=0 opak=10000 for (i in 1:opak){ x=rlnorm(n,mu,sigma) mu_hat=mean(log(x)) b=sd(log(x)) sigma_hat=sqrt(b^2*(n-1)/n) if (ks.test(x, "plnorm", mu_hat,sigma_hat)$stat>stat2) p2=p2+1 } p2/opak #simulovana p-hodnota #b) #zacneme opet exponencialnim rozdelenim k=floor(2*n^(2/5)) k #pocet trid i=1:(k-1) i=i/k breaks=c(0,qexp(i,lambda),10000000) #delici body prob=rep(1/k,k) #teoreticke pravdepodobnosti hk <- hist(data,breaks=breaks)$counts #empiricke cetnosti chisq.test(hk,p=prob) #test pouzijeme jen pro vypocet testove statistiky (nesedi stupne volnosti)) X2=chisq.test(hk,p=prob)$statistic 1-pchisq(X2,k-2) #hledana p-hodnota #a pote i lognormalnim breaks=c(0,qlnorm(i,mu,sigma),10000000) prob=rep(1/k,k) hk <- hist(data,breaks=breaks)$counts chisq.test(hk,p=prob) #test pouzijeme jen pro vypocet testove statistiky X2=chisq.test(hk,p=prob)$statistic 1-pchisq(X2,k-3) #hledana p-hodnota #3.) #a) fitdistr(data, "exponential")$loglik fitdistr(data, "log-normal")$loglik #b) 2-fitdistr(data, "exponential")$loglik 4-fitdistr(data, "log-normal")$loglik #c) log(n)-2*fitdistr(data, "exponential")$loglik 2*log(n)-2*fitdistr(data, "log-normal")$loglik