Centrální limitní věta

sčítání rovnom. rozdělených dat

  • součet k=5,10,...,25 vzorků z R(0,10) - 1e4 opakování
  • porovnání s N(5 k,sqrt(k*100/12.))
In [1]:
%matplotlib inline 
from matplotlib.pyplot import *
from math import sqrt,pi
from numpy import random,histogram,r_,array,exp,polyfit
nexp=10000
sim=array([random.uniform(0,10,30).reshape(5,6).sum(0).cumsum() for i in range(nexp)]).transpose()
bin=r_[0:200:1.]
import matplotlib.pyplot as rp
his1=[histogram(sim[0],bin)[0]/float(nexp)]
a=rp.plot(bin[:-1],his1[0])[0].axes
i=0
sig=sqrt((i+1)*500./12.)
a.plot(bin[:-1],1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2),'k:')
for i in range(1,len(sim)):
    s=sim[i]
    his1.append(histogram(s,bin)[0]/float(nexp))
    sig=sqrt((i+1)*500./12.)
    a.plot(bin[:-1],his1[-1])
    a.plot(bin[:-1],1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2),'k:')
sum(his1[-3])
a.legend(r_[5:31:5])
a
Out[1]:
<matplotlib.axes._subplots.AxesSubplot at 0x1fff1473f28>

exponenciální rozdělení - E(5) o stejných počtech, srovnání s N(5k,25k)

In [2]:
sim=array([random.exponential(5,30).reshape(5,6).sum(0).cumsum() for i in range(nexp)]).transpose()
bin=r_[0:200:1.]
import matplotlib.pyplot as rp
his1=[histogram(sim[0],bin)[0]/float(nexp)]
a=rp.plot(bin[:-1],his1[0])[0].axes
i=0
sig=sqrt((i+1)*125.)
a.plot(bin[:-1],1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2),'k:')
for i in range(1,len(sim)):
    s=sim[i]
    his1.append(histogram(s,bin)[0]/float(nexp))
    sig=sqrt((i+1)*125.)
    a.plot(bin[:-1],his1[-1])
    a.plot(bin[:-1],1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2),'k:')
sum(his1[-3])
a.legend(r_[5:30:5])
a
Out[2]:
<matplotlib.axes._subplots.AxesSubplot at 0x1fff1966978>
In [3]:
for i in range(1,len(sim)):
    sig=sqrt((i+1)*125.)
    plot(bin[:-1],his1[i]-1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2))
legend(r_[5:30:5])
Out[3]:
<matplotlib.legend.Legend at 0x1fff1ac6898>

Maximální hodnota souboru

Soubor o 10,30,100,200,300 prvcích s norm. rozdělením kolem 0

In [4]:
psim2=[]
bs=[10,30,100,200,300]
for i in range(nexp):
    dat=random.normal(0,1.,300)
    psim2.append([max(dat[:k]) for k in bs])
import matplotlib.pyplot as rp
        
sim2=array(psim2).transpose()
bin=r_[:5:0.05]
his1=[histogram(sim2[0],bin)[0]/float(nexp)]
b=rp.plot(bin[:-1],his1[0])[0].axes
i=0
sig=sqrt((i+1)*500./12.)
#b.plot(bin[:-1],1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2),'k:')
for i in range(1,len(sim2)):
    s=sim2[i]
    his1.append(histogram(s,bin)[0]/float(nexp))
    sig=sqrt((i+1)*500./12.)
    b.plot(bin[:-1],his1[-1])
    #b.plot(bin[:-1],1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2),'k:')

b.legend(bs)
b.set_xlabel("max.hodnota opak. norm. rozdeleni")
b
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x1fff20b8550>

použití vyšších momentů (šikmost, špičatost) pro testy normálnosti (viz)

In [5]:
from scipy import stats
ta=[stats.skew(h) for h in sim2]
tb=[stats.kurtosis(h) for h in sim2]
ta,tb
Out[5]:
([0.40875898944157535,
  0.5457465218316685,
  0.6554526909660684,
  0.6837966248053485,
  0.7119868162049116],
 [0.33937660035473804,
  0.5490464044828229,
  0.7426730375999995,
  0.8057125400816845,
  0.8250910415457704])
In [6]:
from numpy import random
random.exponential(2,size=100).max()
Out[6]:
13.862491122848747

Konstatní funkce - hustota prsti $f(x)=1$ dává distrib. funkci $F(x)=x$. Maximum z $N$ proměnných má distrib. fci $F_M(x)=x^N$ a hustotu prsti
$$f_M(x)=N x^{N-1}$$

In [10]:
psim3=[]
bs=[10,20,30,50,80,100]
for i in range(nexp//10):
    dat=random.uniform(0,1.,size=100)
    psim3.append([max(dat[:k]) for k in bs])
import matplotlib.pyplot as rp
        
sim3=array(psim3).transpose()
bin=r_[:1.1:0.01]
his2=[histogram(sim3[0],bin)[0]/float(nexp)]
b=rp.plot(bin[:-1],his2[0])[0].axes
i=0
sig=sqrt((i+1)*500./12.)
#b.plot(bin[:-1],1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2),'k:')
for i in range(1,len(sim3)):
    s=sim3[i]
    his2.append(histogram(s,bin)[0]/float(nexp))
    sig=sqrt((i+1)*500./12.)
    b.plot(bin[:-1],his2[-1])
    #b.plot(bin[:-1],1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2),'k:')

b.legend(bs,loc=2)
b.set_xlabel("max.hodnota opak. rovnom. rozdeleni")
b.set_xlim(0.8,1.01)

#for q in bs:
#    plot(bin,q*pow(bin,q-1))
Out[10]:
(0.8, 1.01)

Expon. funkce - hustota prsti $$f(x)=\frac{1}{\mu} \exp(-x/\mu)$$ dává distrib. funkci $F(x)=1-\exp(-x/\mu)$. Maximum z $N$ proměnných má distrib. fci $F_M(x)=(1-\exp(-x/\mu))^N$ a hustotu prsti $$f_M(x)=N\mu\exp(-x/\mu)\ \left(1-\exp(-x/\mu)\right)^{N-1}$$

In [8]:
psim4=[]
bs=[10,30,100,200,300]
#bs=[10,20,30,50,80,100]
for i in range(nexp):
    dat=random.exponential(5,300)#dat=random.uniform(0,1.,size=300)
    psim4.append([max(dat[:k]) for k in bs])
import matplotlib.pyplot as rp
        
sim4=array(psim4).transpose()
bin=r_[:75:0.5]
his4=[histogram(sim4[0],bin)[0]/float(nexp)]
b=rp.plot(bin[:-1],his4[0])[0].axes
i=0
sig=sqrt((i+1)*500./12.)
#b.plot(bin[:-1],1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2),'k:')
for i in range(1,len(sim4)):
    s=sim4[i]
    his4.append(histogram(s,bin)[0]/float(nexp))
    sig=sqrt((i+1)*500./12.)
    b.plot(bin[:-1],his4[-1])
    #b.plot(bin[:-1],1/sqrt(2*pi)/sig*exp(-(bin[:-1]-(i+1)*25)**2/2/sig**2),'k:')

b.legend(bs,loc=1)
b.set_xlabel("max.hodnota opak. exponen. rozdeleni")
#b.set_xlim(0.7,1.01)

for q in bs:
    yvals=exp(-bin/5)
    yvals=q*yvals*pow(1-yvals,q-1)
    plot(bin,yvals/10.)