pravé hodnoty odpovídají polynomu 4. stupně (dominantně kvadratický)
testujeme modely pro polynomy 1. - 10. stupně
%matplotlib inline
from numpy import *
from matplotlib import pyplot as pl
x=r_[-3:3:20j]
sigy=3.
tres=[0.5,0.2,7,-0.5,0] #skutecne parametry
ytrue=polyval(tres,x)
pl.plot(x,ytrue,'k')
y=ytrue+random.normal(0,sigy,size=x.shape)
pl.plot(x,y,'*')
ords=arange(1,10)
res=[polyfit(x,y,i,cov=True) for i in ords]
#[[round(p,3) for p in r[0][::-1]] for r in res]
[" ".join(["%6.3f"%p for p in r[0][::-1]]) for r in res]
['32.659 1.131', '-4.453 1.131 11.193', '-4.453 0.341 11.193 0.133', ' 1.140 0.341 5.503 0.133 0.673', ' 1.140 -0.344 5.503 0.460 0.673 -0.030', ' 2.005 -0.344 3.619 0.460 1.257 -0.030 -0.044', ' 2.005 -1.814 3.619 1.848 1.257 -0.349 -0.044 0.021', ' 1.468 -1.814 5.678 1.848 0.054 -0.349 0.176 0.021 -0.012', ' 1.468 -3.986 5.678 5.296 0.054 -1.799 0.176 0.243 -0.012 -0.011']
errs=[[round(p,5) for p in sqrt(r[1].diagonal()[::-1])/sigy] for r in res]
print('směrodatná odchylka koeficientů:')
[" ".join(["%6.3f"%p for p in e]) for e in errs]
směrodatná odchylka koeficientů:
[' 2.790 1.532', ' 0.678 0.248 0.153', ' 0.697 0.642 0.157 0.099', ' 0.300 0.221 0.192 0.034 0.022', ' 0.307 0.400 0.196 0.162 0.022 0.015', ' 0.337 0.375 0.426 0.151 0.121 0.014 0.009', ' 0.333 0.566 0.420 0.431 0.119 0.094 0.009 0.006', ' 0.377 0.565 0.801 0.431 0.416 0.094 0.074 0.006 0.004', ' 0.362 0.767 0.769 0.956 0.399 0.373 0.071 0.056 0.004 0.003']
ok=pl.hist(smanu[:,5]/sigy**2,20)
ok=pl.hist(smanu[:,8]/sigy**2,20)
[stats.chi2.fit(sm/sigy**2, floc=0, fscale=1)[0] for sm in smanu.T]
[2489.5191406250046, 48.125585937500091, 45.055273437500077, 15.026171875000029, 14.207714843750027, 13.179003906250026, 12.124902343750023, 11.108105468750022, 10.135546875000021]
msig=smanu.mean(0)/sigy**2/(20-1-ords)
msig
array([ 138.3667646 , 2.86775049, 2.85382229, 0.9973652 , 1.00940502, 1.00798318, 1.00484096, 1.00343799, 1.00835644])
dif=(smanu[:,5]-smanu[:,8])/sigy**2
ok=pl.hist(dif,20)
ps0=stats.chi2.fit(dif, floc=0, fscale=1)[0]
voo=stats.chi2(ps0).pdf(ok[1])
voo*=sum(ok[0])/sum(voo)
pl.plot(ok[1],voo)
[<matplotlib.lines.Line2D at 0xae373c8>]
s0=[((y-polyval(r[0],x))**2).sum() for r in res]
siges=sqrt(array(s0)/(20-1-ords))
print("redukov. chi2: \n",siges)
('redukov. chi2: \n', array([ 34.59892989, 5.21216215, 4.8776368 , 2.24094107, 2.31738384, 2.40354435, 2.36482779, 1.86413792, 1.91733631]))
from scipy import stats
print("5% kvantil Stud. rozdělení (s kles. počtem stupňů volnosti): \n",stats.t(19-ords).isf(0.05))
('5% kvantil Stud. rozd\xc4\x9blen\xc3\xad (s kles. po\xc4\x8dtem stup\xc5\x88\xc5\xaf volnosti): \n', array([ 1.73406361, 1.73960673, 1.74588368, 1.75305036, 1.76131014, 1.7709334 , 1.78228756, 1.79588482, 1.81246112]))
statistické významnosti (t-hodnoty pro nulovou hypotézu) jednotlivých parametrů
pro dané měření mohou vycházet významné hodnoty i u vyšších momentů
[" ".join(["%6.3f"%p for p in abs(res[i][0][::-1]/array(errs[i]))]) for i in range(len(res))]
['12.135 0.234', ' 5.221 1.546 78.509', ' 5.556 4.106 83.540 5.183', ' 3.564 8.895 29.817 11.227 21.776', ' 3.428 5.193 28.675 2.764 20.942 0.454', ' 2.971 4.975 11.563 2.648 3.789 0.435 0.330', ' 2.996 5.756 11.662 3.997 3.822 3.303 0.332 3.273', ' 7.012 7.236 1.017 5.024 8.871 4.152 7.817 4.112 7.819', ' 6.742 6.115 0.977 3.615 8.529 2.605 7.515 2.090 7.516 1.691']
strue=[((ytrue-polyval(r[0],x))**2).sum() for r in res]
#sigcom=sqrt(array(s0)/(20-1-ords)) # redukovany chi2
pl.semilogy(ords,siges)
pl.semilogy(ords,sqrt(array(strue)/(20-1-ords)))
pl.legend(['red. chi2','true chi2'])
pl.grid()
správným kritériem je validace - chi2 na testovacím vzorku:
from numpy.random import normal
def test_poly(x,ytrue,sigy,ords=r_[1:10],rep=1):
'''podle rep:
1: rozdil skutecne funkce a modelu
2: rozdil nove sady dat a modelu
'''
y=ytrue+normal(0,sigy,size=x.shape)
res=[polyfit(x,y,i,cov=True) for i in ords]
if rep==1:
scom=[((ytrue-polyval(r[0],x))**2).sum() for r in res]
return array(scom)/(len(x)-1-ords)
if rep==2:
ycom=ytrue+normal(0,sigy,size=x.shape)
scom=[((ycom-polyval(r[0],x))**2).sum() for r in res]
return array(scom)/(len(x)-1-ords)
return res
# provedeme 100 opakování testu
fullmat=r_[[test_poly(x,ytrue,2,rep=1) for k in range(100)]]
fullmat.mean(0)
array([ 1238.85841607, 17.42292297, 17.68858138, 1.41782944, 1.80194635, 2.20549544, 2.62870594, 3.26606789, 3.93339854])
fullmat2=r_[[test_poly(x,ytrue,2,rep=2) for k in range(100)]]
pl.errorbar(ords[1:],fullmat.mean(0)[1:],fullmat.std(0)[1:]/10.)
pl.errorbar(ords[1:],fullmat2.mean(0)[1:],fullmat2.std(0)[1:]/10.)
pl.xlabel("stupen polynomu")
pl.legend(["gen. chyba","validace"])
<matplotlib.legend.Legend at 0xb92ae10>
apars=[test_poly(x,ytrue,2,rep=0) for k in range(100)] #zaznam hodnot nafitovanych parametru
def rep_pars(iord=4):
zmat=r_[[a[iord][0] for a in apars]]
cmat=r_[[a[iord][1].diagonal() for a in apars]]
zrep=array([zmat.mean(0),zmat.std(0),sqrt(cmat.mean(0)),sqrt(cmat).std(0)])
return zrep[:,::-1]
zot6=rep_pars(6)
zot6[0]/zot6[1] # podil stredni hodnoty a nejistoty
array([ 0.08969589, -0.34124228, 5.28565051, 0.24595817, 1.54955129, -0.07131472, -0.13749907, 0.06867428])
kovarianční matice dobře odpovídá rozptylu parametrů
zot6[1]/zot6[2]
array([ 0.98661703, 0.95119805, 0.96080738, 0.87037112, 0.95952248, 0.85984801, 0.95468107, 0.86476922])
zotx=[rep_pars(i) for i in range(7)]
[' '.join(['%6.3f'%ww for ww in zz[0]/zz[1]]) for zz in zotx]
['73.504 3.083', '-5.852 3.083 73.102', '-5.852 -0.720 73.102 2.004', ' 0.024 -0.720 12.865 2.004 8.473', ' 0.024 -0.416 12.865 0.473 8.473 -0.019', ' 0.090 -0.416 5.286 0.473 1.550 -0.019 -0.137', ' 0.090 -0.341 5.286 0.246 1.550 -0.071 -0.137 0.069']