pravé hodnoty odpovídají polynomu 4. stupně (dominantně kvadratický)
testujeme modely pro polynomy 1. - 10. stupně
%matplotlib inline
from numpy import *
from matplotlib import pyplot as pl
x=r_[-3:3:20j]
sigy=3.
tres=[0.5,0.2,7,-0.5,0] #skutecne parametry
ytrue=polyval(tres,x)
pl.plot(x,ytrue,'k')
y=ytrue+random.normal(0,sigy,size=x.shape)
pl.plot(x,y,'*')
ords=arange(1,10)
res=[polyfit(x,y,i,cov=True) for i in ords]
#[[round(p,3) for p in r[0][::-1]] for r in res]
[" ".join(["%6.3f"%p for p in r[0][::-1]]) for r in res]
['33.162 0.223', '-4.349 0.223 11.313', '-4.349 -0.897 11.313 0.188', '-0.655 -0.897 7.555 0.188 0.444', '-0.655 -2.988 7.555 1.188 0.444 -0.092', '-0.785 -2.988 7.838 1.188 0.357 -0.092 0.007', '-0.785 -0.601 7.838 -1.065 0.357 0.426 0.007 -0.033', '-0.377 -0.601 6.273 -1.065 1.271 0.426 -0.161 -0.033 0.009', '-0.377 2.066 6.273 -5.299 1.271 2.206 -0.161 -0.306 0.009 0.014']
errs=[[round(p,5) for p in sqrt(r[1].diagonal()[::-1])/sigy] for r in res]
print('směrodatná odchylka koeficientů:')
[" ".join(["%6.3f"%p for p in e]) for e in errs]
směrodatná odchylka koeficientů:
[' 2.865 1.573', ' 0.616 0.225 0.139', ' 0.616 0.568 0.139 0.088', ' 0.407 0.299 0.261 0.046 0.030', ' 0.423 0.552 0.271 0.223 0.031 0.020', ' 0.482 0.535 0.609 0.216 0.173 0.019 0.013', ' 0.498 0.847 0.629 0.645 0.179 0.141 0.013 0.009', ' 0.562 0.843 1.195 0.642 0.620 0.140 0.110 0.009 0.006', ' 0.572 1.214 1.216 1.512 0.631 0.590 0.112 0.088 0.006 0.004']
smanu=[]
for i in range(1000):
y=ytrue+random.normal(0,sigy,size=x.shape)
res=[polyfit(x,y,i,cov=False) for i in ords]
smanu.append([((y-polyval(r,x))**2).sum() for r in res] )
smanu=array(smanu)
#siges=sqrt(array(s0)/(20-1-ords))
ok=pl.hist(smanu[:,5]/sigy**2,20,alpha=0.6)
ok=pl.hist(smanu[:,8]/sigy**2,20,alpha=0.6)
[stats.chi2.fit(sm/sigy**2, floc=0, fscale=1)[0] for sm in smanu.T]
[2493.5637695312548, 47.639453125000088, 44.588183593750081, 15.22070312500003, 14.202246093750029, 13.122949218750025, 12.132812500000023, 11.146191406250024, 10.05781250000002]
# redukovany chi2
msig=smanu.mean(0)/sigy**2/(20-1-ords)
msig
array([ 138.58103253, 2.8518133 , 2.83957728, 1.01562264, 1.01592899, 1.01340671, 1.01367193, 1.01469931, 1.00651353])
# rozdeleni rozdilu rezidualni sumy
dif=(smanu[:,5]-smanu[:,8])/sigy**2
ok=pl.hist(dif,20)
ps0=stats.chi2.fit(dif, floc=0, fscale=1)[0]
voo=stats.chi2(ps0).pdf(ok[1])
voo*=sum(ok[0])/sum(voo)
pl.plot(ok[1],voo)
ps0
3.0758789062500047
from scipy import stats
print("5% kvantil Stud. rozdělení (s kles. poctem stupnu volnosti): \n",stats.t(19-ords).isf(0.05))
5% kvantil Stud. rozdělení (s kles. poctem stupnu volnosti): [ 1.73406361 1.73960673 1.74588368 1.75305036 1.76131014 1.7709334 1.78228756 1.79588482 1.81246112]
(t-hodnoty pro nulovou hypotézu) jednotlivých parametrů
pro dané měření mohou vycházet významné hodnoty i u vyšších momentů
[" ".join(["%6.3f"%p for p in abs(res[i][0][::-1]/array(errs[i]))]) for i in range(len(res))]
['12.135 0.234', ' 5.221 1.546 78.509', ' 5.556 4.106 83.540 5.183', ' 3.564 8.895 29.817 11.227 21.776', ' 3.428 5.193 28.675 2.764 20.942 0.454', ' 2.971 4.975 11.563 2.648 3.789 0.435 0.330', ' 2.996 5.756 11.662 3.997 3.822 3.303 0.332 3.273', ' 7.012 7.236 1.017 5.024 8.871 4.152 7.817 4.112 7.819', ' 6.742 6.115 0.977 3.615 8.529 2.605 7.515 2.090 7.516 1.691']
strue=[((ytrue-polyval(r[0],x))**2).sum() for r in res]
#sigcom=sqrt(array(s0)/(20-1-ords)) # redukovany chi2
pl.semilogy(ords,siges)
pl.semilogy(ords,sqrt(array(strue)/(20-1-ords)))
pl.legend(['red. chi2','true chi2'])
pl.grid()
správným kritériem je validace - chi2 na testovacím vzorku...
numerický experiment můžeme snadno opakovat
from numpy.random import normal
def test_poly(x,ytrue,sigy,ords=r_[1:10],rep=1):
'''podle rep:
1: rozdil skutecne funkce a modelu
2: rozdil nove sady dat a modelu
'''
y=ytrue+normal(0,sigy,size=x.shape)
res=[polyfit(x,y,i,cov=True) for i in ords]
if rep==1:
scom=[((ytrue-polyval(r[0],x))**2).sum() for r in res]
return array(scom)/(len(x)-1-ords)
if rep==2:
ycom=ytrue+normal(0,sigy,size=x.shape)
scom=[((ycom-polyval(r[0],x))**2).sum() for r in res]
return array(scom)/(len(x)-1-ords)
return res
# provedeme 100 opakování testu
fullmat=r_[[test_poly(x,ytrue,2,rep=1) for k in range(100)]]
fullmat.mean(0)
array([ 1238.85841607, 17.42292297, 17.68858138, 1.41782944, 1.80194635, 2.20549544, 2.62870594, 3.26606789, 3.93339854])
fullmat2=r_[[test_poly(x,ytrue,2,rep=2) for k in range(100)]]
pl.errorbar(ords[1:],fullmat.mean(0)[1:],fullmat.std(0)[1:]/10.)
pl.errorbar(ords[1:],fullmat2.mean(0)[1:],fullmat2.std(0)[1:]/10.)
pl.xlabel("stupen polynomu")
pl.legend(["gen. chyba","validace"])
<matplotlib.legend.Legend at 0xb92ae10>
apars=[test_poly(x,ytrue,2,rep=0) for k in range(100)]
def rep_pars(iord=4):
# vraci prumerne hodnoty fitovanych parametru, teoret. nejistoty a rozptyl parametru
zmat=r_[[a[iord][0] for a in apars]]
cmat=r_[[a[iord][1].diagonal() for a in apars]]
zrep=array([zmat.mean(0),zmat.std(0),sqrt(cmat.mean(0)),sqrt(cmat).std(0)])
return zrep[:,::-1]
zot6=rep_pars(6)
zot6[0]/zot6[1] # podil stredni hodnoty a nejistoty
array([ 0.08969589, -0.34124228, 5.28565051, 0.24595817, 1.54955129, -0.07131472, -0.13749907, 0.06867428])
kovarianční matice dobře odpovídá rozptylu parametrů
zot6[1]/zot6[2]
array([ 0.98661703, 0.95119805, 0.96080738, 0.87037112, 0.95952248, 0.85984801, 0.95468107, 0.86476922])
zotx=[rep_pars(i) for i in range(7)]
[' '.join(['%6.3f'%ww for ww in zz[0]/zz[1]]) for zz in zotx]
['73.504 3.083', '-5.852 3.083 73.102', '-5.852 -0.720 73.102 2.004', ' 0.024 -0.720 12.865 2.004 8.473', ' 0.024 -0.416 12.865 0.473 8.473 -0.019', ' 0.090 -0.416 5.286 0.473 1.550 -0.019 -0.137', ' 0.090 -0.341 5.286 0.246 1.550 -0.071 -0.137 0.069']