* Discriminant rules for the IRIS data set from the SASHELP library;

* Reminder of what the data look like - scatter-plot matrix grouped by species;
proc sgscatter data=sashelp.iris;
   matrix SepalLength -- PetalWidth / group=Species;
run;

* ---------------------------------------------------------------------------;
* Part 1 - one-dimensional analysis (p = 1) using the variable PETALWIDTH;
* ---------------------------------------------------------------------------;

* Univariate look at PETALWIDTH for every level of SPECIES - three histograms;
proc univariate data=sashelp.iris noprint;
   var PetalWidth;
   class Species;
   histogram / kernel;
run;

* Alternative view - bar chart of frequencies built from a PROC FREQ output table;
proc freq data=sashelp.iris noprint;
   tables petalwidth * species / out=freqout;
run;

proc sgplot data=freqout;
   vbar petalwidth / response=count group=species;
   keylegend / location=inside position=ne noborder across=1;
run;

* Helper grid of PETALWIDTH values on which the group densities are evaluated;
data plotdata;
   do PetalWidth = -5 to 30 by 0.5;
      output;
   end;
run;

* Fisher linear discriminant analysis (pooled covariance);
* TESTOUT= receives the scored grid used by PLOTPROB and TESTOUTD= the one used by PLOTDEN;
proc discrim data=sashelp.iris method=normal pool=yes
             testdata=plotdata testout=plotp testoutd=plotd
             listerr;
   class Species;
   var PetalWidth;
run;

* Macros that plot the estimated densities and the classification probabilities;
%macro plotden;
   /* Reads PLOTD (written by TESTOUTD= of the preceding PROC DISCRIM), stacks the  */
   /* three per-species columns into one long table PLOTD2 and draws one series per */
   /* species.                                                                      */
   title3 'Plot of Estimated Densities';

   data plotd2;
      set plotd;

      /* Fix: without an explicit length, g would take the length of its first     */
      /* assigned literal (7 characters) and the later, longer species names would */
      /* be truncated in the legend.                                               */
      length g $ 10;

      /* Values below the cut-off become missing (presumably so that the           */
      /* negligible tails are not drawn).                                          */
      if setosa     < .002 then setosa     = .;
      if versicolor < .002 then versicolor = .;
      if virginica  < .002 then virginica  = .;

      /* One output row per species for every input row. */
      g = 'Setosa ';    Density = setosa;     output;
      g = 'Versicolor'; Density = versicolor; output;
      g = 'Virginica '; Density = virginica;  output;

      label PetalWidth='Petal Width in mm.';
   run;

   proc sgplot data=plotd2;
      series y=Density x=PetalWidth / group=g;
      discretelegend;
   run;
%mend;

%macro plotprob;
   /* Same reshaping as PLOTDEN, applied to PLOTP (written by TESTOUT= of the       */
   /* preceding PROC DISCRIM); the result PLOTP2 is plotted as one series per       */
   /* species.                                                                      */
   title3 'Plot of Posterior Probabilities';

   data plotp2;
      set plotp;

      /* Fix: explicit length so that 'Versicolor' and 'Virginica' are not cut to  */
      /* the 7 characters of the first assigned literal.                           */
      length g $ 10;

      /* Values below the cut-off become missing. */
      if setosa     < .01 then setosa     = .;
      if versicolor < .01 then versicolor = .;
      if virginica  < .01 then virginica  = .;

      g = 'Setosa ';    Probability = setosa;     output;
      g = 'Versicolor'; Probability = versicolor; output;
      g = 'Virginica '; Probability = virginica;  output;

      label PetalWidth='Petal Width in mm.';
   run;

   proc sgplot data=plotp2;
      series y=Probability x=PetalWidth / group=g;
      discretelegend;
   run;
%mend;

%plotden;
%plotprob;
title;

* Fisher quadratic discriminant analysis (separate covariance for each group);
proc discrim data=sashelp.iris method=normal pool=no
             testdata=plotdata testout=plotp testoutd=plotd
             listerr;
   class Species;
   var PetalWidth;
run;

%plotden;
%plotprob;
title;

* ---------------------------------------------------------------------------;
* Part 2 - two dimensions using the variables PETALLENGTH and PETALWIDTH;
* ---------------------------------------------------------------------------;

* Classic scatter plot - done the slightly harder way through a GTL template;
title 'Discriminant Analysis of Fisher (1936) Iris Data';

proc template;
   define statgraph scatter;
      begingraph;
         entrytitle 'Fisher (1936) Iris Data';
         layout overlayequated / equatetype=fit;
            scatterplot x=petallength y=petalwidth / group=species name='iris';
            layout gridded / autoalign=(topleft);
               discretelegend 'iris' / border=false opaque=false;
            endlayout;
         endlayout;
      endgraph;
   end;
run;

proc sgrender data=sashelp.iris template=scatter;
run;

title;

* Helper data set again - this time a two-dimensional grid;
data plotdata;
   do PetalLength = -2 to 72 by 0.5;
      do PetalWidth = -5 to 32 by 0.5;
         output;
      end;
   end;
run;

* Fisher linear discriminant analysis with two variables;
proc discrim data=sashelp.iris method=normal pool=yes
             testdata=plotdata testout=plotp testoutd=plotd
             listerr;
   class Species;
   var Petal:;
run;

* Template and macros that plot the densities and classification probabilities;
* CLOSE is defined in two steps - first the axis options and then the same options wrapped for both axes;
%let close = thresholdmin=0 thresholdmax=0 offsetmin=0 offsetmax=0;
%let close = xaxisopts=(&close) yaxisopts=(&close);

proc template;
   define statgraph contour;
      begingraph;
         layout overlayequated / equatetype=equate &close;
            contourplotparm x=petallength y=petalwidth z=z /
                            contourtype=fill nhint=30;
            scatterplot x=pl y=pw / group=species name='iris'
                        includemissinggroup=false primary=true;
            layout gridded / autoalign=(topleft);
               discretelegend 'iris' / border=false opaque=false;
            endlayout;
         endlayout;
      endgraph;
   end;
run;

%macro contden;
   /* Merges the density grid PLOTD side by side (no BY statement) with the iris    */
   /* observations, whose coordinates are renamed to pl and pw, so that a single    */
   /* table feeds both the contour and the scatter overlay of template CONTOUR.     */
   data contour(keep=PetalWidth PetalLength species z pl pw);
      merge plotd(in=d)
            sashelp.iris(keep=PetalWidth PetalLength species
                         rename=(PetalWidth=pw PetalLength=pl));
      /* z is the largest of the three group values on the row. */
      if d then z = max(setosa,versicolor,virginica);
   run;

   title3 'Plot of Estimated Densities';

   proc sgrender data=contour template=contour;
   run;
%mend;

%macro contprob;
   /* Same construction as CONTDEN, applied to PLOTP. Also derives INTO, a numeric  */
   /* code (1, 2 or 3) taken from the prefix of _INTO_, which CONTCLASS plots.      */
   data posterior(keep=PetalWidth PetalLength species z pl pw into);
      merge plotp(in=d)
            sashelp.iris(keep=PetalWidth PetalLength species
                         rename=(PetalWidth=pw PetalLength=pl));
      if d then z = max(setosa,versicolor,virginica);
      into = 1 * (_into_ =: 'Set')
           + 2 * (_into_ =: 'Ver')
           + 3 * (_into_ =: 'Vir');
   run;

   title3 'Plot of Posterior Probabilities ';

   proc sgrender data=posterior template=contour;
   run;
%mend;

%macro contclass;
   /* Re-renders POSTERIOR (created by CONTPROB, which therefore has to run first)  */
   /* with the class code INTO supplied as the contour response z.                  */
   title3 'Plot of Classification Results';

   proc sgrender data=posterior(drop=z rename=(into=z)) template=contour;
   run;
%mend;

%contden
%contprob
%contclass
title;

* Fisher quadratic discriminant analysis with two variables;
proc discrim data=sashelp.iris method=normal pool=no
             testdata=plotdata testout=plotp testoutd=plotd
             listerr;
   class Species;
   var Petal:;
run;

%contden
%contprob
%contclass
title;

* ---------------------------------------------------------------------------;
* Part 3 - all four variables;
* ---------------------------------------------------------------------------;

* POOL=TEST lets SAS decide between the linear and the quadratic rule;
proc discrim data=sashelp.iris method=normal pool=test listerr;
   class Species;
   var SepalLength SepalWidth PetalLength PetalWidth;
run;

* Finally the analysis the way it is done in practice - split into training and test data;
* OUTALL keeps every observation and the SELECTED flag marks the sampled ones;
proc surveyselect data=sashelp.iris out=split rate=0.7 seed=12345 outall;
run;

data train test;
   set split;
   if selected = 1 then output train;
   else output test;
run;

* Discriminant analysis fitted on TRAIN and applied to TEST;
proc discrim data=train method=normal pool=test listerr testdata=test;
   class Species;
   var SepalLength SepalWidth PetalLength PetalWidth;
run;

* Exercise - now run a discriminant analysis on the CROPS data set yourself and classify the observations from the TEST data set;