library(psych)
library(lavaan)
library(semTools)
library(lm.beta)
library(semPlot)  # semPaths() path diagrams
library(xtable)   # xtable() LaTeX tables

# Functions -------------
# Copy an object to the clipboard as tab-separated text.
#   x: anything write.table() accepts (typically a data frame or matrix).
# Writes to the "clipboard-32000" connection (the Windows clipboard with an
# enlarged buffer) without row names, using a decimal comma and leaving
# missing values as empty cells -- presumably so the result pastes cleanly
# into a spreadsheet with a Czech locale.
cl <- function(x) {
  write.table(
    x,
    "clipboard-32000",
    row.names = FALSE,  # FALSE, not F: F is an ordinary variable and can be reassigned
    dec = ",",
    sep = "\t",
    na = ""
  )
}


# Data -----
# Move into the course folder below the directory named by the "dropbox"
# environment variable, then read the semicolon-separated data file.
setwd(paste0(Sys.getenv("dropbox"), "/!Výuka/FSS928/PSY028_E - SEM/"))
pedhazur <- read.csv2(file = "pedhazur_sim.csv")
describe(pedhazur)
# We will work only with the sum scores (columns 21 to 24)
data <- pedhazur[, 21:24]
# GPA is multiplied by 10 to make the coefficients easier to interpret
data$GPA <- 10 * data$GPA

# Descriptive statistics and correlations ----------
describe(data)
# Correlation matrix of the analysed variables; the outer parentheses make
# the assignment print its value
(C <- cor(data))
# Covariance matrix of the same variables
(V <- cov(data))


# Linear regression, the traditional way ---------------------
# What do the correlations between the variables mean?
# Start from the theory that motivated the data collection:
# school performance (GPA) should depend on background (SES), motivation
# (nAch) and intelligence (IQ).
# Even in the regression context the dependence is not meant causally;
# it is only about prediction.

# Traditional approach
# The formula is given as a formula object rather than a character string,
# so nothing relies on lm() coercing text and the stored call holds a real
# formula.
lm1.fit <- lm(GPA ~ SES + nAch + IQ, data = data)
summary(lm1.fit)                           # no standardized regression coefficients
lm1.fit.beta <- lm.beta::lm.beta(lm1.fit)  # adds the standardized coefficients
summary(lm1.fit.beta)

# NOTE(review): the output pasted below appears to predate the GPA * 10
# rescaling applied when `data` is built. Its unstandardized estimates are
# one tenth of those in the lavaan output for the same model (e.g. 0.0138
# vs 0.138 for SES), while the standardized column agrees. Re-run and
# re-paste to confirm.
# Coefficients:
#   Estimate Standardized Std. Error t value Pr(>|t|)    
#   (Intercept) -1.747656     0.000000   0.202594  -8.626   <2e-16 ***
#   SES          0.013846     0.034759   0.010777   1.285    0.199    
#   nAch         0.029912     0.372774   0.002134  14.019   <2e-16 ***
#   IQ           0.024911     0.402726   0.001617  15.406   <2e-16 ***

# Linear regression as a path model in lavaan -------------------------
# Specifying the model and estimating its parameters are two separate steps.
# Step 1: model specification.
# The model string regresses GPA on SES, nAch and IQ (labels b1-b3) and then
# spells out the parameters lavaan would otherwise add by itself, as lm does:
# the variances of the three predictors (v1-v3), the residual variance of
# GPA (e1) and the covariances among the predictors (r12, r13, r23).
reg1 <- "
       # regresní model

       GPA ~ b1*SES + b2*nAch + b3*IQ   

       # ostatní parametry, které by lavaan (jako lm) sám doplnil
       # rozptyly
       SES  ~~ v1*SES   #rozptyl SES
       nAch ~~ v2*nAch  #rozptyl nAch
       IQ   ~~ v3*IQ    #rozptyl IQ
       GPA  ~~ e1*GPA   #reziduální rozptyl GPA
       # kovariance exogenních 
       SES ~~ r12*nAch     #kovariance SES a nAch 
       SES ~~ r13*IQ
       nAch ~~ r23*IQ  
       "

# Step 2: parameter estimation.
# ULS gives estimates identical to the regression; ML or MLR are the usual
# choices otherwise.
reg1.fit <- sem(reg1, data = data, estimator = "ULS")
# Print the model parameters
summary(reg1.fit, ci = TRUE, standardized = TRUE, fit.measures = FALSE)
parameterEstimates(reg1.fit, standardized = TRUE)  # parameters as a table
# Model-implied correlation matrix:
# the model actually analyses the covariance matrix, so the implied matrix is
# a covariance matrix as well; cov2cor() standardizes it into correlations.
cov2cor(lavInspect(reg1.fit, what = "fitted")[["cov"]])  # also: cov2cor(fitted(reg1.fit)$cov)
# For comparison, the observed correlation matrix
cov2cor(lavInspect(reg1.fit, what = "observed")[["cov"]])

# Regressions:
#                   Estimate  Std.Err  z-value  P(>|z|) ci.lower ci.upper   Std.lv  Std.all
# GPA ~                                                                                   
#   SES       (b1)    0.138    0.007   19.471    0.000    0.125    0.152    0.138    0.035
#   nAch      (b2)    0.299    0.000  639.316    0.000    0.298    0.300    0.299    0.373
#   IQ        (b3)    0.249    0.000  925.045    0.000    0.249    0.250    0.249    0.403
# 
# Covariances:
#                   Estimate  Std.Err  z-value  P(>|z|) ci.lower ci.upper   Std.lv  Std.all
# SES ~~                                                                                  
#   nAch     (r12)    8.257    0.032  260.982    0.000    8.195    8.319    8.257    0.301
#   IQ       (r13)    8.643    0.032  273.194    0.000    8.581    8.705    8.643    0.243
# nAch ~~                                                                                 
#   IQ       (r23)   28.494    0.032  900.604    0.000   28.432   28.556   28.494    0.161
# 
# Variances:
#                  Estimate  Std.Err  z-value  P(>|z|) ci.lower ci.upper   Std.lv  Std.all
#   SES       (v1)    5.525    0.032  174.632    0.000    5.463    5.587    5.525    1.000
#   nAch      (v2)  136.169    0.032 4303.898    0.000  136.107  136.231  136.169    1.000
#   IQ        (v3)  229.161    0.032 7243.086    0.000  229.099  229.223  229.161    1.000
#  .GPA       (e1)   55.641    0.042 1337.512    0.000   55.559   55.722   55.641    0.635

# A picture is worth a thousand words (and it checks that the model was
# specified as intended)
windows(1500, 1000)  # new graphics window; windows() exists only on Windows
# semPaths() belongs to the semPlot package. It is called through its
# namespace so the plot does not depend on semPlot being attached.
semPlot::semPaths(reg1.fit, what = "path", whatLabels = "std")
         # further options kept for reference:
         # intercepts=F,  rotation=2, layout="tree2", residuals=T, sizeLat=7, sizeMan=4, edge.color="black", edge.label.position=0.5,
         # label.cex=1.0, edge.label.cex = .4, cex.main=1)





# First path model in lavaan -------------------------
# Specifying the model and estimating its parameters are two separate steps.
# Step 1: model specification.
# nAch is regressed on SES and IQ, and GPA on SES, nAch and IQ. The string
# also lists the variances of SES and IQ, the disturbances of nAch and GPA,
# and the covariance of the two exogenous variables (SES, IQ).
path1 <- "
# regresní vztahy
nAch ~ SES + IQ 
GPA ~ SES + nAch + IQ   

# rozptyly
SES  ~~ SES   # rozptyl SES
IQ   ~~ IQ    # rozptyl IQ
nAch ~~ nAch  # disturbance nAch
GPA  ~~ GPA   # disturbance GPA 

# kovariance exogenních 
SES ~~ IQ
"

# Step 2: parameter estimation
path1.fit <- sem(model = path1, data = data, estimator = "MLR")  # optional: meanstructure = TRUE
# Print the model parameters
summary(path1.fit, standardized = TRUE, fit.measures = FALSE, ci = TRUE)
parameterEstimates(path1.fit, standardized = TRUE)  # parameters as a table
# Model-implied correlation matrix:
# the model actually analyses the covariance matrix, so the implied matrix is
# a covariance matrix as well; cov2cor() standardizes it into correlations.
cov2cor(lavInspect(path1.fit, "fitted")$cov)  # also: cov2cor(fitted(path1.fit)$cov)
# For comparison, the observed correlation matrix
cov2cor(lavInspect(path1.fit, "observed")$cov)
# Path diagram
windows(1500, 1000)  # new graphics window; windows() exists only on Windows
# semPaths() belongs to the semPlot package. It is called through its
# namespace so the plot does not depend on semPlot being attached.
semPlot::semPaths(path1.fit, what = "path", whatLabels = "std", layout = "spring")



# Overidentified path model in lavaan -------------------------
# Specifying the model and estimating its parameters are two separate steps.
# Model specification.
# Compared with the first path model, the path IQ -> nAch and the path
# SES -> GPA are removed; variances, disturbances and the SES-IQ covariance
# are listed as before.
path2 <- "
# regresní vztahy
nAch ~ SES            #odstraněno IQ 
GPA ~  nAch + IQ      #odstraněno SES

# rozptyly
SES  ~~ SES   # rozptyl SES
IQ   ~~ IQ    # rozptyl IQ
nAch ~~ nAch  # disturbance nAch
GPA  ~~ GPA   # disturbance GPA 

# kovariance exogenních 
SES ~~ IQ
"

# Parameter estimation
path2.fit <- sem(model = path2, data = data, estimator = "MLR")  # optional: meanstructure = TRUE
# Print the model parameters
summary(path2.fit, standardized = TRUE, fit.measures = FALSE, ci = TRUE)
parameterEstimates(path2.fit, standardized = TRUE)  # parameters as a table
# Model-implied correlation matrix:
# the model actually analyses the covariance matrix, so the implied matrix is
# a covariance matrix as well; cov2cor() standardizes it into correlations.
cov2cor(lavInspect(path2.fit, "fitted")$cov)  # also: cov2cor(fitted(path2.fit)$cov)
# For comparison, the observed correlation matrix
cov2cor(lavInspect(path2.fit, "observed")$cov)
# Path diagram
windows(1500, 1000)  # new graphics window; windows() exists only on Windows
# semPaths() belongs to the semPlot package. It is called through its
# namespace so the plot does not depend on semPlot being attached.
semPlot::semPaths(path2.fit, what = "path", whatLabels = "std", layout = "spring")



















# Chapter 2: Path Models and Analysis ----

# Example: path analysis with lavaan, fitted from a correlation matrix
library(lavaan)
# Lower triangle of the correlation matrix, entered one row per line
regression.cor <- lav_matrix_lower2full(c(
  1.0,
  0.20, 1,
  0.24, 0.30, 1,
  0.70, 0.80, 0.30, 1
))
# Give rows and columns the same variable names
dimnames(regression.cor) <- rep(list(c("X1", "X2", "X3", "Y")), 2)

# Model syntax: Y regressed on X1-X3 (labels a, b, c) with the residual
# variance of Y labelled z
regression.model <- "
# structural model for Y
Y ~ a*X1 + b*X2 + c*X3 
# label the residual variance of Y
Y ~~ z*Y 
"
# Fit the model to the matrix, supplying the sample size explicitly
regression.fit <- sem(
  model = regression.model,
  sample.cov = regression.cor,
  sample.nobs = 1000
)
summary(regression.fit, rsquare = TRUE)
parameterEstimates(regression.fit)

# Example: indirect effects
# Input data: lower triangle of the covariance matrix, one row per line
beaujean.cov <- lav_matrix_lower2full(c(
  648.07,
  30.05, 8.64,
  140.18, 25.57, 233.21
))
dimnames(beaujean.cov) <- rep(list(c("salary", "school", "iq")), 2)
# The same matrix rescaled to correlations
beaujean.cor <- cov2cor(beaujean.cov)

# Path model: salary depends on school (a) and iq (c), iq depends on
# school (b); `ind` is defined as the product b * c
beaujean.model <- "
salary ~ a*school + c*iq
iq ~ b*school # this is reversed in first printing of the book 
ind:= b*c 
"
# Estimate the parameters from the covariance matrix
beaujean.fit <- sem(
  model = beaujean.model,
  sample.cov = beaujean.cov,
  sample.nobs = 300
)
summary(beaujean.fit)

# Create the output table of parameter estimates for regression.fit.
# xtable() belongs to the xtable package. It is called through its namespace
# so the call does not depend on xtable being attached.
# Columns are selected by name instead of by position, so the selection does
# not shift when the `label` column is absent. NOTE(review): these names are
# assumed to match positions 1:3, 5:6 and 12 of the labelled output used
# before -- confirm against the installed lavaan version.
xtable::xtable(
  parameterEstimates(regression.fit, standardized = TRUE)[
    , c("lhs", "op", "rhs", "est", "se", "std.all")
  ],
  caption = "Parameter Estimates from Path Analysis Model.",
  label = "tab:path-analysis-estimates"
)