library(fda) library(ggplot2) #### Canadian Weather data(CanadianWeather) temp = CanadianWeather$dailyAv[,,1] precip = CanadianWeather$dailyAv[,,2] daytime = (1:365)-0.5 day5 = seq(0,365,5) ### Plot these data df <- data.frame(daytime = daytime, temp = c(temp), precip = c(precip), place = rep(CanadianWeather$place, each = 365), region = rep(CanadianWeather$region, each = 365)) # We can plot by region; Atlantic, Pacific, Central and North. # vykresleni teploty v provinciich x11() p0 <- ggplot (data = df, aes (x = daytime, y = temp, colour = region, group = place)) p0 <- p0 + geom_line () p0 <- p0 + labs (x = "Days", y = "Temperature") p0 <- p0 + theme_bw () print (p0) dayrng <- c(0,365) #### B-spline bases with knots every 5 days breaks <- day5 norder <- 4 #nbasis <- length(knots) + norder - 2 bbasis <- create.bspline.basis(dayrng, norder = norder, breaks = breaks) curv.Lfd <- int2Lfd(2) curv.fdPar <- fdPar(bbasis,curv.Lfd) curv.fdPar$lambda <- 3.162278 # from previous lessons tempSmooth <- smooth.basis(daytime, temp, curv.fdPar) ### Plot smooth data tempfd <- tempSmooth$fd fdobjSmootheval <- eval.fd(fdobj = tempfd, evalarg = daytime) dfs <- data.frame(daytime = daytime, temp = c(fdobjSmootheval), precip = c(precip), place = rep(CanadianWeather$place, each = 365), region = rep(CanadianWeather$region, each = 365)) x11() p0 <- ggplot (data = dfs, aes (x = daytime, y = temp, colour = region, group = place)) p0 <- p0 + geom_line () p0 <- p0 + labs (x = "Days", y = "Temperature") p0 <- p0 + theme_bw () print (p0) # Functional ANOVA # As the first, set covariates regions <- unique(CanadianWeather$region) p <- length(regions) + 1 regionList <- vector("list", p) regionList[[1]] <- c(rep(1, 35), 0) for (j in 2:p) { xj <- CanadianWeather$region == regions[j-1] regionList[[j]] <- c(xj, 1) } # The next step is to augment the temperature functional data object by a 36th observation # that takes only zero values as required by additional condition Sum \beta_j(t) = 0 coef <- tempfd$coef n.coef <- nrow(coef) coef36 <- cbind(coef, matrix(0, n.coef, 1)) temp36fd <- fd(coef36, bbasis, tempfd$fdnames) # We now create functional parameter objects for each of the coefficient functions, # using 11 Fourier basis functions for each. betabasis <- create.fourier.basis(dayrng, 11) betafdPar <- fdPar(betabasis) betaList <- vector("list",p) for (j in 1:p) betaList[[j]] <- betafdPar # Now call fRegress, extract the coefficients and plot them, along with the predicted # curves for the regions. fRegressList <- fRegress(temp36fd, regionList, betaList) betaestList <- fRegressList$betaestlist regionFit <- fRegressList$yhatfd regions <- c("Canada", regions) # Plot betas par(mfrow = c(2,3), cex = 1) for (j in 1:p) plot(betaestList[[j]]$fd, lwd=2, xlab="Day (July 1 to June 30)", ylab="", main=regions[j]) # Plot fits x11() plot(regionFit, lwd = 2, lty = 1, xlab="Day", ylab="", main="Prediction") # ggplot # betas # bety <- matrix(0, length(daytime), p) # for (j in 1:p) bety[, j] <- eval.fd(daytime, betaestList[[j]]$fd) # # df.betas <- data.frame(daytime = daytime, Beta = c(bety), # region = factor(rep(regions, each = length(daytime)), ordered = TRUE, levels = regions)) # # p0 <- ggplot(df.betas, aes(x = daytime, y = Beta)) # p0 <- p0 + geom_line() # p0 <- p0 + geom_hline(yintercept = 0, linetype = "dashed") # p0 <- p0 + labs (x = "Days", y = "Beta") # p0 <- p0 + theme_bw() # p0 <- p0 + facet_wrap(~ region, nrow = 2) # p0 # prediction # pred <- matrix(0, length(daytime), p - 1) # for (j in 2:p) pred[, j - 1] <- eval.fd(daytime, betaestList[[j]]$fd + betaestList[[1]]$fd) # # df.pred <- data.frame(daytime = daytime, Predicted = c(pred), # region = factor(rep(regions[-1], each = length(daytime)))) # # p0 <- ggplot(df.pred, aes(x = daytime, y = Predicted, colour = region)) # p0 <- p0 + geom_line(size = 1) # p0 <- p0 + labs (x = "Days", y = "Predicted Values") # p0 <- p0 + theme_bw() # p0 # Confidence Intervals y2cMap <- tempSmooth$y2cMap TempErrfd <- regionFit$fd - temp36fd # residuals ### In Ver. 5.1.7 must be TempErrfd <- regionFit - temp36fd ### TempErr <- eval.fd(daytime, TempErrfd) SigmaE <- TempErr%*%t(TempErr)/35 ### In Ver. 5.1.7 must be fRegressList$yfdPar <- fRegressList$yfdobj ### stderrList <- fRegress.stderr(fRegressList, y2cMap, SigmaE) betastderrList <- stderrList$betastderrlist # Plot betas par(mfrow = c(2,3), cex = 1) for (j in 1:p) { plot(betaestList[[j]]$fd, lwd=2, xlab="Day (July 1 to June 30)", ylab="", main=regions[j], ylim = c(-20, 20)) lines(betaestList[[j]]$fd + 1.96*betastderrList[[j]], lty = "dashed") lines(betaestList[[j]]$fd - 1.96*betastderrList[[j]], lty = "dashed") } # Plot fits x11() plot(regionFit, lwd = 2, lty = 1, xlab="Day", ylab="", main="Prediction") lines(regionFit[[1]] + 1.96*betastderrList[[1]], lwd = 2, lty = "dashed") lines(regionFit[[1]] - 1.96*betastderrList[[1]], lwd = 2, lty = "dashed") ### In Ver. 5.1.7 must be lines(regionFit + 1.96*betastderrList[[1]], lwd = 2, lty = "dashed") lines(regionFit - 1.96*betastderrList[[1]], lwd = 2, lty = "dashed") ### # ggplot # bety <- matrix(0, length(daytime), p) # pom1 <- matrix(0, length(daytime), p) # for (j in 1:p) { # bety[, j] <- eval.fd(daytime, betaestList[[j]]$fd) # pom1[, j] <- eval.fd(daytime, betastderrList[[j]]) # } # # df.betas.ci <- data.frame(daytime = daytime, Beta = c(bety), # lower = c(bety) - 1.96*c(pom1), upper = c(bety) + 1.96*c(pom1), # region = factor(rep(regions, each = length(daytime)), ordered = TRUE, levels = regions)) # # p0 <- ggplot(df.betas.ci, aes(x = daytime, y = Beta)) # p0 <- p0 + geom_line() # p0 <- p0 + geom_hline(yintercept = 0, linetype = "dashed") # p0 <- p0 + geom_line(aes(y = lower), linetype = "dashed") # p0 <- p0 + geom_line(aes(y = upper), linetype = "dashed") # p0 <- p0 + labs (x = "Days", y = "Beta") # p0 <- p0 + theme_bw() # p0 <- p0 + facet_wrap(~ region, nrow = 2) # p0 # prediction with CI # pred <- matrix(0, length(daytime), p - 1) # for (j in 2:p) pred[, j - 1] <- eval.fd(daytime, betaestList[[j]]$fd + betaestList[[1]]$fd) # # df.pred.ci <- data.frame(daytime = daytime, Predicted = c(pred), # lower = c(pred) - 1.96*c(pom1[, -1]), upper = c(pred) + 1.96*c(pom1[, -1]), # region = factor(rep(regions[-1], each = length(daytime)))) # # p0 <- ggplot(df.pred.ci, aes(x = daytime, y = Predicted, colour = region)) # p0 <- p0 + geom_line(size = 1) # p0 <- p0 + geom_line(aes(y = lower), linetype = "dashed") # p0 <- p0 + geom_line(aes(y = upper), linetype = "dashed") # p0 <- p0 + labs (x = "Days", y = "Predicted Values") # p0 <- p0 + theme_bw() # p0 # Assesing the fit of the fANOVA # plot residuals plot(TempErrfd) # ggplot # df.res <- data.frame(daytime = daytime, residuals = c(TempErr[,-36]), # place = rep(CanadianWeather$place, each = 365), region = rep(CanadianWeather$region, each = 365)) # # x11() # p0 <- ggplot (data = df.res, aes (x = daytime, y = residuals, colour = place)) # p0 <- p0 + geom_hline (yintercept = 0, linetype = "dashed") # p0 <- p0 + geom_line () # p0 <- p0 + labs (x = "Days", y = "Residuals") # p0 <- p0 + theme_bw () # print (p0) # Functional R^2 Rfd <- 1 - mean.fd((TempErrfd)^2)*mean.fd((center.fd(temp36fd))^2)^(-1) plot(Rfd, ylim = c(0, 0.8)) # ggplot # Rvalues <- eval.fd(daytime, Rfd) # # df.r2 <- data.frame(daytime = daytime, r2 = c(Rvalues)) # # x11() # p0 <- ggplot (data = df.r2, aes (x = daytime, y = r2)) # p0 <- p0 + geom_hline (yintercept = 0, linetype = "dashed") # p0 <- p0 + geom_line () # p0 <- p0 + labs (x = "Days", y = expression(R^2)) # p0 <- p0 + theme_bw () # print (p0) # F-Ratio F.res <- Fperm.fd(temp36fd, regionList, betaList) with(F.res,{ q = 0.95 ylims = c(min(c(Fvals, qval, qvals.pts)), max(c(Fobs, qval))) plot(argvals, Fvals, type = "l", ylim = ylims, col = 1, lwd = 2, xlab = "day", ylab = "F-statistic", cex.lab=1.5,cex.axis=1.5) lines(argvals, qvals.pts, lty = 3, col = 1, lwd = 2) abline(h = qval, lty = 2, col = 1, lwd = 2) legendstr = c("Observed Statistic", paste("pointwise", 1 - q, "critical value"), paste("maximum", 1 - q, "critical value")) legend(argvals[1], 1.2, legend = legendstr, col = c(1, 1, 1), lty = c(1, 3, 2), lwd = c(2, 2, 2)) } ) # ggplot # popisky <- c("Observed statistic", "pointwise 0.05 critical value", "maximum 0.05 critical value") # df.Fratio <- data.frame(daytime = F.res$argvals, Fratio = c(F.res$Fvals, F.res$qvals.pts, rep(F.res$qval, length(F.res$argvals))), # type = factor(rep(popisky, each = length(F.res$argvals)), ordered = TRUE, levels = popisky)) # # x11() # p0 <- ggplot (data = df.Fratio, aes (x = daytime, y = Fratio, linetype = type)) # p0 <- p0 + geom_line () # p0 <- p0 + theme_bw () # # p0 <- p0 + theme(legend.position = c(0.62, 0.45), panel.border = element_rect(colour = "black"), # # legend.box.background = element_rect(colour = "black"), legend.background = element_blank())#, legend.key.width = unit(10, "mm")) # p0 <- p0 + labs (x = "Days", y = "F-statistic", linetype = "") # print (p0) # podrobneji # df.Fratio$clr <- "black" # df.pom <- data.frame(daytime = F.res$argvals, F.all = c(F.res$Fnullvals), sample = rep(1:200, each = length(F.res$argvals)), # type = factor("Sample statistic (B=200)"), clr = "grey") # # # vsechny F-statistiky v jednotlivych permutacich # # plot 1 # p0 <- ggplot (data = df.Fratio[1:202,], aes (x = daytime, y = Fratio, linetype = type, colour = clr)) # p0 <- p0 + geom_line (data = df.pom, aes(x = daytime, y = F.all, group = sample), colour = "grey", linetype = 1) # p0 <- p0 + geom_line (data = df.pom, aes(x = daytime, y = F.all, group = sample)) # p0 <- p0 + geom_line () # p0 <- p0 + scale_colour_manual(values = c("black","grey")) # p0 <- p0 + guides(linetype = guide_legend(override.aes = list(linetype = c(1,3,1), colour = c("black","black","grey")), title = NULL)) # p0 <- p0 + guides(colour = FALSE) # p0 <- p0 + theme_bw () # p0 <- p0 + labs (x = "Days", y = "F-statistic", linetype = "") # print (p0) matplot(F.res$Fnullvals, type = "l") lines(F.res$qvals.pts, lwd = 2) # pointwise critical values # plot 2 x11() plot(F.res$Fnull, type = "b") #maxima abline(h = F.res$qval, col = "red") # ggplot # popisky2 <- c("Observed maximum F*", "maximum 0.05 critical value", "Sample maximum F*") # df.Fsamples <- data.frame(x = 1:200, y = c(rep(F.res$Fobs, 200), rep(F.res$qval, 200), F.res$Fnull), # type = factor(rep(popisky2, each = 200), # ordered = TRUE, levels = popisky2)) # # df.pom2 <- data.frame(x = 1:200, y = c(F.res$Fnull), type = factor("Sample maximum F*")) # # # x11() # p0 <- ggplot (data = df.Fsamples, aes (x = x, y = y, linetype = type)) # p0 <- p0 + geom_line () # p0 <- p0 + geom_point (data = df.pom2) # p0 <- p0 + scale_linetype_manual(values = c(1, 2, 1)) # p0 <- p0 + guides(linetype = guide_legend(override.aes = list(linetype = c(1, 2, 1)), title = NULL)) # p0 <- p0 + guides(colour = FALSE) # p0 <- p0 + theme_bw () # p0 <- p0 + labs (x = "Sample", y = "Maximum F*", linetype = "") # print (p0) # t-test ylim <- with(growth, range(hgtm, hgtf)) boys <- growth$hgtm girls <- growth$hgtf age <- growth$age n.t <- length(age) with(growth, matplot(age, hgtm[, 1:10], type='l', col = "blue", lty='dashed', ylab='height (cm)')) with(growth, matlines(age, hgtf[, 1:10], lty='solid', col = "red")) legend('topleft', legend=c('girls', 'boys'), lty=c('solid', 'dashed'), col=c("red", "blue")) # ggplot # df.growth <- data.frame(Age = age, Height = c(boys, girls), Sample = c(rep(1:ncol(boys), each = n.t), rep(1:ncol(girls), each = n.t)), # Sex = factor(c(rep("boys", n.t*ncol(boys)), rep("girls", n.t*ncol(girls))))) # # p0 <- ggplot(df.growth, aes(x = Age, y = Height, colour = Sex, group = interaction(Sample, Sex))) # p0 <- p0 + geom_line() # p0 <- p0 + labs (x = "Age [years]", y = "Height [cm]") # p0 <- p0 + theme_bw() # p0 <- p0 + scale_colour_manual(values = c("#00BFC4", "#F8766D")) # # p0 <- p0 + guides(linetype = guide_legend(override.aes = list(linetype = c(1,3,1), colour = c("black","black","grey")), title = NULL)) # p0 <- p0 + theme(legend.title=element_blank()) # p0 growthbasis <- create.bspline.basis(breaks = age, norder = 6) growfdPar <- fdPar(growthbasis, 3, 10^(-0.5)) boyssmooth <- smooth.basis(age, boys, growfdPar) girlssmooth <- smooth.basis(age, girls, growfdPar) T.res <- tperm.fd(boyssmooth$fd, girlssmooth$fd) matplot(T.res$Tnullvals, type = "l") lines(T.res$qvals.pts, lwd = 2) # pointwise critical values # ggplot # popisky <- c("Observed statistic", "pointwise 0.05 critical value", "maximum 0.05 critical value") # df.ttest <- data.frame(daytime = T.res$argvals, Tstatistic = c(T.res$Tvals, T.res$qvals.pts, rep(T.res$qval, length(T.res$argvals))), # type = factor(rep(popisky, each = length(T.res$argvals)), ordered = TRUE, levels = popisky)) # # x11() # p0 <- ggplot (data = df.ttest, aes (x = daytime, y = Tstatistic, linetype = type)) # p0 <- p0 + geom_line () # p0 <- p0 + theme_bw () # # p0 <- p0 + theme(legend.position = c(0.62, 0.45), panel.border = element_rect(colour = "black"), # # legend.box.background = element_rect(colour = "black"), legend.background = element_blank())#, legend.key.width = unit(10, "mm")) # p0 <- p0 + labs (x = "Age", y = "t-statistic", linetype = "") # print (p0) # # # # podrobneji # df.ttest$clr <- "black" # df.pom <- data.frame(daytime = T.res$argvals, T.all = c(T.res$Tnullvals), sample = rep(1:200, each = length(T.res$argvals)), # type = factor("Sample statistic (B=200)"), clr = "grey") # # # vsechny T-statistiky v jednotlivych permutacich # # plot 1 # p0 <- ggplot (data = df.ttest[1:202,], aes (x = daytime, y = Tstatistic, linetype = type, colour = clr)) # p0 <- p0 + geom_line (data = df.pom, aes(x = daytime, y = T.all, group = sample), colour = "grey", linetype = 1) # p0 <- p0 + geom_line (data = df.pom, aes(x = daytime, y = T.all, group = sample)) # p0 <- p0 + geom_line () # p0 <- p0 + scale_colour_manual(values = c("black","grey")) # p0 <- p0 + guides(linetype = guide_legend(override.aes = list(linetype = c(1,3,1), colour = c("black","black","grey")), title = NULL)) # p0 <- p0 + guides(colour = FALSE) # p0 <- p0 + theme_bw () # p0 <- p0 + labs (x = "Age", y = "t-statistic", linetype = "") # print (p0) # plot 2 x11() plot(T.res$Tnull, type = "b") #maxima abline(h = T.res$qval, col = "red") # ggplot # popisky2 <- c("Observed maximum T*", "maximum 0.05 critical value", "Sample maximum T*") # df.Tsamples <- data.frame(x = 1:200, y = c(rep(T.res$Tobs, 200), rep(T.res$qval, 200), T.res$Tnull), # type = factor(rep(popisky2, each = 200), # ordered = TRUE, levels = popisky2)) # # df.pom2 <- data.frame(x = 1:200, y = c(T.res$Tnull), type = factor("Sample maximum T*")) # # # x11() # p0 <- ggplot (data = df.Tsamples, aes (x = x, y = y, linetype = type)) # p0 <- p0 + geom_line () # p0 <- p0 + scale_linetype_manual(values = c(1, 2, 1)) # p0 <- p0 + guides(linetype = guide_legend(override.aes = list(linetype = c(1, 2, 1)), title = NULL)) # p0 <- p0 + guides(colour = FALSE) # p0 <- p0 + theme_bw () # p0 <- p0 + labs (x = "Sample", y = "Maximum T*", linetype = "") # p0 <- p0 + geom_point (data = df.pom2) # print (p0) ######################## # Problems ###################### load("rat3.RData") x <- rat3$intensity y <- rat3$epi ### Plot these data df <- data.frame(intensity = x, epi = c(y), day = factor(rep(colnames(y), each = 19)), sample = rep(1:79, each = length(x))) x11() p0 <- ggplot (data = df, aes (x = intensity, y = epi, colour = day, group = interaction(day, sample))) p0 <- p0 + geom_point () p0 <- p0 + labs (x = "Intensity", y = "EPI") p0 <- p0 + theme_bw () print (p0) # t-test # ggplot df.tt <- subset(df, is.element(day, c("SS4", "SS5"))) p0 <- ggplot(df.tt, aes(x = intensity, y = epi, colour = day, group = interaction(day, sample))) p0 <- p0 + geom_point() p0 <- p0 + labs (x = "Intensity", y = "EPI") p0 <- p0 + theme_bw() p0 <- p0 + scale_colour_manual(values = c("#00BFC4", "#F8766D")) p0 # choosing both groups ss4 <- epiSmooth$fd ind <- which(colnames(epiSmooth$fd$coefs) == "SS4") ss4$coefs <- ss4$coefs[, ind] ss4$fdnames$reps <- ss4$fdnames$reps[ind] ss5 <- epiSmooth$fd ind <- which(colnames(epiSmooth$fd$coefs) == "SS5") ss5$coefs <- ss5$coefs[, ind] ss5$fdnames$reps <- ss5$fdnames$reps[ind]