library(fda)
library(ggplot2)
library(plyr)

# SIMULATION
# Curves simulation

# 1. Wiener process
N <- 4
W.mat <- matrix(0, ncol = N, nrow = 10000)
for (n in 1:N) W.mat[, n] <- cumsum(rnorm(10000))/100
x11()
matplot(W.mat, xlab = "", ylab = "", type = "l", lwd = 2, main = "Random walks")
abline(h = 0, lty = "dashed")
B75.basis <- create.bspline.basis(rangeval = c(0, 10000), nbasis = 75)
tempSmooth1 <- smooth.basis(y = W.mat, fdParobj = B75.basis)
fdobjSmooth1 <- tempSmooth1$fd
plot(fdobjSmooth1, ylab = "", xlab = "", lty = 1, lwd = 2, main = "Smoothed random walks")

# 2. As a regression
ti <- seq(1, 365, length.out = 100)
gen_values <- function(x, n, sig) {
  X <- matrix(rep(x, times = n), ncol = n)
  eps <- matrix(rnorm(length(x)*n, 0, sig), ncol = n)
  return(sin(X/365) + eps)  # any regression function will do here
}
N <- 4
y <- gen_values(ti, N, 0.1)
matplot(ti, y, type = "l", ylab = "", xlab = "", lty = 1, lwd = 2, main = "Regression")
abline(h = 0, lty = "dashed")
rknots <- c(1, 365)
bbasis <- create.bspline.basis(rknots, 12)
tempSmooth2 <- smooth.basis(ti, y, bbasis)
fdobjSmooth2 <- tempSmooth2$fd
plot(fdobjSmooth2, ylab = "", xlab = "", lty = 1, lwd = 2, main = "Smoothed regression")

# 3. Basis expansion
# set the knots for the splines - these control the smoothness
knots <- seq(1, 365, length.out = 10)
rknots <- range(knots)
# order of the B-splines
norder <- 4
# dimension of the basis
nbasis <- length(knots) + norder - 2
# make the basis
bbasis <- create.bspline.basis(rknots, nbasis, norder, knots)
# sample size
N <- 4
# random coefficients
# coefs <- matrix(rgamma(nbasis*N, 5, 2), nbasis, N)
coefs <- matrix(rnorm(nbasis*N, 5, 1), nbasis, N)
# make the FD object
fdobjSmooth3 <- fd(coefs, bbasis)
x11()
plot(fdobjSmooth3, ylab = "", xlab = "", lty = 1, lwd = 2)

# 4. Gaussian process
library(MASS)
N <- 4
C <- function(x, y) 0.01 * exp(-0.001 * (x - y)^2)  # covariance function
M <- function(x) sin(x/365)                         # mean function
t <- seq(1, 365, length.out = 100)  # will sample the GP at these points
k <- length(t)
m <- M(t)
S <- matrix(nrow = k, ncol = k)
for (i in 1:k) for (j in 1:k) S[i, j] <- C(t[i], t[j])
z <- t(mvrnorm(N, m, S))
matplot(t, z, xlab = "", ylab = "", type = "l", lty = 1, lwd = 2, main = "Gaussian process")
abline(h = 0, lty = "dashed")
# transfer to FDA form
rknots <- c(1, 365)
bbasis <- create.bspline.basis(range(t), length(t) - 2, norder = 4)
tempSmooth4 <- smooth.basis(t, z, bbasis)
fdobjSmooth4 <- tempSmooth4$fd
# plot
plot(fdobjSmooth4, ylab = "", xlab = "", lty = 1, lwd = 2, main = "Gaussian process")
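# Aside (sketch, not in the original): the double loop that fills S above can
# be written in one line with outer(); with this vectorized C() the two
# constructions agree.
S.alt <- outer(t, t, C)  # vectorized covariance matrix
all.equal(S, S.alt)      # should be TRUE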
# 5. Random Gaussian process (posterior draws given a few observed points)
library(MASS)
calcSigma <- function(X1, X2, l = 100) {
  Sigma <- matrix(0, nrow = length(X1), ncol = length(X2))
  for (i in 1:nrow(Sigma)) {
    for (j in 1:ncol(Sigma)) Sigma[i, j] <- exp(-1/2*(abs(X1[i] - X2[j])/l)^2)
  }
  return(Sigma)
}
N <- 4
n.draws <- 100
x.star <- seq(1, 365, len = n.draws)
nval <- 5
# f <- data.frame(x = seq(1, 365, length.out = nval), y = rnorm(nval, 0, 1))
f <- data.frame(x = seq(1, 365, length.out = nval), y = sort(runif(nval, 0, 1)))
# the standard deviation of the observation noise
sigma.n <- 0.2
# compute the posterior mean and covariance
k.xx <- calcSigma(f$x, f$x)
k.xxs <- calcSigma(f$x, x.star)
k.xsx <- calcSigma(x.star, f$x)
k.xsxs <- calcSigma(x.star, x.star)
f.bar.star <- k.xsx %*% solve(k.xx + sigma.n^2*diag(1, ncol(k.xx))) %*% f$y
cov.f.star <- k.xsxs - k.xsx %*% solve(k.xx + sigma.n^2*diag(1, ncol(k.xx))) %*% k.xxs
values <- matrix(0, nrow = length(x.star), ncol = N)
for (i in 1:N) values[, i] <- mvrnorm(1, f.bar.star, cov.f.star)
values <- cbind(x = x.star, as.data.frame(values))
matplot(x = values[, 1], y = values[, -1], xlab = "", ylab = "", type = "l", lty = 1, lwd = 2,
        main = "Random Gaussian process")
abline(h = 0, lty = "dashed")
# lines(x.star, f.bar.star, col = "red", lwd = 2)
# points(f$x, f$y, pch = 20, col = "cyan", cex = 3)
# transfer to FDA form
bbasis <- create.bspline.basis(range(x.star), length(x.star) - 2, norder = 4)
tempSmooth5 <- smooth.basis(x.star, as.matrix(values[, -1]), bbasis)
fdobjSmooth5 <- tempSmooth5$fd
# plot
plot(fdobjSmooth5, ylab = "", xlab = "", lty = 1, lwd = 2, main = "Random Gaussian process")

### Simulation of regression ####
# First, generate the Xi(t) as a Gaussian process (method 4 above)
library(MASS)
N <- 30
C <- function(x, y) 0.01 * exp(-0.001 * (x - y)^2)  # covariance function
M <- function(x) sin(x/365)                         # mean function
t <- seq(1, 365, length.out = 100)  # will sample the GP at these points
k <- length(t)
m <- M(t)
S <- matrix(nrow = k, ncol = k)
for (i in 1:k) for (j in 1:k) S[i, j] <- C(t[i], t[j])
z <- t(mvrnorm(N, m, S))
# transfer to FDA form
rknots <- c(1, 365)
bbasis <- create.bspline.basis(range(t), nbasis = length(t) - 2, norder = 4)
tempSmooth <- smooth.basis(t, z, bbasis)
fdobjSmooth <- tempSmooth$fd
# plot Xi's
plot(fdobjSmooth)

# Regression Model
# Set beta(t) = 1 + 2t/365 - (t/365)^2
rozsah <- range(t)
# use a power basis to create Beta
bbb <- create.power.basis(rangeval = rozsah, nbasis = 3, exponents = c(0, 1, 2))
# coefficients
coef <- c(1, 2/365, -1/(365^2))
# coef <- c(1, 2, -1)
# create the FD object
betaobj <- fd(coef, bbb)
Beta0 <- eval.fd(t, betaobj)
# alternatively
Beta0 <- 1 + 2*t/365 - (t/365)^2
# Beta0 <- 1 + 2*t - (t/1)^2
# beta = sin(grid * 2 * pi)
# beta = -dnorm(grid, mean=.2, sd=.03) + 3*dnorm(grid, mean=.5, sd=.04) + dnorm(grid, mean=.75, sd=.05)
alpha <- -10
X <- t(eval.fd(t, fdobjSmooth))
# Riemann-sum approximation of alpha + int X(t) beta(t) dt + noise
y0 <- alpha + (X %*% Beta0)*(rozsah[2] - rozsah[1])/k + rnorm(N, 0, 5)
### model
alpha <- -10
y <- alpha + inprod(betaobj, fdobjSmooth) + rnorm(N, 0, 5)
y <- c(y)
# plotting
# fdobjSmootheval <- eval.fd(fdobj = fdobjSmooth, evalarg = t)
# SPECURVES1 <- t(fdobjSmootheval)
# library(rgl)
# open3d()
# plot3d(t, rep(y[1], times = length(t)), SPECURVES1[1, ], ylim = range(y), xlim = rknots,
#        zlim = range(SPECURVES1), ylab = "y", xlab = "time [days]", zlab = "x(t)", type = "n")
# crs <- rainbow(N + 5)
# srt <- rank(y)
# crs <- crs[srt]
# for (k in 1:N) {
#   plot3d(t, rep(y[k], times = length(t)), SPECURVES1[k, ], col = crs[k], add = T, type = "l", lwd = 3)
# }
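# Sanity check (sketch, not in the original): y0 and the inprod()-based y use
# the same linear predictor up to quadrature error; comparing the two
# noiseless predictors makes that visible.
lp.riemann <- c(X %*% Beta0) * (rozsah[2] - rozsah[1]) / k
lp.inprod <- c(inprod(betaobj, fdobjSmooth))
plot(lp.riemann, lp.inprod, xlab = "Riemann sum", ylab = "inprod")
abline(0, 1)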
# reconstruction

# 1. Low-Dimensional Regression Coefficient Function
# design matrix
xlist <- vector("list", 2)
xlist[[1]] <- rep(1, N)    # first "column" are 1's
xlist[[2]] <- fdobjSmooth  # second "column" are the Xi's
# setting the betas
cbasis <- create.constant.basis(rknots)
betabasis <- create.fourier.basis(rknots, 5)  # dim of basis for beta = 5
betalist <- vector("list", 2)
betalist[[1]] <- cbasis
betalist[[2]] <- betabasis
# regression
fRegressList1 <- fRegress(y, xlist, betalist)
betaestlist <- fRegressList1$betaestlist
betafd1 <- betaestlist[[2]]$fd
yhat1 <- fRegressList1$yhatfdobj
Beta1 <- eval.fd(t, betafd1)

# 2. Estimate Using a Roughness Penalty
Lcoef <- c(0, (2*pi/365)^2, 0)
harmaccelLfd <- vec2Lfd(Lcoef, rknots)
betabasis2 <- create.fourier.basis(rknots, 35)
lambda <- 10^12.5
betafdPar <- fdPar(betabasis2, harmaccelLfd, lambda)
betalist[[2]] <- betafdPar
# choosing lambda by cross-validation
# loglam <- seq(8, 12, 0.5)
# nlam <- length(loglam)
# SSE.CV <- matrix(0, nlam, 1)
# for (ilam in 1:nlam) {
#   lambda <- 10^loglam[ilam]
#   betalisti <- betalist
#   betafdPar2 <- betalisti[[2]]
#   betafdPar2$lambda <- lambda
#   betalisti[[2]] <- betafdPar2
#   fRegi <- fRegress.CV(y, xlist, betalisti)
#   SSE.CV[ilam] <- fRegi$SSE.CV
# }
# plot(loglam, SSE.CV, type = "b")
# best <- which.min(SSE.CV)
# lambdabest <- 10^loglam[best]
# lambda <- lambdabest
lambda <- 10^12
betafdPar <- fdPar(betabasis2, harmaccelLfd, lambda)
betalist[[2]] <- betafdPar
# regression
fRegressList2 <- fRegress(y, xlist, betalist)
betaestlist2 <- fRegressList2$betaestlist
yhat2 <- fRegressList2$yhatfdobj
betafd2 <- betaestlist2[[2]]$fd  # same as tempbetafd2
Beta2 <- eval.fd(t, betafd2)

# 3. Scalar Response Models by Functional Principal Components
daybasis365 <- create.fourier.basis(rknots, 365)
lambda <- 1e8
tempfdPar <- fdPar(daybasis365, harmaccelLfd, lambda)
tempfd <- smooth.basis(t, z, tempfdPar)$fd
lambda <- 1e6
tempfdPar <- fdPar(daybasis365, harmaccelLfd, lambda)
tempPCA <- pca.fd(tempfd, 4, tempfdPar)
harmonics <- tempPCA$harmonics
pcamodel <- lm(y ~ tempPCA$scores)
pcacoefs <- summary(pcamodel)$coef
# variance proportion (2 components would suffice)
cumsum(tempPCA$varprop)
# betafd3 <- pcacoefs[2, 1]*harmonics[1] + pcacoefs[3, 1]*harmonics[2] + pcacoefs[4, 1]*harmonics[3]
betafd3 <- pcacoefs[2, 1]*harmonics[1] + pcacoefs[3, 1]*harmonics[2] +
  pcacoefs[4, 1]*harmonics[3] + pcacoefs[5, 1]*harmonics[4]
Beta3 <- c(eval.fd(t, betafd3))
XI <- cbind(1, tempPCA$scores)
yhat3 <- XI %*% pcacoefs[, 1]

# 4. Nonparametric regression (leave-one-out predictions)
source("npfda.R")
X <- t(eval.fd(t, fdobjSmooth))
ytesthat <- numeric(length(y))
for (ind in 1:length(y)) {
  ytrain <- y[-ind]
  Xtrain <- X[-ind, ]
  ytest <- y[ind]
  Xtest <- X[ind, , drop = FALSE]  # keep the test curve a 1-row matrix
  result.cv1 <- funopare.kernel.cv(ytrain, Xtrain, Xtest, q = 0, range.grid = rknots,
                                   nknot = min((length(t) - 4) %/% 2, 10))
  ytesthat[ind] <- result.cv1$Predicted.values
}

# Betas Comparison
df.srov <- data.frame(Day = t,
                      Beta = c(Beta0, Beta1, Beta2, Beta3),
                      Type = factor(rep(c("0. Original", "1. Basis expansion",
                                          "2. Roughness Penalty", "3. Functional PCA"),
                                        each = length(t))))
p <- ggplot(df.srov, aes(x = Day, y = Beta, colour = Type))
p <- p + geom_line()
# p <- p + geom_hline(aes(yintercept = 0), linetype = "dashed")
p <- p + labs(x = "Days", y = "Beta")
p <- p + theme_bw()
p
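# Sketch (not in the original): a numeric companion to the plot above --
# approximate integrated squared error of each estimate against the true Beta0.
ise <- function(bhat) mean((c(bhat) - c(Beta0))^2) * diff(rknots)
c(basis = ise(Beta1), penalty = ise(Beta2), fpca = ise(Beta3))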
# Fits Comparison
df.fits.srov <- data.frame(y = c(y),
                           yhat = c(yhat1, yhat2, yhat3, ytesthat),
                           Type = factor(rep(c("1. Basis expansion", "2. Roughness Penalty",
                                               "3. Functional PCA", "4. Nonparametric"),
                                             each = length(y))))
p <- ggplot(df.fits.srov, aes(x = y, y = yhat, colour = Type))
p <- p + geom_point(size = 2)
p <- p + geom_abline(slope = 1, intercept = 0)
p <- p + labs(x = "y", y = "Estimates of y")
p <- p + theme_bw()
p

### Simulation of regression ####
# Simulation 2
# First, generate the Xi(t) as a Gaussian process (method 4 above)
library(MASS)
N <- 30
C <- function(x, y) 0.5 * exp(-10 * (x - y)^2)  # covariance function
M <- function(x) exp(x/2*pi)                    # mean function, i.e. exp(pi*x/2)
t <- seq(0, 1, length.out = 100)  # will sample the GP at these points
k <- length(t)
m <- M(t)
S <- matrix(nrow = k, ncol = k)
for (i in 1:k) for (j in 1:k) S[i, j] <- C(t[i], t[j])
z <- t(mvrnorm(N, m, S))
# transfer to FDA form
rozsah <- range(t)
bbasis <- create.bspline.basis(rozsah, nbasis = length(t) - 2, norder = 4)
tempSmooth <- smooth.basis(t, z, bbasis)
fdobjSmooth <- tempSmooth$fd
# plot Xi's
plot(fdobjSmooth)

# Regression Model
# Set beta(t) = sin(2pi*t)
# use a Fourier basis to create Beta
bbb <- create.fourier.basis(rangeval = rozsah)  # default nbasis = 3
# coefficients
coef <- c(0, 1, 0)
# create the FD object
betaobj <- fd(coef, bbb)
Beta0 <- eval.fd(t, betaobj)
# alternatively (the fda Fourier basis is normalized, so this overwrite puts
# Beta0 exactly on sin(2*pi*t))
Beta0 <- sin(2*pi*t)
alpha <- 5
X <- t(eval.fd(t, fdobjSmooth))
# Riemann-sum approximation of alpha + int X(t) beta(t) dt + noise
y0 <- alpha + (X %*% Beta0)*(rozsah[2] - rozsah[1])/k + rnorm(N, 0, .1)
### model
alpha <- 5
y <- alpha + inprod(betaobj, fdobjSmooth) + rnorm(N, 0, .1)
y <- c(y)
# plot: the commented-out rgl visualization from Simulation 1 applies here as well

# reconstruction

# 1. Low-Dimensional Regression Coefficient Function
# design matrix
xlist <- vector("list", 2)
xlist[[1]] <- rep(1, N)    # first "column" are 1's
xlist[[2]] <- fdobjSmooth  # second "column" are the Xi's
# setting the betas
cbasis <- create.constant.basis(rozsah)
betabasis <- create.fourier.basis(rozsah, 3)  # dim of basis for beta = 3
betalist <- vector("list", 2)
betalist[[1]] <- cbasis
betalist[[2]] <- betabasis
# regression
fRegressList1 <- fRegress(y, xlist, betalist)
betaestlist <- fRegressList1$betaestlist
betafd1 <- betaestlist[[2]]$fd
yhat1 <- fRegressList1$yhatfdobj
Beta1 <- eval.fd(t, betafd1)
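# Sketch (assumption: for a scalar response, yhatfdobj holds the fitted values
# as a plain vector, as the comparison plots below also rely on).
res1 <- y - c(yhat1)
1 - sum(res1^2)/sum((y - mean(y))^2)  # in-sample R^2 of the basis-expansion fit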
# 2. Estimate Using a Roughness Penalty
# curv.Lfd <- int2Lfd(2)
# betabasis2 <- create.bspline.basis(rknots, 75)
# lambda <- 10^-2
# betafdPar <- fdPar(bbasis, curv.Lfd, lambda)
# betalist[[2]] <- betafdPar
Lcoef <- c(0, (2*pi)^2, 0)
harmaccelLfd <- vec2Lfd(Lcoef, rozsah)
betabasis2 <- create.fourier.basis(rozsah, 25)
lambda <- 10^-8
betafdPar <- fdPar(betabasis2, harmaccelLfd, lambda)
betalist[[2]] <- betafdPar
# choosing lambda by cross-validation
# loglam <- seq(-10, -5, 0.5)
# nlam <- length(loglam)
# SSE.CV <- matrix(0, nlam, 1)
# for (ilam in 1:nlam) {
#   lambda <- 10^loglam[ilam]
#   betalisti <- betalist
#   betafdPar2 <- betalisti[[2]]
#   betafdPar2$lambda <- lambda
#   betalisti[[2]] <- betafdPar2
#   fRegi <- fRegress.CV(y, xlist, betalisti)
#   SSE.CV[ilam] <- fRegi$SSE.CV
# }
# plot CV
# plot(loglam, SSE.CV, type = "b")
# best <- which.min(SSE.CV)
# lambdabest <- 10^loglam[best]
# lambda <- lambdabest
lambda <- 10^(-6)
betafdPar <- fdPar(betabasis2, harmaccelLfd, lambda)
betalist[[2]] <- betafdPar
# regression
fRegressList2 <- fRegress(y, xlist, betalist)
betaestlist2 <- fRegressList2$betaestlist
yhat2 <- fRegressList2$yhatfdobj
betafd2 <- betaestlist2[[2]]$fd  # same as tempbetafd2
Beta2 <- eval.fd(t, betafd2)

# 3. Scalar Response Models by Functional Principal Components
daybasis365 <- create.fourier.basis(rozsah, 25)  # name kept from Simulation 1
lambda <- 1e-9
tempfdPar <- fdPar(daybasis365, harmaccelLfd, lambda)
tempfd <- smooth.basis(t, z, tempfdPar)$fd
lambda <- 1e-6
tempfdPar <- fdPar(daybasis365, harmaccelLfd, lambda)
tempPCA <- pca.fd(tempfd, 4, tempfdPar)
harmonics <- tempPCA$harmonics
pcamodel <- lm(y ~ tempPCA$scores)
pcacoefs <- summary(pcamodel)$coef
# variance proportion (2 components would suffice)
cumsum(tempPCA$varprop)
# betafd3 <- pcacoefs[2, 1]*harmonics[1] + pcacoefs[3, 1]*harmonics[2] + pcacoefs[4, 1]*harmonics[3]
betafd3 <- pcacoefs[2, 1]*harmonics[1] + pcacoefs[3, 1]*harmonics[2] +
  pcacoefs[4, 1]*harmonics[3] + pcacoefs[5, 1]*harmonics[4]
Beta3 <- c(eval.fd(t, betafd3))
XI <- cbind(1, tempPCA$scores)
yhat3 <- XI %*% pcacoefs[, 1]

# 4. Nonparametric regression (leave-one-out predictions)
source("npfda.R")
X <- t(eval.fd(t, fdobjSmooth))
ytesthat <- numeric(length(y))
for (ind in 1:length(y)) {
  ytrain <- y[-ind]
  Xtrain <- X[-ind, ]
  ytest <- y[ind]
  Xtest <- X[ind, , drop = FALSE]  # keep the test curve a 1-row matrix
  result.cv1 <- funopare.kernel1(ytrain, Xtrain, Xtest, bandwidth = 1, q = 0,
                                 range.grid = rozsah,
                                 nknot = min((length(t) - 4) %/% 2, 1))
  ytesthat[ind] <- result.cv1$Predicted.values
}

# Betas Comparison
df.srov <- data.frame(Day = t,
                      Beta = c(Beta0, Beta1, Beta2, Beta3),
                      Type = factor(rep(c("0. Original", "1. Basis expansion",
                                          "2. Roughness Penalty", "3. Functional PCA"),
                                        each = length(t))))
p <- ggplot(df.srov, aes(x = Day, y = Beta, colour = Type))
p <- p + geom_line()
# p <- p + geom_hline(aes(yintercept = 0), linetype = "dashed")
p <- p + labs(x = "Days", y = "Beta")
p <- p + theme_bw()
p
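# Sketch (not in the original): in-sample MSE for methods 1-3 next to the
# leave-one-out MSE of the nonparametric method; the latter is cross-validated,
# so the numbers are not strictly comparable.
mse <- function(yh) mean((y - c(yh))^2)
c(basis = mse(yhat1), penalty = mse(yhat2), fpca = mse(yhat3), nonpar = mse(ytesthat))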
# Fits Comparison
df.fits.srov <- data.frame(y = y,
                           yhat = c(yhat1, yhat2, yhat3, ytesthat),
                           Type = factor(rep(c("1. Basis expansion", "2. Roughness Penalty",
                                               "3. Functional PCA", "4. Nonparametric"),
                                             each = length(y))))
p <- ggplot(df.fits.srov, aes(x = y, y = yhat, colour = Type))
p <- p + geom_point(size = 2)
p <- p + geom_abline(slope = 1, intercept = 0)
p <- p + labs(x = "y", y = "Estimates of y")
p <- p + theme_bw()
p

#######################
#      Problems       #
#######################
set.seed(9000)
N <- 1000
t <- seq(0, 1, length = 101)
beta <- -dnorm(t, mean = 0.2, sd = 0.03) + 3*dnorm(t, mean = 0.5, sd = 0.04) +
  dnorm(t, mean = 0.75, sd = 0.05)
Beta0 <- beta
X <- matrix(0, nrow = N, ncol = length(t))
for (i2 in 1:N) {
  X[i2, ] <- X[i2, ] + rnorm(length(t), 0, 1)   # white noise
  X[i2, ] <- X[i2, ] + runif(1, 0, 5)           # random level
  X[i2, ] <- X[i2, ] + rnorm(1, 1, 0.2)*t       # random slope
  for (j2 in 1:10) {
    e <- rnorm(2, 0, 1/j2^2)
    X[i2, ] <- X[i2, ] + e[1]*sin((2*pi)*t*j2)  # random Fourier components
    X[i2, ] <- X[i2, ] + e[2]*cos((2*pi)*t*j2)
  }
}
matplot(t, t(X[1:10, ]), type = "l")
# Riemann sum of int X(t) beta(t) dt on a grid with spacing 0.01, plus noise
y <- (X %*% beta)*0.01 + rnorm(N, 0, .4)
y <- c(y)
# transfer the Xi's to FDA form
rozsah <- range(t)
# note: the breaks must span rangeval, so use the whole grid t as break points
bbasis <- create.bspline.basis(rozsah, breaks = t, norder = 4)
XSmooth <- smooth.basis(t, t(X), bbasis)
fdobjSmooth <- XSmooth$fd
# plot Xi's
x11()
plot(fdobjSmooth[1:10])
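# Sketch (assumption: reusing the roughness-penalty recipe from the simulations
# on this larger problem; the basis size and lambda below are illustrative
# guesses, not tuned values). Since y is a Riemann sum of int X(t) beta(t) dt
# with grid spacing 0.01, the functional regression should recover beta itself.
xlist <- list(rep(1, N), fdobjSmooth)
betalist <- list(create.constant.basis(rozsah),
                 fdPar(create.bspline.basis(rozsah, 25), int2Lfd(2), 1e-6))
fit <- fRegress(y, xlist, betalist)
BetaHat <- eval.fd(t, fit$betaestlist[[2]]$fd)
matplot(t, cbind(Beta0, BetaHat), type = "l", lty = 1,
        main = "True vs estimated beta")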