library(fda) library(ggplot2) library(plyr) library(rgl) library(fdapace) # Auction Bids Study # 7-Day auctions for new Palm M515 PDAs # 149 Auctions, collected May-June 2003 load("ebay.RData") # plot the data x11() p0 <- ggplot (data = subset(df.ebay, is.element(Auction.ID, 1:10)), aes (x = BidTime, y = BidAmount, colour = Auction.ID)) p0 <- p0 + geom_point (size = 2) p0 <- p0 + geom_line (linetype = "dashed") p0 <- p0 + labs (x = "Bid Time [days]", y = "Bid Amount") p0 <- p0 + theme_bw () print (p0) # just prices ceny <- function(x, t){ y <- x[1] yt <- t[1] lev <- x[1] for (k in 2:length(x)){ if (x[k] > lev){ y <- c(y, x[k]) yt <- c(yt, t[k]) lev <- x[k] } } data.frame(am = y, tim = yt) } df.ebay.price <- ddply(df.ebay, .(Auction.ID), summarise, PriceAmount = ceny(BidAmount, BidTime)$am, PriceTime = ceny(BidAmount, BidTime)$tim) # plot the data x11() p0 <- ggplot (data = subset(df.ebay.price, is.element(Auction.ID, 1:10)), aes (x = PriceTime, y = PriceAmount, colour = Auction.ID)) p0 <- p0 + geom_point (size = 2) p0 <- p0 + geom_line (linetype = "dashed") p0 <- p0 + labs (x = "Price Time [days]", y = "Price Amount") p0 <- p0 + theme_bw () print (p0) # transfer to FDA form PTime <- df.ebay.price$PriceTime PAmount <- df.ebay.price$PriceAmount N <- 149 tecka <- vector("list", N) ypska <- vector("list", N) for (k in 1:N){ ind <- which(df.ebay.price$Auction.ID == k) tecka[[k]] <- df.ebay.price$PriceTime[ind] ypska[[k]] <- df.ebay.price$PriceAmount[ind] } ### 'fdapace' package res <- FPCA(ypska, tecka, optns = list(dataType = 'Sparse')) plot(res) CreatePathPlot(res, subset = 1:5, K = 3, pch = 20) # ggplot # ypska.hat <- t(fitted(res)) # # df.ebay.hat <- data.frame(TimeHat = res$workGrid, BidHat = c(ypska.hat), # Auction.ID = factor(rep(1:N, each = length(res$workGrid)))) # # x11() # p0 <- ggplot (data = subset(df.ebay.price, is.element(Auction.ID, 1:5)), aes (x = PriceTime, y = PriceAmount, colour = Auction.ID)) # p0 <- p0 + geom_point (size = 2) # p0 <- p0 + geom_line (data = subset(df.ebay.hat, is.element(Auction.ID, 1:5)), aes (x = TimeHat, y = BidHat)) # p0 <- p0 + labs (x = "Price Time [days]", y = "Price Amount") # p0 <- p0 + theme_bw () # print (p0) ### proposed source("functionsM7777.R") # mean estimation df <- df.ebay.price colnames(df) <- c("ID", "PriceAmount", "time") mu.hat <- mu.estimate(0.9, df$time, df$PriceAmount) # plot x11() x <- sort(unique(df$time)) plot(x, mu.hat, type = "l", col = "red") points(df$time, df$Price, pch = 20) # ggplot # df.mu <- data.frame(est = c(mu.hat), Time = x) # # x11() # p0 <- ggplot (data = df, aes (x = time, y = PriceAmount)) # p0 <- p0 + geom_point (colour = "black") # p0 <- p0 + geom_line(data = df.mu, aes (x = Time, y = est), colour = 2, size = 1) # p0 <- p0 + labs (x = "Time", y = "Mean Function Estimate") # p0 <- p0 + theme_bw () # print (p0) # covariance estimation # For 41. auction is just 1 observation, thus we put it away tecka <- tecka[-41] ypska <- ypska[-41] cov.hat.list <- cov.hat.bks(tecka, ypska, h.mu = 0.9, h.cov = 2.5) t0 <- cov.hat.list$t mu.hat <- cov.hat.list$mu.hat cov.hat <- cov.hat.list$cov.hat zz <- matrix(unlist(cov.hat), nrow = length(t0)) # plot covariance open3d() persp3d(t0, t0, zz, col = "grey") # FPCA ypska.hat <- data.fpca(tecka, ypska, mu.hat = mu.hat, Cmat = zz, t0 = t0, K = 6) # plot matplot(t0, ypska.hat[, 1:5], type = "l") # ggplot # df.ebay.hat <- data.frame(TimeHat = t0, BidHat = c(ypska.hat), # Auction.ID = factor(rep(1:length(tecka), each = length(t0)))) # # x11() # p0 <- ggplot (data = subset(df.ebay.price, is.element(Auction.ID, 1:5)), aes (x = PriceTime, y = PriceAmount, colour = Auction.ID)) # p0 <- p0 + geom_point (size = 2) # p0 <- p0 + geom_line (data = subset(df.ebay.hat, is.element(Auction.ID, 1:5)), aes (x = TimeHat, y = BidHat)) # p0 <- p0 + labs (x = "Price Time [days]", y = "Price Amount") # p0 <- p0 + theme_bw () # print (p0) ####################### # Problems # ####################### ## Motor Oil Data load("motoroil.RData") # plot the data x11() p0 <- ggplot (data = df.motor, aes (x = Mh, y = Fe, colour = id)) p0 <- p0 + geom_point (size = 2) p0 <- p0 + geom_line (linetype = "dashed") p0 <- p0 + labs (x = "Motor Time [hrs]", y = "Fe") p0 <- p0 + theme_bw () p0 <- p0 + facet_wrap( . ~ trial, ncol = 2) print (p0) N <- 29 tecka <- vector("list", N) ypska <- vector("list", N) kombinace <- ddply (df.motor, .(id), summarise, n = length(id)) tecka <- apply (kombinace, 1, function (x) { a <- subset (df.motor, id == x[1]) t <- a$Mh ind <- order(t) t <- t[ind] y <- a$Fe[ind] if (any(duplicated(t))) { ind <- which(duplicated(t)) t <- t[- ind] } return(t) }) ypska <- apply (kombinace, 1, function (x) { a <- subset (df.motor, id == x[1]) t <- a$M ind <- order(t) t <- t[ind] y <- a$Fe[ind] if (any(duplicated(t))) { ind <- which(duplicated(t)) y <- y[- ind] } return(y) }) ind <- ddply(df.motor, .(id, Mh), transform, dup = !duplicated(Mh)) df <- subset(ind, dup == TRUE)[, 1:4] colnames(df) <- c("ID", "time", "Fe", "trial") zaloha <- df$trial levels(df$trial)[4] <- "4" ulozto <- vector("list", 4) ulozto.fpca <- vector("list", 4) h.mu <- c(30, 30, 150, 90) h.cov <- c(420, 420, 850, 900) for (k in 1:4){ df.trial.k <- subset(df, trial == k) ttt <- ddply(df.trial.k, .(ID), summarise, n = length(time)) gut <- ttt$ID[which(ttt$n > 2)] df.trial.k.gut <- subset(df.trial.k, is.element(ID, gut)) # fdapace # prevod z data.frame na list abc <- ddply(df.trial.k.gut, .(ID), summarise, tecka = list(time), ypska = list(Fe)) teckak <- abc$tecka ypskak <- abc$ypska res <- FPCA(ypskak, teckak, optns = list(dataType = 'Sparse', maxK = 4)) lev <- as.double(unique(df.trial.k.gut$ID)) df.fpca <- data.frame(ID = rep(lev, each = length(res$workGrid)), time = res$workGrid, curve.est = c(t(fitted(res))), trial = k) ulozto.fpca[[k]] <- df.fpca # moje print(k) temp <- cov.hat.bks(teckak, ypskak, h.cov = h.cov[k], h.mu = h.mu[k]) cov.hat <- temp$cov.hat #+ diag(c(temp$var.hat)) t0 <- temp$t zz <- matrix(unlist(cov.hat), nrow = length(t0)) mu.hat <- temp$mu.hat ypska.hat <- data.fpca(teckak, ypskak, mu.hat = mu.hat, Cmat = zz, t0 = t0, K = 4) ypska.hat <- data.frame(ID = rep(lev, each = length(t0)), time = t0, mu.hat = mu.hat, trial = k, curve.est = c(ypska.hat)) ulozto[[k]] <- ypska.hat } data.hat.fpca <- data.frame() data.hat <- data.frame() for (k in 1:4){ data.hat <- rbind(data.hat, ulozto[[k]]) data.hat.fpca <- rbind(data.hat.fpca, ulozto.fpca[[k]]) } data.hat$trial <- as.factor(data.hat$trial) data.hat.fpca$trial <- as.factor(data.hat.fpca$trial) data.hat.fpca$ID <- as.factor(data.hat.fpca$ID) data.hat$ID <- as.factor(data.hat$ID) levels(data.hat$trial)[4] <- "4+" levels(data.hat.fpca$trial)[4] <- "4+" df$trial <- zaloha x11() p0 <- ggplot (data = df, aes (x = time, y = Fe, colour = ID)) p0 <- p0 + geom_point (size = 2) p0 <- p0 + geom_line (linetype = "dashed") p0 <- p0 + geom_line (data = data.hat, aes (x = time, y = curve.est)) + ylim(0, 60) # p0 <- p0 + geom_line (data = data.hat.fpca, aes (x = time, y = curve.est)) + ylim(0, 60) p0 <- p0 + labs (x = "Motor Time [hrs]", y = "Fe") p0 <- p0 + facet_wrap( ~ trial, nrow = 2, scales = "free_y") p0 <- p0 + guides(colour = FALSE) p0 <- p0 + theme_bw () print (p0) ################# # Put together ################# # x11() p0 <- ggplot (data = subset(df.motor, is.element(id, 1:10)), aes (x = Mh, y = Fe, colour = id)) p0 <- p0 + geom_point (size = 2) p0 <- p0 + geom_line (linetype = "dashed") p0 <- p0 + labs (x = "Motor Time [hrs]", y = "Fe") p0 <- p0 + theme_bw () print (p0)