# ==============================================================================
# --------------------------------- SEMINAR 2 ----------------------------------
# ==============================================================================


# ----------------------------------- TASK 1 -----------------------------------


# Read the seminar data set; the relative path is resolved against the
# current working directory. summary() and str() print an overview of it.
comp <- read.csv("Computers.csv", sep = ",")
summary(comp)
str(comp)

# store the price and ram columns (presumably of `comp`) in their own
# variables:
# NOTE: both right-hand sides are intentionally left blank. Until they are
# filled in, R treats each dangling `<-` as continuing onto the next
# expression in the file, so the script cannot be sourced as-is.
price <- 
ram <- 


# ..................................... A ......................................


# compute the mean and the median of the price data sample

# useful functions (`?name` opens the help page for `name`):
?mean
?median

# TODO(student): fill in both right-hand sides
mean_price <- 
median_price <- 


# ..................................... B ......................................


# create your own function to compute quantiles in R
# use it to compute the quartiles of price

# HINT: explore the descriptive statistics pdf-file in IS outline

# useful functions (`?name` opens the help page for `name`):
?length
?sort
?ceiling

# Empirical a-quantile of a numeric sample, using the averaging definition
# that corresponds to quantile(..., type = 2).
#
# Arguments:
#   sample  numeric vector of observations (NA values are dropped by sort())
#   a       a single probability in [0, 1]
# Returns:
#   the a-quantile as a single number; NA if there are no observations
my_quantile <- function(sample, a) {
  stopifnot(
    is.numeric(sample),
    is.numeric(a), length(a) == 1, a >= 0, a <= 1
  )
  x <- sort(sample)   # sorted data sample
  n <- length(x)      # number of (non-missing) observations
  if (n == 0) {
    return(NA_real_)
  }
  pos <- n * a        # position of the quantile in the sorted sample
  idx <- round(pos)
  # n * a counts as a whole number when it is within rounding error of one;
  # an exact `==` on doubles could miss such cases
  if (abs(pos - idx) <= 4 * .Machine$double.eps * max(1, pos)) {
    # average the two neighbouring order statistics; the indices are clamped
    # so that a = 0 gives the minimum and a = 1 gives the maximum
    q <- (x[max(idx, 1)] + x[min(idx + 1, n)]) / 2
  } else {
    q <- x[ceiling(pos)]
  }
  return(q)
}

# quartiles of price computed with the hand-written function
my_quantile(sample = price, a = 0.25)
my_quantile(price, 0.5)
my_quantile(price, 0.75)

# Built-in R function:

# there are different definitions of quantiles, we use type = 2
# in the lecture slides there is type = 1
# (the outer parentheses print the assigned value; the second call overwrites
# `quant`, whose elements 2 and 4 are reused in the boxplot hints of part E)
(quant <- quantile(price, probs = 0.5, type = 2)) # median = 0.5-quantile
(quant <- quantile(price, probs = c(0, 0.25, 0.5, 0.75, 1), type = 2))


# ..................................... C ......................................


# create your own R functions to compute the trimmed and the winsorized mean
# compute the 0.1-trimmed and the 0.1-winsorized mean of price

# HINT: explore the descriptive statistics pdf-file in IS outline

# useful functions (`?name` opens the help page for `name`):
?floor

# k-trimmed mean: the mean of the sample after dropping the floor(n * k)
# smallest and the floor(n * k) largest observations.
#
# Arguments:
#   x  numeric vector of observations (NA values are dropped by sort())
#   k  a single trimming proportion with 0 <= k < 0.5
# Returns:
#   the trimmed mean as a single number; NaN if there are no observations
my_trimmed_mean <- function(x, k) {
  stopifnot(
    is.numeric(x),
    is.numeric(k), length(k) == 1, k >= 0, k < 0.5
  )
  x <- sort(x)              # sorted data sample
  n <- length(x)            # number of (non-missing) observations
  if (n == 0) {
    return(NaN)
  }
  n_cut <- floor(n * k)     # observations removed from each tail
  # k < 0.5 guarantees n_cut + 1 <= n - n_cut, so the range never reverses
  x_trimmed <- x[(n_cut + 1):(n - n_cut)]
  return(mean(x_trimmed))
}
# 0.1-trimmed mean of price computed with the hand-written function
my_trimmed_mean(price, 0.1)

# built-in R function (same trimming proportion, for comparison):

mean(price, trim = 0.1)

# k-winsorized mean: the floor(n * k) smallest observations are replaced by
# the smallest remaining value and the floor(n * k) largest observations by
# the largest remaining value; the mean is then taken over all n values.
#
# Arguments:
#   x  numeric vector of observations (NA values are dropped by sort())
#   k  a single winsorizing proportion with 0 <= k < 0.5
# Returns:
#   the winsorized mean as a single number; NaN if there are no observations
my_winsorized_mean <- function(x, k) {
  stopifnot(
    is.numeric(x),
    is.numeric(k), length(k) == 1, k >= 0, k < 0.5
  )
  x <- sort(x)              # sorted data sample
  n <- length(x)            # number of (non-missing) observations
  if (n == 0) {
    return(NaN)
  }
  n_rep <- floor(n * k)     # observations replaced in each tail
  lower <- x[n_rep + 1]     # smallest value that is kept
  upper <- x[n - n_rep]     # largest value that is kept
  x_new <- c(
    rep(lower, n_rep),
    x[(n_rep + 1):(n - n_rep)],
    rep(upper, n_rep)
  )
  return(mean(x_new))
}
# 0.1-winsorized mean of price computed with the hand-written function
my_winsorized_mean(price, 0.1)


# ..................................... D ......................................


# compute the following characteristics of the price variable

# use the formulas from the descriptive statistics pdf-file in IS outline

# useful functions (`?name` opens the help page for `name`):
?sum
?max
?min

# NOTE: every right-hand side below except `n` is intentionally left blank.
# NOTE: `range` and `IQR` are also the names of R functions; the variable
# `IQR` defined here is the one used in the boxplot hints of part E.
n <- length(price)
variance <- 
s <-              # standard deviation
range <- 
IQR <-            # interquartile range
MAD <- 

skew <- 
kur <- 


# ..................................... E ......................................


# create a boxplot for the price variable

# HINT: apply the boxplot() function to the price variable:

# TODO(student): fill in the right-hand side
b <- 

# try it again, specifying the main (title) and col (color) inputs:
# (the empty first argument is a placeholder for the data)

b <- boxplot(, main = "Boxplot of price", col = "yellow")

b$stats # are you able to interpret these values?

# HINT:
# (`quant` is the five-element result from part B, so quant[2] and quant[4]
# are the 0.25- and 0.75-quantiles; `IQR` is the variable from part D)
min(price)
quant[2] - 1.5 * IQR # lower than minimum: it is not the lower whisker

quant[2]
median(price)
quant[4]

quant[4] + 1.5 * IQR # lower than maximum: it is the upper whisker (nearly)
max(price)


# ..................................... F ......................................


# create a histogram for price

hist(price, col = "red")

# find out how to change the number of cells of the histogram
# or how to specify the breaks of the intervals:

# HINT: explore the BREAKS input parameter of hist()

?hist
# TODO(student): supply a value for `breaks`
hist(price, col = "red", breaks = )

# Add a kernel density estimate to the histogram

?density      # kernel density estimation built-in function
?lines

# TODO(student): fill in the right-hand side
d <-          # apply density() function to price

# plot the histogram together with the density line
# HINT: use the lines() command for the d variable after you plot the histogram


# ..................................... G ......................................


# create a table of relative frequencies for the different RAM sizes:

# useful functions (`?name` opens the help page for `name`):
?table
?nrow

# HINT: first apply table() to the RAM variable,
# then divide it by the number of rows:
# TODO(student): fill in the right-hand side and the divisor
freq <- 
relative_freq <- freq / 

# Gini index and entropy of the relative frequencies
# (log() is the natural logarithm):
gini <- 1 - sum(relative_freq^2)
entropy <- - sum(relative_freq * log(relative_freq))

# create a pie chart and a bar chart for the RAM size:

# HINT: apply these functions to the relative_freq or freq variables:
# change the color of the barplot (col input)
?pie
?barplot


# ..................................... H ......................................


# create a boxplot of price for each RAM size:
# (the formula price ~ ram draws one box per distinct value of ram; col = 1:6
# supplies six colors; the result overwrites the `b` from part E)

b <- boxplot(price ~ ram, main = "Boxplot of price", col = 1:6)


# ..................................... I ......................................


# create a histogram of price for each RAM size:

par(mfrow = c(2, 3)) # divides the plot into 6 sections (2 rows, 3 columns)

# TODO(student): fill in the 6 histogram commands here; you can use a for
# loop if you want

par(mfrow = c(1, 1)) # returns the layout to a single section


# ----------------------------------- TASK 2 -----------------------------------


# Download any colored image in jpg format. Load it into R using the jpeg
# package command readJPEG. Create an empty plot and insert the image into it
# using the rasterImage command. From the loaded data array corresponding
# to your figure, create three histograms for the red, green, and blue colors.
# Add a kernel density estimate to each histogram. Place all three histograms
# into one figure.

# install.packages("jpeg")
library(jpeg)
# the relative path "img.jpg" is resolved against the current working directory
img <- readJPEG("img.jpg", native = FALSE)

?rasterImage

par(mar = c(0, 0, 0, 0)) # set the plot margins to zero
plot(1:2, type = 'n', xlab = "", ylab = "", xaxt = 'n', yaxt = 'n')
# type = 'n' prepares an EMPTY plot (nothing is drawn)
rasterImage(img, 1, 1, 2, 2) # draws the image itself

str(img)
# slices along the third dimension of img; presumably the red, green and blue
# channels in that order - confirm against the str(img) output
# NOTE: `b` replaces the boxplot object stored under the same name in Task 1
r <- img[, , 1]
g <- img[, , 2]
b <- img[, , 3]

# one row of three panels, with margins large enough for axes and titles
par(mfrow = c(1, 3), mar = c(3, 2, 3, 1))

# TODO(student): fill in the histogram commands for each color

par(mfrow = c(1, 1), mar = c(5.1, 4.1, 4.1, 2.1)) # default setting