library(dplyr)
library(summarytools)
library(ggplot2)
# Read the raw sales data from the CSV file in the working directory.
Videogames <- read.csv(file = "Video_Games_Sales_as_at_22_Dec_2016.csv")
# Open a summarytools overview of every column in the data frame.
view(dfSummary(Videogames))
# Contingency table of the full data set: Genre in rows, Rating in columns.
table(Videogames[["Genre"]], Videogames[["Rating"]])
AO E E10+ EC K-A M RP T
2 0 0 0 0 0 0 0 0
Action 1182 1 416 481 1 0 608 0 681
Adventure 857 0 162 68 2 0 99 0 115
Fighting 411 0 8 19 0 0 49 0 362
Misc 868 0 457 167 5 1 13 0 239
Platform 319 0 358 144 0 0 3 0 64
Puzzle 238 0 289 43 0 0 0 0 10
Racing 377 0 585 96 0 0 18 1 172
Role-Playing 723 0 84 111 0 0 162 0 420
Shooter 304 0 48 58 0 0 565 0 348
Simulation 305 0 326 48 0 0 5 0 190
Sports 839 0 1188 107 0 0 16 0 198
Strategy 344 0 70 78 0 2 25 2 162
# Preview the rows rated "E" or "T", with unused factor levels dropped,
# in a listing at most 50 characters wide.
Videogames %>%
  filter(Rating %in% c("E", "T")) %>%
  droplevels() %>%
  glimpse(width = 50)
Observations: 6,952
Variables: 16
$ Name <fct> Wii Sports, Mario Kar...
$ Platform <fct> Wii, Wii, Wii, DS, Wi...
$ Year_of_Release <fct> 2006, 2008, 2009, 200...
$ Genre <fct> Sports, Racing, Sport...
$ Publisher <fct> Nintendo, Nintendo, N...
$ NA_Sales <dbl> 41.36, 15.68, 15.61, ...
$ EU_Sales <dbl> 28.96, 12.76, 10.93, ...
$ JP_Sales <dbl> 3.77, 3.79, 3.28, 6.5...
$ Other_Sales <dbl> 8.45, 3.29, 2.95, 2.8...
$ Global_Sales <dbl> 82.53, 35.52, 32.77, ...
$ Critic_Score <int> 76, 82, 80, 89, 58, 8...
$ Critic_Count <int> 51, 73, 73, 65, 41, 8...
$ User_Score <fct> 8, 8.3, 8, 8.5, 6.6, ...
$ User_Count <int> 322, 709, 192, 431, 1...
$ Developer <fct> "Nintendo", "Nintendo...
$ Rating <fct> E, E, E, E, E, E, E, ...
# Data: keep only the games rated "E" or "T"; droplevels() then removes
# the factor levels that no longer occur in the subset.
Videogames_Everyone_Teens <- droplevels(
  filter(Videogames, Rating %in% c("E", "T"))
)
# Plot: number of games per Genre, one bar per Rating placed side by side.
ggplot(data = Videogames_Everyone_Teens,
       mapping = aes(x = Genre, fill = Rating)) +
  geom_bar(position = "dodge")
# Same dodged bar chart as a stored plot object, built directly from the
# raw data, with the x-axis labels rotated 90 degrees.
Videogames_Everyone_Teens_Bar_Plot <- ggplot(
  data = droplevels(filter(Videogames, Rating %in% c("E", "T"))),
  mapping = aes(x = Genre, fill = Rating)
) +
  geom_bar(position = "dodge") +
  theme(axis.text.x = element_text(angle = 90))
# Display the stored plot.
Videogames_Everyone_Teens_Bar_Plot
# Contingency table of the "E"/"T" subset: Genre in rows, Rating in columns.
GenreRating <- table(Videogames_Everyone_Teens[["Genre"]],
                     Videogames_Everyone_Teens[["Rating"]])
# Print the counts.
GenreRating
E T
Action 416 681
Adventure 162 115
Fighting 8 362
Misc 457 239
Platform 358 64
Puzzle 289 10
Racing 585 172
Role-Playing 84 420
Shooter 48 348
Simulation 326 190
Sports 1188 198
Strategy 70 162
# Joint proportions: every cell divided by the grand total of the table.
prop.table(GenreRating, margin = NULL)
E T
Action 0.059838895 0.097957422
Adventure 0.023302647 0.016542002
Fighting 0.001150748 0.052071346
Misc 0.065736479 0.034378596
Platform 0.051495972 0.009205984
Puzzle 0.041570771 0.001438435
Racing 0.084148446 0.024741082
Role-Playing 0.012082854 0.060414269
Shooter 0.006904488 0.050057537
Simulation 0.046892980 0.027330265
Sports 0.170886076 0.028481013
Strategy 0.010069045 0.023302647
# Row proportions (each Genre row sums to 1), rounded to 3 decimals.
round(prop.table(GenreRating, margin = 1), digits = 3)
E T
Action 0.379 0.621
Adventure 0.585 0.415
Fighting 0.022 0.978
Misc 0.657 0.343
Platform 0.848 0.152
Puzzle 0.967 0.033
Racing 0.773 0.227
Role-Playing 0.167 0.833
Shooter 0.121 0.879
Simulation 0.632 0.368
Sports 0.857 0.143
Strategy 0.302 0.698
# Column proportions (each Rating column sums to 1), rounded to 2 decimals.
round(prop.table(GenreRating, margin = 2), digits = 2)
E T
Action 0.10 0.23
Adventure 0.04 0.04
Fighting 0.00 0.12
Misc 0.11 0.08
Platform 0.09 0.02
Puzzle 0.07 0.00
Racing 0.15 0.06
Role-Playing 0.02 0.14
Shooter 0.01 0.12
Simulation 0.08 0.06
Sports 0.30 0.07
Strategy 0.02 0.05
# Stacked bars rescaled to full height, so each Genre shows the share of
# each Rating rather than the raw count.
ggplot(data = Videogames_Everyone_Teens,
       mapping = aes(x = Genre, fill = Rating)) +
  geom_bar(position = "fill") +
  labs(y = "proportion")
# Replace the Rating codes "E" and "T" with the labels "Everyone" and "Teen".
Videogames_Everyone_Teens[["Rating"]] <- factor(
  Videogames_Everyone_Teens[["Rating"]],
  levels = c("E", "T"),
  labels = c("Everyone", "Teen")
)
# Bar chart of the number of games per Rating.
Videogames_Everyone_Teens %>%
  ggplot(aes(x = Rating)) +
  geom_bar()
# Bar chart of games per Genre, drawn in a separate panel for each Rating,
# with the x-axis labels rotated 90 degrees.
Videogames_Everyone_Teens %>%
  ggplot(aes(x = Genre)) +
  geom_bar() +
  facet_wrap(~ Rating) +
  theme(axis.text.x = element_text(angle = 90))
# Histogram of Critic_Score, one panel per Rating.
# The bin width is set explicitly instead of relying on the default of
# 30 bins, which triggers a "pick better value" message; boundary = 0
# aligns the bin edges on multiples of the bin width.
# NOTE(review): binwidth = 5 assumes scores on a roughly 0-100 scale,
# which the glimpse() output suggests; adjust if a different width tells
# the story better.
ggplot(Videogames_Everyone_Teens,
       aes(x = Critic_Score)) +
  geom_histogram(binwidth = 5, boundary = 0) +
  facet_wrap(~ Rating)
# Subset of the games whose Genre is Shooter, Strategy or Role-Playing.
Shooter_Strategy_RPG <- Videogames %>%
  filter(Genre %in% c("Shooter", "Strategy", "Role-Playing"))
# Preview the subset in a listing at most 50 characters wide.
Shooter_Strategy_RPG %>%
  glimpse(width = 50)
Observations: 3,506
Variables: 16
$ Name <fct> Pokemon Red/Pokemon B...
$ Platform <fct> GB, NES, GB, DS, GBA,...
$ Year_of_Release <fct> 1996, 1984, 1999, 200...
$ Genre <fct> Role-Playing, Shooter...
$ Publisher <fct> Nintendo, Nintendo, N...
$ NA_Sales <dbl> 11.27, 26.93, 9.00, 6...
$ EU_Sales <dbl> 8.89, 0.63, 6.18, 4.4...
$ JP_Sales <dbl> 10.22, 0.28, 7.20, 6....
$ Other_Sales <dbl> 1.00, 0.47, 0.71, 1.3...
$ Global_Sales <dbl> 31.37, 28.31, 23.10, ...
$ Critic_Score <int> NA, NA, NA, NA, NA, N...
$ Critic_Count <int> NA, NA, NA, NA, NA, N...
$ User_Score <fct> , , , , , , 3.4, , , ...
$ User_Count <int> NA, NA, NA, NA, NA, N...
$ Developer <fct> "", "", "", "", "", "...
$ Rating <fct> , , , , , , M, , , M,...
# Boxplots of Critic_Score, one box per Genre in the three-genre subset.
Shooter_Strategy_RPG %>%
  ggplot(aes(x = as.factor(Genre), y = Critic_Score)) +
  geom_boxplot()
# Overlaid density curves of Critic_Score, one per Genre; the fill is
# semi-transparent so overlapping curves stay visible.
Shooter_Strategy_RPG %>%
  ggplot(aes(x = Critic_Score, fill = as.factor(Genre))) +
  geom_density(alpha = 0.3)
# Histogram of EU_Sales for all games, zoomed to x in [0, 2] and
# y in [0, 1750].
# coord_cartesian() is used instead of xlim()/ylim(): scale limits drop
# observations outside the range and remove any bar that extends past a
# limit (the bin touching x = 0, or a bar taller than the y limit), while
# coordinate limits only zoom the view and keep every bar. Bars taller
# than the y limit are now drawn clipped instead of disappearing.
Videogames %>%
  ggplot(aes(x = EU_Sales)) +
  geom_histogram(binwidth = 0.01) +
  coord_cartesian(xlim = c(0, 2), ylim = c(0, 1750)) +
  ggtitle("Millions of sold copies videogames in the EU")
# Histogram of EU_Sales for the games whose Genre is "Sports", zoomed to
# x in [0, 3] and y in [0, 500].
# coord_cartesian() is used instead of xlim()/ylim(): scale limits drop
# observations outside the range and remove any bar that extends past a
# limit, while coordinate limits only zoom the view and keep every bar.
Videogames %>%
  filter(Genre == "Sports") %>%
  ggplot(aes(x = EU_Sales)) +
  geom_histogram(binwidth = 0.1) +
  coord_cartesian(xlim = c(0, 3), ylim = c(0, 500)) +
  ggtitle("Histogram for the millions of sold copies of sport videogames in the EU")
# Single boxplot of User_Count over all games; x is fixed at 1 so that
# every row falls into the same box.
ggplot(data = Videogames, mapping = aes(x = 1, y = User_Count)) +
  geom_boxplot()
# Boxplot of User_Count after removing outliers with Tukey's rule.
# A value is kept when it lies within [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR].
# The previous cut-off was 1.5 * IQR on its own, which is a distance and
# not a fence, so it discarded far more than the outliers.
# User_Outliers keeps its name and now holds the upper fence.
# Rows with a missing User_Count are dropped by filter(), as before.
Videogames_no_out <- Videogames %>%
  mutate(
    User_IQR = IQR(User_Count, na.rm = TRUE),
    User_Lower_Fence = quantile(User_Count, 0.25,
                                na.rm = TRUE, names = FALSE) -
      1.5 * User_IQR,
    User_Outliers = quantile(User_Count, 0.75,
                             na.rm = TRUE, names = FALSE) +
      1.5 * User_IQR
  ) %>%
  filter(User_Count >= User_Lower_Fence,
         User_Count <= User_Outliers) %>%
  ggplot(aes(x = 1, y = User_Count)) +
  geom_boxplot()
# Display the stored plot.
Videogames_no_out
# Single boxplot of User_Count for the games whose Developer is "Ubisoft",
# stored as a plot object.
Ubisoft_Reviews <- Videogames %>%
  filter(Developer == "Ubisoft") %>%
  ggplot(aes(x = 1, y = User_Count)) +
  geom_boxplot()
# Display the stored plot.
Ubisoft_Reviews
# Mean (rounded to 3 decimals) and median of NA_Sales per Genre for the
# games whose Developer is "Ubisoft".
# The summary columns are named explicitly; unnamed expressions would
# otherwise be titled with the raw expression text, e.g.
# `round(mean(NA_Sales), 3)`.
Videogames %>%
  filter(Developer == "Ubisoft") %>%
  group_by(Genre) %>%
  summarize(Mean_NA_Sales = round(mean(NA_Sales), 3),
            Median_NA_Sales = median(NA_Sales))
# Subset of the games whose Developer is "Ubisoft".
Ubisoft <- filter(Videogames, Developer == "Ubisoft")
# Boxplots of NA_Sales for the Ubisoft subset, one box per Genre.
ggplot(data = Ubisoft, mapping = aes(x = Genre, y = NA_Sales)) +
  geom_boxplot()
Data Wrangling with dplyr and tidyr Cheat Sheet
Wickham, H. (2009). ggplot2: Elegant Graphics for Data Analysis. Available online: http://moderngraphics11.pbworks.com/f/ggplot2-Book09hWickham.pdf.