getwd()
setwd("~/Data")
setwd("...//Data")
Tento přístup není příliš efektivní.
Proč bychom měli pracovat “projektovým způsobem”?
pools = read.csv("swimming_pools.csv")
str(pools)
## 'data.frame': 253 obs. of 8 variables:
## $ Name : Factor w/ 251 levels "A.B.C. Swim Centre",..: 3 20 24 41 44 59 69 77 83 85 ...
## $ Address : Factor w/ 228 levels "","1 Aquatic Drive",..: 153 1 184 188 1 187 68 14 149 180 ...
## $ Suburb : Factor w/ 192 levels "Albert Park",..: 29 11 13 75 29 44 52 58 68 71 ...
## $ Postcode : int 3083 3875 3354 3046 3083 3064 3108 3095 3199 3220 ...
## $ State : Factor w/ 2 levels "VIC","VIC ": 1 1 1 1 1 1 1 1 1 1 ...
## $ Business.Category: Factor w/ 23 levels "Australian Rules Football Club,Swimming Club",..: 20 20 20 20 20 20 20 20 20 20 ...
## $ LGA : Factor w/ 133 levels "","Alpine","Ararat (RC)",..: 115 26 4 82 114 116 60 89 27 41 ...
## $ Region : Factor w/ 21 levels "","Barwon S/W",..: 17 7 9 17 17 17 5 17 18 2 ...
pools = read.csv("swimming_pools.csv",
stringsAsFactors = FALSE)
str(pools)
## 'data.frame': 253 obs. of 8 variables:
## $ Name : chr "Aquabear AUSSI Masters" "Bairnsdale Amateur Swimming Club" "Ballarat GCO Swim Club" "Broadmeadows Swimming Clubs" ...
## $ Address : chr "La Trobe University Sports Centre" "" "PO Box 1709" "PO Box 605" ...
## $ Suburb : chr "Bundoora" "Bairnsdale" "Ballarat" "Glenroy" ...
## $ Postcode : int 3083 3875 3354 3046 3083 3064 3108 3095 3199 3220 ...
## $ State : chr "VIC" "VIC" "VIC" "VIC" ...
## $ Business.Category: chr "Swimming Club" "Swimming Club" "Swimming Club" "Swimming Club" ...
## $ LGA : chr "Whittlesea,Darebin,Banyule" "East Gippsland" "Ballarat" "Moreland" ...
## $ Region : chr "North and West Metropolitan Region" "Gippsland" "Grampians" "North and West Metropolitan Region" ...
hotdogs_1 = read.delim("hotdogs_1.txt",
header = TRUE)
hotdogs_2 = read.delim("hotdogs_2.txt",
header = FALSE,
col.names = c("type", "calories", "sodium"))
summary(hotdogs_1)
## Type Calories Sodium
## Beef :20 Min. : 86.0 Min. :144.0
## Meat :17 1st Qu.:132.0 1st Qu.:362.5
## Poultry:17 Median :145.0 Median :405.0
## Mean :145.4 Mean :424.8
## 3rd Qu.:172.8 3rd Qu.:503.5
## Max. :195.0 Max. :645.0
str(hotdogs_1)
## 'data.frame': 54 obs. of 3 variables:
## $ Type : Factor w/ 3 levels "Beef","Meat",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Calories: int 186 181 176 149 184 190 158 139 175 148 ...
## $ Sodium : int 495 477 425 322 482 587 370 322 479 375 ...
str(hotdogs_2)
## 'data.frame': 54 obs. of 3 variables:
## $ type : Factor w/ 3 levels "Beef","Meat",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ calories: int 186 181 176 149 184 190 158 139 175 148 ...
## $ sodium : int 495 477 425 322 482 587 370 322 479 375 ...
Cal <- hotdogs_1[which.min(hotdogs_1$Calories), ]
Cal
## Type Calories Sodium
## 50 Poultry 86 358
Sod = hotdogs_1[which.max(hotdogs_1$Sodium), ]
install.packages("readxl")
library(readxl)
excel_sheets() # Výčet listů v daném excelovském (.xls, .xlsx) souboru
read_excel() # Načtení souboru excelovského formátu
excel_sheets("latitude.xlsx")
## [1] "1700" "1900" "1900_N"
latitude_1 = read_excel("latitude.xlsx",
sheet = "1700")
latitude_1
## # A tibble: 246 x 2
## country latitude_1700
## <chr> <dbl>
## 1 Afghanistan 34.6
## 2 Akrotiri and Dhekelia 34.6
## 3 Albania 41.3
## 4 Algeria 36.7
## 5 American Samoa -14.3
## 6 Andorra 42.5
## 7 Angola -8.84
## 8 Anguilla 18.2
## 9 Antigua and Barbuda 17.1
## 10 Argentina -36.7
## # ... with 236 more rows
latitude_2 = read_excel("latitude.xlsx",
sheet = 2)
latitude_2
## # A tibble: 246 x 2
## country latitude_1900
## <chr> <dbl>
## 1 Afghanistan 34.6
## 2 Akrotiri and Dhekelia 34.6
## 3 Albania 41.3
## 4 Algeria 36.7
## 5 American Samoa -14.3
## 6 Andorra 42.5
## 7 Angola -8.84
## 8 Anguilla 18.2
## 9 Antigua and Barbuda 17.1
## 10 Argentina -36.7
## # ... with 236 more rows
lat_list = list(latitude_1, latitude_2)
read_excel()
.
latitude_3 = read_excel("latitude.xlsx",
sheet = 3,
col_names = FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
latitude_3
## # A tibble: 246 x 2
## ...1 ...2
## <chr> <dbl>
## 1 Afghanistan 34.6
## 2 Akrotiri and Dhekelia 34.6
## 3 Albania 41.3
## 4 Algeria 36.7
## 5 American Samoa -14.3
## 6 Andorra 42.5
## 7 Angola -8.84
## 8 Anguilla 18.2
## 9 Antigua and Barbuda 17.1
## 10 Argentina -36.7
## # ... with 236 more rows
latitude_4 = read_excel("latitude.xlsx",
sheet = 3,
col_names = c("country", "latitude"))
latitude_4
## # A tibble: 246 x 2
## country latitude
## <chr> <dbl>
## 1 Afghanistan 34.6
## 2 Akrotiri and Dhekelia 34.6
## 3 Albania 41.3
## 4 Algeria 36.7
## 5 American Samoa -14.3
## 6 Andorra 42.5
## 7 Angola -8.84
## 8 Anguilla 18.2
## 9 Antigua and Barbuda 17.1
## 10 Argentina -36.7
## # ... with 236 more rows
summary(latitude_3)
## ...1 ...2
## Length:246 Min. :-51.750
## Class :character 1st Qu.: 2.557
## Mode :character Median : 16.755
## Mean : 18.026
## 3rd Qu.: 39.051
## Max. : 78.000
## NA's :4
summary(latitude_4)
## country latitude
## Length:246 Min. :-51.750
## Class :character 1st Qu.: 2.557
## Mode :character Median : 16.755
## Mean : 18.026
## 3rd Qu.: 39.051
## Max. : 78.000
## NA's :4
read_excel("latitude.xlsx",
skip = 15)
## # A tibble: 231 x 2
## Azerbaijan `40.351999999999997`
## <chr> <dbl>
## 1 Bahamas 24.7
## 2 Bahrain 26.0
## 3 Bangladesh 23.9
## 4 Barbados 13.2
## 5 Belarus 53.5
## 6 Belgium 50.8
## 7 Belize 17.8
## 8 Benin 6.36
## 9 Bermuda 32.2
## 10 Bhutan 27.5
## # ... with 221 more rows
read_excel("latitude.xlsx",
skip = 15,
col_names = FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## # A tibble: 232 x 2
## ...1 ...2
## <chr> <dbl>
## 1 Azerbaijan 40.4
## 2 Bahamas 24.7
## 3 Bahrain 26.0
## 4 Bangladesh 23.9
## 5 Barbados 13.2
## 6 Belarus 53.5
## 7 Belgium 50.8
## 8 Belize 17.8
## 9 Benin 6.36
## 10 Bermuda 32.2
## # ... with 222 more rows
latitude_all <- cbind(latitude_1, latitude_2[-1])
tail(latitude_all)
## country latitude_1700 latitude_1900
## 241 Virgin Islands (U.S.) 17.736 17.736
## 242 Yemen, Rep. 15.228 15.228
## 243 Yugoslavia NA NA
## 244 Zambia -12.942 -12.942
## 245 Zimbabwe -17.876 -17.876
## 246 Åland 60.000 60.000
latitude_all_clean = na.omit(latitude_all)
summary(latitude_all_clean)
## country latitude_1700 latitude_1900
## Length:242 Min. :-51.750 Min. :-51.750
## Class :character 1st Qu.: 2.557 1st Qu.: 2.557
## Mode :character Median : 16.755 Median : 16.755
## Mean : 18.026 Mean : 18.026
## 3rd Qu.: 39.051 3rd Qu.: 39.051
## Max. : 78.000 Max. : 78.000
library(foreign)
demo_1 = read.spss("international.sav",
to.data.frame = TRUE)
## re-encoding from CP1252
head(demo_1)
## id country contint m_illit f_illit lifeexpt gdp
## 1 1 Argentina Americas 3.0 3.0 16 3375
## 2 2 Benin Africa 45.2 74.5 7 521
## 3 3 Burundi Africa 33.2 48.1 5 86
## 4 4 Chile Americas 4.2 4.4 14 4523
## 5 5 Dominican Republic Americas 12.0 12.7 12 2408
## 6 6 El Salvador Americas 17.6 22.9 11 2302
Jak nastavit “value labels” z SPSS jako “factors” v R?
Skrze argument “se.value.labels” v rámci příkazu “read.spss()”. Tento argument upřesňuje, zda mají být “value labels” konvertovány do R jako “factors”.
Načtení dat
demo_2 = read.spss("international.sav",
to.data.frame = TRUE,
use.value.labels = FALSE)
## re-encoding from CP1252
head(demo_2)
## id country contint m_illit f_illit lifeexpt gdp
## 1 1 Argentina 2 3.0 3.0 16 3375
## 2 2 Benin 1 45.2 74.5 7 521
## 3 3 Burundi 1 33.2 48.1 5 86
## 4 4 Chile 2 4.2 4.4 14 4523
## 5 5 Dominican Republic 2 12.0 12.7 12 2408
## 6 6 El Salvador 2 17.6 22.9 11 2302
Jak nastavit “value labels” z SPSS u “factors” v R u dílčích proměnných?
Summary demo_2$contint
summary(demo_2$contint)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 2 2 3 4
class(demo_2$contint)
## [1] "numeric"
demo_2$contint = as.factor(demo_2$contint)
summary(demo_2$contint)
## 1 2 3 4
## 9 5 3 3
class(demo_2$contint)
## [1] "factor"
continents = c("Africa", "Americas", "Asia", "Europe")
demo_2$contint = factor(demo_2$contint,
levels = c(1, 2, 3, 4),
labels = continents)
summary(demo_2$contint)
## Africa Americas Asia Europe
## 9 5 3 3
Packages (n.d.) Packages. In Quick-R. Staženo dne 2. 10. 2016 z http://www.statmethods.net/interface/packages.html
Prostý databázový soubor. (n.d.). In Wikipedia. Staženo dne 2. 10. 2016 z https://cs.wikipedia.org/wiki/Prost%C3%BD_datab%C3%A1zov%C3%BD_soubor