Tidyr
suppressWarnings(library(tidyr))
# Look up the help pages for the two reshaping verbs used in this section.
?gather
?spread
# Stack columns 2 through 4 of `cases` into key/value pairs: the former
# column names go into "year" and the cell values into "n".
k <- gather(cases, key = "year", value = "n", 2:4)
kable(k)
FR |
2011 |
7000 |
DE |
2011 |
5800 |
US |
2011 |
15000 |
FR |
2012 |
6900 |
DE |
2012 |
6000 |
US |
2012 |
14000 |
FR |
2013 |
7000 |
DE |
2013 |
6200 |
US |
2013 |
13000 |
# Widen `pollution`: every distinct value of "size" becomes its own column,
# filled with the matching "amount".
PollutionTable <- spread(pollution, key = "size", value = "amount")
kable(PollutionTable)
Beijing |
121 |
56 |
London |
22 |
16 |
New York |
23 |
14 |
# Split the `date` column of `storms` at each "-" into three new columns.
SepStorms <- separate(
  storms,
  col = date,
  into = c("year", "month", "day"),
  sep = "-"
)
kable(SepStorms)
Alberto |
110 |
1007 |
2000 |
08 |
03 |
Alex |
45 |
1009 |
1998 |
07 |
27 |
Allison |
65 |
1005 |
1995 |
06 |
03 |
Ana |
40 |
1013 |
1997 |
06 |
30 |
Arlene |
50 |
1010 |
1999 |
06 |
11 |
Arthur |
45 |
1010 |
1996 |
06 |
17 |
# Paste year, month and day back into a single "date" column, joined by "-".
UniteStorms <- unite(
  SepStorms,
  col = "date",
  year, month, day,
  sep = "-"
)
kable(UniteStorms)
Alberto |
110 |
1007 |
2000-08-03 |
Alex |
45 |
1009 |
1998-07-27 |
Allison |
65 |
1005 |
1995-06-03 |
Ana |
40 |
1013 |
1997-06-30 |
Arlene |
50 |
1010 |
1999-06-11 |
Arthur |
45 |
1010 |
1996-06-17 |
Dplyr
suppressWarnings(library(dplyr))
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
suppressWarnings(library(nycflights13))
# Keep only the `storm` and `pressure` columns of `storms`.
StormPressure <- select(storms, storm, pressure)
# Select every column except `storm`; the result is printed, not stored.
select(storms, -storm)
## Source: local data frame [6 x 3]
##
## wind pressure date
## (int) (int) (date)
## 1 110 1007 2000-08-03
## 2 45 1009 1998-07-27
## 3 65 1005 1995-06-03
## 4 40 1013 1997-06-30
## 5 50 1010 1999-06-11
## 6 45 1010 1996-06-17
# Take the contiguous run of columns from `type` through `model` in `planes`.
Avion <- select(planes, type:model)
# Show only the first rows of the selection.
Avion %>%
  head() %>%
  kable()
Fixed wing multi engine |
EMBRAER |
EMB-145XR |
Fixed wing multi engine |
AIRBUS INDUSTRIE |
A320-214 |
Fixed wing multi engine |
AIRBUS INDUSTRIE |
A320-214 |
Fixed wing multi engine |
AIRBUS INDUSTRIE |
A320-214 |
Fixed wing multi engine |
EMBRAER |
EMB-145LR |
Fixed wing multi engine |
AIRBUS INDUSTRIE |
A320-214 |
# Rows of `Avion` whose manufacturer is exactly "EMBRAER".
Avion %>%
  filter(manufacturer == "EMBRAER")
## Source: local data frame [299 x 3]
##
## type manufacturer model
## (chr) (chr) (chr)
## 1 Fixed wing multi engine EMBRAER EMB-145XR
## 2 Fixed wing multi engine EMBRAER EMB-145LR
## 3 Fixed wing multi engine EMBRAER EMB-145XR
## 4 Fixed wing multi engine EMBRAER EMB-145XR
## 5 Fixed wing multi engine EMBRAER EMB-145XR
## 6 Fixed wing multi engine EMBRAER EMB-145XR
## 7 Fixed wing multi engine EMBRAER EMB-145XR
## 8 Fixed wing multi engine EMBRAER EMB-145XR
## 9 Fixed wing multi engine EMBRAER EMB-145XR
## 10 Fixed wing multi engine EMBRAER EMB-145XR
## .. ... ... ...
# Conditions separated by commas must all hold: EMBRAER planes whose model
# is one of the two listed variants.
filter(
  planes,
  manufacturer == "EMBRAER",
  model %in% c("EMB-145XR", "EMB-145LR")
)
## Source: local data frame [218 x 9]
##
## tailnum year type manufacturer model engines
## (chr) (int) (chr) (chr) (chr) (int)
## 1 N10156 2004 Fixed wing multi engine EMBRAER EMB-145XR 2
## 2 N10575 2002 Fixed wing multi engine EMBRAER EMB-145LR 2
## 3 N11106 2002 Fixed wing multi engine EMBRAER EMB-145XR 2
## 4 N11107 2002 Fixed wing multi engine EMBRAER EMB-145XR 2
## 5 N11109 2002 Fixed wing multi engine EMBRAER EMB-145XR 2
## 6 N11113 2002 Fixed wing multi engine EMBRAER EMB-145XR 2
## 7 N11119 2002 Fixed wing multi engine EMBRAER EMB-145XR 2
## 8 N11121 2003 Fixed wing multi engine EMBRAER EMB-145XR 2
## 9 N11127 2003 Fixed wing multi engine EMBRAER EMB-145XR 2
## 10 N11137 2003 Fixed wing multi engine EMBRAER EMB-145XR 2
## .. ... ... ... ... ... ...
## Variables not shown: seats (int), speed (int), engine (chr)
# Narrow `planes` to the three columns needed below.
Planes2 <- planes %>%
  select(manufacturer, engines, seats)
# Append a derived column. Note that `PPlPerSeat`, as computed here, is the
# number of seats divided by the number of engines.
PlanesMut <- Planes2 %>%
  mutate(PPlPerSeat = seats / engines)
PlanesMut %>%
  head() %>%
  kable()
EMBRAER |
2 |
55 |
27.5 |
AIRBUS INDUSTRIE |
2 |
182 |
91.0 |
AIRBUS INDUSTRIE |
2 |
182 |
91.0 |
AIRBUS INDUSTRIE |
2 |
182 |
91.0 |
EMBRAER |
2 |
55 |
27.5 |
AIRBUS INDUSTRIE |
2 |
182 |
91.0 |
# Collapse `planes` to a single row of summary statistics for `seats`.
planes %>%
  summarise(
    median = median(seats),
    variance = var(seats),
    mean = mean(seats),
    sd = sd(seats),
    n = n()
  )
## Source: local data frame [1 x 5]
##
## median variance mean sd n
## (dbl) (dbl) (dbl) (dbl) (int)
## 1 149 5425.055 154.3164 73.65497 3322
# Sort the rows of `storms` by `wind`, lowest value first.
storms %>%
  arrange(wind)
## Source: local data frame [6 x 4]
##
## storm wind pressure date
## (chr) (int) (int) (date)
## 1 Ana 40 1013 1997-06-30
## 2 Alex 45 1009 1998-07-27
## 3 Arthur 45 1010 1996-06-17
## 4 Arlene 50 1010 1999-06-11
## 5 Allison 65 1005 1995-06-03
## 6 Alberto 110 1007 2000-08-03
The Pipe Operator
# Ordinary function-call form: the data frame is the first argument.
select(airports, name, alt)
## Source: local data frame [1,397 x 2]
##
## name alt
## (chr) (int)
## 1 Lansdowne Airport 1044
## 2 Moton Field Municipal Airport 264
## 3 Schaumburg Regional 801
## 4 Randall Airport 523
## 5 Jekyll Island Airport 11
## 6 Elizabethton Municipal Airport 1593
## 7 Williams County Airport 730
## 8 Finger Lakes Regional Airport 492
## 9 Shoestring Aviation Airfield 1000
## 10 Jefferson County Intl 108
## .. ... ...
# Piped form: `%>%` passes `airports` in as the first argument of select().
airports %>%
  select(name, alt)
## Source: local data frame [1,397 x 2]
##
## name alt
## (chr) (int)
## 1 Lansdowne Airport 1044
## 2 Moton Field Municipal Airport 264
## 3 Schaumburg Regional 801
## 4 Randall Airport 523
## 5 Jekyll Island Airport 11
## 6 Elizabethton Municipal Airport 1593
## 7 Williams County Airport 730
## 8 Finger Lakes Regional Airport 492
## 9 Shoestring Aviation Airfield 1000
## 10 Jefferson County Intl 108
## .. ... ...
# Airports whose `alt` value is at least 2000.
A <- filter(airports, alt >= 2000)
kable(head(A))
2G9 |
Somerset County Airport |
40.03887 |
-79.0150 |
2275 |
-5 |
A |
36U |
Heber City Municipal Airport |
40.48181 |
-111.4288 |
5637 |
-6 |
A |
4U9 |
Dell Flight Strip |
44.73575 |
-112.7200 |
6007 |
-7 |
A |
6S0 |
Big Timber Airport |
45.80639 |
-109.9811 |
4492 |
-7 |
A |
ABQ |
Albuquerque International Sunport |
35.04022 |
-106.6092 |
5355 |
-7 |
A |
AIA |
Alliance Municipal Airport |
42.05333 |
-102.8039 |
3931 |
-7 |
A |
Unit of Analysis
# One summary row per city: mean and total of `amount`, plus the row count.
pollution %>%
  group_by(city) %>%
  summarise(
    mean = mean(amount),
    sum = sum(amount),
    n = n()
  )
## Source: local data frame [3 x 4]
##
## city mean sum n
## (chr) (dbl) (dbl) (int)
## 1 Beijing 88.5 177 2
## 2 London 19.0 38 2
## 3 New York 18.5 37 2
# One summary row per manufacturer. sd() yields NA for a manufacturer with
# a single plane, as the printed result shows.
planes %>%
  group_by(manufacturer) %>%
  summarise(
    mean = mean(seats),
    sd = sd(seats),
    n = n()
  )
## Source: local data frame [35 x 4]
##
## manufacturer mean sd n
## (chr) (dbl) (dbl) (int)
## 1 AGUSTA SPA 8.0000 NA 1
## 2 AIRBUS 221.2024 81.4309017 336
## 3 AIRBUS INDUSTRIE 187.4025 23.8565154 400
## 4 AMERICAN AIRCRAFT INC 2.0000 0.0000000 2
## 5 AVIAT AIRCRAFT INC 2.0000 NA 1
## 6 AVIONS MARCEL DASSAULT 12.0000 NA 1
## 7 BARKER JACK L 2.0000 NA 1
## 8 BEECH 9.5000 0.7071068 2
## 9 BELL 8.0000 4.2426407 2
## 10 BOEING 175.1877 59.4688097 1630
## .. ... ... ... ...
Joining Data
# Place the columns of `z` next to those of `y`; rows are paired by position.
Colors <- bind_cols(y, z)
# Render the combined table.
kable(Colors)
# Stack the rows of `z` underneath the rows of `y`.
Cr <- bind_rows(y, z)
kable(Cr)

# The set operations below are the dplyr versions (they mask the base ones
# once dplyr is attached) and compare whole rows.
# Rows that appear in `y` or in `z`, without duplicates.
Un <- union(y, z)
kable(Un)

# Rows that appear in both `y` and `z`.
In <- intersect(y, z)
kable(In)

# Rows of `y` that do not appear in `z`.
Set <- setdiff(y, z)
kable(Set)
# Left join: keep every row of `songs` and add the `artists` columns matched
# on "name"; songs without a matching artist get NA in the added columns.
AName <- songs %>%
  left_join(artists, by = "name")
kable(AName)
Across the Universe |
John |
guitar |
Come Together |
John |
guitar |
Hello, Goodbye |
Paul |
bass |
Peggy Sue |
Buddy |
NA |
# Left join on a two-column key: rows match only when both "first" and
# "last" agree.
LFL <- songs2 %>%
  left_join(artists2, by = c("first", "last"))
kable(LFL)
Across the Universe |
John |
Lennon |
guitar |
Come Together |
John |
Lennon |
guitar |
Hello, Goodbye |
Paul |
McCartney |
bass |
Peggy Sue |
Buddy |
Holly |
NA |
# Inner join: keep only the songs whose "name" has a match in `artists`.
InnerN <- songs %>%
  inner_join(artists, by = "name")
kable(InnerN)
Across the Universe |
John |
guitar |
Come Together |
John |
guitar |
Hello, Goodbye |
Paul |
bass |
# Semi join: keep the rows of `songs` that have a match in `artists`,
# without adding any columns from `artists`.
SemNa <- songs %>%
  semi_join(artists, by = "name")
kable(SemNa)
Across the Universe |
John |
Come Together |
John |
Hello, Goodbye |
Paul |
# Anti join: keep the rows of `songs` that have no match in `artists`.
AJ <- songs %>%
  anti_join(artists, by = "name")
kable(AJ)