Rの構文
ドットインストールで学んだ。
変数代入
<- で代入する。PHPなどの = と同じ意味。
> x <- 5
> x
[1] 5
> y <- 0
> ls()
[1] "x" "y"
> rm(x)
> z <- 5
> rm(y, z)
> ls()
character(0)
> NULL
NULL
> NA
[1] NA
> NaN
[1] NaN
> Inf
[1] Inf
四則演算
> 5 + 3
[1] 8
> 10 / 3
[1] 3.333333
> 10 %% 3
[1] 1
> 10 %/% 3
[1] 3
> 2 ^ 4
[1] 16
> cos(1)
[1] 0.5403023
> sqrt(2)
[1] 1.414214
> round(2.555)
[1] 3
> 'abbccccc'
[1] "abbccccc"
> paste('a', 'b', 'c')
[1] "a b c"
> paste('a', 'b', 'c', sep='')
[1] "abc"
> paste('a', 'b', 'c', sep='/')
[1] "a/b/c"
> x <- 5
> x
[1] 5
> as.character(x)
[1] "5"
> s <- "5"
> as.numeric()
numeric(0)
> as.numeric(s)
[1] 5
ベクトル
配列みたいだけど、配列とは違うらしい(要素はすべて同じ型になり、添字は1から始まる)
> v <- c(1, 3, 5)
> v
[1] 1 3 5
> v[2]
[1] 3
> v[1]
[1] 1
> v[3]
[1] 5
> v[2] <- 10
> v[2]
[1] 10
> v
[1] 1 10 5
> v <- c("abc", "bdee")
> v <- c(TRUE, FALSE)
> v
[1] TRUE FALSE
> length(v)
[1] 2
> v <- 1:10
> v
[1] 1 2 3 4 5 6 7 8 9 10
> v <- 1:-10
> v
[1] 1 0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10
> v <- seq(1, 10)
> v
[1] 1 2 3 4 5 6 7 8 9 10
> v <- seq(1, 10, by=2)
> v
[1] 1 3 5 7 9
> v <- seq(1, 10, length=5)
> v
[1] 1.00 3.25 5.50 7.75 10.00
> v <- rep(1:5, times=3)
> v
[1] 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5
> v <- rep(1:5, length=10)
> v
[1] 1 2 3 4 5 1 2 3 4 5
ベクトルで掛け算など
> x <- c(1, 3, 5)
> y <- c(2, 3, 4)
> x * 2
[1] 2 6 10
> x * 10
[1] 10 30 50
> x - 1
[1] 0 2 4
> x - 10
[1] -9 -7 -5
> x + y
[1] 3 6 9
> x
[1] 1 3 5
> x * y
[1] 2 9 20
> x > y
[1] FALSE FALSE TRUE
> x < y
[1] TRUE FALSE FALSE
> x == y
[1] FALSE TRUE FALSE
> x != y
[1] TRUE FALSE TRUE
> x === y
エラー: 予想外の '=' です in "x ==="
> x %in% y
[1] FALSE TRUE FALSE
> union(x, y)
[1] 1 3 5 2 4
> intersect(x,y )
[1] 3
> intersect(x,y)
[1] 3
> setdiff(x, y)
[1] 1 5
> setequal(x, y)
[1] FALSE
因子ベクトル
> x <- c('S', 'M', 'L', 'M', 'L')
> x
[1] "S" "M" "L" "M" "L"
> x.fc <- factor(x)
> x.fc
[1] S M L M L
Levels: L M S
> levels(x.fc)
[1] "L" "M" "S"
> x.fc <- factor(x, levels=c("S", "M", "L"))
> x.fc
[1] S M L M L
Levels: S M L
> x.fc <- orderd(x, levels=c("S", "M", "L"))
エラー: 関数 "orderd" を見つけることができませんでした
> x.fc <- ordered(x, levels=c("S", "M", "L"))
> x.fc
[1] S M L M L
Levels: S < M < L
行列
> x <- matrix(c(1:6), nrow=3, ncol=2)
> x
[,1] [,2]
[1,] 1 4
[2,] 2 5
[3,] 3 6
> x <- matrix(c(1:6), nrow=3, ncol=2, byrow=TRUE)
> x
[,1] [,2]
[1,] 1 2
[2,] 3 4
[3,] 5 6
> x <- rbind(c(1,2), 3:4, 5:6)
> x
[,1] [,2]
[1,] 1 2
[2,] 3 4
[3,] 5 6
> x <- cbind(c(1,2), 3:4, 5:6)
> x
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
> x + 1
[,1] [,2] [,3]
[1,] 2 4 6
[2,] 3 5 7
> x
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
> 1 / x
[,1] [,2] [,3]
[1,] 1.0 0.3333333 0.2000000
[2,] 0.5 0.2500000 0.1666667
> dim(x)
[1] 2 3
> nrow(x)
[1] 2
> ncol(x)
[1] 3
> x[, 1]
[1] 1 2
> x[2, ]
[1] 2 4 6
> x[1, 2]
[1] 3
> x[1, 1:2]
[1] 1 3
> x[1, c(1:2)]
[1] 1 3
> x[1, c(1:3)]
[1] 1 3 5
> x[1, c(1, 3)]
[1] 1 5
> x[1, 2] <- 10
> x
[,1] [,2] [,3]
[1,] 1 10 5
[2,] 2 4 6
リストとデータフレーム
> x <- list(5:10, "abc", matrix(1:6, nrow=2, ncol=3))
> x
[[1]]
[1] 5 6 7 8 9 10
[[2]]
[1] "abc"
[[3]]
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
> x[1]
[[1]]
[1] 5 6 7 8 9 10
> x[[1]]
[1] 5 6 7 8 9 10
> x[[3]][1, 2]
[1] 3
> x[[2]][1, 2]
x[[2]][1, 2] でエラー: 次元数が正しくありません
> x[[2]][1]
[1] "abc"
> x[[2]]
[1] "abc"
> x[2]
[[1]]
[1] "abc"
> x <- data.frame()
> x <- data.frame(SIZE=c("M", "L", "S", 'L', 'M'), SALES=c(1,2,1,3,1))
> x
SIZE SALES
1 M 1
2 L 2
3 S 1
4 L 3
5 M 1
> x[1,2]
[1] 1
> x$SIZE
[1] M L S L M
Levels: L M S
> x$SALES
[1] 1 2 1 3 1
CSVファイルを読み込んで色々試す
> nba_2013 <- read.csv("~/nba_2013.csv", header=FALSE)
> View(nba_2013)
> nba_2013 <- read.csv("~/nba_2013.csv", header=FALSE)
> View(nba_2013)
> sum(nba_2013$V5)
Summary.factor(c(83L, 59L, 79L, 48L, 70L, 51L, 40L, 65L, 61L, でエラー:
‘sum’ は因子に対しては無意味です
> sum(nba_2013$V6)
Summary.factor(c(81L, 1L, 14L, 5L, 69L, 25L, 13L, 64L, 13L, 80L, でエラー:
‘sum’ は因子に対しては無意味です
> max(nba_2013$V5)
Summary.factor(c(83L, 59L, 79L, 48L, 70L, 51L, 40L, 65L, 61L, でエラー:
‘max’ は因子に対しては無意味です
> max(nba_2013$gs)
[1] -Inf
警告メッセージ:
max(nba_2013$gs) で: max の引数に有限な値がありません: -Inf を返します
> nba_2013 <- read.csv("~/nba_2013.csv", header=TRUE)
> View(nba_2013)
> sum(nba_2013$g)
[1] 25615
> max(nba_2013$g)
[1] 83
> min(nba_20134)
エラー: オブジェクト 'nba_20134' がありません
> min(nba_2013$gs)
[1] 0
> min(nba_2013$g)
[1] 1
> median(nba_2013$g)
[1] 61
> sd(nba_2013$g)
[1] 25.32271
> mean(nba_2013$x3p.)
[1] NA
> mean(nba_2013$x3p., na.rm=TRUE)
[1] 0.2851112
> summary(nba_2013$g)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 32.00 61.00 53.25 76.00 83.00
> summary(nba_2013)
player pos age bref_team_id g gs
A.J. Price : 1 C : 90 Min. :19.00 TOT : 63 Min. : 1.00 Min. : 0.00
Aaron Brooks : 1 F : 1 1st Qu.:23.00 NOP : 18 1st Qu.:32.00 1st Qu.: 0.00
Aaron Gray : 1 G : 1 Median :26.00 OKC : 17 Median :61.00 Median :10.00
Adonis Thomas: 1 PF: 96 Mean :26.51 LAC : 16 Mean :53.25 Mean :25.57
Al Harrington: 1 PG: 85 3rd Qu.:29.00 LAL : 16 3rd Qu.:76.00 3rd Qu.:54.00
Al Horford : 1 SF: 99 Max. :39.00 UTA : 16 Max. :83.00 Max. :82.00
(Other) :475 SG:109 (Other):335
mp fg fga fg. x3p x3pa
Min. : 1 Min. : 0.0 Min. : 0.0 Min. :0.0000 Min. : 0.00 Min. : 0.0
1st Qu.: 388 1st Qu.: 47.0 1st Qu.: 110.0 1st Qu.:0.4005 1st Qu.: 0.00 1st Qu.: 3.0
Median :1141 Median :146.0 Median : 332.0 Median :0.4380 Median : 16.00 Median : 48.0
Mean :1237 Mean :192.9 Mean : 424.5 Mean :0.4364 Mean : 39.61 Mean :110.1
3rd Qu.:2016 3rd Qu.:307.0 3rd Qu.: 672.0 3rd Qu.:0.4795 3rd Qu.: 68.00 3rd Qu.:193.0
Max. :3122 Max. :849.0 Max. :1688.0 Max. :1.0000 Max. :261.00 Max. :615.0
NA's :2
x3p. x2p x2pa x2p. efg. ft
Min. :0.0000 Min. : 0.0 Min. : 0.0 Min. :0.0000 Min. :0.0000 Min. : 0.00
1st Qu.:0.2344 1st Qu.: 31.0 1st Qu.: 67.0 1st Qu.:0.4347 1st Qu.:0.4510 1st Qu.: 16.00
Median :0.3310 Median :110.0 Median : 227.0 Median :0.4745 Median :0.4880 Median : 53.00
Mean :0.2851 Mean :153.3 Mean : 314.3 Mean :0.4669 Mean :0.4808 Mean : 91.21
3rd Qu.:0.3750 3rd Qu.:230.0 3rd Qu.: 459.0 3rd Qu.:0.5137 3rd Qu.:0.5260 3rd Qu.:126.00
Max. :1.0000 Max. :706.0 Max. :1408.0 Max. :1.0000 Max. :1.0000 Max. :703.00
NA's :67 NA's :3 NA's :2
fta ft. orb drb trb ast
Min. : 0.0 Min. :0.0000 Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. : 0.0
1st Qu.: 22.0 1st Qu.:0.6540 1st Qu.: 12.00 1st Qu.: 43.0 1st Qu.: 55.0 1st Qu.: 20.0
Median : 73.0 Median :0.7510 Median : 35.00 Median :135.0 Median : 168.0 Median : 65.0
Mean :120.6 Mean :0.7224 Mean : 55.81 Mean :162.8 Mean : 218.6 Mean :112.5
3rd Qu.:179.0 3rd Qu.:0.8210 3rd Qu.: 73.00 3rd Qu.:230.0 3rd Qu.: 310.0 3rd Qu.:152.0
Max. :805.0 Max. :1.0000 Max. :440.00 Max. :783.0 Max. :1114.0 Max. :721.0
NA's :20
stl blk tov pf pts season
Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.0 Min. : 0.0 2013-2014:481
1st Qu.: 9.00 1st Qu.: 4.0 1st Qu.: 21.00 1st Qu.: 44.0 1st Qu.: 115.0
Median : 32.00 Median : 14.0 Median : 58.00 Median :104.0 Median : 401.0
Mean : 39.28 Mean : 24.1 Mean : 71.86 Mean :105.9 Mean : 516.6
3rd Qu.: 60.00 3rd Qu.: 32.0 3rd Qu.:108.00 3rd Qu.:158.0 3rd Qu.: 821.0
Max. :191.00 Max. :219.0 Max. :295.00 Max. :273.0 Max. :2593.0
season_end
Min. :2013
1st Qu.:2013
Median :2013
Mean :2013
3rd Qu.:2013
Max. :2013
> str(nba_2013)
'data.frame': 481 obs. of 31 variables:
$ player : Factor w/ 481 levels "A.J. Price","Aaron Brooks",..: 371 432 213 40 13 89 281 286 379 446 ...
$ pos : Factor w/ 7 levels "C","F","G","PF",..: 6 1 4 7 1 1 4 4 7 7 ...
$ age : int 23 20 27 28 25 25 28 24 38 32 ...
$ bref_team_id: Factor w/ 31 levels "ATL","BOS","BRK",..: 29 21 29 22 19 20 25 29 16 15 ...
$ g : int 63 81 53 73 56 46 69 65 73 55 ...
$ gs : int 0 20 12 73 30 2 69 2 9 28 ...
$ mp : int 847 1197 961 2552 951 330 2498 1072 1936 1278 ...
$ fg : int 66 93 143 464 136 33 652 134 240 204 ...
$ fga : int 141 185 275 1011 249 61 1423 300 543 413 ...
$ fg. : num 0.468 0.503 0.52 0.459 0.546 0.541 0.458 0.447 0.442 0.494 ...
$ x3p : int 4 0 0 128 0 0 3 2 116 11 ...
$ x3pa : int 15 0 0 300 1 0 15 13 309 47 ...
$ x3p. : num 0.267 NA NA 0.427 0 ...
$ x2p : int 62 93 143 336 136 33 649 132 124 193 ...
$ x2pa : int 126 185 275 711 248 61 1408 287 234 366 ...
$ x2p. : num 0.492 0.503 0.52 0.473 0.548 ...
$ efg. : num 0.482 0.503 0.52 0.522 0.546 0.541 0.459 0.45 0.549 0.507 ...
$ ft : int 35 79 76 274 56 26 296 33 105 76 ...
$ fta : int 53 136 119 336 67 30 360 50 116 121 ...
$ ft. : num 0.66 0.581 0.639 0.815 0.836 0.867 0.822 0.66 0.905 0.628 ...
$ orb : int 72 142 102 32 94 37 166 119 23 79 ...
$ drb : int 144 190 204 230 183 92 599 192 182 129 ...
$ trb : int 216 332 306 262 277 129 765 311 205 208 ...
$ ast : int 28 43 38 248 40 14 178 71 143 94 ...
$ stl : int 23 40 24 35 23 8 63 24 54 90 ...
$ blk : int 26 57 36 3 46 30 68 33 8 19 ...
$ tov : int 30 71 39 146 63 18 123 44 84 90 ...
$ pf : int 122 203 108 136 187 40 147 126 115 121 ...
$ pts : int 171 265 362 1330 328 92 1603 303 701 495 ...
$ season : Factor w/ 1 level "2013-2014": 1 1 1 1 1 1 1 1 1 1 ...
$ season_end : int 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
> date()
[1] "Fri Dec 18 19:24:06 2015"
> data()
> data(cars)
> cars
speed dist
1 4 2
2 4 10
3 7 4
4 7 22
5 8 16
6 9 10
7 10 18
8 10 26
9 10 34
10 11 17
11 11 28
12 12 14
13 12 20
14 12 24
15 12 28
16 13 26
17 13 34
18 13 34
19 13 46
20 14 26
21 14 36
22 14 60
23 14 80
24 15 20
25 15 26
26 15 54
27 16 32
28 16 40
29 17 32
30 17 40
31 17 50
32 18 42
33 18 56
34 18 76
35 18 84
36 19 36
37 19 46
38 19 68
39 20 32
40 20 48
41 20 52
42 20 56
43 20 64
44 22 66
45 23 54
46 24 70
47 24 92
48 24 93
49 24 120
50 25 85
> str(cars)
'data.frame': 50 obs. of 2 variables:
$ speed: num 4 4 7 7 8 9 10 10 10 11 ...
$ dist : num 2 10 4 22 16 10 18 26 34 17 ...
> summary(cars)
speed dist
Min. : 4.0 Min. : 2.00
1st Qu.:12.0 1st Qu.: 26.00
Median :15.0 Median : 36.00
Mean :15.4 Mean : 42.98
3rd Qu.:19.0 3rd Qu.: 56.00
Max. :25.0 Max. :120.00
> hist(cars$speed)
> barplot(cars$speed)
> plot(cars$speed, cars$dist)
> cor(cars$speed, cars$dist)
[1] 0.8068949