読者です 読者をやめる 読者になる 読者になる

ITの隊長のブログ

ITの隊長のブログです。いや、まだ隊長と呼べるほどには至っていないけど、日々がんばります。CakePHPとPlayFrameworkを使って仕事しています。最近はAngular2をさわりはじめたお(^ω^ = ^ω^)

R言語を勉強してみた

R

スポンサードリンク

Rの構文

ドットインストールで学んだ。

変数代入

<-で、PHPとかの=と同じ意味

# 代入
> x <- 5
> x
[1] 5

> y <- 0

# 定義した変数の確認
> ls()
[1] "x" "y"

# 変数削除
> rm(x)

> z <- 5

# 複数削除
> rm(y, z)
> ls()
character(0) # 何もないって意味らしい


# おなじみの NULL
> NULL
NULL

# 欠損値
> NA
[1] NA

# 非数
> NaN
[1] NaN

# 無限大
> Inf
[1] Inf

四則演算

> 5 + 3
[1] 8
> 10 / 3
[1] 3.333333
> 10 %% 3
[1] 1
> 10 %/% 3
[1] 3
> 2 ^ 4
[1] 16
> cos(1)
[1] 0.5403023
> sqrt(2)
[1] 1.414214
> round(2.555)
[1] 3
> 'abbccccc'
[1] "abbccccc"
> paste('a', 'b', 'c')
[1] "a b c"
> paste('a', 'b', 'c', sep='')
[1] "abc"
> paste('a', 'b', 'c', sep='/')
[1] "a/b/c"
> x <- 5
> x
[1] 5
> as.character(x)
[1] "5"
> s <- "5"
> as.numeric()
numeric(0)
> as.numeric(s)
[1] 5

ベクトル

配列みたいだけど、配列とは違うらしい

> v <- c(1, 3, 5)
> v
[1] 1 3 5
> v[2]
[1] 3
> v[1]
[1] 1
> v[3]
[1] 5
> v[2] <- 10
> v[2]
[1] 10
> v
[1]  1 10  5
> v <- c("abc", "bdee")
> v <- c(TRUE, FALSE)
> v
[1]  TRUE FALSE
> length(v)
[1] 2
> v <- 1:10
> v
 [1]  1  2  3  4  5  6  7  8  9 10
> v <- 1:-10
> v
 [1]   1   0  -1  -2  -3  -4  -5  -6  -7  -8  -9 -10
> v <- seq(1, 10)
> v
 [1]  1  2  3  4  5  6  7  8  9 10
> v <- seq(1, 10, by=2)
> v
[1] 1 3 5 7 9
> v <- seq(1, 10, length=5)
> v
[1]  1.00  3.25  5.50  7.75 10.00
> v <- rep(1:5, times=3)
> v
 [1] 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5
> v <- rep(1:5, length=10)
> v
 [1] 1 2 3 4 5 1 2 3 4 5

ベクトルで掛け算など

> x <- c(1, 3, 5)
> y <- c(2, 3, 4)
> x * 2
[1]  2  6 10
> x * 10
[1] 10 30 50
> x - 1
[1] 0 2 4
> x - 10
[1] -9 -7 -5
> x + y
[1] 3 6 9
> x
[1] 1 3 5
> x * y
[1]  2  9 20
> x > y
[1] FALSE FALSE  TRUE
> x < y
[1]  TRUE FALSE FALSE
> x == y
[1] FALSE  TRUE FALSE
> x != y
[1]  TRUE FALSE  TRUE
> x === y
 エラー:  予想外の '=' です  in "x ==="
> x %in% y
[1] FALSE  TRUE FALSE
> union(x, y)
[1] 1 3 5 2 4
> intersect(x,y )
[1] 3
> intersect(x,y)
[1] 3
> setdiff(x, y)
[1] 1 5
> setequal(x, y)
[1] FALSE

因子ベクトル

> x <- c('S', 'M', 'L', 'M', 'L')
> x
[1] "S" "M" "L" "M" "L"
> x.fc <- factor(x)
> x.fc
[1] S M L M L
Levels: L M S
> levels(x.fc)
[1] "L" "M" "S"
> x.fc <- factor(x, levels=c("S", "M", "L"))
> x.fc
[1] S M L M L
Levels: S M L
> x.fc <- orderd(x, levels=c("S", "M", "L"))
 エラー:  関数 "orderd" を見つけることができませんでした 
> x.fc <- ordered(x, levels=c("S", "M", "L"))
> x.fc
[1] S M L M L
Levels: S < M < L

行列

> x <- matrix(c(1:6), nrow=3, ncol=2)
> x
     [,1] [,2]
[1,]    1    4
[2,]    2    5
[3,]    3    6
> x <- matrix(c(1:6), nrow=3, ncol=2, byrow=TRUE)
> x
     [,1] [,2]
[1,]    1    2
[2,]    3    4
[3,]    5    6
> x <- rbind(c(1,2), 3:4, 5:6)
> x
     [,1] [,2]
[1,]    1    2
[2,]    3    4
[3,]    5    6
> x <- cbind(c(1,2), 3:4, 5:6)
> x
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> x + 1
     [,1] [,2] [,3]
[1,]    2    4    6
[2,]    3    5    7
> x
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> 1 / x
     [,1]      [,2]      [,3]
[1,]  1.0 0.3333333 0.2000000
[2,]  0.5 0.2500000 0.1666667
> dim(x)
[1] 2 3
> nrow(x)
[1] 2
> ncol(x)
[1] 3

> x[, 1]
[1] 1 2
> x[2, ]
[1] 2 4 6
> x[1, 2]
[1] 3
> x[1, 1:2]
[1] 1 3
> x[1, c(1:2)]
[1] 1 3
> x[1, c(1:3)]
[1] 1 3 5
> x[1, c(1, 3)]
[1] 1 5
> x[1, 2] <- 10
> x
     [,1] [,2] [,3]
[1,]    1   10    5
[2,]    2    4    6

リストとデータフレーム

> x <- list(5:10, "abc", matrix(1:6, nrow=2, ncol=3))
> x
[[1]]
[1]  5  6  7  8  9 10

[[2]]
[1] "abc"

[[3]]
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

> x[1]
[[1]]
[1]  5  6  7  8  9 10

> x[[1]]
[1]  5  6  7  8  9 10
> x[[3]][1, 2]
[1] 3
> x[[2]][1, 2]
 x[[2]][1, 2] でエラー:  次元数が正しくありません 
> x[[2]][1]
[1] "abc"
> x[[2]]
[1] "abc"
> x[2]
[[1]]
[1] "abc"
> x <- data.frame()
> x <- data.frame(SIZE=c("M", "L", "S", 'L', 'M'), SALES=c(1,2,1,3,1))
> x
  SIZE SALES
1    M     1
2    L     2
3    S     1
4    L     3
5    M     1
> x[1,2]
[1] 1
> x$SIZE
[1] M L S L M
Levels: L M S
> x$SALES
[1] 1 2 1 3 1

CSVファイルを読み込んで色々試す

> nba_2013 <- read.csv("~/nba_2013.csv", header=FALSE)
>   View(nba_2013)
> nba_2013 <- read.csv("~/nba_2013.csv", header=FALSE)
>   View(nba_2013)
> sum(nba_2013$V5)
 Summary.factor(c(83L, 59L, 79L, 48L, 70L, 51L, 40L, 65L, 61L,  でエラー: 
   ‘sum’ は因子に対しては無意味です 
> sum(nba_2013$V6)
 Summary.factor(c(81L, 1L, 14L, 5L, 69L, 25L, 13L, 64L, 13L, 80L,  でエラー: 
   ‘sum’ は因子に対しては無意味です 
> max(nba_2013$V5)
 Summary.factor(c(83L, 59L, 79L, 48L, 70L, 51L, 40L, 65L, 61L,  でエラー: 
   ‘max’ は因子に対しては無意味です 
> max(nba_2013$gs)
[1] -Inf
 警告メッセージ: 
 max(nba_2013$gs):   max の引数に有限な値がありません: -Inf を返します 
> nba_2013 <- read.csv("~/nba_2013.csv", header=TRUE)
>   View(nba_2013)
> sum(nba_2013$g)
[1] 25615
> max(nba_2013$g)
[1] 83
> min(nba_20134)
 エラー:  オブジェクト 'nba_20134' がありません 
> min(nba_2013$gs)
[1] 0
> min(nba_2013$g)
[1] 1
> median(nba_2013$g)
[1] 61
> sd(nba_2013$g)
[1] 25.32271
> mean(nba_2013$x3p.)
[1] NA
> mean(nba_2013$x3p., na.rm=TRUE)
[1] 0.2851112
> summary(nba_2013$g)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00   32.00   61.00   53.25   76.00   83.00 
> summary(nba_2013)
           player    pos           age         bref_team_id       g               gs       
 A.J. Price   :  1   C : 90   Min.   :19.00   TOT    : 63   Min.   : 1.00   Min.   : 0.00  
 Aaron Brooks :  1   F :  1   1st Qu.:23.00   NOP    : 18   1st Qu.:32.00   1st Qu.: 0.00  
 Aaron Gray   :  1   G :  1   Median :26.00   OKC    : 17   Median :61.00   Median :10.00  
 Adonis Thomas:  1   PF: 96   Mean   :26.51   LAC    : 16   Mean   :53.25   Mean   :25.57  
 Al Harrington:  1   PG: 85   3rd Qu.:29.00   LAL    : 16   3rd Qu.:76.00   3rd Qu.:54.00  
 Al Horford   :  1   SF: 99   Max.   :39.00   UTA    : 16   Max.   :83.00   Max.   :82.00  
 (Other)      :475   SG:109                   (Other):335                                  
       mp             fg             fga              fg.              x3p              x3pa      
 Min.   :   1   Min.   :  0.0   Min.   :   0.0   Min.   :0.0000   Min.   :  0.00   Min.   :  0.0  
 1st Qu.: 388   1st Qu.: 47.0   1st Qu.: 110.0   1st Qu.:0.4005   1st Qu.:  0.00   1st Qu.:  3.0  
 Median :1141   Median :146.0   Median : 332.0   Median :0.4380   Median : 16.00   Median : 48.0  
 Mean   :1237   Mean   :192.9   Mean   : 424.5   Mean   :0.4364   Mean   : 39.61   Mean   :110.1  
 3rd Qu.:2016   3rd Qu.:307.0   3rd Qu.: 672.0   3rd Qu.:0.4795   3rd Qu.: 68.00   3rd Qu.:193.0  
 Max.   :3122   Max.   :849.0   Max.   :1688.0   Max.   :1.0000   Max.   :261.00   Max.   :615.0  
                                                 NA's   :2                                        
      x3p.             x2p             x2pa             x2p.             efg.              ft        
 Min.   :0.0000   Min.   :  0.0   Min.   :   0.0   Min.   :0.0000   Min.   :0.0000   Min.   :  0.00  
 1st Qu.:0.2344   1st Qu.: 31.0   1st Qu.:  67.0   1st Qu.:0.4347   1st Qu.:0.4510   1st Qu.: 16.00  
 Median :0.3310   Median :110.0   Median : 227.0   Median :0.4745   Median :0.4880   Median : 53.00  
 Mean   :0.2851   Mean   :153.3   Mean   : 314.3   Mean   :0.4669   Mean   :0.4808   Mean   : 91.21  
 3rd Qu.:0.3750   3rd Qu.:230.0   3rd Qu.: 459.0   3rd Qu.:0.5137   3rd Qu.:0.5260   3rd Qu.:126.00  
 Max.   :1.0000   Max.   :706.0   Max.   :1408.0   Max.   :1.0000   Max.   :1.0000   Max.   :703.00  
 NA's   :67                                        NA's   :3        NA's   :2                        
      fta             ft.              orb              drb             trb              ast       
 Min.   :  0.0   Min.   :0.0000   Min.   :  0.00   Min.   :  0.0   Min.   :   0.0   Min.   :  0.0  
 1st Qu.: 22.0   1st Qu.:0.6540   1st Qu.: 12.00   1st Qu.: 43.0   1st Qu.:  55.0   1st Qu.: 20.0  
 Median : 73.0   Median :0.7510   Median : 35.00   Median :135.0   Median : 168.0   Median : 65.0  
 Mean   :120.6   Mean   :0.7224   Mean   : 55.81   Mean   :162.8   Mean   : 218.6   Mean   :112.5  
 3rd Qu.:179.0   3rd Qu.:0.8210   3rd Qu.: 73.00   3rd Qu.:230.0   3rd Qu.: 310.0   3rd Qu.:152.0  
 Max.   :805.0   Max.   :1.0000   Max.   :440.00   Max.   :783.0   Max.   :1114.0   Max.   :721.0  
                 NA's   :20                                                                        
      stl              blk             tov               pf             pts               season   
 Min.   :  0.00   Min.   :  0.0   Min.   :  0.00   Min.   :  0.0   Min.   :   0.0   2013-2014:481  
 1st Qu.:  9.00   1st Qu.:  4.0   1st Qu.: 21.00   1st Qu.: 44.0   1st Qu.: 115.0                  
 Median : 32.00   Median : 14.0   Median : 58.00   Median :104.0   Median : 401.0                  
 Mean   : 39.28   Mean   : 24.1   Mean   : 71.86   Mean   :105.9   Mean   : 516.6                  
 3rd Qu.: 60.00   3rd Qu.: 32.0   3rd Qu.:108.00   3rd Qu.:158.0   3rd Qu.: 821.0                  
 Max.   :191.00   Max.   :219.0   Max.   :295.00   Max.   :273.0   Max.   :2593.0                  
                                                                                                   
   season_end  
 Min.   :2013  
 1st Qu.:2013  
 Median :2013  
 Mean   :2013  
 3rd Qu.:2013  
 Max.   :2013  
               
> str(nba_2013)
'data.frame': 481 obs. of  31 variables:
 $ player      : Factor w/ 481 levels "A.J. Price","Aaron Brooks",..: 371 432 213 40 13 89 281 286 379 446 ...
 $ pos         : Factor w/ 7 levels "C","F","G","PF",..: 6 1 4 7 1 1 4 4 7 7 ...
 $ age         : int  23 20 27 28 25 25 28 24 38 32 ...
 $ bref_team_id: Factor w/ 31 levels "ATL","BOS","BRK",..: 29 21 29 22 19 20 25 29 16 15 ...
 $ g           : int  63 81 53 73 56 46 69 65 73 55 ...
 $ gs          : int  0 20 12 73 30 2 69 2 9 28 ...
 $ mp          : int  847 1197 961 2552 951 330 2498 1072 1936 1278 ...
 $ fg          : int  66 93 143 464 136 33 652 134 240 204 ...
 $ fga         : int  141 185 275 1011 249 61 1423 300 543 413 ...
 $ fg.         : num  0.468 0.503 0.52 0.459 0.546 0.541 0.458 0.447 0.442 0.494 ...
 $ x3p         : int  4 0 0 128 0 0 3 2 116 11 ...
 $ x3pa        : int  15 0 0 300 1 0 15 13 309 47 ...
 $ x3p.        : num  0.267 NA NA 0.427 0 ...
 $ x2p         : int  62 93 143 336 136 33 649 132 124 193 ...
 $ x2pa        : int  126 185 275 711 248 61 1408 287 234 366 ...
 $ x2p.        : num  0.492 0.503 0.52 0.473 0.548 ...
 $ efg.        : num  0.482 0.503 0.52 0.522 0.546 0.541 0.459 0.45 0.549 0.507 ...
 $ ft          : int  35 79 76 274 56 26 296 33 105 76 ...
 $ fta         : int  53 136 119 336 67 30 360 50 116 121 ...
 $ ft.         : num  0.66 0.581 0.639 0.815 0.836 0.867 0.822 0.66 0.905 0.628 ...
 $ orb         : int  72 142 102 32 94 37 166 119 23 79 ...
 $ drb         : int  144 190 204 230 183 92 599 192 182 129 ...
 $ trb         : int  216 332 306 262 277 129 765 311 205 208 ...
 $ ast         : int  28 43 38 248 40 14 178 71 143 94 ...
 $ stl         : int  23 40 24 35 23 8 63 24 54 90 ...
 $ blk         : int  26 57 36 3 46 30 68 33 8 19 ...
 $ tov         : int  30 71 39 146 63 18 123 44 84 90 ...
 $ pf          : int  122 203 108 136 187 40 147 126 115 121 ...
 $ pts         : int  171 265 362 1330 328 92 1603 303 701 495 ...
 $ season      : Factor w/ 1 level "2013-2014": 1 1 1 1 1 1 1 1 1 1 ...
 $ season_end  : int  2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
> date()
[1] "Fri Dec 18 19:24:06 2015"
> data()
> data(cars)
> cars
   speed dist
1      4    2
2      4   10
3      7    4
4      7   22
5      8   16
6      9   10
7     10   18
8     10   26
9     10   34
10    11   17
11    11   28
12    12   14
13    12   20
14    12   24
15    12   28
16    13   26
17    13   34
18    13   34
19    13   46
20    14   26
21    14   36
22    14   60
23    14   80
24    15   20
25    15   26
26    15   54
27    16   32
28    16   40
29    17   32
30    17   40
31    17   50
32    18   42
33    18   56
34    18   76
35    18   84
36    19   36
37    19   46
38    19   68
39    20   32
40    20   48
41    20   52
42    20   56
43    20   64
44    22   66
45    23   54
46    24   70
47    24   92
48    24   93
49    24  120
50    25   85
> str(cars)
'data.frame': 50 obs. of  2 variables:
 $ speed: num  4 4 7 7 8 9 10 10 10 11 ...
 $ dist : num  2 10 4 22 16 10 18 26 34 17 ...
> summary(cars)
     speed           dist       
 Min.   : 4.0   Min.   :  2.00  
 1st Qu.:12.0   1st Qu.: 26.00  
 Median :15.0   Median : 36.00  
 Mean   :15.4   Mean   : 42.98  
 3rd Qu.:19.0   3rd Qu.: 56.00  
 Max.   :25.0   Max.   :120.00  
> hist(cars$speed)
> barplot(cars$speed)
> plot(cars$speed, cars$dist)
> cor(cars$speed, cars$dist)
[1] 0.8068949