Rの構文
ドットインストールで学んだ。
変数代入
<-
で、PHPとかの=
と同じ意味
# 代入 > x <- 5 > x [1] 5 > y <- 0 # 定義した変数の確認 > ls() [1] "x" "y" # 変数削除 > rm(x) > z <- 5 # 複数削除 > rm(y, z) > ls() character(0) # 何もないって意味らしい # おなじみのNULL > NULL NULL # 欠損値 > NA [1] NA # 非数 > NaN [1] NaN # 無限大 > Inf [1] Inf
四則演算
> 5 + 3 [1] 8 > 10 / 3 [1] 3.333333 > 10 %% 3 [1] 1 > 10 %/% 3 [1] 3 > 2 ^ 4 [1] 16 > cos(1) [1] 0.5403023 > sqrt(2) [1] 1.414214 > round(2.555) [1] 3 > 'abbccccc' [1] "abbccccc" > paste('a', 'b', 'c') [1] "a b c" > paste('a', 'b', 'c', sep='') [1] "abc" > paste('a', 'b', 'c', sep='/') [1] "a/b/c" > x <- 5 > x [1] 5 > as.character(x) [1] "5" > s <- "5" > as.numeric() numeric(0) > as.numeric(s) [1] 5
ベクトル
他の言語の配列みたいだけど、配列とは違うらしい（要素はすべて同じ型で、添字は1から始まる）
> v <- c(1, 3, 5) > v [1] 1 3 5 > v[2] [1] 3 > v[1] [1] 1 > v[3] [1] 5 > v[2] <- 10 > v[2] [1] 10 > v [1] 1 10 5 > v <- c("abc", "bdee") > v <- c(TRUE, FALSE) > v [1] TRUE FALSE > length(v) [1] 2 > v <- 1:10 > v [1] 1 2 3 4 5 6 7 8 9 10 > v <- 1:-10 > v [1] 1 0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 > v <- seq(1, 10) > v [1] 1 2 3 4 5 6 7 8 9 10 > v <- seq(1, 10, by=2) > v [1] 1 3 5 7 9 > v <- seq(1, 10, length=5) > v [1] 1.00 3.25 5.50 7.75 10.00 > v <- rep(1:5, times=3) > v [1] 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 > v <- rep(1:5, length=10) > v [1] 1 2 3 4 5 1 2 3 4 5
ベクトルで掛け算など
> x <- c(1, 3, 5) > y <- c(2, 3, 4) > x * 2 [1] 2 6 10 > x * 10 [1] 10 30 50 > x - 1 [1] 0 2 4 > x - 10 [1] -9 -7 -5 > x + y [1] 3 6 9 > x [1] 1 3 5 > x * y [1] 2 9 20 > x > y [1] FALSE FALSE TRUE > x < y [1] TRUE FALSE FALSE > x == y [1] FALSE TRUE FALSE > x != y [1] TRUE FALSE TRUE > x === y エラー: 予想外の '=' です in "x ===" > x %in% y [1] FALSE TRUE FALSE > union(x, y) [1] 1 3 5 2 4 > intersect(x,y ) [1] 3 > intersect(x,y) [1] 3 > setdiff(x, y) [1] 1 5 > setequal(x, y) [1] FALSE
因子ベクトル
> x <- c('S', 'M', 'L', 'M', 'L') > x [1] "S" "M" "L" "M" "L" > x.fc <- factor(x) > x.fc [1] S M L M L Levels: L M S > levels(x.fc) [1] "L" "M" "S" > x.fc <- factor(x, levels=c("S", "M", "L")) > x.fc [1] S M L M L Levels: S M L > x.fc <- orderd(x, levels=c("S", "M", "L")) エラー: 関数 "orderd" を見つけることができませんでした > x.fc <- ordered(x, levels=c("S", "M", "L")) > x.fc [1] S M L M L Levels: S < M < L
行列
> x <- matrix(c(1:6), nrow=3, ncol=2) > x [,1] [,2] [1,] 1 4 [2,] 2 5 [3,] 3 6 > x <- matrix(c(1:6), nrow=3, ncol=2, byrow=TRUE) > x [,1] [,2] [1,] 1 2 [2,] 3 4 [3,] 5 6 > x <- rbind(c(1,2), 3:4, 5:6) > x [,1] [,2] [1,] 1 2 [2,] 3 4 [3,] 5 6 > x <- cbind(c(1,2), 3:4, 5:6) > x [,1] [,2] [,3] [1,] 1 3 5 [2,] 2 4 6 > x + 1 [,1] [,2] [,3] [1,] 2 4 6 [2,] 3 5 7 > x [,1] [,2] [,3] [1,] 1 3 5 [2,] 2 4 6 > 1 / x [,1] [,2] [,3] [1,] 1.0 0.3333333 0.2000000 [2,] 0.5 0.2500000 0.1666667 > dim(x) [1] 2 3 > nrow(x) [1] 2 > ncol(x) [1] 3 > x[, 1] [1] 1 2 > x[2, ] [1] 2 4 6 > x[1, 2] [1] 3 > x[1, 1:2] [1] 1 3 > x[1, c(1:2)] [1] 1 3 > x[1, c(1:3)] [1] 1 3 5 > x[1, c(1, 3)] [1] 1 5 > x[1, 2] <- 10 > x [,1] [,2] [,3] [1,] 1 10 5 [2,] 2 4 6
リストとデータフレーム
> x <- list(5:10, "abc", matrix(1:6, nrow=2, ncol=3)) > x [[1]] [1] 5 6 7 8 9 10 [[2]] [1] "abc" [[3]] [,1] [,2] [,3] [1,] 1 3 5 [2,] 2 4 6 > x[1] [[1]] [1] 5 6 7 8 9 10 > x[[1]] [1] 5 6 7 8 9 10 > x[[3]][1, 2] [1] 3 > x[[2]][1, 2] x[[2]][1, 2] でエラー: 次元数が正しくありません > x[[2]][1] [1] "abc" > x[[2]] [1] "abc" > x[2] [[1]] [1] "abc"
> x <- data.frame() > x <- data.frame(SIZE=c("M", "L", "S", 'L', 'M'), SALES=c(1,2,1,3,1)) > x SIZE SALES 1 M 1 2 L 2 3 S 1 4 L 3 5 M 1 > x[1,2] [1] 1 > x$SIZE [1] M L S L M Levels: L M S > x$SALES [1] 1 2 1 3 1
CSVファイルを読み込んで色々試す
> nba_2013 <- read.csv("~/nba_2013.csv", header=FALSE) > View(nba_2013) > nba_2013 <- read.csv("~/nba_2013.csv", header=FALSE) > View(nba_2013) > sum(nba_2013$V5) Summary.factor(c(83L, 59L, 79L, 48L, 70L, 51L, 40L, 65L, 61L, でエラー: ‘sum’ は因子に対しては無意味です > sum(nba_2013$V6) Summary.factor(c(81L, 1L, 14L, 5L, 69L, 25L, 13L, 64L, 13L, 80L, でエラー: ‘sum’ は因子に対しては無意味です > max(nba_2013$V5) Summary.factor(c(83L, 59L, 79L, 48L, 70L, 51L, 40L, 65L, 61L, でエラー: ‘max’ は因子に対しては無意味です > max(nba_2013$gs) [1] -Inf 警告メッセージ: max(nba_2013$gs) で: max の引数に有限な値がありません: -Inf を返します > nba_2013 <- read.csv("~/nba_2013.csv", header=TRUE) > View(nba_2013) > sum(nba_2013$g) [1] 25615 > max(nba_2013$g) [1] 83 > min(nba_20134) エラー: オブジェクト 'nba_20134' がありません > min(nba_2013$gs) [1] 0 > min(nba_2013$g) [1] 1 > median(nba_2013$g) [1] 61 > sd(nba_2013$g) [1] 25.32271 > mean(nba_2013$x3p.) [1] NA > mean(nba_2013$x3p., na.rm=TRUE) [1] 0.2851112 > summary(nba_2013$g) Min. 1st Qu. Median Mean 3rd Qu. Max. 1.00 32.00 61.00 53.25 76.00 83.00 > summary(nba_2013) player pos age bref_team_id g gs A.J. Price : 1 C : 90 Min. :19.00 TOT : 63 Min. : 1.00 Min. : 0.00 Aaron Brooks : 1 F : 1 1st Qu.:23.00 NOP : 18 1st Qu.:32.00 1st Qu.: 0.00 Aaron Gray : 1 G : 1 Median :26.00 OKC : 17 Median :61.00 Median :10.00 Adonis Thomas: 1 PF: 96 Mean :26.51 LAC : 16 Mean :53.25 Mean :25.57 Al Harrington: 1 PG: 85 3rd Qu.:29.00 LAL : 16 3rd Qu.:76.00 3rd Qu.:54.00 Al Horford : 1 SF: 99 Max. :39.00 UTA : 16 Max. :83.00 Max. :82.00 (Other) :475 SG:109 (Other):335 mp fg fga fg. x3p x3pa Min. : 1 Min. : 0.0 Min. : 0.0 Min. :0.0000 Min. : 0.00 Min. : 0.0 1st Qu.: 388 1st Qu.: 47.0 1st Qu.: 110.0 1st Qu.:0.4005 1st Qu.: 0.00 1st Qu.: 3.0 Median :1141 Median :146.0 Median : 332.0 Median :0.4380 Median : 16.00 Median : 48.0 Mean :1237 Mean :192.9 Mean : 424.5 Mean :0.4364 Mean : 39.61 Mean :110.1 3rd Qu.:2016 3rd Qu.:307.0 3rd Qu.: 672.0 3rd Qu.:0.4795 3rd Qu.: 68.00 3rd Qu.:193.0 Max. :3122 Max. :849.0 Max. :1688.0 Max. :1.0000 Max. 
:261.00 Max. :615.0 NA's :2 x3p. x2p x2pa x2p. efg. ft Min. :0.0000 Min. : 0.0 Min. : 0.0 Min. :0.0000 Min. :0.0000 Min. : 0.00 1st Qu.:0.2344 1st Qu.: 31.0 1st Qu.: 67.0 1st Qu.:0.4347 1st Qu.:0.4510 1st Qu.: 16.00 Median :0.3310 Median :110.0 Median : 227.0 Median :0.4745 Median :0.4880 Median : 53.00 Mean :0.2851 Mean :153.3 Mean : 314.3 Mean :0.4669 Mean :0.4808 Mean : 91.21 3rd Qu.:0.3750 3rd Qu.:230.0 3rd Qu.: 459.0 3rd Qu.:0.5137 3rd Qu.:0.5260 3rd Qu.:126.00 Max. :1.0000 Max. :706.0 Max. :1408.0 Max. :1.0000 Max. :1.0000 Max. :703.00 NA's :67 NA's :3 NA's :2 fta ft. orb drb trb ast Min. : 0.0 Min. :0.0000 Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. : 0.0 1st Qu.: 22.0 1st Qu.:0.6540 1st Qu.: 12.00 1st Qu.: 43.0 1st Qu.: 55.0 1st Qu.: 20.0 Median : 73.0 Median :0.7510 Median : 35.00 Median :135.0 Median : 168.0 Median : 65.0 Mean :120.6 Mean :0.7224 Mean : 55.81 Mean :162.8 Mean : 218.6 Mean :112.5 3rd Qu.:179.0 3rd Qu.:0.8210 3rd Qu.: 73.00 3rd Qu.:230.0 3rd Qu.: 310.0 3rd Qu.:152.0 Max. :805.0 Max. :1.0000 Max. :440.00 Max. :783.0 Max. :1114.0 Max. :721.0 NA's :20 stl blk tov pf pts season Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.0 Min. : 0.0 2013-2014:481 1st Qu.: 9.00 1st Qu.: 4.0 1st Qu.: 21.00 1st Qu.: 44.0 1st Qu.: 115.0 Median : 32.00 Median : 14.0 Median : 58.00 Median :104.0 Median : 401.0 Mean : 39.28 Mean : 24.1 Mean : 71.86 Mean :105.9 Mean : 516.6 3rd Qu.: 60.00 3rd Qu.: 32.0 3rd Qu.:108.00 3rd Qu.:158.0 3rd Qu.: 821.0 Max. :191.00 Max. :219.0 Max. :295.00 Max. :273.0 Max. :2593.0 season_end Min. :2013 1st Qu.:2013 Median :2013 Mean :2013 3rd Qu.:2013 Max. :2013 > str(nba_2013) 'data.frame': 481 obs. of 31 variables: $ player : Factor w/ 481 levels "A.J. Price","Aaron Brooks",..: 371 432 213 40 13 89 281 286 379 446 ... $ pos : Factor w/ 7 levels "C","F","G","PF",..: 6 1 4 7 1 1 4 4 7 7 ... $ age : int 23 20 27 28 25 25 28 24 38 32 ... $ bref_team_id: Factor w/ 31 levels "ATL","BOS","BRK",..: 29 21 29 22 19 20 25 29 16 15 ... 
$ g : int 63 81 53 73 56 46 69 65 73 55 ... $ gs : int 0 20 12 73 30 2 69 2 9 28 ... $ mp : int 847 1197 961 2552 951 330 2498 1072 1936 1278 ... $ fg : int 66 93 143 464 136 33 652 134 240 204 ... $ fga : int 141 185 275 1011 249 61 1423 300 543 413 ... $ fg. : num 0.468 0.503 0.52 0.459 0.546 0.541 0.458 0.447 0.442 0.494 ... $ x3p : int 4 0 0 128 0 0 3 2 116 11 ... $ x3pa : int 15 0 0 300 1 0 15 13 309 47 ... $ x3p. : num 0.267 NA NA 0.427 0 ... $ x2p : int 62 93 143 336 136 33 649 132 124 193 ... $ x2pa : int 126 185 275 711 248 61 1408 287 234 366 ... $ x2p. : num 0.492 0.503 0.52 0.473 0.548 ... $ efg. : num 0.482 0.503 0.52 0.522 0.546 0.541 0.459 0.45 0.549 0.507 ... $ ft : int 35 79 76 274 56 26 296 33 105 76 ... $ fta : int 53 136 119 336 67 30 360 50 116 121 ... $ ft. : num 0.66 0.581 0.639 0.815 0.836 0.867 0.822 0.66 0.905 0.628 ... $ orb : int 72 142 102 32 94 37 166 119 23 79 ... $ drb : int 144 190 204 230 183 92 599 192 182 129 ... $ trb : int 216 332 306 262 277 129 765 311 205 208 ... $ ast : int 28 43 38 248 40 14 178 71 143 94 ... $ stl : int 23 40 24 35 23 8 63 24 54 90 ... $ blk : int 26 57 36 3 46 30 68 33 8 19 ... $ tov : int 30 71 39 146 63 18 123 44 84 90 ... $ pf : int 122 203 108 136 187 40 147 126 115 121 ... $ pts : int 171 265 362 1330 328 92 1603 303 701 495 ... $ season : Factor w/ 1 level "2013-2014": 1 1 1 1 1 1 1 1 1 1 ... $ season_end : int 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
> date() [1] "Fri Dec 18 19:24:06 2015" > data() > data(cars) > cars speed dist 1 4 2 2 4 10 3 7 4 4 7 22 5 8 16 6 9 10 7 10 18 8 10 26 9 10 34 10 11 17 11 11 28 12 12 14 13 12 20 14 12 24 15 12 28 16 13 26 17 13 34 18 13 34 19 13 46 20 14 26 21 14 36 22 14 60 23 14 80 24 15 20 25 15 26 26 15 54 27 16 32 28 16 40 29 17 32 30 17 40 31 17 50 32 18 42 33 18 56 34 18 76 35 18 84 36 19 36 37 19 46 38 19 68 39 20 32 40 20 48 41 20 52 42 20 56 43 20 64 44 22 66 45 23 54 46 24 70 47 24 92 48 24 93 49 24 120 50 25 85 > str(cars) 'data.frame': 50 obs. of 2 variables: $ speed: num 4 4 7 7 8 9 10 10 10 11 ... $ dist : num 2 10 4 22 16 10 18 26 34 17 ... > summary(cars) speed dist Min. : 4.0 Min. : 2.00 1st Qu.:12.0 1st Qu.: 26.00 Median :15.0 Median : 36.00 Mean :15.4 Mean : 42.98 3rd Qu.:19.0 3rd Qu.: 56.00 Max. :25.0 Max. :120.00 > hist(cars$speed) > barplot(cars$speed) > plot(cars$speed, cars$dist) > cor(cars$speed, cars$dist) [1] 0.8068949