Basic Stat Using R 描述性统计函数 myvars <- mtcars[c("mpg","hp","wt","am")] summary(myvars) # 对一个数据集进行详细统计 ## mpg hp wt am ## Min. :10.40 Min. : 52.0 Min. :1.513 Min. :0.0000 ## 1st Qu.:15.43 1st Qu.: 96.5 1st Qu.:2.581 1st Qu.:0.0000 ## Median :19.20 Median :123.0 Median :3.325 Median :0.0000 ## Mean :20.09 Mean :146.7 Mean :3.217 Mean :0.4062 ## 3rd Qu.:22.80 3rd Qu.:180.0 3rd Qu.:3.610 3rd Qu.:1.0000 ## Max. :33.90 Max. :335.0 Max. :5.424 Max. :1.0000 fivenum(myvars$hp) ## [1] 52 96 123 180 335 # 五个分位数 install.packages("Hmisc") ## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1' ## (as 'lib' is unspecified) library(Hmisc) ## Loading required package: lattice ## Loading required package: survival ## Loading required package: Formula ## Loading required package: ggplot2 ## ## Attaching package: 'Hmisc' ## The following objects are masked from 'package:base': ## ## format.pval, units 1
Basic Stat Using R 描述性统计函数 myvars <- mtcars[c("mpg","hp","wt","am")] summary(myvars) # 对一个数据集进行详细统计 ## mpg hp wt am ## Min. :10.40 Min. : 52.0 Min. :1.513 Min. :0.0000 ## 1st Qu.:15.43 1st Qu.: 96.5 1st Qu.:2.581 1st Qu.:0.0000 ## Median :19.20 Median :123.0 Median :3.325 Median :0.0000 ## Mean :20.09 Mean :146.7 Mean :3.217 Mean :0.4062 ## 3rd Qu.:22.80 3rd Qu.:180.0 3rd Qu.:3.610 3rd Qu.:1.0000 ## Max. :33.90 Max. :335.0 Max. :5.424 Max. :1.0000 fivenum(myvars$hp) ## [1] 52 96 123 180 335 # 五个分位数 install.packages("Hmisc") ## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1' ## (as 'lib' is unspecified) library(Hmisc) ## Loading required package: lattice ## Loading required package: survival ## Loading required package: Formula ## Loading required package: ggplot2 ## ## Attaching package: 'Hmisc' ## The following objects are masked from 'package:base': ## ## format.pval, units 1
describe(myvars) #myvars #排 ##4 Variables 32 Observations ##mpg n missing distinct Info Mean Gmd .05 .10 特转 32 0 25 0.999 20.096.79612.0014.34 25 50 .75 .90 .95 #拼 15.43 19.20 22.80 30.09 31.30 排排 #1 owest:10.413.314.314.715.0,highest:26.027.330.432.433.9 hp #拼 n missing distinct Info Mean Gmd .05 .10 0 22 0.997 146.7 77.0463.6566.00 标 .25 .50 .75 .90 .95 96.50 123.00180.00 243.50253.55 #10we8t:5262656691,h1ghe8t:215230245264335 #wt n missing distinct Info Mean 05 32 0 29 0.999 3.217 1.0891.7361.956 26 .50 .75 90 96 2.581 3.3253.610 4.048 5.293 ¥#1 owest:1.5131.6151.8351.9352.140,highest:3.8454.0705.2505.3455.424 #am n missing distinct Info Sum Mean Gmd 0 2 0.724 130.40620.498 #排 #五个分位数 install.packages("pastecs") #Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1
describe(myvars) ## myvars ## ## 4 Variables 32 Observations ## -------------------------------------------------------------------------------- ## mpg ## n missing distinct Info Mean Gmd .05 .10 ## 32 0 25 0.999 20.09 6.796 12.00 14.34 ## .25 .50 .75 .90 .95 ## 15.43 19.20 22.80 30.09 31.30 ## ## lowest : 10.4 13.3 14.3 14.7 15.0, highest: 26.0 27.3 30.4 32.4 33.9 ## -------------------------------------------------------------------------------- ## hp ## n missing distinct Info Mean Gmd .05 .10 ## 32 0 22 0.997 146.7 77.04 63.65 66.00 ## .25 .50 .75 .90 .95 ## 96.50 123.00 180.00 243.50 253.55 ## ## lowest : 52 62 65 66 91, highest: 215 230 245 264 335 ## -------------------------------------------------------------------------------- ## wt ## n missing distinct Info Mean Gmd .05 .10 ## 32 0 29 0.999 3.217 1.089 1.736 1.956 ## .25 .50 .75 .90 .95 ## 2.581 3.325 3.610 4.048 5.293 ## ## lowest : 1.513 1.615 1.835 1.935 2.140, highest: 3.845 4.070 5.250 5.345 5.424 ## -------------------------------------------------------------------------------- ## am ## n missing distinct Info Sum Mean Gmd ## 32 0 2 0.724 13 0.4062 0.498 ## ## -------------------------------------------------------------------------------- # 五个分位数 install.packages("pastecs") ## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1' 2
## (as 'lib' is unspecified) library(pastecs) stat.desc(myvars) ## mpg hp wt am ## nbr.val 32.0000000 32.0000000 32.0000000 32.00000000 ## nbr.null 0.0000000 0.0000000 0.0000000 19.00000000 ## nbr.na 0.0000000 0.0000000 0.0000000 0.00000000 ## min 10.4000000 52.0000000 1.5130000 0.00000000 ## max 33.9000000 335.0000000 5.4240000 1.00000000 ## range 23.5000000 283.0000000 3.9110000 1.00000000 ## sum 642.9000000 4694.0000000 102.9520000 13.00000000 ## median 19.2000000 123.0000000 3.3250000 0.00000000 ## mean 20.0906250 146.6875000 3.2172500 0.40625000 ## SE.mean 1.0654240 12.1203173 0.1729685 0.08820997 ## CI.mean.0.95 2.1729465 24.7195501 0.3527715 0.17990541 ## var 36.3241028 4700.8669355 0.9573790 0.24899194 ## std.dev 6.0269481 68.5628685 0.9784574 0.49899092 ## coef.var 0.2999881 0.4674077 0.3041285 1.22828533 stat.desc(myvars,basic=T) ## mpg hp wt am ## nbr.val 32.0000000 32.0000000 32.0000000 32.00000000 ## nbr.null 0.0000000 0.0000000 0.0000000 19.00000000 ## nbr.na 0.0000000 0.0000000 0.0000000 0.00000000 ## min 10.4000000 52.0000000 1.5130000 0.00000000 ## max 33.9000000 335.0000000 5.4240000 1.00000000 ## range 23.5000000 283.0000000 3.9110000 1.00000000 ## sum 642.9000000 4694.0000000 102.9520000 13.00000000 ## median 19.2000000 123.0000000 3.3250000 0.00000000 ## mean 20.0906250 146.6875000 3.2172500 0.40625000 ## SE.mean 1.0654240 12.1203173 0.1729685 0.08820997 ## CI.mean.0.95 2.1729465 24.7195501 0.3527715 0.17990541 ## var 36.3241028 4700.8669355 0.9573790 0.24899194 ## std.dev 6.0269481 68.5628685 0.9784574 0.49899092 ## coef.var 0.2999881 0.4674077 0.3041285 1.22828533 #basic=T 计算一些基础值,例如缺失值的数量等等 stat.desc(myvars,desc=T)
## (as 'lib' is unspecified) library(pastecs) stat.desc(myvars) ## mpg hp wt am ## nbr.val 32.0000000 32.0000000 32.0000000 32.00000000 ## nbr.null 0.0000000 0.0000000 0.0000000 19.00000000 ## nbr.na 0.0000000 0.0000000 0.0000000 0.00000000 ## min 10.4000000 52.0000000 1.5130000 0.00000000 ## max 33.9000000 335.0000000 5.4240000 1.00000000 ## range 23.5000000 283.0000000 3.9110000 1.00000000 ## sum 642.9000000 4694.0000000 102.9520000 13.00000000 ## median 19.2000000 123.0000000 3.3250000 0.00000000 ## mean 20.0906250 146.6875000 3.2172500 0.40625000 ## SE.mean 1.0654240 12.1203173 0.1729685 0.08820997 ## CI.mean.0.95 2.1729465 24.7195501 0.3527715 0.17990541 ## var 36.3241028 4700.8669355 0.9573790 0.24899194 ## std.dev 6.0269481 68.5628685 0.9784574 0.49899092 ## coef.var 0.2999881 0.4674077 0.3041285 1.22828533 stat.desc(myvars,basic=T) ## mpg hp wt am ## nbr.val 32.0000000 32.0000000 32.0000000 32.00000000 ## nbr.null 0.0000000 0.0000000 0.0000000 19.00000000 ## nbr.na 0.0000000 0.0000000 0.0000000 0.00000000 ## min 10.4000000 52.0000000 1.5130000 0.00000000 ## max 33.9000000 335.0000000 5.4240000 1.00000000 ## range 23.5000000 283.0000000 3.9110000 1.00000000 ## sum 642.9000000 4694.0000000 102.9520000 13.00000000 ## median 19.2000000 123.0000000 3.3250000 0.00000000 ## mean 20.0906250 146.6875000 3.2172500 0.40625000 ## SE.mean 1.0654240 12.1203173 0.1729685 0.08820997 ## CI.mean.0.95 2.1729465 24.7195501 0.3527715 0.17990541 ## var 36.3241028 4700.8669355 0.9573790 0.24899194 ## std.dev 6.0269481 68.5628685 0.9784574 0.49899092 ## coef.var 0.2999881 0.4674077 0.3041285 1.22828533 #basic=T 计算一些基础值,例如缺失值的数量等等 stat.desc(myvars,desc=T) 3
## mpg hp wt am ## nbr.val 32.0000000 32.0000000 32.0000000 32.00000000 ## nbr.null 0.0000000 0.0000000 0.0000000 19.00000000 ## nbr.na 0.0000000 0.0000000 0.0000000 0.00000000 ## min 10.4000000 52.0000000 1.5130000 0.00000000 ## max 33.9000000 335.0000000 5.4240000 1.00000000 ## range 23.5000000 283.0000000 3.9110000 1.00000000 ## sum 642.9000000 4694.0000000 102.9520000 13.00000000 ## median 19.2000000 123.0000000 3.3250000 0.00000000 ## mean 20.0906250 146.6875000 3.2172500 0.40625000 ## SE.mean 1.0654240 12.1203173 0.1729685 0.08820997 ## CI.mean.0.95 2.1729465 24.7195501 0.3527715 0.17990541 ## var 36.3241028 4700.8669355 0.9573790 0.24899194 ## std.dev 6.0269481 68.5628685 0.9784574 0.49899092 ## coef.var 0.2999881 0.4674077 0.3041285 1.22828533 #desc=T 计算一些描述值,例如中位数、分位数等等 stat.desc(myvars,norm=T) ## mpg hp wt am ## nbr.val 32.0000000 32.00000000 32.00000000 3.200000e+01 ## nbr.null 0.0000000 0.00000000 0.00000000 1.900000e+01 ## nbr.na 0.0000000 0.00000000 0.00000000 0.000000e+00 ## min 10.4000000 52.00000000 1.51300000 0.000000e+00 ## max 33.9000000 335.00000000 5.42400000 1.000000e+00 ## range 23.5000000 283.00000000 3.91100000 1.000000e+00 ## sum 642.9000000 4694.00000000 102.95200000 1.300000e+01 ## median 19.2000000 123.00000000 3.32500000 0.000000e+00 ## mean 20.0906250 146.68750000 3.21725000 4.062500e-01 ## SE.mean 1.0654240 12.12031731 0.17296847 8.820997e-02 ## CI.mean.0.95 2.1729465 24.71955013 0.35277153 1.799054e-01 ## var 36.3241028 4700.86693548 0.95737897 2.489919e-01 ## std.dev 6.0269481 68.56286849 0.97845744 4.989909e-01 ## coef.var 0.2999881 0.46740771 0.30412851 1.228285e+00 ## skewness 0.6106550 0.72602366 0.42314646 3.640159e-01 ## skew.2SE 0.7366922 0.87587259 0.51048252 4.391476e-01 ## kurtosis -0.3727660 -0.13555112 -0.02271075 -1.924741e+00 ## kurt.2SE -0.2302812 -0.08373853 -0.01402987 -1.189035e+00 ## normtest.W 0.9475647 0.93341934 0.94325772 6.250744e-01
## mpg hp wt am ## nbr.val 32.0000000 32.0000000 32.0000000 32.00000000 ## nbr.null 0.0000000 0.0000000 0.0000000 19.00000000 ## nbr.na 0.0000000 0.0000000 0.0000000 0.00000000 ## min 10.4000000 52.0000000 1.5130000 0.00000000 ## max 33.9000000 335.0000000 5.4240000 1.00000000 ## range 23.5000000 283.0000000 3.9110000 1.00000000 ## sum 642.9000000 4694.0000000 102.9520000 13.00000000 ## median 19.2000000 123.0000000 3.3250000 0.00000000 ## mean 20.0906250 146.6875000 3.2172500 0.40625000 ## SE.mean 1.0654240 12.1203173 0.1729685 0.08820997 ## CI.mean.0.95 2.1729465 24.7195501 0.3527715 0.17990541 ## var 36.3241028 4700.8669355 0.9573790 0.24899194 ## std.dev 6.0269481 68.5628685 0.9784574 0.49899092 ## coef.var 0.2999881 0.4674077 0.3041285 1.22828533 #desc=T 计算一些描述值,例如中位数、分位数等等 stat.desc(myvars,norm=T) ## mpg hp wt am ## nbr.val 32.0000000 32.00000000 32.00000000 3.200000e+01 ## nbr.null 0.0000000 0.00000000 0.00000000 1.900000e+01 ## nbr.na 0.0000000 0.00000000 0.00000000 0.000000e+00 ## min 10.4000000 52.00000000 1.51300000 0.000000e+00 ## max 33.9000000 335.00000000 5.42400000 1.000000e+00 ## range 23.5000000 283.00000000 3.91100000 1.000000e+00 ## sum 642.9000000 4694.00000000 102.95200000 1.300000e+01 ## median 19.2000000 123.00000000 3.32500000 0.000000e+00 ## mean 20.0906250 146.68750000 3.21725000 4.062500e-01 ## SE.mean 1.0654240 12.12031731 0.17296847 8.820997e-02 ## CI.mean.0.95 2.1729465 24.71955013 0.35277153 1.799054e-01 ## var 36.3241028 4700.86693548 0.95737897 2.489919e-01 ## std.dev 6.0269481 68.56286849 0.97845744 4.989909e-01 ## coef.var 0.2999881 0.46740771 0.30412851 1.228285e+00 ## skewness 0.6106550 0.72602366 0.42314646 3.640159e-01 ## skew.2SE 0.7366922 0.87587259 0.51048252 4.391476e-01 ## kurtosis -0.3727660 -0.13555112 -0.02271075 -1.924741e+00 ## kurt.2SE -0.2302812 -0.08373853 -0.01402987 -1.189035e+00 ## normtest.W 0.9475647 0.93341934 0.94325772 6.250744e-01 4
## normtest.p 0.1228814 0.04880824 0.09265499 7.836354e-08 #norm=T 计算一些正态分布统计量,例如偏度、峰度等 独立性检验函数 独立性检验:是根据频数信息判断两类因子彼此相关或互相独立的假设检验 卡方检验、Fisher 检验、Cochran-Mantel-Haenszel 检验 p-value 就是 probability 值,是通过计算得到的概率值,即在原假设为真时,得到最大的/超出所得到的检验统计量值的概率 一般将 p 值的临界值定为 0.05:当 p<0.05 时拒绝原假设,当 p>0.05 时不拒绝原假设 相关性分析 进行过独立性检验后,才能进行相关分析 相关性衡量指标 Pearson 相关系数、Spearman 相关系数、Kendall 相关系数、偏相关系数、多分格 (polychoric) 相关系数、多系列 (polyserial) 相关系数 ?cor # 通过 method 可以计算 pearson\kendall\spearman 相关系数 state.x77 ## Population Income Illiteracy Life Exp Murder HS Grad Frost ## Alabama 3615 3624 2.1 69.05 15.1 41.3 20 ## Alaska 365 6315 1.5 69.31 11.3 66.7 152 ## Arizona 2212 4530 1.8 70.55 7.8 58.1 15 ## Arkansas 2110 3378 1.9 70.66 10.1 39.9 65 ## California 21198 5114 1.1 71.71 10.3 62.6 20 ## Colorado 2541 4884 0.7 72.06 6.8 63.9 166 ## Connecticut 3100 5348 1.1 72.48 3.1 56.0 139 ## Delaware 579 4809 0.9 70.06 6.2 54.6 103 ## Florida 8277 4815 1.3 70.66 10.7 52.6 11 ## Georgia 4931 4091 2.0 68.54 13.9 40.6 60 ## Hawaii 868 4963 1.9 73.60 6.2 61.9 0 ## Idaho 813 4119 0.6 71.87 5.3 59.5 126 ## Illinois 11197 5107 0.9 70.14 10.3 52.6 127 ## Indiana 5313 4458 0.7 70.88 7.1 52.9 122 ## Iowa 2861 4628 0.5 72.56 2.3 59.0 140
## normtest.p 0.1228814 0.04880824 0.09265499 7.836354e-08 #norm=T 计算一些正态分布统计量,例如偏度、峰度等 独立性检验函数 独立性检验:是根据频数信息判断两类因子彼此相关或互相独立的假设检验 卡方检验、Fisher 检验、Cochran-Mantel-Haenszel 检验 p-value 就是 probability 值,是通过计算得到的概率值,即在原假设为真时,得到最大的/超出所得到的检验统计量值的概率 一般将 p 值的临界值定为 0.05:当 p<0.05 时拒绝原假设,当 p>0.05 时不拒绝原假设 相关性分析 进行过独立性检验后,才能进行相关分析 相关性衡量指标 Pearson 相关系数、Spearman 相关系数、Kendall 相关系数、偏相关系数、多分格 (polychoric) 相关系数、多系列 (polyserial) 相关系数 ?cor # 通过 method 可以计算 pearson\kendall\spearman 相关系数 state.x77 ## Population Income Illiteracy Life Exp Murder HS Grad Frost ## Alabama 3615 3624 2.1 69.05 15.1 41.3 20 ## Alaska 365 6315 1.5 69.31 11.3 66.7 152 ## Arizona 2212 4530 1.8 70.55 7.8 58.1 15 ## Arkansas 2110 3378 1.9 70.66 10.1 39.9 65 ## California 21198 5114 1.1 71.71 10.3 62.6 20 ## Colorado 2541 4884 0.7 72.06 6.8 63.9 166 ## Connecticut 3100 5348 1.1 72.48 3.1 56.0 139 ## Delaware 579 4809 0.9 70.06 6.2 54.6 103 ## Florida 8277 4815 1.3 70.66 10.7 52.6 11 ## Georgia 4931 4091 2.0 68.54 13.9 40.6 60 ## Hawaii 868 4963 1.9 73.60 6.2 61.9 0 ## Idaho 813 4119 0.6 71.87 5.3 59.5 126 ## Illinois 11197 5107 0.9 70.14 10.3 52.6 127 ## Indiana 5313 4458 0.7 70.88 7.1 52.9 122 ## Iowa 2861 4628 0.5 72.56 2.3 59.0 140 5
#Kansas 2280 4669 0.6 72.58 4.5 59.9 114 ##Kentucky 3387 3712 1.6 70.10 10.6 38.5 #Louisiana 3806 3545 2.8 68.76 13.2 42.2 #Maine 1058 3694 0.7 70.39 2.7 54.7 2 #Maryland 4122 529g 0.9 70.22 #Massachusetts 5814 4755 71.83 18 #Michigan 9111 4751 0.9 70.63 11 52.8 #Minnesota 3921 4675 0.6 72.96 23 57.6 160 #Mississippi 2341 3098 2.4 68.09 12. 41.0 #Missouri 4767 4254 0.8 70.69 48.8 ##Montana 746 4347 0.6 70.56 5.0 59.2 9场 #样Nebraska 1544 4508 0.6 72.60 ##Nevada 590 5149 0.5 69. ##New Hampshire 812 4281 0.7 71.23 3.3 57.6 17 New Jersey 7333 5237 1.1 70.93 5.2 52.5 15 #New Mexico 1144 3601 2.2 70.32 9.7 55.2 ##New York 18076 4903 1.4 70.55 10.9 #North Carolina 5441 3875 1.8 69.21 11.1 38.5 80 #样North Dakota 63 5087 0.8 72.78 1.4 ##0hi0 10735 4561 0.8 70.82 2715 3983 1.1 71.42 6.4 51.6 #Oregon 2284 4660 0.6 72.13 4.2 60.0 24 11860 4449 1.0 70.43 6.1 50.2 126 #Rhode Island 937 4558 1.3 71.90 2.4 46.4 27 #South Carolina 2816 3635 2.3 67.96 11.6 37.8 ##South Dakota 681 4167 0.5 72.08 1.7 53.3 #Tennessee 4173 3821 1.7 70.11 11.0 41.8 #Texas 12237 4188 2.2 70.90 12.2 ##Utah 1203 4022 0.6 72.90 ##Vermont 472 3907 0.6 71.64 5 57.1 #Virginia A98 470 1.4 70.08 9.5 47.8 #Washington 3559 4864 0. 71.72 4.3 63.5 #West Virginia 1799 3617 14 69.48 6.7 41.6 ##Wisconsin 4589 4468 0 72.48 3.0 54.5 88 ##Wyoming 376 4566 0.6 70.29 6 9 62.9 173 Area #Alabama 50708 ##Alaska 566432 ##Arizona 113417
## Kansas 2280 4669 0.6 72.58 4.5 59.9 114 ## Kentucky 3387 3712 1.6 70.10 10.6 38.5 95 ## Louisiana 3806 3545 2.8 68.76 13.2 42.2 12 ## Maine 1058 3694 0.7 70.39 2.7 54.7 161 ## Maryland 4122 5299 0.9 70.22 8.5 52.3 101 ## Massachusetts 5814 4755 1.1 71.83 3.3 58.5 103 ## Michigan 9111 4751 0.9 70.63 11.1 52.8 125 ## Minnesota 3921 4675 0.6 72.96 2.3 57.6 160 ## Mississippi 2341 3098 2.4 68.09 12.5 41.0 50 ## Missouri 4767 4254 0.8 70.69 9.3 48.8 108 ## Montana 746 4347 0.6 70.56 5.0 59.2 155 ## Nebraska 1544 4508 0.6 72.60 2.9 59.3 139 ## Nevada 590 5149 0.5 69.03 11.5 65.2 188 ## New Hampshire 812 4281 0.7 71.23 3.3 57.6 174 ## New Jersey 7333 5237 1.1 70.93 5.2 52.5 115 ## New Mexico 1144 3601 2.2 70.32 9.7 55.2 120 ## New York 18076 4903 1.4 70.55 10.9 52.7 82 ## North Carolina 5441 3875 1.8 69.21 11.1 38.5 80 ## North Dakota 637 5087 0.8 72.78 1.4 50.3 186 ## Ohio 10735 4561 0.8 70.82 7.4 53.2 124 ## Oklahoma 2715 3983 1.1 71.42 6.4 51.6 82 ## Oregon 2284 4660 0.6 72.13 4.2 60.0 44 ## Pennsylvania 11860 4449 1.0 70.43 6.1 50.2 126 ## Rhode Island 931 4558 1.3 71.90 2.4 46.4 127 ## South Carolina 2816 3635 2.3 67.96 11.6 37.8 65 ## South Dakota 681 4167 0.5 72.08 1.7 53.3 172 ## Tennessee 4173 3821 1.7 70.11 11.0 41.8 70 ## Texas 12237 4188 2.2 70.90 12.2 47.4 35 ## Utah 1203 4022 0.6 72.90 4.5 67.3 137 ## Vermont 472 3907 0.6 71.64 5.5 57.1 168 ## Virginia 4981 4701 1.4 70.08 9.5 47.8 85 ## Washington 3559 4864 0.6 71.72 4.3 63.5 32 ## West Virginia 1799 3617 1.4 69.48 6.7 41.6 100 ## Wisconsin 4589 4468 0.7 72.48 3.0 54.5 149 ## Wyoming 376 4566 0.6 70.29 6.9 62.9 173 ## Area ## Alabama 50708 ## Alaska 566432 ## Arizona 113417 6
##Arkansas 51945 #California 156361 ##Colorado 103766 #Connecticut 4862 ##Delaware 1982 ##Florida 54090 #Georgia 58073 ##Hawaii 6425 ##Idaho 82677 ##Illinois 55748 #Indiana 36097 #样Iowa 55941 ##Kansas 81787 #Kentucky 39650 #Louisiana 44930 #Maine 30920 #Maryland 9891 #Massachusetts 7826 #Michigan 56817 #Minnesota 79289 47296 #料Missouri 68995 #Montana 145587 ##Nebraska 76483 #Nevada 109889 #New Hampshire 902 ##New Jersey 7521 #New Mexico 121412 New York 47831 #North Carolina 48798 #North Dakota 69273 ##Ohio 40975 ##Oklahoma 68782 #Oregon 96184 #Pennsylvania 44966 ##Rhode Island 1049 #South Carolina 30225 ##South Dakota 75955 #Tennessee 41328
## Arkansas 51945 ## California 156361 ## Colorado 103766 ## Connecticut 4862 ## Delaware 1982 ## Florida 54090 ## Georgia 58073 ## Hawaii 6425 ## Idaho 82677 ## Illinois 55748 ## Indiana 36097 ## Iowa 55941 ## Kansas 81787 ## Kentucky 39650 ## Louisiana 44930 ## Maine 30920 ## Maryland 9891 ## Massachusetts 7826 ## Michigan 56817 ## Minnesota 79289 ## Mississippi 47296 ## Missouri 68995 ## Montana 145587 ## Nebraska 76483 ## Nevada 109889 ## New Hampshire 9027 ## New Jersey 7521 ## New Mexico 121412 ## New York 47831 ## North Carolina 48798 ## North Dakota 69273 ## Ohio 40975 ## Oklahoma 68782 ## Oregon 96184 ## Pennsylvania 44966 ## Rhode Island 1049 ## South Carolina 30225 ## South Dakota 75955 ## Tennessee 41328 7
#Texas 262134 #Utah 82096 #Vermont 9267 #Virginia 39780 #样Washington 66570 #West Virginia 24070 #Wisconsin 54464 Wyoming 97203 ?state.x77 cor(state.x77) Population Income Illiteracy Life Exp Murder ##Population 1.000000000.20822760.10762237-0.068051950.3436428 #Income 0.208227561.0000000-0.437075190.34025534-0.2300776 #I11 iteracy0.10762237 -0.4370752 1.00000000 -0.588477930.7029752 ##Life Exp -0.068051950.3402553-0.58847793 1.00000000-0.7808458 #Murder 0.34364275-0.2300776 0.70297520 -0.780845751.0000000 ##HS Grad -0.098489750.6199323-0.657188610.58221620-0.4879710 ##Frost -0.332152450.2262822-0.671946970.26206801-0.5388834 #排Area 0.022543840.36331540.07726113-0.107331940.2283902 HS Grad Frost Area #Popu1at1on-0.09848975-0.33215250.02254384 ##Income 0.619932320.22628220.36331544 #I11 iteracy-0.65718861 -0.6719470 0.07726113 ##Life Exp 0.582216200.2620680-0.10733194 Murder -0.48797102-0.5388834 0.22839021 #HS Grad 1.000000000.36677970.33354187 ##Frost 0.366779701.00000000.05922910 ##Area 0.333541870.05922911.00000000 #协方差 ?cov cov(state.x77) 特格 Population Income Illiteracy Life Exp Murder #Population 19931683.7588 571229.7796 292.8679592-4.078425e+02 5663.523714 #Income 571229.7796 377573.3061-163.70204082.8066320+02-521.894286 Illiteracy 292.8680 -163.7020 0.3715306-4.815122e-01 1.581776 ##Life Exp -407.8425 280.6632 -0.48151221.802020e+00 -3.869480
## Texas 262134 ## Utah 82096 ## Vermont 9267 ## Virginia 39780 ## Washington 66570 ## West Virginia 24070 ## Wisconsin 54464 ## Wyoming 97203 ?state.x77 cor(state.x77) ## Population Income Illiteracy Life Exp Murder ## Population 1.00000000 0.2082276 0.10762237 -0.06805195 0.3436428 ## Income 0.20822756 1.0000000 -0.43707519 0.34025534 -0.2300776 ## Illiteracy 0.10762237 -0.4370752 1.00000000 -0.58847793 0.7029752 ## Life Exp -0.06805195 0.3402553 -0.58847793 1.00000000 -0.7808458 ## Murder 0.34364275 -0.2300776 0.70297520 -0.78084575 1.0000000 ## HS Grad -0.09848975 0.6199323 -0.65718861 0.58221620 -0.4879710 ## Frost -0.33215245 0.2262822 -0.67194697 0.26206801 -0.5388834 ## Area 0.02254384 0.3633154 0.07726113 -0.10733194 0.2283902 ## HS Grad Frost Area ## Population -0.09848975 -0.3321525 0.02254384 ## Income 0.61993232 0.2262822 0.36331544 ## Illiteracy -0.65718861 -0.6719470 0.07726113 ## Life Exp 0.58221620 0.2620680 -0.10733194 ## Murder -0.48797102 -0.5388834 0.22839021 ## HS Grad 1.00000000 0.3667797 0.33354187 ## Frost 0.36677970 1.0000000 0.05922910 ## Area 0.33354187 0.0592291 1.00000000 # 协方差 ?cov cov(state.x77) ## Population Income Illiteracy Life Exp Murder ## Population 19931683.7588 571229.7796 292.8679592 -4.078425e+02 5663.523714 ## Income 571229.7796 377573.3061 -163.7020408 2.806632e+02 -521.894286 ## Illiteracy 292.8680 -163.7020 0.3715306 -4.815122e-01 1.581776 ## Life Exp -407.8425 280.6632 -0.4815122 1.802020e+00 -3.869480 8
## Murder 5663.5237 -521.8943 1.5817755 -3.869480e+00 13.627465 ## HS Grad -3551.5096 3076.7690 -3.2354694 6.312685e+00 -14.549616 ## Frost -77081.9727 7227.6041 -21.2900000 1.828678e+01 -103.406000 ## Area 8587916.9494 19049013.7510 4018.3371429 -1.229410e+04 71940.429959 ## HS Grad Frost Area ## Population -3551.509551 -77081.97265 8.587917e+06 ## Income 3076.768980 7227.60408 1.904901e+07 ## Illiteracy -3.235469 -21.29000 4.018337e+03 ## Life Exp 6.312685 18.28678 -1.229410e+04 ## Murder -14.549616 -103.40600 7.194043e+04 ## HS Grad 65.237894 153.99216 2.298732e+05 ## Frost 153.992163 2702.00857 2.627039e+05 ## Area 229873.192816 262703.89306 7.280748e+09 names(state.x77) ## NULL colnames(state.x77) ## [1] "Population" "Income" "Illiteracy" "Life Exp" "Murder" ## [6] "HS Grad" "Frost" "Area" x <- state.x77[,c(1,2,3,6)] y <- state.x77[,c(4,5)] head(x) ## Population Income Illiteracy HS Grad ## Alabama 3615 3624 2.1 41.3 ## Alaska 365 6315 1.5 66.7 ## Arizona 2212 4530 1.8 58.1 ## Arkansas 2110 3378 1.9 39.9 ## California 21198 5114 1.1 62.6 ## Colorado 2541 4884 0.7 63.9 head(y) ## Life Exp Murder ## Alabama 69.05 15.1 ## Alaska 69.31 11.3 ## Arizona 70.55 7.8 ## Arkansas 70.66 10.1 ## California 71.71 10.3 ## Colorado 72.06 6.8
## Murder 5663.5237 -521.8943 1.5817755 -3.869480e+00 13.627465 ## HS Grad -3551.5096 3076.7690 -3.2354694 6.312685e+00 -14.549616 ## Frost -77081.9727 7227.6041 -21.2900000 1.828678e+01 -103.406000 ## Area 8587916.9494 19049013.7510 4018.3371429 -1.229410e+04 71940.429959 ## HS Grad Frost Area ## Population -3551.509551 -77081.97265 8.587917e+06 ## Income 3076.768980 7227.60408 1.904901e+07 ## Illiteracy -3.235469 -21.29000 4.018337e+03 ## Life Exp 6.312685 18.28678 -1.229410e+04 ## Murder -14.549616 -103.40600 7.194043e+04 ## HS Grad 65.237894 153.99216 2.298732e+05 ## Frost 153.992163 2702.00857 2.627039e+05 ## Area 229873.192816 262703.89306 7.280748e+09 names(state.x77) ## NULL colnames(state.x77) ## [1] "Population" "Income" "Illiteracy" "Life Exp" "Murder" ## [6] "HS Grad" "Frost" "Area" x <- state.x77[,c(1,2,3,6)] y <- state.x77[,c(4,5)] head(x) ## Population Income Illiteracy HS Grad ## Alabama 3615 3624 2.1 41.3 ## Alaska 365 6315 1.5 66.7 ## Arizona 2212 4530 1.8 58.1 ## Arkansas 2110 3378 1.9 39.9 ## California 21198 5114 1.1 62.6 ## Colorado 2541 4884 0.7 63.9 head(y) ## Life Exp Murder ## Alabama 69.05 15.1 ## Alaska 69.31 11.3 ## Arizona 70.55 7.8 ## Arkansas 70.66 10.1 ## California 71.71 10.3 ## Colorado 72.06 6.8 9
cor(x,y) ## Life Exp Murder ## Population -0.06805195 0.3436428 ## Income 0.34025534 -0.2300776 ## Illiteracy -0.58847793 0.7029752 ## HS Grad 0.58221620 -0.4879710 相关性检验函数 cor(state.x77) ## Population Income Illiteracy Life Exp Murder ## Population 1.00000000 0.2082276 0.10762237 -0.06805195 0.3436428 ## Income 0.20822756 1.0000000 -0.43707519 0.34025534 -0.2300776 ## Illiteracy 0.10762237 -0.4370752 1.00000000 -0.58847793 0.7029752 ## Life Exp -0.06805195 0.3402553 -0.58847793 1.00000000 -0.7808458 ## Murder 0.34364275 -0.2300776 0.70297520 -0.78084575 1.0000000 ## HS Grad -0.09848975 0.6199323 -0.65718861 0.58221620 -0.4879710 ## Frost -0.33215245 0.2262822 -0.67194697 0.26206801 -0.5388834 ## Area 0.02254384 0.3633154 0.07726113 -0.10733194 0.2283902 ## HS Grad Frost Area ## Population -0.09848975 -0.3321525 0.02254384 ## Income 0.61993232 0.2262822 0.36331544 ## Illiteracy -0.65718861 -0.6719470 0.07726113 ## Life Exp 0.58221620 0.2620680 -0.10733194 ## Murder -0.48797102 -0.5388834 0.22839021 ## HS Grad 1.00000000 0.3667797 0.33354187 ## Frost 0.36677970 1.0000000 0.05922910 ## Area 0.33354187 0.0592291 1.00000000 ?cor.test #alternative 是用来选择指定单侧检验还是双侧检验 cor.test(state.x77[,3],state.x77[,5]) ## ## Pearson's product-moment correlation ## 10
cor(x,y) ## Life Exp Murder ## Population -0.06805195 0.3436428 ## Income 0.34025534 -0.2300776 ## Illiteracy -0.58847793 0.7029752 ## HS Grad 0.58221620 -0.4879710 相关性检验函数 cor(state.x77) ## Population Income Illiteracy Life Exp Murder ## Population 1.00000000 0.2082276 0.10762237 -0.06805195 0.3436428 ## Income 0.20822756 1.0000000 -0.43707519 0.34025534 -0.2300776 ## Illiteracy 0.10762237 -0.4370752 1.00000000 -0.58847793 0.7029752 ## Life Exp -0.06805195 0.3402553 -0.58847793 1.00000000 -0.7808458 ## Murder 0.34364275 -0.2300776 0.70297520 -0.78084575 1.0000000 ## HS Grad -0.09848975 0.6199323 -0.65718861 0.58221620 -0.4879710 ## Frost -0.33215245 0.2262822 -0.67194697 0.26206801 -0.5388834 ## Area 0.02254384 0.3633154 0.07726113 -0.10733194 0.2283902 ## HS Grad Frost Area ## Population -0.09848975 -0.3321525 0.02254384 ## Income 0.61993232 0.2262822 0.36331544 ## Illiteracy -0.65718861 -0.6719470 0.07726113 ## Life Exp 0.58221620 0.2620680 -0.10733194 ## Murder -0.48797102 -0.5388834 0.22839021 ## HS Grad 1.00000000 0.3667797 0.33354187 ## Frost 0.36677970 1.0000000 0.05922910 ## Area 0.33354187 0.0592291 1.00000000 ?cor.test #alternative 是用来选择指定单侧检验还是双侧检验 # two.sided\greater\less cor.test(state.x77[,3],state.x77[,5]) ## ## Pearson's product-moment correlation ## 10