# Demo 1: R basics

# Demo 1: install R, RStudio, and NodeXL

######################## R basics

# Report the current working directory.
getwd()
## [1] "E:/Github/demo1_install_softwares"
# Switch to the project folder (modify the path to set your own work
# directory). The path is machine-specific, so a failure is reported as a
# message instead of stopping the rest of the demo.
tryCatch(
    setwd("E:/github/ergm/"),
    error = function(e) message("setwd() failed: ", conditionMessage(e))
)


1 + 3  # an expression typed at the prompt is evaluated and printed
## [1] 4
a <- 3  # store the value 3 under the name `a`
a  # typing a name prints its value
## [1] 3
a   <-   3  # extra spaces around the operator are ignored
sqrt(a)  # call the built-in square root function
## [1] 1.732
b <- sqrt(a)  # keep the function's result in `b`
b
## [1] 1.732


help(sqrt)  # open the help page for a specific function
## starting httpd help server ... done
?sqrt  # shorthand for the same help lookup

a == b  # two equals signs test whether a is equal to b
## [1] FALSE
a != b  # is a not equal to b?
## [1] TRUE

############################## Vectors and matrices in R

# build a vector by combining values with c()
a <- c(1, 3, 5)
a
## [1] 1 3 5
a[2]  # pick out the second element
## [1] 3
b <- c("one", "three", "five")
b  # character strings can be combined the same way
## [1] "one"   "three" "five"
b[2]
## [1] "three"
a <- c(a, a)
a  # c() also accepts whole vectors and joins them end to end
## [1] 1 3 5 1 3 5
a <- c(a, b)
a  # mixing numbers and strings: the numbers are converted to strings
## [1] "1"     "3"     "5"     "1"     "3"     "5"     "one"   "three" "five"

## Sequences and replication
a <- seq(from = 1, to = 5, by = 1)  # 1 through 5, spelled out in full
b <- 1:5  # the colon operator is a shortcut
a == b  # compared element by element: all TRUE
## [1] TRUE TRUE TRUE TRUE TRUE
rep(1, times = 5)  # one value repeated five times
## [1] 1 1 1 1 1
rep(1:5, times = 2)  # the whole sequence, twice over
##  [1] 1 2 3 4 5 1 2 3 4 5
rep(1:5, each = 2)  # every element doubled in place
##  [1] 1 1 2 2 3 3 4 4 5 5
rep(1:5, times = 5:1)  # a separate repeat count for each element
##  [1] 1 1 1 1 1 2 2 2 2 3 3 3 4 4 5

## matrices
a <- matrix(data = 1:25, nrow = 5, ncol = 5)
a  # the 'formal' constructor; values fill the matrix column by column
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    6   11   16   21
## [2,]    2    7   12   17   22
## [3,]    3    8   13   18   23
## [4,]    4    9   14   19   24
## [5,]    5   10   15   20   25
a[1, 2]  # one element, addressed as [row, column]
## [1] 6
a[1, ]  # leave the column index empty to get the whole first row
## [1]  1  6 11 16 21
a[, 2]  # leave the row index empty to get the whole second column
## [1]  6  7  8  9 10
a[2:3, 3:5]  # ranges in both positions select a submatrix
##      [,1] [,2] [,3]
## [1,]   12   17   22
## [2,]   13   18   23
a[-1, ]  # a negative index drops that row
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    2    7   12   17   22
## [2,]    3    8   13   18   23
## [3,]    4    9   14   19   24
## [4,]    5   10   15   20   25

# build matrices by binding vectors together as columns or as rows
b <- cbind(1:5, 1:5)
b
##      [,1] [,2]
## [1,]    1    1
## [2,]    2    2
## [3,]    3    3
## [4,]    4    4
## [5,]    5    5
d <- rbind(1:5, 1:5)
d  # rbind() stacks the same vectors as rows instead
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    2    3    4    5
## [2,]    1    2    3    4    5

dim(b)  # dimensions: number of rows, then number of columns
## [1] 5 2
nrow(b)  # row count only
## [1] 5
ncol(b)  # column count only
## [1] 2

# Arithmetic on vectors is applied element by element
a <- 1:5
a + 1  # add 1 to every element
## [1] 2 3 4 5 6
a * 2  # double every element
## [1]  2  4  6  8 10
a / 3  # divide every element by 3
## [1] 0.3333 0.6667 1.0000 1.3333 1.6667
a - 4  # subtract 4 from every element
## [1] -3 -2 -1  0  1
a^5  # raise every element to the 5th power
## [1]    1   32  243 1024 3125
a + a  # two vectors are combined position by position
## [1]  2  4  6  8 10
a * a
## [1]  1  4  9 16 25
log(a)  # natural logarithm of each element
## [1] 0.0000 0.6931 1.0986 1.3863 1.6094
exp(b)  # functions apply element by element to a matrix as well
##         [,1]    [,2]
## [1,]   2.718   2.718
## [2,]   7.389   7.389
## [3,]  20.086  20.086
## [4,]  54.598  54.598
## [5,] 148.413 148.413

################# Data frames

# Build a data frame from three columns of different types.
# stringsAsFactors = TRUE is spelled out because its default changed to FALSE
# in R 4.0.0; without it the `name` column stays character and the factor
# output shown further down does not appear.
d <- data.frame(income = 1:5, health = c(TRUE, TRUE, TRUE, TRUE, FALSE),
    name = LETTERS[1:5], stringsAsFactors = TRUE)
d
##   income health name
## 1      1   TRUE    A
## 2      2   TRUE    B
## 3      3   TRUE    C
## 4      4   TRUE    D
## 5      5  FALSE    E
d[1, 2]  # [row, column] indexing works just as it does for a matrix
## [1] TRUE
d[, 1] * 5  # a whole column can be used in arithmetic
## [1]  5 10 15 20 25
d[-1, ]  # a negative index drops that row
##   income health name
## 2      2   TRUE    B
## 3      3   TRUE    C
## 4      4   TRUE    D
## 5      5  FALSE    E
d$health  # a column can also be picked by name with the dollar sign
## [1]  TRUE  TRUE  TRUE  TRUE FALSE
d$health[3] <- FALSE  # assign to a single cell to change it
d
##   income health name
## 1      1   TRUE    A
## 2      2   TRUE    B
## 3      3  FALSE    C
## 4      4   TRUE    D
## 5      5  FALSE    E
d[2, 3]  # the string column was stored as a factor
## [1] B
## Levels: A B C D E
d$name <- LETTERS[1:5]  # overwrite the column with plain character strings
d[2, 3]
## [1] "B"

# factors can be avoided from the start (if you want)
d <- data.frame(income = 1:5, health = c(TRUE, TRUE, TRUE, TRUE, FALSE),
    name = LETTERS[1:5], stringsAsFactors = FALSE)
d
##   income health name
## 1      1   TRUE    A
## 2      2   TRUE    B
## 3      3   TRUE    C
## 4      4   TRUE    D
## 5      5  FALSE    E
d <- as.data.frame(cbind(1:5, 2:6))  # a matrix can be converted as well
d
##   V1 V2
## 1  1  2
## 2  2  3
## 3  3  4
## 4  4  5
## 5  5  6
is.data.frame(d)  # class tests tell a data frame apart from a matrix
## [1] TRUE
is.matrix(d)  # the conversion result is no longer a matrix
## [1] FALSE


# Finding built-in data sets: many packages ship with data for testing
# and educational purposes.

data()  # list every available data set
?USArrests  # open the help page for one data set
data(USArrests)  # load the data set into the workspace
USArrests  # print the object
##                Murder Assault UrbanPop Rape
## Alabama          13.2     236       58 21.2
## Alaska           10.0     263       48 44.5
## Arizona           8.1     294       80 31.0
## Arkansas          8.8     190       50 19.5
## California        9.0     276       91 40.6
## Colorado          7.9     204       78 38.7
## Connecticut       3.3     110       77 11.1
## Delaware          5.9     238       72 15.8
## Florida          15.4     335       80 31.9
## Georgia          17.4     211       60 25.8
## Hawaii            5.3      46       83 20.2
## Idaho             2.6     120       54 14.2
## Illinois         10.4     249       83 24.0
## Indiana           7.2     113       65 21.0
## Iowa              2.2      56       57 11.3
## Kansas            6.0     115       66 18.0
## Kentucky          9.7     109       52 16.3
## Louisiana        15.4     249       66 22.2
## Maine             2.1      83       51  7.8
## Maryland         11.3     300       67 27.8
## Massachusetts     4.4     149       85 16.3
## Michigan         12.1     255       74 35.1
## Minnesota         2.7      72       66 14.9
## Mississippi      16.1     259       44 17.1
## Missouri          9.0     178       70 28.2
## Montana           6.0     109       53 16.4
## Nebraska          4.3     102       62 16.5
## Nevada           12.2     252       81 46.0
## New Hampshire     2.1      57       56  9.5
## New Jersey        7.4     159       89 18.8
## New Mexico       11.4     285       70 32.1
## New York         11.1     254       86 26.1
## North Carolina   13.0     337       45 16.1
## North Dakota      0.8      45       44  7.3
## Ohio              7.3     120       75 21.4
## Oklahoma          6.6     151       68 20.0
## Oregon            4.9     159       67 29.3
## Pennsylvania      6.3     106       72 14.9
## Rhode Island      3.4     174       87  8.3
## South Carolina   14.4     279       48 22.5
## South Dakota      3.8      86       45 12.8
## Tennessee        13.2     188       59 26.9
## Texas            12.7     201       80 25.5
## Utah              3.2     120       80 22.9
## Vermont           2.2      48       32 11.2
## Virginia          8.5     156       63 20.7
## Washington        4.0     145       73 26.2
## West Virginia     5.7      81       39  9.3
## Wisconsin         2.6      53       66 10.8
## Wyoming           6.8     161       60 15.6

############################# Elementary visualization

## R's workhorse is the `plot` command
## (The "plot of chunk ..." lines below are figure placeholders left over from
## the rendered document; they are kept as comments so the script still parses.)

# scatter plot of two columns, picked with dollar sign notation
plot(USArrests$Murder, USArrests$UrbanPop)

## plot of chunk unnamed-chunk-1

# the same scatter plot with both axes on a log scale
plot(USArrests$Murder, USArrests$UrbanPop, log = "xy")

## plot of chunk unnamed-chunk-1


## Adding plot title and axis labels

plot(USArrests$Murder, USArrests$Assault, xlab = "Murder", ylab = "Assault",
    main = "USArrests")

## plot of chunk unnamed-chunk-1


## Can also add text
# type = "n" sets up the axes without drawing points; text() then writes
# each row name at that row's coordinates
plot(USArrests$Murder, USArrests$Assault, xlab = "Murder", ylab = "Assault",
    main = "USArrests", type = "n")
text(USArrests$Murder, USArrests$Assault, rownames(USArrests))

## plot of chunk unnamed-chunk-1


## Histograms and boxplots
hist(USArrests$Murder)  # distribution of a single column

## plot of chunk unnamed-chunk-1

boxplot(USArrests)  # one box per column of the data frame

## plot of chunk unnamed-chunk-1