# Scalar arithmetic; the value is auto-printed when run at top level.
1 + 2
## [1] 3
# Build a numeric vector with c() and bind it to 'x'.
x <- c(1, 2, 3)
1:3 # sequence of integers from 1 to 3
## [1] 1 2 3
x + c(4, 5, 6) # vectorized: element-wise addition
## [1] 5 7 9
x + 4 # recycling: the length-1 value is reused for every element
## [1] 5 6 7
Vectors
numeric(), character(), logical(), integer(), complex(), …
NA: ‘not available’
factor(): values from restricted set of ‘levels’.
Operations
==, <, <=, >, >=, …
| (or), & (and), ! (not)
[, e.g., x[c(2, 3)]
[<-, e.g., x[c(1, 3)] = x[c(1, 3)]
is.na()
Functions
# Simulate 100 standard-normal draws, then a response equal to those draws
# plus a second, separately drawn set of standard-normal noise.
x <- rnorm(100)
y <- x + rnorm(100)
plot(x, y) # scatter plot of y against x
data.frame
# Bundle the two simulated vectors as named columns of a data.frame.
df <- data.frame(
  Independent = x,
  Dependent = y
)
head(df) # first six rows
## Independent Dependent
## 1 0.1709466 -1.9343537
## 2 -1.0432251 -1.4366633
## 3 0.1874114 -0.6666756
## 4 -1.0410644 -1.9896486
## 5 0.7765771 1.9752691
## 6 -1.4374832 -1.6424475
df[1:5, 1:2] # rows 1 to 5, columns 1 to 2
## Independent Dependent
## 1 0.1709466 -1.9343537
## 2 -1.0432251 -1.4366633
## 3 0.1874114 -0.6666756
## 4 -1.0410644 -1.9896486
## 5 0.7765771 1.9752691
df[1:5, ] # empty column index: keep every column
## Independent Dependent
## 1 0.1709466 -1.9343537
## 2 -1.0432251 -1.4366633
## 3 0.1874114 -0.6666756
## 4 -1.0410644 -1.9896486
## 5 0.7765771 1.9752691
plot(Dependent ~ Independent, data = df) # 'formula' interface
df[, 1], df[, "Independent"], df[[1]], df[["Independent"]], df$Independent
Exercise: plot only values with Dependent > 0, Independent > 0
Select rows
# Logical row index: TRUE where both columns are positive.
ridx <- with(df, Dependent > 0 & Independent > 0)
Plot subset
# Plot only the rows selected by the logical index 'ridx'.
plot(Dependent ~ Independent, data = df[ridx, ])
Skin the cat another way
# Same plot without a helper index: the 'subset' expression refers to the
# columns of 'df' directly.
plot(
  Dependent ~ Independent,
  data = df,
  subset = Dependent > 0 & Independent > 0
)
# Linear model -- regression of Dependent on Independent.
fit <- lm(Dependent ~ Independent, data = df)
anova(fit) # summary table
## Analysis of Variance Table
##
## Response: Dependent
## Df Sum Sq Mean Sq F value Pr(>F)
## Independent 1 118.609 118.609 118.03 < 2.2e-16 ***
## Residuals 98 98.483 1.005
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Redraw the scatter plot, then overlay the fitted regression line.
plot(Dependent ~ Independent, data = df)
abline(fit)
lm(): plain-old function
fit: an object of class “lm”
anova(): a generic with a specific method for class “lm”
class(fit)
## [1] "lm"
# List the methods available for objects of class "lm".
methods(class = "lm")
## [1] add1 alias anova case.names
## [5] coerce confint cooks.distance deviance
## [9] dfbeta dfbetas drop1 dummy.coef
## [13] effects extractAIC family formula
## [17] hatvalues influence initialize kappa
## [21] labels logLik model.frame model.matrix
## [25] nobs plot predict print
## [29] proj qr residuals rstandard
## [33] rstudent show simulate slotsFromS3
## [37] summary variable.names vcov
## see '?methods' for accessing help and source code
# Look up help pages by topic name; help("topic") is the long form of ?"topic".
help("plot") # plain-old-function or generic
help("plot.formula") # method
library(ggplot2)
# Scatter plot of the data with a linear-model smoother layered on top.
ggplot(data = df, mapping = aes(x = Independent, y = Dependent)) +
  geom_point() +
  geom_smooth(method = "lm")
(library(ggplot2), once per session)