library(MASS)
library(tidyverse)
library(modelr)
# Display option for the `repr` package: show at most 20 rows of a matrix or
# data frame (presumably consumed by the notebook front end -- confirm).
options(repr.matrix.max.rows = 20)

# Parameters of the bivariate normal distribution we sample from:
# zero mean, Var(x) = 2, Var(y) = 1, Cov(x, y) = 1.
means <- c(0, 0)
cov_m <- frame_matrix(
  ~x, ~y,
  2, 1,
  1, 1
)
cov_m

# Draw 10,000 observations; each row of data_mat is one (x, y) sample.
# No seed is set here, so the sample differs from run to run.
data_mat <- mvrnorm(10000, means, cov_m)

# Build the tibble with explicit column names. The previous as_data_frame()
# call is deprecated in tibble and depended on auto-generated names for the
# matrix columns; the rest of the script refers to the columns as V1 and V2,
# so those names are spelled out here. data_mat itself is left untouched.
data <- tibble(V1 = data_mat[, 1], V2 = data_mat[, 2])
head(data)
dim(data)
# Scatter plot of the sample with marginal rug marks and a fitted
# linear-model smoother on top.
ggplot(data, aes(x = V1, y = V2)) +
  geom_point(alpha = 0.3) +
  geom_rug() +
  geom_smooth(method = "lm")
# Linear regression of V2 on V1.
line <- lm(V2 ~ V1, data)
summary(line)

# Augment the data with the model's predictions (pred) and residuals (resid).
preds <- add_residuals(add_predictions(data, line), line)
head(preds)

# Q-Q plot of the residuals.
ggplot(preds, aes(sample = resid)) +
  geom_qq()

# Residuals plotted against predicted values.
ggplot(preds, aes(x = pred, y = resid)) +
  geom_point()
# Singular value decomposition of the column-centered sample.
# Centering (subtracting each column's mean, no rescaling) makes the right
# singular vectors in decomp$v the principal axes of the data's covariance
# structure. Without it, the result is only meaningful when the columns
# already have mean zero, i.e. it would silently break if `means` changed.
decomp <- svd(scale(data_mat, center = TRUE, scale = FALSE))
decomp$d  # singular values
decomp$v  # right singular vectors, one per column
# The columns of V give the directions of our primary and secondary axes as
# vectors; dividing each vector's second component by its first converts the
# direction to a slope.
# Slope of each right singular vector: second component (rise) divided by
# first component (run), computed for both columns at once.
slopes <- decomp$v[2, ] / decomp$v[1, ]
slopes

# Same scatter plot as before, overlaid with a line through the origin for
# each direction: the first column of V in red, the second in blue.
ggplot(data, aes(x = V1, y = V2)) +
  geom_point(alpha = 0.3) +
  geom_rug() +
  geom_abline(slope = slopes[1], color = "red") +
  geom_abline(slope = slopes[2], color = "blue")
# Et voilà! We have the principal axes of this data.