# UCLA's canonical correlation analysis
# http://www.ats.ucla.edu/stat/r/dae/canonical.htm
#
# ggplot2 and GGally are only needed for the optional ggpairs() plot that is
# commented out further down, so they stay soft dependencies: require() warns
# and carries on when they are not installed.
require(ggplot2)
require(GGally)
# CCA provides cc() and comput(), which the analysis cannot run without.
# library() stops right here with a clear error when the package is missing,
# instead of failing later with "could not find function".
library(CCA)
# Load the data from a local copy of mmreg.csv; the remote location is kept
# commented out for reference.
#mm <- read.csv("http://www.ats.ucla.edu/stat/data/mmreg.csv")
mm <- read.csv("mmreg.csv")
# The renaming and the positional splits below assume exactly eight columns in
# this order. Fail loudly on any other layout: assigning too few names to a
# data frame can leave NA column names behind instead of raising an error.
mm_names <- c(
  "Control", "Concept", "Motivation",
  "Read", "Write", "Math", "Science", "Sex"
)
stopifnot(ncol(mm) == length(mm_names))
colnames(mm) <- mm_names
# print() explicitly: a bare summary(mm) shows nothing when the script is run
# through source(), which does not auto-print top-level values by default.
print(summary(mm))
# our split of the variables: columns 1-3 vs. columns 4-8
psych <- mm[, 1:3]
acad <- mm[, 4:8]
# optional pairwise plot to get an idea of what the data looks like
#ggpairs(mm)
readline('data matrix mm is loaded (....Pause....)')
#@ hit ^C here for first part of example
# Fit the canonical correlation analysis with psych as the X set and acad as
# the Y set; the result holds the correlations and the coefficients.
cc1 <- cc(X = psych, Y = acad)
# Show the canonical correlations stored in the fit.
print('The canonical correlations')
print(cc1[["cor"]])
readline('(....Pause....)')
# Show the raw canonical coefficients of both sets, selected by element name.
print('The canonical coefficients (but remember the numbers had different ranges)')
print(cc1[c("xcoef", "ycoef")])
readline('(....Pause....)')
#@ TODO: should prove that the variance of the new vectors is 1.
# Canonical loadings: how strongly each original variable correlates with the
# linear combinations found above.
print('Interpreting what the linear combinations mean:')
print('how much is each variable correlated with the combinations?')
cc2 <- comput(X = psych, Y = acad, res = cc1)
# Print only the four correlation matrices (elements 3 to 6 of the result),
# selected by name rather than by position.
print(
  cc2[c(
    "corr.X.xscores", "corr.Y.xscores",
    "corr.X.yscores", "corr.Y.yscores"
  )]
)
#"When the variables in the model have very different standard deviations, the
#standardized coefficients allow for easier comparisons among the variables."
# Standardized psych canonical coefficients: each row of the raw coefficient
# matrix is multiplied by the standard deviation of its variable.
psych_sd <- sqrt(diag(cov(psych)))
# nrow is passed explicitly because diag() on a length-one numeric vector
# builds an identity matrix rather than a 1 x 1 diagonal matrix.
s1 <- diag(psych_sd, nrow = length(psych_sd))
# Label the scaling matrix so the rows of the product carry variable names;
# a plain diag() matrix has no dimnames and the output rows would be unlabeled.
dimnames(s1) <- list(colnames(psych), colnames(psych))
# print() explicitly: bare expressions are not shown when the script is run
# through source(), unlike the print() calls used for every other result.
print(s1 %*% cc1$xcoef)
# Standardized acad canonical coefficients, built the same way from the acad
# standard deviations.
acad_sd <- sqrt(diag(cov(acad)))
s2 <- diag(acad_sd, nrow = length(acad_sd))
dimnames(s2) <- list(colnames(acad), colnames(acad))
print(s2 %*% cc1$ycoef)