\documentclass[11pt,onecolumn]{article}
\usepackage{amssymb, amsmath, amsthm,graphicx, paralist,algpseudocode,algorithm,cancel,url,color}
\usepackage{sectsty}
\usepackage{fancyvrb}
\usepackage{mathrsfs}
\usepackage{multirow}
\usepackage{hhline}
\usepackage{booktabs}
\usepackage[table]{xcolor}
\usepackage{tikz}
% \usepackage[framed,numbered,autolinebreaks,useliterate]{mcode}
\usepackage{listings}
\usepackage{enumitem}
\newcommand{\bvec}[1]{\mathbf{#1}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\calO}{\mathcal{O}}
\DeclareMathOperator{\Tr}{Tr}
\DeclareMathOperator{\trace}{trace}
\DeclareMathOperator{\diag}{diag}
\sectionfont{\Large\sc}
\subsectionfont{\sc}
\usepackage[margin=1in]{geometry}
\begin{document}
\noindent
\textsc{\Large CS 4220 / MATH 4260: Homework 3}\\
Instructor: Anil Damle\\
Due: March 28, 2019\\
\subsection*{Policies}
You may discuss the homework problems freely with other students, but please
refrain from looking at their code or writeups (or sharing your own).
Ultimately, you must implement your own code and write up your own solution to
be turned in. Your solution, including plots and requested output from your code
should be typeset and submitted via the CMS as a pdf file. Additionally, please
submit any code written for the assignment via the CMS as well. This can be done
by either including it in your solution as an appendix, or uploading it as a zip
file via the CMS.\\
\hrule
\subsection*{Question 1:}
Assume that we are given $A\in\mathbb{R}^{n\times n},$ $A=A^T,$ and $A$ has eigenvalue and vector pairs $\left\{(v_i,\lambda_i)\right\}_{i=1}^n.$ Furthermore, assume that $\lvert\lambda_1\rvert = \lvert\lambda_2\rvert > \lvert \lambda_3\rvert \geq \lvert\lambda_4\rvert \geq \cdots.$
\begin{enumerate}[label=(\alph*)]
\item Prove that for any initial guess $v^{(0)}$ such that $v^{(0)}$ is not simultaneously orthogonal to both $v_1$ and $v_2$ the power method yields iterates $v^{(k)}$ that converge to lie in the span of $v_1$ and $v_2.$
\item What is the rate of convergence of $$\left(1-\left\|\left(v^{(k)}\right)^T\begin{bmatrix}v_1 & v_2\end{bmatrix}\right\|^2_2\right)^{1/2}.$$ Note that if you have already solved the problem using $$1-\left\|\left(v^{(k)}\right)^T\begin{bmatrix}v_1 & v_2\end{bmatrix}\right\|_2$$ it is fine to simply turn in that solution (either is fine).
\item Does the associated eigenvalue estimate via the Rayleigh quotient necessarily converge in this setting? What about if $\lambda_1 = \lambda_2$?
\end{enumerate}
\subsection*{Question 2:}
What does the QR algorithm without shifts applied to the matrix
\[
\begin{bmatrix}0 & 1 \\ 1 & 0 \end{bmatrix}
\]
converge to? Does using a shift of $A^{(k-1)}_{n,n}$ change this behavior? (Algorithm~\ref{alg:qr} gives the shifted QR algorithm.)
\subsection*{Question 3:}
For the QR algorithm with an arbitrary shift and initial tri-diagonalization of a symmetric matrix $A$ (see Algorithm~\ref{alg:qr}) prove that $A^{(k)}$ is similar to $A.$
\begin{algorithm}
\caption{QR algorithm with tri-diagonalization and shifts}
\label{alg:qr}
\begin{algorithmic}[1]
\State {\bf input:} $A\in\mathbb{R}^{n\times n}$ symmetric
\State Compute $Q^{(0)}$ such that $A^{(0)} = \left(Q^{(0)}\right)^TAQ^{(0)}$ is tri-diagonal
\For{$k=1,2,3,\ldots$}
\State Pick a shift $\mu^{(k)}$
\State Compute the QR factorization $A^{(k-1)} - \mu^{(k)}I = Q^{(k)}R^{(k)}$
\State $A^{(k)} = R^{(k)}Q^{(k)} + \mu^{(k)}I$
\EndFor
\end{algorithmic}
\end{algorithm}
\subsection*{Question 4:}
Implement Rayleigh iteration and shifted inverse iteration (you may use built in routines to solve the requisite linear systems). In doing so you will have to think about and use a reasonable convergence criterion given the remainder of this problem. Describe the criterion you chose to implement.
Now, generate a dense symmetric matrix $A \in \mathbb{R}^{500 \times 500}$ where each eigenvalue is drawn independently from the uniform distribution on $[0,1]$ and whose eigenvectors are an orthogonalized set of normalized Gaussian random vectors of length 500. (This can be computed via the Q factor from a QR factorization of a $500 \times 500$ Gaussian random matrix.)
\begin{enumerate}[label=(\alph*)]
\item Now, for 500 random vectors ($\mathcal{N}(0,1)$ entries) that have been normalized, run Rayleigh iteration and plot a histogram of the eigenvalues you converge to. What do you observe? Can you reason about why you might be observing what you do?
\item Now, repeat the same experiment as in the previous part, but generate the initial guess for Rayleigh iteration by running one step of shifted inverse iteration with a shift of $\mu = 1$ on a random vector. Plot a histogram of your 500 computed eigenvalues, what do you observe? Can you reason about why you might be observing what you do?
\item Now, we will consider a single run of Rayleigh iteration and shifted inverse iteration. In particular, for some random initial guess run Rayleigh iteration and shifted inverse iteration with a shift of your choice (not an eigenvalue). After you have converged to an eigenvalue $\lambda^*$ (remember you know what the eigenvalues are in this case and can therefore determine the eigenvalue you are closest to once converged) consider $\lvert \lambda^{(k)}-\lambda^*\rvert.$ In each case plot that error (on a log scale) vs iteration. Does what you observe align with our understanding of the convergence properties of these algorithms?
\end{enumerate}
\subsection*{Question 5:}
Suppose we are using a splitting method with $A=M-N$ to solve $Ax=b$ via the iteration $Mx^{(k+1)} = Nx^{(k)}+b$ with some arbitrary initial guess $x^{(0)}.$ Prove that the iteration does not necessarily converge if $\rho(M^{-1}N) \geq 1.$ You may assume that the iteration matrix $M^{-1}N$ is diagonalizable.
\subsection*{Question 6 (A more challenging, ungraded problem):}
Let $A$ be an $n\times n$ matrix that is not diagonalizable, and whose eigenvalue of largest magnitude, denoted $\lambda_1,$ is associated with a Jordan block of size two. You may assume the rest of the eigenvalues ($\lambda_2,\ldots,\lambda_{n-1}$) are simple. This means that there exists a matrix $X$ such that
\[
X^{-1}AX = \begin{bmatrix}\lambda_1 & 1 & \\ & \lambda_1 & \\ & & \Lambda \end{bmatrix}
\]
where $\Lambda$ is a diagonal matrix and $\|\Lambda\|_2<\lvert \lambda_1\rvert.$ Given essentially any initial guess, what, if anything, does the power method applied to $A$ converge to? If it does converge, at what rate does it do so?
\end{document}