\documentclass[11pt,onecolumn]{article}
\usepackage{amssymb, amsmath, amsthm,graphicx, paralist,algpseudocode,algorithm,cancel,url,color}
\usepackage{sectsty}
\usepackage{fancyvrb}
\usepackage{mathrsfs}
\usepackage{multirow}
\usepackage{hhline}
\usepackage{booktabs}
\usepackage[table]{xcolor}
\usepackage{tikz}
% \usepackage[framed,numbered,autolinebreaks,useliterate]{mcode}
\usepackage{listings}
\usepackage{enumitem}

\newcommand{\bvec}[1]{\mathbf{#1}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\C}{\mathbb{C}}
\newcommand{\Rnn}{\R^{n\times n}}
\newcommand{\Rmn}{\R^{m\times n}}
\newcommand{\Cnn}{\C^{n\times n}}
\newcommand{\Cmn}{\C^{m\times n}}
\newcommand{\cO}{\mathcal{O}}
\DeclareMathOperator{\Tr}{Tr}
\DeclareMathOperator{\trace}{trace}
\DeclareMathOperator{\diag}{diag}
\sectionfont{\Large\sc}
\subsectionfont{\sc}
\usepackage[margin=1 in]{geometry}
\begin{document}
\noindent
\textsc{\Large CS 6210: Homework 4}\\
Instructor: Anil Damle\\
Due: November 1, 2018\\

\subsection*{Policies}
You may discuss the homework problems freely with other students, but please
refrain from looking at their code or writeups (or sharing your own).
Ultimately, you must implement your own code and write up your own solution to
be turned in. Your solution, including supporting plots and requested output from your code
must be typeset and submitted via the CMS as a single pdf file. Additionally, please
submit any code written for the assignment via the CMS as well. This can be done
by either including it in your solution as an appendix, or uploading it as a zip
file.\\
\hrule
\vspace{1cm}
\noindent

\subsection*{Question 1:}
Let us assume that we are using a stationary iterative method to solve $Ax=b$ with the splitting $A=M-N$ and initial guess $x^{(0)}.$ Furthermore, assume that $x^{(1)},\ldots,x^{(k)}$ have been computed using the iteration ${Mx^{(j+1)} = Nx^{(j)} + b}$. Normally we would consider $x^{(k)}$ as our current approximation of the solution. However, maybe there is some process that allows us to accelerate the convergence of our method.

Now, we are going to consider one specific way of accomplishing our goal. Specifically, we would like to construct coefficients $\left\{v_j^{(k)}\right\}_{j=1}^{k}$ for each iteration $k$ such that $$y^{(k)} = \sum_{j=0}^kv_j^{(k)}x^{(j)}$$ gives a better approximation to our true solution, denoted $x.$ Let us define $G = M^{-1}N$ and
$$p_k(z) = \sum_{j=0}^kv_j^{(k)}z^j$$

\begin{enumerate}
\item Let us further assume that $p_k(1) = 1$. Prove that
$$y^{(k)}-x = p_k\left(G\right)e^{(0)}$$
\item Prove that if $B$ is similar to a Hermitian matrix then $$\rho(p_k(B)) = \max_{\lambda_i\in \lambda(B)} \vert p_k(\lambda_i) \vert$$
where $\rho(p_k(B))$ is the spectral radius of $p_k(B).$
\end{enumerate}

We now assume that the iteration matrix $G$ is similar to a Hermitian matrix and has real eigenvalues $\lambda_1,\ldots,\lambda_n$. Recall that for convergence we must have
$$-1 < \lambda_n \leq \lambda_1 < 1$$
Moreover let $\alpha$ and $\beta$ be such that
$$-1 < \alpha \leq \lambda_n \leq \dots \leq \lambda_1 \leq \beta < 1.$$

\begin{enumerate}
\setcounter{enumi}{2}
\item Since we may write $\|y^{(k)} - x\| = \|p_k(G)e^{(0)}\|,$ what choice of $p_k$ would give us zero error? Is such a choice feasible?
\end{enumerate}

Since we may write $$ \max_{\lambda_i\in \lambda(B)} \vert p_k(\lambda_i) \vert \leq \max_{\alpha \leq \lambda \leq \beta}\vert p_k(\lambda) \vert,$$ it seems reasonable to pick a $p_k$ that is small on the interval $[\alpha,\beta].$ The ideal choice, given the constraint on $p_k(1),$ is a scaled and shifted version of the $k^\text{th}$ Chebyshev polynomial. These polynomials may be defined by the recursion
${c_j(z) = 2zc_{j-1}(z)-c_{j-2}(z)}$ where $c_0(z) = 1$ and $c_1(z) = z.$ Alternatively, we may write $c_j(z)  = \cos{(j\theta)}$ where $\theta = \arccos{(z)}.$ Specifically, we may choose our polynomial to be
$$p_k(z) = \frac{c_k\left(-1+2\frac{z-\alpha}{\beta-\alpha}\right)}{c_k(\mu)},$$
where $\mu = 1+2\frac{1-\beta}{\beta-\alpha}.$ You can verify that $p_k(1) = 1$. Moreover, $c_k(z)$ has the property that it is bounded between $-1$ and 1 in the interval $[-1,1]$, but then grows rapidly outside of this interval. So for example, in the formula above, $c_k(\mu)$ becomes large as $k \to \infty$. With the chosen scaling we ensure that $p(z)$ is small in the interval $[\alpha,\beta]$ while satisfying $p(1)=1$.

\begin{enumerate}
\setcounter{enumi}{3}
\item Given the above choice for $p_k(z)$ prove that there exists a constant $C$ such that $$\|y^{(k)}-x\|_2 \leq C\left(\frac{1}{c_k(\mu)}\right)\|x-x^{(0)}\|_2,$$ where $C$ may depend on the matrix $G.$
\item Let $\alpha = -0.9$ and $\beta = 0.9.$ Plot $c_k(\mu),$ on a logarithmic scale, for $k = 0,1,\ldots,100.$
\end{enumerate}

We will now consider using this acceleration method in conjunction with the Jacobi iteration. For the remainder of this problem assume that $A$ is a real symmetric matrix that is strictly diagonally dominant and has positive diagonal entries.

\begin{enumerate}
\setcounter{enumi}{5}
\item Under the aforementioned assumptions, prove that the iteration matrix associated with $A$ is similar to a Hermitian matrix.
\item Implement the Jacobi method both with and without Chebyshev acceleration. You can find pseudo-code in Golub and Van Loan 4th edition, section 11.2.8 (3rd edition, section 10.1.5) that leverages the three-term recurrence for Chebyshev polynomials for an efficient implementation.

The file \verb+probC.mat+ contains a matrix $A,$ vectors $x^{(0)}$ and $b$ and eigenvalue bounds $\alpha$ and $\beta.$ Use your algorithm both with and without the acceleration to solve $Ax=b.$ You may stop your algorithm when the 2 norm of the residual is less than $10^{-6}$ or you have run $1000$ iterations. Provide error plots, on a logarithmic scale, of the 2 norm of the residual vs iteration both with and without the acceleration. Comment on your observations.
\end{enumerate}
\newpage
\subsection*{Question 2:}
Here we will work out a proof we omitted in class and further characterize the behavior of $A^k$ in terms of its spectral radius. 
\begin{itemize}
\item Prove that if $\rho(A)<1$ then there exists a sub-multiplicative matrix norm $\|\cdot\|$ such that $\|A\|<1.$ Hint: start with the Schur decomposition and note that for any non-singular matrix $S$ and norm $\|\cdot\|,$ $\|A\|_S = \|S^{-1}AS\|$ is also a norm.
\item Given $A\in\Cnn,$ a matrix norm $\|\cdot\|,$ and some $\epsilon > 0$ prove that there exist constants $\alpha$ (depending on the norm) and $\beta_{A,\epsilon}$ (depending on $A,$ the norm, and $\epsilon$) such that
\[
\alpha \rho(A)^k \leq \|A^k\| \leq \beta_{A,\epsilon}(\rho(A)+\epsilon)^k.
\] 
\end{itemize}

\subsection*{Question 3:}
We are now going to prove some interesting properties of the Lanczos process and Krylov subspaces as they relate to solving linear systems.
\begin{itemize}
\item Given a symmetric positive definite matrix $A$ and vector $b,$ prove that if the Lanczos process breaks down at some point (\emph{i.e.} $\beta_k = 0$ using the notation from class and Trefethen and Bau) then the subspace $\mathcal{K}_k(A,b)$ contains a solution to the linear system $Ax=b.$
\item Given a symmetric positive definite matrix $A$ with at most $p$ distinct eigenvalues and a vector $b$ show that a solution to $Ax=b$ exists in $\mathcal{K}_k(A,b)$ for some $k\leq p.$ In other words, we certainly have a solution in the p$^{th}$ Krylov subspace, though we may find one sooner in some special circumstances.
\end{itemize}

\end{document}