Chapter 7 Problems

Chapter 7: Problem Solutions

P7.1.1	P7.2.1	P7.3.1	P7.4.1
P7.1.2	P7.2.2	P7.3.2	P7.4.2
P7.1.3	P7.2.3	P7.3.3	P7.4.3
P7.1.4	P7.2.4	P7.3.4
P7.1.5	P7.2.5	P7.3.5
	P7.2.6	P7.3.6

P7.1.1

% Problem P7_1_1
%
% Examine minimization in different norms.

close all
n = 100;
b = sort(rand(3,1));
b = b(3:-1:1);
A = [1;1;1];
x = linspace(-1,1,n);
v1   = zeros(n,1);
v2   = zeros(n,1);
vinf = zeros(n,1);
for k=1:n
   v1(k)   = norm(A*x(k)-b,1);
   v2(k)   = norm(A*x(k)-b,2);
   vinf(k) = norm(A*x(k)-b,'inf');
end
subplot(2,2,1)
plot(x,v1,[b(2),b(2)],[0,6])
title(sprintf('1-norm: min at %5.3f',b(2)))
subplot(2,2,2)
plot(x,v2,[sum(b)/3, sum(b)/3],[0,6])
title(sprintf('2-norm: min at %5.3f',sum(b)/3))
subplot(2,2,3)
plot(x,vinf,[(b(1)+b(3))/2,(b(1)+b(3))/2],[0,6])
title(sprintf('inf-norm: min at %5.3f',(b(1)+b(3))/2))

P7.1.2

% Problem P7_1_2
%
% Quadratic LS fits to the sine function.

close all
for m=[3 6 12 24]
   [a,b,c] = LSFit(0,pi/3,'tan',m);
   x = linspace(0,pi/3)';
   y = tan(x);
   qvals = (c*x + b).*x + a;
   figure
   plot(x,y,x,qvals)
   title(sprintf('m = %2.0f',m))
end


  function [a,b,c] = LSFit(L,R,fname,m)
% [a,b,c] = LSFit(L,R,fname,m)
%
% L,R satisfy L= 3.
%
% a,b,c are scalars so that if q(x) =ax^2 + bx +c, then
%      sum from k=1 to m of [q(x(k)) -f(x(k))]^2
%      is minimized where x = linspace(L.R,m).

x = linspace(L,R,m)';
z = [ones(m,1) x x.^2]\feval(fname,x);
a = z(1);
b = z(2);
c = z(3);

P7.1.3

Not Available

P7.1.4

Not Available

P7.1.5

% Problem P7_1_5
%
% Assume A (nxn) is given and b, c, and d are all given
% column n-vectors. We want to choose scalars alfa and
% beta so the solution to Ax = b + alfa*c + beta*d is
% as small as possible. Note that the solution is
%
%      x = u + alfa*v + beta*w = u + [v w]*[alfa;beta]
%
% where Au = b, Av = c, and Aw = d. 

% Solve for u, v, and w...

[L,U,P] = lu(A);
u = U\(L\(P*b));
v = U\(L\(P*c));
w = U\(L\(P*d));

% The act of minimizing norm(u + [v w]*[alfa;beta]) is the
% act of solving the LS problem min norm(Cz - r) where
% C is the n-by-2 matrix [v w], r = -u, and setting
% alfa = z(1) and beta = z(2).

z = -[v w]\u;
alfa = z(1);
beta = z(2);

P7.2.1

% Problem P7_2_1
%
% Solving Hessenberg systems via the QR factorization.

clc
n = 6;
A = triu(randn(n,n),-1)
b = randn(n,1)
x = HessQRSolve(A,b)
disp('A*x-b = ')
disp(A*x - b)


  function x = HessQRSolve(H,b)
% x = HessQRSolve(H,b)
%
% H is an n-by-n nonsingular upper Hessenberg. 
% b is a column n-vector.
%
% x  column n-vector so Hx = b.

[Q,R] = HessQRrot(H);  % See P7.2.4

x = UTriSol(R,Q'*b);   % Ax = QRx = b  means Rx = Q'b.

P7.2.2

% Script P7_2_2
%
% Symmetrizing a 2-by-2 matrix through rotation.
clc
A = randn(2,2)

% To make the (1,2) and (2,1) entries of [c s;-s c]'*A
% the same, we require cA(1,2) - sA(2,2) = sA(1,1)+c(2,1), i.e.,
%
%        -s(A(1,1)+A(2,2)) + c(A(1,2)-A(2,1)) = 0
%
% Note that this means that we want the bottom component
% of [c s;-s c][(A(1,1)+A(2,2); A(1,2)-A(2,1)] to be zero.
[c,s] = Rotate(A(1,1)+A(2,2),A(1,2)-A(2,1));
disp('[c s;-s c] =')
disp(' ')
disp([c s;-s c])
disp('[c s; -s c]''*A = ')
disp(' ')
disp([c s;-s c]'*A)

P7.2.3

% Problem P7_2_3
%
% Lower triangularizing a matrix using rotations

clc
n = 5;
A = randn(n,n)
[Q,L] = QLRot(A)
disp('Q''*Q - I = ')
disp(Q'*Q - eye(n,n))
disp('Q''*A - L =')
disp(Q'*A - L)

  function [Q,L] = QLrot(A)
% [Q,L] = QLrot(A)
%
% A is an n-by-n matrix. 
%
% Q is an n-by-n orthogonal matrix and
% L is an n-by-n lower triangular so A = QL.

[n,n] = size(A);
Q = eye(n,n);
for j=n:-1:2
   % Zero A(1:j-1,j)
   for i=1:j-1
      % Zero A(i,j)
      % Note that if [c s;-s c][z;-y] = [r;0], then
      % [c s;-s c][y z] = [0;r]. So we can use Rotate to
      % zero the top of a 2-vector.
      
      [c,s] = Rotate(A(i+1,j),-A(i,j));
      A(i:i+1,1:j) = [c s; -s c]*A(i:i+1,1:j);
      Q(:,i:i+1) = Q(:,i:i+1)*[c s; -s c]';
   end
end
L = tril(A);

P7.2.4

% Problem P7_2_4
%
% hessenberg QR factorization.

clc 
n = 6;
A = triu(randn(n,n),-1)
[Q,R] = HessQRrot(A)
disp('Q''*Q - I = ')
disp(Q'*Q - eye(n,n))
disp('Q''*A - R =')
disp(Q'*A - R)

  function [Q,R] = HessQRrot(A)
% [Q,R] = HessQRrot(A)
%
% A is an n-by-n upper Hessenberg matrix.
%
% Q is an n-by-n orthogonal matrix and 
% R is an n-by-n upper triangular such that A = QR.

[n,n] = size(A);
Q = eye(n,n);
for j=1:n-1
   %Zero A(j+1,j)
   [c,s] = Rotate(A(j,j),A(j+1,j));
   A(j:j+1,j:n) = [c s; -s c]*A(j:j+1,j:n);
   Q(:,j:j+1) = Q(:,j:j+1)*[c s; -s c]';
end
R = triu(A);

P7.2.5

% Problem P7_2_5
%
% Least squares fit of the square root function on [.25,1]

clc 
disp('   m             a                     b')
disp('-------------------------------------------------')
for m = [2:10 20 40 80 200]
   x = linspace(.25,1,m)';
   z = [ones(m,1) x]\sqrt(x);
   disp(sprintf('%4.0f   %20.16f  %20.16f',m,z(1),z(2)))
end

P7.2.6

% Problem P7_2_6
%
% Using a rotation to zero the top component of a 2-vector.

clc 
x = randn(2,1)
[c,s] = Rotate1(x(1),x(2));
disp('[c s;-s c]] =')
disp(' ')
disp([c s;-s c])
disp('[c s;-s c]*x =')
disp(' ')
disp([c s;-s c]*x)



  function [c,s] = Rotate1(x,y)
% [c,s] = Rotate1(x,y)
%
% x,y are real scalars
%
% c,s satisfy  c^2+s^=1 with the property that the top component 
% of the vector [c s;-s c][x;y] is zero.

[c,s] = Rotate(-y,x);

P7.3.1

% Problem P7_3_1
%
% Test BVPsolver

a = 0;
b = 1;
close all
for n = [10 100 1000]
   uvals = TwoPtBVP(n,a,b,'MyF731p','MyF731q','MyF731r');
   figure
   plot(linspace(a,b,n)',uvals)
   title(sprintf('n = %1.0f',n))
end

  function uvals = TwoPtBVP(n,a,b,pname,qname,rname)
% uvals = TwoPtBVP(n,a,b,pname,qname,rname)
%
% a,b are reals with a<b.
% n is an integer >= 3
% pname is a string that names a positive function defined on [a,b].
% qname is a string that names a positive function defined on [a,b].
% rname is a string that names a function defined on [a,b].
%
% uvals is a column n-vector with the property that uvals(i) approximates
%            the solution to the 2-point boundary value problem
%
%                -D[p(x)Du(x)] + q(x)u(x) = r(x)    a<=x<=b
%                 u(a) = u(b) = 0
%
% at x = a+(i-1)h where h = (b-a)/(n-1)

x = linspace(a,b,n)';
h = (b-a)/(n-1);
p = feval(pname,x(1:n-1)+h/2);
q = feval(qname,x);
r = feval(rname,x);
uvals = zeros(n,1);
e = zeros(n-2,1);
e(2:n-2) = -p(2:n-2);
d      =  p(1:n-2)+p(2:n-1)+(h^2)*q(2:n-1);
[d,e] = CholTrid(d,e);
rhs = (h^2)*r(2:n-1);
uvals(2:n-1) = CholTridSol(d,e,rhs);

P7.3.2

% Problem P7_3_2
%
% Recursive outer product Cholesky

clc 
n = 5;
Gexact = tril(rand(n,n))
A = Gexact*Gexact';
G = CholOuter1(A)

P7.3.3

% Problem P7_3_3
%
% pentadiagonal Cholesky
% Produce plot of timings:
% set up A and b

clc

time = [];
for n = 400:200:2000
   % Set up A as instructed:
   j = 1:n;
   d = 100 + j';
   e = 10 + j';
   f = j';
   % Don't actually form A
   % A = diag(d) + diag(e(2:n),1) + diag(e(2:n),-1) + ...
   %    diag(f(3:n),2) + diag(f(3:n),-2);
   % Right-hand side:
   b = 100*ones(n,1);
   % Record starting time.
   tic
   % Solve Ax = b
   % Factor A = G*G'
   % G = diag(g) + diag(h(2:n),-1) + diag(p(3:n),-2);
   [g,h,p] = Chol5(d,e,f);
   % Solve Gy = b for y
   y = LTriSol5(g,h,p,b);
   % Solve G'*x = y for x
   x = UTriSol5(g,h,p,y);
   % Compute elapsed time.
   dt = toc;
   % Add to table of timings.
   time = [time dt];
end
% Make plot of timings.

close all
plot(400:200:2000,time)
xlabel('n')
ylabel('time')
title('Time to solve n-by-n penta-diagonal system')

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Produce table of flops:
clc

disp('n         flops');
disp('------------------');
for n = [10 20 40 80 160]
   % Set up A as instructed:
   j = 1:n;
   d = 100 + j';
   e = 10 + j';
   f = j';
   % Set up right-hand side.
   b = 100*ones(n,1);
   % Reset number of flops.
   flops(0)
   % Factor A = G * G'
   [g,h,p] = Chol5(d,e,f);
   % Solve Gy = b for y
   y = LTriSol5(g,h,p,b);
   % Solve G'*x = y for x
   x = UTriSol5(g,h,p,y);
   % Print number of flops required.
   disp(sprintf('%4.0f  %8.0f', n, flops));
end

  function x = LTriSol5(g,h,p,b)
% x = LTriSol5(g,h,p,b)
%
% for recitation problem 7.3.3
% Pre:
%   g,h,p  as in Chol5.m
%   b      n-vector
%
% Post:
%   x      such that G*x = b,
%          where G = diag(g) + diag(h(2:n),-1) + diag(p(3:n),-2)
n = length(g);
x = zeros(n,1);
% initialize first 2 x values
x(1) = b(1)/g(1);
x(2) = (b(2) - x(1)*h(2)) / g(2);
% get rest of x values (this is similar to 2nd part of
% CholTriDiSol, p.241)
for i = 3:n
   x(i) = (b(i) - x(i-2)*p(i) - x(i-1)*h(i)) / g(i);
end

P7.3.4

Not Availabe

P7.3.5

% Problem 7_3_5
%
% We need to solve Ay = b for y and Az = u for z:

G = CholScalar(A);
y1 = LTriSol(G,b); y = UTriSol(G',y1);
z1 = LTriSol(G,u); z = UTriSol(G',z1);

% It follows that 

x = y - ((u'*y)/(1+u'*z))*z;

P7.3.6

Not Available.

P7.4.1

% Problem P7_4_1
%
% Illustrate a general block Cholesky

p = [ 2 4 3 1 5];
n = sum(p);
G0 = tril(rand(n,n));
A = G0*G0';
G = MyCholBlock(A,p);
clc
home
disp('The block width vector:')
disp(' ')
s = sprintf('%2.0f ',p);
disp(['[' s ']'])
disp(' ' )
error = norm(G-G0,1)/norm(G0,1)



  function G = MyCholBlock(A,p)
% G = MyCholBlock(A,p)
%
%  Pre:  A is an n-by-n symmetric and positive definite matrix
%        and p is a positive integer vector with sum(p) = n
%  Post: G is lower triangular and A = G*G'.

[n,n] = size(A);
m = length(p);
finish = cumsum(p);
start = [1 finish(1:m-1)+1];
G = zeros(n,n);
for i=1:m
   ivec = start(i):finish(i);
   for j=1:i
      jvec = start(j):finish(j);
      %Find the (i,j) block of G, i.e., G(ivec,jvec).
      S = A(ivec,jvec);
      for k=1:j-1
         kvec = start(k):finish(k);
         S = S - G(ivec,kvec)*G(jvec,kvec)';       end
      if j<i 
         for q=1:(finish(i)-start(i)+1)
            G(start(i)+q-1,jvec)= (LTriSol(G(jvec,jvec),S(q,:)'))';
         end 
      else
         G(ivec,ivec) = CholScalar(S);
      end
   end
end

P7.4.2

% Problem P7_4_2
%
% Explore load balancing in shared memory Cholesky with
% "wrap mapping" of tasks.

n = 100;
clc 
disp('   p  max/min flops')
disp('---------------------')
for p = [2 4 8 16 32 64]
   f = FlopBalance(n,p);
   maxf = max(f);	 minf = min(f);
   disp(sprintf(' %3.0f      %7.5f',p,maxf/minf))
end


  function f = FlopBalance(n,p)
% f = FlopBalance(n,p)
%
% n,p are positive integers with n>p.
%
% f is a column p-vector where f(mu) is the approximate number
%        of flops required by the mu-th processor when parallel
%        cholesky is implemented with wrap mapping.

f = zeros(p,1);
for mu=1:p
   MyCols = mu:p:n;
   for k=1:n
      if any(MyCols==k)
         % Flops associated with column generation.
         f(mu) = f(mu) + (n-k)+3;
      end
      % Flops associated with saxpy updates
      f(mu) = f(mu) + 2*(n-k+1)*ceil((n-k)/p);			 
   end
end

P7.4.3

% Script P7_4_3
%
% Explore load balancing in shared memory Cholesky with
% "contiguous mapping" of tasks.

n = 100;
clc 
disp('   p  max/min flops')
disp('---------------------')
for p = [2 4 8 16 32 64]
   f = FlopBalance1(n,p);
   maxf = max(f);
   minf = min(f);
   disp(sprintf(' %3.0f      %7.2f',p,maxf/minf))
end