Chapter 7: Problem Solutions
| P7.1.1 | P7.2.1 | P7.3.1 | P7.4.1 |
| P7.1.2 | P7.2.2 | P7.3.2 | P7.4.2 |
| P7.1.3 | P7.2.3 | P7.3.3 | P7.4.3 |
| P7.1.4 | P7.2.4 | P7.3.4 | |
| P7.1.5 | P7.2.5 | P7.3.5 | |
| P7.2.6 | P7.3.6 |
% Problem P7_1_1
%
% Examine minimization in different norms.
close all
n = 100;
b = sort(rand(3,1));
b = b(3:-1:1);
A = [1;1;1];
x = linspace(-1,1,n);
v1 = zeros(n,1);
v2 = zeros(n,1);
vinf = zeros(n,1);
for k=1:n
v1(k) = norm(A*x(k)-b,1);
v2(k) = norm(A*x(k)-b,2);
vinf(k) = norm(A*x(k)-b,'inf');
end
subplot(2,2,1)
plot(x,v1,[b(2),b(2)],[0,6])
title(sprintf('1-norm: min at %5.3f',b(2)))
subplot(2,2,2)
plot(x,v2,[sum(b)/3, sum(b)/3],[0,6])
title(sprintf('2-norm: min at %5.3f',sum(b)/3))
subplot(2,2,3)
plot(x,vinf,[(b(1)+b(3))/2,(b(1)+b(3))/2],[0,6])
title(sprintf('inf-norm: min at %5.3f',(b(1)+b(3))/2))
% Problem P7_1_2
%
% Quadratic LS fits to the sine function.
close all
for m=[3 6 12 24]
[a,b,c] = LSFit(0,pi/3,'tan',m);
x = linspace(0,pi/3)';
y = tan(x);
qvals = (c*x + b).*x + a;
figure
plot(x,y,x,qvals)
title(sprintf('m = %2.0f',m))
end
function [a,b,c] = LSFit(L,R,fname,m)
% [a,b,c] = LSFit(L,R,fname,m)
%
% L,R satisfy L= 3.
%
% a,b,c are scalars so that if q(x) =ax^2 + bx +c, then
% sum from k=1 to m of [q(x(k)) -f(x(k))]^2
% is minimized where x = linspace(L.R,m).
x = linspace(L,R,m)';
z = [ones(m,1) x x.^2]\feval(fname,x);
a = z(1);
b = z(2);
c = z(3);
Not Available
Not Available
% Problem P7_1_5 % % Assume A (nxn) is given and b, c, and d are all given % column n-vectors. We want to choose scalars alfa and % beta so the solution to Ax = b + alfa*c + beta*d is % as small as possible. Note that the solution is % % x = u + alfa*v + beta*w = u + [v w]*[alfa;beta] % % where Au = b, Av = c, and Aw = d. % Solve for u, v, and w... [L,U,P] = lu(A); u = U\(L\(P*b)); v = U\(L\(P*c)); w = U\(L\(P*d)); % The act of minimizing norm(u + [v w]*[alfa;beta]) is the % act of solving the LS problem min norm(Cz - r) where % C is the n-by-2 matrix [v w], r = -u, and setting % alfa = z(1) and beta = z(2). z = -[v w]\u; alfa = z(1); beta = z(2);
% Problem P7_2_1
%
% Solving Hessenberg systems via the QR factorization.
clc
n = 6;
A = triu(randn(n,n),-1)
b = randn(n,1)
x = HessQRSolve(A,b)
disp('A*x-b = ')
disp(A*x - b)
function x = HessQRSolve(H,b)
% x = HessQRSolve(H,b)
%
% H is an n-by-n nonsingular upper Hessenberg.
% b is a column n-vector.
%
% x column n-vector so Hx = b.
[Q,R] = HessQRrot(H); % See P7.2.4
x = UTriSol(R,Q'*b); % Ax = QRx = b means Rx = Q'b.
% Script P7_2_2
%
% Symmetrizing a 2-by-2 matrix through rotation.
clc
A = randn(2,2)
% To make the (1,2) and (2,1) entries of [c s;-s c]'*A
% the same, we require cA(1,2) - sA(2,2) = sA(1,1)+c(2,1), i.e.,
%
% -s(A(1,1)+A(2,2)) + c(A(1,2)-A(2,1)) = 0
%
% Note that this means that we want the bottom component
% of [c s;-s c][(A(1,1)+A(2,2); A(1,2)-A(2,1)] to be zero.
[c,s] = Rotate(A(1,1)+A(2,2),A(1,2)-A(2,1));
disp('[c s;-s c] =')
disp(' ')
disp([c s;-s c])
disp('[c s; -s c]''*A = ')
disp(' ')
disp([c s;-s c]'*A)
% Problem P7_2_3
%
% Lower triangularizing a matrix using rotations
clc
n = 5;
A = randn(n,n)
[Q,L] = QLRot(A)
disp('Q''*Q - I = ')
disp(Q'*Q - eye(n,n))
disp('Q''*A - L =')
disp(Q'*A - L)
function [Q,L] = QLrot(A)
% [Q,L] = QLrot(A)
%
% A is an n-by-n matrix.
%
% Q is an n-by-n orthogonal matrix and
% L is an n-by-n lower triangular so A = QL.
[n,n] = size(A);
Q = eye(n,n);
for j=n:-1:2
% Zero A(1:j-1,j)
for i=1:j-1
% Zero A(i,j)
% Note that if [c s;-s c][z;-y] = [r;0], then
% [c s;-s c][y z] = [0;r]. So we can use Rotate to
% zero the top of a 2-vector.
[c,s] = Rotate(A(i+1,j),-A(i,j));
A(i:i+1,1:j) = [c s; -s c]*A(i:i+1,1:j);
Q(:,i:i+1) = Q(:,i:i+1)*[c s; -s c]';
end
end
L = tril(A);
% Problem P7_2_4
%
% hessenberg QR factorization.
clc
n = 6;
A = triu(randn(n,n),-1)
[Q,R] = HessQRrot(A)
disp('Q''*Q - I = ')
disp(Q'*Q - eye(n,n))
disp('Q''*A - R =')
disp(Q'*A - R)
function [Q,R] = HessQRrot(A)
% [Q,R] = HessQRrot(A)
%
% A is an n-by-n upper Hessenberg matrix.
%
% Q is an n-by-n orthogonal matrix and
% R is an n-by-n upper triangular such that A = QR.
[n,n] = size(A);
Q = eye(n,n);
for j=1:n-1
%Zero A(j+1,j)
[c,s] = Rotate(A(j,j),A(j+1,j));
A(j:j+1,j:n) = [c s; -s c]*A(j:j+1,j:n);
Q(:,j:j+1) = Q(:,j:j+1)*[c s; -s c]';
end
R = triu(A);
% Problem P7_2_5
%
% Least squares fit of the square root function on [.25,1]
clc
disp(' m a b')
disp('-------------------------------------------------')
for m = [2:10 20 40 80 200]
x = linspace(.25,1,m)';
z = [ones(m,1) x]\sqrt(x);
disp(sprintf('%4.0f %20.16f %20.16f',m,z(1),z(2)))
end
% Problem P7_2_6
%
% Using a rotation to zero the top component of a 2-vector.
clc
x = randn(2,1)
[c,s] = Rotate1(x(1),x(2));
disp('[c s;-s c]] =')
disp(' ')
disp([c s;-s c])
disp('[c s;-s c]*x =')
disp(' ')
disp([c s;-s c]*x)
function [c,s] = Rotate1(x,y)
% [c,s] = Rotate1(x,y)
%
% x,y are real scalars
%
% c,s satisfy c^2+s^=1 with the property that the top component
% of the vector [c s;-s c][x;y] is zero.
[c,s] = Rotate(-y,x);
% Problem P7_3_1
%
% Test BVPsolver
a = 0;
b = 1;
close all
for n = [10 100 1000]
uvals = TwoPtBVP(n,a,b,'MyF731p','MyF731q','MyF731r');
figure
plot(linspace(a,b,n)',uvals)
title(sprintf('n = %1.0f',n))
end
function uvals = TwoPtBVP(n,a,b,pname,qname,rname)
% uvals = TwoPtBVP(n,a,b,pname,qname,rname)
%
% a,b are reals with a<b.
% n is an integer >= 3
% pname is a string that names a positive function defined on [a,b].
% qname is a string that names a positive function defined on [a,b].
% rname is a string that names a function defined on [a,b].
%
% uvals is a column n-vector with the property that uvals(i) approximates
% the solution to the 2-point boundary value problem
%
% -D[p(x)Du(x)] + q(x)u(x) = r(x) a<=x<=b
% u(a) = u(b) = 0
%
% at x = a+(i-1)h where h = (b-a)/(n-1)
x = linspace(a,b,n)';
h = (b-a)/(n-1);
p = feval(pname,x(1:n-1)+h/2);
q = feval(qname,x);
r = feval(rname,x);
uvals = zeros(n,1);
e = zeros(n-2,1);
e(2:n-2) = -p(2:n-2);
d = p(1:n-2)+p(2:n-1)+(h^2)*q(2:n-1);
[d,e] = CholTrid(d,e);
rhs = (h^2)*r(2:n-1);
uvals(2:n-1) = CholTridSol(d,e,rhs);
% Problem P7_3_2 % % Recursive outer product Cholesky clc n = 5; Gexact = tril(rand(n,n)) A = Gexact*Gexact'; G = CholOuter1(A)
% Problem P7_3_3
%
% pentadiagonal Cholesky
% Produce plot of timings:
% set up A and b
clc
time = [];
for n = 400:200:2000
% Set up A as instructed:
j = 1:n;
d = 100 + j';
e = 10 + j';
f = j';
% Don't actually form A
% A = diag(d) + diag(e(2:n),1) + diag(e(2:n),-1) + ...
% diag(f(3:n),2) + diag(f(3:n),-2);
% Right-hand side:
b = 100*ones(n,1);
% Record starting time.
tic
% Solve Ax = b
% Factor A = G*G'
% G = diag(g) + diag(h(2:n),-1) + diag(p(3:n),-2);
[g,h,p] = Chol5(d,e,f);
% Solve Gy = b for y
y = LTriSol5(g,h,p,b);
% Solve G'*x = y for x
x = UTriSol5(g,h,p,y);
% Compute elapsed time.
dt = toc;
% Add to table of timings.
time = [time dt];
end
% Make plot of timings.
close all
plot(400:200:2000,time)
xlabel('n')
ylabel('time')
title('Time to solve n-by-n penta-diagonal system')
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Produce table of flops:
clc
disp('n flops');
disp('------------------');
for n = [10 20 40 80 160]
% Set up A as instructed:
j = 1:n;
d = 100 + j';
e = 10 + j';
f = j';
% Set up right-hand side.
b = 100*ones(n,1);
% Reset number of flops.
flops(0)
% Factor A = G * G'
[g,h,p] = Chol5(d,e,f);
% Solve Gy = b for y
y = LTriSol5(g,h,p,b);
% Solve G'*x = y for x
x = UTriSol5(g,h,p,y);
% Print number of flops required.
disp(sprintf('%4.0f %8.0f', n, flops));
end
function x = LTriSol5(g,h,p,b)
% x = LTriSol5(g,h,p,b)
%
% for recitation problem 7.3.3
% Pre:
% g,h,p as in Chol5.m
% b n-vector
%
% Post:
% x such that G*x = b,
% where G = diag(g) + diag(h(2:n),-1) + diag(p(3:n),-2)
n = length(g);
x = zeros(n,1);
% initialize first 2 x values
x(1) = b(1)/g(1);
x(2) = (b(2) - x(1)*h(2)) / g(2);
% get rest of x values (this is similar to 2nd part of
% CholTriDiSol, p.241)
for i = 3:n
x(i) = (b(i) - x(i-2)*p(i) - x(i-1)*h(i)) / g(i);
end
Not Availabe
% Problem 7_3_5 % % We need to solve Ay = b for y and Az = u for z: G = CholScalar(A); y1 = LTriSol(G,b); y = UTriSol(G',y1); z1 = LTriSol(G,u); z = UTriSol(G',z1); % It follows that x = y - ((u'*y)/(1+u'*z))*z;
Not Available.
% Problem P7_4_1
%
% Illustrate a general block Cholesky
p = [ 2 4 3 1 5];
n = sum(p);
G0 = tril(rand(n,n));
A = G0*G0';
G = MyCholBlock(A,p);
clc
home
disp('The block width vector:')
disp(' ')
s = sprintf('%2.0f ',p);
disp(['[' s ']'])
disp(' ' )
error = norm(G-G0,1)/norm(G0,1)
function G = MyCholBlock(A,p)
% G = MyCholBlock(A,p)
%
% Pre: A is an n-by-n symmetric and positive definite matrix
% and p is a positive integer vector with sum(p) = n
% Post: G is lower triangular and A = G*G'.
[n,n] = size(A);
m = length(p);
finish = cumsum(p);
start = [1 finish(1:m-1)+1];
G = zeros(n,n);
for i=1:m
ivec = start(i):finish(i);
for j=1:i
jvec = start(j):finish(j);
%Find the (i,j) block of G, i.e., G(ivec,jvec).
S = A(ivec,jvec);
for k=1:j-1
kvec = start(k):finish(k);
S = S - G(ivec,kvec)*G(jvec,kvec)'; end
if j<i
for q=1:(finish(i)-start(i)+1)
G(start(i)+q-1,jvec)= (LTriSol(G(jvec,jvec),S(q,:)'))';
end
else
G(ivec,ivec) = CholScalar(S);
end
end
end
% Problem P7_4_2
%
% Explore load balancing in shared memory Cholesky with
% "wrap mapping" of tasks.
n = 100;
clc
disp(' p max/min flops')
disp('---------------------')
for p = [2 4 8 16 32 64]
f = FlopBalance(n,p);
maxf = max(f); minf = min(f);
disp(sprintf(' %3.0f %7.5f',p,maxf/minf))
end
function f = FlopBalance(n,p)
% f = FlopBalance(n,p)
%
% n,p are positive integers with n>p.
%
% f is a column p-vector where f(mu) is the approximate number
% of flops required by the mu-th processor when parallel
% cholesky is implemented with wrap mapping.
f = zeros(p,1);
for mu=1:p
MyCols = mu:p:n;
for k=1:n
if any(MyCols==k)
% Flops associated with column generation.
f(mu) = f(mu) + (n-k)+3;
end
% Flops associated with saxpy updates
f(mu) = f(mu) + 2*(n-k+1)*ceil((n-k)/p);
end
end
% Script P7_4_3
%
% Explore load balancing in shared memory Cholesky with
% "contiguous mapping" of tasks.
n = 100;
clc
disp(' p max/min flops')
disp('---------------------')
for p = [2 4 8 16 32 64]
f = FlopBalance1(n,p);
maxf = max(f);
minf = min(f);
disp(sprintf(' %3.0f %7.2f',p,maxf/minf))
end