Chapter 5: Problem Solutions
% Script P5_1_1
%
% Compare two ways of setting up the Pascal matrix
clc
disp(' n PascalTri PascalTri1')
disp(' Flops Flops ')
disp('------------------------------------')
for n=[5 10 20 40 80]
flops(0);
P = PascalTri(n);
f = flops;
flops(0);
P = PascalTri1(n);
f1 = flops;
disp(sprintf(' %3.0f %6.0f %6.0f',n, f,f1))
end
function P = PascalTri1(n)
% n is a positive integer
% T is nxn lower triangular matrix with T(i,j) =
% the binomial coefficient (i-1)-choose-(j-1)
P = tril(ones(n,n));
for i=3:n
for j=2:i-1
P(i,j) = P(i-1,j-1) + P(i-1,j);
end
end
% Script P5_1_2 % % Illustrate Circulant matrix set-up via Toeplitz clc v = (1:8)'; C = Circulant(v) function C = Circulant(v) % v a column vector % C is a circulant matrix with first row = v. C = toeplitz([1; v(length(v):-1:2)],v);
% Script P5_1_3 % % Embeddig a Toeplitz matrix into a circulant matrix. clc row = [10 20 30 40 50]; col = [10; 200; 300; 400; 500]; T = toeplitz(col,row) C = EmbedToep(col,row) function C = EmbedToep(col,row) % col is a column n-vector % row is a row n-vector with row(1) = col(1) % % C is a circulant matrix of size 2n-1 with the property % that C(1:n,1:n) = T where T is an n-by-n Toeplitz % matrix with T(:,1) = col and T(1,:) = row. n = length(col); m = 2*n-1; C = zeros(m,m); C(1,:) = [row col(n:-1:2)']; for k=2:m C(k,:) = [C(k-1,m) C(k-1,1:m-1)]; end
% Script P5_1_4
%
% Illustrate RandBand
clc
disp('p = 1 and q = 1')
A = RandBand(5,5,1,1)
disp('p = 3 and q = 2')
A = RandBand(9,5,3,2)
disp('p = 1 and q = 3')
A = RandBand(5,6,1,3)
function A = RandBand(m,n,p,q)
% m, n, p, and q are positive integers
% random m-by-n matrix with lower bandwidth p and upper bandwidth q.
A = triu(tril(rand(m,n),q),-p);
% Script P5_1_5
%
% Illustrate Plasmid Matrix Set-up
clc
f = zeros(6,1); % store number of flops to compute P(5), P(10), ..., P(30)
flops(0);
P = SetUp(5); % Compute P(5)
f(1) = flops; % f(1) is number of flops for P(5)
disp('P(5) is:')
disp(P)
P = SetUp(6); % Compute P(6)
disp(' ')
disp('P(6) is:')
disp(P)
function P = SetUp(n)
% n is a positive integer
% P is the (n+1)-by-(n+1) plasmid transition matrix.
% Notice the symmetry that the jth column of P is the same as the (n+2-j)th
% column upside-down.
P = zeros(n+1,n+1);
mid = floor(n/2) + 1;
for j=1:mid
% L = (2*iA i'A)
L= BiVec(2*(j-1),n);
% R = (2*iB i'B)
R= BiVec(2*(n-j+1),n);
P(:,j) = L.*R(n+1:-1:1);
P(:,n+2-j) = P(n+1:-1:1,j);
end
% Finally, we need to divide all the entries in P by (2n n), but this is
% the entry in P(1,1) and P(n+1,n+1).
P = P/P(1,1);
% Script P5_2_1 % % Checks the Hessenberg matrix-vector product. clc n = 10; A = triu(randn(n,n),-1); z = randn(n,1); y = HessMatVec(A,z); err = y - A*z function y = HessMatVec(A,z) % A is m-by-n with A(i,j) = 0, i>j+1, z is n-by-1. % y = A*z [n,n] = size(A); y = z(n)*A(:,n); for k=1:n-1 y(1:k+1) = y(1:k+1) + A(1:k+1,k)*z(k); end
% Script P5_2_2 % % Checks TriMatVec clc m = 5; n = 3; A = triu(randn(m,n)); x = randn(n,1); y = TriMatVec(A,x); err = y - A*x A = triu(randn(n,m)); x = randn(m,1); y = TriMatVec(A,x); err = y - A*x function y = TriMatVec(A,x) % A is an m-by-n upper triangular matrix and x n-by-1. % y = A*x [m,n] = size(A); y = zeros(m,1); for k=1:n s = min([k m]); y(1:s) = y(1:s) + A(1:s,k)*x(k); end
% Script P5_2_3
%
% Saxpy matrix multiply with one factor triangular.
clc
disp('Example where B has more columns than rows:')
A = rand(4,4)
B = triu(rand(4,6))
C = MatMatSax1(A,B)
Cexact = A*B
disp(' ')
disp('Example where B has more rows than columns:')
A = rand(6,6)
B = triu(rand(6,4))
C = MatMatSax1(A,B)
Cexact = A*B
function C = MatMatSax1(A,B)
% A is an m-by-p matrix.
% B is a p-by-n matrix (upper triangular).
% C A*B (Saxpy Version)
[m,p] = size(A);
[p,n] = size(B);
C = zeros(m,n);
for j=1:n
% Compute j-th column of C.
for k=1:min([j p])
C(:,j) = C(:,j) + A(:,k)*B(k,j);
end
end
% Script P5_2_4
%
% Saxpy matrix multiply with both factors triangular.
clc
A = triu(rand(5,5))
B = triu(rand(5,5))
C = MatMatSax2(A,B)
Cexact = A*B
function C = MatMatSax2(A,B)
% A is an n-by-n matrix.
% B is a p-by-n matrix (upper triangular)
% C A*B (Saxpy Version)
%
[n,n] = size(A);
C = zeros(n,n);
for j=1:n
% Compute j-th column of C.
for k=1:j
C(1:j,j) = C(1:j,j) + A(1:j,k)*B(k,j);
end
end
% Script 5_2_5
%
% Illustrate (Lower Triangular)*(Upper Triangular) products
A = tril(rand(6,6))
B = triu(rand(6,6))
C = MatMatDot1(A,B)
Error = C - A*B
function C = MatMatDot1(A,B)
% A and B are n-by-n lower triangular matrices.
% C = A*B (Dot Product Version)
[n,n] = size(A);
C = zeros(n,n);
for j=1:n
% Compute j-th column of C.
for i=1:n
s = 1:min([i j]);
C(i,j) = A(i,s)*B(s,j);
end
end
% Script P5_2_6
%
% Illustrate A*(Banded B)
p = 2;
A = rand(6,6)
B = triu(tril(rand(6,6),p),-p)
C = BandMatMatSax(A,B,p)
Error = C - A*B
function C = bandMatMatSax(A,B,p)
% A is an n-by-n matrix.
% B is an n-by-n matrix, bandwidth p
% C = A*B (Saxpy Version)
[n,n] = size(A);
C = zeros(n,n);
for j=1:n
% Compute j-th column of C.
for k=max([1 j-p]):min([j+p n])
C(:,j) = C(:,j) + A(:,k)*B(k,j);
end
end
% Script P5_2_7
%
% Matrix powers via repeated squaring
clc
k = input('Compute A^k where k = ');
A = randn(5,5);
A = A/norm(A,1);
B = MatPower(A,k);
Error = B - A^k
function B = MatPower(A,k)
% A is an n-by-n matrix and k is a positive integer.
% B = A^k
s = '';
x = k;
while x>=1
if rem(x,2)==0
s = ['0' s];
x = x/2;
else
s = ['1' s];
x = (x-1)/2;
end
end
C = A;
First = 0;
for j=length(s):-1:1
if s(j)=='1'
if First==0
B = C;
First =1;
else
B = B*C;
end
end
C = C*C;
end
% Script P5_2_8 % % y = (polynomial in matrix A)*(vector) clc v = randn(8,1); A = randn(8,8); c = randn(4,1); y = HornerM(c,A,v); Error = y - (c(1)*v + c(2)*A*v + c(3)*A*A*v + c(4)*A*A*A*v)
The function HornerM is not provided.
% Script P5_2_9 % % Illustrate Krylov matrix set-up clc A = magic(4); B = A(:,3:4); p = 3 C = Krylov(A,B,p) function C = Krylov(A,B,p) % A is n-by-n, B is n-by-t, and p is a positive integer. % C = [ B AB (A^2)B ... (A^p-1))B] C = B; C1 = B; for k=1:p-1 C1 = A*C1; C = [ C C1]; end
% Script P5_2_10 % % Illustrate row scaling clc A = magic(5); A = A(:,1:3) d = 1:5 B = ScaleRows(A,d) function B = ScaleRows(A,d) % A is an m-by-n and d an m-vector % B = diag(d)*A [m,n] = size(A); B = zeros(m,n); for i=1:m B(i,:) = d(i)*A(i,:); end
% Script P5_2_11
%
% Structured Matrix-vector multiplication
clc
close all
n = 20;
k = 30;
P = SetUp(n-1);
v0 = rand(n,1);
v0 = (v0 + v0(n:-1:1))/2;
V = Forward(P,v0,k);
for j=1:k
plot(V(:,j))
title(sprintf('j = %1d',j))
pause(1)
end
hold off
[The function Forward is not provided.]
z
z
z
z
z
% Script P5_3_1
%
% Integral of SampleF2 over [0,2]x[0,2] for various 2D composite
% QNC(7) rule.
clc
m = 7;
disp(' Subintervals Integral Relative Time')
disp('------------------------------------------------')
for n = [4 8 16 32]
tic;
numI2D = MyCompQNC2D('SampleF2',0,2,0,2,m,n,m,n);
t = toc;
if n==4;
base = t;
time = 1;
else
time = t/base;
end
disp(sprintf(' %7.0f %17.15f %11.1f',n,numI2D,time))
end
The function MyCompQNC2D is not provided.
% Script P5_3_2
%
% Solving integral equations.
close all
a = 0;
b = 5;
clc
disp('m n Set-up Time ')
disp('-----------------------')
for sigma = [.01 .1]
j = 1;
figure
for m = [3 5]
for n = [5 10]
tic;
Kmat = Kernel(a,b,m,n,sigma);
tfinish = toc;
T = etime(tfinish,tstart);
disp(sprintf('%2.0f %2.0f %5.3f',m,n,T))
x = linspace(a,b,n*(m-1)+1)';
rhs = 1./((x-2).^2 + .1) + 1./((x-4).^2 + .2);
fvals = Kmat\rhs;
z = linspace(a,b,200)';
sz = spline(x,fvals,z);
subplot(2,2,j)
plot(z,sz)
title(sprintf('m=%2.0f, n=%2.0f sigma = %5.2f',m,n,sigma))
j= j+1;
end
end
end
function Kmat = Kernel(a,b,m,n,sigma)
%
% Kmat(i,j) =
[omega,x] = wCompNC(a,b,m,n);
N = length(omega);
v = exp(-((x(1)-x).^2)/sigma);
Kmat = Toeplitz(v);
omega = (b-a)*omega;
for j=1:N
Kmat(:,j) = Kmat(:,j)*omega(j);
end
z
% Script P5_4_1
%
% Compares the ordinary DFT with the FFT.
clc
global w
x = randn(1024,1)+sqrt(-1)*randn(1024,1);
disp(' n DFT Flops FFTRecur Flops FFT Flops');
disp('------------------------------------------------------')
for n = [2 4 8 16 32 64 128 256 ]
flops(0);
y = DFT(x(1:n));
dftFlops = flops;
flops(0);
% Precompute the weight vector.
w = exp(-2*pi*sqrt(-1)/n).^(0:((n/2)-1))';
% Note: there are better ways to do this.
y2 = MyFFTRecur(x(1:n));
recurFlops = flops;
flops(0);
y = fft(x(1:n));
fftFlops = flops;
disp(sprintf(' %4.0f %10.0f %10.0f %10.0f', n,dftFlops,recurFlops,fftFlops))
end
function y = MyFFTRecur(x)
% x is a column vector with power-of-two length.
% y is the DFT of x.
global w % Precomputed weight vector
n = length(x);
if n ==1
y = x;
else
m = n/2;
yT = MyFFTRecur(x(1:2:n));
yB = MyFFTRecur(x(2:2:n));
% The required weight vector is a subvector of the precomputed weight vector.
n_orig = 2*length(w);
s = n_orig/n;
d = w(1:s:length(w));
z = d.*yB;
y = [ yT+z ; yT-z ];
end
z
% Problem P5_4_3
%
% Test generalized strassen multiply.
clc
n = input('Enter n:');
nmin = input('Enter nmin:');
A = randn(n,n);
B = randn(n,n);
C = MyStrass(A,B,nmin);
err = norm(C-A*B)
function C = MyStrass(A,B,nmin)
% C = MyStrass(A,B)
%
% A,B are n-by-n matrices, n a power of 2.
% nmin is the regukar matrix multiply threshold, a positive integer.
% C is an n-by-n matrix equal to A*B.
[n,n] = size(A);
if n <= nmin
C = A*B;
else
m = floor(n/2); u = 1:m; v = m+1:2*m;
P1 = MyStrass(A(u,u)+A(v,v),B(u,u)+B(v,v),nmin);
P2 = MyStrass(A(v,u)+A(v,v),B(u,u),nmin);
P3 = MyStrass(A(u,u),B(u,v)-B(v,v),nmin);
P4 = MyStrass(A(v,v),B(v,u)-B(u,u),nmin);
P5 = MyStrass(A(u,u)+A(u,v),B(v,v),nmin);
P6 = MyStrass(A(v,u)-A(u,u),B(u,u) + B(u,v),nmin);
P7 = MyStrass(A(u,v)-A(v,v),B(v,u)+B(v,v),nmin);
C = [ P1+P4-P5+P7 P3+P5; P2+P4 P1+P3-P2+P6];
if 2*m < n
% The odd n case.
% Partition A = [A(1:n-1:n-1) u; v alfa]
% Partition B = [B(1:n-1:n-1) w; y alfa]
u = A(1:n-1,n); v = A(n,1:n-1); alfa = A(n,n);
w = B(1:n-1,n); y = B(n,1:n-1); beta = B(n,n);
% Perform the 2-by-2 block multiply using Strassen multiply in the
% (1,1) block.
C = [C+u*y A(1:2*m,1:2*m)*w+beta*u ; v*B(1:2*m,1:2*m)+alfa*y v*w+alfa*beta];
end
end
% Script P5_4_3
%
% Compares compares general-n Strassen method flops with ordinary A*B flops
clc
A0 = rand(128,128);
B0 = rand(128,128);
disp(' n Strass Flops Ordinary Flops')
disp('-------------------------------------')
for n = [15:33 63 64 127 128]
A = A0(1:n,1:n);
B = B0(1:n,1:n);
flops(0);
C = MyStrass(A,B);
f = flops;
disp(sprintf('%3.0f %10.0f %10.0f', n,f,2*n^3))
end
Function MyStrass not provided.
% Script 5_4_4
%
% Compare Strass and conventional matrix multiply
clc
disp(' q r (Strass Flops)/(Conventional Flops)')
disp('-------------------------------------------------')
for q = 1:20
n = 2^q;
flop_min = StrassCount(n,1);
rmin = 0;
for r=1:q
f = StrassCount(n,2^r);
if f<flop_min
flop_min = f;
rmin = r;
end
end
disp(sprintf(' %2.0f %2.0f %15.2f ',q,rmin,flop_min/(2^(3*q+1))))
end
function f = StrassCount(n,nmin)
% n and nmin are powers of 2 and nmin<=n
% f is the number of flops required to compute an n-by-n matrix multiply
% using Strassen if n>nmin and conventional multiplication otherwise.
if n==nmin
% flops for conventional n-by-n multiply.
f = 2*n^3;
else
m = n/2;
f = 18*m^2 + 7*StrassCount(m,nmin);
end
Solution not provided.
% Script P5_5_2
%
% Explores when it pays to do a 2-processor matrix-vector
% multiply.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Time for single processor execution:
% T_seq = (2*n^2)/R;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Let n = 2m and set top = 1:m and bot = (m+1):n.
% Proc(1) sends A(bot,:) to Proc(2)
% T_send_mat = alpha + (n^2/2)*beta;
% Proc(1) computes y(top) = A(top,:)*x and Proc(2) computes
% y(bot) = A(bot,:)*x.
% T_work = (n^2)/R;
% Proc(2) sends y(bot) to Proc(1)
% T_send_vec = alpha + (n/2)*beta;
% Total execution time:
% T_par = 2*alpha + beta*n*(n+1)/2 + (n^2)/R;
% Comparison quotient T_par/T_seq:
% T_ratio = .5*(1+beta*R/2) + (alpha*R/n^2) + (beta*R)/(4*n);
% Thus, if beta*R<2 then for sufficiently large n, it pays
% to distribute the computation.
clc
disp('R = 1/beta alpha Minimum n')
disp('-----------------------------------')
for R = [10^5 10^6 10^7 10^8 10^9]
beta = 1/R; %safe by a factor of two
for alpha = [10^(-3) 10^(-4) 10^(-5) 10^(-6)]
%compute the smallest n so that it pays to distribute.
n = ceil((1 + sqrt(1+16*alpha*R))/2);
disp(sprintf(' %6.1e %6.1e %8.0f',R,alpha,n))
end
disp(' ')
end