Chapter 5: Problem Solutions

Problems Home

 P5.1.1 P5.2.1 P5.3.1 P5.4.1 P5.5.1 P5.1.2 P5.2.2 P5.3.2 P5.4.2 P5.5.2 P5.1.3 P5.2.3 P5.3.3 P5.4.3 P5.1.4 P5.2.4 P5.4.4 P5.1.5 P5.2.5 P5.2.6 P5.2.7 P5.2.8 P5.2.9 P5.2.10 P5.2.11 P5.2.12 P5.2.13 P5.2.14 P5.2.15 P5.2.16

P5.1.1

```% Script  P5_1_1
%
% Compare two ways of setting up the Pascal matrix

clc
disp('   n     PascalTri   PascalTri1')
disp('           Flops        Flops ')
disp('------------------------------------')
for n=[5 10 20 40 80]
flops(0);
P = PascalTri(n);
f = flops;
flops(0);
P = PascalTri1(n);
f1 = flops;
disp(sprintf(' %3.0f      %6.0f      %6.0f',n, f,f1))
end

function P = PascalTri1(n)
% n is a positive integer
% T is nxn lower triangular matrix with T(i,j) =
%       the binomial coefficient (i-1)-choose-(j-1)

P = tril(ones(n,n));
for i=3:n
for j=2:i-1
P(i,j) = P(i-1,j-1) + P(i-1,j);
end
end

```

P5.1.2

```% Script P5_1_2
%
% Illustrate Circulant matrix set-up via Toeplitz

clc
v = (1:8)';
C = Circulant(v)

function C = Circulant(v)
% v a column vector
% C is a circulant matrix with first row = v.

C = toeplitz([1; v(length(v):-1:2)],v);
```

P5.1.3

```% Script P5_1_3
%
% Embeddig a Toeplitz matrix into a circulant matrix.

clc
row = [10 20 30 40 50];
col = [10; 200; 300; 400; 500];
T = toeplitz(col,row)
C = EmbedToep(col,row)

function C = EmbedToep(col,row)
% col is a column n-vector
% row is a row n-vector with row(1) = col(1)
%
% C is a circulant matrix of size 2n-1 with the property
% that C(1:n,1:n) = T where T is an n-by-n Toeplitz
% matrix with T(:,1) = col and T(1,:) = row.

n = length(col);
m = 2*n-1;
C = zeros(m,m);
C(1,:) = [row col(n:-1:2)'];
for k=2:m
C(k,:) = [C(k-1,m) C(k-1,1:m-1)];
end
```

P5.1.4

```% Script P5_1_4
%
% Illustrate RandBand

clc
disp('p = 1 and q = 1')
A = RandBand(5,5,1,1)
disp('p = 3 and q = 2')
A = RandBand(9,5,3,2)
disp('p = 1 and q = 3')
A = RandBand(5,6,1,3)

function A = RandBand(m,n,p,q)
% m, n, p, and q are  positive integers
% random m-by-n matrix with lower bandwidth p and upper bandwidth q.

A = triu(tril(rand(m,n),q),-p);

```

P5.1.5

``` Not Available

```

P5.2.1

```% Script P5_2_1
%
% Checks the Hessenberg matrix-vector product.

clc
n = 10;
A = triu(randn(n,n),-1);
z = randn(n,1);
y = HessMatVec(A,z);
err = y - A*z

function y = HessMatVec(A,z)
% A is m-by-n with A(i,j) = 0, i>j+1, z is n-by-1.
% y = A*z
[n,n] = size(A);
y = z(n)*A(:,n);
for k=1:n-1
y(1:k+1) = y(1:k+1) + A(1:k+1,k)*z(k);
end
```

P5.2.2

```% Script P5_2_2
%
% Checks TriMatVec

clc
m = 5;
n = 3;

A = triu(randn(m,n));
x = randn(n,1);
y = TriMatVec(A,x);
err = y - A*x
A = triu(randn(n,m));
x = randn(m,1);
y = TriMatVec(A,x);
err = y - A*x

function y = TriMatVec(A,x)
% A is an m-by-n upper triangular matrix and x n-by-1.
% y = A*x

[m,n] = size(A);
y = zeros(m,1);
for k=1:n
s = min([k m]);
y(1:s) = y(1:s) + A(1:s,k)*x(k);
end
```

P5.2.3

```% Script P5_2_3
%
% Saxpy matrix multiply with one factor triangular.

clc
disp('Example where B has more columns than rows:')
A = rand(4,4)
B = triu(rand(4,6))
C = MatMatSax1(A,B)
Cexact = A*B

disp(' ')
disp('Example where B has more rows than columns:')
A = rand(6,6)
B = triu(rand(6,4))
C = MatMatSax1(A,B)
Cexact = A*B

function C = MatMatSax1(A,B)
% A is an m-by-p matrix.
% B is a p-by-n matrix (upper triangular).
% C     A*B (Saxpy Version)

[m,p] = size(A);
[p,n] = size(B);
C = zeros(m,n);
for j=1:n
% Compute j-th column of C.
for k=1:min([j p])
C(:,j) = C(:,j) + A(:,k)*B(k,j);
end
end
```

P5.2.4

```% Script P5_2_4
%
% Saxpy matrix multiply with both factors triangular.

clc
A = triu(rand(5,5))
B = triu(rand(5,5))
C = MatMatSax2(A,B)
Cexact = A*B

function C = MatMatSax2(A,B)
% A is an n-by-n matrix.
% B is a p-by-n matrix (upper triangular)
% C     A*B (Saxpy Version)
%
[n,n] = size(A);
C = zeros(n,n);
for j=1:n
% Compute j-th column of C.
for k=1:j
C(1:j,j) = C(1:j,j) + A(1:j,k)*B(k,j);
end
end
```

P5.2.5

```% Script 5_2_5
%
% Illustrate (Lower Triangular)*(Upper Triangular) products

A = tril(rand(6,6))
B = triu(rand(6,6))
C = MatMatDot1(A,B)
Error = C - A*B

function C = MatMatDot1(A,B)
% A and B are n-by-n lower triangular matrices.
% C = A*B (Dot Product Version)

[n,n] = size(A);
C = zeros(n,n);
for j=1:n
% Compute j-th column of C.
for i=1:n
s = 1:min([i j]);
C(i,j) = A(i,s)*B(s,j);
end
end
```

P5.2.6

```% Script P5_2_6
%
% Illustrate A*(Banded B)

p = 2;
A = rand(6,6)
B = triu(tril(rand(6,6),p),-p)
C = BandMatMatSax(A,B,p)
Error = C - A*B

function C = bandMatMatSax(A,B,p)
% A is an n-by-n matrix.
% B is an n-by-n matrix, bandwidth p
% C = A*B (Saxpy Version)

[n,n] = size(A);
C = zeros(n,n);
for j=1:n
% Compute j-th column of C.
for k=max([1 j-p]):min([j+p n])
C(:,j) = C(:,j) + A(:,k)*B(k,j);
end
end

```

P5.2.7

```% Script P5_2_7
%
% Matrix powers via repeated squaring

clc
k = input('Compute A^k where k = ');
A = randn(5,5);
A = A/norm(A,1);
B = MatPower(A,k);
Error = B - A^k

function B = MatPower(A,k)
% A is an n-by-n matrix and k is a positive integer.
% B = A^k
s = '';
x = k;
while x>=1
if rem(x,2)==0
s = ['0' s];
x = x/2;
else
s = ['1' s];
x = (x-1)/2;
end
end
C = A;
First = 0;
for j=length(s):-1:1
if s(j)=='1'
if First==0
B = C;
First =1;
else
B = B*C;
end
end
C = C*C;
end
```

P5.2.8

```% Script P5_2_8
%
% y = (polynomial in matrix A)*(vector)

clc
v = randn(8,1);
A = randn(8,8);
c = randn(4,1);
y = HornerM(c,A,v);
Error = y - (c(1)*v + c(2)*A*v + c(3)*A*A*v + c(4)*A*A*A*v)
```

The function HornerM is not provided.

P5.2.9

```% Script P5_2_9
%
% Illustrate Krylov matrix set-up

clc
A = magic(4);
B = A(:,3:4);
p = 3
C = Krylov(A,B,p)

function C = Krylov(A,B,p)
% A is n-by-n, B is n-by-t, and p is a positive integer.
% C = [ B AB (A^2)B ... (A^p-1))B]

C = B;
C1 = B;
for k=1:p-1
C1 = A*C1;
C = [ C C1];
end
```

P5.2.10

```% Script P5_2_10
%
% Illustrate row scaling

clc
A = magic(5); A = A(:,1:3)
d = 1:5
B = ScaleRows(A,d)

function B = ScaleRows(A,d)
% A is an m-by-n and d an m-vector
% B = diag(d)*A
[m,n] = size(A);
B = zeros(m,n);
for i=1:m
B(i,:) = d(i)*A(i,:);
end
```

P5.2.11

```% Script P5_2_11
%
% Structured Matrix-vector multiplication

clc
close all

n = 20;
k = 30;
P = SetUp(n-1);
v0 = rand(n,1);
v0 = (v0 + v0(n:-1:1))/2;
V = Forward(P,v0,k);
for j=1:k
plot(V(:,j))
title(sprintf('j = %1d',j))
pause(1)
end
hold off
```

[The function Forward is not provided.]

P5.2.12

```z
```

P5.2.13

```z
```

P5.2.14

```z
```

P5.2.15

```z
```

P5.2.16

```z
```

P5.3.1

```% Script P5_3_1
%
% Integral of SampleF2 over [0,2]x[0,2] for various 2D composite
% QNC(7) rule.

clc
m = 7;
disp(' Subintervals    Integral       Relative Time')
disp('------------------------------------------------')
for n = [4 8 16 32]
tic;
numI2D = MyCompQNC2D('SampleF2',0,2,0,2,m,n,m,n);
t = toc;
if n==4;
base = t;
time = 1;
else
time = t/base;
end
disp(sprintf(' %7.0f  %17.15f  %11.1f',n,numI2D,time))
end
```

The function MyCompQNC2D is not provided.

P5.3.2

```% Script P5_3_2
%
% Solving integral equations.

close all
a = 0;
b = 5;
clc
disp('m   n     Set-up Time ')
disp('-----------------------')
for sigma = [.01 .1]
j = 1;
figure
for m = [3 5]
for n = [5 10]
tic;
Kmat = Kernel(a,b,m,n,sigma);
tfinish = toc;
T = etime(tfinish,tstart);
disp(sprintf('%2.0f  %2.0f    %5.3f',m,n,T))
x = linspace(a,b,n*(m-1)+1)';
rhs = 1./((x-2).^2 + .1) + 1./((x-4).^2 + .2);
fvals = Kmat\rhs;
z = linspace(a,b,200)';
sz = spline(x,fvals,z);
subplot(2,2,j)
plot(z,sz)
title(sprintf('m=%2.0f, n=%2.0f   sigma = %5.2f',m,n,sigma))
j= j+1;
end
end
end

function Kmat = Kernel(a,b,m,n,sigma)
%
%  Kmat(i,j) =
[omega,x] = wCompNC(a,b,m,n);
N = length(omega);
v = exp(-((x(1)-x).^2)/sigma);
Kmat = Toeplitz(v);
omega = (b-a)*omega;
for j=1:N
Kmat(:,j) = Kmat(:,j)*omega(j);
end
```

P5.3.3

```z
```

P5.4.1

```% Script P5_4_1
%
% Compares the ordinary DFT with the FFT.

clc
global w
x = randn(1024,1)+sqrt(-1)*randn(1024,1);
disp('   n       DFT Flops    FFTRecur Flops    FFT Flops');
disp('------------------------------------------------------')
for n = [2 4 8 16 32 64 128 256 ]
flops(0);
y = DFT(x(1:n));
dftFlops = flops;

flops(0);
% Precompute the weight vector.
w = exp(-2*pi*sqrt(-1)/n).^(0:((n/2)-1))';
% Note: there are better ways to do this.
y2 = MyFFTRecur(x(1:n));
recurFlops = flops;
flops(0);
y = fft(x(1:n));
fftFlops = flops;
disp(sprintf(' %4.0f   %10.0f    %10.0f      %10.0f', n,dftFlops,recurFlops,fftFlops))
end

function y = MyFFTRecur(x)
% x is a column vector with power-of-two length.
% y is the DFT of x.

global w         % Precomputed weight vector
n = length(x);
if n ==1
y = x;
else
m = n/2;
yT = MyFFTRecur(x(1:2:n));
yB = MyFFTRecur(x(2:2:n));

% The required weight vector is a subvector of the precomputed weight vector.
n_orig = 2*length(w);
s = n_orig/n;
d = w(1:s:length(w));

z = d.*yB;
y = [ yT+z ; yT-z ];
end
```

P5.4.2

```z
```

P5.4.3

```% Problem P5_4_3
%
% Test generalized strassen multiply.

clc
n = input('Enter n:');
nmin = input('Enter nmin:');
A = randn(n,n);
B = randn(n,n);
C = MyStrass(A,B,nmin);
err = norm(C-A*B)

function C = MyStrass(A,B,nmin)
% C = MyStrass(A,B)
%
% A,B are n-by-n matrices, n a power of 2.
% nmin is the regukar matrix multiply threshold, a positive integer.
% C is an n-by-n matrix equal to A*B.

[n,n] = size(A);
if n <= nmin
C = A*B;
else
m  = floor(n/2); u = 1:m; v = m+1:2*m;
P1 = MyStrass(A(u,u)+A(v,v),B(u,u)+B(v,v),nmin);
P2 = MyStrass(A(v,u)+A(v,v),B(u,u),nmin);
P3 = MyStrass(A(u,u),B(u,v)-B(v,v),nmin);
P4 = MyStrass(A(v,v),B(v,u)-B(u,u),nmin);
P5 = MyStrass(A(u,u)+A(u,v),B(v,v),nmin);
P6 = MyStrass(A(v,u)-A(u,u),B(u,u) + B(u,v),nmin);
P7 = MyStrass(A(u,v)-A(v,v),B(v,u)+B(v,v),nmin);
C = [ P1+P4-P5+P7   P3+P5; P2+P4 P1+P3-P2+P6];

if 2*m < n
% The odd n case.
% Partition A = [A(1:n-1:n-1) u; v alfa]
% Partition B = [B(1:n-1:n-1) w; y alfa]
u = A(1:n-1,n); v = A(n,1:n-1); alfa = A(n,n);
w = B(1:n-1,n); y = B(n,1:n-1); beta = B(n,n);
% Perform the 2-by-2 block multiply using Strassen multiply in the
% (1,1) block.
C = [C+u*y   A(1:2*m,1:2*m)*w+beta*u  ; v*B(1:2*m,1:2*m)+alfa*y  v*w+alfa*beta];
end
end

% Script P5_4_3
%
% Compares compares general-n Strassen method flops with ordinary A*B flops

clc
A0 = rand(128,128);
B0 = rand(128,128);
disp('  n   Strass Flops   Ordinary Flops')
disp('-------------------------------------')
for n = [15:33 63 64 127 128]
A = A0(1:n,1:n);
B = B0(1:n,1:n);
flops(0);
C = MyStrass(A,B);
f = flops;
disp(sprintf('%3.0f   %10.0f   %10.0f', n,f,2*n^3))
end
```

Function MyStrass not provided.

P5.4.4

```% Script 5_4_4
%
% Compare Strass and conventional matrix multiply

clc
disp('  q     r   (Strass Flops)/(Conventional Flops)')
disp('-------------------------------------------------')
for q = 1:20
n = 2^q;
flop_min = StrassCount(n,1);
rmin = 0;
for r=1:q
f = StrassCount(n,2^r);
if f<flop_min
flop_min = f;
rmin = r;
end
end
disp(sprintf(' %2.0f    %2.0f   %15.2f ',q,rmin,flop_min/(2^(3*q+1))))
end

function f = StrassCount(n,nmin)
% n and nmin are powers of 2 and nmin<=n
% f is the number of flops required to compute an n-by-n matrix multiply
% using Strassen if n>nmin and conventional multiplication otherwise.

if n==nmin
% flops for conventional n-by-n multiply.
f = 2*n^3;
else
m = n/2;
f = 18*m^2 + 7*StrassCount(m,nmin);
end
```

P5.5.1

Solution not provided.

P5.5.2

```% Script P5_5_2
%
% Explores when it pays to do a 2-processor matrix-vector
% multiply.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Time for single processor execution:
% T_seq = (2*n^2)/R;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Let n = 2m and set top = 1:m and bot = (m+1):n.
% Proc(1) sends A(bot,:) to Proc(2)
% T_send_mat = alpha + (n^2/2)*beta;
% Proc(1) computes y(top) = A(top,:)*x and Proc(2) computes
% y(bot) = A(bot,:)*x.
% T_work = (n^2)/R;
% Proc(2) sends y(bot) to Proc(1)
% T_send_vec = alpha + (n/2)*beta;
% Total execution time:
% T_par = 2*alpha + beta*n*(n+1)/2 + (n^2)/R;
% Comparison quotient T_par/T_seq:
% T_ratio = .5*(1+beta*R/2) + (alpha*R/n^2) + (beta*R)/(4*n);
% Thus, if beta*R&lt2 then for sufficiently large n, it pays
% to distribute the computation.

clc
disp('R = 1/beta     alpha    Minimum n')
disp('-----------------------------------')
for R = [10^5  10^6  10^7  10^8  10^9]
beta = 1/R; %safe by a factor of two
for alpha = [10^(-3) 10^(-4) 10^(-5) 10^(-6)]
%compute the smallest n so that it pays to distribute.
n = ceil((1 + sqrt(1+16*alpha*R))/2);

disp(sprintf('  %6.1e    %6.1e  %8.0f',R,alpha,n))
end
disp(' ')
end
```