function C = clusterProteins(M, proteins)
% CLUSTERPROTEINS Clusters of proteins based on pairwise distances
%   C = CLUSTERPROTEINS(M) repeatedly merges the two closest clusters
%   until a single cluster remains, and returns that cluster.
%   C = CLUSTERPROTEINS(M, PROTEINS) additionally attaches a label to
%   each initial single-protein cluster.
%
%   M is the square matrix of pairwise distances. Only the part of M
%   strictly above the diagonal is read; it is mirrored to build a
%   symmetric working matrix, so the lower triangle and the diagonal of M
%   are ignored.
%   PROTEINS is an optional cell array of strings, where each string is a
%   protein name, identifier, or some other kind of label. It must have
%   at least size(M,1) elements.
%
%   C is whatever makeCluster (for a single protein) or combineClusters
%   (for the final merge) returns; both helpers are defined elsewhere.
%
%   The distance from a merged cluster to any other cluster k is the
%   plain average of the two merged clusters' distances to k.
%
%   Errors are raised when M is not a square 2-D matrix, when M is empty,
%   or when PROTEINS has fewer entries than M has rows.

if (ndims(M) ~= 2 || size(M,1) ~= size(M,2))
    error('M must be a symmetric square matrix');
end

N = size(M,1);
if (N == 0)
    % Without this check the function would fail at clusters{1} below
    % with an unhelpful indexing error.
    error('clusterProteins:emptyInput', ...
          'M must contain at least one protein');
end

haveLabels = (nargin >= 2);
if (haveLabels && numel(proteins) < N)
    error('clusterProteins:labelCount', ...
          'PROTEINS must contain one label per row of M');
end

% In the beginning, clusters are just individual proteins
clusters = cell(1, N);
for i = 1:N
    if (haveLabels)
        clusters{i} = makeCluster(i, proteins{i});
    else
        clusters{i} = makeCluster(i);
    end
end

% Set up matrix of distances, making sure it is symmetric.
% triu(M,1) extracts the part of M strictly above the diagonal, so the
% diagonal of dist is zero instead of twice the diagonal of M.
upper = triu(M, 1);
dist = upper + upper';

% Run the clustering algorithm
while (N > 1)
    % Find the pair of closest clusters. The first pair (in row order)
    % with the strictly smallest distance wins. The indices are reset on
    % every pass so that, when no distance compares below Inf (all
    % remaining distances are Inf or NaN), a valid pair is still merged
    % instead of using undefined or stale indices.
    min_dist = Inf;
    min_i = 1;
    min_j = 2;
    for i = 1:N
        for j = i+1:N
            if (dist(i,j) < min_dist)
                min_dist = dist(i,j);
                min_i = i;
                min_j = j;
            end
        end
    end

    % Combine the closest clusters, and compute new distances. The merged
    % cluster is appended at position N+1 before the originals are removed.
    clusters{N+1} = combineClusters(clusters{min_i}, clusters{min_j});
    for k = 1:N
        if (k ~= min_i && k ~= min_j)
            dist(k,N+1) = (dist(min_i,k) + dist(min_j,k))/2;
            dist(N+1,k) = dist(k,N+1);
        end
    end
    dist(N+1,N+1) = 0;

    % Remove the two clusters, and their corresponding distances
    dist([min_i, min_j], :) = [];   % Delete i-th and j-th matrix rows
    dist(:, [min_i, min_j]) = [];   % Delete i-th and j-th matrix columns
    clusters([min_i, min_j]) = [];  % Delete i-th and j-th clusters

    % Two clusters removed, one added
    N = N - 1;
end

C = clusters{1};