function S = pickSequence(pdbFileName)
% PICKSEQUENCE Extract the amino acid sequence from a PDB file.
%   S = pickSequence(pdbFileName) opens the PDB text file pdbFileName,
%   locates the first run of consecutive SEQRES records and returns the
%   residues listed there as a row vector of 1-letter amino acid codes.
%
%   Residue names that are not one of the 20 standard 3-letter codes are
%   skipped. The SEQRES records of all chains in the run are concatenated
%   in file order. If the file contains no SEQRES records, or none of the
%   listed residues is recognised, S is empty.
%
%   An error ('pickSequence:fileOpen') is raised if the file cannot be
%   opened.
%
%   For documentation of the PDB file format, see
%   http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html

% Amino acid 3-letter codes and the matching 1-letter codes (same order).
codes_3let = {'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', ...
              'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', ...
              'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'};
codes_1let = 'ARNDCQEGHILKMFPSTWYV';

fid = fopen(pdbFileName);
if fid == -1
    error('pickSequence:fileOpen', 'Could not open PDB file: %s', pdbFileName);
end

% Skip lines until the first SEQRES record. fgetl returns -1 (non-char) at
% end of file, and strncmp is false for lines shorter than 6 characters,
% so neither case can cause an indexing error.
L = fgetl(fid);
while ischar(L) && ~strncmp(L, 'SEQRES', 6)
    L = fgetl(fid);
end

% Translate every consecutive SEQRES record.
seq = '';
while ischar(L) && strncmp(L, 'SEQRES', 6)
    % Residue names occupy columns 20-70; tolerate lines whose trailing
    % blanks were trimmed (or that are shorter than 20 characters).
    field = L(20:min(length(L), 70));
    % Split on whitespace rather than fixed 3-character groups so that a
    % partially filled record (fewer than 13 residues) is handled.
    residues = regexp(field, '\S+', 'match');
    if ~isempty(residues)
        [known, res_index] = ismember(residues, codes_3let);
        seq = [seq, codes_1let(res_index(known))]; %#ok<AGROW>
    end
    L = fgetl(fid);   % Read the next line
end

% Nothing between fopen and here can throw, so the file is always closed.
fclose(fid);

S = seq;