% Pokarowski et al. (2007), journal article on approximating substitution matrices.
% The doi field holds the complete identifier, matching the one embedded in this entry's url field.
@article{Pokarowski2007,
  author   = {Pokarowski, Piotr and Kloczkowski, Andrzej and Nowakowski, Szymon and Pokarowska, Maria and Jernigan, Robert L. and Koli{\'n}ski, Andrzej},
  title    = {Ideal amino acid exchange forms for approximating substitution matrices},
  journal  = {Proteins: Structure, Function, Bioinformatics},
  volume   = {69},
  year     = {2007},
  pages    = {379--393},
  doi      = {10.1002/prot.21509},
  url      = {http://onlinelibrary.wiley.com/doi/10.1002/prot.21509/full},
  keywords = {protein contact potentials, protein structure prediction, Sequence Alignment, substitution matrices},
  abstract = {We have analyzed 29 published substitution matrices (SMs) and five statistical protein contact potentials (CPs) for comparison. We find that popular, {\textquoteleft}classical{\textquoteright} SMs obtained mainly from sequence alignments of globular proteins are mostly correlated by at least a value of 0.9. The BLOSUM62 is the central element of this group. A second group includes SMs derived from alignments of remote homologs or transmembrane proteins. These matrices correlate better with classical SMs (0.8) than among themselves (0.7). A third group consists of intermediate links between SMs and CPs - matrices and potentials that exhibit mutual correlations of at least 0.8. Next, we show that SMs can be approximated with a correlation of 0.9 by expressions c0 + xixj + yiyj + zizj, 1<= i, j <= 20, where c0 is a constant and the vectors (xi), (yi), (zi) correlate highly with hydrophobicity, molecular volume and coil preferences of amino acids, respectively. The present paper is the continuation of our work (Pokarowski et al., Proteins 2005;59:49{\textendash}57), where similar approximation were used to derive ideal amino acid interaction forms from CPs. Both approximations allow us to understand general trends in amino acid similarity and can help improve multiple sequence alignments using the fast Fourier transform (MAFFT), fast threading or another methods based on alignments of physicochemical profiles of protein sequences. The use of this approximation in sequence alignments instead of a classical SM yields results that differ by less than 5\%. Intermediate links between SMs and CPs, new formulas for approximating these matrices, and the highly significant dependence of classical SMs on coil preferences are new findings.},
}
@article {Pokarowski2005,
title = {Inferring ideal amino acid interaction forms from statistical protein contact potentials},
journal = {Proteins},
volume = {59},
number = {1},
year = {2005},
month = {apr},
pages = {49{\textendash}57},
abstract = {We have analyzed 29 different published matrices of protein pairwise contact potentials (CPs) between amino acids derived from different sets of proteins, either crystallographic structures taken from the Protein Data Bank (PDB) or computer-generated decoys. Each of the CPs is similar to 1 of the 2 matrices derived in the work of Miyazawa and Jernigan (Proteins 1999;34:49-68). The CP matrices of the first class can be approximated with a correlation of order 0.9 by the formula e(ij) = h(i) + h(j), 1