% Text outside an entry is ignored by BibTeX, so these percent-prefixed lines act as comments.
% Conventions used below: page ranges use a double hyphen, months use the bare
% three-letter macros, and case-sensitive title words are brace-protected.

% TOUCHSTONE II paper (Biophysical Journal, 2003).
@article{Zhang2003,
  author   = {Zhang, Yang and Koli{\'n}ski, Andrzej and Skolnick, Jeffrey},
  title    = {{TOUCHSTONE II}: a new approach to ab initio protein structure prediction},
  journal  = {Biophysical Journal},
  volume   = {85},
  number   = {2},
  year     = {2003},
  pages    = {1145--1164},
  abstract = {We have developed a new combined approach for ab initio protein structure prediction. The protein conformation is described as a lattice chain connecting C(alpha) atoms, with attached C(beta) atoms and side-chain centers of mass. The model force field includes various short-range and long-range knowledge-based potentials derived from a statistical analysis of the regularities of protein structures. The combination of these energy terms is optimized through the maximization of correlation for 30 $\times$ 60,000 decoys between the root mean square deviation (RMSD) to native and energies, as well as the energy gap between native and the decoy ensemble. To accelerate the conformational search, a newly developed parallel hyperbolic sampling algorithm with a composite movement set is used in the Monte Carlo simulation processes. We exploit this strategy to successfully fold 41/100 small proteins (36--120 residues) with predicted structures having a RMSD from native below 6.5~{\AA} in the top five cluster centroids. To fold larger-size proteins as well as to improve the folding yield of small proteins, we incorporate into the basic force field side-chain contact predictions from our threading program PROSPECTOR where homologous proteins were excluded from the data base. With these threading-based restraints, the program can fold 83/125 test proteins (36--174 residues) with structures having a RMSD to native below 6.5~{\AA} in the top five cluster centroids. This shows the significant improvement of folding by using predicted tertiary restraints, especially when the accuracy of side-chain contact prediction is $>$20\%. For native fold selection, we introduce quantities dependent on the cluster density and the combination of energy and free energy, which show a higher discriminative power to select the native structure than the previously used cluster energy or cluster size, and which can be used in native structure identification in blind simulations. These procedures are readily automated and are being implemented on a genomic scale.},
  keywords = {Algorithms, Amino Acid Sequence, Computer Simulation, Crystallography, Crystallography: methods, Energy Transfer, Models, Molecular, Molecular Sequence Data, Protein, Protein Conformation, Protein Folding, Protein Structure, Protein: methods, Proteins, Proteins: chemistry, Secondary, Sequence Analysis, Software, Static Electricity, Statistical},
  issn     = {0006-3495},
  doi      = {10.1016/S0006-3495(03)74551-2},
  url      = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1303233\&tool=pmcentrez\&rendertype=abstract},
}

% TOUCHSTONEX paper (Proteins, 2003).
% The numeric citation key is kept unchanged so existing citations keep resolving.
@article{291,
  author   = {Li, Wei and Zhang, Yang and Kihara, Daisuke and Huang, Yuanpeng Janet and Zheng, Deyou and Montelione, Gaetano T. and Koli{\'n}ski, Andrzej and Skolnick, Jeffrey},
  title    = {{TOUCHSTONEX}: protein structure prediction with sparse {NMR} data},
  journal  = {Proteins},
  volume   = {53},
  year     = {2003},
  month    = nov,
  day      = {1},
  pages    = {290--306},
  abstract = {TOUCHSTONEX, a new method for folding proteins that uses a small number of long-range contact restraints derived from NMR experimental NOE (nuclear Overhauser enhancement) data, is described. The method employs a new lattice-based, reduced model of proteins that explicitly represents C(alpha), C(beta), and the sidechain centers of mass. The force field consists of knowledge-based terms to produce protein-like behavior, including various short-range interactions, hydrogen bonding, and one-body, pairwise, and multibody long-range interactions. Contact restraints were incorporated into the force field as an NOE-specific pairwise potential. We evaluated the algorithm using a set of 125 proteins of various secondary structure types and lengths up to 174 residues. Using N/8 simulated, long-range sidechain contact restraints, where N is the number of residues, 108 proteins were folded to a C(alpha)-root-mean-square deviation (RMSD) from native below 6.5~{\AA}. The average RMSD of the lowest RMSD structures for all 125 proteins (folded and unfolded) was 4.4~{\AA}. The algorithm was also applied to limited experimental NOE data generated for three proteins. Using very few experimental sidechain contact restraints, and a small number of sidechain-main chain and main chain-main chain contact restraints, we folded all three proteins to low-to-medium resolution structures. The algorithm can be applied to the NMR structure determination process or other experimental methods that can provide tertiary restraint information, especially in the early stage of structure determination, when only limited data are available.},
  keywords = {Algorithms, Amino Acids, Models, Molecular, Nuclear Magnetic Resonance, Biomolecular, Protein Conformation, Protein Folding, Protein Structure, Tertiary, Proteins, Staphylococcal Protein A},
  issn     = {1097-0134},
  doi      = {10.1002/prot.10499},
}

% TOUCHSTONE applied to the Mycoplasma genitalium genome (PNAS, 2002).
@article{Kihara2002,
  author   = {Kihara, Daisuke and Zhang, Yang and Lu, Hui and Koli{\'n}ski, Andrzej and Skolnick, Jeffrey},
  title    = {Ab initio protein structure prediction on a genomic scale: application to the {Mycoplasma} genitalium genome},
  journal  = {Proceedings of the National Academy of Sciences of the United States of America},
  volume   = {99},
  year     = {2002},
  month    = apr,
  pages    = {5993--5998},
  abstract = {An ab initio protein structure prediction procedure, TOUCHSTONE, was applied to all 85 small proteins of the Mycoplasma genitalium genome. TOUCHSTONE is based on a Monte Carlo refinement of a lattice model of proteins, which uses threading-based tertiary restraints. Such restraints are derived by extracting consensus contacts and local secondary structure from at least weakly scoring structures that, in some cases, can lack any global similarity to the sequence of interest. Selection of the native fold was done by using the convergence of the simulation from two different conformational search schemes and the lowest energy structure by a knowledge-based atomic-detailed potential. Among the 85 proteins, for 34 proteins with significant threading hits, the template structures were reasonably well reproduced. Of the remaining 51 proteins, 29 proteins converged to five or fewer clusters. In the test set, 84.8\% of the proteins that converged to five or fewer clusters had a correct fold among the clusters. If this statistic is simply applied, 24 proteins (84.8\% of the 29 proteins) may have correct folds. Thus, the topology of a total of 58 proteins probably has been correctly predicted. Based on these results, ab initio protein structure prediction is becoming a practical approach.},
  keywords = {Algorithms, Bacterial, Databases as Topic, Genome, Models, Molecular, Monte Carlo Method, Mycoplasma, Mycoplasma: genetics, Protein Folding, Proteins, Proteins: chemistry, Software},
  issn     = {0027-8424},
  doi      = {10.1073/pnas.092135699},
  url      = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=122890\&tool=pmcentrez\&rendertype=abstract},
}