@article {567, title = {Integrative modeling of diverse protein-peptide systems using CABS-dock}, journal = {PLoS Comput Biol.}, volume = {5;19(7)}, year = {2023}, pages = {e1011275}, abstract = {The CABS model can be applied to a wide range of protein-protein and protein-peptide molecular modeling tasks, such as simulating folding pathways, predicting structures, docking, and analyzing the structural dynamics of molecular complexes. In this work, we use the CABS-dock tool in two diverse modeling tasks: 1) predicting the structures of amyloid protofilaments and 2) identifying cleavage sites in the peptide substrates of proteolytic enzymes. In the first case, simulations of the simultaneous docking of amyloidogenic peptides indicated that the CABS model can accurately predict the structures of amyloid protofilaments which have an in-register parallel architecture. Scoring based on a combination of symmetry criteria and estimated interaction energy values for bound monomers enables the identification of protofilament models that closely match their experimental structures for 5 out of 6 analyzed systems. For the second task, it has been shown that CABS-dock coarse-grained docking simulations can be used to identify the positions of cleavage sites in the peptide substrates of proteolytic enzymes. The cleavage site position was correctly identified for 12 out of 15 analyzed peptides. When combined with sequence-based methods, these docking simulations may lead to an efficient way of predicting cleavage sites in degraded proteins. 
The method also provides the atomic structures of enzyme-substrate complexes, which can give insights into enzyme-substrate interactions that are crucial for the design of new potent inhibitors.}, author = {Wojciech Pulawski and Michal Kolinski and Andrzej Koli{\'n}ski} } @article {563, title = {Synthetic Transition from Thiourea-Based Compounds to Tetrazole Derivatives: Structure and Biological Evaluation of Synthesized New N-(Furan-2-ylmethyl)-1H-tetrazol-5-amine Derivatives}, journal = {Molecules}, volume = {26}, year = {2021}, abstract = {Twelve novel derivatives of N-(furan-2-ylmethyl)-1H-tetrazol-5-amine were synthesized. For obtained compound 8, its corresponding substrate single crystals were isolated and X-ray diffraction experiments were completed. In the initial stage of research, in silico structure-based pharmacological prediction was conducted. All compounds were screened for their antibacterial and antimycobacterial activities using standard and clinical strains. The cytotoxic activity was evaluated against a panel of human cancer cell lines, in contrast to normal (HaCaT) cell lines, by using the MTT method. All examined derivatives were found to be noncytotoxic against normal cell lines. Within the studied group, compound 6 showed the most promising results in antimicrobial studies. It inhibited four hospital S. epidermidis rods{\textquoteright} growth, when applied at the amount of 4 {\textmu}g/mL. However, the most susceptible to the presence of compound 6 was S. epidermidis T 5501 851/19 clinical strain, for which the MIC value was only 2 {\textmu}g/mL. Finally, a pharmacophore model was established based on lead compounds from this and our previous work.}, issn = {1420-3049}, doi = {10.3390/molecules26020323}, url = {https://www.mdpi.com/1420-3049/26/2/323}, author = {Daniel Szulczyk and Anna Bielenica and Piotr Roszkowski and Micha{\l} A. 
Dobrowolski and Wioletta Olejarz and Sebastian Kmiecik and Malgorzata Podsiad and Marta Struga} } @article {560, title = {Docking interactions determine early cleavage events in insulin proteolysis by pepsin: Experiment and simulation}, journal = {International Journal of Biological Macromolecules}, volume = {149}, year = {2020}, pages = {1151-1160}, abstract = {In silico modelling of cascade enzymatic proteolysis is an exceedingly complex and challenging task. Here, we study partial proteolysis of insulin by pepsin: a process leading to the release of a highly amyloidogenic two chain {\textquoteleft}H-fragment{\textquoteright}. The H-fragment retains several cleavage sites for pepsin. However, under favorable conditions H-monomers rapidly self-assemble into proteolysis-resistant amyloid fibrils whose composition provides snapshots of early and intermediate stages of the proteolysis. In this work, we report a remarkable agreement of experimentally determined and simulation-predicted cleavage sites on different stages of the proteolysis. Prediction of cleavage sites was based on the comprehensive analysis of the docking interactions from direct simulation of coupled folding and binding of insulin (or its cleaved derivatives) to pepsin. 
The most frequent interactions were found to be between the pepsin{\textquoteright}s active site, or its direct vicinity, and the experimentally determined insulin cleavage sites, which suggest that the docking interactions govern the proteolytic process.}, keywords = {Cleavage site prediction, Insulin, Pepsin, Peptide docking, Protein degradation pathways, Proteolysis}, issn = {0141-8130}, doi = {https://doi.org/10.1016/j.ijbiomac.2020.01.253}, url = {https://www.sciencedirect.com/science/article/pii/S0141813019401906}, author = {Micha{\l} Koli{\'n}ski and Sebastian Kmiecik and Robert Dec and Marcin Piejko and Pawe{\l} Mak and Wojciech Dzwolak} } @article {559, title = {Isoxazole-containing 5' mRNA cap analogues as inhibitors of the translation initiation process}, journal = {Bioorganic Chemistry}, volume = {96}, year = {2020}, pages = {103583}, abstract = {Herein we describe a synthesis of new isoxazole-containing 5' mRNA cap analogues via a cycloaddition reaction. The obtained analogues show a capability to inhibit cap-dependent translation in vitro and are characterized by a new binding mode in which an isoxazolic ring, instead of guanine, is involved in the stacking effect. 
Our study provides valuable information toward designing new compounds that can be potentially used as anticancer therapeutics.}, keywords = {Cap analogue, Cycloaddition reaction, Isoxazol, mRNA, Translation initiation}, issn = {0045-2068}, doi = {https://doi.org/10.1016/j.bioorg.2020.103583}, url = {https://www.sciencedirect.com/science/article/pii/S004520681931819X}, author = {Karolina Piecyk and Maciej Lukaszewicz and Karol Kamel and Maria Janowska and Paulina Pietrow and Sebastian Kmiecik and Marzena Jankowska-Anyszka} } @article {555, title = {Aggrescan3D (A3D) 2.0: prediction and engineering of protein solubility}, journal = {Nucleic Acids Research}, volume = {47}, year = {2019}, month = {05}, pages = {W300-W307}, abstract = {Protein aggregation is a hallmark of a growing number of human disorders and constitutes a major bottleneck in the manufacturing of therapeutic proteins. Therefore, there is a strong need of in-silico methods that can anticipate the aggregative properties of protein variants linked to disease and assist the engineering of soluble protein-based drugs. A few years ago, we developed a method for structure-based prediction of aggregation properties that takes into account the dynamic fluctuations of proteins. The method has been made available as the Aggrescan3D (A3D) web server and applied in numerous studies of protein structure-aggregation relationship. Here, we present a major update of the A3D web server to version 2.0. The new features include: extension of dynamic calculations to significantly larger and multimeric proteins, simultaneous prediction of changes in protein solubility and stability upon mutation, rapid screening for functional protein variants with improved solubility, a REST-ful service to incorporate A3D calculations in automatic pipelines, and a new, enhanced web server interface. 
A3D 2.0 is freely available at: http://biocomp.chem.uw.edu.pl/A3D2/}, issn = {0305-1048}, doi = {10.1093/nar/gkz321}, url = {https://doi.org/10.1093/nar/gkz321}, author = {Aleksander Kuriata and Valentin Iglesias and Jordi Pujols and Mateusz Kurcinski and Sebastian Kmiecik and Salvador Ventura} } @article {546, title = {Modeling of Disordered Protein Structures Using Monte Carlo Simulations and Knowledge-Based Statistical Force Fields}, journal = {International Journal of Molecular Sciences}, volume = {20}, year = {2019}, type = {Journal Article}, chapter = {606}, abstract = {The description of protein disordered states is important for understanding protein folding mechanisms and their functions. In this short review, we briefly describe a simulation approach to modeling protein interactions, which involve disordered peptide partners or intrinsically disordered protein regions, and unfolded states of globular proteins. It is based on the CABS coarse-grained protein model that uses a Monte Carlo (MC) sampling scheme and a knowledge-based statistical force field. We review several case studies showing that description of protein disordered states resulting from CABS simulations is consistent with experimental data. The case studies comprise investigations of protein(-)peptide binding and protein folding processes. The CABS model has been recently made available as the simulation engine of multiscale modeling tools enabling studies of protein(-)peptide docking and protein flexibility. Those tools offer customization of the modeling process, driving the conformational search using distance restraints, reconstruction of selected models to all-atom resolution, and simulation of large protein systems in a reasonable computational time. 
Therefore, CABS can be combined in integrative modeling pipelines incorporating experimental data and other modeling tools of various resolution.}, keywords = {CABS model MC simulations coarse-grained disordered protein protein structure statistical force fields}, issn = {1422-0067 (Electronic) 1422-0067 (Linking)}, doi = {10.3390/ijms20030606}, url = {http://www.ncbi.nlm.nih.gov/pubmed/30708941}, author = {Maciej Ciemny and Aleksandra E. Badaczewska-Dawid and Monika Pikuzinska and Andrzej Koli{\'n}ski and Sebastian Kmiecik} } @inbook {540, title = {Protein Structure Prediction Using Coarse-Grained Models}, booktitle = {Computational Methods to Study the Structure and Dynamics of Biomolecules and Biomolecular Processes: From Bioinformatics to Molecular Quantum Mechanics}, year = {2019}, pages = {27{\textendash}59}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, abstract = {The knowledge of the three-dimensional structure of proteins is crucial for understanding many important biological processes. Most of the biologically relevant protein systems are too large for classical, atomistic molecular modeling tools. In such cases, coarse-grained (CG) models offer various opportunities for efficient conformational sampling and thus prediction of the three-dimensional structure. A variety of CG models have been proposed, each based on a similar framework consisting of a set of conceptual components such as protein representation, force field, sampling, etc. In this chapter we discuss these components, highlighting ideas which have proven to be the most successful. 
As CG methods are usually part of multistage procedures, we also describe approaches used for the incorporation of homology data and all-atom reconstruction methods.}, isbn = {978-3-319-95843-9}, doi = {10.1007/978-3-319-95843-9_2}, url = {https://doi.org/10.1007/978-3-319-95843-9_2}, author = {Maciej Blaszczyk and Dominik Gront and Sebastian Kmiecik and Mateusz Kurcinski and Michal Kolinski and Maciej Ciemny and Katarzyna Ziolkowska and Marta Panek and Andrzej Koli{\'n}ski} } @article {529, title = {Synthesis, structural and antimicrobial studies of type II topoisomerase-targeted copper(II) complexes of 1,3-disubstituted thiourea ligands}, journal = {Journal of Inorganic Biochemistry}, volume = {182}, year = {2018}, pages = {61 - 70}, abstract = {A series of Cu(II) complexes of 3-(trifluoromethyl)phenylthiourea derivatives was synthesized. Their structural properties were investigated by spectroscopic techniques (infrared and electron paramagnetic resonance), as well as molecular modeling. All studied coordination compounds are mononuclear complexes containing two chelating ligands bonded to the metal cation via S and deprotonated N atoms. The new chelates were evaluated for their antimicrobial potency. The complex of 1-(3,4-dichlorophenyl)-3-[3-(trifluoromethyl)phenyl]thiourea (3) presented the highest activity against Gram-positive pathogens, even stronger than the activity of its non-complexed counterpart and the reference drug. The compound also prevented the biofilm formation of methicillin-resistant and standard strains of staphylococcal cocci. The title derivatives were found to be effective inhibitors of DNA gyrase and topoisomerase IV isolated from Staphylococcus aureus. 
The binding modes of the ligand L3 with DNA gyrase and topoisomerase IV were presented.}, keywords = {Copper complexes, DNA gyrase, docking, FTIR, Thiourea}, issn = {0162-0134}, doi = {https://doi.org/10.1016/j.jinorgbio.2018.01.005}, url = {http://www.sciencedirect.com/science/article/pii/S016201341730692X}, author = {Anna Bielenica and Aleksandra Drzewiecka-Antonik and Pawe{\l} Rejmak and Joanna Stefa{\'n}ska and Micha{\l} Koli{\'n}ski and Sebastian Kmiecik and Bogdan Lesyng and Marta W{\l}odarczyk and Piotr Pietrzyk and Marta Struga} } @article {521, title = {Biofunctionalisation of p-doped silicon with cytochrome c553 minimises charge recombination and enhances photovoltaic performance of the all-solid-state photosystem I-based biophotoelectrode}, journal = {RSC Advances}, volume = {7}, year = {2017}, pages = {47854-47866}, abstract = {Surface-directed passivation of p-doped silicon (Si) substrate was achieved by its biofunctionalisation with hexahistidine (His6)-tagged cytochrome c553 (cyt c553), a soluble electroactive photosynthetic protein responsible for electron donation to photooxidised photosystem I (PSI). Five distinct variants of cyt c553 were genetically engineered by introducing the specific linker peptides of 0{\textendash}19 amino acids (AA) in length between the cyt c553 holoprotein and a C-terminal His6-tag, the latter being the affinity {\textquoteleft}anchor{\textquoteright} used for the specific immobilisation of this protein on the semiconductor surface. Calculation of 2D Gibbs free energy maps for the five cyt c553 variants showed a significantly higher number of thermodynamically feasible conformations of immobilised cyt c variants containing longer linker peptides. Here we show that the distinct cyt c553-based Si bioelectrodes display some characteristics of the p{\textendash}n-type diodes, albeit varying in the level of dark saturation current J0 considered as the charge recombination parameter. 
These combined bioinformatic and electrochemical analyses indicate that the cyt c553 variants with longer linker peptides, up to 19AA in length, allow for more structural flexibility of immobilised cyt c553 in terms of both, orientation and distance of the haem group with respect to the Si surface, and promote the efficient biopassivation of the semiconductor substrate. Incorporation of the specifically immobilised 19AA cyt c553 variant into the all-solid-state biophotoelectrodes containing light harvesting PSI module enhanced biophotovoltaic performance of the PSI biophotoelectrode compared to the analogous device devoid of cyt c553.}, doi = {10.1039/c7ra10895h}, url = {http://pubs.rsc.org/en/content/articlepdf/2017/ra/c7ra10895h}, author = {Julian David Janna Olmos and Philippe Becquet and Dominik Gront and Jaros{\l}aw Sar and Andrzej D{\k a}browski and Grzegorz Gawlik and Marian Teodorczyk and Dorota Pawlak and Joanna Kargul} } @inbook {488, title = {Predicting real-valued protein residue fluctuation using FlexPred}, booktitle = {Methods in Molecular Biology}, volume = {1484}, year = {2017}, pages = {175-186}, abstract = {The conventional view of a protein structure as static provides only a limited picture.There is increasing evidence that protein dynamics are often vital to protein function including interaction with partners such as other proteins, nucleic acids, and small molecules. Considering flexibility is also important in applications such as computational protein docking and protein design. While residue flexibility is partially indicated by experimental measures such as the B-factor from X-ray crystallography and ensemble fluctuation from nuclear magnetic resonance (NMR) spectroscopy as well as computational molecular dynamics (MD) simulation, these techniques are resource-intensive. 
In this chapter, we describe the web server and standalone version of FlexPred, which rapidly predicts absolute per-residue fluctuation from a three-dimensional protein structure. On a set of 592 non-redundant structures, comparing the fluctuations predicted by FlexPred to the observed fluctuations in MD simulations showed an average correlation coefficient of 0.669 and an average root mean square error of 1.07 {\r A}. FlexPred is available at http://kiharalab.org/flexPred/.}, doi = {10.1007/978-1-4939-6406-2_13}, author = {Lenna Peterson and Michal Jamroz and Andrzej Koli{\'n}ski and Daisuke Kihara} } @article {493, title = {Coarse-grained simulations of membrane insertion and folding of small helical proteins using CABS model}, journal = {Journal of Chemical Information and Modeling}, volume = {56}, year = {2016}, pages = {2207{\textendash}2215}, abstract = {The CABS coarse-grained model is a well-established tool for modeling globular proteins (predicting their structure, dynamics and interactions). Here we introduce an extension of CABS representation and force field (CABS-membrane) to the modeling of the effect of biological membrane environment on the structure of membrane proteins. We validate the CABS-membrane model in folding simulations of 10 short helical membrane proteins not using any knowledge about their structure. The simulations start from random protein conformations placed outside the membrane environment and allow for full flexibility of the modeled proteins during their spontaneous insertion into the membrane. In the resulting trajectories, we have found models close to the experimental membrane structures. We also attempted to select the correctly folded models using simple filtering followed by structural clustering combined with reconstruction to all-atom representation and all-atom scoring. 
In conclusion, the CABS-membrane model is a promising approach for further development towards modeling of large protein-membrane systems.}, doi = {10.1021/acs.jcim.6b00350}, url = {https://pubs.acs.org/doi/abs/10.1021/acs.jcim.6b00350}, author = {Wojciech Pulawski and Michal Jamroz and Michal Kolinski and Andrzej Koli{\'n}ski and Sebastian Kmiecik} } @article {499, title = {Protein-peptide molecular docking with large-scale conformational changes: the p53-MDM2 interaction}, journal = {Scientific Reports}, volume = {6}, year = {2016}, pages = {37532}, abstract = {Protein-peptide interactions are often associated with large-scale conformational changes that are difficult to study either by classical molecular modeling or by experiment. Recently, we have developed the CABS-dock method for flexible protein-peptide docking that enables large-scale rearrangements of the protein chain. In this study, we use CABS-dock to investigate the binding of the p53-MDM2 complex, an element of the cell cycle regulation system crucial for anti-cancer drug design. Experimental data suggest that p53-MDM2 binding is affected by significant rearrangements of a lid region - the N-terminal highly flexible MDM2 fragment; however, the details are not clear. The large size of the highly flexible MDM2 fragments makes p53-MDM2 intractable for exhaustive binding dynamics studies using atomistic models. We performed extensive dynamics simulations using the CABS-dock method, including large-scale structural rearrangements of MDM2 flexible regions. Without a priori knowledge of the p53 peptide structure or its binding site, we obtained near-native models of the p53-MDM2 complex. The simulation results match well the experimental data and provide new insights into the possible role of the lid fragment in p53 binding. 
The presented case study demonstrates that CABS-dock methodology opens up new opportunities for protein-peptide docking with large-scale changes of the protein receptor structure.}, author = {Maciej Ciemny and Aleksander Debinski and Marta Paczkowska and Andrzej Koli{\'n}ski and Mateusz Kurcinski and Sebastian Kmiecik} } @article {444, title = {AGGRESCAN3D (A3D): server for prediction of aggregation properties of protein structures}, journal = {Nucleic Acids Research}, volume = {43 (W1)}, year = {2015}, pages = {W306-W313}, abstract = {Protein aggregation underlies an increasing number of disorders and constitutes a major bottleneck in the development of therapeutic proteins. Our present understanding on the molecular determinants of protein aggregation has crystalized in a series of predictive algorithms to identify aggregation-prone sites. A majority of these methods rely only on sequence. Therefore, they find difficulties to predict the aggregation properties of folded globular proteins, where aggregation-prone sites are often not contiguous in sequence or buried inside the native structure. The AGGRESCAN3D (A3D) server overcomes these limitations by taking into account the protein structure and the experimental aggregation propensity scale from the well-established AGGRESCAN method. Using the A3D server, the identified aggregation-prone residues can be virtually mutated to design variants with increased solubility, or to test the impact of pathogenic mutations. Additionally, A3D server enables to take into account the dynamic fluctuations of protein structure in solution, which may influence aggregation propensity. This is possible in A3D Dynamic Mode that exploits the CABS-flex approach for the fast simulations\ of flexibility of globular proteins. 
The A3D server can be accessed at http://biocomp.chem.uw.edu.pl/A3D/}, author = {Rafael Zambrano and Michal Jamroz and Agata Szczasiuk and Jordi Pujols and Sebastian Kmiecik and Salvador Ventura} } @inbook {346, title = {Coarse-Grained Protein Models in Structure Prediction}, booktitle = {Computational Methods to Study the Structure and Dynamics of Biomolecules and Biomolecular Processes, Springer Series in Bio-/Neuroinformatics, Adam Liwo, Ed.}, volume = {1}, number = {1}, year = {2014}, pages = {25-53}, abstract = {The knowledge of the three-dimensional structure of proteins is crucial for understanding many important biological processes. Most biologically important proteins are too large to handle for the classical simulation tools. In such cases, coarse-grained (CG) models nowadays offer various opportunities for efficient conformational sampling and thus prediction of the three-dimensional structure. A variety of CG models have been proposed, each based on a similar framework consisting of a set of conceptual components such as protein representation, force field, sampling, etc. In this chapter we discuss these components, highlighting ideas which have proven to be the most successful. 
As CG methods are usually part of multistage procedures, we also describe approaches used for the incorporation of homology data and all-atom reconstruction methods.}, author = {Maciej Blaszczyk and Dominik Gront and Sebastian Kmiecik and Katarzyna Ziolkowska and Marta Panek and Andrzej Koli{\'n}ski} } @article {292, title = {13,13-Dimethyl-des-C,D analogues of (20S)-1α,25-dihydroxy-2-methylene-19-norvitamin D$_{3}$ (2MD): total synthesis, docking to the VDR, and biological evaluation}, journal = {Bioorganic \& Medicinal Chemistry}, volume = {19}, year = {2011}, month = {2011 Dec 1}, pages = {7205-20}, abstract = {As a continuation of our studies focused on the vitamin D compounds lacking the C,D-hydrindane system, 13,13-dimethyl-des-C,D analogues of (20S)-1α,25-dihydroxy-2-methylene-19-norvitamin D(3) (2, 2MD) were prepared by total synthesis. The known cyclohexanone 30, a precursor of the desired A-ring phosphine oxide 11, was synthesized starting with the keto acetal 13, whereas the aldehyde 12, constituting an acyclic {\textquoteright}upper{\textquoteright} building block, was obtained from the isomeric esters 34, prepared previously in our laboratory. The commercial 1,4-cyclohexanedione monoethylene ketal (13) was enantioselectively α-hydroxylated utilizing the α-aminoxylation process catalyzed by l-proline, and the introduced hydroxy group was protected as a TBS, TPDPS, and SEM ether. Then the keto group in the obtained compounds 15-17 was methylenated and the allylic hydroxylation was performed with selenium dioxide and pyridine N-oxide. After separation of the isomers, the newly introduced hydroxy group was protected and the ketal group hydrolyzed to yield the corresponding protected (3R,5R)-3,5-dihydroxycyclohexanones 30-32. The esters 34, starting compounds for the C,D-fragment 12, were first α-methylated, then reduced and the resulted primary alcohols 36 were deoxygenated using the Barton-McCombie protocol. 
Primary hydroxy group in the obtained diether 38 was deprotected and oxidized to furnish the aldehyde 12. The Wittig-Horner coupling of the latter with the anion of the phosphine oxide 11, followed by hydroxyl deprotection furnished two isomeric 13,13-dimethyl-des-C,D analogues of 2MD (compounds 10 and 42) differing in configuration of their 7,8-double bond. Pure vitamin D analogues were isolated by HPLC and their biological activity was examined. The in vitro tests indicated that, compared to the analogue 7, unsubstituted at C-13, the synthesized vitamin D analogue 10 showed markedly improved VDR binding ability, significantly enhanced HL-60 differentiation activity as well as increased transcriptional potency. Docking simulations provided a rational explanation for the observed binding affinity of these ligands to the VDR. Biological in vivo tests proved that des-C,D compound 10 retained some intestinal activity. Its geometrical isomer 42 was devoid of any biological activity.}, keywords = {Animals, Calcitriol, Cell Differentiation, Crystallography, X-Ray, HL-60 Cells, Humans, Male, Models, Molecular, Molecular Conformation, Rats, Receptors, Calcitriol, Structure-Activity Relationship}, issn = {1464-3391}, doi = {10.1016/j.bmc.2011.09.048}, author = {Katarzyna Plonska-Ocypa and Izabela Sibilska and Rafal R. Sicinski and Wanda Sicinska and Lori A. Plum and Hector F. DeLuca} } @article {Kloczkowski2009, title = {Distance matrix-based approach to protein structure prediction}, journal = {Journal of Structural and Functional Genomics}, volume = {10}, number = {1}, year = {2009}, month = {mar}, pages = {67{\textendash}81}, abstract = {
Much structural information is encoded in the internal distances; a distance matrix-based approach can be used to predict protein structure and dynamics, and for structural refinement. Our approach is based on the square distance matrix D = [r(ij)(2)] containing all square distances between residues in proteins. This distance matrix contains more information than the contact matrix C, that has elements of either 0 or 1 depending on whether the distance r (ij) is greater or less than a cutoff value r (cutoff). We have performed spectral decomposition of the distance matrices D = sigma lambda(k)V(k)V(kT), in terms of eigenvalues lambda kappa and the corresponding eigenvectors v kappa and found that it contains at most five nonzero terms. A dominant eigenvector is proportional to r (2){\textendash}the square distance of points from the center of mass, with the next three being the principal components of the system of points. By predicting r (2) from the sequence we can approximate a distance matrix of a protein with an expected RMSD value of about 7.3 A, and by combining it with the prediction of the first principal component we can improve this approximation to 4.0 A. We can also explain the role of hydrophobic interactions for the protein structure, because r is highly correlated with the hydrophobic profile of the sequence. Moreover, r is highly correlated with several sequence profiles which are useful in protein structure prediction, such as contact number, the residue-wise contact order (RWCO) or mean square fluctuations (i.e. crystallographic temperature factors). We have also shown that the next three components are related to spatial directionality of the secondary structure elements, and they may be also predicted from the sequence, improving overall structure prediction. 
We have also shown that the large number of available HIV-1 protease structures provides a remarkable sampling of conformations, which can be viewed as direct structural information about the dynamics. After structure matching, we apply principal component analysis (PCA) to obtain the important apparent motions for both bound and unbound structures. There are significant similarities between the first few key motions and the first few low-frequency normal modes calculated from a static representative structure with an elastic network model (ENM) that is based on the contact matrix C (related to D), strongly suggesting that the variations among the observed structures and the corresponding conformational changes are facilitated by the low-frequency, global motions intrinsic to the structure. Similarities are also found when the approach is applied to an NMR ensemble, as well as to atomic molecular dynamics (MD) trajectories. Thus, a sufficiently large number of experimental structures can directly provide important information about protein dynamics, but ENM can also provide a similar sampling of conformations. Finally, we use distance constraints from databases of known protein structures for structure refinement. We use the distributions of distances of various types in known protein structures to obtain the most probable ranges or the mean-force potentials for the distances. We then impose these constraints on structures to be refined or include the mean-force potentials directly in the energy minimization so that more plausible structural models can be built. This approach has been successfully used by us in 2006 in the CASPR structure refinement (http://predictioncenter.org/caspR).
}, keywords = {Binding Sites, Computer Simulation, Databases, Models, Molecular, Principal Component Analysis, Protein, Protein Conformation, Proteins, Proteins: chemistry}, issn = {1570-0267}, doi = {10.1007/s10969-009-9062-2}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3018873\&tool=pmcentrez\&rendertype=abstract}, author = {Andrzej Kloczkowski and Robert L. Jernigan and Zhijun Wu and Guang Song and Lei Yang and Andrzej Koli{\'n}ski and Piotr Pokarowski} } @article {Kawashima2008, title = {AAindex: amino acid index database, progress report 2008}, journal = {Nucleic Acids Research}, volume = {36}, number = {Database issue}, year = {2008}, month = {jan}, pages = {D202{\textendash}5}, abstract = {AAindex is a database of numerical indices representing various physicochemical and biochemical properties of amino acids and pairs of amino acids. We have added a collection of protein contact potentials to the AAindex as a new section. Accordingly AAindex consists of three sections now: AAindex1 for the amino acid index of 20 numerical values, AAindex2 for the amino acid substitution matrix and AAindex3 for the statistical protein contact potentials. All data are derived from published literature. The database can be accessed through the DBGET/LinkDB system at GenomeNet (http://www.genome.jp/dbget-bin/www\_bfind?aaindex) or downloaded by anonymous FTP (ftp://ftp.genome.jp/pub/db/community/aaindex/).
}, keywords = {Amino Acids, Amino Acids: chemistry, Databases, Internet, Protein, Proteins, Proteins: chemistry}, issn = {1362-4962}, doi = {10.1093/nar/gkm998}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2238890\&tool=pmcentrez\&rendertype=abstract}, author = {Kawashima, Shuichi and Piotr Pokarowski and Pokarowska, Maria and Andrzej Koli{\'n}ski and Katayama, Toshiaki and Kanehisa, Minoru} } @article {Knizewski2008, title = {Uncharacterized DUF1574 leptospira proteins are SGNH hydrolases}, journal = {Cell Cycle (Georgetown, Tex.)}, volume = {7}, number = {4}, year = {2008}, month = {feb}, pages = {542{\textendash}4}, keywords = {Amino Acid Sequence, Bacterial Proteins, Bacterial Proteins: genetics, Base Sequence, Computational Biology, DNA, Hydrolases, Hydrolases: genetics, Leptospira, Leptospira: enzymology, Models, Molecular, Molecular Sequence Data, Sequence Alignment, Sequence Analysis}, issn = {1551-4005}, url = {http://www.ncbi.nlm.nih.gov/pubmed/18235229}, author = {Lukasz Knizewski and Kamil Steczkiewicz and Krzysztof Kuchta and Lucjan Wyrwicz and Dariusz Plewczynski and Andrzej Koli{\'n}ski and Leszek Rychlewski and Krzysztof Ginalski} } @article {Pokarowski2007, title = {Ideal amino acid exchange forms for approximating substitution matrices}, journal = {Proteins: Structure, Function, Bioinformatics}, volume = {69}, year = {2007}, pages = {379{\textendash}393}, abstract = {We have analyzed 29 published substitution matrices (SMs) and five statistical protein contact potentials (CPs) for comparison. We find that popular, {\textquoteleft}classical{\textquoteright} SMs obtained mainly from sequence alignments of globular proteins are mostly correlated by at least a value of 0.9. The BLOSUM62 is the central element of this group. A second group includes SMs derived from alignments of remote homologs or transmembrane proteins. These matrices correlate better with classical SMs (0.8) than among themselves (0.7). 
A third group consists of intermediate links between SMs and CPs - matrices and potentials that exhibit mutual correlations of at least 0.8. Next, we show that SMs can be approximated with a correlation of 0.9 by expressions $c_0 + x_i x_j + y_i y_j + z_i z_j$, $1 \le i, j \le 20$, where $c_0$ is a constant and the vectors $(x_i)$, $(y_i)$, $(z_i)$ correlate highly with hydrophobicity, molecular volume and coil preferences of amino acids, respectively. The present paper is the continuation of our work (Pokarowski et al., Proteins 2005;59:49{\textendash}57), where similar approximation were used to derive ideal amino acid interaction forms from CPs. Both approximations allow us to understand general trends in amino acid similarity and can help improve multiple sequence alignments using the fast Fourier transform (MAFFT), fast threading or another methods based on alignments of physicochemical profiles of protein sequences. The use of this approximation in sequence alignments instead of a classical SM yields results that differ by less than 5\%. Intermediate links between SMs and CPs, new formulas for approximating these matrices, and the highly significant dependence of classical SMs on coil preferences are new findings.},
  keywords = {protein contact potentials, protein structure prediction, Sequence Alignment, substitution matrices},
  doi      = {10.1002/prot.21509},
  url      = {http://onlinelibrary.wiley.com/doi/10.1002/prot.21509/full},
  author   = {Piotr Pokarowski and Andrzej Kloczkowski and Szymon Nowakowski and Maria Pokarowska and Robert L. 
Jernigan and Andrzej Koli{\'n}ski} } @article {Pokarowski2005, title = {Inferring ideal amino acid interaction forms from statistical protein contact potentials}, journal = {Proteins}, volume = {59}, number = {1}, year = {2005}, month = {apr}, pages = {49{\textendash}57}, abstract = {We have analyzed 29 different published matrices of protein pairwise contact potentials (CPs) between amino acids derived from different sets of proteins, either crystallographic structures taken from the Protein Data Bank (PDB) or computer-generated decoys. Each of the CPs is similar to 1 of the 2 matrices derived in the work of Miyazawa and Jernigan (Proteins 1999;34:49-68). The CP matrices of the first class can be approximated with a correlation of order 0.9 by the formula e(ij) = h(i) + h(j), 1