Publications
Preprints available on bioRxiv.
Humphreys, Ian R.; Zhang, Jing; Baek, Minkyung; Wang, Yaxi; Krishnakumar, Aditya; Pei, Jimin; Anishchenko, Ivan; Tower, Catherine A.; Jackson, Blake A.; Warrier, Thulasi; Hung, Deborah T.; Peterson, S. Brook; Mougous, Joseph D.; Cong, Qian; Baker, David
Protein interactions in human pathogens revealed through deep learning Journal Article
In: Nature Microbiology, 2024, ISSN: 2058-5276.
@article{Humphreys2024,
title = {Protein interactions in human pathogens revealed through deep learning},
author = {Ian R. Humphreys and Jing Zhang and Minkyung Baek and Yaxi Wang and Aditya Krishnakumar and Jimin Pei and Ivan Anishchenko and Catherine A. Tower and Blake A. Jackson and Thulasi Warrier and Deborah T. Hung and S. Brook Peterson and Joseph D. Mougous and Qian Cong and David Baker},
url = {https://www.nature.com/articles/s41564-024-01791-x, Nature Microbiology [Open Access]},
doi = {10.1038/s41564-024-01791-x},
issn = {2058-5276},
year = {2024},
date = {2024-09-18},
urldate = {2024-09-18},
journal = {Nature Microbiology},
publisher = {Springer Science and Business Media LLC},
abstract = {Identification of bacterial protein–protein interactions and predicting the structures of these complexes could aid in the understanding of pathogenicity mechanisms and developing treatments for infectious diseases. Here we developed RoseTTAFold2-Lite, a rapid deep learning model that leverages residue–residue coevolution and protein structure prediction to systematically identify and structurally characterize protein–protein interactions at the proteome-wide scale. Using this pipeline, we searched through 78 million pairs of proteins across 19 human bacterial pathogens and identified 1,923 confidently predicted complexes involving essential genes and 256 involving virulence factors. Many of these complexes were not previously known; we experimentally tested 12 such predictions, and half of them were validated. The predicted interactions span core metabolic and virulence pathways ranging from post-transcriptional modification to acid neutralization to outer-membrane machinery and should contribute to our understanding of the biology of these important pathogens and the design of drugs to combat them.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Mansoor, Sanaa; Baek, Minkyung; Park, Hahnbeom; Lee, Gyu Rie; Baker, David
Protein Ensemble Generation Through Variational Autoencoder Latent Space Sampling Journal Article
In: Journal of Chemical Theory and Computation, 2024.
@article{Mansoor2024,
title = {Protein Ensemble Generation Through Variational Autoencoder Latent Space Sampling},
author = {Sanaa Mansoor and Minkyung Baek and Hahnbeom Park and Gyu Rie Lee and David Baker},
url = {https://pubs.acs.org/doi/10.1021/acs.jctc.3c01057, J. Chem. Theory Comput.
https://www.bakerlab.org/wp-content/uploads/2024/05/mansoor-et-al-2024-protein-ensemble-generation-through-variational-autoencoder-latent-space-sampling.pdf, PDF},
doi = {10.1021/acs.jctc.3c01057},
year = {2024},
date = {2024-03-28},
urldate = {2024-04-09},
journal = {Journal of Chemical Theory and Computation},
publisher = {American Chemical Society (ACS)},
abstract = {Mapping the ensemble of protein conformations that contribute to function and can be targeted by small molecule drugs remains an outstanding challenge. Here, we explore the use of variational autoencoders for reducing the challenge of dimensionality in the protein structure ensemble generation problem. We convert high-dimensional protein structural data into a continuous, low-dimensional representation, carry out a search in this space guided by a structure quality metric, and then use RoseTTAFold guided by the sampled structural information to generate 3D structures. We use this approach to generate ensembles for the cancer relevant protein K-Ras, train the VAE on a subset of the available K-Ras crystal structures and MD simulation snapshots, and assess the extent of sampling close to crystal structures withheld from training. We find that our latent space sampling procedure rapidly generates ensembles with high structural quality and is able to sample within 1 Å of held-out crystal structures, with a consistency higher than that of MD simulation or AlphaFold2 prediction. The sampled structures sufficiently recapitulate the cryptic pockets in the held-out K-Ras structures to allow for small molecule docking.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Krishna, Rohith; Wang, Jue; Ahern, Woody; Sturmfels, Pascal; Venkatesh, Preetham; Kalvet, Indrek; Lee, Gyu Rie; Morey-Burrows, Felix S.; Anishchenko, Ivan; Humphreys, Ian R.; McHugh, Ryan; Vafeados, Dionne; Li, Xinting; Sutherland, George A.; Hitchcock, Andrew; Hunter, C. Neil; Kang, Alex; Brackenbrough, Evans; Bera, Asim K.; Baek, Minkyung; DiMaio, Frank; Baker, David
Generalized biomolecular modeling and design with RoseTTAFold All-Atom Journal Article
In: Science, 2024.
@article{Krishna2024,
title = {Generalized biomolecular modeling and design with {RoseTTAFold} {All-Atom}},
author = {Rohith Krishna and Jue Wang and Woody Ahern and Pascal Sturmfels and Preetham Venkatesh and Indrek Kalvet and Gyu Rie Lee and Felix S. Morey-Burrows and Ivan Anishchenko and Ian R. Humphreys and Ryan McHugh and Dionne Vafeados and Xinting Li and George A. Sutherland and Andrew Hitchcock and C. Neil Hunter and Alex Kang and Evans Brackenbrough and Asim K. Bera and Minkyung Baek and Frank DiMaio and David Baker},
url = {https://www.science.org/stoken/author-tokens/ST-1739/full, Science [Full Access Link]
https://www.bakerlab.org/wp-content/uploads/2024/03/science.adl2528.pdf, PDF},
doi = {10.1126/science.adl2528},
year = {2024},
date = {2024-03-07},
urldate = {2024-03-07},
journal = {Science},
publisher = {American Association for the Advancement of Science (AAAS)},
abstract = {Deep learning methods have revolutionized protein structure prediction and design but are currently limited to protein-only systems. We describe RoseTTAFold All-Atom (RFAA) which combines a residue-based representation of amino acids and DNA bases with an atomic representation of all other groups to model assemblies containing proteins, nucleic acids, small molecules, metals, and covalent modifications given their sequences and chemical structures. By fine tuning on denoising tasks we obtain RFdiffusionAA, which builds protein structures around small molecules. Starting from random distributions of amino acid residues surrounding target small molecules, we design and experimentally validate, through crystallography and binding measurements, proteins that bind the cardiac disease therapeutic digoxigenin, the enzymatic cofactor heme, and the light harvesting molecule bilin.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Baek, Minkyung; McHugh, Ryan; Anishchenko, Ivan; Jiang, Hanlun; Baker, David; DiMaio, Frank
Accurate prediction of protein–nucleic acid complexes using RoseTTAFoldNA Journal Article
In: Nature Methods, 2023.
@article{Baek2023,
title = {Accurate prediction of protein–nucleic acid complexes using {RoseTTAFoldNA}},
author = {Minkyung Baek and Ryan McHugh and Ivan Anishchenko and Hanlun Jiang and David Baker and Frank DiMaio},
url = {https://www.nature.com/articles/s41592-023-02086-5, Nature Methods [Open Access]},
doi = {10.1038/s41592-023-02086-5},
year = {2023},
date = {2023-11-23},
urldate = {2023-11-23},
journal = {Nature Methods},
publisher = {Springer Science and Business Media LLC},
abstract = {Protein–RNA and protein–DNA complexes play critical roles in biology. Despite considerable recent advances in protein structure prediction, the prediction of the structures of protein–nucleic acid complexes without homology to known complexes is a largely unsolved problem. Here we extend the RoseTTAFold machine learning protein-structure-prediction approach to additionally predict nucleic acid and protein–nucleic acid complexes. We develop a single trained network, RoseTTAFoldNA, that rapidly produces three-dimensional structure models with confidence estimates for protein–DNA and protein–RNA complexes. Here we show that confident predictions have considerably higher accuracy than current state-of-the-art methods. RoseTTAFoldNA should be broadly useful for modeling the structure of naturally occurring protein–nucleic acid complexes, and for designing sequence-specific RNA and DNA-binding proteins.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Mansoor, Sanaa; Baek, Minkyung; Juergens, David; Watson, Joseph L.; Baker, David
Zero‐shot Mutation Effect Prediction on Protein Stability and Function using RoseTTAFold Journal Article
In: Protein Science, 2023.
@article{Mansoor2023,
title = {Zero-shot Mutation Effect Prediction on Protein Stability and Function using {RoseTTAFold}},
author = {Sanaa Mansoor and Minkyung Baek and David Juergens and Joseph L. Watson and David Baker},
url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/pro.4780, Protein Science
https://www.bakerlab.org/wp-content/uploads/2023/09/Protein-Science-2023-Mansoor.pdf, PDF},
doi = {10.1002/pro.4780},
year = {2023},
date = {2023-09-11},
urldate = {2023-09-11},
journal = {Protein Science},
publisher = {Wiley},
abstract = {Predicting the effects of mutations on protein function and stability is an outstanding challenge. Here, we assess the performance of a variant of RoseTTAFold jointly trained for sequence and structure recovery, RFjoint, for mutation effect prediction. Without any further training, we achieve comparable accuracy in predicting mutation effects for a diverse set of protein families using RFjoint to both another zero-shot model (MSA Transformer) and a model which requires specific training on a particular protein family for mutation effect prediction (DeepSequence). Thus, although the architecture of RFjoint was developed to address the protein design problem of scaffolding functional motifs, RFjoint acquired an understanding of the mutational landscapes of proteins during model training that is equivalent to that of recently developed large protein language models. The ability to simultaneously reason over protein structure and sequence could enable even more precise mutation effect predictions following supervised training on the task. These results suggest that RFjoint has a quite broad understanding of protein sequence-structure landscapes, and can be viewed as a joint model for protein sequence and structure which could be broadly useful for protein modeling.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Watson, Joseph L.; Juergens, David; Bennett, Nathaniel R.; Trippe, Brian L.; Yim, Jason; Eisenach, Helen E.; Ahern, Woody; Borst, Andrew J.; Ragotte, Robert J.; Milles, Lukas F.; Wicky, Basile I. M.; Hanikel, Nikita; Pellock, Samuel J.; Courbet, Alexis; Sheffler, William; Wang, Jue; Venkatesh, Preetham; Sappington, Isaac; Torres, Susana Vázquez; Lauko, Anna; De Bortoli, Valentin; Mathieu, Emile; Ovchinnikov, Sergey; Barzilay, Regina; Jaakkola, Tommi S.; DiMaio, Frank; Baek, Minkyung; Baker, David
De novo design of protein structure and function with RFdiffusion Journal Article
In: Nature, 2023.
@article{Watson2023,
title = {De novo design of protein structure and function with {RFdiffusion}},
author = {Watson, Joseph L.
and Juergens, David
and Bennett, Nathaniel R.
and Trippe, Brian L.
and Yim, Jason
and Eisenach, Helen E.
and Ahern, Woody
and Borst, Andrew J.
and Ragotte, Robert J.
and Milles, Lukas F.
and Wicky, Basile I. M.
and Hanikel, Nikita
and Pellock, Samuel J.
and Courbet, Alexis
and Sheffler, William
and Wang, Jue
and Venkatesh, Preetham
and Sappington, Isaac
and Torres, Susana Vázquez
and Lauko, Anna
and De Bortoli, Valentin
and Mathieu, Emile
and Ovchinnikov, Sergey
and Barzilay, Regina
and Jaakkola, Tommi S.
and DiMaio, Frank
and Baek, Minkyung
and Baker, David},
url = {https://www.nature.com/articles/s41586-023-06415-8, Nature
https://www.bakerlab.org/wp-content/uploads/2023/07/s41586-023-06415-8_reference.pdf, PDF (29MB)},
doi = {10.1038/s41586-023-06415-8},
year = {2023},
date = {2023-07-11},
journal = {Nature},
abstract = {There has been considerable recent progress in designing new proteins using deep learning methods. Despite this progress, a general deep learning framework for protein design that enables solution of a wide range of design challenges, including de novo binder design and design of higher order symmetric architectures, has yet to be described. Diffusion models have had considerable success in image and language generative modeling but limited success when applied to protein modeling, likely due to the complexity of protein backbone geometry and sequence-structure relationships. Here we show that by fine tuning the RoseTTAFold structure prediction network on protein structure denoising tasks, we obtain a generative model of protein backbones that achieves outstanding performance on unconditional and topology-constrained protein monomer design, protein binder design, symmetric oligomer design, enzyme active site scaffolding, and symmetric motif scaffolding for therapeutic and metal-binding protein design. We demonstrate the power and generality of the method, called RoseTTAFold Diffusion (RFdiffusion), by experimentally characterizing the structures and functions of hundreds of designed symmetric assemblies, metal binding proteins and protein binders. The accuracy of RFdiffusion is confirmed by the cryo-EM structure of a designed binder in complex with Influenza hemagglutinin which is nearly identical to the design model. In a manner analogous to networks which produce images from user-specified inputs, RFdiffusion enables the design of diverse functional proteins from simple molecular specifications.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Motmaen, Amir; Dauparas, Justas; Baek, Minkyung; Abedi, Mohamad H.; Baker, David; Bradley, Philip
Peptide-binding specificity prediction using fine-tuned protein structure prediction networks Journal Article
In: Proceedings of the National Academy of Sciences, 2023.
@article{nokey,
title = {Peptide-binding specificity prediction using fine-tuned protein structure prediction networks},
author = {Motmaen, Amir and Dauparas, Justas and Baek, Minkyung and Abedi, Mohamad H. and Baker, David and Bradley, Philip},
url = {https://www.pnas.org/doi/10.1073/pnas.2216697120, PNAS (Open Access)},
doi = {10.1073/pnas.2216697120},
year = {2023},
date = {2023-02-21},
urldate = {2023-02-21},
journal = {Proceedings of the National Academy of Sciences},
abstract = {Peptide-binding proteins play key roles in biology, and predicting their binding specificity is a long-standing challenge. While considerable protein structural information is available, the most successful current methods use sequence information alone, in part because it has been a challenge to model the subtle structural changes accompanying sequence substitutions. Protein structure prediction networks such as AlphaFold model sequence-structure relationships very accurately, and we reasoned that if it were possible to specifically train such networks on binding data, more generalizable models could be created. We show that placing a classifier on top of the AlphaFold network and fine-tuning the combined network parameters for both classification and structure prediction accuracy leads to a model with strong generalizable performance on a wide range of Class I and Class II peptide-MHC interactions that approaches the overall performance of the state-of-the-art NetMHCpan sequence-based method. The peptide-MHC optimized model shows excellent performance in distinguishing binding and non-binding peptides to SH3 and PDZ domains. This ability to generalize well beyond the training set far exceeds that of sequence-only models and should be particularly powerful for systems where less experimental data are available.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Dauparas, J.; Anishchenko, I.; Bennett, N.; Bai, H.; Ragotte, R. J.; Milles, L. F.; Wicky, B. I. M.; Courbet, A.; de Haas, R. J.; Bethel, N.; Leung, P. J. Y.; Huddy, T. F.; Pellock, S.; Tischer, D.; Chan, F.; Koepnick, B.; Nguyen, H.; Kang, A.; Sankaran, B.; Bera, A. K.; King, N. P.; Baker, D.
Robust deep learning–based protein sequence design using ProteinMPNN Journal Article
In: Science, 2022.
@article{Dauparas2022,
title = {Robust deep learning–based protein sequence design using {ProteinMPNN}},
author = {Dauparas, J.
and Anishchenko, I.
and Bennett, N.
and Bai, H.
and Ragotte, R. J.
and Milles, L. F.
and Wicky, B. I. M.
and Courbet, A.
and de Haas, R. J.
and Bethel, N.
and Leung, P. J. Y.
and Huddy, T. F.
and Pellock, S.
and Tischer, D.
and Chan, F.
and Koepnick, B.
and Nguyen, H.
and Kang, A.
and Sankaran, B.
and Bera, A. K.
and King, N. P.
and Baker, D.},
url = {https://www.science.org/doi/abs/10.1126/science.add2187, Science
https://www.bakerlab.org/wp-content/uploads/2022/09/Dauparas_etal_Science2022_Sequence_design_via_ProteinMPNN.pdf, PDF},
doi = {10.1126/science.add2187},
year = {2022},
date = {2022-09-15},
journal = {Science},
abstract = {While deep learning has revolutionized protein structure prediction, almost all experimentally characterized de novo protein designs have been generated using physically based approaches such as Rosetta. Here we describe a deep learning–based protein sequence design method, ProteinMPNN, with outstanding performance in both in silico and experimental tests. On native protein backbones, ProteinMPNN has a sequence recovery of 52.4%, compared to 32.9% for Rosetta. The amino acid sequence at different positions can be coupled between single or multiple chains, enabling application to a wide range of current protein design challenges. We demonstrate the broad utility and high accuracy of ProteinMPNN using X-ray crystallography, cryoEM and functional studies by rescuing previously failed designs, made using Rosetta or AlphaFold, of protein monomers, cyclic homo-oligomers, tetrahedral nanoparticles, and target binding proteins.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Wang, Jue; Lisanza, Sidney; Juergens, David; Tischer, Doug; Watson, Joseph L.; Castro, Karla M.; Ragotte, Robert; Saragovi, Amijai; Milles, Lukas F.; Baek, Minkyung; Anishchenko, Ivan; Yang, Wei; Hicks, Derrick R.; Expòsit, Marc; Schlichthaerle, Thomas; Chun, Jung-Ho; Dauparas, Justas; Bennett, Nathaniel; Wicky, Basile I. M.; Muenks, Andrew; DiMaio, Frank; Correia, Bruno; Ovchinnikov, Sergey; Baker, David
Scaffolding protein functional sites using deep learning Journal Article
In: Science, 2022.
@article{Wang2022,
title = {Scaffolding protein functional sites using deep learning},
author = {Jue Wang and Sidney Lisanza and David Juergens and Doug Tischer and Joseph L. Watson and Karla M. Castro and Robert Ragotte and Amijai Saragovi and Lukas F. Milles and Minkyung Baek and Ivan Anishchenko and Wei Yang and Derrick R. Hicks and Marc Expòsit and Thomas Schlichthaerle and Jung-Ho Chun and Justas Dauparas and Nathaniel Bennett and Basile I. M. Wicky and Andrew Muenks and Frank DiMaio and Bruno Correia and Sergey Ovchinnikov and David Baker},
url = {https://www.science.org/doi/abs/10.1126/science.abn2100, Science
https://www.ipd.uw.edu/wp-content/uploads/2022/07/science.abn2100.pdf, Download PDF},
doi = {10.1126/science.abn2100},
year = {2022},
date = {2022-07-21},
urldate = {2022-07-21},
journal = {Science},
abstract = {The binding and catalytic functions of proteins are generally mediated by a small number of functional residues held in place by the overall protein structure. Here, we describe deep learning approaches for scaffolding such functional sites without needing to prespecify the fold or secondary structure of the scaffold. The first approach, “constrained hallucination,” optimizes sequences such that their predicted structures contain the desired functional site. The second approach, “inpainting,” starts from the functional site and fills in additional sequence and structure to create a viable protein scaffold in a single forward pass through a specifically trained RoseTTAFold network. We use these two methods to design candidate immunogens, receptor traps, metalloproteins, enzymes, and protein-binding proteins and validate the designs using a combination of in silico and experimental tests.},
annotation = {Protein design has had success in finding sequences that fold into a desired conformation, but designing functional proteins remains challenging. Wang et al. describe two deep-learning methods to design proteins that contain prespecified functional sites. In the first, they found sequences predicted to fold into stable structures that contain the functional site. In the second, they retrained a structure prediction network to recover the sequence and full structure of a protein given only the functional site. The authors demonstrate their methods by designing proteins containing a variety of functional motifs. —VV Deep-learning methods enable the scaffolding of desired functional residues within a well-folded designed protein.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Sen, Neeladri; Anishchenko, Ivan; Bordin, N.; Sillitoe, Ian; Velankar, Sameer; Baker, David; Orengo, Christine
Characterizing and explaining the impact of disease-associated mutations in proteins without known structures or structural homologs Journal Article
In: Briefings in Bioinformatics, 2022.
@article{Sen2022,
title = {Characterizing and explaining the impact of disease-associated mutations in proteins without known structures or structural homologs},
author = {Sen, Neeladri
and Anishchenko, Ivan
and Bordin, N.
and Sillitoe, Ian
and Velankar, Sameer
and Baker, David
and Orengo, Christine},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9294430/},
doi = {10.1093/bib/bbac187},
year = {2022},
date = {2022-07-18},
journal = {Briefings in Bioinformatics},
abstract = {Mutations in human proteins lead to diseases. The structure of these proteins can help understand the mechanism of such diseases and develop therapeutics against them. With improved deep learning techniques, such as RoseTTAFold and AlphaFold, we can predict the structure of proteins even in the absence of structural homologs. We modeled and extracted the domains from 553 disease-associated human proteins without known protein structures or close homologs in the Protein Databank. We noticed that the model quality was higher and the Root mean square deviation (RMSD) lower between AlphaFold and RoseTTAFold models for domains that could be assigned to CATH families as compared to those which could only be assigned to Pfam families of unknown structure or could not be assigned to either. We predicted ligand-binding sites, protein-protein interfaces and conserved residues in these predicted structures. We then explored whether the disease-associated missense mutations were in the proximity of these predicted functional sites, whether they destabilized the protein structure based on ddG calculations or whether they were predicted to be pathogenic. We could explain 80% of these disease-associated mutations based on proximity to functional sites, structural destabilization or pathogenicity. When compared to polymorphisms, a larger percentage of disease-associated missense mutations were buried, closer to predicted functional sites, predicted as destabilizing and pathogenic. Usage of models from the two state-of-the-art techniques provide better confidence in our predictions, and we explain 93 additional mutations based on RoseTTAFold models which could not be explained based solely on AlphaFold models.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Linder, Johannes; La Fleur, Alyssa; Chen, Zibo; Ljubetič, Ajasja; Baker, David; Kannan, Sreeram; Seelig, Georg
Interpreting neural networks for biological sequences by learning stochastic masks Journal Article
In: Nature Machine Intelligence, 2022.
@article{Linder2022,
  author     = {Linder, Johannes
                and La Fleur, Alyssa
                and Chen, Zibo
                and Ljubetič, Ajasja
                and Baker, David
                and Kannan, Sreeram
                and Seelig, Georg},
  title      = {Interpreting neural networks for biological sequences by learning stochastic masks},
  journal    = {Nature Machine Intelligence},
  year       = {2022},
  date       = {2022-01-25},
  doi        = {10.1038/s42256-021-00428-6},
  url        = {https://www.nature.com/articles/s42256-021-00428-6, Nature Machine Intelligence},
  urldate    = {2022-01-25},
  abstract   = {Sequence-based neural networks can learn to make accurate predictions from large biological datasets, but model interpretation remains challenging. Many existing feature attribution methods are optimized for continuous rather than discrete input patterns and assess individual feature importance in isolation, making them ill-suited for interpreting nonlinear interactions in molecular sequences. Here, building on work in computer vision and natural language processing, we developed an approach based on deep learning—scrambler networks—wherein the most important sequence positions are identified with learned input masks. Scramblers learn to predict position-specific scoring matrices where unimportant nucleotides or residues are scrambled by raising their entropy. We apply scramblers to interpret the effects of genetic variants, uncover nonlinear interactions between cis-regulatory elements, explain binding specificity for protein–protein interactions, and identify structural determinants of de novo-designed proteins. We show that scramblers enable efficient attribution across large datasets and result in high-quality explanations, often outperforming state-of-the-art methods.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Baek, Minkyung; Baker, David
Deep learning and protein structure modeling Journal Article
In: Nature Methods, 2022.
@article{Baek2022,
title = {Deep learning and protein structure modeling},
author = {Minkyung Baek and David Baker},
url = {https://www.nature.com/articles/s41592-021-01360-8, Nature Methods
https://www.bakerlab.org/wp-content/uploads/2022/01/Baek_Baker_NatureMethods2022_Deep_Learning_and_Protein_Structure_Modeling.pdf, Download PDF},
doi = {10.1038/s41592-021-01360-8},
year = {2022},
date = {2022-01-22},
urldate = {2022-01-22},
journal = {Nature Methods},
abstract = {Deep learning has transformed protein structure modeling. Here we relate AlphaFold and RoseTTAFold to classical physically based approaches to protein structure prediction, and discuss the many areas of structural biology that are likely to be affected by further advances in deep learning.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Anishchenko, Ivan; Pellock, Samuel J.; Chidyausiku, Tamuka M.; Ramelot, Theresa A.; Ovchinnikov, Sergey; Hao, Jingzhou; Bafna, Khushboo; Norn, Christoffer; Kang, Alex; Bera, Asim K.; DiMaio, Frank; Carter, Lauren; Chow, Cameron M.; Montelione, Gaetano T.; Baker, David
De novo protein design by deep network hallucination Journal Article
In: Nature, 2021.
@article{Anishchenko2021,
title = {De novo protein design by deep network hallucination},
author = {Anishchenko, Ivan
and Pellock, Samuel J.
and Chidyausiku, Tamuka M.
and Ramelot, Theresa A.
and Ovchinnikov, Sergey
and Hao, Jingzhou
and Bafna, Khushboo
and Norn, Christoffer
and Kang, Alex
and Bera, Asim K.
and DiMaio, Frank
and Carter, Lauren
and Chow, Cameron M.
and Montelione, Gaetano T.
and Baker, David},
url = {https://www.nature.com/articles/s41586-021-04184-w
https://www.bakerlab.org/wp-content/uploads/2022/01/Anishchenko_etal_Nature2021_DeepNetworkHallucination.pdf},
doi = {10.1038/s41586-021-04184-w},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
journal = {Nature},
abstract = {There has been considerable recent progress in protein structure prediction using deep neural networks to predict inter-residue distances from amino acid sequences. Here we investigate whether the information captured by such networks is sufficiently rich to generate new folded proteins with sequences unrelated to those of the naturally occurring proteins used in training the models. We generate random amino acid sequences, and input them into the trRosetta structure prediction network to predict starting residue–residue distance maps, which, as expected, are quite featureless. We then carry out Monte Carlo sampling in amino acid sequence space, optimizing the contrast (Kullback–Leibler divergence) between the inter-residue distance distributions predicted by the network and background distributions averaged over all proteins. Optimization from different random starting points resulted in novel proteins spanning a wide range of sequences and predicted structures. We obtained synthetic genes encoding 129 of the network-‘hallucinated’ sequences, and expressed and purified the proteins in Escherichia coli; 27 of the proteins yielded monodisperse species with circular dichroism spectra consistent with the hallucinated structures. We determined the three-dimensional structures of three of the hallucinated proteins, two by X-ray crystallography and one by NMR, and these closely matched the hallucinated models. Thus, deep networks trained to predict native protein structures from their sequences can be inverted to design new proteins, and such networks and methods should contribute alongside traditional physics-based models to the de novo design of proteins with new functions.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Du, Zongyang; Su, Hong; Wang, Wenkai; Ye, Lisha; Wei, Hong; Peng, Zhenling; Anishchenko, Ivan; Baker, David; Yang, Jianyi
The trRosetta server for fast and accurate protein structure prediction Journal Article
In: Nature Protocols, 2021.
@article{Du2021,
title = {The {trRosetta} server for fast and accurate protein structure prediction},
author = {Du, Zongyang
and Su, Hong
and Wang, Wenkai
and Ye, Lisha
and Wei, Hong
and Peng, Zhenling
and Anishchenko, Ivan
and Baker, David
and Yang, Jianyi},
url = {https://www.nature.com/articles/s41596-021-00628-9
https://www.bakerlab.org/wp-content/uploads/2022/01/Du_etal_NatProt2021_trRosetta_server.pdf},
doi = {10.1038/s41596-021-00628-9},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
journal = {Nature Protocols},
abstract = {The trRosetta (transform-restrained Rosetta) server is a web-based platform for fast and accurate protein structure prediction, powered by deep learning and Rosetta. With the input of a protein’s amino acid sequence, a deep neural network is first used to predict the inter-residue geometries, including distance and orientations. The predicted geometries are then transformed as restraints to guide the structure prediction on the basis of direct energy minimization, which is implemented under the framework of Rosetta. The trRosetta server distinguishes itself from other similar structure prediction servers in terms of rapid and accurate de novo structure prediction. As an illustration, trRosetta was applied to two Pfam families with unknown structures, for which the predicted de novo models were estimated to have high accuracy. Nevertheless, to take advantage of homology modeling, homologous templates are used as additional inputs to the network automatically. In general, it takes ~1 h to predict the final structure for a typical protein with ~300 amino acids, using a maximum of 10 CPU cores in parallel in our cluster system. To enable large-scale structure modeling, a downloadable package of trRosetta with open-source codes is available as well. A detailed guidance for using the package is also available in this protocol. The server and the package are available at https://yanglab.nankai.edu.cn/trRosetta/ and https://yanglab.nankai.edu.cn/trRosetta/download/, respectively.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Baek, Minkyung; Anishchenko, Ivan; Park, Hahnbeom; Humphreys, Ian R.; Baker, David
Protein oligomer modeling guided by predicted inter-chain contacts in CASP14 Journal Article
In: Proteins, 2021.
@article{Baek2021b,
  author    = {Minkyung Baek and Ivan Anishchenko and Hahnbeom Park and Ian R. Humphreys and David Baker},
  title     = {Protein oligomer modeling guided by predicted inter-chain contacts in CASP14},
  journal   = {Proteins},
  year      = {2021},
  date      = {2021-07-29},
  doi       = {10.1002/prot.26197},
  url       = {https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.26197, Proteins},
  urldate   = {2021-07-29},
  abstract  = {For CASP14, we developed deep learning-based methods for predicting homo-oligomeric and hetero-oligomeric contacts and used them for oligomer modeling. To build structure models, we developed an oligomer structure generation method that utilizes predicted inter-chain contacts to guide iterative restrained minimization from random backbone structures. We supplemented this gradient-based fold-and-dock method with template-based and ab initio docking approaches using deep learning-based subunit predictions on 29 assembly targets. These methods produced oligomer models with summed Z-scores 5.5 units higher than the next best group, with the fold-and-dock method having the best relative performance. Over the eight targets for which this method was used, the best of the five submitted models had average oligomer TM-score of 0.71 (average oligomer TM-score of the next best group: 0.64), and explicit modeling of inter-subunit interactions improved modeling of six out of 40 individual domains (ΔGDT-TS > 2.0).},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Baek, Minkyung; DiMaio, Frank; Anishchenko, Ivan; Dauparas, Justas; Ovchinnikov, Sergey; Lee, Gyu Rie; Wang, Jue; Cong, Qian; Kinch, Lisa N.; Schaeffer, R. Dustin; Millán, Claudia; Park, Hahnbeom; Adams, Carson; Glassman, Caleb R.; DeGiovanni, Andy; Pereira, Jose H.; Rodrigues, Andria V.; van Dijk, Alberdina A.; Ebrecht, Ana C.; Opperman, Diederik J.; Sagmeister, Theo; Buhlheller, Christoph; Pavkov-Keller, Tea; Rathinaswamy, Manoj K.; Dalwadi, Udit; Yip, Calvin K.; Burke, John E.; Garcia, K. Christopher; Grishin, Nick V.; Adams, Paul D.; Read, Randy J.; Baker, David
Accurate prediction of protein structures and interactions using a three-track neural network Journal Article
In: Science, 2021.
@article{Baek2021,
  author    = {Baek, Minkyung and DiMaio, Frank and Anishchenko, Ivan and Dauparas, Justas and Ovchinnikov, Sergey and Lee, Gyu Rie and Wang, Jue and Cong, Qian and Kinch, Lisa N. and Schaeffer, R. Dustin and Millán, Claudia and Park, Hahnbeom and Adams, Carson and Glassman, Caleb R. and DeGiovanni, Andy and Pereira, Jose H. and Rodrigues, Andria V. and van Dijk, Alberdina A. and Ebrecht, Ana C. and Opperman, Diederik J. and Sagmeister, Theo and Buhlheller, Christoph and Pavkov-Keller, Tea and Rathinaswamy, Manoj K. and Dalwadi, Udit and Yip, Calvin K. and Burke, John E. and Garcia, K. Christopher and Grishin, Nick V. and Adams, Paul D. and Read, Randy J. and Baker, David},
  title     = {Accurate prediction of protein structures and interactions using a three-track neural network},
  journal   = {Science},
  year      = {2021},
  date      = {2021-07-15},
  doi       = {10.1126/science.abj8754},
  url       = {http://science.sciencemag.org/content/early/2021/07/14/science.abj8754, Science
https://www.ipd.uw.edu/wp-content/uploads/2021/07/Baek_etal_Science2021_RoseTTAFold.pdf, Download PDF},
  urldate   = {2021-07-15},
  abstract  = {DeepMind presented remarkably accurate predictions at the recent CASP14 protein structure prediction assessment conference. We explored network architectures incorporating related ideas and obtained the best performance with a three-track network in which information at the 1D sequence level, the 2D distance map level, and the 3D coordinate level is successively transformed and integrated. The three-track network produces structure predictions with accuracies approaching those of DeepMind in CASP14, enables the rapid solution of challenging X-ray crystallography and cryo-EM structure modeling problems, and provides insights into the functions of proteins of currently unknown structure. The network also enables rapid generation of accurate protein-protein complex models from sequence information alone, short-circuiting traditional approaches which require modeling of individual subunits followed by docking. We make the method available to the scientific community to speed biological research.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Norn, Christoffer; Wicky, Basile I. M.; Juergens, David; Liu, Sirui; Kim, David; Tischer, Doug; Koepnick, Brian; Anishchenko, Ivan; Baker, David; Ovchinnikov, Sergey
Protein sequence design by conformational landscape optimization Journal Article
In: Proceedings of the National Academy of Sciences, vol. 118, no. 11, 2021.
@article{Norn2021,
  author    = {Norn, Christoffer and Wicky, Basile I. M. and Juergens, David and Liu, Sirui and Kim, David and Tischer, Doug and Koepnick, Brian and Anishchenko, Ivan and Baker, David and Ovchinnikov, Sergey},
  title     = {Protein sequence design by conformational landscape optimization},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {118},
  number    = {11},
  year      = {2021},
  date      = {2021-03-16},
  doi       = {10.1073/pnas.2017228118},
  url       = {https://www.pnas.org/content/118/11/e2017228118, PNAS
https://www.bakerlab.org/wp-content/uploads/2021/03/Norn_etal_PNAS2021_LandscapeOptimization.pdf, Download PDF},
  urldate   = {2021-03-16},
  abstract  = {Almost all proteins fold to their lowest free energy state, which is determined by their amino acid sequence. Computational protein design has primarily focused on finding sequences that have very low energy in the target designed structure. However, what is most relevant during folding is not the absolute energy of the folded state but the energy difference between the folded state and the lowest-lying alternative states. We describe a deep learning approach that captures aspects of the folding landscape, in particular the presence of structures in alternative energy minima, and show that it can enhance current protein design methods. The protein design problem is to identify an amino acid sequence that folds to a desired structure. Given Anfinsen{\textquoteright}s thermodynamic hypothesis of folding, this can be recast as finding an amino acid sequence for which the desired structure is the lowest energy state. As this calculation involves not only all possible amino acid sequences but also, all possible structures, most current approaches focus instead on the more tractable problem of finding the lowest-energy amino acid sequence for the desired structure, often checking by protein structure prediction in a second step that the desired structure is indeed the lowest-energy conformation for the designed sequence, and typically discarding a large fraction of designed sequences for which this is not the case. Here, we show that by backpropagating gradients through the transform-restrained Rosetta (trRosetta) structure prediction network from the desired structure to the input amino acid sequence, we can directly optimize over all possible amino acid sequences and all possible structures in a single calculation. We find that trRosetta calculations, which consider the full conformational landscape, can be more effective than Rosetta single-point energy estimations in predicting folding and stability of de novo designed proteins. We compare sequence design by conformational landscape optimization with the standard energy-based sequence design methodology in Rosetta and show that the former can result in energy landscapes with fewer alternative energy minima. We show further that more funneled energy landscapes can be designed by combining the strengths of the two approaches: the low-resolution trRosetta model serves to disfavor alternative states, and the high-resolution Rosetta model serves to create a deep energy minimum at the design target structure.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Hiranuma, Naozumi; Park, Hahnbeom; Baek, Minkyung; Anishchenko, Ivan; Dauparas, Justas; Baker, David
Improved protein structure refinement guided by deep learning based accuracy estimation Journal Article
In: Nature Communications, vol. 12, art. no. 1340, 2021.
@article{Hiranuma2021,
  author    = {Naozumi Hiranuma and Hahnbeom Park and Minkyung Baek and Ivan Anishchenko and Justas Dauparas and David Baker},
  title     = {Improved protein structure refinement guided by deep learning based accuracy estimation},
  journal   = {Nature Communications},
  volume    = {12},
  pages     = {1340},
  year      = {2021},
  date      = {2021-02-26},
  doi       = {10.1038/s41467-021-21511-x},
  url       = {https://www.nature.com/articles/s41467-021-21511-x, Nature Communications
https://www.bakerlab.org/wp-content/uploads/2021/02/Hiranuma_etal_NatureComms2021_DeepLearningStructureRefinement.pdf, Download PDF},
  urldate   = {2021-02-26},
  abstract  = {We develop a deep learning framework (DeepAccNet) that estimates per-residue accuracy and residue-residue distance signed error in protein models and uses these predictions to guide Rosetta protein structure refinement. The network uses 3D convolutions to evaluate local atomic environments followed by 2D convolutions to provide their global contexts and outperforms other methods that similarly predict the accuracy of protein structure models. Overall accuracy predictions for X-ray and cryoEM structures in the PDB correlate with their resolution, and the network should be broadly useful for assessing the accuracy of both predicted structure models and experimentally determined structures and identifying specific regions likely to be in error. Incorporation of the accuracy predictions at multiple stages in the Rosetta refinement protocol considerably increased the accuracy of the resulting protein structure models, illustrating how deep learning can improve search for global energy minima of biomolecules.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Park, Hahnbeom; Zhou, Guangfeng; Baek, Minkyung; Baker, David; DiMaio, Frank
Force Field Optimization Guided by Small Molecule Crystal Lattice Data Enables Consistent Sub-Angstrom Protein–Ligand Docking Journal Article
In: Journal of Chemical Theory and Computation, 2021.
@article{Park2021,
  author    = {Hahnbeom Park and Guangfeng Zhou and Minkyung Baek and David Baker and Frank DiMaio},
  title     = {Force Field Optimization Guided by Small Molecule Crystal Lattice Data Enables Consistent Sub-Angstrom Protein–Ligand Docking},
  journal   = {Journal of Chemical Theory and Computation},
  year      = {2021},
  date      = {2021-02-12},
  doi       = {10.1021/acs.jctc.0c01184},
  url       = {https://pubs.acs.org/doi/full/10.1021/acs.jctc.0c01184
https://www.bakerlab.org/wp-content/uploads/2021/02/Park_etal_JCTC2021_Small_mol_force_field_optimization.pdf},
  abstract  = {Accurate and rapid calculation of protein-small molecule interaction free energies is critical for computational drug discovery. Because of the large chemical space spanned by drug-like molecules, classical force fields contain thousands of parameters describing atom-pair distance and torsional preferences; each parameter is typically optimized independently on simple representative molecules. Here, we describe a new approach in which small molecule force field parameters are jointly optimized guided by the rich source of information contained within thousands of available small molecule crystal structures. We optimize parameters by requiring that the experimentally determined molecular lattice arrangements have lower energy than all alternative lattice arrangements. Thousands of independent crystal lattice-prediction simulations were run on each of 1386 small molecule crystal structures, and energy function parameters of an implicit solvent energy model were optimized, so native crystal lattice arrangements had the lowest energy. The resulting energy model was implemented in Rosetta, together with a rapid genetic algorithm docking method employing grid-based scoring and receptor flexibility. The success rate of bound structure recapitulation in cross-docking on 1112 complexes was improved by more than 10% over previously published methods, with solutions within <1 Å in over half of the cases. Our results demonstrate that small molecule crystal structures are a rich source of information for guiding molecular force field development, and the improved Rosetta energy function should increase accuracy in a wide range of small molecule structure prediction and design studies.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Yang, Jianyi; Anishchenko, Ivan; Park, Hahnbeom; Peng, Zhenling; Ovchinnikov, Sergey; Baker, David
Improved protein structure prediction using predicted interresidue orientations Journal Article
In: Proceedings of the National Academy of Sciences, 2020, ISSN: 0027-8424.
@article{Yang2020,
  author    = {Yang, Jianyi and Anishchenko, Ivan and Park, Hahnbeom and Peng, Zhenling and Ovchinnikov, Sergey and Baker, David},
  title     = {Improved protein structure prediction using predicted interresidue orientations},
  journal   = {Proceedings of the National Academy of Sciences},
  year      = {2020},
  date      = {2020-01-02},
  issn      = {0027-8424},
  doi       = {10.1073/pnas.1914677117},
  url       = {https://www.pnas.org/content/early/2020/01/01/1914677117
https://www.bakerlab.org/wp-content/uploads/2020/01/Yang2020_ImprovedStructurePredictionInterresidueOrientations.pdf},
  abstract  = {Protein structure prediction is a longstanding challenge in computational biology. Through extension of deep learning-based prediction to interresidue orientations in addition to distances, and the development of a constrained optimization by Rosetta, we show that more accurate models can be generated. Results on a set of 18 de novo-designed proteins suggests the proposed method should be directly applicable to current challenges in de novo protein design. The prediction of interresidue contacts and distances from coevolutionary data using deep learning has considerably advanced protein structure prediction. Here, we build on these advances by developing a deep residual network for predicting interresidue orientations, in addition to distances, and a Rosetta-constrained energy-minimization protocol for rapidly and accurately generating structure models guided by these restraints. In benchmark tests on 13th Community-Wide Experiment on the Critical Assessment of Techniques for Protein Structure Prediction (CASP13)- and Continuous Automated Model Evaluation (CAMEO)-derived sets, the method outperforms all previously described structure-prediction methods. Although trained entirely on native proteins, the network consistently assigns higher probability to de novo-designed proteins, identifying the key fold-determining residues and providing an independent quantitative measure of the "ideality" of a protein structure. The method promises to be useful for a broad range of protein structure prediction and design problems.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2024
FROM THE LAB
Ian R. Humphreys, Jing Zhang, Minkyung Baek, Yaxi Wang, Aditya Krishnakumar, Jimin Pei, Ivan Anishchenko, Catherine A. Tower, Blake A. Jackson, Thulasi Warrier, Deborah T. Hung, S. Brook Peterson, Joseph D. Mougous, Qian Cong, David Baker
Protein interactions in human pathogens revealed through deep learning Journal Article
In: Nature Microbiology, 2024, ISSN: 2058-5276.
@article{Humphreys2024,
  author    = {Ian R. Humphreys and Jing Zhang and Minkyung Baek and Yaxi Wang and Aditya Krishnakumar and Jimin Pei and Ivan Anishchenko and Catherine A. Tower and Blake A. Jackson and Thulasi Warrier and Deborah T. Hung and S. Brook Peterson and Joseph D. Mougous and Qian Cong and David Baker},
  title     = {Protein interactions in human pathogens revealed through deep learning},
  journal   = {Nature Microbiology},
  publisher = {Springer Science and Business Media LLC},
  year      = {2024},
  date      = {2024-09-18},
  issn      = {2058-5276},
  doi       = {10.1038/s41564-024-01791-x},
  url       = {https://www.nature.com/articles/s41564-024-01791-x, Nature Microbiology [Open Access]},
  urldate   = {2024-09-18},
  abstract  = {Identification of bacterial protein–protein interactions and predicting the structures of these complexes could aid in the understanding of pathogenicity mechanisms and developing treatments for infectious diseases. Here we developed RoseTTAFold2-Lite, a rapid deep learning model that leverages residue–residue coevolution and protein structure prediction to systematically identify and structurally characterize protein–protein interactions at the proteome-wide scale. Using this pipeline, we searched through 78 million pairs of proteins across 19 human bacterial pathogens and identified 1,923 confidently predicted complexes involving essential genes and 256 involving virulence factors. Many of these complexes were not previously known; we experimentally tested 12 such predictions, and half of them were validated. The predicted interactions span core metabolic and virulence pathways ranging from post-transcriptional modification to acid neutralization to outer-membrane machinery and should contribute to our understanding of the biology of these important pathogens and the design of drugs to combat them.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Sanaa Mansoor, Minkyung Baek, Hahnbeom Park, Gyu Rie Lee, David Baker
Protein Ensemble Generation Through Variational Autoencoder Latent Space Sampling Journal Article
In: J. Chem. Theory Comput., 2024.
@article{Mansoor2024,
  author    = {Sanaa Mansoor and Minkyung Baek and Hahnbeom Park and Gyu Rie Lee and David Baker},
  title     = {Protein Ensemble Generation Through Variational Autoencoder Latent Space Sampling},
  journal   = {Journal of Chemical Theory and Computation},
  publisher = {American Chemical Society (ACS)},
  year      = {2024},
  date      = {2024-03-28},
  doi       = {10.1021/acs.jctc.3c01057},
  url       = {https://pubs.acs.org/doi/10.1021/acs.jctc.3c01057, J. Chem. Theory Comput.
https://www.bakerlab.org/wp-content/uploads/2024/05/mansoor-et-al-2024-protein-ensemble-generation-through-variational-autoencoder-latent-space-sampling.pdf, PDF},
  urldate   = {2024-04-09},
  abstract  = {Mapping the ensemble of protein conformations that contribute to function and can be targeted by small molecule drugs remains an outstanding challenge. Here, we explore the use of variational autoencoders for reducing the challenge of dimensionality in the protein structure ensemble generation problem. We convert high-dimensional protein structural data into a continuous, low-dimensional representation, carry out a search in this space guided by a structure quality metric, and then use RoseTTAFold guided by the sampled structural information to generate 3D structures. We use this approach to generate ensembles for the cancer relevant protein K-Ras, train the VAE on a subset of the available K-Ras crystal structures and MD simulation snapshots, and assess the extent of sampling close to crystal structures withheld from training. We find that our latent space sampling procedure rapidly generates ensembles with high structural quality and is able to sample within 1 Å of held-out crystal structures, with a consistency higher than that of MD simulation or AlphaFold2 prediction. The sampled structures sufficiently recapitulate the cryptic pockets in the held-out K-Ras structures to allow for small molecule docking.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Rohith Krishna, Jue Wang, Woody Ahern, Pascal Sturmfels, Preetham Venkatesh, Indrek Kalvet, Gyu Rie Lee, Felix S. Morey-Burrows, Ivan Anishchenko, Ian R. Humphreys, Ryan McHugh, Dionne Vafeados, Xinting Li, George A. Sutherland, Andrew Hitchcock, C. Neil Hunter, Alex Kang, Evans Brackenbrough, Asim K. Bera, Minkyung Baek, Frank DiMaio, David Baker
Generalized biomolecular modeling and design with RoseTTAFold All-Atom Journal Article
In: Science, 2024.
@article{Krishna2024,
  author    = {Rohith Krishna and Jue Wang and Woody Ahern and Pascal Sturmfels and Preetham Venkatesh and Indrek Kalvet and Gyu Rie Lee and Felix S. Morey-Burrows and Ivan Anishchenko and Ian R. Humphreys and Ryan McHugh and Dionne Vafeados and Xinting Li and George A. Sutherland and Andrew Hitchcock and C. Neil Hunter and Alex Kang and Evans Brackenbrough and Asim K. Bera and Minkyung Baek and Frank DiMaio and David Baker},
  title     = {Generalized biomolecular modeling and design with RoseTTAFold All-Atom},
  journal   = {Science},
  publisher = {American Association for the Advancement of Science (AAAS)},
  year      = {2024},
  date      = {2024-03-07},
  doi       = {10.1126/science.adl2528},
  url       = {https://www.science.org/stoken/author-tokens/ST-1739/full, Science [Full Access Link]
https://www.bakerlab.org/wp-content/uploads/2024/03/science.adl2528.pdf, PDF},
  urldate   = {2024-03-07},
  abstract  = {Deep learning methods have revolutionized protein structure prediction and design but are currently limited to protein-only systems. We describe RoseTTAFold All-Atom (RFAA) which combines a residue-based representation of amino acids and DNA bases with an atomic representation of all other groups to model assemblies containing proteins, nucleic acids, small molecules, metals, and covalent modifications given their sequences and chemical structures. By fine tuning on denoising tasks we obtain RFdiffusionAA, which builds protein structures around small molecules. Starting from random distributions of amino acid residues surrounding target small molecules, we design and experimentally validate, through crystallography and binding measurements, proteins that bind the cardiac disease therapeutic digoxigenin, the enzymatic cofactor heme, and the light harvesting molecule bilin.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Sorry, no publications matched your criteria.
2023
FROM THE LAB
Minkyung Baek, Ryan McHugh, Ivan Anishchenko, Hanlun Jiang, David Baker, Frank DiMaio
Accurate prediction of protein–nucleic acid complexes using RoseTTAFoldNA Journal Article
In: Nature Methods, 2023.
@article{Baek2023,
  author    = {Minkyung Baek and Ryan McHugh and Ivan Anishchenko and Hanlun Jiang and David Baker and Frank DiMaio},
  title     = {Accurate prediction of protein–nucleic acid complexes using RoseTTAFoldNA},
  journal   = {Nature Methods},
  publisher = {Springer Science and Business Media LLC},
  year      = {2023},
  date      = {2023-11-23},
  doi       = {10.1038/s41592-023-02086-5},
  url       = {https://www.nature.com/articles/s41592-023-02086-5, Nature Methods [Open Access]},
  urldate   = {2023-11-23},
  abstract  = {Protein–RNA and protein–DNA complexes play critical roles in biology. Despite considerable recent advances in protein structure prediction, the prediction of the structures of protein–nucleic acid complexes without homology to known complexes is a largely unsolved problem. Here we extend the RoseTTAFold machine learning protein-structure-prediction approach to additionally predict nucleic acid and protein–nucleic acid complexes. We develop a single trained network, RoseTTAFoldNA, that rapidly produces three-dimensional structure models with confidence estimates for protein–DNA and protein–RNA complexes. Here we show that confident predictions have considerably higher accuracy than current state-of-the-art methods. RoseTTAFoldNA should be broadly useful for modeling the structure of naturally occurring protein–nucleic acid complexes, and for designing sequence-specific RNA and DNA-binding proteins.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Sanaa Mansoor, Minkyung Baek, David Juergens, Joseph L. Watson, David Baker
Zero‐shot Mutation Effect Prediction on Protein Stability and Function using RoseTTAFold Journal Article
In: Protein Science, 2023.
@article{Mansoor2023,
  author    = {Sanaa Mansoor and Minkyung Baek and David Juergens and Joseph L. Watson and David Baker},
  title     = {Zero‐shot Mutation Effect Prediction on Protein Stability and Function using RoseTTAFold},
  journal   = {Protein Science},
  publisher = {Wiley},
  year      = {2023},
  date      = {2023-09-11},
  doi       = {10.1002/pro.4780},
  url       = {https://onlinelibrary.wiley.com/doi/abs/10.1002/pro.4780, Protein Science
https://www.bakerlab.org/wp-content/uploads/2023/09/Protein-Science-2023-Mansoor.pdf, PDF},
  urldate   = {2023-09-11},
  abstract  = {Predicting the effects of mutations on protein function and stability is an outstanding challenge. Here, we assess the performance of a variant of RoseTTAFold jointly trained for sequence and structure recovery, RFjoint, for mutation effect prediction. Without any further training, we achieve comparable accuracy in predicting mutation effects for a diverse set of protein families using RFjoint to both another zero‐shot model (MSA Transformer) and a model which requires specific training on a particular protein family for mutation effect prediction (DeepSequence). Thus, although the architecture of RFjoint was developed to address the protein design problem of scaffolding functional motifs, RFjoint acquired an understanding of the mutational landscapes of proteins during model training that is equivalent to that of recently developed large protein language models. The ability to simultaneously reason over protein structure and sequence could enable even more precise mutation effect predictions following supervised training on the task. These results suggest that RFjoint has a quite broad understanding of protein sequence‐structure landscapes, and can be viewed as a joint model for protein sequence and structure which could be broadly useful for protein modeling.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Joseph L. Watson, David Juergens, Nathaniel R. Bennett, Brian L. Trippe, Jason Yim, Helen E. Eisenach, Woody Ahern, Andrew J. Borst, Robert J. Ragotte, Lukas F. Milles, Basile I. M. Wicky, Nikita Hanikel, Samuel J. Pellock, Alexis Courbet, William Sheffler, Jue Wang, Preetham Venkatesh, Isaac Sappington, Susana Vázquez Torres, Anna Lauko, Valentin De Bortoli, Emile Mathieu, Sergey Ovchinnikov, Regina Barzilay, Tommi S. Jaakkola, Frank DiMaio, Minkyung Baek, David Baker
De novo design of protein structure and function with RFdiffusion Journal Article
In: Nature, 2023.
@article{Watson2023,
  author    = {Watson, Joseph L.
               and Juergens, David
               and Bennett, Nathaniel R.
               and Trippe, Brian L.
               and Yim, Jason
               and Eisenach, Helen E.
               and Ahern, Woody
               and Borst, Andrew J.
               and Ragotte, Robert J.
               and Milles, Lukas F.
               and Wicky, Basile I. M.
               and Hanikel, Nikita
               and Pellock, Samuel J.
               and Courbet, Alexis
               and Sheffler, William
               and Wang, Jue
               and Venkatesh, Preetham
               and Sappington, Isaac
               and Torres, Susana Vázquez
               and Lauko, Anna
               and De Bortoli, Valentin
               and Mathieu, Emile
               and Ovchinnikov, Sergey
               and Barzilay, Regina
               and Jaakkola, Tommi S.
               and DiMaio, Frank
               and Baek, Minkyung
               and Baker, David},
  title     = {De novo design of protein structure and function with RFdiffusion},
  journal   = {Nature},
  year      = {2023},
  date      = {2023-07-11},
  doi       = {10.1038/s41586-023-06415-8},
  url       = {https://www.nature.com/articles/s41586-023-06415-8, Nature
https://www.bakerlab.org/wp-content/uploads/2023/07/s41586-023-06415-8_reference.pdf, PDF (29MB)},
  abstract  = {There has been considerable recent progress in designing new proteins using deep learning methods. Despite this progress, a general deep learning framework for protein design that enables solution of a wide range of design challenges, including de novo binder design and design of higher order symmetric architectures, has yet to be described. Diffusion models have had considerable success in image and language generative modeling but limited success when applied to protein modeling, likely due to the complexity of protein backbone geometry and sequence-structure relationships. Here we show that by fine tuning the RoseTTAFold structure prediction network on protein structure denoising tasks, we obtain a generative model of protein backbones that achieves outstanding performance on unconditional and topology-constrained protein monomer design, protein binder design, symmetric oligomer design, enzyme active site scaffolding, and symmetric motif scaffolding for therapeutic and metal-binding protein design. We demonstrate the power and generality of the method, called RoseTTAFold Diffusion (RFdiffusion), by experimentally characterizing the structures and functions of hundreds of designed symmetric assemblies, metal binding proteins and protein binders. The accuracy of RFdiffusion is confirmed by the cryo-EM structure of a designed binder in complex with Influenza hemagglutinin which is nearly identical to the design model. In a manner analogous to networks which produce images from user-specified inputs, RFdiffusion enables the design of diverse functional proteins from simple molecular specifications.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Amir Motmaen, Justas Dauparas, Minkyung Baek, Mohamad H. Abedi, David Baker, Philip Bradley
Peptide-binding specificity prediction using fine-tuned protein structure prediction networks Journal Article
In: Proceedings of the National Academy of Sciences, 2023.
@article{nokey,
  author    = {Amir Motmaen and Justas Dauparas and Minkyung Baek and Mohamad H. Abedi and David Baker and Philip Bradley},
  title     = {Peptide-binding specificity prediction using fine-tuned protein structure prediction networks},
  journal   = {Proceedings of the National Academy of Sciences},
  year      = {2023},
  date      = {2023-02-21},
  doi       = {10.1073/pnas.2216697120},
  url       = {https://www.pnas.org/doi/10.1073/pnas.2216697120, PNAS (Open Access)},
  urldate   = {2023-02-21},
  abstract  = {Peptide-binding proteins play key roles in biology, and predicting their binding specificity is a long-standing challenge. While considerable protein structural information is available, the most successful current methods use sequence information alone, in part because it has been a challenge to model the subtle structural changes accompanying sequence substitutions. Protein structure prediction networks such as AlphaFold model sequence-structure relationships very accurately, and we reasoned that if it were possible to specifically train such networks on binding data, more generalizable models could be created. We show that placing a classifier on top of the AlphaFold network and fine-tuning the combined network parameters for both classification and structure prediction accuracy leads to a model with strong generalizable performance on a wide range of Class I and Class II peptide-MHC interactions that approaches the overall performance of the state-of-the-art NetMHCpan sequence-based method. The peptide-MHC optimized model shows excellent performance in distinguishing binding and non-binding peptides to SH3 and PDZ domains. This ability to generalize well beyond the training set far exceeds that of sequence-only models and should be particularly powerful for systems where less experimental data are available.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Sorry, no publications matched your criteria.
2022
FROM THE LAB
J. Dauparas, I. Anishchenko, N. Bennett, H. Bai, R. J. Ragotte, L. F. Milles, B. I. M. Wicky, A. Courbet, R. J. de Haas, N. Bethel, P. J. Y. Leung, T. F. Huddy, S. Pellock, D. Tischer, F. Chan, B. Koepnick, H. Nguyen, A. Kang, B. Sankaran, A. K. Bera, N. P. King, D. Baker
Robust deep learning–based protein sequence design using ProteinMPNN Journal Article
In: Science, 2022.
@article{Dauparas2022,
  author    = {Dauparas, J.
               and Anishchenko, I.
               and Bennett, N.
               and Bai, H.
               and Ragotte, R. J.
               and Milles, L. F.
               and Wicky, B. I. M.
               and Courbet, A.
               and de Haas, R. J.
               and Bethel, N.
               and Leung, P. J. Y.
               and Huddy, T. F.
               and Pellock, S.
               and Tischer, D.
               and Chan, F.
               and Koepnick, B.
               and Nguyen, H.
               and Kang, A.
               and Sankaran, B.
               and Bera, A. K.
               and King, N. P.
               and Baker, D.},
  title     = {Robust deep learning–based protein sequence design using ProteinMPNN},
  journal   = {Science},
  year      = {2022},
  date      = {2022-09-15},
  doi       = {10.1126/science.add2187},
  url       = {https://www.science.org/doi/abs/10.1126/science.add2187, Science
https://www.bakerlab.org/wp-content/uploads/2022/09/Dauparas_etal_Science2022_Sequence_design_via_ProteinMPNN.pdf, PDF},
  abstract  = {While deep learning has revolutionized protein structure prediction, almost all experimentally characterized de novo protein designs have been generated using physically based approaches such as Rosetta. Here we describe a deep learning–based protein sequence design method, ProteinMPNN, with outstanding performance in both in silico and experimental tests. On native protein backbones, ProteinMPNN has a sequence recovery of 52.4%, compared to 32.9% for Rosetta. The amino acid sequence at different positions can be coupled between single or multiple chains, enabling application to a wide range of current protein design challenges. We demonstrate the broad utility and high accuracy of ProteinMPNN using X-ray crystallography, cryoEM and functional studies by rescuing previously failed designs, made using Rosetta or AlphaFold, of protein monomers, cyclic homo-oligomers, tetrahedral nanoparticles, and target binding proteins.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Jue Wang, Sidney Lisanza, David Juergens, Doug Tischer, Joseph L. Watson, Karla M. Castro, Robert Ragotte, Amijai Saragovi, Lukas F. Milles, Minkyung Baek, Ivan Anishchenko, Wei Yang, Derrick R. Hicks, Marc Expòsit, Thomas Schlichthaerle, Jung-Ho Chun, Justas Dauparas, Nathaniel Bennett, Basile I. M. Wicky, Andrew Muenks, Frank DiMaio, Bruno Correia, Sergey Ovchinnikov, David Baker
Scaffolding protein functional sites using deep learning Journal Article
In: Science, 2022.
@article{Wang2022,
  title     = {Scaffolding protein functional sites using deep learning},
  author    = {Wang, Jue
               and Lisanza, Sidney
               and Juergens, David
               and Tischer, Doug
               and Watson, Joseph L.
               and Castro, Karla M.
               and Ragotte, Robert
               and Saragovi, Amijai
               and Milles, Lukas F.
               and Baek, Minkyung
               and Anishchenko, Ivan
               and Yang, Wei
               and Hicks, Derrick R.
               and Expòsit, Marc
               and Schlichthaerle, Thomas
               and Chun, Jung-Ho
               and Dauparas, Justas
               and Bennett, Nathaniel
               and Wicky, Basile I. M.
               and Muenks, Andrew
               and DiMaio, Frank
               and Correia, Bruno
               and Ovchinnikov, Sergey
               and Baker, David},
  url       = {https://www.science.org/doi/abs/10.1126/science.abn2100, Science
https://www.ipd.uw.edu/wp-content/uploads/2022/07/science.abn2100.pdf, Download PDF},
  doi       = {10.1126/science.abn2100},
  year      = {2022},
  date      = {2022-07-21},
  urldate   = {2022-07-21},
  journal   = {Science},
  abstract  = {The binding and catalytic functions of proteins are generally mediated by a small number of functional residues held in place by the overall protein structure. Here, we describe deep learning approaches for scaffolding such functional sites without needing to prespecify the fold or secondary structure of the scaffold. The first approach, “constrained hallucination,” optimizes sequences such that their predicted structures contain the desired functional site. The second approach, “inpainting,” starts from the functional site and fills in additional sequence and structure to create a viable protein scaffold in a single forward pass through a specifically trained RoseTTAFold network. We use these two methods to design candidate immunogens, receptor traps, metalloproteins, enzymes, and protein-binding proteins and validate the designs using a combination of in silico and experimental tests. Protein design has had success in finding sequences that fold into a desired conformation, but designing functional proteins remains challenging. Wang et al. describe two deep-learning methods to design proteins that contain prespecified functional sites. In the first, they found sequences predicted to fold into stable structures that contain the functional site. In the second, they retrained a structure prediction network to recover the sequence and full structure of a protein given only the functional site. The authors demonstrate their methods by designing proteins containing a variety of functional motifs. —VV Deep-learning methods enable the scaffolding of desired functional residues within a well-folded designed protein.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Minkyung Baek, David Baker
Deep learning and protein structure modeling Journal Article
In: Nature Methods, 2022.
@article{Baek2022,
  title     = {Deep learning and protein structure modeling},
  author    = {Baek, Minkyung
               and Baker, David},
  url       = {https://www.nature.com/articles/s41592-021-01360-8, Nature Methods
https://www.bakerlab.org/wp-content/uploads/2022/01/Baek_Baker_NatureMethods2022_Deep_Learning_and_Protein_Structure_Modeling.pdf, Download PDF
},
  doi       = {10.1038/s41592-021-01360-8},
  year      = {2022},
  date      = {2022-01-22},
  urldate   = {2022-01-22},
  journal   = {Nature Methods},
  abstract  = {Deep learning has transformed protein structure modeling. Here we relate AlphaFold and RoseTTAFold to classical physically based approaches to protein structure prediction, and discuss the many areas of structural biology that are likely to be affected by further advances in deep learning.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Sen, Neeladri and Anishchenko, Ivan and Bordin, N. and Sillitoe, Ian and Velankar, Sameer and Baker, David and Orengo, Christine
Characterizing and explaining the impact of disease-associated mutations in proteins without known structures or structural homologs Journal Article
In: Briefings in Bioinformatics, 2022.
@article{Sen2022,
  title     = {Characterizing and explaining the impact of disease-associated mutations in proteins without known structures or structural homologs},
  author    = {Sen, Neeladri
               and Anishchenko, Ivan
               and Bordin, N.
               and Sillitoe, Ian
               and Velankar, Sameer
               and Baker, David
               and Orengo, Christine},
  url       = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9294430/},
  doi       = {10.1093/bib/bbac187},
  year      = {2022},
  date      = {2022-07-18},
  journal   = {Briefings in Bioinformatics},
  abstract  = {Mutations in human proteins lead to diseases. The structure of these proteins can help understand the mechanism of such diseases and develop therapeutics against them. With improved deep learning techniques, such as RoseTTAFold and AlphaFold, we can predict the structure of proteins even in the absence of structural homologs. We modeled and extracted the domains from 553 disease-associated human proteins without known protein structures or close homologs in the Protein Databank. We noticed that the model quality was higher and the Root mean square deviation (RMSD) lower between AlphaFold and RoseTTAFold models for domains that could be assigned to CATH families as compared to those which could only be assigned to Pfam families of unknown structure or could not be assigned to either. We predicted ligand-binding sites, protein-protein interfaces and conserved residues in these predicted structures. We then explored whether the disease-associated missense mutations were in the proximity of these predicted functional sites, whether they destabilized the protein structure based on ddG calculations or whether they were predicted to be pathogenic. We could explain 80% of these disease-associated mutations based on proximity to functional sites, structural destabilization or pathogenicity. When compared to polymorphisms, a larger percentage of disease-associated missense mutations were buried, closer to predicted functional sites, predicted as destabilizing and pathogenic. Usage of models from the two state-of-the-art techniques provide better confidence in our predictions, and we explain 93 additional mutations based on RoseTTAFold models which could not be explained based solely on AlphaFold models.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Linder, Johannes, La Fleur, Alyssa, Chen, Zibo, Ljubetič, Ajasja, Baker, David, Kannan, Sreeram, Seelig, Georg
Interpreting neural networks for biological sequences by learning stochastic masks Journal Article
In: Nature Machine Intelligence, 2022.
@article{Linder2022,
  title     = {Interpreting neural networks for biological sequences by learning stochastic masks},
  author    = {Linder, Johannes
               and La Fleur, Alyssa
               and Chen, Zibo
               and Ljubetič, Ajasja
               and Baker, David
               and Kannan, Sreeram
               and Seelig, Georg},
  url       = {https://www.nature.com/articles/s42256-021-00428-6, Nature Machine Intelligence},
  doi       = {10.1038/s42256-021-00428-6},
  year      = {2022},
  date      = {2022-01-25},
  urldate   = {2022-01-25},
  journal   = {Nature Machine Intelligence},
  abstract  = {Sequence-based neural networks can learn to make accurate predictions from large biological datasets, but model interpretation remains challenging. Many existing feature attribution methods are optimized for continuous rather than discrete input patterns and assess individual feature importance in isolation, making them ill-suited for interpreting nonlinear interactions in molecular sequences. Here, building on work in computer vision and natural language processing, we developed an approach based on deep learning—scrambler networks—wherein the most important sequence positions are identified with learned input masks. Scramblers learn to predict position-specific scoring matrices where unimportant nucleotides or residues are scrambled by raising their entropy. We apply scramblers to interpret the effects of genetic variants, uncover nonlinear interactions between cis-regulatory elements, explain binding specificity for protein–protein interactions, and identify structural determinants of de novo-designed proteins. We show that scramblers enable efficient attribution across large datasets and result in high-quality explanations, often outperforming state-of-the-art methods.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2021
FROM THE LAB
Anishchenko, Ivan and Pellock, Samuel J. and Chidyausiku, Tamuka M. and Ramelot, Theresa A. and Ovchinnikov, Sergey and Hao, Jingzhou and Bafna, Khushboo and Norn, Christoffer and Kang, Alex and Bera, Asim K. and DiMaio, Frank and Carter, Lauren and Chow, Cameron M. and Montelione, Gaetano T. and Baker, David
De novo protein design by deep network hallucination Journal Article
In: Nature, 2021.
@article{Anishchenko2021,
  title     = {De novo protein design by deep network hallucination},
  author    = {Anishchenko, Ivan
               and Pellock, Samuel J.
               and Chidyausiku, Tamuka M.
               and Ramelot, Theresa A.
               and Ovchinnikov, Sergey
               and Hao, Jingzhou
               and Bafna, Khushboo
               and Norn, Christoffer
               and Kang, Alex
               and Bera, Asim K.
               and DiMaio, Frank
               and Carter, Lauren
               and Chow, Cameron M.
               and Montelione, Gaetano T.
               and Baker, David},
  url       = {https://www.nature.com/articles/s41586-021-04184-w
https://www.bakerlab.org/wp-content/uploads/2022/01/Anishchenko_etal_Nature2021_DeepNetworkHallucination.pdf},
  doi       = {10.1038/s41586-021-04184-w},
  year      = {2021},
  date      = {2021-12-01},
  urldate   = {2021-12-01},
  journal   = {Nature},
  abstract  = {There has been considerable recent progress in protein structure prediction using deep neural networks to predict inter-residue distances from amino acid sequences. Here we investigate whether the information captured by such networks is sufficiently rich to generate new folded proteins with sequences unrelated to those of the naturally occurring proteins used in training the models. We generate random amino acid sequences, and input them into the trRosetta structure prediction network to predict starting residue–residue distance maps, which, as expected, are quite featureless. We then carry out Monte Carlo sampling in amino acid sequence space, optimizing the contrast (Kullback–Leibler divergence) between the inter-residue distance distributions predicted by the network and background distributions averaged over all proteins. Optimization from different random starting points resulted in novel proteins spanning a wide range of sequences and predicted structures. We obtained synthetic genes encoding 129 of the network-‘hallucinated’ sequences, and expressed and purified the proteins in Escherichia coli; 27 of the proteins yielded monodisperse species with circular dichroism spectra consistent with the hallucinated structures. We determined the three-dimensional structures of three of the hallucinated proteins, two by X-ray crystallography and one by NMR, and these closely matched the hallucinated models. Thus, deep networks trained to predict native protein structures from their sequences can be inverted to design new proteins, and such networks and methods should contribute alongside traditional physics-based models to the de novo design of proteins with new functions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Minkyung Baek, Ivan Anishchenko, Hahnbeom Park, Ian R. Humphreys, David Baker
Protein oligomer modeling guided by predicted inter-chain contacts in CASP14 Journal Article
In: Proteins, 2021.
@article{Baek2021b,
  title     = {Protein oligomer modeling guided by predicted inter-chain contacts in {CASP14}},
  author    = {Baek, Minkyung
               and Anishchenko, Ivan
               and Park, Hahnbeom
               and Humphreys, Ian R.
               and Baker, David},
  url       = {https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.26197, Proteins},
  doi       = {10.1002/prot.26197},
  year      = {2021},
  date      = {2021-07-29},
  urldate   = {2021-07-29},
  journal   = {Proteins},
  abstract  = {For CASP14, we developed deep learning-based methods for predicting homo-oligomeric and hetero-oligomeric contacts and used them for oligomer modeling. To build structure models, we developed an oligomer structure generation method that utilizes predicted inter-chain contacts to guide iterative restrained minimization from random backbone structures. We supplemented this gradient-based fold-and-dock method with template-based and ab initio docking approaches using deep learning-based subunit predictions on 29 assembly targets. These methods produced oligomer models with summed Z-scores 5.5 units higher than the next best group, with the fold-and-dock method having the best relative performance. Over the eight targets for which this method was used, the best of the five submitted models had average oligomer TM-score of 0.71 (average oligomer TM-score of the next best group: 0.64), and explicit modeling of inter-subunit interactions improved modeling of six out of 40 individual domains (ΔGDT-TS > 2.0).},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Baek, Minkyung and DiMaio, Frank and Anishchenko, Ivan and Dauparas, Justas and Ovchinnikov, Sergey and Lee, Gyu Rie and Wang, Jue and Cong, Qian and Kinch, Lisa N. and Schaeffer, R. Dustin and Millán, Claudia and Park, Hahnbeom and Adams, Carson and Glassman, Caleb R. and DeGiovanni, Andy and Pereira, Jose H. and Rodrigues, Andria V. and van Dijk, Alberdina A. and Ebrecht, Ana C. and Opperman, Diederik J. and Sagmeister, Theo and Buhlheller, Christoph and Pavkov-Keller, Tea and Rathinaswamy, Manoj K. and Dalwadi, Udit and Yip, Calvin K. and Burke, John E. and Garcia, K. Christopher and Grishin, Nick V. and Adams, Paul D. and Read, Randy J. and Baker, David
Accurate prediction of protein structures and interactions using a three-track neural network Journal Article
In: Science, 2021.
@article{Baek2021,
  title     = {Accurate prediction of protein structures and interactions using a three-track neural network},
  author    = {Baek, Minkyung
               and DiMaio, Frank
               and Anishchenko, Ivan
               and Dauparas, Justas
               and Ovchinnikov, Sergey
               and Lee, Gyu Rie
               and Wang, Jue
               and Cong, Qian
               and Kinch, Lisa N.
               and Schaeffer, R. Dustin
               and Millán, Claudia
               and Park, Hahnbeom
               and Adams, Carson
               and Glassman, Caleb R.
               and DeGiovanni, Andy
               and Pereira, Jose H.
               and Rodrigues, Andria V.
               and van Dijk, Alberdina A.
               and Ebrecht, Ana C.
               and Opperman, Diederik J.
               and Sagmeister, Theo
               and Buhlheller, Christoph
               and Pavkov-Keller, Tea
               and Rathinaswamy, Manoj K.
               and Dalwadi, Udit
               and Yip, Calvin K.
               and Burke, John E.
               and Garcia, K. Christopher
               and Grishin, Nick V.
               and Adams, Paul D.
               and Read, Randy J.
               and Baker, David},
  url       = {http://science.sciencemag.org/content/early/2021/07/14/science.abj8754, Science
https://www.ipd.uw.edu/wp-content/uploads/2021/07/Baek_etal_Science2021_RoseTTAFold.pdf, Download PDF},
  doi       = {10.1126/science.abj8754},
  year      = {2021},
  date      = {2021-07-15},
  urldate   = {2021-07-15},
  journal   = {Science},
  abstract  = {DeepMind presented remarkably accurate predictions at the recent CASP14 protein structure prediction assessment conference. We explored network architectures incorporating related ideas and obtained the best performance with a three-track network in which information at the 1D sequence level, the 2D distance map level, and the 3D coordinate level is successively transformed and integrated. The three-track network produces structure predictions with accuracies approaching those of DeepMind in CASP14, enables the rapid solution of challenging X-ray crystallography and cryo-EM structure modeling problems, and provides insights into the functions of proteins of currently unknown structure. The network also enables rapid generation of accurate protein-protein complex models from sequence information alone, short-circuiting traditional approaches which require modeling of individual subunits followed by docking. We make the method available to the scientific community to speed biological research.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Norn, Christoffer, Wicky, Basile I. M., Juergens, David, Liu, Sirui, Kim, David, Tischer, Doug, Koepnick, Brian, Anishchenko, Ivan, Baker, David, Ovchinnikov, Sergey
Protein sequence design by conformational landscape optimization Journal Article
In: Proceedings of the National Academy of Sciences, vol. 118, no. 11, 2021.
@article{Norn2021,
  title     = {Protein sequence design by conformational landscape optimization},
  author    = {Norn, Christoffer
               and Wicky, Basile I. M.
               and Juergens, David
               and Liu, Sirui
               and Kim, David
               and Tischer, Doug
               and Koepnick, Brian
               and Anishchenko, Ivan
               and Baker, David
               and Ovchinnikov, Sergey},
  url       = {https://www.pnas.org/content/118/11/e2017228118, PNAS
https://www.bakerlab.org/wp-content/uploads/2021/03/Norn_etal_PNAS2021_LandscapeOptimization.pdf, Download PDF},
  doi       = {10.1073/pnas.2017228118},
  year      = {2021},
  date      = {2021-03-16},
  urldate   = {2021-03-16},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {118},
  number    = {11},
  abstract  = {Almost all proteins fold to their lowest free energy state, which is determined by their amino acid sequence. Computational protein design has primarily focused on finding sequences that have very low energy in the target designed structure. However, what is most relevant during folding is not the absolute energy of the folded state but the energy difference between the folded state and the lowest-lying alternative states. We describe a deep learning approach that captures aspects of the folding landscape, in particular the presence of structures in alternative energy minima, and show that it can enhance current protein design methods. The protein design problem is to identify an amino acid sequence that folds to a desired structure. Given Anfinsen{\textquoteright}s thermodynamic hypothesis of folding, this can be recast as finding an amino acid sequence for which the desired structure is the lowest energy state. As this calculation involves not only all possible amino acid sequences but also, all possible structures, most current approaches focus instead on the more tractable problem of finding the lowest-energy amino acid sequence for the desired structure, often checking by protein structure prediction in a second step that the desired structure is indeed the lowest-energy conformation for the designed sequence, and typically discarding a large fraction of designed sequences for which this is not the case. Here, we show that by backpropagating gradients through the transform-restrained Rosetta (trRosetta) structure prediction network from the desired structure to the input amino acid sequence, we can directly optimize over all possible amino acid sequences and all possible structures in a single calculation. We find that trRosetta calculations, which consider the full conformational landscape, can be more effective than Rosetta single-point energy estimations in predicting folding and stability of de novo designed proteins.
We compare sequence design by conformational landscape optimization with the standard energy-based sequence design methodology in Rosetta and show that the former can result in energy landscapes with fewer alternative energy minima. We show further that more funneled energy landscapes can be designed by combining the strengths of the two approaches: the low-resolution trRosetta model serves to disfavor alternative states, and the high-resolution Rosetta model serves to create a deep energy minimum at the design target structure.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Du, Zongyang and Su, Hong and Wang, Wenkai and Ye, Lisha and Wei, Hong and Peng, Zhenling and Anishchenko, Ivan and Baker, David and Yang, Jianyi
The trRosetta server for fast and accurate protein structure prediction Journal Article
In: Nature Protocols, 2021.
@article{Du2021,
  title     = {The {trRosetta} server for fast and accurate protein structure prediction},
  author    = {Du, Zongyang
               and Su, Hong
               and Wang, Wenkai
               and Ye, Lisha
               and Wei, Hong
               and Peng, Zhenling
               and Anishchenko, Ivan
               and Baker, David
               and Yang, Jianyi},
  url       = {https://www.nature.com/articles/s41596-021-00628-9
https://www.bakerlab.org/wp-content/uploads/2022/01/Du_etal_NatProt2021_trRosetta_server.pdf},
  doi       = {10.1038/s41596-021-00628-9},
  year      = {2021},
  date      = {2021-12-01},
  urldate   = {2021-12-01},
  journal   = {Nature Protocols},
  abstract  = {The trRosetta (transform-restrained Rosetta) server is a web-based platform for fast and accurate protein structure prediction, powered by deep learning and Rosetta. With the input of a protein’s amino acid sequence, a deep neural network is first used to predict the inter-residue geometries, including distance and orientations. The predicted geometries are then transformed as restraints to guide the structure prediction on the basis of direct energy minimization, which is implemented under the framework of Rosetta. The trRosetta server distinguishes itself from other similar structure prediction servers in terms of rapid and accurate de novo structure prediction. As an illustration, trRosetta was applied to two Pfam families with unknown structures, for which the predicted de novo models were estimated to have high accuracy. Nevertheless, to take advantage of homology modeling, homologous templates are used as additional inputs to the network automatically. In general, it takes ~1 h to predict the final structure for a typical protein with ~300 amino acids, using a maximum of 10 CPU cores in parallel in our cluster system. To enable large-scale structure modeling, a downloadable package of trRosetta with open-source codes is available as well. A detailed guidance for using the package is also available in this protocol. The server and the package are available at https://yanglab.nankai.edu.cn/trRosetta/ and https://yanglab.nankai.edu.cn/trRosetta/download/, respectively.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Naozumi Hiranuma, Hahnbeom Park, Minkyung Baek, Ivan Anishchenko, Justas Dauparas, David Baker
Improved protein structure refinement guided by deep learning based accuracy estimation Journal Article
In: Nature Communications, vol. 12, art. no. 1340, 2021.
@article{Hiranuma2021,
  title     = {Improved protein structure refinement guided by deep learning based accuracy estimation},
  author    = {Hiranuma, Naozumi
               and Park, Hahnbeom
               and Baek, Minkyung
               and Anishchenko, Ivan
               and Dauparas, Justas
               and Baker, David},
  url       = {https://www.nature.com/articles/s41467-021-21511-x, Nature Communications
https://www.bakerlab.org/wp-content/uploads/2021/02/Hiranuma_etal_NatureComms2021_DeepLearningStructureRefinement.pdf, Download PDF},
  doi       = {10.1038/s41467-021-21511-x},
  year      = {2021},
  date      = {2021-02-26},
  urldate   = {2021-02-26},
  journal   = {Nature Communications},
  volume    = {12},
  number    = {1},
  pages     = {1340},
  abstract  = {We develop a deep learning framework (DeepAccNet) that estimates per-residue accuracy and residue-residue distance signed error in protein models and uses these predictions to guide Rosetta protein structure refinement. The network uses 3D convolutions to evaluate local atomic environments followed by 2D convolutions to provide their global contexts and outperforms other methods that similarly predict the accuracy of protein structure models. Overall accuracy predictions for X-ray and cryoEM structures in the PDB correlate with their resolution, and the network should be broadly useful for assessing the accuracy of both predicted structure models and experimentally determined structures and identifying specific regions likely to be in error. Incorporation of the accuracy predictions at multiple stages in the Rosetta refinement protocol considerably increased the accuracy of the resulting protein structure models, illustrating how deep learning can improve search for global energy minima of biomolecules.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Hahnbeom Park, Guangfeng Zhou, Minkyung Baek, David Baker, Frank DiMaio
Force Field Optimization Guided by Small Molecule Crystal Lattice Data Enables Consistent Sub-Angstrom Protein–Ligand Docking Journal Article
In: Journal of Chemical Theory and Computation, 2021.
@article{Park2021,
  title     = {Force Field Optimization Guided by Small Molecule Crystal Lattice Data Enables Consistent Sub-Angstrom Protein–Ligand Docking},
  author    = {Park, Hahnbeom
               and Zhou, Guangfeng
               and Baek, Minkyung
               and Baker, David
               and DiMaio, Frank},
  url       = {https://pubs.acs.org/doi/full/10.1021/acs.jctc.0c01184
https://www.bakerlab.org/wp-content/uploads/2021/02/Park_etal_JCTC2021_Small_mol_force_field_optimization.pdf},
  doi       = {10.1021/acs.jctc.0c01184},
  year      = {2021},
  date      = {2021-02-12},
  journal   = {Journal of Chemical Theory and Computation},
  abstract  = {Accurate and rapid calculation of protein-small molecule interaction free energies is critical for computational drug discovery. Because of the large chemical space spanned by drug-like molecules, classical force fields contain thousands of parameters describing atom-pair distance and torsional preferences; each parameter is typically optimized independently on simple representative molecules. Here, we describe a new approach in which small molecule force field parameters are jointly optimized guided by the rich source of information contained within thousands of available small molecule crystal structures. We optimize parameters by requiring that the experimentally determined molecular lattice arrangements have lower energy than all alternative lattice arrangements. Thousands of independent crystal lattice-prediction simulations were run on each of 1386 small molecule crystal structures, and energy function parameters of an implicit solvent energy model were optimized, so native crystal lattice arrangements had the lowest energy. The resulting energy model was implemented in Rosetta, together with a rapid genetic algorithm docking method employing grid-based scoring and receptor flexibility. The success rate of bound structure recapitulation in cross-docking on 1112 complexes was improved by more than 10% over previously published methods, with solutions within <1 Å in over half of the cases. Our results demonstrate that small molecule crystal structures are a rich source of information for guiding molecular force field development, and the improved Rosetta energy function should increase accuracy in a wide range of small molecule structure prediction and design studies.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2020
FROM THE LAB
Yang, Jianyi, Anishchenko, Ivan, Park, Hahnbeom, Peng, Zhenling, Ovchinnikov, Sergey, Baker, David
Improved protein structure prediction using predicted interresidue orientations Journal Article
In: Proceedings of the National Academy of Sciences, 2020, ISSN: 0027-8424.
@article{Yang2020,
  title     = {Improved protein structure prediction using predicted interresidue orientations},
  author    = {Yang, Jianyi
               and Anishchenko, Ivan
               and Park, Hahnbeom
               and Peng, Zhenling
               and Ovchinnikov, Sergey
               and Baker, David},
  url       = {https://www.pnas.org/content/early/2020/01/01/1914677117
https://www.bakerlab.org/wp-content/uploads/2020/01/Yang2020_ImprovedStructurePredictionInterresidueOrientations.pdf
},
  doi       = {10.1073/pnas.1914677117},
  issn      = {0027-8424},
  year      = {2020},
  date      = {2020-01-02},
  journal   = {Proceedings of the National Academy of Sciences},
  abstract  = {Protein structure prediction is a longstanding challenge in computational biology. Through extension of deep learning-based prediction to interresidue orientations in addition to distances, and the development of a constrained optimization by Rosetta, we show that more accurate models can be generated. Results on a set of 18 de novo-designed proteins suggests the proposed method should be directly applicable to current challenges in de novo protein design. The prediction of interresidue contacts and distances from coevolutionary data using deep learning has considerably advanced protein structure prediction. Here, we build on these advances by developing a deep residual network for predicting interresidue orientations, in addition to distances, and a Rosetta-constrained energy-minimization protocol for rapidly and accurately generating structure models guided by these restraints. In benchmark tests on 13th Community-Wide Experiment on the Critical Assessment of Techniques for Protein Structure Prediction (CASP13)- and Continuous Automated Model Evaluation (CAMEO)-derived sets, the method outperforms all previously described structure-prediction methods. Although trained entirely on native proteins, the network consistently assigns higher probability to de novo-designed proteins, identifying the key fold-determining residues and providing an independent quantitative measure of the "ideality" of a protein structure. The method promises to be useful for a broad range of protein structure prediction and design problems.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Sorry, no publications matched your criteria.
2019
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Qi Wu, Zhenling Peng, Ivan Anishchenko, Qian Cong, David Baker, Jianyi Yang
Protein contact prediction using metagenome sequence data and residual neural networks Journal Article
In: Bioinformatics, vol. 36, no. 1, 2019.
@article{Wu2019,
  title     = {Protein contact prediction using metagenome sequence data and residual neural networks},
  author    = {Wu, Qi
               and Peng, Zhenling
               and Anishchenko, Ivan
               and Cong, Qian
               and Baker, David
               and Yang, Jianyi},
  url       = {https://academic.oup.com/bioinformatics/article/36/1/41/5512356},
  doi       = {10.1093/bioinformatics/btz477},
  year      = {2019},
  date      = {2019-06-07},
  journal   = {Bioinformatics},
  volume    = {36},
  number    = {1},
  abstract  = {Motivation: Almost all protein residue contact prediction methods rely on the availability of deep multiple sequence alignments (MSAs). However, many proteins from the poorly populated families do not have sufficient number of homologs in the conventional UniProt database. Here we aim to solve this issue by exploring the rich sequence data from the metagenome sequencing projects. Results: Based on the improved MSA constructed from the metagenome sequence data, we developed MapPred, a new deep learning-based contact prediction method. MapPred consists of two component methods, DeepMSA and DeepMeta, both trained with the residual neural networks. DeepMSA was inspired by the recent method DeepCov, which was trained on 441 matrices of covariance features. By considering the symmetry of contact map, we reduced the number of matrices to 231, which makes the training more efficient in DeepMSA. Experiments show that DeepMSA outperforms DeepCov by 10–13% in precision. DeepMeta works by combining predicted contacts and other sequence profile features. Experiments on three benchmark datasets suggest that the contribution from the metagenome sequence data is significant with P-values less than 4.04E-17. MapPred is shown to be complementary and comparable the state-of-the-art methods. The success of MapPred is attributed to three factors: the deeper MSA from the metagenome sequence data, improved feature design in DeepMSA and optimized training by the residual neural networks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2018
FROM THE LAB
Park, Hahnbeom, Ovchinnikov, Sergey, Kim, David E., DiMaio, Frank, Baker, David
Protein homology model refinement by large-scale energy optimization Journal Article
In: Proceedings of the National Academy of Sciences, vol. 115, no. 12, pp. 3054–3059, 2018, ISSN: 0027-8424.
@article{Park2018,
  title     = {Protein homology model refinement by large-scale energy optimization},
  author    = {Park, Hahnbeom
               and Ovchinnikov, Sergey
               and Kim, David E.
               and DiMaio, Frank
               and Baker, David},
  url       = {https://www.pnas.org/content/115/12/3054
https://www.bakerlab.org/wp-content/uploads/2019/01/Park2018_refinement.pdf},
  doi       = {10.1073/pnas.1719115115},
  issn      = {0027-8424},
  year      = {2018},
  date      = {2018-03-20},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {115},
  number    = {12},
  pages     = {3054--3059},
  abstract  = {Protein structure refinement by direct global energy optimization has been a longstanding challenge in computational structural biology due to limitations in both energy function accuracy and conformational sampling. This manuscript demonstrates that with recent advances in both areas, refinement can significantly improve protein comparative models based on structures of distant homologues. Proteins fold to their lowest free-energy structures, and hence the most straightforward way to increase the accuracy of a partially incorrect protein structure model is to search for the lowest-energy nearby structure. This direct approach has met with little success for two reasons: first, energy function inaccuracies can lead to false energy minima, resulting in model degradation rather than improvement; and second, even with an accurate energy function, the search problem is formidable because the energy only drops considerably in the immediate vicinity of the global minimum, and there are a very large number of degrees of freedom. Here we describe a large-scale energy optimization-based refinement method that incorporates advances in both search and energy function accuracy that can substantially improve the accuracy of low-resolution homology models. The method refined low-resolution homology models into correct folds for 50 of 84 diverse protein families and generated improved models in recent blind structure prediction experiments. Analyses of the basis for these improvements reveal contributions from both the improvements in conformational sampling techniques and the energy function.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Day, Austin L, Greisen, Per, Doyle, Lindsey, Schena, Alberto, Stella, Nephi, Johnsson, Kai, Baker, David, Stoddard, Barry
Unintended specificity of an engineered ligand-binding protein facilitated by unpredicted plasticity of the protein fold Journal Article
In: Protein Engineering, Design and Selection, 2018.
@article{Day2018,
title = {Unintended specificity of an engineered ligand-binding protein facilitated by unpredicted plasticity of the protein fold},
author = {Day, Austin L and Greisen, Per and Doyle, Lindsey and Schena, Alberto and Stella, Nephi and Johnsson, Kai and Baker, David and Stoddard, Barry},
url = {https://dx.doi.org/10.1093/protein/gzy031
https://www.bakerlab.org/wp-content/uploads/2019/02/Day2018.pdf},
doi = {10.1093/protein/gzy031},
year = {2018},
date = {2018-12-19},
journal = {Protein Engineering, Design and Selection},
abstract = {Attempts to create novel ligand-binding proteins often focus on formation of a binding pocket with shape complementarity against the desired ligand (particularly for compounds that lack distinct polar moieties). Although designed proteins often exhibit binding of the desired ligand, in some cases they display unintended recognition behavior. One such designed protein, that was originally intended to bind tetrahydrocannabinol (THC), was found instead to display binding of 25-hydroxy-cholecalciferol (25-D3) and was subjected to biochemical characterization, further selections for enhanced 25-D3 binding affinity and crystallographic analyses. The deviation in specificity is due in part to unexpected alteration of its conformation, corresponding to a significant change of the orientation of an α-helix and an equally large movement of a loop, both of which flank the designed ligand-binding pocket. Those changes led to engineered protein constructs that exhibit significantly more contacts and complementarity towards the 25-D3 ligand than the initial designed protein had been predicted to form towards its intended THC ligand. Molecular dynamics simulations imply that the initial computationally designed mutations may contribute to the movement of the helix. These analyses collectively indicate that accurate prediction and control of backbone dynamics conformation, through a combination of improved conformational sampling and/or de novo structure design, represents a key area of further development for the design and optimization of engineered ligand-binding proteins.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2017-1988
ALL PAPERS
2017
Sergey Ovchinnikov, Hahnbeom Park, David E. Kim, Frank DiMaio, David Baker
Protein structure prediction using Rosetta in CASP12 Journal Article
In: Proteins, 2017.
@article{Ovchinnikov2017,
title = {Protein structure prediction using Rosetta in CASP12},
author = {Sergey Ovchinnikov and Hahnbeom Park and David E. Kim and Frank DiMaio and David Baker},
url = {https://onlinelibrary.wiley.com/doi/epdf/10.1002/prot.25390
https://www.bakerlab.org/wp-content/uploads/2019/10/Ovchinnikov_et_al-2018-Proteins__Structure_Function_and_Bioinformatics.pdf},
doi = {10.1002/prot.25390},
year = {2017},
date = {2017-09-22},
journal = {Proteins},
abstract = {We describe several notable aspects of our structure predictions using Rosetta in CASP12 in the free modeling (FM) and refinement (TR) categories. First, we had previously generated (and published) models for most large protein families lacking experimentally determined structures using Rosetta guided by co-evolution based contact predictions, and for several targets these models proved better starting points for comparative modeling than any known crystal structure—our model database thus starts to fulfill one of the goals of the original protein structure initiative. Second, while our “human” group simply submitted ROBETTA models for most targets, for six targets expert intervention improved predictions considerably; the largest improvement was for T0886 where we correctly parsed two discontinuous domains guided by predicted contact maps to accurately identify a structural homolog of the same fold. Third, Rosetta all atom refinement followed by MD simulations led to consistent but small improvements when starting models were close to the native structure, and larger but less consistent improvements when starting models were further away.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
I Anishchenko, S Ovchinnikov, H Kamisetty, D Baker
Origins of coevolution between residues distant in protein 3D structures Journal Article
In: Proceedings of the National Academy of Sciences, vol. 114, no. 34, pp. 9122-9127, 2017.
@article{1000,
title = {Origins of coevolution between residues distant in protein 3D structures},
author = {Ivan Anishchenko and Sergey Ovchinnikov and Hetunandan Kamisetty and David Baker},
url = {http://www.pnas.org/content/114/34/9122
https://www.bakerlab.org/wp-content/uploads/2018/08/9122.full1_.pdf},
doi = {10.1073/pnas.1702664114},
year = {2017},
date = {2017-08-22},
journal = {Proceedings of the National Academy of Sciences},
volume = {114},
number = {34},
pages = {9122--9127},
abstract = {Residue pairs that directly coevolve in protein families are generally close in protein 3D structures. Here we study the exceptions to this general trend—directly coevolving residue pairs that are distant in protein structures—to determine the origins of evolutionary pressure on spatially distant residues and to understand the sources of error in contact-based structure prediction. Over a set of 4,000 protein families, we find that 25% of directly coevolving residue pairs are separated by more than 5 Å in protein structures and 3% by more than 15 Å. The majority (91%) of directly coevolving residue pairs in the 5–15 Å range are found to be in contact in at least one homologous structure—these exceptions arise from structural variation in the family in the region containing the residues. Thirty-five percent of the exceptions greater than 15 Å are at homo-oligomeric interfaces, 19% arise from family structural variation, and 27% are in repeat proteins likely reflecting alignment errors. Of the remaining long-range exceptions (<1% of the total number of coupled pairs), many can be attributed to close interactions in an oligomeric state. Overall, the results suggest that directly coevolving residue pairs not in repeat proteins are spatially proximal in at least one biologically relevant protein conformation within the family; we find little evidence for direct coupling between residues at spatially separated allosteric and functional sites or for increased direct coupling between residue pairs on putative allosteric pathways connecting them.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Sergey Ovchinnikov, Hahnbeom Park, Neha Varghese, Po-Ssu Huang, Georgios A. Pavlopoulos, David E. Kim, Hetunandan Kamisetty, Nikos C. Kyrpides, David Baker
Protein structure determination using metagenome sequence data Journal Article
In: Science, vol. 355, no. 6322, pp. 294–298, 2017, ISSN: 0036-8075.
@article{Ovchinnikov294,
title = {Protein structure determination using metagenome sequence data},
author = {Sergey Ovchinnikov and Hahnbeom Park and Neha Varghese and Po-Ssu Huang and Georgios A. Pavlopoulos and David E. Kim and Hetunandan Kamisetty and Nikos C. Kyrpides and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2017/01/ovchinnikov_science_2017.pdf
http://science.sciencemag.org/content/355/6322/294},
doi = {10.1126/science.aah4043},
issn = {0036-8075},
year = {2017},
date = {2017-01-01},
journal = {Science},
volume = {355},
number = {6322},
pages = {294--298},
publisher = {American Association for the Advancement of Science},
abstract = {Fewer than a third of the 14,849 known protein families have at least one member with an experimentally determined structure. This leaves more than 5000 protein families with no structural information. Protein modeling using residue-residue contacts inferred from evolutionary data has been successful in modeling unknown structures, but it requires large numbers of aligned sequences. Ovchinnikov et al. augmented such sequence alignments with metagenome sequence data (see the Perspective by Söding). They determined the number of sequences required to allow modeling, developed criteria for model quality, and, where possible, improved modeling by matching predicted contacts to known structures. Their method predicted quality structural models for 614 protein families, of which about 140 represent newly discovered protein folds. Science, this issue p. 294; see also p. 248. Despite decades of work by structural biologists, there are still ~5200 protein families with unknown structure outside the range of comparative modeling. We show that Rosetta structure prediction guided by residue-residue contacts inferred from evolutionary information can accurately model proteins that belong to large families and that metagenome sequence data more than triple the number of protein families with sufficient sequences for accurate modeling. We then integrate metagenome data, contact-based structure matching, and Rosetta structure calculations to generate models for 614 protein families with currently unknown structures; 206 are membrane proteins and 137 have folds not represented in the Protein Data Bank. This approach provides the representative models for large protein families originally envisioned as the goal of the Protein Structure Initiative at a fraction of the cost.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2016
Ovchinnikov, Sergey, Park, Hahnbeom, Kim, David E., Liu, Yuan, Wang, Ray Yu-Ruei, Baker, David
Structure prediction using sparse simulated NOE restraints with Rosetta in CASP11 Journal Article
In: Proteins: Structure, Function, and Bioinformatics, 2016, ISSN: 1097-0134.
@article{PROT:PROT25006,
title = {Structure prediction using sparse simulated NOE restraints with Rosetta in CASP11},
author = {Ovchinnikov, Sergey and Park, Hahnbeom and Kim, David E. and Liu, Yuan and Wang, Ray Yu-Ruei and Baker, David},
url = {http://dx.doi.org/10.1002/prot.25006
https://www.bakerlab.org/wp-content/uploads/2016/05/Ovchinnikov_et_al-2016-Proteins__Structure_Function_and_Bioinformatics.pdf},
doi = {10.1002/prot.25006},
issn = {1097-0134},
year = {2016},
date = {2016-01-01},
journal = {Proteins: Structure, Function, and Bioinformatics},
abstract = {In CASP11 we generated protein structure models using simulated ambiguous and unambiguous nuclear Overhauser effect (NOE) restraints with a two stage protocol. Low resolution models were generated guided by the unambiguous restraints using continuous chain folding for alpha and alpha-beta proteins, and iterative annealing for all beta proteins to take advantage of the strand pairing information implicit in the restraints. The Rosetta fragment/model hybridization protocol was then used to recombine and regularize these models, and refine them in the Rosetta full atom energy function guided by both the unambiguous and the ambiguous restraints. Fifteen out of 19 targets were modeled with GDT-TS quality scores greater than 60 for Model 1, significantly improving upon the non-assisted predictions. Our results suggest that atomic level accuracy is achievable using sparse NOE data when there is at least one correctly assigned NOE for every residue. Proteins 2016. © 2016 Wiley Periodicals, Inc.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2015
S Ovchinnikov, DE Kim, RY Wang, Y Liu, F DiMaio, D Baker
Improved de novo structure prediction in CASP11 by incorporating Co-evolution information into Rosetta Journal Article
In: Proteins, 2015.
@article{S2015,
title = {Improved de novo structure prediction in CASP11 by incorporating Co-evolution information into Rosetta},
author = {Sergey Ovchinnikov and David E. Kim and Ray Yu-Ruei Wang and Yuan Liu and Frank DiMaio and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2015/12/Ovchinnikov_Proteins_2015.pdf},
doi = {10.1002/prot.24974},
year = {2015},
date = {2015-12-17},
journal = {Proteins},
abstract = {We describe CASP11 de novo blind structure predictions made using the Rosetta structure prediction methodology with both automatic and human assisted protocols. Model accuracy was generally improved using co-evolution derived residue-residue contact information as restraints during Rosetta conformational sampling and refinement, particularly when the number of sequences in the family was more than three times the length of the protein. The highlight was the human assisted prediction of T0806, a large and topologically complex target with no homologs of known structure, which had unprecedented accuracy - <3.0 Å root-mean-square deviation (RMSD) from the crystal structure over 223 residues. For this target, we increased the amount of conformational sampling over our fully automated method by employing an iterative hybridization protocol. Our results clearly demonstrate, in a blind prediction scenario, that co-evolution derived contacts can considerably increase the accuracy of template-free structure modeling. This article is protected by copyright. All rights reserved.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
S Ovchinnikov, L Kinch, H Park, Y Liao, J Pei, DE Kim, H Kamisetty, NV Grishin, D Baker
Large-scale determination of previously unsolved protein structures using evolutionary information Journal Article
In: eLife, 2015.
@article{S2015b,
title = {Large-scale determination of previously unsolved protein structures using evolutionary information},
author = {S. Ovchinnikov and L. Kinch and H. Park and Y. Liao and J. Pei and D. E. Kim and H. Kamisetty and N. V. Grishin and D. Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/01/Ovchinnikov_eLife_2015.pdf},
doi = {10.7554/eLife.09248},
year = {2015},
date = {2015-09-03},
journal = {eLife},
abstract = {The prediction of the structures of proteins without detectable sequence similarity to any protein of known structure remains an outstanding scientific challenge. Here we report significant progress in this area. We first describe de novo blind structure predictions of unprecedented accuracy we made for two proteins in large families in the recent CASP11 blind test of protein structure prediction methods by incorporating residue-residue co-evolution information in the Rosetta structure prediction program. We then describe the use of this method to generate structure models for 58 of the 121 large protein families in prokaryotes for which three-dimensional structures are not available. These models, which are posted online for public access, provide structural information for the over 400,000 proteins belonging to the 58 families and suggest hypotheses about mechanism for the subset for which the function is known, and hypotheses about function for the remainder.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2014
George A Khoury, Adam Liwo, Firas Khatib, Hongyi Zhou, Gaurav Chopra, Jaume Bacardit, Leandro O Bortot, Rodrigo A Faccioli, Xin Deng, Yi He, Pawel Krupa, Jilong Li, Magdalena A Mozolewska, Adam K Sieradzan, James Smadbeck, Tomasz Wirecki, Seth Cooper, Jeff Flatten, Kefan Xu, David Baker, Jianlin Cheng, Alexandre C B Delbem, Christodoulos A Floudas, Chen Keasar, Michael Levitt, Zoran Popović, Harold A Scheraga, Jeffrey Skolnick, Silvia N Crivelli
WeFold: a coopetition for protein structure prediction. Journal Article
In: Proteins, vol. 82, pp. 1850-68, 2014, ISSN: 1097-0134.
@article{625,
title = {WeFold: a coopetition for protein structure prediction},
author = {George A Khoury and Adam Liwo and Firas Khatib and Hongyi Zhou and Gaurav Chopra and Jaume Bacardit and Leandro O Bortot and Rodrigo A Faccioli and Xin Deng and Yi He and Pawel Krupa and Jilong Li and Magdalena A Mozolewska and Adam K Sieradzan and James Smadbeck and Tomasz Wirecki and Seth Cooper and Jeff Flatten and Kefan Xu and David Baker and Jianlin Cheng and Alexandre C B Delbem and Christodoulos A Floudas and Chen Keasar and Michael Levitt and Zoran Popović and Harold A Scheraga and Jeffrey Skolnick and Silvia N Crivelli},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Khoury_Proteins_2014.pdf},
doi = {10.1002/prot.24538},
issn = {1097-0134},
year = {2014},
date = {2014-09-01},
journal = {Proteins},
volume = {82},
pages = {1850--1868},
abstract = {The protein structure prediction problem continues to elude scientists. Despite the introduction of many methods, only modest gains were made over the last decade for certain classes of prediction targets. To address this challenge, a social-media based worldwide collaborative effort, named WeFold, was undertaken by 13 labs. During the collaboration, the laboratories were simultaneously competing with each other. Here, we present the first attempt at "coopetition" in scientific research applied to the protein structure prediction and refinement problems. The coopetition was possible by allowing the participating labs to contribute different components of their protein structure prediction pipelines and create new hybrid pipelines that they tested during CASP10. This manuscript describes both successes and areas needing improvement as identified throughout the first WeFold experiment and discusses the efforts that are underway to advance this initiative. A footprint of all contributions and structures are publicly accessible at http://www.wefold.org.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Kuang-Yui M Chen, Jiaming Sun, Jason S Salvo, David Baker, Patrick Barth
High-resolution modeling of transmembrane helical protein structures from distant homologues. Journal Article
In: PLoS computational biology, vol. 10, pp. e1003636, 2014, ISSN: 1553-7358.
@article{622,
title = {High-resolution modeling of transmembrane helical protein structures from distant homologues},
author = {Kuang-Yui M Chen and Jiaming Sun and Jason S Salvo and David Baker and Patrick Barth},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Chen_PLOS_2014.pdf},
doi = {10.1371/journal.pcbi.1003636},
issn = {1553-7358},
year = {2014},
date = {2014-05-01},
journal = {PLoS Computational Biology},
volume = {10},
pages = {e1003636},
abstract = {Eukaryotic transmembrane helical (TMH) proteins perform a wide diversity of critical cellular functions, but remain structurally largely uncharacterized and their high-resolution structure prediction is currently hindered by the lack of close structural homologues. To address this problem, we present a novel and generic method for accurately modeling large TMH protein structures from distant homologues exhibiting distinct loop and TMH conformations. Models of the adenosine A2AR and chemokine CXCR4 receptors were first ranked in GPCR-DOCK blind prediction contests in the receptor structure accuracy category. In a benchmark of 50 TMH protein homolog pairs of diverse topology (from 5 to 12 TMHs), size (from 183 to 420 residues) and sequence identity (from 15% to 70%), the method improves most starting templates, and achieves near-atomic accuracy prediction of membrane-embedded regions. Unlike starting templates, the models are of suitable quality for computer-based protein engineering: redesigned models and redesigned X-ray structures exhibit very similar native interactions. The method should prove useful for the atom-level modeling and design of a large fraction of structurally uncharacterized TMH proteins from a wide range of structural homologues.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Sergey Ovchinnikov, Hetunandan Kamisetty, David Baker
Robust and accurate prediction of residue-residue interactions across protein interfaces using evolutionary information. Journal Article
In: eLife, vol. 3, pp. e02030, 2014, ISSN: 2050-084X.
@article{540,
title = {Robust and accurate prediction of residue-residue interactions across protein interfaces using evolutionary information},
author = {Sergey Ovchinnikov and Hetunandan Kamisetty and David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Ovchinnikov_2014A.pdf},
doi = {10.7554/eLife.02030},
issn = {2050-084X},
year = {2014},
date = {2014-05-01},
journal = {eLife},
volume = {3},
pages = {e02030},
abstract = {Do the amino acid sequence identities of residues that make contact across protein interfaces covary during evolution? If so, such covariance could be used to predict contacts across interfaces and assemble models of biological complexes. We find that residue pairs identified using a pseudo-likelihood-based method to covary across protein-protein interfaces in the 50S ribosomal unit and 28 additional bacterial protein complexes with known structure are almost always in contact in the complex, provided that the number of aligned sequences is greater than the average length of the two proteins. We use this method to make subunit contact predictions for an additional 36 protein complexes with unknown structures, and present models based on these predictions for the tripartite ATP-independent periplasmic (TRAP) transporter, the tripartite efflux system, the pyruvate formate lyase-activating enzyme complex, and the methionine ABC transporter. DOI: http://dx.doi.org/10.7554/eLife.02030.001.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
David Baker
Protein folding, structure prediction and design. Journal Article
In: Biochemical Society transactions, vol. 42, pp. 225-9, 2014, ISSN: 1470-8752.
@article{529,
title = {Protein folding, structure prediction and design},
author = {David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Baker_BiochemSocTrans_2014.pdf},
doi = {10.1042/BST20130055},
issn = {1470-8752},
year = {2014},
date = {2014-04-01},
journal = {Biochemical Society Transactions},
volume = {42},
pages = {225--229},
abstract = {I describe how experimental studies of protein folding have led to advances in protein structure prediction and protein design. I describe the finding that protein sequences are not optimized for rapid folding, the contact order-protein folding rate correlation, the incorporation of experimental insights into protein folding into the Rosetta protein structure production methodology and the use of this methodology to determine structures from sparse experimental data. I then describe the inverse problem (protein design) and give an overview of recent work on designing proteins with new structures and functions. I also describe the contributions of the general public to these efforts through the Rosetta@home distributed computing project and the FoldIt interactive protein folding and design game.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2013
Rocco Moretti, Sarel J Fleishman, Rudi Agius, Mieczyslaw Torchala, Paul A Bates, Panagiotis L Kastritis, João P G L M Rodrigues, Mikaël Trellet, Alexandre M J J Bonvin, Meng Cui, Marianne Rooman, Dimitri Gillis, Yves Dehouck, Iain Moal, Miguel Romero-Durana, Laura Perez-Cano, Chiara Pallara, Brian Jimenez, Juan Fernandez-Recio, Samuel Flores, Michael Pacella, Krishna Praneeth Kilambi, Jeffrey J Gray, Petr Popov, Sergei Grudinin, Juan Esquivel-Rodríguez, Daisuke Kihara, Nan Zhao, Dmitry Korkin, Xiaolei Zhu, Omar N A Demerdash, Julie C Mitchell, Eiji Kanamori, Yuko Tsuchiya, Haruki Nakamura, Hasup Lee, Hahnbeom Park, Chaok Seok, Jamica Sarmiento, Shide Liang, Shusuke Teraguchi, Daron M Standley, Hiromitsu Shimoyama, Genki Terashi, Mayuko Takeda-Shitaka, Mitsuo Iwadate, Hideaki Umeyama, Dmitri Beglov, David R Hall, Dima Kozakov, Sandor Vajda, Brian G Pierce, Howook Hwang, Thom Vreven, Zhiping Weng, Yangyu Huang, Haotian Li, Xiufeng Yang, Xiaofeng Ji, Shiyong Liu, Yi Xiao, Martin Zacharias, Sanbo Qin, Huan-Xiang Zhou, Sheng-You Huang, Xiaoqin Zou, Sameer Velankar, Joël Janin, Shoshana J Wodak, David Baker
Community-wide evaluation of methods for predicting the effect of mutations on protein-protein interactions. Journal Article
In: Proteins, vol. 81, pp. 1980-7, 2013, ISSN: 1097-0134.
@article{505,
title = {Community-wide evaluation of methods for predicting the effect of mutations on protein-protein interactions},
author = {Rocco Moretti and Sarel J Fleishman and Rudi Agius and Mieczyslaw Torchala and Paul A Bates and Panagiotis L Kastritis and João P G L M Rodrigues and Mikaël Trellet and Alexandre M J J Bonvin and Meng Cui and Marianne Rooman and Dimitri Gillis and Yves Dehouck and Iain Moal and Miguel Romero-Durana and Laura Perez-Cano and Chiara Pallara and Brian Jimenez and Juan Fernandez-Recio and Samuel Flores and Michael Pacella and Krishna Praneeth Kilambi and Jeffrey J Gray and Petr Popov and Sergei Grudinin and Juan Esquivel-Rodríguez and Daisuke Kihara and Nan Zhao and Dmitry Korkin and Xiaolei Zhu and Omar N A Demerdash and Julie C Mitchell and Eiji Kanamori and Yuko Tsuchiya and Haruki Nakamura and Hasup Lee and Hahnbeom Park and Chaok Seok and Jamica Sarmiento and Shide Liang and Shusuke Teraguchi and Daron M Standley and Hiromitsu Shimoyama and Genki Terashi and Mayuko Takeda-Shitaka and Mitsuo Iwadate and Hideaki Umeyama and Dmitri Beglov and David R Hall and Dima Kozakov and Sandor Vajda and Brian G Pierce and Howook Hwang and Thom Vreven and Zhiping Weng and Yangyu Huang and Haotian Li and Xiufeng Yang and Xiaofeng Ji and Shiyong Liu and Yi Xiao and Martin Zacharias and Sanbo Qin and Huan-Xiang Zhou and Sheng-You Huang and Xiaoqin Zou and Sameer Velankar and Joël Janin and Shoshana J Wodak and David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Moretti_Proteins_2013.pdf},
doi = {10.1002/prot.24356},
issn = {1097-0134},
year = {2013},
date = {2013-11-01},
journal = {Proteins},
volume = {81},
pages = {1980--1987},
abstract = {Community-wide blind prediction experiments such as CAPRI and CASP provide an objective measure of the current state of predictive methodology. Here we describe a community-wide assessment of methods to predict the effects of mutations on protein-protein interactions. Twenty-two groups predicted the effects of comprehensive saturation mutagenesis for two designed influenza hemagglutinin binders and the results were compared with experimental yeast display enrichment data obtained using deep sequencing. The most successful methods explicitly considered the effects of mutation on monomer stability in addition to binding affinity, carried out explicit side-chain sampling and backbone relaxation, evaluated packing, electrostatic, and solvation effects, and correctly identified around a third of the beneficial mutations. Much room for improvement remains for even the best techniques, and large-scale fitness landscapes should continue to provide an excellent test bed for continued evaluation of both existing and new prediction methodologies.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Robert Vernon, Yang Shen, David Baker, Oliver F Lange
Improved chemical shift based fragment selection for CS-Rosetta using Rosetta3 fragment picker. Journal Article
In: Journal of biomolecular NMR, vol. 57, pp. 117-27, 2013, ISSN: 1573-5001.
@article{508,
title = {Improved chemical shift based fragment selection for CS-Rosetta using Rosetta3 fragment picker},
author = {Robert Vernon and Yang Shen and David Baker and Oliver F Lange},
doi = {10.1007/s10858-013-9772-4},
issn = {1573-5001},
year = {2013},
date = {2013-10-01},
journal = {Journal of Biomolecular NMR},
volume = {57},
pages = {117--127},
abstract = {A new fragment picker has been developed for CS-Rosetta that combines beneficial features of the original fragment picker, MFR, used with CS-Rosetta, and the fragment picker, NNMake, that was used for purely sequence based fragment selection in the context of ROSETTA de-novo structure prediction. Additionally, the new fragment picker has reduced sensitivity to outliers and other difficult to match data points rendering the protocol more robust and less likely to introduce bias towards wrong conformations in cases where data is bad, missing or inconclusive. The fragment picker protocol gives significant improvements on 6 of 23 CS-Rosetta targets. An independent benchmark on 39 protein targets, whose NMR data sets were published only after protocol optimization had been finished, also show significantly improved performance for the new fragment picker (van der Schot et al. in J Biomol NMR, 2013).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Hetunandan Kamisetty, Sergey Ovchinnikov, David Baker
Assessing the utility of coevolution-based residue-residue contact predictions in a sequence- and structure-rich era. Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 110, pp. 15674-9, 2013, ISSN: 1091-6490.
@article{498,
title = {Assessing the utility of coevolution-based residue-residue contact predictions in a sequence- and structure-rich era},
author = {Hetunandan Kamisetty and Sergey Ovchinnikov and David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Kamisetty_PNAS_2013.pdf},
doi = {10.1073/pnas.1314045110},
issn = {1091-6490},
year = {2013},
date = {2013-09-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {110},
pages = {15674--15679},
abstract = {Recently developed methods have shown considerable promise in predicting residue-residue contacts in protein 3D structures using evolutionary covariance information. However, these methods require large numbers of evolutionarily related sequences to robustly assess the extent of residue covariation, and the larger the protein family, the more likely that contact information is unnecessary because a reasonable model can be built based on the structure of a homolog. Here we describe a method that integrates sequence coevolution and structural context information using a pseudolikelihood approach, allowing more accurate contact predictions from fewer homologous sequences. We rigorously assess the utility of predicted contacts for protein structure prediction using large and representative sequence and structure databases from recent structure prediction experiments. We find that contact predictions are likely to be accurate when the number of aligned sequences (with sequence redundancy reduced to 90%) is greater than five times the length of the protein, and that accurate predictions are likely to be useful for structure modeling if the aligned sequences are more similar to the protein of interest than to the closest homolog of known structure. These conditions are currently met by 422 of the protein families collected in the Pfam database.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Gijs van der Schot, Zaiyong Zhang, Robert Vernon, Yang Shen, Wim F Vranken, David Baker, Alexandre M J J Bonvin, Oliver F Lange
Improving 3D structure prediction from chemical shift data. Journal Article
In: Journal of biomolecular NMR, vol. 57, pp. 27-35, 2013, ISSN: 1573-5001.
@article{507,
title = {Improving 3D structure prediction from chemical shift data},
author = {Gijs van der Schot and Zaiyong Zhang and Robert Vernon and Yang Shen and Wim F Vranken and David Baker and Alexandre M J J Bonvin and Oliver F Lange},
doi = {10.1007/s10858-013-9762-6},
issn = {1573-5001},
year = {2013},
date = {2013-09-01},
journal = {Journal of Biomolecular NMR},
volume = {57},
pages = {27--35},
abstract = {We report advances in the calculation of protein structures from chemical shift nuclear magnetic resonance data alone. Our previously developed method, CS-Rosetta, assembles structures from a library of short protein fragments picked from a large library of protein structures using chemical shifts and sequence information. Here we demonstrate that combination of a new and improved fragment picker and the iterative sampling algorithm RASREC yield significant improvements in convergence and accuracy. Moreover, we introduce improved criteria for assessing the accuracy of the models produced by the method. The method was tested on 39 proteins in the 50-100 residue size range and yields reliable structures in 70% of the cases. All structures that passed the reliability filter were accurate (<2 Å RMSD from the reference).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
David E Kim, Frank DiMaio, Ray Yu-Ruei Wang, Yifan Song, David Baker
One contact for every twelve residues allows robust and accurate topology-level protein structure modeling. Journal Article
In: Proteins, 2013, ISSN: 1097-0134.
@article{506,
  title     = {One contact for every twelve residues allows robust and accurate topology-level protein structure modeling},
  author    = {David E Kim and Frank DiMaio and Ray Yu-Ruei Wang and Yifan Song and David Baker},
  url       = {http://www.bakerlab.org/wp-content/uploads/2015/12/Kim_Proteins_2013.pdf},
  doi       = {10.1002/prot.24374},
  issn      = {1097-0134},
  year      = {2013},
  date      = {2013-07-01},
  journal   = {Proteins},
  abstract  = {A number of methods have been described for identifying pairs of contacting residues in protein three-dimensional structures, but it is unclear how many contacts are required for accurate structure modeling. The CASP10 assisted contact experiment provided a blind test of contact guided protein structure modeling. We describe the models generated for these contact guided prediction challenges using the Rosetta structure modeling methodology. For nearly all cases, the submitted models had the correct overall topology, and in some cases, they had near atomic-level accuracy; for example the model of the 384 residue homo-oligomeric tetramer (Tc680o) had only 2.9 {\AA} root-mean-square deviation (RMSD) from the crystal structure. Our results suggest that experimental and bioinformatic methods for obtaining contact information may need to generate only one correct contact for every 12 residues in the protein to allow accurate topology level modeling. Proteins 2013;. {\textcopyright} 2013 Wiley Periodicals, Inc.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Gert Kiss, Nihan Celebi-Ölçüm, Rocco Moretti, David Baker, K N Houk
Computational enzyme design Journal Article
In: Angewandte Chemie (International ed. in English), vol. 52, pp. 5700-25, 2013, ISSN: 1521-3773.
@article{472,
  title     = {Computational enzyme design},
  author    = {Gert Kiss and Nihan Celebi-{\"O}l{\c{c}}{\"u}m and Rocco Moretti and David Baker and K N Houk},
  url       = {http://www.bakerlab.org/wp-content/uploads/2015/12/Kiss_AngewChemIntEd_2013.pdf},
  doi       = {10.1002/anie.201204077},
  issn      = {1521-3773},
  year      = {2013},
  date      = {2013-05-01},
  journal   = {Angewandte Chemie (International ed. in English)},
  volume    = {52},
  pages     = {5700--5725},
  abstract  = {Recent developments in computational chemistry and biology have come together in the "inside-out" approach to enzyme engineering. Proteins have been designed to catalyze reactions not previously accelerated in nature. Some of these proteins fold and act as catalysts, but the success rate is still low. The achievements and limitations of the current technology are highlighted and contrasted to other protein engineering techniques. On its own, computational "inside-out" design can lead to the production of catalytically active and selective proteins, but their kinetic performances fall short of natural enzymes. When combined with directed evolution, molecular dynamics simulations, and crowd-sourced structure-prediction approaches, however, computational designs can be significantly improved in terms of binding, turnover, and thermal stability.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
MA Molski, JL Goodman, FC Chou, D Baker, R Das, A Schepartz
Remodeling a beta-peptide bundle Journal Article
In: Chemical Science, vol. 4, pp. 319-324, 2013, ISSN: 2041-6520.
@article{605,
  title     = {Remodeling a beta-peptide bundle},
  author    = {MA Molski and JL Goodman and FC Chou and D Baker and R Das and A Schepartz},
  url       = {http://www.bakerlab.org/wp-content/uploads/2015/12/remodelingabeta_Baker2013.pdf},
  doi       = {10.1039/c2sc21117c},
  issn      = {2041-6520},
  year      = {2013},
  date      = {2013},
  journal   = {Chemical Science},
  volume    = {4},
  pages     = {319--324},
  abstract  = {Natural biopolymers fold with fidelity, burying diverse side chains into well-packed cores and protecting their backbones from solvent. Certain beta-peptide oligomers assemble into bundles of defined octameric stoichiometry that resemble natural proteins in many respects. These beta-peptide bundles are thermostable, fold cooperatively, exchange interior amide N-H protons slowly, exclude hydrophobic dyes, and can be characterized at high resolution using X-ray crystallography - just like many proteins found in nature. But unlike natural proteins, all octameric beta-peptide bundles contain a sequence-uniform hydrophobic core composed of 32 leucine side chains. Here we apply rational design principles, including the Rosetta computational design methodology, to introduce sequence diversity into the bundle core while retaining the characteristic beta-peptide bundle fold. Using circular dichroism spectroscopy and analytical ultracentrifugation, we confirmed the prediction that an octameric bundle still assembles upon a major remodelling of its core: the mutation of sixteen core beta-homo-leucine side chains into sixteen beta-homo-phenylalanine side chains. Nevertheless, the bundle containing a partially beta-homo-phenylalanine core poorly protects interior amide protons from exchange, suggesting molten-globule-like properties. We further improve stability by the incorporation of eight beta-homo-pentafluorophenylalanine side chains, giving an assembly with amide protection factors comparable to prior well-structured bundles. By demonstrating that their cores tolerate significant sequence variation, the beta-peptide bundles reported here represent a starting point for the "bottom-up" construction of beta-peptide assemblies possessing both structure and sophisticated function.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2012
Troy C Krzysiak, Jinwon Jung, James Thompson, David Baker, Angela M Gronenborn
APOBEC2 is a monomer in solution: implications for APOBEC3G models Journal Article
In: Biochemistry, vol. 51, pp. 2008-17, 2012, ISSN: 1520-4995.
@article{604,
  title     = {{APOBEC2} is a monomer in solution: implications for {APOBEC3G} models},
  author    = {Troy C Krzysiak and Jinwon Jung and James Thompson and David Baker and Angela M Gronenborn},
  url       = {http://www.bakerlab.org/wp-content/uploads/2015/12/apobec2isamonomer_Baker2012.pdf},
  doi       = {10.1021/bi300021s},
  issn      = {1520-4995},
  year      = {2012},
  date      = {2012-03-01},
  journal   = {Biochemistry},
  volume    = {51},
  pages     = {2008--2017},
  abstract  = {Although the physiological role of APOBEC2 is still largely unknown, a crystal structure of a truncated variant of this protein was determined several years ago [Prochnow, C. (2007) Nature 445, 447-451]. This APOBEC2 structure had considerable impact in the HIV field because it was considered a good model for the structure of APOBEC3G, an important HIV restriction factor that abrogates HIV infectivity in the absence of the viral accessory protein Vif. The quaternary structure and the arrangement of the monomers of APOBEC2 in the crystal were taken as being representative for APOBEC3G and exploited in explaining its enzymatic and anti-HIV activity. Here we show, unambiguously, that in contrast to the findings for the crystal, APOBEC2 is monomeric in solution. The nuclear magnetic resonance solution structure of full-length APOBEC2 reveals that the N-terminal tail that was removed for crystallization resides close to strand β2, the dimer interface in the crystal structure, and shields this region of the protein from engaging in intermolecular contacts. In addition, the presence of the N-terminal region drastically alters the aggregation propensity of APOBEC2, rendering the full-length protein highly soluble and not prone to precipitation. In summary, our results cast doubt on all previous structure-function predictions for APOBEC3G that were based on the crystal structure of APOBEC2.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Oliver F Lange, David Baker
Resolution-adapted recombination of structural features significantly improves sampling in restraint-guided structure calculation. Journal Article
In: Proteins, vol. 80, pp. 884-95, 2012, ISSN: 1097-0134.
@article{460,
  title     = {Resolution-adapted recombination of structural features significantly improves sampling in restraint-guided structure calculation},
  author    = {Oliver F Lange and David Baker},
  url       = {http://www.bakerlab.org/wp-content/uploads/2015/12/Lange_Proteins_2012.pdf},
  issn      = {1097-0134},
  year      = {2012},
  date      = {2012-03-01},
  journal   = {Proteins},
  volume    = {80},
  pages     = {884--895},
  abstract  = {Recent work has shown that NMR structures can be determined by integrating sparse NMR data with structure prediction methods such as Rosetta. The experimental data serve to guide the search for the lowest energy state towards the deep minimum at the native state which is frequently missed in Rosetta de novo structure calculations. However, as the protein size increases, sampling again becomes limiting; for example, the standard Rosetta protocol involving Monte Carlo fragment insertion starting from an extended chain fails to converge for proteins over 150 amino acids even with guidance from chemical shifts (CS-Rosetta) and other NMR data. The primary limitation of this protocol--that every folding trajectory is completely independent of every other--was recently overcome with the development of a new approach involving resolution-adapted structural recombination (RASREC). Here we describe the RASREC approach in detail and compare it to standard CS-Rosetta. We show that the improved sampling of RASREC is essential in obtaining accurate structures over a benchmark set of 11 proteins in the 15-25 kDa size range using chemical shifts, backbone RDCs and HN-HN NOE data; in a number of cases the improved sampling methodology makes a larger contribution than incorporation of additional experimental data. Experimental data are invaluable for guiding sampling to the vicinity of the global energy minimum, but for larger proteins, the standard Rosetta fold-from-extended-chain protocol does not converge on the native minimum even with experimental data and the more powerful RASREC approach is necessary to converge to accurate solutions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Julia Handl, Joshua Knowles, Robert Vernon, David Baker, Simon C Lovell
The dual role of fragments in fragment-assembly methods for de novo protein structure prediction Journal Article
In: Proteins, vol. 80, pp. 490-504, 2012, ISSN: 1097-0134.
@article{601,
  title     = {The dual role of fragments in fragment-assembly methods for de novo protein structure prediction},
  author    = {Julia Handl and Joshua Knowles and Robert Vernon and David Baker and Simon C Lovell},
  url       = {https://www.bakerlab.org/wp-content/uploads/2018/06/Handl_et_al-2012-Proteins3A_Structure2C_Function2C_and_Bioinformatics.pdf
https://onlinelibrary.wiley.com/doi/full/10.1002/prot.23215},
  doi       = {10.1002/prot.23215},
  issn      = {1097-0134},
  year      = {2012},
  date      = {2012-02-01},
  journal   = {Proteins},
  volume    = {80},
  pages     = {490--504},
  abstract  = {In fragment-assembly techniques for protein structure prediction, models of protein structure are assembled from fragments of known protein structures. This process is typically guided by a knowledge-based energy function and uses a heuristic optimization method. The fragments play two important roles in this process: they define the set of structural parameters available, and they also assume the role of the main variation operators that are used by the optimiser. Previous analysis has typically focused on the first of these roles. In particular, the relationship between local amino acid sequence and local protein structure has been studied by a range of authors. The correlation between the two has been shown to vary with the window length considered, and the results of these analyses have informed directly the choice of fragment length in state-of-the-art prediction techniques. Here, we focus on the second role of fragments and aim to determine the effect of fragment length from an optimization perspective. We use theoretical analyses to reveal how the size and structure of the search space changes as a function of insertion length. Furthermore, empirical analyses are used to explore additional ways in which the size of the fragment insertion influences the search both in a simulation model and for the fragment-assembly technique, Rosetta.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2011
Sarel J Fleishman, Timothy A Whitehead, Eva-Maria Strauch, Jacob E Corn, Sanbo Qin, Huan-Xiang Zhou, Julie C Mitchell, Omar N A Demerdash, Mayuko Takeda-Shitaka, Genki Terashi, Iain H Moal, Xiaofan Li, Paul A Bates, Martin Zacharias, Hahnbeom Park, Jun-su Ko, Hasup Lee, Chaok Seok, Thomas Bourquard, Julie Bernauer, Anne Poupon, Jérôme Azé, Seren Soner, Sefik Kerem Ovali, Pemra Ozbek, Nir Ben Tal, Türkan Haliloglu, Howook Hwang, Thom Vreven, Brian G Pierce, Zhiping Weng, Laura Pérez-Cano, Carles Pons, Juan Fernández-Recio, Fan Jiang, Feng Yang, Xinqi Gong, Libin Cao, Xianjin Xu, Bin Liu, Panwen Wang, Chunhua Li, Cunxin Wang, Charles H Robert, Mainak Guharoy, Shiyong Liu, Yangyu Huang, Lin Li, Dachuan Guo, Ying Chen, Yi Xiao, Nir London, Zohar Itzhaki, Ora Schueler-Furman, Yuval Inbar, Vladimir Potapov, Mati Cohen, Gideon Schreiber, Yuko Tsuchiya, Eiji Kanamori, Daron M Standley, Haruki Nakamura, Kengo Kinoshita, Camden M Driggers, Robert G Hall, Jessica L Morgan, Victor L Hsu, Jian Zhan, Yuedong Yang, Yaoqi Zhou, Panagiotis L Kastritis, Alexandre M J J Bonvin, Weiyi Zhang, Carlos J Camacho, Krishna P Kilambi, Aroop Sircar, Jeffrey J Gray, Masahito Ohue, Nobuyuki Uchikoga, Yuri Matsuzaki, Takashi Ishida, Yutaka Akiyama, Raed Khashan, Stephen Bush, Denis Fouches, Alexander Tropsha, Juan Esquivel-Rodríguez, Daisuke Kihara, P Benjamin Stranges, Ron Jacak, Brian Kuhlman, Sheng-You Huang, Xiaoqin Zou, Shoshana J Wodak, Joel Janin, David Baker
Community-wide assessment of protein-interface modeling suggests improvements to design methodology Journal Article
In: Journal of Molecular Biology, vol. 414, pp. 289-302, 2011, ISSN: 1089-8638.
@article{598,
  title     = {Community-wide assessment of protein-interface modeling suggests improvements to design methodology},
  author    = {Sarel J Fleishman and Timothy A Whitehead and Eva-Maria Strauch and Jacob E Corn and Sanbo Qin and Huan-Xiang Zhou and Julie C Mitchell and Omar N A Demerdash and Mayuko Takeda-Shitaka and Genki Terashi and Iain H Moal and Xiaofan Li and Paul A Bates and Martin Zacharias and Hahnbeom Park and Jun-su Ko and Hasup Lee and Chaok Seok and Thomas Bourquard and Julie Bernauer and Anne Poupon and J{\'e}r{\^o}me Az{\'e} and Seren Soner and Sefik Kerem Ovali and Pemra Ozbek and Nir Ben Tal and T{\"u}rkan Haliloglu and Howook Hwang and Thom Vreven and Brian G Pierce and Zhiping Weng and Laura P{\'e}rez-Cano and Carles Pons and Juan Fern{\'a}ndez-Recio and Fan Jiang and Feng Yang and Xinqi Gong and Libin Cao and Xianjin Xu and Bin Liu and Panwen Wang and Chunhua Li and Cunxin Wang and Charles H Robert and Mainak Guharoy and Shiyong Liu and Yangyu Huang and Lin Li and Dachuan Guo and Ying Chen and Yi Xiao and Nir London and Zohar Itzhaki and Ora Schueler-Furman and Yuval Inbar and Vladimir Potapov and Mati Cohen and Gideon Schreiber and Yuko Tsuchiya and Eiji Kanamori and Daron M Standley and Haruki Nakamura and Kengo Kinoshita and Camden M Driggers and Robert G Hall and Jessica L Morgan and Victor L Hsu and Jian Zhan and Yuedong Yang and Yaoqi Zhou and Panagiotis L Kastritis and Alexandre M J J Bonvin and Weiyi Zhang and Carlos J Camacho and Krishna P Kilambi and Aroop Sircar and Jeffrey J Gray and Masahito Ohue and Nobuyuki Uchikoga and Yuri Matsuzaki and Takashi Ishida and Yutaka Akiyama and Raed Khashan and Stephen Bush and Denis Fouches and Alexander Tropsha and Juan Esquivel-Rodr{\'\i}guez and Daisuke Kihara and P Benjamin Stranges and Ron Jacak and Brian Kuhlman and Sheng-You Huang and Xiaoqin Zou and Shoshana J Wodak and Joel Janin and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2018/06/1-s2.0-S0022283611010552-main.pdf
https://www.sciencedirect.com/science/article/pii/S0022283611010552?via%3Dihub},
  doi       = {10.1016/j.jmb.2011.09.031},
  issn      = {1089-8638},
  year      = {2011},
  date      = {2011-11-01},
  journal   = {Journal of Molecular Biology},
  volume    = {414},
  pages     = {289--302},
  abstract  = {The CAPRI (Critical Assessment of Predicted Interactions) and CASP (Critical Assessment of protein Structure Prediction) experiments have demonstrated the power of community-wide tests of methodology in assessing the current state of the art and spurring progress in the very challenging areas of protein docking and structure prediction. We sought to bring the power of community-wide experiments to bear on a very challenging protein design problem that provides a complementary but equally fundamental test of current understanding of protein-binding thermodynamics. We have generated a number of designed protein-protein interfaces with very favorable computed binding energies but which do not appear to be formed in experiments, suggesting that there may be important physical chemistry missing in the energy calculations. A total of 28 research groups took up the challenge of determining what is missing: we provided structures of 87 designed complexes and 120 naturally occurring complexes and asked participants to identify energetic contributions and/or structural features that distinguish between the two sets. The community found that electrostatics and solvation terms partially distinguish the designs from the natural complexes, largely due to the nonpolar character of the designed interactions. Beyond this polarity difference, the community found that the designed binding surfaces were, on average, structurally less embedded in the designed monomers, suggesting that backbone conformational rigidity at the designed surface is important for realization of the designed function. These results can be used to improve computational design strategies, but there is still much to be learned; for example, one designed complex, which does form in experiments, was classified by all metrics as a nonbinder.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
James Thompson, David Baker
Incorporation of evolutionary information into Rosetta comparative modeling. Journal Article
In: Proteins, vol. 79, pp. 2380-8, 2011, ISSN: 1097-0134.
@article{421,
  title     = {Incorporation of evolutionary information into {Rosetta} comparative modeling},
  author    = {James Thompson and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2018/06/7a8a6bd9c93cfb06e1f3c0416a914b7494ffd1d2e15654117ed9e259a487cf33.pdf
https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.23046},
  doi       = {10.1002/prot.23046},
  issn      = {1097-0134},
  year      = {2011},
  date      = {2011-08-01},
  journal   = {Proteins},
  volume    = {79},
  pages     = {2380--2388},
  abstract  = {Prediction of protein structures from sequences is a fundamental problem in computational biology. Algorithms that attempt to predict a structure from sequence primarily use two sources of information. The first source is physical in nature: proteins fold into their lowest energy state. Given an energy function that describes the interactions governing folding, a method for constructing models of protein structures, and the amino acid sequence of a protein of interest, the structure prediction problem becomes a search for the lowest energy structure. Evolution provides an orthogonal source of information: proteins of similar sequences have similar structure, and therefore proteins of known structure can guide modeling. The relatively successful Rosetta approach takes advantage of the first, but not the second source of information during model optimization. Following the classic work by Andrej Sali and colleagues, we develop a probabilistic approach to derive spatial restraints from proteins of known structure using advances in alignment technology and the growth in the number of structures in the Protein Data Bank. These restraints define a region of conformational space that is high-probability, given the template information, and we incorporate them into Rosetta's comparative modeling protocol. The combined approach performs considerably better on a benchmark based on previous CASP experiments. Incorporating evolutionary information into Rosetta is analogous to incorporating sparse experimental data: in both cases, the additional information eliminates large regions of conformational space and increases the probability that energy-based refinement will hone in on the deep energy minimum at the native state.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Elizabeth H Kellogg, Andrew Leaver-Fay, David Baker
Role of conformational sampling in computing mutation-induced changes in protein structure and stability Journal Article
In: Proteins, vol. 79, pp. 830-8, 2011, ISSN: 1097-0134.
@article{354,
  title     = {Role of conformational sampling in computing mutation-induced changes in protein structure and stability},
  author    = {Elizabeth H Kellogg and Andrew Leaver-Fay and David Baker},
  doi       = {10.1002/prot.22921},
  issn      = {1097-0134},
  year      = {2011},
  date      = {2011-03-01},
  journal   = {Proteins},
  volume    = {79},
  pages     = {830--838},
  abstract  = {The prediction of changes in protein stability and structure resulting from single amino acid substitutions is both a fundamental test of macromolecular modeling methodology and an important current problem as high throughput sequencing reveals sequence polymorphisms at an increasing rate. In principle, given the structure of a wild-type protein and a point mutation whose effects are to be predicted, an accurate method should recapitulate both the structural changes and the change in the folding-free energy. Here, we explore the performance of protocols which sample an increasing diversity of conformations. We find that surprisingly similar performances in predicting changes in stability are achieved using protocols that involve very different amounts of conformational sampling, provided that the resolution of the force field is matched to the resolution of the sampling method. Methods involving backbone sampling can in some cases closely recapitulate the structural changes accompanying mutations but not surprisingly tend to do more harm than good in cases where structural changes are negligible. Analysis of the outliers in the stability change calculations suggests areas needing particular improvement; these include the balance between desolvation and the formation of favorable buried polar interactions, and unfolded state modeling.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Frank DiMaio, Andrew Leaver-Fay, Phil Bradley, David Baker, Ingemar André
Modeling symmetric macromolecular structures in Rosetta3 Journal Article
In: PloS One, vol. 6, pp. e20450, 2011, ISSN: 1932-6203.
@article{590,
  title     = {Modeling symmetric macromolecular structures in {Rosetta3}},
  author    = {Frank DiMaio and Andrew Leaver-Fay and Phil Bradley and David Baker and Ingemar Andr{\'e}},
  doi       = {10.1371/journal.pone.0020450},
  issn      = {1932-6203},
  year      = {2011},
  date      = {2011},
  journal   = {PLoS ONE},
  volume    = {6},
  pages     = {e20450},
  abstract  = {Symmetric protein assemblies play important roles in many biochemical processes. However, the large size of such systems is challenging for traditional structure modeling methods. This paper describes the implementation of a general framework for modeling arbitrary symmetric systems in Rosetta3. We describe the various types of symmetries relevant to the study of protein structure that may be modeled using Rosetta's symmetric framework. We then describe how this symmetric framework is efficiently implemented within Rosetta, which restricts the conformational search space by sampling only symmetric degrees of freedom, and explicitly simulates only a subset of the interacting monomers. Finally, we describe structure prediction and design applications that utilize the Rosetta3 symmetric modeling capabilities, and provide a guide to running simulations on symmetric systems.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2010
Sarel J Fleishman, Jacob E Corn, Eva M Strauch, Tim A Whitehead, Ingemar Andre, James Thompson, James J Havranek, Rhiju Das, Philip Bradley, David Baker
Rosetta in CAPRI rounds 13-19. Journal Article
In: Proteins, vol. 78, pp. 3212-8, 2010, ISSN: 1097-0134.
@article{578,
  title     = {{Rosetta} in {CAPRI} rounds 13--19},
  author    = {Sarel J Fleishman and Jacob E Corn and Eva M Strauch and Tim A Whitehead and Ingemar Andr{\'e} and James Thompson and James J Havranek and Rhiju Das and Philip Bradley and David Baker},
  doi       = {10.1002/prot.22784},
  issn      = {1097-0134},
  year      = {2010},
  date      = {2010-11-01},
  journal   = {Proteins},
  volume    = {78},
  pages     = {3212--3218},
  abstract  = {Modeling the conformational changes that occur on binding of macromolecules is an unsolved challenge. In previous rounds of the Critical Assessment of PRediction of Interactions (CAPRI), it was demonstrated that the Rosetta approach to macromolecular modeling could capture side chain conformational changes on binding with high accuracy. In rounds 13-19 we tested the ability of various backbone remodeling strategies to capture the main-chain conformational changes observed during binding events. These approaches span a wide range of backbone motions, from limited refinement of loops to relieve clashes in homologous docking, through extensive remodeling of loop segments, to large-scale remodeling of RNA. Although the results are encouraging, major improvements in sampling and energy evaluation are clearly required for consistent high accuracy modeling. Analysis of our failures in the CAPRI challenges suggest that conformational sampling at the termini of exposed beta strands is a particularly pressing area for improvement.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Michael D Tyka, Daniel A Keedy, Ingemar André, Frank DiMaio, Yifan Song, David C Richardson, Jane S Richardson, David Baker
Alternate States of Proteins Revealed by Detailed Energy Landscape Mapping Journal Article
In: Journal of molecular biology, 2010, ISSN: 1089-8638.
@article{260,
  title     = {Alternate States of Proteins Revealed by Detailed Energy Landscape Mapping},
  author    = {Michael D Tyka and Daniel A Keedy and Ingemar Andr{\'e} and Frank DiMaio and Yifan Song and David C Richardson and Jane S Richardson and David Baker},
  issn      = {1089-8638},
  year      = {2010},
  date      = {2010-11-01},
  journal   = {Journal of Molecular Biology},
  abstract  = {What conformations do protein molecules populate in solution? Crystallography provides a high-resolution description of protein structure in the crystal environment, while NMR describes structure in solution but using less data. NMR structures display more variability, but is this because crystal contacts are absent or because of fewer data constraints? Here we report unexpected insight into this issue obtained through analysis of detailed protein energy landscapes generated by large-scale, native-enhanced sampling of conformational space with Rosetta@home for 111 protein domains. In the absence of tightly associating binding partners or ligands, the lowest-energy Rosetta models were nearly all <2.5~{\AA} C(α) RMSD from the experimental structure; this result demonstrates that structure prediction accuracy for globular proteins is limited mainly by the ability to sample close to the native structure. While the lowest-energy models are similar to deposited structures, they are not identical; the largest deviations are most often in regions involved in ligand, quaternary, or crystal contacts. For ligand binding proteins, the low energy models may resemble the apo structures, and for oligomeric proteins, the monomeric assembly intermediates. The deviations between the low energy models and crystal structures largely disappear when landscapes are computed in the context of the crystal lattice or multimer. The computed low-energy ensembles, with tight crystal-structure-like packing in the core, but more NMR-structure-like variability in loops, may in some cases resemble the native state ensembles of proteins better than individual crystal or NMR structures, and can suggest experimentally testable hypotheses relating alternative states and structural heterogeneity to function.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Seth Cooper, Firas Khatib, Adrien Treuille, Janos Barbero, Jeehyung Lee, Michael Beenen, Andrew Leaver-Fay, David Baker, Zoran Popović, Foldit Players
Predicting protein structures with a multiplayer online game Journal Article
In: Nature, vol. 466, pp. 756-60, 2010, ISSN: 1476-4687.
@article{16,
  title     = {Predicting protein structures with a multiplayer online game},
  author    = {Seth Cooper and Firas Khatib and Adrien Treuille and Janos Barbero and Jeehyung Lee and Michael Beenen and Andrew Leaver-Fay and David Baker and Zoran Popovi{\'c} and {Foldit Players}},
  issn      = {1476-4687},
  year      = {2010},
  date      = {2010-08-01},
  journal   = {Nature},
  volume    = {466},
  pages     = {756--760},
  abstract  = {People exert large amounts of problem-solving effort playing computer games. Simple image- and text-recognition tasks have been successfully 'crowd-sourced' through games, but it is not clear if more complex scientific problems can be solved with human-directed computing. Protein structure prediction is one such problem: locating the biologically relevant native conformation of a protein is a formidable computational challenge given the very large size of the search space. Here we describe Foldit, a multiplayer online game that engages non-scientists in solving hard prediction problems. Foldit players interact with protein structures using direct manipulation tools and user-friendly versions of algorithms from the Rosetta structure prediction methodology, while they compete and collaborate to optimize the computed energy. We show that top-ranked Foldit players excel at solving challenging structure refinement problems in which substantial backbone rearrangements are necessary to achieve the burial of hydrophobic residues. Players working collaboratively develop a rich assortment of new strategies and algorithms; unlike computational approaches, they explore not only the conformational space but also the space of possible search strategies. The integration of human visual problem-solving and strategy development capabilities with traditional computational algorithms through interactive multiplayer games is a powerful new approach to solving computationally-limited scientific problems.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Ben Blum, Michael I Jordan, David Baker
Feature space resampling for protein conformational search Journal Article
In: Proteins, vol. 78, pp. 1583-93, 2010, ISSN: 1097-0134.
@article{271,
  title     = {Feature space resampling for protein conformational search},
  author    = {Ben Blum and Michael I Jordan and David Baker},
  issn      = {1097-0134},
  year      = {2010},
  date      = {2010-05-01},
  journal   = {Proteins},
  volume    = {78},
  pages     = {1583--1593},
  abstract  = {De novo protein structure prediction requires location of the lowest energy state of the polypeptide chain among a vast set of possible conformations. Powerful approaches include conformational space annealing, in which search progressively focuses on the most promising regions of conformational space, and genetic algorithms, in which features of the best conformations thus far identified are recombined. We describe a new approach that combines the strengths of these two approaches. Protein conformations are projected onto a discrete feature space which includes backbone torsion angles, secondary structure, and beta pairings. For each of these there is one "native" value: the one found in the native structure. We begin with a large number of conformations generated in independent Monte Carlo structure prediction trajectories from Rosetta. Native values for each feature are predicted from the frequencies of feature value occurrences and the energy distribution in conformations containing them. A second round of structure prediction trajectories are then guided by the predicted native feature distributions. We show that native features can be predicted at much higher than background rates, and that using the predicted feature distributions improves structure prediction in a benchmark of 28 proteins. The advantages of our approach are that features from many different input structures can be combined simultaneously without producing atomic clashes or otherwise physically inviable models, and that the features being recombined have a relatively high chance of being correct.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Chu Wang, Robert Vernon, Oliver Lange, Michael Tyka, David Baker
Prediction of structures of zinc-binding proteins through explicit modeling of metal coordination geometry Journal Article
In: Protein science, vol. 19, pp. 494-506, 2010, ISSN: 1469-896X.
@article{257,
title = {Prediction of structures of zinc-binding proteins through explicit modeling of metal coordination geometry},
author = {Chu Wang and Robert Vernon and Oliver Lange and Michael Tyka and David Baker},
issn = {1469-896X},
year = {2010},
date = {2010-03-01},
journal = {Protein Science},
volume = {19},
pages = {494--506},
abstract = {Metal ions play an essential role in stabilizing protein structures and contributing to protein function. Ions such as zinc have well-defined coordination geometries, but it has not been easy to take advantage of this knowledge in protein structure prediction efforts. Here, we present a computational method to predict structures of zinc-binding proteins given knowledge of the positions of zinc-coordinating residues in the amino acid sequence. The method takes advantage of the "atom-tree" representation of molecular systems and modular architecture of the Rosetta3 software suite to incorporate explicit metal ion coordination geometry into previously developed de novo prediction and loop modeling protocols. Zinc cofactors are tethered to their interacting residues based on coordination geometries observed in natural zinc-binding proteins. The incorporation of explicit zinc atoms and their coordination geometry in both de novo structure prediction and loop modeling significantly improves sampling near the native conformation. The method can be readily extended to predict protein structures bound to other metal and/or small chemical cofactors with well-defined coordination or ligation geometry.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Yang Shen, Philip N Bryan, Yanan He, John Orban, David Baker, Ad Bax
De novo structure generation using chemical shifts for proteins with high-sequence identity but different folds Journal Article
In: Protein Science : A Publication of the Protein Society, vol. 19, pp. 349-56, 2010, ISSN: 1469-896X.
@article{584,
title = {De novo structure generation using chemical shifts for proteins with high-sequence identity but different folds},
author = {Yang Shen and Philip N Bryan and Yanan He and John Orban and David Baker and Ad Bax},
doi = {10.1002/pro.303},
issn = {1469-896X},
year = {2010},
date = {2010-02-01},
journal = {Protein Science},
volume = {19},
pages = {349--356},
abstract = {Proteins with high-sequence identity but very different folds present a special challenge to sequence-based protein structure prediction methods. In particular, a 56-residue three-helical bundle protein (GA(95)) and an alpha/beta-fold protein (GB(95)), which share 95\% sequence identity, were targets in the CASP-8 structure prediction contest. With only 12 out of 300 submitted server-CASP8 models for GA(95) exhibiting the correct fold, this protein proved particularly challenging despite its small size. Here, we demonstrate that the information contained in NMR chemical shifts can readily be exploited by the CS-Rosetta structure prediction program and yields adequate convergence, even when input chemical shifts are limited to just amide (1)H(N) and (15)N or (1)H(N) and (1)H(alpha) values.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Srivatsan Raman, Yuanpeng J Huang, Binchen Mao, Paolo Rossi, James M Aramini, Gaohua Liu, Gaetano T Montelione, David Baker
Accurate automated protein NMR structure determination using unassigned NOESY data Journal Article
In: Journal of the American Chemical Society, vol. 132, pp. 202-7, 2010, ISSN: 1520-5126.
@article{258,
title = {Accurate automated protein {NMR} structure determination using unassigned {NOESY} data},
author = {Srivatsan Raman and Yuanpeng J Huang and Binchen Mao and Paolo Rossi and James M Aramini and Gaohua Liu and Gaetano T Montelione and David Baker},
issn = {1520-5126},
year = {2010},
date = {2010-01-01},
journal = {Journal of the American Chemical Society},
volume = {132},
pages = {202--207},
abstract = {Conventional NMR structure determination requires nearly complete assignment of the cross peaks of a refined NOESY peak list. Depending on the size of the protein and quality of the spectral data, this can be a time-consuming manual process requiring several rounds of peak list refinement and structure determination. Programs such as Aria, CYANA, and AutoStructure can generate models using unassigned NOESY data but are very sensitive to the quality of the input peak lists and can converge to inaccurate structures if the signal-to-noise of the peak lists is low. Here, we show that models with high accuracy and reliability can be produced by combining the strengths of the high-resolution structure prediction program Rosetta with global measures of the agreement between structure models and experimental data. A first round of models generated using CS-Rosetta (Rosetta supplemented with backbone chemical shift information) are filtered on the basis of their goodness-of-fit with unassigned NOESY peak lists using the DP-score, and the best fitting models are subjected to high resolution refinement with the Rosetta rebuild-and-refine protocol. This hybrid approach uses both local backbone chemical shift and the unassigned NOESY data to direct Rosetta trajectories toward the native structure and produces more accurate models than AutoStructure/CYANA or CS-Rosetta alone, particularly when using raw unedited NOESY peak lists. We also show that when accurate manually refined NOESY peak lists are available, Rosetta refinement can consistently increase the accuracy of models generated using CYANA and AutoStructure.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2009
Rhiju Das, Ingemar André, Yang Shen, Yibing Wu, Alexander Lemak, Sonal Bansal, Cheryl H Arrowsmith, Thomas Szyperski, David Baker
Simultaneous prediction of protein folding and docking at high resolution Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 106, pp. 18978-83, 2009, ISSN: 1091-6490.
@article{124,
title = {Simultaneous prediction of protein folding and docking at high resolution},
author = {Rhiju Das and Ingemar Andr{\'e} and Yang Shen and Yibing Wu and Alexander Lemak and Sonal Bansal and Cheryl H Arrowsmith and Thomas Szyperski and David Baker},
issn = {1091-6490},
year = {2009},
date = {2009-11-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {106},
pages = {18978--18983},
abstract = {Interleaved dimers and higher order symmetric oligomers are ubiquitous in biology but present a challenge to de novo structure prediction methodology: The structure adopted by a monomer can be stabilized largely by interactions with other monomers and hence not the lowest energy state of a single chain. Building on the Rosetta framework, we present a general method to simultaneously model the folding and docking of multiple-chain interleaved homo-oligomers. For more than a third of the cases in a benchmark set of interleaved homo-oligomers, the method generates near-native models of large alpha-helical bundles, interlocking beta sandwiches, and interleaved alpha/beta motifs with an accuracy high enough for molecular replacement based phasing. With the incorporation of NMR chemical shift information, accurate models can be obtained consistently for symmetric complexes with as many as 192 total amino acids; a blind prediction was within 1 {\AA} rmsd of the traditionally determined NMR structure, and fit independently collected RDC data equally well. Together, these results show that the Rosetta "fold-and-dock" protocol can produce models of homo-oligomeric complexes with near-atomic-level accuracy and should be useful for crystallographic phasing and the rapid determination of the structures of multimers with limited NMR information.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
David E Kim, Ben Blum, Philip Bradley, David Baker
Sampling bottlenecks in de novo protein structure prediction Journal Article
In: Journal of molecular biology, vol. 393, pp. 249-60, 2009, ISSN: 1089-8638.
@article{131,
title = {Sampling bottlenecks in de novo protein structure prediction},
author = {David E Kim and Ben Blum and Philip Bradley and David Baker},
issn = {1089-8638},
year = {2009},
date = {2009-10-01},
journal = {Journal of Molecular Biology},
volume = {393},
pages = {249--260},
abstract = {The primary obstacle to de novo protein structure prediction is conformational sampling: the native state generally has lower free energy than nonnative structures but is exceedingly difficult to locate. Structure predictions with atomic level accuracy have been made for small proteins using the Rosetta structure prediction method, but for larger and more complex proteins, the native state is virtually never sampled, and it has been unclear how much of an increase in computing power would be required to successfully predict the structures of such proteins. In this paper, we develop an approach to determining how much computer power is required to accurately predict the structure of a protein, based on a reformulation of the conformational search problem as a combinatorial sampling problem in a discrete feature space. We find that conformational sampling for many proteins is limited by critical "linchpin" features, often the backbone torsion angles of individual residues, which are sampled very rarely in unbiased trajectories and, when constrained, dramatically increase the sampling of the native state. These critical features frequently occur in less regular and likely strained regions of proteins that contribute to protein function. In a number of proteins, the linchpin features are in regions found experimentally to form late in folding, suggesting a correspondence between folding in silico and in reality.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ian W Davis, Kaushik Raha, Martha S Head, David Baker
Blind docking of pharmaceutically relevant compounds using RosettaLigand Journal Article
In: Protein science, vol. 18, pp. 1998-2002, 2009, ISSN: 1469-896X.
@article{126,
title = {Blind docking of pharmaceutically relevant compounds using {RosettaLigand}},
author = {Ian W Davis and Kaushik Raha and Martha S Head and David Baker},
issn = {1469-896X},
year = {2009},
date = {2009-09-01},
journal = {Protein Science},
volume = {18},
pages = {1998--2002},
abstract = {It is difficult to properly validate algorithms that dock a small molecule ligand into its protein receptor using data from the public domain: the predictions are not blind because the correct binding mode is already known, and public test cases may not be representative of compounds of interest such as drug leads. Here, we use private data from a real drug discovery program to carry out a blind evaluation of the RosettaLigand docking methodology and find that its performance is on average comparable with that of the best commercially available current small molecule docking programs. The strength of RosettaLigand is the use of the Rosetta sampling methodology to simultaneously optimize protein sidechain, protein backbone and ligand degrees of freedom; the extensive benchmark test described here identifies shortcomings in other aspects of the protocol and suggests clear routes to improving the method.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Brian A Kidd, David Baker, Wendy E Thomas
Computation of conformational coupling in allosteric proteins Journal Article
In: PLoS computational biology, vol. 5, pp. e1000484, 2009, ISSN: 1553-7358.
@article{130,
title = {Computation of conformational coupling in allosteric proteins},
author = {Brian A Kidd and David Baker and Wendy E Thomas},
issn = {1553-7358},
year = {2009},
date = {2009-08-01},
journal = {PLoS Computational Biology},
volume = {5},
pages = {e1000484},
abstract = {In allosteric regulation, an effector molecule binding a protein at one site induces conformational changes, which alter structure and function at a distant active site. Two key challenges in the computational modeling of allostery are the prediction of the structure of one allosteric state starting from the structure of the other, and elucidating the mechanisms underlying the conformational coupling of the effector and active sites. Here we approach these two challenges using the Rosetta high-resolution structure prediction methodology. We find that the method can recapitulate the relaxation of effector-bound forms of single domain allosteric proteins into the corresponding ligand-free states, particularly when sampling is focused on regions known to change conformation most significantly. Analysis of the coupling between contacting pairs of residues in large ensembles of conformations spread throughout the landscape between and around the two allosteric states suggests that the transitions are built up from blocks of tightly coupled interacting sets of residues that are more loosely coupled to one another.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ruslan I Sadreyev, ShuoYong Shi, David Baker, Nick V Grishin
Structure similarity measure with penalty for close non-equivalent residues Journal Article
In: Bioinformatics, vol. 25, pp. 1259-63, 2009, ISSN: 1367-4811.
@article{135,
title = {Structure similarity measure with penalty for close non-equivalent residues},
author = {Ruslan I Sadreyev and ShuoYong Shi and David Baker and Nick V Grishin},
issn = {1367-4811},
year = {2009},
date = {2009-05-01},
journal = {Bioinformatics},
volume = {25},
pages = {1259--1263},
abstract = {MOTIVATION: Recent improvement in homology-based structure modeling emphasizes the importance of sensitive evaluation measures that help identify and correct modest distortions in models compared with the target structures. Global Distance Test Total Score (GDT\_TS), otherwise a very powerful and effective measure for model evaluation, is still insensitive to and can even reward such distortions, as observed for remote homology modeling in the latest CASP8 (Comparative Assessment of Structure Prediction). RESULTS: We develop a new measure that balances GDT\_TS reward for the closeness of equivalent model and target residues ('attraction' term) with the penalty for the closeness of non-equivalent residues ('repulsion' term). Compared with GDT\_TS, the resulting score, TR (total score with repulsion), is much more sensitive to structure compression both in real remote homologs and in CASP models. TR is correlated yet different from other measures of structure similarity. The largest difference from GDT\_TS is observed in models of mid-range quality based on remote homology modeling. AVAILABILITY: The script for TR calculation is included in Supplementary Material. TR scores for all server models in CASP8 are available at http://prodata.swmed.edu/CASP8.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
P Barth, B Wallner, David Baker
Prediction of membrane protein structures with complex topologies using limited constraints Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 106, pp. 1409-14, 2009, ISSN: 1091-6490.
@article{123,
title = {Prediction of membrane protein structures with complex topologies using limited constraints},
author = {P Barth and B Wallner and David Baker},
issn = {1091-6490},
year = {2009},
date = {2009-02-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {106},
pages = {1409--1414},
abstract = {Reliable structure-prediction methods for membrane proteins are important because the experimental determination of high-resolution membrane protein structures remains very difficult, especially for eukaryotic proteins. However, membrane proteins are typically longer than 200 aa and represent a formidable challenge for structure prediction. We have developed a method for predicting the structures of large membrane proteins by constraining helix-helix packing arrangements at particular positions predicted from sequence or identified by experiments. We tested the method on 12 membrane proteins of diverse topologies and functions with lengths ranging between 190 and 300 residues. Enforcing a single constraint during the folding simulations enriched the population of near-native models for 9 proteins. In 4 of the cases in which the constraint was predicted from the sequence, 1 of the 5 lowest energy models was superimposable within 4 {\AA} on the native structure. Near-native structures could also be selected for heme-binding and pore-forming domains from simulations in which pairs of conserved histidine-chelating hemes and one experimentally determined salt bridge were constrained, respectively. These results suggest that models within 4 {\AA} of the native structure can be achieved for complex membrane proteins if even limited information on residue-residue interactions can be obtained from protein structure databases or experiments.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Will Sheffler, David Baker
RosettaHoles: rapid assessment of protein core packing for structure prediction, refinement, design, and validation Journal Article
In: Protein science, vol. 18, pp. 229-39, 2009, ISSN: 1469-896X.
@article{136,
title = {{RosettaHoles}: rapid assessment of protein core packing for structure prediction, refinement, design, and validation},
author = {Will Sheffler and David Baker},
url = {https://onlinelibrary.wiley.com/doi/full/10.1002/pro.8},
pdf = {https://www.bakerlab.org/wp-content/uploads/2020/08/pro.8.pdf},
doi = {10.1002/pro.8},
issn = {1469-896X},
year = {2009},
date = {2009-01-01},
journal = {Protein Science},
volume = {18},
pages = {229--239},
abstract = {We present a novel method called RosettaHoles for visual and quantitative assessment of underpacking in the protein core. RosettaHoles generates a set of spherical cavity balls that fill the empty volume between atoms in the protein interior. For visualization, the cavity balls are aggregated into contiguous overlapping clusters and small cavities are discarded, leaving an uncluttered representation of the unfilled regions of space in a structure. For quantitative analysis, the cavity ball data are used to estimate the probability of observing a given cavity in a high-resolution crystal structure. RosettaHoles provides excellent discrimination between real and computationally generated structures, is predictive of incorrect regions in models, identifies problematic structures in the Protein Data Bank, and promises to be a useful validation tool for newly solved experimental structures.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Srivatsan Raman, Robert Vernon, James Thompson, Michael Tyka, Ruslan Sadreyev, Jimin Pei, David Kim, Elizabeth Kellogg, Frank DiMaio, Oliver Lange, Lisa Kinch, Will Sheffler, Bong-Hyun Kim, Rhiju Das, Nick V Grishin, David Baker
Structure prediction for CASP8 with all-atom refinement using Rosetta Journal Article
In: Proteins, vol. 77 Suppl 9, pp. 89-99, 2009, ISSN: 1097-0134.
@article{273,
title = {Structure prediction for {CASP8} with all-atom refinement using {Rosetta}},
author = {Srivatsan Raman and Robert Vernon and James Thompson and Michael Tyka and Ruslan Sadreyev and Jimin Pei and David Kim and Elizabeth Kellogg and Frank DiMaio and Oliver Lange and Lisa Kinch and Will Sheffler and Bong-Hyun Kim and Rhiju Das and Nick V Grishin and David Baker},
issn = {1097-0134},
year = {2009},
date = {2009},
journal = {Proteins},
volume = {77},
number = {Suppl 9},
pages = {89--99},
abstract = {We describe predictions made using the Rosetta structure prediction methodology for the Eighth Critical Assessment of Techniques for Protein Structure Prediction. Aggressive sampling and all-atom refinement were carried out for nearly all targets. A combination of alignment methodologies was used to generate starting models from a range of templates, and the models were then subjected to Rosetta all atom refinement. For the 64 domains with readily identified templates, the best submitted model was better than the best alignment to the best template in the Protein Data Bank for 24 cases, and improved over the best starting model for 43 cases. For 13 targets where only very distant sequence relationships to proteins of known structure were detected, models were generated using the Rosetta de novo structure prediction methodology followed by all-atom refinement; in several cases the submitted models were better than those based on the available templates. Of the 12 refinement challenges, the best submitted model improved on the starting model in seven cases. These improvements over the starting template-based models and refinement tests demonstrate the power of Rosetta structure refinement in improving model accuracy.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2008
Anthony H Keeble, Lukasz A Joachimiak, María Jesus Maté, Nicola Meenan, Nadine Kirkpatrick, David Baker, Colin Kleanthous
Experimental and computational analyses of the energetic basis for dual recognition of immunity proteins by colicin endonucleases Journal Article
In: Journal of molecular biology, vol. 379, pp. 745-59, 2008, ISSN: 1089-8638.
@article{221,
title = {Experimental and computational analyses of the energetic basis for dual recognition of immunity proteins by colicin endonucleases},
author = {Anthony H Keeble and Lukasz A Joachimiak and Mar{\'\i}a Jesus Mat{\'e} and Nicola Meenan and Nadine Kirkpatrick and David Baker and Colin Kleanthous},
issn = {1089-8638},
year = {2008},
date = {2008-06-01},
journal = {Journal of Molecular Biology},
volume = {379},
pages = {745--759},
abstract = {Colicin endonucleases (DNases) are bound and inactivated by immunity (Im) proteins. Im proteins are broadly cross-reactive yet specific inhibitors binding cognate and non-cognate DNases with $K_d$ values that vary between $10^{-4}$ and $10^{-14}$ M, characteristics that are explained by a 'dual-recognition' mechanism. In this work, we addressed for the first time the energetics of Im protein recognition by colicin DNases through a combination of E9 DNase alanine scanning and double-mutant cycles (DMCs) coupled with kinetic and calorimetric analyses of cognate Im9 and non-cognate Im2 binding, as well as computational analysis of alanine scanning and DMC data. We show that differential $\Delta\Delta G$s observed for four E9 DNase residues cumulatively distinguish cognate Im9 association from non-cognate Im2 association. E9 DNase Phe86 is the primary specificity hotspot residue in the centre of the interface, which is coordinated by conserved and variable hotspot residues of the cognate Im protein. Experimental DMC analysis reveals that only modest coupling energies to Im9 residues are observed, in agreement with calculated DMCs using the program ROSETTA and consistent with the largely hydrophobic nature of E9 DNase-Im9 specificity contacts. Computed values for the 12 E9 DNase alanine mutants showed reasonable agreement with experimental $\Delta\Delta G$ data, particularly for interactions not mediated by interfacial water molecules. $\Delta\Delta G$ predictions for residues that contact buried water molecules calculated using solvated rotamer models met with mixed success; however, we were able to predict with a high degree of accuracy the location and energetic contribution of one such contact. Our study highlights how colicin DNases are able to utilise both conserved and variable amino acids to distinguish cognate from non-cognate Im proteins, with the energetic contributions of the conserved residues modulated by neighbouring specificity sites.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jian Qiu, Will Sheffler, David Baker, William Stafford Noble
Ranking predicted protein structures with support vector regression Journal Article
In: Proteins, vol. 71, pp. 1175-82, 2008, ISSN: 1097-0134.
@article{220,
title = {Ranking predicted protein structures with support vector regression},
author = {Jian Qiu and Will Sheffler and David Baker and William Stafford Noble},
issn = {1097-0134},
year = {2008},
date = {2008-05-01},
journal = {Proteins},
volume = {71},
pages = {1175--1182},
abstract = {Protein structure prediction is an important problem of both intellectual and practical interest. Most protein structure prediction approaches generate multiple candidate models first, and then use a scoring function to select the best model among these candidates. In this work, we develop a scoring function using support vector regression (SVR). Both consensus-based features and features from individual structures are extracted from a training data set containing native protein structures and predicted structural models submitted to CASP5 and CASP6. The SVR learns a scoring function that is a linear combination of these features. We test this scoring function on two data sets. First, when used to rank server models submitted to CASP7, the SVR score selects predictions that are comparable to the best performing server in CASP7, Zhang-Server, and significantly better than all the other servers. Even if the SVR score is not allowed to select Zhang-Server models, the SVR score still selects predictions that are significantly better than all the other servers. In addition, the SVR is able to select significantly better models and yield significantly better Pearson correlation coefficients than the two best Quality Assessment groups in CASP7, QA556 (LEE), and QA634 (Pcons). Second, this work aims to improve the ability of the Robetta server to select best models, and hence we evaluate the performance of the SVR score on ranking the Robetta server template-based models for the CASP7 targets. The SVR selects significantly better models than the Robetta K*Sync consensus alignment score.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Raman S, Qian B, Baker D, Walker RC
Advances in Rosetta Protein Structure Prediction on Massively Parallel Systems Journal Article
In: Journal of Research and Development, vol. 52, no. 1-2, pp. 7-17, 2008.
@article{280,
title = {Advances in {Rosetta} Protein Structure Prediction on Massively Parallel Systems},
author = {Raman, S. and Qian, B. and Baker, D. and Walker, R. C.},
year = {2008},
date = {2008-01-01},
journal = {Journal of Research and Development},
volume = {52},
number = {1-2},
pages = {7--17},
abstract = {One of the key challenges in computational biology is prediction of three-dimensional protein structures from amino-acid sequences. For most proteins, the "native state" lies at the bottom of a free-energy landscape. Protein structure prediction involves varying the degrees of freedom of the protein in a constrained manner until it approaches its native state. In the Rosetta protein structure prediction protocols, a large number of independent folding trajectories are simulated, and several lowest-energy results are likely to be close to the native state. The availability of hundred-teraflop, and shortly, petaflop, computing resources is revolutionizing the approaches available for protein structure prediction. Here, we discuss issues involved in utilizing such machines efficiently with the Rosetta code, including an overview of recent results of the Critical Assessment of Techniques for Protein Structure Prediction 7 (CASP7) in which the computationally demanding structure-refinement process was run on 16 racks of the IBM Blue Gene/L (TM) system at the IBM T. J. Watson Research Center. We highlight recent advances in high-performance computing and discuss future development paths that make use of the next-generation petascale ($> 10^{12}$ floating-point operations per second) machines.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Rhiju Das, David Baker
Macromolecular modeling with rosetta Journal Article
In: Annual review of biochemistry, vol. 77, pp. 363-82, 2008, ISSN: 0066-4154.
@article{227,
title = {Macromolecular modeling with {Rosetta}},
author = {Rhiju Das and David Baker},
issn = {0066-4154},
year = {2008},
date = {2008},
journal = {Annual Review of Biochemistry},
volume = {77},
pages = {363--382},
abstract = {Advances over the past few years have begun to enable prediction and design of macromolecular structures at near-atomic accuracy. Progress has stemmed from the development of reasonably accurate and efficiently computed all-atom potential functions as well as effective conformational sampling strategies appropriate for searching a highly rugged energy landscape, both driven by feedback from structure prediction and design tests. A unified energetic and kinematic framework in the Rosetta program allows a wide range of molecular modeling problems, from fibril structure prediction to RNA folding to the design of new protein interfaces, to be readily investigated and highlights areas for improvement. The methodology enables the creation of novel molecules with useful functions and holds promise for accelerating experimental structural inference. Emerging connections to crystallographic phasing, NMR modeling, and lower-resolution approaches are described and critically assessed.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Erkang Fan, David Baker, Stanley Fields, Michael H Gelb, Frederick S Buckner, Wesley C Van Voorhis, Eric Phizicky, Mark Dumont, Christopher Mehlin, Elizabeth Grayhack, Mark Sullivan, Christophe Verlinde, George Detitta, Deirdre R Meldrum, Ethan A Merritt, Thomas Earnest, Michael Soltis, Frank Zucker, Peter J Myler, Lori Schoenfeld, David E Kim, Liz Worthey, Doug Lacount, Marissa Vignali, Jizhen Li, Somnath Mondal, Archna Massey, Brian Carroll, Stacey Gulde, Joseph Luft, Larry Desoto, Mark Holl, Jonathan Caruthers, Jürgen Bosch, Mark Robien, Tracy Arakaki, Margaret Holmes, Isolde Le Trong, Wim G J Hol
Structural genomics of pathogenic protozoa: an overview Journal Article
In: Methods in molecular biology, vol. 426, pp. 497-513, 2008, ISSN: 1064-3745.
@article{225,
title = {Structural genomics of pathogenic protozoa: an overview},
author = {Erkang Fan and David Baker and Stanley Fields and Michael H Gelb and Frederick S Buckner and Van Voorhis, Wesley C and Eric Phizicky and Mark Dumont and Christopher Mehlin and Elizabeth Grayhack and Mark Sullivan and Christophe Verlinde and George Detitta and Deirdre R Meldrum and Ethan A Merritt and Thomas Earnest and Michael Soltis and Frank Zucker and Peter J Myler and Lori Schoenfeld and David E Kim and Liz Worthey and Doug Lacount and Marissa Vignali and Jizhen Li and Somnath Mondal and Archna Massey and Brian Carroll and Stacey Gulde and Joseph Luft and Larry Desoto and Mark Holl and Jonathan Caruthers and J{\"u}rgen Bosch and Mark Robien and Tracy Arakaki and Margaret Holmes and Le Trong, Isolde and Wim G J Hol},
issn = {1064-3745},
year = {2008},
date = {2008},
journal = {Methods in Molecular Biology},
volume = {426},
pages = {497--513},
abstract = {The Structural Genomics of Pathogenic Protozoa (SGPP) Consortium aimed to determine crystal structures of proteins from trypanosomatid and malaria parasites in a high throughput manner. The pipeline of target selection, protein production, crystallization, and structure determination, is sketched. Special emphasis is given to a number of technology developments including domain prediction, the use of "co-crystallants," and capillary crystallization. "Fragment cocktail crystallography" for medical structural genomics is also described.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2007
Chu Wang, Ora Schueler-Furman, Ingemar André, Nir London, Sarel J Fleishman, Philip Bradley, Bin Qian, David Baker
RosettaDock in CAPRI rounds 6-12 Journal Article
In: Proteins, vol. 69, pp. 758-63, 2007, ISSN: 1097-0134.
@article{112,
title = {{RosettaDock} in {CAPRI} rounds 6-12},
author = {Chu Wang and Ora Schueler-Furman and Ingemar Andre and Nir London and Sarel J Fleishman and Philip Bradley and Bin Qian and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/wang07B.pdf},
issn = {1097-0134},
year = {2007},
date = {2007-12-01},
journal = {Proteins},
volume = {69},
pages = {758--763},
abstract = {A challenge in protein-protein docking is to account for the conformational changes in the monomers that occur upon binding. The RosettaDock method, which incorporates sidechain flexibility but keeps the backbone fixed, was found in previous CAPRI rounds (4 and 5) to generate docking models with atomic accuracy, provided that conformational changes were mainly restricted to protein sidechains. In the recent rounds of CAPRI (6-12), large backbone conformational changes occur upon binding for several target complexes. To address these challenges, we explicitly introduced backbone flexibility in our modeling procedures by combining rigid-body docking with protein structure prediction techniques such as modeling variable loops and building homology models. Encouragingly, using this approach we were able to correctly predict a significant backbone conformational change of an interface loop for Target 20 (12 A rmsd between those in the unbound monomer and complex structures), but accounting for backbone flexibility in protein-protein docking is still very challenging because of the significantly larger conformational space, which must be surveyed. Motivated by these CAPRI challenges, we have made progress in reformulating RosettaDock using a "fold-tree" representation, which provides a general framework for treating a wide variety of flexible-backbone docking problems.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Bin Qian, Srivatsan Raman, Rhiju Das, Philip Bradley, Airlie J McCoy, Randy J Read, David Baker
High-resolution structure prediction and the crystallographic phase problem Journal Article
In: Nature, vol. 450, pp. 259-64, 2007, ISSN: 1476-4687.
@article{115,
title = {High-resolution structure prediction and the crystallographic phase problem},
author = {Bin Qian and Srivatsan Raman and Rhiju Das and Philip Bradley and Airlie J McCoy and Randy J Read and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/qian07A.pdf},
issn = {1476-4687},
year = {2007},
date = {2007-11-01},
journal = {Nature},
volume = {450},
pages = {259--264},
abstract = {The energy-based refinement of low-resolution protein structure models to atomic-level accuracy is a major challenge for computational structural biology. Here we describe a new approach to refining protein structure models that focuses sampling in regions most likely to contain errors while allowing the whole structure to relax in a physically realistic all-atom force field. In applications to models produced using nuclear magnetic resonance data and to comparative models based on distant structural homologues, the method can significantly improve the accuracy of the structures in terms of both the backbone conformations and the placement of core side chains. Furthermore, the resulting models satisfy a particularly stringent test: they provide significantly better solutions to the X-ray crystallographic phase problem in molecular replacement trials. Finally, we show that all-atom refinement can produce de novo protein structure predictions that reach the high accuracy required for molecular replacement without any experimental phase information and in the absence of templates suitable for molecular replacement from the Protein Data Bank. These results suggest that the combination of high-resolution structure prediction with state-of-the-art phasing tools may be unexpectedly powerful in phasing crystallographic data for which molecular replacement is hindered by the absence of sufficiently accurate previous models.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ingemar André, Philip Bradley, Chu Wang, David Baker
Prediction of the structure of symmetrical protein assemblies Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 104, pp. 17656-61, 2007, ISSN: 0027-8424.
@article{121,
title = {Prediction of the structure of symmetrical protein assemblies},
author = {Ingemar Andr{\'e} and Philip Bradley and Chu Wang and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/André07A.pdf},
issn = {0027-8424},
year = {2007},
date = {2007-11-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {104},
pages = {17656--17661},
abstract = {Biological supramolecular systems are commonly built up by the self-assembly of identical protein subunits to produce symmetrical oligomers with cyclical, icosahedral, or helical symmetry that play roles in processes ranging from allosteric control and molecular transport to motor action. The large size of these systems often makes them difficult to structurally characterize using experimental techniques. We have developed a computational protocol to predict the structure of symmetrical protein assemblies based on the structure of a single subunit. The method carries out simultaneous optimization of backbone, side chain, and rigid-body degrees of freedom, while restricting the search space to symmetrical conformations. Using this protocol, we can reconstruct, starting from the structure of a single subunit, the structure of cyclic oligomers and the icosahedral virus capsid of satellite panicum virus using a rigid backbone approximation. We predict the oligomeric state of EscJ from the type III secretion system both in its proposed cyclical and crystallized helical form. Finally, we show that the method can recapitulate the structure of an amyloid-like fibril formed by the peptide NNQQNY from the yeast prion protein Sup35 starting from the amino acid sequence alone and searching the complete space of backbone, side chain, and rigid-body degrees of freedom.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
P Barth, J Schonbrun, David Baker
Toward high-resolution prediction and design of transmembrane helical protein structures Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 104, pp. 15682-7, 2007, ISSN: 0027-8424.
@article{120,
title = {Toward high-resolution prediction and design of transmembrane helical protein structures},
author = {P Barth and J Schonbrun and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/barth07A.pdf},
issn = {0027-8424},
year = {2007},
date = {2007-10-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {104},
pages = {15682--15687},
abstract = {The prediction and design at the atomic level of membrane protein structures and interactions is a critical but unsolved challenge. To address this problem, we have developed an all-atom physical model that describes intraprotein and protein-solvent interactions in the membrane environment. We evaluated the ability of the model to recapitulate the energetics and structural specificities of polytopic membrane proteins by using a battery of in silico prediction and design tests. First, in side-chain packing and design tests, the model successfully predicts the side-chain conformations at 73% of nonexposed positions and the native amino acid identities at 34% of positions in naturally occurring membrane proteins. Second, the model predicts significant energy gaps between native and nonnative structures of transmembrane helical interfaces and polytopic membrane proteins. Third, distortions in transmembrane helices are successfully recapitulated in docking experiments by using fragments of ideal helices judiciously defined around helical kinks. Finally, de novo structure prediction reaches near-atomic accuracy (<2.5 A) for several small membrane protein domains (<150 residues). The success of the model highlights the critical role of van der Waals and hydrogen-bonding interactions in the stability and structural specificity of membrane protein structures and sets the stage for the high-resolution prediction and design of complex membrane protein architectures.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Rhiju Das, David Baker
Automated de novo prediction of native-like RNA tertiary structures Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 104, pp. 14664-9, 2007, ISSN: 0027-8424.
@article{117,
title = {Automated de novo prediction of native-like {RNA} tertiary structures},
author = {Rhiju Das and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/das07A.pdf},
issn = {0027-8424},
year = {2007},
date = {2007-09-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {104},
pages = {14664--14669},
abstract = {RNA tertiary structure prediction has been based almost entirely on base-pairing constraints derived from phylogenetic covariation analysis. We describe here a complementary approach, inspired by the Rosetta low-resolution protein structure prediction method, that seeks the lowest energy tertiary structure for a given RNA sequence without using evolutionary information. In a benchmark test of 20 RNA sequences with known structure and lengths of approximately 30 nt, the new method reproduces better than 90% of Watson-Crick base pairs, comparable with the accuracy of secondary structure prediction methods. In more than half the cases, at least one of the top five models agrees with the native structure to better than 4 A rmsd over the backbone. Most importantly, the method recapitulates more than one-third of non-Watson-Crick base pairs seen in the native structures. Tandem stacks of "sheared" base pairs, base triplets, and pseudoknots are among the noncanonical features reproduced in the models. In the cases in which none of the top five models were native-like, higher energy conformations similar to the native structures are still sampled frequently but not assigned low energies. These results suggest that modest improvements in the energy function, together with the incorporation of information from phylogenetic covariance, may allow confident and accurate structure prediction for larger and more complex RNA chains.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Lars Malmstrom, Michael Riffle, Charlie E M Strauss, Dylan Chivian, Trisha N Davis, Richard Bonneau, David Baker
Superfamily assignments for the yeast proteome through integration of structure prediction with the gene ontology Journal Article
In: PLoS biology, vol. 5, pp. e76, 2007, ISSN: 1545-7885.
@article{116,
  author     = { Lars Malmstrom and Michael Riffle and Charlie E M Strauss and Dylan Chivian and Trisha N Davis and Richard Bonneau and David Baker},
  title      = {Superfamily assignments for the yeast proteome through integration of structure prediction with the gene ontology},
  journal    = {PLoS biology},
  volume     = {5},
  pages      = {e76},
  year       = {2007},
  date       = {2007-04-01},
  issn       = {1545-7885},
  url        = {https://www.bakerlab.org/wp-content/uploads/2016/08/malmström07A.pdf},
  abstract   = {Saccharomyces cerevisiae is one of the best-studied model organisms, yet the three-dimensional structure and molecular function of many yeast proteins remain unknown. Yeast proteins were parsed into 14,934 domains, and those lacking sequence similarity to proteins of known structure were folded using the Rosetta de novo structure prediction method on the World Community Grid. This structural data was integrated with process, component, and function annotations from the Saccharomyces Genome Database to assign yeast protein domains to SCOP superfamilies using a simple Bayesian approach. We have predicted the structure of 3,338 putative domains and assigned SCOP superfamily annotations to 581 of them. We have also assigned structural annotations to 7,094 predicted domains based on fold recognition and homology modeling methods. The domain predictions and structural information are available in an online database at http://rd.plos.org/10.1371_journal.pbio.0050076_01.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Andrew M Wollacott, Alexandre Zanghellini, Paul Murphy, David Baker
Prediction of structures of multidomain proteins from structures of the individual domains Journal Article
In: Protein science, vol. 16, pp. 165-75, 2007, ISSN: 0961-8368.
@article{109,
title = {Prediction of structures of multidomain proteins from structures of the individual domains},
author = {Andrew M Wollacott and Alexandre Zanghellini and Paul Murphy and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/wollacott07A.pdf},
issn = {0961-8368},
year = {2007},
date = {2007-02-01},
journal = {Protein science},
volume = {16},
pages = {165--175},
abstract = {We describe the development of a method for assembling structures of multidomain proteins from structures of isolated domains. The method consists of an initial low-resolution search in which the conformational space of the domain linker is explored using the Rosetta de novo structure prediction method, followed by a high-resolution search in which all atoms are treated explicitly and backbone and side chain degrees of freedom are simultaneously optimized. The method recapitulates, often with very high accuracy, the structures of existing multidomain proteins.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Michael Tress, Jianlin Cheng, Pierre Baldi, Keehyoung Joo, Jinwoo Lee, Joo-Hyun Seo, Jooyoung Lee, David Baker, Dylan Chivian, David Kim, Iakes Ezkurdia
Assessment of predictions submitted for the CASP7 domain prediction category Journal Article
In: Proteins, vol. 69 Suppl 8, pp. 137-51, 2007, ISSN: 1097-0134.
@article{286,
title = {Assessment of predictions submitted for the {CASP7} domain prediction category},
author = {Michael Tress and Jianlin Cheng and Pierre Baldi and Keehyoung Joo and Jinwoo Lee and Joo-Hyun Seo and Jooyoung Lee and David Baker and Dylan Chivian and David Kim and Iakes Ezkurdia},
issn = {1097-0134},
year = {2007},
date = {2007},
journal = {Proteins},
volume = {69},
number = {Suppl 8},
pages = {137--151},
abstract = {This paper details the assessment process and evaluation results for the Critical Assessment of Protein Structure Prediction (CASP7) domain prediction category. Domain predictions were assessed using the Normalized Domain Overlap score introduced in CASP6 and the accuracy of prediction of domain break points. The results of the analysis clearly demonstrate that the best methods are able to make consistently reliable predictions when the target has a structural template, although they are less good when the domain break occurs in a region not covered by a template. The conditions of the experiment meant that it was impossible to draw any conclusions about domain prediction for free modeling targets and it was also difficult to draw many distinctions between the best groups. Two thirds of the targets submitted were single domains and hence regarded as easy to predict. Even those targets defined as having multiple domains always had at least one domain with a similar template structure.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
James D R Knight, Bin Qian, David Baker, Rashmi Kothary
Conservation, variability and the modeling of active protein kinases Journal Article
In: PloS one, vol. 2, pp. e982, 2007, ISSN: 1932-6203.
@article{281,
title = {Conservation, variability and the modeling of active protein kinases},
author = {James D R Knight and Bin Qian and David Baker and Rashmi Kothary},
issn = {1932-6203},
year = {2007},
date = {2007},
journal = {PloS one},
volume = {2},
pages = {e982},
abstract = {The human proteome is rich with protein kinases, and this richness has made the kinase of crucial importance in initiating and maintaining cell behavior. Elucidating cell signaling networks and manipulating their components to understand and alter behavior require well designed inhibitors. These inhibitors are needed in culture to cause and study network perturbations, and the same compounds can be used as drugs to treat disease. Understanding the structural biology of protein kinases in detail, including their commonalities, differences and modes of substrate interaction, is necessary for designing high quality inhibitors that will be of true use for cell biology and disease therapy. To this end, we here report on a structural analysis of all available active-conformation protein kinases, discussing residue conservation, the novel features of such conservation, unique properties of atypical kinases and variability in the context of substrate binding. We also demonstrate how this information can be used for structure prediction. Our findings will be of use not only in understanding protein kinase function and evolution, but they highlight the flaws inherent in kinase drug design as commonly practiced and dictate an appropriate strategy for the sophisticated design of specific inhibitors for use in the laboratory and disease therapy.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Rhiju Das, Bin Qian, Srivatsan Raman, Robert Vernon, James Thompson, Philip Bradley, Sagar Khare, Michael D Tyka, Divya Bhat, Dylan Chivian, David E Kim, William H Sheffler, Lars Malmström, Andrew M Wollacott, Chu Wang, Ingemar Andre, David Baker
Structure prediction for CASP7 targets using extensive all-atom refinement with Rosetta@home Journal Article
In: Proteins, vol. 69 Suppl 8, pp. 118-28, 2007, ISSN: 1097-0134.
@article{118,
title = {Structure prediction for {CASP7} targets using extensive all-atom refinement with {Rosetta@home}},
author = {Rhiju Das and Bin Qian and Srivatsan Raman and Robert Vernon and James Thompson and Philip Bradley and Sagar Khare and Michael D Tyka and Divya Bhat and Dylan Chivian and David E Kim and William H Sheffler and Lars Malmstr{\"o}m and Andrew M Wollacott and Chu Wang and Ingemar Andre and David Baker},
issn = {1097-0134},
year = {2007},
date = {2007},
journal = {Proteins},
volume = {69},
number = {Suppl 8},
pages = {118--128},
abstract = {We describe predictions made using the Rosetta structure prediction methodology for both template-based modeling and free modeling categories in the Seventh Critical Assessment of Techniques for Protein Structure Prediction. For the first time, aggressive sampling and all-atom refinement could be carried out for the majority of targets, an advance enabled by the Rosetta@home distributed computing network. Template-based modeling predictions using an iterative refinement algorithm improved over the best existing templates for the majority of proteins with less than 200 residues. Free modeling methods gave near-atomic accuracy predictions for several targets under 100 residues from all secondary structure classes. These results indicate that refinement with an all-atom energy function, although computationally expensive, is a powerful method for obtaining accurate structure predictions.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2006
Philip Bradley, David Baker
Improved beta-protein structure prediction by multilevel optimization of nonlocal strand pairings and local backbone conformation Journal Article
In: Proteins, vol. 65, pp. 922-9, 2006, ISSN: 1097-0134.
@article{154,
title = {Improved beta-protein structure prediction by multilevel optimization of nonlocal strand pairings and local backbone conformation},
author = {Philip Bradley and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/07/bradley06A.pdf},
issn = {1097-0134},
year = {2006},
date = {2006-12-01},
journal = {Proteins},
volume = {65},
pages = {922--929},
abstract = {Proteins with complex, nonlocal beta-sheets are challenging for de novo structure prediction, due in part to the difficulty of efficiently sampling long-range strand pairings. We present a new, multilevel approach to beta-sheet structure prediction that circumvents this difficulty by reformulating structure generation in terms of a folding tree. Nonlocal connections in this tree allow us to explicitly sample alternative beta-strand pairings while simultaneously exploring local conformational space using backbone torsion-space moves. An iterative, energy-biased resampling strategy is used to explore the space of beta-strand pairings; we expect that such a strategy will be generally useful for searching large conformational spaces with a high degree of combinatorial complexity.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jens Meiler, David Baker
ROSETTALIGAND: protein-small molecule docking with full side-chain flexibility Journal Article
In: Proteins, vol. 65, pp. 538-48, 2006, ISSN: 1097-0134.
@article{159,
title = {{ROSETTALIGAND}: protein-small molecule docking with full side-chain flexibility},
author = {Jens Meiler and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/07/meiler06A.pdf},
issn = {1097-0134},
year = {2006},
date = {2006-11-01},
journal = {Proteins},
volume = {65},
pages = {538--548},
abstract = {Protein-small molecule docking algorithms provide a means to model the structure of protein-small molecule complexes in structural detail and play an important role in drug development. In recent years the necessity of simulating protein side-chain flexibility for an accurate prediction of the protein-small molecule interfaces has become apparent, and an increasing number of docking algorithms probe different approaches to include protein flexibility. Here we describe a new method for docking small molecules into protein binding sites employing a Monte Carlo minimization procedure in which the rigid body position and orientation of the small molecule and the protein side-chain conformations are optimized simultaneously. The energy function comprises van der Waals (VDW) interactions, an implicit solvation model, an explicit orientation hydrogen bonding potential, and an electrostatics model. In an evaluation of the scoring function the computed energy correlated with experimental small molecule binding energy with a correlation coefficient of 0.63 across a diverse set of 229 protein- small molecule complexes. The docking method produced lowest energy models with a root mean square deviation (RMSD) smaller than 2 A in 71 out of 100 protein-small molecule crystal structure complexes (self-docking). In cross-docking calculations in which both protein side-chain and small molecule internal degrees of freedom were varied the lowest energy predictions had RMSDs less than 2 A in 14 of 20 test cases.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Elizabeth R Sprague, Chu Wang, David Baker, Pamela J Bjorkman
Crystal structure of the HSV-1 Fc receptor bound to Fc reveals a mechanism for antibody bipolar bridging Journal Article
In: PLoS biology, vol. 4, pp. e148, 2006, ISSN: 1545-7885.
@article{295,
title = {Crystal structure of the {HSV-1} {Fc} receptor bound to {Fc} reveals a mechanism for antibody bipolar bridging},
author = {Elizabeth R Sprague and Chu Wang and David Baker and Pamela J Bjorkman},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/sprague06A.pdf},
issn = {1545-7885},
year = {2006},
date = {2006-06-01},
journal = {PLoS biology},
volume = {4},
pages = {e148},
abstract = {Herpes simplex virus type-1 expresses a heterodimeric Fc receptor, gE-gI, on the surfaces of virions and infected cells that binds the Fc region of host immunoglobulin G and is implicated in the cell-to-cell spread of virus. gE-gI binds immunoglobulin G at the basic pH of the cell surface and releases it at the acidic pH of lysosomes, consistent with a role in facilitating the degradation of antiviral antibodies. Here we identify the C-terminal domain of the gE ectodomain (CgE) as the minimal Fc-binding domain and present a 1.78-angstroms CgE structure. A 5-angstroms gE-gI/Fc crystal structure, which was independently verified by a theoretical prediction method, reveals that CgE binds Fc at the C(H)2-C(H)3 interface, the binding site for several mammalian and bacterial Fc-binding proteins. The structure identifies interface histidines that may confer pH-dependent binding and regions of CgE implicated in cell-to-cell spread of virus. The ternary organization of the gE-gI/Fc complex is compatible with antibody bipolar bridging, which can interfere with the antiviral immune response.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Vanita D Sood, David Baker
Recapitulation and design of protein binding peptide structures and sequences Journal Article
In: Journal of molecular biology, vol. 357, pp. 917-27, 2006, ISSN: 0022-2836.
@article{162,
title = {Recapitulation and design of protein binding peptide structures and sequences},
author = {Vanita D Sood and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/sood06A.pdf},
issn = {0022-2836},
year = {2006},
date = {2006-03-01},
journal = {Journal of molecular biology},
volume = {357},
pages = {917--927},
abstract = {An important objective of computational protein design is the generation of high affinity peptide inhibitors of protein-peptide interactions, both as a precursor to the development of therapeutics aimed at disrupting disease causing complexes, and as a tool to aid investigators in understanding the role of specific complexes in the cell. We have developed a computational approach to increase the affinity of a protein-peptide complex by designing N or C-terminal extensions which interact with the protein outside the canonical peptide binding pocket. In a first in silico test, we show that by simultaneously optimizing the sequence and structure of three to nine residue peptide extensions starting from short (1-6 residue) peptide stubs in the binding pocket of a peptide binding protein, the approach can recover both the conformations and the sequences of known binding peptides. Comparison with phage display and other experimental data suggests that the peptide extension approach recapitulates naturally occurring peptide binding specificity better than fixed backbone design, and that it should be useful for predicting peptide binding specificities from crystal structures. We then experimentally test the approach by designing extensions for p53 and dystroglycan-based peptides predicted to bind with increased affinity to the Mdm2 oncoprotein and to dystrophin, respectively. The measured increases in affinity are modest, revealing some limitations of the method. Based on these in silico and experimental results, we discuss future applications of the approach to the prediction and design of protein-peptide interactions.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
David Baker
Prediction and design of macromolecular structures and interactions Journal Article
In: Philosophical transactions of the Royal Society of London, vol. 361, pp. 459-63, 2006, ISSN: 0962-8436.
@article{153,
title = {Prediction and design of macromolecular structures and interactions},
author = {David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/baker06A.pdf},
issn = {0962-8436},
year = {2006},
date = {2006-03-01},
journal = {Philosophical transactions of the Royal Society of London},
volume = {361},
pages = {459--463},
abstract = {In this article, I summarize recent work from my group directed towards developing an improved model of intra and intermolecular interactions and applying this improved model to the prediction and design of macromolecular structures and interactions. Prediction and design applications can be of great biological interest in their own right, and also provide very stringent and objective tests which drive the improvement of the model and increases in fundamental understanding. I emphasize the results from the prediction and design tests that suggest progress is being made in high-resolution modelling, and that there is hope for reliably and accurately computing structural biology.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Michael J Thompson, Stuart A Sievers, John Karanicolas, Magdalena I Ivanova, David Baker, David Eisenberg
The 3D profile method for identifying fibril-forming segments of proteins Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 103, pp. 4074-8, 2006, ISSN: 0027-8424.
@article{163,
title = {The {3D} profile method for identifying fibril-forming segments of proteins},
author = {Michael J Thompson and Stuart A Sievers and John Karanicolas and Magdalena I Ivanova and David Baker and David Eisenberg},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/thompson06A.pdf},
issn = {0027-8424},
year = {2006},
date = {2006-03-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {103},
pages = {4074--4078},
abstract = {Based on the crystal structure of the cross-beta spine formed by the peptide NNQQNY, we have developed a computational approach for identifying those segments of amyloidogenic proteins that themselves can form amyloid-like fibrils. The approach builds on experiments showing that hexapeptides are sufficient for forming amyloid-like fibrils. Each six-residue peptide of a protein of interest is mapped onto an ensemble of templates, or 3D profile, generated from the crystal structure of the peptide NNQQNY by small displacements of one of the two intermeshed beta-sheets relative to the other. The energy of each mapping of a sequence to the profile is evaluated by using ROSETTADESIGN, and the lowest energy match for a given peptide to the template library is taken as the putative prediction. If the energy of the putative prediction is lower than a threshold value, a prediction of fibril formation is made. This method can reach an accuracy of approximately 80% with a P value of approximately 10(-12) when a conservative energy threshold is used to separate peptides that form fibrils from those that do not. We see enrichment for positive predictions in a set of fibril-forming segments of amyloid proteins, and we illustrate the method with applications to proteins of interest in amyloid research.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Vladimir Yarov-Yarovoy, Jack Schonbrun, David Baker
Multipass membrane protein structure prediction using Rosetta Journal Article
In: Proteins, vol. 62, pp. 1010-25, 2006, ISSN: 1097-0134.
@article{165,
title = {Multipass membrane protein structure prediction using {Rosetta}},
author = {Vladimir Yarov-Yarovoy and Jack Schonbrun and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/yarov-yarovoy06B.pdf},
issn = {1097-0134},
year = {2006},
date = {2006-03-01},
journal = {Proteins},
volume = {62},
pages = {1010--1025},
abstract = {We describe the adaptation of the Rosetta de novo structure prediction method for prediction of helical transmembrane protein structures. The membrane environment is modeled by embedding the protein chain into a model membrane represented by parallel planes defining hydrophobic, interface, and polar membrane layers for each energy evaluation. The optimal embedding is determined by maximizing the exposure of surface hydrophobic residues within the membrane and minimizing hydrophobic exposure outside of the membrane. Protein conformations are built up using the Rosetta fragment assembly method and evaluated using a new membrane-specific version of the Rosetta low-resolution energy function in which residue-residue and residue-environment interactions are functions of the membrane layer in addition to amino acid identity, distance, and density. We find that lower energy and more native-like structures are achieved by sequential addition of helices to a growing chain, which may mimic some aspects of helical protein biogenesis after translocation, rather than folding the whole chain simultaneously as in the Rosetta soluble protein prediction method. In tests on 12 membrane proteins for which the structure is known, between 51 and 145 residues were predicted with root-mean-square deviation <4 A from the native structure.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Tracy Arakaki, Isolde Le Trong, Eric Phizicky, Erin Quartley, George Detitta, Joseph Luft, Angela Lauricella, Lori Anderson, Oleksandr Kalyuzhniy, Elizabeth Worthey, Peter J Myler, David Kim, David Baker, Wim G J Hol, Ethan A Merritt
Structure of Lmaj006129AAA, a hypothetical protein from Leishmania major Journal Article
In: Acta crystallographica. Section F, Structural biology and crystallization communications, vol. 62, pp. 175-9, 2006, ISSN: 1744-3091.
@article{575,
  title     = {Structure of {Lmaj006129AAA}, a hypothetical protein from {Leishmania} major},
  author    = {Arakaki, Tracy and Le Trong, Isolde and Phizicky, Eric and Quartley, Erin and Detitta, George and Luft, Joseph and Lauricella, Angela and Anderson, Lori and Kalyuzhniy, Oleksandr and Worthey, Elizabeth and Myler, Peter J and Kim, David and Baker, David and Hol, Wim G J and Merritt, Ethan A},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/08/structureoflmaj006129aaa_Baker2006.pdf},
  doi       = {10.1107/S1744309106005902},
  issn      = {1744-3091},
  year      = {2006},
  date      = {2006-03-01},
  journal   = {Acta crystallographica. Section F, Structural biology and crystallization communications},
  volume    = {62},
  pages     = {175--179},
  abstract  = {The gene product of structural genomics target Lmaj006129 from Leishmania major codes for a 164-residue protein of unknown function. When SeMet expression of the full-length gene product failed, several truncation variants were created with the aid of Ginzu, a domain-prediction method. 11 truncations were selected for expression, purification and crystallization based upon secondary-structure elements and disorder. The structure of one of these variants, Lmaj006129AAH, was solved by multiple-wavelength anomalous diffraction (MAD) using ELVES, an automatic protein crystal structure-determination system. This model was then successfully used as a molecular-replacement probe for the parent full-length target, Lmaj006129AAA. The final structure of Lmaj006129AAA was refined to an R value of 0.185 (Rfree = 0.229) at 1.60 A resolution. Structure and sequence comparisons based on Lmaj006129AAA suggest that proteins belonging to Pfam sequence families PF04543 and PF01878 may share a common ligand-binding motif.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2005
Ora Schueler-Furman, Chu Wang, Phil Bradley, Kira Misura, David Baker
Progress in modeling of protein structures and interactions Journal Article
In: Science, vol. 310, pp. 638-42, 2005, ISSN: 1095-9203.
@article{94,
  title     = {Progress in modeling of protein structures and interactions},
  author    = {Schueler-Furman, Ora and Wang, Chu and Bradley, Phil and Misura, Kira and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/schueler-furman05B.pdf},
  issn      = {1095-9203},
  year      = {2005},
  date      = {2005-10-01},
  journal   = {Science},
  volume    = {310},
  pages     = {638--642},
  abstract  = {The prediction of the structures and interactions of biological macromolecules at the atomic level and the design of new structures and interactions are critical tests of our understanding of the interatomic interactions that underlie molecular biology. Equally important, the capability to accurately predict and design macromolecular structures and interactions would streamline the interpretation of genome sequence information and allow the creation of macromolecules with new and useful functions. This review summarizes recent progress in modeling that suggests that we are entering an era in which high-resolution prediction and design will make increasingly important contributions to biology and medicine.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Philip Bradley, Kira M S Misura, David Baker
Toward high-resolution de novo structure prediction for small proteins Journal Article
In: Science, vol. 309, pp. 1868-71, 2005, ISSN: 1095-9203.
@article{104,
  title     = {Toward high-resolution de novo structure prediction for small proteins},
  author    = {Bradley, Philip and Misura, Kira M S and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/bradley05B.pdf},
  issn      = {1095-9203},
  year      = {2005},
  date      = {2005-09-01},
  journal   = {Science},
  volume    = {309},
  pages     = {1868--1871},
  abstract  = {The prediction of protein structure from amino acid sequence is a grand challenge of computational molecular biology. By using a combination of improved low- and high-resolution conformational sampling methods, improved atomically detailed potential functions that capture the jigsaw puzzle-like packing of protein cores, and high-performance computing, high-resolution structure prediction (<1.5 angstroms) can be achieved for small protein domains (<85 residues). The primary bottleneck to consistent high-resolution prediction appears to be conformational sampling.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
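Ora Schueler-Furman, Chu Wang, David Baker
Progress in protein-protein docking: atomic resolution predictions in the CAPRI experiment using RosettaDock with an improved treatment of side-chain flexibility Journal Article
In: Proteins, vol. 60, pp. 187-94, 2005, ISSN: 1097-0134.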
@article{95,
  title     = {Progress in protein-protein docking: atomic resolution predictions in the {CAPRI} experiment using {RosettaDock} with an improved treatment of side-chain flexibility},
  author    = {Schueler-Furman, Ora and Wang, Chu and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/schueler-rurman05A.pdf},
  issn      = {1097-0134},
  year      = {2005},
  date      = {2005-08-01},
  journal   = {Proteins},
  volume    = {60},
  pages     = {187--194},
  abstract  = {RosettaDock uses real-space Monte Carlo minimization (MCM) on both rigid-body and side-chain degrees of freedom to identify the lowest free energy docked arrangement of 2 protein structures. An improved version of the method that uses gradient-based minimization for off-rotamer side-chain optimization and includes information from unbound structures was used to create predictions for Rounds 4 and 5 of CAPRI. First, large numbers of independent MCM trajectories were carried out and the lowest free energy docked configurations identified. Second, new trajectories were started from these lowest energy structures to thoroughly sample the surrounding conformation space, and the lowest energy configurations were submitted as predictions. For all cases in which there were no significant backbone conformational changes, a small number of very low-energy configurations were identified in the first, global search and subsequently found to be close to the center of the basin of attraction in the free energy landscape in the second, local search. Following the release of the experimental coordinates, it was found that the centers of these free energy minima were remarkably close to the native structures in not only the rigid-body orientation but also the detailed conformations of the side-chains. Out of 8 targets, the lowest energy models had interface root-mean-square deviations (RMSDs) less than 1.1 A from the correct structures for 6 targets, and interface RMSDs less than 0.4 A for 3 targets. The predictions were top submissions to CAPRI for Targets 11, 12, 14, 15, and 19. The close correspondence of the lowest free energy structures found in our searches to the experimental structures suggests that our free energy function is a reasonable representation of the physical chemistry, and that the real space search with full side-chain flexibility to some extent solves the protein-protein docking problem in the absence of significant backbone conformational changes.
On the other hand, the approach fails when there are significant backbone conformational changes as the steric complementarity of the 2 proteins cannot be modeled without incorporating backbone flexibility, and this is the major goal of our current work.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Chu Wang, Ora Schueler-Furman, David Baker
Improved side-chain modeling for protein-protein docking Journal Article
In: Protein science, vol. 14, pp. 1328-39, 2005, ISSN: 0961-8368.
@article{93,
  title     = {Improved side-chain modeling for protein-protein docking},
  author    = {Wang, Chu and Schueler-Furman, Ora and Baker, David},
  issn      = {0961-8368},
  year      = {2005},
  date      = {2005-05-01},
  journal   = {Protein science},
  volume    = {14},
  pages     = {1328--1339},
  abstract  = {Success in high-resolution protein-protein docking requires accurate modeling of side-chain conformations at the interface. Most current methods either leave side chains fixed in the conformations observed in the unbound protein structures or allow the side chains to sample a set of discrete rotamer conformations. Here we describe a rapid and efficient method for sampling off-rotamer side-chain conformations by torsion space minimization during protein-protein docking starting from discrete rotamer libraries supplemented with side-chain conformations taken from the unbound structures, and show that the new method improves side-chain modeling and increases the energetic discrimination between good and bad models. Analysis of the distribution of side-chain interaction energies within and between the two protein partners shows that the new method leads to more native-like distributions of interaction energies and that the neglect of side-chain entropy produces a small but measurable increase in the number of residues whose interaction energy cannot compensate for the entropic cost of side-chain freezing at the interface. The power of the method is highlighted by a number of predictions of unprecedented accuracy in the recent CAPRI (Critical Assessment of PRedicted Interactions) blind test of protein-protein docking methods.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Kira M S Misura, David Baker
Progress and challenges in high-resolution refinement of protein structure models Journal Article
In: Proteins, vol. 59, pp. 15-29, 2005, ISSN: 1097-0134.
@article{98,
  title     = {Progress and challenges in high-resolution refinement of protein structure models},
  author    = {Misura, Kira M S and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/misura05A.pdf},
  issn      = {1097-0134},
  year      = {2005},
  date      = {2005-04-01},
  journal   = {Proteins},
  volume    = {59},
  pages     = {15--29},
  abstract  = {Achieving atomic level accuracy in de novo structure prediction presents a formidable challenge even in the context of protein models with correct topologies. High-resolution refinement is a fundamental test of force field accuracy and sampling methodology, and its limited success in both comparative modeling and de novo prediction contexts highlights the limitations of current approaches. We constructed four tests to identify bottlenecks in our current approach and to guide progress in this challenging area. The first three tests showed that idealized native structures are stable under our refinement simulation conditions and that the refinement protocol can significantly decrease the root mean square deviation (RMSD) of perturbed native structures. In the fourth test we applied the refinement protocol to de novo models and showed that accurate models could be identified based on their energies, and in several cases many of the buried side chains adopted native-like conformations. We also showed that the differences in backbone and side-chain conformations between the refined de novo models and the native structures are largely localized to loop regions and regions where the native structure has unusual features such as rare rotamers or atypical hydrogen bonding between beta-strands. The refined de novo models typically have higher energies than refined idealized native structures, indicating that sampling of local backbone conformations and side-chain packing arrangements in a condensed state is a primary obstacle.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Jens Meiler, David Baker
The fumarate sensor DcuS: progress in rapid protein fold elucidation by combining protein structure prediction methods with NMR spectroscopy Journal Article
In: Journal of magnetic resonance, vol. 173, pp. 310-6, 2005, ISSN: 1090-7807.
@article{99,
  title     = {The fumarate sensor {DcuS}: progress in rapid protein fold elucidation by combining protein structure prediction methods with {NMR} spectroscopy},
  author    = {Meiler, Jens and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/meiler05A.pdf},
  issn      = {1090-7807},
  year      = {2005},
  date      = {2005-04-01},
  journal   = {Journal of magnetic resonance},
  volume    = {173},
  pages     = {310--316},
  abstract  = {We illustrate how moderate resolution protein structures can be rapidly obtained by interlinking computational prediction methodologies with un- or partially assigned NMR data. To facilitate the application of our recently described method of ranking and subsequent refining alternative structural models using unassigned NMR data [Proc. Natl. Acad. Sci. USA 100 (2003) 15404] for such "structural genomics"-type experiments it is combined with protein models from several prediction techniques, enhanced to utilize partial assignments, and applied on a protein with an unknown structure and fold. From the original NMR spectra obtained for the 140 residue fumarate sensor DcuS, 1100 1H, 13C, and 15N chemical shift signals, 3000 1H-1H NOESY cross peak intensities, and 209 backbone residual dipolar couplings were extracted and used to rank models produced by de novo structure prediction and comparative modeling methods. The ranking proceeds in two steps: first, an optimal assignment of the NMR peaks to atoms is found for each model independently, and second, the models are ranked based on the consistency between the NMR data and the model assuming these optimal assignments. The low-resolution model selected using this ranking procedure had the correct overall fold and a global backbone RMSD of 6.0 angstrom, and was subsequently refined to 3.7 angstrom RMSD. With the incorporation of a small number of NOE and residual dipolar coupling constraints available very early in the traditional spectral assignment process, a model with an RMSD of 2.8 angstrom could rapidly be built. The ability to generate moderate resolution models within days of NMR data collection should facilitate large scale NMR structure determination efforts.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Lin Jiang, Brian Kuhlman, Tanja Kortemme, David Baker
A "solvated rotamer" approach to modeling water-mediated hydrogen bonds at protein-protein interfaces Journal Article
In: Proteins, vol. 58, pp. 893-904, 2005, ISSN: 1097-0134.
@article{101,
  title     = {A "solvated rotamer" approach to modeling water-mediated hydrogen bonds at protein-protein interfaces},
  author    = {Jiang, Lin and Kuhlman, Brian and Kortemme, Tanja and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/jiang05A.pdf},
  issn      = {1097-0134},
  year      = {2005},
  date      = {2005-03-01},
  journal   = {Proteins},
  volume    = {58},
  pages     = {893--904},
  abstract  = {Water-mediated hydrogen bonds play critical roles at protein-protein and protein-nucleic acid interfaces, and the interactions formed by discrete water molecules cannot be captured using continuum solvent models. We describe a simple model for the energetics of water-mediated hydrogen bonds, and show that, together with knowledge of the positions of buried water molecules observed in X-ray crystal structures, the model improves the prediction of free-energy changes upon mutation at protein-protein interfaces, and the recovery of native amino acid sequences in protein interface design calculations. We then describe a "solvated rotamer" approach to efficiently predict the positions of water molecules, at protein-protein interfaces and in monomeric proteins, that is compatible with widely used rotamer-based side-chain packing and protein design algorithms. Finally, we examine the extent to which the predicted water molecules can be used to improve prediction of amino acid identities and protein-protein interface stability, and discuss avenues for overcoming current limitations of the approach.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Christopher T Saunders, David Baker
Recapitulation of protein family divergence using flexible backbone protein design Journal Article
In: Journal of molecular biology, vol. 346, pp. 631-44, 2005, ISSN: 0022-2836.
@article{96,
  title     = {Recapitulation of protein family divergence using flexible backbone protein design},
  author    = {Saunders, Christopher T and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/saundersa05A.pdf},
  issn      = {0022-2836},
  year      = {2005},
  date      = {2005-02-01},
  journal   = {Journal of molecular biology},
  volume    = {346},
  pages     = {631--644},
  abstract  = {We use flexible backbone protein design to explore the sequence and structure neighborhoods of naturally occurring proteins. The method samples sequence and structure space in the vicinity of a known sequence and structure by alternately optimizing the sequence for a fixed protein backbone using rotamer based sequence search, and optimizing the backbone for a fixed amino acid sequence using atomic-resolution structure prediction. We find that such a flexible backbone design method better recapitulates protein family sequence variation than sequence optimization on fixed backbones or randomly perturbed backbone ensembles for ten diverse protein structures. For the SH3 domain, the backbone structure variation in the family is also better recapitulated than in randomly perturbed backbones. The potential application of this method as a model of protein family evolution is highlighted by a concerted transition to the amino acid sequence in the structural core of one SH3 domain starting from the backbone coordinates of an homologous structure.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
David E Kim, Dylan Chivian, Lars Malmström, David Baker
Automated prediction of domain boundaries in CASP6 targets using Ginzu and RosettaDOM Journal Article
In: Proteins, vol. 61 Suppl 7, pp. 193-200, 2005, ISSN: 1097-0134.
@article{100,
  title     = {Automated prediction of domain boundaries in {CASP6} targets using {Ginzu} and {RosettaDOM}},
  author    = {Kim, David E and Chivian, Dylan and Malmstr{\"o}m, Lars and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/kim05A.pdf},
  issn      = {1097-0134},
  year      = {2005},
  date      = {2005},
  journal   = {Proteins},
  volume    = {61},
  number    = {Suppl 7},
  pages     = {193--200},
  abstract  = {Domain boundary prediction is an important step in both experimental and computational protein structure characterization. We have developed two fully automated domain parsing methods: the first, Ginzu, which we have described previously, utilizes information from homologous sequences and structures, while the second, RosettaDOM, which has not been described previously, uses only information in the query sequence. Ginzu iteratively assigns domains by homology to structures and sequence families using successively less confident methods. RosettaDOM uses the Rosetta de novo structure prediction method to build three-dimensional models, and then applies Taylor's structure based domain assignment method to parse the models into domains. Domain boundaries observed repeatedly in the models are predicted to be domain boundaries for the protein. Interestingly, RosettaDOM produced quite good domain predictions for proteins of a size typically considered to be beyond the reach of de novo structure prediction methods. For remote fold recognition targets and new folds, both Ginzu and RosettaDOM produced promising results, and in some cases where one method failed to detect the correct domain boundary, it was correctly identified by the other method. We describe here the successes and failures using both methods, and address the possibility of incorporating both protocols into an improved hybrid method.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Osvaldo Graña, David Baker, Robert M MacCallum, Jens Meiler, Marco Punta, Burkhard Rost, Michael L Tress, Alfonso Valencia
CASP6 assessment of contact prediction Journal Article
In: Proteins, vol. 61 Suppl 7, pp. 214-24, 2005, ISSN: 1097-0134.
@article{297,
  title     = {{CASP6} assessment of contact prediction},
  author    = {Gra{\~n}a, Osvaldo and Baker, David and MacCallum, Robert M and Meiler, Jens and Punta, Marco and Rost, Burkhard and Tress, Michael L and Valencia, Alfonso},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/grana05A.pdf},
  issn      = {1097-0134},
  year      = {2005},
  date      = {2005},
  journal   = {Proteins},
  volume    = {61},
  number    = {Suppl 7},
  pages     = {214--224},
  abstract  = {Here we present the evaluation results of the Critical Assessment of Protein Structure Prediction (CASP6) contact prediction category. Contact prediction was assessed with standard measures well known in the field and the performance of specialist groups was evaluated alongside groups that submitted models with 3D coordinates. The evaluation was mainly focused on long range contact predictions for the set of new fold targets, although we analyzed predictions for all targets. Three groups with similar levels of accuracy and coverage performed a little better than the others. Comparisons of the predictions of the three best methods with those of CASP5/CAFASP3 suggested some improvement, although there were not enough targets in the comparisons to make this statistically significant.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Philip Bradley, Lars Malmström, Bin Qian, Jack Schonbrun, Dylan Chivian, David E Kim, Jens Meiler, Kira M S Misura, David Baker
Free modeling with Rosetta in CASP6 Journal Article
In: Proteins, vol. 61 Suppl 7, pp. 128-34, 2005, ISSN: 1097-0134.
@article{105,
  title     = {Free modeling with {Rosetta} in {CASP6}},
  author    = {Bradley, Philip and Malmstr{\"o}m, Lars and Qian, Bin and Schonbrun, Jack and Chivian, Dylan and Kim, David E and Meiler, Jens and Misura, Kira M S and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/bradley05A.pdf},
  issn      = {1097-0134},
  year      = {2005},
  date      = {2005},
  journal   = {Proteins},
  volume    = {61},
  number    = {Suppl 7},
  pages     = {128--134},
  abstract  = {We describe Rosetta predictions in the Sixth Community-Wide Experiment on the Critical Assessment of Techniques for Protein Structure Prediction (CASP), focusing on the free modeling category. Methods developed since CASP5 are described, and their application to selected targets is discussed. Highlights include improved performance on larger proteins (100-200 residues) and the prediction of a 70-residue alpha-beta protein to near-atomic resolution.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
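Gong Cheng, Bin Qian, Ram Samudrala, David Baker
Improvement in protein functional site prediction by distinguishing structural and functional constraints on protein family evolution using computational design Journal Article
In: Nucleic acids research, vol. 33, pp. 5861-7, 2005, ISSN: 1362-4962.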
@article{103,
  title     = {Improvement in protein functional site prediction by distinguishing structural and functional constraints on protein family evolution using computational design},
  author    = {Cheng, Gong and Qian, Bin and Samudrala, Ram and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/cheng05A.pdf},
  issn      = {1362-4962},
  year      = {2005},
  date      = {2005},
  journal   = {Nucleic acids research},
  volume    = {33},
  pages     = {5861--5867},
  abstract  = {The prediction of functional sites in newly solved protein structures is a challenge for computational structural biology. Most methods for approaching this problem use evolutionary conservation as the primary indicator of the location of functional sites. However, sequence conservation reflects not only evolutionary selection at functional sites to maintain protein function, but also selection throughout the protein to maintain the stability of the folded state. To disentangle sequence conservation due to protein functional constraints from sequence conservation due to protein structural constraints, we use all atom computational protein design methodology to predict sequence profiles expected under solely structural constraints, and to compute the free energy difference between the naturally occurring amino acid and the lowest free energy amino acid at each position. We show that functional sites are more likely than non-functional sites to have computed sequence profiles which differ significantly from the naturally occurring sequence profiles and to have residues with sub-optimal free energies, and that incorporation of these two measures improves sequence based prediction of protein functional sites. The combined sequence and structure based functional site prediction method has been implemented in a publicly available web server.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Dylan Chivian, David E Kim, Lars Malmström, Jack Schonbrun, Carol A Rohl, David Baker
Prediction of CASP6 structures using automated Robetta protocols Journal Article
In: Proteins, vol. 61 Suppl 7, pp. 157-66, 2005, ISSN: 1097-0134.
@article{102,
  title     = {Prediction of {CASP6} structures using automated {Robetta} protocols},
  author    = {Chivian, Dylan and Kim, David E and Malmstr{\"o}m, Lars and Schonbrun, Jack and Rohl, Carol A and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/chivian05A.pdf},
  issn      = {1097-0134},
  year      = {2005},
  date      = {2005},
  journal   = {Proteins},
  volume    = {61},
  number    = {Suppl 7},
  pages     = {157--166},
  abstract  = {The Robetta server and revised automatic protocols were used to predict structures for CASP6 targets. Robetta is a publicly available protein structure prediction server (http://robetta.bakerlab.org/) that uses the Rosetta de novo and homology modeling structure prediction methods. We incorporated some of the lessons learned in the CASP5 experiment into the server prior to participating in CASP6. We additionally tested new ideas that were amenable to full-automation with an eye toward improving the server. We find that the Robetta server shows the greatest promise for the more challenging targets. The most significant finding from CASP5, that automated protocols can be roughly comparable in ability with the better human-intervention predictors, is repeated here in CASP6.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Alexandre V Morozov, James J Havranek, David Baker, Eric D Siggia
Protein-DNA binding specificity predictions with structural models Journal Article
In: Nucleic acids research, vol. 33, pp. 5781-98, 2005, ISSN: 1362-4962.
@article{97,
  title     = {Protein-{DNA} binding specificity predictions with structural models},
  author    = {Morozov, Alexandre V and Havranek, James J and Baker, David and Siggia, Eric D},
  issn      = {1362-4962},
  year      = {2005},
  date      = {2005},
  journal   = {Nucleic acids research},
  volume    = {33},
  pages     = {5781--5798},
  abstract  = {Protein-DNA interactions play a central role in transcriptional regulation and other biological processes. Investigating the mechanism of binding affinity and specificity in protein-DNA complexes is thus an important goal. Here we develop a simple physical energy function, which uses electrostatics, solvation, hydrogen bonds and atom-packing terms to model direct readout and sequence-specific DNA conformational energy to model indirect readout of DNA sequence by the bound protein. The predictive capability of the model is tested against another model based only on the knowledge of the consensus sequence and the number of contacts between amino acids and DNA bases. Both models are used to carry out predictions of protein-DNA binding affinities which are then compared with experimental measurements. The nearly additive nature of protein-DNA interaction energies in our model allows us to construct position-specific weight matrices by computing base pair probabilities independently for each position in the binding site. Our approach is less data intensive than knowledge-based models of protein-DNA interactions, and is not limited to any specific family of transcription factors. However, native structures of protein-DNA complexes or their close homologs are required as input to the model. Use of homology modeling can significantly increase the extent of our approach, making it a useful tool for studying regulatory pathways in many organisms and cell types.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2004
James J Havranek, Carlos M Duarte, David Baker
A simple physical model for the prediction and design of protein-DNA interactions Journal Article
In: Journal of molecular biology, vol. 344, pp. 59-70, 2004, ISSN: 0022-2836.
@article{168,
  title     = {A simple physical model for the prediction and design of protein-{DNA} interactions},
  author    = {Havranek, James J and Duarte, Carlos M and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/havranek04A.pdf},
  issn      = {0022-2836},
  year      = {2004},
  date      = {2004-11-01},
  journal   = {Journal of molecular biology},
  volume    = {344},
  pages     = {59--70},
  abstract  = {Protein-DNA interactions are crucial for many biological processes. Attempts to model these interactions have generally taken the form of amino acid-base recognition codes or purely sequence-based profile methods, which depend on the availability of extensive sequence and structural information for specific structural families, neglect side-chain conformational variability, and lack generality beyond the structural family used to train the model. Here, we take advantage of recent advances in rotamer-based protein design and the large number of structurally characterized protein-DNA complexes to develop and parameterize a simple physical model for protein-DNA interactions. The model shows considerable promise for redesigning amino acids at protein-DNA interfaces, as design calculations recover the amino acid residue identities and conformations at these interfaces with accuracies comparable to sequence recovery in globular proteins. The model shows promise also for predicting DNA-binding specificity for fixed protein sequences: native DNA sequences are selected correctly from pools of competing DNA substrates; however, incorporation of backbone movement will likely be required to improve performance in homology modeling applications. Interestingly, optimization of zinc finger protein amino acid sequences for high-affinity binding to specific DNA sequences results in proteins with little or no predicted specificity, suggesting that naturally occurring DNA-binding proteins are optimized for specificity rather than affinity. When combined with algorithms that optimize specificity directly, the simple computational model developed here should be useful for the engineering of proteins with novel DNA-binding specificities.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Kira M S Misura, Alexandre V Morozov, David Baker
Analysis of anisotropic side-chain packing in proteins and application to high-resolution structure prediction Journal Article
In: Journal of molecular biology, vol. 342, pp. 651-64, 2004, ISSN: 0022-2836.
@article{173,
  title     = {Analysis of anisotropic side-chain packing in proteins and application to high-resolution structure prediction},
  author    = {Misura, Kira M S and Morozov, Alexandre V and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/misura04A.pdf},
  issn      = {0022-2836},
  year      = {2004},
  date      = {2004-09-01},
  journal   = {Journal of molecular biology},
  volume    = {342},
  pages     = {651--664},
  abstract  = {pi-pi, Cation-pi, and hydrophobic packing interactions contribute specificity to protein folding and stability to the native state. As a step towards developing improved models of these interactions in proteins, we compare the side-chain packing arrangements in native proteins to those found in compact decoys produced by the Rosetta de novo structure prediction method. We find enrichments in the native distributions for T-shaped and parallel offset arrangements of aromatic residue pairs, in parallel stacked arrangements of cation-aromatic pairs, in parallel stacked pairs involving proline residues, and in parallel offset arrangements for aliphatic residue pairs. We then investigate the extent to which the distinctive features of native packing can be explained using Lennard-Jones and electrostatics models. Finally, we derive orientation-dependent pi-pi, cation-pi and hydrophobic interaction potentials based on the differences between the native and compact decoy distributions and investigate their efficacy for high-resolution protein structure prediction. Surprisingly, the orientation-dependent potential derived from the packing arrangements of aliphatic side-chain pairs distinguishes the native structure from compact decoys better than the orientation-dependent potentials describing pi-pi and cation-pi interactions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
David E Kim, Dylan Chivian, David Baker
Protein structure prediction and analysis using the Robetta server Journal Article
In: Nucleic acids research, vol. 32, pp. W526-31, 2004, ISSN: 1362-4962.
@article{169,
title = {Protein structure prediction and analysis using the {Robetta} server},
author = {David E Kim and Dylan Chivian and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/kim04A.pdf},
issn = {1362-4962},
year = {2004},
date = {2004-07-01},
journal = {Nucleic Acids Research},
volume = {32},
pages = {W526--W531},
abstract = {The Robetta server (http://robetta.bakerlab.org) provides automated tools for protein structure prediction and analysis. For structure prediction, sequences submitted to the server are parsed into putative domains and structural models are generated using either comparative modeling or de novo structure prediction methods. If a confident match to a protein of known structure is found using BLAST, PSI-BLAST, FFAS03 or 3D-Jury, it is used as a template for comparative modeling. If no match is found, structure predictions are made using the de novo Rosetta fragment insertion method. Experimental nuclear magnetic resonance (NMR) constraints data can also be submitted with a query sequence for RosettaNMR de novo structure determination. Other current capabilities include the prediction of the effects of mutations on protein-protein interactions using computational interface alanine scanning. The Rosetta protein design and protein-protein docking methodologies will soon be available through the server as well.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Carol A Rohl, Charlie E M Strauss, Dylan Chivian, David Baker
Modeling structurally variable regions in homologous proteins with rosetta Journal Article
In: Proteins, vol. 55, pp. 656-77, 2004, ISSN: 1097-0134.
@article{177,
title = {Modeling structurally variable regions in homologous proteins with rosetta},
author = {Carol A Rohl and Charlie E M Strauss and Dylan Chivian and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/rohl04A.pdf},
issn = {1097-0134},
year = {2004},
date = {2004-05-01},
journal = {Proteins},
volume = {55},
pages = {656--677},
abstract = {A major limitation of current comparative modeling methods is the accuracy with which regions that are structurally divergent from homologues of known structure can be modeled. Because structural differences between homologous proteins are responsible for variations in protein function and specificity, the ability to model these differences has important functional consequences. Although existing methods can provide reasonably accurate models of short loop regions, modeling longer structurally divergent regions is an unsolved problem. Here we describe a method based on the de novo structure prediction algorithm, Rosetta, for predicting conformations of structurally divergent regions in comparative models. Initial conformations for short segments are selected from the protein structure database, whereas longer segments are built up by using three- and nine-residue fragments drawn from the database and combined by using the Rosetta algorithm. A gap closure term in the potential in combination with modified Newton's method for gradient descent minimization is used to ensure continuity of the peptide backbone. Conformations of variable regions are refined in the context of a fixed template structure using Monte Carlo minimization together with rapid repacking of side-chains to iteratively optimize backbone torsion angles and side-chain rotamers. For short loops, mean accuracies of 0.69, 1.45, and 3.62 {\AA} are obtained for 4, 8, and 12 residue loops, respectively. In addition, the method can provide reasonable models of conformations of longer protein segments: predicted conformations of 3 {\AA} root-mean-square deviation or better were obtained for 5 of 10 examples of segments ranging from 13 to 34 residues. In combination with a sequence alignment algorithm, this method generates complete, ungapped models of protein structures, including regions both similar to and divergent from a homologous structure.
This combined method was used to make predictions for 28 protein domains in the Critical Assessment of Protein Structure 4 (CASP 4) and 59 domains in CASP 5, where the method ranked highly among comparative modeling and fold recognition methods. Model accuracy in these blind predictions is dominated by alignment quality, but in the context of accurate alignments, long protein segments can be accurately modeled. Notably, the method correctly predicted the local structure of a 39-residue insertion into a TIM barrel in CASP 5 target T0186.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Tanja Kortemme, David E Kim, David Baker
Computational alanine scanning of protein-protein interfaces Journal Article
In: Science's STKE, vol. 2004, pp. pl2, 2004, ISSN: 1525-8882.
@article{300,
title = {Computational alanine scanning of protein-protein interfaces},
author = {Tanja Kortemme and David E Kim and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/kortemme04B-1.pdf},
issn = {1525-8882},
year = {2004},
date = {2004-02-01},
journal = {Science's STKE},
volume = {2004},
pages = {pl2},
abstract = {Protein-protein interactions are key components of all signal transduction processes, so methods to alter these interactions promise to become important tools in dissecting function of connectivities in these networks. We have developed a fast computational approach for the prediction of energetically important amino acid residues in protein-protein interfaces (available at http://robetta.bakerlab.org/alaninescan), which we, following Peter Kollman, have termed ``computational alanine scanning.'' The input consists of a three-dimensional structure of a protein-protein complex; output is a list of ``hot spots,'' or amino acid side chains that are predicted to significantly destabilize the interface when mutated to alanine, analogous to the results of experimental alanine-scanning mutagenesis. 79\% of hot spots and 68\% of neutral residues were correctly predicted in a test of 233 mutations in 19 protein-protein complexes. A single interface can be analyzed in minutes. The computational methodology has been validated by the successful design of protein interfaces with new specificity and activity, and has yielded new insights into the mechanisms of receptor specificity and promiscuity in biological systems.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Michael Kuhn, Jens Meiler, David Baker
Strand-loop-strand motifs: prediction of hairpins and diverging turns in proteins Journal Article
In: Proteins, vol. 54, pp. 282-8, 2004, ISSN: 1097-0134.
@article{172,
title = {Strand-loop-strand motifs: prediction of hairpins and diverging turns in proteins},
author = {Michael Kuhn and Jens Meiler and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/kuhn04A.pdf},
issn = {1097-0134},
year = {2004},
date = {2004-02-01},
journal = {Proteins},
volume = {54},
pages = {282--288},
abstract = {Beta-sheet proteins have been particularly challenging for de novo structure prediction methods, which tend to pair adjacent beta-strands into beta-hairpins and produce overly local topologies. To remedy this problem and facilitate de novo prediction of beta-sheet protein structures, we have developed a neural network that classifies strand-loop-strand motifs by local hairpins and nonlocal diverging turns by using the amino acid sequence as input. The neural network is trained with a representative subset of the Protein Data Bank and achieves a prediction accuracy of 75.9 $\pm$ 4.4\% compared to a baseline prediction rate of 59.1\%. Hairpins are predicted with an accuracy of 77.3 $\pm$ 6.1\%, diverging turns with an accuracy of 73.9 $\pm$ 6.0\%. Incorporation of the beta-hairpin/diverging turn classification into the ROSETTA de novo structure prediction method led to higher contact order models and somewhat improved tertiary structure predictions for a test set of 11 all-beta-proteins and 3 alphabeta-proteins. The beta-hairpin/diverging turn classification from amino acid sequences is available online for academic use (Meiler and Kuhn, 2003; www.jens-meiler.de/turnpred.html).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Carol A Rohl, Charlie E M Strauss, Kira M S Misura, David Baker
Protein structure prediction using Rosetta. Journal Article
In: Methods in enzymology, vol. 383, pp. 66-93, 2004, ISSN: 0076-6879.
@article{176,
title = {Protein structure prediction using {Rosetta}},
author = {Carol A Rohl and Charlie E M Strauss and Kira M S Misura and David Baker},
issn = {0076-6879},
year = {2004},
date = {2004},
journal = {Methods in Enzymology},
volume = {383},
pages = {66--93},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2003
Jens Meiler, David Baker
Rapid protein fold determination using unassigned NMR data Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 100, pp. 15404-9, 2003, ISSN: 0027-8424.
@article{79,
title = {Rapid protein fold determination using unassigned {NMR} data},
author = {Jens Meiler and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/meiler03B.pdf},
issn = {0027-8424},
year = {2003},
date = {2003-12-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {100},
pages = {15404--15409},
abstract = {Experimental structure determination by x-ray crystallography and NMR spectroscopy is slow and time-consuming compared with the rate at which new protein sequences are being identified. NMR spectroscopy has the advantage of rapidly providing the structurally relevant information in the form of unassigned chemical shifts (CSs), intensities of NOESY crosspeaks [nuclear Overhauser effects (NOEs)], and residual dipolar couplings (RDCs), but use of these data are limited by the time and effort needed to assign individual resonances to specific atoms. Here, we develop a method for generating low-resolution protein structures by using unassigned NMR data that relies on the de novo protein structure prediction algorithm, rosetta [Simons, K. T., Kooperberg, C., Huang, E. \& Baker, D. (1997) J. Mol. Biol. 268, 209-225] and a Monte Carlo procedure that searches for the assignment of resonances to atoms that produces the best fit of the experimental NMR data to a candidate 3D structure. A large ensemble of models is generated from sequence information alone by using rosetta, an optimal assignment is identified for each model, and the models are then ranked based on their fit with the NMR data assuming the identified assignments. The method was tested on nine protein sequences between 56 and 140 amino acids and published CS, NOE, and RDC data. The procedure yielded models with rms deviations between 3 and 6 {\AA}, and, in four of the nine cases, the partial assignments obtained by the method could be used to refine the structures to high resolution (0.6-1.8 {\AA}) by repeated cycles of structure generation guided by the partial assignments, followed by reassignment using the newly generated models.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Tony R Hazbun, Lars Malmström, Scott Anderson, Beth J Graczyk, Bethany Fox, Michael Riffle, Bryan A Sundin, J Derringer Aranda, W Hayes McDonald, Chun-Hwei Chiu, Brian E Snydsman, Phillip Bradley, Eric G D Muller, Stanley Fields, David Baker, John R Yates, Trisha N Davis
Assigning function to yeast proteins by integration of technologies Journal Article
In: Molecular cell, vol. 12, pp. 1353-65, 2003, ISSN: 1097-2765.
@article{314,
title = {Assigning function to yeast proteins by integration of technologies},
author = {Tony R Hazbun and Lars Malmstr{\"o}m and Scott Anderson and Beth J Graczyk and Bethany Fox and Michael Riffle and Bryan A Sundin and J Derringer Aranda and W Hayes McDonald and Chun-Hwei Chiu and Brian E Snydsman and Phillip Bradley and Eric G D Muller and Stanley Fields and David Baker and John R Yates and Trisha N Davis},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/hazbun03A.pdf},
issn = {1097-2765},
year = {2003},
date = {2003-12-01},
journal = {Molecular Cell},
volume = {12},
pages = {1353--1365},
abstract = {Interpreting genome sequences requires the functional analysis of thousands of predicted proteins, many of which are uncharacterized and without obvious homologs. To assess whether the roles of large sets of uncharacterized genes can be assigned by targeted application of a suite of technologies, we used four complementary protein-based methods to analyze a set of 100 uncharacterized but essential open reading frames (ORFs) of the yeast Saccharomyces cerevisiae. These proteins were subjected to affinity purification and mass spectrometry analysis to identify copurifying proteins, two-hybrid analysis to identify interacting proteins, fluorescence microscopy to localize the proteins, and structure prediction methodology to predict structural domains or identify remote homologies. Integration of the data assigned function to 48 ORFs using at least two of the Gene Ontology (GO) categories of biological process, molecular function, and cellular component; 77 ORFs were annotated by at least one method. This combination of technologies, coupled with annotation using GO, is a powerful approach to classifying genes.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Brian Kuhlman, Gautam Dantas, Gregory C Ireton, Gabriele Varani, Barry L Stoddard, David Baker
Design of a novel globular protein fold with atomic-level accuracy Journal Article
In: Science, vol. 302, pp. 1364-8, 2003, ISSN: 1095-9203.
@article{82,
title = {Design of a novel globular protein fold with atomic-level accuracy},
author = {Brian Kuhlman and Gautam Dantas and Gregory C Ireton and Gabriele Varani and Barry L Stoddard and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/kuhlman03A.pdf},
issn = {1095-9203},
year = {2003},
date = {2003-11-01},
journal = {Science},
volume = {302},
pages = {1364--1368},
abstract = {A major challenge of computational protein design is the creation of novel proteins with arbitrarily chosen three-dimensional structures. Here, we used a general computational strategy that iterates between sequence design and structure prediction to design a 93-residue alpha/beta protein called Top7 with a novel sequence and topology. Top7 was found experimentally to be folded and extremely stable, and the x-ray crystal structure of Top7 is similar (root mean square deviation equals 1.2 angstroms) to the design model. The ability to design a new protein fold makes possible the exploration of the large regions of the protein universe not yet observed in nature.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
William J Wedemeyer, David Baker
Efficient minimization of angle-dependent potentials for polypeptides in internal coordinates Journal Article
In: Proteins, vol. 53, pp. 262-72, 2003, ISSN: 1097-0134.
@article{73,
title = {Efficient minimization of angle-dependent potentials for polypeptides in internal coordinates},
author = {William J Wedemeyer and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/wedemeyer03A.pdf},
issn = {1097-0134},
year = {2003},
date = {2003-11-01},
journal = {Proteins},
volume = {53},
pages = {262--272},
abstract = {Angular potentials play an important role in the refinement of protein structures through angle-dependent restraints (e.g., those determined by cross-correlated relaxations, residual dipolar couplings, and hydrogen bonds). Analytic derivatives of such angular potentials with respect to the dihedral angles of proteins would be useful for optimizing such restraints and other types of angular potentials (i.e., such as we are now introducing into protein structure prediction) but have not been described. In this article, analytic derivatives are calculated for four types of angular potentials and integrated with the efficient recursive derivative calculation methods of Go and coworkers. The formulas are implemented in publicly available software and illustrated by refining a low-resolution protein structure with idealized vector-angle, dipolar-coupling, and hydrogen-bond restraints. The method is now being used routinely to optimize hydrogen-bonding potentials in ROSETTA.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jens Meiler, David Baker
Coupled prediction of protein secondary and tertiary structure Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 100, pp. 12105-10, 2003, ISSN: 0027-8424.
@article{80,
title = {Coupled prediction of protein secondary and tertiary structure},
author = {Jens Meiler and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/meiler03A.pdf},
issn = {0027-8424},
year = {2003},
date = {2003-10-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {100},
pages = {12105--12110},
abstract = {The strong coupling between secondary and tertiary structure formation in protein folding is neglected in most structure prediction methods. In this work we investigate the extent to which nonlocal interactions in predicted tertiary structures can be used to improve secondary structure prediction. The architecture of a neural network for secondary structure prediction that utilizes multiple sequence alignments was extended to accept low-resolution nonlocal tertiary structure information as an additional input. By using this modified network, together with tertiary structure information from native structures, the Q3-prediction accuracy is increased by 7-10\% on average and by up to 35\% in individual cases for independent test data. By using tertiary structure information from models generated with the ROSETTA de novo tertiary structure prediction method, the Q3-prediction accuracy is improved by 4-5\% on average for small and medium-sized single-domain proteins. Analysis of proteins with particularly large improvements in secondary structure prediction using tertiary structure information provides insight into the feedback from tertiary to secondary structure.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jerry Tsai, Richard Bonneau, Alexandre V Morozov, Brian Kuhlman, Carol A Rohl, David Baker
An improved protein decoy set for testing energy functions for protein structure prediction Journal Article
In: Proteins, vol. 53, pp. 76-87, 2003, ISSN: 1097-0134.
@article{74,
title = {An improved protein decoy set for testing energy functions for protein structure prediction},
author = {Jerry Tsai and Richard Bonneau and Alexandre V Morozov and Brian Kuhlman and Carol A Rohl and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/Tsai03A.pdf},
issn = {1097-0134},
year = {2003},
date = {2003-10-01},
journal = {Proteins},
volume = {53},
pages = {76--87},
abstract = {We have improved the original Rosetta centroid/backbone decoy set by increasing the number of proteins and frequency of near native models and by building on sidechains and minimizing clashes. The new set consists of 1,400 model structures for 78 different and diverse protein targets and provides a challenging set for the testing and evaluation of scoring functions. We evaluated the extent to which a variety of all-atom energy functions could identify the native and close-to-native structures in the new decoy sets. Of various implicit solvent models, we found that a solvent-accessible surface area-based solvation provided the best enrichment and discrimination of close-to-native decoys. The combination of this solvation treatment with Lennard Jones terms and the original Rosetta energy provided better enrichment and discrimination than any of the individual terms. The results also highlight the differences in accuracy of NMR and X-ray crystal structures: a large energy gap was observed between native and non-native conformations for X-ray structures but not for NMR structures.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ruslan I Sadreyev, David Baker, Nick V Grishin
Profile-profile comparisons by COMPASS predict intricate homologies between protein families Journal Article
In: Protein science, vol. 12, pp. 2262-72, 2003, ISSN: 0961-8368.
@article{316,
title = {Profile-profile comparisons by {COMPASS} predict intricate homologies between protein families},
author = {Ruslan I Sadreyev and David Baker and Nick V Grishin},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/sadreyev03A.pdf},
issn = {0961-8368},
year = {2003},
date = {2003-10-01},
journal = {Protein Science},
volume = {12},
pages = {2262--2272},
abstract = {Recently we proposed a novel method of alignment-alignment comparison, COMPASS (the tool for COmparison of Multiple Protein Alignments with Assessment of Statistical Significance). Here we present several examples of the relations between PFAM protein families that were detected by COMPASS and that lead to the predictions of presently unresolved protein structures. We discuss relatively straightforward COMPASS predictions that are new and interesting to us, and that would require a substantial time and effort to justify even for a skilled PSI-BLAST user. All of the presented COMPASS hits are independently confirmed by other methods, including the ab initio structure-prediction method ROSETTA. The tertiary structure predictions made by ROSETTA proved to be useful for improving sequence-derived alignments, because they are based on a reasonable folding of the polypeptide chain rather than on the information from sequence databases. The ability of COMPASS to predict new relations within the PFAM database indicates the high sensitivity of COMPASS searches and substantiates its potential value for the discovery of previously unknown similarities between protein families.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Lisa N Kinch, David Baker, Nick V Grishin
Deciphering a novel thioredoxin-like fold family Journal Article
In: Proteins, vol. 52, pp. 323-31, 2003, ISSN: 1097-0134.
@article{573,
title = {Deciphering a novel thioredoxin-like fold family},
author = {Lisa N Kinch and David Baker and Nick V Grishin},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/decipheringanovel_Baker2003.pdf},
doi = {10.1002/prot.10425},
issn = {1097-0134},
year = {2003},
date = {2003-08-01},
journal = {Proteins},
volume = {52},
pages = {323--331},
abstract = {Sequence- and structure-based searching strategies have proven useful in the identification of remote homologs and have facilitated both structural and functional predictions of many uncharacterized protein families. We implement these strategies to predict the structure of and to classify a previously uncharacterized cluster of orthologs (COG3019) in the thioredoxin-like fold superfamily. The results of each searching method indicate that thioltransferases are the closest structural family to COG3019. We substantiate this conclusion using the ab initio structure prediction method rosetta, which generates a thioredoxin-like fold similar to that of the glutaredoxin-like thioltransferase (NrdH) for a COG3019 target sequence. This structural model contains the thiol-redox functional motif CYS-X-X-CYS in close proximity to other absolutely conserved COG3019 residues, defining a novel thioredoxin-like active site that potentially binds metal ions. Finally, the rosetta-derived model structure assists us in assembling a global multiple-sequence alignment of COG3019 with two other thioredoxin-like fold families, the thioltransferases and the bacterial arsenate reductases (ArsC).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ora Schueler-Furman, David Baker
Conserved residue clustering and protein structure prediction Journal Article
In: Proteins, vol. 52, pp. 225-35, 2003, ISSN: 1097-0134.
@article{75,
title = {Conserved residue clustering and protein structure prediction},
author = {Ora Schueler-Furman and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/schueler-furman03A.pdf},
issn = {1097-0134},
year = {2003},
date = {2003-08-01},
journal = {Proteins},
volume = {52},
pages = {225--235},
abstract = {Protein residues that are critical for structure and function are expected to be conserved throughout evolution. Here, we investigate the extent to which these conserved residues are clustered in three-dimensional protein structures. In 92\% of the proteins in a data set of 79 proteins, the most conserved positions in multiple sequence alignments are significantly more clustered than randomly selected sets of positions. The comparison to random subsets is not necessarily appropriate, however, because the signal could be the result of differences in the amino acid composition of sets of conserved residues compared to random subsets (hydrophobic residues tend to be close together in the protein core), or differences in sequence separation of the residues in the different sets. In order to overcome these limits, we compare the degree of clustering of the conserved positions on the native structure and on alternative conformations generated by the de novo structure prediction method Rosetta. For 65\% of the 79 proteins, the conserved residues are significantly more clustered in the native structure than in the alternative conformations, indicating that the clustering of conserved residues in protein structures goes beyond that expected purely from sequence locality and composition effects. The differences in the spatial distribution of conserved residues can be utilized in de novo protein structure prediction: We find that for 79\% of the proteins, selection of the Rosetta generated conformations with the greatest clustering of the conserved residues significantly enriches the fraction of close-to-native structures.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jeffrey J Gray, Stewart Moughon, Chu Wang, Ora Schueler-Furman, Brian Kuhlman, Carol A Rohl, David Baker
Protein-protein docking with simultaneous optimization of rigid-body displacement and side-chain conformations Journal Article
In: Journal of molecular biology, vol. 331, pp. 281-99, 2003, ISSN: 0022-2836.
@article{85,
title = {Protein-protein docking with simultaneous optimization of rigid-body displacement and side-chain conformations},
author = {Jeffrey J Gray and Stewart Moughon and Chu Wang and Ora Schueler-Furman and Brian Kuhlman and Carol A Rohl and David Baker},
issn = {0022-2836},
year = {2003},
date = {2003-08-01},
journal = {Journal of Molecular Biology},
volume = {331},
pages = {281--299},
abstract = {Protein-protein docking algorithms provide a means to elucidate structural details for presently unknown complexes. Here, we present and evaluate a new method to predict protein-protein complexes from the coordinates of the unbound monomer components. The method employs a low-resolution, rigid-body, Monte Carlo search followed by simultaneous optimization of backbone displacement and side-chain conformations using Monte Carlo minimization. Up to $10^5$ independent simulations are carried out, and the resulting ``decoys'' are ranked using an energy function dominated by van der Waals interactions, an implicit solvation model, and an orientation-dependent hydrogen bonding potential. Top-ranking decoys are clustered to select the final predictions. Small-perturbation studies reveal the formation of binding funnels in 42 of 54 cases using coordinates derived from the bound complexes and in 32 of 54 cases using independently determined coordinates of one or both monomers. Experimental binding affinities correlate with the calculated score function and explain the predictive success or failure of many targets. Global searches using one or both unbound components predict at least 25\% of the native residue-residue contacts in 28 of the 32 cases where binding funnels exist. The results suggest that the method may soon be useful for generating models of biologically important complexes from the structures of the isolated components, but they also highlight the challenges that must be met to achieve consistent and accurate prediction of protein-protein interactions.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jeffrey J Gray, Stewart E Moughon, Tanja Kortemme, Ora Schueler-Furman, Kira M S Misura, Alexandre V Morozov, David Baker
Protein-protein docking predictions for the CAPRI experiment Journal Article
In: Proteins, vol. 52, pp. 118-22, 2003, ISSN: 1097-0134.
@article{86,
title = {Protein-protein docking predictions for the {CAPRI} experiment},
author = {Jeffrey J Gray and Stewart E Moughon and Tanja Kortemme and Ora Schueler-Furman and Kira M S Misura and Alexandre V Morozov and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/gray03B.pdf},
issn = {1097-0134},
year = {2003},
date = {2003-07-01},
journal = {Proteins},
volume = {52},
pages = {118--122},
abstract = {We predicted structures for all seven targets in the CAPRI experiment using a new method in development at the time of the challenge. The technique includes a low-resolution rigid body Monte Carlo search followed by high-resolution refinement with side-chain conformational changes and rigid body minimization. Decoys (approximately $10^6$ per target) were discriminated using a scoring function including van der Waals and solvation interactions, hydrogen bonding, residue-residue pair statistics, and rotamer probabilities. Decoys were ranked, clustered, manually inspected, and selected. The top ranked model for target 6 predicted the experimental structure to 1.5 {\AA} RMSD and included 48 of 65 correct residue-residue contacts. Target 7 was predicted at 5.3 {\AA} RMSD with 22 of 37 correct residue-residue contacts using a homology model from a known complex structure. Using a preliminary version of the protocol in round 1, target 1 was predicted within 8.8 {\AA} although few contacts were correct. For targets 2 and 3, the interface locations and a small fraction of the contacts were correctly identified.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Tanja Kortemme, Alexandre V Morozov, David Baker
An orientation-dependent hydrogen bonding potential improves prediction of specificity and structure for proteins and protein-protein complexes Journal Article
In: Journal of molecular biology, vol. 326, pp. 1239-59, 2003, ISSN: 0022-2836.
@article{83,
title = {An orientation-dependent hydrogen bonding potential improves prediction of specificity and structure for proteins and protein-protein complexes},
author = {Tanja Kortemme and Alexandre V Morozov and David Baker},
issn = {0022-2836},
year = {2003},
date = {2003-02-01},
journal = {Journal of Molecular Biology},
volume = {326},
pages = {1239--1259},
abstract = {Hydrogen bonding is a key contributor to the specificity of intramolecular and intermolecular interactions in biological systems. Here, we develop an orientation-dependent hydrogen bonding potential based on the geometric characteristics of hydrogen bonds in high-resolution protein crystal structures, and evaluate it using four tests related to the prediction and design of protein structures and protein-protein complexes. The new potential is superior to the widely used Coulomb model of hydrogen bonding in prediction of the sequences of proteins and protein-protein interfaces from their structures, and improves discrimination of correctly docked protein-protein complexes from large sets of alternative structures.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Dylan Chivian, David E Kim, Lars Malmström, Philip Bradley, Timothy Robertson, Paul Murphy, Charles E M Strauss, Richard Bonneau, Carol A Rohl, David Baker
Automated prediction of CASP-5 structures using the Robetta server Journal Article
In: Proteins, vol. 53 Suppl 6, pp. 524-33, 2003, ISSN: 1097-0134.
@article{88,
  title     = {Automated prediction of {CASP-5} structures using the {Robetta} server},
  author    = {Dylan Chivian and David E. Kim and Lars Malmstr{\"o}m and Philip Bradley and Timothy Robertson and Paul Murphy and Charles E. M. Strauss and Richard Bonneau and Carol A. Rohl and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/chivian03A.pdf},
  issn      = {1097-0134},
  year      = {2003},
  date      = {2003},
  journal   = {Proteins},
  volume    = {53},
  number    = {Suppl 6},
  pages     = {524--533},
  abstract  = {Robetta is a fully automated protein structure prediction server that uses the Rosetta fragment-insertion method. It combines template-based and de novo structure prediction methods in an attempt to produce high quality models that cover every residue of a submitted sequence. The first step in the procedure is the automatic detection of the locations of domains and selection of the appropriate modeling protocol for each domain. For domains matched to a homolog with an experimentally characterized structure by PSI-BLAST or Pcons2, Robetta uses a new alignment method, called K*Sync, to align the query sequence onto the parent structure. It then models the variable regions by allowing them to explore conformational space with fragments in fashion similar to the de novo protocol, but in the context of the template. When no structural homolog is available, domains are modeled with the Rosetta de novo protocol, which allows the full length of the domain to explore conformational space via fragment-insertion, producing a large decoy ensemble from which the final models are selected. The Robetta server produced quite reasonable predictions for targets in the recent CASP-5 and CAFASP-3 experiments, some of which were at the level of the best human predictions.},
  pubstate  = {published},
  tppubtype = {article}
}
Philip Bradley, Dylan Chivian, Jens Meiler, Kira M S Misura, Carol A Rohl, William R Schief, William J Wedemeyer, Ora Schueler-Furman, Paul Murphy, Jack Schonbrun, Charles E M Strauss, David Baker
Rosetta predictions in CASP5: successes, failures, and prospects for complete automation Journal Article
In: Proteins, vol. 53 Suppl 6, pp. 457-68, 2003, ISSN: 1097-0134.
@article{90,
  title     = {{Rosetta} predictions in {CASP5}: successes, failures, and prospects for complete automation},
  author    = {Philip Bradley and Dylan Chivian and Jens Meiler and Kira M. S. Misura and Carol A. Rohl and William R. Schief and William J. Wedemeyer and Ora Schueler-Furman and Paul Murphy and Jack Schonbrun and Charles E. M. Strauss and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/bradley03A.pdf},
  issn      = {1097-0134},
  year      = {2003},
  date      = {2003},
  journal   = {Proteins},
  volume    = {53},
  number    = {Suppl 6},
  pages     = {457--468},
  abstract  = {We describe predictions of the structures of CASP5 targets using Rosetta. The Rosetta fragment insertion protocol was used to generate models for entire target domains without detectable sequence similarity to a protein of known structure and to build long loop insertions (and N-and C-terminal extensions) in cases where a structural template was available. Encouraging results were obtained both for the de novo predictions and for the long loop insertions; we describe here the successes as well as the failures in the context of current efforts to improve the Rosetta method. In particular, de novo predictions failed for large proteins that were incorrectly parsed into domains and for topologically complex (high contact order) proteins with swapping of segments between domains. However, for the remaining targets, at least one of the five submitted models had a long fragment with significant similarity to the native structure. A fully automated version of the CASP5 protocol produced results that were comparable to the human-assisted predictions for most of the targets, suggesting that automated genomic-scale, de novo protein structure prediction may soon be worthwhile. For the three targets where the human-assisted predictions were significantly closer to the native structure, we identify the steps that remain to be automated.},
  pubstate  = {published},
  tppubtype = {article}
}
2002
Richard Bonneau, Charlie E M Strauss, Carol A Rohl, Dylan Chivian, Phillip Bradley, Lars Malmström, Tim Robertson, David Baker
De novo prediction of three-dimensional structures for major protein families Journal Article
In: Journal of molecular biology, vol. 322, pp. 65-78, 2002, ISSN: 0022-2836.
@article{184,
  title     = {De novo prediction of three-dimensional structures for major protein families},
  author    = {Richard Bonneau and Charlie E. M. Strauss and Carol A. Rohl and Dylan Chivian and Phillip Bradley and Lars Malmstr{\"o}m and Tim Robertson and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/bonneau02B.pdf},
  issn      = {0022-2836},
  year      = {2002},
  date      = {2002-09-01},
  journal   = {Journal of Molecular Biology},
  volume    = {322},
  pages     = {65--78},
  abstract  = {We use the Rosetta de novo structure prediction method to produce three-dimensional structure models for all Pfam-A sequence families with average length under 150 residues and no link to any protein of known structure. To estimate the reliability of the predictions, the method was calibrated on 131 proteins of known structure. For approximately 60\% of the proteins one of the top five models was correctly predicted for 50 or more residues, and for approximately 35\%, the correct SCOP superfamily was identified in a structure-based search of the Protein Data Bank using one of the models. This performance is consistent with results from the fourth critical assessment of structure prediction (CASP4). Correct and incorrect predictions could be partially distinguished using a confidence function based on a combination of simulation convergence, protein length and the similarity of a given structure prediction to known protein structures. While the limited accuracy and reliability of the method precludes definitive conclusions, the Pfam models provide the only tertiary structure information available for the 12\% of publicly available sequences represented by these large protein families.},
  pubstate  = {published},
  tppubtype = {article}
}
Christopher T Saunders, David Baker
Evaluation of structural and evolutionary contributions to deleterious mutation prediction Journal Article
In: Journal of molecular biology, vol. 322, pp. 891-901, 2002, ISSN: 0022-2836.
@article{233,
  title     = {Evaluation of structural and evolutionary contributions to deleterious mutation prediction},
  author    = {Christopher T. Saunders and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/saunders02A.pdf},
  issn      = {0022-2836},
  year      = {2002},
  date      = {2002-09-01},
  journal   = {Journal of Molecular Biology},
  volume    = {322},
  pages     = {891--901},
  abstract  = {Methods for automated prediction of deleterious protein mutations have utilized both structural and evolutionary information but the relative contribution of these two factors remains unclear. To address this, we have used a variety of structural and evolutionary features to create simple deleterious mutation models that have been tested on both experimental mutagenesis and human allele data. We find that the most accurate predictions are obtained using a solvent-accessibility term, the C(beta) density, and a score derived from homologous sequences, SIFT. A classification tree using these two features has a cross-validated prediction error of 20.5\% on an experimental mutagenesis test set when the prior probability for deleterious and neutral cases is equal, whereas this prediction error is 28.8\% and 22.2\% using either the C(beta) density or SIFT alone. The improvement imparted by structure increases when fewer homologs are available: when restricted to three homologs the prediction error improves from 26.9\% using SIFT alone to 22.4\% using SIFT and the C(beta) density, or 24.8\% using SIFT and a noisy C(beta) density term approximating the inaccuracy of ab initio structures modeled by the Rosetta method. We conclude that methods for deleterious mutation prediction should include structural information when fewer than five to ten homologs are available, and that ab initio predicted structures may soon be useful in such cases when high-resolution structures are unavailable.},
  pubstate  = {published},
  tppubtype = {article}
}
Eric Alm, Alexandre V Morozov, Tanja Kortemme, David Baker
Simple physical models connect theory and experiment in protein folding kinetics Journal Article
In: Journal of molecular biology, vol. 322, pp. 463-76, 2002, ISSN: 0022-2836.
@article{182,
  title     = {Simple physical models connect theory and experiment in protein folding kinetics},
  author    = {Eric Alm and Alexandre V. Morozov and Tanja Kortemme and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/alm02A.pdf},
  issn      = {0022-2836},
  year      = {2002},
  date      = {2002-09-01},
  journal   = {Journal of Molecular Biology},
  volume    = {322},
  pages     = {463--476},
  abstract  = {Our understanding of the principles underlying the protein-folding problem can be tested by developing and characterizing simple models that make predictions which can be compared to experimental data. Here we extend our earlier model of folding free energy landscapes, in which each residue is considered to be either folded as in the native state or completely disordered, by investigating the role of additional factors representing hydrogen bonding and backbone torsion strain, and by using a hybrid between the master equation approach and the simple transition state theory to evaluate kinetics near the free energy barrier in greater detail. Model calculations of folding phi-values are compared to experimental data for 19 proteins, and for more than half of these, experimental data are reproduced with correlation coefficients between r=0.41 and 0.88; calculations of transition state free energy barriers correlate with rates measured for 37 single domain proteins (r=0.69). The model provides insight into the contribution of alternative-folding pathways, the validity of quasi-equilibrium treatments of the folding landscape, and the magnitude of the Arrhenius prefactor for protein folding. Finally, we discuss the limitations of simple native-state-based models, and as a more general test of such models, provide predictions of folding rates and mechanisms for a comprehensive set of over 400 small protein domains of known structure.},
  pubstate  = {published},
  tppubtype = {article}
}
Richard Bonneau, Ingo Ruczinski, Jerry Tsai, David Baker
Contact order and ab initio protein structure prediction Journal Article
In: Protein science, vol. 11, pp. 1937-44, 2002, ISSN: 0961-8368.
@article{183,
  title     = {Contact order and ab initio protein structure prediction},
  author    = {Richard Bonneau and Ingo Ruczinski and Jerry Tsai and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/bonneau02A.pdf},
  issn      = {0961-8368},
  year      = {2002},
  date      = {2002-08-01},
  journal   = {Protein Science},
  volume    = {11},
  pages     = {1937--1944},
  abstract  = {Although much of the motivation for experimental studies of protein folding is to obtain insights for improving protein structure prediction, there has been relatively little connection between experimental protein folding studies and computational structural prediction work in recent years. In the present study, we show that the relationship between protein folding rates and the contact order (CO) of the native structure has implications for ab initio protein structure prediction. Rosetta ab initio folding simulations produce a dearth of high CO structures and an excess of low CO structures, as expected if the computer simulations mimic to some extent the actual folding process. Consistent with this, the majority of failures in ab initio prediction in the CASP4 (critical assessment of structure prediction) experiment involved high CO structures likely to fold much more slowly than the lower CO structures for which reasonable predictions were made. This bias against high CO structures can be partially alleviated by performing large numbers of additional simulations, selecting out the higher CO structures, and eliminating the very low CO structures; this leads to a modest improvement in prediction quality. More significant improvements in predictions for proteins with complex topologies may be possible following significant increases in high-performance computing power, which will be required for thoroughly sampling high CO conformations (high CO proteins can take six orders of magnitude longer to fold than low CO proteins). Importantly for such a strategy, simulations performed for high CO structures converge much less strongly than those for low CO structures, and hence, lack of simulation convergence can indicate the need for improved sampling of high CO conformations.
The parallels between Rosetta simulations and folding in vivo may extend to misfolding: The very low CO structures that accumulate in Rosetta simulations consist primarily of local up-down beta-sheets that may resemble precursors to amyloid formation.},
  pubstate  = {published},
  tppubtype = {article}
}
Ingo Ruczinski, Charles Kooperberg, Richard Bonneau, David Baker
Distributions of beta sheets in proteins with application to structure prediction Journal Article
In: Proteins, vol. 48, pp. 85-97, 2002, ISSN: 1097-0134.
@article{191,
  title     = {Distributions of beta sheets in proteins with application to structure prediction},
  author    = {Ingo Ruczinski and Charles Kooperberg and Richard Bonneau and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/ruczinski02A.pdf},
  issn      = {1097-0134},
  year      = {2002},
  date      = {2002-07-01},
  journal   = {Proteins},
  volume    = {48},
  pages     = {85--97},
  abstract  = {We recently developed the Rosetta algorithm for ab initio protein structure prediction, which generates protein structures from fragment libraries using simulated annealing. The scoring function in this algorithm favors the assembly of strands into sheets. However, it does not discriminate between different sheet motifs. After generating many structures using Rosetta, we found that the folding algorithm predominantly generates very local structures. We surveyed the distribution of beta-sheet motifs with two edge strands (open sheets) in a large set of non-homologous proteins. We investigated how much of that distribution can be accounted for by rules previously published in the literature, and developed a filter and a scoring method that enables us to improve protein structure prediction for beta-sheet proteins. Proteins 2002;48:85-97.},
  pubstate  = {published},
  tppubtype = {article}
}
Jack Schonbrun, William J Wedemeyer, David Baker
Protein structure prediction in 2002. Journal Article
In: Current opinion in structural biology, vol. 12, pp. 348-54, 2002, ISSN: 0959-440X.
@article{234,
  title     = {Protein structure prediction in 2002},
  author    = {Jack Schonbrun and William J. Wedemeyer and David Baker},
  issn      = {0959-440X},
  year      = {2002},
  date      = {2002-06-01},
  journal   = {Current Opinion in Structural Biology},
  volume    = {12},
  pages     = {348--354},
  abstract  = {Central issues concerning protein structure prediction have been highlighted by the recently published summary of the fourth community-wide protein structure prediction experiment (CASP4). Although sequence/structure alignment remains the bottleneck in comparative modeling, there has been substantial progress in fully automated remote homolog detection and in de novo structure prediction. Significant further progress will probably require improvements in high-resolution modeling.},
  pubstate  = {published},
  tppubtype = {article}
}
Carol A Rohl, David Baker
De novo determination of protein backbone structure from residual dipolar couplings using Rosetta Journal Article
In: Journal of the American Chemical Society, vol. 124, pp. 2723-9, 2002, ISSN: 0002-7863.
@article{190,
  title     = {De novo determination of protein backbone structure from residual dipolar couplings using {Rosetta}},
  author    = {Carol A. Rohl and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/rohl02A.pdf},
  issn      = {0002-7863},
  year      = {2002},
  date      = {2002-03-01},
  journal   = {Journal of the American Chemical Society},
  volume    = {124},
  pages     = {2723--2729},
  abstract  = {As genome-sequencing projects rapidly increase the database of protein sequences, the gap between known sequences and known structures continues to grow exponentially, increasing the demand to accelerate structure determination methods. Residual dipolar couplings (RDCs) are an attractive source of experimental restraints for NMR structure determination, particularly rapid, high-throughput methods, because they yield both local and long-range orientational information and can be easily measured and assigned once the backbone resonances of a protein have been assigned. While very extensive RDC data sets have been used to determine the structure of ubiquitin, it is unclear to what extent such methods will generalize to larger proteins with less complete data sets. Here we incorporate experimental RDC restraints into Rosetta, an ab initio structure prediction method, and demonstrate that the combined algorithm provides a general method for de novo determination of a variety of protein folds from RDC data. Backbone structures for multiple proteins up to approximately 125 residues in length and spanning a range of topological complexities are rapidly and reproducibly generated using data sets that are insufficient in isolation to uniquely determine the protein fold de novo, although ambiguities and errors are observed for proteins with symmetry about an axis of the alignment tensor. The models generated are not high-resolution structures completely defined by experimental data but are sufficiently accurate to accelerate traditional high-resolution NMR structure determination and provide structure-based functional insights.},
  pubstate  = {published},
  tppubtype = {article}
}
2001
David Baker, A Sali
Protein structure prediction and structural genomics. Journal Article
In: Science (New York, N.Y.), vol. 294, pp. 93-6, 2001, ISSN: 0036-8075.
@article{71,
  title     = {Protein structure prediction and structural genomics},
  author    = {David Baker and A. Sali},
  issn      = {0036-8075},
  year      = {2001},
  date      = {2001-10-01},
  journal   = {Science},
  volume    = {294},
  pages     = {93--96},
  abstract  = {Genome sequencing projects are producing linear amino acid sequences, but full understanding of the biological role of these proteins will require knowledge of their structure and function. Although experimental structure determination methods are providing high-resolution structure information about a subset of the proteins, computational structure prediction methods will provide valuable information for the large fraction of sequences whose structures will not be determined experimentally. The first class of protein structure prediction methods, including threading and comparative modeling, rely on detectable similarity spanning most of the modeled sequence and at least one known structure. The second class of methods, de novo or ab initio methods, predict the structure from sequence alone, without relying on similarity at the fold level between the modeled sequence and any of the known structures. In this Viewpoint, we begin by describing the essential features of the methods, the accuracy of the models, and their application to the prediction and understanding of protein function, both for single proteins and on the scale of whole genomes. We then discuss the important role that protein structure prediction methods play in the growing worldwide effort in structural genomics.},
  pubstate  = {published},
  tppubtype = {article}
}
M R Lee, J Tsai, David Baker, P A Kollman
Molecular dynamics in the endgame of protein structure prediction Journal Article
In: Journal of molecular biology, vol. 313, pp. 417-30, 2001, ISSN: 0022-2836.
@article{62,
  title     = {Molecular dynamics in the endgame of protein structure prediction},
  author    = {M. R. Lee and J. Tsai and David Baker and P. A. Kollman},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/lee01B.pdf},
  issn      = {0022-2836},
  year      = {2001},
  date      = {2001-10-01},
  journal   = {Journal of Molecular Biology},
  volume    = {313},
  pages     = {417--430},
  abstract  = {In order adequately to sample conformational space, methods for protein structure prediction make necessary simplifications that also prevent them from being as accurate as desired. Thus, the idea of feeding them, hierarchically, into a more accurate method that samples less effectively was introduced a decade ago but has not met with more than limited success in a few isolated instances. Ideally, the final stages should be able to identify the native state, show a good correlation with native similarity in order to add value to the selection process, and refine the structures even further. In this work, we explore the possibility of using state-of-the-art explicit solvent molecular dynamics and implicit solvent free energy calculations to accomplish all three of those objectives on 12 small, single-domain proteins, four each of alpha, beta and mixed topologies. We find that this approach is very successful in ranking the native and also enhances the structure selection of predictions generated from the Rosetta method.},
  pubstate  = {published},
  tppubtype = {article}
}
R Bonneau, J Tsai, I Ruczinski, David Baker
Functional inferences from blind ab initio protein structure predictions Journal Article
In: Journal of structural biology, vol. 134, pp. 186-90, 2001, ISSN: 1047-8477.
@article{67,
  title     = {Functional inferences from blind ab initio protein structure predictions},
  author    = {R. Bonneau and J. Tsai and I. Ruczinski and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/bonneau01B.pdf},
  issn      = {1047-8477},
  year      = {2001},
  date      = {2001-05-01},
  journal   = {Journal of Structural Biology},
  volume    = {134},
  pages     = {186--190},
  abstract  = {Ab initio protein structure prediction methods have improved dramatically in the past several years. Because these methods require only the sequence of the protein of interest, they are potentially applicable to the open reading frames in the many organisms whose sequences have been and will be determined. Ab initio methods cannot currently produce models of high enough resolution for use in rational drug design, but there is an exciting potential for using the methods for functional annotation of protein sequences on a genomic scale. Here we illustrate how functional insights can be obtained from low-resolution predicted structures using examples from blind ab initio structure predictions from the third and fourth critical assessment of structure prediction (CASP3, CASP4) experiments.},
  pubstate  = {published},
  tppubtype = {article}
}
R Bonneau, C E Strauss, David Baker
Improving the performance of Rosetta using multiple sequence alignment information and global measures of hydrophobic core formation Journal Article
In: Proteins, vol. 43, pp. 1-11, 2001, ISSN: 0887-3585.
@article{70,
  title     = {Improving the performance of {Rosetta} using multiple sequence alignment information and global measures of hydrophobic core formation},
  author    = {R. Bonneau and C. E. Strauss and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/bonneau01A.pdf},
  issn      = {0887-3585},
  year      = {2001},
  date      = {2001-04-01},
  journal   = {Proteins},
  volume    = {43},
  pages     = {1--11},
  abstract  = {This study explores the use of multiple sequence alignment (MSA) information and global measures of hydrophobic core formation for improving the Rosetta ab initio protein structure prediction method. The most effective use of the MSA information is achieved by carrying out independent folding simulations for a subset of the homologous sequences in the MSA and then identifying the free energy minima common to all folded sequences via simultaneous clustering of the independent folding runs. Global measures of hydrophobic core formation, using ellipsoidal rather than spherical representations of the hydrophobic core, are found to be useful in removing non-native conformations before cluster analysis. Through this combination of MSA information and global measures of protein core formation, we significantly increase the performance of Rosetta on a challenging test set. Proteins 2001;43:1-11.},
  pubstate  = {published},
  tppubtype = {article}
}
K T Simons, C Strauss, David Baker
Prospects for ab initio protein structural genomics Journal Article
In: Journal of molecular biology, vol. 306, pp. 1191-9, 2001, ISSN: 0022-2836.
@article{56,
  title     = {Prospects for ab initio protein structural genomics},
  author    = {K. T. Simons and C. Strauss and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/simons01A.pdf},
  issn      = {0022-2836},
  year      = {2001},
  date      = {2001-03-01},
  journal   = {Journal of Molecular Biology},
  volume    = {306},
  pages     = {1191--1199},
  abstract  = {We present the results of a large-scale testing of the ROSETTA method for ab initio protein structure prediction. Models were generated for two independently generated lists of small proteins (up to 150 amino acid residues), and the results were evaluated using traditional rmsd based measures and a novel measure based on the structure-based comparison of the models to the structures in the PDB using DALI. For 111 of 136 all alpha and alpha/beta proteins 50 to 150 residues in length, the method produced at least one model within 7 A rmsd of the native structure in 1000 attempts. For 60 of these proteins, the closest structure match in the PDB to at least one of the ten most frequently generated conformations was found to be structurally related (four standard deviations above background) to the native protein. These results suggest that ab initio structure prediction approaches may soon be useful for generating low resolution models and identifying distantly related proteins with similar structures and perhaps functions for these classes of proteins on the genome scale.},
  pubstate  = {published},
  tppubtype = {article}
}
M R Lee, David Baker, P A Kollman
2.1 and 1.8 A average C(alpha) RMSD structure predictions on two small proteins, HP-36 and s15 Journal Article
In: Journal of the American Chemical Society, vol. 123, pp. 1040-6, 2001, ISSN: 0002-7863.
@article{61,
  title     = {2.1 and 1.8 {\AA} average {C(alpha)} {RMSD} structure predictions on two small proteins, {HP-36} and {S15}},
  author    = {M. R. Lee and David Baker and P. A. Kollman},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/lee01A.pdf},
  issn      = {0002-7863},
  year      = {2001},
  date      = {2001-02-01},
  journal   = {Journal of the American Chemical Society},
  volume    = {123},
  pages     = {1040--1046},
  abstract  = {On two different small proteins, the 36-mer villin headpiece domain (HP-36) and the 65-mer structured region of ribosomal protein (S15), several model predictions from the ab initio approach Rosetta were subjected to molecular dynamics simulations for refinement. After clustering the resulting trajectories into conformational families, the average molecular mechanics--Poisson Boltzmann/surface area (MM-PBSA) free energies and alpha carbon (C(alpha)) RMSDs were then calculated for each family. Those conformational families with the lowest average free energies also contained the best C(alpha) RMSD structures (1.4 A for S15 and HP-36 core) and the lowest average C(alpha) RMSDs (1.8 A for S15, 2.1 A for HP-36 core). For comparison, control simulations starting with the two experimental structures were very stable, each consisting of a single conformational family, with an average C(alpha) RMSD of 1.3 A for S15 and 1.2 A for HP-36 core (1.9 A over all residues). In addition, the average free energies' ranks (Spearman rank, r(s)) correlate well with the average C(alpha) RMSDs (r(s) = 0.77 for HP-36, r(s) = 0.83 for S15). Molecular dynamics simulations combined with the MM--PBSA free energy function provide a potentially powerful tool for the protein structure prediction community in allowing for both high-resolution structural refinement and accurate ranking of model predictions. With all of the information that genomics is now providing, this methodology may allow for advances in going from sequence to structure.},
  pubstate  = {published},
  tppubtype = {article}
}
R Bonneau, David Baker
Ab initio protein structure prediction: progress and prospects. Journal Article
In: Annual review of biophysics and biomolecular structure, vol. 30, pp. 173-89, 2001, ISSN: 1056-8700.
@article{69,
  title     = {Ab initio protein structure prediction: progress and prospects},
  author    = {R. Bonneau and David Baker},
  issn      = {1056-8700},
  year      = {2001},
  date      = {2001},
  journal   = {Annual Review of Biophysics and Biomolecular Structure},
  volume    = {30},
  pages     = {173--189},
  abstract  = {Considerable recent progress has been made in the field of ab initio protein structure prediction, as witnessed by the third Critical Assessment of Structure Prediction (CASP3). In spite of this progress, much work remains, for the field has yet to produce consistently reliable ab initio structure prediction protocols. In this work, we review the features of current ab initio protocols in an attempt to highlight the foundations of recent progress in the field and suggest promising directions for future work.},
  pubstate  = {published},
  tppubtype = {article}
}
R Bonneau, J Tsai, I Ruczinski, D Chivian, C Rohl, C E Strauss, David Baker
Rosetta in CASP4: progress in ab initio protein structure prediction Journal Article
In: Proteins, vol. Suppl 5, pp. 119-26, 2001, ISSN: 0887-3585.
@article{68,
  title     = {{Rosetta} in {CASP4}: progress in ab initio protein structure prediction},
  author    = {R. Bonneau and J. Tsai and I. Ruczinski and D. Chivian and C. Rohl and C. E. Strauss and David Baker},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/bonneau01C.pdf},
  issn      = {0887-3585},
  year      = {2001},
  date      = {2001},
  journal   = {Proteins},
  volume    = {Suppl 5},
  pages     = {119--126},
  abstract  = {Rosetta ab initio protein structure predictions in CASP4 were considerably more consistent and more accurate than previous ab initio structure predictions. Large segments were correctly predicted (>50 residues superimposed within an RMSD of 6.5 A) for 16 of the 21 domains under 300 residues for which models were submitted. Models with the global fold largely correct were produced for several targets with new folds, and for several difficult fold recognition targets, the Rosetta models were more accurate than those produced with traditional fold recognition models. These promising results suggest that Rosetta may soon be able to contribute to the interpretation of genome sequence information.},
  pubstate  = {published},
  tppubtype = {article}
}
2000
P M Bowers, C E Strauss, David Baker
De novo protein structure determination using sparse NMR data. Journal Article
In: Journal of biomolecular NMR, vol. 18, pp. 311-8, 2000, ISSN: 0925-2738.
@article{193,
  title     = {De novo protein structure determination using sparse {NMR} data},
  author    = {P. M. Bowers and C. E. Strauss and David Baker},
  issn      = {0925-2738},
  year      = {2000},
  date      = {2000-12-01},
  journal   = {Journal of Biomolecular NMR},
  volume    = {18},
  pages     = {311--318},
  abstract  = {We describe a method for generating moderate to high-resolution protein structures using limited NMR data combined with the ab initio protein structure prediction method Rosetta. Peptide fragments are selected from proteins of known structure based on sequence similarity and consistency with chemical shift and NOE data. Models are built from these fragments by minimizing an energy function that favors hydrophobic burial, strand pairing, and satisfaction of NOE constraints. Models generated using this procedure with approximately 1 NOE constraint per residue are in some cases closer to the corresponding X-ray structures than the published NMR solution structures. The method requires only the sparse constraints available during initial stages of NMR structure determination, and thus holds promise for increasing the speed with which protein solution structures can be determined.},
  pubstate  = {published},
  tppubtype = {article}
}
C Bystroff, V Thorsson, David Baker
HMMSTR: a hidden Markov model for local sequence-structure correlations in proteins Journal Article
In: Journal of molecular biology, vol. 301, pp. 173-90, 2000, ISSN: 0022-2836.
@article{194,
title = {{HMMSTR}: a hidden {Markov} model for local sequence-structure correlations in proteins},
author = {C Bystroff and V Thorsson and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/bystroff00A.pdf},
issn = {0022-2836},
year = {2000},
date = {2000-08-01},
journal = {Journal of molecular biology},
volume = {301},
pages = {173--190},
abstract = {We describe a hidden Markov model, HMMSTR, for general protein sequence based on the I-sites library of sequence-structure motifs. Unlike the linear hidden Markov models used to model individual protein families, HMMSTR has a highly branched topology and captures recurrent local features of protein sequences and structures that transcend protein family boundaries. The model extends the I-sites library by describing the adjacencies of different sequence-structure motifs as observed in the protein database and, by representing overlapping motifs in a much more compact form, achieves a great reduction in parameters. The HMM attributes a considerably higher probability to coding sequence than does an equivalent dipeptide model, predicts secondary structure with an accuracy of 74.3 %, backbone torsion angles better than any previously reported method and the structural context of beta strands and turns with an accuracy that should be useful for tertiary structure prediction.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
David Baker
A surprising simplicity to protein folding Journal Article
In: Nature, vol. 405, pp. 39-42, 2000, ISSN: 0028-0836.
@article{192,
title = {A surprising simplicity to protein folding},
author = {David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/baker00A.pdf},
issn = {0028-0836},
year = {2000},
date = {2000-05-01},
journal = {Nature},
volume = {405},
pages = {39--42},
abstract = {The polypeptide chains that make up proteins have thousands of atoms and hence millions of possible inter-atomic interactions. It might be supposed that the resulting complexity would make prediction of protein structure and protein-folding mechanisms nearly impossible. But the fundamental physics underlying folding may be much simpler than this complexity would lead us to expect: folding rates and mechanisms appear to be largely determined by the topology of the native (folded) state, and new methods have shown great promise in predicting protein-folding mechanisms and the three-dimensional structures of proteins.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
K W Plaxco, S Larson, I Ruczinski, D S Riddle, E C Thayer, B Buchwitz, A R Davidson, David Baker
Evolutionary conservation in protein folding kinetics Journal Article
In: Journal of molecular biology, vol. 298, pp. 303-12, 2000, ISSN: 0022-2836.
@article{200,
title = {Evolutionary conservation in protein folding kinetics},
author = {K W Plaxco and S Larson and I Ruczinski and D S Riddle and E C Thayer and B Buchwitz and A R Davidson and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/plaxco00a.pdf},
issn = {0022-2836},
year = {2000},
date = {2000-04-01},
journal = {Journal of molecular biology},
volume = {298},
pages = {303--312},
abstract = {The sequence and structural conservation of folding transition states have been predicted on theoretical grounds. Using homologous sequence alignments of proteins previously characterized via coupled mutagenesis/kinetics studies, we tested these predictions experimentally. Only one of the six appropriately characterized proteins exhibits a statistically significant correlation between residues' roles in transition state structure and their evolutionary conservation. However, a significant correlation is observed between the contributions of individual sequence positions to the transition state structure across a set of homologous proteins. Thus the structure of the folding transition state ensemble appears to be more highly conserved than the specific interactions that stabilize it.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
1999
E Alm, David Baker
Prediction of protein-folding mechanisms from free-energy landscapes derived from native structures Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 96, pp. 11305-10, 1999, ISSN: 0027-8424.
@article{48,
title = {Prediction of protein-folding mechanisms from free-energy landscapes derived from native structures},
author = {E Alm and David Baker},
issn = {0027-8424},
year = {1999},
date = {1999-09-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {96},
pages = {11305--11310},
abstract = {Guided by recent experimental results suggesting that protein-folding rates and mechanisms are determined largely by native-state topology, we develop a simple model for protein folding free-energy landscapes based on native-state structures. The configurations considered by the model contain one or two contiguous stretches of residues ordered as in the native structure with all other residues completely disordered; the free energy of each configuration is the difference between the entropic cost of ordering the residues, which depends on the total number of residues ordered and the length of the loop between the two ordered segments, and the favorable attractive interactions, which are taken to be proportional to the total surface area buried by the ordered residues in the native structure. Folding kinetics are modeled by allowing only one residue to become ordered/disordered at a time, and a rigorous and exact method is used to identify free-energy maxima on the lowest free-energy paths connecting the fully disordered and fully ordered configurations. The distribution of structure in these free-energy maxima, which comprise the transition-state ensemble in the model, are reasonably consistent with experimental data on the folding transition state for five of seven proteins studied. Thus, the model appears to capture, at least in part, the basic physics underlying protein folding and the aspects of native-state topology that determine protein-folding mechanisms.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
K T Simons, I Ruczinski, C Kooperberg, B A Fox, C Bystroff, David Baker
Improved recognition of native-like protein structures using a combination of sequence-dependent and sequence-independent features of proteins Journal Article
In: Proteins, vol. 34, pp. 82-95, 1999, ISSN: 0887-3585.
@article{322,
title = {Improved recognition of native-like protein structures using a combination of sequence-dependent and sequence-independent features of proteins},
author = {K T Simons and I Ruczinski and C Kooperberg and B A Fox and C Bystroff and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/simons98A.pdf},
issn = {0887-3585},
year = {1999},
date = {1999-01-01},
journal = {Proteins},
volume = {34},
pages = {82--95},
abstract = {We describe the development of a scoring function based on the decomposition P(structure/sequence) proportional to P(sequence/structure) *P(structure), which outperforms previous scoring functions in correctly identifying native-like protein structures in large ensembles of compact decoys. The first term captures sequence-dependent features of protein structures, such as the burial of hydrophobic residues in the core, the second term, universal sequence-independent features, such as the assembly of beta-strands into beta-sheets. The efficacies of a wide variety of sequence-dependent and sequence-independent features of protein structures for recognizing native-like structures were systematically evaluated using ensembles of approximately 30,000 compact conformations with fixed secondary structure for each of 17 small protein domains. The best results were obtained using a core scoring function with P(sequence/structure) parameterized similarly to our previous work (Simons et al., J Mol Biol 1997;268:209-225) and P(structure) focused on secondary structure packing preferences; while several additional features had some discriminatory power on their own, they did not provide any additional discriminatory power when combined with the core scoring function. Our results, on both the training set and the independent decoy set of Park and Levitt (J Mol Biol 1996;258:367-392), suggest that this scoring function should contribute to the prediction of tertiary structure from knowledge of sequence and secondary structure.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
K T Simons, R Bonneau, I Ruczinski, David Baker
Ab initio protein structure prediction of CASP III targets using ROSETTA Journal Article
In: Proteins, vol. Suppl 3, pp. 171-6, 1999, ISSN: 0887-3585.
@article{41,
title = {Ab initio protein structure prediction of {CASP III} targets using {ROSETTA}},
author = {K T Simons and R Bonneau and I Ruczinski and David Baker},
issn = {0887-3585},
year = {1999},
date = {1999},
journal = {Proteins},
volume = {Suppl 3},
pages = {171--176},
abstract = {To generate structures consistent with both the local and nonlocal interactions responsible for protein stability, 3 and 9 residue fragments of known structures with local sequences similar to the target sequence were assembled into complete tertiary structures using a Monte Carlo simulated annealing procedure (Simons et al., J Mol Biol 1997; 268:209-225). The scoring function used in the simulated annealing procedure consists of sequence-dependent terms representing hydrophobic burial and specific pair interactions such as electrostatics and disulfide bonding and sequence-independent terms representing hard sphere packing, alpha-helix and beta-strand packing, and the collection of beta-strands in beta-sheets (Simons et al., Proteins 1999;34:82-95). For each of 21 small, ab initio targets, 1,200 final structures were constructed, each the result of 100,000 attempted fragment substitutions. The five structures submitted for the CASP III experiment were chosen from the approximately 25 structures with the lowest scores in the broadest minima (assessed through the number of structural neighbors; Shortle et al., Proc Natl Acad Sci USA 1998;95:1158-1162). The results were encouraging: highlights of the predictions include a 99-residue segment for MarA with an rmsd of 6.4 A to the native structure, a 95-residue (full length) prediction for the EH2 domain of EPS15 with an rmsd of 6.0 A, a 75-residue segment of DNAB helicase with an rmsd of 4.7 A, and a 67-residue segment of ribosomal protein L30 with an rmsd of 3.8 A. These results suggest that ab initio methods may soon become useful for low-resolution structure prediction for proteins that lack a close homologue of known structure.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
1998
David E Kim, Q Yi, S T Gladwin, J M Goldberg, David Baker
The single helix in protein L is largely disrupted at the rate-limiting step in folding Journal Article
In: Journal of molecular biology, vol. 284, pp. 807-15, 1998, ISSN: 0022-2836.
@article{209,
title = {The single helix in protein {L} is largely disrupted at the rate-limiting step in folding},
author = {David E Kim and Q Yi and S T Gladwin and J M Goldberg and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/kim98A_0.pdf},
issn = {0022-2836},
year = {1998},
date = {1998-12-01},
journal = {Journal of molecular biology},
volume = {284},
pages = {807--815},
abstract = {To investigate the role of helix formation in the folding of protein L, a 62 residue alpha/beta protein, we studied the consequences of both single and multiple mutations in the helix on the kinetics of folding. A triple mutant with 11 additional carbon atoms in core residues in the amino-terminal portion of the helix folded substantially faster than wild type, suggesting that hydrophobic association with residues elsewhere in the protein occurs at the rate-limiting step in folding. However, helix-destabilizing mutations had little effect on the rate of folding; in particular, a triple glycine substitution on the solvent-exposed side of the helix increased the unfolding rate 56-fold while reducing the folding rate less than threefold. Thus, in contrast to the predictions of models of folding involving the coalescence of well-formed secondary structure elements, the single helix in protein L appears to be largely disrupted at the rate-limiting step in folding and unfolding.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
D Shortle, K T Simons, David Baker
Clustering of low-energy conformations near the native structures of small proteins Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 95, pp. 11158-62, 1998, ISSN: 0027-8424.
@article{213,
title = {Clustering of low-energy conformations near the native structures of small proteins},
author = {D Shortle and K T Simons and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/shortle98A.pdf},
issn = {0027-8424},
year = {1998},
date = {1998-09-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {95},
pages = {11158--11162},
abstract = {Recent experimental studies of the denatured state and theoretical analyses of the folding landscape suggest that there are a large multiplicity of low-energy, partially folded conformations near the native state. In this report, we describe a strategy for predicting protein structure based on the working hypothesis that there are a greater number of low-energy conformations surrounding the correct fold than there are surrounding low-energy incorrect folds. To test this idea, 12 ensembles of 500 to 1,000 low-energy structures for 10 small proteins were analyzed by calculating the rms deviation of the Calpha coordinates between each conformation and every other conformation in the ensemble. In all 12 cases, the conformation with the greatest number of conformations within 4-A rms deviation was closer to the native structure than were the majority of conformations in the ensemble, and in most cases it was among the closest 1 to 5%. These results suggest that, to fold efficiently and retain robustness to changes in amino acid sequence, proteins may have evolved a native structure situated within a broad basin of low-energy conformations, a feature which could facilitate the prediction of protein structure at low resolution.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
C Bystroff, David Baker
Prediction of local structure in proteins using a library of sequence-structure motifs Journal Article
In: Journal of molecular biology, vol. 281, pp. 565-77, 1998, ISSN: 0022-2836.
@article{311,
title = {Prediction of local structure in proteins using a library of sequence-structure motifs},
author = {C Bystroff and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/bystroff98A.pdf},
issn = {0022-2836},
year = {1998},
date = {1998-08-01},
journal = {Journal of molecular biology},
volume = {281},
pages = {565--577},
abstract = {We describe a new method for local protein structure prediction based on a library of short sequence pattern that correlate strongly with protein three-dimensional structural elements. The library was generated using an automated method for finding correlations between protein sequence and local structure, and contains most previously described local sequence-structure correlations as well as new relationships, including a diverging type-II beta-turn, a frayed helix, and a proline-terminated helix. The query sequence is scanned for segments 7 to 19 residues in length that strongly match one of the 82 patterns in the library. Matching segments are assigned the three-dimensional structure characteristic of the corresponding sequence pattern, and backbone torsion angles for the entire query sequence are then predicted by piecing together mutually compatible segment predictions. In predictions of local structure in a test set of 55 proteins, about 50% of all residues, and 76% of residues covered by high-confidence predictions, were found in eight-residue segments within 1.4 A of their true structures. The predictions are complementary to traditional secondary structure predictions because they are considerably more specific in turn regions, and may contribute to ab initio tertiary structure prediction and fold recognition.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Q Yi, C Bystroff, P Rajagopal, R E Klevit, David Baker
Prediction and structural characterization of an independently folding substructure in the src SH3 domain Journal Article
In: Journal of molecular biology, vol. 283, pp. 293-300, 1998, ISSN: 0022-2836.
@article{212,
title = {Prediction and structural characterization of an independently folding substructure in the src {SH3} domain},
author = {Q Yi and C Bystroff and P Rajagopal and R E Klevit and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/yi98A.pdf},
issn = {0022-2836},
year = {1998},
date = {1998},
journal = {Journal of molecular biology},
volume = {283},
pages = {293--300},
abstract = {Previous studies of the conformations of peptides spanning the length of the alpha-spectrin SH3 domain suggested that SH3 domains lack independently folding substructures. Using a local structure prediction method based on the I-sites library of sequence-structure motifs, we identified a seven residue peptide in the src SH3 domain predicted to adopt a native-like structure, a type II beta-turn bridging unpaired beta-strands, that was not contained intact in any of the SH3 domain peptides studied earlier. NMR characterization confirmed that the isolated peptide, FKKGERL, adopts a structure similar to that adopted in the native protein: the NOE and 3JNHalpha coupling constant patterns were indicative of a type II beta-turn, and NOEs between the Phe and the Leu side-chains suggest that they are juxtaposed as in the prediction and the native structure. These results support the idea that high-confidence I-sites predictions identify protein segments that are likely to form native-like structures early in folding.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
1997
C Bystroff, David Baker
Blind predictions of local protein structure in CASP2 targets using the I-sites library Journal Article
In: Proteins, vol. Suppl 1, pp. 167-71, 1997, ISSN: 0887-3585.
@article{30,
title = {Blind predictions of local protein structure in {CASP2} targets using the {I-sites} library},
author = {C Bystroff and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/bystroff97A.pdf},
issn = {0887-3585},
year = {1997},
date = {1997},
journal = {Proteins},
volume = {Suppl 1},
pages = {167--171},
abstract = {Blind predictions of the local structure of nine CASP2 targets were made using the I-sites library of short sequence--structure motifs, revealing strengths and weaknesses in this new knowledge-based method. Many turns between secondary structural elements were accurately predicted. Estimates of the confidence of prediction correlated well with the accuracy over the whole set. Bias toward structures used to develop the library was minimal, probably because of the extensive use of cross-validation. However, helix positions were better predicted by the PHD program. The method is likely to be sensitive to the quality of the sequence alignment. A general measure for evaluating local structure predictions is suggested.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
1996
K F Han, David Baker
Global properties of the mapping between local amino acid sequence and local structure in proteins Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 93, pp. 5814-8, 1996, ISSN: 0027-8424.
@article{215,
title = {Global properties of the mapping between local amino acid sequence and local structure in proteins},
author = {K F Han and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/han96A.pdf},
issn = {0027-8424},
year = {1996},
date = {1996-06-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {93},
pages = {5814--5818},
abstract = {Local protein structure prediction efforts have consistently failed to exceed approximately 70% accuracy. We characterize the degeneracy of the mapping from local sequence to local structure responsible for this failure by investigating the extent to which similar sequence segments found in different proteins adopt similar three-dimensional structures. Sequence segments 3-15 residues in length from 154 different protein families are partitioned into neighborhoods containing segments with similar sequences using cluster analysis. The consistency of the sequence-to-structure mapping is assessed by comparing the local structures adopted by sequence segments in the same neighborhood in proteins of known structure. In the 154 families, 45% and 28% of the positions occur in neighborhoods in which one and two local structures predominate, respectively. The sequence patterns that characterize the neighborhoods in the first class probably include virtually all of the short sequence motifs in proteins that consistently occur in a particular local structure. These patterns, many of which occur in transitions between secondary structural elements, are an interesting combination of previously studied and novel motifs. The identification of sequence patterns that consistently occur in one or a small number of local structures in proteins should contribute to the prediction of protein structure from sequence.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
1995
K F Han, David Baker
Recurring local sequence motifs in proteins Journal Article
In: Journal of molecular biology, vol. 251, pp. 176-87, 1995, ISSN: 0022-2836.
@article{22,
title = {Recurring local sequence motifs in proteins},
author = {K F Han and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/06/han95A.pdf},
issn = {0022-2836},
year = {1995},
date = {1995-08-01},
journal = {Journal of molecular biology},
volume = {251},
pages = {176--187},
abstract = {We describe a completely automated approach to identifying local sequence motifs that transcend protein family boundaries. Cluster analysis is used to identify recurring patterns of variation at single positions and in short segments of contiguous positions in multiple sequence alignments for a non-redundant set of protein families. Parallel experiments on simulated data sets constructed with the overall residue frequencies of proteins but not the inter-residue correlations show that naturally occurring protein sequences are significantly more clustered than the corresponding random sequences for window lengths ranging from one to 13 contiguous positions. The patterns of variation at single positions are not in general surprising: chemically similar amino acids tend to be grouped together. More interesting patterns emerge as the window length increases. The patterns of variation for longer window lengths are in part recognizable patterns of hydrophobic and hydrophilic residues, and in part less obvious combinations. A particularly interesting class of patterns features highly conserved glycine residues. The patterns provide a means to abstract the information contained in multiple sequence alignments and may be useful for comparison of distantly related sequences or sequence families and for protein structure prediction.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}