Publications
Anishchenko, I; Ovchinnikov, S; Kamisetty, H; Baker, D
Origins of coevolution between residues distant in protein 3D structures Journal Article
In: Proceedings of the National Academy of Sciences, vol. 114, no. 34, pp. 9122-9127, 2017.
@article{1000,
title = {Origins of coevolution between residues distant in protein {3D} structures},
author = {I Anishchenko and S Ovchinnikov and H Kamisetty and D Baker},
url = {http://www.pnas.org/content/114/34/9122
https://www.bakerlab.org/wp-content/uploads/2018/08/9122.full1_.pdf},
doi = {10.1073/pnas.1702664114},
year = {2017},
date = {2017-08-22},
journal = {Proceedings of the National Academy of Sciences},
volume = {114},
number = {34},
pages = {9122--9127},
abstract = {Residue pairs that directly coevolve in protein families are generally close in protein 3D structures. Here we study the exceptions to this general trend—directly coevolving residue pairs that are distant in protein structures—to determine the origins of evolutionary pressure on spatially distant residues and to understand the sources of error in contact-based structure prediction. Over a set of 4,000 protein families, we find that 25\% of directly coevolving residue pairs are separated by more than 5 Å in protein structures and 3\% by more than 15 Å. The majority (91\%) of directly coevolving residue pairs in the 5–15 Å range are found to be in contact in at least one homologous structure—these exceptions arise from structural variation in the family in the region containing the residues. Thirty-five percent of the exceptions greater than 15 Å are at homo-oligomeric interfaces, 19\% arise from family structural variation, and 27\% are in repeat proteins likely reflecting alignment errors. Of the remaining long-range exceptions (<1\% of the total number of coupled pairs), many can be attributed to close interactions in an oligomeric state. Overall, the results suggest that directly coevolving residue pairs not in repeat proteins are spatially proximal in at least one biologically relevant protein conformation within the family; we find little evidence for direct coupling between residues at spatially separated allosteric and functional sites or for increased direct coupling between residue pairs on putative allosteric pathways connecting them.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, Sergey; Park, Hahnbeom; Varghese, Neha; Huang, Po-Ssu; Pavlopoulos, Georgios A.; Kim, David E.; Kamisetty, Hetunandan; Kyrpides, Nikos C.; Baker, David
Protein structure determination using metagenome sequence data Journal Article
In: Science, vol. 355, no. 6322, pp. 294–298, 2017, ISSN: 0036-8075.
@article{Ovchinnikov294,
title = {Protein structure determination using metagenome sequence data},
author = {Sergey Ovchinnikov and Hahnbeom Park and Neha Varghese and Po-Ssu Huang and Georgios A. Pavlopoulos and David E. Kim and Hetunandan Kamisetty and Nikos C. Kyrpides and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2017/01/ovchinnikov_science_2017.pdf
http://science.sciencemag.org/content/355/6322/294},
doi = {10.1126/science.aah4043},
issn = {0036-8075},
year = {2017},
date = {2017-01-01},
journal = {Science},
volume = {355},
number = {6322},
pages = {294--298},
publisher = {American Association for the Advancement of Science},
abstract = {Fewer than a third of the 14,849 known protein families have at least one member with an experimentally determined structure. This leaves more than 5000 protein families with no structural information. Protein modeling using residue-residue contacts inferred from evolutionary data has been successful in modeling unknown structures, but it requires large numbers of aligned sequences. Ovchinnikov et al. augmented such sequence alignments with metagenome sequence data (see the Perspective by S{\"o}ding). They determined the number of sequences required to allow modeling, developed criteria for model quality, and, where possible, improved modeling by matching predicted contacts to known structures. Their method predicted quality structural models for 614 protein families, of which about 140 represent newly discovered protein folds. Science, this issue p. 294; see also p. 248. Despite decades of work by structural biologists, there are still $\sim$5200 protein families with unknown structure outside the range of comparative modeling. We show that Rosetta structure prediction guided by residue-residue contacts inferred from evolutionary information can accurately model proteins that belong to large families and that metagenome sequence data more than triple the number of protein families with sufficient sequences for accurate modeling. We then integrate metagenome data, contact-based structure matching, and Rosetta structure calculations to generate models for 614 protein families with currently unknown structures; 206 are membrane proteins and 137 have folds not represented in the Protein Data Bank. This approach provides the representative models for large protein families originally envisioned as the goal of the Protein Structure Initiative at a fraction of the cost.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, Sergey; Park, Hahnbeom; Kim, David E.; Liu, Yuan; Wang, Ray Yu-Ruei; Baker, David
Structure prediction using sparse simulated NOE restraints with Rosetta in CASP11 Journal Article
In: Proteins: Structure, Function, and Bioinformatics, pp. n/a–n/a, 2016, ISSN: 1097-0134.
@article{PROT:PROT25006,
title = {Structure prediction using sparse simulated {NOE} restraints with {Rosetta} in {CASP11}},
author = {Ovchinnikov, Sergey and Park, Hahnbeom and Kim, David E. and Liu, Yuan and Wang, Ray Yu-Ruei and Baker, David},
url = {http://dx.doi.org/10.1002/prot.25006
https://www.bakerlab.org/wp-content/uploads/2016/05/Ovchinnikov_et_al-2016-Proteins__Structure_Function_and_Bioinformatics.pdf},
doi = {10.1002/prot.25006},
issn = {1097-0134},
year = {2016},
date = {2016-01-01},
journal = {Proteins: Structure, Function, and Bioinformatics},
abstract = {In CASP11 we generated protein structure models using simulated ambiguous and unambiguous nuclear Overhauser effect (NOE) restraints with a two stage protocol. Low resolution models were generated guided by the unambiguous restraints using continuous chain folding for alpha and alpha-beta proteins, and iterative annealing for all beta proteins to take advantage of the strand pairing information implicit in the restraints. The Rosetta fragment/model hybridization protocol was then used to recombine and regularize these models, and refine them in the Rosetta full atom energy function guided by both the unambiguous and the ambiguous restraints. Fifteen out of 19 targets were modeled with GDT-TS quality scores greater than 60 for Model 1, significantly improving upon the non-assisted predictions. Our results suggest that atomic level accuracy is achievable using sparse NOE data when there is at least one correctly assigned NOE for every residue. Proteins 2016. © 2016 Wiley Periodicals, Inc.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, S; Kim, DE; Wang, RY; Liu, Y; DiMaio, F; Baker, D
Improved de novo structure prediction in CASP11 by incorporating Co-evolution information into rosetta Journal Article
In: Proteins, 2015.
@article{S2015,
title = {Improved de novo structure prediction in {CASP11} by incorporating co-evolution information into {Rosetta}},
author = {S Ovchinnikov and DE Kim and RY Wang and Y Liu and F DiMaio and D Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2015/12/Ovchinnikov_Proteins_2015.pdf},
doi = {10.1002/prot.24974},
year = {2015},
date = {2015-12-17},
journal = {Proteins},
abstract = {We describe CASP11 de novo blind structure predictions made using the Rosetta structure prediction methodology with both automatic and human assisted protocols. Model accuracy was generally improved using co-evolution derived residue-residue contact information as restraints during Rosetta conformational sampling and refinement, particularly when the number of sequences in the family was more than three times the length of the protein. The highlight was the human assisted prediction of T0806, a large and topologically complex target with no homologs of known structure, which had unprecedented accuracy - <3.0 Å root-mean-square deviation (RMSD) from the crystal structure over 223 residues. For this target, we increased the amount of conformational sampling over our fully automated method by employing an iterative hybridization protocol. Our results clearly demonstrate, in a blind prediction scenario, that co-evolution derived contacts can considerably increase the accuracy of template-free structure modeling. This article is protected by copyright. All rights reserved.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, S; Kinch, L; Park, H; Liao, Y; Pei, J; Kim, DE; Kamisetty, H; Grishin, NV; Baker, D
Large-scale determination of previously unsolved protein structures using evolutionary information Journal Article
In: eLife, 2015.
@article{S2015b,
title = {Large-scale determination of previously unsolved protein structures using evolutionary information},
author = {S Ovchinnikov and L Kinch and H Park and Y Liao and J Pei and DE Kim and H Kamisetty and NV Grishin and D Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/01/Ovchinnikov_eLife_2015.pdf},
doi = {10.7554/eLife.09248},
year = {2015},
date = {2015-09-03},
journal = {eLife},
abstract = {The prediction of the structures of proteins without detectable sequence similarity to any protein of known structure remains an outstanding scientific challenge. Here we report significant progress in this area. We first describe de novo blind structure predictions of unprecedented accuracy we made for two proteins in large families in the recent CASP11 blind test of protein structure prediction methods by incorporating residue-residue co-evolution information in the Rosetta structure prediction program. We then describe the use of this method to generate structure models for 58 of the 121 large protein families in prokaryotes for which three-dimensional structures are not available. These models, which are posted online for public access, provide structural information for the over 400,000 proteins belonging to the 58 families and suggest hypotheses about mechanism for the subset for which the function is known, and hypotheses about function for the remainder.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Khoury, George A; Liwo, Adam; Khatib, Firas; Zhou, Hongyi; Chopra, Gaurav; Bacardit, Jaume; Bortot, Leandro O; Faccioli, Rodrigo A; Deng, Xin; He, Yi; Krupa, Pawel; Li, Jilong; Mozolewska, Magdalena A; Sieradzan, Adam K; Smadbeck, James; Wirecki, Tomasz; Cooper, Seth; Flatten, Jeff; Xu, Kefan; Baker, David; Cheng, Jianlin; Delbem, Alexandre C B; Floudas, Christodoulos A; Keasar, Chen; Levitt, Michael; Popović, Zoran; Scheraga, Harold A; Skolnick, Jeffrey; Crivelli, Silvia N
WeFold: a coopetition for protein structure prediction. Journal Article
In: Proteins, vol. 82, pp. 1850-68, 2014, ISSN: 1097-0134.
@article{625,
title = {{WeFold}: a coopetition for protein structure prediction},
author = {George A Khoury and Adam Liwo and Firas Khatib and Hongyi Zhou and Gaurav Chopra and Jaume Bacardit and Leandro O Bortot and Rodrigo A Faccioli and Xin Deng and Yi He and Pawel Krupa and Jilong Li and Magdalena A Mozolewska and Adam K Sieradzan and James Smadbeck and Tomasz Wirecki and Seth Cooper and Jeff Flatten and Kefan Xu and David Baker and Jianlin Cheng and Alexandre C B Delbem and Christodoulos A Floudas and Chen Keasar and Michael Levitt and Zoran Popovi{\'c} and Harold A Scheraga and Jeffrey Skolnick and Silvia N Crivelli},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Khoury_Proteins_2014.pdf},
doi = {10.1002/prot.24538},
issn = {1097-0134},
year = {2014},
date = {2014-09-01},
journal = {Proteins},
volume = {82},
pages = {1850--1868},
abstract = {The protein structure prediction problem continues to elude scientists. Despite the introduction of many methods, only modest gains were made over the last decade for certain classes of prediction targets. To address this challenge, a social-media based worldwide collaborative effort, named WeFold, was undertaken by 13 labs. During the collaboration, the laboratories were simultaneously competing with each other. Here, we present the first attempt at ``coopetition'' in scientific research applied to the protein structure prediction and refinement problems. The coopetition was possible by allowing the participating labs to contribute different components of their protein structure prediction pipelines and create new hybrid pipelines that they tested during CASP10. This manuscript describes both successes and areas needing improvement as identified throughout the first WeFold experiment and discusses the efforts that are underway to advance this initiative. A footprint of all contributions and structures are publicly accessible at http://www.wefold.org.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Chen, Kuang-Yui M; Sun, Jiaming; Salvo, Jason S; Baker, David; Barth, Patrick
High-resolution modeling of transmembrane helical protein structures from distant homologues. Journal Article
In: PLoS computational biology, vol. 10, pp. e1003636, 2014, ISSN: 1553-7358.
@article{622,
title = {High-resolution modeling of transmembrane helical protein structures from distant homologues},
author = {Kuang-Yui M Chen and Jiaming Sun and Jason S Salvo and David Baker and Patrick Barth},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Chen_PLOS_2014.pdf},
doi = {10.1371/journal.pcbi.1003636},
issn = {1553-7358},
year = {2014},
date = {2014-05-01},
journal = {PLoS Computational Biology},
volume = {10},
pages = {e1003636},
abstract = {Eukaryotic transmembrane helical (TMH) proteins perform a wide diversity of critical cellular functions, but remain structurally largely uncharacterized and their high-resolution structure prediction is currently hindered by the lack of close structural homologues. To address this problem, we present a novel and generic method for accurately modeling large TMH protein structures from distant homologues exhibiting distinct loop and TMH conformations. Models of the adenosine A2AR and chemokine CXCR4 receptors were first ranked in GPCR-DOCK blind prediction contests in the receptor structure accuracy category. In a benchmark of 50 TMH protein homolog pairs of diverse topology (from 5 to 12 TMHs), size (from 183 to 420 residues) and sequence identity (from 15\% to 70\%), the method improves most starting templates, and achieves near-atomic accuracy prediction of membrane-embedded regions. Unlike starting templates, the models are of suitable quality for computer-based protein engineering: redesigned models and redesigned X-ray structures exhibit very similar native interactions. The method should prove useful for the atom-level modeling and design of a large fraction of structurally uncharacterized TMH proteins from a wide range of structural homologues.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, Sergey; Kamisetty, Hetunandan; Baker, David
Robust and accurate prediction of residue-residue interactions across protein interfaces using evolutionary information. Journal Article
In: eLife, vol. 3, pp. e02030, 2014, ISSN: 2050-084X.
@article{540,
title = {Robust and accurate prediction of residue-residue interactions across protein interfaces using evolutionary information},
author = {Sergey Ovchinnikov and Hetunandan Kamisetty and David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Ovchinnikov_2014A.pdf},
doi = {10.7554/eLife.02030},
issn = {2050-084X},
year = {2014},
date = {2014-05-01},
journal = {eLife},
volume = {3},
pages = {e02030},
abstract = {Do the amino acid sequence identities of residues that make contact across protein interfaces covary during evolution? If so, such covariance could be used to predict contacts across interfaces and assemble models of biological complexes. We find that residue pairs identified using a pseudo-likelihood-based method to covary across protein-protein interfaces in the 50S ribosomal unit and 28 additional bacterial protein complexes with known structure are almost always in contact in the complex, provided that the number of aligned sequences is greater than the average length of the two proteins. We use this method to make subunit contact predictions for an additional 36 protein complexes with unknown structures, and present models based on these predictions for the tripartite ATP-independent periplasmic (TRAP) transporter, the tripartite efflux system, the pyruvate formate lyase-activating enzyme complex, and the methionine ABC transporter. DOI: http://dx.doi.org/10.7554/eLife.02030.001.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Baker, David
Protein folding, structure prediction and design. Journal Article
In: Biochemical Society transactions, vol. 42, pp. 225-9, 2014, ISSN: 1470-8752.
@article{529,
title = {Protein folding, structure prediction and design},
author = {David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Baker_BiochemSocTrans_2014.pdf},
doi = {10.1042/BST20130055},
issn = {1470-8752},
year = {2014},
date = {2014-04-01},
journal = {Biochemical Society Transactions},
volume = {42},
pages = {225--229},
abstract = {I describe how experimental studies of protein folding have led to advances in protein structure prediction and protein design. I describe the finding that protein sequences are not optimized for rapid folding, the contact order-protein folding rate correlation, the incorporation of experimental insights into protein folding into the Rosetta protein structure production methodology and the use of this methodology to determine structures from sparse experimental data. I then describe the inverse problem (protein design) and give an overview of recent work on designing proteins with new structures and functions. I also describe the contributions of the general public to these efforts through the Rosetta@home distributed computing project and the FoldIt interactive protein folding and design game.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Moretti, Rocco; Fleishman, Sarel J; Agius, Rudi; Torchala, Mieczyslaw; Bates, Paul A; Kastritis, Panagiotis L; Rodrigues, João P G L M; Trellet, Mikaël; Bonvin, Alexandre M J J; Cui, Meng; Rooman, Marianne; Gillis, Dimitri; Dehouck, Yves; Moal, Iain; Romero-Durana, Miguel; Perez-Cano, Laura; Pallara, Chiara; Jimenez, Brian; Fernandez-Recio, Juan; Flores, Samuel; Pacella, Michael; Kilambi, Krishna Praneeth; Gray, Jeffrey J; Popov, Petr; Grudinin, Sergei; Esquivel-Rodríguez, Juan; Kihara, Daisuke; Zhao, Nan; Korkin, Dmitry; Zhu, Xiaolei; Demerdash, Omar N A; Mitchell, Julie C; Kanamori, Eiji; Tsuchiya, Yuko; Nakamura, Haruki; Lee, Hasup; Park, Hahnbeom; Seok, Chaok; Sarmiento, Jamica; Liang, Shide; Teraguchi, Shusuke; Standley, Daron M; Shimoyama, Hiromitsu; Terashi, Genki; Takeda-Shitaka, Mayuko; Iwadate, Mitsuo; Umeyama, Hideaki; Beglov, Dmitri; Hall, David R; Kozakov, Dima; Vajda, Sandor; Pierce, Brian G; Hwang, Howook; Vreven, Thom; Weng, Zhiping; Huang, Yangyu; Li, Haotian; Yang, Xiufeng; Ji, Xiaofeng; Liu, Shiyong; Xiao, Yi; Zacharias, Martin; Qin, Sanbo; Zhou, Huan-Xiang; Huang, Sheng-You; Zou, Xiaoqin; Velankar, Sameer; Janin, Joël; Wodak, Shoshana J; Baker, David
Community-wide evaluation of methods for predicting the effect of mutations on protein-protein interactions. Journal Article
In: Proteins, vol. 81, pp. 1980-7, 2013, ISSN: 1097-0134.
@article{505,
title = {Community-wide evaluation of methods for predicting the effect of mutations on protein-protein interactions},
author = {Rocco Moretti and Sarel J Fleishman and Rudi Agius and Mieczyslaw Torchala and Paul A Bates and Panagiotis L Kastritis and Jo{\~a}o P G L M Rodrigues and Mika{\"e}l Trellet and Alexandre M J J Bonvin and Meng Cui and Marianne Rooman and Dimitri Gillis and Yves Dehouck and Iain Moal and Miguel Romero-Durana and Laura Perez-Cano and Chiara Pallara and Brian Jimenez and Juan Fernandez-Recio and Samuel Flores and Michael Pacella and Krishna Praneeth Kilambi and Jeffrey J Gray and Petr Popov and Sergei Grudinin and Juan Esquivel-Rodr{\'\i}guez and Daisuke Kihara and Nan Zhao and Dmitry Korkin and Xiaolei Zhu and Omar N A Demerdash and Julie C Mitchell and Eiji Kanamori and Yuko Tsuchiya and Haruki Nakamura and Hasup Lee and Hahnbeom Park and Chaok Seok and Jamica Sarmiento and Shide Liang and Shusuke Teraguchi and Daron M Standley and Hiromitsu Shimoyama and Genki Terashi and Mayuko Takeda-Shitaka and Mitsuo Iwadate and Hideaki Umeyama and Dmitri Beglov and David R Hall and Dima Kozakov and Sandor Vajda and Brian G Pierce and Howook Hwang and Thom Vreven and Zhiping Weng and Yangyu Huang and Haotian Li and Xiufeng Yang and Xiaofeng Ji and Shiyong Liu and Yi Xiao and Martin Zacharias and Sanbo Qin and Huan-Xiang Zhou and Sheng-You Huang and Xiaoqin Zou and Sameer Velankar and Jo{\"e}l Janin and Shoshana J Wodak and David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Moretti_Proteins_2013.pdf},
doi = {10.1002/prot.24356},
issn = {1097-0134},
year = {2013},
date = {2013-11-01},
journal = {Proteins},
volume = {81},
pages = {1980--1987},
abstract = {Community-wide blind prediction experiments such as CAPRI and CASP provide an objective measure of the current state of predictive methodology. Here we describe a community-wide assessment of methods to predict the effects of mutations on protein-protein interactions. Twenty-two groups predicted the effects of comprehensive saturation mutagenesis for two designed influenza hemagglutinin binders and the results were compared with experimental yeast display enrichment data obtained using deep sequencing. The most successful methods explicitly considered the effects of mutation on monomer stability in addition to binding affinity, carried out explicit side-chain sampling and backbone relaxation, evaluated packing, electrostatic, and solvation effects, and correctly identified around a third of the beneficial mutations. Much room for improvement remains for even the best techniques, and large-scale fitness landscapes should continue to provide an excellent test bed for continued evaluation of both existing and new prediction methodologies.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Vernon, Robert; Shen, Yang; Baker, David; Lange, Oliver F
Improved chemical shift based fragment selection for CS-Rosetta using Rosetta3 fragment picker. Journal Article
In: Journal of biomolecular NMR, vol. 57, pp. 117-27, 2013, ISSN: 1573-5001.
@article{508,
title = {Improved chemical shift based fragment selection for {CS-Rosetta} using {Rosetta3} fragment picker},
author = {Robert Vernon and Yang Shen and David Baker and Oliver F Lange},
doi = {10.1007/s10858-013-9772-4},
issn = {1573-5001},
year = {2013},
date = {2013-10-01},
journal = {Journal of Biomolecular NMR},
volume = {57},
pages = {117--127},
abstract = {A new fragment picker has been developed for CS-Rosetta that combines beneficial features of the original fragment picker, MFR, used with CS-Rosetta, and the fragment picker, NNMake, that was used for purely sequence based fragment selection in the context of ROSETTA de-novo structure prediction. Additionally, the new fragment picker has reduced sensitivity to outliers and other difficult to match data points rendering the protocol more robust and less likely to introduce bias towards wrong conformations in cases where data is bad, missing or inconclusive. The fragment picker protocol gives significant improvements on 6 of 23 CS-Rosetta targets. An independent benchmark on 39 protein targets, whose NMR data sets were published only after protocol optimization had been finished, also show significantly improved performance for the new fragment picker (van der Schot et al. in J Biomol NMR, 2013).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Kamisetty, Hetunandan; Ovchinnikov, Sergey; Baker, David
Assessing the utility of coevolution-based residue-residue contact predictions in a sequence- and structure-rich era. Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 110, pp. 15674-9, 2013, ISSN: 1091-6490.
@article{498,
title = {Assessing the utility of coevolution-based residue-residue contact predictions in a sequence- and structure-rich era},
author = {Hetunandan Kamisetty and Sergey Ovchinnikov and David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Kamisetty_PNAS_2013.pdf},
doi = {10.1073/pnas.1314045110},
issn = {1091-6490},
year = {2013},
date = {2013-09-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {110},
pages = {15674--15679},
abstract = {Recently developed methods have shown considerable promise in predicting residue-residue contacts in protein 3D structures using evolutionary covariance information. However, these methods require large numbers of evolutionarily related sequences to robustly assess the extent of residue covariation, and the larger the protein family, the more likely that contact information is unnecessary because a reasonable model can be built based on the structure of a homolog. Here we describe a method that integrates sequence coevolution and structural context information using a pseudolikelihood approach, allowing more accurate contact predictions from fewer homologous sequences. We rigorously assess the utility of predicted contacts for protein structure prediction using large and representative sequence and structure databases from recent structure prediction experiments. We find that contact predictions are likely to be accurate when the number of aligned sequences (with sequence redundancy reduced to 90\%) is greater than five times the length of the protein, and that accurate predictions are likely to be useful for structure modeling if the aligned sequences are more similar to the protein of interest than to the closest homolog of known structure. These conditions are currently met by 422 of the protein families collected in the Pfam database.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
van der Schot, Gijs; Zhang, Zaiyong; Vernon, Robert; Shen, Yang; Vranken, Wim F; Baker, David; Bonvin, Alexandre M J J; Lange, Oliver F
Improving 3D structure prediction from chemical shift data. Journal Article
In: Journal of biomolecular NMR, vol. 57, pp. 27-35, 2013, ISSN: 1573-5001.
@article{507,
title = {Improving {3D} structure prediction from chemical shift data},
author = {Gijs van der Schot and Zaiyong Zhang and Robert Vernon and Yang Shen and Wim F Vranken and David Baker and Alexandre M J J Bonvin and Oliver F Lange},
doi = {10.1007/s10858-013-9762-6},
issn = {1573-5001},
year = {2013},
date = {2013-09-01},
journal = {Journal of Biomolecular NMR},
volume = {57},
pages = {27--35},
abstract = {We report advances in the calculation of protein structures from chemical shift nuclear magnetic resonance data alone. Our previously developed method, CS-Rosetta, assembles structures from a library of short protein fragments picked from a large library of protein structures using chemical shifts and sequence information. Here we demonstrate that combination of a new and improved fragment picker and the iterative sampling algorithm RASREC yield significant improvements in convergence and accuracy. Moreover, we introduce improved criteria for assessing the accuracy of the models produced by the method. The method was tested on 39 proteins in the 50-100 residue size range and yields reliable structures in 70~\% of the cases. All structures that passed the reliability filter were accurate (<2~{\AA} RMSD from the reference).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Kim, David E; DiMaio, Frank; Wang, Ray Yu-Ruei; Song, Yifan; Baker, David
One contact for every twelve residues allows robust and accurate topology-level protein structure modeling. Journal Article
In: Proteins, 2013, ISSN: 1097-0134.
@article{506,
title = {One contact for every twelve residues allows robust and accurate topology-level protein structure modeling},
author = {David E Kim and Frank DiMaio and Ray Yu-Ruei Wang and Yifan Song and David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Kim_Proteins_2013.pdf},
doi = {10.1002/prot.24374},
issn = {1097-0134},
year = {2013},
date = {2013-07-01},
journal = {Proteins},
abstract = {A number of methods have been described for identifying pairs of contacting residues in protein three-dimensional structures, but it is unclear how many contacts are required for accurate structure modeling. The CASP10 assisted contact experiment provided a blind test of contact guided protein structure modeling. We describe the models generated for these contact guided prediction challenges using the Rosetta structure modeling methodology. For nearly all cases, the submitted models had the correct overall topology, and in some cases, they had near atomic-level accuracy; for example the model of the 384 residue homo-oligomeric tetramer (Tc680o) had only 2.9 {\AA} root-mean-square deviation (RMSD) from the crystal structure. Our results suggest that experimental and bioinformatic methods for obtaining contact information may need to generate only one correct contact for every 12 residues in the protein to allow accurate topology level modeling. Proteins 2013;. \textcopyright{} 2013 Wiley Periodicals, Inc.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Kiss, Gert; Çelebi-Ölçüm, Nihan; Moretti, Rocco; Baker, David; Houk, K N
Computational enzyme design Journal Article
In: Angewandte Chemie (International ed. in English), vol. 52, pp. 5700-25, 2013, ISSN: 1521-3773.
@article{472,
title = {Computational enzyme design},
author = {Gert Kiss and Nihan {\c{C}}elebi-{\"O}l{\c{c}}{\"u}m and Rocco Moretti and David Baker and K N Houk},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Kiss_AngewChemIntEd_2013.pdf},
doi = {10.1002/anie.201204077},
issn = {1521-3773},
year = {2013},
date = {2013-05-01},
journal = {Angewandte Chemie (International ed. in English)},
volume = {52},
pages = {5700--5725},
abstract = {Recent developments in computational chemistry and biology have come together in the ``inside-out'' approach to enzyme engineering. Proteins have been designed to catalyze reactions not previously accelerated in nature. Some of these proteins fold and act as catalysts, but the success rate is still low. The achievements and limitations of the current technology are highlighted and contrasted to other protein engineering techniques. On its own, computational ``inside-out'' design can lead to the production of catalytically active and selective proteins, but their kinetic performances fall short of natural enzymes. When combined with directed evolution, molecular dynamics simulations, and crowd-sourced structure-prediction approaches, however, computational designs can be significantly improved in terms of binding, turnover, and thermal stability.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Molski, MA; Goodman, JL; Chou, FC; Baker, D; Das, R; Schepartz, A
Remodeling a beta-peptide bundle Journal Article
In: Chemical Science, vol. 4, pp. 319-324, 2013, ISSN: 2041-6520.
@article{605,
title = {Remodeling a beta-peptide bundle},
author = {MA Molski and JL Goodman and FC Chou and D Baker and R Das and A Schepartz},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/remodelingabeta_Baker2013.pdf},
doi = {10.1039/c2sc21117c},
issn = {2041-6520},
year = {2013},
date = {2013},
journal = {Chemical Science},
volume = {4},
pages = {319--324},
abstract = {Natural biopolymers fold with fidelity, burying diverse side chains into well-packed cores and protecting their backbones from solvent. Certain beta-peptide oligomers assemble into bundles of defined octameric stoichiometry that resemble natural proteins in many respects. These beta-peptide bundles are thermostable, fold cooperatively, exchange interior amide N-H protons slowly, exclude hydrophobic dyes, and can be characterized at high resolution using X-ray crystallography - just like many proteins found in nature. But unlike natural proteins, all octameric beta-peptide bundles contain a sequence-uniform hydrophobic core composed of 32 leucine side chains. Here we apply rational design principles, including the Rosetta computational design methodology, to introduce sequence diversity into the bundle core while retaining the characteristic beta-peptide bundle fold. Using circular dichroism spectroscopy and analytical ultracentrifugation, we confirmed the prediction that an octameric bundle still assembles upon a major remodelling of its core: the mutation of sixteen core beta-homo-leucine side chains into sixteen beta-homo-phenylalanine side chains. Nevertheless, the bundle containing a partially beta-homo-phenylalanine core poorly protects interior amide protons from exchange, suggesting molten-globule-like properties. We further improve stability by the incorporation of eight beta-homo-pentafluorophenylalanine side chains, giving an assembly with amide protection factors comparable to prior well-structured bundles. By demonstrating that their cores tolerate significant sequence variation, the beta-peptide bundles reported here represent a starting point for the ``bottom-up'' construction of beta-peptide assemblies possessing both structure and sophisticated function.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Krzysiak, Troy C; Jung, Jinwon; Thompson, James; Baker, David; Gronenborn, Angela M
APOBEC2 is a monomer in solution: implications for APOBEC3G models Journal Article
In: Biochemistry, vol. 51, pp. 2008-17, 2012, ISSN: 1520-4995.
@article{604,
title = {{APOBEC2} is a monomer in solution: implications for {APOBEC3G} models},
author = {Troy C Krzysiak and Jinwon Jung and James Thompson and David Baker and Angela M Gronenborn},
url = {http://beta.baker/wp-content/uploads/2015/12/apobec2isamonomer_Baker2012.pdf},
doi = {10.1021/bi300021s},
issn = {1520-4995},
year = {2012},
date = {2012-03-01},
journal = {Biochemistry},
volume = {51},
pages = {2008--2017},
abstract = {Although the physiological role of APOBEC2 is still largely unknown, a crystal structure of a truncated variant of this protein was determined several years ago [Prochnow, C. (2007) Nature 445, 447-451]. This APOBEC2 structure had considerable impact in the HIV field because it was considered a good model for the structure of APOBEC3G, an important HIV restriction factor that abrogates HIV infectivity in the absence of the viral accessory protein Vif. The quaternary structure and the arrangement of the monomers of APOBEC2 in the crystal were taken as being representative for APOBEC3G and exploited in explaining its enzymatic and anti-HIV activity. Here we show, unambiguously, that in contrast to the findings for the crystal, APOBEC2 is monomeric in solution. The nuclear magnetic resonance solution structure of full-length APOBEC2 reveals that the N-terminal tail that was removed for crystallization resides close to strand β2, the dimer interface in the crystal structure, and shields this region of the protein from engaging in intermolecular contacts. In addition, the presence of the N-terminal region drastically alters the aggregation propensity of APOBEC2, rendering the full-length protein highly soluble and not prone to precipitation. In summary, our results cast doubt on all previous structure-function predictions for APOBEC3G that were based on the crystal structure of APOBEC2.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Lange, Oliver F; Baker, David
Resolution-adapted recombination of structural features significantly improves sampling in restraint-guided structure calculation. Journal Article
In: Proteins, vol. 80, pp. 884-95, 2012, ISSN: 1097-0134.
@article{460,
title = {Resolution-adapted recombination of structural features significantly improves sampling in restraint-guided structure calculation},
author = {Oliver F Lange and David Baker},
url = {http://beta.baker/wp-content/uploads/2015/12/Lange_Proteins_2012.pdf},
issn = {1097-0134},
year = {2012},
date = {2012-03-01},
journal = {Proteins},
volume = {80},
pages = {884--895},
abstract = {Recent work has shown that NMR structures can be determined by integrating sparse NMR data with structure prediction methods such as Rosetta. The experimental data serve to guide the search for the lowest energy state towards the deep minimum at the native state which is frequently missed in Rosetta de novo structure calculations. However, as the protein size increases, sampling again becomes limiting; for example, the standard Rosetta protocol involving Monte Carlo fragment insertion starting from an extended chain fails to converge for proteins over 150 amino acids even with guidance from chemical shifts (CS-Rosetta) and other NMR data. The primary limitation of this protocol--that every folding trajectory is completely independent of every other--was recently overcome with the development of a new approach involving resolution-adapted structural recombination (RASREC). Here we describe the RASREC approach in detail and compare it to standard CS-Rosetta. We show that the improved sampling of RASREC is essential in obtaining accurate structures over a benchmark set of 11 proteins in the 15-25 kDa size range using chemical shifts, backbone RDCs and HN-HN NOE data; in a number of cases the improved sampling methodology makes a larger contribution than incorporation of additional experimental data. Experimental data are invaluable for guiding sampling to the vicinity of the global energy minimum, but for larger proteins, the standard Rosetta fold-from-extended-chain protocol does not converge on the native minimum even with experimental data and the more powerful RASREC approach is necessary to converge to accurate solutions.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Handl, Julia; Knowles, Joshua; Vernon, Robert; Baker, David; Lovell, Simon C
The dual role of fragments in fragment-assembly methods for de novo protein structure prediction Journal Article
In: Proteins, vol. 80, pp. 490-504, 2012, ISSN: 1097-0134.
@article{601,
title = {The dual role of fragments in fragment-assembly methods for de novo protein structure prediction},
author = {Julia Handl and Joshua Knowles and Robert Vernon and David Baker and Simon C Lovell},
url = {https://www.bakerlab.org/wp-content/uploads/2018/06/Handl_et_al-2012-Proteins3A_Structure2C_Function2C_and_Bioinformatics.pdf
https://onlinelibrary.wiley.com/doi/full/10.1002/prot.23215},
doi = {10.1002/prot.23215},
issn = {1097-0134},
year = {2012},
date = {2012-02-01},
journal = {Proteins},
volume = {80},
pages = {490--504},
abstract = {In fragment-assembly techniques for protein structure prediction, models of protein structure are assembled from fragments of known protein structures. This process is typically guided by a knowledge-based energy function and uses a heuristic optimization method. The fragments play two important roles in this process: they define the set of structural parameters available, and they also assume the role of the main variation operators that are used by the optimiser. Previous analysis has typically focused on the first of these roles. In particular, the relationship between local amino acid sequence and local protein structure has been studied by a range of authors. The correlation between the two has been shown to vary with the window length considered, and the results of these analyses have informed directly the choice of fragment length in state-of-the-art prediction techniques. Here, we focus on the second role of fragments and aim to determine the effect of fragment length from an optimization perspective. We use theoretical analyses to reveal how the size and structure of the search space changes as a function of insertion length. Furthermore, empirical analyses are used to explore additional ways in which the size of the fragment insertion influences the search both in a simulation model and for the fragment-assembly technique, Rosetta.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Fleishman, Sarel J; Whitehead, Timothy A; Strauch, Eva-Maria; Corn, Jacob E; Qin, Sanbo; Zhou, Huan-Xiang; Mitchell, Julie C; Demerdash, Omar N A; Takeda-Shitaka, Mayuko; Terashi, Genki; Moal, Iain H; Li, Xiaofan; Bates, Paul A; Zacharias, Martin; Park, Hahnbeom; Ko, Jun-su; Lee, Hasup; Seok, Chaok; Bourquard, Thomas; Bernauer, Julie; Poupon, Anne; Az’e, J’er^ome; Soner, Seren; Ovali, Sefik Kerem; Ozbek, Pemra; Tal, Nir Ben; Haliloglu, T"urkan; Hwang, Howook; Vreven, Thom; Pierce, Brian G; Weng, Zhiping; P’erez-Cano, Laura; Pons, Carles; Fern’andez-Recio, Juan; Jiang, Fan; Yang, Feng; Gong, Xinqi; Cao, Libin; Xu, Xianjin; Liu, Bin; Wang, Panwen; Li, Chunhua; Wang, Cunxin; Robert, Charles H; Guharoy, Mainak; Liu, Shiyong; Huang, Yangyu; Li, Lin; Guo, Dachuan; Chen, Ying; Xiao, Yi; London, Nir; Itzhaki, Zohar; Schueler-Furman, Ora; Inbar, Yuval; Potapov, Vladimir; Cohen, Mati; Schreiber, Gideon; Tsuchiya, Yuko; Kanamori, Eiji; Standley, Daron M; Nakamura, Haruki; Kinoshita, Kengo; Driggers, Camden M; Hall, Robert G; Morgan, Jessica L; Hsu, Victor L; Zhan, Jian; Yang, Yuedong; Zhou, Yaoqi; Kastritis, Panagiotis L; Bonvin, Alexandre M J J; Zhang, Weiyi; Camacho, Carlos J; Kilambi, Krishna P; Sircar, Aroop; Gray, Jeffrey J; Ohue, Masahito; Uchikoga, Nobuyuki; Matsuzaki, Yuri; Ishida, Takashi; Akiyama, Yutaka; Khashan, Raed; Bush, Stephen; Fouches, Denis; Tropsha, Alexander; Esquivel-Rodr’iguez, Juan; Kihara, Daisuke; Stranges, P Benjamin; Jacak, Ron; Kuhlman, Brian; Huang, Sheng-You; Zou, Xiaoqin; Wodak, Shoshana J; Janin, Joel; Baker, David
Community-wide assessment of protein-interface modeling suggests improvements to design methodology Journal Article
In: Journal of Molecular Biology, vol. 414, pp. 289-302, 2011, ISSN: 1089-8638.
@article{598,
title = {Community-wide assessment of protein-interface modeling suggests improvements to design methodology},
author = {Sarel J Fleishman and Timothy A Whitehead and Eva-Maria Strauch and Jacob E Corn and Sanbo Qin and Huan-Xiang Zhou and Julie C Mitchell and Omar N A Demerdash and Mayuko Takeda-Shitaka and Genki Terashi and Iain H Moal and Xiaofan Li and Paul A Bates and Martin Zacharias and Hahnbeom Park and Jun-su Ko and Hasup Lee and Chaok Seok and Thomas Bourquard and Julie Bernauer and Anne Poupon and J{\'e}r{\^o}me Az{\'e} and Seren Soner and Sefik Kerem Ovali and Pemra Ozbek and Nir Ben Tal and T{\"u}rkan Haliloglu and Howook Hwang and Thom Vreven and Brian G Pierce and Zhiping Weng and Laura P{\'e}rez-Cano and Carles Pons and Juan Fern{\'a}ndez-Recio and Fan Jiang and Feng Yang and Xinqi Gong and Libin Cao and Xianjin Xu and Bin Liu and Panwen Wang and Chunhua Li and Cunxin Wang and Charles H Robert and Mainak Guharoy and Shiyong Liu and Yangyu Huang and Lin Li and Dachuan Guo and Ying Chen and Yi Xiao and Nir London and Zohar Itzhaki and Ora Schueler-Furman and Yuval Inbar and Vladimir Potapov and Mati Cohen and Gideon Schreiber and Yuko Tsuchiya and Eiji Kanamori and Daron M Standley and Haruki Nakamura and Kengo Kinoshita and Camden M Driggers and Robert G Hall and Jessica L Morgan and Victor L Hsu and Jian Zhan and Yuedong Yang and Yaoqi Zhou and Panagiotis L Kastritis and Alexandre M J J Bonvin and Weiyi Zhang and Carlos J Camacho and Krishna P Kilambi and Aroop Sircar and Jeffrey J Gray and Masahito Ohue and Nobuyuki Uchikoga and Yuri Matsuzaki and Takashi Ishida and Yutaka Akiyama and Raed Khashan and Stephen Bush and Denis Fouches and Alexander Tropsha and Juan Esquivel-Rodr{\'\i}guez and Daisuke Kihara and P Benjamin Stranges and Ron Jacak and Brian Kuhlman and Sheng-You Huang and Xiaoqin Zou and Shoshana J Wodak and Joel Janin and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2018/06/1-s2.0-S0022283611010552-main.pdf
https://www.sciencedirect.com/science/article/pii/S0022283611010552?via%3Dihub},
doi = {10.1016/j.jmb.2011.09.031},
issn = {1089-8638},
year = {2011},
date = {2011-11-01},
journal = {Journal of Molecular Biology},
volume = {414},
pages = {289--302},
abstract = {The CAPRI (Critical Assessment of Predicted Interactions) and CASP (Critical Assessment of protein Structure Prediction) experiments have demonstrated the power of community-wide tests of methodology in assessing the current state of the art and spurring progress in the very challenging areas of protein docking and structure prediction. We sought to bring the power of community-wide experiments to bear on a very challenging protein design problem that provides a complementary but equally fundamental test of current understanding of protein-binding thermodynamics. We have generated a number of designed protein-protein interfaces with very favorable computed binding energies but which do not appear to be formed in experiments, suggesting that there may be important physical chemistry missing in the energy calculations. A total of 28 research groups took up the challenge of determining what is missing: we provided structures of 87 designed complexes and 120 naturally occurring complexes and asked participants to identify energetic contributions and/or structural features that distinguish between the two sets. The community found that electrostatics and solvation terms partially distinguish the designs from the natural complexes, largely due to the nonpolar character of the designed interactions. Beyond this polarity difference, the community found that the designed binding surfaces were, on average, structurally less embedded in the designed monomers, suggesting that backbone conformational rigidity at the designed surface is important for realization of the designed function. These results can be used to improve computational design strategies, but there is still much to be learned; for example, one designed complex, which does form in experiments, was classified by all metrics as a nonbinder.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Preprints are available on bioRxiv.
2023
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2022
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2021
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2020
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2019
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2018
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2017–1998
ALL PAPERS
2010
Srivatsan Raman, Yuanpeng J Huang, Binchen Mao, Paolo Rossi, James M Aramini, Gaohua Liu, Gaetano T Montelione, David Baker
Accurate automated protein NMR structure determination using unassigned NOESY data Journal Article
In: Journal of the American Chemical Society, vol. 132, pp. 202-7, 2010, ISSN: 1520-5126.
@article{258,
title = {Accurate automated protein {NMR} structure determination using unassigned {NOESY} data},
author = {Srivatsan Raman and Yuanpeng J Huang and Binchen Mao and Paolo Rossi and James M Aramini and Gaohua Liu and Gaetano T Montelione and David Baker},
issn = {1520-5126},
year = {2010},
date = {2010-01-01},
journal = {Journal of the American Chemical Society},
volume = {132},
pages = {202--207},
abstract = {Conventional NMR structure determination requires nearly complete assignment of the cross peaks of a refined NOESY peak list. Depending on the size of the protein and quality of the spectral data, this can be a time-consuming manual process requiring several rounds of peak list refinement and structure determination. Programs such as Aria, CYANA, and AutoStructure can generate models using unassigned NOESY data but are very sensitive to the quality of the input peak lists and can converge to inaccurate structures if the signal-to-noise of the peak lists is low. Here, we show that models with high accuracy and reliability can be produced by combining the strengths of the high-resolution structure prediction program Rosetta with global measures of the agreement between structure models and experimental data. A first round of models generated using CS-Rosetta (Rosetta supplemented with backbone chemical shift information) are filtered on the basis of their goodness-of-fit with unassigned NOESY peak lists using the DP-score, and the best fitting models are subjected to high resolution refinement with the Rosetta rebuild-and-refine protocol. This hybrid approach uses both local backbone chemical shift and the unassigned NOESY data to direct Rosetta trajectories toward the native structure and produces more accurate models than AutoStructure/CYANA or CS-Rosetta alone, particularly when using raw unedited NOESY peak lists. We also show that when accurate manually refined NOESY peak lists are available, Rosetta refinement can consistently increase the accuracy of models generated using CYANA and AutoStructure.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2009
Rhiju Das, Ingemar Andr’e, Yang Shen, Yibing Wu, Alexander Lemak, Sonal Bansal, Cheryl H Arrowsmith, Thomas Szyperski, David Baker
Simultaneous prediction of protein folding and docking at high resolution Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 106, pp. 18978-83, 2009, ISSN: 1091-6490.
@article{124,
title = {Simultaneous prediction of protein folding and docking at high resolution},
author = {Rhiju Das and Ingemar Andr{\'e} and Yang Shen and Yibing Wu and Alexander Lemak and Sonal Bansal and Cheryl H Arrowsmith and Thomas Szyperski and David Baker},
issn = {1091-6490},
year = {2009},
date = {2009-11-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {106},
pages = {18978--18983},
abstract = {Interleaved dimers and higher order symmetric oligomers are ubiquitous in biology but present a challenge to de novo structure prediction methodology: The structure adopted by a monomer can be stabilized largely by interactions with other monomers and hence not the lowest energy state of a single chain. Building on the Rosetta framework, we present a general method to simultaneously model the folding and docking of multiple-chain interleaved homo-oligomers. For more than a third of the cases in a benchmark set of interleaved homo-oligomers, the method generates near-native models of large alpha-helical bundles, interlocking beta sandwiches, and interleaved alpha/beta motifs with an accuracy high enough for molecular replacement based phasing. With the incorporation of NMR chemical shift information, accurate models can be obtained consistently for symmetric complexes with as many as 192 total amino acids; a blind prediction was within 1 A rmsd of the traditionally determined NMR structure, and fit independently collected RDC data equally well. Together, these results show that the Rosetta "fold-and-dock" protocol can produce models of homo-oligomeric complexes with near-atomic-level accuracy and should be useful for crystallographic phasing and the rapid determination of the structures of multimers with limited NMR information.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
David E Kim, Ben Blum, Philip Bradley, David Baker
Sampling bottlenecks in de novo protein structure prediction Journal Article
In: Journal of molecular biology, vol. 393, pp. 249-60, 2009, ISSN: 1089-8638.
@article{131,
title = {Sampling bottlenecks in de novo protein structure prediction},
author = {David E Kim and Ben Blum and Philip Bradley and David Baker},
issn = {1089-8638},
year = {2009},
date = {2009-10-01},
journal = {Journal of Molecular Biology},
volume = {393},
pages = {249--260},
abstract = {The primary obstacle to de novo protein structure prediction is conformational sampling: the native state generally has lower free energy than nonnative structures but is exceedingly difficult to locate. Structure predictions with atomic level accuracy have been made for small proteins using the Rosetta structure prediction method, but for larger and more complex proteins, the native state is virtually never sampled, and it has been unclear how much of an increase in computing power would be required to successfully predict the structures of such proteins. In this paper, we develop an approach to determining how much computer power is required to accurately predict the structure of a protein, based on a reformulation of the conformational search problem as a combinatorial sampling problem in a discrete feature space. We find that conformational sampling for many proteins is limited by critical "linchpin" features, often the backbone torsion angles of individual residues, which are sampled very rarely in unbiased trajectories and, when constrained, dramatically increase the sampling of the native state. These critical features frequently occur in less regular and likely strained regions of proteins that contribute to protein function. In a number of proteins, the linchpin features are in regions found experimentally to form late in folding, suggesting a correspondence between folding in silico and in reality.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ian W Davis, Kaushik Raha, Martha S Head, David Baker
Blind docking of pharmaceutically relevant compounds using RosettaLigand Journal Article
In: Protein science, vol. 18, pp. 1998-2002, 2009, ISSN: 1469-896X.
@article{126,
title = {Blind docking of pharmaceutically relevant compounds using {RosettaLigand}},
author = {Ian W Davis and Kaushik Raha and Martha S Head and David Baker},
issn = {1469-896X},
year = {2009},
date = {2009-09-01},
journal = {Protein Science},
volume = {18},
pages = {1998--2002},
abstract = {It is difficult to properly validate algorithms that dock a small molecule ligand into its protein receptor using data from the public domain: the predictions are not blind because the correct binding mode is already known, and public test cases may not be representative of compounds of interest such as drug leads. Here, we use private data from a real drug discovery program to carry out a blind evaluation of the RosettaLigand docking methodology and find that its performance is on average comparable with that of the best commercially available current small molecule docking programs. The strength of RosettaLigand is the use of the Rosetta sampling methodology to simultaneously optimize protein sidechain, protein backbone and ligand degrees of freedom; the extensive benchmark test described here identifies shortcomings in other aspects of the protocol and suggests clear routes to improving the method.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Brian A Kidd, David Baker, Wendy E Thomas
Computation of conformational coupling in allosteric proteins Journal Article
In: PLoS computational biology, vol. 5, pp. e1000484, 2009, ISSN: 1553-7358.
@article{130,
title = {Computation of conformational coupling in allosteric proteins},
author = {Brian A Kidd and David Baker and Wendy E Thomas},
issn = {1553-7358},
year = {2009},
date = {2009-08-01},
journal = {PLoS Computational Biology},
volume = {5},
pages = {e1000484},
abstract = {In allosteric regulation, an effector molecule binding a protein at one site induces conformational changes, which alter structure and function at a distant active site. Two key challenges in the computational modeling of allostery are the prediction of the structure of one allosteric state starting from the structure of the other, and elucidating the mechanisms underlying the conformational coupling of the effector and active sites. Here we approach these two challenges using the Rosetta high-resolution structure prediction methodology. We find that the method can recapitulate the relaxation of effector-bound forms of single domain allosteric proteins into the corresponding ligand-free states, particularly when sampling is focused on regions known to change conformation most significantly. Analysis of the coupling between contacting pairs of residues in large ensembles of conformations spread throughout the landscape between and around the two allosteric states suggests that the transitions are built up from blocks of tightly coupled interacting sets of residues that are more loosely coupled to one another.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ruslan I Sadreyev, ShuoYong Shi, David Baker, Nick V Grishin
Structure similarity measure with penalty for close non-equivalent residues Journal Article
In: Bioinformatics, vol. 25, pp. 1259-63, 2009, ISSN: 1367-4811.
@article{135,
title = {Structure similarity measure with penalty for close non-equivalent residues},
author = {Ruslan I Sadreyev and ShuoYong Shi and David Baker and Nick V Grishin},
issn = {1367-4811},
year = {2009},
date = {2009-05-01},
journal = {Bioinformatics},
volume = {25},
pages = {1259--1263},
abstract = {MOTIVATION: Recent improvement in homology-based structure modeling emphasizes the importance of sensitive evaluation measures that help identify and correct modest distortions in models compared with the target structures. Global Distance Test Total Score (GDT_TS), otherwise a very powerful and effective measure for model evaluation, is still insensitive to and can even reward such distortions, as observed for remote homology modeling in the latest CASP8 (Comparative Assessment of Structure Prediction). RESULTS: We develop a new measure that balances GDT_TS reward for the closeness of equivalent model and target residues ('attraction' term) with the penalty for the closeness of non-equivalent residues ('repulsion' term). Compared with GDT_TS, the resulting score, TR (total score with repulsion), is much more sensitive to structure compression both in real remote homologs and in CASP models. TR is correlated yet different from other measures of structure similarity. The largest difference from GDT_TS is observed in models of mid-range quality based on remote homology modeling. AVAILABILITY: The script for TR calculation is included in Supplementary Material. TR scores for all server models in CASP8 are available at http://prodata.swmed.edu/CASP8.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
P Barth, B Wallner, David Baker
Prediction of membrane protein structures with complex topologies using limited constraints Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 106, pp. 1409-14, 2009, ISSN: 1091-6490.
@article{123,
title = {Prediction of membrane protein structures with complex topologies using limited constraints},
author = {P Barth and B Wallner and David Baker},
issn = {1091-6490},
year = {2009},
date = {2009-02-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {106},
pages = {1409--1414},
abstract = {Reliable structure-prediction methods for membrane proteins are important because the experimental determination of high-resolution membrane protein structures remains very difficult, especially for eukaryotic proteins. However, membrane proteins are typically longer than 200 aa and represent a formidable challenge for structure prediction. We have developed a method for predicting the structures of large membrane proteins by constraining helix-helix packing arrangements at particular positions predicted from sequence or identified by experiments. We tested the method on 12 membrane proteins of diverse topologies and functions with lengths ranging between 190 and 300 residues. Enforcing a single constraint during the folding simulations enriched the population of near-native models for 9 proteins. In 4 of the cases in which the constraint was predicted from the sequence, 1 of the 5 lowest energy models was superimposable within 4 A on the native structure. Near-native structures could also be selected for heme-binding and pore-forming domains from simulations in which pairs of conserved histidine-chelating hemes and one experimentally determined salt bridge were constrained, respectively. These results suggest that models within 4 A of the native structure can be achieved for complex membrane proteins if even limited information on residue-residue interactions can be obtained from protein structure databases or experiments.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Will Sheffler, David Baker
RosettaHoles: rapid assessment of protein core packing for structure prediction, refinement, design, and validation Journal Article
In: Protein science, vol. 18, pp. 229-39, 2009, ISSN: 1469-896X.
@article{136,
title = {{RosettaHoles}: rapid assessment of protein core packing for structure prediction, refinement, design, and validation},
author = {Will Sheffler and David Baker},
url = {https://onlinelibrary.wiley.com/doi/full/10.1002/pro.8
https://www.bakerlab.org/wp-content/uploads/2020/08/pro.8.pdf},
doi = {10.1002/pro.8},
issn = {1469-896X},
year = {2009},
date = {2009-01-01},
journal = {Protein Science},
volume = {18},
pages = {229--239},
abstract = {We present a novel method called RosettaHoles for visual and quantitative assessment of underpacking in the protein core. RosettaHoles generates a set of spherical cavity balls that fill the empty volume between atoms in the protein interior. For visualization, the cavity balls are aggregated into contiguous overlapping clusters and small cavities are discarded, leaving an uncluttered representation of the unfilled regions of space in a structure. For quantitative analysis, the cavity ball data are used to estimate the probability of observing a given cavity in a high-resolution crystal structure. RosettaHoles provides excellent discrimination between real and computationally generated structures, is predictive of incorrect regions in models, identifies problematic structures in the Protein Data Bank, and promises to be a useful validation tool for newly solved experimental structures.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Srivatsan Raman, Robert Vernon, James Thompson, Michael Tyka, Ruslan Sadreyev, Jimin Pei, David Kim, Elizabeth Kellogg, Frank DiMaio, Oliver Lange, Lisa Kinch, Will Sheffler, Bong-Hyun Kim, Rhiju Das, Nick V Grishin, David Baker
Structure prediction for CASP8 with all-atom refinement using Rosetta Journal Article
In: Proteins, vol. 77 Suppl 9, pp. 89-99, 2009, ISSN: 1097-0134.
@article{273,
title = {Structure prediction for {CASP8} with all-atom refinement using {Rosetta}},
author = {Srivatsan Raman and Robert Vernon and James Thompson and Michael Tyka and Ruslan Sadreyev and Jimin Pei and David Kim and Elizabeth Kellogg and Frank DiMaio and Oliver Lange and Lisa Kinch and Will Sheffler and Bong-Hyun Kim and Rhiju Das and Nick V Grishin and David Baker},
issn = {1097-0134},
year = {2009},
date = {2009},
journal = {Proteins},
volume = {77},
number = {Suppl 9},
pages = {89--99},
abstract = {We describe predictions made using the Rosetta structure prediction methodology for the Eighth Critical Assessment of Techniques for Protein Structure Prediction. Aggressive sampling and all-atom refinement were carried out for nearly all targets. A combination of alignment methodologies was used to generate starting models from a range of templates, and the models were then subjected to Rosetta all atom refinement. For the 64 domains with readily identified templates, the best submitted model was better than the best alignment to the best template in the Protein Data Bank for 24 cases, and improved over the best starting model for 43 cases. For 13 targets where only very distant sequence relationships to proteins of known structure were detected, models were generated using the Rosetta de novo structure prediction methodology followed by all-atom refinement; in several cases the submitted models were better than those based on the available templates. Of the 12 refinement challenges, the best submitted model improved on the starting model in seven cases. These improvements over the starting template-based models and refinement tests demonstrate the power of Rosetta structure refinement in improving model accuracy.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2008
Anthony H Keeble, Lukasz A Joachimiak, Mar’ia Jesus Mat’e, Nicola Meenan, Nadine Kirkpatrick, David Baker, Colin Kleanthous
Experimental and computational analyses of the energetic basis for dual recognition of immunity proteins by colicin endonucleases Journal Article
In: Journal of molecular biology, vol. 379, pp. 745-59, 2008, ISSN: 1089-8638.
@article{221,
title = {Experimental and computational analyses of the energetic basis for dual recognition of immunity proteins by colicin endonucleases},
author = {Anthony H Keeble and Lukasz A Joachimiak and Mar{\'\i}a Jesus Mat{\'e} and Nicola Meenan and Nadine Kirkpatrick and David Baker and Colin Kleanthous},
issn = {1089-8638},
year = {2008},
date = {2008-06-01},
journal = {Journal of Molecular Biology},
volume = {379},
pages = {745--759},
abstract = {Colicin endonucleases (DNases) are bound and inactivated by immunity (Im) proteins. Im proteins are broadly cross-reactive yet specific inhibitors binding cognate and non-cognate DNases with K(d) values that vary between 10(-4) and 10(-14) M, characteristics that are explained by a 'dual-recognition' mechanism. In this work, we addressed for the first time the energetics of Im protein recognition by colicin DNases through a combination of E9 DNase alanine scanning and double-mutant cycles (DMCs) coupled with kinetic and calorimetric analyses of cognate Im9 and non-cognate Im2 binding, as well as computational analysis of alanine scanning and DMC data. We show that differential DeltaDeltaGs observed for four E9 DNase residues cumulatively distinguish cognate Im9 association from non-cognate Im2 association. E9 DNase Phe86 is the primary specificity hotspot residue in the centre of the interface, which is coordinated by conserved and variable hotspot residues of the cognate Im protein. Experimental DMC analysis reveals that only modest coupling energies to Im9 residues are observed, in agreement with calculated DMCs using the program ROSETTA and consistent with the largely hydrophobic nature of E9 DNase-Im9 specificity contacts. Computed values for the 12 E9 DNase alanine mutants showed reasonable agreement with experimental DeltaDeltaG data, particularly for interactions not mediated by interfacial water molecules. DeltaDeltaG predictions for residues that contact buried water molecules calculated using solvated rotamer models met with mixed success; however, we were able to predict with a high degree of accuracy the location and energetic contribution of one such contact. Our study highlights how colicin DNases are able to utilise both conserved and variable amino acids to distinguish cognate from non-cognate Im proteins, with the energetic contributions of the conserved residues modulated by neighbouring specificity sites.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jian Qiu, Will Sheffler, David Baker, William Stafford Noble
Ranking predicted protein structures with support vector regression Journal Article
In: Proteins, vol. 71, pp. 1175-82, 2008, ISSN: 1097-0134.
@article{220,
title = {Ranking predicted protein structures with support vector regression},
author = {Jian Qiu and Will Sheffler and David Baker and William Stafford Noble},
issn = {1097-0134},
year = {2008},
date = {2008-05-01},
journal = {Proteins},
volume = {71},
pages = {1175--1182},
abstract = {Protein structure prediction is an important problem of both intellectual and practical interest. Most protein structure prediction approaches generate multiple candidate models first, and then use a scoring function to select the best model among these candidates. In this work, we develop a scoring function using support vector regression (SVR). Both consensus-based features and features from individual structures are extracted from a training data set containing native protein structures and predicted structural models submitted to CASP5 and CASP6. The SVR learns a scoring function that is a linear combination of these features. We test this scoring function on two data sets. First, when used to rank server models submitted to CASP7, the SVR score selects predictions that are comparable to the best performing server in CASP7, Zhang-Server, and significantly better than all the other servers. Even if the SVR score is not allowed to select Zhang-Server models, the SVR score still selects predictions that are significantly better than all the other servers. In addition, the SVR is able to select significantly better models and yield significantly better Pearson correlation coefficients than the two best Quality Assessment groups in CASP7, QA556 (LEE), and QA634 (Pcons). Second, this work aims to improve the ability of the Robetta server to select best models, and hence we evaluate the performance of the SVR score on ranking the Robetta server template-based models for the CASP7 targets. The SVR selects significantly better models than the Robetta K*Sync consensus alignment score.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Raman S, Qian B, Baker D, Walker RC
Advances in Rosetta Protein Structure Prediction on Massively Parallel Systems Journal Article
In: IBM Journal of Research and Development, vol. 52, no. 1-2, pp. 7-17, 2008.
@article{280,
title = {Advances in {Rosetta} Protein Structure Prediction on Massively Parallel Systems},
author = {Raman, S and Qian, B and Baker, D and Walker, R C},
year = {2008},
date = {2008-01-01},
journal = {IBM Journal of Research and Development},
volume = {52},
number = {1-2},
pages = {7--17},
abstract = {One of the key challenges in computational biology is prediction of three-dimensional protein structures from amino-acid sequences. For most proteins, the "native state" lies at the bottom of a free-energy landscape. Protein structure prediction involves varying the degrees of freedom of the protein in a constrained manner until it approaches its native state. In the Rosetta protein structure prediction protocols, a large number of independent folding trajectories are simulated, and several lowest-energy results are likely to be close to the native state. The availability of hundred-teraflop, and shortly, petaflop, computing resources is revolutionizing the approaches available for protein structure prediction. Here, we discuss issues involved in utilizing such machines efficiently with the Rosetta code, including an overview of recent results of the Critical Assessment of Techniques for Protein Structure Prediction 7 (CASP7) in which the computationally demanding structure-refinement process was run on 16 racks of the IBM Blue Gene/L (TM) system at the IBM T. J. Watson Research Center. We highlight recent advances in high-performance computing and discuss future development paths that make use of the next-generation petascale (> 10(12) floating-point operations per second) machines.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Rhiju Das, David Baker
Macromolecular modeling with Rosetta Journal Article
In: Annual review of biochemistry, vol. 77, pp. 363-82, 2008, ISSN: 0066-4154.
@article{227,
title = {Macromolecular modeling with {Rosetta}},
author = {Rhiju Das and David Baker},
issn = {0066-4154},
year = {2008},
date = {2008},
journal = {Annual Review of Biochemistry},
volume = {77},
pages = {363--382},
abstract = {Advances over the past few years have begun to enable prediction and design of macromolecular structures at near-atomic accuracy. Progress has stemmed from the development of reasonably accurate and efficiently computed all-atom potential functions as well as effective conformational sampling strategies appropriate for searching a highly rugged energy landscape, both driven by feedback from structure prediction and design tests. A unified energetic and kinematic framework in the Rosetta program allows a wide range of molecular modeling problems, from fibril structure prediction to RNA folding to the design of new protein interfaces, to be readily investigated and highlights areas for improvement. The methodology enables the creation of novel molecules with useful functions and holds promise for accelerating experimental structural inference. Emerging connections to crystallographic phasing, NMR modeling, and lower-resolution approaches are described and critically assessed.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Erkang Fan, David Baker, Stanley Fields, Michael H Gelb, Frederick S Buckner, Wesley C Van Voorhis, Eric Phizicky, Mark Dumont, Christopher Mehlin, Elizabeth Grayhack, Mark Sullivan, Christophe Verlinde, George Detitta, Deirdre R Meldrum, Ethan A Merritt, Thomas Earnest, Michael Soltis, Frank Zucker, Peter J Myler, Lori Schoenfeld, David E Kim, Liz Worthey, Doug Lacount, Marissa Vignali, Jizhen Li, Somnath Mondal, Archna Massey, Brian Carroll, Stacey Gulde, Joseph Luft, Larry Desoto, Mark Holl, Jonathan Caruthers, Jürgen Bosch, Mark Robien, Tracy Arakaki, Margaret Holmes, Isolde Le Trong, Wim G J Hol
Structural genomics of pathogenic protozoa: an overview Journal Article
In: Methods in molecular biology, vol. 426, pp. 497-513, 2008, ISSN: 1064-3745.
@article{225,
title = {Structural genomics of pathogenic protozoa: an overview},
author = {Erkang Fan and David Baker and Stanley Fields and Michael H Gelb and Frederick S Buckner and Van Voorhis, Wesley C and Eric Phizicky and Mark Dumont and Christopher Mehlin and Elizabeth Grayhack and Mark Sullivan and Christophe Verlinde and George Detitta and Deirdre R Meldrum and Ethan A Merritt and Thomas Earnest and Michael Soltis and Frank Zucker and Peter J Myler and Lori Schoenfeld and David E Kim and Liz Worthey and Doug Lacount and Marissa Vignali and Jizhen Li and Somnath Mondal and Archna Massey and Brian Carroll and Stacey Gulde and Joseph Luft and Larry Desoto and Mark Holl and Jonathan Caruthers and J{\"u}rgen Bosch and Mark Robien and Tracy Arakaki and Margaret Holmes and Le Trong, Isolde and Wim G J Hol},
issn = {1064-3745},
year = {2008},
date = {2008},
journal = {Methods in Molecular Biology},
volume = {426},
pages = {497--513},
abstract = {The Structural Genomics of Pathogenic Protozoa (SGPP) Consortium aimed to determine crystal structures of proteins from trypanosomatid and malaria parasites in a high throughput manner. The pipeline of target selection, protein production, crystallization, and structure determination, is sketched. Special emphasis is given to a number of technology developments including domain prediction, the use of "co-crystallants," and capillary crystallization. "Fragment cocktail crystallography" for medical structural genomics is also described.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2007
Chu Wang, Ora Schueler-Furman, Ingemar André, Nir London, Sarel J Fleishman, Philip Bradley, Bin Qian, David Baker
RosettaDock in CAPRI rounds 6-12 Journal Article
In: Proteins, vol. 69, pp. 758-63, 2007, ISSN: 1097-0134.
@article{112,
title = {{RosettaDock} in {CAPRI} rounds 6-12},
author = {Chu Wang and Ora Schueler-Furman and Ingemar Andr{\'e} and Nir London and Sarel J Fleishman and Philip Bradley and Bin Qian and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/wang07B.pdf},
issn = {1097-0134},
year = {2007},
date = {2007-12-01},
journal = {Proteins},
volume = {69},
pages = {758--763},
abstract = {A challenge in protein-protein docking is to account for the conformational changes in the monomers that occur upon binding. The RosettaDock method, which incorporates sidechain flexibility but keeps the backbone fixed, was found in previous CAPRI rounds (4 and 5) to generate docking models with atomic accuracy, provided that conformational changes were mainly restricted to protein sidechains. In the recent rounds of CAPRI (6-12), large backbone conformational changes occur upon binding for several target complexes. To address these challenges, we explicitly introduced backbone flexibility in our modeling procedures by combining rigid-body docking with protein structure prediction techniques such as modeling variable loops and building homology models. Encouragingly, using this approach we were able to correctly predict a significant backbone conformational change of an interface loop for Target 20 (12 A rmsd between those in the unbound monomer and complex structures), but accounting for backbone flexibility in protein-protein docking is still very challenging because of the significantly larger conformational space, which must be surveyed. Motivated by these CAPRI challenges, we have made progress in reformulating RosettaDock using a "fold-tree" representation, which provides a general framework for treating a wide variety of flexible-backbone docking problems.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Bin Qian, Srivatsan Raman, Rhiju Das, Philip Bradley, Airlie J McCoy, Randy J Read, David Baker
High-resolution structure prediction and the crystallographic phase problem Journal Article
In: Nature, vol. 450, pp. 259-64, 2007, ISSN: 1476-4687.
@article{115,
title = {High-resolution structure prediction and the crystallographic phase problem},
author = {Bin Qian and Srivatsan Raman and Rhiju Das and Philip Bradley and Airlie J McCoy and Randy J Read and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/qian07A.pdf},
issn = {1476-4687},
year = {2007},
date = {2007-11-01},
journal = {Nature},
volume = {450},
pages = {259--264},
abstract = {The energy-based refinement of low-resolution protein structure models to atomic-level accuracy is a major challenge for computational structural biology. Here we describe a new approach to refining protein structure models that focuses sampling in regions most likely to contain errors while allowing the whole structure to relax in a physically realistic all-atom force field. In applications to models produced using nuclear magnetic resonance data and to comparative models based on distant structural homologues, the method can significantly improve the accuracy of the structures in terms of both the backbone conformations and the placement of core side chains. Furthermore, the resulting models satisfy a particularly stringent test: they provide significantly better solutions to the X-ray crystallographic phase problem in molecular replacement trials. Finally, we show that all-atom refinement can produce de novo protein structure predictions that reach the high accuracy required for molecular replacement without any experimental phase information and in the absence of templates suitable for molecular replacement from the Protein Data Bank. These results suggest that the combination of high-resolution structure prediction with state-of-the-art phasing tools may be unexpectedly powerful in phasing crystallographic data for which molecular replacement is hindered by the absence of sufficiently accurate previous models.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ingemar André, Philip Bradley, Chu Wang, David Baker
Prediction of the structure of symmetrical protein assemblies Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 104, pp. 17656-61, 2007, ISSN: 0027-8424.
@article{121,
title = {Prediction of the structure of symmetrical protein assemblies},
author = {Ingemar Andr{\'e} and Philip Bradley and Chu Wang and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/André07A.pdf},
issn = {0027-8424},
year = {2007},
date = {2007-11-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {104},
pages = {17656--17661},
abstract = {Biological supramolecular systems are commonly built up by the self-assembly of identical protein subunits to produce symmetrical oligomers with cyclical, icosahedral, or helical symmetry that play roles in processes ranging from allosteric control and molecular transport to motor action. The large size of these systems often makes them difficult to structurally characterize using experimental techniques. We have developed a computational protocol to predict the structure of symmetrical protein assemblies based on the structure of a single subunit. The method carries out simultaneous optimization of backbone, side chain, and rigid-body degrees of freedom, while restricting the search space to symmetrical conformations. Using this protocol, we can reconstruct, starting from the structure of a single subunit, the structure of cyclic oligomers and the icosahedral virus capsid of satellite panicum virus using a rigid backbone approximation. We predict the oligomeric state of EscJ from the type III secretion system both in its proposed cyclical and crystallized helical form. Finally, we show that the method can recapitulate the structure of an amyloid-like fibril formed by the peptide NNQQNY from the yeast prion protein Sup35 starting from the amino acid sequence alone and searching the complete space of backbone, side chain, and rigid-body degrees of freedom.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
P Barth, J Schonbrun, David Baker
Toward high-resolution prediction and design of transmembrane helical protein structures Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 104, pp. 15682-7, 2007, ISSN: 0027-8424.
@article{120,
title = {Toward high-resolution prediction and design of transmembrane helical protein structures},
author = {P Barth and J Schonbrun and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/barth07A.pdf},
issn = {0027-8424},
year = {2007},
date = {2007-10-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {104},
pages = {15682--15687},
abstract = {The prediction and design at the atomic level of membrane protein structures and interactions is a critical but unsolved challenge. To address this problem, we have developed an all-atom physical model that describes intraprotein and protein-solvent interactions in the membrane environment. We evaluated the ability of the model to recapitulate the energetics and structural specificities of polytopic membrane proteins by using a battery of in silico prediction and design tests. First, in side-chain packing and design tests, the model successfully predicts the side-chain conformations at 73% of nonexposed positions and the native amino acid identities at 34% of positions in naturally occurring membrane proteins. Second, the model predicts significant energy gaps between native and nonnative structures of transmembrane helical interfaces and polytopic membrane proteins. Third, distortions in transmembrane helices are successfully recapitulated in docking experiments by using fragments of ideal helices judiciously defined around helical kinks. Finally, de novo structure prediction reaches near-atomic accuracy (<2.5 A) for several small membrane protein domains (<150 residues). The success of the model highlights the critical role of van der Waals and hydrogen-bonding interactions in the stability and structural specificity of membrane protein structures and sets the stage for the high-resolution prediction and design of complex membrane protein architectures.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Rhiju Das, David Baker
Automated de novo prediction of native-like RNA tertiary structures Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 104, pp. 14664-9, 2007, ISSN: 0027-8424.
@article{117,
title = {Automated de novo prediction of native-like {RNA} tertiary structures},
author = {Rhiju Das and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/das07A.pdf},
issn = {0027-8424},
year = {2007},
date = {2007-09-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {104},
pages = {14664--14669},
abstract = {RNA tertiary structure prediction has been based almost entirely on base-pairing constraints derived from phylogenetic covariation analysis. We describe here a complementary approach, inspired by the Rosetta low-resolution protein structure prediction method, that seeks the lowest energy tertiary structure for a given RNA sequence without using evolutionary information. In a benchmark test of 20 RNA sequences with known structure and lengths of approximately 30 nt, the new method reproduces better than 90% of Watson-Crick base pairs, comparable with the accuracy of secondary structure prediction methods. In more than half the cases, at least one of the top five models agrees with the native structure to better than 4 A rmsd over the backbone. Most importantly, the method recapitulates more than one-third of non-Watson-Crick base pairs seen in the native structures. Tandem stacks of "sheared" base pairs, base triplets, and pseudoknots are among the noncanonical features reproduced in the models. In the cases in which none of the top five models were native-like, higher energy conformations similar to the native structures are still sampled frequently but not assigned low energies. These results suggest that modest improvements in the energy function, together with the incorporation of information from phylogenetic covariance, may allow confident and accurate structure prediction for larger and more complex RNA chains.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Lars Malmström, Michael Riffle, Charlie E M Strauss, Dylan Chivian, Trisha N Davis, Richard Bonneau, David Baker
Superfamily assignments for the yeast proteome through integration of structure prediction with the gene ontology Journal Article
In: PLoS biology, vol. 5, pp. e76, 2007, ISSN: 1545-7885.
@article{116,
title = {Superfamily assignments for the yeast proteome through integration of structure prediction with the {Gene Ontology}},
author = {Lars Malmstr{\"o}m and Michael Riffle and Charlie E M Strauss and Dylan Chivian and Trisha N Davis and Richard Bonneau and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/malmström07A.pdf},
issn = {1545-7885},
year = {2007},
date = {2007-04-01},
journal = {PLoS Biology},
volume = {5},
pages = {e76},
abstract = {Saccharomyces cerevisiae is one of the best-studied model organisms, yet the three-dimensional structure and molecular function of many yeast proteins remain unknown. Yeast proteins were parsed into 14,934 domains, and those lacking sequence similarity to proteins of known structure were folded using the Rosetta de novo structure prediction method on the World Community Grid. This structural data was integrated with process, component, and function annotations from the Saccharomyces Genome Database to assign yeast protein domains to SCOP superfamilies using a simple Bayesian approach. We have predicted the structure of 3,338 putative domains and assigned SCOP superfamily annotations to 581 of them. We have also assigned structural annotations to 7,094 predicted domains based on fold recognition and homology modeling methods. The domain predictions and structural information are available in an online database at http://rd.plos.org/10.1371_journal.pbio.0050076_01.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Andrew M Wollacott, Alexandre Zanghellini, Paul Murphy, David Baker
Prediction of structures of multidomain proteins from structures of the individual domains Journal Article
In: Protein science, vol. 16, pp. 165-75, 2007, ISSN: 0961-8368.
@article{109,
title = {Prediction of structures of multidomain proteins from structures of the individual domains},
author = {Andrew M Wollacott and Alexandre Zanghellini and Paul Murphy and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/wollacott07A.pdf},
issn = {0961-8368},
year = {2007},
date = {2007-02-01},
journal = {Protein Science},
volume = {16},
pages = {165--175},
abstract = {We describe the development of a method for assembling structures of multidomain proteins from structures of isolated domains. The method consists of an initial low-resolution search in which the conformational space of the domain linker is explored using the Rosetta de novo structure prediction method, followed by a high-resolution search in which all atoms are treated explicitly and backbone and side chain degrees of freedom are simultaneously optimized. The method recapitulates, often with very high accuracy, the structures of existing multidomain proteins.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Michael Tress, Jianlin Cheng, Pierre Baldi, Keehyoung Joo, Jinwoo Lee, Joo-Hyun Seo, Jooyoung Lee, David Baker, Dylan Chivian, David Kim, Iakes Ezkurdia
Assessment of predictions submitted for the CASP7 domain prediction category Journal Article
In: Proteins, vol. 69 Suppl 8, pp. 137-51, 2007, ISSN: 1097-0134.
@article{286,
title = {Assessment of predictions submitted for the {CASP7} domain prediction category},
author = {Michael Tress and Jianlin Cheng and Pierre Baldi and Keehyoung Joo and Jinwoo Lee and Joo-Hyun Seo and Jooyoung Lee and David Baker and Dylan Chivian and David Kim and Iakes Ezkurdia},
issn = {1097-0134},
year = {2007},
date = {2007},
journal = {Proteins},
volume = {69},
number = {Suppl 8},
pages = {137--151},
abstract = {This paper details the assessment process and evaluation results for the Critical Assessment of Protein Structure Prediction (CASP7) domain prediction category. Domain predictions were assessed using the Normalized Domain Overlap score introduced in CASP6 and the accuracy of prediction of domain break points. The results of the analysis clearly demonstrate that the best methods are able to make consistently reliable predictions when the target has a structural template, although they are less good when the domain break occurs in a region not covered by a template. The conditions of the experiment meant that it was impossible to draw any conclusions about domain prediction for free modeling targets and it was also difficult to draw many distinctions between the best groups. Two thirds of the targets submitted were single domains and hence regarded as easy to predict. Even those targets defined as having multiple domains always had at least one domain with a similar template structure.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
James D R Knight, Bin Qian, David Baker, Rashmi Kothary
Conservation, variability and the modeling of active protein kinases Journal Article
In: PloS one, vol. 2, pp. e982, 2007, ISSN: 1932-6203.
@article{281,
title = {Conservation, variability and the modeling of active protein kinases},
author = {James D R Knight and Bin Qian and David Baker and Rashmi Kothary},
issn = {1932-6203},
year = {2007},
date = {2007},
journal = {PLoS ONE},
volume = {2},
pages = {e982},
abstract = {The human proteome is rich with protein kinases, and this richness has made the kinase of crucial importance in initiating and maintaining cell behavior. Elucidating cell signaling networks and manipulating their components to understand and alter behavior require well designed inhibitors. These inhibitors are needed in culture to cause and study network perturbations, and the same compounds can be used as drugs to treat disease. Understanding the structural biology of protein kinases in detail, including their commonalities, differences and modes of substrate interaction, is necessary for designing high quality inhibitors that will be of true use for cell biology and disease therapy. To this end, we here report on a structural analysis of all available active-conformation protein kinases, discussing residue conservation, the novel features of such conservation, unique properties of atypical kinases and variability in the context of substrate binding. We also demonstrate how this information can be used for structure prediction. Our findings will be of use not only in understanding protein kinase function and evolution, but they highlight the flaws inherent in kinase drug design as commonly practiced and dictate an appropriate strategy for the sophisticated design of specific inhibitors for use in the laboratory and disease therapy.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Rhiju Das, Bin Qian, Srivatsan Raman, Robert Vernon, James Thompson, Philip Bradley, Sagar Khare, Michael D Tyka, Divya Bhat, Dylan Chivian, David E Kim, William H Sheffler, Lars Malmström, Andrew M Wollacott, Chu Wang, Ingemar André, David Baker
Structure prediction for CASP7 targets using extensive all-atom refinement with Rosetta@home Journal Article
In: Proteins, vol. 69 Suppl 8, pp. 118-28, 2007, ISSN: 1097-0134.
@article{118,
title = {Structure prediction for {CASP7} targets using extensive all-atom refinement with {Rosetta@home}},
author = {Rhiju Das and Bin Qian and Srivatsan Raman and Robert Vernon and James Thompson and Philip Bradley and Sagar Khare and Michael D Tyka and Divya Bhat and Dylan Chivian and David E Kim and William H Sheffler and Lars Malmstr{\"o}m and Andrew M Wollacott and Chu Wang and Ingemar Andr{\'e} and David Baker},
issn = {1097-0134},
year = {2007},
date = {2007},
journal = {Proteins},
volume = {69},
number = {Suppl 8},
pages = {118--128},
abstract = {We describe predictions made using the Rosetta structure prediction methodology for both template-based modeling and free modeling categories in the Seventh Critical Assessment of Techniques for Protein Structure Prediction. For the first time, aggressive sampling and all-atom refinement could be carried out for the majority of targets, an advance enabled by the Rosetta@home distributed computing network. Template-based modeling predictions using an iterative refinement algorithm improved over the best existing templates for the majority of proteins with less than 200 residues. Free modeling methods gave near-atomic accuracy predictions for several targets under 100 residues from all secondary structure classes. These results indicate that refinement with an all-atom energy function, although computationally expensive, is a powerful method for obtaining accurate structure predictions.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2006
Philip Bradley, David Baker
Improved beta-protein structure prediction by multilevel optimization of nonlocal strand pairings and local backbone conformation Journal Article
In: Proteins, vol. 65, pp. 922-9, 2006, ISSN: 1097-0134.
@article{154,
title = {Improved beta-protein structure prediction by multilevel optimization of nonlocal strand pairings and local backbone conformation},
author = {Philip Bradley and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/07/bradley06A.pdf},
issn = {1097-0134},
year = {2006},
date = {2006-12-01},
journal = {Proteins},
volume = {65},
pages = {922--929},
abstract = {Proteins with complex, nonlocal beta-sheets are challenging for de novo structure prediction, due in part to the difficulty of efficiently sampling long-range strand pairings. We present a new, multilevel approach to beta-sheet structure prediction that circumvents this difficulty by reformulating structure generation in terms of a folding tree. Nonlocal connections in this tree allow us to explicitly sample alternative beta-strand pairings while simultaneously exploring local conformational space using backbone torsion-space moves. An iterative, energy-biased resampling strategy is used to explore the space of beta-strand pairings; we expect that such a strategy will be generally useful for searching large conformational spaces with a high degree of combinatorial complexity.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jens Meiler, David Baker
ROSETTALIGAND: protein-small molecule docking with full side-chain flexibility Journal Article
In: Proteins, vol. 65, pp. 538-48, 2006, ISSN: 1097-0134.
@article{159,
title = {{ROSETTALIGAND}: protein-small molecule docking with full side-chain flexibility},
author = {Jens Meiler and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/07/meiler06A.pdf},
issn = {1097-0134},
year = {2006},
date = {2006-11-01},
journal = {Proteins},
volume = {65},
pages = {538--548},
abstract = {Protein-small molecule docking algorithms provide a means to model the structure of protein-small molecule complexes in structural detail and play an important role in drug development. In recent years the necessity of simulating protein side-chain flexibility for an accurate prediction of the protein-small molecule interfaces has become apparent, and an increasing number of docking algorithms probe different approaches to include protein flexibility. Here we describe a new method for docking small molecules into protein binding sites employing a Monte Carlo minimization procedure in which the rigid body position and orientation of the small molecule and the protein side-chain conformations are optimized simultaneously. The energy function comprises van der Waals (VDW) interactions, an implicit solvation model, an explicit orientation hydrogen bonding potential, and an electrostatics model. In an evaluation of the scoring function the computed energy correlated with experimental small molecule binding energy with a correlation coefficient of 0.63 across a diverse set of 229 protein- small molecule complexes. The docking method produced lowest energy models with a root mean square deviation (RMSD) smaller than 2 A in 71 out of 100 protein-small molecule crystal structure complexes (self-docking). In cross-docking calculations in which both protein side-chain and small molecule internal degrees of freedom were varied the lowest energy predictions had RMSDs less than 2 A in 14 of 20 test cases.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Elizabeth R Sprague, Chu Wang, David Baker, Pamela J Bjorkman
Crystal structure of the HSV-1 Fc receptor bound to Fc reveals a mechanism for antibody bipolar bridging Journal Article
In: PLoS biology, vol. 4, pp. e148, 2006, ISSN: 1545-7885.
@article{295,
title = {Crystal structure of the {HSV-1} {Fc} receptor bound to {Fc} reveals a mechanism for antibody bipolar bridging},
author = {Elizabeth R Sprague and Chu Wang and David Baker and Pamela J Bjorkman},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/sprague06A.pdf},
issn = {1545-7885},
year = {2006},
date = {2006-06-01},
journal = {PLoS Biology},
volume = {4},
pages = {e148},
abstract = {Herpes simplex virus type-1 expresses a heterodimeric Fc receptor, gE-gI, on the surfaces of virions and infected cells that binds the Fc region of host immunoglobulin G and is implicated in the cell-to-cell spread of virus. gE-gI binds immunoglobulin G at the basic pH of the cell surface and releases it at the acidic pH of lysosomes, consistent with a role in facilitating the degradation of antiviral antibodies. Here we identify the C-terminal domain of the gE ectodomain (CgE) as the minimal Fc-binding domain and present a 1.78-angstroms CgE structure. A 5-angstroms gE-gI/Fc crystal structure, which was independently verified by a theoretical prediction method, reveals that CgE binds Fc at the C(H)2-C(H)3 interface, the binding site for several mammalian and bacterial Fc-binding proteins. The structure identifies interface histidines that may confer pH-dependent binding and regions of CgE implicated in cell-to-cell spread of virus. The ternary organization of the gE-gI/Fc complex is compatible with antibody bipolar bridging, which can interfere with the antiviral immune response.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Vanita D Sood, David Baker
Recapitulation and design of protein binding peptide structures and sequences Journal Article
In: Journal of molecular biology, vol. 357, pp. 917-27, 2006, ISSN: 0022-2836.
@article{162,
title = {Recapitulation and design of protein binding peptide structures and sequences},
author = {Vanita D Sood and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/sood06A.pdf},
issn = {0022-2836},
year = {2006},
date = {2006-03-01},
journal = {Journal of Molecular Biology},
volume = {357},
pages = {917--927},
abstract = {An important objective of computational protein design is the generation of high affinity peptide inhibitors of protein-peptide interactions, both as a precursor to the development of therapeutics aimed at disrupting disease causing complexes, and as a tool to aid investigators in understanding the role of specific complexes in the cell. We have developed a computational approach to increase the affinity of a protein-peptide complex by designing N or C-terminal extensions which interact with the protein outside the canonical peptide binding pocket. In a first in silico test, we show that by simultaneously optimizing the sequence and structure of three to nine residue peptide extensions starting from short (1-6 residue) peptide stubs in the binding pocket of a peptide binding protein, the approach can recover both the conformations and the sequences of known binding peptides. Comparison with phage display and other experimental data suggests that the peptide extension approach recapitulates naturally occurring peptide binding specificity better than fixed backbone design, and that it should be useful for predicting peptide binding specificities from crystal structures. We then experimentally test the approach by designing extensions for p53 and dystroglycan-based peptides predicted to bind with increased affinity to the Mdm2 oncoprotein and to dystrophin, respectively. The measured increases in affinity are modest, revealing some limitations of the method. Based on these in silico and experimental results, we discuss future applications of the approach to the prediction and design of protein-peptide interactions.},
pubstate = {published},
tppubtype = {article}
}
David Baker
Prediction and design of macromolecular structures and interactions Journal Article
In: Philosophical Transactions of the Royal Society of London, vol. 361, pp. 459-463, 2006, ISSN: 0962-8436.
@article{153,
title = {Prediction and design of macromolecular structures and interactions},
author = {David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/baker06A.pdf},
issn = {0962-8436},
year = {2006},
date = {2006-03-01},
journal = {Philosophical Transactions of the Royal Society of London},
volume = {361},
pages = {459--463},
abstract = {In this article, I summarize recent work from my group directed towards developing an improved model of intra and intermolecular interactions and applying this improved model to the prediction and design of macromolecular structures and interactions. Prediction and design applications can be of great biological interest in their own right, and also provide very stringent and objective tests which drive the improvement of the model and increases in fundamental understanding. I emphasize the results from the prediction and design tests that suggest progress is being made in high-resolution modelling, and that there is hope for reliably and accurately computing structural biology.},
pubstate = {published},
tppubtype = {article}
}
Michael J Thompson, Stuart A Sievers, John Karanicolas, Magdalena I Ivanova, David Baker, David Eisenberg
The 3D profile method for identifying fibril-forming segments of proteins Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 103, pp. 4074-4078, 2006, ISSN: 0027-8424.
@article{163,
title = {The {3D} profile method for identifying fibril-forming segments of proteins},
author = {Michael J Thompson and Stuart A Sievers and John Karanicolas and Magdalena I Ivanova and David Baker and David Eisenberg},
url = {https://www.bakerlab.org/wp-content/uploads/2016/08/thompson06A.pdf},
issn = {0027-8424},
year = {2006},
date = {2006-03-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {103},
pages = {4074--4078},
abstract = {Based on the crystal structure of the cross-beta spine formed by the peptide NNQQNY, we have developed a computational approach for identifying those segments of amyloidogenic proteins that themselves can form amyloid-like fibrils. The approach builds on experiments showing that hexapeptides are sufficient for forming amyloid-like fibrils. Each six-residue peptide of a protein of interest is mapped onto an ensemble of templates, or 3D profile, generated from the crystal structure of the peptide NNQQNY by small displacements of one of the two intermeshed beta-sheets relative to the other. The energy of each mapping of a sequence to the profile is evaluated by using ROSETTADESIGN, and the lowest energy match for a given peptide to the template library is taken as the putative prediction. If the energy of the putative prediction is lower than a threshold value, a prediction of fibril formation is made. This method can reach an accuracy of approximately 80% with a P value of approximately 10(-12) when a conservative energy threshold is used to separate peptides that form fibrils from those that do not. We see enrichment for positive predictions in a set of fibril-forming segments of amyloid proteins, and we illustrate the method with applications to proteins of interest in amyloid research.},
pubstate = {published},
tppubtype = {article}
}