Publications
Preprints available on bioRxiv.
Wu, Qi; Peng, Zhenling; Anishchenko, Ivan; Cong, Qian; Baker, David; Yang, Jianyi
Protein contact prediction using metagenome sequence data and residual neural networks Journal Article
In: Bioinformatics, vol. 36, no. 1, 2019.
@article{Wu2019,
  author    = {Wu, Qi and Peng, Zhenling and Anishchenko, Ivan and Cong, Qian and Baker, David and Yang, Jianyi},
  title     = {Protein contact prediction using metagenome sequence data and residual neural networks},
  journal   = {Bioinformatics},
  volume    = {36},
  number    = {1},
  year      = {2019},
  date      = {2019-06-07},
  doi       = {10.1093/bioinformatics/btz477},
  url       = {https://academic.oup.com/bioinformatics/article/36/1/41/5512356},
  abstract  = {Motivation: Almost all protein residue contact prediction methods rely on the availability of deep multiple sequence alignments (MSAs). However, many proteins from the poorly populated families do not have sufficient number of homologs in the conventional UniProt database. Here we aim to solve this issue by exploring the rich sequence data from the metagenome sequencing projects. Results: Based on the improved MSA constructed from the metagenome sequence data, we developed MapPred, a new deep learning-based contact prediction method. MapPred consists of two component methods, DeepMSA and DeepMeta, both trained with the residual neural networks. DeepMSA was inspired by the recent method DeepCov, which was trained on 441 matrices of covariance features. By considering the symmetry of contact map, we reduced the number of matrices to 231, which makes the training more efficient in DeepMSA. Experiments show that DeepMSA outperforms DeepCov by 10–13% in precision. DeepMeta works by combining predicted contacts and other sequence profile features. Experiments on three benchmark datasets suggest that the contribution from the metagenome sequence data is significant with P-values less than 4.04E-17. MapPred is shown to be complementary and comparable the state-of-the-art methods. The success of MapPred is attributed to three factors: the deeper MSA from the metagenome sequence data, improved feature design in DeepMSA and optimized training by the residual neural networks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Day, Austin L; Greisen, Per; Doyle, Lindsey; Schena, Alberto; Stella, Nephi; Johnsson, Kai; Baker, David; Stoddard, Barry
Unintended specificity of an engineered ligand-binding protein facilitated by unpredicted plasticity of the protein fold Journal Article
In: Protein Engineering, Design and Selection, 2018.
@article{Day2018,
title = {Unintended specificity of an engineered ligand-binding protein facilitated by unpredicted plasticity of the protein fold},
author = {Day, Austin L and Greisen, Per and Doyle, Lindsey and Schena, Alberto and Stella, Nephi and Johnsson, Kai and Baker, David and Stoddard, Barry},
url = {https://dx.doi.org/10.1093/protein/gzy031
https://www.bakerlab.org/wp-content/uploads/2019/02/Day2018.pdf},
doi = {10.1093/protein/gzy031},
year = {2018},
date = {2018-12-19},
journal = {Protein Engineering, Design and Selection},
abstract = {Attempts to create novel ligand-binding proteins often focus on formation of a binding pocket with shape complementarity against the desired ligand (particularly for compounds that lack distinct polar moieties). Although designed proteins often exhibit binding of the desired ligand, in some cases they display unintended recognition behavior. One such designed protein, that was originally intended to bind tetrahydrocannabinol (THC), was found instead to display binding of 25-hydroxy-cholecalciferol (25-D3) and was subjected to biochemical characterization, further selections for enhanced 25-D3 binding affinity and crystallographic analyses. The deviation in specificity is due in part to unexpected alteration of its conformation, corresponding to a significant change of the orientation of an α-helix and an equally large movement of a loop, both of which flank the designed ligand-binding pocket. Those changes led to engineered protein constructs that exhibit significantly more contacts and complementarity towards the 25-D3 ligand than the initial designed protein had been predicted to form towards its intended THC ligand. Molecular dynamics simulations imply that the initial computationally designed mutations may contribute to the movement of the helix. These analyses collectively indicate that accurate prediction and control of backbone dynamics conformation, through a combination of improved conformational sampling and/or de novo structure design, represents a key area of further development for the design and optimization of engineered ligand-binding proteins.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Park, Hahnbeom; Ovchinnikov, Sergey; Kim, David E.; DiMaio, Frank; Baker, David
Protein homology model refinement by large-scale energy optimization Journal Article
In: Proceedings of the National Academy of Sciences, vol. 115, no. 12, pp. 3054–3059, 2018, ISSN: 0027-8424.
@article{Park2018,
title = {Protein homology model refinement by large-scale energy optimization},
author = {Park, Hahnbeom and Ovchinnikov, Sergey and Kim, David E. and DiMaio, Frank and Baker, David},
url = {https://www.pnas.org/content/115/12/3054
https://www.bakerlab.org/wp-content/uploads/2019/01/Park2018_refinement.pdf},
doi = {10.1073/pnas.1719115115},
issn = {0027-8424},
year = {2018},
date = {2018-03-20},
journal = {Proceedings of the National Academy of Sciences},
volume = {115},
number = {12},
pages = {3054--3059},
abstract = {Protein structure refinement by direct global energy optimization has been a longstanding challenge in computational structural biology due to limitations in both energy function accuracy and conformational sampling. This manuscript demonstrates that with recent advances in both areas, refinement can significantly improve protein comparative models based on structures of distant homologues. Proteins fold to their lowest free-energy structures, and hence the most straightforward way to increase the accuracy of a partially incorrect protein structure model is to search for the lowest-energy nearby structure. This direct approach has met with little success for two reasons: first, energy function inaccuracies can lead to false energy minima, resulting in model degradation rather than improvement; and second, even with an accurate energy function, the search problem is formidable because the energy only drops considerably in the immediate vicinity of the global minimum, and there are a very large number of degrees of freedom. Here we describe a large-scale energy optimization-based refinement method that incorporates advances in both search and energy function accuracy that can substantially improve the accuracy of low-resolution homology models. The method refined low-resolution homology models into correct folds for 50 of 84 diverse protein families and generated improved models in recent blind structure prediction experiments. Analyses of the basis for these improvements reveal contributions from both the improvements in conformational sampling techniques and the energy function.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, Sergey; Park, Hahnbeom; Kim, David E.; DiMaio, Frank; Baker, David
Protein structure prediction using Rosetta in CASP12 Journal Article
In: Proteins, 2017.
@article{Ovchinnikov2017,
title = {Protein structure prediction using Rosetta in CASP12},
author = {Ovchinnikov, Sergey and Park, Hahnbeom and Kim, David E. and DiMaio, Frank and Baker, David},
url = {https://onlinelibrary.wiley.com/doi/epdf/10.1002/prot.25390
https://www.bakerlab.org/wp-content/uploads/2019/10/Ovchinnikov_et_al-2018-Proteins__Structure_Function_and_Bioinformatics.pdf},
doi = {10.1002/prot.25390},
year = {2017},
date = {2017-09-22},
journal = {Proteins},
abstract = {We describe several notable aspects of our structure predictions using Rosetta in CASP12 in the free modeling (FM) and refinement (TR) categories. First, we had previously generated (and published) models for most large protein families lacking experimentally determined structures using Rosetta guided by co-evolution based contact predictions, and for several targets these models proved better starting points for comparative modeling than any known crystal structure—our model database thus starts to fulfill one of the goals of the original protein structure initiative. Second, while our “human” group simply submitted ROBETTA models for most targets, for six targets expert intervention improved predictions considerably; the largest improvement was for T0886 where we correctly parsed two discontinuous domains guided by predicted contact maps to accurately identify a structural homolog of the same fold. Third, Rosetta all atom refinement followed by MD simulations led to consistent but small improvements when starting models were close to the native structure, and larger but less consistent improvements when starting models were further away.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Anishchenko, I; Ovchinnikov, S; Kamisetty, H; Baker, D
Origins of coevolution between residues distant in protein 3D structures Journal Article
In: Proceedings of the National Academy of Sciences, vol. 114, no. 34, pp. 9122-9127, 2017.
@article{1000,
title = {Origins of coevolution between residues distant in protein 3D structures},
author = {I Anishchenko and S Ovchinnikov and H Kamisetty and D Baker},
url = {http://www.pnas.org/content/114/34/9122
https://www.bakerlab.org/wp-content/uploads/2018/08/9122.full1_.pdf},
doi = {10.1073/pnas.1702664114},
year = {2017},
date = {2017-08-22},
journal = {Proceedings of the National Academy of Sciences},
volume = {114},
number = {34},
pages = {9122--9127},
abstract = {Residue pairs that directly coevolve in protein families are generally close in protein 3D structures. Here we study the exceptions to this general trend—directly coevolving residue pairs that are distant in protein structures—to determine the origins of evolutionary pressure on spatially distant residues and to understand the sources of error in contact-based structure prediction. Over a set of 4,000 protein families, we find that 25% of directly coevolving residue pairs are separated by more than 5 Å in protein structures and 3% by more than 15 Å. The majority (91%) of directly coevolving residue pairs in the 5–15 Å range are found to be in contact in at least one homologous structure—these exceptions arise from structural variation in the family in the region containing the residues. Thirty-five percent of the exceptions greater than 15 Å are at homo-oligomeric interfaces, 19% arise from family structural variation, and 27% are in repeat proteins likely reflecting alignment errors. Of the remaining long-range exceptions (<1% of the total number of coupled pairs), many can be attributed to close interactions in an oligomeric state. Overall, the results suggest that directly coevolving residue pairs not in repeat proteins are spatially proximal in at least one biologically relevant protein conformation within the family; we find little evidence for direct coupling between residues at spatially separated allosteric and functional sites or for increased direct coupling between residue pairs on putative allosteric pathways connecting them.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, Sergey; Park, Hahnbeom; Varghese, Neha; Huang, Po-Ssu; Pavlopoulos, Georgios A.; Kim, David E.; Kamisetty, Hetunandan; Kyrpides, Nikos C.; Baker, David
Protein structure determination using metagenome sequence data Journal Article
In: Science, vol. 355, no. 6322, pp. 294–298, 2017, ISSN: 0036-8075.
@article{Ovchinnikov294,
title = {Protein structure determination using metagenome sequence data},
author = {Sergey Ovchinnikov and Hahnbeom Park and Neha Varghese and Po-Ssu Huang and Georgios A. Pavlopoulos and David E. Kim and Hetunandan Kamisetty and Nikos C. Kyrpides and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2017/01/ovchinnikov_science_2017.pdf
http://science.sciencemag.org/content/355/6322/294},
doi = {10.1126/science.aah4043},
issn = {0036-8075},
year = {2017},
date = {2017-01-01},
journal = {Science},
volume = {355},
number = {6322},
pages = {294--298},
publisher = {American Association for the Advancement of Science},
abstract = {Fewer than a third of the 14,849 known protein families have at least one member with an experimentally determined structure. This leaves more than 5000 protein families with no structural information. Protein modeling using residue-residue contacts inferred from evolutionary data has been successful in modeling unknown structures, but it requires large numbers of aligned sequences. Ovchinnikov et al. augmented such sequence alignments with metagenome sequence data (see the Perspective by S{\"o}ding). They determined the number of sequences required to allow modeling, developed criteria for model quality, and, where possible, improved modeling by matching predicted contacts to known structures. Their method predicted quality structural models for 614 protein families, of which about 140 represent newly discovered protein folds. Science, this issue p. 294; see also p. 248. Despite decades of work by structural biologists, there are still ~5200 protein families with unknown structure outside the range of comparative modeling. We show that Rosetta structure prediction guided by residue-residue contacts inferred from evolutionary information can accurately model proteins that belong to large families and that metagenome sequence data more than triple the number of protein families with sufficient sequences for accurate modeling. We then integrate metagenome data, contact-based structure matching, and Rosetta structure calculations to generate models for 614 protein families with currently unknown structures; 206 are membrane proteins and 137 have folds not represented in the Protein Data Bank. This approach provides the representative models for large protein families originally envisioned as the goal of the Protein Structure Initiative at a fraction of the cost.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, Sergey; Park, Hahnbeom; Kim, David E.; Liu, Yuan; Wang, Ray Yu-Ruei; Baker, David
Structure prediction using sparse simulated NOE restraints with Rosetta in CASP11 Journal Article
In: Proteins: Structure, Function, and Bioinformatics, 2016, ISSN: 1097-0134.
@article{PROT:PROT25006,
title = {Structure prediction using sparse simulated NOE restraints with Rosetta in CASP11},
author = {Ovchinnikov, Sergey and Park, Hahnbeom and Kim, David E. and Liu, Yuan and Wang, Ray Yu-Ruei and Baker, David},
url = {http://dx.doi.org/10.1002/prot.25006
https://www.bakerlab.org/wp-content/uploads/2016/05/Ovchinnikov_et_al-2016-Proteins__Structure_Function_and_Bioinformatics.pdf},
doi = {10.1002/prot.25006},
issn = {1097-0134},
year = {2016},
date = {2016-01-01},
journal = {Proteins: Structure, Function, and Bioinformatics},
abstract = {In CASP11 we generated protein structure models using simulated ambiguous and unambiguous nuclear Overhauser effect (NOE) restraints with a two stage protocol. Low resolution models were generated guided by the unambiguous restraints using continuous chain folding for alpha and alpha-beta proteins, and iterative annealing for all beta proteins to take advantage of the strand pairing information implicit in the restraints. The Rosetta fragment/model hybridization protocol was then used to recombine and regularize these models, and refine them in the Rosetta full atom energy function guided by both the unambiguous and the ambiguous restraints. Fifteen out of 19 targets were modeled with GDT-TS quality scores greater than 60 for Model 1, significantly improving upon the non-assisted predictions. Our results suggest that atomic level accuracy is achievable using sparse NOE data when there is at least one correctly assigned NOE for every residue. Proteins 2016. © 2016 Wiley Periodicals, Inc.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, S; Kim, DE; Wang, RY; Liu, Y; DiMaio, F; Baker, D
Improved de novo structure prediction in CASP11 by incorporating Co-evolution information into rosetta Journal Article
In: Proteins, 2015.
@article{S2015,
title = {Improved de novo structure prediction in {CASP11} by incorporating co-evolution information into {Rosetta}},
author = {Ovchinnikov, Sergey and Kim, David E. and Wang, Ray Yu-Ruei and Liu, Yuan and DiMaio, Frank and Baker, David},
url = {https://www.bakerlab.org/wp-content/uploads/2015/12/Ovchinnikov_Proteins_2015.pdf},
doi = {10.1002/prot.24974},
year = {2015},
date = {2015-12-17},
journal = {Proteins},
abstract = {We describe CASP11 de novo blind structure predictions made using the Rosetta structure prediction methodology with both automatic and human assisted protocols. Model accuracy was generally improved using co-evolution derived residue-residue contact information as restraints during Rosetta conformational sampling and refinement, particularly when the number of sequences in the family was more than three times the length of the protein. The highlight was the human assisted prediction of T0806, a large and topologically complex target with no homologs of known structure, which had unprecedented accuracy - <3.0 Å root-mean-square deviation (RMSD) from the crystal structure over 223 residues. For this target, we increased the amount of conformational sampling over our fully automated method by employing an iterative hybridization protocol. Our results clearly demonstrate, in a blind prediction scenario, that co-evolution derived contacts can considerably increase the accuracy of template-free structure modeling. This article is protected by copyright. All rights reserved.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, S; Kinch, L; Park, H; Liao, Y; Pei, J; Kim, DE; Kamisetty, H; Grishin, NV; Baker, D
Large-scale determination of previously unsolved protein structures using evolutionary information Journal Article
In: eLife, 2015.
@article{S2015b,
title = {Large-scale determination of previously unsolved protein structures using evolutionary information},
author = {Ovchinnikov, S and Kinch, L and Park, H and Liao, Y and Pei, J and Kim, D E and Kamisetty, H and Grishin, N V and Baker, D},
url = {https://www.bakerlab.org/wp-content/uploads/2016/01/Ovchinnikov_eLife_2015.pdf},
doi = {10.7554/eLife.09248},
year = {2015},
date = {2015-09-03},
journal = {eLife},
abstract = {The prediction of the structures of proteins without detectable sequence similarity to any protein of known structure remains an outstanding scientific challenge. Here we report significant progress in this area. We first describe de novo blind structure predictions of unprecedented accuracy we made for two proteins in large families in the recent CASP11 blind test of protein structure prediction methods by incorporating residue-residue co-evolution information in the Rosetta structure prediction program. We then describe the use of this method to generate structure models for 58 of the 121 large protein families in prokaryotes for which three-dimensional structures are not available. These models, which are posted online for public access, provide structural information for the over 400,000 proteins belonging to the 58 families and suggest hypotheses about mechanism for the subset for which the function is known, and hypotheses about function for the remainder.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Khoury, George A; Liwo, Adam; Khatib, Firas; Zhou, Hongyi; Chopra, Gaurav; Bacardit, Jaume; Bortot, Leandro O; Faccioli, Rodrigo A; Deng, Xin; He, Yi; Krupa, Pawel; Li, Jilong; Mozolewska, Magdalena A; Sieradzan, Adam K; Smadbeck, James; Wirecki, Tomasz; Cooper, Seth; Flatten, Jeff; Xu, Kefan; Baker, David; Cheng, Jianlin; Delbem, Alexandre C B; Floudas, Christodoulos A; Keasar, Chen; Levitt, Michael; Popović, Zoran; Scheraga, Harold A; Skolnick, Jeffrey; Crivelli, Silvia N
WeFold: a coopetition for protein structure prediction. Journal Article
In: Proteins, vol. 82, pp. 1850-68, 2014, ISSN: 1097-0134.
@article{625,
title = {{WeFold}: a coopetition for protein structure prediction},
author = {George A Khoury and Adam Liwo and Firas Khatib and Hongyi Zhou and Gaurav Chopra and Jaume Bacardit and Leandro O Bortot and Rodrigo A Faccioli and Xin Deng and Yi He and Pawel Krupa and Jilong Li and Magdalena A Mozolewska and Adam K Sieradzan and James Smadbeck and Tomasz Wirecki and Seth Cooper and Jeff Flatten and Kefan Xu and David Baker and Jianlin Cheng and Alexandre C B Delbem and Christodoulos A Floudas and Chen Keasar and Michael Levitt and Zoran Popovi{\'c} and Harold A Scheraga and Jeffrey Skolnick and Silvia N Crivelli},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Khoury_Proteins_2014.pdf},
doi = {10.1002/prot.24538},
issn = {1097-0134},
year = {2014},
date = {2014-09-01},
journal = {Proteins},
volume = {82},
pages = {1850--1868},
abstract = {The protein structure prediction problem continues to elude scientists. Despite the introduction of many methods, only modest gains were made over the last decade for certain classes of prediction targets. To address this challenge, a social-media based worldwide collaborative effort, named WeFold, was undertaken by 13 labs. During the collaboration, the laboratories were simultaneously competing with each other. Here, we present the first attempt at "coopetition" in scientific research applied to the protein structure prediction and refinement problems. The coopetition was possible by allowing the participating labs to contribute different components of their protein structure prediction pipelines and create new hybrid pipelines that they tested during CASP10. This manuscript describes both successes and areas needing improvement as identified throughout the first WeFold experiment and discusses the efforts that are underway to advance this initiative. A footprint of all contributions and structures are publicly accessible at http://www.wefold.org.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Chen, Kuang-Yui M; Sun, Jiaming; Salvo, Jason S; Baker, David; Barth, Patrick
High-resolution modeling of transmembrane helical protein structures from distant homologues. Journal Article
In: PLoS computational biology, vol. 10, pp. e1003636, 2014, ISSN: 1553-7358.
@article{622,
title = {High-resolution modeling of transmembrane helical protein structures from distant homologues},
author = {Kuang-Yui M Chen and Jiaming Sun and Jason S Salvo and David Baker and Patrick Barth},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Chen_PLOS_2014.pdf},
doi = {10.1371/journal.pcbi.1003636},
issn = {1553-7358},
year = {2014},
date = {2014-05-01},
journal = {PLoS Computational Biology},
volume = {10},
pages = {e1003636},
abstract = {Eukaryotic transmembrane helical (TMH) proteins perform a wide diversity of critical cellular functions, but remain structurally largely uncharacterized and their high-resolution structure prediction is currently hindered by the lack of close structural homologues. To address this problem, we present a novel and generic method for accurately modeling large TMH protein structures from distant homologues exhibiting distinct loop and TMH conformations. Models of the adenosine A2AR and chemokine CXCR4 receptors were first ranked in GPCR-DOCK blind prediction contests in the receptor structure accuracy category. In a benchmark of 50 TMH protein homolog pairs of diverse topology (from 5 to 12 TMHs), size (from 183 to 420 residues) and sequence identity (from 15% to 70%), the method improves most starting templates, and achieves near-atomic accuracy prediction of membrane-embedded regions. Unlike starting templates, the models are of suitable quality for computer-based protein engineering: redesigned models and redesigned X-ray structures exhibit very similar native interactions. The method should prove useful for the atom-level modeling and design of a large fraction of structurally uncharacterized TMH proteins from a wide range of structural homologues.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ovchinnikov, Sergey; Kamisetty, Hetunandan; Baker, David
Robust and accurate prediction of residue-residue interactions across protein interfaces using evolutionary information. Journal Article
In: eLife, vol. 3, pp. e02030, 2014, ISSN: 2050-084X.
@article{540,
title = {Robust and accurate prediction of residue-residue interactions across protein interfaces using evolutionary information},
author = {Sergey Ovchinnikov and Hetunandan Kamisetty and David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Ovchinnikov_2014A.pdf},
doi = {10.7554/eLife.02030},
issn = {2050-084X},
year = {2014},
date = {2014-05-01},
journal = {eLife},
volume = {3},
pages = {e02030},
abstract = {Do the amino acid sequence identities of residues that make contact across protein interfaces covary during evolution? If so, such covariance could be used to predict contacts across interfaces and assemble models of biological complexes. We find that residue pairs identified using a pseudo-likelihood-based method to covary across protein-protein interfaces in the 50S ribosomal unit and 28 additional bacterial protein complexes with known structure are almost always in contact in the complex, provided that the number of aligned sequences is greater than the average length of the two proteins. We use this method to make subunit contact predictions for an additional 36 protein complexes with unknown structures, and present models based on these predictions for the tripartite ATP-independent periplasmic (TRAP) transporter, the tripartite efflux system, the pyruvate formate lyase-activating enzyme complex, and the methionine ABC transporter. DOI: http://dx.doi.org/10.7554/eLife.02030.001.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Baker, David
Protein folding, structure prediction and design. Journal Article
In: Biochemical Society transactions, vol. 42, pp. 225-9, 2014, ISSN: 1470-8752.
@article{529,
title = {Protein folding, structure prediction and design},
author = {David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Baker_BiochemSocTrans_2014.pdf},
doi = {10.1042/BST20130055},
issn = {1470-8752},
year = {2014},
date = {2014-04-01},
journal = {Biochemical Society Transactions},
volume = {42},
pages = {225--229},
abstract = {I describe how experimental studies of protein folding have led to advances in protein structure prediction and protein design. I describe the finding that protein sequences are not optimized for rapid folding, the contact order-protein folding rate correlation, the incorporation of experimental insights into protein folding into the Rosetta protein structure production methodology and the use of this methodology to determine structures from sparse experimental data. I then describe the inverse problem (protein design) and give an overview of recent work on designing proteins with new structures and functions. I also describe the contributions of the general public to these efforts through the Rosetta@home distributed computing project and the FoldIt interactive protein folding and design game.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Moretti, Rocco; Fleishman, Sarel J; Agius, Rudi; Torchala, Mieczyslaw; Bates, Paul A; Kastritis, Panagiotis L; Rodrigues, João P G L M; Trellet, Mikaël; Bonvin, Alexandre M J J; Cui, Meng; Rooman, Marianne; Gillis, Dimitri; Dehouck, Yves; Moal, Iain; Romero-Durana, Miguel; Perez-Cano, Laura; Pallara, Chiara; Jimenez, Brian; Fernandez-Recio, Juan; Flores, Samuel; Pacella, Michael; Kilambi, Krishna Praneeth; Gray, Jeffrey J; Popov, Petr; Grudinin, Sergei; Esquivel-Rodríguez, Juan; Kihara, Daisuke; Zhao, Nan; Korkin, Dmitry; Zhu, Xiaolei; Demerdash, Omar N A; Mitchell, Julie C; Kanamori, Eiji; Tsuchiya, Yuko; Nakamura, Haruki; Lee, Hasup; Park, Hahnbeom; Seok, Chaok; Sarmiento, Jamica; Liang, Shide; Teraguchi, Shusuke; Standley, Daron M; Shimoyama, Hiromitsu; Terashi, Genki; Takeda-Shitaka, Mayuko; Iwadate, Mitsuo; Umeyama, Hideaki; Beglov, Dmitri; Hall, David R; Kozakov, Dima; Vajda, Sandor; Pierce, Brian G; Hwang, Howook; Vreven, Thom; Weng, Zhiping; Huang, Yangyu; Li, Haotian; Yang, Xiufeng; Ji, Xiaofeng; Liu, Shiyong; Xiao, Yi; Zacharias, Martin; Qin, Sanbo; Zhou, Huan-Xiang; Huang, Sheng-You; Zou, Xiaoqin; Velankar, Sameer; Janin, Joël; Wodak, Shoshana J; Baker, David
Community-wide evaluation of methods for predicting the effect of mutations on protein-protein interactions. Journal Article
In: Proteins, vol. 81, pp. 1980-7, 2013, ISSN: 1097-0134.
@article{505,
title = {Community-wide evaluation of methods for predicting the effect of mutations on protein-protein interactions},
author = {Rocco Moretti and Sarel J Fleishman and Rudi Agius and Mieczyslaw Torchala and Paul A Bates and Panagiotis L Kastritis and Jo{\~a}o P G L M Rodrigues and Mika{\"e}l Trellet and Alexandre M J J Bonvin and Meng Cui and Marianne Rooman and Dimitri Gillis and Yves Dehouck and Iain Moal and Miguel Romero-Durana and Laura Perez-Cano and Chiara Pallara and Brian Jimenez and Juan Fernandez-Recio and Samuel Flores and Michael Pacella and Krishna Praneeth Kilambi and Jeffrey J Gray and Petr Popov and Sergei Grudinin and Juan Esquivel-Rodr{\'\i}guez and Daisuke Kihara and Nan Zhao and Dmitry Korkin and Xiaolei Zhu and Omar N A Demerdash and Julie C Mitchell and Eiji Kanamori and Yuko Tsuchiya and Haruki Nakamura and Hasup Lee and Hahnbeom Park and Chaok Seok and Jamica Sarmiento and Shide Liang and Shusuke Teraguchi and Daron M Standley and Hiromitsu Shimoyama and Genki Terashi and Mayuko Takeda-Shitaka and Mitsuo Iwadate and Hideaki Umeyama and Dmitri Beglov and David R Hall and Dima Kozakov and Sandor Vajda and Brian G Pierce and Howook Hwang and Thom Vreven and Zhiping Weng and Yangyu Huang and Haotian Li and Xiufeng Yang and Xiaofeng Ji and Shiyong Liu and Yi Xiao and Martin Zacharias and Sanbo Qin and Huan-Xiang Zhou and Sheng-You Huang and Xiaoqin Zou and Sameer Velankar and Jo{\"e}l Janin and Shoshana J Wodak and David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Moretti_Proteins_2013.pdf},
doi = {10.1002/prot.24356},
issn = {1097-0134},
year = {2013},
date = {2013-11-01},
journal = {Proteins},
volume = {81},
pages = {1980--1987},
abstract = {Community-wide blind prediction experiments such as CAPRI and CASP provide an objective measure of the current state of predictive methodology. Here we describe a community-wide assessment of methods to predict the effects of mutations on protein-protein interactions. Twenty-two groups predicted the effects of comprehensive saturation mutagenesis for two designed influenza hemagglutinin binders and the results were compared with experimental yeast display enrichment data obtained using deep sequencing. The most successful methods explicitly considered the effects of mutation on monomer stability in addition to binding affinity, carried out explicit side-chain sampling and backbone relaxation, evaluated packing, electrostatic, and solvation effects, and correctly identified around a third of the beneficial mutations. Much room for improvement remains for even the best techniques, and large-scale fitness landscapes should continue to provide an excellent test bed for continued evaluation of both existing and new prediction methodologies.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Vernon, Robert; Shen, Yang; Baker, David; Lange, Oliver F
Improved chemical shift based fragment selection for CS-Rosetta using Rosetta3 fragment picker. Journal Article
In: Journal of biomolecular NMR, vol. 57, pp. 117-27, 2013, ISSN: 1573-5001.
@article{508,
title = {Improved chemical shift based fragment selection for {CS-Rosetta} using {Rosetta3} fragment picker.},
author = {Vernon, Robert and Shen, Yang and Baker, David and Lange, Oliver F},
doi = {10.1007/s10858-013-9772-4},
issn = {1573-5001},
year = {2013},
date = {2013-10-01},
journal = {Journal of biomolecular NMR},
volume = {57},
pages = {117--127},
abstract = {A new fragment picker has been developed for CS-Rosetta that combines beneficial features of the original fragment picker, MFR, used with CS-Rosetta, and the fragment picker, NNMake, that was used for purely sequence based fragment selection in the context of ROSETTA de-novo structure prediction. Additionally, the new fragment picker has reduced sensitivity to outliers and other difficult to match data points rendering the protocol more robust and less likely to introduce bias towards wrong conformations in cases where data is bad, missing or inconclusive. The fragment picker protocol gives significant improvements on 6 of 23 CS-Rosetta targets. An independent benchmark on 39 protein targets, whose NMR data sets were published only after protocol optimization had been finished, also show significantly improved performance for the new fragment picker (van der Schot et al. in J Biomol NMR, 2013).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Kamisetty, Hetunandan; Ovchinnikov, Sergey; Baker, David
Assessing the utility of coevolution-based residue-residue contact predictions in a sequence- and structure-rich era. Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 110, pp. 15674-9, 2013, ISSN: 1091-6490.
@article{498,
title = {Assessing the utility of coevolution-based residue-residue contact predictions in a sequence- and structure-rich era.},
author = {Kamisetty, Hetunandan and Ovchinnikov, Sergey and Baker, David},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Kamisetty_PNAS_2013.pdf},
doi = {10.1073/pnas.1314045110},
issn = {1091-6490},
year = {2013},
date = {2013-09-01},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {110},
pages = {15674--15679},
abstract = {Recently developed methods have shown considerable promise in predicting residue-residue contacts in protein 3D structures using evolutionary covariance information. However, these methods require large numbers of evolutionarily related sequences to robustly assess the extent of residue covariation, and the larger the protein family, the more likely that contact information is unnecessary because a reasonable model can be built based on the structure of a homolog. Here we describe a method that integrates sequence coevolution and structural context information using a pseudolikelihood approach, allowing more accurate contact predictions from fewer homologous sequences. We rigorously assess the utility of predicted contacts for protein structure prediction using large and representative sequence and structure databases from recent structure prediction experiments. We find that contact predictions are likely to be accurate when the number of aligned sequences (with sequence redundancy reduced to 90\%) is greater than five times the length of the protein, and that accurate predictions are likely to be useful for structure modeling if the aligned sequences are more similar to the protein of interest than to the closest homolog of known structure. These conditions are currently met by 422 of the protein families collected in the Pfam database.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
van der Schot, Gijs; Zhang, Zaiyong; Vernon, Robert; Shen, Yang; Vranken, Wim F; Baker, David; Bonvin, Alexandre M J J; Lange, Oliver F
Improving 3D structure prediction from chemical shift data. Journal Article
In: Journal of biomolecular NMR, vol. 57, pp. 27-35, 2013, ISSN: 1573-5001.
@article{507,
title = {Improving {3D} structure prediction from chemical shift data.},
author = {van der Schot, Gijs and Zhang, Zaiyong and Vernon, Robert and Shen, Yang and Vranken, Wim F and Baker, David and Bonvin, Alexandre M J J and Lange, Oliver F},
doi = {10.1007/s10858-013-9762-6},
issn = {1573-5001},
year = {2013},
date = {2013-09-01},
journal = {Journal of biomolecular NMR},
volume = {57},
pages = {27--35},
abstract = {We report advances in the calculation of protein structures from chemical shift nuclear magnetic resonance data alone. Our previously developed method, CS-Rosetta, assembles structures from a library of short protein fragments picked from a large library of protein structures using chemical shifts and sequence information. Here we demonstrate that combination of a new and improved fragment picker and the iterative sampling algorithm RASREC yield significant improvements in convergence and accuracy. Moreover, we introduce improved criteria for assessing the accuracy of the models produced by the method. The method was tested on 39 proteins in the 50-100 residue size range and yields reliable structures in 70~\% of the cases. All structures that passed the reliability filter were accurate ($<$2~{\AA} RMSD from the reference).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Kim, David E; DiMaio, Frank; Wang, Ray Yu-Ruei; Song, Yifan; Baker, David
One contact for every twelve residues allows robust and accurate topology-level protein structure modeling. Journal Article
In: Proteins, 2013, ISSN: 1097-0134.
@article{506,
title = {One contact for every twelve residues allows robust and accurate topology-level protein structure modeling.},
author = {Kim, David E and DiMaio, Frank and Wang, Ray Yu-Ruei and Song, Yifan and Baker, David},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Kim_Proteins_2013.pdf},
doi = {10.1002/prot.24374},
issn = {1097-0134},
year = {2013},
date = {2013-07-01},
journal = {Proteins},
abstract = {A number of methods have been described for identifying pairs of contacting residues in protein three-dimensional structures, but it is unclear how many contacts are required for accurate structure modeling. The CASP10 assisted contact experiment provided a blind test of contact guided protein structure modeling. We describe the models generated for these contact guided prediction challenges using the Rosetta structure modeling methodology. For nearly all cases, the submitted models had the correct overall topology, and in some cases, they had near atomic-level accuracy; for example the model of the 384 residue homo-oligomeric tetramer (Tc680o) had only 2.9~{\AA} root-mean-square deviation (RMSD) from the crystal structure. Our results suggest that experimental and bioinformatic methods for obtaining contact information may need to generate only one correct contact for every 12 residues in the protein to allow accurate topology level modeling. Proteins 2013;. \textcopyright{} 2013 Wiley Periodicals, Inc.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Kiss, Gert; Celebi-Ölçüm, Nihan; Moretti, Rocco; Baker, David; Houk, K N
Computational enzyme design Journal Article
In: Angewandte Chemie (International ed. in English), vol. 52, pp. 5700-25, 2013, ISSN: 1521-3773.
@article{472,
title = {Computational enzyme design},
author = {Kiss, Gert and Celebi-{\"O}l{\c{c}}{\"u}m, Nihan and Moretti, Rocco and Baker, David and Houk, K N},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Kiss_AngewChemIntEd_2013.pdf},
doi = {10.1002/anie.201204077},
issn = {1521-3773},
year = {2013},
date = {2013-05-01},
journal = {Angewandte Chemie (International ed. in English)},
volume = {52},
pages = {5700--5725},
abstract = {Recent developments in computational chemistry and biology have come together in the ``inside-out'' approach to enzyme engineering. Proteins have been designed to catalyze reactions not previously accelerated in nature. Some of these proteins fold and act as catalysts, but the success rate is still low. The achievements and limitations of the current technology are highlighted and contrasted to other protein engineering techniques. On its own, computational ``inside-out'' design can lead to the production of catalytically active and selective proteins, but their kinetic performances fall short of natural enzymes. When combined with directed evolution, molecular dynamics simulations, and crowd-sourced structure-prediction approaches, however, computational designs can be significantly improved in terms of binding, turnover, and thermal stability.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Molski, MA; Goodman, JL; Chou, FC; Baker, D; Das, R; Schepartz, A
Remodeling a beta-peptide bundle Journal Article
In: Chemical Science, vol. 4, pp. 319-324, 2013, ISSN: 2041-6520.
@article{605,
title = {Remodeling a beta-peptide bundle},
author = {Molski, M. A. and Goodman, J. L. and Chou, F. C. and Baker, D. and Das, R. and Schepartz, A.},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/remodelingabeta_Baker2013.pdf},
doi = {10.1039/c2sc21117c},
issn = {2041-6520},
year = {2013},
date = {2013},
journal = {Chemical Science},
volume = {4},
pages = {319--324},
abstract = {Natural biopolymers fold with fidelity, burying diverse side chains into well-packed cores and protecting their backbones from solvent. Certain beta-peptide oligomers assemble into bundles of defined octameric stoichiometry that resemble natural proteins in many respects. These beta-peptide bundles are thermostable, fold cooperatively, exchange interior amide N-H protons slowly, exclude hydrophobic dyes, and can be characterized at high resolution using X-ray crystallography - just like many proteins found in nature. But unlike natural proteins, all octameric beta-peptide bundles contain a sequence-uniform hydrophobic core composed of 32 leucine side chains. Here we apply rational design principles, including the Rosetta computational design methodology, to introduce sequence diversity into the bundle core while retaining the characteristic beta-peptide bundle fold. Using circular dichroism spectroscopy and analytical ultracentrifugation, we confirmed the prediction that an octameric bundle still assembles upon a major remodelling of its core: the mutation of sixteen core beta-homo-leucine side chains into sixteen beta-homo-phenylalanine side chains. Nevertheless, the bundle containing a partially beta-homo-phenylalanine core poorly protects interior amide protons from exchange, suggesting molten-globule-like properties. We further improve stability by the incorporation of eight beta-homo-pentafluorophenyalanine side chains, giving an assembly with amide protection factors comparable to prior well-structured bundles. By demonstrating that their cores tolerate significant sequence variation, the beta-peptide bundles reported here represent a starting point for the ``bottom-up'' construction of beta-peptide assemblies possessing both structure and sophisticated function.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2025
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2024
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2023
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2022
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2021
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2020
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2019
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2018
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2017-1988
ALL PAPERS
Sorry, no publications matched your criteria.