Publications
Preprints available on bioRxiv.
Humphreys, Ian R.; Zhang, Jing; Baek, Minkyung; Wang, Yaxi; Krishnakumar, Aditya; Pei, Jimin; Anishchenko, Ivan; Tower, Catherine A.; Jackson, Blake A.; Warrier, Thulasi; Hung, Deborah T.; Peterson, S. Brook; Mougous, Joseph D.; Cong, Qian; Baker, David
Protein interactions in human pathogens revealed through deep learning Journal Article
In: Nature Microbiology, 2024, ISSN: 2058-5276.
@article{Humphreys2024,
  author    = {Humphreys, Ian R. and Zhang, Jing and Baek, Minkyung and Wang, Yaxi and Krishnakumar, Aditya and Pei, Jimin and Anishchenko, Ivan and Tower, Catherine A. and Jackson, Blake A. and Warrier, Thulasi and Hung, Deborah T. and Peterson, S. Brook and Mougous, Joseph D. and Cong, Qian and Baker, David},
  title     = {Protein interactions in human pathogens revealed through deep learning},
  journal   = {Nature Microbiology},
  publisher = {Springer Science and Business Media LLC},
  year      = {2024},
  date      = {2024-09-18},
  urldate   = {2024-09-18},
  issn      = {2058-5276},
  doi       = {10.1038/s41564-024-01791-x},
  url       = {https://www.nature.com/articles/s41564-024-01791-x, Nature Microbiology [Open Access]},
  abstract  = {Identification of bacterial protein–protein interactions and predicting the structures of these complexes could aid in the understanding of pathogenicity mechanisms and developing treatments for infectious diseases. Here we developed RoseTTAFold2-Lite, a rapid deep learning model that leverages residue–residue coevolution and protein structure prediction to systematically identify and structurally characterize protein–protein interactions at the proteome-wide scale. Using this pipeline, we searched through 78 million pairs of proteins across 19 human bacterial pathogens and identified 1,923 confidently predicted complexes involving essential genes and 256 involving virulence factors. Many of these complexes were not previously known; we experimentally tested 12 such predictions, and half of them were validated. The predicted interactions span core metabolic and virulence pathways ranging from post-transcriptional modification to acid neutralization to outer-membrane machinery and should contribute to our understanding of the biology of these important pathogens and the design of drugs to combat them.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
An, Linna; Said, Meerit; Tran, Long; Majumder, Sagardip; Goreshnik, Inna; Lee, Gyu Rie; Juergens, David; Dauparas, Justas; Anishchenko, Ivan; Coventry, Brian; Bera, Asim K.; Kang, Alex; Levine, Paul M.; Alvarez, Valentina; Pillai, Arvind; Norn, Christoffer; Feldman, David; Zorine, Dmitri; Hicks, Derrick R.; Li, Xinting; Sanchez, Mariana Garcia; Vafeados, Dionne K.; Salveson, Patrick J.; Vorobieva, Anastassia A.; Baker, David
Binding and sensing diverse small molecules using shape-complementary pseudocycles Journal Article
In: Science, 2024.
@article{An2024,
  author    = {An, Linna and Said, Meerit and Tran, Long and Majumder, Sagardip and Goreshnik, Inna and Lee, Gyu Rie and Juergens, David and Dauparas, Justas and Anishchenko, Ivan and Coventry, Brian and Bera, Asim K. and Kang, Alex and Levine, Paul M. and Alvarez, Valentina and Pillai, Arvind and Norn, Christoffer and Feldman, David and Zorine, Dmitri and Hicks, Derrick R. and Li, Xinting and Sanchez, Mariana Garcia and Vafeados, Dionne K. and Salveson, Patrick J. and Vorobieva, Anastassia A. and Baker, David},
  title     = {Binding and sensing diverse small molecules using shape-complementary pseudocycles},
  journal   = {Science},
  publisher = {American Association for the Advancement of Science (AAAS)},
  year      = {2024},
  date      = {2024-07-19},
  urldate   = {2024-07-19},
  doi       = {10.1126/science.adn3780},
  url       = {https://www.science.org/doi/10.1126/science.adn3780, Science},
  abstract  = {We describe an approach for designing high-affinity small molecule–binding proteins poised for downstream sensing. We use deep learning–generated pseudocycles with repeating structural units surrounding central binding pockets with widely varying shapes that depend on the geometry and number of the repeat units. We dock small molecules of interest into the most shape complementary of these pseudocycles, design the interaction surfaces for high binding affinity, and experimentally screen to identify designs with the highest affinity. We obtain binders to four diverse molecules, including the polar and flexible methotrexate and thyroxine. Taking advantage of the modular repeat structure and central binding pockets, we construct chemically induced dimerization systems and low-noise nanopore sensors by splitting designs into domains that reassemble upon ligand addition.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Goverde, Casper A.; Pacesa, Martin; Goldbach, Nicolas; Dornfeld, Lars J.; Balbi, Petra E. M.; Georgeon, Sandrine; Rosset, Stéphane; Kapoor, Srajan; Choudhury, Jagrity; Dauparas, Justas; Schellhaas, Christian; Kozlov, Simon; Baker, David; Ovchinnikov, Sergey; Vecchio, Alex J.; Correia, Bruno E.
Computational design of soluble and functional membrane protein analogues Journal Article
In: Nature, 2024, ISSN: 1476-4687.
@article{Goverde2024,
  author    = {Goverde, Casper A. and Pacesa, Martin and Goldbach, Nicolas and Dornfeld, Lars J. and Balbi, Petra E. M. and Georgeon, Sandrine and Rosset, Stéphane and Kapoor, Srajan and Choudhury, Jagrity and Dauparas, Justas and Schellhaas, Christian and Kozlov, Simon and Baker, David and Ovchinnikov, Sergey and Vecchio, Alex J. and Correia, Bruno E.},
  title     = {Computational design of soluble and functional membrane protein analogues},
  journal   = {Nature},
  publisher = {Springer Science and Business Media LLC},
  year      = {2024},
  date      = {2024-06-19},
  urldate   = {2024-06-19},
  issn      = {1476-4687},
  doi       = {10.1038/s41586-024-07601-y},
  url       = {https://www.nature.com/articles/s41586-024-07601-y, Nature [Open Access]},
  abstract  = {De novo design of complex protein folds using solely computational means remains a substantial challenge. Here we use a robust deep learning pipeline to design complex folds and soluble analogues of integral membrane proteins. Unique membrane topologies, such as those from G-protein-coupled receptors, are not found in the soluble proteome, and we demonstrate that their structural features can be recapitulated in solution. Biophysical analyses demonstrate the high thermal stability of the designs, and experimental structures show remarkable design accuracy. The soluble analogues were functionalized with native structural motifs, as a proof of concept for bringing membrane protein functions to the soluble proteome, potentially enabling new approaches in drug discovery. In summary, we have designed complex protein topologies and enriched them with functionalities from membrane proteins, with high experimental success rates, leading to a de facto expansion of the functional soluble fold space.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Krishna, Rohith; Wang, Jue; Ahern, Woody; Sturmfels, Pascal; Venkatesh, Preetham; Kalvet, Indrek; Lee, Gyu Rie; Morey-Burrows, Felix S.; Anishchenko, Ivan; Humphreys, Ian R.; McHugh, Ryan; Vafeados, Dionne; Li, Xinting; Sutherland, George A.; Hitchcock, Andrew; Hunter, C. Neil; Kang, Alex; Brackenbrough, Evans; Bera, Asim K.; Baek, Minkyung; DiMaio, Frank; Baker, David
Generalized biomolecular modeling and design with RoseTTAFold All-Atom Journal Article
In: Science, 2024.
@article{Krishna2024,
  author    = {Krishna, Rohith and Wang, Jue and Ahern, Woody and Sturmfels, Pascal and Venkatesh, Preetham and Kalvet, Indrek and Lee, Gyu Rie and Morey-Burrows, Felix S. and Anishchenko, Ivan and Humphreys, Ian R. and McHugh, Ryan and Vafeados, Dionne and Li, Xinting and Sutherland, George A. and Hitchcock, Andrew and Hunter, C. Neil and Kang, Alex and Brackenbrough, Evans and Bera, Asim K. and Baek, Minkyung and DiMaio, Frank and Baker, David},
  title     = {Generalized biomolecular modeling and design with RoseTTAFold All-Atom},
  journal   = {Science},
  publisher = {American Association for the Advancement of Science (AAAS)},
  year      = {2024},
  date      = {2024-03-07},
  urldate   = {2024-03-07},
  doi       = {10.1126/science.adl2528},
  url       = {https://www.science.org/stoken/author-tokens/ST-1739/full, Science [Full Access Link]
https://www.bakerlab.org/wp-content/uploads/2024/03/science.adl2528.pdf, PDF},
  abstract  = {Deep learning methods have revolutionized protein structure prediction and design but are currently limited to protein-only systems. We describe RoseTTAFold All-Atom (RFAA) which combines a residue-based representation of amino acids and DNA bases with an atomic representation of all other groups to model assemblies containing proteins, nucleic acids, small molecules, metals, and covalent modifications given their sequences and chemical structures. By fine tuning on denoising tasks we obtain RFdiffusionAA, which builds protein structures around small molecules. Starting from random distributions of amino acid residues surrounding target small molecules, we design and experimentally validate, through crystallography and binding measurements, proteins that bind the cardiac disease therapeutic digoxigenin, the enzymatic cofactor heme, and the light harvesting molecule bilin.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Vázquez Torres, Susana; Leung, Philip J Y; Venkatesh, Preetham; Lutz, Isaac D; Hink, Fabian; Huynh, Huu-Hien; Becker, Jessica; Yeh, Andy Hsien-Wei; Juergens, David; Bennett, Nathaniel R; Hoofnagle, Andrew N; Huang, Eric; MacCoss, Michael J; Expòsit, Marc; Lee, Gyu Rie; Bera, Asim K; Kang, Alex; De La Cruz, Joshmyn; Levine, Paul M; Li, Xinting; Lamb, Mila; Gerben, Stacey R; Murray, Analisa; Heine, Piper; Korkmaz, Elif Nihal; Nivala, Jeff; Stewart, Lance; Watson, Joseph L; Rogers, Joseph M; Baker, David
De novo design of high-affinity binders of bioactive helical peptides Journal Article
In: Nature, 2023, ISSN: 1476-4687.
@article{pmid38109936,
  author    = {Vázquez Torres, Susana and Leung, Philip J Y and Venkatesh, Preetham and Lutz, Isaac D and Hink, Fabian and Huynh, Huu-Hien and Becker, Jessica and Yeh, Andy Hsien-Wei and Juergens, David and Bennett, Nathaniel R and Hoofnagle, Andrew N and Huang, Eric and MacCoss, Michael J and Expòsit, Marc and Lee, Gyu Rie and Bera, Asim K and Kang, Alex and De La Cruz, Joshmyn and Levine, Paul M and Li, Xinting and Lamb, Mila and Gerben, Stacey R and Murray, Analisa and Heine, Piper and Korkmaz, Elif Nihal and Nivala, Jeff and Stewart, Lance and Watson, Joseph L and Rogers, Joseph M and Baker, David},
  title     = {De novo design of high-affinity binders of bioactive helical peptides},
  journal   = {Nature},
  year      = {2023},
  date      = {2023-12-01},
  urldate   = {2023-12-01},
  issn      = {1476-4687},
  doi       = {10.1038/s41586-023-06953-1},
  url       = {https://www.nature.com/articles/s41586-023-06953-1, Nature [Open Access]},
  abstract  = {Many peptide hormones form an alpha-helix upon binding their receptors, and sensitive detection methods for them could contribute to better clinical management of disease. De novo protein design can now generate binders with high affinity and specificity to structured proteins. However, the design of interactions between proteins and short peptides with helical propensity is an unmet challenge. Here, we describe parametric generation and deep learning-based methods for designing proteins to address this challenge. We show that by extending RFdiffusion to enable binder design to flexible targets, and to refining input structure models by successive noising and denoising (partial diffusion), picomolar affinity binders can be generated to helical peptide targets both by refining designs generated with other methods, or completely de novo starting from random noise distributions. To our knowledge these are the highest affinity designed binding proteins against any protein or small molecule target generated directly by computation without any experimental optimisation. The RFdiffusion designs enable the enrichment and subsequent detection of parathyroid hormone and glucagon by mass spectrometry, and the construction of bioluminescence-based protein biosensors. The ability to design binders to conformationally variable targets, and to optimise by partial diffusion both natural and designed proteins, should be broadly useful.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Watson, Joseph L.; Juergens, David; Bennett, Nathaniel R.; Trippe, Brian L.; Yim, Jason; Eisenach, Helen E.; Ahern, Woody; Borst, Andrew J.; Ragotte, Robert J.; Milles, Lukas F.; Wicky, Basile I. M.; Hanikel, Nikita; Pellock, Samuel J.; Courbet, Alexis; Sheffler, William; Wang, Jue; Venkatesh, Preetham; Sappington, Isaac; Vázquez Torres, Susana; Lauko, Anna; De Bortoli, Valentin; Mathieu, Emile; Ovchinnikov, Sergey; Barzilay, Regina; Jaakkola, Tommi S.; DiMaio, Frank; Baek, Minkyung; Baker, David
De novo design of protein structure and function with RFdiffusion Journal Article
In: Nature, 2023.
@article{Watson2023,
  author    = {Watson, Joseph L. and Juergens, David and Bennett, Nathaniel R. and Trippe, Brian L. and Yim, Jason and Eisenach, Helen E. and Ahern, Woody and Borst, Andrew J. and Ragotte, Robert J. and Milles, Lukas F. and Wicky, Basile I. M. and Hanikel, Nikita and Pellock, Samuel J. and Courbet, Alexis and Sheffler, William and Wang, Jue and Venkatesh, Preetham and Sappington, Isaac and Vázquez Torres, Susana and Lauko, Anna and De Bortoli, Valentin and Mathieu, Emile and Ovchinnikov, Sergey and Barzilay, Regina and Jaakkola, Tommi S. and DiMaio, Frank and Baek, Minkyung and Baker, David},
  title     = {De novo design of protein structure and function with RFdiffusion},
  journal   = {Nature},
  year      = {2023},
  date      = {2023-07-11},
  doi       = {10.1038/s41586-023-06415-8},
  url       = {https://www.nature.com/articles/s41586-023-06415-8, Nature
https://www.bakerlab.org/wp-content/uploads/2023/07/s41586-023-06415-8_reference.pdf, PDF (29MB)},
  abstract  = {There has been considerable recent progress in designing new proteins using deep learning methods. Despite this progress, a general deep learning framework for protein design that enables solution of a wide range of design challenges, including de novo binder design and design of higher order symmetric architectures, has yet to be described. Diffusion models have had considerable success in image and language generative modeling but limited success when applied to protein modeling, likely due to the complexity of protein backbone geometry and sequence-structure relationships. Here we show that by fine tuning the RoseTTAFold structure prediction network on protein structure denoising tasks, we obtain a generative model of protein backbones that achieves outstanding performance on unconditional and topology-constrained protein monomer design, protein binder design, symmetric oligomer design, enzyme active site scaffolding, and symmetric motif scaffolding for therapeutic and metal-binding protein design. We demonstrate the power and generality of the method, called RoseTTAFold Diffusion (RFdiffusion), by experimentally characterizing the structures and functions of hundreds of designed symmetric assemblies, metal binding proteins and protein binders. The accuracy of RFdiffusion is confirmed by the cryo-EM structure of a designed binder in complex with Influenza hemagglutinin which is nearly identical to the design model. In a manner analogous to networks which produce images from user-specified inputs, RFdiffusion enables the design of diverse functional proteins from simple molecular specifications.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Bennett, Nathaniel R.; Coventry, Brian; Goreshnik, Inna; Huang, Buwei; Allen, Aza; Vafeados, Dionne; Peng, Ying Po; Dauparas, Justas; Baek, Minkyung; Stewart, Lance; DiMaio, Frank; De Munck, Steven; Savvides, Savvas N.; Baker, David
Improving de novo protein binder design with deep learning Journal Article
In: Nature Communications, 2023.
@article{Bennett2023,
  author    = {Bennett, Nathaniel R. and Coventry, Brian and Goreshnik, Inna and Huang, Buwei and Allen, Aza and Vafeados, Dionne and Peng, Ying Po and Dauparas, Justas and Baek, Minkyung and Stewart, Lance and DiMaio, Frank and De Munck, Steven and Savvides, Savvas N. and Baker, David},
  title     = {Improving de novo protein binder design with deep learning},
  journal   = {Nature Communications},
  year      = {2023},
  date      = {2023-05-06},
  doi       = {10.1038/s41467-023-38328-5},
  url       = {https://www.nature.com/articles/s41467-023-38328-5, Nature Communications (Open Access)},
  abstract  = {Recently it has become possible to de novo design high affinity protein binding proteins from target structural information alone. There is, however, considerable room for improvement as the overall design success rate is low. Here, we explore the augmentation of energy-based protein binder design using deep learning. We find that using AlphaFold2 or RoseTTAFold to assess the probability that a designed sequence adopts the designed monomer structure, and the probability that this structure binds the target as designed, increases design success rates nearly 10-fold. We find further that sequence design using ProteinMPNN rather than Rosetta considerably increases computational efficiency.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Kim, David E.; Jensen, Davin R.; Feldman, David; Tischer, Doug; Saleem, Ayesha; Chow, Cameron M.; Li, Xinting; Carter, Lauren; Milles, Lukas; Nguyen, Hannah; Kang, Alex; Bera, Asim K.; Peterson, Francis C.; Volkman, Brian F.; Ovchinnikov, Sergey; Baker, David
De novo design of small beta barrel proteins Journal Article
In: Proceedings of the National Academy of Sciences, 2023.
@article{Kim2023,
  author    = {Kim, David E. and Jensen, Davin R. and Feldman, David and Tischer, Doug and Saleem, Ayesha and Chow, Cameron M. and Li, Xinting and Carter, Lauren and Milles, Lukas and Nguyen, Hannah and Kang, Alex and Bera, Asim K. and Peterson, Francis C. and Volkman, Brian F. and Ovchinnikov, Sergey and Baker, David},
  title     = {De novo design of small beta barrel proteins},
  journal   = {Proceedings of the National Academy of Sciences},
  year      = {2023},
  date      = {2023-03-10},
  urldate   = {2023-03-10},
  doi       = {10.1073/pnas.2207974120},
  url       = {https://www.pnas.org/doi/10.1073/pnas.2207974120, PNAS (Open Access)},
  abstract  = {Small beta barrel proteins are attractive targets for computational design because of their considerable functional diversity despite their very small size (<70 amino acids). However, there are considerable challenges to designing such structures, and there has been little success thus far. Because of the small size, the hydrophobic core stabilizing the fold is necessarily very small, and the conformational strain of barrel closure can oppose folding; also intermolecular aggregation through free beta strand edges can compete with proper monomer folding. Here, we explore the de novo design of small beta barrel topologies using both Rosetta energy–based methods and deep learning approaches to design four small beta barrel folds: Src homology 3 (SH3) and oligonucleotide/oligosaccharide-binding (OB) topologies found in nature and five and six up-and-down-stranded barrels rarely if ever seen in nature. Both approaches yielded successful designs with high thermal stability and experimentally determined structures with less than 2.4 Å rmsd from the designed models. Using deep learning for backbone generation and Rosetta for sequence design yielded higher design success rates and increased structural diversity than Rosetta alone. The ability to design a large and structurally diverse set of small beta barrel proteins greatly increases the protein shape space available for designing binders to protein targets of interest.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
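Yeh, Andy Hsien-Wei; Norn, Christoffer; Kipnis, Yakov; Tischer, Doug; Pellock, Samuel J.; Evans, Declan; Ma, Pengchen; Lee, Gyu Rie; Zhang, Jason Z.; Anishchenko, Ivan; Coventry, Brian; Cao, Longxing; Dauparas, Justas; Halabiya, Samer; DeWitt, Michelle; Carter, Lauren; Houk, K. N.; Baker, David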
De novo design of luciferases using deep learning Journal Article
In: Nature, 2023.
@article{Yeh2023,
  author    = {Yeh, Andy Hsien-Wei and Norn, Christoffer and Kipnis, Yakov and Tischer, Doug and Pellock, Samuel J. and Evans, Declan and Ma, Pengchen and Lee, Gyu Rie and Zhang, Jason Z. and Anishchenko, Ivan and Coventry, Brian and Cao, Longxing and Dauparas, Justas and Halabiya, Samer and DeWitt, Michelle and Carter, Lauren and Houk, K. N. and Baker, David},
  title     = {De novo design of luciferases using deep learning},
  journal   = {Nature},
  year      = {2023},
  date      = {2023-02-22},
  doi       = {10.1038/s41586-023-05696-3},
  url       = {https://www.nature.com/articles/s41586-023-05696-3, Nature (Open Access)},
  abstract  = {De novo enzyme design has sought to introduce active sites and substrate-binding pockets that are predicted to catalyse a reaction of interest into geometrically compatible native scaffolds, but has been limited by a lack of suitable protein structures and the complexity of native protein sequence–structure relationships. Here we describe a deep-learning-based ‘family-wide hallucination’ approach that generates large numbers of idealized protein structures containing diverse pocket shapes and designed sequences that encode them. We use these scaffolds to design artificial luciferases that selectively catalyse the oxidative chemiluminescence of the synthetic luciferin substrates diphenylterazine and 2-deoxycoelenterazine. The designed active sites position an arginine guanidinium group adjacent to an anion that develops during the reaction in a binding pocket with high shape complementarity. For both luciferin substrates, we obtain designed luciferases with high selectivity; the most active of these is a small (13.9 kDa) and thermostable (with a melting temperature higher than 95 °C) enzyme that has a catalytic efficiency on diphenylterazine (kcat/Km = 10⁶ M⁻¹ s⁻¹) comparable to that of native luciferases, but a much higher substrate specificity. The creation of highly active and specific biocatalysts from scratch with broad applications in biomedicine is a key milestone for computational enzyme design, and our approach should enable generation of a wide range of luciferases and other enzymes.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Wicky, B. I. M.; Milles, L. F.; Courbet, A.; Ragotte, R. J.; Dauparas, J.; Kinfu, E.; Tipps, S.; Kibler, R. D.; Baek, M.; DiMaio, F.; Li, X.; Carter, L.; Kang, A.; Nguyen, H.; Bera, A. K.; Baker, D.
Hallucinating symmetric protein assemblies Journal Article
In: Science, 2022.
@article{Wicky2022,
  author    = {Wicky, B. I. M. and Milles, L. F. and Courbet, A. and Ragotte, R. J. and Dauparas, J. and Kinfu, E. and Tipps, S. and Kibler, R. D. and Baek, M. and DiMaio, F. and Li, X. and Carter, L. and Kang, A. and Nguyen, H. and Bera, A. K. and Baker, D.},
  title     = {Hallucinating symmetric protein assemblies},
  journal   = {Science},
  year      = {2022},
  date      = {2022-09-15},
  doi       = {10.1126/science.add1964},
  url       = {https://www.science.org/doi/abs/10.1126/science.add1964, Science
https://www.bakerlab.org/wp-content/uploads/2022/09/Wicky_etal_Science2022_Hallucinating_symmetric_protein_assemblies.pdf, PDF},
  abstract  = {Deep learning generative approaches provide an opportunity to broadly explore protein structure space beyond the sequences and structures of natural proteins. Here we use deep network hallucination to generate a wide range of symmetric protein homo-oligomers given only a specification of the number of protomers and the protomer length. Crystal structures of 7 designs are very close to the computational models (median RMSD: 0.6 Å), as are 3 cryoEM structures of giant 10 nanometer rings with up to 1550 residues and C33 symmetry; all differ considerably from previously solved structures. Our results highlight the rich diversity of new protein structures that can be generated using deep learning, and pave the way for the design of increasingly complex components for nanomachines and biomaterials.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Dauparas, J.; Anishchenko, I.; Bennett, N.; Bai, H.; Ragotte, R. J.; Milles, L. F.; Wicky, B. I. M.; Courbet, A.; de Haas, R. J.; Bethel, N.; Leung, P. J. Y.; Huddy, T. F.; Pellock, S.; Tischer, D.; Chan, F.; Koepnick, B.; Nguyen, H.; Kang, A.; Sankaran, B.; Bera, A. K.; King, N. P.; Baker, D.
Robust deep learning–based protein sequence design using ProteinMPNN Journal Article
In: Science, 2022.
@article{Dauparas2022,
  author    = {Dauparas, J. and Anishchenko, I. and Bennett, N. and Bai, H. and Ragotte, R. J. and Milles, L. F. and Wicky, B. I. M. and Courbet, A. and de Haas, R. J. and Bethel, N. and Leung, P. J. Y. and Huddy, T. F. and Pellock, S. and Tischer, D. and Chan, F. and Koepnick, B. and Nguyen, H. and Kang, A. and Sankaran, B. and Bera, A. K. and King, N. P. and Baker, D.},
  title     = {Robust deep learning–based protein sequence design using ProteinMPNN},
  journal   = {Science},
  year      = {2022},
  date      = {2022-09-15},
  doi       = {10.1126/science.add2187},
  url       = {https://www.science.org/doi/abs/10.1126/science.add2187, Science
https://www.bakerlab.org/wp-content/uploads/2022/09/Dauparas_etal_Science2022_Sequence_design_via_ProteinMPNN.pdf, PDF},
  abstract  = {While deep learning has revolutionized protein structure prediction, almost all experimentally characterized de novo protein designs have been generated using physically based approaches such as Rosetta. Here we describe a deep learning–based protein sequence design method, ProteinMPNN, with outstanding performance in both in silico and experimental tests. On native protein backbones, ProteinMPNN has a sequence recovery of 52.4%, compared to 32.9% for Rosetta. The amino acid sequence at different positions can be coupled between single or multiple chains, enabling application to a wide range of current protein design challenges. We demonstrate the broad utility and high accuracy of ProteinMPNN using X-ray crystallography, cryoEM and functional studies by rescuing previously failed designs, made using Rosetta or AlphaFold, of protein monomers, cyclic homo-oligomers, tetrahedral nanoparticles, and target binding proteins.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Wang, Jue; Lisanza, Sidney; Juergens, David; Tischer, Doug; Watson, Joseph L.; Castro, Karla M.; Ragotte, Robert; Saragovi, Amijai; Milles, Lukas F.; Baek, Minkyung; Anishchenko, Ivan; Yang, Wei; Hicks, Derrick R.; Expòsit, Marc; Schlichthaerle, Thomas; Chun, Jung-Ho; Dauparas, Justas; Bennett, Nathaniel; Wicky, Basile I. M.; Muenks, Andrew; DiMaio, Frank; Correia, Bruno; Ovchinnikov, Sergey; Baker, David
Scaffolding protein functional sites using deep learning Journal Article
In: Science, 2022.
@article{Wang2022,
  author    = {Wang, Jue and Lisanza, Sidney and Juergens, David and Tischer, Doug and Watson, Joseph L. and Castro, Karla M. and Ragotte, Robert and Saragovi, Amijai and Milles, Lukas F. and Baek, Minkyung and Anishchenko, Ivan and Yang, Wei and Hicks, Derrick R. and Expòsit, Marc and Schlichthaerle, Thomas and Chun, Jung-Ho and Dauparas, Justas and Bennett, Nathaniel and Wicky, Basile I. M. and Muenks, Andrew and DiMaio, Frank and Correia, Bruno and Ovchinnikov, Sergey and Baker, David},
  title     = {Scaffolding protein functional sites using deep learning},
  journal   = {Science},
  year      = {2022},
  date      = {2022-07-21},
  urldate   = {2022-07-21},
  doi       = {10.1126/science.abn2100},
  url       = {https://www.science.org/doi/abs/10.1126/science.abn2100, Science
https://www.ipd.uw.edu/wp-content/uploads/2022/07/science.abn2100.pdf, Download PDF},
  abstract  = {The binding and catalytic functions of proteins are generally mediated by a small number of functional residues held in place by the overall protein structure. Here, we describe deep learning approaches for scaffolding such functional sites without needing to prespecify the fold or secondary structure of the scaffold. The first approach, “constrained hallucination,” optimizes sequences such that their predicted structures contain the desired functional site. The second approach, “inpainting,” starts from the functional site and fills in additional sequence and structure to create a viable protein scaffold in a single forward pass through a specifically trained RoseTTAFold network. We use these two methods to design candidate immunogens, receptor traps, metalloproteins, enzymes, and protein-binding proteins and validate the designs using a combination of in silico and experimental tests.},
  annote    = {Protein design has had success in finding sequences that fold into a desired conformation, but designing functional proteins remains challenging. Wang et al. describe two deep-learning methods to design proteins that contain prespecified functional sites. In the first, they found sequences predicted to fold into stable structures that contain the functional site. In the second, they retrained a structure prediction network to recover the sequence and full structure of a protein given only the functional site. The authors demonstrate their methods by designing proteins containing a variety of functional motifs. —VV Deep-learning methods enable the scaffolding of desired functional residues within a well-folded designed protein.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Sen, Neeladri; Anishchenko, Ivan; Bordin, N.; Sillitoe, Ian; Velankar, Sameer; Baker, David; Orengo, Christine
Characterizing and explaining the impact of disease-associated mutations in proteins without known structures or structural homologs Journal Article
In: Briefings in Bioinformatics, 2022.
@article{Sen2022,
  author    = {Sen, Neeladri and Anishchenko, Ivan and Bordin, N. and Sillitoe, Ian and Velankar, Sameer and Baker, David and Orengo, Christine},
  title     = {Characterizing and explaining the impact of disease-associated mutations in proteins without known structures or structural homologs},
  journal   = {Briefings in Bioinformatics},
  year      = {2022},
  date      = {2022-07-18},
  doi       = {10.1093/bib/bbac187},
  url       = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9294430/},
  abstract  = {Mutations in human proteins lead to diseases. The structure of these proteins can help understand the mechanism of such diseases and develop therapeutics against them. With improved deep learning techniques, such as RoseTTAFold and AlphaFold, we can predict the structure of proteins even in the absence of structural homologs. We modeled and extracted the domains from 553 disease-associated human proteins without known protein structures or close homologs in the Protein Databank. We noticed that the model quality was higher and the Root mean square deviation (RMSD) lower between AlphaFold and RoseTTAFold models for domains that could be assigned to CATH families as compared to those which could only be assigned to Pfam families of unknown structure or could not be assigned to either. We predicted ligand-binding sites, protein-protein interfaces and conserved residues in these predicted structures. We then explored whether the disease-associated missense mutations were in the proximity of these predicted functional sites, whether they destabilized the protein structure based on ddG calculations or whether they were predicted to be pathogenic. We could explain 80% of these disease-associated mutations based on proximity to functional sites, structural destabilization or pathogenicity. When compared to polymorphisms, a larger percentage of disease-associated missense mutations were buried, closer to predicted functional sites, predicted as destabilizing and pathogenic. Usage of models from the two state-of-the-art techniques provide better confidence in our predictions, and we explain 93 additional mutations based on RoseTTAFold models which could not be explained based solely on AlphaFold models.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Lovelock, Sarah L.; Crawshaw, Rebecca; Basler, Sophie; Levy, Colin; Baker, David; Hilvert, Donald; Green, Anthony P.
The road to fully programmable protein catalysis Journal Article
In: Nature, 2022.
@article{Lovelock2022,
  title     = {The road to fully programmable protein catalysis},
  author    = {Sarah L. Lovelock and Rebecca Crawshaw and Sophie Basler and Colin Levy and David Baker and Donald Hilvert and Anthony P. Green},
  url       = {https://www.nature.com/articles/s41586-022-04456-z, Nature
https://www.bakerlab.org/wp-content/uploads/2022/06/s41586-022-04456-z.pdf, Download PDF},
  doi       = {10.1038/s41586-022-04456-z},
  year      = {2022},
  date      = {2022-06-01},
  journal   = {Nature},
  abstract  = {The ability to design efficient enzymes from scratch would have a profound effect on chemistry, biotechnology and medicine. Rapid progress in protein engineering over the past decade makes us optimistic that this ambition is within reach. The development of artificial enzymes containing metal cofactors and noncanonical organocatalytic groups shows how protein structure can be optimized to harness the reactivity of nonproteinogenic elements. In parallel, computational methods have been used to design protein catalysts for diverse reactions on the basis of fundamental principles of transition state stabilization. Although the activities of designed catalysts have been quite low, extensive laboratory evolution has been used to generate efficient enzymes. Structural analysis of these systems has revealed the high degree of precision that will be needed to design catalysts with greater activity. To this end, emerging protein design methods, including deep learning, hold particular promise for improving model accuracy. Here we take stock of key developments in the field and highlight new opportunities for innovation that should allow us to transition beyond the current state of the art and enable the robust design of biocatalysts to address societal needs.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Linder, Johannes; La Fleur, Alyssa; Chen, Zibo; Ljubetič, Ajasja; Baker, David; Kannan, Sreeram; Seelig, Georg
Interpreting neural networks for biological sequences by learning stochastic masks Journal Article
In: Nature Machine Intelligence, 2022.
@article{Linder2022,
  title     = {Interpreting neural networks for biological sequences by learning stochastic masks},
  journal   = {Nature Machine Intelligence},
  author    = {Linder, Johannes
               and La Fleur, Alyssa
               and Chen, Zibo
               and Ljubetič, Ajasja
               and Baker, David
               and Kannan, Sreeram
               and Seelig, Georg},
  year      = {2022},
  date      = {2022-01-25},
  urldate   = {2022-01-25},
  doi       = {10.1038/s42256-021-00428-6},
  url       = {https://www.nature.com/articles/s42256-021-00428-6, Nature Machine Intelligence},
  abstract  = {Sequence-based neural networks can learn to make accurate predictions from large biological datasets, but model interpretation remains challenging. Many existing feature attribution methods are optimized for continuous rather than discrete input patterns and assess individual feature importance in isolation, making them ill-suited for interpreting nonlinear interactions in molecular sequences. Here, building on work in computer vision and natural language processing, we developed an approach based on deep learning—scrambler networks—wherein the most important sequence positions are identified with learned input masks. Scramblers learn to predict position-specific scoring matrices where unimportant nucleotides or residues are scrambled by raising their entropy. We apply scramblers to interpret the effects of genetic variants, uncover nonlinear interactions between cis-regulatory elements, explain binding specificity for protein–protein interactions, and identify structural determinants of de novo-designed proteins. We show that scramblers enable efficient attribution across large datasets and result in high-quality explanations, often outperforming state-of-the-art methods.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Baek, Minkyung; Baker, David
Deep learning and protein structure modeling Journal Article
In: Nature Methods, 2022.
@article{Baek2022,
  title     = {Deep learning and protein structure modeling},
  author    = {Minkyung Baek and David Baker},
  url       = {https://www.nature.com/articles/s41592-021-01360-8, Nature Methods
https://www.bakerlab.org/wp-content/uploads/2022/01/Baek_Baker_NatureMethods2022_Deep_Learning_and_Protein_Structure_Modeling.pdf, Download PDF},
  doi       = {10.1038/s41592-021-01360-8},
  year      = {2022},
  date      = {2022-01-22},
  urldate   = {2022-01-22},
  journal   = {Nature Methods},
  abstract  = {Deep learning has transformed protein structure modeling. Here we relate AlphaFold and RoseTTAFold to classical physically based approaches to protein structure prediction, and discuss the many areas of structural biology that are likely to be affected by further advances in deep learning.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Du, Zongyang; Su, Hong; Wang, Wenkai; Ye, Lisha; Wei, Hong; Peng, Zhenling; Anishchenko, Ivan; Baker, David; Yang, Jianyi
The trRosetta server for fast and accurate protein structure prediction Journal Article
In: Nature Protocols, 2021.
@article{Du2021,
  title     = {The trRosetta server for fast and accurate protein structure prediction},
  journal   = {Nature Protocols},
  author    = {Du, Zongyang and Su, Hong and Wang, Wenkai and Ye, Lisha and Wei, Hong and Peng, Zhenling and Anishchenko, Ivan and Baker, David and Yang, Jianyi},
  year      = {2021},
  date      = {2021-12-01},
  urldate   = {2021-12-01},
  doi       = {10.1038/s41596-021-00628-9},
  url       = {https://www.nature.com/articles/s41596-021-00628-9
https://www.bakerlab.org/wp-content/uploads/2022/01/Du_etal_NatProt2021_trRosetta_server.pdf},
  abstract  = {The trRosetta (transform-restrained Rosetta) server is a web-based platform for fast and accurate protein structure prediction, powered by deep learning and Rosetta. With the input of a protein’s amino acid sequence, a deep neural network is first used to predict the inter-residue geometries, including distance and orientations. The predicted geometries are then transformed as restraints to guide the structure prediction on the basis of direct energy minimization, which is implemented under the framework of Rosetta. The trRosetta server distinguishes itself from other similar structure prediction servers in terms of rapid and accurate de novo structure prediction. As an illustration, trRosetta was applied to two Pfam families with unknown structures, for which the predicted de novo models were estimated to have high accuracy. Nevertheless, to take advantage of homology modeling, homologous templates are used as additional inputs to the network automatically. In general, it takes ~1 h to predict the final structure for a typical protein with ~300 amino acids, using a maximum of 10 CPU cores in parallel in our cluster system. To enable large-scale structure modeling, a downloadable package of trRosetta with open-source codes is available as well. A detailed guidance for using the package is also available in this protocol. The server and the package are available at https://yanglab.nankai.edu.cn/trRosetta/ and https://yanglab.nankai.edu.cn/trRosetta/download/, respectively.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Baek, Minkyung; Anishchenko, Ivan; Park, Hahnbeom; Humphreys, Ian R.; Baker, David
Protein oligomer modeling guided by predicted inter-chain contacts in CASP14 Journal Article
In: Proteins, 2021.
@article{Baek2021b,
  title     = {Protein oligomer modeling guided by predicted inter-chain contacts in CASP14},
  author    = {Minkyung Baek and Ivan Anishchenko and Hahnbeom Park and Ian R. Humphreys and David Baker},
  url       = {https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.26197, Proteins},
  doi       = {10.1002/prot.26197},
  year      = {2021},
  date      = {2021-07-29},
  urldate   = {2021-07-29},
  journal   = {Proteins},
  abstract  = {For CASP14, we developed deep learning-based methods for predicting homo-oligomeric and hetero-oligomeric contacts and used them for oligomer modeling. To build structure models, we developed an oligomer structure generation method that utilizes predicted inter-chain contacts to guide iterative restrained minimization from random backbone structures. We supplemented this gradient-based fold-and-dock method with template-based and ab initio docking approaches using deep learning-based subunit predictions on 29 assembly targets. These methods produced oligomer models with summed Z-scores 5.5 units higher than the next best group, with the fold-and-dock method having the best relative performance. Over the eight targets for which this method was used, the best of the five submitted models had average oligomer TM-score of 0.71 (average oligomer TM-score of the next best group: 0.64), and explicit modeling of inter-subunit interactions improved modeling of six out of 40 individual domains (ΔGDT-TS > 2.0).},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Norn, Christoffer; Wicky, Basile I. M.; Juergens, David; Liu, Sirui; Kim, David; Tischer, Doug; Koepnick, Brian; Anishchenko, Ivan; Baker, David; Ovchinnikov, Sergey
Protein sequence design by conformational landscape optimization Journal Article
In: Proceedings of the National Academy of Sciences, vol. 118, no. 11, 2021.
@article{Norn2021,
  title     = {Protein sequence design by conformational landscape optimization},
  author    = {Norn, Christoffer and Wicky, Basile I. M. and Juergens, David and Liu, Sirui and Kim, David and Tischer, Doug and Koepnick, Brian and Anishchenko, Ivan and Baker, David and Ovchinnikov, Sergey},
  url       = {https://www.pnas.org/content/118/11/e2017228118, PNAS
https://www.bakerlab.org/wp-content/uploads/2021/03/Norn_etal_PNAS2021_LandscapeOptimization.pdf, Download PDF},
  doi       = {10.1073/pnas.2017228118},
  year      = {2021},
  date      = {2021-03-16},
  urldate   = {2021-03-16},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {118},
  number    = {11},
  abstract  = {Almost all proteins fold to their lowest free energy state, which is determined by their amino acid sequence. Computational protein design has primarily focused on finding sequences that have very low energy in the target designed structure. However, what is most relevant during folding is not the absolute energy of the folded state but the energy difference between the folded state and the lowest-lying alternative states. We describe a deep learning approach that captures aspects of the folding landscape, in particular the presence of structures in alternative energy minima, and show that it can enhance current protein design methods. The protein design problem is to identify an amino acid sequence that folds to a desired structure. Given Anfinsen’s thermodynamic hypothesis of folding, this can be recast as finding an amino acid sequence for which the desired structure is the lowest energy state. As this calculation involves not only all possible amino acid sequences but also, all possible structures, most current approaches focus instead on the more tractable problem of finding the lowest-energy amino acid sequence for the desired structure, often checking by protein structure prediction in a second step that the desired structure is indeed the lowest-energy conformation for the designed sequence, and typically discarding a large fraction of designed sequences for which this is not the case. Here, we show that by backpropagating gradients through the transform-restrained Rosetta (trRosetta) structure prediction network from the desired structure to the input amino acid sequence, we can directly optimize over all possible amino acid sequences and all possible structures in a single calculation. We find that trRosetta calculations, which consider the full conformational landscape, can be more effective than Rosetta single-point energy estimations in predicting folding and stability of de novo designed proteins.
We compare sequence design by conformational landscape optimization with the standard energy-based sequence design methodology in Rosetta and show that the former can result in energy landscapes with fewer alternative energy minima. We show further that more funneled energy landscapes can be designed by combining the strengths of the two approaches: the low-resolution trRosetta model serves to disfavor alternative states, and the high-resolution Rosetta model serves to create a deep energy minimum at the design target structure.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Hiranuma, Naozumi; Park, Hahnbeom; Baek, Minkyung; Anishchenko, Ivan; Dauparas, Justas; Baker, David
Improved protein structure refinement guided by deep learning based accuracy estimation Journal Article
In: Nature Communications, vol. 12, no. 1340, 2021.
@article{Hiranuma2021,
  title     = {Improved protein structure refinement guided by deep learning based accuracy estimation},
  author    = {Naozumi Hiranuma and Hahnbeom Park and Minkyung Baek and Ivan Anishchenko and Justas Dauparas and David Baker},
  url       = {https://www.nature.com/articles/s41467-021-21511-x, Nature Communications
https://www.bakerlab.org/wp-content/uploads/2021/02/Hiranuma_etal_NatureComms2021_DeepLearningStructureRefinement.pdf, Download PDF},
  doi       = {10.1038/s41467-021-21511-x},
  year      = {2021},
  date      = {2021-02-26},
  urldate   = {2021-02-26},
  journal   = {Nature Communications},
  volume    = {12},
  number    = {1340},
  abstract  = {We develop a deep learning framework (DeepAccNet) that estimates per-residue accuracy and residue-residue distance signed error in protein models and uses these predictions to guide Rosetta protein structure refinement. The network uses 3D convolutions to evaluate local atomic environments followed by 2D convolutions to provide their global contexts and outperforms other methods that similarly predict the accuracy of protein structure models. Overall accuracy predictions for X-ray and cryoEM structures in the PDB correlate with their resolution, and the network should be broadly useful for assessing the accuracy of both predicted structure models and experimentally determined structures and identifying specific regions likely to be in error. Incorporation of the accuracy predictions at multiple stages in the Rosetta refinement protocol considerably increased the accuracy of the resulting protein structure models, illustrating how deep learning can improve search for global energy minima of biomolecules.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2024
FROM THE LAB
Ian R. Humphreys, Jing Zhang, Minkyung Baek, Yaxi Wang, Aditya Krishnakumar, Jimin Pei, Ivan Anishchenko, Catherine A. Tower, Blake A. Jackson, Thulasi Warrier, Deborah T. Hung, S. Brook Peterson, Joseph D. Mougous, Qian Cong, David Baker
Protein interactions in human pathogens revealed through deep learning Journal Article
In: Nature Microbiology, 2024, ISSN: 2058-5276.
@article{Humphreys2024,
  title     = {Protein interactions in human pathogens revealed through deep learning},
  author    = {Ian R. Humphreys and Jing Zhang and Minkyung Baek and Yaxi Wang and Aditya Krishnakumar and Jimin Pei and Ivan Anishchenko and Catherine A. Tower and Blake A. Jackson and Thulasi Warrier and Deborah T. Hung and S. Brook Peterson and Joseph D. Mougous and Qian Cong and David Baker},
  url       = {https://www.nature.com/articles/s41564-024-01791-x, Nature Microbiology [Open Access]},
  doi       = {10.1038/s41564-024-01791-x},
  issn      = {2058-5276},
  year      = {2024},
  date      = {2024-09-18},
  urldate   = {2024-09-18},
  journal   = {Nature Microbiology},
  publisher = {Springer Science and Business Media LLC},
  abstract  = {Identification of bacterial protein–protein interactions and predicting the structures of these complexes could aid in the understanding of pathogenicity mechanisms and developing treatments for infectious diseases. Here we developed RoseTTAFold2-Lite, a rapid deep learning model that leverages residue–residue coevolution and protein structure prediction to systematically identify and structurally characterize protein–protein interactions at the proteome-wide scale. Using this pipeline, we searched through 78 million pairs of proteins across 19 human bacterial pathogens and identified 1,923 confidently predicted complexes involving essential genes and 256 involving virulence factors. Many of these complexes were not previously known; we experimentally tested 12 such predictions, and half of them were validated. The predicted interactions span core metabolic and virulence pathways ranging from post-transcriptional modification to acid neutralization to outer-membrane machinery and should contribute to our understanding of the biology of these important pathogens and the design of drugs to combat them.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Linna An, Meerit Said, Long Tran, Sagardip Majumder, Inna Goreshnik, Gyu Rie Lee, David Juergens, Justas Dauparas, Ivan Anishchenko, Brian Coventry, Asim K. Bera, Alex Kang, Paul M. Levine, Valentina Alvarez, Arvind Pillai, Christoffer Norn, David Feldman, Dmitri Zorine, Derrick R. Hicks, Xinting Li, Mariana Garcia Sanchez, Dionne K. Vafeados, Patrick J. Salveson, Anastassia A. Vorobieva, David Baker
Binding and sensing diverse small molecules using shape-complementary pseudocycles Journal Article
In: Science, 2024.
@article{An2024,
  title     = {Binding and sensing diverse small molecules using shape-complementary pseudocycles},
  journal   = {Science},
  publisher = {American Association for the Advancement of Science (AAAS)},
  author    = {Linna An
               and Meerit Said
               and Long Tran
               and Sagardip Majumder
               and Inna Goreshnik
               and Gyu Rie Lee
               and David Juergens
               and Justas Dauparas
               and Ivan Anishchenko
               and Brian Coventry
               and Asim K. Bera
               and Alex Kang
               and Paul M. Levine
               and Valentina Alvarez
               and Arvind Pillai
               and Christoffer Norn
               and David Feldman
               and Dmitri Zorine
               and Derrick R. Hicks
               and Xinting Li
               and Mariana Garcia Sanchez
               and Dionne K. Vafeados
               and Patrick J. Salveson
               and Anastassia A. Vorobieva
               and David Baker},
  year      = {2024},
  date      = {2024-07-19},
  urldate   = {2024-07-19},
  doi       = {10.1126/science.adn3780},
  url       = {https://www.science.org/doi/10.1126/science.adn3780, Science},
  abstract  = {We describe an approach for designing high-affinity small molecule–binding proteins poised for downstream sensing. We use deep learning–generated pseudocycles with repeating structural units surrounding central binding pockets with widely varying shapes that depend on the geometry and number of the repeat units. We dock small molecules of interest into the most shape complementary of these pseudocycles, design the interaction surfaces for high binding affinity, and experimentally screen to identify designs with the highest affinity. We obtain binders to four diverse molecules, including the polar and flexible methotrexate and thyroxine. Taking advantage of the modular repeat structure and central binding pockets, we construct chemically induced dimerization systems and low-noise nanopore sensors by splitting designs into domains that reassemble upon ligand addition.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Rohith Krishna, Jue Wang, Woody Ahern, Pascal Sturmfels, Preetham Venkatesh, Indrek Kalvet, Gyu Rie Lee, Felix S. Morey-Burrows, Ivan Anishchenko, Ian R. Humphreys, Ryan McHugh, Dionne Vafeados, Xinting Li, George A. Sutherland, Andrew Hitchcock, C. Neil Hunter, Alex Kang, Evans Brackenbrough, Asim K. Bera, Minkyung Baek, Frank DiMaio, David Baker
Generalized biomolecular modeling and design with RoseTTAFold All-Atom Journal Article
In: Science, 2024.
@article{Krishna2024,
  title     = {Generalized biomolecular modeling and design with RoseTTAFold All-Atom},
  journal   = {Science},
  publisher = {American Association for the Advancement of Science (AAAS)},
  author    = {Rohith Krishna
               and Jue Wang
               and Woody Ahern
               and Pascal Sturmfels
               and Preetham Venkatesh
               and Indrek Kalvet
               and Gyu Rie Lee
               and Felix S. Morey-Burrows
               and Ivan Anishchenko
               and Ian R. Humphreys
               and Ryan McHugh
               and Dionne Vafeados
               and Xinting Li
               and George A. Sutherland
               and Andrew Hitchcock
               and C. Neil Hunter
               and Alex Kang
               and Evans Brackenbrough
               and Asim K. Bera
               and Minkyung Baek
               and Frank DiMaio
               and David Baker},
  year      = {2024},
  date      = {2024-03-07},
  urldate   = {2024-03-07},
  doi       = {10.1126/science.adl2528},
  url       = {https://www.science.org/stoken/author-tokens/ST-1739/full, Science [Full Access Link]
https://www.bakerlab.org/wp-content/uploads/2024/03/science.adl2528.pdf, PDF},
  abstract  = {Deep learning methods have revolutionized protein structure prediction and design but are currently limited to protein-only systems. We describe RoseTTAFold All-Atom (RFAA) which combines a residue-based representation of amino acids and DNA bases with an atomic representation of all other groups to model assemblies containing proteins, nucleic acids, small molecules, metals, and covalent modifications given their sequences and chemical structures. By fine tuning on denoising tasks we obtain RFdiffusionAA, which builds protein structures around small molecules. Starting from random distributions of amino acid residues surrounding target small molecules, we design and experimentally validate, through crystallography and binding measurements, proteins that bind the cardiac disease therapeutic digoxigenin, the enzymatic cofactor heme, and the light harvesting molecule bilin.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Casper A. Goverde, Martin Pacesa, Nicolas Goldbach, Lars J. Dornfeld, Petra E. M. Balbi, Sandrine Georgeon, Stéphane Rosset, Srajan Kapoor, Jagrity Choudhury, Justas Dauparas, Christian Schellhaas, Simon Kozlov, David Baker, Sergey Ovchinnikov, Alex J. Vecchio, Bruno E. Correia
Computational design of soluble and functional membrane protein analogues Journal Article
In: Nature, 2024, ISSN: 1476-4687.
@article{Goverde2024,
  title     = {Computational design of soluble and functional membrane protein analogues},
  author    = {Casper A. Goverde and Martin Pacesa and Nicolas Goldbach and Lars J. Dornfeld and Petra E. M. Balbi and Sandrine Georgeon and Stéphane Rosset and Srajan Kapoor and Jagrity Choudhury and Justas Dauparas and Christian Schellhaas and Simon Kozlov and David Baker and Sergey Ovchinnikov and Alex J. Vecchio and Bruno E. Correia},
  url       = {https://www.nature.com/articles/s41586-024-07601-y, Nature [Open Access]},
  doi       = {10.1038/s41586-024-07601-y},
  issn      = {1476-4687},
  year      = {2024},
  date      = {2024-06-19},
  urldate   = {2024-06-19},
  journal   = {Nature},
  publisher = {Springer Science and Business Media LLC},
  abstract  = {De novo design of complex protein folds using solely computational means remains a substantial challenge. Here we use a robust deep learning pipeline to design complex folds and soluble analogues of integral membrane proteins. Unique membrane topologies, such as those from G-protein-coupled receptors, are not found in the soluble proteome, and we demonstrate that their structural features can be recapitulated in solution. Biophysical analyses demonstrate the high thermal stability of the designs, and experimental structures show remarkable design accuracy. The soluble analogues were functionalized with native structural motifs, as a proof of concept for bringing membrane protein functions to the soluble proteome, potentially enabling new approaches in drug discovery. In summary, we have designed complex protein topologies and enriched them with functionalities from membrane proteins, with high experimental success rates, leading to a de facto expansion of the functional soluble fold space.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2023
FROM THE LAB
Susana Vázquez Torres, Philip J Y Leung, Preetham Venkatesh, Isaac D Lutz, Fabian Hink, Huu-Hien Huynh, Jessica Becker, Andy Hsien-Wei Yeh, David Juergens, Nathaniel R Bennett, Andrew N Hoofnagle, Eric Huang, Michael J MacCoss, Marc Expòsit, Gyu Rie Lee, Asim K Bera, Alex Kang, Joshmyn De La Cruz, Paul M Levine, Xinting Li, Mila Lamb, Stacey R Gerben, Analisa Murray, Piper Heine, Elif Nihal Korkmaz, Jeff Nivala, Lance Stewart, Joseph L Watson, Joseph M Rogers, David Baker
De novo design of high-affinity binders of bioactive helical peptides Journal Article
In: Nature, 2023, ISSN: 1476-4687.
@article{pmid38109936,
  title     = {De novo design of high-affinity binders of bioactive helical peptides},
  journal   = {Nature},
  author    = {Susana Vázquez Torres
               and Philip J Y Leung
               and Preetham Venkatesh
               and Isaac D Lutz
               and Fabian Hink
               and Huu-Hien Huynh
               and Jessica Becker
               and Andy Hsien-Wei Yeh
               and David Juergens
               and Nathaniel R Bennett
               and Andrew N Hoofnagle
               and Eric Huang
               and Michael J MacCoss
               and Marc Expòsit
               and Gyu Rie Lee
               and Asim K Bera
               and Alex Kang
               and Joshmyn De La Cruz
               and Paul M Levine
               and Xinting Li
               and Mila Lamb
               and Stacey R Gerben
               and Analisa Murray
               and Piper Heine
               and Elif Nihal Korkmaz
               and Jeff Nivala
               and Lance Stewart
               and Joseph L Watson
               and Joseph M Rogers
               and David Baker},
  year      = {2023},
  date      = {2023-12-01},
  urldate   = {2023-12-01},
  issn      = {1476-4687},
  doi       = {10.1038/s41586-023-06953-1},
  url       = {https://www.nature.com/articles/s41586-023-06953-1, Nature [Open Access]},
  abstract  = {Many peptide hormones form an alpha-helix upon binding their receptors, and sensitive detection methods for them could contribute to better clinical management of disease. De novo protein design can now generate binders with high affinity and specificity to structured proteins. However, the design of interactions between proteins and short peptides with helical propensity is an unmet challenge. Here, we describe parametric generation and deep learning-based methods for designing proteins to address this challenge. We show that by extending RFdiffusion to enable binder design to flexible targets, and to refining input structure models by successive noising and denoising (partial diffusion), picomolar affinity binders can be generated to helical peptide targets both by refining designs generated with other methods, or completely de novo starting from random noise distributions. To our knowledge these are the highest affinity designed binding proteins against any protein or small molecule target generated directly by computation without any experimental optimisation. The RFdiffusion designs enable the enrichment and subsequent detection of parathyroid hormone and glucagon by mass spectrometry, and the construction of bioluminescence-based protein biosensors. The ability to design binders to conformationally variable targets, and to optimise by partial diffusion both natural and designed proteins, should be broadly useful.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Joseph L. Watson, David Juergens, Nathaniel R. Bennett, Brian L. Trippe, Jason Yim, Helen E. Eisenach, Woody Ahern, Andrew J. Borst, Robert J. Ragotte, Lukas F. Milles, Basile I. M. Wicky, Nikita Hanikel, Samuel J. Pellock, Alexis Courbet, William Sheffler, Jue Wang, Preetham Venkatesh, Isaac Sappington, Susana Vázquez Torres, Anna Lauko, Valentin De Bortoli, Emile Mathieu, Sergey Ovchinnikov, Regina Barzilay, Tommi S. Jaakkola, Frank DiMaio, Minkyung Baek, David Baker
De novo design of protein structure and function with RFdiffusion Journal Article
In: Nature, 2023.
@article{Watson2023,
  title     = {De novo design of protein structure and function with RFdiffusion},
  author    = {Watson, Joseph L.
               and Juergens, David
               and Bennett, Nathaniel R.
               and Trippe, Brian L.
               and Yim, Jason
               and Eisenach, Helen E.
               and Ahern, Woody
               and Borst, Andrew J.
               and Ragotte, Robert J.
               and Milles, Lukas F.
               and Wicky, Basile I. M.
               and Hanikel, Nikita
               and Pellock, Samuel J.
               and Courbet, Alexis
               and Sheffler, William
               and Wang, Jue
               and Venkatesh, Preetham
               and Sappington, Isaac
               and Torres, Susana Vázquez
               and Lauko, Anna
               and De Bortoli, Valentin
               and Mathieu, Emile
               and Ovchinnikov, Sergey
               and Barzilay, Regina
               and Jaakkola, Tommi S.
               and DiMaio, Frank
               and Baek, Minkyung
               and Baker, David},
  url       = {https://www.nature.com/articles/s41586-023-06415-8, Nature
https://www.bakerlab.org/wp-content/uploads/2023/07/s41586-023-06415-8_reference.pdf, PDF (29MB)},
  doi       = {10.1038/s41586-023-06415-8},
  year      = {2023},
  date      = {2023-07-11},
  journal   = {Nature},
  abstract  = {There has been considerable recent progress in designing new proteins using deep learning methods. Despite this progress, a general deep learning framework for protein design that enables solution of a wide range of design challenges, including de novo binder design and design of higher order symmetric architectures, has yet to be described. Diffusion models have had considerable success in image and language generative modeling but limited success when applied to protein modeling, likely due to the complexity of protein backbone geometry and sequence-structure relationships. Here we show that by fine tuning the RoseTTAFold structure prediction network on protein structure denoising tasks, we obtain a generative model of protein backbones that achieves outstanding performance on unconditional and topology-constrained protein monomer design, protein binder design, symmetric oligomer design, enzyme active site scaffolding, and symmetric motif scaffolding for therapeutic and metal-binding protein design. We demonstrate the power and generality of the method, called RoseTTAFold Diffusion (RFdiffusion), by experimentally characterizing the structures and functions of hundreds of designed symmetric assemblies, metal binding proteins and protein binders. The accuracy of RFdiffusion is confirmed by the cryo-EM structure of a designed binder in complex with Influenza hemagglutinin which is nearly identical to the design model. In a manner analogous to networks which produce images from user-specified inputs, RFdiffusion enables the design of diverse functional proteins from simple molecular specifications.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Nathaniel R. Bennett, Brian Coventry, Inna Goreshnik, Buwei Huang, Aza Allen, Dionne Vafeados, Ying Po Peng, Justas Dauparas, Minkyung Baek, Lance Stewart, Frank DiMaio, Steven De Munck, Savvas N. Savvides, David Baker
Improving de novo protein binder design with deep learning Journal Article
In: Nature Communications, 2023.
@article{Bennett2023,
  title     = {Improving de novo protein binder design with deep learning},
  journal   = {Nature Communications},
  author    = {Bennett, Nathaniel R. and Coventry, Brian and Goreshnik, Inna and Huang, Buwei and Allen, Aza and Vafeados, Dionne and Peng, Ying Po and Dauparas, Justas and Baek, Minkyung and Stewart, Lance and DiMaio, Frank and De Munck, Steven and Savvides, Savvas N. and Baker, David},
  year      = {2023},
  date      = {2023-05-06},
  doi       = {10.1038/s41467-023-38328-5},
  url       = {https://www.nature.com/articles/s41467-023-38328-5, Nature Communications (Open Access)},
  abstract  = {Recently it has become possible to de novo design high affinity protein binding proteins from target structural information alone. There is, however, considerable room for improvement as the overall design success rate is low. Here, we explore the augmentation of energy-based protein binder design using deep learning. We find that using AlphaFold2 or RoseTTAFold to assess the probability that a designed sequence adopts the designed monomer structure, and the probability that this structure binds the target as designed, increases design success rates nearly 10-fold. We find further that sequence design using ProteinMPNN rather than Rosetta considerably increases computational efficiency.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
David E. Kim, Davin R. Jensen, David Feldman, Doug Tischer, Ayesha Saleem, Cameron M. Chow, Xinting Li, Lauren Carter, Lukas Milles, Hannah Nguyen, Alex Kang, Asim K. Bera, Francis C. Peterson, Brian F. Volkman, Sergey Ovchinnikov, David Baker
De novo design of small beta barrel proteins Journal Article
In: Proceedings of the National Academy of Sciences, 2023.
@article{Kim2023,
  title     = {De novo design of small beta barrel proteins},
  author    = {Kim, David E. and Jensen, Davin R. and Feldman, David and Tischer, Doug and Saleem, Ayesha and Chow, Cameron M. and Li, Xinting and Carter, Lauren and Milles, Lukas and Nguyen, Hannah and Kang, Alex and Bera, Asim K. and Peterson, Francis C. and Volkman, Brian F. and Ovchinnikov, Sergey and Baker, David},
  url       = {https://www.pnas.org/doi/10.1073/pnas.2207974120, PNAS (Open Access)},
  doi       = {10.1073/pnas.2207974120},
  year      = {2023},
  date      = {2023-03-10},
  urldate   = {2023-03-10},
  journal   = {Proceedings of the National Academy of Sciences},
  abstract  = {Small beta barrel proteins are attractive targets for computational design because of their considerable functional diversity despite their very small size (<70 amino acids). However, there are considerable challenges to designing such structures, and there has been little success thus far. Because of the small size, the hydrophobic core stabilizing the fold is necessarily very small, and the conformational strain of barrel closure can oppose folding; also intermolecular aggregation through free beta strand edges can compete with proper monomer folding. Here, we explore the de novo design of small beta barrel topologies using both Rosetta energy–based methods and deep learning approaches to design four small beta barrel folds: Src homology 3 (SH3) and oligonucleotide/oligosaccharide-binding (OB) topologies found in nature and five and six up-and-down-stranded barrels rarely if ever seen in nature. Both approaches yielded successful designs with high thermal stability and experimentally determined structures with less than 2.4 Å rmsd from the designed models. Using deep learning for backbone generation and Rosetta for sequence design yielded higher design success rates and increased structural diversity than Rosetta alone. The ability to design a large and structurally diverse set of small beta barrel proteins greatly increases the protein shape space available for designing binders to protein targets of interest.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Yeh, Andy Hsien-Wei and Norn, Christoffer and Kipnis, Yakov and Tischer, Doug and Pellock, Samuel J. and Evans, Declan and Ma, Pengchen and Lee, Gyu Rie and Zhang, Jason Z. and Anishchenko, Ivan and Coventry, Brian and Cao, Longxing and Dauparas, Justas and Halabiya, Samer and DeWitt, Michelle and Carter, Lauren and Houk, K. N. and Baker, David
De novo design of luciferases using deep learning Journal Article
In: Nature, 2023.
@article{Yeh2023,
title = {De novo design of luciferases using deep learning},
author = {Yeh, Andy Hsien-Wei
and Norn, Christoffer
and Kipnis, Yakov
and Tischer, Doug
and Pellock, Samuel J.
and Evans, Declan
and Ma, Pengchen
and Lee, Gyu Rie
and Zhang, Jason Z.
and Anishchenko, Ivan
and Coventry, Brian
and Cao, Longxing
and Dauparas, Justas
and Halabiya, Samer
and DeWitt, Michelle
and Carter, Lauren
and Houk, K. N.
and Baker, David},
url = {https://www.nature.com/articles/s41586-023-05696-3, Nature (Open Access)},
doi = {10.1038/s41586-023-05696-3},
year = {2023},
date = {2023-02-22},
journal = {Nature},
abstract = {De novo enzyme design has sought to introduce active sites and substrate-binding pockets that are predicted to catalyse a reaction of interest into geometrically compatible native scaffolds, but has been limited by a lack of suitable protein structures and the complexity of native protein sequence–structure relationships. Here we describe a deep-learning-based ‘family-wide hallucination’ approach that generates large numbers of idealized protein structures containing diverse pocket shapes and designed sequences that encode them. We use these scaffolds to design artificial luciferases that selectively catalyse the oxidative chemiluminescence of the synthetic luciferin substrates diphenylterazine and 2-deoxycoelenterazine. The designed active sites position an arginine guanidinium group adjacent to an anion that develops during the reaction in a binding pocket with high shape complementarity. For both luciferin substrates, we obtain designed luciferases with high selectivity; the most active of these is a small (13.9 kDa) and thermostable (with a melting temperature higher than 95 °C) enzyme that has a catalytic efficiency on diphenylterazine (kcat/Km = 10⁶ M⁻¹ s⁻¹) comparable to that of native luciferases, but a much higher substrate specificity. The creation of highly active and specific biocatalysts from scratch with broad applications in biomedicine is a key milestone for computational enzyme design, and our approach should enable generation of a wide range of luciferases and other enzymes.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
COLLABORATOR LED
Sorry, no publications matched your criteria.
2022
FROM THE LAB
B. I. M. Wicky, L. F. Milles, A. Courbet, R. J. Ragotte, J. Dauparas, E. Kinfu, S. Tipps, R. D. Kibler, M. Baek, F. DiMaio, X. Li, L. Carter, A. Kang, H. Nguyen, A. K. Bera, D. Baker
Hallucinating symmetric protein assemblies Journal Article
In: Science, 2022.
@article{Wicky2022,
  title     = {Hallucinating symmetric protein assemblies},
  author    = {B. I. M. Wicky and L. F. Milles and A. Courbet and R. J. Ragotte and J. Dauparas and E. Kinfu and S. Tipps and R. D. Kibler and M. Baek and F. DiMaio and X. Li and L. Carter and A. Kang and H. Nguyen and A. K. Bera and D. Baker},
  url       = {https://www.science.org/doi/abs/10.1126/science.add1964, Science
https://www.bakerlab.org/wp-content/uploads/2022/09/Wicky_etal_Science2022_Hallucinating_symmetric_protein_assemblies.pdf, PDF
},
  doi       = {10.1126/science.add1964},
  year      = {2022},
  date      = {2022-09-15},
  journal   = {Science},
  abstract  = {Deep learning generative approaches provide an opportunity to broadly explore protein structure space beyond the sequences and structures of natural proteins. Here we use deep network hallucination to generate a wide range of symmetric protein homo-oligomers given only a specification of the number of protomers and the protomer length. Crystal structures of 7 designs are very close to the computational models (median RMSD: 0.6 Å), as are 3 cryoEM structures of giant 10 nanometer rings with up to 1550 residues and C33 symmetry; all differ considerably from previously solved structures. Our results highlight the rich diversity of new protein structures that can be generated using deep learning, and pave the way for the design of increasingly complex components for nanomachines and biomaterials.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Dauparas, J. and Anishchenko, I. and Bennett, N. and Bai, H. and Ragotte, R. J. and Milles, L. F. and Wicky, B. I. M. and Courbet, A. and de Haas, R. J. and Bethel, N. and Leung, P. J. Y. and Huddy, T. F. and Pellock, S. and Tischer, D. and Chan, F. and Koepnick, B. and Nguyen, H. and Kang, A. and Sankaran, B. and Bera, A. K. and King, N. P. and Baker, D.
Robust deep learning–based protein sequence design using ProteinMPNN Journal Article
In: Science, 2022.
@article{Dauparas2022,
title = {Robust deep learning–based protein sequence design using {ProteinMPNN}},
author = {Dauparas, J.
and Anishchenko, I.
and Bennett, N.
and Bai, H.
and Ragotte, R. J.
and Milles, L. F.
and Wicky, B. I. M.
and Courbet, A.
and de Haas, R. J.
and Bethel, N.
and Leung, P. J. Y.
and Huddy, T. F.
and Pellock, S.
and Tischer, D.
and Chan, F.
and Koepnick, B.
and Nguyen, H.
and Kang, A.
and Sankaran, B.
and Bera, A. K.
and King, N. P.
and Baker, D.},
url = {https://www.science.org/doi/abs/10.1126/science.add2187, Science
https://www.bakerlab.org/wp-content/uploads/2022/09/Dauparas_etal_Science2022_Sequence_design_via_ProteinMPNN.pdf, PDF},
doi = {10.1126/science.add2187},
year = {2022},
date = {2022-09-15},
journal = {Science},
abstract = {While deep learning has revolutionized protein structure prediction, almost all experimentally characterized de novo protein designs have been generated using physically based approaches such as Rosetta. Here we describe a deep learning–based protein sequence design method, ProteinMPNN, with outstanding performance in both in silico and experimental tests. On native protein backbones, ProteinMPNN has a sequence recovery of 52.4%, compared to 32.9% for Rosetta. The amino acid sequence at different positions can be coupled between single or multiple chains, enabling application to a wide range of current protein design challenges. We demonstrate the broad utility and high accuracy of ProteinMPNN using X-ray crystallography, cryoEM and functional studies by rescuing previously failed designs, made using Rosetta or AlphaFold, of protein monomers, cyclic homo-oligomers, tetrahedral nanoparticles, and target binding proteins.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jue Wang, Sidney Lisanza, David Juergens, Doug Tischer, Joseph L. Watson, Karla M. Castro, Robert Ragotte, Amijai Saragovi, Lukas F. Milles, Minkyung Baek, Ivan Anishchenko, Wei Yang, Derrick R. Hicks, Marc Expòsit, Thomas Schlichthaerle, Jung-Ho Chun, Justas Dauparas, Nathaniel Bennett, Basile I. M. Wicky, Andrew Muenks, Frank DiMaio, Bruno Correia, Sergey Ovchinnikov, David Baker
Scaffolding protein functional sites using deep learning Journal Article
In: Science, 2022.
@article{Wang2022,
  title     = {Scaffolding protein functional sites using deep learning},
  author    = {Jue Wang and Sidney Lisanza and David Juergens and Doug Tischer and Joseph L. Watson and Karla M. Castro and Robert Ragotte and Amijai Saragovi and Lukas F. Milles and Minkyung Baek and Ivan Anishchenko and Wei Yang and Derrick R. Hicks and Marc Expòsit and Thomas Schlichthaerle and Jung-Ho Chun and Justas Dauparas and Nathaniel Bennett and Basile I. M. Wicky and Andrew Muenks and Frank DiMaio and Bruno Correia and Sergey Ovchinnikov and David Baker },
  url       = {https://www.science.org/doi/abs/10.1126/science.abn2100, Science
https://www.ipd.uw.edu/wp-content/uploads/2022/07/science.abn2100.pdf, Download PDF},
  doi       = {10.1126/science.abn2100},
  year      = {2022},
  date      = {2022-07-21},
  urldate   = {2022-07-21},
  journal   = {Science},
  abstract  = {The binding and catalytic functions of proteins are generally mediated by a small number of functional residues held in place by the overall protein structure. Here, we describe deep learning approaches for scaffolding such functional sites without needing to prespecify the fold or secondary structure of the scaffold. The first approach, “constrained hallucination,” optimizes sequences such that their predicted structures contain the desired functional site. The second approach, “inpainting,” starts from the functional site and fills in additional sequence and structure to create a viable protein scaffold in a single forward pass through a specifically trained RoseTTAFold network. We use these two methods to design candidate immunogens, receptor traps, metalloproteins, enzymes, and protein-binding proteins and validate the designs using a combination of in silico and experimental tests. Protein design has had success in finding sequences that fold into a desired conformation, but designing functional proteins remains challenging. Wang et al. describe two deep-learning methods to design proteins that contain prespecified functional sites. In the first, they found sequences predicted to fold into stable structures that contain the functional site. In the second, they retrained a structure prediction network to recover the sequence and full structure of a protein given only the functional site. The authors demonstrate their methods by designing proteins containing a variety of functional motifs. —VV Deep-learning methods enable the scaffolding of desired functional residues within a well-folded designed protein.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Minkyung Baek, David Baker
Deep learning and protein structure modeling Journal Article
In: Nature Methods, 2022.
@article{Baek2022,
  title     = {Deep learning and protein structure modeling},
  author    = {Minkyung Baek and David Baker},
  url       = {https://www.nature.com/articles/s41592-021-01360-8, Nature Methods
https://www.bakerlab.org/wp-content/uploads/2022/01/Baek_Baker_NatureMethods2022_Deep_Learning_and_Protein_Structure_Modeling.pdf, Download PDF
},
  doi       = {10.1038/s41592-021-01360-8},
  year      = {2022},
  date      = {2022-01-22},
  urldate   = {2022-01-22},
  journal   = {Nature Methods},
  abstract  = {Deep learning has transformed protein structure modeling. Here we relate AlphaFold and RoseTTAFold to classical physically based approaches to protein structure prediction, and discuss the many areas of structural biology that are likely to be affected by further advances in deep learning.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Sen, Neeladri and Anishchenko, Ivan and Bordin, N. and Sillitoe, Ian and Velankar, Sameer and Baker, David and Orengo, Christine
Characterizing and explaining the impact of disease-associated mutations in proteins without known structures or structural homologs Journal Article
In: Briefings in Bioinformatics, 2022.
@article{Sen2022,
title = {Characterizing and explaining the impact of disease-associated mutations in proteins without known structures or structural homologs},
author = {Sen, Neeladri
and Anishchenko, Ivan
and Bordin, N.
and Sillitoe, Ian
and Velankar, Sameer
and Baker, David
and Orengo, Christine},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9294430/},
doi = {10.1093/bib/bbac187},
year = {2022},
date = {2022-07-18},
journal = {Briefings in Bioinformatics},
abstract = {Mutations in human proteins lead to diseases. The structure of these proteins can help understand the mechanism of such diseases and develop therapeutics against them. With improved deep learning techniques, such as RoseTTAFold and AlphaFold, we can predict the structure of proteins even in the absence of structural homologs. We modeled and extracted the domains from 553 disease-associated human proteins without known protein structures or close homologs in the Protein Databank. We noticed that the model quality was higher and the Root mean square deviation (RMSD) lower between AlphaFold and RoseTTAFold models for domains that could be assigned to CATH families as compared to those which could only be assigned to Pfam families of unknown structure or could not be assigned to either. We predicted ligand-binding sites, protein-protein interfaces and conserved residues in these predicted structures. We then explored whether the disease-associated missense mutations were in the proximity of these predicted functional sites, whether they destabilized the protein structure based on ddG calculations or whether they were predicted to be pathogenic. We could explain 80% of these disease-associated mutations based on proximity to functional sites, structural destabilization or pathogenicity. When compared to polymorphisms, a larger percentage of disease-associated missense mutations were buried, closer to predicted functional sites, predicted as destabilizing and pathogenic. Usage of models from the two state-of-the-art techniques provide better confidence in our predictions, and we explain 93 additional mutations based on RoseTTAFold models which could not be explained based solely on AlphaFold models.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Sarah L. Lovelock, Rebecca Crawshaw, Sophie Basler, Colin Levy, David Baker, Donald Hilvert, Anthony P. Green
The road to fully programmable protein catalysis Journal Article
In: Nature, 2022.
@article{Lovelock2022,
  title     = {The road to fully programmable protein catalysis},
  author    = {Sarah L. Lovelock and Rebecca Crawshaw and Sophie Basler and Colin Levy and David Baker and Donald Hilvert and Anthony P. Green},
  url       = {https://www.nature.com/articles/s41586-022-04456-z, Nature
https://www.bakerlab.org/wp-content/uploads/2022/06/s41586-022-04456-z.pdf, Download PDF},
  doi       = {10.1038/s41586-022-04456-z},
  year      = {2022},
  date      = {2022-06-01},
  journal   = {Nature},
  abstract  = {The ability to design efficient enzymes from scratch would have a profound effect on chemistry, biotechnology and medicine. Rapid progress in protein engineering over the past decade makes us optimistic that this ambition is within reach. The development of artificial enzymes containing metal cofactors and noncanonical organocatalytic groups shows how protein structure can be optimized to harness the reactivity of nonproteinogenic elements. In parallel, computational methods have been used to design protein catalysts for diverse reactions on the basis of fundamental principles of transition state stabilization. Although the activities of designed catalysts have been quite low, extensive laboratory evolution has been used to generate efficient enzymes. Structural analysis of these systems has revealed the high degree of precision that will be needed to design catalysts with greater activity. To this end, emerging protein design methods, including deep learning, hold particular promise for improving model accuracy. Here we take stock of key developments in the field and highlight new opportunities for innovation that should allow us to transition beyond the current state of the art and enable the robust design of biocatalysts to address societal needs.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Linder, Johannes, La Fleur, Alyssa, Chen, Zibo, Ljubetič, Ajasja, Baker, David, Kannan, Sreeram, Seelig, Georg
Interpreting neural networks for biological sequences by learning stochastic masks Journal Article
In: Nature Machine Intelligence, 2022.
@article{Linder2022,
  title     = {Interpreting neural networks for biological sequences by learning stochastic masks},
  author    = {Linder, Johannes and La Fleur, Alyssa and Chen, Zibo and Ljubetič, Ajasja and Baker, David and Kannan, Sreeram and Seelig, Georg},
  url       = {https://www.nature.com/articles/s42256-021-00428-6, Nature Machine Intelligence},
  doi       = {10.1038/s42256-021-00428-6},
  year      = {2022},
  date      = {2022-01-25},
  urldate   = {2022-01-25},
  journal   = {Nature Machine Intelligence},
  abstract  = {Sequence-based neural networks can learn to make accurate predictions from large biological datasets, but model interpretation remains challenging. Many existing feature attribution methods are optimized for continuous rather than discrete input patterns and assess individual feature importance in isolation, making them ill-suited for interpreting nonlinear interactions in molecular sequences. Here, building on work in computer vision and natural language processing, we developed an approach based on deep learning—scrambler networks—wherein the most important sequence positions are identified with learned input masks. Scramblers learn to predict position-specific scoring matrices where unimportant nucleotides or residues are scrambled by raising their entropy. We apply scramblers to interpret the effects of genetic variants, uncover nonlinear interactions between cis-regulatory elements, explain binding specificity for protein–protein interactions, and identify structural determinants of de novo-designed proteins. We show that scramblers enable efficient attribution across large datasets and result in high-quality explanations, often outperforming state-of-the-art methods.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2021
FROM THE LAB
Minkyung Baek, Ivan Anishchenko, Hahnbeom Park, Ian R. Humphreys, David Baker
Protein oligomer modeling guided by predicted inter-chain contacts in CASP14 Journal Article
In: Proteins, 2021.
@article{Baek2021b,
title = {Protein oligomer modeling guided by predicted inter-chain contacts in {CASP14}},
author = {Minkyung Baek and Ivan Anishchenko and Hahnbeom Park and Ian R. Humphreys and David Baker},
url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.26197, Proteins},
doi = {10.1002/prot.26197},
year = {2021},
date = {2021-07-29},
urldate = {2021-07-29},
journal = {Proteins},
abstract = {For CASP14, we developed deep learning-based methods for predicting homo-oligomeric and hetero-oligomeric contacts and used them for oligomer modeling. To build structure models, we developed an oligomer structure generation method that utilizes predicted inter-chain contacts to guide iterative restrained minimization from random backbone structures. We supplemented this gradient-based fold-and-dock method with template-based and ab initio docking approaches using deep learning-based subunit predictions on 29 assembly targets. These methods produced oligomer models with summed Z-scores 5.5 units higher than the next best group, with the fold-and-dock method having the best relative performance. Over the eight targets for which this method was used, the best of the five submitted models had average oligomer TM-score of 0.71 (average oligomer TM-score of the next best group: 0.64), and explicit modeling of inter-subunit interactions improved modeling of six out of 40 individual domains (ΔGDT-TS > 2.0).
},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Norn, Christoffer, Wicky, Basile I. M., Juergens, David, Liu, Sirui, Kim, David, Tischer, Doug, Koepnick, Brian, Anishchenko, Ivan, Baker, David, Ovchinnikov, Sergey
Protein sequence design by conformational landscape optimization Journal Article
In: Proceedings of the National Academy of Sciences, vol. 118, no. 11, 2021.
@article{Norn2021,
title = {Protein sequence design by conformational landscape optimization},
author = {Norn, Christoffer and Wicky, Basile I. M. and Juergens, David and Liu, Sirui and Kim, David and Tischer, Doug and Koepnick, Brian and Anishchenko, Ivan and Baker, David and Ovchinnikov, Sergey},
url = {https://www.pnas.org/content/118/11/e2017228118, PNAS
https://www.bakerlab.org/wp-content/uploads/2021/03/Norn_etal_PNAS2021_LandscapeOptimization.pdf, Download PDF},
doi = {10.1073/pnas.2017228118},
year = {2021},
date = {2021-03-16},
urldate = {2021-03-16},
journal = {Proceedings of the National Academy of Sciences},
volume = {118},
number = {11},
abstract = {Almost all proteins fold to their lowest free energy state, which is determined by their amino acid sequence. Computational protein design has primarily focused on finding sequences that have very low energy in the target designed structure. However, what is most relevant during folding is not the absolute energy of the folded state but the energy difference between the folded state and the lowest-lying alternative states. We describe a deep learning approach that captures aspects of the folding landscape, in particular the presence of structures in alternative energy minima, and show that it can enhance current protein design methods. The protein design problem is to identify an amino acid sequence that folds to a desired structure. Given Anfinsen’s thermodynamic hypothesis of folding, this can be recast as finding an amino acid sequence for which the desired structure is the lowest energy state. As this calculation involves not only all possible amino acid sequences but also, all possible structures, most current approaches focus instead on the more tractable problem of finding the lowest-energy amino acid sequence for the desired structure, often checking by protein structure prediction in a second step that the desired structure is indeed the lowest-energy conformation for the designed sequence, and typically discarding a large fraction of designed sequences for which this is not the case. Here, we show that by backpropagating gradients through the transform-restrained Rosetta (trRosetta) structure prediction network from the desired structure to the input amino acid sequence, we can directly optimize over all possible amino acid sequences and all possible structures in a single calculation. We find that trRosetta calculations, which consider the full conformational landscape, can be more effective than Rosetta single-point energy estimations in predicting folding and stability of de novo designed proteins. 
We compare sequence design by conformational landscape optimization with the standard energy-based sequence design methodology in Rosetta and show that the former can result in energy landscapes with fewer alternative energy minima. We show further that more funneled energy landscapes can be designed by combining the strengths of the two approaches: the low-resolution trRosetta model serves to disfavor alternative states, and the high-resolution Rosetta model serves to create a deep energy minimum at the design target structure.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
COLLABORATOR LED
Du, Zongyang and Su, Hong and Wang, Wenkai and Ye, Lisha and Wei, Hong and Peng, Zhenling and Anishchenko, Ivan and Baker, David and Yang, Jianyi
The trRosetta server for fast and accurate protein structure prediction Journal Article
In: Nature Protocols, 2021.
@article{Du2021,
title = {The {trRosetta} server for fast and accurate protein structure prediction},
author = {Du, Zongyang
and Su, Hong
and Wang, Wenkai
and Ye, Lisha
and Wei, Hong
and Peng, Zhenling
and Anishchenko, Ivan
and Baker, David
and Yang, Jianyi},
url = {https://www.nature.com/articles/s41596-021-00628-9
https://www.bakerlab.org/wp-content/uploads/2022/01/Du_etal_NatProt2021_trRosetta_server.pdf},
doi = {10.1038/s41596-021-00628-9},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
journal = {Nature Protocols},
abstract = {The trRosetta (transform-restrained Rosetta) server is a web-based platform for fast and accurate protein structure prediction, powered by deep learning and Rosetta. With the input of a protein’s amino acid sequence, a deep neural network is first used to predict the inter-residue geometries, including distance and orientations. The predicted geometries are then transformed as restraints to guide the structure prediction on the basis of direct energy minimization, which is implemented under the framework of Rosetta. The trRosetta server distinguishes itself from other similar structure prediction servers in terms of rapid and accurate de novo structure prediction. As an illustration, trRosetta was applied to two Pfam families with unknown structures, for which the predicted de novo models were estimated to have high accuracy. Nevertheless, to take advantage of homology modeling, homologous templates are used as additional inputs to the network automatically. In general, it takes ~1 h to predict the final structure for a typical protein with ~300 amino acids, using a maximum of 10 CPU cores in parallel in our cluster system. To enable large-scale structure modeling, a downloadable package of trRosetta with open-source codes is available as well. A detailed guidance for using the package is also available in this protocol. The server and the package are available at https://yanglab.nankai.edu.cn/trRosetta/ and https://yanglab.nankai.edu.cn/trRosetta/download/, respectively.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Naozumi Hiranuma, Hahnbeom Park, Minkyung Baek, Ivan Anishchenko, Justas Dauparas, David Baker
Improved protein structure refinement guided by deep learning based accuracy estimation Journal Article
In: Nature Communications, vol. 12, no. 1, pp. 1340, 2021.
@article{Hiranuma2021,
title = {Improved protein structure refinement guided by deep learning based accuracy estimation},
author = {Naozumi Hiranuma and Hahnbeom Park and Minkyung Baek and Ivan Anishchenko and Justas Dauparas and David Baker
},
url = {https://www.nature.com/articles/s41467-021-21511-x, Nature Communications
https://www.bakerlab.org/wp-content/uploads/2021/02/Hiranuma_etal_NatureComms2021_DeepLearningStructureRefinement.pdf, Download PDF},
doi = {10.1038/s41467-021-21511-x},
year = {2021},
date = {2021-02-26},
urldate = {2021-02-26},
journal = {Nature Communications},
volume = {12},
number = {1},
pages = {1340},
abstract = {We develop a deep learning framework (DeepAccNet) that estimates per-residue accuracy and residue-residue distance signed error in protein models and uses these predictions to guide Rosetta protein structure refinement. The network uses 3D convolutions to evaluate local atomic environments followed by 2D convolutions to provide their global contexts and outperforms other methods that similarly predict the accuracy of protein structure models. Overall accuracy predictions for X-ray and cryoEM structures in the PDB correlate with their resolution, and the network should be broadly useful for assessing the accuracy of both predicted structure models and experimentally determined structures and identifying specific regions likely to be in error. Incorporation of the accuracy predictions at multiple stages in the Rosetta refinement protocol considerably increased the accuracy of the resulting protein structure models, illustrating how deep learning can improve search for global energy minima of biomolecules.
},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ziatdinov, Maxim and Zhang, Shuai and Dollar, Orion and Pfaendtner, Jim and Mundy, Christopher J. and Li, Xin and Pyles, Harley and Baker, David and De Yoreo, James J. and Kalinin, Sergei V.
Quantifying the Dynamics of Protein Self-Organization Using Deep Learning Analysis of Atomic Force Microscopy Data Journal Article
In: Nano Letters, 2021.
@article{Ziatdinov2021,
  title     = {Quantifying the Dynamics of Protein Self-Organization Using Deep Learning Analysis of Atomic Force Microscopy Data},
  author    = {Ziatdinov, Maxim and Zhang, Shuai and Dollar, Orion and Pfaendtner, Jim and Mundy, Christopher J. and Li, Xin and Pyles, Harley and Baker, David and De Yoreo, James J. and Kalinin, Sergei V.},
  url       = {https://pubs.acs.org/doi/10.1021/acs.nanolett.0c03447},
  doi       = {10.1021/acs.nanolett.0c03447},
  year      = {2021},
  date      = {2021-01-13},
  journal   = {Nano Letters},
  abstract  = {The dynamics of protein self-assembly on the inorganic surface and the resultant geometric patterns are visualized using high-speed atomic force microscopy. The time dynamics of the classical macroscopic descriptors such as 2D fast Fourier transforms, correlation, and pair distribution functions are explored using the unsupervised linear unmixing, demonstrating the presence of static ordered and dynamic disordered phases and establishing their time dynamics. The deep learning (DL)-based workflow is developed to analyze detailed particle dynamics and explore the evolution of local geometries. Finally, we use a combination of DL feature extraction and mixture modeling to define particle neighborhoods free of physics constraints, allowing for a separation of possible classes of particle behavior and identification of the associated transitions. Overall, this work establishes the workflow for the analysis of the self-organization processes in complex systems from observational data and provides insight into the fundamental mechanisms.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2020
FROM THE LAB
Yang, Jianyi, Anishchenko, Ivan, Park, Hahnbeom, Peng, Zhenling, Ovchinnikov, Sergey, Baker, David
Improved protein structure prediction using predicted interresidue orientations Journal Article
In: Proceedings of the National Academy of Sciences, 2020, ISBN: 0027-8424.
@article{Yang2020,
title = {Improved protein structure prediction using predicted interresidue orientations},
author = {Yang, Jianyi and Anishchenko, Ivan and Park, Hahnbeom and Peng, Zhenling and Ovchinnikov, Sergey and Baker, David},
url = {https://www.pnas.org/content/early/2020/01/01/1914677117
https://www.bakerlab.org/wp-content/uploads/2020/01/Yang2020_ImprovedStructurePredictionInterresidueOrientations.pdf
},
doi = {10.1073/pnas.1914677117},
issn = {0027-8424},
year = {2020},
date = {2020-01-02},
journal = {Proceedings of the National Academy of Sciences},
abstract = {Protein structure prediction is a longstanding challenge in computational biology. Through extension of deep learning-based prediction to interresidue orientations in addition to distances, and the development of a constrained optimization by Rosetta, we show that more accurate models can be generated. Results on a set of 18 de novo-designed proteins suggests the proposed method should be directly applicable to current challenges in de novo protein design. The prediction of interresidue contacts and distances from coevolutionary data using deep learning has considerably advanced protein structure prediction. Here, we build on these advances by developing a deep residual network for predicting interresidue orientations, in addition to distances, and a Rosetta-constrained energy-minimization protocol for rapidly and accurately generating structure models guided by these restraints. In benchmark tests on 13th Community-Wide Experiment on the Critical Assessment of Techniques for Protein Structure Prediction (CASP13)- and Continuous Automated Model Evaluation (CAMEO)-derived sets, the method outperforms all previously described structure-prediction methods. Although trained entirely on native proteins, the network consistently assigns higher probability to de novo-designed proteins, identifying the key fold-determining residues and providing an independent quantitative measure of the "ideality" of a protein structure. The method promises to be useful for a broad range of protein structure prediction and design problems.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
COLLABORATOR LED
Sorry, no publications matched your criteria.
2019
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Qi Wu, Zhenling Peng, Ivan Anishchenko, Qian Cong, David Baker, Jianyi Yang
Protein contact prediction using metagenome sequence data and residual neural networks Journal Article
In: Bioinformatics, vol. 36, no. 1, 2019.
@article{Wu2019,
title = {Protein contact prediction using metagenome sequence data and residual neural networks},
author = {Qi Wu and Zhenling Peng and Ivan Anishchenko and Qian Cong and David Baker and Jianyi Yang},
url = {https://academic.oup.com/bioinformatics/article/36/1/41/5512356},
doi = {10.1093/bioinformatics/btz477},
year = {2019},
date = {2019-06-07},
journal = {Bioinformatics},
volume = {36},
number = {1},
pages = {41--48},
abstract = {Motivation: Almost all protein residue contact prediction methods rely on the availability of deep multiple sequence alignments (MSAs). However, many proteins from the poorly populated families do not have sufficient number of homologs in the conventional UniProt database. Here we aim to solve this issue by exploring the rich sequence data from the metagenome sequencing projects. Results: Based on the improved MSA constructed from the metagenome sequence data, we developed MapPred, a new deep learning-based contact prediction method. MapPred consists of two component methods, DeepMSA and DeepMeta, both trained with the residual neural networks. DeepMSA was inspired by the recent method DeepCov, which was trained on 441 matrices of covariance features. By considering the symmetry of contact map, we reduced the number of matrices to 231, which makes the training more efficient in DeepMSA. Experiments show that DeepMSA outperforms DeepCov by 10–13% in precision. DeepMeta works by combining predicted contacts and other sequence profile features. Experiments on three benchmark datasets suggest that the contribution from the metagenome sequence data is significant with P-values less than 4.04E-17. MapPred is shown to be complementary and comparable the state-of-the-art methods. The success of MapPred is attributed to three factors: the deeper MSA from the metagenome sequence data, improved feature design in DeepMSA and optimized training by the residual neural networks.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2018
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2017-1988
ALL PAPERS
Sorry, no publications matched your criteria.