Preprints
Available on bioRxiv.
Publications
Kshirsagar, Meghana; Meller, Artur; Humphreys, Ian R; Sledzieski, Samuel; Xu, Yixi; Dodhia, Rahul; Horvitz, Eric; Berger, Bonnie; Bowman, Gregory R; Lavista Ferres, Juan; Baker, David; Baek, Minkyung
Rapid and accurate prediction of protein homo-oligomer symmetry using Seq2Symm Journal Article
In: Nature Communications, 2025.
@article{pmid40016259,
  title = {Rapid and accurate prediction of protein homo-oligomer symmetry using {Seq2Symm}},
  author = {Kshirsagar, Meghana and Meller, Artur and Humphreys, Ian R and Sledzieski, Samuel and Xu, Yixi and Dodhia, Rahul and Horvitz, Eric and Berger, Bonnie and Bowman, Gregory R and Lavista Ferres, Juan and Baker, David and Baek, Minkyung},
  url = {https://www.nature.com/articles/s41467-025-57148-3},
  pdf = {https://www.bakerlab.org/wp-content/uploads/2025/03/s41467-025-57148-3.pdf},
  doi = {10.1038/s41467-025-57148-3},
  year = {2025},
  date = {2025-02-27},
  urldate = {2025-02-27},
  journal = {Nature Communications},
  abstract = {The majority of proteins must form higher-order assemblies to perform their biological functions, yet few machine learning models can accurately and rapidly predict the symmetry of assemblies involving multiple copies of the same protein chain. Here, we address this gap by finetuning several classes of protein foundation models, to predict homo-oligomer symmetry. Our best model named Seq2Symm, which utilizes ESM2, outperforms existing template-based and deep learning methods achieving an average AUC-PR of 0.47, 0.44 and 0.49 across homo-oligomer symmetries on three held-out test sets compared to 0.24, 0.24 and 0.25 with template-based search. Seq2Symm uses a single sequence as input and can predict at the rate of ~80,000 proteins/hour. We apply this method to 5 proteomes and ~3.5 million unlabeled protein sequences, showing its promise to be used in conjunction with downstream computationally intensive all-atom structure generation methods such as RoseTTAFold2 and AlphaFold2-multimer. Code, datasets, model are available at: https://github.com/microsoft/seq2symm .},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Humphreys, Ian R.; Zhang, Jing; Baek, Minkyung; Wang, Yaxi; Krishnakumar, Aditya; Pei, Jimin; Anishchenko, Ivan; Tower, Catherine A.; Jackson, Blake A.; Warrier, Thulasi; Hung, Deborah T.; Peterson, S. Brook; Mougous, Joseph D.; Cong, Qian; Baker, David
Protein interactions in human pathogens revealed through deep learning Journal Article
In: Nature Microbiology, 2024, ISSN: 2058-5276.
@article{Humphreys2024,
  title = {Protein interactions in human pathogens revealed through deep learning},
  author = {Ian R. Humphreys and Jing Zhang and Minkyung Baek and Yaxi Wang and Aditya Krishnakumar and Jimin Pei and Ivan Anishchenko and Catherine A. Tower and Blake A. Jackson and Thulasi Warrier and Deborah T. Hung and S. Brook Peterson and Joseph D. Mougous and Qian Cong and David Baker},
  url = {https://www.nature.com/articles/s41564-024-01791-x},
  doi = {10.1038/s41564-024-01791-x},
  issn = {2058-5276},
  year = {2024},
  date = {2024-09-18},
  urldate = {2024-09-18},
  journal = {Nature Microbiology},
  publisher = {{Springer Science and Business Media LLC}},
  abstract = {Identification of bacterial protein–protein interactions and predicting the structures of these complexes could aid in the understanding of pathogenicity mechanisms and developing treatments for infectious diseases. Here we developed RoseTTAFold2-Lite, a rapid deep learning model that leverages residue–residue coevolution and protein structure prediction to systematically identify and structurally characterize protein–protein interactions at the proteome-wide scale. Using this pipeline, we searched through 78 million pairs of proteins across 19 human bacterial pathogens and identified 1,923 confidently predicted complexes involving essential genes and 256 involving virulence factors. Many of these complexes were not previously known; we experimentally tested 12 such predictions, and half of them were validated. The predicted interactions span core metabolic and virulence pathways ranging from post-transcriptional modification to acid neutralization to outer-membrane machinery and should contribute to our understanding of the biology of these important pathogens and the design of drugs to combat them.},
  note = {Open access},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Pillai, Arvind; Idris, Abbas; Philomin, Annika; Weidle, Connor; Skotheim, Rebecca; Leung, Philip J. Y.; Broerman, Adam; Demakis, Cullen; Borst, Andrew J.; Praetorius, Florian; Baker, David
De novo design of allosterically switchable protein assemblies Journal Article
In: Nature, 2024.
@article{Pillai2024,
  title = {De novo design of allosterically switchable protein assemblies},
  author = {Arvind Pillai and Abbas Idris and Annika Philomin and Connor Weidle and Rebecca Skotheim and Philip J. Y. Leung and Adam Broerman and Cullen Demakis and Andrew J. Borst and Florian Praetorius and David Baker},
  url = {https://www.nature.com/articles/s41586-024-07813-2},
  doi = {10.1038/s41586-024-07813-2},
  year = {2024},
  date = {2024-08-14},
  urldate = {2024-08-14},
  journal = {Nature},
  publisher = {{Springer Science and Business Media LLC}},
  abstract = {Allosteric modulation of protein function, wherein the binding of an effector to a protein triggers conformational changes at distant functional sites, plays a central part in the control of metabolism and cell signalling. There has been considerable interest in designing allosteric systems, both to gain insight into the mechanisms underlying such ‘action at a distance’ modulation and to create synthetic proteins whose functions can be regulated by effectors. However, emulating the subtle conformational changes distributed across many residues, characteristic of natural allosteric proteins, is a significant challenge. Here, inspired by the classic Monod–Wyman–Changeux model of cooperativity, we investigate the de novo design of allostery through rigid-body coupling of peptide-switchable hinge modules to protein interfaces that direct the formation of alternative oligomeric states. We find that this approach can be used to generate a wide variety of allosterically switchable systems, including cyclic rings that incorporate or eject subunits in response to peptide binding and dihedral cages that undergo effector-induced disassembly. Size-exclusion chromatography, mass photometry and electron microscopy reveal that these designed allosteric protein assemblies closely resemble the design models in both the presence and absence of peptide effectors and can have ligand-binding cooperativity comparable to classic natural systems such as haemoglobin. Our results indicate that allostery can arise from global coupling of the energetics of protein substructures without optimized side-chain–side-chain allosteric communication pathways and provide a roadmap for generating allosterically triggerable delivery systems, protein nanomachines and cellular feedback control circuitry.},
  note = {Open access},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Wait, Sarah J.; Expòsit, Marc; Lin, Sophia; Rappleye, Michael; Lee, Justin Daho; Colby, Samuel A.; Torp, Lily; Asencio, Anthony; Smith, Annette; Regnier, Michael; Moussavi-Harami, Farid; Baker, David; Kim, Christina K.; Berndt, Andre
Machine learning-guided engineering of genetically encoded fluorescent calcium indicators Journal Article
In: Nature Computational Science, 2024.
@article{Wait2024,
  title = {Machine learning-guided engineering of genetically encoded fluorescent calcium indicators},
  author = {Sarah J. Wait and Marc Expòsit and Sophia Lin and Michael Rappleye and Justin Daho Lee and Samuel A. Colby and Lily Torp and Anthony Asencio and Annette Smith and Michael Regnier and Farid Moussavi-Harami and David Baker and Christina K. Kim and Andre Berndt},
  url = {https://www.nature.com/articles/s43588-024-00611-w},
  pdf = {https://www.bakerlab.org/wp-content/uploads/2024/03/s43588-024-00611-w.pdf},
  doi = {10.1038/s43588-024-00611-w},
  year = {2024},
  date = {2024-03-21},
  urldate = {2024-03},
  journal = {Nature Computational Science},
  publisher = {{Springer Science and Business Media LLC}},
  abstract = {Here we used machine learning to engineer genetically encoded fluorescent indicators, protein-based sensors critical for real-time monitoring of biological activity. We used machine learning to predict the outcomes of sensor mutagenesis by analyzing established libraries that link sensor sequences to functions. Using the GCaMP calcium indicator as a scaffold, we developed an ensemble of three regression models trained on experimentally derived GCaMP mutation libraries. The trained ensemble performed an in silico functional screen on 1,423 novel, uncharacterized GCaMP variants. As a result, we identified the ensemble-derived GCaMP (eGCaMP) variants, eGCaMP and eGCaMP+, which achieve both faster kinetics and larger ∆F/F0 responses upon stimulation than previously published fast variants. Furthermore, we identified a combinatorial mutation with extraordinary dynamic range, eGCaMP2+, which outperforms the tested sixth-, seventh- and eighth-generation GCaMPs. These findings demonstrate the value of machine learning as a tool to facilitate the efficient engineering of proteins for desired biophysical characteristics.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Baek, Minkyung; McHugh, Ryan; Anishchenko, Ivan; Jiang, Hanlun; Baker, David; DiMaio, Frank
Accurate prediction of protein–nucleic acid complexes using RoseTTAFoldNA Journal Article
In: Nature Methods, 2023.
@article{Baek2023,
  title = {Accurate prediction of protein–nucleic acid complexes using {RoseTTAFoldNA}},
  author = {Minkyung Baek and Ryan McHugh and Ivan Anishchenko and Hanlun Jiang and David Baker and Frank DiMaio},
  url = {https://www.nature.com/articles/s41592-023-02086-5},
  doi = {10.1038/s41592-023-02086-5},
  year = {2023},
  date = {2023-11-23},
  urldate = {2023-11-23},
  journal = {Nature Methods},
  publisher = {{Springer Science and Business Media LLC}},
  abstract = {Protein–RNA and protein–DNA complexes play critical roles in biology. Despite considerable recent advances in protein structure prediction, the prediction of the structures of protein–nucleic acid complexes without homology to known complexes is a largely unsolved problem. Here we extend the RoseTTAFold machine learning protein-structure-prediction approach to additionally predict nucleic acid and protein–nucleic acid complexes. We develop a single trained network, RoseTTAFoldNA, that rapidly produces three-dimensional structure models with confidence estimates for protein–DNA and protein–RNA complexes. Here we show that confident predictions have considerably higher accuracy than current state-of-the-art methods. RoseTTAFoldNA should be broadly useful for modeling the structure of naturally occurring protein–nucleic acid complexes, and for designing sequence-specific RNA and DNA-binding proteins.},
  note = {Open access},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Wang, Jing Yang (John); Khmelinskaia, Alena; Sheffler, William; Miranda, Marcos C.; Antanasijevic, Aleksandar; Borst, Andrew J.; Torres, Susana V.; Shu, Chelsea; Hsia, Yang; Nattermann, Una; Ellis, Daniel; Walkey, Carl; Ahlrichs, Maggie; Chan, Sidney; Kang, Alex; Nguyen, Hannah; Sydeman, Claire; Sankaran, Banumathi; Wu, Mengyu; Bera, Asim K.; Carter, Lauren; Fiala, Brooke; Murphy, Michael; Baker, David; Ward, Andrew B.; King, Neil P.
Improving the secretion of designed protein assemblies through negative design of cryptic transmembrane domains Journal Article
In: Proceedings of the National Academy of Sciences, 2023.
@article{Wang2023,
  title = {Improving the secretion of designed protein assemblies through negative design of cryptic transmembrane domains},
  author = {Wang, Jing Yang (John)
    and Khmelinskaia, Alena
    and Sheffler, William
    and Miranda, Marcos C.
    and Antanasijevic, Aleksandar
    and Borst, Andrew J.
    and Torres, Susana V.
    and Shu, Chelsea
    and Hsia, Yang
    and Nattermann, Una
    and Ellis, Daniel
    and Walkey, Carl
    and Ahlrichs, Maggie
    and Chan, Sidney
    and Kang, Alex
    and Nguyen, Hannah
    and Sydeman, Claire
    and Sankaran, Banumathi
    and Wu, Mengyu
    and Bera, Asim K.
    and Carter, Lauren
    and Fiala, Brooke
    and Murphy, Michael
    and Baker, David
    and Ward, Andrew B.
    and King, Neil P.},
  url = {https://www.pnas.org/doi/10.1073/pnas.2214556120},
  doi = {10.1073/pnas.2214556120},
  year = {2023},
  date = {2023-03-08},
  urldate = {2023-03-08},
  journal = {Proceedings of the National Academy of Sciences},
  abstract = {Computationally designed protein nanoparticles have recently emerged as a promising platform for the development of new vaccines and biologics. For many applications, secretion of designed nanoparticles from eukaryotic cells would be advantageous, but in practice, they often secrete poorly. Here we show that designed hydrophobic interfaces that drive nanoparticle assembly are often predicted to form cryptic transmembrane domains, suggesting that interaction with the membrane insertion machinery could limit efficient secretion. We develop a general computational protocol, the Degreaser, to design away cryptic transmembrane domains without sacrificing protein stability. The retroactive application of the Degreaser to previously designed nanoparticle components and nanoparticles considerably improves secretion, and modular integration of the Degreaser into design pipelines results in new nanoparticles that secrete as robustly as naturally occurring protein assemblies. Both the Degreaser protocol and the nanoparticles we describe may be broadly useful in biotechnological applications.},
  note = {Open access},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Wicky, B. I. M.; Milles, L. F.; Courbet, A.; Ragotte, R. J.; Dauparas, J.; Kinfu, E.; Tipps, S.; Kibler, R. D.; Baek, M.; DiMaio, F.; Li, X.; Carter, L.; Kang, A.; Nguyen, H.; Bera, A. K.; Baker, D.
Hallucinating symmetric protein assemblies Journal Article
In: Science, 2022.
@article{Wicky2022,
  title = {Hallucinating symmetric protein assemblies},
  author = {B. I. M. Wicky and L. F. Milles and A. Courbet and R. J. Ragotte and J. Dauparas and E. Kinfu and S. Tipps and R. D. Kibler and M. Baek and F. DiMaio and X. Li and L. Carter and A. Kang and H. Nguyen and A. K. Bera and D. Baker},
  url = {https://www.science.org/doi/abs/10.1126/science.add1964},
  pdf = {https://www.bakerlab.org/wp-content/uploads/2022/09/Wicky_etal_Science2022_Hallucinating_symmetric_protein_assemblies.pdf},
  doi = {10.1126/science.add1964},
  year = {2022},
  date = {2022-09-15},
  journal = {Science},
  abstract = {Deep learning generative approaches provide an opportunity to broadly explore protein structure space beyond the sequences and structures of natural proteins. Here we use deep network hallucination to generate a wide range of symmetric protein homo-oligomers given only a specification of the number of protomers and the protomer length. Crystal structures of 7 designs are very close to the computational models (median RMSD: 0.6 Å), as are 3 cryoEM structures of giant 10 nanometer rings with up to 1550 residues and C33 symmetry; all differ considerably from previously solved structures. Our results highlight the rich diversity of new protein structures that can be generated using deep learning, and pave the way for the design of increasingly complex components for nanomachines and biomaterials.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Courbet, A.; Hansen, J.; Hsia, Y.; Bethel, N.; Park, Y. -J.; Xu, C.; Moyer, A.; Boyken, S. E.; Ueda, G.; Nattermann, U.; Nagarajan, D.; Silva, D.; Sheffler, W.; Quispe, J.; Nord, A.; King, N.; Bradley, P.; Veesler, D.; Kollman, J.; Baker, D.
Computational design of mechanically coupled axle-rotor protein assemblies Journal Article
In: Science, 2022.
@article{Courbet2022,
  title = {Computational design of mechanically coupled axle-rotor protein assemblies},
  author = {A. Courbet and J. Hansen and Y. Hsia and N. Bethel and Y.-J. Park and C. Xu and A. Moyer and S. E. Boyken and G. Ueda and U. Nattermann and D. Nagarajan and D. Silva and W. Sheffler and J. Quispe and A. Nord and N. King and P. Bradley and D. Veesler and J. Kollman and D. Baker},
  url = {https://www.science.org/doi/abs/10.1126/science.abm1183},
  pdf = {https://www.bakerlab.org/wp-content/uploads/2022/04/science.abm1183.pdf},
  doi = {10.1126/science.abm1183},
  year = {2022},
  date = {2022-04-21},
  urldate = {2022-04-21},
  journal = {Science},
  abstract = {Natural molecular machines contain protein components that undergo motion relative to each other. Designing such mechanically constrained nanoscale protein architectures with internal degrees of freedom is an outstanding challenge for computational protein design. Here we explore the de novo construction of protein machinery from designed axle and rotor components with internal cyclic or dihedral symmetry. We find that the axle-rotor systems assemble in vitro and in vivo as designed. Using cryo–electron microscopy, we find that these systems populate conformationally variable relative orientations reflecting the symmetry of the coupled components and the computationally designed interface energy landscape. These mechanical systems with internal degrees of freedom are a step toward the design of genetically encodable nanomachines. Protein rotary machines such as ATP synthase contain axle-like and ring-like components and couple biochemical energy to the mechanical work of rotating the components relative to each other. Courbet et al. have taken a step toward designing such axel-rotor nanomachines. A structural requirement is that interactions between the components must be strong enough to allow assembly but still allow different rotational states to be populated. The authors met this design challenge and computationally designed ring-like protein topologies (rotors) with a range of inner diameters that accommodate designed axle-like binding partners. The systems assemble and populate the different rotational states anticipated by the designs. These rotational energy landscapes provide one of two needed elements for a directional motor. —VV Computationally designed self-assembling axle-rotor protein systems populate multiple rotational states.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Linder, Johannes; La Fleur, Alyssa; Chen, Zibo; Ljubetič, Ajasja; Baker, David; Kannan, Sreeram; Seelig, Georg
Interpreting neural networks for biological sequences by learning stochastic masks Journal Article
In: Nature Machine Intelligence, 2022.
@article{Linder2022,
  title = {Interpreting neural networks for biological sequences by learning stochastic masks},
  author = {Linder, Johannes and La Fleur, Alyssa and Chen, Zibo and Ljubetič, Ajasja and Baker, David and Kannan, Sreeram and Seelig, Georg},
  url = {https://www.nature.com/articles/s42256-021-00428-6},
  doi = {10.1038/s42256-021-00428-6},
  year = {2022},
  date = {2022-01-25},
  urldate = {2022-01-25},
  journal = {Nature Machine Intelligence},
  abstract = {Sequence-based neural networks can learn to make accurate predictions from large biological datasets, but model interpretation remains challenging. Many existing feature attribution methods are optimized for continuous rather than discrete input patterns and assess individual feature importance in isolation, making them ill-suited for interpreting nonlinear interactions in molecular sequences. Here, building on work in computer vision and natural language processing, we developed an approach based on deep learning—scrambler networks—wherein the most important sequence positions are identified with learned input masks. Scramblers learn to predict position-specific scoring matrices where unimportant nucleotides or residues are scrambled by raising their entropy. We apply scramblers to interpret the effects of genetic variants, uncover nonlinear interactions between cis-regulatory elements, explain binding specificity for protein–protein interactions, and identify structural determinants of de novo-designed proteins. We show that scramblers enable efficient attribution across large datasets and result in high-quality explanations, often outperforming state-of-the-art methods.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Yaman, Muammer Y; Guye, Kathryn N; Ziatdinov, Maxim; Shen, Hao; Baker, David; Kalinin, Sergei V; Ginger, David S
Alignment of Au nanorods along de novo designed protein nanofibers studied with automated image analysis Journal Article
In: Soft Matter, 2021.
@article{Yaman2021,
  title = {Alignment of {Au} nanorods along de novo designed protein nanofibers studied with automated image analysis},
  author = {Muammer Y Yaman and Kathryn N Guye and Maxim Ziatdinov and Hao Shen and David Baker and Sergei V Kalinin and David S Ginger},
  url = {https://pubmed.ncbi.nlm.nih.gov/34128040/},
  pdf = {https://www.bakerlab.org/wp-content/uploads/2021/06/Muammer_etal_SoftMatter2021_Aisngment_along_nanofibers.pdf},
  doi = {10.1039/d1sm00645b},
  year = {2021},
  date = {2021-06-15},
  journal = {Soft Matter},
  abstract = {In this study, we focus on exploring the directional assembly of anisotropic Au nanorods along de novo designed 1D protein nanofiber templates. Using machine learning and automated image processing, we analyze scanning electron microscopy (SEM) images to study how the attachment density and alignment fidelity are influenced by variables such as the aspect ratio of the Au nanorods, and the salt concentration of the solution. We find that the Au nanorods prefer to align parallel to the protein nanofibers. This preference decreases with increasing salt concentration, but is only weakly sensitive to the nanorod aspect ratio. While the overall specific Au nanorod attachment density to the protein fibers increases with increasing solution ionic strength, this increase is dominated primarily by non-specific binding to the substrate background, and we find that greater specific attachment (nanorods attached to the nanofiber template as compared to the substrates) occurs at the lower studied salt concentrations, with the maximum ratio of specific to non-specific binding occurring when the protein fiber solutions are prepared in 75 mM NaCl concentration.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Vorobieva, Anastassia A.; White, Paul; Liang, Binyong; Horne, Jim E.; Bera, Asim K.; Chow, Cameron M.; Gerben, Stacey; Marx, Sinduja; Kang, Alex; Stiving, Alyssa Q.; Harvey, Sophie R.; Marx, Dagan C.; Khan, G. Nasir; Fleming, Karen G.; Wysocki, Vicki H.; Brockwell, David J.; Tamm, Lukas K.; Radford, Sheena E.; Baker, David
De novo design of transmembrane beta barrels Journal Article
In: Science, vol. 371, no. 6531, 2021.
@article{Vorobieva2021,
  title = {De novo design of transmembrane beta barrels},
  author = {Vorobieva, Anastassia A. and White, Paul and Liang, Binyong and Horne, Jim E. and Bera, Asim K. and Chow, Cameron M. and Gerben, Stacey and Marx, Sinduja and Kang, Alex and Stiving, Alyssa Q. and Harvey, Sophie R. and Marx, Dagan C. and Khan, G. Nasir and Fleming, Karen G. and Wysocki, Vicki H. and Brockwell, David J. and Tamm, Lukas K. and Radford, Sheena E. and Baker, David},
  url = {https://science.sciencemag.org/content/371/6531/eabc8182},
  pdf = {https://www.bakerlab.org/wp-content/uploads/2021/02/Vorobieva_etal_Science2021_De_Novo_Transmembrane_beta_barrels.pdf},
  doi = {10.1126/science.abc8182},
  year = {2021},
  date = {2021-02-19},
  urldate = {2021-02-19},
  journal = {Science},
  volume = {371},
  number = {6531},
  abstract = {Computational design offers the possibility of making proteins with customized structures and functions. The range of accessible protein scaffolds has expanded with the design of increasingly complex cytoplasmic proteins and, recently, helical membrane proteins. Vorobieva et al. describe the successful computational design of eight-stranded transmembrane β-barrel proteins (TMBs). Using an iterative approach, they show the importance of negative design to prevent off-target structures and gain insight into the sequence determinants of TMB folding. Twenty-three designs satisfied biochemical screens for a TMB structure, and two structures were experimentally validated by nuclear magnetic resonance spectroscopy or x-ray crystallography. This is a step toward the custom design of pores for applications such as single-molecule sequencing.Science, this issue p. eabc8182INTRODUCTIONDespite their key biological roles, only a few proteins that fold into lipid membranes have been designed de novo. A class of membrane proteins—transmembrane β barrels (TMBs)—forms a continuous sheet that closes on itself in lipid membranes. In addition to the challenge of designing β-sheet proteins, which are prone to misfolding and aggregation if folding is not properly controlled, the computational design of TMBs is complicated by limited understanding of TMB folding. As a result, no TMB has been designed de novo to date.Although the folding of TMBs in vivo is catalyzed by the β-barrel assembly machinery (BAM), many TMBs can also fold spontaneously in synthetic membranes to form stable pores, making them attractive for biotechnology and single-molecule analytical applications. Hence, de novo design of TMBs has potential both for understanding the determinants of TMB folding and membrane insertion and for the custom engineering of TMB nanopores.RATIONALEWe used de novo protein design to distill key principles of TMB folding through several design-build-test cycles. We iterated between hypothesis formulation, its implementation into computational design methods, and experimental characterization of the resulting proteins. To focus on the fundamental principles of TMB folding in the absence of complications due to interactions with chaperones and BAM in vivo, we focused on the challenge of de novo design of eight-stranded TMBs, which can fold and assemble into synthetic lipid membranes.RESULTSWe used a combination of purely geometric models and explicit Rosetta protein structure simulations to determine the constraints that β-strand connectivity and membrane embedding place on the TMB architecture. Through a series of design-build-test cycles, we found that, unlike almost all other classes of proteins, locally destabilizing sequences are critical for expression and folding of TMBs, and that the β-turns that translocate through the bilayer during folding have to be destabilized to enable correct assembly in the membrane. Our results suggest that premature formation of β hairpins may result in off-target β-sheet structures that compete with proper membrane insertion and folding, and hence the β hairpins of TMBs must be designed such that they are only transiently formed prior to membrane insertion, when the protein is in an aqueous environment. In the hydrophobic environment of the lipid bilayer, the full TMB can assemble because the membrane-facing nonpolar residues, which would tend to cluster nonspecifically in an aqueous environment, instead make favorable interactions with the lipids. As the TMB assembles, the β hairpins are stabilized by interactions with the neighboring β strands.Using computational methods that incorporate the above insights, we designed TMB sequences that successfully fold and assemble into both detergent micelles and lipid bilayers. Two of the designs were highly stable and could fold into liposomes more rapidly and reversibly than the transmembrane domain of the model outer membrane protein A (tOmpA) of Escherichia coli. A nuclear magnetic resonance solution structure and a high-resolution crystal structure for two different designs closely match the design models, showing that the TMB design method developed here can generate new structures with atomic-level accuracy.CONCLUSIONThis study elucidates key principles for de novo design of transmembrane β barrels, ranging from constraints on β-barrel architecture and β-hairpin design, as well as local and global sequence features. Our designs provide starting points for the bottom-up elucidation of the molecular mechanisms underlying TMB folding and interactions with the cellular outer membrane folding and insertion machinery. More generally, our work demonstrates that TMBs can be designed with atomic-level accuracy and opens the door to custom design of nanopores tailored for applications such as single-molecule sensing and sequencing.De novo–designed eight-stranded transmembrane β barrels fold spontaneously and reversibly into synthetic lipid membranes.The illustration shows the crystal structure of the protein TMB2.17 designed in this study, which adopts a structure identical to the design model.Credit: Ian Haydon.Transmembrane β-barrel proteins (TMBs) are of great interest for single-molecule analytical technologies because they can spontaneously fold and insert into membranes and form stable pores, but the range of pore properties that can be achieved by repurposing natural TMBs is limited. We leverage the power of de novo computational design coupled with a “hypothesis, design, and test” approach to determine TMB design principles, notably, the importance of negative design to slow β-sheet assembly. We design new eight-stranded TMBs, with no homology to known TMBs, that insert and fold reversibly into synthetic lipid membranes and have nuclear magnetic resonance and x-ray crystal structures very similar to the computational models. These advances should enable the custom design of pores for a wide range of applications.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Chen, Zibo; Kibler, Ryan D.; Hunt, Andrew; Busch, Florian; Pearl, Jocelynn; Jia, Mengxuan; VanAernum, Zachary L.; Wicky, Basile I. M.; Dods, Galen; Liao, Hanna; Wilken, Matthew S.; Ciarlo, Christie; Green, Shon; El-Samad, Hana; Stamatoyannopoulos, John; Wysocki, Vicki H.; Jewett, Michael C.; Boyken, Scott E.; Baker, David
De novo design of protein logic gates Journal Article
In: Science, vol. 368, no. 6486, pp. 78–84, 2020.
@article{Chen2020,
  title = {De novo design of protein logic gates},
  author = {Chen, Zibo and Kibler, Ryan D. and Hunt, Andrew and Busch, Florian and Pearl, Jocelynn and Jia, Mengxuan and VanAernum, Zachary L. and Wicky, Basile I. M. and Dods, Galen and Liao, Hanna and Wilken, Matthew S. and Ciarlo, Christie and Green, Shon and El-Samad, Hana and Stamatoyannopoulos, John and Wysocki, Vicki H. and Jewett, Michael C. and Boyken, Scott E. and Baker, David},
  url = {https://science.sciencemag.org/content/368/6486/78},
  pdf = {https://www.bakerlab.org/wp-content/uploads/2020/04/Chen2020_DeNovoProteinLogicGates.pdf},
  doi = {10.1126/science.aay2790},
  year = {2020},
  date = {2020-03-04},
  journal = {Science},
  volume = {368},
  number = {6486},
  pages = {78--84},
  abstract = {The design of modular protein logic for regulating protein function at the posttranscriptional level is a challenge for synthetic biology. Here, we describe the design of two-input AND, OR, NAND, NOR, XNOR, and NOT gates built from de novo–designed proteins. These gates regulate the association of arbitrary protein units ranging from split enzymes to transcriptional machinery in vitro, in yeast and in primary human T cells, where they control the expression of the TIM3 gene related to T cell exhaustion. Designed binding interaction cooperativity, confirmed by native mass spectrometry, makes the gates largely insensitive to stoichiometric imbalances in the inputs, and the modularity of the approach enables ready extension to three-input OR, AND, and disjunctive normal form gates. The modularity and cooperativity of the control elements, coupled with the ability to de novo design an essentially unlimited number of protein components, should enable the design of sophisticated posttranslational control logic over a wide range of biological functions.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Bale, Jacob B.; Gonen, Shane; Liu, Yuxi; Sheffler, William; Ellis, Daniel; Thomas, Chantz; Cascio, Duilio; Yeates, Todd O.; Gonen, Tamir; King, Neil P.; Baker, David
Accurate design of megadalton-scale two-component icosahedral protein complexes Journal Article
In: Science, vol. 353, no. 6297, pp. 389–394, 2016.
@article{Bale2016,
title = {Accurate design of megadalton-scale two-component icosahedral protein complexes},
author = {Jacob B. Bale and Shane Gonen and Yuxi Liu and William Sheffler and Daniel Ellis and Chantz Thomas and Duilio Cascio and Todd O. Yeates and Tamir Gonen and Neil P. King and David Baker},
url = {https://www.bakerlab.org/wp-content/uploads/2016/07/Bale_Science_2016.pdf},
doi = {10.1126/science.aaf8818},
year = {2016},
date = {2016-07-22},
journal = {Science},
volume = {353},
number = {6297},
pages = {389--394},
abstract = {Nature provides many examples of self- and co-assembling protein-based molecular machines, including icosahedral protein cages that serve as scaffolds, enzymes, and compartments for essential biochemical reactions and icosahedral virus capsids, which encapsidate and protect viral genomes and mediate entry into host cells. Inspired by these natural materials, we report the computational design and experimental characterization of co-assembling, two-component, 120-subunit icosahedral protein nanostructures with molecular weights (1.8 to 2.8 megadaltons) and dimensions (24 to 40 nanometers in diameter) comparable to those of small viral capsids. Electron microscopy, small-angle x-ray scattering, and x-ray crystallography show that 10 designs spanning three distinct icosahedral architectures form materials closely matching the design models. In vitro assembly of icosahedral complexes from independently purified components occurs rapidly, at rates comparable to those of viral capsids, and enables controlled packaging of molecular cargo through charge complementarity. The ability to design megadalton-scale materials with atomic-level accuracy and controllable assembly opens the door to a new generation of genetically programmable protein-based molecular machines.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
King, Neil P.; Bale, Jacob B; Sheffler, William; McNamara, Dan E; Gonen, Shane; Gonen, Tamir; Yeates, Todd O.; Baker, David
Accurate design of co-assembling multi-component protein nanomaterials. Journal Article
In: Nature, 2014, ISSN: 1476-4687.
@article{534,
title = {Accurate design of co-assembling multi-component protein nanomaterials.},
author = { Neil P. King and Jacob B Bale and William Sheffler and Dan E McNamara and Shane Gonen and Tamir Gonen and Todd O. Yeates and David Baker},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/King_Nature2014A.pdf},
doi = {10.1038/nature13404},
issn = {1476-4687},
year = {2014},
date = {2014-05-01},
journal = {Nature},
abstract = {The self-assembly of proteins into highly ordered nanoscale architectures is a hallmark of biological systems. The sophisticated functions of these molecular machines have inspired the development of methods to engineer self-assembling protein nanostructures; however, the design of multi-component protein nanomaterials with high accuracy remains an outstanding challenge. Here we report a computational method for designing protein nanomaterials in which multiple copies of two distinct subunits co-assemble into a specific architecture. We use the method to design five 24-subunit cage-like protein nanomaterials in two distinct symmetric architectures and experimentally demonstrate that their structures are in close agreement with the computational design models. The accuracy of the method and the number and variety of two-component materials that it makes accessible suggest a route to the construction of functional protein nanomaterials tailored to specific applications.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Demers, Jean-Philippe; Sgourakis, Nikolaos G; Gupta, Rashmi; Loquet, Antoine; Giller, Karin; Riedel, Dietmar; Laube, Britta; Kolbe, Michael; Baker, David; Becker, Stefan; Lange, Adam
The common structural architecture of Shigella flexneri and Salmonella typhimurium type three secretion needles Journal Article
In: PLoS pathogens, vol. 9, pp. e1003245, 2013, ISSN: 1553-7374.
@article{471,
title = {The common structural architecture of Shigella flexneri and Salmonella typhimurium type three secretion needles},
author = { Jean-Philippe Demers and Nikolaos G Sgourakis and Rashmi Gupta and Antoine Loquet and Karin Giller and Dietmar Riedel and Britta Laube and Michael Kolbe and David Baker and Stefan Becker and Adam Lange},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Demers_PLosPathogen_13P.pdf},
doi = {10.1371/journal.ppat.1003245},
issn = {1553-7374},
year = {2013},
date = {2013-03-01},
journal = {PLoS pathogens},
volume = {9},
pages = {e1003245},
abstract = {The Type Three Secretion System (T3SS), or injectisome, is a macromolecular infection machinery present in many pathogenic Gram-negative bacteria. It consists of a basal body, anchored in both bacterial membranes, and a hollow needle through which effector proteins are delivered into the target host cell. Two different architectures of the T3SS needle have been previously proposed. First, an atomic model of the Salmonella typhimurium needle was generated from solid-state NMR data. The needle subunit protein, PrgI, comprises a rigid-extended N-terminal segment and a helix-loop-helix motif with the N-terminus located on the outside face of the needle. Second, a model of the Shigella flexneri needle was generated from a high-resolution 7.7-Å cryo-electron microscopy density map. The subunit protein, MxiH, contains an N-terminal α-helix, a loop, another α-helix, a 14-residue-long β-hairpin (Q51-Q64) and a C-terminal α-helix, with the N-terminus facing inward to the lumen of the needle. In the current study, we carried out solid-state NMR measurements of wild-type Shigella flexneri needles polymerized in vitro and identified the following secondary structure elements for MxiH: a rigid-extended N-terminal segment (S2-T11), an α-helix (L12-A38), a loop (E39-P44) and a C-terminal α-helix (Q45-R83). Using immunogold labeling in vitro and in vivo on functional needles, we located the N-terminus of MxiH subunits on the exterior of the assembly, consistent with evolutionary sequence conservation patterns and mutagenesis data. We generated a homology model of Shigella flexneri needles compatible with both experimental data: the MxiH solid-state NMR chemical shifts and the state-of-the-art cryoEM density map.
These results corroborate the solid-state NMR structure previously solved for Salmonella typhimurium PrgI needles and establish that Shigella flexneri and Salmonella typhimurium subunit proteins adopt a conserved structure and orientation in their assembled state. Our study reveals a common structural architecture of T3SS needles, essential to understand T3SS-mediated infection and develop treatments.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Harger, Matthew; Zheng, Lei; Moon, Austin; Ager, Casey; An, Ju Hye; Choe, Chris; Lai, Yi-Ling; Mo, Benjamin; Zong, David; Smith, Matthew D; Egbert, Robert G; Mills, Jeremy H; Baker, David; Pultz, Ingrid Swanson; Siegel, Justin B
Expanding the product profile of a microbial alkane biosynthetic pathway. Journal Article
In: ACS synthetic biology, vol. 2, pp. 59-62, 2013, ISSN: 2161-5063.
@article{503,
title = {Expanding the product profile of a microbial alkane biosynthetic pathway.},
author = { Matthew Harger and Lei Zheng and Austin Moon and Casey Ager and Ju Hye An and Chris Choe and Yi-Ling Lai and Benjamin Mo and David Zong and Matthew D Smith and Robert G Egbert and Jeremy H Mills and David Baker and Ingrid Swanson Pultz and Justin B Siegel},
url = {http://www.bakerlab.org/wp-content/uploads/2015/12/Harger_ACSSynthBiol_2013.pdf},
doi = {10.1021/sb300061x},
issn = {2161-5063},
year = {2013},
date = {2013-01-01},
journal = {ACS synthetic biology},
volume = {2},
pages = {59--62},
abstract = {Microbially produced alkanes are a new class of biofuels that closely match the chemical composition of petroleum-based fuels. Alkanes can be generated from the fatty acid biosynthetic pathway by the reduction of acyl-ACPs followed by decarbonylation of the resulting aldehydes. A current limitation of this pathway is the restricted product profile, which consists of n-alkanes of 13, 15, and 17 carbons in length. To expand the product profile, we incorporated a new part, FabH2 from Bacillus subtilis, an enzyme known to have a broader specificity profile for fatty acid initiation than the native FabH of Escherichia coli. When provided with the appropriate substrate, the addition of FabH2 resulted in an altered alkane product profile in which significant levels of n-alkanes of 14 and 16 carbons in length are produced. The production of even chain length alkanes represents initial steps toward the expansion of this recently discovered microbial alkane production pathway to synthesize complex fuels. This work was conceived and performed as part of the 2011 University of Washington international Genetically Engineered Machines (iGEM) project.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Warner, Lisa R; Varga, Krisztina; Lange, Oliver F; Baker, Susan L; Baker, David; Sousa, Marcelo C; Pardi, Arthur
Structure of the BamC two-domain protein obtained by Rosetta with a limited NMR data set Journal Article
In: Journal of Molecular Biology, vol. 411, pp. 83-95, 2011, ISSN: 1089-8638.
@article{586,
title = {Structure of the BamC two-domain protein obtained by Rosetta with a limited NMR data set},
author = { Lisa R Warner and Krisztina Varga and Oliver F Lange and Susan L Baker and David Baker and Marcelo C Sousa and Arthur Pardi},
url = {https://www.bakerlab.org/wp-content/uploads/2018/06/1-s2.0-S0022283611005729-main.pdf
https://www.sciencedirect.com/science/article/pii/S0022283611005729?via%3Dihub},
doi = {10.1016/j.jmb.2011.05.022},
issn = {1089-8638},
year = {2011},
date = {2011-08-01},
journal = {Journal of Molecular Biology},
volume = {411},
pages = {83--95},
abstract = {The CS-RDC-NOE Rosetta program was used to generate the solution structure of a 27-kDa fragment of the Escherichia coli BamC protein from a limited set of NMR data. The BamC protein is a component of the essential five-protein β-barrel assembly machine in E. coli. The first 100 residues in BamC were disordered in solution. The Rosetta calculations showed that BamC$_{101-344}$ forms two well-defined domains connected by an ~18-residue linker, where the relative orientation of the domains was not defined. Both domains adopt a helix-grip fold previously observed in the Bet v 1 superfamily. $^{15}$N relaxation data indicated a high degree of conformational flexibility for the linker connecting the N-terminal domain and the C-terminal domain in BamC. The results here show that CS-RDC-NOE Rosetta is robust and has a high tolerance for misassigned nuclear Overhauser effect restraints, greatly simplifying NMR structure determinations.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Zhang, Junjie; Ma, Boxue; DiMaio, Frank; Douglas, Nicholai R; Joachimiak, Lukasz A; Baker, David; Frydman, Judith; Levitt, Michael; Chiu, Wah
Cryo-EM structure of a group II chaperonin in the prehydrolysis ATP-bound state leading to lid closure Journal Article
In: Structure (London, England : 1993), vol. 19, pp. 633-9, 2011, ISSN: 1878-4186.
@article{596,
title = {Cryo-EM structure of a group II chaperonin in the prehydrolysis ATP-bound state leading to lid closure},
author = { Junjie Zhang and Boxue Ma and Frank DiMaio and Nicholai R Douglas and Lukasz A Joachimiak and David Baker and Judith Frydman and Michael Levitt and Wah Chiu},
doi = {10.1016/j.str.2011.03.005},
issn = {1878-4186},
year = {2011},
date = {2011-05-01},
journal = {Structure (London, England : 1993)},
volume = {19},
pages = {633--639},
abstract = {Chaperonins are large ATP-driven molecular machines that mediate cellular protein folding. Group II chaperonins use their "built-in lid" to close their central folding chamber. Here we report the structure of an archaeal group II chaperonin in its prehydrolysis ATP-bound state at subnanometer resolution using single particle cryo-electron microscopy (cryo-EM). Structural comparison of Mm-cpn in ATP-free, ATP-bound, and ATP-hydrolysis states reveals that ATP binding alone causes the chaperonin to close slightly with a ~45° counterclockwise rotation of the apical domain. The subsequent ATP hydrolysis drives each subunit to rock toward the folding chamber and to close the lid completely. These motions are attributable to the local interactions of specific active site residues with the nucleotide, the tight couplings between the apical and intermediate domains within the subunit, and the aligned interactions between two subunits across the rings. This mechanism of structural changes in response to ATP is entirely different from those found in group I chaperonins.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Sanowar, Sarah; Singh, Pragya; Pfuetzner, Richard A; André, Ingemar; Zheng, Hongjin; Spreter, Thomas; Strynadka, Natalie C J; Gonen, Tamir; Baker, David; Goodlett, David R; Miller, Samuel I
Interactions of the Transmembrane Polymeric Rings of the Salmonella enterica Serovar Typhimurium Type III Secretion System Journal Article
In: mBio, vol. 1, 2010, ISSN: 2150-7511.
@article{261,
title = {Interactions of the Transmembrane Polymeric Rings of the Salmonella enterica Serovar Typhimurium Type III Secretion System},
author = { Sarah Sanowar and Pragya Singh and Richard A Pfuetzner and Ingemar Andr{\'e} and Hongjin Zheng and Thomas Spreter and Natalie C J Strynadka and Tamir Gonen and David Baker and David R Goodlett and Samuel I Miller},
issn = {2150-7511},
year = {2010},
date = {2010},
journal = {mBio},
volume = {1},
abstract = {The type III secretion system (T3SS) is an interspecies protein transport machine that plays a major role in interactions of Gram-negative bacteria with animals and plants by delivering bacterial effector proteins into host cells. T3SSs span both membranes of Gram-negative bacteria by forming a structure of connected oligomeric rings termed the needle complex (NC). Here, the localization of subunits in the Salmonella enterica serovar Typhimurium T3SS NC were probed via mass spectrometry-assisted identification of chemical cross-links in intact NC preparations. Cross-links between amino acids near the amino terminus of the outer membrane ring component InvG and the carboxyl terminus of the inner membrane ring component PrgH and between the two inner membrane components PrgH and PrgK allowed for spatial localization of the three ring components within the electron density map structures of NCs. Mutational and biochemical analysis demonstrated that the amino terminus of InvG and the carboxyl terminus of PrgH play a critical role in the assembly and function of the T3SS apparatus. Analysis of an InvG mutant indicates that the structure of the InvG oligomer can affect the switching of the T3SS substrate to translocon and effector components. This study provides insights into how structural organization of needle complex base components promotes T3SS assembly and function.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Raman, S.; Qian, B.; Baker, D.; Walker, R. C.
Advances in Rosetta Protein Structure Prediction on Massively Parallel Systems Journal Article
In: Journal of Research and Development, vol. 52, no. 1-2, pp. 7-17, 2008.
@article{280,
title = {Advances in Rosetta Protein Structure Prediction on Massively Parallel Systems},
author = {Raman, S. and Qian, B. and Baker, D. and Walker, R. C.},
year = {2008},
date = {2008-01-01},
journal = {Journal of Research and Development},
volume = {52},
number = {1-2},
pages = {7--17},
abstract = {One of the key challenges in computational biology is prediction of three-dimensional protein structures from amino-acid sequences. For most proteins, the "native state" lies at the bottom of a free-energy landscape. Protein structure prediction involves varying the degrees of freedom of the protein in a constrained manner until it approaches its native state. In the Rosetta protein structure prediction protocols, a large number of independent folding trajectories are simulated, and several lowest-energy results are likely to be close to the native state. The availability of hundred-teraflop, and shortly, petaflop, computing resources is revolutionizing the approaches available for protein structure prediction. Here, we discuss issues involved in utilizing such machines efficiently with the Rosetta code, including an overview of recent results of the Critical Assessment of Techniques for Protein Structure Prediction 7 (CASP7) in which the computationally demanding structure-refinement process was run on 16 racks of the IBM Blue Gene/L (TM) system at the IBM T. J. Watson Research Center. We highlight recent advances in high-performance computing and discuss future development paths that make use of the next-generation petascale (> $10^{12}$ floating-point operations per second) machines.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2025
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Meghana Kshirsagar, Artur Meller, Ian R Humphreys, Samuel Sledzieski, Yixi Xu, Rahul Dodhia, Eric Horvitz, Bonnie Berger, Gregory R Bowman, Juan Lavista Ferres, David Baker, Minkyung Baek
Rapid and accurate prediction of protein homo-oligomer symmetry using Seq2Symm Journal Article
In: Nature Communications, 2025.
@article{pmid40016259,
title = {Rapid and accurate prediction of protein homo-oligomer symmetry using Seq2Symm},
author = {Meghana Kshirsagar and Artur Meller and Ian R Humphreys and Samuel Sledzieski and Yixi Xu and Rahul Dodhia and Eric Horvitz and Bonnie Berger and Gregory R Bowman and Juan Lavista Ferres and David Baker and Minkyung Baek},
url = {https://www.nature.com/articles/s41467-025-57148-3, Nature Communications
https://www.bakerlab.org/wp-content/uploads/2025/03/s41467-025-57148-3.pdf, PDF},
doi = {10.1038/s41467-025-57148-3},
year = {2025},
date = {2025-02-27},
urldate = {2025-02-27},
journal = {Nature Communications},
abstract = {The majority of proteins must form higher-order assemblies to perform their biological functions, yet few machine learning models can accurately and rapidly predict the symmetry of assemblies involving multiple copies of the same protein chain. Here, we address this gap by finetuning several classes of protein foundation models, to predict homo-oligomer symmetry. Our best model named Seq2Symm, which utilizes ESM2, outperforms existing template-based and deep learning methods achieving an average AUC-PR of 0.47, 0.44 and 0.49 across homo-oligomer symmetries on three held-out test sets compared to 0.24, 0.24 and 0.25 with template-based search. Seq2Symm uses a single sequence as input and can predict at the rate of ~80,000 proteins/hour. We apply this method to 5 proteomes and ~3.5 million unlabeled protein sequences, showing its promise to be used in conjunction with downstream computationally intensive all-atom structure generation methods such as RoseTTAFold2 and AlphaFold2-multimer. Code, datasets, model are available at: https://github.com/microsoft/seq2symm .},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2024
FROM THE LAB
Ian R. Humphreys, Jing Zhang, Minkyung Baek, Yaxi Wang, Aditya Krishnakumar, Jimin Pei, Ivan Anishchenko, Catherine A. Tower, Blake A. Jackson, Thulasi Warrier, Deborah T. Hung, S. Brook Peterson, Joseph D. Mougous, Qian Cong, David Baker
Protein interactions in human pathogens revealed through deep learning Journal Article
In: Nature Microbiology, 2024, ISSN: 2058-5276.
@article{Humphreys2024,
title = {Protein interactions in human pathogens revealed through deep learning},
author = {Ian R. Humphreys and Jing Zhang and Minkyung Baek and Yaxi Wang and Aditya Krishnakumar and Jimin Pei and Ivan Anishchenko and Catherine A. Tower and Blake A. Jackson and Thulasi Warrier and Deborah T. Hung and S. Brook Peterson and Joseph D. Mougous and Qian Cong and David Baker},
url = {https://www.nature.com/articles/s41564-024-01791-x, Nature Microbiology [Open Access]},
doi = {10.1038/s41564-024-01791-x},
issn = {2058-5276},
year = {2024},
date = {2024-09-18},
urldate = {2024-09-18},
journal = {Nature Microbiology},
publisher = {Springer Science and Business Media LLC},
abstract = {Identification of bacterial protein–protein interactions and predicting the structures of these complexes could aid in the understanding of pathogenicity mechanisms and developing treatments for infectious diseases. Here we developed RoseTTAFold2-Lite, a rapid deep learning model that leverages residue–residue coevolution and protein structure prediction to systematically identify and structurally characterize protein–protein interactions at the proteome-wide scale. Using this pipeline, we searched through 78 million pairs of proteins across 19 human bacterial pathogens and identified 1,923 confidently predicted complexes involving essential genes and 256 involving virulence factors. Many of these complexes were not previously known; we experimentally tested 12 such predictions, and half of them were validated. The predicted interactions span core metabolic and virulence pathways ranging from post-transcriptional modification to acid neutralization to outer-membrane machinery and should contribute to our understanding of the biology of these important pathogens and the design of drugs to combat them.
},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Arvind Pillai, Abbas Idris, Annika Philomin, Connor Weidle, Rebecca Skotheim, Philip J. Y. Leung, Adam Broerman, Cullen Demakis, Andrew J. Borst, Florian Praetorius, David Baker
De novo design of allosterically switchable protein assemblies Journal Article
In: Nature, 2024.
@article{Pillai2024,
title = {De novo design of allosterically switchable protein assemblies},
author = {Arvind Pillai and Abbas Idris and Annika Philomin and Connor Weidle and Rebecca Skotheim and Philip J. Y. Leung and Adam Broerman and Cullen Demakis and Andrew J. Borst and Florian Praetorius and David Baker},
url = {https://www.nature.com/articles/s41586-024-07813-2, Nature [Open Access]},
doi = {10.1038/s41586-024-07813-2},
year = {2024},
date = {2024-08-14},
urldate = {2024-08-14},
journal = {Nature},
publisher = {Springer Science and Business Media LLC},
abstract = {Allosteric modulation of protein function, wherein the binding of an effector to a protein triggers conformational changes at distant functional sites, plays a central part in the control of metabolism and cell signalling. There has been considerable interest in designing allosteric systems, both to gain insight into the mechanisms underlying such ‘action at a distance’ modulation and to create synthetic proteins whose functions can be regulated by effectors. However, emulating the subtle conformational changes distributed across many residues, characteristic of natural allosteric proteins, is a significant challenge. Here, inspired by the classic Monod–Wyman–Changeux model of cooperativity, we investigate the de novo design of allostery through rigid-body coupling of peptide-switchable hinge modules to protein interfaces that direct the formation of alternative oligomeric states. We find that this approach can be used to generate a wide variety of allosterically switchable systems, including cyclic rings that incorporate or eject subunits in response to peptide binding and dihedral cages that undergo effector-induced disassembly. Size-exclusion chromatography, mass photometry and electron microscopy reveal that these designed allosteric protein assemblies closely resemble the design models in both the presence and absence of peptide effectors and can have ligand-binding cooperativity comparable to classic natural systems such as haemoglobin. Our results indicate that allostery can arise from global coupling of the energetics of protein substructures without optimized side-chain–side-chain allosteric communication pathways and provide a roadmap for generating allosterically triggerable delivery systems, protein nanomachines and cellular feedback control circuitry.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
COLLABORATOR LED
Sarah J. Wait, Marc Expòsit, Sophia Lin, Michael Rappleye, Justin Daho Lee, Samuel A. Colby, Lily Torp, Anthony Asencio, Annette Smith, Michael Regnier, Farid Moussavi-Harami, David Baker, Christina K. Kim, Andre Berndt
Machine learning-guided engineering of genetically encoded fluorescent calcium indicators Journal Article
In: Nature Computational Science, 2024.
@article{Wait2024,
title = {Machine learning-guided engineering of genetically encoded fluorescent calcium indicators},
author = {Sarah J. Wait and Marc Expòsit and Sophia Lin and Michael Rappleye and Justin Daho Lee and Samuel A. Colby and Lily Torp and Anthony Asencio and Annette Smith and Michael Regnier and Farid Moussavi-Harami and David Baker and Christina K. Kim and Andre Berndt},
url = {https://www.nature.com/articles/s43588-024-00611-w, Nat Comp Sci
https://www.bakerlab.org/wp-content/uploads/2024/03/s43588-024-00611-w.pdf, PDF},
doi = {10.1038/s43588-024-00611-w},
year = {2024},
date = {2024-03-21},
urldate = {2024-03-21},
journal = {Nature Computational Science},
publisher = {Springer Science and Business Media LLC},
abstract = {Here we used machine learning to engineer genetically encoded fluorescent indicators, protein-based sensors critical for real-time monitoring of biological activity. We used machine learning to predict the outcomes of sensor mutagenesis by analyzing established libraries that link sensor sequences to functions. Using the GCaMP calcium indicator as a scaffold, we developed an ensemble of three regression models trained on experimentally derived GCaMP mutation libraries. The trained ensemble performed an in silico functional screen on 1,423 novel, uncharacterized GCaMP variants. As a result, we identified the ensemble-derived GCaMP (eGCaMP) variants, eGCaMP and eGCaMP+, which achieve both faster kinetics and larger ∆F/F0 responses upon stimulation than previously published fast variants. Furthermore, we identified a combinatorial mutation with extraordinary dynamic range, eGCaMP2+, which outperforms the tested sixth-, seventh- and eighth-generation GCaMPs. These findings demonstrate the value of machine learning as a tool to facilitate the efficient engineering of proteins for desired biophysical characteristics.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2023
FROM THE LAB
Minkyung Baek, Ryan McHugh, Ivan Anishchenko, Hanlun Jiang, David Baker, Frank DiMaio
Accurate prediction of protein–nucleic acid complexes using RoseTTAFoldNA Journal Article
In: Nature Methods, 2023.
@article{Baek2023,
title = {Accurate prediction of protein–nucleic acid complexes using RoseTTAFoldNA},
author = {Minkyung Baek and Ryan McHugh and Ivan Anishchenko and Hanlun Jiang and David Baker and Frank DiMaio},
url = {https://www.nature.com/articles/s41592-023-02086-5, Nature Methods [Open Access]},
doi = {10.1038/s41592-023-02086-5},
year = {2023},
date = {2023-11-23},
urldate = {2023-11-23},
journal = {Nature Methods},
publisher = {Springer Science and Business Media LLC},
abstract = {Protein–RNA and protein–DNA complexes play critical roles in biology. Despite considerable recent advances in protein structure prediction, the prediction of the structures of protein–nucleic acid complexes without homology to known complexes is a largely unsolved problem. Here we extend the RoseTTAFold machine learning protein-structure-prediction approach to additionally predict nucleic acid and protein–nucleic acid complexes. We develop a single trained network, RoseTTAFoldNA, that rapidly produces three-dimensional structure models with confidence estimates for protein–DNA and protein–RNA complexes. Here we show that confident predictions have considerably higher accuracy than current state-of-the-art methods. RoseTTAFoldNA should be broadly useful for modeling the structure of naturally occurring protein–nucleic acid complexes, and for designing sequence-specific RNA and DNA-binding proteins.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
COLLABORATOR LED
Jing Yang (John) Wang, Alena Khmelinskaia, William Sheffler, Marcos C. Miranda, Aleksandar Antanasijevic, Andrew J. Borst, Susana V. Torres, Chelsea Shu, Yang Hsia, Una Nattermann, Daniel Ellis, Carl Walkey, Maggie Ahlrichs, Sidney Chan, Alex Kang, Hannah Nguyen, Claire Sydeman, Banumathi Sankaran, Mengyu Wu, Asim K. Bera, Lauren Carter, Brooke Fiala, Michael Murphy, David Baker, Andrew B. Ward, Neil P. King
Improving the secretion of designed protein assemblies through negative design of cryptic transmembrane domains Journal Article
In: Proceedings of the National Academy of Sciences, 2023.
@article{Wang2023,
title = {Improving the secretion of designed protein assemblies through negative design of cryptic transmembrane domains},
author = {Wang, Jing Yang (John)
and Khmelinskaia, Alena
and Sheffler, William
and Miranda, Marcos C.
and Antanasijevic, Aleksandar
and Borst, Andrew J.
and Torres, Susana V.
and Shu, Chelsea
and Hsia, Yang
and Nattermann, Una
and Ellis, Daniel
and Walkey, Carl
and Ahlrichs, Maggie
and Chan, Sidney
and Kang, Alex
and Nguyen, Hannah
and Sydeman, Claire
and Sankaran, Banumathi
and Wu, Mengyu
and Bera, Asim K.
and Carter, Lauren
and Fiala, Brooke
and Murphy, Michael
and Baker, David
and Ward, Andrew B.
and King, Neil P.},
url = {https://www.pnas.org/doi/10.1073/pnas.2214556120, PNAS (Open Access)},
doi = {10.1073/pnas.2214556120},
year = {2023},
date = {2023-03-08},
urldate = {2023-03-08},
journal = {Proceedings of the National Academy of Sciences},
abstract = {Computationally designed protein nanoparticles have recently emerged as a promising platform for the development of new vaccines and biologics. For many applications, secretion of designed nanoparticles from eukaryotic cells would be advantageous, but in practice, they often secrete poorly. Here we show that designed hydrophobic interfaces that drive nanoparticle assembly are often predicted to form cryptic transmembrane domains, suggesting that interaction with the membrane insertion machinery could limit efficient secretion. We develop a general computational protocol, the Degreaser, to design away cryptic transmembrane domains without sacrificing protein stability. The retroactive application of the Degreaser to previously designed nanoparticle components and nanoparticles considerably improves secretion, and modular integration of the Degreaser into design pipelines results in new nanoparticles that secrete as robustly as naturally occurring protein assemblies. Both the Degreaser protocol and the nanoparticles we describe may be broadly useful in biotechnological applications.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2022
FROM THE LAB
B. I. M. Wicky, L. F. Milles, A. Courbet, R. J. Ragotte, J. Dauparas, E. Kinfu, S. Tipps, R. D. Kibler, M. Baek, F. DiMaio, X. Li, L. Carter, A. Kang, H. Nguyen, A. K. Bera, D. Baker
Hallucinating symmetric protein assemblies Journal Article
In: Science, 2022.
@article{Wicky2022,
title = {Hallucinating symmetric protein assemblies},
author = {B. I. M. Wicky and L. F. Milles and A. Courbet and R. J. Ragotte and J. Dauparas and E. Kinfu and S. Tipps and R. D. Kibler and M. Baek and F. DiMaio and X. Li and L. Carter and A. Kang and H. Nguyen and A. K. Bera and D. Baker},
url = {https://www.science.org/doi/abs/10.1126/science.add1964, Science
https://www.bakerlab.org/wp-content/uploads/2022/09/Wicky_etal_Science2022_Hallucinating_symmetric_protein_assemblies.pdf, PDF
},
doi = {10.1126/science.add1964},
year = {2022},
date = {2022-09-15},
journal = {Science},
abstract = {Deep learning generative approaches provide an opportunity to broadly explore protein structure space beyond the sequences and structures of natural proteins. Here we use deep network hallucination to generate a wide range of symmetric protein homo-oligomers given only a specification of the number of protomers and the protomer length. Crystal structures of 7 designs are very close to the computational models (median RMSD: 0.6 Å), as are 3 cryoEM structures of giant 10 nanometer rings with up to 1550 residues and C33 symmetry; all differ considerably from previously solved structures. Our results highlight the rich diversity of new protein structures that can be generated using deep learning, and pave the way for the design of increasingly complex components for nanomachines and biomaterials.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
A. Courbet, J. Hansen, Y. Hsia, N. Bethel, Y.-J. Park, C. Xu, A. Moyer, S. E. Boyken, G. Ueda, U. Nattermann, D. Nagarajan, D. Silva, W. Sheffler, J. Quispe, A. Nord, N. King, P. Bradley, D. Veesler, J. Kollman, D. Baker
Computational design of mechanically coupled axle-rotor protein assemblies Journal Article
In: Science, 2022.
@article{Courbet2022,
  author    = {A. Courbet and J. Hansen and Y. Hsia and N. Bethel and Y.-J. Park and C. Xu and A. Moyer and S. E. Boyken and G. Ueda and U. Nattermann and D. Nagarajan and D. Silva and W. Sheffler and J. Quispe and A. Nord and N. King and P. Bradley and D. Veesler and J. Kollman and D. Baker},
  title     = {Computational design of mechanically coupled axle-rotor protein assemblies},
  journal   = {Science},
  year      = {2022},
  date      = {2022-04-21},
  urldate   = {2022-04-21},
  doi       = {10.1126/science.abm1183},
  url       = {https://www.science.org/doi/abs/10.1126/science.abm1183, Science
https://www.bakerlab.org/wp-content/uploads/2022/04/science.abm1183.pdf, Download PDF},
  abstract  = {Natural molecular machines contain protein components that undergo motion relative to each other. Designing such mechanically constrained nanoscale protein architectures with internal degrees of freedom is an outstanding challenge for computational protein design. Here we explore the de novo construction of protein machinery from designed axle and rotor components with internal cyclic or dihedral symmetry. We find that the axle-rotor systems assemble in vitro and in vivo as designed. Using cryo–electron microscopy, we find that these systems populate conformationally variable relative orientations reflecting the symmetry of the coupled components and the computationally designed interface energy landscape. These mechanical systems with internal degrees of freedom are a step toward the design of genetically encodable nanomachines. Protein rotary machines such as ATP synthase contain axle-like and ring-like components and couple biochemical energy to the mechanical work of rotating the components relative to each other. Courbet et al. have taken a step toward designing such axel-rotor nanomachines. A structural requirement is that interactions between the components must be strong enough to allow assembly but still allow different rotational states to be populated. The authors met this design challenge and computationally designed ring-like protein topologies (rotors) with a range of inner diameters that accommodate designed axle-like binding partners. The systems assemble and populate the different rotational states anticipated by the designs. These rotational energy landscapes provide one of two needed elements for a directional motor. —VV Computationally designed self-assembling axle-rotor protein systems populate multiple rotational states.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Linder, Johannes; La Fleur, Alyssa; Chen, Zibo; Ljubetič, Ajasja; Baker, David; Kannan, Sreeram; Seelig, Georg
Interpreting neural networks for biological sequences by learning stochastic masks Journal Article
In: Nature Machine Intelligence, 2022.
@article{Linder2022,
  author    = {Linder, Johannes and La Fleur, Alyssa and Chen, Zibo and Ljubetič, Ajasja and Baker, David and Kannan, Sreeram and Seelig, Georg},
  title     = {Interpreting neural networks for biological sequences by learning stochastic masks},
  journal   = {Nature Machine Intelligence},
  year      = {2022},
  date      = {2022-01-25},
  urldate   = {2022-01-25},
  doi       = {10.1038/s42256-021-00428-6},
  url       = {https://www.nature.com/articles/s42256-021-00428-6, Nature Machine Intelligence},
  abstract  = {Sequence-based neural networks can learn to make accurate predictions from large biological datasets, but model interpretation remains challenging. Many existing feature attribution methods are optimized for continuous rather than discrete input patterns and assess individual feature importance in isolation, making them ill-suited for interpreting nonlinear interactions in molecular sequences. Here, building on work in computer vision and natural language processing, we developed an approach based on deep learning—scrambler networks—wherein the most important sequence positions are identified with learned input masks. Scramblers learn to predict position-specific scoring matrices where unimportant nucleotides or residues are scrambled by raising their entropy. We apply scramblers to interpret the effects of genetic variants, uncover nonlinear interactions between cis-regulatory elements, explain binding specificity for protein–protein interactions, and identify structural determinants of de novo-designed proteins. We show that scramblers enable efficient attribution across large datasets and result in high-quality explanations, often outperforming state-of-the-art methods.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2021
FROM THE LAB
Vorobieva, Anastassia A.; White, Paul; Liang, Binyong; Horne, Jim E.; Bera, Asim K.; Chow, Cameron M.; Gerben, Stacey; Marx, Sinduja; Kang, Alex; Stiving, Alyssa Q.; Harvey, Sophie R.; Marx, Dagan C.; Khan, G. Nasir; Fleming, Karen G.; Wysocki, Vicki H.; Brockwell, David J.; Tamm, Lukas K.; Radford, Sheena E.; Baker, David
De novo design of transmembrane beta barrels Journal Article
In: Science, vol. 371, no. 6531, 2021.
@article{Vorobieva2021,
  author    = {Vorobieva, Anastassia A. and White, Paul and Liang, Binyong and Horne, Jim E. and Bera, Asim K. and Chow, Cameron M. and Gerben, Stacey and Marx, Sinduja and Kang, Alex and Stiving, Alyssa Q. and Harvey, Sophie R. and Marx, Dagan C. and Khan, G. Nasir and Fleming, Karen G. and Wysocki, Vicki H. and Brockwell, David J. and Tamm, Lukas K. and Radford, Sheena E. and Baker, David},
  title     = {De novo design of transmembrane beta barrels},
  journal   = {Science},
  volume    = {371},
  number    = {6531},
  year      = {2021},
  date      = {2021-02-19},
  urldate   = {2021-02-19},
  doi       = {10.1126/science.abc8182},
  url       = {https://science.sciencemag.org/content/371/6531/eabc8182, Science
https://www.bakerlab.org/wp-content/uploads/2021/02/Vorobieva_etal_Science2021_De_Novo_Transmembrane_beta_barrels.pdf, Download PDF},
  abstract  = {Computational design offers the possibility of making proteins with customized structures and functions. The range of accessible protein scaffolds has expanded with the design of increasingly complex cytoplasmic proteins and, recently, helical membrane proteins. Vorobieva et al. describe the successful computational design of eight-stranded transmembrane β-barrel proteins (TMBs). Using an iterative approach, they show the importance of negative design to prevent off-target structures and gain insight into the sequence determinants of TMB folding. Twenty-three designs satisfied biochemical screens for a TMB structure, and two structures were experimentally validated by nuclear magnetic resonance spectroscopy or x-ray crystallography. This is a step toward the custom design of pores for applications such as single-molecule sequencing. Science, this issue p. eabc8182 INTRODUCTION Despite their key biological roles, only a few proteins that fold into lipid membranes have been designed de novo. A class of membrane proteins—transmembrane β barrels (TMBs)—forms a continuous sheet that closes on itself in lipid membranes. In addition to the challenge of designing β-sheet proteins, which are prone to misfolding and aggregation if folding is not properly controlled, the computational design of TMBs is complicated by limited understanding of TMB folding. As a result, no TMB has been designed de novo to date. Although the folding of TMBs in vivo is catalyzed by the β-barrel assembly machinery (BAM), many TMBs can also fold spontaneously in synthetic membranes to form stable pores, making them attractive for biotechnology and single-molecule analytical applications.
Hence, de novo design of TMBs has potential both for understanding the determinants of TMB folding and membrane insertion and for the custom engineering of TMB nanopores. RATIONALE We used de novo protein design to distill key principles of TMB folding through several design-build-test cycles. We iterated between hypothesis formulation, its implementation into computational design methods, and experimental characterization of the resulting proteins. To focus on the fundamental principles of TMB folding in the absence of complications due to interactions with chaperones and BAM in vivo, we focused on the challenge of de novo design of eight-stranded TMBs, which can fold and assemble into synthetic lipid membranes. RESULTS We used a combination of purely geometric models and explicit Rosetta protein structure simulations to determine the constraints that β-strand connectivity and membrane embedding place on the TMB architecture. Through a series of design-build-test cycles, we found that, unlike almost all other classes of proteins, locally destabilizing sequences are critical for expression and folding of TMBs, and that the β-turns that translocate through the bilayer during folding have to be destabilized to enable correct assembly in the membrane. Our results suggest that premature formation of β hairpins may result in off-target β-sheet structures that compete with proper membrane insertion and folding, and hence the β hairpins of TMBs must be designed such that they are only transiently formed prior to membrane insertion, when the protein is in an aqueous environment. In the hydrophobic environment of the lipid bilayer, the full TMB can assemble because the membrane-facing nonpolar residues, which would tend to cluster nonspecifically in an aqueous environment, instead make favorable interactions with the lipids.
As the TMB assembles, the β hairpins are stabilized by interactions with the neighboring β strands. Using computational methods that incorporate the above insights, we designed TMB sequences that successfully fold and assemble into both detergent micelles and lipid bilayers. Two of the designs were highly stable and could fold into liposomes more rapidly and reversibly than the transmembrane domain of the model outer membrane protein A (tOmpA) of Escherichia coli. A nuclear magnetic resonance solution structure and a high-resolution crystal structure for two different designs closely match the design models, showing that the TMB design method developed here can generate new structures with atomic-level accuracy. CONCLUSION This study elucidates key principles for de novo design of transmembrane β barrels, ranging from constraints on β-barrel architecture and β-hairpin design, as well as local and global sequence features. Our designs provide starting points for the bottom-up elucidation of the molecular mechanisms underlying TMB folding and interactions with the cellular outer membrane folding and insertion machinery. More generally, our work demonstrates that TMBs can be designed with atomic-level accuracy and opens the door to custom design of nanopores tailored for applications such as single-molecule sensing and sequencing. De novo–designed eight-stranded transmembrane β barrels fold spontaneously and reversibly into synthetic lipid membranes. The illustration shows the crystal structure of the protein TMB2.17 designed in this study, which adopts a structure identical to the design model. Credit: Ian Haydon. Transmembrane β-barrel proteins (TMBs) are of great interest for single-molecule analytical technologies because they can spontaneously fold and insert into membranes and form stable pores, but the range of pore properties that can be achieved by repurposing natural TMBs is limited.
We leverage the power of de novo computational design coupled with a “hypothesis, design, and test” approach to determine TMB design principles, notably, the importance of negative design to slow β-sheet assembly. We design new eight-stranded TMBs, with no homology to known TMBs, that insert and fold reversibly into synthetic lipid membranes and have nuclear magnetic resonance and x-ray crystal structures very similar to the computational models. These advances should enable the custom design of pores for a wide range of applications.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Muammer Y Yaman, Kathryn N Guye, Maxim Ziatdinov, Hao Shen, David Baker, Sergei V Kalinin, David S Ginger
Alignment of Au nanorods along de novo designed protein nanofibers studied with automated image analysis Journal Article
In: Soft Matter, 2021.
@article{Yaman2021,
  author    = {Muammer Y Yaman and Kathryn N Guye and Maxim Ziatdinov and Hao Shen and David Baker and Sergei V Kalinin and David S Ginger
},
  title     = {Alignment of Au nanorods along de novo designed protein nanofibers studied with automated image analysis},
  journal   = {Soft Matter},
  year      = {2021},
  date      = {2021-06-15},
  doi       = {10.1039/d1sm00645b},
  url       = {https://pubmed.ncbi.nlm.nih.gov/34128040/
https://www.bakerlab.org/wp-content/uploads/2021/06/Muammer_etal_SoftMatter2021_Aisngment_along_nanofibers.pdf},
  abstract  = {In this study, we focus on exploring the directional assembly of anisotropic Au nanorods along de novo designed 1D protein nanofiber templates. Using machine learning and automated image processing, we analyze scanning electron microscopy (SEM) images to study how the attachment density and alignment fidelity are influenced by variables such as the aspect ratio of the Au nanorods, and the salt concentration of the solution. We find that the Au nanorods prefer to align parallel to the protein nanofibers. This preference decreases with increasing salt concentration, but is only weakly sensitive to the nanorod aspect ratio. While the overall specific Au nanorod attachment density to the protein fibers increases with increasing solution ionic strength, this increase is dominated primarily by non-specific binding to the substrate background, and we find that greater specific attachment (nanorods attached to the nanofiber template as compared to the substrates) occurs at the lower studied salt concentrations, with the maximum ratio of specific to non-specific binding occurring when the protein fiber solutions are prepared in 75 mM NaCl concentration.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2020
FROM THE LAB
Chen, Zibo; Kibler, Ryan D.; Hunt, Andrew; Busch, Florian; Pearl, Jocelynn; Jia, Mengxuan; VanAernum, Zachary L.; Wicky, Basile I. M.; Dods, Galen; Liao, Hanna; Wilken, Matthew S.; Ciarlo, Christie; Green, Shon; El-Samad, Hana; Stamatoyannopoulos, John; Wysocki, Vicki H.; Jewett, Michael C.; Boyken, Scott E.; Baker, David
De novo design of protein logic gates Journal Article
In: Science, vol. 368, no. 6486, pp. 78-84, 2020.
@article{Chen2020,
  author    = {Chen, Zibo and Kibler, Ryan D. and Hunt, Andrew and Busch, Florian and Pearl, Jocelynn and Jia, Mengxuan and VanAernum, Zachary L. and Wicky, Basile I. M. and Dods, Galen and Liao, Hanna and Wilken, Matthew S. and Ciarlo, Christie and Green, Shon and El-Samad, Hana and Stamatoyannopoulos, John and Wysocki, Vicki H. and Jewett, Michael C. and Boyken, Scott E. and Baker, David},
  title     = {De novo design of protein logic gates},
  journal   = {Science},
  volume    = {368},
  number    = {6486},
  pages     = {78--84},
  year      = {2020},
  date      = {2020-03-04},
  doi       = {10.1126/science.aay2790},
  url       = {https://science.sciencemag.org/content/368/6486/78
https://www.bakerlab.org/wp-content/uploads/2020/04/Chen2020_DeNovoProteinLogicGates.pdf},
  abstract  = {The design of modular protein logic for regulating protein function at the posttranscriptional level is a challenge for synthetic biology. Here, we describe the design of two-input AND, OR, NAND, NOR, XNOR, and NOT gates built from de novo–designed proteins. These gates regulate the association of arbitrary protein units ranging from split enzymes to transcriptional machinery in vitro, in yeast and in primary human T cells, where they control the expression of the TIM3 gene related to T cell exhaustion. Designed binding interaction cooperativity, confirmed by native mass spectrometry, makes the gates largely insensitive to stoichiometric imbalances in the inputs, and the modularity of the approach enables ready extension to three-input OR, AND, and disjunctive normal form gates. The modularity and cooperativity of the control elements, coupled with the ability to de novo design an essentially unlimited number of protein components, should enable the design of sophisticated posttranslational control logic over a wide range of biological functions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
COLLABORATOR LED
Sorry, no publications matched your criteria.
2019
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2018
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2017-1988
ALL PAPERS
2016
Jacob B. Bale, Shane Gonen, Yuxi Liu, William Sheffler, Daniel Ellis, Chantz Thomas, Duilio Cascio, Todd O. Yeates, Tamir Gonen, Neil P. King, David Baker
Accurate design of megadalton-scale two-component icosahedral protein complexes Journal Article
In: Science, vol. 353, no. 6297, pp. 389-394, 2016.
@article{Bale2016,
  author    = {Jacob B. Bale and Shane Gonen and Yuxi Liu and William Sheffler and Daniel Ellis and Chantz Thomas and Duilio Cascio and Todd O. Yeates and Tamir Gonen and Neil P. King and David Baker},
  title     = {Accurate design of megadalton-scale two-component icosahedral protein complexes},
  journal   = {Science},
  volume    = {353},
  number    = {6297},
  pages     = {389--394},
  year      = {2016},
  date      = {2016-07-22},
  doi       = {10.1126/science.aaf8818},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/Bale_Science_2016.pdf},
  abstract  = {Nature provides many examples of self- and co-assembling protein-based molecular machines, including icosahedral protein cages that serve as scaffolds, enzymes, and compartments for essential biochemical reactions and icosahedral virus capsids, which encapsidate and protect viral genomes and mediate entry into host cells. Inspired by these natural materials, we report the computational design and experimental characterization of co-assembling, two-component, 120-subunit icosahedral protein nanostructures with molecular weights (1.8 to 2.8 megadaltons) and dimensions (24 to 40 nanometers in diameter) comparable to those of small viral capsids. Electron microscopy, small-angle x-ray scattering, and x-ray crystallography show that 10 designs spanning three distinct icosahedral architectures form materials closely matching the design models. In vitro assembly of icosahedral complexes from independently purified components occurs rapidly, at rates comparable to those of viral capsids, and enables controlled packaging of molecular cargo through charge complementarity. The ability to design megadalton-scale materials with atomic-level accuracy and controllable assembly opens the door to a new generation of genetically programmable protein-based molecular machines.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2014
Neil P. King, Jacob B Bale, William Sheffler, Dan E McNamara, Shane Gonen, Tamir Gonen, Todd O. Yeates, David Baker
Accurate design of co-assembling multi-component protein nanomaterials. Journal Article
In: Nature, 2014, ISSN: 1476-4687.
@article{534,
  author    = {Neil P. King and Jacob B Bale and William Sheffler and Dan E McNamara and Shane Gonen and Tamir Gonen and Todd O. Yeates and David Baker},
  title     = {Accurate design of co-assembling multi-component protein nanomaterials},
  journal   = {Nature},
  year      = {2014},
  date      = {2014-05-01},
  doi       = {10.1038/nature13404},
  issn      = {1476-4687},
  url       = {http://www.bakerlab.org/wp-content/uploads/2015/12/King_Nature2014A.pdf},
  abstract  = {The self-assembly of proteins into highly ordered nanoscale architectures is a hallmark of biological systems. The sophisticated functions of these molecular machines have inspired the development of methods to engineer self-assembling protein nanostructures; however, the design of multi-component protein nanomaterials with high accuracy remains an outstanding challenge. Here we report a computational method for designing protein nanomaterials in which multiple copies of two distinct subunits co-assemble into a specific architecture. We use the method to design five 24-subunit cage-like protein nanomaterials in two distinct symmetric architectures and experimentally demonstrate that their structures are in close agreement with the computational design models. The accuracy of the method and the number and variety of two-component materials that it makes accessible suggest a route to the construction of functional protein nanomaterials tailored to specific applications.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2013
Jean-Philippe Demers, Nikolaos G Sgourakis, Rashmi Gupta, Antoine Loquet, Karin Giller, Dietmar Riedel, Britta Laube, Michael Kolbe, David Baker, Stefan Becker, Adam Lange
The common structural architecture of Shigella flexneri and Salmonella typhimurium type three secretion needles Journal Article
In: PLoS pathogens, vol. 9, pp. e1003245, 2013, ISSN: 1553-7374.
@article{471,
  author    = {Jean-Philippe Demers and Nikolaos G Sgourakis and Rashmi Gupta and Antoine Loquet and Karin Giller and Dietmar Riedel and Britta Laube and Michael Kolbe and David Baker and Stefan Becker and Adam Lange},
  title     = {The common structural architecture of Shigella flexneri and Salmonella typhimurium type three secretion needles},
  journal   = {PLoS pathogens},
  volume    = {9},
  pages     = {e1003245},
  year      = {2013},
  date      = {2013-03-01},
  doi       = {10.1371/journal.ppat.1003245},
  issn      = {1553-7374},
  url       = {http://www.bakerlab.org/wp-content/uploads/2015/12/Demers_PLosPathogen_13P.pdf},
  abstract  = {The Type Three Secretion System (T3SS), or injectisome, is a macromolecular infection machinery present in many pathogenic Gram-negative bacteria. It consists of a basal body, anchored in both bacterial membranes, and a hollow needle through which effector proteins are delivered into the target host cell. Two different architectures of the T3SS needle have been previously proposed. First, an atomic model of the Salmonella typhimurium needle was generated from solid-state NMR data. The needle subunit protein, PrgI, comprises a rigid-extended N-terminal segment and a helix-loop-helix motif with the N-terminus located on the outside face of the needle. Second, a model of the Shigella flexneri needle was generated from a high-resolution 7.7-Å cryo-electron microscopy density map. The subunit protein, MxiH, contains an N-terminal α-helix, a loop, another α-helix, a 14-residue-long β-hairpin (Q51-Q64) and a C-terminal α-helix, with the N-terminus facing inward to the lumen of the needle. In the current study, we carried out solid-state NMR measurements of wild-type Shigella flexneri needles polymerized in vitro and identified the following secondary structure elements for MxiH: a rigid-extended N-terminal segment (S2-T11), an α-helix (L12-A38), a loop (E39-P44) and a C-terminal α-helix (Q45-R83). Using immunogold labeling in vitro and in vivo on functional needles, we located the N-terminus of MxiH subunits on the exterior of the assembly, consistent with evolutionary sequence conservation patterns and mutagenesis data. We generated a homology model of Shigella flexneri needles compatible with both experimental data: the MxiH solid-state NMR chemical shifts and the state-of-the-art cryoEM density map.
These results corroborate the solid-state NMR structure previously solved for Salmonella typhimurium PrgI needles and establish that Shigella flexneri and Salmonella typhimurium subunit proteins adopt a conserved structure and orientation in their assembled state. Our study reveals a common structural architecture of T3SS needles, essential to understand T3SS-mediated infection and develop treatments.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Matthew Harger, Lei Zheng, Austin Moon, Casey Ager, Ju Hye An, Chris Choe, Yi-Ling Lai, Benjamin Mo, David Zong, Matthew D Smith, Robert G Egbert, Jeremy H Mills, David Baker, Ingrid Swanson Pultz, Justin B Siegel
Expanding the product profile of a microbial alkane biosynthetic pathway. Journal Article
In: ACS synthetic biology, vol. 2, pp. 59-62, 2013, ISSN: 2161-5063.
@article{503,
  author    = {Matthew Harger and Lei Zheng and Austin Moon and Casey Ager and Ju Hye An and Chris Choe and Yi-Ling Lai and Benjamin Mo and David Zong and Matthew D Smith and Robert G Egbert and Jeremy H Mills and David Baker and Ingrid Swanson Pultz and Justin B Siegel},
  title     = {Expanding the product profile of a microbial alkane biosynthetic pathway},
  journal   = {ACS synthetic biology},
  volume    = {2},
  pages     = {59--62},
  year      = {2013},
  date      = {2013-01-01},
  doi       = {10.1021/sb300061x},
  issn      = {2161-5063},
  url       = {http://www.bakerlab.org/wp-content/uploads/2015/12/Harger_ACSSynthBiol_2013.pdf},
  abstract  = {Microbially produced alkanes are a new class of biofuels that closely match the chemical composition of petroleum-based fuels. Alkanes can be generated from the fatty acid biosynthetic pathway by the reduction of acyl-ACPs followed by decarbonylation of the resulting aldehydes. A current limitation of this pathway is the restricted product profile, which consists of n-alkanes of 13, 15, and 17 carbons in length. To expand the product profile, we incorporated a new part, FabH2 from Bacillus subtilis, an enzyme known to have a broader specificity profile for fatty acid initiation than the native FabH of Escherichia coli. When provided with the appropriate substrate, the addition of FabH2 resulted in an altered alkane product profile in which significant levels of n-alkanes of 14 and 16 carbons in length are produced. The production of even chain length alkanes represents initial steps toward the expansion of this recently discovered microbial alkane production pathway to synthesize complex fuels. This work was conceived and performed as part of the 2011 University of Washington international Genetically Engineered Machines (iGEM) project.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2011
Lisa R Warner, Krisztina Varga, Oliver F Lange, Susan L Baker, David Baker, Marcelo C Sousa, Arthur Pardi
Structure of the BamC two-domain protein obtained by Rosetta with a limited NMR data set Journal Article
In: Journal of Molecular Biology, vol. 411, pp. 83-95, 2011, ISSN: 1089-8638.
@article{586,
  author    = {Lisa R Warner and Krisztina Varga and Oliver F Lange and Susan L Baker and David Baker and Marcelo C Sousa and Arthur Pardi},
  title     = {Structure of the BamC two-domain protein obtained by Rosetta with a limited NMR data set},
  journal   = {Journal of Molecular Biology},
  volume    = {411},
  pages     = {83--95},
  year      = {2011},
  date      = {2011-08-01},
  doi       = {10.1016/j.jmb.2011.05.022},
  issn      = {1089-8638},
  url       = {https://www.bakerlab.org/wp-content/uploads/2018/06/1-s2.0-S0022283611005729-main.pdf
https://www.sciencedirect.com/science/article/pii/S0022283611005729?via%3Dihub},
  abstract  = {The CS-RDC-NOE Rosetta program was used to generate the solution structure of a 27-kDa fragment of the Escherichia coli BamC protein from a limited set of NMR data. The BamC protein is a component of the essential five-protein β-barrel assembly machine in E. coli. The first 100 residues in BamC were disordered in solution. The Rosetta calculations showed that BamC$_{101-344}$ forms two well-defined domains connected by an ~18-residue linker, where the relative orientation of the domains was not defined. Both domains adopt a helix-grip fold previously observed in the Bet v 1 superfamily. $^{15}$N relaxation data indicated a high degree of conformational flexibility for the linker connecting the N-terminal domain and the C-terminal domain in BamC. The results here show that CS-RDC-NOE Rosetta is robust and has a high tolerance for misassigned nuclear Overhauser effect restraints, greatly simplifying NMR structure determinations.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Junjie Zhang, Boxue Ma, Frank DiMaio, Nicholai R Douglas, Lukasz A Joachimiak, David Baker, Judith Frydman, Michael Levitt, Wah Chiu
Cryo-EM structure of a group II chaperonin in the prehydrolysis ATP-bound state leading to lid closure Journal Article
In: Structure (London, England : 1993), vol. 19, pp. 633-639, 2011, ISSN: 1878-4186.
@article{596,
  author    = {Junjie Zhang and Boxue Ma and Frank DiMaio and Nicholai R Douglas and Lukasz A Joachimiak and David Baker and Judith Frydman and Michael Levitt and Wah Chiu},
  title     = {Cryo-EM structure of a group II chaperonin in the prehydrolysis ATP-bound state leading to lid closure},
  journal   = {Structure (London, England : 1993)},
  volume    = {19},
  pages     = {633--639},
  year      = {2011},
  date      = {2011-05-01},
  doi       = {10.1016/j.str.2011.03.005},
  issn      = {1878-4186},
  abstract  = {Chaperonins are large ATP-driven molecular machines that mediate cellular protein folding. Group II chaperonins use their "built-in lid" to close their central folding chamber. Here we report the structure of an archaeal group II chaperonin in its prehydrolysis ATP-bound state at subnanometer resolution using single particle cryo-electron microscopy (cryo-EM). Structural comparison of Mm-cpn in ATP-free, ATP-bound, and ATP-hydrolysis states reveals that ATP binding alone causes the chaperonin to close slightly with a ~45° counterclockwise rotation of the apical domain. The subsequent ATP hydrolysis drives each subunit to rock toward the folding chamber and to close the lid completely. These motions are attributable to the local interactions of specific active site residues with the nucleotide, the tight couplings between the apical and intermediate domains within the subunit, and the aligned interactions between two subunits across the rings. This mechanism of structural changes in response to ATP is entirely different from those found in group I chaperonins.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2010
Sarah Sanowar, Pragya Singh, Richard A Pfuetzner, Ingemar André, Hongjin Zheng, Thomas Spreter, Natalie C J Strynadka, Tamir Gonen, David Baker, David R Goodlett, Samuel I Miller
Interactions of the Transmembrane Polymeric Rings of the Salmonella enterica Serovar Typhimurium Type III Secretion System Journal Article
In: mBio, vol. 1, 2010, ISSN: 2150-7511.
@article{261,
  author    = {Sarah Sanowar and Pragya Singh and Richard A Pfuetzner and Ingemar André and Hongjin Zheng and Thomas Spreter and Natalie C J Strynadka and Tamir Gonen and David Baker and David R Goodlett and Samuel I Miller},
  title     = {Interactions of the Transmembrane Polymeric Rings of the Salmonella enterica Serovar Typhimurium Type III Secretion System},
  journal   = {mBio},
  volume    = {1},
  year      = {2010},
  date      = {2010},
  issn      = {2150-7511},
  abstract  = {The type III secretion system (T3SS) is an interspecies protein transport machine that plays a major role in interactions of Gram-negative bacteria with animals and plants by delivering bacterial effector proteins into host cells. T3SSs span both membranes of Gram-negative bacteria by forming a structure of connected oligomeric rings termed the needle complex (NC). Here, the localization of subunits in the Salmonella enterica serovar Typhimurium T3SS NC were probed via mass spectrometry-assisted identification of chemical cross-links in intact NC preparations. Cross-links between amino acids near the amino terminus of the outer membrane ring component InvG and the carboxyl terminus of the inner membrane ring component PrgH and between the two inner membrane components PrgH and PrgK allowed for spatial localization of the three ring components within the electron density map structures of NCs. Mutational and biochemical analysis demonstrated that the amino terminus of InvG and the carboxyl terminus of PrgH play a critical role in the assembly and function of the T3SS apparatus. Analysis of an InvG mutant indicates that the structure of the InvG oligomer can affect the switching of the T3SS substrate to translocon and effector components. This study provides insights into how structural organization of needle complex base components promotes T3SS assembly and function.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2008
Raman S, Qian B, Baker D, Walker RC
Advances in Rosetta Protein Structure Prediction on Massively Parallel Systems Journal Article
In: IBM Journal of Research and Development, vol. 52, no. 1-2, pp. 7-17, 2008.
@article{280,
title = {Advances in {Rosetta} Protein Structure Prediction on Massively Parallel Systems},
author = {Raman, S. and Qian, B. and Baker, D. and Walker, R. C.},
year = {2008},
date = {2008-01-01},
journal = {Journal of Research and Development},
volume = {52},
number = {1-2},
pages = {7--17},
abstract = {One of the key challenges in computational biology is prediction of three-dimensional protein structures from amino-acid sequences. For most proteins, the "native state" lies at the bottom of a free-energy landscape. Protein structure prediction involves varying the degrees of freedom of the protein in a constrained manner until it approaches its native state. In the Rosetta protein structure prediction protocols, a large number of independent folding trajectories are simulated, and several lowest-energy results are likely to be close to the native state. The availability of hundred-teraflop, and shortly, petaflop, computing resources is revolutionizing the approaches available for protein structure prediction. Here, we discuss issues involved in utilizing such machines efficiently with the Rosetta code, including an overview of recent results of the Critical Assessment of Techniques for Protein Structure Prediction 7 (CASP7) in which the computationally demanding structure-refinement process was run on 16 racks of the IBM Blue Gene/L (TM) system at the IBM T. J. Watson Research Center. We highlight recent advances in high-performance computing and discuss future development paths that make use of the next-generation petascale ($> 10^{12}$ floating-point operations per second) machines.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2006
Gautam Dantas, Alexander L Watters, Bradley M Lunde, Ziad M Eletr, Nancy G Isern, Toby Roseman, Jan Lipfert, Sebastian Doniach, Martin Tompa, Brian Kuhlman, Barry L Stoddard, Gabriele Varani, David Baker
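Mis-translation of a computationally designed protein yields an exceptionally stable homodimer: implications for protein engineering and evolution Journal Article
In: Journal of Molecular Biology, vol. 362, pp. 1004-1024, 2006, ISSN: 0022-2836.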
@article{156,
title = {Mis-translation of a computationally designed protein yields an exceptionally stable homodimer: implications for protein engineering and evolution},
author = {Dantas, Gautam and Watters, Alexander L and Lunde, Bradley M and Eletr, Ziad M and Isern, Nancy G and Roseman, Toby and Lipfert, Jan and Doniach, Sebastian and Tompa, Martin and Kuhlman, Brian and Stoddard, Barry L and Varani, Gabriele and Baker, David},
url = {https://www.bakerlab.org/wp-content/uploads/2016/07/dantas06A.pdf},
issn = {0022-2836},
year = {2006},
date = {2006-10-01},
journal = {Journal of Molecular Biology},
volume = {362},
pages = {1004--1024},
abstract = {We recently used computational protein design to create an extremely stable, globular protein, Top7, with a sequence and fold not observed previously in nature. Since Top7 was created in the absence of genetic selection, it provides a rare opportunity to investigate aspects of the cellular protein production and surveillance machinery that are subject to natural selection. Here we show that a portion of the Top7 protein corresponding to the final 49 C-terminal residues is efficiently mis-translated and accumulates at high levels in Escherichia coli. We used circular dichroism, size-exclusion chromatography, small-angle X-ray scattering, analytical ultra-centrifugation, and NMR spectroscopy to show that the resulting C-terminal fragment (CFr) protein adopts a compact, extremely stable, homo-dimeric structure. Based on the solution structure, we engineered an even more stable variant of CFr by disulfide-induced covalent circularisation that should be an excellent platform for design of novel functions. The accumulation of high levels of CFr exposes the high error rate of the protein translation machinery. The rarity of correspondingly stable fragments in natural proteins coupled with the observation that high quality ribosome binding sites are found to occur within E. coli protein-coding regions significantly less often than expected by random chance implies a stringent evolutionary pressure against protein sub-fragments that can independently fold into stable structures. The symmetric self-association between two identical mis-translated CFr sub-domains to generate an extremely stable structure parallels a mechanism for natural protein-fold evolution by modular recombination of protein sub-structures.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}