Publications
Preprints available on bioRxiv.
Baek, Minkyung; Anishchenko, Ivan; Park, Hahnbeom; Humphreys, Ian R.; Baker, David
Protein oligomer modeling guided by predicted inter-chain contacts in CASP14 Journal Article
In: Proteins, 2021.
@article{Baek2021b,
  author    = {Baek, Minkyung and Anishchenko, Ivan and Park, Hahnbeom and Humphreys, Ian R. and Baker, David},
  title     = {Protein oligomer modeling guided by predicted inter-chain contacts in {CASP14}},
  journal   = {Proteins},
  year      = {2021},
  date      = {2021-07-29},
  doi       = {10.1002/prot.26197},
  url       = {https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.26197},
  urldate   = {2021-07-29},
  abstract  = {For CASP14, we developed deep learning-based methods for predicting homo-oligomeric and hetero-oligomeric contacts and used them for oligomer modeling. To build structure models, we developed an oligomer structure generation method that utilizes predicted inter-chain contacts to guide iterative restrained minimization from random backbone structures. We supplemented this gradient-based fold-and-dock method with template-based and ab initio docking approaches using deep learning-based subunit predictions on 29 assembly targets. These methods produced oligomer models with summed Z-scores 5.5 units higher than the next best group, with the fold-and-dock method having the best relative performance. Over the eight targets for which this method was used, the best of the five submitted models had average oligomer TM-score of 0.71 (average oligomer TM-score of the next best group: 0.64), and explicit modeling of inter-subunit interactions improved modeling of six out of 40 individual domains (ΔGDT-TS > 2.0).},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Norn, Christoffer; Wicky, Basile I. M.; Juergens, David; Liu, Sirui; Kim, David; Tischer, Doug; Koepnick, Brian; Anishchenko, Ivan; Baker, David; Ovchinnikov, Sergey
Protein sequence design by conformational landscape optimization Journal Article
In: Proceedings of the National Academy of Sciences, vol. 118, no. 11, 2021.
@article{Norn2021,
  author    = {Norn, Christoffer and Wicky, Basile I. M. and Juergens, David and Liu, Sirui and Kim, David and Tischer, Doug and Koepnick, Brian and Anishchenko, Ivan and Baker, David and Ovchinnikov, Sergey},
  title     = {Protein sequence design by conformational landscape optimization},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {118},
  number    = {11},
  pages     = {e2017228118},
  year      = {2021},
  date      = {2021-03-16},
  doi       = {10.1073/pnas.2017228118},
  url       = {https://www.pnas.org/content/118/11/e2017228118},
  urldate   = {2021-03-16},
  pdfurl    = {https://www.bakerlab.org/wp-content/uploads/2021/03/Norn_etal_PNAS2021_LandscapeOptimization.pdf},
  abstract  = {Almost all proteins fold to their lowest free energy state, which is determined by their amino acid sequence. Computational protein design has primarily focused on finding sequences that have very low energy in the target designed structure. However, what is most relevant during folding is not the absolute energy of the folded state but the energy difference between the folded state and the lowest-lying alternative states. We describe a deep learning approach that captures aspects of the folding landscape, in particular the presence of structures in alternative energy minima, and show that it can enhance current protein design methods. The protein design problem is to identify an amino acid sequence that folds to a desired structure. Given Anfinsen{\textquoteright}s thermodynamic hypothesis of folding, this can be recast as finding an amino acid sequence for which the desired structure is the lowest energy state. As this calculation involves not only all possible amino acid sequences but also, all possible structures, most current approaches focus instead on the more tractable problem of finding the lowest-energy amino acid sequence for the desired structure, often checking by protein structure prediction in a second step that the desired structure is indeed the lowest-energy conformation for the designed sequence, and typically discarding a large fraction of designed sequences for which this is not the case. Here, we show that by backpropagating gradients through the transform-restrained Rosetta (trRosetta) structure prediction network from the desired structure to the input amino acid sequence, we can directly optimize over all possible amino acid sequences and all possible structures in a single calculation. We find that trRosetta calculations, which consider the full conformational landscape, can be more effective than Rosetta single-point energy estimations in predicting folding and stability of de novo designed proteins.
We compare sequence design by conformational landscape optimization with the standard energy-based sequence design methodology in Rosetta and show that the former can result in energy landscapes with fewer alternative energy minima. We show further that more funneled energy landscapes can be designed by combining the strengths of the two approaches: the low-resolution trRosetta model serves to disfavor alternative states, and the high-resolution Rosetta model serves to create a deep energy minimum at the design target structure.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Hiranuma, Naozumi; Park, Hahnbeom; Baek, Minkyung; Anishchenko, Ivan; Dauparas, Justas; Baker, David
Improved protein structure refinement guided by deep learning based accuracy estimation Journal Article
In: Nature Communications, vol. 12, article no. 1340, 2021.
@article{Hiranuma2021,
  author    = {Hiranuma, Naozumi and Park, Hahnbeom and Baek, Minkyung and Anishchenko, Ivan and Dauparas, Justas and Baker, David},
  title     = {Improved protein structure refinement guided by deep learning based accuracy estimation},
  journal   = {Nature Communications},
  volume    = {12},
  pages     = {1340},
  year      = {2021},
  date      = {2021-02-26},
  doi       = {10.1038/s41467-021-21511-x},
  url       = {https://www.nature.com/articles/s41467-021-21511-x},
  urldate   = {2021-02-26},
  pdfurl    = {https://www.bakerlab.org/wp-content/uploads/2021/02/Hiranuma_etal_NatureComms2021_DeepLearningStructureRefinement.pdf},
  abstract  = {We develop a deep learning framework (DeepAccNet) that estimates per-residue accuracy and residue-residue distance signed error in protein models and uses these predictions to guide Rosetta protein structure refinement. The network uses 3D convolutions to evaluate local atomic environments followed by 2D convolutions to provide their global contexts and outperforms other methods that similarly predict the accuracy of protein structure models. Overall accuracy predictions for X-ray and cryoEM structures in the PDB correlate with their resolution, and the network should be broadly useful for assessing the accuracy of both predicted structure models and experimentally determined structures and identifying specific regions likely to be in error. Incorporation of the accuracy predictions at multiple stages in the Rosetta refinement protocol considerably increased the accuracy of the resulting protein structure models, illustrating how deep learning can improve search for global energy minima of biomolecules.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Ziatdinov, Maxim; Zhang, Shuai; Dollar, Orion; Pfaendtner, Jim; Mundy, Christopher J.; Li, Xin; Pyles, Harley; Baker, David; De Yoreo, James J.; Kalinin, Sergei V.
Quantifying the Dynamics of Protein Self-Organization Using Deep Learning Analysis of Atomic Force Microscopy Data Journal Article
In: Nano Letters, 2021.
@article{Ziatdinov2021,
  author    = {Ziatdinov, Maxim and Zhang, Shuai and Dollar, Orion and Pfaendtner, Jim and Mundy, Christopher J. and Li, Xin and Pyles, Harley and Baker, David and De Yoreo, James J. and Kalinin, Sergei V.},
  title     = {Quantifying the Dynamics of Protein Self-Organization Using Deep Learning Analysis of Atomic Force Microscopy Data},
  journal   = {Nano Letters},
  year      = {2021},
  date      = {2021-01-13},
  doi       = {10.1021/acs.nanolett.0c03447},
  url       = {https://pubs.acs.org/doi/10.1021/acs.nanolett.0c03447},
  abstract  = {The dynamics of protein self-assembly on the inorganic surface and the resultant geometric patterns are visualized using high-speed atomic force microscopy. The time dynamics of the classical macroscopic descriptors such as 2D fast Fourier transforms, correlation, and pair distribution functions are explored using the unsupervised linear unmixing, demonstrating the presence of static ordered and dynamic disordered phases and establishing their time dynamics. The deep learning (DL)-based workflow is developed to analyze detailed particle dynamics and explore the evolution of local geometries. Finally, we use a combination of DL feature extraction and mixture modeling to define particle neighborhoods free of physics constraints, allowing for a separation of possible classes of particle behavior and identification of the associated transitions. Overall, this work establishes the workflow for the analysis of the self-organization processes in complex systems from observational data and provides insight into the fundamental mechanisms.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Yang, Jianyi; Anishchenko, Ivan; Park, Hahnbeom; Peng, Zhenling; Ovchinnikov, Sergey; Baker, David
Improved protein structure prediction using predicted interresidue orientations Journal Article
In: Proceedings of the National Academy of Sciences, 2020, ISSN: 0027-8424.
@article{Yang2020,
  author    = {Yang, Jianyi and Anishchenko, Ivan and Park, Hahnbeom and Peng, Zhenling and Ovchinnikov, Sergey and Baker, David},
  title     = {Improved protein structure prediction using predicted interresidue orientations},
  journal   = {Proceedings of the National Academy of Sciences},
  year      = {2020},
  date      = {2020-01-02},
  issn      = {0027-8424},
  doi       = {10.1073/pnas.1914677117},
  url       = {https://www.pnas.org/content/early/2020/01/01/1914677117},
  pdfurl    = {https://www.bakerlab.org/wp-content/uploads/2020/01/Yang2020_ImprovedStructurePredictionInterresidueOrientations.pdf},
  abstract  = {Protein structure prediction is a longstanding challenge in computational biology. Through extension of deep learning-based prediction to interresidue orientations in addition to distances, and the development of a constrained optimization by Rosetta, we show that more accurate models can be generated. Results on a set of 18 de novo-designed proteins suggests the proposed method should be directly applicable to current challenges in de novo protein design. The prediction of interresidue contacts and distances from coevolutionary data using deep learning has considerably advanced protein structure prediction. Here, we build on these advances by developing a deep residual network for predicting interresidue orientations, in addition to distances, and a Rosetta-constrained energy-minimization protocol for rapidly and accurately generating structure models guided by these restraints. In benchmark tests on 13th Community-Wide Experiment on the Critical Assessment of Techniques for Protein Structure Prediction (CASP13)- and Continuous Automated Model Evaluation (CAMEO)-derived sets, the method outperforms all previously described structure-prediction methods. Although trained entirely on native proteins, the network consistently assigns higher probability to de novo-designed proteins, identifying the key fold-determining residues and providing an independent quantitative measure of the "ideality" of a protein structure. The method promises to be useful for a broad range of protein structure prediction and design problems.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Wu, Qi; Peng, Zhenling; Anishchenko, Ivan; Cong, Qian; Baker, David; Yang, Jianyi
Protein contact prediction using metagenome sequence data and residual neural networks Journal Article
In: Bioinformatics, vol. 36, no. 1, 2019.
@article{Wu2019,
  author    = {Wu, Qi and Peng, Zhenling and Anishchenko, Ivan and Cong, Qian and Baker, David and Yang, Jianyi},
  title     = {Protein contact prediction using metagenome sequence data and residual neural networks},
  journal   = {Bioinformatics},
  volume    = {36},
  number    = {1},
  year      = {2019},
  date      = {2019-06-07},
  doi       = {10.1093/bioinformatics/btz477},
  url       = {https://academic.oup.com/bioinformatics/article/36/1/41/5512356},
  abstract  = {Motivation: Almost all protein residue contact prediction methods rely on the availability of deep multiple sequence alignments (MSAs). However, many proteins from the poorly populated families do not have sufficient number of homologs in the conventional UniProt database. Here we aim to solve this issue by exploring the rich sequence data from the metagenome sequencing projects. Results: Based on the improved MSA constructed from the metagenome sequence data, we developed MapPred, a new deep learning-based contact prediction method. MapPred consists of two component methods, DeepMSA and DeepMeta, both trained with the residual neural networks. DeepMSA was inspired by the recent method DeepCov, which was trained on 441 matrices of covariance features. By considering the symmetry of contact map, we reduced the number of matrices to 231, which makes the training more efficient in DeepMSA. Experiments show that DeepMSA outperforms DeepCov by 10--13\% in precision. DeepMeta works by combining predicted contacts and other sequence profile features. Experiments on three benchmark datasets suggest that the contribution from the metagenome sequence data is significant with P-values less than 4.04E-17. MapPred is shown to be complementary and comparable the state-of-the-art methods. The success of MapPred is attributed to three factors: the deeper MSA from the metagenome sequence data, improved feature design in DeepMSA and optimized training by the residual neural networks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2025
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2024
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2023
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2022
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2021
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2020
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2019
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2018
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2017-1988
ALL PAPERS
Sorry, no publications matched your criteria.