Publications
Arakaki, Tracy; Le Trong, Isolde; Phizicky, Eric; Quartley, Erin; Detitta, George; Luft, Joseph; Lauricella, Angela; Anderson, Lori; Kalyuzhniy, Oleksandr; Worthey, Elizabeth; Myler, Peter J; Kim, David; Baker, David; Hol, Wim G J; Merritt, Ethan A
Structure of Lmaj006129AAA, a hypothetical protein from Leishmania major Journal Article
In: Acta crystallographica. Section F, Structural biology and crystallization communications, vol. 62, pp. 175-9, 2006, ISSN: 1744-3091.
@article{575,
  author    = {Arakaki, Tracy and Le Trong, Isolde and Phizicky, Eric and Quartley, Erin and Detitta, George and Luft, Joseph and Lauricella, Angela and Anderson, Lori and Kalyuzhniy, Oleksandr and Worthey, Elizabeth and Myler, Peter J and Kim, David and Baker, David and Hol, Wim G J and Merritt, Ethan A},
  title     = {Structure of {Lmaj006129AAA}, a hypothetical protein from {Leishmania} major},
  journal   = {Acta crystallographica. Section F, Structural biology and crystallization communications},
  year      = {2006},
  date      = {2006-03-01},
  volume    = {62},
  pages     = {175--179},
  issn      = {1744-3091},
  doi       = {10.1107/S1744309106005902},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/08/structureoflmaj006129aaa_Baker2006.pdf},
  abstract  = {The gene product of structural genomics target Lmaj006129 from Leishmania major codes for a 164-residue protein of unknown function. When SeMet expression of the full-length gene product failed, several truncation variants were created with the aid of Ginzu, a domain-prediction method. 11 truncations were selected for expression, purification and crystallization based upon secondary-structure elements and disorder. The structure of one of these variants, Lmaj006129AAH, was solved by multiple-wavelength anomalous diffraction (MAD) using ELVES, an automatic protein crystal structure-determination system. This model was then successfully used as a molecular-replacement probe for the parent full-length target, Lmaj006129AAA. The final structure of Lmaj006129AAA was refined to an R value of 0.185 (Rfree = 0.229) at 1.60 {\AA} resolution. Structure and sequence comparisons based on Lmaj006129AAA suggest that proteins belonging to Pfam sequence families PF04543 and PF01878 may share a common ligand-binding motif.},
  pubstate  = {published},
  tppubtype = {article},
}
Schueler-Furman, Ora; Wang, Chu; Bradley, Phil; Misura, Kira; Baker, David
Progress in modeling of protein structures and interactions Journal Article
In: Science, vol. 310, pp. 638-42, 2005, ISSN: 1095-9203.
@article{94,
  author    = {Schueler-Furman, Ora and Wang, Chu and Bradley, Phil and Misura, Kira and Baker, David},
  title     = {Progress in modeling of protein structures and interactions},
  journal   = {Science},
  year      = {2005},
  date      = {2005-10-01},
  volume    = {310},
  pages     = {638--642},
  issn      = {1095-9203},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/schueler-furman05B.pdf},
  abstract  = {The prediction of the structures and interactions of biological macromolecules at the atomic level and the design of new structures and interactions are critical tests of our understanding of the interatomic interactions that underlie molecular biology. Equally important, the capability to accurately predict and design macromolecular structures and interactions would streamline the interpretation of genome sequence information and allow the creation of macromolecules with new and useful functions. This review summarizes recent progress in modeling that suggests that we are entering an era in which high-resolution prediction and design will make increasingly important contributions to biology and medicine.},
  pubstate  = {published},
  tppubtype = {article},
}
Bradley, Philip; Misura, Kira M S; Baker, David
Toward high-resolution de novo structure prediction for small proteins Journal Article
In: Science, vol. 309, pp. 1868-71, 2005, ISSN: 1095-9203.
@article{104,
  author    = {Bradley, Philip and Misura, Kira M S and Baker, David},
  title     = {Toward high-resolution de novo structure prediction for small proteins},
  journal   = {Science},
  year      = {2005},
  date      = {2005-09-01},
  volume    = {309},
  pages     = {1868--1871},
  issn      = {1095-9203},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/bradley05B.pdf},
  abstract  = {The prediction of protein structure from amino acid sequence is a grand challenge of computational molecular biology. By using a combination of improved low- and high-resolution conformational sampling methods, improved atomically detailed potential functions that capture the jigsaw puzzle-like packing of protein cores, and high-performance computing, high-resolution structure prediction (<1.5 angstroms) can be achieved for small protein domains (<85 residues). The primary bottleneck to consistent high-resolution prediction appears to be conformational sampling.},
  pubstate  = {published},
  tppubtype = {article},
}
Schueler-Furman, Ora; Wang, Chu; Baker, David
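Progress in protein-protein docking: atomic resolution predictions in the CAPRI experiment using RosettaDock with an improved treatment of side-chain flexibility Journal Article
In: Proteins, vol. 60, pp. 187-94, 2005, ISSN: 1097-0134.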
@article{95,
  author    = {Schueler-Furman, Ora and Wang, Chu and Baker, David},
  title     = {Progress in protein-protein docking: atomic resolution predictions in the {CAPRI} experiment using {RosettaDock} with an improved treatment of side-chain flexibility},
  journal   = {Proteins},
  year      = {2005},
  date      = {2005-08-01},
  volume    = {60},
  pages     = {187--194},
  issn      = {1097-0134},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/schueler-rurman05A.pdf},
  abstract  = {RosettaDock uses real-space Monte Carlo minimization (MCM) on both rigid-body and side-chain degrees of freedom to identify the lowest free energy docked arrangement of 2 protein structures. An improved version of the method that uses gradient-based minimization for off-rotamer side-chain optimization and includes information from unbound structures was used to create predictions for Rounds 4 and 5 of CAPRI. First, large numbers of independent MCM trajectories were carried out and the lowest free energy docked configurations identified. Second, new trajectories were started from these lowest energy structures to thoroughly sample the surrounding conformation space, and the lowest energy configurations were submitted as predictions. For all cases in which there were no significant backbone conformational changes, a small number of very low-energy configurations were identified in the first, global search and subsequently found to be close to the center of the basin of attraction in the free energy landscape in the second, local search. Following the release of the experimental coordinates, it was found that the centers of these free energy minima were remarkably close to the native structures in not only the rigid-body orientation but also the detailed conformations of the side-chains. Out of 8 targets, the lowest energy models had interface root-mean-square deviations (RMSDs) less than 1.1 {\AA} from the correct structures for 6 targets, and interface RMSDs less than 0.4 {\AA} for 3 targets. The predictions were top submissions to CAPRI for Targets 11, 12, 14, 15, and 19. The close correspondence of the lowest free energy structures found in our searches to the experimental structures suggests that our free energy function is a reasonable representation of the physical chemistry, and that the real space search with full side-chain flexibility to some extent solves the protein-protein docking problem in the absence of significant backbone conformational changes. On the other hand, the approach fails when there are significant backbone conformational changes as the steric complementarity of the 2 proteins cannot be modeled without incorporating backbone flexibility, and this is the major goal of our current work.},
  pubstate  = {published},
  tppubtype = {article},
}
Wang, Chu; Schueler-Furman, Ora; Baker, David
Improved side-chain modeling for protein-protein docking Journal Article
In: Protein science, vol. 14, pp. 1328-39, 2005, ISSN: 0961-8368.
@article{93,
  author    = {Wang, Chu and Schueler-Furman, Ora and Baker, David},
  title     = {Improved side-chain modeling for protein-protein docking},
  journal   = {Protein science},
  year      = {2005},
  date      = {2005-05-01},
  volume    = {14},
  pages     = {1328--1339},
  issn      = {0961-8368},
  abstract  = {Success in high-resolution protein-protein docking requires accurate modeling of side-chain conformations at the interface. Most current methods either leave side chains fixed in the conformations observed in the unbound protein structures or allow the side chains to sample a set of discrete rotamer conformations. Here we describe a rapid and efficient method for sampling off-rotamer side-chain conformations by torsion space minimization during protein-protein docking starting from discrete rotamer libraries supplemented with side-chain conformations taken from the unbound structures, and show that the new method improves side-chain modeling and increases the energetic discrimination between good and bad models. Analysis of the distribution of side-chain interaction energies within and between the two protein partners shows that the new method leads to more native-like distributions of interaction energies and that the neglect of side-chain entropy produces a small but measurable increase in the number of residues whose interaction energy cannot compensate for the entropic cost of side-chain freezing at the interface. The power of the method is highlighted by a number of predictions of unprecedented accuracy in the recent CAPRI (Critical Assessment of PRedicted Interactions) blind test of protein-protein docking methods.},
  pubstate  = {published},
  tppubtype = {article},
}
Misura, Kira M S; Baker, David
Progress and challenges in high-resolution refinement of protein structure models Journal Article
In: Proteins, vol. 59, pp. 15-29, 2005, ISSN: 1097-0134.
@article{98,
  author    = {Misura, Kira M S and Baker, David},
  title     = {Progress and challenges in high-resolution refinement of protein structure models},
  journal   = {Proteins},
  year      = {2005},
  date      = {2005-04-01},
  volume    = {59},
  pages     = {15--29},
  issn      = {1097-0134},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/misura05A.pdf},
  abstract  = {Achieving atomic level accuracy in de novo structure prediction presents a formidable challenge even in the context of protein models with correct topologies. High-resolution refinement is a fundamental test of force field accuracy and sampling methodology, and its limited success in both comparative modeling and de novo prediction contexts highlights the limitations of current approaches. We constructed four tests to identify bottlenecks in our current approach and to guide progress in this challenging area. The first three tests showed that idealized native structures are stable under our refinement simulation conditions and that the refinement protocol can significantly decrease the root mean square deviation (RMSD) of perturbed native structures. In the fourth test we applied the refinement protocol to de novo models and showed that accurate models could be identified based on their energies, and in several cases many of the buried side chains adopted native-like conformations. We also showed that the differences in backbone and side-chain conformations between the refined de novo models and the native structures are largely localized to loop regions and regions where the native structure has unusual features such as rare rotamers or atypical hydrogen bonding between beta-strands. The refined de novo models typically have higher energies than refined idealized native structures, indicating that sampling of local backbone conformations and side-chain packing arrangements in a condensed state is a primary obstacle.},
  pubstate  = {published},
  tppubtype = {article},
}
Meiler, Jens; Baker, David
The fumarate sensor DcuS: progress in rapid protein fold elucidation by combining protein structure prediction methods with NMR spectroscopy Journal Article
In: Journal of magnetic resonance, vol. 173, pp. 310-6, 2005, ISSN: 1090-7807.
@article{99,
  author    = {Meiler, Jens and Baker, David},
  title     = {The fumarate sensor {DcuS}: progress in rapid protein fold elucidation by combining protein structure prediction methods with {NMR} spectroscopy},
  journal   = {Journal of magnetic resonance},
  year      = {2005},
  date      = {2005-04-01},
  volume    = {173},
  pages     = {310--316},
  issn      = {1090-7807},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/meiler05A.pdf},
  abstract  = {We illustrate how moderate resolution protein structures can be rapidly obtained by interlinking computational prediction methodologies with un- or partially assigned NMR data. To facilitate the application of our recently described method of ranking and subsequent refining alternative structural models using unassigned NMR data [Proc. Natl. Acad. Sci. USA 100 (2003) 15404] for such ``structural genomics''-type experiments it is combined with protein models from several prediction techniques, enhanced to utilize partial assignments, and applied on a protein with an unknown structure and fold. From the original NMR spectra obtained for the 140 residue fumarate sensor DcuS, 1100 1H, 13C, and 15N chemical shift signals, 3000 1H-1H NOESY cross peak intensities, and 209 backbone residual dipolar couplings were extracted and used to rank models produced by de novo structure prediction and comparative modeling methods. The ranking proceeds in two steps: first, an optimal assignment of the NMR peaks to atoms is found for each model independently, and second, the models are ranked based on the consistency between the NMR data and the model assuming these optimal assignments. The low-resolution model selected using this ranking procedure had the correct overall fold and a global backbone RMSD of 6.0 angstrom, and was subsequently refined to 3.7 angstrom RMSD. With the incorporation of a small number of NOE and residual dipolar coupling constraints available very early in the traditional spectral assignment process, a model with an RMSD of 2.8 angstrom could rapidly be built. The ability to generate moderate resolution models within days of NMR data collection should facilitate large scale NMR structure determination efforts.},
  pubstate  = {published},
  tppubtype = {article},
}
Jiang, Lin; Kuhlman, Brian; Kortemme, Tanja; Baker, David
A "solvated rotamer" approach to modeling water-mediated hydrogen bonds at protein-protein interfaces Journal Article
In: Proteins, vol. 58, pp. 893-904, 2005, ISSN: 1097-0134.
@article{101,
  author    = {Jiang, Lin and Kuhlman, Brian and Kortemme, Tanja and Baker, David},
  title     = {A {``solvated rotamer''} approach to modeling water-mediated hydrogen bonds at protein-protein interfaces},
  journal   = {Proteins},
  year      = {2005},
  date      = {2005-03-01},
  volume    = {58},
  pages     = {893--904},
  issn      = {1097-0134},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/jiang05A.pdf},
  abstract  = {Water-mediated hydrogen bonds play critical roles at protein-protein and protein-nucleic acid interfaces, and the interactions formed by discrete water molecules cannot be captured using continuum solvent models. We describe a simple model for the energetics of water-mediated hydrogen bonds, and show that, together with knowledge of the positions of buried water molecules observed in X-ray crystal structures, the model improves the prediction of free-energy changes upon mutation at protein-protein interfaces, and the recovery of native amino acid sequences in protein interface design calculations. We then describe a ``solvated rotamer'' approach to efficiently predict the positions of water molecules, at protein-protein interfaces and in monomeric proteins, that is compatible with widely used rotamer-based side-chain packing and protein design algorithms. Finally, we examine the extent to which the predicted water molecules can be used to improve prediction of amino acid identities and protein-protein interface stability, and discuss avenues for overcoming current limitations of the approach.},
  pubstate  = {published},
  tppubtype = {article},
}
Saunders, Christopher T; Baker, David
Recapitulation of protein family divergence using flexible backbone protein design Journal Article
In: Journal of molecular biology, vol. 346, pp. 631-44, 2005, ISSN: 0022-2836.
@article{96,
  author    = {Saunders, Christopher T and Baker, David},
  title     = {Recapitulation of protein family divergence using flexible backbone protein design},
  journal   = {Journal of molecular biology},
  year      = {2005},
  date      = {2005-02-01},
  volume    = {346},
  pages     = {631--644},
  issn      = {0022-2836},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/saundersa05A.pdf},
  abstract  = {We use flexible backbone protein design to explore the sequence and structure neighborhoods of naturally occurring proteins. The method samples sequence and structure space in the vicinity of a known sequence and structure by alternately optimizing the sequence for a fixed protein backbone using rotamer based sequence search, and optimizing the backbone for a fixed amino acid sequence using atomic-resolution structure prediction. We find that such a flexible backbone design method better recapitulates protein family sequence variation than sequence optimization on fixed backbones or randomly perturbed backbone ensembles for ten diverse protein structures. For the SH3 domain, the backbone structure variation in the family is also better recapitulated than in randomly perturbed backbones. The potential application of this method as a model of protein family evolution is highlighted by a concerted transition to the amino acid sequence in the structural core of one SH3 domain starting from the backbone coordinates of an homologous structure.},
  pubstate  = {published},
  tppubtype = {article},
}
Kim, David E; Chivian, Dylan; Malmström, Lars; Baker, David
Automated prediction of domain boundaries in CASP6 targets using Ginzu and RosettaDOM Journal Article
In: Proteins, vol. 61 Suppl 7, pp. 193-200, 2005, ISSN: 1097-0134.
@article{100,
  author    = {Kim, David E and Chivian, Dylan and Malmstr{\"o}m, Lars and Baker, David},
  title     = {Automated prediction of domain boundaries in {CASP6} targets using {Ginzu} and {RosettaDOM}},
  journal   = {Proteins},
  year      = {2005},
  date      = {2005},
  volume    = {61},
  number    = {Suppl 7},
  pages     = {193--200},
  issn      = {1097-0134},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/kim05A.pdf},
  abstract  = {Domain boundary prediction is an important step in both experimental and computational protein structure characterization. We have developed two fully automated domain parsing methods: the first, Ginzu, which we have described previously, utilizes information from homologous sequences and structures, while the second, RosettaDOM, which has not been described previously, uses only information in the query sequence. Ginzu iteratively assigns domains by homology to structures and sequence families using successively less confident methods. RosettaDOM uses the Rosetta de novo structure prediction method to build three-dimensional models, and then applies Taylor's structure based domain assignment method to parse the models into domains. Domain boundaries observed repeatedly in the models are predicted to be domain boundaries for the protein. Interestingly, RosettaDOM produced quite good domain predictions for proteins of a size typically considered to be beyond the reach of de novo structure prediction methods. For remote fold recognition targets and new folds, both Ginzu and RosettaDOM produced promising results, and in some cases where one method failed to detect the correct domain boundary, it was correctly identified by the other method. We describe here the successes and failures using both methods, and address the possibility of incorporating both protocols into an improved hybrid method.},
  pubstate  = {published},
  tppubtype = {article},
}
Graña, Osvaldo; Baker, David; MacCallum, Robert M; Meiler, Jens; Punta, Marco; Rost, Burkhard; Tress, Michael L; Valencia, Alfonso
CASP6 assessment of contact prediction Journal Article
In: Proteins, vol. 61 Suppl 7, pp. 214-24, 2005, ISSN: 1097-0134.
@article{297,
  author    = {Gra{\~n}a, Osvaldo and Baker, David and MacCallum, Robert M and Meiler, Jens and Punta, Marco and Rost, Burkhard and Tress, Michael L and Valencia, Alfonso},
  title     = {{CASP6} assessment of contact prediction},
  journal   = {Proteins},
  year      = {2005},
  date      = {2005},
  volume    = {61},
  number    = {Suppl 7},
  pages     = {214--224},
  issn      = {1097-0134},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/grana05A.pdf},
  abstract  = {Here we present the evaluation results of the Critical Assessment of Protein Structure Prediction (CASP6) contact prediction category. Contact prediction was assessed with standard measures well known in the field and the performance of specialist groups was evaluated alongside groups that submitted models with 3D coordinates. The evaluation was mainly focused on long range contact predictions for the set of new fold targets, although we analyzed predictions for all targets. Three groups with similar levels of accuracy and coverage performed a little better than the others. Comparisons of the predictions of the three best methods with those of CASP5/CAFASP3 suggested some improvement, although there were not enough targets in the comparisons to make this statistically significant.},
  pubstate  = {published},
  tppubtype = {article},
}
Bradley, Philip; Malmström, Lars; Qian, Bin; Schonbrun, Jack; Chivian, Dylan; Kim, David E; Meiler, Jens; Misura, Kira M S; Baker, David
Free modeling with Rosetta in CASP6 Journal Article
In: Proteins, vol. 61 Suppl 7, pp. 128-34, 2005, ISSN: 1097-0134.
@article{105,
  author    = {Bradley, Philip and Malmstr{\"o}m, Lars and Qian, Bin and Schonbrun, Jack and Chivian, Dylan and Kim, David E and Meiler, Jens and Misura, Kira M S and Baker, David},
  title     = {Free modeling with {Rosetta} in {CASP6}},
  journal   = {Proteins},
  year      = {2005},
  date      = {2005},
  volume    = {61},
  number    = {Suppl 7},
  pages     = {128--134},
  issn      = {1097-0134},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/bradley05A.pdf},
  abstract  = {We describe Rosetta predictions in the Sixth Community-Wide Experiment on the Critical Assessment of Techniques for Protein Structure Prediction (CASP), focusing on the free modeling category. Methods developed since CASP5 are described, and their application to selected targets is discussed. Highlights include improved performance on larger proteins (100-200 residues) and the prediction of a 70-residue alpha-beta protein to near-atomic resolution.},
  pubstate  = {published},
  tppubtype = {article},
}
Cheng, Gong; Qian, Bin; Samudrala, Ram; Baker, David
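Improvement in protein functional site prediction by distinguishing structural and functional constraints on protein family evolution using computational design Journal Article
In: Nucleic acids research, vol. 33, pp. 5861-7, 2005, ISSN: 1362-4962.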
@article{103,
  author    = {Cheng, Gong and Qian, Bin and Samudrala, Ram and Baker, David},
  title     = {Improvement in protein functional site prediction by distinguishing structural and functional constraints on protein family evolution using computational design},
  journal   = {Nucleic acids research},
  year      = {2005},
  date      = {2005},
  volume    = {33},
  pages     = {5861--5867},
  issn      = {1362-4962},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/cheng05A.pdf},
  abstract  = {The prediction of functional sites in newly solved protein structures is a challenge for computational structural biology. Most methods for approaching this problem use evolutionary conservation as the primary indicator of the location of functional sites. However, sequence conservation reflects not only evolutionary selection at functional sites to maintain protein function, but also selection throughout the protein to maintain the stability of the folded state. To disentangle sequence conservation due to protein functional constraints from sequence conservation due to protein structural constraints, we use all atom computational protein design methodology to predict sequence profiles expected under solely structural constraints, and to compute the free energy difference between the naturally occurring amino acid and the lowest free energy amino acid at each position. We show that functional sites are more likely than non-functional sites to have computed sequence profiles which differ significantly from the naturally occurring sequence profiles and to have residues with sub-optimal free energies, and that incorporation of these two measures improves sequence based prediction of protein functional sites. The combined sequence and structure based functional site prediction method has been implemented in a publicly available web server.},
  pubstate  = {published},
  tppubtype = {article},
}
Chivian, Dylan; Kim, David E; Malmström, Lars; Schonbrun, Jack; Rohl, Carol A; Baker, David
Prediction of CASP6 structures using automated Robetta protocols Journal Article
In: Proteins, vol. 61 Suppl 7, pp. 157-66, 2005, ISSN: 1097-0134.
@article{102,
  author    = {Chivian, Dylan and Kim, David E and Malmstr{\"o}m, Lars and Schonbrun, Jack and Rohl, Carol A and Baker, David},
  title     = {Prediction of {CASP6} structures using automated {Robetta} protocols},
  journal   = {Proteins},
  year      = {2005},
  date      = {2005},
  volume    = {61},
  number    = {Suppl 7},
  pages     = {157--166},
  issn      = {1097-0134},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/07/chivian05A.pdf},
  abstract  = {The Robetta server and revised automatic protocols were used to predict structures for CASP6 targets. Robetta is a publicly available protein structure prediction server (http://robetta.bakerlab.org/) that uses the Rosetta de novo and homology modeling structure prediction methods. We incorporated some of the lessons learned in the CASP5 experiment into the server prior to participating in CASP6. We additionally tested new ideas that were amenable to full-automation with an eye toward improving the server. We find that the Robetta server shows the greatest promise for the more challenging targets. The most significant finding from CASP5, that automated protocols can be roughly comparable in ability with the better human-intervention predictors, is repeated here in CASP6.},
  pubstate  = {published},
  tppubtype = {article},
}
Morozov, Alexandre V; Havranek, James J; Baker, David; Siggia, Eric D
Protein-DNA binding specificity predictions with structural models Journal Article
In: Nucleic acids research, vol. 33, pp. 5781-98, 2005, ISSN: 1362-4962.
@article{97,
  author    = {Morozov, Alexandre V and Havranek, James J and Baker, David and Siggia, Eric D},
  title     = {Protein-{DNA} binding specificity predictions with structural models},
  journal   = {Nucleic acids research},
  year      = {2005},
  date      = {2005},
  volume    = {33},
  pages     = {5781--5798},
  issn      = {1362-4962},
  abstract  = {Protein-DNA interactions play a central role in transcriptional regulation and other biological processes. Investigating the mechanism of binding affinity and specificity in protein-DNA complexes is thus an important goal. Here we develop a simple physical energy function, which uses electrostatics, solvation, hydrogen bonds and atom-packing terms to model direct readout and sequence-specific DNA conformational energy to model indirect readout of DNA sequence by the bound protein. The predictive capability of the model is tested against another model based only on the knowledge of the consensus sequence and the number of contacts between amino acids and DNA bases. Both models are used to carry out predictions of protein-DNA binding affinities which are then compared with experimental measurements. The nearly additive nature of protein-DNA interaction energies in our model allows us to construct position-specific weight matrices by computing base pair probabilities independently for each position in the binding site. Our approach is less data intensive than knowledge-based models of protein-DNA interactions, and is not limited to any specific family of transcription factors. However, native structures of protein-DNA complexes or their close homologs are required as input to the model. Use of homology modeling can significantly increase the extent of our approach, making it a useful tool for studying regulatory pathways in many organisms and cell types.},
  pubstate  = {published},
  tppubtype = {article},
}
Havranek, James J; Duarte, Carlos M; Baker, David
A simple physical model for the prediction and design of protein-DNA interactions Journal Article
In: Journal of molecular biology, vol. 344, pp. 59-70, 2004, ISSN: 0022-2836.
@article{168,
  author    = {Havranek, James J and Duarte, Carlos M and Baker, David},
  title     = {A simple physical model for the prediction and design of protein-{DNA} interactions},
  journal   = {Journal of molecular biology},
  year      = {2004},
  date      = {2004-11-01},
  volume    = {344},
  pages     = {59--70},
  issn      = {0022-2836},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/havranek04A.pdf},
  abstract  = {Protein-DNA interactions are crucial for many biological processes. Attempts to model these interactions have generally taken the form of amino acid-base recognition codes or purely sequence-based profile methods, which depend on the availability of extensive sequence and structural information for specific structural families, neglect side-chain conformational variability, and lack generality beyond the structural family used to train the model. Here, we take advantage of recent advances in rotamer-based protein design and the large number of structurally characterized protein-DNA complexes to develop and parameterize a simple physical model for protein-DNA interactions. The model shows considerable promise for redesigning amino acids at protein-DNA interfaces, as design calculations recover the amino acid residue identities and conformations at these interfaces with accuracies comparable to sequence recovery in globular proteins. The model shows promise also for predicting DNA-binding specificity for fixed protein sequences: native DNA sequences are selected correctly from pools of competing DNA substrates; however, incorporation of backbone movement will likely be required to improve performance in homology modeling applications. Interestingly, optimization of zinc finger protein amino acid sequences for high-affinity binding to specific DNA sequences results in proteins with little or no predicted specificity, suggesting that naturally occurring DNA-binding proteins are optimized for specificity rather than affinity. When combined with algorithms that optimize specificity directly, the simple computational model developed here should be useful for the engineering of proteins with novel DNA-binding specificities.},
  pubstate  = {published},
  tppubtype = {article},
}
Misura, Kira M S; Morozov, Alexandre V; Baker, David
Analysis of anisotropic side-chain packing in proteins and application to high-resolution structure prediction Journal Article
In: Journal of molecular biology, vol. 342, pp. 651-64, 2004, ISSN: 0022-2836.
@article{173,
  author    = {Misura, Kira M S and Morozov, Alexandre V and Baker, David},
  title     = {Analysis of anisotropic side-chain packing in proteins and application to high-resolution structure prediction},
  journal   = {Journal of molecular biology},
  year      = {2004},
  date      = {2004-09-01},
  volume    = {342},
  pages     = {651--664},
  issn      = {0022-2836},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/misura04A.pdf},
  abstract  = {pi-pi, Cation-pi, and hydrophobic packing interactions contribute specificity to protein folding and stability to the native state. As a step towards developing improved models of these interactions in proteins, we compare the side-chain packing arrangements in native proteins to those found in compact decoys produced by the Rosetta de novo structure prediction method. We find enrichments in the native distributions for T-shaped and parallel offset arrangements of aromatic residue pairs, in parallel stacked arrangements of cation-aromatic pairs, in parallel stacked pairs involving proline residues, and in parallel offset arrangements for aliphatic residue pairs. We then investigate the extent to which the distinctive features of native packing can be explained using Lennard-Jones and electrostatics models. Finally, we derive orientation-dependent pi-pi, cation-pi and hydrophobic interaction potentials based on the differences between the native and compact decoy distributions and investigate their efficacy for high-resolution protein structure prediction. Surprisingly, the orientation-dependent potential derived from the packing arrangements of aliphatic side-chain pairs distinguishes the native structure from compact decoys better than the orientation-dependent potentials describing pi-pi and cation-pi interactions.},
  pubstate  = {published},
  tppubtype = {article},
}
Kim, David E; Chivian, Dylan; Baker, David
Protein structure prediction and analysis using the Robetta server Journal Article
In: Nucleic acids research, vol. 32, pp. W526-31, 2004, ISSN: 1362-4962.
% Journal article on the Robetta structure-prediction server.
% "tppubtype" is not a standard BibTeX field (presumably exporter metadata -- confirm); standard styles ignore unknown fields.
@article{169,
  title     = {Protein structure prediction and analysis using the {Robetta} server},
  author    = {Kim, David E. and Chivian, Dylan and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/kim04A.pdf},
  issn      = {1362-4962},
  year      = {2004},
  date      = {2004-07-01},
  journal   = {Nucleic acids research},
  volume    = {32},
  pages     = {W526--W531},
  abstract  = {The Robetta server (http://robetta.bakerlab.org) provides automated tools for protein structure prediction and analysis. For structure prediction, sequences submitted to the server are parsed into putative domains and structural models are generated using either comparative modeling or de novo structure prediction methods. If a confident match to a protein of known structure is found using BLAST, PSI-BLAST, FFAS03 or 3D-Jury, it is used as a template for comparative modeling. If no match is found, structure predictions are made using the de novo Rosetta fragment insertion method. Experimental nuclear magnetic resonance (NMR) constraints data can also be submitted with a query sequence for RosettaNMR de novo structure determination. Other current capabilities include the prediction of the effects of mutations on protein-protein interactions using computational interface alanine scanning. The Rosetta protein design and protein-protein docking methodologies will soon be available through the server as well.},
  pubstate  = {published},
  tppubtype = {article}
}
Rohl, Carol A; Strauss, Charlie E M; Chivian, Dylan; Baker, David
Modeling structurally variable regions in homologous proteins with rosetta Journal Article
In: Proteins, vol. 55, pp. 656-77, 2004, ISSN: 1097-0134.
% Journal article on modeling structurally variable regions with Rosetta.
% "tppubtype" is not a standard BibTeX field (presumably exporter metadata -- confirm); standard styles ignore unknown fields.
@article{177,
  title     = {Modeling structurally variable regions in homologous proteins with {Rosetta}},
  author    = {Rohl, Carol A. and Strauss, Charlie E. M. and Chivian, Dylan and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/rohl04A.pdf},
  issn      = {1097-0134},
  year      = {2004},
  date      = {2004-05-01},
  journal   = {Proteins},
  volume    = {55},
  pages     = {656--677},
  abstract  = {A major limitation of current comparative modeling methods is the accuracy with which regions that are structurally divergent from homologues of known structure can be modeled. Because structural differences between homologous proteins are responsible for variations in protein function and specificity, the ability to model these differences has important functional consequences. Although existing methods can provide reasonably accurate models of short loop regions, modeling longer structurally divergent regions is an unsolved problem. Here we describe a method based on the de novo structure prediction algorithm, Rosetta, for predicting conformations of structurally divergent regions in comparative models. Initial conformations for short segments are selected from the protein structure database, whereas longer segments are built up by using three- and nine-residue fragments drawn from the database and combined by using the Rosetta algorithm. A gap closure term in the potential in combination with modified Newton's method for gradient descent minimization is used to ensure continuity of the peptide backbone. Conformations of variable regions are refined in the context of a fixed template structure using Monte Carlo minimization together with rapid repacking of side-chains to iteratively optimize backbone torsion angles and side-chain rotamers. For short loops, mean accuracies of 0.69, 1.45, and 3.62 A are obtained for 4, 8, and 12 residue loops, respectively. In addition, the method can provide reasonable models of conformations of longer protein segments: predicted conformations of 3A root-mean-square deviation or better were obtained for 5 of 10 examples of segments ranging from 13 to 34 residues. In combination with a sequence alignment algorithm, this method generates complete, ungapped models of protein structures, including regions both similar to and divergent from a homologous structure. This combined method was used to make predictions for 28 protein domains in the Critical Assessment of Protein Structure 4 (CASP 4) and 59 domains in CASP 5, where the method ranked highly among comparative modeling and fold recognition methods. Model accuracy in these blind predictions is dominated by alignment quality, but in the context of accurate alignments, long protein segments can be accurately modeled. Notably, the method correctly predicted the local structure of a 39-residue insertion into a TIM barrel in CASP 5 target T0186.},
  pubstate  = {published},
  tppubtype = {article}
}
Kortemme, Tanja; Kim, David E; Baker, David
Computational alanine scanning of protein-protein interfaces Journal Article
In: Science's STKE, vol. 2004, pp. pl2, 2004, ISSN: 1525-8882.
% Journal article on computational alanine scanning of protein-protein interfaces.
% NOTE(review): "pl2" in pages looks like an article identifier rather than a page range, so it is kept verbatim -- confirm.
@article{300,
  title     = {Computational alanine scanning of protein-protein interfaces},
  author    = {Kortemme, Tanja and Kim, David E. and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/kortemme04B-1.pdf},
  issn      = {1525-8882},
  year      = {2004},
  date      = {2004-02-01},
  journal   = {Science's STKE},
  volume    = {2004},
  pages     = {pl2},
  abstract  = {Protein-protein interactions are key components of all signal transduction processes, so methods to alter these interactions promise to become important tools in dissecting function of connectivities in these networks. We have developed a fast computational approach for the prediction of energetically important amino acid residues in protein-protein interfaces (available at http://robetta.bakerlab.org/alaninescan), which we, following Peter Kollman, have termed "computational alanine scanning." The input consists of a three-dimensional structure of a protein-protein complex; output is a list of "hot spots," or amino acid side chains that are predicted to significantly destabilize the interface when mutated to alanine, analogous to the results of experimental alanine-scanning mutagenesis. 79% of hot spots and 68% of neutral residues were correctly predicted in a test of 233 mutations in 19 protein-protein complexes. A single interface can be analyzed in minutes. The computational methodology has been validated by the successful design of protein interfaces with new specificity and activity, and has yielded new insights into the mechanisms of receptor specificity and promiscuity in biological systems.},
  pubstate  = {published},
  tppubtype = {article}
}
Preprints are available on bioRxiv.
2023
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2022
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2021
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2020
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2019
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2018
FROM THE LAB
Sorry, no publications matched your criteria.
COLLABORATOR LED
Sorry, no publications matched your criteria.
2017–1998
ALL PAPERS
1998
D Shortle, K T Simons, David Baker
Clustering of low-energy conformations near the native structures of small proteins Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 95, pp. 11158-62, 1998, ISSN: 0027-8424.
% Journal article on clustering of low-energy conformations near native structures.
% Given names are kept as the initials found in the source record; only the name form was normalised.
@article{213,
  title     = {Clustering of low-energy conformations near the native structures of small proteins},
  author    = {Shortle, D. and Simons, K. T. and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/shortle98A.pdf},
  issn      = {0027-8424},
  year      = {1998},
  date      = {1998-09-01},
  journal   = {Proceedings of the National Academy of Sciences of the United States of America},
  volume    = {95},
  pages     = {11158--11162},
  abstract  = {Recent experimental studies of the denatured state and theoretical analyses of the folding landscape suggest that there are a large multiplicity of low-energy, partially folded conformations near the native state. In this report, we describe a strategy for predicting protein structure based on the working hypothesis that there are a greater number of low-energy conformations surrounding the correct fold than there are surrounding low-energy incorrect folds. To test this idea, 12 ensembles of 500 to 1,000 low-energy structures for 10 small proteins were analyzed by calculating the rms deviation of the Calpha coordinates between each conformation and every other conformation in the ensemble. In all 12 cases, the conformation with the greatest number of conformations within 4-A rms deviation was closer to the native structure than were the majority of conformations in the ensemble, and in most cases it was among the closest 1 to 5%. These results suggest that, to fold efficiently and retain robustness to changes in amino acid sequence, proteins may have evolved a native structure situated within a broad basin of low-energy conformations, a feature which could facilitate the prediction of protein structure at low resolution.},
  pubstate  = {published},
  tppubtype = {article}
}
C Bystroff, D Baker
Prediction of local structure in proteins using a library of sequence-structure motifs Journal Article
In: Journal of molecular biology, vol. 281, pp. 565-77, 1998, ISSN: 0022-2836.
% Journal article on local structure prediction from a library of sequence-structure motifs.
% NOTE(review): "D Baker" was expanded to "Baker, David" to match the spelling used by the other entries in this list -- confirm.
@article{311,
  title     = {Prediction of local structure in proteins using a library of sequence-structure motifs},
  author    = {Bystroff, C. and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/bystroff98A.pdf},
  issn      = {0022-2836},
  year      = {1998},
  date      = {1998-08-01},
  journal   = {Journal of molecular biology},
  volume    = {281},
  pages     = {565--577},
  abstract  = {We describe a new method for local protein structure prediction based on a library of short sequence pattern that correlate strongly with protein three-dimensional structural elements. The library was generated using an automated method for finding correlations between protein sequence and local structure, and contains most previously described local sequence-structure correlations as well as new relationships, including a diverging type-II beta-turn, a frayed helix, and a proline-terminated helix. The query sequence is scanned for segments 7 to 19 residues in length that strongly match one of the 82 patterns in the library. Matching segments are assigned the three-dimensional structure characteristic of the corresponding sequence pattern, and backbone torsion angles for the entire query sequence are then predicted by piecing together mutually compatible segment predictions. In predictions of local structure in a test set of 55 proteins, about 50% of all residues, and 76% of residues covered by high-confidence predictions, were found in eight-residue segments within 1.4 A of their true structures. The predictions are complementary to traditional secondary structure predictions because they are considerably more specific in turn regions, and may contribute to ab initio tertiary structure prediction and fold recognition.},
  pubstate  = {published},
  tppubtype = {article}
}
Q Yi, C Bystroff, P Rajagopal, R E Klevit, David Baker
Prediction and structural characterization of an independently folding substructure in the src SH3 domain Journal Article
In: Journal of molecular biology, vol. 283, pp. 293-300, 1998, ISSN: 0022-2836.
% Journal article on an independently folding substructure in the src SH3 domain.
% The source record carried month "00" (not a valid month), so "date" gives the year only.
@article{212,
  title     = {Prediction and structural characterization of an independently folding substructure in the {src SH3} domain},
  author    = {Yi, Q. and Bystroff, C. and Rajagopal, P. and Klevit, R. E. and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/yi98A.pdf},
  issn      = {0022-2836},
  year      = {1998},
  date      = {1998},
  journal   = {Journal of molecular biology},
  volume    = {283},
  pages     = {293--300},
  abstract  = {Previous studies of the conformations of peptides spanning the length of the alpha-spectrin SH3 domain suggested that SH3 domains lack independently folding substructures. Using a local structure prediction method based on the I-sites library of sequence-structure motifs, we identified a seven residue peptide in the src SH3 domain predicted to adopt a native-like structure, a type II beta-turn bridging unpaired beta-strands, that was not contained intact in any of the SH3 domain peptides studied earlier. NMR characterization confirmed that the isolated peptide, FKKGERL, adopts a structure similar to that adopted in the native protein: the NOE and 3JNHalpha coupling constant patterns were indicative of a type II beta-turn, and NOEs between the Phe and the Leu side-chains suggest that they are juxtaposed as in the prediction and the native structure. These results support the idea that high-confidence I-sites predictions identify protein segments that are likely to form native-like structures early in folding.},
  pubstate  = {published},
  tppubtype = {article}
}
1997
C Bystroff, David Baker
Blind predictions of local protein structure in CASP2 targets using the I-sites library Journal Article
In: Proteins, vol. Suppl 1, pp. 167-71, 1997, ISSN: 0887-3585.
% Journal article on blind CASP2 local-structure predictions with the I-sites library.
% The source record carried month "00" (not a valid month), so "date" gives the year only.
% NOTE(review): volume "Suppl 1" is kept verbatim from the source record; the numeric volume is not shown here -- confirm.
@article{30,
  title     = {Blind predictions of local protein structure in {CASP2} targets using the {I-sites} library},
  author    = {Bystroff, C. and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/bystroff97A.pdf},
  issn      = {0887-3585},
  year      = {1997},
  date      = {1997},
  journal   = {Proteins},
  volume    = {Suppl 1},
  pages     = {167--171},
  abstract  = {Blind predictions of the local structure of nine CASP2 targets were made using the I-sites library of short sequence--structure motifs, revealing strengths and weaknesses in this new knowledge-based method. Many turns between secondary structural elements were accurately predicted. Estimates of the confidence of prediction correlated well with the accuracy over the whole set. Bias toward structures used to develop the library was minimal, probably because of the extensive use of cross-validation. However, helix positions were better predicted by the PHD program. The method is likely to be sensitive to the quality of the sequence alignment. A general measure for evaluating local structure predictions is suggested.},
  pubstate  = {published},
  tppubtype = {article}
}
1996
K F Han, David Baker
Global properties of the mapping between local amino acid sequence and local structure in proteins Journal Article
In: Proceedings of the National Academy of Sciences of the United States of America, vol. 93, pp. 5814-8, 1996, ISSN: 0027-8424.
% Journal article on the mapping between local amino acid sequence and local structure.
% Given names are kept as the initials found in the source record; only the name form was normalised.
@article{215,
  title     = {Global properties of the mapping between local amino acid sequence and local structure in proteins},
  author    = {Han, K. F. and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/han96A.pdf},
  issn      = {0027-8424},
  year      = {1996},
  date      = {1996-06-01},
  journal   = {Proceedings of the National Academy of Sciences of the United States of America},
  volume    = {93},
  pages     = {5814--5818},
  abstract  = {Local protein structure prediction efforts have consistently failed to exceed approximately 70% accuracy. We characterize the degeneracy of the mapping from local sequence to local structure responsible for this failure by investigating the extent to which similar sequence segments found in different proteins adopt similar three-dimensional structures. Sequence segments 3-15 residues in length from 154 different protein families are partitioned into neighborhoods containing segments with similar sequences using cluster analysis. The consistency of the sequence-to-structure mapping is assessed by comparing the local structures adopted by sequence segments in the same neighborhood in proteins of known structure. In the 154 families, 45% and 28% of the positions occur in neighborhoods in which one and two local structures predominate, respectively. The sequence patterns that characterize the neighborhoods in the first class probably include virtually all of the short sequence motifs in proteins that consistently occur in a particular local structure. These patterns, many of which occur in transitions between secondary structural elements, are an interesting combination of previously studied and novel motifs. The identification of sequence patterns that consistently occur in one or a small number of local structures in proteins should contribute to the prediction of protein structure from sequence.},
  pubstate  = {published},
  tppubtype = {article}
}
1995
K F Han, David Baker
Recurring local sequence motifs in proteins Journal Article
In: Journal of molecular biology, vol. 251, pp. 176-87, 1995, ISSN: 0022-2836.
% Journal article on recurring local sequence motifs in proteins.
% Given names are kept as the initials found in the source record; only the name form was normalised.
@article{22,
  title     = {Recurring local sequence motifs in proteins},
  author    = {Han, K. F. and Baker, David},
  url       = {https://www.bakerlab.org/wp-content/uploads/2016/06/han95A.pdf},
  issn      = {0022-2836},
  year      = {1995},
  date      = {1995-08-01},
  journal   = {Journal of molecular biology},
  volume    = {251},
  pages     = {176--187},
  abstract  = {We describe a completely automated approach to identifying local sequence motifs that transcend protein family boundaries. Cluster analysis is used to identify recurring patterns of variation at single positions and in short segments of contiguous positions in multiple sequence alignments for a non-redundant set of protein families. Parallel experiments on simulated data sets constructed with the overall residue frequencies of proteins but not the inter-residue correlations show that naturally occurring protein sequences are significantly more clustered than the corresponding random sequences for window lengths ranging from one to 13 contiguous positions. The patterns of variation at single positions are not in general surprising: chemically similar amino acids tend to be grouped together. More interesting patterns emerge as the window length increases. The patterns of variation for longer window lengths are in part recognizable patterns of hydrophobic and hydrophilic residues, and in part less obvious combinations. A particularly interesting class of patterns features highly conserved glycine residues. The patterns provide a means to abstract the information contained in multiple sequence alignments and may be useful for comparison of distantly related sequences or sequence families and for protein structure prediction.},
  pubstate  = {published},
  tppubtype = {article}
}