Main Page | Namespace List | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | Examples

pdb_distance.cc

00001 /* Copyright 2004
00002    Stanford University
00003 
00004    This file is part of the DSR PDB Library.
00005 
00006    The DSR PDB Library is free software; you can redistribute it and/or modify
00007    it under the terms of the GNU Lesser General Public License as published by
00008    the Free Software Foundation; either version 2.1 of the License, or (at your
00009    option) any later version.
00010 
00011    The DSR PDB Library is distributed in the hope that it will be useful, but
00012    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
00013    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
00014    License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public License
00017    along with the DSR PDB Library; see the file COPYING.LIB.  If not, write to
00018    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00019    MA 02111-1307, USA. */
00020 
00021 #include <dsrpdb/PDB.h>
00022 
00023 #include <vector>
00024 #include <iterator>
00025 #include <fstream>
00026 #include <dsrpdb/align.h>
00027 #include <dsrpdb/distance.h>
00028 
00029 
00030 #ifdef PDB_USE_BOOST_PROGRAM_OPTIONS
00031 #include <boost/program_options.hpp>
00032 #endif
00033 
00043 int main(int argc, char *argv[]){
00044   std::string base_file, input_file, output_file;
00045   bool print_help=false;
00046   bool crms=false, drms=false;
00047   int base_model=0;
00048   bool warn=false;
00049   bool all_atoms=false;
00050   bool no_align=false;
00051   bool max_cdist=false;
00052   bool max_ddist=false;
00053 #ifdef PDB_USE_BOOST_PROGRAM_OPTIONS
00054   {
00055     boost::program_options::options_description o("Allowed options"), po, ao;
00056     o.add_options()
00057       ("crms,c", boost::program_options::bool_switch(&crms), 
00058        "Output the cRMS between the two pdbs (after alignment).")
00059       ("drms,d", boost::program_options::bool_switch(&drms), 
00060        "Output the dRMS between the two pdbs.")     
00061       ("all-atoms,a", boost::program_options::bool_switch(&all_atoms),
00062        "Output the distances between all atoms, not just the C_alphas.")     
00063       ("no-align,n", boost::program_options::bool_switch(&no_align), 
00064        "Do not rigidly align the proteins before computing cRMS.")     
00065       ("verbose,v", boost::program_options::bool_switch(&warn), 
00066        "Warn about errors parsing pdb file.")
00067       ("max-c-dist,C", boost::program_options::bool_switch(&max_cdist), 
00068        "Output the max distance between any two corresponding atoms.")
00069       ("max-d-dist,D", boost::program_options::bool_switch(&max_ddist), 
00070        "Output the max distance difference between pairwise distances.")
00071       ("help", boost::program_options::bool_switch(&print_help), "Produce help message");
00072 
00073     po.add_options()("input-pdb-0", 
00074                      boost::program_options::value< std::string>(&base_file), 
00075                      "First input file.")
00076       ("input-pdb-1", boost::program_options::value< std::string>(&input_file), 
00077        "Second input file");
00078     ao.add(o).add(po);
00079     
00080     boost::program_options::positional_options_description p;
00081     p.add("input-pdb-0", 1);
00082     p.add("input-pdb-1", 1);
00083 
00084     boost::program_options::variables_map vm;
00085     boost::program_options::store(boost::program_options::command_line_parser(argc, argv).
00086                                   options(ao).positional(p).run(), vm);
00087     boost::program_options::notify(vm);
00088 
00089 
00090     if (base_file.empty() || input_file.empty() || print_help) {
00091       std::cout << "This program computes the distance between two pdb files..\n";
00092       std::cout << "usage: " << argv[0] << " input-pdb-0 input-pdb-1\n\n";
00093       std::cout << o << "\n";
00094       return EXIT_FAILURE;
00095     }
00096   }
00097 
00098 #else
00099   if (argc != 3){
00100     std::cerr << "The program is built without boost/program_options.hpp.\n";
00101     std::cerr << "useage: " << argv[0] << " input0.pdb input0.pdb" << std::endl;
00102     return EXIT_FAILURE;
00103   }
00104   base_file = argv[1];
00105   input_file = argv[2];
00106   drms=true;
00107   crms=true;
00108 #endif
00109   
00110   std::ifstream in(input_file.c_str());
00111 
00112   if (!in){
00113     std::cerr<< "Error opening input file: " << input_file << std::endl;
00114     return EXIT_FAILURE;
00115   }
00116   std::ifstream bin(base_file.c_str());
00117 
00118   if (!bin){
00119     std::cerr<< "Error opening input file: " << base_file << std::endl;
00120     return EXIT_FAILURE;
00121   }
00122 
00123   dsrpdb::PDB input(in, warn);
00124   dsrpdb::PDB base(bin, warn);
00125   dsrpdb::Protein base_protein;
00126   for (unsigned int i=0; i< base.number_of_models(); ++i){
00127     const dsrpdb::Model &m= base.model(i);
00128     if (base_model ==0 || m.index()==base_model) {
00129       for (unsigned int j=0; j< m.number_of_chains(); ++j){
00130         base_protein= m.chain(j);
00131       }
00132     }
00133   }
00134   
00135   if ( crms && !no_align) {
00136     for (unsigned int i=0; i< input.number_of_models(); ++i){
00137       dsrpdb::Model &m= input.model(i);
00138       dsrpdb::Protein &p= m.chain(0);
00139       dsrpdb::align_second_protein_to_first(base_protein, p);
00140     }
00141   }
00142 
00143   if (crms) {
00144     std::cout << "cRMS:\n";
00145     for (unsigned int i=0; i< input.number_of_models(); ++i){
00146       dsrpdb::Model &m= input.model(i);
00147       dsrpdb::Protein &p= m.chain(0);
00148       double d;
00149       if (all_atoms) {
00150         d =dsrpdb::no_align_cRMS(base_protein, p);
00151       } else {
00152         d =dsrpdb::no_align_ca_cRMS(base_protein, p);
00153       }
00154       if (d < .00001) d=0;
00155       std::cout << "Model " << i << " is " << d << std::endl;
00156     }
00157   }
00158 
00159   if (max_cdist) {
00160     std::vector<dsrpdb::Point> base_coords;
00161     if (all_atoms) {
00162       dsrpdb::coordinates(base_protein.atoms_begin(), base_protein.atoms_end(), std::back_inserter(base_coords));
00163     } else {
00164       dsrpdb::ca_coordinates(base_protein.atoms_begin(), base_protein.atoms_end(), std::back_inserter(base_coords));
00165     }
00166     std::cout << "Max dist:\n";
00167     for (unsigned int i=0; i< input.number_of_models(); ++i){
00168       dsrpdb::Model &m= input.model(i);
00169       dsrpdb::Protein &p= m.chain(0);
00170       std::vector<dsrpdb::Point> coords;
00171       if (all_atoms) {
00172         dsrpdb::coordinates(p.atoms_begin(), p.atoms_end(), std::back_inserter(coords));
00173       } else {
00174         dsrpdb::ca_coordinates(p.atoms_begin(), p.atoms_end(), std::back_inserter(coords));
00175       }
00176       if (coords.size() != base_coords.size()){
00177         std::cerr<< "Proteins being compared must have the same number of atoms.\n";
00178         return EXIT_FAILURE;
00179       }
00180       double max= 0;
00181       dsrpdb::Squared_distance sd;
00182       for (unsigned int j=0; j< coords.size(); ++j){
00183         max=std::max(std::sqrt(sd(coords[j], base_coords[j])), max);
00184       }
00185       std::cout << "Model " << i << " is " << max << std::endl;
00186     }
00187   }
00188 
00189   if (drms) {
00190     std::cout << "\ndRMS:\n";
00191     for (unsigned int i=0; i< input.number_of_models(); ++i){
00192       dsrpdb::Model &m= input.model(i);
00193       dsrpdb::Protein &p= m.chain(0);
00194       double d;
00195       if (all_atoms) {
00196         d=dsrpdb::dRMS(base_protein, p);
00197       } else {
00198         d=dsrpdb::ca_dRMS(base_protein, p);
00199       }
00200       if (d < .00001) d=0;
00201       std::cout << "Model " << i << " is " << d << std::endl;
00202     }
00203   }
00204 
00205   if (max_ddist) {
00206     dsrpdb::Matrix base_matrix;
00207     if (all_atoms) {
00208       base_matrix=dsrpdb::distance_matrix(base_protein);
00209     } else {
00210       base_matrix=dsrpdb::ca_distance_matrix(base_protein);
00211     }
00212     std::cout << "Max dist:\n";
00213     for (unsigned int i=0; i< input.number_of_models(); ++i){
00214       dsrpdb::Model &m= input.model(i);
00215       dsrpdb::Protein &p= m.chain(0);
00216       dsrpdb::Matrix input_matrix;
00217       if (all_atoms) {
00218         input_matrix= dsrpdb::distance_matrix(p);
00219       } else {
00220         input_matrix = dsrpdb::ca_distance_matrix(p);
00221       }
00222       if (base_matrix.dim1() != input_matrix.dim1()){
00223         std::cerr << "Proteins being compared must have the same number of atoms.\n";
00224         std::cerr << "Base protein had " << base_matrix.dim1() 
00225                   << " atoms and second protein had " << input_matrix.dim1() 
00226                   << " atoms." << std::endl;
00227         return EXIT_FAILURE;
00228       }
00229       double max= 0;
00230       double maxr=0;
00231       for (int j=0; j< base_matrix.dim1(); ++j){
00232         for (int k=0; k< j; ++k){
00233           max=std::max(std::abs(base_matrix[j][k]-input_matrix[j][k]), max);
00234           maxr=std::max(std::abs(base_matrix[j][k]-input_matrix[j][k])
00235                         /std::max(base_matrix[j][k],input_matrix[j][k]), maxr);
00236         }
00237       }
00238       std::cout << "Model " << i << " is " << max << " with ratio " << maxr << std::endl;
00239     }
00240   }
00241 
00242   return EXIT_SUCCESS;
00243 
00244 }
00245