Main Page | Namespace List | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | Examples

pdb_split.cc

00001 /* Copyright 2004
00002    Stanford University
00003 
00004    This file is part of the DSR PDB Library.
00005 
00006    The DSR PDB Library is free software; you can redistribute it and/or modify
00007    it under the terms of the GNU Lesser General Public License as published by
00008    the Free Software Foundation; either version 2.1 of the License, or (at your
00009    option) any later version.
00010 
00011    The DSR PDB Library is distributed in the hope that it will be useful, but
00012    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
00013    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
00014    License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public License
00017    along with the DSR PDB Library; see the file COPYING.LIB.  If not, write to
00018    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00019    MA 02111-1307, USA. */
00020 
00021 #include "dsrpdb/PDB.h"
00022 #include <cstdlib>
00023 #include <iostream>
00024 #include <fstream>
00025 
00026 #ifdef PDB_USE_BOOST_PROGRAM_OPTIONS
00027 #include <boost/program_options.hpp>
00028 #endif
00029 
00038 int main(int argc, char *argv[]){
00039   bool split_chains=false;
00040   std::string input_file, output_template;
00041   int split_domain=-1;
00042   bool print_help=false;
00043   bool verbose=false;
00044   char split_chain='\0';
00045 
00046 #ifdef PDB_USE_BOOST_PROGRAM_OPTIONS
00047   boost::program_options::options_description o("Allowed options"), po, ao;
00048   o.add_options()
00049     ("help", boost::program_options::bool_switch(&print_help),
00050      "produce help message")
00051     ("verbose,v", boost::program_options::bool_switch(&verbose),
00052      "print out verbose messages about reading and writing pdb files")
00053     ("split-chains,c", boost::program_options::bool_switch(&split_chains),
00054      "Split all chains into separate files.")
00055     ("select-chain,C", boost::program_options::value<char>(&split_chain),
00056       "Select this chain only.")
00057     ("domain-split,s", boost::program_options::value<int>(&split_domain),
00058      "Split a chain into domains at this residue.");
00059   po.add_options()
00060     ("input-pdb", boost::program_options::value< std::string>(&input_file),
00061      "input file")
00062     ("output-pdb-template", boost::program_options::value< std::string>(&output_template),
00063      "A sprintf style string that will be used to generate the names for the output files.");
00064 
00065   ao.add(o).add(po);
00066 
00067   boost::program_options::positional_options_description p;
00068   p.add("input-pdb", 1);
00069   p.add("output-pdb-template", 2);
00070 
00071   boost::program_options::variables_map vm;
00072   boost::program_options::store(boost::program_options::command_line_parser(argc, argv).
00073                                 options(ao).positional(p).run(), vm);
00074   boost::program_options::notify(vm);
00075 
00076 
00077  
00078   //boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
00079   //boost::program_options::notify(vm);    
00080 
00081   if (input_file.empty()
00082       || output_template.empty()
00083       || (split_chains && split_chain != '\0') 
00084       || (split_chains && split_domain != -1) 
00085       || (split_chain != '\0' && split_domain != -1) 
00086       || print_help) {
00087     std::cout << "This program splits a pdb file with multiple models or multiple domains into multiple files each with one model .\n";
00088     std::cout << "useage: " << argv[0] 
00089               << " [-c] input-pdb template%d[%c].pdb\n" << std::endl;
00090     std::cout << "The second argument is an sprintf style string that will be used to generate the names for the output files.\n\n";
00091     std::cout << o << "\n";
00092     return EXIT_FAILURE;
00093   }
00094 
00095 #else
00096   if (argc != 3){
00097     std::cerr << "The program is built without boost/program_options.hpp.\n";
00098     std::cerr << "useage: " << argv[0] << " [-c] file.pdb template%d[%c].pdb" << std::endl;
00099     return EXIT_FAILURE;
00100   }
00101   input_file = argv[1];
00102   output_template = argv[2];
00103 #endif
00104 
00105   if (split_chain != '\0'){ 
00106     std::cout << "Splitting on chain " << split_chain << std::endl;
00107   } else if (split_domain != -1) {
00108     std::cout << "Splitting on residue " << split_domain << std::endl;
00109   } else {
00110     std::cout << "Splitting into chains " << std::endl;
00111   }
00112 
00113 
00114   // std::cout << input_file << " " << output_template << " " << split_domain << " " << split_chains << std::endl;
00115 
00116   std::ifstream in(input_file.c_str());
00117   if (!in) {
00118     std::cerr << "Error opening input file " << input_file << std::endl;
00119     return EXIT_FAILURE;
00120   }
00121 
00122   //= new char[strlen(argv[2]+1000)];
00123   dsrpdb::PDB pdb(in, verbose);
00124 
00125   std::cout << "Input PDB has " << pdb.number_of_models() << " models." << std::endl;
00126 
00127   if (split_domain != -1 && pdb.number_of_models()!= 1){
00128     std::cerr << "Splitting a domain can only work if there is only one model and chain.\n";
00129     return EXIT_FAILURE;
00130   }
00131 
00132   for (unsigned int i=0; i< pdb.number_of_models(); ++i){
00133     const dsrpdb::Model &m= pdb.model(i);
00134     if (split_chain != '\0' || split_chains || split_domain!= -1) {
00135       std::cout << "Model " << i << " has " << m.number_of_chains() << " chains."<< std::endl;
00136       if (split_domain != -1 && m.number_of_chains()!= 1){
00137         std::cerr << "Splitting a domain can only work if there is only one model and chain.\n";
00138         return EXIT_FAILURE;
00139       }
00140       for (unsigned int j=0; j< m.number_of_chains(); ++j){
00141         const dsrpdb::Protein &p= m.chain(j);
00142         
00143         if (split_chain != '\0') {
00144           if (p.chain() == split_chain || p.chain() == std::toupper(split_chain) ) {
00145             std::cout << "Writing chain " << p.chain() << std::endl;
00146             assert(split_domain ==-1);
00147            
00148             std::ofstream out(output_template.c_str());
00149             dsrpdb::PDB npdb;
00150             dsrpdb::Model nm;
00151             nm.new_chain(p);
00152             npdb.new_model(nm);
00153             npdb.set_header(pdb.header_begin(), pdb.header_end());
00154             npdb.write(out);
00155           } else {
00156             std::cout << "Skipping chain " << p.chain() << std::endl;
00157           }
00158         } else if (split_chains) {
00159           std::cout << "Writing chain " << p.chain() << std::endl;
00160           assert(split_domain ==-1);
00161           char buf[100000];
00162           if (pdb.number_of_models()==1) {
00163             sprintf(buf, output_template.c_str(),p.chain());
00164           } else {
00165             sprintf(buf, output_template.c_str(), m.index(), p.chain());
00166           }
00167           std::ofstream out(buf);
00168           dsrpdb::PDB npdb;
00169           dsrpdb::Model nm;
00170           nm.new_chain(p);
00171           npdb.new_model(nm);
00172           npdb.set_header(pdb.header_begin(), pdb.header_end());
00173           npdb.write(out);
00174         } else {
00175           dsrpdb::Protein ps[2]={dsrpdb::Protein(), dsrpdb::Protein()};
00176           for (dsrpdb::Protein::Const_residues_iterator rit = p.residues_begin(); 
00177                rit != p.residues_end(); ++rit){
00178             //rit->write('c', std::cout);
00179             if (static_cast<unsigned int>(rit->index())
00180                 < static_cast<unsigned int>(split_domain)){
00181               ps[0].new_residue(*rit);
00182             } else {
00183               ps[1].new_residue(*rit);
00184             }
00185           }
00186           
00187           assert(ps[0].number_of_residues() + ps[1].number_of_residues()
00188                  == p.number_of_residues());
00189           
00190           for (unsigned int j=0; j< 2; ++j){
00191             std::cout << "Writing pdb with " << ps[j].number_of_residues() << " residues.\n";
00192             char buf[100000];
00193             sprintf(buf, output_template.c_str(), j);
00194             std::ofstream out(buf);
00195               dsrpdb::PDB npdb;
00196               dsrpdb::Model nm(0);
00197               nm.new_chain(ps[j]);
00198               npdb.new_model(nm);
00199               npdb.set_header(pdb.header_begin(), pdb.header_end());
00200               npdb.write(out);
00201             }
00202           }
00203       }
00204     } else {
00205       char buf[100000];
00206       sprintf(buf, argv[2], m.index());
00207       std::ofstream out(buf);
00208       dsrpdb::PDB npdb;
00209       npdb.new_model(m);
00210       npdb.set_header(pdb.header_begin(), pdb.header_end());
00211       npdb.write(out);
00212     }
00213   }
00214   //delete[] buf;
00215   return EXIT_SUCCESS;
00216 }