mal_readonly.h

Go to the documentation of this file.
00001 
00002 #ifndef MAL_READONLY_H_EA
00003 #define MAL_READONLY_H_EA
00004 
00005 /** @author Erik Arner, Karolinska Institute, (c) Erik Arner 2003.
00006     @version MAl version 0.1
00007 */
00008 
00009 /** \brief MAl stands for Multiple Alignment, it is a container class
00010     for sequences in such an alignment.
00011     
00012     The idea is that this class should have "two" interfaces, one
00013     alignment/matrix interface with global indexes, columns etc and
00014     one sequence collection interface similar to the old cosmid
00015     interface.
00016 
00017     First version is a front end to the Berkeley DB. This could be
00018     made more flexible in future versions, allowing any storage of
00019     data using pImpl idiom.
00020 
00021     Problem: BDB has 1-based indexing (recno), while every algo
00022     written in the TRAP system uses 0-based indexing. I'll stick to
00023     0-based indexing in this system, to avoid future hassle.
00024 
00025     Future plans: make this system pluggable along the same lines as
00026     the data classes in trapper, as the first implementation of the
00027     MAl class will be based on these classes. Also remove the coupling
00028     between this class and TrapperDoc...
00029 */
00030 
#include <cstddef>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>
#include "trapperdoc.h"//maybe fwd-decl instead??
#include "db_cxx.h"
00035 
00036 
00037 //Public, global typedefs
00038 typedef char base_t;
00039 // typedef short int qual_t;
00040 typedef Q_UINT32 qual_t;
00041 
00042 class MAl_Readonly 
00043 {
00044   
00045 public:
00046 
00047   //'tors...
00048   MAl_Readonly(size_t bufsize, std::set<db_recno_t>& recnolist, TrapperDoc* pdoc);
00049   virtual ~MAl_Readonly();  
00050 
00051 
00052   void print_info(size_t ID) {
00053     cerr << "mal_readonly: print_info: ID = " << ID << endl;
00054     cerr << "get_seq_begin_global( ID ) = " << get_seq_begin_global( ID ) << endl;
00055     cerr << "get_seq_end_global( ID ) = " << get_seq_end_global( ID ) << endl;
00056     cerr << "get_len( ID ) = " << get_len( ID ) << endl;
00057   }
00058 
00059   //Common methods
00060 
00061 
00062   size_t get_num_seq();
00063   std::string get_name( size_t ID );
00064   std::string get_header( size_t ID );  
00065   std::string get_seq( size_t ID );
00066   std::string get_strand( size_t ID );
00067   size_t get_len( size_t ID);
00068   void select_read( size_t ID, bool status );
00069 
00070   //Separate interfaces
00071 
00072   size_t get_seq_row( size_t ID );
00073 
00074   size_t get_seq_begin( size_t ID );
00075   size_t get_seq_begin_global( size_t ID );
00076 
00077 //   int get_seq_begin( size_t ID );//FIX THIS
00078 //   int get_seq_begin_global( size_t ID );//FIX THIS
00079 
00080   size_t get_seq_end( size_t ID );
00081   size_t get_seq_end_global( size_t ID );
00082 
00083   size_t get_beginGood( size_t ID );
00084   size_t get_beginGood_global( size_t ID );
00085 
00086   size_t get_endGood( size_t ID );
00087   size_t get_endGood_global( size_t ID );
00088 
00089   base_t get_base( size_t ID, size_t index );
00090   base_t get_base_global( size_t ID, size_t index );
00091 
00092   qual_t get_qual( size_t ID, size_t index );
00093   qual_t get_qual_global( size_t ID, size_t index );
00094 
00095   bool is_DNP(size_t ID, size_t index);
00096   bool is_DNP_global(size_t ID, size_t index);
00097 
00098   int get_DNP_ID(size_t ID, size_t index);
00099   int get_DNP_ID_global(size_t ID, size_t index);
00100 
00101   int get_DNP_type(size_t ID, size_t index);
00102   int get_DNP_type_global(size_t ID, size_t index);
00103   
00104   double get_max_expression(size_t ID);
00105   size_t get_num_expression_points(size_t ID);
00106   double get_expression(size_t ID, size_t expression_index);
00107 
00108   template <typename Types>
00109   friend class MAlWrapper;
00110 
00111 protected:
00112   //Protected methods
00113   size_t get_buffID(size_t ID);
00114   size_t next_buffID();
00115   
00116   virtual void flush_buffer(size_t buffID );
00117   void read_from_db(size_t buffID, size_t ID);
00118   void read_seq_from_db( db_recno_t recno, size_t buffID );
00119   void read_feat_from_db( db_recno_t recno, size_t buffID, const string& data_type_name);
00120   
00121   base_t comp_base(const char& base);
00122 
00123   //Protected structs
00124 
00125   struct dnp_struct
00126   {
00127     dnp_struct(bool is = false, db_recno_t rec = 0, int id = -1, int t = -1 ) :
00128       isDNP(is), recno(rec), ID(id), type(t) {}
00129     
00130     bool isDNP;
00131     db_recno_t recno;
00132     int ID;
00133     int type;
00134   };
00135 
00136   struct tag_struct
00137   {
00138     tag_struct(size_t sta = 0, size_t sto = 0, std::string ty = "", std::string at = "", db_recno_t rec = 0 ) : 
00139       start(sta), stop(sto), type(ty), attr(at), recno(rec) {}
00140 
00141     size_t start;
00142     size_t stop;
00143     std::string type;
00144     std::string attr;
00145     db_recno_t recno;
00146 
00147   };
00148 
00149   //Members
00150   size_t buff_size;
00151   size_t num_seq;
00152   TrapperDoc* doc;
00153   std::set<db_recno_t>& selectedReads;
00154   
00155   //Maybe use deques instead??
00156   //These guys are of the buffer size
00157   std::vector<std::vector<base_t> > seqs;
00158   std::vector<std::vector<qual_t> > quals;
00159   std::vector<std::vector<dnp_struct> > DNPs;
00160   std::vector<std::multimap<size_t, tag_struct> > tags;
00161   std::vector<std::vector<double> > time_course;
00162 
00163   std::vector<std::string> names;
00164   std::vector<std::string> headers;
00165   std::vector<std::string> mates;
00166   std::vector<std::string> strands;
00167   
00168   std::vector<size_t> seq_rows;
00169   std::vector<size_t> seq_begin_global;
00170   std::vector<size_t> seq_end_global;//Unnecessary?? We have the sizes of seqs...
00171   std::vector<size_t> seq_beginGood;//NB, not global!
00172   std::vector<size_t> seq_endGood;//NB, not global!
00173   std::vector<size_t> mate_lengths;
00174 
00175   //Buffer stuff
00176   vector<db_recno_t> ID_to_dbID;//Should be of actual data set size
00177   vector<size_t> ID_to_buffID;//Ditto
00178   vector<db_recno_t> buffID_to_dbID;//buffer size
00179   vector<db_recno_t> buffID_to_ID;//buffer size
00180   vector<bool> put_in_db;//Watch out for vector<bool>...
00181   
00182 
00183 };
00184 
00185 
00186 #endif //MAL_READONLY_H_EA
00187  
00188 
00189 
00190 //Should this stuff be private??
00191 /*
00192   void change_base( size_t ID, size_t baseIndex, char newBase );
00193   void set_seq_begin( size_t index, size_t pos );
00194   void set_beginGood( const size_t index, const size_t pos );
00195   void set_endGood( const size_t index, const size_t pos );
00196   void insert_base(size_t ID, size_t before_index, char base);  
00197   void remove_base(size_t ID, size_t index);  
00198   void put_qual( size_t ID, size_t qualValIndex, const size_t qualityValue );
00199   void delete_seq(size_t ID);
00200   bool is_deleted( size_t ID );
00201 */
00202 
00203 
00204 //OBSOLETE???
00205 /*
00206   std::string get_headerQ( size_t ID );
00207   size_t isPossibleRepeat(size_t ID);//????????????????????
00208   void mark_possibleRepeat(size_t ID);//??????????????????
00209   size_t append_seq( char seqName[], char seqHeader[] );
00210   size_t get_first_revComp_index();
00211   void set_max_coverage( size_t ID, size_t index, size_t cov );
00212   size_t get_max_coverage( size_t ID, size_t index );
00213   void set_pos_non_chimeric( size_t ID, size_t index);
00214   size_t is_chimeric( size_t ID, size_t index );
00215   void set_beginAnalyzable( const size_t index, const size_t pos );
00216   size_t get_beginAnalyzable( const size_t index );
00217   void set_endAnalyzable( const size_t index, const size_t pos );
00218   size_t get_endAnalyzable( const size_t index );  
00219   size_t get_number_seqs_in_file(const std::string fileName);
00220   size_t get_number_seqs_in_DATA_file(const std::string fileName);
00221   void set_qualBegin( const size_t index, const size_t pos );
00222   size_t get_qualBegin( const size_t index );
00223   void set_qualEnd( const size_t index, const size_t pos );
00224   size_t get_qualEnd( const size_t index );
00225   size_t is_quality( size_t index );
00226   void mark_is_quality(size_t id);
00227   size_t seq_size( size_t ID );
00228   size_t different_strands(size_t ID1, size_t ID2);
00229   size_t qual_size( size_t ID );
00230   char get_comp_base(char base);
00231   size_t get_ID_in_revComp_counterpart(size_t ID);
00232   size_t get_index_in_revComp_counterpart(size_t ID, size_t index);
00233   char get_DNP(size_t ID, size_t index);
00234   void set_DNP(size_t ID, size_t index, char base, size_t unique);
00235   void set_templ_DNP(size_t ID, size_t index);
00236   void set_templ_DNP_pos(size_t ID, size_t index, size_t pos);
00237   size_t is_templ_DNP(size_t ID, size_t index);
00238   size_t get_templ_DNP_pos(size_t ID, size_t index);
00239   void set_DNP_ncorr(size_t ID, size_t index, size_t ncorr);
00240   size_t get_DNP_ncorr(size_t ID, size_t index);
00241   void set_DNP_p(size_t ID, size_t index, double p);
00242   double get_DNP_p(size_t ID, size_t index);
00243   void set_DNP_against_insert(size_t ID, size_t index, char base, size_t unique);
00244   void set_DNP_against_deletion(size_t ID, size_t index, size_t unique, size_t unset_DNP);
00245   char get_DNP_against_insert(size_t ID, size_t index);
00246   char get_DNP_against_deletion(size_t ID, size_t index);
00247   size_t get_DNP_type(size_t ID, size_t index);
00248   size_t get_DNP_type_against_insert(size_t ID, size_t index);
00249   size_t get_DNP_type_against_deletion(size_t ID, size_t index);
00250   size_t DNP_mismatch(size_t ID, size_t index, char base);
00251   size_t DNP_mismatch_against_insert(size_t ID, size_t index, char base);
00252   size_t DNP_mismatch_against_deletion(size_t ID, size_t index);
00253 */

Generated on Fri Jul 17 20:19:29 2009 for ngsview by  doxygen 1.5.1