generaldata.h

Go to the documentation of this file.
00001 /*******************************************************************************
00002  *                                                                             *
00003  *   Copyright (C) 2003  Erik Sjolund,  (<html>Erik Sj&ouml;lund</html>)       *
00004  *                       Center for Genomics and Bioinformatics,               *
00005  *                       Karolinska Institutet,                                *
00006  *                       Stockholm,                                            *
00007  *                       Sweden                                                *
00008  *                                                                             *
00009  *  Author: Erik Sjolund                                                       *
00010  *  Email: erik.sjolund@adivo.se                                               *
00011  *                                                                             *
00012  *******************************************************************************
00013  */
00014 #ifndef GENERALDATA_H
00015 #define GENERALDATA_H
00016 
00017 // berkeley db
00018 #include <db_cxx.h>
00019 
00020 // qt classes
00021 #include <qdatastream.h>
00022 #include <qdom.h>
00023 #include <qxml.h>
00024 
00025 // stl
00026 #include <iostream>
00027 #include <string>
00028 #include <cassert>
00029 #include <iosfwd>
00030 
00031 #include "trdb.h"
00032 #include "trapperdoc.h"
00033 #include "generalmaker.h"
00034 #include "generaldata.h"
00035 
00036 
00037 class TrapperDoc;
00038 
00039 /**
00040   * \brief This class is the base class for all data items that are to be stored in the
00041   * Berkeley Db databases. It has methods for serializing its data to
00042   * and from a bytestream and has methods for writing the same data 
00043   * to and from an XML dom tree. It also lets you specify how this data should be 
00044   * indexed in the Berkeley Db databases for very fast retrieval.
00045   *
00046   * The function of this class is to transport data in and out of the Berkeley db backend. 
00047   * It is important to realize that changing values of data members of GeneralData and its subclasses
00048   * will have no effect to other parts of your application before your GeneralData object has been
00049   * used in the Database interface. 
00050   * 
00051   * When you are implementing subclasses of GeneralData you will see the benefits of object orientation.
00052   * You just have to serialize/unserialize the extra data members of the subclass.
00053   * The parent class will have to serialize/unserialize its data members so you have to remember 
00054   * letting readStream() and writeStream() of your subclass call the readStream() resp. writeStream() of its parent class. 
00055   * Just be careful of the order 
00056   * of things when doing serializing and unserializing.
00057   * 
00058   * Right now there is a limit of 2^32 ( ca 4 billions ) of records that can be stored in the
00059   * berkeley db backend for each GeneralData subclass. This limit is easy to recognize from the 
00060   * fact that the index type, db_recno_t, is a u_int32_t.
00061   * For more information about berkeley db limits see: http://www.sleepycat.com/docs/ref/am_misc/dbsizes.html
00062   *
00063   *
00064   * Idea for the future to  maybe implement:
00065   * add hooks to GeneralData, like
00066   * virtual void hookBeforeCreate(Txnid *) and
00067   * virtual void hookBeforeRemove(Txnid *).  
00068   * Database::Creator would then call these hooks everytime a 
00069   * create or remove is to be done.
00070   * We would now have the possibility to add substring search functionality for the
00071   * dna sequence in the DnaStrData feature.
00072   * We implement a new Class, SubStrData : public GeneralData that stores e.g. sequences 
00073   * of length 12. 
00074   * The DnaStrData::hookBeforeCreate(Txnid *) creates each possible 12-length-substring as records
00075   * in the SubStrData.
00076   * The SubStrData::getIndexMap() should return two secondary indices: one with sorting of the DnaStrData recno,
00077   * and one with sorting in lexicographical order of the 12-length-substring
00078   */
00079 
00080 class GeneralData
00081 {
00082 public:
00083   GeneralData();
00084   virtual ~GeneralData(){}
00085   /**Prints some info for debugging, this should be removed in release version!   */
00086   virtual void print_debug_info() { std::cerr<<"No debug info for class "<<uniqueName()<<endl;}
00087   
00088   /** Returns the primary key index value of this GeneralData object. The primary key index value is  
00089    * unique among all stored objects of this class. But the primary key index values are
00090    * not unique between different subclasses of GeneralData. The primary key indices for GeneralData and its
00091    * subclasses are all implemented as primary databases of type Recno.
00092    * \sa http://www.sleepycat.com/docs/ref/am_conf/intro.html
00093    */ 
00094   db_recno_t getRecno( );
00095   
00096   /** Set the primary key index value of this GeneralData object. */ 
00097   void setRecno( db_recno_t recno ) { m_recno = recno; }
00098   
00099   /** Loads the state of this object from QXmlAttributes.This method is used for importing. 
00100    */
00101   virtual void readAttributes( const QXmlAttributes& attr )= 0;
00102   /** Writes the state of this object to a stream as an XML entry. This method is used for exporting
00103    */
00104   virtual void writeXml( std::ostream& stream )= 0;
00105   /** Loads the state of this object from a dom tree. Or in other words reads
00106    * in the values of the data members of this class from a dom tree underneath
00107    * the QDomElement elem. This method is used for importing. 
00108    */
00109   virtual void readDom( QDomElement & elem );
00110   
00111   /** Saves the state of this object to a dom tree. Or in other words writes
00112    * out the internal data held in this class to a dom tree underneath
00113    * the QDomElement elem. This method is used for exporting. 
00114    */
00115   virtual void writeDom( QDomElement & elem );
00116   
00117   /** Loads the state of this object from a byte stream. Or in other words reads
00118    * and unserializes a byte stream to set the values of this class object.
00119    * This method is used for loading a GeneralData object from Berkeley Db. 
00120    */
00121   
00122   virtual void readStream( QDataStream & stream ) = 0;
00123   
00124   /** Saves the state of this object to a byte stream. Or in other words serializes
00125    * and writes the internal data held in this class object to a byte stream.
00126    * This method is used for storing a GeneralData object into Berkeley Db. 
00127    */
00128   virtual void writeStream( QDataStream & stream ) = 0;
00129   
00130   /** Returns a TrDb::IndexMap that specifies which secondary indexes should be 
00131    * generated for stored data objects of this class in the Berkeley Db backend. 
00132    * Each record in the TrDb::IndexMap generates
00133    * a secondary index built as a btree ( a DB_BTREE in Berkeley Db wording ).
00134    * The secondary index is sorted which makes it possible to do queries like
00135    * "Give me the object that has the smallest value, bigger or equal than x".
00136    *
00137    * And about speed performance, a quote from Berkeley Db documentation says: 
00138    * "Searches take O(log base_b N) time, where base_b is the average number of 
00139    * keys per page, and N is the total number of keys stored"
00140    */
00141   virtual TrDb::IndexMap getIndexMap() = 0;
00142   
00143   /** Specifies a name that uniquely should identify this class. It will be used
00144    * for lookups in the GeneralMaker class.
00145    */  
00146   virtual std::string uniqueName() = 0;
00147 
00148   void copy(GeneralData* other);
00149   
00150 protected:
00151   db_recno_t m_recno;
00152 };
00153 
00154 /** \brief namespace for all interactions with the berkeley db 
00155  *
00156  */
00157 
00158 namespace Database
00159 {
00160   void setFromDbt( const Dbt * dbtData, GeneralData * general);
00161   
00162   /** \brief to create records in the berkeley db.
00163    * @param T the derived GeneralData class that the pointer data() points to. By having this template 
00164    * argument we avoid the need of dynamic_cast by the users of this class.
00165    * 
00166    */
00167   
00168   template <class T>
00169   class Creator
00170   {
00171   public:
00172     /** \brief constructor
00173      * @param doc doc with the berkeley dbs opened
00174      * @param dataType the lookup string for the derived GeneralData class that are to be stored.
00175      */
00176     Creator( TrapperDoc *doc, std::string dataType );
00177     ~Creator();
00178     /** \brief create a new record in the berkeley db
00179      *
00180      * @param useRecno if true save at the index place of given from the GeneralData::getRecno() of the data().
00181      * If false a new recno value is automatically chosen by berkeley db. 
00182      * @param txnid a transaction id to encompass this operation in. If NULL
00183      * this operation will not be transaction-protected.
00184      
00185     */
00186     db_recno_t create( bool useRecno, DbTxn *txnid = NULL );
00187     /** \brief Removes a record from the DB 
00188      */
00189     void destroy( DbTxn *txnid = NULL );
00190     /** \brief returns a pointer to the data 
00191      */
00192     T * data() { return m_data; }
00193   protected:
00194     T * m_data;
00195     Db * m_db;
00196   };
00197   
00198   template <class T>
00199   Database::Creator<T>::Creator( TrapperDoc * doc, std::string dataType )
00200   {
00201     m_data = dynamic_cast< T * >(GeneralMaker::newData( dataType ));
00202     Q_CHECK_PTR( m_data );
00203     std::string datatype = m_data->uniqueName();
00204     m_db = doc->findTrDb( datatype )->primaryDb();
00205 //     cerr<<"In Creator(), doc: "<<doc<<endl;
00206   }
00207   
00208   template <class T>
00209   Database::Creator<T>::~Creator()
00210   {
00211     Q_CHECK_PTR( m_data );
00212     delete m_data;
00213   }
00214   
00215   template <class T>
00216   db_recno_t Database::Creator<T>::create( bool useRecno, DbTxn *txnid )
00217   {
00218 //     std::cerr<<"In "<<std::endl;
00219 //     std::cerr<<"txnid: "<<txnid<<std::endl;
00220     QByteArray ar;
00221     QDataStream stream( ar, IO_WriteOnly );
00222 //     std::cerr<<"Check1 "<<std::endl;
00223     m_data->writeStream( stream );
00224 //     std::cerr<<"Check2 "<<std::endl;
00225     
00226     Dbt data( ar.data(), ar.size() );
00227 //     std::cerr<<"Check3 "<<std::endl;
00228     Dbt key;
00229 
00230     //     db_recno_t recno;
00231     db_recno_t the_recno(0);//EA
00232     u_int32_t flags = 0;
00233     if ( useRecno )
00234       {
00235 //         std::cerr<<"Check4:1 "<<std::endl;
00236         the_recno = m_data->getRecno();
00237         key.set_data( &the_recno );
00238         key.set_size( sizeof( the_recno ) );
00239       }
00240     else
00241       {
00242 //         std::cerr<<"Check4:2 "<<std::endl;
00243         flags |= DB_APPEND;
00244         assert( flags == DB_APPEND );
00245       }
00246 //     std::cerr<<"Check5 "<<std::endl;
00247 //     std::cerr<<"Db: "<<m_db<<std::endl;
00248 //     m_data->print_debug_info();
00249 //     std::cerr<<"Back from debug "<<endl;
00250     
00251     int ret;
00252     if ((ret = m_db->put(txnid, &key, &data, flags )) != 0)
00253       {
00254 //         std::cerr<<"Check5:1 "<<std::endl;
00255         m_db->err(ret, "error in create");
00256 //         std::cerr<<"Check5:2 "<<std::endl;
00257         exit( 1 );
00258       }
00259 //     std::cerr<<"Check6 "<<std::endl;
00260     db_recno_t recnoRet = * static_cast< db_recno_t * >( key.get_data() );
00261 //     std::cerr<<"Out "<<std::endl;
00262     return recnoRet;
00263   }
00264   template <class T>
00265   void Database::Creator<T>::destroy( DbTxn *txnid )
00266   {
00267     Dbt key;
00268 
00269     db_recno_t the_recno(0);
00270     u_int32_t flags = 0;
00271     the_recno = m_data->getRecno();
00272     key.set_data( &the_recno );
00273     key.set_size( sizeof( the_recno ) );
00274     
00275     int ret;
00276     if ((ret = m_db->del(txnid, &key, flags )) != 0)
00277       {
00278         m_db->err(ret, "error in Creator::destroy");
00279         exit( 1 );
00280       }
00281   }
00282   /** \brief to search records and iterate over a secondary index
00283    * @param T the derived GeneralData class that the pointer data() points to. By having this template 
00284    * argument we avoid the need of dynamic_cast by the users of this class.
00285    * 
00286    */
00287   template <class T>
00288   class SecondaryIterator
00289   {
00290   public:
00291     /** \brief constructor
00292      *
00293      * @param secondaryIndexStr the secondary index lookup string
00294      * @param doc doc with the berkeley dbs opened
00295      * @param generalDataType the lookup string for the derived GeneralData class that are to be stored.
00296      * @param txnid a transaction id to encompass this operation in. If NULL
00297      * this operation will not be transaction-protected.
00298      */
00299     SecondaryIterator( string secondaryIndexStr, TrapperDoc * doc, string generalDataType, DbTxn *txnid = NULL );
00300     ~SecondaryIterator();
00301     std::string name() { return m_name; }
00302     int set();
00303     int setRange();
00304     int nextdup();
00305     int next();
00306     int prev();
00307     int first();
00308     int last();
00309     int delCurrentInclDups();
00310     int pget( Dbt & key, u_int32_t flags );
00311     T * key() { return m_GeneralData_key; }
00312     T * answer() { return m_GeneralData_answer; }
00313     void closeCursor();
00314   protected:
00315     void fillSecondaryDataDbt( Dbt & secondaryKey, const Dbt & primaryData );
00316     T * m_GeneralData_key;
00317     T * m_GeneralData_answer;
00318     Dbc * m_cursor;
00319     TrDb::Index index;
00320     std::string m_name;
00321     DbTxn* txn;
00322   };
00323   
00324   template <class T>
00325   SecondaryIterator<T>::SecondaryIterator( string secondaryIndexStr, TrapperDoc * doc, string generalDataType, DbTxn *txnid )
00326   {
00327     assert( doc!= 0 );
00328     
00329     m_name = generalDataType;
00330     m_GeneralData_key = dynamic_cast< T * >(GeneralMaker::newData( generalDataType ));
00331     m_GeneralData_answer = dynamic_cast< T * >(GeneralMaker::newData( generalDataType ));
00332     index = doc->findTrDb( generalDataType )->secondaryIndex( secondaryIndexStr );
00333     Q_CHECK_PTR(m_GeneralData_key);
00334     Q_CHECK_PTR(m_GeneralData_answer);
00335 
00336     index.db->cursor( txnid , &m_cursor, 0 );
00337 
00338     txn = txnid;
00339   }
00340   
00341   template <class T>
00342   SecondaryIterator<T>::~SecondaryIterator( )
00343   {
00344     if ( m_cursor ) {
00345       m_cursor->close();
00346     }
00347    if ( m_GeneralData_key ) {
00348      delete m_GeneralData_key;
00349      m_GeneralData_key = NULL;
00350    }
00351     if ( m_GeneralData_answer ) {
00352       delete m_GeneralData_answer;
00353       m_GeneralData_answer = NULL;
00354     }
00355   }
00356   template <class T>
00357   void SecondaryIterator<T>::closeCursor()
00358   {
00359     assert( m_cursor != 0 );
00360     m_cursor->close();
00361     m_cursor = 0;
00362   }
00363   
00364   
00365   template <class T>
00366   int SecondaryIterator<T>::set()
00367   {
00368       
00369     Dbt secondaryKey;
00370     QByteArray ar;
00371     QDataStream stream( ar, IO_WriteOnly );
00372     m_GeneralData_key->writeStream( stream );
00373     Dbt primaryData( ar.data(), ar.size() );
00374     fillSecondaryDataDbt( secondaryKey, primaryData );
00375     return pget( secondaryKey, DB_SET );
00376   }
00377 
00378   template <class T>
00379   int SecondaryIterator<T>::delCurrentInclDups()
00380   {
00381     Dbt secondaryKey;
00382     QByteArray ar;
00383     QDataStream stream( ar, IO_WriteOnly );
00384     m_GeneralData_key->writeStream( stream );
00385     Dbt primaryData( ar.data(), ar.size() );
00386     fillSecondaryDataDbt( secondaryKey, primaryData );
00387     
00388     int ret;
00389     if ( (ret = index.db->del(txn, &secondaryKey, 0)) != 0 ){
00390       if ( ret != DB_NOTFOUND )
00391         {
00392           index.db->err(ret, "del() call in SecondaryIterator::delCurrentInclDups()");
00393         }
00394       
00395     }
00396     return ret;
00397   }
00398   
00399   
00400   template <class T>
00401   int SecondaryIterator<T>::setRange()
00402   {
00403     Dbt secondaryKey;
00404     QByteArray ar;
00405     QDataStream stream( ar, IO_WriteOnly );
00406     m_GeneralData_key->writeStream( stream );
00407     Dbt primaryData( ar.data(), ar.size() );
00408     fillSecondaryDataDbt( secondaryKey, primaryData );
00409     return pget( secondaryKey, DB_SET_RANGE );
00410   }
00411   
00412   template <class T>
00413   void SecondaryIterator<T>::fillSecondaryDataDbt( Dbt & secondaryKey, const Dbt & primaryData )
00414   {
00415     Dbt primaryKey;
00416     index.associate_func( index.db , &primaryKey, &primaryData, &secondaryKey);
00417     return;
00418   }
00419   
00420   template <class T>
00421   int SecondaryIterator<T>::pget( Dbt & key, u_int32_t flags )
00422   {
00423     if ( !m_cursor )
00424       index.db->cursor( txn , &m_cursor, 0 );
00425 
00426 
00427     Dbt primary_key;
00428     Dbt primary_data;
00429     int ret;
00430     if ((ret = m_cursor->pget(&key, &primary_key, &primary_data, flags )) != 0) {
00431       if ( ret != DB_NOTFOUND ) {
00432         index.db->err(ret, "pget call in SecondaryIterator::pget");
00433       }
00434       //  throw DbException(ret);
00435     }
00436     else {
00437       Database::setFromDbt( &primary_data, m_GeneralData_answer );
00438       db_recno_t recno = * static_cast<db_recno_t *> ( primary_key.get_data() );
00439       m_GeneralData_answer->setRecno( recno );
00440     }
00441     return ret;
00442   }
00443   
00444   template <class T>
00445   int SecondaryIterator<T>::nextdup()
00446   {
00447     Dbt secondaryKey;
00448     return pget( secondaryKey, DB_NEXT_DUP );
00449   }
00450   
00451   template <class T>
00452   int SecondaryIterator<T>::next()
00453   {
00454     Dbt secondaryKey;
00455     return pget( secondaryKey, DB_NEXT );
00456   }
00457 
00458   template <class T>
00459   int SecondaryIterator<T>::prev()
00460   {
00461     Dbt secondaryKey;
00462     return pget( secondaryKey, DB_PREV );
00463   }
00464 
00465   template <class T>
00466   int SecondaryIterator<T>::first()
00467   {
00468     Dbt secondaryKey;
00469     return pget( secondaryKey, DB_FIRST );
00470   }
00471 
00472   template <class T>
00473   int SecondaryIterator<T>::last()
00474   {
00475     Dbt secondaryKey;
00476     return pget( secondaryKey, DB_LAST );
00477   }
00478 
00479   
00480   
00481   template <class T>
00482   class PrimaryIterator
00483   {
00484   public:
00485     PrimaryIterator( TrapperDoc * doc, string generalDataType, DbTxn *txnid = NULL );
00486     ~PrimaryIterator();
00487     int first();
00488     int next();
00489     int get( u_int32_t flags, Dbt & primary_key );
00490     T * key() { return m_GeneralData_key; }
00491     T * answer() { return m_GeneralData_answer; }
00492     int delCurrent();
00493     int setFromRecno( db_recno_t recno );
00494   protected:
00495     /**Not implemented... What's this for anyway??
00496      */
00497     void fillSecondaryDataDbt( Dbt & secondaryKey, const Dbt & primaryData );
00498     T * m_GeneralData_key;
00499     T * m_GeneralData_answer;
00500     Dbc * m_cursor;
00501     Db * m_db;
00502     std::string name;
00503   };
00504 
00505   
00506   template <class T>
00507   PrimaryIterator<T>::PrimaryIterator( TrapperDoc * doc, string generalDataType, DbTxn *txnid )
00508   {
00509     m_GeneralData_key = dynamic_cast< T * >(GeneralMaker::newData( generalDataType ));
00510     m_GeneralData_answer = dynamic_cast< T * >(GeneralMaker::newData( generalDataType ));
00511     m_db = doc->findTrDb( generalDataType )->primaryDb();
00512     Q_CHECK_PTR(m_GeneralData_key);
00513     Q_CHECK_PTR(m_GeneralData_answer);
00514     m_db->cursor( txnid , &m_cursor, 0 );
00515   }
00516   
00517   template <class T>
00518   PrimaryIterator<T>::~PrimaryIterator( )
00519   {
00520     m_cursor->close();
00521     if ( m_GeneralData_key )
00522       {
00523         delete m_GeneralData_key;
00524         m_GeneralData_key = NULL;
00525       }
00526     if ( m_GeneralData_answer )
00527       {
00528         delete m_GeneralData_answer;
00529         m_GeneralData_answer = NULL;
00530       }
00531   }
00532   
00533   template <class T>
00534   int PrimaryIterator<T>::delCurrent() 
00535   {
00536     int ret;
00537     if ((ret = m_cursor->del(0)) != 0)
00538       {
00539         m_db->err(ret, "get call in PrimaryIterator::get");
00540       }
00541     return ret;    
00542   }
00543   
00544   template <class T>
00545   int PrimaryIterator<T>::first()
00546   {
00547     Dbt key;
00548     return get( DB_FIRST, key );
00549   }
00550   
00551   template <class T>
00552   int PrimaryIterator<T>::get( u_int32_t flags, Dbt & primary_key )
00553   {
00554     Dbt primary_data;
00555     int ret;
00556     if ((ret = m_cursor->get
00557          (&primary_key, &primary_data, flags )) != 0)
00558       {
00559         if ( ret != DB_NOTFOUND )
00560           {
00561             m_db->err(ret, "get call in PrimaryIterator::get");
00562           }
00563         //  throw DbException(ret);
00564       }
00565     else
00566       {
00567         setFromDbt( &primary_data, m_GeneralData_answer );
00568         db_recno_t recno = * static_cast<db_recno_t *> ( primary_key.get_data() );
00569         m_GeneralData_answer->setRecno( recno );
00570       }
00571     return ret;
00572   }
00573   
00574   template <class T>
00575   int PrimaryIterator<T>::setFromRecno( db_recno_t recno )
00576   {
00577     Dbt key;
00578     key.set_size( sizeof( db_recno_t ));
00579     key.set_data( &recno );
00580     return get( DB_SET, key);
00581   }
00582   
00583   template <class T>
00584   int PrimaryIterator<T>::next()
00585   {
00586     Dbt key;
00587     return get( DB_NEXT, key );
00588   }
00589   
00590 }//END Database namespace
00591 
00592 #endif

Generated on Fri Jul 17 20:19:29 2009 for ngsview by  doxygen 1.5.1