MacsimParser Class Reference

#include <macsimparser.h>

Inheritance diagram for MacsimParser:

Inheritance graph
[legend]
Collaboration diagram for MacsimParser:

Collaboration graph
[legend]
List of all members.

Public Member Functions

 MacsimParser (DbEnv *dbenv, QString projectDir)
bool startElement (const QString &namespaceURI, const QString &localName, const QString &qName, const QXmlAttributes &attributes)
bool endElement (const QString &namespaceURI, const QString &localName, const QString &qName)
bool characters (const QString &ch)

Private Member Functions

QStringList contigNamesInProjectDir ()

Private Attributes

DbEnv * env
QString projDir
TrapperDoc * current_doc
db_recno_t current_read_recno
bool format_ok
QMap< QString, QMap< QString, QString > > att_map
QMap< QString, QString > att_trans
QString curr_data
int curr_row
int curr_al_num
std::vector< std::vector< QString > > tags

Detailed Description

Definition at line 14 of file macsimparser.h.


Constructor & Destructor Documentation

MacsimParser::MacsimParser ( DbEnv *  dbenv,
QString  projectDir 
)

Definition at line 18 of file macsimparser.cpp.

References att_trans, curr_al_num, and curr_row.

00018                                                            : 
00019   env(dbenv), projDir(projectDir), current_doc(0), format_ok(false)
00020 {
00021   curr_row = 1;
00022   curr_al_num = 0;
00023   att_trans["sense"] = "strand";
00024   att_trans["seq-name"] = "name";
00025   att_trans["seq-data"] = "trappervector";
00026 
00027 //   cerr<<"MacsimParser: NB, due to long names of genomic seq in old files, they are currently being shortened"<<endl;
00028 }


Member Function Documentation

bool MacsimParser::startElement ( const QString &  namespaceURI,
const QString &  localName,
const QString &  qName,
const QXmlAttributes &  attributes 
)

Definition at line 31 of file macsimparser.cpp.

References format_ok.

00035 {
00036 
00037   if ( qName == "macsim" ) {
00038     format_ok = true;
00039     return true;
00040     
00041   }
00042   else if ( format_ok ) {
00043     
00044     return true;
00045   }
00046   return false;
00047   
00048 }

bool MacsimParser::endElement ( const QString &  namespaceURI,
const QString &  localName,
const QString &  qName 
)

Definition at line 50 of file macsimparser.cpp.

References QString::append(), QXmlAttributes::append(), att_map, att_trans, QMap::clear(), contigNamesInProjectDir(), Database::Creator< T >::create(), curr_al_num, curr_data, curr_row, current_doc, Database::Creator< T >::data(), env, QString::find(), QString::findRev(), GeneralMaker::listRegistered(), QString::mid(), TrapperDoc::openDocument(), projDir, TagData::readAttributes(), QualityData::readAttributes(), ChromatData::readAttributes(), DnaStrData::readAttributes(), ReadData::readAttributes(), QString::replace(), FeatureData::setReadRecno(), ReadData::setRow(), tags, and QMessageBox::warning().

00052 {
00053   if ( qName == "alignment" ) {
00054     curr_al_num++;
00055     if ( curr_al_num % 100 == 0 ) {
00056       cerr<<curr_al_num<<" alignments parsed"<<endl;
00057     }
00058 
00059     curr_row = 1;
00060     //Delete old doc, if any
00061     delete current_doc;
00062     return true;
00063   }
00064   else if ( qName == "aln-name") {
00065     //Contig name
00066     
00067     //Create contig data
00068 
00069     QString contigName = curr_data;
00070 
00071 
00072     //Some bookkeeping to check if contig is already present
00073     QStringList contigNamesAlreadyPresent = contigNamesInProjectDir();
00074     if ( contigNamesAlreadyPresent.contains( contigName ) ) {
00075       QString message;
00076       message = QString("The contig name \"%1\" was specified in the import file, but that contig is already present in the project dir. Skipping this one").arg(contigName);
00077       QMessageBox::warning(0,"",message);
00078       return false;
00079     }
00080     //Create directory for contig
00081     QString fName = projDir + "/" + contigName;
00082                 
00083 
00084     //Create directories and databases for this contig
00085 
00086     current_doc = new TrapperDoc(env);
00087     current_doc->openDocument(fName);
00088 
00089     return true;
00090 
00091   }
00092   else if ( qName == "sequence") {
00093     //One ReadData parsed
00094 
00095     QXmlAttributes read_att;
00096 
00097     //Extract ReadData startPos, endPos etc, recalc corresponding feature coordinates
00098     //This also needs to be done for other features, when they are added
00099 
00100     //strand (group)
00101     if ( att_map["ReadData"]["strand"].toInt() > -1 ) {
00102       read_att.append("strand", namespaceURI, localName, "U");
00103     }
00104     else {
00105       read_att.append("strand", namespaceURI, localName, "C");
00106     }
00107     
00108     //name (seq-name)
00109     read_att.append("name", namespaceURI, localName, att_map["ReadData"]["name"]);
00110     
00111     //trappervector (seq-data)
00112     QString orig_str = att_map["DnaStrData"]["trappervector"].lower();
00113     
00114     //Parse out leading and trailing gaps, calculate start and endpos
00115     int indexleft = orig_str.find( QRegExp("[a-z]") );
00116     int indexright = orig_str.findRev( QRegExp("[a-z]") );
00117     assert( indexleft > -1 && indexright > -1 );
00118     /*
00119     -----aaatac---
00120     L = 14
00121     seqL = 6
00122     startPos = 5
00123     endPos = 10
00124     index = 5
00125     indexright = 10
00126 
00127     => startPos = indexleft, endPos = indexright
00128     Feature: endPos = seqL - 1 = indexright - indexleft
00129     */
00130     QString seq = orig_str.mid( indexleft, indexright - indexleft + 1 );
00131     seq.replace('u','t');
00132     seq.replace('-','*');
00133 
00134     read_att.append("startPos", namespaceURI, localName, QString().number(indexleft));
00135     read_att.append("endPos", namespaceURI, localName, QString().number(indexright));
00136     
00137     //Features, currently only DnaStrData
00138     QXmlAttributes dna_att;
00139     dna_att.append("startPos", namespaceURI, localName, "0");
00140     dna_att.append("endPos", namespaceURI, localName, QString().number(indexright - indexleft) );
00141     dna_att.append("trappervector", namespaceURI, localName, seq);
00142 
00143 
00144     //Create ReadData
00145 
00146     Database::Creator<ReadData> read_creator( current_doc, "ReadData" );
00147     read_creator.data()->readAttributes( read_att );
00148     read_creator.data()->setRow(curr_row);
00149     curr_row++;
00150     
00151     db_recno_t new_recno = read_creator.create( false );
00152 
00153     //Create FeatureData
00154 
00155     Database::Creator<DnaStrData> feat_creator( current_doc, "DnaStrData" );
00156     feat_creator.data()->readAttributes( dna_att );
00157     feat_creator.data()->setReadRecno( new_recno );
00158     feat_creator.create(false);
00159 
00160     //Hacking galore: create dummy vals for qual and chromat also
00161     QXmlAttributes chr_att;
00162     chr_att.append("startPos", namespaceURI, localName, "0");
00163     chr_att.append("endPos", namespaceURI, localName, QString().number(indexright - indexleft) );
00164 
00165     Database::Creator<ChromatData> chr_creator( current_doc, "ChromatData" );
00166     chr_creator.data()->readAttributes( chr_att );
00167     chr_creator.data()->setReadRecno( new_recno );
00168     chr_creator.create(false);
00169 
00170     QXmlAttributes qual_att;
00171     qual_att.append("startPos", namespaceURI, localName, "0");
00172     qual_att.append("endPos", namespaceURI, localName, QString().number(indexright - indexleft) );
00173     QString dummyqual;
00174     for( size_t i = 0; i < static_cast<size_t>(indexright - indexleft + 1); i++ ) {
00175       dummyqual.append(" 20");
00176     }
00177     qual_att.append("trappervector", namespaceURI, localName, dummyqual);
00178 
00179     Database::Creator<QualityData> qual_creator( current_doc, "QualityData" );
00180     qual_creator.data()->readAttributes( qual_att );
00181     qual_creator.data()->setReadRecno( new_recno );
00182     qual_creator.create(false);
00183 
00184     //Tag data
00185     
00186     for( size_t i = 0; i < tags.size(); i++ ) {
00187       assert( tags[i].size() == 5 );
00188       list<string> type_list = GeneralMaker::listRegistered();
00189       string type = tags[i][0].ascii();
00190 
00191       if ( find( type_list.begin(), type_list.end(), type ) == type_list.end() ) {
00192         tags[i][4] = type + " " + tags[i][4];
00193         type = "TagData";
00194         
00195       }
00196 //       cerr<<"type: "<<type<<endl;
00197       
00198       
00199       Database::Creator<TagData> tag_creator( current_doc, type );
00200       
00201       QXmlAttributes tag_att;
00202 //       tag_att.append( "startPos", namespaceURI, localName, QString().number( tags[i][1].toInt() - indexleft) );
00203 //       tag_att.append( "endPos", namespaceURI, localName, QString().number( tags[i][2].toInt() - indexleft) );
00204       tag_att.append( "startPos", namespaceURI, localName, QString().number( tags[i][1].toInt() - 1) );
00205       tag_att.append( "endPos", namespaceURI, localName, QString().number( tags[i][2].toInt() - 1) );
00206       tag_att.append( "score", namespaceURI, localName, tags[i][3] );
00207       tag_att.append( "info", namespaceURI, localName, tags[i][4] );
00208 
00209       tag_creator.data()->readAttributes( tag_att );
00210 
00211       tag_creator.data()->setReadRecno( new_recno );
00212       tag_creator.create(false);
00213       
00214     }
00215     
00216 
00217     //Clear old data
00218     att_map.clear();
00219     tags.clear();
00220     return true;
00221     
00222   }
00223   else if ( qName == "seq-name" || qName == "group" || qName == "sense" )  {
00224     //Fill attributes with name, strand, and sequence
00225 //     if ( qName == "seq-name" && curr_data.find('|') != -1 ) {
00226 //       curr_data = curr_data.section('|', 1, 1).section(' ', 0, 0);
00227 //     }
00228     
00229     
00230     att_map["ReadData"][ att_trans[qName] ] = curr_data;
00231 
00232     return true;
00233   }
00234   else if ( qName == "seq-data" ) {
00235     att_map["DnaStrData"][ att_trans[qName] ] = curr_data;
00236     return true;
00237   }
00238   else if ( qName == "ftype" ) {
00239     vector<QString> tmp;
00240     tags.push_back(tmp);
00241     tags[ tags.size() - 1 ].push_back(curr_data);
00242     
00243     return true;
00244   }
00245   else if ( qName == "fstart" || qName == "fstop" || qName == "fscore" || qName == "fnote" ) {
00246     tags[ tags.size() - 1 ].push_back(curr_data);
00247     return true;
00248   }
00249   
00250   return true;
00251 }

bool MacsimParser::characters ( const QString &  ch  ) 

Definition at line 253 of file macsimparser.cpp.

References curr_data.

00254 {
00255   curr_data = ch;
00256   return true;
00257 }

QStringList MacsimParser::contigNamesInProjectDir (  )  [private]

Definition at line 259 of file macsimparser.cpp.

References QDir::entryList(), QDir::exists(), projDir, QDir::setFilter(), and QDir::setSorting().

Referenced by endElement().

00260 {
00261     QString contigName;
00262     QDir d( projDir );
00263     Q_ASSERT( d.exists() );
00264     d.setFilter( QDir::Dirs | QDir::NoSymLinks );
00265     d.setSorting( QDir::Name );
00266     QStringList slist = d.entryList();
00267     return slist;
00268 }


Member Data Documentation

DbEnv* MacsimParser::env [private]

Definition at line 34 of file macsimparser.h.

Referenced by endElement().

QString MacsimParser::projDir [private]

Definition at line 35 of file macsimparser.h.

Referenced by contigNamesInProjectDir(), and endElement().

TrapperDoc* MacsimParser::current_doc [private]

Definition at line 37 of file macsimparser.h.

Referenced by endElement().

db_recno_t MacsimParser::current_read_recno [private]

Definition at line 38 of file macsimparser.h.

bool MacsimParser::format_ok [private]

Definition at line 39 of file macsimparser.h.

Referenced by startElement().

QMap<QString, QMap<QString, QString> > MacsimParser::att_map [private]

Definition at line 40 of file macsimparser.h.

Referenced by endElement().

QMap<QString,QString> MacsimParser::att_trans [private]

Definition at line 41 of file macsimparser.h.

Referenced by endElement(), and MacsimParser().

QString MacsimParser::curr_data [private]

Definition at line 42 of file macsimparser.h.

Referenced by characters(), and endElement().

int MacsimParser::curr_row [private]

Definition at line 43 of file macsimparser.h.

Referenced by endElement(), and MacsimParser().

int MacsimParser::curr_al_num [private]

Definition at line 44 of file macsimparser.h.

Referenced by endElement(), and MacsimParser().

std::vector<std::vector<QString> > MacsimParser::tags [private]

Definition at line 46 of file macsimparser.h.

Referenced by endElement().


The documentation for this class was generated from the following files:

macsimparser.h
macsimparser.cpp

Generated on Fri Jul 17 20:19:49 2009 for ngsview by doxygen 1.5.1