#include <macsimparser.h>
Inheritance diagram for MacsimParser:
Public Member Functions | |
MacsimParser (DbEnv *dbenv, QString projectDir) | |
bool | startElement (const QString &namespaceURI, const QString &localName, const QString &qName, const QXmlAttributes &attributes) |
bool | endElement (const QString &namespaceURI, const QString &localName, const QString &qName) |
bool | characters (const QString &ch) |
Private Member Functions | |
QStringList | contigNamesInProjectDir () |
Private Attributes | |
DbEnv * | env |
QString | projDir |
TrapperDoc * | current_doc |
db_recno_t | current_read_recno |
bool | format_ok |
QMap< QString, QMap< QString, QString > > | att_map |
QMap< QString, QString > | att_trans |
QString | curr_data |
int | curr_row |
int | curr_al_num |
std::vector< std::vector< QString > > | tags |
Definition at line 14 of file macsimparser.h.
MacsimParser::MacsimParser | ( | DbEnv * | dbenv, | |
QString | projectDir | |||
) |
Definition at line 18 of file macsimparser.cpp.
References att_trans, curr_al_num, and curr_row.
00018 : 00019 env(dbenv), projDir(projectDir), current_doc(0), format_ok(false) 00020 { 00021 curr_row = 1; 00022 curr_al_num = 0; 00023 att_trans["sense"] = "strand"; 00024 att_trans["seq-name"] = "name"; 00025 att_trans["seq-data"] = "trappervector"; 00026 00027 // cerr<<"MacsimParser: NB, due to long names of genomic seq in old files, they are currently being shortened"<<endl; 00028 }
bool MacsimParser::startElement | ( | const QString & | namespaceURI, | |
const QString & | localName, | |||
const QString & | qName, | |||
const QXmlAttributes & | attributes | |||
) |
Definition at line 31 of file macsimparser.cpp.
References format_ok.
00035 { 00036 00037 if ( qName == "macsim" ) { 00038 format_ok = true; 00039 return true; 00040 00041 } 00042 else if ( format_ok ) { 00043 00044 return true; 00045 } 00046 return false; 00047 00048 }
bool MacsimParser::endElement | ( | const QString & | namespaceURI, | |
const QString & | localName, | |||
const QString & | qName | |||
) |
Definition at line 50 of file macsimparser.cpp.
References QString::append(), QXmlAttributes::append(), att_map, att_trans, QMap::clear(), contigNamesInProjectDir(), Database::Creator< T >::create(), curr_al_num, curr_data, curr_row, current_doc, Database::Creator< T >::data(), env, QString::find(), QString::findRev(), GeneralMaker::listRegistered(), QString::mid(), TrapperDoc::openDocument(), projDir, TagData::readAttributes(), QualityData::readAttributes(), ChromatData::readAttributes(), DnaStrData::readAttributes(), ReadData::readAttributes(), QString::replace(), FeatureData::setReadRecno(), ReadData::setRow(), tags, and QMessageBox::warning().
00052 { 00053 if ( qName == "alignment" ) { 00054 curr_al_num++; 00055 if ( curr_al_num % 100 == 0 ) { 00056 cerr<<curr_al_num<<" alignments parsed"<<endl; 00057 } 00058 00059 curr_row = 1; 00060 //Delete old doc, if any 00061 delete current_doc; 00062 return true; 00063 } 00064 else if ( qName == "aln-name") { 00065 //Contig name 00066 00067 //Create contig data 00068 00069 QString contigName = curr_data; 00070 00071 00072 //Some bookkeeping to check if contig is already present 00073 QStringList contigNamesAlreadyPresent = contigNamesInProjectDir(); 00074 if ( contigNamesAlreadyPresent.contains( contigName ) ) { 00075 QString message; 00076 message = QString("The contig name \"%1\" was specified in the import file, but that contig is already present in the project dir. Skipping this one").arg(contigName); 00077 QMessageBox::warning(0,"",message); 00078 return false; 00079 } 00080 //Create directory for contig 00081 QString fName = projDir + "/" + contigName; 00082 00083 00084 //Create directories and databases for this contig 00085 00086 current_doc = new TrapperDoc(env); 00087 current_doc->openDocument(fName); 00088 00089 return true; 00090 00091 } 00092 else if ( qName == "sequence") { 00093 //One ReadData parsed 00094 00095 QXmlAttributes read_att; 00096 00097 //Extract ReadData startPos, endPos etc, recalc corresponding feature coordinates 00098 //This also needs to be done for other features, when they are added 00099 00100 //strand (group) 00101 if ( att_map["ReadData"]["strand"].toInt() > -1 ) { 00102 read_att.append("strand", namespaceURI, localName, "U"); 00103 } 00104 else { 00105 read_att.append("strand", namespaceURI, localName, "C"); 00106 } 00107 00108 //name (seq-name) 00109 read_att.append("name", namespaceURI, localName, att_map["ReadData"]["name"]); 00110 00111 //trappervector (seq-data) 00112 QString orig_str = att_map["DnaStrData"]["trappervector"].lower(); 00113 00114 //Parse out leading and trailing gaps, calculate start and endpos 00115 int indexleft = orig_str.find( QRegExp("[a-z]") ); 00116 int indexright = orig_str.findRev( QRegExp("[a-z]") ); 00117 assert( indexleft > -1 && indexright > -1 ); 00118 /* 00119 -----aaatac--- 00120 L = 14 00121 seqL = 6 00122 startPos = 5 00123 endPos = 10 00124 index = 5 00125 indexright = 10 00126 00127 => startPos = indexleft, endPos = indexright 00128 Feature: endPos = seqL - 1 = indexright - indexleft 00129 */ 00130 QString seq = orig_str.mid( indexleft, indexright - indexleft + 1 ); 00131 seq.replace('u','t'); 00132 seq.replace('-','*'); 00133 00134 read_att.append("startPos", namespaceURI, localName, QString().number(indexleft)); 00135 read_att.append("endPos", namespaceURI, localName, QString().number(indexright)); 00136 00137 //Features, currently only DnaStrData 00138 QXmlAttributes dna_att; 00139 dna_att.append("startPos", namespaceURI, localName, "0"); 00140 dna_att.append("endPos", namespaceURI, localName, QString().number(indexright - indexleft) ); 00141 dna_att.append("trappervector", namespaceURI, localName, seq); 00142 00143 00144 //Create ReadData 00145 00146 Database::Creator<ReadData> read_creator( current_doc, "ReadData" ); 00147 read_creator.data()->readAttributes( read_att ); 00148 read_creator.data()->setRow(curr_row); 00149 curr_row++; 00150 00151 db_recno_t new_recno = read_creator.create( false ); 00152 00153 //Create FeatureData 00154 00155 Database::Creator<DnaStrData> feat_creator( current_doc, "DnaStrData" ); 00156 feat_creator.data()->readAttributes( dna_att ); 00157 feat_creator.data()->setReadRecno( new_recno ); 00158 feat_creator.create(false); 00159 00160 //Hacking galore: create dummy vals for qual and chromat also 00161 QXmlAttributes chr_att; 00162 chr_att.append("startPos", namespaceURI, localName, "0"); 00163 chr_att.append("endPos", namespaceURI, localName, QString().number(indexright - indexleft) ); 00164 00165 Database::Creator<ChromatData> chr_creator( current_doc, "ChromatData" ); 00166 chr_creator.data()->readAttributes( chr_att ); 00167 chr_creator.data()->setReadRecno( new_recno ); 00168 chr_creator.create(false); 00169 00170 QXmlAttributes qual_att; 00171 qual_att.append("startPos", namespaceURI, localName, "0"); 00172 qual_att.append("endPos", namespaceURI, localName, QString().number(indexright - indexleft) ); 00173 QString dummyqual; 00174 for( size_t i = 0; i < static_cast<size_t>(indexright - indexleft + 1); i++ ) { 00175 dummyqual.append(" 20"); 00176 } 00177 qual_att.append("trappervector", namespaceURI, localName, dummyqual); 00178 00179 Database::Creator<QualityData> qual_creator( current_doc, "QualityData" ); 00180 qual_creator.data()->readAttributes( qual_att ); 00181 qual_creator.data()->setReadRecno( new_recno ); 00182 qual_creator.create(false); 00183 00184 //Tag data 00185 00186 for( size_t i = 0; i < tags.size(); i++ ) { 00187 assert( tags[i].size() == 5 ); 00188 list<string> type_list = GeneralMaker::listRegistered(); 00189 string type = tags[i][0].ascii(); 00190 00191 if ( find( type_list.begin(), type_list.end(), type ) == type_list.end() ) { 00192 tags[i][4] = type + " " + tags[i][4]; 00193 type = "TagData"; 00194 00195 } 00196 // cerr<<"type: "<<type<<endl; 00197 00198 00199 Database::Creator<TagData> tag_creator( current_doc, type ); 00200 00201 QXmlAttributes tag_att; 00202 // tag_att.append( "startPos", namespaceURI, localName, QString().number( tags[i][1].toInt() - indexleft) ); 00203 // tag_att.append( "endPos", namespaceURI, localName, QString().number( tags[i][2].toInt() - indexleft) ); 00204 tag_att.append( "startPos", namespaceURI, localName, QString().number( tags[i][1].toInt() - 1) ); 00205 tag_att.append( "endPos", namespaceURI, localName, QString().number( tags[i][2].toInt() - 1) ); 00206 tag_att.append( "score", namespaceURI, localName, tags[i][3] ); 00207 tag_att.append( "info", namespaceURI, localName, tags[i][4] ); 00208 00209 tag_creator.data()->readAttributes( tag_att ); 00210 00211 tag_creator.data()->setReadRecno( new_recno ); 00212 tag_creator.create(false); 00213 00214 } 00215 00216 00217 //Clear old data 00218 att_map.clear(); 00219 tags.clear(); 00220 return true; 00221 00222 } 00223 else if ( qName == "seq-name" || qName == "group" || qName == "sense" ) { 00224 //Fill attributes with name, strand, and sequence 00225 // if ( qName == "seq-name" && curr_data.find('|') != -1 ) { 00226 // curr_data = curr_data.section('|', 1, 1).section(' ', 0, 0); 00227 // } 00228 00229 00230 att_map["ReadData"][ att_trans[qName] ] = curr_data; 00231 00232 return true; 00233 } 00234 else if ( qName == "seq-data" ) { 00235 att_map["DnaStrData"][ att_trans[qName] ] = curr_data; 00236 return true; 00237 } 00238 else if ( qName == "ftype" ) { 00239 vector<QString> tmp; 00240 tags.push_back(tmp); 00241 tags[ tags.size() - 1 ].push_back(curr_data); 00242 00243 return true; 00244 } 00245 else if ( qName == "fstart" || qName == "fstop" || qName == "fscore" || qName == "fnote" ) { 00246 tags[ tags.size() - 1 ].push_back(curr_data); 00247 return true; 00248 } 00249 00250 return true; 00251 }
bool MacsimParser::characters | ( | const QString & | ch | ) |
Definition at line 253 of file macsimparser.cpp.
References curr_data.
00254 { 00255 curr_data = ch; 00256 return true; 00257 }
QStringList MacsimParser::contigNamesInProjectDir | ( | ) | [private] |
Definition at line 259 of file macsimparser.cpp.
References QDir::entryList(), QDir::exists(), projDir, QDir::setFilter(), and QDir::setSorting().
Referenced by endElement().
00260 { 00261 QString contigName; 00262 QDir d( projDir ); 00263 Q_ASSERT( d.exists() ); 00264 d.setFilter( QDir::Dirs | QDir::NoSymLinks ); 00265 d.setSorting( QDir::Name ); 00266 QStringList slist = d.entryList(); 00267 return slist; 00268 }
DbEnv* MacsimParser::env [private] |
QString MacsimParser::projDir [private] |
Definition at line 35 of file macsimparser.h.
Referenced by contigNamesInProjectDir(), and endElement().
TrapperDoc* MacsimParser::current_doc [private] |
db_recno_t MacsimParser::current_read_recno [private] |
Definition at line 38 of file macsimparser.h.
bool MacsimParser::format_ok [private] |
QMap<QString,QString> MacsimParser::att_trans [private] |
QString MacsimParser::curr_data [private] |
int MacsimParser::curr_row [private] |
int MacsimParser::curr_al_num [private] |
std::vector<std::vector<QString> > MacsimParser::tags [private] |