libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
msfileaccessor.cpp
Go to the documentation of this file.
1// #include <proteowizard/pwiz/data/msdata/DefaultReaderList.hpp>
2
3#include <QDebug>
4#include <QFile>
5#include <QFileInfo>
6
7
8#include "msfileaccessor.h"
9#include "pwizmsfilereader.h"
14
21
28
30
31
32namespace pappso
33{
34
35
// Constructs an accessor for the mass spectrometry data file `file_name`.
// Stores `file_name` and `xml_prefix` in m_fileName / m_xmlPrefix, checks that
// the file exists (the message built below reports "File %1 not found." with
// the absolute path), then initialises m_oboPsiModTermNativeIDFormat to the
// term MS:1000824, "no nativeID format".
// NOTE(review): listing lines 41 and 47 are absent from this extract; they
// presumably hold the `throw(...)` opener and the `m_definition =` assignment
// for the two string fragments below -- confirm against the real source.
36MsFileAccessor::MsFileAccessor(const QString &file_name, const QString &xml_prefix)
37 : m_fileName(file_name), m_xmlPrefix(xml_prefix)
38{
39 QFile file(file_name);
40 if(!file.exists())
42 QObject::tr("File %1 not found.").arg(QFileInfo(file_name).absoluteFilePath())));
43
44
// Default native ID term, used until a reader supplies a more specific one.
45 m_oboPsiModTermNativeIDFormat.setAccession("MS:1000824");
46 m_oboPsiModTermNativeIDFormat.m_name = "no nativeID format";
48 "No nativeID format indicates that the file tagged with this term does not "
49 "contain spectra that can have a nativeID format.";
50}
51
52
61
65
66
// Accessor returning a const reference to the stored file name (m_fileName).
// NOTE(review): the declarator line (listing line 68) was dropped from this
// extract -- presumably `MsFileAccessor::getFileName() const`; confirm.
67const QString &
69{
70 return m_fileName;
71}
72
73
79
// Builds and returns (by value) an OboPsiModTerm describing the file format
// currently stored in m_fileFormat. Each handled branch of the switch fills in
// the term's accession, m_name and m_definition; a few branches only `break`,
// and the `default` branch does nothing, so in those cases the term is
// returned exactly as `OboPsiModTerm term;` constructed it.
// NOTE(review): the declarator (listing line 81) and every `case` label
// (listing lines 88, 95, 100, ...) are missing from this extract, so the
// mapping from enum value to term cannot be read here; only the term contents
// are visible.
80const OboPsiModTerm
82{
83 OboPsiModTerm term;
84
85 // is_a: MS:1000560 ! mass spectrometer file format
86 switch(m_fileFormat)
87 {
89 term.setAccession("MS:1001560");
90 term.m_name = "SCIEX TOF/TOF T2D format";
91 term.m_definition =
92 "Applied Biosystems/MDS Analytical Technologies TOF/TOF instrument "
93 "export format.";
94 break;
96 term.setAccession("MS:1000562");
97 term.m_name = "ABI WIFF format";
98 term.m_definition = "Applied Biosystems WIFF file format.";
99 break;
101 term.setAccession("MS:1001509");
102 term.m_name = "Agilent MassHunter format";
103 term.m_definition =
104 "A data file format found in an Agilent MassHunter directory which "
105 "contains raw data acquired by an Agilent mass spectrometer.";
106 break;
108 break;
110 term.setAccession("MS:1000825");
111 term.m_name = "Bruker FID format";
112 term.m_definition = "Bruker FID file format.";
113 break;
115 term.setAccession("MS:1002817");
116 term.m_name = "Bruker TDF format";
117 term.m_definition = "Bruker TDF raw file format.";
118 break;
120 term.setAccession("MS:1000567");
121 term.m_name = "Bruker/Agilent YEP format";
122 term.m_definition = "Bruker/Agilent YEP file format.";
123 break;
125 term.setAccession("MS:1001062");
126 term.m_name = "Mascot MGF format";
127 term.m_definition = "Mascot MGF file format.";
128 break;
130 break;
132 term.setAccession("MS:1001881");
133 term.m_name = "mz5 format";
134 term.m_definition = "mz5 file format, modelled after mzML.";
135 break;
137 term.setAccession("MS:1000584");
138 term.m_name = "mzML format";
139 term.m_definition = "Proteomics Standards Inititative mzML file format.";
140 break;
142 // mzCBOR is a direct translation of mzML
143 // waiting for a true OBO term, we choose the mzML definition
144 term.setAccession("MS:1000584");
145 term.m_name = "mzML format";
146 term.m_definition = "Proteomics Standards Inititative mzML file format.";
147 break;
149 term.setAccession("MS:1000566");
150 term.m_name = "ISB mzXML format";
151 term.m_definition = "Institute of Systems Biology mzXML file format.";
152 break;
154 break;
156
157 term.setAccession("MS:1000563");
158 term.m_name = "Thermo RAW format";
159 term.m_definition = "Thermo Scientific RAW file format.";
160 break;
162 break;
164 term.setAccession("MS:1000526");
165 term.m_name = "Waters raw format";
166 term.m_definition =
167 "Waters data file format found in a Waters RAW directory, generated "
168 "from an MS acquisition.";
169 break;
// NOTE(review): this branch and the next one both set accession "MS:1001369"
// while giving different names and definitions. Confirm whether the BafAscii
// branch is deliberately borrowing the text-format accession.
171 term.setAccession("MS:1001369");
172 term.m_name = "BafAscii text format";
173 term.m_definition =
174 "Simple text file format obtained by exporting Bruker Baf to ascii "
175 "using Bruker software";
176 break;
178 term.setAccession("MS:1001369");
179 term.m_name = "text format";
180 term.m_definition =
181 "Simple text file format of \"m/z<separator>intensity\" value pairs "
182 "for a single mass spectrum, a PMF (or single MS2) search.";
183 break;
184 default:
185 break;
186 }
187
188 return term;
189}
190
191
192const OboPsiModTerm &
199
200
// Probes m_fileName with a fixed sequence of file readers and returns the MS
// run ids reported by the first probe that succeeds. Order, as visible below:
//   1. MzcborMsFileReader  (taken when its format is MsDataFormat::mzcbor)
//   2. PwizMsFileReader
//   3. TimsMsFileReader    (on m_fileName if it is a directory, otherwise on
//                           the directory that contains it)
//   4. BafAsciiFileReader
//   5. XyMsFileReader
// Visible side effects: the Pwiz, Tims, BafAscii and XY branches store the
// reader's format in m_fileFormat; the Pwiz and Tims branches overwrite
// m_fileReaderType with `pref->second` when `pref` is not the end of
// m_preferredFileReaderTypeMap; the Tims branch rewrites m_fileName to the
// directory it used.
// Only the Tims and BafAscii probes are wrapped in try/catch
// (ExceptionNotRecognized and PappsoException respectively).
// When no probe succeeds, the list obtained from the XY reader is returned;
// it is empty at that point, since the non-empty case returned earlier.
// NOTE(review): several listing lines are missing from this extract (202, 217,
// 231, 235, 266, 268, 276, 300, 304, 332), including the declarator, the
// declaration of `pref`, and the condition guarding the BafAscii `clear()`.
201std::vector<MsRunIdCstSPtr>
203{
204 qDebug();
205 // if (mzcbor_ms_file_reader.accept(m_fileName)) {
206 // }
207 std::vector<MsRunIdCstSPtr> ms_run_ids;
208
209
210 // try the mzcbor file reader
211 MzcborMsFileReader mzcbor_ms_file_reader(m_fileName);
212 if(mzcbor_ms_file_reader.getFileFormat() == pappso::Enums::MsDataFormat::mzcbor)
213 {
214 qDebug() << "sure, this is mzcbor";
215
216 ms_run_ids = mzcbor_ms_file_reader.getMsRunIds(m_xmlPrefix);
218 return ms_run_ids;
219 }
220
221 // Try the PwizMsFileReader
222
223 PwizMsFileReader pwiz_ms_file_reader(m_fileName);
224
225 ms_run_ids = pwiz_ms_file_reader.getMsRunIds(m_xmlPrefix);
226 if(ms_run_ids.size())
227 {
228 qDebug() << "Might well be handled using the Pwiz code.";
229
230 m_fileFormat = pwiz_ms_file_reader.getFileFormat();
232
233 // But the user might have configured one preferred reader type.
234
// NOTE(review): `pref` is declared on the missing listing line 235 --
// presumably a lookup in m_preferredFileReaderTypeMap; confirm.
236 if(pref != m_preferredFileReaderTypeMap.end())
237 {
238 m_fileReaderType = pref->second;
239 }
240
241 return ms_run_ids;
242 }
243
244 qDebug() << "The Pwiz reader did not work.";
245
246 // Try the TimsData reader
247
248 try
249 {
// Fall back to the parent directory when m_fileName is not itself a directory.
250 QString tims_dir = m_fileName;
251 if(!QFileInfo(tims_dir).isDir())
252 {
253 tims_dir = QFileInfo(m_fileName).absolutePath();
254 }
255
256 TimsMsFileReader tims_file_reader(tims_dir);
257
258 ms_run_ids = tims_file_reader.getMsRunIds(m_xmlPrefix);
259
260 if(ms_run_ids.size())
261 {
262 qDebug() << "Might well be handled using the Bruker code";
263
// The accessor now points at the directory rather than the original path.
264 m_fileName = tims_dir;
265 m_fileFormat = tims_file_reader.getFileFormat();
267
269 if(pref != m_preferredFileReaderTypeMap.end())
270 {
271 m_fileReaderType = pref->second;
272 }
273
274 qDebug() << "Returning Bruker::tims ms run(s)."
275 << "with preferred reader type:"
277
278 return ms_run_ids;
279 }
280 }
281 catch(const pappso::ExceptionNotRecognized &error)
282 {
283 qDebug() << "The Tims reader did not work.";
284 }
285
286
287 // Try the Baf->ascii export format from Bruker Compass
288
289 try
290 {
291 ms_run_ids.clear();
292 BafAsciiFileReader baf_ascii_ms_file_reader(m_fileName);
293
294 ms_run_ids = baf_ascii_ms_file_reader.getMsRunIds(m_xmlPrefix);
295
296 if(ms_run_ids.size())
297 {
298 qDebug() << "Might well be handled using the BafAscii code";
299
301
302 m_fileFormat = baf_ascii_ms_file_reader.getFileFormat();
303
// NOTE(review): the `if(...)` head for this block (listing line 304) is
// missing; when it holds, the ids are discarded and probing continues.
305 {
306 ms_run_ids.clear();
307 }
308 else
309 {
310 return ms_run_ids;
311 }
312 }
313 }
314 catch(const pappso::PappsoException &error)
315 {
316 qDebug() << "This is not a BafAscii code file" << error.qwhat();
317 }
318
319
320 qDebug() << "The BafAscii reader did not work.";
321
322 // At this point try the XyMsFileReader
323
324 XyMsFileReader xy_ms_file_reader(m_fileName);
325
326 ms_run_ids = xy_ms_file_reader.getMsRunIds(m_xmlPrefix);
327
328 if(ms_run_ids.size())
329 {
330 qDebug() << "Might well be handled using the XY code";
331
333
334 m_fileFormat = xy_ms_file_reader.getFileFormat();
335
336 return ms_run_ids;
337 }
338
339 qDebug() << "The XY reader did not work.";
340
341
342 return ms_run_ids;
343}
344
345
// Records `reader_type` as the preferred reader for `format` in
// m_preferredFileReaderTypeMap. The pair is first inserted; when the insert
// reports that the key already existed (ret.second is false), the existing
// entry's value is overwritten instead.
// NOTE(review): the first declarator line (listing line 347, carrying the
// function name and the `format` parameter) is missing from this extract.
346void
348 Enums::FileReaderType reader_type)
349{
350 // qDebug();
351
352 auto ret = m_preferredFileReaderTypeMap.insert(
353 std::pair<Enums::MsDataFormat, Enums::FileReaderType>(format, reader_type));
354
355 if(!ret.second)
356 {
357 // replace
358 ret.first->second = reader_type;
359 }
360}
361
362
// Looks up `format` in m_preferredFileReaderTypeMap and returns the reader
// type stored for it; when no entry exists, returns the accessor's current
// m_fileReaderType instead.
// NOTE(review): the declarator (listing lines 363-364) is missing from this
// extract; the symbol index at the end of the page lists it as
// `Enums::FileReaderType getpreferredFileReaderType(Enums::MsDataFormat format)`.
365{
366 // qDebug();
367
368 auto ret = m_preferredFileReaderTypeMap.find(format);
369
370 if(ret != m_preferredFileReaderTypeMap.end())
371 {
372 return ret->second;
373 }
374
375 return m_fileReaderType;
376}
377
378
384
385
// Stores `ms_run_id_csp` as the currently selected MS run id
// (mcsp_selectedMsRunId). No validation is performed here.
// NOTE(review): the declarator line (listing line 387) is missing from this
// extract.
386void
388{
389 mcsp_selectedMsRunId = ms_run_id_csp;
390}
391
392
398
// Builds a TimsMsRunReaderMs2 directly, bypassing the generic format probing.
// The Tims directory is m_fileName when that is a directory, otherwise the
// directory containing it. When the TimsMsFileReader reports at least one run
// id, m_fileFormat and m_fileName are updated and a reader for the first run
// id is returned. Otherwise the "Unable to read mz data directory ..." message
// below is built.
// NOTE(review): the declarator (listing lines 399-400), listing line 415 and
// the statement opener on listing line 423 (presumably a `throw(...)`) are
// missing from this extract -- confirm against the real source.
401{
402 // try TimsData reader
403 QString tims_dir = m_fileName;
404 if(!QFileInfo(tims_dir).isDir())
405 {
406 tims_dir = QFileInfo(m_fileName).absolutePath();
407 }
408 TimsMsFileReader tims_file_reader(tims_dir);
409
410 std::vector<MsRunIdCstSPtr> ms_run_ids = tims_file_reader.getMsRunIds(m_xmlPrefix);
411
412 if(ms_run_ids.size())
413 {
414 // qDebug() << "Might well be handled using the Bruker code";
416 m_fileFormat = tims_file_reader.getFileFormat();
417 m_fileName = tims_dir;
418
419 return std::make_shared<TimsMsRunReaderMs2>(ms_run_ids.front());
420 }
421 else
422 {
424 QObject::tr("Unable to read mz data directory %1 with TimsTOF reader.").arg(tims_dir)));
425 }
426}
427
428
// Returns an MsRunReader for `ms_run_id`, choosing the concrete reader class
// through the chain of branches below.
// Throws ExceptionNotPossible when the run id's file name differs from
// m_fileName, and PappsoException("No file format was found.") when no branch
// matches. The two branches that build a PwizMsRunReader also copy that
// reader's native ID format term into m_oboPsiModTermNativeIDFormat.
// NOTE(review): the declarator (listing lines 429-430) and every branch
// condition (listing lines 444, 450, 457, 463, 468, 474, 480, 487, 493) are
// missing from this extract, so what each branch tests cannot be read here;
// the debug strings suggest they test a file reader type.
431{
432 qDebug();
433
434 // We want to return a MsRunReader that accounts for the configuration that
435 // the user might have set.
436
// NOTE(review): the message says "the name file name"; it probably means
// "the same file name". It is a runtime string and is left untouched here.
437 if(m_fileName != ms_run_id->getFileName())
438 throw(
439 ExceptionNotPossible(QObject::tr("The MsRunId instance must have the name file name as the "
440 "MsFileAccessor. %1 != %2")
441 .arg(m_fileName)
442 .arg(ms_run_id->getFileName())));
443
445 {
446 qDebug() << "Returning a MzcborMsRunReader.";
447
448 return std::make_shared<MzcborMsRunReader>(ms_run_id);
449 }
451 {
452 qDebug() << "Returning a PwizMsRunReader.";
453 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
454 m_oboPsiModTermNativeIDFormat = pwiz_reader->getOboPsiModTermNativeIDFormat();
455 return pwiz_reader;
456 }
458 {
459 // qDebug() << "Returning a XyMsRunReader.";
460
461 return std::make_shared<XyMsRunReader>(ms_run_id);
462 }
464 {
465 qDebug() << "Returning a TimsMsRunReader Enums::FileReaderType::tims";
466 return std::make_shared<TimsMsRunReader>(ms_run_id);
467 }
469 {
470 qDebug() << "Returning a TimsFramesMsRunReader Enums::FileReaderType::tims_frames.";
471
472 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
473 }
475 {
476 qDebug() << "Returning a TimsMsRunReaderMs2 Enums::FileReaderType::tims_ms2";
477
478 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
479 }
481 {
482 qDebug() << "Returning a Enums::FileReaderType::tims_dia";
483
484 // qInfo() << "std::make_shared<TimsMsRunReaderDia>(ms_run_id);";
485 return std::make_shared<TimsMsRunReaderDia>(ms_run_id);
486 }
488 {
489 // qDebug() << "Returning a BafAsciiMsRunReader.";
490
491 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
492 }
494 {
// In this branch the reader is picked from the run id's own data format:
// XY data gets an XyMsRunReader, anything else a PwizMsRunReader.
495 if(ms_run_id.get()->getMsDataFormat() == Enums::MsDataFormat::xy)
496 {
497 return std::make_shared<XyMsRunReader>(ms_run_id);
498 }
499 else
500 {
501 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
502 m_oboPsiModTermNativeIDFormat = pwiz_reader->getOboPsiModTermNativeIDFormat();
503 return pwiz_reader;
504 }
505 }
506 else
507 {
508 throw PappsoException(QObject::tr("No file format was found."));
509 }
510
// NOTE(review): every visible branch above returns or throws, so this
// statement looks unreachable -- confirm once the missing conditions are
// restored.
511 return nullptr;
512}
513
514
// Convenience overload: returns the reader for the run id found at position
// `ms_run_id_index` in the list produced by getMsRunIds().
// Throws PappsoException when the index is not smaller than the list size.
// Note that getMsRunIds() is called on every invocation.
// NOTE(review): the return-type line (listing line 515) is missing from this
// extract.
516MsFileAccessor::msRunReaderSPtr(std::size_t ms_run_id_index)
517{
518 std::vector<MsRunIdCstSPtr> ms_run_ids = getMsRunIds();
519 if(ms_run_id_index >= ms_run_ids.size())
520 throw PappsoException(QObject::tr("MsRunId request out-of-bound error."));
521
522 return msRunReaderSPtr(ms_run_ids.at(ms_run_id_index));
523}
524
525
533
534
540
// Builds an MsRunReader from `ms_run_id` alone, using the data format recorded
// in the run id rather than any accessor state.
// Throws ExceptionNotFound when the run id's file does not exist, and
// PappsoException when the recorded format is MsDataFormat::unknown.
// Dispatch: xy -> XyMsRunReader; brukerBafAscii -> BafAsciiMsRunReader;
// brukerTims -> chosen by `preferred_file_reader_type` (tims, tims_ms2 or
// tims_frames, with TimsMsRunReader as the fallback); mzcbor ->
// MzcborMsRunReader; any other format -> PwizMsRunReader.
// NOTE(review): the first declarator lines (listing lines 541-542, carrying
// the function name and the `ms_run_id` parameter) are missing from this
// extract.
// NOTE(review): FileReaderType::tims_dia is not handled in the brukerTims
// branch and therefore falls through to the plain TimsMsRunReader, whereas
// MsFileAccessor::msRunReaderSPtr has a branch that builds TimsMsRunReaderDia.
// Confirm this is intended.
543 Enums::FileReaderType preferred_file_reader_type)
544{
545 QFile file(ms_run_id.get()->getFileName());
546 if(!file.exists())
547 throw(ExceptionNotFound(QObject::tr("unable to build a reader : file %1 not found.")
548 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
549
550 Enums::MsDataFormat file_format = ms_run_id.get()->getMsDataFormat();
551
552 if(file_format == Enums::MsDataFormat::xy)
553 {
554 // qDebug() << "Returning a XyMsRunReader.";
555
556 return std::make_shared<XyMsRunReader>(ms_run_id);
557 }
558 else if(file_format == Enums::MsDataFormat::brukerBafAscii)
559 {
560 // qDebug() << "Returning a BafAsciiMsRunReader.";
561
562 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
563 }
564 else if(file_format == Enums::MsDataFormat::unknown)
565 {
566 throw(PappsoException(QObject::tr("unable to build a reader for %1 : unknown file format")
567 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
568 }
569
570 else if(file_format == Enums::MsDataFormat::brukerTims)
571 {
572 if(preferred_file_reader_type == Enums::FileReaderType::tims)
573 {
574 return std::make_shared<TimsMsRunReader>(ms_run_id);
575 }
576 else if(preferred_file_reader_type == Enums::FileReaderType::tims_ms2)
577 {
578 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
579 }
580 else if(preferred_file_reader_type == Enums::FileReaderType::tims_frames)
581 {
582 qDebug() << "returning std::make_shared<TimsFramesMsRunReader>(ms_run_id).";
583 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
584 }
// Any other preferred reader type falls back to the plain Tims reader.
585 // qDebug() << "by default, build a TimsMsRunReader.";
586 return std::make_shared<TimsMsRunReader>(ms_run_id);
587 }
588 else if(file_format == Enums::MsDataFormat::mzcbor)
589 {
590 return std::make_shared<MzcborMsRunReader>(ms_run_id);
591 }
592 else
593 {
594 // qDebug() << "Returning a PwizMsRunReader .";
595 return std::make_shared<PwizMsRunReader>(ms_run_id);
596 }
597}
598
599
// Returns a reader for the run whose run id equals `run_id`, searching the
// list produced by getMsRunIds(). The matching MsRunId is copied, the copy's
// XML id is set to `xml_id`, and the reader is built from that copy via
// msRunReaderSPtr().
// Fallback: when `run_id` is empty and the file holds exactly one run, that
// single run is used.
// Throws ExceptionNotFound ("run id %1 not found in file %2") otherwise.
// NOTE(review): the return-type line (listing line 600) is missing from this
// extract; the symbol index at the end of the page gives it as MsRunReaderSPtr.
601MsFileAccessor::getMsRunReaderSPtrByRunId(const QString &run_id, const QString &xml_id)
602{
603 std::vector<MsRunIdCstSPtr> run_list = getMsRunIds();
// NOTE(review): reader_sp is never assigned anywhere in this function, so the
// null check further down is always true and the final `return reader_sp;`
// is dead code. Both successful paths return directly.
604 MsRunReaderSPtr reader_sp;
605 for(MsRunIdCstSPtr &original_run_id : run_list)
606 {
607 if(original_run_id.get()->getRunId() == run_id)
608 {
609 MsRunId new_run_id(*original_run_id.get());
610 new_run_id.setXmlId(xml_id);
611
612 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
613 }
614 }
615
616 if((run_id.isEmpty()) && (run_list.size() == 1))
617 {
618 MsRunId new_run_id(*run_list[0].get());
619 new_run_id.setXmlId(xml_id);
620
621 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
622 }
623
624
625 if(reader_sp == nullptr)
626 {
627 throw(ExceptionNotFound(QObject::tr("run id %1 not found in file %2")
628 .arg(run_id)
629 .arg(QFileInfo(m_fileName).absoluteFilePath())));
630 }
631 return reader_sp;
632}
633
634
635} // namespace pappso
virtual Enums::MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
exception to use when an item type is not recognized
const OboPsiModTerm & getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format of mz data
MsRunIdCstSPtr getSelectedMsRunId() const
MsRunReaderSPtr msRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
Enums::FileReaderType m_fileReaderType
Enums::MsDataFormat m_fileFormat
void setPreferredFileReaderType(Enums::MsDataFormat format, Enums::FileReaderType reader_type)
given an mz format, explicitly set the preferred reader
Enums::MsDataFormat getFileFormat() const
get the raw format of mz data
Enums::FileReaderType getFileReaderType() const
get the file reader type
MsRunReaderSPtr msRunReaderSPtrForSelectedMsRunId()
MsRunIdCstSPtr mcsp_selectedMsRunId
std::vector< MsRunIdCstSPtr > getMsRunIds()
OboPsiModTerm m_oboPsiModTermNativeIDFormat
void setSelectedMsRunId(MsRunIdCstSPtr ms_run_id_csp)
const OboPsiModTerm getOboPsiModTermFileFormat() const
get OboPsiModTerm corresponding to the raw format of mz data
MsRunReaderSPtr getMsRunReaderSPtrByRunId(const QString &run_id, const QString &xml_id)
get an msrun reader by finding the run_id in file
Enums::FileReaderType getpreferredFileReaderType(Enums::MsDataFormat format)
MsFileAccessor(const QString &file_name, const QString &xml_prefix)
static MsRunReaderSPtr buildMsRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
get an MsRunReader directly from a valid MsRun ID
std::map< Enums::MsDataFormat, Enums::FileReaderType > m_preferredFileReaderTypeMap
TimsMsRunReaderMs2SPtr buildTimsMsRunReaderMs2SPtr()
if possible, builds directly a dedicated Tims TOF tdf file reader
const QString & getFileName() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition msrunid.h:54
void setXmlId(const QString &xml_id)
set an XML unique identifier for this MsRunId
Definition msrunid.cpp:147
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual Enums::MsDataFormat getFileFormat() override
void setAccession(const QString &accession)
virtual const QString & qwhat() const
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual Enums::MsDataFormat getFileFormat() override
virtual Enums::MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
static QString fileReaderTypeAsString(Enums::FileReaderType file_reader_type)
Definition utils.cpp:529
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual Enums::MsDataFormat getFileFormat() override
@ unknown
unknown format
Definition types.h:149
@ SQLite3
SQLite3 format.
Definition types.h:153
@ MGF
Mascot format.
Definition types.h:152
@ pwiz
using libpwizlite
Definition types.h:177
@ tims
TimsMsRunReader : each scan is returned as a mass spectrum.
Definition types.h:181
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition msrunreader.h:57
std::shared_ptr< TimsMsRunReaderMs2 > TimsMsRunReaderMs2SPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46