libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
binarydataarray.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/core/processing/cbor/mzcbor/binarydataarray.cpp
3 * \date 25/11/2025
4 * \author Olivier Langella
5 * \brief PSI BinaryDataArray object for mzML/mzCBOR
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28
29#include "binarydataarray.h"
30#include "cvparam.h"
32#include <zlib.h>
33
34void
36{
37 QString txt_value;
38 reader.enterContainer();
39 // qDebug() << txt_value;
40 while(reader.hasNext() && (!reader.isInvalid()))
41 {
42 if(reader.isString())
43 {
44 if(reader.decodeString(txt_value))
45 {
46 // qDebug() << txt_value;
47 if(txt_value == "bits")
48 {
49 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
50 bits = reader.toUnsignedInteger();
51 reader.next();
52 }
53 else if(txt_value == "isInt")
54 {
55 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
56 isInt = reader.toBool();
57 reader.next();
58 }
59 else if(txt_value == "unit")
60 {
61 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
62 reader.decodeString(txt_value);
63 unit = txt_value;
64 }
65 else if(txt_value == "compress")
66 {
67 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
68 reader.decodeString(txt_value);
69 compress = txt_value;
70 }
71 else if(txt_value == "data")
72 {
73 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
74 // reader.next();
75 // qDebug() << reader.type();
76 auto r = reader.readByteArray();
77 while(r.status == QCborStreamReader::Ok)
78 {
79 byteArray += r.data;
80 r = reader.readByteArray();
81 }
82
83 if(r.status == QCborStreamReader::Error)
84 {
85 // handle error condition
86 // qDebug() << "error";
87 byteArray.clear();
88 }
89 }
90 else
91 {
92 reader.next();
93 }
94 }
95 else
96 {
97 reader.next();
98 }
99 }
100 else
101 {
102 reader.next();
103 }
104 }
105 reader.leaveContainer();
106}
107
108void
110{
111
112 writer.startMap();
113 writer.append("unit");
114 writer.append(unit);
115
116 writer.append("bits");
117 writer.append(bits);
118 writer.append("isInt");
119 writer.append(isInt);
120
121 writer.append("compress");
122 writer.append(compress);
123
124 writer.append("data");
125 writer.append(byteArray);
126 writer.endMap();
127}
128
129void
131{
132
133 //<binaryDataArray encodedLength="6380">
134 std::size_t encodedLength = reader.attributes().value("encodedLength").toULongLong();
135 while(reader.readNext() && !reader.isEndElement())
136 {
137 if(reader.isStartElement())
138 {
139 if(reader.name().toString() == "cvParam")
140 {
141 QString accession = reader.attributes().value("accession").toString();
142
143 //<cvParam cvRef="MS" accession="MS:1000523" value="" name="64-bit float" />
144 if(accession == "MS:1000523")
145 {
146 bits = 64;
147 isInt = false;
148 }
149 else if(accession == "MS:1000519")
150 {
151 /*
152 *
153[Term]
154id: MS:1000519
155name: 32-bit integer
156def: "Signed 32-bit little-endian integer." [PSI:MS]
157is_a: MS:1000518 ! binary data type
158*/
159 bits = 32;
160 isInt = true;
161 }
162 else if(accession == "MS:1000521")
163 {
164 /*
165 [Term]
166 id: MS:1000521
167 name: 32-bit float
168 def: "32-bit precision little-endian floating point conforming to IEEE-754." [PSI:MS]
169 is_a: MS:1000518 ! binary data type
170 */
171 bits = 32;
172 isInt = false;
173 }
174 else if(accession == "MS:1000522")
175 {
176
177 /*
178 [Term]
179 id: MS:1000522
180 name: 64-bit integer
181 def: "Signed 64-bit little-endian integer." [PSI:MS]
182 is_a: MS:1000518 ! binary data type*/
183 bits = 64;
184 isInt = true;
185 }
186
187 //<cvParam cvRef="MS" accession="MS:1000574" value="" name="zlib compression" />
188 else if(accession == "MS:1000574")
189 {
190 compress = "zlib";
191 }
192 else if(accession == "MS:1000576")
193 {
194 /*
195 [Term]
196 id: MS:1000576
197 name: no compression
198 def: "No Compression." [PSI:MS]
199 is_a: MS:1000572 ! binary data compression type
200 */
201 compress = "none";
202 }
203
204 else if(accession == "MS:1000515")
205 {
206 unit = accession;
207 }
208 else if(accession == "MS:1000514")
209 {
210 // <cvParam cvRef="MS" accession="MS:1000514" value="" name="m/z array"
211 // unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
212
213 unit = accession;
214 }
215 else
216 {
217 reader.raiseError(
218 QObject::tr("cvParam accession %1 is not known in binaryDataArray")
219 .arg(accession));
220 }
221 reader.skipCurrentElement();
222 }
223 else if(reader.name().toString() == "binary")
224 {
225
226 while(reader.readNext() && !reader.isEndElement())
227 {
228 if(reader.isCharacters())
229 {
230 // clean content:
231 QStringView content = reader.text().trimmed();
232 if((reader.text().toString() == "\n") || (reader.text().toString() == "\n\t"))
233 {
234 }
235 else
236 {
237 // text node
238 if(!content.isEmpty())
239 {
240 // qDebug() << "text isCharacters" << content.mid(0, 10);
241
242 if((std::size_t)reader.text().size() != encodedLength)
243 {
244 qWarning() << "reader.text().size() != encodedLength"
245 << reader.text().size() << " " << encodedLength;
246 }
247
248 // mp_cborWriter->append("@text@");
249 // mp_cborWriter->append(content);
250 byteArray = byteArray.fromBase64(reader.text().trimmed().toLatin1());
251 }
252 }
253 }
254 }
255 }
256 else
257 {
258 reader.skipCurrentElement();
259 }
260 }
261 }
262}
263
264
265void
267{
268 //<binaryDataArray encodedLength="1152">
269 writer.writeStartElement("binaryDataArray");
270 auto base64 = byteArray.toBase64();
271 writer.writeAttribute("encodedLength", QString("%1").arg(base64.size()));
272 // <cvParam cvRef="MS" accession="MS:1000514" value="" name="m/z array"
273 // unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
274 CvParam cv_param;
275 cv_param.cvRef = "MS";
276
277 if(unit == "MS:1000514")
278 {
279 cv_param.accession = unit;
280 cv_param.name = "m/z array";
281 cv_param.unitCvRef = "MS";
282 cv_param.unitAccession = "MS:1000040";
283 cv_param.unitName = "m/z";
284 cv_param.setValue("");
285 cv_param.toMzml(writer);
286 }
287 else if(unit == "MS:1000515")
288 {
289 //<cvParam cvRef="MS" accession="MS:1000515" value="" name="intensity array"
290 // unitAccession="MS:1000131" unitName="number of counts" unitCvRef="MS" />
291
292 cv_param.accession = unit;
293 cv_param.name = "intensity array";
294 cv_param.unitCvRef = "MS";
295 cv_param.unitAccession = "MS:1000131";
296 cv_param.unitName = "number of counts";
297 cv_param.setValue("");
298 cv_param.toMzml(writer);
299 }
300
301 // <cvParam cvRef="MS" accession="MS:1000523" value="" name="64-bit float" />
302
303 cv_param.unitCvRef.clear();
304 cv_param.unitAccession.clear();
305 cv_param.unitName.clear();
306 cv_param.setValue("");
307 if(isInt)
308 {
309 /*
310 id: MS:1000519
311 name: 32-bit integer
312 def: "Signed 32-bit little-endian integer." [PSI:MS]
313 is_a: MS:1000518 ! binary data type*/
314 if(bits == 32)
315 {
316 cv_param.accession = "MS:1000519";
317 cv_param.name = "32-bit integer";
318 cv_param.toMzml(writer);
319 }
320 else if(bits == 64)
321 {
322 /*
323 [Term]
324 id: MS:1000522
325 name: 64-bit integer
326 def: "Signed 64-bit little-endian integer." [PSI:MS]
327 is_a: MS:1000518 ! binary data type*/
328 cv_param.accession = "MS:1000522";
329 cv_param.name = "64-bit integer";
330 cv_param.toMzml(writer);
331 }
332 }
333 else
334 {
335 if(bits == 64)
336 {
337 cv_param.accession = "MS:1000523";
338 cv_param.name = "64-bit float";
339 cv_param.toMzml(writer);
340 }
341 else if(bits == 32)
342 {
343 /*
344 [Term]
345 id: MS:1000521
346 name: 32-bit float
347 def: "32-bit precision little-endian floating point conforming to IEEE-754." [PSI:MS]
348 is_a: MS:1000518 ! binary data type
349 */
350 cv_param.accession = "MS:1000521";
351 cv_param.name = "32-bit float";
352 cv_param.toMzml(writer);
353 }
354 }
355 // <cvParam cvRef="MS" accession="MS:1000574" value="" name="zlib compression" />
356 /*
357
358[Term]
359id: MS:1000520
360name: 16-bit float
361def: "OBSOLETE Signed 16-bit float." [PSI:MS]
362is_a: MS:1000518 ! binary data type
363is_obsolete: true
364
365
366[Term]
367id: MS:1000523
368name: 64-bit float
369def: "64-bit precision little-endian floating point conforming to IEEE-754." [PSI:MS]
370is_a: MS:1000518 ! binary data type
371*/
372
373 if(compress == "zlib")
374 {
375 cv_param.accession = "MS:1000574";
376 cv_param.name = "zlib compression";
377 cv_param.toMzml(writer);
378 }
379 else if(compress == "none")
380 { /*
381[Term]
382id: MS:1000576
383name: no compression
384def: "No Compression." [PSI:MS]
385is_a: MS:1000572 ! binary data compression type
386*/
387 cv_param.accession = "MS:1000576";
388 cv_param.name = "no compression";
389 cv_param.toMzml(writer);
390 }
391
392 // <binary>eJwl0W9oW1U
393 // writer.writeStartElement("binary");
394 writer.writeTextElement("binary", base64);
395 // </binary>
396 // writer.writeEndElement();
397 // </binaryDataArray>
398 writer.writeEndElement();
399}
400
401
402void
404 std::vector<double> &double_list) const
405{
406
407 int size_in_byte = 8;
408 if(bits == 32)
409 {
410 size_in_byte = 4;
411 }
412
413 // if(result.decodingStatus == QByteArray::Base64DecodingStatus::Ok)
414 // { // Allocate buffer for decompressed data
415 if(compress == "zlib")
416 {
417 std::vector<unsigned char> data_heap;
418 uLongf decompressedSize = estimated_length * size_in_byte; // Estimate size
419 data_heap.resize(decompressedSize);
420
421 // Decompress the data
422 int result_zlib = uncompress(
423 data_heap.data(), &decompressedSize, (Bytef *)byteArray.constData(), byteArray.size());
424
425 if(result_zlib != Z_OK)
426 {
427 throw pappso::PappsoException(QObject::tr("Decompression failed: %1").arg(result_zlib));
428 }
429
430 // Resize the vector to the actual decompressed size
431 data_heap.resize(decompressedSize);
432 double_list.resize(decompressedSize / size_in_byte);
433
434
435 // double *double_ptr = (double *)&decompressedData[0];
436 std::size_t j = 0;
437 for(std::size_t i = 0; i < data_heap.size(); i += size_in_byte)
438 {
439 if(bits == 32)
440 {
441 if(isInt)
442 {
443 double_list[j] = *(std::int32_t *)&data_heap[i];
444 }
445 else
446 {
447 double_list[j] = *(std::float_t *)&data_heap[i];
448 }
449 }
450 else
451 {
452 if(isInt)
453 {
454 double_list[j] = *(std::int64_t *)&data_heap[i];
455 }
456 else
457 {
458 double_list[j] = *(double *)&data_heap[i];
459 }
460 }
461 // double_ptr++;
462 j++;
463 }
464 }
465 else if(compress == "none")
466 {
467
468 // double *double_ptr = (double *)&decompressedData[0];
469 std::size_t j = 0;
470 for(std::size_t i = 0; i < (std::size_t)byteArray.size(); i += size_in_byte)
471 {
472 if(bits == 32)
473 {
474 if(isInt)
475 {
476 double_list[j] = *(std::int32_t *)&byteArray.constData()[i];
477 }
478 else
479 {
480 double_list[j] = *(std::float_t *)&byteArray.constData()[i];
481 }
482 }
483 else
484 {
485 if(isInt)
486 {
487 double_list[j] = *(std::int64_t *)&byteArray.constData()[i];
488 }
489 else
490 {
491 double_list[j] = *(double *)&byteArray.constData()[i];
492 }
493 }
494 // double_ptr++;
495 j++;
496 }
497 }
498
499
500 // std::vector<double> v(decompressedData.cbegin(), decompressedData.cend());
501 // qDebug() << j << " " << double_list.size();
502}
503
// Predicate on the `unit` member: returns true when it equals the accession
// "MS:1000515", which toMzml() in this file writes with the name
// "intensity array".
// NOTE(review): the declarator line (function name and parameter list) is
// absent from this listing, so the name of this member is not visible here.
504bool
506{
507 // <cvParam cvRef="MS" accession="MS:1000515" value="" name="intensity array"
508 // unitAccession="MS:1000131" unitName="number of counts" unitCvRef="MS" />
509
510 return unit == "MS:1000515";
511}
512
// Predicate on the `unit` member: returns true when it equals the accession
// "MS:1000514", which toMzml() in this file writes with the name "m/z array".
// NOTE(review): the declarator line (function name and parameter list) is
// absent from this listing, so the name of this member is not visible here.
513bool
515{
516 // <cvParam cvRef="MS" accession="MS:1000514" value="" name="m/z array"
517 // unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
518
519 return unit == "MS:1000514";
520}
PSI BinaryDataArray object for mzML/mzCBOR.
bool decodeString(QString &the_str)
Decode the current CBOR value as a string, then point to the next value; the current value is decoded as...
PSI cvParam object for mzML/mzCBOR.
void fromCbor(CborStreamReader &reader)
void fromMzml(QXmlStreamReader &reader)
void toMzml(QXmlStreamWriter &writer)
void toCbor(CborStreamWriter &writer)
void decodeVector(std::size_t estimated_length, std::vector< double > &double_list) const
void setValue(const QString &value_str)
Definition cvparam.cpp:201
void toMzml(QXmlStreamWriter &writer)
Definition cvparam.cpp:209