19#include <unordered_map>
26#include "dirtyAllocator.h"
29#include "marginalTrek++.h"
37unsigned int parse_formula(
const char* formula,
38 std::vector<double>& isotope_masses,
39 std::vector<double>& isotope_probabilities,
42 unsigned int* confSize,
43 bool use_nominal_masses =
false);
50class ISOSPEC_EXPORT_SYMBOL
Iso {
59 void setupMarginals(
const double* _isotopeMasses,
60 const double* _isotopeProbabilities);
71 bool doMarginalsNeedSorting()
const;
86 const int* _isotopeNumbers,
87 const int* _atomCounts,
88 const double* _isotopeMasses,
89 const double* _isotopeProbabilities
93 const int* _isotopeNumbers,
94 const int* _atomCounts,
95 const double*
const * _isotopeMasses,
96 const double*
const * _isotopeProbabilities
100 Iso(
const char* formula,
bool use_nominal_masses =
false);
103 inline Iso(
const std::string& formula,
bool use_nominal_masses =
false) :
Iso(formula.c_str(), use_nominal_masses) {}
114 static Iso FromFASTA(
const char* fasta,
bool use_nominal_masses =
false,
bool add_water =
true);
117 static inline Iso FromFASTA(
const std::string& fasta,
bool use_nominal_masses =
false,
bool add_water =
true) {
return FromFASTA(fasta.c_str(), use_nominal_masses, add_water); }
123 Iso& operator=(
const Iso& other) =
delete;
130 Iso(
const Iso& other,
bool fullcopy);
136 double getLightestPeakMass()
const;
139 double getHeaviestPeakMass()
const;
146 double getMonoisotopicPeakMass()
const;
149 double getModeLProb()
const;
152 double getUnlikeliestPeakLProb()
const;
155 double getModeMass()
const;
158 double getTheoreticalAverageMass()
const;
161 double variance()
const;
164 double stddev()
const {
return sqrt(variance()); }
173 void addElement(
int atomCount,
int noIsotopes,
const double* isotopeMasses,
const double* isotopeProbabilities);
176 void saveMarginalLogSizeEstimates(
double* priorities,
double target_total_prob)
const;
187 const double mode_lprob;
205 virtual double lprob()
const {
return partialLProbs[0]; }
211 virtual double mass()
const {
return partialMasses[0]; }
217 virtual double prob()
const {
return partialProbs[0]; }
237template<
typename MarginalType>
241 MarginalType** marginalResults;
242 std::priority_queue<void*, pod_vector<void*>,
ConfOrder> pq;
256 bool advanceToNextConfiguration()
override final;
265 if constexpr (std::is_same<MarginalType, MarginalTrek>::value)
267 int* c = getConf(topConf);
272 for(
int ii = 0; ii < dimNumber; ii++)
274 memcpy(space, marginalResults[ii]->confs()[c[ii]], isotopeNumbers[ii]*
sizeof(
int));
275 space += isotopeNumbers[ii];
282 throw std::runtime_error(
"IsoOrderedGeneratorTemplate::get_conf_signature() called on a non-MarginalTrek generator. This is not supported yet.");
291 inline void get_conf_by_indexes(
int* space)
293 if constexpr (std::is_same<MarginalType, SingleAtomMarginal<false>>::value)
298 int* c = getConf(topConf);
299 space[0] = std::max(c[0]-1, 0);
301 for(
int ii = 1; ii < dimNumber; ii++)
307using IsoOrderedGenerator = IsoOrderedGeneratorTemplate<MarginalTrek>;
320 double* maxConfsLPSum;
321 const double Lcutoff;
326 const double* lProbs_ptr;
327 const double* lProbs_ptr_start;
328 double* partialLProbs_second;
329 double partialLProbs_second_val, lcfmsv;
338 counter[0] = lProbs_ptr - lProbs_ptr_start;
339 if(marginalOrder !=
nullptr)
341 for(
int ii = 0; ii < dimNumber; ii++)
343 int jj = marginalOrder[ii];
344 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*
sizeof(
int));
345 space += isotopeNumbers[ii];
350 for(
int ii = 0; ii < dimNumber; ii++)
352 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*
sizeof(
int));
353 space += isotopeNumbers[ii];
367 IsoThresholdGenerator(
Iso&& iso,
double _threshold,
bool _absolute =
true,
int _tabSize = 1000,
int _hashSize = 1000,
bool reorder_marginals =
true);
377 if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
385 lProbs_ptr = lProbs_ptr_start;
387 int * cntr_ptr = counter;
389 while(idx < dimNumber-1)
397 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->
get_lProb(counter[idx]);
398 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
400 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->
get_mass(counter[idx]);
401 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->
get_prob(counter[idx]);
412 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return partialLProbs_second_val + (*(lProbs_ptr)); }
413 ISOSPEC_FORCE_INLINE
double mass() const override final {
return partialMasses[1] + marginalResults[0]->
get_mass(lProbs_ptr - lProbs_ptr_start); }
414 ISOSPEC_FORCE_INLINE
double prob() const override final {
return partialProbs[1] * marginalResults[0]->
get_prob(lProbs_ptr - lProbs_ptr_start); }
417 void terminate_search();
429 size_t count_confs();
433 ISOSPEC_FORCE_INLINE
void recalc(
int idx)
435 for(; idx > 0; idx--)
437 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->
get_lProb(counter[idx]);
438 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->
get_mass(counter[idx]);
439 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->
get_prob(counter[idx]);
441 partialLProbs_second_val = *partialLProbs_second;
442 partialLProbs[0] = *partialLProbs_second + marginalResults[0]->
get_lProb(counter[0]);
443 lcfmsv = Lcutoff - partialLProbs_second_val;
446 ISOSPEC_FORCE_INLINE
void short_recalc(
int idx)
448 for(; idx > 0; idx--)
449 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
450 partialLProbs_second_val = *partialLProbs_second;
451 partialLProbs[0] = *partialLProbs_second + marginalResults[0]->
get_lProb(counter[0]);
452 lcfmsv = Lcutoff - partialLProbs_second_val;
459template<
typename MarginalType>
464 double* maxConfsLPSum;
465 double currentLThreshold, lastLThreshold;
466 MarginalType** marginalResults;
467 MarginalType** marginalResultsUnsorted;
470 const double* lProbs_ptr;
471 const double* lProbs_ptr_start;
472 const double** resetPositions;
473 double* partialLProbs_second;
474 double partialLProbs_second_val, lcfmsv, last_lcfmsv;
475 bool marginalsNeedSorting;
484 counter[0] = lProbs_ptr - lProbs_ptr_start;
485 if(marginalOrder !=
nullptr)
487 for(
int ii = 0; ii < dimNumber; ii++)
489 int jj = marginalOrder[ii];
490 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*
sizeof(
int));
491 space += isotopeNumbers[ii];
496 for(
int ii = 0; ii < dimNumber; ii++)
498 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*
sizeof(
int));
499 space += isotopeNumbers[ii];
504 inline double get_currentLThreshold()
const {
return currentLThreshold; }
506 IsoLayeredGeneratorTemplate(Iso&& iso,
int _tabSize = 1000,
int _hashSize = 1000,
bool reorder_marginals =
true,
double t_prob_hint = 0.99);
508 ~IsoLayeredGeneratorTemplate();
514 if(advanceToNextConfigurationWithinLayer())
520 ISOSPEC_FORCE_INLINE
bool advanceToNextConfigurationWithinLayer()
525 if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
532 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return partialLProbs_second_val + (*(lProbs_ptr)); };
533 ISOSPEC_FORCE_INLINE
double mass() const override final {
return partialMasses[1] + marginalResults[0]->get_mass(lProbs_ptr - lProbs_ptr_start); };
534 ISOSPEC_FORCE_INLINE
double prob() const override final {
return partialProbs[1] * marginalResults[0]->get_prob(lProbs_ptr - lProbs_ptr_start); };
537 void terminate_search();
541 ISOSPEC_FORCE_INLINE
void recalc(
int idx)
543 for(; idx > 0; idx--)
545 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
546 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
547 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
549 partialLProbs_second_val = *partialLProbs_second;
550 partialLProbs[0] = partialLProbs_second_val + marginalResults[0]->get_lProb(counter[0]);
551 lcfmsv = currentLThreshold - partialLProbs_second_val;
552 last_lcfmsv = lastLThreshold - partialLProbs_second_val;
555 bool nextLayer(
double offset);
557 void get_conf_by_indexes(
int* space)
const
559 if constexpr (std::is_same<MarginalType, SingleAtomMarginal<true>>::value)
561 counter[0] = lProbs_ptr - lProbs_ptr_start;
562 if(marginalOrder !=
nullptr)
564 for(
int ii = 0; ii < dimNumber; ii++)
566 int jj = marginalOrder[ii];
567 space[ii] = marginalResultsUnsorted[ii]->get_original_position(counter[jj]);
572 for(
int ii = 0; ii < dimNumber; ii++)
573 space[ii] = marginalResultsUnsorted[ii]->get_original_position(counter[ii]);
577 throw std::runtime_error(
"IsoLayeredGeneratorTemplate::get_conf_by_indexes() called on a non-SingleAtomMarginal generator. This is not supported yet.");
583using IsoLayeredGenerator = IsoLayeredGeneratorTemplate<LayeredMarginal>;
585template<
typename IsoType>
589 size_t to_sample_left;
590 const double precision;
591 const double beta_bias;
594 size_t current_count;
595 std::mt19937& rdvariate_gen;
600 ISOSPEC_FORCE_INLINE
size_t count()
const {
return current_count; }
602 ISOSPEC_FORCE_INLINE
double mass() const override final {
return ILG.mass(); }
604 ISOSPEC_FORCE_INLINE
double prob() const override final {
return static_cast<double>(count()); }
606 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return log(
prob()); }
608 ISOSPEC_FORCE_INLINE
void get_conf_signature(
int* space)
const override final { ILG.get_conf_signature(space); }
617 double curr_conf_prob_left, current_prob;
619 if(to_sample_left <= 0)
622 if(confs_prob < chasing_prob)
627 if(!ILG.advanceToNextConfiguration())
629 current_prob = ILG.prob();
630 confs_prob += current_prob;
631 while(confs_prob <= chasing_prob)
633 if(!ILG.advanceToNextConfiguration())
635 current_prob = ILG.prob();
636 confs_prob += current_prob;
638 if(to_sample_left <= 0)
640 curr_conf_prob_left = confs_prob - chasing_prob;
646 if(!ILG.advanceToNextConfiguration())
648 current_prob = ILG.prob();
649 confs_prob += current_prob;
650 curr_conf_prob_left = current_prob;
653 double prob_left_to_1 = precision - chasing_prob;
654 double expected_confs = curr_conf_prob_left * to_sample_left / prob_left_to_1;
656 if(expected_confs <= beta_bias)
659 chasing_prob += rdvariate_beta_1_b(to_sample_left, rdvariate_gen) * prob_left_to_1;
660 while(chasing_prob <= confs_prob)
664 if(to_sample_left == 0)
666 prob_left_to_1 = precision - chasing_prob;
667 chasing_prob += rdvariate_beta_1_b(to_sample_left, rdvariate_gen) * prob_left_to_1;
669 if(current_count > 0)
675 size_t rbin = rdvariate_binom(to_sample_left, curr_conf_prob_left/prob_left_to_1, rdvariate_gen);
676 current_count += rbin;
677 to_sample_left -= rbin;
678 chasing_prob = confs_prob;
679 if(current_count > 0)
685 ISOSPEC_FORCE_INLINE
void get_indexes(
int* space)
687 ILG.get_conf_by_indexes(space);
691using IsoStochasticGenerator = IsoStochasticGeneratorTemplate<IsoLayeredGenerator>;
The generator of isotopologues.
virtual void get_conf_signature(int *space) const =0
Write the signature of configuration into target memory location. It must be large enough to accomoda...
virtual bool advanceToNextConfiguration()=0
Advance to the next, not yet visited, most probable isotopologue.
virtual double mass() const
Get the mass of the current isotopologue.
virtual double lprob() const
Get the log-probability of the current isotopologue.
virtual double prob() const
Get the probability of the current isotopologue.
The Iso class for the calculation of the isotopic distribution.
double stddev() const
Get the standard deviation of the theoretical distribution.
static Iso FromFASTA(const std::string &fasta, bool use_nominal_masses=false, bool add_water=true)
Constructor (named) from aminoacid FASTA sequence as C++ std::string. See above for details.
int getDimNumber() const
Get the number of elements in the chemical formula of the molecule.
int getAllDim() const
Get the total number of isotopes of elements present in a chemical formula.
Iso(const std::string &formula, bool use_nominal_masses=false)
Constructor from C++ std::string chemical formula.
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
ISOSPEC_FORCE_INLINE void recalc(int idx)
Recalculate the current partial log-probabilities, masses, and probabilities.
void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
The generator of isotopologues sorted by their probability of occurrence.
void get_conf_signature(int *space) const override final
Save the counts of isotopes in the space.
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
ISOSPEC_FORCE_INLINE void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
The generator of isotopologues above a given threshold value.
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
The marginal distribution class (a subisotopologue).
Precalculated Marginal class.
const double & get_prob(int idx) const
Get the probability of the idx-th subisotopologue.
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.
const double & get_mass(int idx) const
Get the mass of the idx-th subisotopologue.