scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
ChoosePseudoCount.hpp
Go to the documentation of this file.
1#ifndef SCRAN_CHOOSE_PSEUDO_COUNT_HPP
2#define SCRAN_CHOOSE_PSEUDO_COUNT_HPP
3
4#include <algorithm>
5#include <vector>
6#include <cmath>
7
13namespace scran {
14
36public:
40 struct Defaults {
44 static constexpr double quantile = 0.05;
45
49 static constexpr double max_bias = 0.1;
50
54 static constexpr double min_value = 1;
55 };
56
57private:
58 double quantile = Defaults::quantile;
59 double max_bias = Defaults::max_bias;
60 double min_value = Defaults::min_value;
61
62public:
71 quantile = q;
72 return *this;
73 }
74
81 max_bias = b;
82 return *this;
83 }
84
92 min_value = v;
93 return *this;
94 }
95
96public:
100 static double find_quantile(double quantile, size_t n, double* ptr) {
101 double raw = static_cast<double>(n - 1) * quantile;
102 size_t index = std::ceil(raw);
103 std::nth_element(ptr, ptr + index, ptr + n);
104 double upper = *(ptr + index);
105 std::nth_element(ptr, ptr + index - 1, ptr + index);
106 double lower = *(ptr + index - 1);
107 return lower * (index - raw) + upper * (raw - (index - 1));
108 }
113public:
122 double run(size_t n, const double* size_factors, double* buffer) const {
123 if (n <= 1) {
124 return min_value;
125 }
126
127 // Avoid problems with zeros.
128 size_t counter = 0;
129 for (size_t i = 0; i < n; ++i) {
130 if (size_factors[i] > 0) {
131 buffer[counter] = size_factors[i];
132 ++counter;
133 }
134 }
135 n = counter;
136
137 if (n <= 1) {
138 return min_value;
139 }
140
141 double lower_sf, upper_sf;
142 if (quantile == 0) {
143 lower_sf = *std::min_element(buffer, buffer + n);
144 upper_sf = *std::max_element(buffer, buffer + n);
145 } else {
146 lower_sf = find_quantile(quantile, n, buffer);
147 upper_sf = find_quantile(1 - quantile, n, buffer);
148 }
149
150 // Very confusing formulation in Equation 3, but whatever.
151 double pseudo_count = (1 / lower_sf - 1 / upper_sf) / (8 * max_bias);
152
153 return std::max(min_value, pseudo_count);
154 }
155
163 double run(size_t n, const double* size_factors) const {
164 std::vector<double> buffer(n);
165 return run(n, size_factors, buffer.data());
166 }
167};
168
169}
170
171#endif
Choose a pseudo-count for log-transformation.
Definition ChoosePseudoCount.hpp:35
ChoosePseudoCount & set_max_bias(double b=Defaults::max_bias)
Definition ChoosePseudoCount.hpp:80
ChoosePseudoCount & set_min_value(double v=Defaults::min_value)
Definition ChoosePseudoCount.hpp:91
double run(size_t n, const double *size_factors) const
Definition ChoosePseudoCount.hpp:163
ChoosePseudoCount & set_quantile(double q=Defaults::quantile)
Definition ChoosePseudoCount.hpp:70
double run(size_t n, const double *size_factors, double *buffer) const
Definition ChoosePseudoCount.hpp:122
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
Default parameters.
Definition ChoosePseudoCount.hpp:40
static constexpr double quantile
Definition ChoosePseudoCount.hpp:44
static constexpr double max_bias
Definition ChoosePseudoCount.hpp:49
static constexpr double min_value
Definition ChoosePseudoCount.hpp:54