scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
SanitizeSizeFactors.hpp
1#ifndef SCRAN_NORMALIZATION_SANITIZE_SIZE_FACTORS_HPP
2#define SCRAN_NORMALIZATION_SANITIZE_SIZE_FACTORS_HPP
3
#include <cmath>
#include <cstddef>
#include <limits>
#include <stdexcept>
6
7namespace scran {
8
16 bool has_negative = false;
17
21 bool has_zero = false;
22
26 bool has_nan = false;
27
31 bool has_infinite = false;
32};
33
45template<typename T>
46SizeFactorValidity validate_size_factors(size_t n, const T* size_factors) {
47 SizeFactorValidity output;
48
49 for (size_t i = 0; i < n; ++i) {
50 auto sf = size_factors[i];
51 if (sf < 0) {
52 output.has_negative = true;
53 } else if (sf == 0) {
54 output.has_zero = true;
55 } else if (std::isnan(sf)) {
56 output.has_nan = true;
57 } else if (std::isinf(sf)) {
58 output.has_infinite = true;
59 }
60 }
61
62 return output;
63}
64
76public:
/**
 * Action to take when a particular kind of invalid size factor is detected.
 *
 * - `IGNORE`: leave the offending values as they are.
 * - `ERROR`: throw an exception.
 * - `SANITIZE`: overwrite the offending values with a valid replacement.
 */
enum class HandlerAction : char {
    IGNORE,
    ERROR,
    SANITIZE
};
85
89 struct Defaults {
93 static constexpr HandlerAction handle_zero = HandlerAction::ERROR;
94
98 static constexpr HandlerAction handle_negative = HandlerAction::ERROR;
99
103 static constexpr HandlerAction handle_nan = HandlerAction::ERROR;
104
108 static constexpr HandlerAction handle_infinite = HandlerAction::ERROR;
109 };
110
111private:
116
117public:
134 handle_zero = h;
135 return *this;
136 }
137
148 handle_negative = h;
149 return *this;
150 }
151
161 handle_nan = h;
162 return *this;
163 }
164
175 handle_infinite = h;
176 return *this;
177 }
178
179public:
192
205
206private:
/**
 * Find the smallest positive, finite value in an array of size factors.
 *
 * @tparam T Numeric type of the size factors.
 * @param n Number of size factors.
 * @param size_factors Pointer to an array of length `n`; it is only read.
 *
 * @return The smallest value that is greater than zero and less than infinity,
 * or 1 if the array contains no such value.
 */
template<typename T>
static double find_smallest_valid_factor(size_t n, T* size_factors) {
    double best = std::numeric_limits<double>::infinity();

    const T* const last = size_factors + n;
    for (const T* cursor = size_factors; cursor != last; ++cursor) {
        const auto candidate = *cursor;
        // NaN fails both comparisons and +Inf is never below the initial 'best',
        // so neither needs an explicit check.
        if (candidate > 0 && candidate < best) {
            best = candidate;
        }
    }

    // Still at the starting value: nothing valid was seen, so fall back to 1.
    return std::isinf(best) ? 1.0 : best;
}
222
/**
 * Find the largest positive, finite value in an array of size factors.
 *
 * @tparam T Numeric type of the size factors.
 * @param n Number of size factors.
 * @param size_factors Pointer to an array of length `n`; it is only read.
 *
 * @return The largest finite value that is greater than zero,
 * or 1 if the array contains no such value.
 */
template<typename T>
static double find_largest_valid_factor(size_t n, T* size_factors) {
    double top = 0;

    const T* const last = size_factors + n;
    for (const T* cursor = size_factors; cursor != last; ++cursor) {
        const auto candidate = *cursor;
        if (!std::isfinite(candidate)) {
            continue; // skips NaN and both infinities.
        }
        if (candidate > top) {
            top = candidate;
        }
    }

    // Still at the starting value: no positive finite value was seen, so fall back to 1.
    return top == 0 ? 1.0 : top;
}
239
240public:
250 template<typename T>
251 void run(size_t n, T* size_factors, const SizeFactorValidity& status) const {
252 T smallest = -1;
253
254 if (status.has_negative) {
255 if (handle_negative == HandlerAction::ERROR) {
256 throw std::runtime_error("detected negative size factor");
257 } else if (handle_negative == HandlerAction::SANITIZE) {
258 smallest = find_smallest_valid_factor(n, size_factors);
259 for (size_t i = 0; i < n; ++i) {
260 auto& s = size_factors[i];
261 if (s < 0) {
262 s = smallest;
263 }
264 }
265 }
266 }
267
268 if (status.has_zero) {
269 if (handle_zero == HandlerAction::ERROR) {
270 throw std::runtime_error("detected size factor of zero");
271 } else if (handle_zero == HandlerAction::SANITIZE) {
272 if (smallest < 0) {
273 smallest = find_smallest_valid_factor(n, size_factors);
274 }
275 for (size_t i = 0; i < n; ++i) {
276 auto& s = size_factors[i];
277 if (s == 0) {
278 s = smallest;
279 }
280 }
281 }
282 }
283
284 if (status.has_nan) {
285 if (handle_nan == HandlerAction::ERROR) {
286 throw std::runtime_error("detected NaN size factor");
287 } else if (handle_nan == HandlerAction::SANITIZE) {
288 for (size_t i = 0; i < n; ++i) {
289 auto& s = size_factors[i];
290 if (std::isnan(s)) {
291 s = 1;
292 }
293 }
294 }
295 }
296
297 if (status.has_infinite) {
298 if (handle_infinite == HandlerAction::ERROR) {
299 throw std::runtime_error("detected infinite size factor");
300 } else if (handle_infinite == HandlerAction::SANITIZE) {
301 auto largest = find_largest_valid_factor(n, size_factors);
302 for (size_t i = 0; i < n; ++i) {
303 auto& s = size_factors[i];
304 if (std::isinf(s)) {
305 s = largest;
306 }
307 }
308 }
309 }
310 }
311
312public:
320 template<typename T>
321 void run(size_t n, T* size_factors) const {
322 auto status = validate_size_factors(n, size_factors);
323 run(n, size_factors, status);
324 }
325};
326
327}
328
329#endif
Sanitize invalid size factors.
Definition SanitizeSizeFactors.hpp:75
SanitizeSizeFactors & set_handle_nan(HandlerAction h=Defaults::handle_nan)
Definition SanitizeSizeFactors.hpp:160
void run(size_t n, T *size_factors, const SizeFactorValidity &status) const
Definition SanitizeSizeFactors.hpp:251
HandlerAction
Definition SanitizeSizeFactors.hpp:84
void run(size_t n, T *size_factors) const
Definition SanitizeSizeFactors.hpp:321
SanitizeSizeFactors & set_handle_infinite(HandlerAction h=Defaults::handle_infinite)
Definition SanitizeSizeFactors.hpp:174
SanitizeSizeFactors & set_handle_negative(HandlerAction h=Defaults::handle_negative)
Definition SanitizeSizeFactors.hpp:147
SanitizeSizeFactors & set_handle_non_finite(HandlerAction h)
Definition SanitizeSizeFactors.hpp:200
SanitizeSizeFactors & set_handle_non_positive(HandlerAction h)
Definition SanitizeSizeFactors.hpp:187
SanitizeSizeFactors & set_handle_zero(HandlerAction h=Defaults::handle_zero)
Definition SanitizeSizeFactors.hpp:133
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
SizeFactorValidity validate_size_factors(size_t n, const T *size_factors)
Definition SanitizeSizeFactors.hpp:46
Default parameters.
Definition SanitizeSizeFactors.hpp:89
static constexpr HandlerAction handle_zero
Definition SanitizeSizeFactors.hpp:93
static constexpr HandlerAction handle_infinite
Definition SanitizeSizeFactors.hpp:108
static constexpr HandlerAction handle_negative
Definition SanitizeSizeFactors.hpp:98
static constexpr HandlerAction handle_nan
Definition SanitizeSizeFactors.hpp:103
Validity of size factors.
Definition SanitizeSizeFactors.hpp:12
bool has_nan
Definition SanitizeSizeFactors.hpp:26
bool has_zero
Definition SanitizeSizeFactors.hpp:21
bool has_infinite
Definition SanitizeSizeFactors.hpp:31
bool has_negative
Definition SanitizeSizeFactors.hpp:16