scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
LogNormCounts.hpp
Go to the documentation of this file.
1#ifndef SCRAN_LOG_NORM_COUNTS_H
2#define SCRAN_LOG_NORM_COUNTS_H
3
4#include "../utils/macros.hpp"
5
6#include <algorithm>
7#include <vector>
8#include <numeric>
9
10#include "tatami/tatami.hpp"
11
12#include "CenterSizeFactors.hpp"
13#include "SanitizeSizeFactors.hpp"
14#include "ChoosePseudoCount.hpp"
15
22namespace scran {
23
34public:
/**
 * @brief Default parameter settings.
 */
struct Defaults {
    /**
     * Pseudo-count added to the normalized values before the log-transformation,
     * used when no pseudo-count is chosen automatically.
     */
    static constexpr double pseudo_count{1.0};

    /**
     * Whether a pseudo-count other than 1 should be folded into the size factors,
     * so that the transformation can always be expressed as a log1p operation.
     * (Presumably this keeps the delayed result sparse - confirm against the tatami helpers.)
     */
    static constexpr bool sparse_addition{true};

    /**
     * Whether the pseudo-count should be derived from the size factors
     * (via `ChoosePseudoCount`) instead of using the fixed `pseudo_count`.
     */
    static constexpr bool choose_pseudo_count{false};

    /**
     * Whether the size factors should be passed through `CenterSizeFactors`
     * before they are used for scaling.
     */
    static constexpr bool center{true};

    /**
     * Whether size factors of zero should be sanitized (`true`)
     * or treated as an error (`false`).
     */
    static constexpr bool handle_zeros{false};

    /**
     * Whether non-finite size factors should be sanitized (`true`)
     * or treated as an error (`false`).
     */
    static constexpr bool handle_non_finite{false};

    /**
     * Number of threads handed to `tatami::column_sums()` when size factors
     * are computed from the matrix itself.
     */
    static constexpr int num_threads{1};
};
74
75private:
76 double pseudo_count = Defaults::pseudo_count;
77 bool sparse_addition = Defaults::sparse_addition;
78 bool handle_zeros = Defaults::handle_zeros;
79 bool handle_non_finite = Defaults::handle_non_finite;
80 int nthreads = Defaults::num_threads;
81
82 bool center = Defaults::center;
83 CenterSizeFactors centerer;
84
85 bool choose_pseudo_count = Defaults::choose_pseudo_count;
86 ChoosePseudoCount pseudo_chooser;
87
88public:
100 pseudo_count = p;
101 return *this;
102 }
103
114 sparse_addition = a;
115 return *this;
116 }
117
132 center = c;
133 return *this;
134 }
135
145
159 handle_zeros = z;
160 return *this;
161 }
162
175 handle_non_finite = n;
176 return *this;
177 }
178
187 nthreads = n;
188 return *this;
189 }
190
191public:
199 choose_pseudo_count = c;
200 return *this;
201 }
202
209 pseudo_chooser.set_max_bias(m);
210 return *this;
211 }
212
219 pseudo_chooser.set_quantile(q);
220 return *this;
221 }
222
229 pseudo_chooser.set_min_value(m);
230 return *this;
231 }
232
233public:
246 template<class MAT, class V>
247 std::shared_ptr<MAT> run(std::shared_ptr<MAT> mat, V size_factors) const {
248 return run_blocked(std::move(mat), std::move(size_factors), static_cast<int*>(NULL));
249 }
250
269 template<class MAT, class V, typename B>
270 std::shared_ptr<MAT> run_blocked(std::shared_ptr<MAT> mat, V size_factors, const B* block) const {
271 // One might ask why we don't require a pointer for size_factors here.
272 // It's because size_factors need to be moved into the Delayed operation
273 // anyway, so we might as well ask the user to construct a vector for us.
274 if (size_factors.size() != mat->ncol()) {
275 throw std::runtime_error("number of size factors and columns are not equal");
276 }
277
278 SizeFactorValidity cresults;
279 if (center) {
280 cresults = centerer.run_blocked(size_factors.size(), size_factors.data(), block);
281 } else {
282 cresults = validate_size_factors(size_factors.size(), size_factors.data());
283 }
284
285 SanitizeSizeFactors sanitizer;
286 sanitizer.set_handle_zero(handle_zeros ? SanitizeSizeFactors::HandlerAction::SANITIZE : SanitizeSizeFactors::HandlerAction::ERROR);
287 sanitizer.set_handle_non_finite(handle_non_finite ? SanitizeSizeFactors::HandlerAction::SANITIZE : SanitizeSizeFactors::HandlerAction::ERROR);
288 sanitizer.run(size_factors.size(), size_factors.data(), cresults);
289
290 double current_pseudo = pseudo_count;
291 if (choose_pseudo_count) {
292 current_pseudo = pseudo_chooser.run(size_factors.size(), size_factors.data());
293 }
294
295 if (sparse_addition && current_pseudo != 1) {
296 for (auto& d : size_factors) {
297 d *= current_pseudo;
298 }
299 current_pseudo = 1; // effectively 1 now.
300 }
301
302 typedef typename MAT::value_type Value_;
303 auto div = tatami::make_DelayedUnaryIsometricOp(std::move(mat), tatami::make_DelayedDivideVectorHelper<true, 1, Value_>(std::move(size_factors)));
304
305 if (current_pseudo == 1) {
306 return tatami::make_DelayedUnaryIsometricOp(std::move(div), tatami::DelayedLog1pHelper<Value_>(2.0));
307 } else {
308 auto add = tatami::make_DelayedUnaryIsometricOp(std::move(div), tatami::make_DelayedAddScalarHelper<Value_>(current_pseudo));
309 return tatami::make_DelayedUnaryIsometricOp(std::move(add), tatami::DelayedLogHelper<Value_>(2.0));
310 }
311 }
312
313public:
324 template<class MAT>
325 std::shared_ptr<MAT> run(std::shared_ptr<MAT> mat) const {
326 auto size_factors = tatami::column_sums(mat.get(), nthreads);
327 return run_blocked(std::move(mat), std::move(size_factors), static_cast<int*>(NULL));
328 }
329
342 template<class MAT, typename B>
343 std::shared_ptr<MAT> run_blocked(std::shared_ptr<MAT> mat, const B* block) const {
344 auto size_factors = tatami::column_sums(mat.get(), nthreads);
345 return run_blocked(mat, std::move(size_factors), block);
346 }
347};
348
349};
350
351#endif
Center size factors prior to normalization.
Choose a suitable pseudo-count.
Center size factors prior to scaling normalization.
Definition CenterSizeFactors.hpp:27
BlockMode
Definition CenterSizeFactors.hpp:32
SizeFactorValidity run_blocked(size_t n, T *size_factors, const B *block) const
Definition CenterSizeFactors.hpp:157
CenterSizeFactors & set_block_mode(BlockMode b=Defaults::block_mode)
Definition CenterSizeFactors.hpp:69
Choose a pseudo-count for log-transformation.
Definition ChoosePseudoCount.hpp:35
ChoosePseudoCount & set_max_bias(double b=Defaults::max_bias)
Definition ChoosePseudoCount.hpp:80
ChoosePseudoCount & set_min_value(double v=Defaults::min_value)
Definition ChoosePseudoCount.hpp:91
ChoosePseudoCount & set_quantile(double q=Defaults::quantile)
Definition ChoosePseudoCount.hpp:70
double run(size_t n, const double *size_factors, double *buffer) const
Definition ChoosePseudoCount.hpp:122
Compute log-normalized expression values.
Definition LogNormCounts.hpp:33
LogNormCounts & set_sparse_addition(bool a=Defaults::sparse_addition)
Definition LogNormCounts.hpp:113
LogNormCounts & set_center(bool c=Defaults::center)
Definition LogNormCounts.hpp:131
std::shared_ptr< MAT > run_blocked(std::shared_ptr< MAT > mat, const B *block) const
Definition LogNormCounts.hpp:343
LogNormCounts & set_quantile(double q=ChoosePseudoCount::Defaults::quantile)
Definition LogNormCounts.hpp:218
LogNormCounts & set_block_mode(CenterSizeFactors::BlockMode b=CenterSizeFactors::Defaults::block_mode)
Definition LogNormCounts.hpp:141
std::shared_ptr< MAT > run(std::shared_ptr< MAT > mat) const
Definition LogNormCounts.hpp:325
LogNormCounts & set_handle_non_finite(bool n=Defaults::handle_non_finite)
Definition LogNormCounts.hpp:174
std::shared_ptr< MAT > run_blocked(std::shared_ptr< MAT > mat, V size_factors, const B *block) const
Definition LogNormCounts.hpp:270
LogNormCounts & set_handle_zeros(bool z=Defaults::handle_zeros)
Definition LogNormCounts.hpp:158
LogNormCounts & set_num_threads(int n=Defaults::num_threads)
Definition LogNormCounts.hpp:186
LogNormCounts & set_min_value(double m=ChoosePseudoCount::Defaults::min_value)
Definition LogNormCounts.hpp:228
LogNormCounts & set_choose_pseudo_count(bool c=Defaults::choose_pseudo_count)
Definition LogNormCounts.hpp:198
LogNormCounts & set_max_bias(double m=ChoosePseudoCount::Defaults::max_bias)
Definition LogNormCounts.hpp:208
std::shared_ptr< MAT > run(std::shared_ptr< MAT > mat, V size_factors) const
Definition LogNormCounts.hpp:247
LogNormCounts & set_pseudo_count(double p=Defaults::pseudo_count)
Definition LogNormCounts.hpp:99
Sanitize invalid size factors.
Definition SanitizeSizeFactors.hpp:75
void run(size_t n, T *size_factors, const SizeFactorValidity &status) const
Definition SanitizeSizeFactors.hpp:251
SanitizeSizeFactors & set_handle_non_finite(HandlerAction h)
Definition SanitizeSizeFactors.hpp:200
SanitizeSizeFactors & set_handle_zero(HandlerAction h=Defaults::handle_zero)
Definition SanitizeSizeFactors.hpp:133
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
SizeFactorValidity validate_size_factors(size_t n, const T *size_factors)
Definition SanitizeSizeFactors.hpp:46
static constexpr BlockMode block_mode
Definition CenterSizeFactors.hpp:41
static constexpr double quantile
Definition ChoosePseudoCount.hpp:44
static constexpr double max_bias
Definition ChoosePseudoCount.hpp:49
static constexpr double min_value
Definition ChoosePseudoCount.hpp:54
Default parameter settings.
Definition LogNormCounts.hpp:38
static constexpr bool handle_zeros
Definition LogNormCounts.hpp:62
static constexpr int num_threads
Definition LogNormCounts.hpp:72
static constexpr double pseudo_count
Definition LogNormCounts.hpp:42
static constexpr bool choose_pseudo_count
Definition LogNormCounts.hpp:52
static constexpr bool handle_non_finite
Definition LogNormCounts.hpp:67
static constexpr bool sparse_addition
Definition LogNormCounts.hpp:47
static constexpr bool center
Definition LogNormCounts.hpp:57
Validity of size factors.
Definition SanitizeSizeFactors.hpp:12